edn_turbo 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +6 -6
- data/bin/build_docker_image.sh +1 -1
- data/ext/edn_turbo/edn_parser.cc +1090 -954
- data/ext/edn_turbo/edn_parser.rl +872 -842
- data/ext/edn_turbo/main.cc +3 -1
- data/ext/edn_turbo/parser.h +1 -0
- data/ext/edn_turbo/util.cc +18 -10
- data/ext/edn_turbo/util.h +2 -0
- data/lib/edn_turbo.rb +6 -0
- data/lib/edn_turbo/version.rb +2 -2
- data/spec/edn_turbo/edn_parser_spec.rb +34 -8
- metadata +2 -2
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -41,43 +41,43 @@
|
|
41
41
|
//
|
42
42
|
|
43
43
|
%%{
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
44
|
+
machine EDN_common;
|
45
|
+
|
46
|
+
cr = '\n';
|
47
|
+
counter = ( cr @{ line_number++; } );
|
48
|
+
cr_neg = [^\n];
|
49
|
+
ws = [\t\v\f\r ] | ',' | counter;
|
50
|
+
comment = ';' cr_neg* counter;
|
51
|
+
ignore = ws | comment;
|
52
|
+
|
53
|
+
operators = [/\.\*!_\?$%&<>\=+\-\'];
|
54
|
+
|
55
|
+
begin_dispatch = '#';
|
56
|
+
begin_keyword = ':';
|
57
|
+
begin_char = '\\';
|
58
|
+
begin_vector = '[';
|
59
|
+
begin_map = '{';
|
60
|
+
begin_list = '(';
|
61
|
+
begin_meta = '^';
|
62
|
+
string_delim = '"';
|
63
|
+
begin_number = digit;
|
64
|
+
begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
|
65
|
+
begin_symbol = alpha;
|
66
|
+
|
67
|
+
# int / decimal rules
|
68
|
+
integer = ('0' | [1-9] digit*);
|
69
|
+
exp = ([Ee] [+\-]? digit+);
|
70
|
+
|
71
|
+
|
72
|
+
# common actions
|
73
|
+
action close_err {
|
74
|
+
std::stringstream s;
|
75
|
+
s << "unterminated " << EDN_TYPE;
|
76
|
+
error(__FUNCTION__, s.str());
|
77
|
+
fhold; fbreak;
|
78
|
+
}
|
79
|
+
|
80
|
+
action exit { fhold; fbreak; }
|
81
81
|
}%%
|
82
82
|
|
83
83
|
// ============================================================
|
@@ -85,135 +85,140 @@
|
|
85
85
|
//
|
86
86
|
|
87
87
|
%%{
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
88
|
+
machine EDN_value;
|
89
|
+
include EDN_common;
|
90
|
+
|
91
|
+
write data;
|
92
|
+
|
93
|
+
action parse_val_string {
|
94
|
+
// string types within double-quotes
|
95
|
+
const char *np = parse_string(fpc, pe, v);
|
96
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
97
|
+
}
|
98
|
+
|
99
|
+
action parse_val_keyword {
|
100
|
+
// tokens with a leading ':'
|
101
|
+
const char *np = parse_keyword(fpc, pe, v);
|
102
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
103
|
+
}
|
104
|
+
|
105
|
+
action parse_val_number {
|
106
|
+
// tokens w/ leading digits: non-negative integers & decimals.
|
107
|
+
// try to parse a decimal first
|
108
|
+
const char *np = parse_decimal(fpc, pe, v);
|
109
|
+
if (np == nullptr) {
|
110
|
+
// if we can't, try to parse it as a ratio
|
111
|
+
np = parse_ratio(fpc, pe, v);
|
112
|
+
|
113
|
+
// otherwise, an int
|
114
|
+
if (np == nullptr) {
|
111
115
|
np = parse_integer(fpc, pe, v);
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
119
|
+
if (np) {
|
120
|
+
fexec np;
|
121
|
+
fhold;
|
122
|
+
fbreak;
|
123
|
+
}
|
124
|
+
else {
|
125
|
+
error(__FUNCTION__, "number format error", *p);
|
126
|
+
fexec pe;
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
action parse_val_operator {
|
131
|
+
// stand-alone operators *, +, -, etc.
|
132
|
+
const char *np = parse_operator(fpc, pe, v);
|
133
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
134
|
+
}
|
135
|
+
|
136
|
+
action parse_val_char {
|
137
|
+
// tokens w/ leading \ (escaped characters \newline, \c, etc.)
|
138
|
+
const char *np = parse_esc_char(fpc, pe, v);
|
139
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
140
|
+
}
|
141
|
+
|
142
|
+
action parse_val_symbol {
|
143
|
+
// user identifiers and reserved keywords (true, false, nil)
|
144
|
+
VALUE sym = Qnil;
|
145
|
+
const char *np = parse_symbol(fpc, pe, sym);
|
146
|
+
if (np == nullptr) { fexec pe; } else {
|
147
|
+
// parse_symbol will make 'sym' a ruby string
|
148
|
+
if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
|
149
|
+
else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
|
150
|
+
else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
|
151
|
+
else {
|
152
|
+
v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
|
153
|
+
}
|
154
|
+
fexec np;
|
155
|
+
}
|
156
|
+
}
|
157
|
+
|
158
|
+
action parse_val_vector {
|
159
|
+
// [
|
160
|
+
const char *np = parse_vector(fpc, pe, v);
|
161
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
162
|
+
}
|
163
|
+
|
164
|
+
action parse_val_list {
|
165
|
+
// (
|
166
|
+
const char *np = parse_list(fpc, pe, v);
|
167
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
168
|
+
}
|
169
|
+
|
170
|
+
action parse_val_map {
|
171
|
+
// {
|
172
|
+
const char *np = parse_map(fpc, pe, v);
|
173
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
174
|
+
}
|
175
|
+
|
176
|
+
action parse_val_meta {
|
177
|
+
// ^
|
178
|
+
const char *np = parse_meta(fpc, pe);
|
179
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
180
|
+
}
|
181
|
+
|
182
|
+
action parse_val_dispatch {
|
183
|
+
// handles tokens w/ leading # ("#_", "#{", and tagged elems)
|
184
|
+
const char *np = parse_dispatch(fpc + 1, pe, v);
|
185
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
186
|
+
}
|
187
|
+
|
188
|
+
|
189
|
+
main := (
|
190
|
+
string_delim >parse_val_string |
|
191
|
+
begin_keyword >parse_val_keyword |
|
192
|
+
begin_number >parse_val_number |
|
193
|
+
operators >parse_val_operator |
|
194
|
+
begin_char >parse_val_char |
|
195
|
+
begin_symbol >parse_val_symbol |
|
196
|
+
begin_vector >parse_val_vector |
|
197
|
+
begin_list >parse_val_list |
|
198
|
+
begin_map >parse_val_map |
|
199
|
+
begin_meta >parse_val_meta |
|
200
|
+
begin_dispatch >parse_val_dispatch
|
201
|
+
) %*exit;
|
197
202
|
}%%
|
198
203
|
|
199
204
|
|
200
205
|
const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
201
206
|
{
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
207
|
+
// std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
|
208
|
+
int cs;
|
209
|
+
|
210
|
+
%% write init;
|
211
|
+
%% write exec;
|
212
|
+
|
213
|
+
if (cs >= EDN_value_first_final) {
|
214
|
+
return p;
|
215
|
+
}
|
216
|
+
else if (cs == EDN_value_error) {
|
217
|
+
error(__FUNCTION__, "token error", *p);
|
218
|
+
return pe;
|
219
|
+
}
|
220
|
+
else if (cs == EDN_value_en_main) {} // silence ragel warning
|
221
|
+
return nullptr;
|
217
222
|
}
|
218
223
|
|
219
224
|
|
@@ -225,51 +230,51 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
225
230
|
// ascii range is found.
|
226
231
|
//
|
227
232
|
%%{
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
233
|
+
machine EDN_string;
|
234
|
+
include EDN_common;
|
235
|
+
|
236
|
+
write data;
|
237
|
+
|
238
|
+
action parse_chars {
|
239
|
+
if (edn::util::parse_byte_stream(p_save + 1, p, v, encode)) {
|
240
|
+
fexec p + 1;
|
241
|
+
} else {
|
242
|
+
fhold; fbreak;
|
243
|
+
}
|
244
|
+
}
|
245
|
+
|
246
|
+
action mark_for_encoding {
|
247
|
+
encode = true;
|
248
|
+
}
|
249
|
+
|
250
|
+
main := string_delim (
|
251
|
+
(^([\"\\] | 0xc2..0xf5) |
|
252
|
+
((0xc2..0xf5) |
|
253
|
+
'\\'[\"\\/bfnrt] |
|
254
|
+
'\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
|
255
|
+
'\\'^([\"\\/bfnrtu]))* %parse_chars
|
256
|
+
) :>> string_delim @err(close_err) @exit;
|
252
257
|
}%%
|
253
258
|
|
254
259
|
|
255
260
|
const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
|
256
261
|
{
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
262
|
+
static const char* EDN_TYPE = "string";
|
263
|
+
int cs;
|
264
|
+
bool encode = false;
|
265
|
+
|
266
|
+
%% write init;
|
267
|
+
const char* p_save = p;
|
268
|
+
%% write exec;
|
269
|
+
|
270
|
+
if (cs >= EDN_string_first_final) {
|
271
|
+
return p + 1;
|
272
|
+
}
|
273
|
+
else if (cs == EDN_string_error) {
|
274
|
+
return pe;
|
275
|
+
}
|
276
|
+
else if (cs == EDN_string_en_main) {} // silence ragel warning
|
277
|
+
return nullptr;
|
273
278
|
}
|
274
279
|
|
275
280
|
|
@@ -278,79 +283,78 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
|
|
278
283
|
// keyword parsing
|
279
284
|
//
|
280
285
|
%%{
|
281
|
-
|
282
|
-
|
286
|
+
machine EDN_keyword;
|
287
|
+
include EDN_common;
|
283
288
|
|
284
|
-
|
285
|
-
|
289
|
+
keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
|
290
|
+
keyword_chars = (keyword_start | digit | ':');
|
286
291
|
|
287
|
-
|
288
|
-
|
292
|
+
keyword_name = keyword_start keyword_chars*;
|
293
|
+
keyword = keyword_name ('/' keyword_chars*)?;
|
289
294
|
|
290
|
-
|
295
|
+
write data;
|
291
296
|
|
292
297
|
|
293
|
-
|
298
|
+
main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
|
294
299
|
}%%
|
295
300
|
|
296
301
|
|
297
302
|
const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
|
298
303
|
{
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
304
|
+
int cs;
|
305
|
+
|
306
|
+
%% write init;
|
307
|
+
const char* p_save = p;
|
308
|
+
%% write exec;
|
309
|
+
|
310
|
+
if (cs >= EDN_keyword_first_final) {
|
311
|
+
std::string buf;
|
312
|
+
uintmax_t len = p - p_save;
|
313
|
+
// don't include leading ':' because the ruby symbol will handle it
|
314
|
+
buf.append(p_save + 1, len - 1);
|
315
|
+
v = ID2SYM(rb_intern(buf.c_str()));
|
316
|
+
return p;
|
317
|
+
}
|
318
|
+
else if (cs == EDN_keyword_error) {
|
319
|
+
error(__FUNCTION__, "invalid keyword", *p);
|
320
|
+
return pe;
|
321
|
+
}
|
322
|
+
else if (cs == EDN_keyword_en_main) {} // silence ragel warning
|
323
|
+
return nullptr;
|
319
324
|
}
|
320
325
|
|
321
326
|
|
322
|
-
|
323
327
|
// ============================================================
|
324
328
|
// decimal parsing machine
|
325
329
|
//
|
326
330
|
%%{
|
327
|
-
|
328
|
-
|
331
|
+
machine EDN_decimal;
|
332
|
+
include EDN_common;
|
329
333
|
|
330
|
-
|
334
|
+
write data noerror;
|
331
335
|
|
332
336
|
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
+
main := ('-'|'+')? (
|
338
|
+
(integer '.' digit* (exp? [M]?)) |
|
339
|
+
(integer exp)
|
340
|
+
) (^[0-9Ee.+\-M]? @exit );
|
337
341
|
}%%
|
338
342
|
|
339
343
|
|
340
344
|
const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
|
341
345
|
{
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
346
|
+
int cs;
|
347
|
+
|
348
|
+
%% write init;
|
349
|
+
const char* p_save = p;
|
350
|
+
%% write exec;
|
351
|
+
|
352
|
+
if (cs >= EDN_decimal_first_final) {
|
353
|
+
v = edn::util::float_to_ruby(p_save, p - p_save);
|
354
|
+
return p + 1;
|
355
|
+
}
|
356
|
+
else if (cs == EDN_decimal_en_main) {} // silence ragel warning
|
357
|
+
return nullptr;
|
354
358
|
}
|
355
359
|
|
356
360
|
|
@@ -358,34 +362,65 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
|
|
358
362
|
// integer parsing machine - M suffix will return a BigNum
|
359
363
|
//
|
360
364
|
%%{
|
361
|
-
|
362
|
-
|
365
|
+
machine EDN_integer;
|
366
|
+
include EDN_common;
|
363
367
|
|
364
|
-
|
368
|
+
write data noerror;
|
365
369
|
|
366
370
|
|
367
|
-
|
368
|
-
|
369
|
-
|
371
|
+
main := (
|
372
|
+
('-'|'+')? (integer [MN]?)
|
373
|
+
) (^[0-9MN+\-]? @exit);
|
370
374
|
}%%
|
371
375
|
|
372
376
|
const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
|
373
377
|
{
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
378
|
+
int cs;
|
379
|
+
|
380
|
+
%% write init;
|
381
|
+
const char* p_save = p;
|
382
|
+
%% write exec;
|
383
|
+
|
384
|
+
if (cs >= EDN_integer_first_final) {
|
385
|
+
v = edn::util::integer_to_ruby(p_save, p - p_save);
|
386
|
+
return p + 1;
|
387
|
+
}
|
388
|
+
else if (cs == EDN_integer_en_main) {} // silence ragel warning
|
389
|
+
return nullptr;
|
386
390
|
}
|
387
391
|
|
388
392
|
|
393
|
+
// ============================================================
|
394
|
+
// ratio parsing machine
|
395
|
+
//
|
396
|
+
%%{
|
397
|
+
machine EDN_ratio;
|
398
|
+
include EDN_common;
|
399
|
+
|
400
|
+
write data noerror;
|
401
|
+
|
402
|
+
|
403
|
+
main := (
|
404
|
+
('-'|'+')? (integer '/' integer)
|
405
|
+
) (^[0-9+\-\/]? @exit);
|
406
|
+
}%%
|
407
|
+
|
408
|
+
|
409
|
+
const char* edn::Parser::parse_ratio(const char *p, const char *pe, VALUE& v)
|
410
|
+
{
|
411
|
+
int cs;
|
412
|
+
|
413
|
+
%% write init;
|
414
|
+
const char* p_save = p;
|
415
|
+
%% write exec;
|
416
|
+
|
417
|
+
if (cs >= EDN_ratio_first_final) {
|
418
|
+
v = edn::util::ratio_to_ruby(p_save, p - p_save);
|
419
|
+
return p + 1;
|
420
|
+
}
|
421
|
+
else if (cs == EDN_ratio_en_main) {} // silence ragel warning
|
422
|
+
return nullptr;
|
423
|
+
}
|
389
424
|
|
390
425
|
// ============================================================
|
391
426
|
// operator parsing - handles tokens w/ a leading operator:
|
@@ -395,81 +430,86 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
|
|
395
430
|
// 3. stand-alone operators: +, -, /, *, etc.
|
396
431
|
//
|
397
432
|
%%{
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
433
|
+
machine EDN_operator;
|
434
|
+
include EDN_common;
|
435
|
+
|
436
|
+
write data;
|
437
|
+
|
438
|
+
action parse_op_symbol {
|
439
|
+
// parse a symbol including the leading operator (-, +, .)
|
440
|
+
VALUE sym = Qnil;
|
441
|
+
const char *np = parse_symbol(p_save, pe, sym);
|
442
|
+
if (np == nullptr) { fexec pe; } else {
|
443
|
+
if (sym != Qnil)
|
444
|
+
v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
|
445
|
+
fexec np;
|
446
|
+
}
|
447
|
+
}
|
448
|
+
|
449
|
+
action parse_op_number {
|
450
|
+
// parse a number with the leading symbol - this is slightly
|
451
|
+
// different than the one within EDN_value since it includes
|
452
|
+
// the leading - or +
|
453
|
+
//
|
454
|
+
// try to parse a decimal first
|
455
|
+
const char *np = parse_decimal(p_save, pe, v);
|
456
|
+
if (np == nullptr) {
|
457
|
+
// if we can't, try to parse it as a ratio
|
458
|
+
np = parse_ratio(p_save, pe, v);
|
459
|
+
|
460
|
+
if (np == nullptr) {
|
461
|
+
// again, if we can't, try to parse it as an int
|
423
462
|
np = parse_integer(p_save, pe, v);
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
463
|
+
}
|
464
|
+
}
|
465
|
+
|
466
|
+
if (np) {
|
467
|
+
fexec np;
|
468
|
+
fhold;
|
469
|
+
fbreak;
|
470
|
+
}
|
471
|
+
else {
|
472
|
+
error(__FUNCTION__, "number format error", *p);
|
473
|
+
fexec pe;
|
474
|
+
}
|
475
|
+
}
|
476
|
+
|
477
|
+
action parse_op {
|
478
|
+
// stand-alone operators (-, +, /, ... etc)
|
479
|
+
char op[2] = { *p_save, 0 };
|
480
|
+
VALUE sym = rb_str_new2(op);
|
481
|
+
v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
|
482
|
+
}
|
483
|
+
|
484
|
+
valid_non_numeric_chars = alpha|operators|':'|'#';
|
485
|
+
valid_chars = valid_non_numeric_chars | digit;
|
486
|
+
|
487
|
+
main := (
|
488
|
+
('-'|'+') begin_number >parse_op_number |
|
489
|
+
(operators - [\-\+\.]) valid_chars >parse_op_symbol |
|
490
|
+
[\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
|
491
|
+
operators ignore* >parse_op
|
492
|
+
) ^(valid_chars)? @exit;
|
453
493
|
}%%
|
454
494
|
|
455
495
|
|
456
496
|
const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
|
457
497
|
{
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
498
|
+
int cs;
|
499
|
+
|
500
|
+
%% write init;
|
501
|
+
const char* p_save = p;
|
502
|
+
%% write exec;
|
503
|
+
|
504
|
+
if (cs >= EDN_operator_first_final) {
|
505
|
+
return p;
|
506
|
+
}
|
507
|
+
else if (cs == EDN_operator_error) {
|
508
|
+
error(__FUNCTION__, "symbol syntax error", *p);
|
509
|
+
return pe;
|
510
|
+
}
|
511
|
+
else if (cs == EDN_operator_en_main) {} // silence ragel warning
|
512
|
+
return nullptr;
|
473
513
|
}
|
474
514
|
|
475
515
|
|
@@ -478,47 +518,46 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
|
|
478
518
|
// escaped char parsing - handles \c, \newline, \formfeed, etc.
|
479
519
|
//
|
480
520
|
%%{
|
481
|
-
|
482
|
-
|
521
|
+
machine EDN_escaped_char;
|
522
|
+
include EDN_common;
|
483
523
|
|
484
|
-
|
524
|
+
write data;
|
485
525
|
|
486
|
-
|
526
|
+
valid_chars = extend;
|
487
527
|
|
488
528
|
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
529
|
+
main := begin_char (
|
530
|
+
'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
|
531
|
+
valid_chars
|
532
|
+
) (ignore* | [\\\]\}\)])? @exit;
|
493
533
|
}%%
|
494
534
|
|
495
535
|
|
496
536
|
const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
|
497
537
|
{
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
538
|
+
int cs;
|
539
|
+
|
540
|
+
%% write init;
|
541
|
+
const char* p_save = p;
|
542
|
+
%% write exec;
|
543
|
+
|
544
|
+
if (cs >= EDN_escaped_char_first_final) {
|
545
|
+
// convert the escaped value to a character
|
546
|
+
if (!edn::util::parse_escaped_char(p_save + 1, p, v)) {
|
547
|
+
return pe;
|
548
|
+
}
|
549
|
+
return p;
|
550
|
+
}
|
551
|
+
else if (cs == EDN_escaped_char_error) {
|
552
|
+
error(__FUNCTION__, "unexpected value", *p);
|
553
|
+
return pe;
|
554
|
+
}
|
555
|
+
else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
|
556
|
+
return nullptr;
|
517
557
|
}
|
518
558
|
|
519
559
|
|
520
560
|
|
521
|
-
|
522
561
|
// ============================================================
|
523
562
|
// symbol parsing - handles identifiers that begin with an alpha
|
524
563
|
// character and an optional leading operator (name, -today,
|
@@ -526,58 +565,57 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
|
|
526
565
|
//
|
527
566
|
//
|
528
567
|
%%{
|
529
|
-
|
530
|
-
|
568
|
+
machine EDN_symbol;
|
569
|
+
include EDN_common;
|
531
570
|
|
532
|
-
|
571
|
+
write data;
|
533
572
|
|
534
|
-
|
535
|
-
|
536
|
-
|
573
|
+
symbol_ops_1 = [\.\-\+];
|
574
|
+
symbol_ops_2 = [\*!_\?$%&<>\=\'];
|
575
|
+
symbol_ops_3 = [:\#];
|
537
576
|
|
538
|
-
|
577
|
+
symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
|
539
578
|
|
540
|
-
|
579
|
+
symbol_chars = symbol_start | digit | symbol_ops_3;
|
541
580
|
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
581
|
+
symbol_name = (
|
582
|
+
(alpha symbol_chars*) |
|
583
|
+
(symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
|
584
|
+
(symbol_start symbol_chars+) |
|
585
|
+
operators{1}
|
586
|
+
);
|
587
|
+
symbol = '/' | (symbol_name ('/' symbol_name)?);
|
549
588
|
|
550
589
|
|
551
|
-
|
552
|
-
|
553
|
-
|
590
|
+
main := (
|
591
|
+
symbol
|
592
|
+
) ignore* (^(symbol_chars | '/')? @exit);
|
554
593
|
}%%
|
555
594
|
|
556
595
|
|
557
596
|
const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
558
597
|
{
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
598
|
+
int cs;
|
599
|
+
|
600
|
+
%% write init;
|
601
|
+
const char* p_save = p;
|
602
|
+
%% write exec;
|
603
|
+
|
604
|
+
if (cs >= EDN_symbol_first_final) {
|
605
|
+
// copy the symbol text
|
606
|
+
if (s == Qnil)
|
607
|
+
s = rb_str_new2("");
|
608
|
+
rb_str_cat(s, p_save, p - p_save);
|
609
|
+
return p;
|
610
|
+
}
|
611
|
+
else if (cs == EDN_symbol_error) {
|
612
|
+
error(__FUNCTION__, "invalid symbol sequence", *p);
|
613
|
+
}
|
614
|
+
else if (cs == EDN_symbol_en_main) {} // silence ragel warning
|
615
|
+
return nullptr;
|
577
616
|
}
|
578
617
|
|
579
618
|
|
580
|
-
|
581
619
|
// ============================================================
|
582
620
|
// EDN_sequence_common is used to parse EDN containers - elements are
|
583
621
|
// initially stored in an array and then the final corresponding
|
@@ -585,75 +623,75 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
585
623
|
// sets the same array is used)
|
586
624
|
//
|
587
625
|
%%{
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
626
|
+
machine EDN_sequence_common;
|
627
|
+
include EDN_common;
|
628
|
+
|
629
|
+
action open_seq {
|
630
|
+
// sequences store elements in an array, then process it to
|
631
|
+
// convert it to a list, set, or map as needed once the
|
632
|
+
// sequence end is reached
|
633
|
+
elems = rb_ary_new();
|
634
|
+
// additionally, metadata for elements in the sequence may be
|
635
|
+
// carried so we must push a new level in the metadata stack
|
636
|
+
new_meta_list();
|
637
|
+
}
|
638
|
+
|
639
|
+
action close_seq {
|
640
|
+
// remove the current metadata level
|
641
|
+
del_top_meta_list();
|
642
|
+
}
|
643
|
+
|
644
|
+
action parse_item {
|
645
|
+
// reads an item within a sequence (vector, list, map, or
|
646
|
+
// set). Regardless of the sequence type, an array of the
|
647
|
+
// items is built. Once done, the sequence parser will convert
|
648
|
+
// if needed
|
649
|
+
VALUE e;
|
650
|
+
std::size_t meta_sz = meta_size();
|
651
|
+
const char *np = parse_value(fpc, pe, e);
|
652
|
+
if (np == nullptr) { fhold; fbreak; } else {
|
653
|
+
// if there's an entry in the discard list, the current
|
654
|
+
// object is not meant to be kept due to a #_ so don't
|
655
|
+
// push it into the list of elements
|
656
|
+
if (!discard.empty()) {
|
657
|
+
discard.pop_back();
|
658
|
+
}
|
659
|
+
else if (!meta_empty()) {
|
660
|
+
// check if parse_value added metadata
|
661
|
+
if (meta_size() == meta_sz) {
|
662
|
+
// there's metadata and it didn't increase so
|
663
|
+
// parse_value() read an element we care
|
664
|
+
// about. Bind the metadata to it and add it to
|
665
|
+
// the sequence
|
666
|
+
e = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
|
667
|
+
rb_ary_push(elems, e);
|
620
668
|
}
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
// no metadata.. just push it
|
633
|
-
rb_ary_push(elems, e);
|
634
|
-
}
|
635
|
-
fexec np;
|
636
|
-
}
|
637
|
-
}
|
638
|
-
|
639
|
-
element = begin_value >parse_item;
|
640
|
-
next_element = ignore* element;
|
641
|
-
sequence = ((element ignore*) (next_element ignore*)*);
|
669
|
+
} else {
|
670
|
+
// no metadata.. just push it
|
671
|
+
rb_ary_push(elems, e);
|
672
|
+
}
|
673
|
+
fexec np;
|
674
|
+
}
|
675
|
+
}
|
676
|
+
|
677
|
+
element = begin_value >parse_item;
|
678
|
+
next_element = ignore* element;
|
679
|
+
sequence = ((element ignore*) (next_element ignore*)*);
|
642
680
|
}%%
|
643
681
|
|
644
682
|
//
|
645
683
|
// vector-specific machine
|
646
684
|
%%{
|
647
|
-
|
648
|
-
|
685
|
+
machine EDN_vector;
|
686
|
+
include EDN_sequence_common;
|
649
687
|
|
650
|
-
|
688
|
+
end_vector = ']';
|
651
689
|
|
652
|
-
|
690
|
+
write data;
|
653
691
|
|
654
|
-
|
655
|
-
|
656
|
-
|
692
|
+
main := begin_vector @open_seq (
|
693
|
+
ignore* sequence? :>> end_vector @close_seq
|
694
|
+
) @err(close_err) @exit;
|
657
695
|
}%%
|
658
696
|
|
659
697
|
|
@@ -662,24 +700,24 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
662
700
|
//
|
663
701
|
const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
664
702
|
{
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
703
|
+
static const char* EDN_TYPE = "vector";
|
704
|
+
|
705
|
+
int cs;
|
706
|
+
VALUE elems; // will store the vector's elements - allocated in @open_seq
|
707
|
+
|
708
|
+
%% write init;
|
709
|
+
%% write exec;
|
710
|
+
|
711
|
+
if (cs >= EDN_vector_first_final) {
|
712
|
+
v = elems;
|
713
|
+
return p + 1;
|
714
|
+
}
|
715
|
+
else if (cs == EDN_vector_error) {
|
716
|
+
error(__FUNCTION__, "vector format error", *p);
|
717
|
+
return pe;
|
718
|
+
}
|
719
|
+
else if (cs == EDN_vector_en_main) {} // silence ragel warning
|
720
|
+
return nullptr;
|
683
721
|
}
|
684
722
|
|
685
723
|
|
@@ -688,16 +726,16 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
688
726
|
// list parsing machine
|
689
727
|
//
|
690
728
|
%%{
|
691
|
-
|
692
|
-
|
729
|
+
machine EDN_list;
|
730
|
+
include EDN_sequence_common;
|
693
731
|
|
694
|
-
|
732
|
+
end_list = ')';
|
695
733
|
|
696
|
-
|
734
|
+
write data;
|
697
735
|
|
698
|
-
|
699
|
-
|
700
|
-
|
736
|
+
main := begin_list @open_seq (
|
737
|
+
ignore* sequence? :>> end_list @close_seq
|
738
|
+
) @err(close_err) @exit;
|
701
739
|
}%%
|
702
740
|
|
703
741
|
//
|
@@ -705,26 +743,24 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
705
743
|
//
|
706
744
|
const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
|
707
745
|
{
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
else if (cs == EDN_list_en_main) {} // silence ragel warning
|
727
|
-
return nullptr;
|
746
|
+
static const char* EDN_TYPE = "list";
|
747
|
+
|
748
|
+
int cs;
|
749
|
+
VALUE elems; // stores the list's elements - allocated in @open_seq
|
750
|
+
|
751
|
+
%% write init;
|
752
|
+
%% write exec;
|
753
|
+
|
754
|
+
if (cs >= EDN_list_first_final) {
|
755
|
+
v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_LIST_METHOD, elems);
|
756
|
+
return p + 1;
|
757
|
+
}
|
758
|
+
else if (cs == EDN_list_error) {
|
759
|
+
error(__FUNCTION__, *p);
|
760
|
+
return pe;
|
761
|
+
}
|
762
|
+
else if (cs == EDN_list_en_main) {} // silence ragel warning
|
763
|
+
return nullptr;
|
728
764
|
}
|
729
765
|
|
730
766
|
|
@@ -733,56 +769,55 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
|
|
733
769
|
// hash parsing
|
734
770
|
//
|
735
771
|
%%{
|
736
|
-
|
737
|
-
|
772
|
+
machine EDN_map;
|
773
|
+
include EDN_sequence_common;
|
738
774
|
|
739
|
-
|
775
|
+
end_map = '}';
|
740
776
|
|
741
|
-
|
777
|
+
write data;
|
742
778
|
|
743
779
|
|
744
|
-
|
745
|
-
|
746
|
-
|
780
|
+
main := begin_map @open_seq (
|
781
|
+
ignore* (sequence)? :>> end_map @close_seq
|
782
|
+
) @err(close_err) @exit;
|
747
783
|
}%%
|
748
784
|
|
749
785
|
|
750
786
|
const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
751
787
|
{
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
return nullptr;
|
788
|
+
static const char* EDN_TYPE = "map";
|
789
|
+
|
790
|
+
int cs;
|
791
|
+
// since we don't know whether we're looking at a key or value,
|
792
|
+
// initially store all elements in an array (allocated in @open_seq)
|
793
|
+
VALUE elems;
|
794
|
+
|
795
|
+
%% write init;
|
796
|
+
%% write exec;
|
797
|
+
|
798
|
+
if (cs >= EDN_map_first_final) {
|
799
|
+
// hash parsing is done. Make sure we have an even count
|
800
|
+
if ((RARRAY_LEN(elems) % 2) != 0) {
|
801
|
+
error(__FUNCTION__, "odd number of elements in map");
|
802
|
+
return pe;
|
803
|
+
}
|
804
|
+
|
805
|
+
// now convert the sequence to a hash
|
806
|
+
VALUE rslt = rb_hash_new();
|
807
|
+
while (RARRAY_LEN(elems) > 0)
|
808
|
+
{
|
809
|
+
VALUE k = rb_ary_shift(elems);
|
810
|
+
rb_hash_aset(rslt, k, rb_ary_shift(elems));
|
811
|
+
}
|
812
|
+
|
813
|
+
v = rslt;
|
814
|
+
return p + 1;
|
815
|
+
}
|
816
|
+
else if (cs == EDN_map_error) {
|
817
|
+
return pe;
|
818
|
+
}
|
819
|
+
else if (cs == EDN_map_en_main) {} // silence ragel warning
|
820
|
+
return nullptr;
|
786
821
|
}
|
787
822
|
|
788
823
|
|
@@ -793,55 +828,54 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
|
793
828
|
// the remaining data to the correct parser
|
794
829
|
//
|
795
830
|
%%{
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
831
|
+
machine EDN_dispatch;
|
832
|
+
include EDN_common;
|
833
|
+
|
834
|
+
write data;
|
835
|
+
|
836
|
+
action parse_disp_set {
|
837
|
+
// #{ }
|
838
|
+
const char *np = parse_set(fpc, pe, v);
|
839
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np;
|
840
|
+
}
|
841
|
+
|
842
|
+
action parse_disp_discard {
|
843
|
+
// discard token #_ : parse_discard() consumes the element that follows; nothing is written to v here
|
844
|
+
const char *np = parse_discard(fpc, pe);
|
845
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np; // nullptr: hold on this char and stop the machine; otherwise resume scanning at np
|
846
|
+
}
|
847
|
+
|
848
|
+
action parse_disp_tagged {
|
849
|
+
// tagged element (#inst, #uuid, or #user/tag): parse_tagged() reads the tag symbol and its value and writes the result to v
|
850
|
+
const char *np = parse_tagged(fpc, pe, v);
|
851
|
+
if (np == nullptr) { fhold; fbreak; } else fexec np; // nullptr: hold on this char and stop the machine; otherwise resume scanning at np
|
852
|
+
}
|
853
|
+
|
854
|
+
|
855
|
+
main := (
|
856
|
+
('{' >parse_disp_set |
|
857
|
+
'_' >parse_disp_discard |
|
858
|
+
alpha >parse_disp_tagged)
|
859
|
+
) @exit;
|
825
860
|
}%%
|
826
861
|
|
827
862
|
|
828
863
|
const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
829
864
|
{
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
return nullptr;
|
865
|
+
int cs;
|
866
|
+
|
867
|
+
%% write init;
|
868
|
+
%% write exec;
|
869
|
+
|
870
|
+
if (cs >= EDN_dispatch_first_final) {
|
871
|
+
return p + 1;
|
872
|
+
}
|
873
|
+
else if (cs == EDN_dispatch_error) {
|
874
|
+
error(__FUNCTION__, "dispatch extend error", *p);
|
875
|
+
return pe;
|
876
|
+
}
|
877
|
+
else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
|
878
|
+
return nullptr;
|
845
879
|
}
|
846
880
|
|
847
881
|
|
@@ -849,17 +883,17 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
|
849
883
|
// set parsing machine
|
850
884
|
//
|
851
885
|
%%{
|
852
|
-
|
853
|
-
|
886
|
+
machine EDN_set;
|
887
|
+
include EDN_sequence_common;
|
854
888
|
|
855
|
-
|
889
|
+
write data;
|
856
890
|
|
857
|
-
|
858
|
-
|
891
|
+
begin_set = '{';
|
892
|
+
end_set = '}';
|
859
893
|
|
860
|
-
|
861
|
-
|
862
|
-
|
894
|
+
main := begin_set @open_seq (
|
895
|
+
ignore* sequence? :>> end_set @close_seq
|
896
|
+
) @err(close_err) @exit;
|
863
897
|
}%%
|
864
898
|
|
865
899
|
//
|
@@ -867,25 +901,25 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
|
867
901
|
//
|
868
902
|
const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
|
869
903
|
{
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
904
|
+
static const char* EDN_TYPE = "set";
|
905
|
+
|
906
|
+
int cs;
|
907
|
+
VALUE elems; // holds the set's elements as an array allocated in @open_seq
|
908
|
+
|
909
|
+
%% write init;
|
910
|
+
%% write exec;
|
911
|
+
|
912
|
+
if (cs >= EDN_set_first_final) {
|
913
|
+
// all elements collected; now convert to a set
|
914
|
+
v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SET_METHOD, elems);
|
915
|
+
return p + 1;
|
916
|
+
}
|
917
|
+
else if (cs == EDN_set_error) {
|
918
|
+
error(__FUNCTION__, *p);
|
919
|
+
return pe;
|
920
|
+
}
|
921
|
+
else if (cs == EDN_set_en_main) {} // silence ragel warning
|
922
|
+
return nullptr;
|
889
923
|
}
|
890
924
|
|
891
925
|
|
@@ -896,55 +930,54 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
|
|
896
930
|
// defining a machine to consume items within container delimiters
|
897
931
|
//
|
898
932
|
%%{
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
933
|
+
machine EDN_discard;
|
934
|
+
include EDN_common;
|
935
|
+
|
936
|
+
write data;
|
937
|
+
|
938
|
+
begin_discard = '_';
|
939
|
+
|
940
|
+
action discard_value { // entered on the first char of the value that follows the discard marker
|
941
|
+
const char *np = parse_value(fpc, pe, v);
|
942
|
+
if (np == nullptr) { fhold; fbreak; } else {
|
943
|
+
// the value parsed cleanly but is being discarded, so record it
|
944
|
+
// in the `discard` sequence - the value itself is never needed
|
945
|
+
// again, so this bookkeeping could be simplified
|
946
|
+
discard.push_back(v);
|
947
|
+
fexec np; // resume scanning at the position parse_value() returned
|
948
|
+
}
|
949
|
+
}
|
950
|
+
|
951
|
+
action discard_err { // error action of the discard machine (attached with @err on main)
|
952
|
+
std::stringstream s;
|
953
|
+
s << "discard sequence without element to discard";
|
954
|
+
error(__FUNCTION__, s.str());
|
955
|
+
fhold; fbreak; // hold on the offending char and stop the machine
|
956
|
+
}
|
957
|
+
|
958
|
+
main := begin_discard ignore* (
|
959
|
+
begin_value >discard_value
|
960
|
+
) @err(discard_err) @exit;
|
927
961
|
}%%
|
928
962
|
|
929
963
|
|
930
964
|
const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
931
965
|
{
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
return nullptr;
|
966
|
+
int cs;
|
967
|
+
VALUE v;
|
968
|
+
|
969
|
+
%% write init;
|
970
|
+
%% write exec;
|
971
|
+
|
972
|
+
if (cs >= EDN_discard_first_final) {
|
973
|
+
return p + 1;
|
974
|
+
}
|
975
|
+
else if (cs == EDN_discard_error) {
|
976
|
+
error(__FUNCTION__, *p);
|
977
|
+
return pe;
|
978
|
+
}
|
979
|
+
else if (cs == EDN_discard_en_main) {} // silence ragel warning
|
980
|
+
return nullptr;
|
948
981
|
}
|
949
982
|
|
950
983
|
|
@@ -964,130 +997,127 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
|
964
997
|
// 2. add parse checks for uuid and inst for better error reporting
|
965
998
|
//
|
966
999
|
%%{
|
967
|
-
|
968
|
-
|
1000
|
+
machine EDN_tagged;
|
1001
|
+
include EDN_common;
|
969
1002
|
|
970
|
-
|
1003
|
+
write data;
|
971
1004
|
|
972
|
-
|
973
|
-
|
974
|
-
|
1005
|
+
tag_symbol_chars_start = alpha;
|
1006
|
+
tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
|
1007
|
+
tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
|
975
1008
|
|
976
|
-
|
977
|
-
|
1009
|
+
tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
|
1010
|
+
tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
|
978
1011
|
|
979
|
-
|
1012
|
+
tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
|
980
1013
|
|
981
1014
|
# inst = (string_delim [0-9+\-:\.TZ]* string_delim);
|
982
1015
|
# uuid = (string_delim [a-f0-9\-]* string_delim);
|
983
1016
|
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1017
|
+
action parse_tag {
|
1018
|
+
// parse the tag symbol (first half of the tag/value pair) into sym_name
|
1019
|
+
const char *np = parse_symbol(fpc, pe, sym_name);
|
1020
|
+
if (np == nullptr) { fhold; fbreak; } else {
|
1021
|
+
sym_ok = true; // parse_tagged() checks this flag before building the tagged element
|
1022
|
+
fexec np; // resume scanning right after the symbol
|
1023
|
+
}
|
1024
|
+
}
|
1025
|
+
action parse_data {
|
1026
|
+
// parse the value half of the tag/value pair into data
|
1027
|
+
const char *np = parse_value(fpc, pe, data);
|
1028
|
+
if (np == nullptr) { fhold; fbreak; } else {
|
1029
|
+
data_ok = true; // parse_tagged() checks this flag before building the tagged element
|
1030
|
+
fexec np; // resume scanning right after the value
|
1031
|
+
}
|
1032
|
+
}
|
1033
|
+
|
1034
|
+
main := (
|
1035
|
+
tag_symbol >parse_tag ignore+
|
1036
|
+
begin_value >parse_data
|
1037
|
+
) @exit;
|
1005
1038
|
}%%
|
1006
1039
|
|
1007
1040
|
|
1008
1041
|
const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
1009
1042
|
{
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1043
|
+
VALUE sym_name = Qnil;
|
1044
|
+
VALUE data = Qnil;
|
1045
|
+
bool sym_ok = false;
|
1046
|
+
bool data_ok = false;
|
1014
1047
|
|
1015
|
-
|
1048
|
+
int cs;
|
1016
1049
|
|
1017
|
-
|
1018
|
-
|
1050
|
+
%% write init;
|
1051
|
+
%% write exec;
|
1019
1052
|
|
1020
|
-
|
1053
|
+
if (cs >= EDN_tagged_first_final) {
|
1021
1054
|
//std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << data << std::endl;
|
1022
1055
|
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1041
|
-
|
1042
|
-
|
1043
|
-
|
1044
|
-
|
1056
|
+
if (!sym_ok || !data_ok) {
|
1057
|
+
error(__FUNCTION__, "tagged element symbol error", *p);
|
1058
|
+
v = EDN_EOF_CONST;
|
1059
|
+
return nullptr;
|
1060
|
+
}
|
1061
|
+
|
1062
|
+
try {
|
1063
|
+
// tagged_element makes a call to ruby which may throw an
|
1064
|
+
// exception when parsing the data
|
1065
|
+
v = edn::util::call_module_fn(rb_mEDN, EDN_TAGGED_ELEM_METHOD, sym_name, data);
|
1066
|
+
return p + 1;
|
1067
|
+
} catch (std::exception& e) {
|
1068
|
+
error(__FUNCTION__, e.what());
|
1069
|
+
return pe;
|
1070
|
+
}
|
1071
|
+
}
|
1072
|
+
else if (cs == EDN_tagged_error) {
|
1073
|
+
error(__FUNCTION__, "tagged element symbol error", *p);
|
1074
|
+
}
|
1075
|
+
else if (cs == EDN_tagged_en_main) {} // silence ragel warning
|
1076
|
+
v = EDN_EOF_CONST;
|
1077
|
+
return nullptr;
|
1045
1078
|
}
|
1046
1079
|
|
1047
1080
|
|
1048
|
-
|
1049
|
-
|
1050
1081
|
// ============================================================
|
1051
1082
|
// metadata - looks like ruby just discards this but we'll track it
|
1052
1083
|
// and provide a means to retrieve it after each parse op - might be
|
1053
1084
|
// useful?
|
1054
1085
|
//
|
1055
1086
|
%%{
|
1056
|
-
|
1057
|
-
|
1087
|
+
machine EDN_meta;
|
1088
|
+
include EDN_common;
|
1058
1089
|
|
1059
|
-
|
1090
|
+
write data;
|
1060
1091
|
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1092
|
+
action parse_data { // parse the value that follows the metadata marker into v
|
1093
|
+
const char *np = parse_value(fpc, pe, v);
|
1094
|
+
if (np == nullptr) { fhold; fbreak; } else { fexec np; } // nullptr: hold and stop the machine; otherwise resume scanning at np
|
1095
|
+
}
|
1065
1096
|
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1097
|
+
main := begin_meta (
|
1098
|
+
begin_value >parse_data
|
1099
|
+
) @exit;
|
1069
1100
|
}%%
|
1070
1101
|
|
1071
1102
|
|
1072
1103
|
const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
1073
1104
|
{
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
return nullptr;
|
1105
|
+
int cs;
|
1106
|
+
VALUE v;
|
1107
|
+
|
1108
|
+
%% write init;
|
1109
|
+
%% write exec;
|
1110
|
+
|
1111
|
+
if (cs >= EDN_meta_first_final) {
|
1112
|
+
append_to_meta(v);
|
1113
|
+
return p + 1;
|
1114
|
+
}
|
1115
|
+
else if (cs == EDN_meta_error) {
|
1116
|
+
error(__FUNCTION__, *p);
|
1117
|
+
return pe;
|
1118
|
+
}
|
1119
|
+
else if (cs == EDN_meta_en_main) {} // silence ragel warning
|
1120
|
+
return nullptr;
|
1091
1121
|
}
|
1092
1122
|
|
1093
1123
|
|
@@ -1097,55 +1127,55 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
|
1097
1127
|
// top-level, therefore, does not tokenize source stream
|
1098
1128
|
//
|
1099
1129
|
%%{
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1130
|
+
machine EDN_parser;
|
1131
|
+
include EDN_common;
|
1132
|
+
|
1133
|
+
write data;
|
1134
|
+
|
1135
|
+
action parse_elem {
|
1136
|
+
// remember how many metadata items exist before parsing this value,
|
1137
|
+
// so that afterwards we can tell whether parse_value() consumed
|
1138
|
+
// another metadata entry or an actual data item
|
1139
|
+
std::size_t meta_sz = meta_size();
|
1140
|
+
const char* np = parse_value(fpc, pe, result);
|
1141
|
+
if (np == nullptr) { fexec pe; fbreak; } else { // on failure jump to the end of the input and stop the machine
|
1142
|
+
// metadata is pending and its count did not change while parsing,
|
1143
|
+
// so what we just read is a real value: bind the pending
|
1144
|
+
// metadata sequence to it
|
1145
|
+
if (!meta_empty() && meta_size() == meta_sz) {
|
1146
|
+
// per the original note, ruby_meta() also empties the metadata sequence
|
1147
|
+
result = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
|
1148
|
+
}
|
1149
|
+
fexec np; // resume scanning at the position parse_value() returned
|
1150
|
+
}
|
1151
|
+
}
|
1152
|
+
|
1153
|
+
element = begin_value >parse_elem;
|
1154
|
+
next_element = ignore* element;
|
1155
|
+
sequence = ((element ignore*) (next_element ignore*)*);
|
1156
|
+
|
1157
|
+
main := ignore* sequence? ignore*;
|
1128
1158
|
}%%
|
1129
1159
|
|
1130
1160
|
|
1131
1161
|
VALUE edn::Parser::parse(const char* src, std::size_t len)
|
1132
1162
|
{
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1163
|
+
int cs;
|
1164
|
+
VALUE result = EDN_EOF_CONST;
|
1165
|
+
|
1166
|
+
%% write init;
|
1167
|
+
set_source(src, len);
|
1168
|
+
%% write exec;
|
1169
|
+
|
1170
|
+
if (cs == EDN_parser_error) {
|
1171
|
+
error(__FUNCTION__, *p);
|
1172
|
+
return EDN_EOF_CONST;
|
1173
|
+
}
|
1174
|
+
else if (cs == EDN_parser_first_final) {
|
1175
|
+
p = pe = eof = nullptr;
|
1176
|
+
}
|
1177
|
+
else if (cs == EDN_parser_en_main) {} // silence ragel warning
|
1178
|
+
return result;
|
1149
1179
|
}
|
1150
1180
|
|
1151
1181
|
|
@@ -1153,43 +1183,43 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
|
|
1153
1183
|
// token-by-token machine
|
1154
1184
|
//
|
1155
1185
|
%%{
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
}
|
1176
|
-
else {
|
1177
|
-
// a value was read and there's a pending metadata
|
1178
|
-
// sequence. Bind them.
|
1179
|
-
value = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
|
1180
|
-
state = TOKEN_OK;
|
1181
|
-
}
|
1182
|
-
} else if (!discard.empty()) {
|
1183
|
-
// a discard read. Don't return a value
|
1184
|
-
state = TOKEN_IS_DISCARD;
|
1185
|
-
} else {
|
1186
|
-
state = TOKEN_OK;
|
1186
|
+
machine EDN_tokens;
|
1187
|
+
include EDN_common;
|
1188
|
+
|
1189
|
+
write data nofinal noerror;
|
1190
|
+
|
1191
|
+
action parse_token {
|
1192
|
+
// we won't know if we've parsed a discard or a metadata until
|
1193
|
+
// after parse_value() is done. Save the current number of
|
1194
|
+
// elements in the metadata sequence; then we can check if it
|
1195
|
+
// grew or if the discard sequence grew
|
1196
|
+
meta_sz = meta_size();
|
1197
|
+
|
1198
|
+
const char* np = parse_value(fpc, pe, value);
|
1199
|
+
if (np == nullptr) { fhold; fbreak; } else {
|
1200
|
+
if (!meta_empty()) {
|
1201
|
+
// was an additional metadata entry read? if so, don't
|
1202
|
+
// return a value
|
1203
|
+
if (meta_size() > meta_sz) {
|
1204
|
+
state = TOKEN_IS_META;
|
1187
1205
|
}
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1206
|
+
else {
|
1207
|
+
// a value was read and there's a pending metadata
|
1208
|
+
// sequence. Bind them.
|
1209
|
+
value = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
|
1210
|
+
state = TOKEN_OK;
|
1211
|
+
}
|
1212
|
+
} else if (!discard.empty()) {
|
1213
|
+
// a discard read. Don't return a value
|
1214
|
+
state = TOKEN_IS_DISCARD;
|
1215
|
+
} else {
|
1216
|
+
state = TOKEN_OK;
|
1217
|
+
}
|
1218
|
+
fexec np;
|
1219
|
+
}
|
1220
|
+
}
|
1221
|
+
|
1222
|
+
main := ignore* begin_value >parse_token ignore*;
|
1193
1223
|
}%%
|
1194
1224
|
|
1195
1225
|
|
@@ -1197,21 +1227,21 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
|
|
1197
1227
|
//
|
1198
1228
|
edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
|
1199
1229
|
{
|
1200
|
-
|
1201
|
-
|
1202
|
-
|
1203
|
-
|
1204
|
-
|
1230
|
+
int cs;
|
1231
|
+
eTokenState state = TOKEN_ERROR;
|
1232
|
+
// need to track metadata read and bind it to the next value read
|
1233
|
+
// - but must account for sequences of metadata values
|
1234
|
+
std::size_t meta_sz;
|
1205
1235
|
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1236
|
+
// clear any previously saved discards; only track if read during
|
1237
|
+
// this op
|
1238
|
+
discard.clear();
|
1209
1239
|
|
1210
|
-
|
1211
|
-
|
1240
|
+
%% write init;
|
1241
|
+
%% write exec;
|
1212
1242
|
|
1213
|
-
|
1214
|
-
|
1243
|
+
if (cs == EDN_tokens_en_main) {} // silence ragel warning
|
1244
|
+
return state;
|
1215
1245
|
}
|
1216
1246
|
|
1217
1247
|
/*
|