edn_turbo 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -4
- data/bin/ppedn +3 -1
- data/ext/edn_turbo/edn_parser.cc +1337 -946
- data/ext/edn_turbo/edn_parser.h +27 -13
- data/ext/edn_turbo/edn_parser.rl +207 -145
- data/ext/edn_turbo/edn_parser_util.cc +56 -47
- data/ext/edn_turbo/main.cc +20 -17
- data/lib/edn_turbo/constants.rb +14 -0
- data/lib/edn_turbo/edn_parser.rb +5 -2
- data/lib/edn_turbo/tags.rb +46 -0
- data/lib/edn_turbo/utils.rb +34 -0
- data/lib/edn_turbo/version.rb +2 -2
- data/lib/edn_turbo.rb +10 -92
- data/test/test_output_diff.rb +56 -26
- metadata +6 -3
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -23,9 +23,7 @@
|
|
23
23
|
comment = ';' cr_neg* counter;
|
24
24
|
ignore = ws | comment;
|
25
25
|
|
26
|
-
operators = [/\.\*!_
|
27
|
-
symbol_start = alpha;
|
28
|
-
symbol_chars = symbol_start | digit | [\#:_\-\.\'];
|
26
|
+
operators = [/\.\*!_\?$%&<>\=+\-\'];
|
29
27
|
|
30
28
|
begin_dispatch = '#';
|
31
29
|
begin_keyword = ':';
|
@@ -37,10 +35,7 @@
|
|
37
35
|
string_delim = '"';
|
38
36
|
begin_number = digit;
|
39
37
|
begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
|
40
|
-
begin_symbol =
|
41
|
-
|
42
|
-
symbol_name = symbol_start (symbol_chars)*;
|
43
|
-
symbol = (symbol_name ('/' symbol_name)?);
|
38
|
+
begin_symbol = alpha;
|
44
39
|
|
45
40
|
# int / decimal rules
|
46
41
|
integer = ('0' | [1-9] digit*);
|
@@ -68,19 +63,19 @@
|
|
68
63
|
|
69
64
|
write data;
|
70
65
|
|
71
|
-
action
|
66
|
+
action parse_val_string {
|
72
67
|
// string types within double-quotes
|
73
68
|
const char *np = parse_string(fpc, pe, v);
|
74
69
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
75
70
|
}
|
76
71
|
|
77
|
-
action
|
72
|
+
action parse_val_keyword {
|
78
73
|
// tokens with a leading ':'
|
79
74
|
const char *np = parse_keyword(fpc, pe, v);
|
80
75
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
81
76
|
}
|
82
77
|
|
83
|
-
action
|
78
|
+
action parse_val_number {
|
84
79
|
// tokens w/ leading digits: non-negative integers & decimals.
|
85
80
|
// try to parse a decimal first
|
86
81
|
const char *np = parse_decimal(fpc, pe, v);
|
@@ -95,64 +90,64 @@
|
|
95
90
|
fbreak;
|
96
91
|
}
|
97
92
|
else {
|
98
|
-
error(__FUNCTION__, *p);
|
93
|
+
error(__FUNCTION__, "number format error", *p);
|
99
94
|
fexec pe;
|
100
95
|
}
|
101
96
|
}
|
102
97
|
|
103
|
-
action
|
98
|
+
action parse_val_operator {
|
104
99
|
// stand-alone operators *, +, -, etc.
|
105
100
|
const char *np = parse_operator(fpc, pe, v);
|
106
101
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
107
102
|
}
|
108
103
|
|
109
|
-
action
|
104
|
+
action parse_val_char {
|
110
105
|
// tokens w/ leading \ (escaped characters \newline, \c, etc.)
|
111
106
|
const char *np = parse_esc_char(fpc, pe, v);
|
112
107
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
113
108
|
}
|
114
109
|
|
115
|
-
action
|
110
|
+
action parse_val_symbol {
|
116
111
|
// user identifiers and reserved keywords (true, false, nil)
|
117
112
|
VALUE sym = Qnil;
|
118
113
|
const char *np = parse_symbol(fpc, pe, sym);
|
119
|
-
if (np == NULL) {
|
114
|
+
if (np == NULL) { fexec pe; } else {
|
120
115
|
// parse_symbol will make 'sym' a ruby string
|
121
116
|
if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
|
122
117
|
else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
|
123
118
|
else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
|
124
119
|
else {
|
125
|
-
v = Parser::
|
120
|
+
v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
|
126
121
|
}
|
127
122
|
fexec np;
|
128
123
|
}
|
129
124
|
}
|
130
125
|
|
131
|
-
action
|
126
|
+
action parse_val_vector {
|
132
127
|
// [
|
133
128
|
const char *np = parse_vector(fpc, pe, v);
|
134
129
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
135
130
|
}
|
136
131
|
|
137
|
-
action
|
132
|
+
action parse_val_list {
|
138
133
|
// (
|
139
134
|
const char *np = parse_list(fpc, pe, v);
|
140
135
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
141
136
|
}
|
142
137
|
|
143
|
-
action
|
138
|
+
action parse_val_map {
|
144
139
|
// {
|
145
140
|
const char *np = parse_map(fpc, pe, v);
|
146
141
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
147
142
|
}
|
148
143
|
|
149
|
-
action
|
144
|
+
action parse_val_meta {
|
150
145
|
// ^
|
151
146
|
const char *np = parse_meta(fpc, pe);
|
152
147
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
153
148
|
}
|
154
149
|
|
155
|
-
action
|
150
|
+
action parse_val_dispatch {
|
156
151
|
// handles tokens w/ leading # ("#_", "#{", and tagged elems)
|
157
152
|
const char *np = parse_dispatch(fpc + 1, pe, v);
|
158
153
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
@@ -160,23 +155,24 @@
|
|
160
155
|
|
161
156
|
|
162
157
|
main := (
|
163
|
-
string_delim >
|
164
|
-
begin_keyword >
|
165
|
-
begin_number >
|
166
|
-
operators >
|
167
|
-
begin_char >
|
168
|
-
begin_symbol >
|
169
|
-
begin_vector >
|
170
|
-
begin_list >
|
171
|
-
begin_map >
|
172
|
-
begin_meta >
|
173
|
-
begin_dispatch >
|
158
|
+
string_delim >parse_val_string |
|
159
|
+
begin_keyword >parse_val_keyword |
|
160
|
+
begin_number >parse_val_number |
|
161
|
+
operators >parse_val_operator |
|
162
|
+
begin_char >parse_val_char |
|
163
|
+
begin_symbol >parse_val_symbol |
|
164
|
+
begin_vector >parse_val_vector |
|
165
|
+
begin_list >parse_val_list |
|
166
|
+
begin_map >parse_val_map |
|
167
|
+
begin_meta >parse_val_meta |
|
168
|
+
begin_dispatch >parse_val_dispatch
|
174
169
|
) %*exit;
|
175
170
|
}%%
|
176
171
|
|
177
172
|
|
178
173
|
const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
179
174
|
{
|
175
|
+
// std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
|
180
176
|
int cs;
|
181
177
|
|
182
178
|
%% write init;
|
@@ -186,7 +182,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
186
182
|
return p;
|
187
183
|
}
|
188
184
|
else if (cs == EDN_value_error) {
|
189
|
-
error(__FUNCTION__, *p);
|
185
|
+
error(__FUNCTION__, "token error", *p);
|
190
186
|
return pe;
|
191
187
|
}
|
192
188
|
else if (cs == EDN_value_en_main) {} // silence ragel warning
|
@@ -207,7 +203,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
207
203
|
|
208
204
|
write data;
|
209
205
|
|
210
|
-
action
|
206
|
+
action parse_chars {
|
211
207
|
if (Parser::parse_byte_stream(p_save + 1, p, v, encode)) {
|
212
208
|
fexec p + 1;
|
213
209
|
} else {
|
@@ -220,11 +216,11 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
220
216
|
}
|
221
217
|
|
222
218
|
main := string_delim (
|
223
|
-
(^([\"\\] |
|
219
|
+
(^([\"\\] | 0xc2..0xf5) |
|
224
220
|
((0xc2..0xf5) |
|
225
221
|
'\\'[\"\\/bfnrt] |
|
226
222
|
'\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
|
227
|
-
'\\'^([\"\\/bfnrtu]
|
223
|
+
'\\'^([\"\\/bfnrtu]))* %parse_chars
|
228
224
|
) :>> string_delim @err(close_err) @exit;
|
229
225
|
}%%
|
230
226
|
|
@@ -258,15 +254,16 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
|
|
258
254
|
machine EDN_keyword;
|
259
255
|
include EDN_common;
|
260
256
|
|
261
|
-
|
262
|
-
|
257
|
+
keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
|
258
|
+
keyword_chars = (keyword_start | digit | ':');
|
263
259
|
|
264
|
-
keyword_name
|
260
|
+
keyword_name = keyword_start keyword_chars*;
|
261
|
+
keyword = keyword_name ('/' keyword_chars*)?;
|
265
262
|
|
266
263
|
write data;
|
267
264
|
|
268
265
|
|
269
|
-
main := begin_keyword
|
266
|
+
main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
|
270
267
|
}%%
|
271
268
|
|
272
269
|
|
@@ -287,7 +284,7 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
|
|
287
284
|
return p;
|
288
285
|
}
|
289
286
|
else if (cs == EDN_keyword_error) {
|
290
|
-
error(__FUNCTION__, *p);
|
287
|
+
error(__FUNCTION__, "invalid keyword", *p);
|
291
288
|
return pe;
|
292
289
|
}
|
293
290
|
else if (cs == EDN_keyword_en_main) {} // silence ragel warning
|
@@ -331,7 +328,7 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
|
|
331
328
|
|
332
329
|
|
333
330
|
// ============================================================
|
334
|
-
// integer parsing machine
|
331
|
+
// integer parsing machine - M suffix will return a BigNum
|
335
332
|
//
|
336
333
|
%%{
|
337
334
|
machine EDN_integer;
|
@@ -376,17 +373,18 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
|
|
376
373
|
|
377
374
|
write data;
|
378
375
|
|
379
|
-
action
|
376
|
+
action parse_op_symbol {
|
380
377
|
// parse a symbol including the leading operator (-, +, .)
|
381
378
|
VALUE sym = Qnil;
|
382
379
|
const char *np = parse_symbol(p_save, pe, sym);
|
383
|
-
if (np == NULL) {
|
384
|
-
|
380
|
+
if (np == NULL) { fexec pe; } else {
|
381
|
+
if (sym != Qnil)
|
382
|
+
v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
|
385
383
|
fexec np;
|
386
384
|
}
|
387
385
|
}
|
388
386
|
|
389
|
-
action
|
387
|
+
action parse_op_number {
|
390
388
|
// parse a number with the leading symbol - this is slightly
|
391
389
|
// different than the one within EDN_value since it includes
|
392
390
|
// the leading - or +
|
@@ -404,24 +402,27 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
|
|
404
402
|
fbreak;
|
405
403
|
}
|
406
404
|
else {
|
407
|
-
error(__FUNCTION__, *p);
|
405
|
+
error(__FUNCTION__, "number format error", *p);
|
408
406
|
fexec pe;
|
409
407
|
}
|
410
408
|
}
|
411
409
|
|
412
|
-
action
|
410
|
+
action parse_op {
|
413
411
|
// stand-alone operators (-, +, /, ... etc)
|
414
412
|
char op[2] = { *p_save, 0 };
|
415
413
|
VALUE sym = rb_str_new2(op);
|
416
|
-
v = Parser::
|
414
|
+
v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
|
417
415
|
}
|
418
416
|
|
417
|
+
valid_non_numeric_chars = alpha|operators|':'|'#';
|
418
|
+
valid_chars = valid_non_numeric_chars | digit;
|
419
419
|
|
420
420
|
main := (
|
421
|
-
('-'|'+'
|
422
|
-
(
|
423
|
-
|
424
|
-
|
421
|
+
('-'|'+') begin_number >parse_op_number |
|
422
|
+
(operators - [\-\+\.]) valid_chars >parse_op_symbol |
|
423
|
+
[\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
|
424
|
+
operators ignore* >parse_op
|
425
|
+
) ^(valid_chars)? @exit;
|
425
426
|
}%%
|
426
427
|
|
427
428
|
|
@@ -437,7 +438,7 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
|
|
437
438
|
return p;
|
438
439
|
}
|
439
440
|
else if (cs == EDN_operator_error) {
|
440
|
-
error(__FUNCTION__, *p);
|
441
|
+
error(__FUNCTION__, "symbol syntax error", *p);
|
441
442
|
return pe;
|
442
443
|
}
|
443
444
|
else if (cs == EDN_operator_en_main) {} // silence ragel warning
|
@@ -455,12 +456,13 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
|
|
455
456
|
|
456
457
|
write data;
|
457
458
|
|
458
|
-
valid_chars =
|
459
|
+
valid_chars = extend;
|
459
460
|
|
460
461
|
|
461
|
-
main := (
|
462
|
-
|
463
|
-
|
462
|
+
main := begin_char (
|
463
|
+
'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
|
464
|
+
valid_chars
|
465
|
+
) (ignore* | [\\\]\}\)])? @exit;
|
464
466
|
}%%
|
465
467
|
|
466
468
|
|
@@ -480,7 +482,7 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
|
|
480
482
|
return p;
|
481
483
|
}
|
482
484
|
else if (cs == EDN_escaped_char_error) {
|
483
|
-
error(__FUNCTION__, *p);
|
485
|
+
error(__FUNCTION__, "unexpected value", *p);
|
484
486
|
return pe;
|
485
487
|
}
|
486
488
|
else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
|
@@ -502,10 +504,26 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
|
|
502
504
|
|
503
505
|
write data;
|
504
506
|
|
507
|
+
symbol_ops_1 = [\.\-\+];
|
508
|
+
symbol_ops_2 = [\*!_\?$%&<>\=\'];
|
509
|
+
symbol_ops_3 = [:\#];
|
510
|
+
|
511
|
+
symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
|
512
|
+
|
513
|
+
symbol_chars = symbol_start | digit | symbol_ops_3;
|
514
|
+
|
515
|
+
symbol_name = (
|
516
|
+
(alpha symbol_chars*) |
|
517
|
+
(symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
|
518
|
+
(symbol_start symbol_chars+) |
|
519
|
+
operators{1}
|
520
|
+
);
|
521
|
+
symbol = '/' | (symbol_name ('/' symbol_name)?);
|
522
|
+
|
505
523
|
|
506
524
|
main := (
|
507
|
-
|
508
|
-
) ignore* (^(symbol_chars |
|
525
|
+
symbol
|
526
|
+
) ignore* (^(symbol_chars | '/')? @exit);
|
509
527
|
}%%
|
510
528
|
|
511
529
|
|
@@ -525,8 +543,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
525
543
|
return p;
|
526
544
|
}
|
527
545
|
else if (cs == EDN_symbol_error) {
|
528
|
-
error(__FUNCTION__, *p);
|
529
|
-
return pe;
|
546
|
+
error(__FUNCTION__, "invalid symbol sequence", *p);
|
530
547
|
}
|
531
548
|
else if (cs == EDN_symbol_en_main) {} // silence ragel warning
|
532
549
|
return NULL;
|
@@ -544,25 +561,48 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
544
561
|
machine EDN_sequence_common;
|
545
562
|
include EDN_common;
|
546
563
|
|
564
|
+
action open_seq {
|
565
|
+
// sequences store elements in an array, then process it to
|
566
|
+
// convert it to a list, set, or map as needed once the
|
567
|
+
// sequence end is reached
|
568
|
+
elems = rb_ary_new();
|
569
|
+
// additionally, metadata for elements in the sequence may be
|
570
|
+
// carried so we must push a new level in the metadata stack
|
571
|
+
new_meta_list();
|
572
|
+
}
|
573
|
+
|
574
|
+
action close_seq {
|
575
|
+
// remove the current metadata level
|
576
|
+
del_top_meta_list();
|
577
|
+
}
|
578
|
+
|
547
579
|
action parse_item {
|
548
580
|
// reads an item within a sequence (vector, list, map, or
|
549
581
|
// set). Regardless of the sequence type, an array of the
|
550
582
|
// items is built. Once done, the sequence parser will convert
|
551
583
|
// if needed
|
552
584
|
VALUE e;
|
585
|
+
std::size_t meta_sz = meta_size();
|
553
586
|
const char *np = parse_value(fpc, pe, e);
|
554
|
-
if (np == NULL) {
|
555
|
-
fhold; fbreak;
|
556
|
-
} else {
|
587
|
+
if (np == NULL) { fhold; fbreak; } else {
|
557
588
|
// if there's an entry in the discard list, the current
|
558
589
|
// object is not meant to be kept due to a #_ so don't
|
559
590
|
// push it into the list of elements
|
560
591
|
if (!discard.empty()) {
|
561
592
|
discard.pop_back();
|
562
593
|
}
|
563
|
-
else {
|
564
|
-
//
|
565
|
-
|
594
|
+
else if (!meta_empty()) {
|
595
|
+
// check if parse_value added metadata
|
596
|
+
if (meta_size() == meta_sz) {
|
597
|
+
// there's metadata and it didn't increase so
|
598
|
+
// parse_value() read an element we care
|
599
|
+
// about. Bind the metadata to it and add it to
|
600
|
+
// the sequence
|
601
|
+
e = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
|
602
|
+
rb_ary_push(elems, e);
|
603
|
+
}
|
604
|
+
} else {
|
605
|
+
// no metadata.. just push it
|
566
606
|
rb_ary_push(elems, e);
|
567
607
|
}
|
568
608
|
fexec np;
|
@@ -584,10 +624,9 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
584
624
|
|
585
625
|
write data;
|
586
626
|
|
587
|
-
main := begin_vector (
|
588
|
-
|
589
|
-
|
590
|
-
@err(close_err) @exit;
|
627
|
+
main := begin_vector @open_seq (
|
628
|
+
ignore* sequence? :>> end_vector @close_seq
|
629
|
+
) @err(close_err) @exit;
|
591
630
|
}%%
|
592
631
|
|
593
632
|
|
@@ -599,7 +638,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
599
638
|
static const char* EDN_TYPE = "vector";
|
600
639
|
|
601
640
|
int cs;
|
602
|
-
VALUE elems
|
641
|
+
VALUE elems; // will store the vector's elements - allocated in @open_seq
|
603
642
|
|
604
643
|
%% write init;
|
605
644
|
%% write exec;
|
@@ -609,7 +648,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
609
648
|
return p + 1;
|
610
649
|
}
|
611
650
|
else if (cs == EDN_vector_error) {
|
612
|
-
error(__FUNCTION__, *p);
|
651
|
+
error(__FUNCTION__, "vector format error", *p);
|
613
652
|
return pe;
|
614
653
|
}
|
615
654
|
else if (cs == EDN_vector_en_main) {} // silence ragel warning
|
@@ -629,9 +668,9 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
629
668
|
|
630
669
|
write data;
|
631
670
|
|
632
|
-
main := begin_list (
|
633
|
-
|
634
|
-
|
671
|
+
main := begin_list @open_seq (
|
672
|
+
ignore* sequence? :>> end_list @close_seq
|
673
|
+
) @err(close_err) @exit;
|
635
674
|
}%%
|
636
675
|
|
637
676
|
//
|
@@ -642,13 +681,15 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
|
|
642
681
|
static const char* EDN_TYPE = "list";
|
643
682
|
|
644
683
|
int cs;
|
645
|
-
VALUE elems
|
684
|
+
VALUE elems; // stores the list's elements - allocated in @open_seq
|
646
685
|
|
647
686
|
%% write init;
|
648
687
|
%% write exec;
|
649
688
|
|
650
689
|
if (cs >= EDN_list_first_final) {
|
651
690
|
v = elems;
|
691
|
+
// TODO: replace with this but first figure out why array is not unrolled by EDN::list()
|
692
|
+
// v = Parser::make_edn_type(EDNT_MAKE_LIST_METHOD, elems);
|
652
693
|
return p + 1;
|
653
694
|
}
|
654
695
|
else if (cs == EDN_list_error) {
|
@@ -673,9 +714,9 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
|
|
673
714
|
write data;
|
674
715
|
|
675
716
|
|
676
|
-
main := begin_map (
|
677
|
-
|
678
|
-
|
717
|
+
main := begin_map @open_seq (
|
718
|
+
ignore* (sequence)? :>> end_map @close_seq
|
719
|
+
) @err(close_err) @exit;
|
679
720
|
}%%
|
680
721
|
|
681
722
|
|
@@ -685,8 +726,8 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
|
685
726
|
|
686
727
|
int cs;
|
687
728
|
// since we don't know whether we're looking at a key or value,
|
688
|
-
// initially store all elements in
|
689
|
-
VALUE elems
|
729
|
+
// initially store all elements in an array (allocated in @open_seq)
|
730
|
+
VALUE elems;
|
690
731
|
|
691
732
|
%% write init;
|
692
733
|
%% write exec;
|
@@ -730,19 +771,19 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
|
730
771
|
|
731
772
|
write data;
|
732
773
|
|
733
|
-
action
|
774
|
+
action parse_disp_set {
|
734
775
|
// #{ }
|
735
776
|
const char *np = parse_set(fpc, pe, v);
|
736
777
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
737
778
|
}
|
738
779
|
|
739
|
-
action
|
780
|
+
action parse_disp_discard {
|
740
781
|
// discard token #_
|
741
782
|
const char *np = parse_discard(fpc, pe);
|
742
783
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
743
784
|
}
|
744
785
|
|
745
|
-
action
|
786
|
+
action parse_disp_tagged {
|
746
787
|
// #inst, #uuid, or #user/tag
|
747
788
|
const char *np = parse_tagged(fpc, pe, v);
|
748
789
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
@@ -750,9 +791,9 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
|
750
791
|
|
751
792
|
|
752
793
|
main := (
|
753
|
-
('{' >
|
754
|
-
'_' >
|
755
|
-
alpha >
|
794
|
+
('{' >parse_disp_set |
|
795
|
+
'_' >parse_disp_discard |
|
796
|
+
alpha >parse_disp_tagged)
|
756
797
|
) @exit;
|
757
798
|
}%%
|
758
799
|
|
@@ -768,7 +809,7 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
|
768
809
|
return p + 1;
|
769
810
|
}
|
770
811
|
else if (cs == EDN_dispatch_error) {
|
771
|
-
error(__FUNCTION__, *p);
|
812
|
+
error(__FUNCTION__, "dispatch extend error", *p);
|
772
813
|
return pe;
|
773
814
|
}
|
774
815
|
else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
|
@@ -789,9 +830,9 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
|
789
830
|
begin_set = '{';
|
790
831
|
end_set = '}';
|
791
832
|
|
792
|
-
main := begin_set (
|
793
|
-
|
794
|
-
|
833
|
+
main := begin_set @open_seq (
|
834
|
+
ignore* sequence? :>> end_set @close_seq
|
835
|
+
) @err(close_err) @exit;
|
795
836
|
}%%
|
796
837
|
|
797
838
|
//
|
@@ -802,14 +843,14 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
|
|
802
843
|
static const char* EDN_TYPE = "set";
|
803
844
|
|
804
845
|
int cs;
|
805
|
-
VALUE elems
|
846
|
+
VALUE elems; // holds the set's elements as an array allocated in @open_seq
|
806
847
|
|
807
848
|
%% write init;
|
808
849
|
%% write exec;
|
809
850
|
|
810
851
|
if (cs >= EDN_set_first_final) {
|
811
852
|
// all elements collected; now convert to a set
|
812
|
-
v = Parser::
|
853
|
+
v = Parser::make_edn_type(EDNT_MAKE_SET_METHOD, elems);
|
813
854
|
return p + 1;
|
814
855
|
}
|
815
856
|
else if (cs == EDN_set_error) {
|
@@ -837,14 +878,12 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
|
|
837
878
|
|
838
879
|
action discard_value {
|
839
880
|
const char *np = parse_value(fpc, pe, v);
|
840
|
-
if (np) {
|
841
|
-
// this token is to be
|
881
|
+
if (np == NULL) { fhold; fbreak; } else {
|
882
|
+
// this token is to be discarded so store it in the
|
842
883
|
// discard stack - we really don't need to save it so this
|
843
884
|
// could be simplified
|
844
885
|
discard.push_back(v);
|
845
886
|
fexec np;
|
846
|
-
} else {
|
847
|
-
fhold; fbreak;
|
848
887
|
}
|
849
888
|
}
|
850
889
|
|
@@ -901,24 +940,41 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
|
901
940
|
machine EDN_tagged;
|
902
941
|
include EDN_common;
|
903
942
|
|
943
|
+
write data;
|
944
|
+
|
945
|
+
tag_symbol_chars_start = alpha;
|
946
|
+
tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
|
947
|
+
tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
|
948
|
+
|
949
|
+
tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
|
950
|
+
tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
|
951
|
+
|
952
|
+
tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
|
953
|
+
|
904
954
|
# inst = (string_delim [0-9+\-:\.TZ]* string_delim);
|
905
955
|
# uuid = (string_delim [a-f0-9\-]* string_delim);
|
906
956
|
|
907
|
-
|
908
|
-
|
909
|
-
action parse_symbol {
|
957
|
+
action parse_tag {
|
910
958
|
// parses the symbol portion of the pair
|
911
959
|
const char *np = parse_symbol(fpc, pe, sym_name);
|
912
|
-
if (np == NULL) { fhold; fbreak; } else {
|
960
|
+
if (np == NULL) { fhold; fbreak; } else {
|
961
|
+
sym_ok = true;
|
962
|
+
fexec np;
|
963
|
+
}
|
913
964
|
}
|
914
|
-
action
|
965
|
+
action parse_data {
|
915
966
|
// parses the value portion
|
916
967
|
const char *np = parse_value(fpc, pe, data);
|
917
|
-
if (np == NULL) { fhold; fbreak; } else {
|
968
|
+
if (np == NULL) { fhold; fbreak; } else {
|
969
|
+
data_ok = true;
|
970
|
+
fexec np;
|
971
|
+
}
|
918
972
|
}
|
919
973
|
|
920
|
-
|
921
|
-
|
974
|
+
main := (
|
975
|
+
tag_symbol >parse_tag ignore+
|
976
|
+
begin_value >parse_data
|
977
|
+
) @exit;
|
922
978
|
}%%
|
923
979
|
|
924
980
|
|
@@ -926,6 +982,8 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
|
926
982
|
{
|
927
983
|
VALUE sym_name = Qnil;
|
928
984
|
VALUE data = Qnil;
|
985
|
+
bool sym_ok = false;
|
986
|
+
bool data_ok = false;
|
929
987
|
|
930
988
|
int cs;
|
931
989
|
|
@@ -935,20 +993,27 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
|
935
993
|
if (cs >= EDN_tagged_first_final) {
|
936
994
|
//std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << data << std::endl;
|
937
995
|
|
996
|
+
if (!sym_ok || !data_ok) {
|
997
|
+
error(__FUNCTION__, "tagged element symbol error", *p);
|
998
|
+
v = EDNT_EOF_CONST;
|
999
|
+
return NULL;
|
1000
|
+
}
|
1001
|
+
|
938
1002
|
try {
|
939
1003
|
// tagged_element makes a call to ruby which may throw an
|
940
1004
|
// exception when parsing the data
|
941
|
-
v = Parser::
|
1005
|
+
v = Parser::make_edn_type(EDNT_TAGGED_ELEM_METHOD, sym_name, data);
|
1006
|
+
return p + 1;
|
942
1007
|
} catch (std::exception& e) {
|
943
1008
|
error(__FUNCTION__, e.what());
|
944
1009
|
return pe;
|
945
1010
|
}
|
946
|
-
return p + 1;
|
947
1011
|
}
|
948
1012
|
else if (cs == EDN_tagged_error) {
|
949
|
-
|
1013
|
+
error(__FUNCTION__, "tagged element symbol error", *p);
|
950
1014
|
}
|
951
1015
|
else if (cs == EDN_tagged_en_main) {} // silence ragel warning
|
1016
|
+
v = EDNT_EOF_CONST;
|
952
1017
|
return NULL;
|
953
1018
|
}
|
954
1019
|
|
@@ -966,13 +1031,13 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
|
966
1031
|
|
967
1032
|
write data;
|
968
1033
|
|
969
|
-
action
|
1034
|
+
action parse_data {
|
970
1035
|
const char *np = parse_value(fpc, pe, v);
|
971
|
-
if (np) {
|
1036
|
+
if (np == NULL) { fhold; fbreak; } else { fexec np; }
|
972
1037
|
}
|
973
1038
|
|
974
1039
|
main := begin_meta (
|
975
|
-
begin_value >
|
1040
|
+
begin_value >parse_data
|
976
1041
|
) @exit;
|
977
1042
|
}%%
|
978
1043
|
|
@@ -986,7 +1051,7 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
|
986
1051
|
%% write exec;
|
987
1052
|
|
988
1053
|
if (cs >= EDN_meta_first_final) {
|
989
|
-
|
1054
|
+
append_to_meta(v);
|
990
1055
|
return p + 1;
|
991
1056
|
}
|
992
1057
|
else if (cs == EDN_meta_error) {
|
@@ -1010,25 +1075,25 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
|
1010
1075
|
|
1011
1076
|
write data;
|
1012
1077
|
|
1013
|
-
action
|
1078
|
+
action parse_elem {
|
1014
1079
|
// save the count of metadata items before we parse this value
|
1015
1080
|
// so we can determine if we've read another metadata value or
|
1016
1081
|
// an actual data item
|
1017
|
-
std::size_t
|
1082
|
+
std::size_t meta_sz = meta_size();
|
1018
1083
|
const char* np = parse_value(fpc, pe, result);
|
1019
1084
|
if (np == NULL) { fexec pe; fbreak; } else {
|
1020
1085
|
// if we have metadata saved and it matches the count we
|
1021
1086
|
// saved before we parsed a value, then we must bind the
|
1022
1087
|
// metadata sequence to it
|
1023
|
-
if (!
|
1088
|
+
if (!meta_empty() && meta_size() == meta_sz) {
|
1024
1089
|
// this will empty the metadata sequence too
|
1025
|
-
result =
|
1090
|
+
result = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
|
1026
1091
|
}
|
1027
1092
|
fexec np;
|
1028
1093
|
}
|
1029
1094
|
}
|
1030
1095
|
|
1031
|
-
element = begin_value >
|
1096
|
+
element = begin_value >parse_elem;
|
1032
1097
|
next_element = ignore* element;
|
1033
1098
|
sequence = ((element ignore*) (next_element ignore*)*);
|
1034
1099
|
|
@@ -1039,16 +1104,15 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
|
1039
1104
|
VALUE edn::Parser::parse(const char* src, std::size_t len)
|
1040
1105
|
{
|
1041
1106
|
int cs;
|
1042
|
-
VALUE result =
|
1107
|
+
VALUE result = EDNT_EOF_CONST;
|
1043
1108
|
|
1044
1109
|
%% write init;
|
1045
1110
|
set_source(src, len);
|
1046
1111
|
%% write exec;
|
1047
1112
|
|
1048
1113
|
if (cs == EDN_parser_error) {
|
1049
|
-
|
1050
|
-
|
1051
|
-
return EDNT_EOF;
|
1114
|
+
error(__FUNCTION__, *p);
|
1115
|
+
return EDNT_EOF_CONST;
|
1052
1116
|
}
|
1053
1117
|
else if (cs == EDN_parser_first_final) {
|
1054
1118
|
p = pe = eof = NULL;
|
@@ -1065,50 +1129,52 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
|
|
1065
1129
|
machine EDN_tokens;
|
1066
1130
|
include EDN_common;
|
1067
1131
|
|
1068
|
-
write data nofinal;
|
1132
|
+
write data nofinal noerror;
|
1069
1133
|
|
1070
|
-
action
|
1134
|
+
action parse_token {
|
1071
1135
|
// we won't know if we've parsed a discard or a metadata until
|
1072
1136
|
// after parse_value() is done. Save the current number of
|
1073
1137
|
// elements in the metadata sequence; then we can check if it
|
1074
1138
|
// grew or if the discard sequence grew
|
1075
|
-
|
1139
|
+
meta_sz = meta_size();
|
1076
1140
|
|
1077
1141
|
const char* np = parse_value(fpc, pe, value);
|
1078
|
-
|
1079
1142
|
if (np == NULL) { fhold; fbreak; } else {
|
1080
|
-
if (
|
1081
|
-
// was
|
1082
|
-
//
|
1083
|
-
if (
|
1084
|
-
|
1143
|
+
if (!meta_empty()) {
|
1144
|
+
// was an additional metadata entry read? if so, don't
|
1145
|
+
// return a value
|
1146
|
+
if (meta_size() > meta_sz) {
|
1147
|
+
state = TOKEN_IS_META;
|
1085
1148
|
}
|
1086
1149
|
else {
|
1087
1150
|
// a value was read and there's a pending metadata
|
1088
1151
|
// sequence. Bind them.
|
1089
|
-
value =
|
1152
|
+
value = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
|
1153
|
+
state = TOKEN_OK;
|
1090
1154
|
}
|
1091
1155
|
} else if (!discard.empty()) {
|
1092
1156
|
// a discard read. Don't return a value
|
1093
|
-
|
1157
|
+
state = TOKEN_IS_DISCARD;
|
1158
|
+
} else {
|
1159
|
+
state = TOKEN_OK;
|
1094
1160
|
}
|
1095
1161
|
fexec np;
|
1096
1162
|
}
|
1097
1163
|
}
|
1098
1164
|
|
1099
|
-
main := ignore* begin_value >
|
1165
|
+
main := ignore* begin_value >parse_token ignore*;
|
1100
1166
|
}%%
|
1101
1167
|
|
1102
1168
|
|
1103
1169
|
//
|
1104
1170
|
//
|
1105
|
-
|
1171
|
+
edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
|
1106
1172
|
{
|
1107
1173
|
int cs;
|
1108
|
-
|
1174
|
+
eTokenState state = TOKEN_ERROR;
|
1109
1175
|
// need to track metadata read and bind it to the next value read
|
1110
1176
|
// - but must account for sequences of metadata values
|
1111
|
-
std::size_t
|
1177
|
+
std::size_t meta_sz;
|
1112
1178
|
|
1113
1179
|
// clear any previously saved discards; only track if read during
|
1114
1180
|
// this op
|
@@ -1117,12 +1183,8 @@ bool edn::Parser::parse_next(VALUE& value)
|
|
1117
1183
|
%% write init;
|
1118
1184
|
%% write exec;
|
1119
1185
|
|
1120
|
-
if (cs ==
|
1121
|
-
|
1122
|
-
}
|
1123
|
-
else if (cs == EDN_tokens_en_main) {} // silence ragel warning
|
1124
|
-
|
1125
|
-
return is_value;
|
1186
|
+
if (cs == EDN_tokens_en_main) {} // silence ragel warning
|
1187
|
+
return state;
|
1126
1188
|
}
|
1127
1189
|
|
1128
1190
|
|