edn_turbo 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -4
- data/bin/ppedn +3 -1
- data/ext/edn_turbo/edn_parser.cc +1337 -946
- data/ext/edn_turbo/edn_parser.h +27 -13
- data/ext/edn_turbo/edn_parser.rl +207 -145
- data/ext/edn_turbo/edn_parser_util.cc +56 -47
- data/ext/edn_turbo/main.cc +20 -17
- data/lib/edn_turbo/constants.rb +14 -0
- data/lib/edn_turbo/edn_parser.rb +5 -2
- data/lib/edn_turbo/tags.rb +46 -0
- data/lib/edn_turbo/utils.rb +34 -0
- data/lib/edn_turbo/version.rb +2 -2
- data/lib/edn_turbo.rb +10 -92
- data/test/test_output_diff.rb +56 -26
- metadata +6 -3
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -23,9 +23,7 @@
|
|
23
23
|
comment = ';' cr_neg* counter;
|
24
24
|
ignore = ws | comment;
|
25
25
|
|
26
|
-
operators = [/\.\*!_
|
27
|
-
symbol_start = alpha;
|
28
|
-
symbol_chars = symbol_start | digit | [\#:_\-\.\'];
|
26
|
+
operators = [/\.\*!_\?$%&<>\=+\-\'];
|
29
27
|
|
30
28
|
begin_dispatch = '#';
|
31
29
|
begin_keyword = ':';
|
@@ -37,10 +35,7 @@
|
|
37
35
|
string_delim = '"';
|
38
36
|
begin_number = digit;
|
39
37
|
begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
|
40
|
-
begin_symbol =
|
41
|
-
|
42
|
-
symbol_name = symbol_start (symbol_chars)*;
|
43
|
-
symbol = (symbol_name ('/' symbol_name)?);
|
38
|
+
begin_symbol = alpha;
|
44
39
|
|
45
40
|
# int / decimal rules
|
46
41
|
integer = ('0' | [1-9] digit*);
|
@@ -68,19 +63,19 @@
|
|
68
63
|
|
69
64
|
write data;
|
70
65
|
|
71
|
-
action
|
66
|
+
action parse_val_string {
|
72
67
|
// string types within double-quotes
|
73
68
|
const char *np = parse_string(fpc, pe, v);
|
74
69
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
75
70
|
}
|
76
71
|
|
77
|
-
action
|
72
|
+
action parse_val_keyword {
|
78
73
|
// tokens with a leading ':'
|
79
74
|
const char *np = parse_keyword(fpc, pe, v);
|
80
75
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
81
76
|
}
|
82
77
|
|
83
|
-
action
|
78
|
+
action parse_val_number {
|
84
79
|
// tokens w/ leading digits: non-negative integers & decimals.
|
85
80
|
// try to parse a decimal first
|
86
81
|
const char *np = parse_decimal(fpc, pe, v);
|
@@ -95,64 +90,64 @@
|
|
95
90
|
fbreak;
|
96
91
|
}
|
97
92
|
else {
|
98
|
-
error(__FUNCTION__, *p);
|
93
|
+
error(__FUNCTION__, "number format error", *p);
|
99
94
|
fexec pe;
|
100
95
|
}
|
101
96
|
}
|
102
97
|
|
103
|
-
action
|
98
|
+
action parse_val_operator {
|
104
99
|
// stand-alone operators *, +, -, etc.
|
105
100
|
const char *np = parse_operator(fpc, pe, v);
|
106
101
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
107
102
|
}
|
108
103
|
|
109
|
-
action
|
104
|
+
action parse_val_char {
|
110
105
|
// tokens w/ leading \ (escaped characters \newline, \c, etc.)
|
111
106
|
const char *np = parse_esc_char(fpc, pe, v);
|
112
107
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
113
108
|
}
|
114
109
|
|
115
|
-
action
|
110
|
+
action parse_val_symbol {
|
116
111
|
// user identifiers and reserved keywords (true, false, nil)
|
117
112
|
VALUE sym = Qnil;
|
118
113
|
const char *np = parse_symbol(fpc, pe, sym);
|
119
|
-
if (np == NULL) {
|
114
|
+
if (np == NULL) { fexec pe; } else {
|
120
115
|
// parse_symbol will make 'sym' a ruby string
|
121
116
|
if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
|
122
117
|
else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
|
123
118
|
else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
|
124
119
|
else {
|
125
|
-
v = Parser::
|
120
|
+
v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
|
126
121
|
}
|
127
122
|
fexec np;
|
128
123
|
}
|
129
124
|
}
|
130
125
|
|
131
|
-
action
|
126
|
+
action parse_val_vector {
|
132
127
|
// [
|
133
128
|
const char *np = parse_vector(fpc, pe, v);
|
134
129
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
135
130
|
}
|
136
131
|
|
137
|
-
action
|
132
|
+
action parse_val_list {
|
138
133
|
// (
|
139
134
|
const char *np = parse_list(fpc, pe, v);
|
140
135
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
141
136
|
}
|
142
137
|
|
143
|
-
action
|
138
|
+
action parse_val_map {
|
144
139
|
// {
|
145
140
|
const char *np = parse_map(fpc, pe, v);
|
146
141
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
147
142
|
}
|
148
143
|
|
149
|
-
action
|
144
|
+
action parse_val_meta {
|
150
145
|
// ^
|
151
146
|
const char *np = parse_meta(fpc, pe);
|
152
147
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
153
148
|
}
|
154
149
|
|
155
|
-
action
|
150
|
+
action parse_val_dispatch {
|
156
151
|
// handles tokens w/ leading # ("#_", "#{", and tagged elems)
|
157
152
|
const char *np = parse_dispatch(fpc + 1, pe, v);
|
158
153
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
@@ -160,23 +155,24 @@
|
|
160
155
|
|
161
156
|
|
162
157
|
main := (
|
163
|
-
string_delim >
|
164
|
-
begin_keyword >
|
165
|
-
begin_number >
|
166
|
-
operators >
|
167
|
-
begin_char >
|
168
|
-
begin_symbol >
|
169
|
-
begin_vector >
|
170
|
-
begin_list >
|
171
|
-
begin_map >
|
172
|
-
begin_meta >
|
173
|
-
begin_dispatch >
|
158
|
+
string_delim >parse_val_string |
|
159
|
+
begin_keyword >parse_val_keyword |
|
160
|
+
begin_number >parse_val_number |
|
161
|
+
operators >parse_val_operator |
|
162
|
+
begin_char >parse_val_char |
|
163
|
+
begin_symbol >parse_val_symbol |
|
164
|
+
begin_vector >parse_val_vector |
|
165
|
+
begin_list >parse_val_list |
|
166
|
+
begin_map >parse_val_map |
|
167
|
+
begin_meta >parse_val_meta |
|
168
|
+
begin_dispatch >parse_val_dispatch
|
174
169
|
) %*exit;
|
175
170
|
}%%
|
176
171
|
|
177
172
|
|
178
173
|
const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
179
174
|
{
|
175
|
+
// std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
|
180
176
|
int cs;
|
181
177
|
|
182
178
|
%% write init;
|
@@ -186,7 +182,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
186
182
|
return p;
|
187
183
|
}
|
188
184
|
else if (cs == EDN_value_error) {
|
189
|
-
error(__FUNCTION__, *p);
|
185
|
+
error(__FUNCTION__, "token error", *p);
|
190
186
|
return pe;
|
191
187
|
}
|
192
188
|
else if (cs == EDN_value_en_main) {} // silence ragel warning
|
@@ -207,7 +203,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
207
203
|
|
208
204
|
write data;
|
209
205
|
|
210
|
-
action
|
206
|
+
action parse_chars {
|
211
207
|
if (Parser::parse_byte_stream(p_save + 1, p, v, encode)) {
|
212
208
|
fexec p + 1;
|
213
209
|
} else {
|
@@ -220,11 +216,11 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
220
216
|
}
|
221
217
|
|
222
218
|
main := string_delim (
|
223
|
-
(^([\"\\] |
|
219
|
+
(^([\"\\] | 0xc2..0xf5) |
|
224
220
|
((0xc2..0xf5) |
|
225
221
|
'\\'[\"\\/bfnrt] |
|
226
222
|
'\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
|
227
|
-
'\\'^([\"\\/bfnrtu]
|
223
|
+
'\\'^([\"\\/bfnrtu]))* %parse_chars
|
228
224
|
) :>> string_delim @err(close_err) @exit;
|
229
225
|
}%%
|
230
226
|
|
@@ -258,15 +254,16 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
|
|
258
254
|
machine EDN_keyword;
|
259
255
|
include EDN_common;
|
260
256
|
|
261
|
-
|
262
|
-
|
257
|
+
keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
|
258
|
+
keyword_chars = (keyword_start | digit | ':');
|
263
259
|
|
264
|
-
keyword_name
|
260
|
+
keyword_name = keyword_start keyword_chars*;
|
261
|
+
keyword = keyword_name ('/' keyword_chars*)?;
|
265
262
|
|
266
263
|
write data;
|
267
264
|
|
268
265
|
|
269
|
-
main := begin_keyword
|
266
|
+
main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
|
270
267
|
}%%
|
271
268
|
|
272
269
|
|
@@ -287,7 +284,7 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
|
|
287
284
|
return p;
|
288
285
|
}
|
289
286
|
else if (cs == EDN_keyword_error) {
|
290
|
-
error(__FUNCTION__, *p);
|
287
|
+
error(__FUNCTION__, "invalid keyword", *p);
|
291
288
|
return pe;
|
292
289
|
}
|
293
290
|
else if (cs == EDN_keyword_en_main) {} // silence ragel warning
|
@@ -331,7 +328,7 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
|
|
331
328
|
|
332
329
|
|
333
330
|
// ============================================================
|
334
|
-
// integer parsing machine
|
331
|
+
// integer parsing machine - M suffix will return a BigNum
|
335
332
|
//
|
336
333
|
%%{
|
337
334
|
machine EDN_integer;
|
@@ -376,17 +373,18 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
|
|
376
373
|
|
377
374
|
write data;
|
378
375
|
|
379
|
-
action
|
376
|
+
action parse_op_symbol {
|
380
377
|
// parse a symbol including the leading operator (-, +, .)
|
381
378
|
VALUE sym = Qnil;
|
382
379
|
const char *np = parse_symbol(p_save, pe, sym);
|
383
|
-
if (np == NULL) {
|
384
|
-
|
380
|
+
if (np == NULL) { fexec pe; } else {
|
381
|
+
if (sym != Qnil)
|
382
|
+
v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
|
385
383
|
fexec np;
|
386
384
|
}
|
387
385
|
}
|
388
386
|
|
389
|
-
action
|
387
|
+
action parse_op_number {
|
390
388
|
// parse a number with the leading symbol - this is slightly
|
391
389
|
// different than the one within EDN_value since it includes
|
392
390
|
// the leading - or +
|
@@ -404,24 +402,27 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
|
|
404
402
|
fbreak;
|
405
403
|
}
|
406
404
|
else {
|
407
|
-
error(__FUNCTION__, *p);
|
405
|
+
error(__FUNCTION__, "number format error", *p);
|
408
406
|
fexec pe;
|
409
407
|
}
|
410
408
|
}
|
411
409
|
|
412
|
-
action
|
410
|
+
action parse_op {
|
413
411
|
// stand-alone operators (-, +, /, ... etc)
|
414
412
|
char op[2] = { *p_save, 0 };
|
415
413
|
VALUE sym = rb_str_new2(op);
|
416
|
-
v = Parser::
|
414
|
+
v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
|
417
415
|
}
|
418
416
|
|
417
|
+
valid_non_numeric_chars = alpha|operators|':'|'#';
|
418
|
+
valid_chars = valid_non_numeric_chars | digit;
|
419
419
|
|
420
420
|
main := (
|
421
|
-
('-'|'+'
|
422
|
-
(
|
423
|
-
|
424
|
-
|
421
|
+
('-'|'+') begin_number >parse_op_number |
|
422
|
+
(operators - [\-\+\.]) valid_chars >parse_op_symbol |
|
423
|
+
[\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
|
424
|
+
operators ignore* >parse_op
|
425
|
+
) ^(valid_chars)? @exit;
|
425
426
|
}%%
|
426
427
|
|
427
428
|
|
@@ -437,7 +438,7 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
|
|
437
438
|
return p;
|
438
439
|
}
|
439
440
|
else if (cs == EDN_operator_error) {
|
440
|
-
error(__FUNCTION__, *p);
|
441
|
+
error(__FUNCTION__, "symbol syntax error", *p);
|
441
442
|
return pe;
|
442
443
|
}
|
443
444
|
else if (cs == EDN_operator_en_main) {} // silence ragel warning
|
@@ -455,12 +456,13 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
|
|
455
456
|
|
456
457
|
write data;
|
457
458
|
|
458
|
-
valid_chars =
|
459
|
+
valid_chars = extend;
|
459
460
|
|
460
461
|
|
461
|
-
main := (
|
462
|
-
|
463
|
-
|
462
|
+
main := begin_char (
|
463
|
+
'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
|
464
|
+
valid_chars
|
465
|
+
) (ignore* | [\\\]\}\)])? @exit;
|
464
466
|
}%%
|
465
467
|
|
466
468
|
|
@@ -480,7 +482,7 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
|
|
480
482
|
return p;
|
481
483
|
}
|
482
484
|
else if (cs == EDN_escaped_char_error) {
|
483
|
-
error(__FUNCTION__, *p);
|
485
|
+
error(__FUNCTION__, "unexpected value", *p);
|
484
486
|
return pe;
|
485
487
|
}
|
486
488
|
else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
|
@@ -502,10 +504,26 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
|
|
502
504
|
|
503
505
|
write data;
|
504
506
|
|
507
|
+
symbol_ops_1 = [\.\-\+];
|
508
|
+
symbol_ops_2 = [\*!_\?$%&<>\=\'];
|
509
|
+
symbol_ops_3 = [:\#];
|
510
|
+
|
511
|
+
symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
|
512
|
+
|
513
|
+
symbol_chars = symbol_start | digit | symbol_ops_3;
|
514
|
+
|
515
|
+
symbol_name = (
|
516
|
+
(alpha symbol_chars*) |
|
517
|
+
(symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
|
518
|
+
(symbol_start symbol_chars+) |
|
519
|
+
operators{1}
|
520
|
+
);
|
521
|
+
symbol = '/' | (symbol_name ('/' symbol_name)?);
|
522
|
+
|
505
523
|
|
506
524
|
main := (
|
507
|
-
|
508
|
-
) ignore* (^(symbol_chars |
|
525
|
+
symbol
|
526
|
+
) ignore* (^(symbol_chars | '/')? @exit);
|
509
527
|
}%%
|
510
528
|
|
511
529
|
|
@@ -525,8 +543,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
525
543
|
return p;
|
526
544
|
}
|
527
545
|
else if (cs == EDN_symbol_error) {
|
528
|
-
error(__FUNCTION__, *p);
|
529
|
-
return pe;
|
546
|
+
error(__FUNCTION__, "invalid symbol sequence", *p);
|
530
547
|
}
|
531
548
|
else if (cs == EDN_symbol_en_main) {} // silence ragel warning
|
532
549
|
return NULL;
|
@@ -544,25 +561,48 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
544
561
|
machine EDN_sequence_common;
|
545
562
|
include EDN_common;
|
546
563
|
|
564
|
+
action open_seq {
|
565
|
+
// sequences store elements in an array, then process it to
|
566
|
+
// convert it to a list, set, or map as needed once the
|
567
|
+
// sequence end is reached
|
568
|
+
elems = rb_ary_new();
|
569
|
+
// additionally, metadata for elements in the sequence may be
|
570
|
+
// carried so we must push a new level in the metadata stack
|
571
|
+
new_meta_list();
|
572
|
+
}
|
573
|
+
|
574
|
+
action close_seq {
|
575
|
+
// remove the current metadata level
|
576
|
+
del_top_meta_list();
|
577
|
+
}
|
578
|
+
|
547
579
|
action parse_item {
|
548
580
|
// reads an item within a sequence (vector, list, map, or
|
549
581
|
// set). Regardless of the sequence type, an array of the
|
550
582
|
// items is built. Once done, the sequence parser will convert
|
551
583
|
// if needed
|
552
584
|
VALUE e;
|
585
|
+
std::size_t meta_sz = meta_size();
|
553
586
|
const char *np = parse_value(fpc, pe, e);
|
554
|
-
if (np == NULL) {
|
555
|
-
fhold; fbreak;
|
556
|
-
} else {
|
587
|
+
if (np == NULL) { fhold; fbreak; } else {
|
557
588
|
// if there's an entry in the discard list, the current
|
558
589
|
// object is not meant to be kept due to a #_ so don't
|
559
590
|
// push it into the list of elements
|
560
591
|
if (!discard.empty()) {
|
561
592
|
discard.pop_back();
|
562
593
|
}
|
563
|
-
else {
|
564
|
-
//
|
565
|
-
|
594
|
+
else if (!meta_empty()) {
|
595
|
+
// check if parse_value added metadata
|
596
|
+
if (meta_size() == meta_sz) {
|
597
|
+
// there's metadata and it didn't increase so
|
598
|
+
// parse_value() read an element we care
|
599
|
+
// about. Bind the metadata to it and add it to
|
600
|
+
// the sequence
|
601
|
+
e = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
|
602
|
+
rb_ary_push(elems, e);
|
603
|
+
}
|
604
|
+
} else {
|
605
|
+
// no metadata.. just push it
|
566
606
|
rb_ary_push(elems, e);
|
567
607
|
}
|
568
608
|
fexec np;
|
@@ -584,10 +624,9 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
|
584
624
|
|
585
625
|
write data;
|
586
626
|
|
587
|
-
main := begin_vector (
|
588
|
-
|
589
|
-
|
590
|
-
@err(close_err) @exit;
|
627
|
+
main := begin_vector @open_seq (
|
628
|
+
ignore* sequence? :>> end_vector @close_seq
|
629
|
+
) @err(close_err) @exit;
|
591
630
|
}%%
|
592
631
|
|
593
632
|
|
@@ -599,7 +638,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
599
638
|
static const char* EDN_TYPE = "vector";
|
600
639
|
|
601
640
|
int cs;
|
602
|
-
VALUE elems
|
641
|
+
VALUE elems; // will store the vector's elements - allocated in @open_seq
|
603
642
|
|
604
643
|
%% write init;
|
605
644
|
%% write exec;
|
@@ -609,7 +648,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
609
648
|
return p + 1;
|
610
649
|
}
|
611
650
|
else if (cs == EDN_vector_error) {
|
612
|
-
error(__FUNCTION__, *p);
|
651
|
+
error(__FUNCTION__, "vector format error", *p);
|
613
652
|
return pe;
|
614
653
|
}
|
615
654
|
else if (cs == EDN_vector_en_main) {} // silence ragel warning
|
@@ -629,9 +668,9 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
|
629
668
|
|
630
669
|
write data;
|
631
670
|
|
632
|
-
main := begin_list (
|
633
|
-
|
634
|
-
|
671
|
+
main := begin_list @open_seq (
|
672
|
+
ignore* sequence? :>> end_list @close_seq
|
673
|
+
) @err(close_err) @exit;
|
635
674
|
}%%
|
636
675
|
|
637
676
|
//
|
@@ -642,13 +681,15 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
|
|
642
681
|
static const char* EDN_TYPE = "list";
|
643
682
|
|
644
683
|
int cs;
|
645
|
-
VALUE elems
|
684
|
+
VALUE elems; // stores the list's elements - allocated in @open_seq
|
646
685
|
|
647
686
|
%% write init;
|
648
687
|
%% write exec;
|
649
688
|
|
650
689
|
if (cs >= EDN_list_first_final) {
|
651
690
|
v = elems;
|
691
|
+
// TODO: replace with this but first figure out why array is not unrolled by EDN::list()
|
692
|
+
// v = Parser::make_edn_type(EDNT_MAKE_LIST_METHOD, elems);
|
652
693
|
return p + 1;
|
653
694
|
}
|
654
695
|
else if (cs == EDN_list_error) {
|
@@ -673,9 +714,9 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
|
|
673
714
|
write data;
|
674
715
|
|
675
716
|
|
676
|
-
main := begin_map (
|
677
|
-
|
678
|
-
|
717
|
+
main := begin_map @open_seq (
|
718
|
+
ignore* (sequence)? :>> end_map @close_seq
|
719
|
+
) @err(close_err) @exit;
|
679
720
|
}%%
|
680
721
|
|
681
722
|
|
@@ -685,8 +726,8 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
|
685
726
|
|
686
727
|
int cs;
|
687
728
|
// since we don't know whether we're looking at a key or value,
|
688
|
-
// initially store all elements in
|
689
|
-
VALUE elems
|
729
|
+
// initially store all elements in an array (allocated in @open_seq)
|
730
|
+
VALUE elems;
|
690
731
|
|
691
732
|
%% write init;
|
692
733
|
%% write exec;
|
@@ -730,19 +771,19 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
|
730
771
|
|
731
772
|
write data;
|
732
773
|
|
733
|
-
action
|
774
|
+
action parse_disp_set {
|
734
775
|
// #{ }
|
735
776
|
const char *np = parse_set(fpc, pe, v);
|
736
777
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
737
778
|
}
|
738
779
|
|
739
|
-
action
|
780
|
+
action parse_disp_discard {
|
740
781
|
// discard token #_
|
741
782
|
const char *np = parse_discard(fpc, pe);
|
742
783
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
743
784
|
}
|
744
785
|
|
745
|
-
action
|
786
|
+
action parse_disp_tagged {
|
746
787
|
// #inst, #uuid, or #user/tag
|
747
788
|
const char *np = parse_tagged(fpc, pe, v);
|
748
789
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
@@ -750,9 +791,9 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
|
750
791
|
|
751
792
|
|
752
793
|
main := (
|
753
|
-
('{' >
|
754
|
-
'_' >
|
755
|
-
alpha >
|
794
|
+
('{' >parse_disp_set |
|
795
|
+
'_' >parse_disp_discard |
|
796
|
+
alpha >parse_disp_tagged)
|
756
797
|
) @exit;
|
757
798
|
}%%
|
758
799
|
|
@@ -768,7 +809,7 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
|
768
809
|
return p + 1;
|
769
810
|
}
|
770
811
|
else if (cs == EDN_dispatch_error) {
|
771
|
-
error(__FUNCTION__, *p);
|
812
|
+
error(__FUNCTION__, "dispatch extend error", *p);
|
772
813
|
return pe;
|
773
814
|
}
|
774
815
|
else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
|
@@ -789,9 +830,9 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
|
789
830
|
begin_set = '{';
|
790
831
|
end_set = '}';
|
791
832
|
|
792
|
-
main := begin_set (
|
793
|
-
|
794
|
-
|
833
|
+
main := begin_set @open_seq (
|
834
|
+
ignore* sequence? :>> end_set @close_seq
|
835
|
+
) @err(close_err) @exit;
|
795
836
|
}%%
|
796
837
|
|
797
838
|
//
|
@@ -802,14 +843,14 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
|
|
802
843
|
static const char* EDN_TYPE = "set";
|
803
844
|
|
804
845
|
int cs;
|
805
|
-
VALUE elems
|
846
|
+
VALUE elems; // holds the set's elements as an array allocated in @open_seq
|
806
847
|
|
807
848
|
%% write init;
|
808
849
|
%% write exec;
|
809
850
|
|
810
851
|
if (cs >= EDN_set_first_final) {
|
811
852
|
// all elements collected; now convert to a set
|
812
|
-
v = Parser::
|
853
|
+
v = Parser::make_edn_type(EDNT_MAKE_SET_METHOD, elems);
|
813
854
|
return p + 1;
|
814
855
|
}
|
815
856
|
else if (cs == EDN_set_error) {
|
@@ -837,14 +878,12 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
|
|
837
878
|
|
838
879
|
action discard_value {
|
839
880
|
const char *np = parse_value(fpc, pe, v);
|
840
|
-
if (np) {
|
841
|
-
// this token is to be
|
881
|
+
if (np == NULL) { fhold; fbreak; } else {
|
882
|
+
// this token is to be discarded so store it in the
|
842
883
|
// discard stack - we really don't need to save it so this
|
843
884
|
// could be simplified
|
844
885
|
discard.push_back(v);
|
845
886
|
fexec np;
|
846
|
-
} else {
|
847
|
-
fhold; fbreak;
|
848
887
|
}
|
849
888
|
}
|
850
889
|
|
@@ -901,24 +940,41 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
|
901
940
|
machine EDN_tagged;
|
902
941
|
include EDN_common;
|
903
942
|
|
943
|
+
write data;
|
944
|
+
|
945
|
+
tag_symbol_chars_start = alpha;
|
946
|
+
tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
|
947
|
+
tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
|
948
|
+
|
949
|
+
tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
|
950
|
+
tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
|
951
|
+
|
952
|
+
tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
|
953
|
+
|
904
954
|
# inst = (string_delim [0-9+\-:\.TZ]* string_delim);
|
905
955
|
# uuid = (string_delim [a-f0-9\-]* string_delim);
|
906
956
|
|
907
|
-
|
908
|
-
|
909
|
-
action parse_symbol {
|
957
|
+
action parse_tag {
|
910
958
|
// parses the symbol portion of the pair
|
911
959
|
const char *np = parse_symbol(fpc, pe, sym_name);
|
912
|
-
if (np == NULL) { fhold; fbreak; } else {
|
960
|
+
if (np == NULL) { fhold; fbreak; } else {
|
961
|
+
sym_ok = true;
|
962
|
+
fexec np;
|
963
|
+
}
|
913
964
|
}
|
914
|
-
action
|
965
|
+
action parse_data {
|
915
966
|
// parses the value portion
|
916
967
|
const char *np = parse_value(fpc, pe, data);
|
917
|
-
if (np == NULL) { fhold; fbreak; } else {
|
968
|
+
if (np == NULL) { fhold; fbreak; } else {
|
969
|
+
data_ok = true;
|
970
|
+
fexec np;
|
971
|
+
}
|
918
972
|
}
|
919
973
|
|
920
|
-
|
921
|
-
|
974
|
+
main := (
|
975
|
+
tag_symbol >parse_tag ignore+
|
976
|
+
begin_value >parse_data
|
977
|
+
) @exit;
|
922
978
|
}%%
|
923
979
|
|
924
980
|
|
@@ -926,6 +982,8 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
|
926
982
|
{
|
927
983
|
VALUE sym_name = Qnil;
|
928
984
|
VALUE data = Qnil;
|
985
|
+
bool sym_ok = false;
|
986
|
+
bool data_ok = false;
|
929
987
|
|
930
988
|
int cs;
|
931
989
|
|
@@ -935,20 +993,27 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
|
935
993
|
if (cs >= EDN_tagged_first_final) {
|
936
994
|
//std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << data << std::endl;
|
937
995
|
|
996
|
+
if (!sym_ok || !data_ok) {
|
997
|
+
error(__FUNCTION__, "tagged element symbol error", *p);
|
998
|
+
v = EDNT_EOF_CONST;
|
999
|
+
return NULL;
|
1000
|
+
}
|
1001
|
+
|
938
1002
|
try {
|
939
1003
|
// tagged_element makes a call to ruby which may throw an
|
940
1004
|
// exception when parsing the data
|
941
|
-
v = Parser::
|
1005
|
+
v = Parser::make_edn_type(EDNT_TAGGED_ELEM_METHOD, sym_name, data);
|
1006
|
+
return p + 1;
|
942
1007
|
} catch (std::exception& e) {
|
943
1008
|
error(__FUNCTION__, e.what());
|
944
1009
|
return pe;
|
945
1010
|
}
|
946
|
-
return p + 1;
|
947
1011
|
}
|
948
1012
|
else if (cs == EDN_tagged_error) {
|
949
|
-
|
1013
|
+
error(__FUNCTION__, "tagged element symbol error", *p);
|
950
1014
|
}
|
951
1015
|
else if (cs == EDN_tagged_en_main) {} // silence ragel warning
|
1016
|
+
v = EDNT_EOF_CONST;
|
952
1017
|
return NULL;
|
953
1018
|
}
|
954
1019
|
|
@@ -966,13 +1031,13 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
|
966
1031
|
|
967
1032
|
write data;
|
968
1033
|
|
969
|
-
action
|
1034
|
+
action parse_data {
|
970
1035
|
const char *np = parse_value(fpc, pe, v);
|
971
|
-
if (np) {
|
1036
|
+
if (np == NULL) { fhold; fbreak; } else { fexec np; }
|
972
1037
|
}
|
973
1038
|
|
974
1039
|
main := begin_meta (
|
975
|
-
begin_value >
|
1040
|
+
begin_value >parse_data
|
976
1041
|
) @exit;
|
977
1042
|
}%%
|
978
1043
|
|
@@ -986,7 +1051,7 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
|
986
1051
|
%% write exec;
|
987
1052
|
|
988
1053
|
if (cs >= EDN_meta_first_final) {
|
989
|
-
|
1054
|
+
append_to_meta(v);
|
990
1055
|
return p + 1;
|
991
1056
|
}
|
992
1057
|
else if (cs == EDN_meta_error) {
|
@@ -1010,25 +1075,25 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
|
1010
1075
|
|
1011
1076
|
write data;
|
1012
1077
|
|
1013
|
-
action
|
1078
|
+
action parse_elem {
|
1014
1079
|
// save the count of metadata items before we parse this value
|
1015
1080
|
// so we can determine if we've read another metadata value or
|
1016
1081
|
// an actual data item
|
1017
|
-
std::size_t
|
1082
|
+
std::size_t meta_sz = meta_size();
|
1018
1083
|
const char* np = parse_value(fpc, pe, result);
|
1019
1084
|
if (np == NULL) { fexec pe; fbreak; } else {
|
1020
1085
|
// if we have metadata saved and it matches the count we
|
1021
1086
|
// saved before we parsed a value, then we must bind the
|
1022
1087
|
// metadata sequence to it
|
1023
|
-
if (!
|
1088
|
+
if (!meta_empty() && meta_size() == meta_sz) {
|
1024
1089
|
// this will empty the metadata sequence too
|
1025
|
-
result =
|
1090
|
+
result = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
|
1026
1091
|
}
|
1027
1092
|
fexec np;
|
1028
1093
|
}
|
1029
1094
|
}
|
1030
1095
|
|
1031
|
-
element = begin_value >
|
1096
|
+
element = begin_value >parse_elem;
|
1032
1097
|
next_element = ignore* element;
|
1033
1098
|
sequence = ((element ignore*) (next_element ignore*)*);
|
1034
1099
|
|
@@ -1039,16 +1104,15 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
|
|
1039
1104
|
VALUE edn::Parser::parse(const char* src, std::size_t len)
|
1040
1105
|
{
|
1041
1106
|
int cs;
|
1042
|
-
VALUE result =
|
1107
|
+
VALUE result = EDNT_EOF_CONST;
|
1043
1108
|
|
1044
1109
|
%% write init;
|
1045
1110
|
set_source(src, len);
|
1046
1111
|
%% write exec;
|
1047
1112
|
|
1048
1113
|
if (cs == EDN_parser_error) {
|
1049
|
-
|
1050
|
-
|
1051
|
-
return EDNT_EOF;
|
1114
|
+
error(__FUNCTION__, *p);
|
1115
|
+
return EDNT_EOF_CONST;
|
1052
1116
|
}
|
1053
1117
|
else if (cs == EDN_parser_first_final) {
|
1054
1118
|
p = pe = eof = NULL;
|
@@ -1065,50 +1129,52 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
|
|
1065
1129
|
machine EDN_tokens;
|
1066
1130
|
include EDN_common;
|
1067
1131
|
|
1068
|
-
write data nofinal;
|
1132
|
+
write data nofinal noerror;
|
1069
1133
|
|
1070
|
-
action
|
1134
|
+
action parse_token {
|
1071
1135
|
// we won't know if we've parsed a discard or a metadata until
|
1072
1136
|
// after parse_value() is done. Save the current number of
|
1073
1137
|
// elements in the metadata sequence; then we can check if it
|
1074
1138
|
// grew or if the discard sequence grew
|
1075
|
-
|
1139
|
+
meta_sz = meta_size();
|
1076
1140
|
|
1077
1141
|
const char* np = parse_value(fpc, pe, value);
|
1078
|
-
|
1079
1142
|
if (np == NULL) { fhold; fbreak; } else {
|
1080
|
-
if (
|
1081
|
-
// was
|
1082
|
-
//
|
1083
|
-
if (
|
1084
|
-
|
1143
|
+
if (!meta_empty()) {
|
1144
|
+
// was an additional metadata entry read? if so, don't
|
1145
|
+
// return a value
|
1146
|
+
if (meta_size() > meta_sz) {
|
1147
|
+
state = TOKEN_IS_META;
|
1085
1148
|
}
|
1086
1149
|
else {
|
1087
1150
|
// a value was read and there's a pending metadata
|
1088
1151
|
// sequence. Bind them.
|
1089
|
-
value =
|
1152
|
+
value = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
|
1153
|
+
state = TOKEN_OK;
|
1090
1154
|
}
|
1091
1155
|
} else if (!discard.empty()) {
|
1092
1156
|
// a discard read. Don't return a value
|
1093
|
-
|
1157
|
+
state = TOKEN_IS_DISCARD;
|
1158
|
+
} else {
|
1159
|
+
state = TOKEN_OK;
|
1094
1160
|
}
|
1095
1161
|
fexec np;
|
1096
1162
|
}
|
1097
1163
|
}
|
1098
1164
|
|
1099
|
-
main := ignore* begin_value >
|
1165
|
+
main := ignore* begin_value >parse_token ignore*;
|
1100
1166
|
}%%
|
1101
1167
|
|
1102
1168
|
|
1103
1169
|
//
|
1104
1170
|
//
|
1105
|
-
|
1171
|
+
edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
|
1106
1172
|
{
|
1107
1173
|
int cs;
|
1108
|
-
|
1174
|
+
eTokenState state = TOKEN_ERROR;
|
1109
1175
|
// need to track metadata read and bind it to the next value read
|
1110
1176
|
// - but must account for sequences of metadata values
|
1111
|
-
std::size_t
|
1177
|
+
std::size_t meta_sz;
|
1112
1178
|
|
1113
1179
|
// clear any previously saved discards; only track if read during
|
1114
1180
|
// this op
|
@@ -1117,12 +1183,8 @@ bool edn::Parser::parse_next(VALUE& value)
|
|
1117
1183
|
%% write init;
|
1118
1184
|
%% write exec;
|
1119
1185
|
|
1120
|
-
if (cs ==
|
1121
|
-
|
1122
|
-
}
|
1123
|
-
else if (cs == EDN_tokens_en_main) {} // silence ragel warning
|
1124
|
-
|
1125
|
-
return is_value;
|
1186
|
+
if (cs == EDN_tokens_en_main) {} // silence ragel warning
|
1187
|
+
return state;
|
1126
1188
|
}
|
1127
1189
|
|
1128
1190
|
|