edn_turbo 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -23,9 +23,7 @@
23
23
  comment = ';' cr_neg* counter;
24
24
  ignore = ws | comment;
25
25
 
26
- operators = [/\.\*!_\?$%&<>\=+\-];
27
- symbol_start = alpha;
28
- symbol_chars = symbol_start | digit | [\#:_\-\.\'];
26
+ operators = [/\.\*!_\?$%&<>\=+\-\'];
29
27
 
30
28
  begin_dispatch = '#';
31
29
  begin_keyword = ':';
@@ -37,10 +35,7 @@
37
35
  string_delim = '"';
38
36
  begin_number = digit;
39
37
  begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
40
- begin_symbol = symbol_start;
41
-
42
- symbol_name = symbol_start (symbol_chars)*;
43
- symbol = (symbol_name ('/' symbol_name)?);
38
+ begin_symbol = alpha;
44
39
 
45
40
  # int / decimal rules
46
41
  integer = ('0' | [1-9] digit*);
@@ -68,19 +63,19 @@
68
63
 
69
64
  write data;
70
65
 
71
- action parse_string {
66
+ action parse_val_string {
72
67
  // string types within double-quotes
73
68
  const char *np = parse_string(fpc, pe, v);
74
69
  if (np == NULL) { fhold; fbreak; } else fexec np;
75
70
  }
76
71
 
77
- action parse_keyword {
72
+ action parse_val_keyword {
78
73
  // tokens with a leading ':'
79
74
  const char *np = parse_keyword(fpc, pe, v);
80
75
  if (np == NULL) { fhold; fbreak; } else fexec np;
81
76
  }
82
77
 
83
- action parse_number {
78
+ action parse_val_number {
84
79
  // tokens w/ leading digits: non-negative integers & decimals.
85
80
  // try to parse a decimal first
86
81
  const char *np = parse_decimal(fpc, pe, v);
@@ -95,64 +90,64 @@
95
90
  fbreak;
96
91
  }
97
92
  else {
98
- error(__FUNCTION__, *p);
93
+ error(__FUNCTION__, "number format error", *p);
99
94
  fexec pe;
100
95
  }
101
96
  }
102
97
 
103
- action parse_operator {
98
+ action parse_val_operator {
104
99
  // stand-alone operators *, +, -, etc.
105
100
  const char *np = parse_operator(fpc, pe, v);
106
101
  if (np == NULL) { fhold; fbreak; } else fexec np;
107
102
  }
108
103
 
109
- action parse_char {
104
+ action parse_val_char {
110
105
  // tokens w/ leading \ (escaped characters \newline, \c, etc.)
111
106
  const char *np = parse_esc_char(fpc, pe, v);
112
107
  if (np == NULL) { fhold; fbreak; } else fexec np;
113
108
  }
114
109
 
115
- action parse_symbol {
110
+ action parse_val_symbol {
116
111
  // user identifiers and reserved keywords (true, false, nil)
117
112
  VALUE sym = Qnil;
118
113
  const char *np = parse_symbol(fpc, pe, sym);
119
- if (np == NULL) { fhold; fbreak; } else {
114
+ if (np == NULL) { fexec pe; } else {
120
115
  // parse_symbol will make 'sym' a ruby string
121
116
  if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
122
117
  else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
123
118
  else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
124
119
  else {
125
- v = Parser::make_edn_symbol(sym);
120
+ v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
126
121
  }
127
122
  fexec np;
128
123
  }
129
124
  }
130
125
 
131
- action parse_vector {
126
+ action parse_val_vector {
132
127
  // [
133
128
  const char *np = parse_vector(fpc, pe, v);
134
129
  if (np == NULL) { fhold; fbreak; } else fexec np;
135
130
  }
136
131
 
137
- action parse_list {
132
+ action parse_val_list {
138
133
  // (
139
134
  const char *np = parse_list(fpc, pe, v);
140
135
  if (np == NULL) { fhold; fbreak; } else fexec np;
141
136
  }
142
137
 
143
- action parse_map {
138
+ action parse_val_map {
144
139
  // {
145
140
  const char *np = parse_map(fpc, pe, v);
146
141
  if (np == NULL) { fhold; fbreak; } else fexec np;
147
142
  }
148
143
 
149
- action parse_meta {
144
+ action parse_val_meta {
150
145
  // ^
151
146
  const char *np = parse_meta(fpc, pe);
152
147
  if (np == NULL) { fhold; fbreak; } else fexec np;
153
148
  }
154
149
 
155
- action parse_dispatch {
150
+ action parse_val_dispatch {
156
151
  // handles tokens w/ leading # ("#_", "#{", and tagged elems)
157
152
  const char *np = parse_dispatch(fpc + 1, pe, v);
158
153
  if (np == NULL) { fhold; fbreak; } else fexec np;
@@ -160,23 +155,24 @@
160
155
 
161
156
 
162
157
  main := (
163
- string_delim >parse_string |
164
- begin_keyword >parse_keyword |
165
- begin_number >parse_number |
166
- operators >parse_operator |
167
- begin_char >parse_char |
168
- begin_symbol >parse_symbol |
169
- begin_vector >parse_vector |
170
- begin_list >parse_list |
171
- begin_map >parse_map |
172
- begin_meta >parse_meta |
173
- begin_dispatch >parse_dispatch
158
+ string_delim >parse_val_string |
159
+ begin_keyword >parse_val_keyword |
160
+ begin_number >parse_val_number |
161
+ operators >parse_val_operator |
162
+ begin_char >parse_val_char |
163
+ begin_symbol >parse_val_symbol |
164
+ begin_vector >parse_val_vector |
165
+ begin_list >parse_val_list |
166
+ begin_map >parse_val_map |
167
+ begin_meta >parse_val_meta |
168
+ begin_dispatch >parse_val_dispatch
174
169
  ) %*exit;
175
170
  }%%
176
171
 
177
172
 
178
173
  const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
179
174
  {
175
+ // std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
180
176
  int cs;
181
177
 
182
178
  %% write init;
@@ -186,7 +182,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
186
182
  return p;
187
183
  }
188
184
  else if (cs == EDN_value_error) {
189
- error(__FUNCTION__, *p);
185
+ error(__FUNCTION__, "token error", *p);
190
186
  return pe;
191
187
  }
192
188
  else if (cs == EDN_value_en_main) {} // silence ragel warning
@@ -207,7 +203,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
207
203
 
208
204
  write data;
209
205
 
210
- action parse_string {
206
+ action parse_chars {
211
207
  if (Parser::parse_byte_stream(p_save + 1, p, v, encode)) {
212
208
  fexec p + 1;
213
209
  } else {
@@ -220,11 +216,11 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
220
216
  }
221
217
 
222
218
  main := string_delim (
223
- (^([\"\\] | 0..0x1f | 0xc2..0xf5) |
219
+ (^([\"\\] | 0xc2..0xf5) |
224
220
  ((0xc2..0xf5) |
225
221
  '\\'[\"\\/bfnrt] |
226
222
  '\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
227
- '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string
223
+ '\\'^([\"\\/bfnrtu]))* %parse_chars
228
224
  ) :>> string_delim @err(close_err) @exit;
229
225
  }%%
230
226
 
@@ -258,15 +254,16 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
258
254
  machine EDN_keyword;
259
255
  include EDN_common;
260
256
 
261
- keyword_chars = symbol_chars | operators;
262
- keyword_start = symbol_start | [\#\./];
257
+ keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
258
+ keyword_chars = (keyword_start | digit | ':');
263
259
 
264
- keyword_name = keyword_start (keyword_chars)*;
260
+ keyword_name = keyword_start keyword_chars*;
261
+ keyword = keyword_name ('/' keyword_chars*)?;
265
262
 
266
263
  write data;
267
264
 
268
265
 
269
- main := begin_keyword keyword_name (^keyword_chars? @exit);
266
+ main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
270
267
  }%%
271
268
 
272
269
 
@@ -287,7 +284,7 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
287
284
  return p;
288
285
  }
289
286
  else if (cs == EDN_keyword_error) {
290
- error(__FUNCTION__, *p);
287
+ error(__FUNCTION__, "invalid keyword", *p);
291
288
  return pe;
292
289
  }
293
290
  else if (cs == EDN_keyword_en_main) {} // silence ragel warning
@@ -331,7 +328,7 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
331
328
 
332
329
 
333
330
  // ============================================================
334
- // integer parsing machine
331
+ // integer parsing machine - M suffix will return a BigNum
335
332
  //
336
333
  %%{
337
334
  machine EDN_integer;
@@ -376,17 +373,18 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
376
373
 
377
374
  write data;
378
375
 
379
- action parse_symbol {
376
+ action parse_op_symbol {
380
377
  // parse a symbol including the leading operator (-, +, .)
381
378
  VALUE sym = Qnil;
382
379
  const char *np = parse_symbol(p_save, pe, sym);
383
- if (np == NULL) { fhold; fbreak; } else {
384
- v = Parser::make_edn_symbol(sym);
380
+ if (np == NULL) { fexec pe; } else {
381
+ if (sym != Qnil)
382
+ v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
385
383
  fexec np;
386
384
  }
387
385
  }
388
386
 
389
- action parse_number {
387
+ action parse_op_number {
390
388
  // parse a number with the leading symbol - this is slightly
391
389
  // different than the one within EDN_value since it includes
392
390
  // the leading - or +
@@ -404,24 +402,27 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
404
402
  fbreak;
405
403
  }
406
404
  else {
407
- error(__FUNCTION__, *p);
405
+ error(__FUNCTION__, "number format error", *p);
408
406
  fexec pe;
409
407
  }
410
408
  }
411
409
 
412
- action parse_operator {
410
+ action parse_op {
413
411
  // stand-alone operators (-, +, /, ... etc)
414
412
  char op[2] = { *p_save, 0 };
415
413
  VALUE sym = rb_str_new2(op);
416
- v = Parser::make_edn_symbol(sym);
414
+ v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
417
415
  }
418
416
 
417
+ valid_non_numeric_chars = alpha|operators|':'|'#';
418
+ valid_chars = valid_non_numeric_chars | digit;
419
419
 
420
420
  main := (
421
- ('-'|'+'|'.') alpha >parse_symbol |
422
- ('-'|'+') begin_number >parse_number |
423
- operators ignore* >parse_operator
424
- ) ^(operators|alpha|digit)? @exit;
421
+ ('-'|'+') begin_number >parse_op_number |
422
+ (operators - [\-\+\.]) valid_chars >parse_op_symbol |
423
+ [\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
424
+ operators ignore* >parse_op
425
+ ) ^(valid_chars)? @exit;
425
426
  }%%
426
427
 
427
428
 
@@ -437,7 +438,7 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
437
438
  return p;
438
439
  }
439
440
  else if (cs == EDN_operator_error) {
440
- error(__FUNCTION__, *p);
441
+ error(__FUNCTION__, "symbol syntax error", *p);
441
442
  return pe;
442
443
  }
443
444
  else if (cs == EDN_operator_en_main) {} // silence ragel warning
@@ -455,12 +456,13 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
455
456
 
456
457
  write data;
457
458
 
458
- valid_chars = alpha;
459
+ valid_chars = extend;
459
460
 
460
461
 
461
- main := (
462
- begin_char valid_chars+ ignore*
463
- ) (^(valid_chars | '\\')? @exit);
462
+ main := begin_char (
463
+ 'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
464
+ valid_chars
465
+ ) (ignore* | [\\\]\}\)])? @exit;
464
466
  }%%
465
467
 
466
468
 
@@ -480,7 +482,7 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
480
482
  return p;
481
483
  }
482
484
  else if (cs == EDN_escaped_char_error) {
483
- error(__FUNCTION__, *p);
485
+ error(__FUNCTION__, "unexpected value", *p);
484
486
  return pe;
485
487
  }
486
488
  else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
@@ -502,10 +504,26 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
502
504
 
503
505
  write data;
504
506
 
507
+ symbol_ops_1 = [\.\-\+];
508
+ symbol_ops_2 = [\*!_\?$%&<>\=\'];
509
+ symbol_ops_3 = [:\#];
510
+
511
+ symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
512
+
513
+ symbol_chars = symbol_start | digit | symbol_ops_3;
514
+
515
+ symbol_name = (
516
+ (alpha symbol_chars*) |
517
+ (symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
518
+ (symbol_start symbol_chars+) |
519
+ operators{1}
520
+ );
521
+ symbol = '/' | (symbol_name ('/' symbol_name)?);
522
+
505
523
 
506
524
  main := (
507
- operators? symbol
508
- ) ignore* (^(symbol_chars | operators)? @exit);
525
+ symbol
526
+ ) ignore* (^(symbol_chars | '/')? @exit);
509
527
  }%%
510
528
 
511
529
 
@@ -525,8 +543,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
525
543
  return p;
526
544
  }
527
545
  else if (cs == EDN_symbol_error) {
528
- error(__FUNCTION__, *p);
529
- return pe;
546
+ error(__FUNCTION__, "invalid symbol sequence", *p);
530
547
  }
531
548
  else if (cs == EDN_symbol_en_main) {} // silence ragel warning
532
549
  return NULL;
@@ -544,25 +561,48 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
544
561
  machine EDN_sequence_common;
545
562
  include EDN_common;
546
563
 
564
+ action open_seq {
565
+ // sequences store elements in an array, then process it to
566
+ // convert it to a list, set, or map as needed once the
567
+ // sequence end is reached
568
+ elems = rb_ary_new();
569
+ // additionally, metadata for elements in the sequence may be
570
+ // carried so we must push a new level in the metadata stack
571
+ new_meta_list();
572
+ }
573
+
574
+ action close_seq {
575
+ // remove the current metadata level
576
+ del_top_meta_list();
577
+ }
578
+
547
579
  action parse_item {
548
580
  // reads an item within a sequence (vector, list, map, or
549
581
  // set). Regardless of the sequence type, an array of the
550
582
  // items is built. Once done, the sequence parser will convert
551
583
  // if needed
552
584
  VALUE e;
585
+ std::size_t meta_sz = meta_size();
553
586
  const char *np = parse_value(fpc, pe, e);
554
- if (np == NULL) {
555
- fhold; fbreak;
556
- } else {
587
+ if (np == NULL) { fhold; fbreak; } else {
557
588
  // if there's an entry in the discard list, the current
558
589
  // object is not meant to be kept due to a #_ so don't
559
590
  // push it into the list of elements
560
591
  if (!discard.empty()) {
561
592
  discard.pop_back();
562
593
  }
563
- else {
564
- // otherwise we add it to the list of elements for the
565
- // corresponding container
594
+ else if (!meta_empty()) {
595
+ // check if parse_value added metadata
596
+ if (meta_size() == meta_sz) {
597
+ // there's metadata and it didn't increase so
598
+ // parse_value() read an element we care
599
+ // about. Bind the metadata to it and add it to
600
+ // the sequence
601
+ e = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
602
+ rb_ary_push(elems, e);
603
+ }
604
+ } else {
605
+ // no metadata.. just push it
566
606
  rb_ary_push(elems, e);
567
607
  }
568
608
  fexec np;
@@ -584,10 +624,9 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
584
624
 
585
625
  write data;
586
626
 
587
- main := begin_vector (
588
- ignore* sequence? :>> end_vector
589
- )
590
- @err(close_err) @exit;
627
+ main := begin_vector @open_seq (
628
+ ignore* sequence? :>> end_vector @close_seq
629
+ ) @err(close_err) @exit;
591
630
  }%%
592
631
 
593
632
 
@@ -599,7 +638,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
599
638
  static const char* EDN_TYPE = "vector";
600
639
 
601
640
  int cs;
602
- VALUE elems = rb_ary_new(); // will store the vector's elements
641
+ VALUE elems; // will store the vector's elements - allocated in @open_seq
603
642
 
604
643
  %% write init;
605
644
  %% write exec;
@@ -609,7 +648,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
609
648
  return p + 1;
610
649
  }
611
650
  else if (cs == EDN_vector_error) {
612
- error(__FUNCTION__, *p);
651
+ error(__FUNCTION__, "vector format error", *p);
613
652
  return pe;
614
653
  }
615
654
  else if (cs == EDN_vector_en_main) {} // silence ragel warning
@@ -629,9 +668,9 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
629
668
 
630
669
  write data;
631
670
 
632
- main := begin_list (
633
- ignore* sequence? :>> end_list
634
- ) @err(close_err) @exit;
671
+ main := begin_list @open_seq (
672
+ ignore* sequence? :>> end_list @close_seq
673
+ ) @err(close_err) @exit;
635
674
  }%%
636
675
 
637
676
  //
@@ -642,13 +681,15 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
642
681
  static const char* EDN_TYPE = "list";
643
682
 
644
683
  int cs;
645
- VALUE elems = rb_ary_new();
684
+ VALUE elems; // stores the list's elements - allocated in @open_seq
646
685
 
647
686
  %% write init;
648
687
  %% write exec;
649
688
 
650
689
  if (cs >= EDN_list_first_final) {
651
690
  v = elems;
691
+ // TODO: replace with this but first figure out why array is not unrolled by EDN::list()
692
+ // v = Parser::make_edn_type(EDNT_MAKE_LIST_METHOD, elems);
652
693
  return p + 1;
653
694
  }
654
695
  else if (cs == EDN_list_error) {
@@ -673,9 +714,9 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
673
714
  write data;
674
715
 
675
716
 
676
- main := begin_map (
677
- ignore* (sequence)? :>> end_map
678
- ) @err(close_err) @exit;
717
+ main := begin_map @open_seq (
718
+ ignore* (sequence)? :>> end_map @close_seq
719
+ ) @err(close_err) @exit;
679
720
  }%%
680
721
 
681
722
 
@@ -685,8 +726,8 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
685
726
 
686
727
  int cs;
687
728
  // since we don't know whether we're looking at a key or value,
688
- // initially store all elements in a list
689
- VALUE elems = rb_ary_new();
729
+ // initially store all elements in an array (allocated in @open_seq)
730
+ VALUE elems;
690
731
 
691
732
  %% write init;
692
733
  %% write exec;
@@ -730,19 +771,19 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
730
771
 
731
772
  write data;
732
773
 
733
- action parse_set {
774
+ action parse_disp_set {
734
775
  // #{ }
735
776
  const char *np = parse_set(fpc, pe, v);
736
777
  if (np == NULL) { fhold; fbreak; } else fexec np;
737
778
  }
738
779
 
739
- action parse_discard {
780
+ action parse_disp_discard {
740
781
  // discard token #_
741
782
  const char *np = parse_discard(fpc, pe);
742
783
  if (np == NULL) { fhold; fbreak; } else fexec np;
743
784
  }
744
785
 
745
- action parse_tagged {
786
+ action parse_disp_tagged {
746
787
  // #inst, #uuid, or #user/tag
747
788
  const char *np = parse_tagged(fpc, pe, v);
748
789
  if (np == NULL) { fhold; fbreak; } else fexec np;
@@ -750,9 +791,9 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
750
791
 
751
792
 
752
793
  main := (
753
- ('{' >parse_set |
754
- '_' >parse_discard |
755
- alpha >parse_tagged)
794
+ ('{' >parse_disp_set |
795
+ '_' >parse_disp_discard |
796
+ alpha >parse_disp_tagged)
756
797
  ) @exit;
757
798
  }%%
758
799
 
@@ -768,7 +809,7 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
768
809
  return p + 1;
769
810
  }
770
811
  else if (cs == EDN_dispatch_error) {
771
- error(__FUNCTION__, *p);
812
+ error(__FUNCTION__, "dispatch extend error", *p);
772
813
  return pe;
773
814
  }
774
815
  else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
@@ -789,9 +830,9 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
789
830
  begin_set = '{';
790
831
  end_set = '}';
791
832
 
792
- main := begin_set (
793
- ignore* sequence? :>> end_set
794
- ) @err(close_err) @exit;
833
+ main := begin_set @open_seq (
834
+ ignore* sequence? :>> end_set @close_seq
835
+ ) @err(close_err) @exit;
795
836
  }%%
796
837
 
797
838
  //
@@ -802,14 +843,14 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
802
843
  static const char* EDN_TYPE = "set";
803
844
 
804
845
  int cs;
805
- VALUE elems = rb_ary_new(); // stored as an array
846
+ VALUE elems; // holds the set's elements as an array allocated in @open_seq
806
847
 
807
848
  %% write init;
808
849
  %% write exec;
809
850
 
810
851
  if (cs >= EDN_set_first_final) {
811
852
  // all elements collected; now convert to a set
812
- v = Parser::make_ruby_set(elems);
853
+ v = Parser::make_edn_type(EDNT_MAKE_SET_METHOD, elems);
813
854
  return p + 1;
814
855
  }
815
856
  else if (cs == EDN_set_error) {
@@ -837,14 +878,12 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
837
878
 
838
879
  action discard_value {
839
880
  const char *np = parse_value(fpc, pe, v);
840
- if (np) {
841
- // this token is to be discard it so store it in the
881
+ if (np == NULL) { fhold; fbreak; } else {
882
+ // this token is to be discarded so store it in the
842
883
  // discard stack - we really don't need to save it so this
843
884
  // could be simplified
844
885
  discard.push_back(v);
845
886
  fexec np;
846
- } else {
847
- fhold; fbreak;
848
887
  }
849
888
  }
850
889
 
@@ -901,24 +940,41 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
901
940
  machine EDN_tagged;
902
941
  include EDN_common;
903
942
 
943
+ write data;
944
+
945
+ tag_symbol_chars_start = alpha;
946
+ tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
947
+ tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
948
+
949
+ tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
950
+ tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
951
+
952
+ tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
953
+
904
954
  # inst = (string_delim [0-9+\-:\.TZ]* string_delim);
905
955
  # uuid = (string_delim [a-f0-9\-]* string_delim);
906
956
 
907
- write data;
908
-
909
- action parse_symbol {
957
+ action parse_tag {
910
958
  // parses the symbol portion of the pair
911
959
  const char *np = parse_symbol(fpc, pe, sym_name);
912
- if (np == NULL) { fhold; fbreak; } else { fexec np; }
960
+ if (np == NULL) { fhold; fbreak; } else {
961
+ sym_ok = true;
962
+ fexec np;
963
+ }
913
964
  }
914
- action parse_value {
965
+ action parse_data {
915
966
  // parses the value portion
916
967
  const char *np = parse_value(fpc, pe, data);
917
- if (np == NULL) { fhold; fbreak; } else { fexec np; }
968
+ if (np == NULL) { fhold; fbreak; } else {
969
+ data_ok = true;
970
+ fexec np;
971
+ }
918
972
  }
919
973
 
920
-
921
- main := (symbol >parse_symbol ignore* begin_value >parse_value) @exit;
974
+ main := (
975
+ tag_symbol >parse_tag ignore+
976
+ begin_value >parse_data
977
+ ) @exit;
922
978
  }%%
923
979
 
924
980
 
@@ -926,6 +982,8 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
926
982
  {
927
983
  VALUE sym_name = Qnil;
928
984
  VALUE data = Qnil;
985
+ bool sym_ok = false;
986
+ bool data_ok = false;
929
987
 
930
988
  int cs;
931
989
 
@@ -935,20 +993,27 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
935
993
  if (cs >= EDN_tagged_first_final) {
936
994
  //std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << data << std::endl;
937
995
 
996
+ if (!sym_ok || !data_ok) {
997
+ error(__FUNCTION__, "tagged element symbol error", *p);
998
+ v = EDNT_EOF_CONST;
999
+ return NULL;
1000
+ }
1001
+
938
1002
  try {
939
1003
  // tagged_element makes a call to ruby which may throw an
940
1004
  // exception when parsing the data
941
- v = Parser::tagged_element(sym_name, data);
1005
+ v = Parser::make_edn_type(EDNT_TAGGED_ELEM_METHOD, sym_name, data);
1006
+ return p + 1;
942
1007
  } catch (std::exception& e) {
943
1008
  error(__FUNCTION__, e.what());
944
1009
  return pe;
945
1010
  }
946
- return p + 1;
947
1011
  }
948
1012
  else if (cs == EDN_tagged_error) {
949
- return pe;
1013
+ error(__FUNCTION__, "tagged element symbol error", *p);
950
1014
  }
951
1015
  else if (cs == EDN_tagged_en_main) {} // silence ragel warning
1016
+ v = EDNT_EOF_CONST;
952
1017
  return NULL;
953
1018
  }
954
1019
 
@@ -966,13 +1031,13 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
966
1031
 
967
1032
  write data;
968
1033
 
969
- action parse_meta {
1034
+ action parse_data {
970
1035
  const char *np = parse_value(fpc, pe, v);
971
- if (np) { fexec np; } else { fhold; fbreak; }
1036
+ if (np == NULL) { fhold; fbreak; } else { fexec np; }
972
1037
  }
973
1038
 
974
1039
  main := begin_meta (
975
- begin_value >parse_meta
1040
+ begin_value >parse_data
976
1041
  ) @exit;
977
1042
  }%%
978
1043
 
@@ -986,7 +1051,7 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
986
1051
  %% write exec;
987
1052
 
988
1053
  if (cs >= EDN_meta_first_final) {
989
- metadata.push_back(v);
1054
+ append_to_meta(v);
990
1055
  return p + 1;
991
1056
  }
992
1057
  else if (cs == EDN_meta_error) {
@@ -1010,25 +1075,25 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
1010
1075
 
1011
1076
  write data;
1012
1077
 
1013
- action parse_value {
1078
+ action parse_elem {
1014
1079
  // save the count of metadata items before we parse this value
1015
1080
  // so we can determine if we've read another metadata value or
1016
1081
  // an actual data item
1017
- std::size_t meta_size = metadata.size();
1082
+ std::size_t meta_sz = meta_size();
1018
1083
  const char* np = parse_value(fpc, pe, result);
1019
1084
  if (np == NULL) { fexec pe; fbreak; } else {
1020
1085
  // if we have metadata saved and it matches the count we
1021
1086
  // saved before we parsed a value, then we must bind the
1022
1087
  // metadata sequence to it
1023
- if (!metadata.empty() && metadata.size() == meta_size) {
1088
+ if (!meta_empty() && meta_size() == meta_sz) {
1024
1089
  // this will empty the metadata sequence too
1025
- result = bind_meta_to_value(result);
1090
+ result = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
1026
1091
  }
1027
1092
  fexec np;
1028
1093
  }
1029
1094
  }
1030
1095
 
1031
- element = begin_value >parse_value;
1096
+ element = begin_value >parse_elem;
1032
1097
  next_element = ignore* element;
1033
1098
  sequence = ((element ignore*) (next_element ignore*)*);
1034
1099
 
@@ -1039,16 +1104,15 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
1039
1104
  VALUE edn::Parser::parse(const char* src, std::size_t len)
1040
1105
  {
1041
1106
  int cs;
1042
- VALUE result = Qnil;
1107
+ VALUE result = EDNT_EOF_CONST;
1043
1108
 
1044
1109
  %% write init;
1045
1110
  set_source(src, len);
1046
1111
  %% write exec;
1047
1112
 
1048
1113
  if (cs == EDN_parser_error) {
1049
- if (p)
1050
- error(__FUNCTION__, *p);
1051
- return EDNT_EOF;
1114
+ error(__FUNCTION__, *p);
1115
+ return EDNT_EOF_CONST;
1052
1116
  }
1053
1117
  else if (cs == EDN_parser_first_final) {
1054
1118
  p = pe = eof = NULL;
@@ -1065,50 +1129,52 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
1065
1129
  machine EDN_tokens;
1066
1130
  include EDN_common;
1067
1131
 
1068
- write data nofinal;
1132
+ write data nofinal noerror;
1069
1133
 
1070
- action parse_value {
1134
+ action parse_token {
1071
1135
  // we won't know if we've parsed a discard or a metadata until
1072
1136
  // after parse_value() is done. Save the current number of
1073
1137
  // elements in the metadata sequence; then we can check if it
1074
1138
  // grew or if the discard sequence grew
1075
- meta_size = metadata.size();
1139
+ meta_sz = meta_size();
1076
1140
 
1077
1141
  const char* np = parse_value(fpc, pe, value);
1078
-
1079
1142
  if (np == NULL) { fhold; fbreak; } else {
1080
- if (metadata.size() > 0) {
1081
- // was anotheran additional metadata entry read? if
1082
- // so, don't return a value
1083
- if (metadata.size() > meta_size) {
1084
- is_value = false;
1143
+ if (!meta_empty()) {
1144
+ // was an additional metadata entry read? if so, don't
1145
+ // return a value
1146
+ if (meta_size() > meta_sz) {
1147
+ state = TOKEN_IS_META;
1085
1148
  }
1086
1149
  else {
1087
1150
  // a value was read and there's a pending metadata
1088
1151
  // sequence. Bind them.
1089
- value = bind_meta_to_value(value);
1152
+ value = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
1153
+ state = TOKEN_OK;
1090
1154
  }
1091
1155
  } else if (!discard.empty()) {
1092
1156
  // a discard read. Don't return a value
1093
- is_value = false;
1157
+ state = TOKEN_IS_DISCARD;
1158
+ } else {
1159
+ state = TOKEN_OK;
1094
1160
  }
1095
1161
  fexec np;
1096
1162
  }
1097
1163
  }
1098
1164
 
1099
- main := ignore* begin_value >parse_value ignore*;
1165
+ main := ignore* begin_value >parse_token ignore*;
1100
1166
  }%%
1101
1167
 
1102
1168
 
1103
1169
  //
1104
1170
  //
1105
- bool edn::Parser::parse_next(VALUE& value)
1171
+ edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
1106
1172
  {
1107
1173
  int cs;
1108
- bool is_value = true;
1174
+ eTokenState state = TOKEN_ERROR;
1109
1175
  // need to track metadata read and bind it to the next value read
1110
1176
  // - but must account for sequences of metadata values
1111
- std::size_t meta_size;
1177
+ std::size_t meta_sz;
1112
1178
 
1113
1179
  // clear any previously saved discards; only track if read during
1114
1180
  // this op
@@ -1117,12 +1183,8 @@ bool edn::Parser::parse_next(VALUE& value)
1117
1183
  %% write init;
1118
1184
  %% write exec;
1119
1185
 
1120
- if (cs == EDN_parser_error) {
1121
- value = EDNT_EOF;
1122
- }
1123
- else if (cs == EDN_tokens_en_main) {} // silence ragel warning
1124
-
1125
- return is_value;
1186
+ if (cs == EDN_tokens_en_main) {} // silence ragel warning
1187
+ return state;
1126
1188
  }
1127
1189
 
1128
1190