edn_turbo 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,9 +23,7 @@
23
23
  comment = ';' cr_neg* counter;
24
24
  ignore = ws | comment;
25
25
 
26
- operators = [/\.\*!_\?$%&<>\=+\-];
27
- symbol_start = alpha;
28
- symbol_chars = symbol_start | digit | [\#:_\-\.\'];
26
+ operators = [/\.\*!_\?$%&<>\=+\-\'];
29
27
 
30
28
  begin_dispatch = '#';
31
29
  begin_keyword = ':';
@@ -37,10 +35,7 @@
37
35
  string_delim = '"';
38
36
  begin_number = digit;
39
37
  begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
40
- begin_symbol = symbol_start;
41
-
42
- symbol_name = symbol_start (symbol_chars)*;
43
- symbol = (symbol_name ('/' symbol_name)?);
38
+ begin_symbol = alpha;
44
39
 
45
40
  # int / decimal rules
46
41
  integer = ('0' | [1-9] digit*);
@@ -68,19 +63,19 @@
68
63
 
69
64
  write data;
70
65
 
71
- action parse_string {
66
+ action parse_val_string {
72
67
  // string types within double-quotes
73
68
  const char *np = parse_string(fpc, pe, v);
74
69
  if (np == NULL) { fhold; fbreak; } else fexec np;
75
70
  }
76
71
 
77
- action parse_keyword {
72
+ action parse_val_keyword {
78
73
  // tokens with a leading ':'
79
74
  const char *np = parse_keyword(fpc, pe, v);
80
75
  if (np == NULL) { fhold; fbreak; } else fexec np;
81
76
  }
82
77
 
83
- action parse_number {
78
+ action parse_val_number {
84
79
  // tokens w/ leading digits: non-negative integers & decimals.
85
80
  // try to parse a decimal first
86
81
  const char *np = parse_decimal(fpc, pe, v);
@@ -95,64 +90,64 @@
95
90
  fbreak;
96
91
  }
97
92
  else {
98
- error(__FUNCTION__, *p);
93
+ error(__FUNCTION__, "number format error", *p);
99
94
  fexec pe;
100
95
  }
101
96
  }
102
97
 
103
- action parse_operator {
98
+ action parse_val_operator {
104
99
  // stand-alone operators *, +, -, etc.
105
100
  const char *np = parse_operator(fpc, pe, v);
106
101
  if (np == NULL) { fhold; fbreak; } else fexec np;
107
102
  }
108
103
 
109
- action parse_char {
104
+ action parse_val_char {
110
105
  // tokens w/ leading \ (escaped characters \newline, \c, etc.)
111
106
  const char *np = parse_esc_char(fpc, pe, v);
112
107
  if (np == NULL) { fhold; fbreak; } else fexec np;
113
108
  }
114
109
 
115
- action parse_symbol {
110
+ action parse_val_symbol {
116
111
  // user identifiers and reserved keywords (true, false, nil)
117
112
  VALUE sym = Qnil;
118
113
  const char *np = parse_symbol(fpc, pe, sym);
119
- if (np == NULL) { fhold; fbreak; } else {
114
+ if (np == NULL) { fexec pe; } else {
120
115
  // parse_symbol will make 'sym' a ruby string
121
116
  if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
122
117
  else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
123
118
  else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
124
119
  else {
125
- v = Parser::make_edn_symbol(sym);
120
+ v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
126
121
  }
127
122
  fexec np;
128
123
  }
129
124
  }
130
125
 
131
- action parse_vector {
126
+ action parse_val_vector {
132
127
  // [
133
128
  const char *np = parse_vector(fpc, pe, v);
134
129
  if (np == NULL) { fhold; fbreak; } else fexec np;
135
130
  }
136
131
 
137
- action parse_list {
132
+ action parse_val_list {
138
133
  // (
139
134
  const char *np = parse_list(fpc, pe, v);
140
135
  if (np == NULL) { fhold; fbreak; } else fexec np;
141
136
  }
142
137
 
143
- action parse_map {
138
+ action parse_val_map {
144
139
  // {
145
140
  const char *np = parse_map(fpc, pe, v);
146
141
  if (np == NULL) { fhold; fbreak; } else fexec np;
147
142
  }
148
143
 
149
- action parse_meta {
144
+ action parse_val_meta {
150
145
  // ^
151
146
  const char *np = parse_meta(fpc, pe);
152
147
  if (np == NULL) { fhold; fbreak; } else fexec np;
153
148
  }
154
149
 
155
- action parse_dispatch {
150
+ action parse_val_dispatch {
156
151
  // handles tokens w/ leading # ("#_", "#{", and tagged elems)
157
152
  const char *np = parse_dispatch(fpc + 1, pe, v);
158
153
  if (np == NULL) { fhold; fbreak; } else fexec np;
@@ -160,23 +155,24 @@
160
155
 
161
156
 
162
157
  main := (
163
- string_delim >parse_string |
164
- begin_keyword >parse_keyword |
165
- begin_number >parse_number |
166
- operators >parse_operator |
167
- begin_char >parse_char |
168
- begin_symbol >parse_symbol |
169
- begin_vector >parse_vector |
170
- begin_list >parse_list |
171
- begin_map >parse_map |
172
- begin_meta >parse_meta |
173
- begin_dispatch >parse_dispatch
158
+ string_delim >parse_val_string |
159
+ begin_keyword >parse_val_keyword |
160
+ begin_number >parse_val_number |
161
+ operators >parse_val_operator |
162
+ begin_char >parse_val_char |
163
+ begin_symbol >parse_val_symbol |
164
+ begin_vector >parse_val_vector |
165
+ begin_list >parse_val_list |
166
+ begin_map >parse_val_map |
167
+ begin_meta >parse_val_meta |
168
+ begin_dispatch >parse_val_dispatch
174
169
  ) %*exit;
175
170
  }%%
176
171
 
177
172
 
178
173
  const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
179
174
  {
175
+ // std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
180
176
  int cs;
181
177
 
182
178
  %% write init;
@@ -186,7 +182,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
186
182
  return p;
187
183
  }
188
184
  else if (cs == EDN_value_error) {
189
- error(__FUNCTION__, *p);
185
+ error(__FUNCTION__, "token error", *p);
190
186
  return pe;
191
187
  }
192
188
  else if (cs == EDN_value_en_main) {} // silence ragel warning
@@ -207,7 +203,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
207
203
 
208
204
  write data;
209
205
 
210
- action parse_string {
206
+ action parse_chars {
211
207
  if (Parser::parse_byte_stream(p_save + 1, p, v, encode)) {
212
208
  fexec p + 1;
213
209
  } else {
@@ -220,11 +216,11 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
220
216
  }
221
217
 
222
218
  main := string_delim (
223
- (^([\"\\] | 0..0x1f | 0xc2..0xf5) |
219
+ (^([\"\\] | 0xc2..0xf5) |
224
220
  ((0xc2..0xf5) |
225
221
  '\\'[\"\\/bfnrt] |
226
222
  '\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
227
- '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string
223
+ '\\'^([\"\\/bfnrtu]))* %parse_chars
228
224
  ) :>> string_delim @err(close_err) @exit;
229
225
  }%%
230
226
 
@@ -258,15 +254,16 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
258
254
  machine EDN_keyword;
259
255
  include EDN_common;
260
256
 
261
- keyword_chars = symbol_chars | operators;
262
- keyword_start = symbol_start | [\#\./];
257
+ keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
258
+ keyword_chars = (keyword_start | digit | ':');
263
259
 
264
- keyword_name = keyword_start (keyword_chars)*;
260
+ keyword_name = keyword_start keyword_chars*;
261
+ keyword = keyword_name ('/' keyword_chars*)?;
265
262
 
266
263
  write data;
267
264
 
268
265
 
269
- main := begin_keyword keyword_name (^keyword_chars? @exit);
266
+ main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
270
267
  }%%
271
268
 
272
269
 
@@ -287,7 +284,7 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
287
284
  return p;
288
285
  }
289
286
  else if (cs == EDN_keyword_error) {
290
- error(__FUNCTION__, *p);
287
+ error(__FUNCTION__, "invalid keyword", *p);
291
288
  return pe;
292
289
  }
293
290
  else if (cs == EDN_keyword_en_main) {} // silence ragel warning
@@ -331,7 +328,7 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
331
328
 
332
329
 
333
330
  // ============================================================
334
- // integer parsing machine
331
+ // integer parsing machine - M suffix will return a BigNum
335
332
  //
336
333
  %%{
337
334
  machine EDN_integer;
@@ -376,17 +373,18 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
376
373
 
377
374
  write data;
378
375
 
379
- action parse_symbol {
376
+ action parse_op_symbol {
380
377
  // parse a symbol including the leading operator (-, +, .)
381
378
  VALUE sym = Qnil;
382
379
  const char *np = parse_symbol(p_save, pe, sym);
383
- if (np == NULL) { fhold; fbreak; } else {
384
- v = Parser::make_edn_symbol(sym);
380
+ if (np == NULL) { fexec pe; } else {
381
+ if (sym != Qnil)
382
+ v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
385
383
  fexec np;
386
384
  }
387
385
  }
388
386
 
389
- action parse_number {
387
+ action parse_op_number {
390
388
  // parse a number with the leading symbol - this is slightly
391
389
  // different than the one within EDN_value since it includes
392
390
  // the leading - or +
@@ -404,24 +402,27 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
404
402
  fbreak;
405
403
  }
406
404
  else {
407
- error(__FUNCTION__, *p);
405
+ error(__FUNCTION__, "number format error", *p);
408
406
  fexec pe;
409
407
  }
410
408
  }
411
409
 
412
- action parse_operator {
410
+ action parse_op {
413
411
  // stand-alone operators (-, +, /, ... etc)
414
412
  char op[2] = { *p_save, 0 };
415
413
  VALUE sym = rb_str_new2(op);
416
- v = Parser::make_edn_symbol(sym);
414
+ v = Parser::make_edn_type(EDNT_MAKE_SYMBOL_METHOD, sym);
417
415
  }
418
416
 
417
+ valid_non_numeric_chars = alpha|operators|':'|'#';
418
+ valid_chars = valid_non_numeric_chars | digit;
419
419
 
420
420
  main := (
421
- ('-'|'+'|'.') alpha >parse_symbol |
422
- ('-'|'+') begin_number >parse_number |
423
- operators ignore* >parse_operator
424
- ) ^(operators|alpha|digit)? @exit;
421
+ ('-'|'+') begin_number >parse_op_number |
422
+ (operators - [\-\+\.]) valid_chars >parse_op_symbol |
423
+ [\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
424
+ operators ignore* >parse_op
425
+ ) ^(valid_chars)? @exit;
425
426
  }%%
426
427
 
427
428
 
@@ -437,7 +438,7 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
437
438
  return p;
438
439
  }
439
440
  else if (cs == EDN_operator_error) {
440
- error(__FUNCTION__, *p);
441
+ error(__FUNCTION__, "symbol syntax error", *p);
441
442
  return pe;
442
443
  }
443
444
  else if (cs == EDN_operator_en_main) {} // silence ragel warning
@@ -455,12 +456,13 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
455
456
 
456
457
  write data;
457
458
 
458
- valid_chars = alpha;
459
+ valid_chars = extend;
459
460
 
460
461
 
461
- main := (
462
- begin_char valid_chars+ ignore*
463
- ) (^(valid_chars | '\\')? @exit);
462
+ main := begin_char (
463
+ 'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
464
+ valid_chars
465
+ ) (ignore* | [\\\]\}\)])? @exit;
464
466
  }%%
465
467
 
466
468
 
@@ -480,7 +482,7 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
480
482
  return p;
481
483
  }
482
484
  else if (cs == EDN_escaped_char_error) {
483
- error(__FUNCTION__, *p);
485
+ error(__FUNCTION__, "unexpected value", *p);
484
486
  return pe;
485
487
  }
486
488
  else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
@@ -502,10 +504,26 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
502
504
 
503
505
  write data;
504
506
 
507
+ symbol_ops_1 = [\.\-\+];
508
+ symbol_ops_2 = [\*!_\?$%&<>\=\'];
509
+ symbol_ops_3 = [:\#];
510
+
511
+ symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
512
+
513
+ symbol_chars = symbol_start | digit | symbol_ops_3;
514
+
515
+ symbol_name = (
516
+ (alpha symbol_chars*) |
517
+ (symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
518
+ (symbol_start symbol_chars+) |
519
+ operators{1}
520
+ );
521
+ symbol = '/' | (symbol_name ('/' symbol_name)?);
522
+
505
523
 
506
524
  main := (
507
- operators? symbol
508
- ) ignore* (^(symbol_chars | operators)? @exit);
525
+ symbol
526
+ ) ignore* (^(symbol_chars | '/')? @exit);
509
527
  }%%
510
528
 
511
529
 
@@ -525,8 +543,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
525
543
  return p;
526
544
  }
527
545
  else if (cs == EDN_symbol_error) {
528
- error(__FUNCTION__, *p);
529
- return pe;
546
+ error(__FUNCTION__, "invalid symbol sequence", *p);
530
547
  }
531
548
  else if (cs == EDN_symbol_en_main) {} // silence ragel warning
532
549
  return NULL;
@@ -544,25 +561,48 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
544
561
  machine EDN_sequence_common;
545
562
  include EDN_common;
546
563
 
564
+ action open_seq {
565
+ // sequences store elements in an array, then process it to
566
+ // convert it to a list, set, or map as needed once the
567
+ // sequence end is reached
568
+ elems = rb_ary_new();
569
+ // additionally, metadata for elements in the sequence may be
570
+ // carried so we must push a new level in the metadata stack
571
+ new_meta_list();
572
+ }
573
+
574
+ action close_seq {
575
+ // remove the current metadata level
576
+ del_top_meta_list();
577
+ }
578
+
547
579
  action parse_item {
548
580
  // reads an item within a sequence (vector, list, map, or
549
581
  // set). Regardless of the sequence type, an array of the
550
582
  // items is built. Once done, the sequence parser will convert
551
583
  // if needed
552
584
  VALUE e;
585
+ std::size_t meta_sz = meta_size();
553
586
  const char *np = parse_value(fpc, pe, e);
554
- if (np == NULL) {
555
- fhold; fbreak;
556
- } else {
587
+ if (np == NULL) { fhold; fbreak; } else {
557
588
  // if there's an entry in the discard list, the current
558
589
  // object is not meant to be kept due to a #_ so don't
559
590
  // push it into the list of elements
560
591
  if (!discard.empty()) {
561
592
  discard.pop_back();
562
593
  }
563
- else {
564
- // otherwise we add it to the list of elements for the
565
- // corresponding container
594
+ else if (!meta_empty()) {
595
+ // check if parse_value added metadata
596
+ if (meta_size() == meta_sz) {
597
+ // there's metadata and it didn't increase so
598
+ // parse_value() read an element we care
599
+ // about. Bind the metadata to it and add it to
600
+ // the sequence
601
+ e = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
602
+ rb_ary_push(elems, e);
603
+ }
604
+ } else {
605
+ // no metadata.. just push it
566
606
  rb_ary_push(elems, e);
567
607
  }
568
608
  fexec np;
@@ -584,10 +624,9 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
584
624
 
585
625
  write data;
586
626
 
587
- main := begin_vector (
588
- ignore* sequence? :>> end_vector
589
- )
590
- @err(close_err) @exit;
627
+ main := begin_vector @open_seq (
628
+ ignore* sequence? :>> end_vector @close_seq
629
+ ) @err(close_err) @exit;
591
630
  }%%
592
631
 
593
632
 
@@ -599,7 +638,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
599
638
  static const char* EDN_TYPE = "vector";
600
639
 
601
640
  int cs;
602
- VALUE elems = rb_ary_new(); // will store the vector's elements
641
+ VALUE elems; // will store the vector's elements - allocated in @open_seq
603
642
 
604
643
  %% write init;
605
644
  %% write exec;
@@ -609,7 +648,7 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
609
648
  return p + 1;
610
649
  }
611
650
  else if (cs == EDN_vector_error) {
612
- error(__FUNCTION__, *p);
651
+ error(__FUNCTION__, "vector format error", *p);
613
652
  return pe;
614
653
  }
615
654
  else if (cs == EDN_vector_en_main) {} // silence ragel warning
@@ -629,9 +668,9 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
629
668
 
630
669
  write data;
631
670
 
632
- main := begin_list (
633
- ignore* sequence? :>> end_list
634
- ) @err(close_err) @exit;
671
+ main := begin_list @open_seq (
672
+ ignore* sequence? :>> end_list @close_seq
673
+ ) @err(close_err) @exit;
635
674
  }%%
636
675
 
637
676
  //
@@ -642,13 +681,15 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
642
681
  static const char* EDN_TYPE = "list";
643
682
 
644
683
  int cs;
645
- VALUE elems = rb_ary_new();
684
+ VALUE elems; // stores the list's elements - allocated in @open_seq
646
685
 
647
686
  %% write init;
648
687
  %% write exec;
649
688
 
650
689
  if (cs >= EDN_list_first_final) {
651
690
  v = elems;
691
+ // TODO: replace with this but first figure out why array is not unrolled by EDN::list()
692
+ // v = Parser::make_edn_type(EDNT_MAKE_LIST_METHOD, elems);
652
693
  return p + 1;
653
694
  }
654
695
  else if (cs == EDN_list_error) {
@@ -673,9 +714,9 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
673
714
  write data;
674
715
 
675
716
 
676
- main := begin_map (
677
- ignore* (sequence)? :>> end_map
678
- ) @err(close_err) @exit;
717
+ main := begin_map @open_seq (
718
+ ignore* (sequence)? :>> end_map @close_seq
719
+ ) @err(close_err) @exit;
679
720
  }%%
680
721
 
681
722
 
@@ -685,8 +726,8 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
685
726
 
686
727
  int cs;
687
728
  // since we don't know whether we're looking at a key or value,
688
- // initially store all elements in a list
689
- VALUE elems = rb_ary_new();
729
+ // initially store all elements in an array (allocated in @open_seq)
730
+ VALUE elems;
690
731
 
691
732
  %% write init;
692
733
  %% write exec;
@@ -730,19 +771,19 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
730
771
 
731
772
  write data;
732
773
 
733
- action parse_set {
774
+ action parse_disp_set {
734
775
  // #{ }
735
776
  const char *np = parse_set(fpc, pe, v);
736
777
  if (np == NULL) { fhold; fbreak; } else fexec np;
737
778
  }
738
779
 
739
- action parse_discard {
780
+ action parse_disp_discard {
740
781
  // discard token #_
741
782
  const char *np = parse_discard(fpc, pe);
742
783
  if (np == NULL) { fhold; fbreak; } else fexec np;
743
784
  }
744
785
 
745
- action parse_tagged {
786
+ action parse_disp_tagged {
746
787
  // #inst, #uuid, or #user/tag
747
788
  const char *np = parse_tagged(fpc, pe, v);
748
789
  if (np == NULL) { fhold; fbreak; } else fexec np;
@@ -750,9 +791,9 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
750
791
 
751
792
 
752
793
  main := (
753
- ('{' >parse_set |
754
- '_' >parse_discard |
755
- alpha >parse_tagged)
794
+ ('{' >parse_disp_set |
795
+ '_' >parse_disp_discard |
796
+ alpha >parse_disp_tagged)
756
797
  ) @exit;
757
798
  }%%
758
799
 
@@ -768,7 +809,7 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
768
809
  return p + 1;
769
810
  }
770
811
  else if (cs == EDN_dispatch_error) {
771
- error(__FUNCTION__, *p);
812
+ error(__FUNCTION__, "dispatch extend error", *p);
772
813
  return pe;
773
814
  }
774
815
  else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
@@ -789,9 +830,9 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
789
830
  begin_set = '{';
790
831
  end_set = '}';
791
832
 
792
- main := begin_set (
793
- ignore* sequence? :>> end_set
794
- ) @err(close_err) @exit;
833
+ main := begin_set @open_seq (
834
+ ignore* sequence? :>> end_set @close_seq
835
+ ) @err(close_err) @exit;
795
836
  }%%
796
837
 
797
838
  //
@@ -802,14 +843,14 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
802
843
  static const char* EDN_TYPE = "set";
803
844
 
804
845
  int cs;
805
- VALUE elems = rb_ary_new(); // stored as an array
846
+ VALUE elems; // holds the set's elements as an array allocated in @open_seq
806
847
 
807
848
  %% write init;
808
849
  %% write exec;
809
850
 
810
851
  if (cs >= EDN_set_first_final) {
811
852
  // all elements collected; now convert to a set
812
- v = Parser::make_ruby_set(elems);
853
+ v = Parser::make_edn_type(EDNT_MAKE_SET_METHOD, elems);
813
854
  return p + 1;
814
855
  }
815
856
  else if (cs == EDN_set_error) {
@@ -837,14 +878,12 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
837
878
 
838
879
  action discard_value {
839
880
  const char *np = parse_value(fpc, pe, v);
840
- if (np) {
841
- // this token is to be discard it so store it in the
881
+ if (np == NULL) { fhold; fbreak; } else {
882
+ // this token is to be discarded so store it in the
842
883
  // discard stack - we really don't need to save it so this
843
884
  // could be simplified
844
885
  discard.push_back(v);
845
886
  fexec np;
846
- } else {
847
- fhold; fbreak;
848
887
  }
849
888
  }
850
889
 
@@ -901,24 +940,41 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
901
940
  machine EDN_tagged;
902
941
  include EDN_common;
903
942
 
943
+ write data;
944
+
945
+ tag_symbol_chars_start = alpha;
946
+ tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
947
+ tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
948
+
949
+ tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
950
+ tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
951
+
952
+ tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
953
+
904
954
  # inst = (string_delim [0-9+\-:\.TZ]* string_delim);
905
955
  # uuid = (string_delim [a-f0-9\-]* string_delim);
906
956
 
907
- write data;
908
-
909
- action parse_symbol {
957
+ action parse_tag {
910
958
  // parses the symbol portion of the pair
911
959
  const char *np = parse_symbol(fpc, pe, sym_name);
912
- if (np == NULL) { fhold; fbreak; } else { fexec np; }
960
+ if (np == NULL) { fhold; fbreak; } else {
961
+ sym_ok = true;
962
+ fexec np;
963
+ }
913
964
  }
914
- action parse_value {
965
+ action parse_data {
915
966
  // parses the value portion
916
967
  const char *np = parse_value(fpc, pe, data);
917
- if (np == NULL) { fhold; fbreak; } else { fexec np; }
968
+ if (np == NULL) { fhold; fbreak; } else {
969
+ data_ok = true;
970
+ fexec np;
971
+ }
918
972
  }
919
973
 
920
-
921
- main := (symbol >parse_symbol ignore* begin_value >parse_value) @exit;
974
+ main := (
975
+ tag_symbol >parse_tag ignore+
976
+ begin_value >parse_data
977
+ ) @exit;
922
978
  }%%
923
979
 
924
980
 
@@ -926,6 +982,8 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
926
982
  {
927
983
  VALUE sym_name = Qnil;
928
984
  VALUE data = Qnil;
985
+ bool sym_ok = false;
986
+ bool data_ok = false;
929
987
 
930
988
  int cs;
931
989
 
@@ -935,20 +993,27 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
935
993
  if (cs >= EDN_tagged_first_final) {
936
994
  //std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << data << std::endl;
937
995
 
996
+ if (!sym_ok || !data_ok) {
997
+ error(__FUNCTION__, "tagged element symbol error", *p);
998
+ v = EDNT_EOF_CONST;
999
+ return NULL;
1000
+ }
1001
+
938
1002
  try {
939
1003
  // tagged_element makes a call to ruby which may throw an
940
1004
  // exception when parsing the data
941
- v = Parser::tagged_element(sym_name, data);
1005
+ v = Parser::make_edn_type(EDNT_TAGGED_ELEM_METHOD, sym_name, data);
1006
+ return p + 1;
942
1007
  } catch (std::exception& e) {
943
1008
  error(__FUNCTION__, e.what());
944
1009
  return pe;
945
1010
  }
946
- return p + 1;
947
1011
  }
948
1012
  else if (cs == EDN_tagged_error) {
949
- return pe;
1013
+ error(__FUNCTION__, "tagged element symbol error", *p);
950
1014
  }
951
1015
  else if (cs == EDN_tagged_en_main) {} // silence ragel warning
1016
+ v = EDNT_EOF_CONST;
952
1017
  return NULL;
953
1018
  }
954
1019
 
@@ -966,13 +1031,13 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
966
1031
 
967
1032
  write data;
968
1033
 
969
- action parse_meta {
1034
+ action parse_data {
970
1035
  const char *np = parse_value(fpc, pe, v);
971
- if (np) { fexec np; } else { fhold; fbreak; }
1036
+ if (np == NULL) { fhold; fbreak; } else { fexec np; }
972
1037
  }
973
1038
 
974
1039
  main := begin_meta (
975
- begin_value >parse_meta
1040
+ begin_value >parse_data
976
1041
  ) @exit;
977
1042
  }%%
978
1043
 
@@ -986,7 +1051,7 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
986
1051
  %% write exec;
987
1052
 
988
1053
  if (cs >= EDN_meta_first_final) {
989
- metadata.push_back(v);
1054
+ append_to_meta(v);
990
1055
  return p + 1;
991
1056
  }
992
1057
  else if (cs == EDN_meta_error) {
@@ -1010,25 +1075,25 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
1010
1075
 
1011
1076
  write data;
1012
1077
 
1013
- action parse_value {
1078
+ action parse_elem {
1014
1079
  // save the count of metadata items before we parse this value
1015
1080
  // so we can determine if we've read another metadata value or
1016
1081
  // an actual data item
1017
- std::size_t meta_size = metadata.size();
1082
+ std::size_t meta_sz = meta_size();
1018
1083
  const char* np = parse_value(fpc, pe, result);
1019
1084
  if (np == NULL) { fexec pe; fbreak; } else {
1020
1085
  // if we have metadata saved and it matches the count we
1021
1086
  // saved before we parsed a value, then we must bind the
1022
1087
  // metadata sequence to it
1023
- if (!metadata.empty() && metadata.size() == meta_size) {
1088
+ if (!meta_empty() && meta_size() == meta_sz) {
1024
1089
  // this will empty the metadata sequence too
1025
- result = bind_meta_to_value(result);
1090
+ result = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
1026
1091
  }
1027
1092
  fexec np;
1028
1093
  }
1029
1094
  }
1030
1095
 
1031
- element = begin_value >parse_value;
1096
+ element = begin_value >parse_elem;
1032
1097
  next_element = ignore* element;
1033
1098
  sequence = ((element ignore*) (next_element ignore*)*);
1034
1099
 
@@ -1039,16 +1104,15 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
1039
1104
  VALUE edn::Parser::parse(const char* src, std::size_t len)
1040
1105
  {
1041
1106
  int cs;
1042
- VALUE result = Qnil;
1107
+ VALUE result = EDNT_EOF_CONST;
1043
1108
 
1044
1109
  %% write init;
1045
1110
  set_source(src, len);
1046
1111
  %% write exec;
1047
1112
 
1048
1113
  if (cs == EDN_parser_error) {
1049
- if (p)
1050
- error(__FUNCTION__, *p);
1051
- return EDNT_EOF;
1114
+ error(__FUNCTION__, *p);
1115
+ return EDNT_EOF_CONST;
1052
1116
  }
1053
1117
  else if (cs == EDN_parser_first_final) {
1054
1118
  p = pe = eof = NULL;
@@ -1065,50 +1129,52 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
1065
1129
  machine EDN_tokens;
1066
1130
  include EDN_common;
1067
1131
 
1068
- write data nofinal;
1132
+ write data nofinal noerror;
1069
1133
 
1070
- action parse_value {
1134
+ action parse_token {
1071
1135
  // we won't know if we've parsed a discard or a metadata until
1072
1136
  // after parse_value() is done. Save the current number of
1073
1137
  // elements in the metadata sequence; then we can check if it
1074
1138
  // grew or if the discard sequence grew
1075
- meta_size = metadata.size();
1139
+ meta_sz = meta_size();
1076
1140
 
1077
1141
  const char* np = parse_value(fpc, pe, value);
1078
-
1079
1142
  if (np == NULL) { fhold; fbreak; } else {
1080
- if (metadata.size() > 0) {
1081
- // was anotheran additional metadata entry read? if
1082
- // so, don't return a value
1083
- if (metadata.size() > meta_size) {
1084
- is_value = false;
1143
+ if (!meta_empty()) {
1144
+ // was an additional metadata entry read? if so, don't
1145
+ // return a value
1146
+ if (meta_size() > meta_sz) {
1147
+ state = TOKEN_IS_META;
1085
1148
  }
1086
1149
  else {
1087
1150
  // a value was read and there's a pending metadata
1088
1151
  // sequence. Bind them.
1089
- value = bind_meta_to_value(value);
1152
+ value = Parser::make_edn_type(EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
1153
+ state = TOKEN_OK;
1090
1154
  }
1091
1155
  } else if (!discard.empty()) {
1092
1156
  // a discard read. Don't return a value
1093
- is_value = false;
1157
+ state = TOKEN_IS_DISCARD;
1158
+ } else {
1159
+ state = TOKEN_OK;
1094
1160
  }
1095
1161
  fexec np;
1096
1162
  }
1097
1163
  }
1098
1164
 
1099
- main := ignore* begin_value >parse_value ignore*;
1165
+ main := ignore* begin_value >parse_token ignore*;
1100
1166
  }%%
1101
1167
 
1102
1168
 
1103
1169
  //
1104
1170
  //
1105
- bool edn::Parser::parse_next(VALUE& value)
1171
+ edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
1106
1172
  {
1107
1173
  int cs;
1108
- bool is_value = true;
1174
+ eTokenState state = TOKEN_ERROR;
1109
1175
  // need to track metadata read and bind it to the next value read
1110
1176
  // - but must account for sequences of metadata values
1111
- std::size_t meta_size;
1177
+ std::size_t meta_sz;
1112
1178
 
1113
1179
  // clear any previously saved discards; only track if read during
1114
1180
  // this op
@@ -1117,12 +1183,8 @@ bool edn::Parser::parse_next(VALUE& value)
1117
1183
  %% write init;
1118
1184
  %% write exec;
1119
1185
 
1120
- if (cs == EDN_parser_error) {
1121
- value = EDNT_EOF;
1122
- }
1123
- else if (cs == EDN_tokens_en_main) {} // silence ragel warning
1124
-
1125
- return is_value;
1186
+ if (cs == EDN_tokens_en_main) {} // silence ragel warning
1187
+ return state;
1126
1188
  }
1127
1189
 
1128
1190