edn_turbo 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,43 +41,43 @@
41
41
  //
42
42
 
43
43
  %%{
44
- machine EDN_common;
45
-
46
- cr = '\n';
47
- counter = ( cr @{ line_number++; } );
48
- cr_neg = [^\n];
49
- ws = [\t\v\f\r ] | ',' | counter;
50
- comment = ';' cr_neg* counter;
51
- ignore = ws | comment;
52
-
53
- operators = [/\.\*!_\?$%&<>\=+\-\'];
54
-
55
- begin_dispatch = '#';
56
- begin_keyword = ':';
57
- begin_char = '\\';
58
- begin_vector = '[';
59
- begin_map = '{';
60
- begin_list = '(';
61
- begin_meta = '^';
62
- string_delim = '"';
63
- begin_number = digit;
64
- begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
65
- begin_symbol = alpha;
66
-
67
- # int / decimal rules
68
- integer = ('0' | [1-9] digit*);
69
- exp = ([Ee] [+\-]? digit+);
70
-
71
-
72
- # common actions
73
- action close_err {
74
- std::stringstream s;
75
- s << "unterminated " << EDN_TYPE;
76
- error(__FUNCTION__, s.str());
77
- fhold; fbreak;
78
- }
79
-
80
- action exit { fhold; fbreak; }
44
+ machine EDN_common;
45
+
46
+ cr = '\n';
47
+ counter = ( cr @{ line_number++; } );
48
+ cr_neg = [^\n];
49
+ ws = [\t\v\f\r ] | ',' | counter;
50
+ comment = ';' cr_neg* counter;
51
+ ignore = ws | comment;
52
+
53
+ operators = [/\.\*!_\?$%&<>\=+\-\'];
54
+
55
+ begin_dispatch = '#';
56
+ begin_keyword = ':';
57
+ begin_char = '\\';
58
+ begin_vector = '[';
59
+ begin_map = '{';
60
+ begin_list = '(';
61
+ begin_meta = '^';
62
+ string_delim = '"';
63
+ begin_number = digit;
64
+ begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
65
+ begin_symbol = alpha;
66
+
67
+ # int / decimal rules
68
+ integer = ('0' | [1-9] digit*);
69
+ exp = ([Ee] [+\-]? digit+);
70
+
71
+
72
+ # common actions
73
+ action close_err {
74
+ std::stringstream s;
75
+ s << "unterminated " << EDN_TYPE;
76
+ error(__FUNCTION__, s.str());
77
+ fhold; fbreak;
78
+ }
79
+
80
+ action exit { fhold; fbreak; }
81
81
  }%%
82
82
 
83
83
  // ============================================================
@@ -85,135 +85,140 @@
85
85
  //
86
86
 
87
87
  %%{
88
- machine EDN_value;
89
- include EDN_common;
90
-
91
- write data;
92
-
93
- action parse_val_string {
94
- // string types within double-quotes
95
- const char *np = parse_string(fpc, pe, v);
96
- if (np == nullptr) { fhold; fbreak; } else fexec np;
97
- }
98
-
99
- action parse_val_keyword {
100
- // tokens with a leading ':'
101
- const char *np = parse_keyword(fpc, pe, v);
102
- if (np == nullptr) { fhold; fbreak; } else fexec np;
103
- }
104
-
105
- action parse_val_number {
106
- // tokens w/ leading digits: non-negative integers & decimals.
107
- // try to parse a decimal first
108
- const char *np = parse_decimal(fpc, pe, v);
109
- if (np == nullptr) {
110
- // if we can't, try to parse it as an int
88
+ machine EDN_value;
89
+ include EDN_common;
90
+
91
+ write data;
92
+
93
+ action parse_val_string {
94
+ // string types within double-quotes
95
+ const char *np = parse_string(fpc, pe, v);
96
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
97
+ }
98
+
99
+ action parse_val_keyword {
100
+ // tokens with a leading ':'
101
+ const char *np = parse_keyword(fpc, pe, v);
102
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
103
+ }
104
+
105
+ action parse_val_number {
106
+ // tokens w/ leading digits: non-negative integers & decimals.
107
+ // try to parse a decimal first
108
+ const char *np = parse_decimal(fpc, pe, v);
109
+ if (np == nullptr) {
110
+ // if we can't, try to parse it as a ratio
111
+ np = parse_ratio(fpc, pe, v);
112
+
113
+ // otherwise, an int
114
+ if (np == nullptr) {
111
115
  np = parse_integer(fpc, pe, v);
112
- }
113
-
114
- if (np) {
115
- fexec np;
116
- fhold;
117
- fbreak;
118
- }
119
- else {
120
- error(__FUNCTION__, "number format error", *p);
121
- fexec pe;
122
- }
123
- }
124
-
125
- action parse_val_operator {
126
- // stand-alone operators *, +, -, etc.
127
- const char *np = parse_operator(fpc, pe, v);
128
- if (np == nullptr) { fhold; fbreak; } else fexec np;
129
- }
130
-
131
- action parse_val_char {
132
- // tokens w/ leading \ (escaped characters \newline, \c, etc.)
133
- const char *np = parse_esc_char(fpc, pe, v);
134
- if (np == nullptr) { fhold; fbreak; } else fexec np;
135
- }
136
-
137
- action parse_val_symbol {
138
- // user identifiers and reserved keywords (true, false, nil)
139
- VALUE sym = Qnil;
140
- const char *np = parse_symbol(fpc, pe, sym);
141
- if (np == nullptr) { fexec pe; } else {
142
- // parse_symbol will make 'sym' a ruby string
143
- if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
144
- else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
145
- else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
146
- else {
147
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
148
- }
149
- fexec np;
150
- }
151
- }
152
-
153
- action parse_val_vector {
154
- // [
155
- const char *np = parse_vector(fpc, pe, v);
156
- if (np == nullptr) { fhold; fbreak; } else fexec np;
157
- }
158
-
159
- action parse_val_list {
160
- // (
161
- const char *np = parse_list(fpc, pe, v);
162
- if (np == nullptr) { fhold; fbreak; } else fexec np;
163
- }
164
-
165
- action parse_val_map {
166
- // {
167
- const char *np = parse_map(fpc, pe, v);
168
- if (np == nullptr) { fhold; fbreak; } else fexec np;
169
- }
170
-
171
- action parse_val_meta {
172
- // ^
173
- const char *np = parse_meta(fpc, pe);
174
- if (np == nullptr) { fhold; fbreak; } else fexec np;
175
- }
176
-
177
- action parse_val_dispatch {
178
- // handles tokens w/ leading # ("#_", "#{", and tagged elems)
179
- const char *np = parse_dispatch(fpc + 1, pe, v);
180
- if (np == nullptr) { fhold; fbreak; } else fexec np;
181
- }
182
-
183
-
184
- main := (
185
- string_delim >parse_val_string |
186
- begin_keyword >parse_val_keyword |
187
- begin_number >parse_val_number |
188
- operators >parse_val_operator |
189
- begin_char >parse_val_char |
190
- begin_symbol >parse_val_symbol |
191
- begin_vector >parse_val_vector |
192
- begin_list >parse_val_list |
193
- begin_map >parse_val_map |
194
- begin_meta >parse_val_meta |
195
- begin_dispatch >parse_val_dispatch
196
- ) %*exit;
116
+ }
117
+ }
118
+
119
+ if (np) {
120
+ fexec np;
121
+ fhold;
122
+ fbreak;
123
+ }
124
+ else {
125
+ error(__FUNCTION__, "number format error", *p);
126
+ fexec pe;
127
+ }
128
+ }
129
+
130
+ action parse_val_operator {
131
+ // stand-alone operators *, +, -, etc.
132
+ const char *np = parse_operator(fpc, pe, v);
133
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
134
+ }
135
+
136
+ action parse_val_char {
137
+ // tokens w/ leading \ (escaped characters \newline, \c, etc.)
138
+ const char *np = parse_esc_char(fpc, pe, v);
139
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
140
+ }
141
+
142
+ action parse_val_symbol {
143
+ // user identifiers and reserved keywords (true, false, nil)
144
+ VALUE sym = Qnil;
145
+ const char *np = parse_symbol(fpc, pe, sym);
146
+ if (np == nullptr) { fexec pe; } else {
147
+ // parse_symbol will make 'sym' a ruby string
148
+ if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
149
+ else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
150
+ else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
151
+ else {
152
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
153
+ }
154
+ fexec np;
155
+ }
156
+ }
157
+
158
+ action parse_val_vector {
159
+ // [
160
+ const char *np = parse_vector(fpc, pe, v);
161
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
162
+ }
163
+
164
+ action parse_val_list {
165
+ // (
166
+ const char *np = parse_list(fpc, pe, v);
167
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
168
+ }
169
+
170
+ action parse_val_map {
171
+ // {
172
+ const char *np = parse_map(fpc, pe, v);
173
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
174
+ }
175
+
176
+ action parse_val_meta {
177
+ // ^
178
+ const char *np = parse_meta(fpc, pe);
179
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
180
+ }
181
+
182
+ action parse_val_dispatch {
183
+ // handles tokens w/ leading # ("#_", "#{", and tagged elems)
184
+ const char *np = parse_dispatch(fpc + 1, pe, v);
185
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
186
+ }
187
+
188
+
189
+ main := (
190
+ string_delim >parse_val_string |
191
+ begin_keyword >parse_val_keyword |
192
+ begin_number >parse_val_number |
193
+ operators >parse_val_operator |
194
+ begin_char >parse_val_char |
195
+ begin_symbol >parse_val_symbol |
196
+ begin_vector >parse_val_vector |
197
+ begin_list >parse_val_list |
198
+ begin_map >parse_val_map |
199
+ begin_meta >parse_val_meta |
200
+ begin_dispatch >parse_val_dispatch
201
+ ) %*exit;
197
202
  }%%
198
203
 
199
204
 
200
205
  const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
201
206
  {
202
- // std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
203
- int cs;
204
-
205
- %% write init;
206
- %% write exec;
207
-
208
- if (cs >= EDN_value_first_final) {
209
- return p;
210
- }
211
- else if (cs == EDN_value_error) {
212
- error(__FUNCTION__, "token error", *p);
213
- return pe;
214
- }
215
- else if (cs == EDN_value_en_main) {} // silence ragel warning
216
- return nullptr;
207
+ // std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
208
+ int cs;
209
+
210
+ %% write init;
211
+ %% write exec;
212
+
213
+ if (cs >= EDN_value_first_final) {
214
+ return p;
215
+ }
216
+ else if (cs == EDN_value_error) {
217
+ error(__FUNCTION__, "token error", *p);
218
+ return pe;
219
+ }
220
+ else if (cs == EDN_value_en_main) {} // silence ragel warning
221
+ return nullptr;
217
222
  }
218
223
 
219
224
 
@@ -225,51 +230,51 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
225
230
  // ascii range is found.
226
231
  //
227
232
  %%{
228
- machine EDN_string;
229
- include EDN_common;
230
-
231
- write data;
232
-
233
- action parse_chars {
234
- if (edn::util::parse_byte_stream(p_save + 1, p, v, encode)) {
235
- fexec p + 1;
236
- } else {
237
- fhold; fbreak;
238
- }
239
- }
240
-
241
- action mark_for_encoding {
242
- encode = true;
243
- }
244
-
245
- main := string_delim (
246
- (^([\"\\] | 0xc2..0xf5) |
247
- ((0xc2..0xf5) |
248
- '\\'[\"\\/bfnrt] |
249
- '\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
250
- '\\'^([\"\\/bfnrtu]))* %parse_chars
251
- ) :>> string_delim @err(close_err) @exit;
233
+ machine EDN_string;
234
+ include EDN_common;
235
+
236
+ write data;
237
+
238
+ action parse_chars {
239
+ if (edn::util::parse_byte_stream(p_save + 1, p, v, encode)) {
240
+ fexec p + 1;
241
+ } else {
242
+ fhold; fbreak;
243
+ }
244
+ }
245
+
246
+ action mark_for_encoding {
247
+ encode = true;
248
+ }
249
+
250
+ main := string_delim (
251
+ (^([\"\\] | 0xc2..0xf5) |
252
+ ((0xc2..0xf5) |
253
+ '\\'[\"\\/bfnrt] |
254
+ '\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
255
+ '\\'^([\"\\/bfnrtu]))* %parse_chars
256
+ ) :>> string_delim @err(close_err) @exit;
252
257
  }%%
253
258
 
254
259
 
255
260
  const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
256
261
  {
257
- static const char* EDN_TYPE = "string";
258
- int cs;
259
- bool encode = false;
260
-
261
- %% write init;
262
- const char* p_save = p;
263
- %% write exec;
264
-
265
- if (cs >= EDN_string_first_final) {
266
- return p + 1;
267
- }
268
- else if (cs == EDN_string_error) {
269
- return pe;
270
- }
271
- else if (cs == EDN_string_en_main) {} // silence ragel warning
272
- return nullptr;
262
+ static const char* EDN_TYPE = "string";
263
+ int cs;
264
+ bool encode = false;
265
+
266
+ %% write init;
267
+ const char* p_save = p;
268
+ %% write exec;
269
+
270
+ if (cs >= EDN_string_first_final) {
271
+ return p + 1;
272
+ }
273
+ else if (cs == EDN_string_error) {
274
+ return pe;
275
+ }
276
+ else if (cs == EDN_string_en_main) {} // silence ragel warning
277
+ return nullptr;
273
278
  }
274
279
 
275
280
 
@@ -278,79 +283,78 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
278
283
  // keyword parsing
279
284
  //
280
285
  %%{
281
- machine EDN_keyword;
282
- include EDN_common;
286
+ machine EDN_keyword;
287
+ include EDN_common;
283
288
 
284
- keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
285
- keyword_chars = (keyword_start | digit | ':');
289
+ keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
290
+ keyword_chars = (keyword_start | digit | ':');
286
291
 
287
- keyword_name = keyword_start keyword_chars*;
288
- keyword = keyword_name ('/' keyword_chars*)?;
292
+ keyword_name = keyword_start keyword_chars*;
293
+ keyword = keyword_name ('/' keyword_chars*)?;
289
294
 
290
- write data;
295
+ write data;
291
296
 
292
297
 
293
- main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
298
+ main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
294
299
  }%%
295
300
 
296
301
 
297
302
  const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
298
303
  {
299
- int cs;
300
-
301
- %% write init;
302
- const char* p_save = p;
303
- %% write exec;
304
-
305
- if (cs >= EDN_keyword_first_final) {
306
- std::string buf;
307
- uintmax_t len = p - p_save;
308
- // don't include leading ':' because the ruby symbol will handle it
309
- buf.append(p_save + 1, len - 1);
310
- v = ID2SYM(rb_intern(buf.c_str()));
311
- return p;
312
- }
313
- else if (cs == EDN_keyword_error) {
314
- error(__FUNCTION__, "invalid keyword", *p);
315
- return pe;
316
- }
317
- else if (cs == EDN_keyword_en_main) {} // silence ragel warning
318
- return nullptr;
304
+ int cs;
305
+
306
+ %% write init;
307
+ const char* p_save = p;
308
+ %% write exec;
309
+
310
+ if (cs >= EDN_keyword_first_final) {
311
+ std::string buf;
312
+ uintmax_t len = p - p_save;
313
+ // don't include leading ':' because the ruby symbol will handle it
314
+ buf.append(p_save + 1, len - 1);
315
+ v = ID2SYM(rb_intern(buf.c_str()));
316
+ return p;
317
+ }
318
+ else if (cs == EDN_keyword_error) {
319
+ error(__FUNCTION__, "invalid keyword", *p);
320
+ return pe;
321
+ }
322
+ else if (cs == EDN_keyword_en_main) {} // silence ragel warning
323
+ return nullptr;
319
324
  }
320
325
 
321
326
 
322
-
323
327
  // ============================================================
324
328
  // decimal parsing machine
325
329
  //
326
330
  %%{
327
- machine EDN_decimal;
328
- include EDN_common;
331
+ machine EDN_decimal;
332
+ include EDN_common;
329
333
 
330
- write data noerror;
334
+ write data noerror;
331
335
 
332
336
 
333
- main := ('-'|'+')? (
334
- (integer '.' digit* (exp? [M]?)) |
335
- (integer exp)
336
- ) (^[0-9Ee.+\-M]? @exit );
337
+ main := ('-'|'+')? (
338
+ (integer '.' digit* (exp? [M]?)) |
339
+ (integer exp)
340
+ ) (^[0-9Ee.+\-M]? @exit );
337
341
  }%%
338
342
 
339
343
 
340
344
  const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
341
345
  {
342
- int cs;
343
-
344
- %% write init;
345
- const char* p_save = p;
346
- %% write exec;
347
-
348
- if (cs >= EDN_decimal_first_final) {
349
- v = edn::util::float_to_ruby(p_save, p - p_save);
350
- return p + 1;
351
- }
352
- else if (cs == EDN_decimal_en_main) {} // silence ragel warning
353
- return nullptr;
346
+ int cs;
347
+
348
+ %% write init;
349
+ const char* p_save = p;
350
+ %% write exec;
351
+
352
+ if (cs >= EDN_decimal_first_final) {
353
+ v = edn::util::float_to_ruby(p_save, p - p_save);
354
+ return p + 1;
355
+ }
356
+ else if (cs == EDN_decimal_en_main) {} // silence ragel warning
357
+ return nullptr;
354
358
  }
355
359
 
356
360
 
@@ -358,34 +362,65 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
358
362
  // integer parsing machine - M suffix will return a BigNum
359
363
  //
360
364
  %%{
361
- machine EDN_integer;
362
- include EDN_common;
365
+ machine EDN_integer;
366
+ include EDN_common;
363
367
 
364
- write data noerror;
368
+ write data noerror;
365
369
 
366
370
 
367
- main := (
368
- ('-'|'+')? (integer [MN]?)
369
- ) (^[0-9MN+\-]? @exit);
371
+ main := (
372
+ ('-'|'+')? (integer [MN]?)
373
+ ) (^[0-9MN+\-]? @exit);
370
374
  }%%
371
375
 
372
376
  const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
373
377
  {
374
- int cs;
375
-
376
- %% write init;
377
- const char* p_save = p;
378
- %% write exec;
379
-
380
- if (cs >= EDN_integer_first_final) {
381
- v = edn::util::integer_to_ruby(p_save, p - p_save);
382
- return p + 1;
383
- }
384
- else if (cs == EDN_integer_en_main) {} // silence ragel warning
385
- return nullptr;
378
+ int cs;
379
+
380
+ %% write init;
381
+ const char* p_save = p;
382
+ %% write exec;
383
+
384
+ if (cs >= EDN_integer_first_final) {
385
+ v = edn::util::integer_to_ruby(p_save, p - p_save);
386
+ return p + 1;
387
+ }
388
+ else if (cs == EDN_integer_en_main) {} // silence ragel warning
389
+ return nullptr;
386
390
  }
387
391
 
388
392
 
393
+ // ============================================================
394
+ // ratio parsing machine
395
+ //
396
+ %%{
397
+ machine EDN_ratio;
398
+ include EDN_common;
399
+
400
+ write data noerror;
401
+
402
+
403
+ main := (
404
+ ('-'|'+')? (integer '/' integer)
405
+ ) (^[0-9+\-\/]? @exit);
406
+ }%%
407
+
408
+
409
+ const char* edn::Parser::parse_ratio(const char *p, const char *pe, VALUE& v)
410
+ {
411
+ int cs;
412
+
413
+ %% write init;
414
+ const char* p_save = p;
415
+ %% write exec;
416
+
417
+ if (cs >= EDN_ratio_first_final) {
418
+ v = edn::util::ratio_to_ruby(p_save, p - p_save);
419
+ return p + 1;
420
+ }
421
+ else if (cs == EDN_ratio_en_main) {} // silence ragel warning
422
+ return nullptr;
423
+ }
389
424
 
390
425
  // ============================================================
391
426
  // operator parsing - handles tokens w/ a leading operator:
@@ -395,81 +430,86 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
395
430
  // 3. stand-alone operators: +, -, /, *, etc.
396
431
  //
397
432
  %%{
398
- machine EDN_operator;
399
- include EDN_common;
400
-
401
- write data;
402
-
403
- action parse_op_symbol {
404
- // parse a symbol including the leading operator (-, +, .)
405
- VALUE sym = Qnil;
406
- const char *np = parse_symbol(p_save, pe, sym);
407
- if (np == nullptr) { fexec pe; } else {
408
- if (sym != Qnil)
409
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
410
- fexec np;
411
- }
412
- }
413
-
414
- action parse_op_number {
415
- // parse a number with the leading symbol - this is slightly
416
- // different than the one within EDN_value since it includes
417
- // the leading - or +
418
- //
419
- // try to parse a decimal first
420
- const char *np = parse_decimal(p_save, pe, v);
421
- if (np == nullptr) {
422
- // if we can't, try to parse it as an int
433
+ machine EDN_operator;
434
+ include EDN_common;
435
+
436
+ write data;
437
+
438
+ action parse_op_symbol {
439
+ // parse a symbol including the leading operator (-, +, .)
440
+ VALUE sym = Qnil;
441
+ const char *np = parse_symbol(p_save, pe, sym);
442
+ if (np == nullptr) { fexec pe; } else {
443
+ if (sym != Qnil)
444
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
445
+ fexec np;
446
+ }
447
+ }
448
+
449
+ action parse_op_number {
450
+ // parse a number with the leading symbol - this is slightly
451
+ // different than the one within EDN_value since it includes
452
+ // the leading - or +
453
+ //
454
+ // try to parse a decimal first
455
+ const char *np = parse_decimal(p_save, pe, v);
456
+ if (np == nullptr) {
457
+ // if we can't, try to parse it as a ratio
458
+ np = parse_ratio(p_save, pe, v);
459
+
460
+ if (np == nullptr) {
461
+ // again, if we can't, try to parse it as an int
423
462
  np = parse_integer(p_save, pe, v);
424
- }
425
-
426
- if (np) {
427
- fexec np;
428
- fhold;
429
- fbreak;
430
- }
431
- else {
432
- error(__FUNCTION__, "number format error", *p);
433
- fexec pe;
434
- }
435
- }
436
-
437
- action parse_op {
438
- // stand-alone operators (-, +, /, ... etc)
439
- char op[2] = { *p_save, 0 };
440
- VALUE sym = rb_str_new2(op);
441
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
442
- }
443
-
444
- valid_non_numeric_chars = alpha|operators|':'|'#';
445
- valid_chars = valid_non_numeric_chars | digit;
446
-
447
- main := (
448
- ('-'|'+') begin_number >parse_op_number |
449
- (operators - [\-\+\.]) valid_chars >parse_op_symbol |
450
- [\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
451
- operators ignore* >parse_op
452
- ) ^(valid_chars)? @exit;
463
+ }
464
+ }
465
+
466
+ if (np) {
467
+ fexec np;
468
+ fhold;
469
+ fbreak;
470
+ }
471
+ else {
472
+ error(__FUNCTION__, "number format error", *p);
473
+ fexec pe;
474
+ }
475
+ }
476
+
477
+ action parse_op {
478
+ // stand-alone operators (-, +, /, ... etc)
479
+ char op[2] = { *p_save, 0 };
480
+ VALUE sym = rb_str_new2(op);
481
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
482
+ }
483
+
484
+ valid_non_numeric_chars = alpha|operators|':'|'#';
485
+ valid_chars = valid_non_numeric_chars | digit;
486
+
487
+ main := (
488
+ ('-'|'+') begin_number >parse_op_number |
489
+ (operators - [\-\+\.]) valid_chars >parse_op_symbol |
490
+ [\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
491
+ operators ignore* >parse_op
492
+ ) ^(valid_chars)? @exit;
453
493
  }%%
454
494
 
455
495
 
456
496
  const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
457
497
  {
458
- int cs;
459
-
460
- %% write init;
461
- const char* p_save = p;
462
- %% write exec;
463
-
464
- if (cs >= EDN_operator_first_final) {
465
- return p;
466
- }
467
- else if (cs == EDN_operator_error) {
468
- error(__FUNCTION__, "symbol syntax error", *p);
469
- return pe;
470
- }
471
- else if (cs == EDN_operator_en_main) {} // silence ragel warning
472
- return nullptr;
498
+ int cs;
499
+
500
+ %% write init;
501
+ const char* p_save = p;
502
+ %% write exec;
503
+
504
+ if (cs >= EDN_operator_first_final) {
505
+ return p;
506
+ }
507
+ else if (cs == EDN_operator_error) {
508
+ error(__FUNCTION__, "symbol syntax error", *p);
509
+ return pe;
510
+ }
511
+ else if (cs == EDN_operator_en_main) {} // silence ragel warning
512
+ return nullptr;
473
513
  }
474
514
 
475
515
 
@@ -478,47 +518,46 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
478
518
  // escaped char parsing - handles \c, \newline, \formfeed, etc.
479
519
  //
480
520
  %%{
481
- machine EDN_escaped_char;
482
- include EDN_common;
521
+ machine EDN_escaped_char;
522
+ include EDN_common;
483
523
 
484
- write data;
524
+ write data;
485
525
 
486
- valid_chars = extend;
526
+ valid_chars = extend;
487
527
 
488
528
 
489
- main := begin_char (
490
- 'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
491
- valid_chars
492
- ) (ignore* | [\\\]\}\)])? @exit;
529
+ main := begin_char (
530
+ 'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
531
+ valid_chars
532
+ ) (ignore* | [\\\]\}\)])? @exit;
493
533
  }%%
494
534
 
495
535
 
496
536
  const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
497
537
  {
498
- int cs;
499
-
500
- %% write init;
501
- const char* p_save = p;
502
- %% write exec;
503
-
504
- if (cs >= EDN_escaped_char_first_final) {
505
- // convert the escaped value to a character
506
- if (!edn::util::parse_escaped_char(p_save + 1, p, v)) {
507
- return pe;
508
- }
509
- return p;
510
- }
511
- else if (cs == EDN_escaped_char_error) {
512
- error(__FUNCTION__, "unexpected value", *p);
513
- return pe;
514
- }
515
- else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
516
- return nullptr;
538
+ int cs;
539
+
540
+ %% write init;
541
+ const char* p_save = p;
542
+ %% write exec;
543
+
544
+ if (cs >= EDN_escaped_char_first_final) {
545
+ // convert the escaped value to a character
546
+ if (!edn::util::parse_escaped_char(p_save + 1, p, v)) {
547
+ return pe;
548
+ }
549
+ return p;
550
+ }
551
+ else if (cs == EDN_escaped_char_error) {
552
+ error(__FUNCTION__, "unexpected value", *p);
553
+ return pe;
554
+ }
555
+ else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
556
+ return nullptr;
517
557
  }
518
558
 
519
559
 
520
560
 
521
-
522
561
  // ============================================================
523
562
  // symbol parsing - handles identifiers that begin with an alpha
524
563
  // character and an optional leading operator (name, -today,
@@ -526,58 +565,57 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
526
565
  //
527
566
  //
528
567
  %%{
529
- machine EDN_symbol;
530
- include EDN_common;
568
+ machine EDN_symbol;
569
+ include EDN_common;
531
570
 
532
- write data;
571
+ write data;
533
572
 
534
- symbol_ops_1 = [\.\-\+];
535
- symbol_ops_2 = [\*!_\?$%&<>\=\'];
536
- symbol_ops_3 = [:\#];
573
+ symbol_ops_1 = [\.\-\+];
574
+ symbol_ops_2 = [\*!_\?$%&<>\=\'];
575
+ symbol_ops_3 = [:\#];
537
576
 
538
- symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
577
+ symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
539
578
 
540
- symbol_chars = symbol_start | digit | symbol_ops_3;
579
+ symbol_chars = symbol_start | digit | symbol_ops_3;
541
580
 
542
- symbol_name = (
543
- (alpha symbol_chars*) |
544
- (symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
545
- (symbol_start symbol_chars+) |
546
- operators{1}
547
- );
548
- symbol = '/' | (symbol_name ('/' symbol_name)?);
581
+ symbol_name = (
582
+ (alpha symbol_chars*) |
583
+ (symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
584
+ (symbol_start symbol_chars+) |
585
+ operators{1}
586
+ );
587
+ symbol = '/' | (symbol_name ('/' symbol_name)?);
549
588
 
550
589
 
551
- main := (
552
- symbol
553
- ) ignore* (^(symbol_chars | '/')? @exit);
590
+ main := (
591
+ symbol
592
+ ) ignore* (^(symbol_chars | '/')? @exit);
554
593
  }%%
555
594
 
556
595
 
557
596
  const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
558
597
  {
559
- int cs;
560
-
561
- %% write init;
562
- const char* p_save = p;
563
- %% write exec;
564
-
565
- if (cs >= EDN_symbol_first_final) {
566
- // copy the symbol text
567
- if (s == Qnil)
568
- s = rb_str_new2("");
569
- rb_str_cat(s, p_save, p - p_save);
570
- return p;
571
- }
572
- else if (cs == EDN_symbol_error) {
573
- error(__FUNCTION__, "invalid symbol sequence", *p);
574
- }
575
- else if (cs == EDN_symbol_en_main) {} // silence ragel warning
576
- return nullptr;
598
+ int cs;
599
+
600
+ %% write init;
601
+ const char* p_save = p;
602
+ %% write exec;
603
+
604
+ if (cs >= EDN_symbol_first_final) {
605
+ // copy the symbol text
606
+ if (s == Qnil)
607
+ s = rb_str_new2("");
608
+ rb_str_cat(s, p_save, p - p_save);
609
+ return p;
610
+ }
611
+ else if (cs == EDN_symbol_error) {
612
+ error(__FUNCTION__, "invalid symbol sequence", *p);
613
+ }
614
+ else if (cs == EDN_symbol_en_main) {} // silence ragel warning
615
+ return nullptr;
577
616
  }
578
617
 
579
618
 
580
-
581
619
  // ============================================================
582
620
  // EDN_sequence_common is used to parse EDN containers - elements are
583
621
  // initially stored in an array and then the final corresponding
@@ -585,75 +623,75 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
585
623
  // sets the same array is used)
586
624
  //
587
625
  %%{
588
- machine EDN_sequence_common;
589
- include EDN_common;
590
-
591
- action open_seq {
592
- // sequences store elements in an array, then process it to
593
- // convert it to a list, set, or map as needed once the
594
- // sequence end is reached
595
- elems = rb_ary_new();
596
- // additionally, metadata for elements in the sequence may be
597
- // carried so we must push a new level in the metadata stack
598
- new_meta_list();
599
- }
600
-
601
- action close_seq {
602
- // remove the current metadata level
603
- del_top_meta_list();
604
- }
605
-
606
- action parse_item {
607
- // reads an item within a sequence (vector, list, map, or
608
- // set). Regardless of the sequence type, an array of the
609
- // items is built. Once done, the sequence parser will convert
610
- // if needed
611
- VALUE e;
612
- std::size_t meta_sz = meta_size();
613
- const char *np = parse_value(fpc, pe, e);
614
- if (np == nullptr) { fhold; fbreak; } else {
615
- // if there's an entry in the discard list, the current
616
- // object is not meant to be kept due to a #_ so don't
617
- // push it into the list of elements
618
- if (!discard.empty()) {
619
- discard.pop_back();
626
+ machine EDN_sequence_common;
627
+ include EDN_common;
628
+
629
+ action open_seq {
630
+ // sequences store elements in an array, then process it to
631
+ // convert it to a list, set, or map as needed once the
632
+ // sequence end is reached
633
+ elems = rb_ary_new();
634
+ // additionally, metadata for elements in the sequence may be
635
+ // carried so we must push a new level in the metadata stack
636
+ new_meta_list();
637
+ }
638
+
639
+ action close_seq {
640
+ // remove the current metadata level
641
+ del_top_meta_list();
642
+ }
643
+
644
+ action parse_item {
645
+ // reads an item within a sequence (vector, list, map, or
646
+ // set). Regardless of the sequence type, an array of the
647
+ // items is built. Once done, the sequence parser will convert
648
+ // if needed
649
+ VALUE e;
650
+ std::size_t meta_sz = meta_size();
651
+ const char *np = parse_value(fpc, pe, e);
652
+ if (np == nullptr) { fhold; fbreak; } else {
653
+ // if there's an entry in the discard list, the current
654
+ // object is not meant to be kept due to a #_ so don't
655
+ // push it into the list of elements
656
+ if (!discard.empty()) {
657
+ discard.pop_back();
658
+ }
659
+ else if (!meta_empty()) {
660
+ // check if parse_value added metadata
661
+ if (meta_size() == meta_sz) {
662
+ // there's metadata and it didn't increase so
663
+ // parse_value() read an element we care
664
+ // about. Bind the metadata to it and add it to
665
+ // the sequence
666
+ e = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
667
+ rb_ary_push(elems, e);
620
668
  }
621
- else if (!meta_empty()) {
622
- // check if parse_value added metadata
623
- if (meta_size() == meta_sz) {
624
- // there's metadata and it didn't increase so
625
- // parse_value() read an element we care
626
- // about. Bind the metadata to it and add it to
627
- // the sequence
628
- e = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
629
- rb_ary_push(elems, e);
630
- }
631
- } else {
632
- // no metadata.. just push it
633
- rb_ary_push(elems, e);
634
- }
635
- fexec np;
636
- }
637
- }
638
-
639
- element = begin_value >parse_item;
640
- next_element = ignore* element;
641
- sequence = ((element ignore*) (next_element ignore*)*);
669
+ } else {
670
+ // no metadata.. just push it
671
+ rb_ary_push(elems, e);
672
+ }
673
+ fexec np;
674
+ }
675
+ }
676
+
677
+ element = begin_value >parse_item;
678
+ next_element = ignore* element;
679
+ sequence = ((element ignore*) (next_element ignore*)*);
642
680
  }%%
643
681
 
644
682
  //
645
683
  // vector-specific machine
646
684
  %%{
647
- machine EDN_vector;
648
- include EDN_sequence_common;
685
+ machine EDN_vector;
686
+ include EDN_sequence_common;
649
687
 
650
- end_vector = ']';
688
+ end_vector = ']';
651
689
 
652
- write data;
690
+ write data;
653
691
 
654
- main := begin_vector @open_seq (
655
- ignore* sequence? :>> end_vector @close_seq
656
- ) @err(close_err) @exit;
692
+ main := begin_vector @open_seq (
693
+ ignore* sequence? :>> end_vector @close_seq
694
+ ) @err(close_err) @exit;
657
695
  }%%
658
696
 
659
697
 
@@ -662,24 +700,24 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
662
700
  //
663
701
  const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
664
702
  {
665
- static const char* EDN_TYPE = "vector";
666
-
667
- int cs;
668
- VALUE elems; // will store the vector's elements - allocated in @open_seq
669
-
670
- %% write init;
671
- %% write exec;
672
-
673
- if (cs >= EDN_vector_first_final) {
674
- v = elems;
675
- return p + 1;
676
- }
677
- else if (cs == EDN_vector_error) {
678
- error(__FUNCTION__, "vector format error", *p);
679
- return pe;
680
- }
681
- else if (cs == EDN_vector_en_main) {} // silence ragel warning
682
- return nullptr;
703
+ static const char* EDN_TYPE = "vector";
704
+
705
+ int cs;
706
+ VALUE elems; // will store the vector's elements - allocated in @open_seq
707
+
708
+ %% write init;
709
+ %% write exec;
710
+
711
+ if (cs >= EDN_vector_first_final) {
712
+ v = elems;
713
+ return p + 1;
714
+ }
715
+ else if (cs == EDN_vector_error) {
716
+ error(__FUNCTION__, "vector format error", *p);
717
+ return pe;
718
+ }
719
+ else if (cs == EDN_vector_en_main) {} // silence ragel warning
720
+ return nullptr;
683
721
  }
684
722
 
685
723
 
@@ -688,16 +726,16 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
688
726
  // list parsing machine
689
727
  //
690
728
  %%{
691
- machine EDN_list;
692
- include EDN_sequence_common;
729
+ machine EDN_list;
730
+ include EDN_sequence_common;
693
731
 
694
- end_list = ')';
732
+ end_list = ')';
695
733
 
696
- write data;
734
+ write data;
697
735
 
698
- main := begin_list @open_seq (
699
- ignore* sequence? :>> end_list @close_seq
700
- ) @err(close_err) @exit;
736
+ main := begin_list @open_seq (
737
+ ignore* sequence? :>> end_list @close_seq
738
+ ) @err(close_err) @exit;
701
739
  }%%
702
740
 
703
741
  //
@@ -705,26 +743,24 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
705
743
  //
706
744
  const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
707
745
  {
708
- static const char* EDN_TYPE = "list";
709
-
710
- int cs;
711
- VALUE elems; // stores the list's elements - allocated in @open_seq
712
-
713
- %% write init;
714
- %% write exec;
715
-
716
- if (cs >= EDN_list_first_final) {
717
- v = elems;
718
- // TODO: replace with this but first figure out why array is not unrolled by EDN::list()
719
- // v = edn::util::call_module_fn(EDN_MAKE_LIST_METHOD, elems);
720
- return p + 1;
721
- }
722
- else if (cs == EDN_list_error) {
723
- error(__FUNCTION__, *p);
724
- return pe;
725
- }
726
- else if (cs == EDN_list_en_main) {} // silence ragel warning
727
- return nullptr;
746
+ static const char* EDN_TYPE = "list";
747
+
748
+ int cs;
749
+ VALUE elems; // stores the list's elements - allocated in @open_seq
750
+
751
+ %% write init;
752
+ %% write exec;
753
+
754
+ if (cs >= EDN_list_first_final) {
755
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_LIST_METHOD, elems);
756
+ return p + 1;
757
+ }
758
+ else if (cs == EDN_list_error) {
759
+ error(__FUNCTION__, *p);
760
+ return pe;
761
+ }
762
+ else if (cs == EDN_list_en_main) {} // silence ragel warning
763
+ return nullptr;
728
764
  }
729
765
 
730
766
 
@@ -733,56 +769,55 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
733
769
  // hash parsing
734
770
  //
735
771
  %%{
736
- machine EDN_map;
737
- include EDN_sequence_common;
772
+ machine EDN_map;
773
+ include EDN_sequence_common;
738
774
 
739
- end_map = '}';
775
+ end_map = '}';
740
776
 
741
- write data;
777
+ write data;
742
778
 
743
779
 
744
- main := begin_map @open_seq (
745
- ignore* (sequence)? :>> end_map @close_seq
746
- ) @err(close_err) @exit;
780
+ main := begin_map @open_seq (
781
+ ignore* (sequence)? :>> end_map @close_seq
782
+ ) @err(close_err) @exit;
747
783
  }%%
748
784
 
749
785
 
750
786
  const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
751
787
  {
752
- static const char* EDN_TYPE = "map";
753
-
754
- int cs;
755
- // since we don't know whether we're looking at a key or value,
756
- // initially store all elements in an array (allocated in @open_seq)
757
- VALUE elems;
758
-
759
- %% write init;
760
- %% write exec;
761
-
762
- if (cs >= EDN_map_first_final) {
763
-
764
- // hash parsing is done. Make sure we have an even count
765
- if ((RARRAY_LEN(elems) % 2) != 0) {
766
- error(__FUNCTION__, "odd number of elements in map");
767
- return pe;
768
- }
769
-
770
- // now convert the sequence to a hash
771
- VALUE rslt = rb_hash_new();
772
- while (RARRAY_LEN(elems) > 0)
773
- {
774
- VALUE k = rb_ary_shift(elems);
775
- rb_hash_aset(rslt, k, rb_ary_shift(elems));
776
- }
777
-
778
- v = rslt;
779
- return p + 1;
780
- }
781
- else if (cs == EDN_map_error) {
782
- return pe;
783
- }
784
- else if (cs == EDN_map_en_main) {} // silence ragel warning
785
- return nullptr;
788
+ static const char* EDN_TYPE = "map";
789
+
790
+ int cs;
791
+ // since we don't know whether we're looking at a key or value,
792
+ // initially store all elements in an array (allocated in @open_seq)
793
+ VALUE elems;
794
+
795
+ %% write init;
796
+ %% write exec;
797
+
798
+ if (cs >= EDN_map_first_final) {
799
+ // hash parsing is done. Make sure we have an even count
800
+ if ((RARRAY_LEN(elems) % 2) != 0) {
801
+ error(__FUNCTION__, "odd number of elements in map");
802
+ return pe;
803
+ }
804
+
805
+ // now convert the sequence to a hash
806
+ VALUE rslt = rb_hash_new();
807
+ while (RARRAY_LEN(elems) > 0)
808
+ {
809
+ VALUE k = rb_ary_shift(elems);
810
+ rb_hash_aset(rslt, k, rb_ary_shift(elems));
811
+ }
812
+
813
+ v = rslt;
814
+ return p + 1;
815
+ }
816
+ else if (cs == EDN_map_error) {
817
+ return pe;
818
+ }
819
+ else if (cs == EDN_map_en_main) {} // silence ragel warning
820
+ return nullptr;
786
821
  }
787
822
 
788
823
 
@@ -793,55 +828,54 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
793
828
  // the remaining data to the correct parser
794
829
  //
795
830
  %%{
796
- machine EDN_dispatch;
797
- include EDN_common;
798
-
799
- write data;
800
-
801
- action parse_disp_set {
802
- // #{ }
803
- const char *np = parse_set(fpc, pe, v);
804
- if (np == nullptr) { fhold; fbreak; } else fexec np;
805
- }
806
-
807
- action parse_disp_discard {
808
- // discard token #_
809
- const char *np = parse_discard(fpc, pe);
810
- if (np == nullptr) { fhold; fbreak; } else fexec np;
811
- }
812
-
813
- action parse_disp_tagged {
814
- // #inst, #uuid, or #user/tag
815
- const char *np = parse_tagged(fpc, pe, v);
816
- if (np == nullptr) { fhold; fbreak; } else fexec np;
817
- }
818
-
819
-
820
- main := (
821
- ('{' >parse_disp_set |
822
- '_' >parse_disp_discard |
823
- alpha >parse_disp_tagged)
824
- ) @exit;
831
+ machine EDN_dispatch;
832
+ include EDN_common;
833
+
834
+ write data;
835
+
836
+ action parse_disp_set {
837
+ // #{ }
838
+ const char *np = parse_set(fpc, pe, v);
839
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
840
+ }
841
+
842
+ action parse_disp_discard {
843
+ // discard token #_
844
+ const char *np = parse_discard(fpc, pe);
845
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
846
+ }
847
+
848
+ action parse_disp_tagged {
849
+ // #inst, #uuid, or #user/tag
850
+ const char *np = parse_tagged(fpc, pe, v);
851
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
852
+ }
853
+
854
+
855
+ main := (
856
+ ('{' >parse_disp_set |
857
+ '_' >parse_disp_discard |
858
+ alpha >parse_disp_tagged)
859
+ ) @exit;
825
860
  }%%
826
861
 
827
862
 
828
863
  const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
829
864
  {
830
- int cs;
831
-
832
- %% write init;
833
- %% write exec;
834
-
835
- if (cs >= EDN_dispatch_first_final) {
836
- return p + 1;
837
- }
838
- else if (cs == EDN_dispatch_error) {
839
- error(__FUNCTION__, "dispatch extend error", *p);
840
- return pe;
841
- }
842
- else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
843
-
844
- return nullptr;
865
+ int cs;
866
+
867
+ %% write init;
868
+ %% write exec;
869
+
870
+ if (cs >= EDN_dispatch_first_final) {
871
+ return p + 1;
872
+ }
873
+ else if (cs == EDN_dispatch_error) {
874
+ error(__FUNCTION__, "dispatch extend error", *p);
875
+ return pe;
876
+ }
877
+ else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
878
+ return nullptr;
845
879
  }
846
880
 
847
881
 
@@ -849,17 +883,17 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
849
883
  // set parsing machine
850
884
  //
851
885
  %%{
852
- machine EDN_set;
853
- include EDN_sequence_common;
886
+ machine EDN_set;
887
+ include EDN_sequence_common;
854
888
 
855
- write data;
889
+ write data;
856
890
 
857
- begin_set = '{';
858
- end_set = '}';
891
+ begin_set = '{';
892
+ end_set = '}';
859
893
 
860
- main := begin_set @open_seq (
861
- ignore* sequence? :>> end_set @close_seq
862
- ) @err(close_err) @exit;
894
+ main := begin_set @open_seq (
895
+ ignore* sequence? :>> end_set @close_seq
896
+ ) @err(close_err) @exit;
863
897
  }%%
864
898
 
865
899
  //
@@ -867,25 +901,25 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
867
901
  //
868
902
  const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
869
903
  {
870
- static const char* EDN_TYPE = "set";
871
-
872
- int cs;
873
- VALUE elems; // holds the set's elements as an array allocated in @open_seq
874
-
875
- %% write init;
876
- %% write exec;
877
-
878
- if (cs >= EDN_set_first_final) {
879
- // all elements collected; now convert to a set
880
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SET_METHOD, elems);
881
- return p + 1;
882
- }
883
- else if (cs == EDN_set_error) {
884
- error(__FUNCTION__, *p);
885
- return pe;
886
- }
887
- else if (cs == EDN_set_en_main) {} // silence ragel warning
888
- return nullptr;
904
+ static const char* EDN_TYPE = "set";
905
+
906
+ int cs;
907
+ VALUE elems; // holds the set's elements as an array allocated in @open_seq
908
+
909
+ %% write init;
910
+ %% write exec;
911
+
912
+ if (cs >= EDN_set_first_final) {
913
+ // all elements collected; now convert to a set
914
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SET_METHOD, elems);
915
+ return p + 1;
916
+ }
917
+ else if (cs == EDN_set_error) {
918
+ error(__FUNCTION__, *p);
919
+ return pe;
920
+ }
921
+ else if (cs == EDN_set_en_main) {} // silence ragel warning
922
+ return nullptr;
889
923
  }
890
924
 
891
925
 
@@ -896,55 +930,54 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
896
930
  // defining a machine to consume items within container delimiters
897
931
  //
898
932
  %%{
899
- machine EDN_discard;
900
- include EDN_common;
901
-
902
- write data;
903
-
904
- begin_discard = '_';
905
-
906
- action discard_value {
907
- const char *np = parse_value(fpc, pe, v);
908
- if (np == nullptr) { fhold; fbreak; } else {
909
- // this token is to be discarded so store it in the
910
- // discard stack - we really don't need to save it so this
911
- // could be simplified
912
- discard.push_back(v);
913
- fexec np;
914
- }
915
- }
916
-
917
- action discard_err {
918
- std::stringstream s;
919
- s << "discard sequence without element to discard";
920
- error(__FUNCTION__, s.str());
921
- fhold; fbreak;
922
- }
923
-
924
- main := begin_discard ignore* (
925
- begin_value >discard_value
926
- ) @err(discard_err) @exit;
933
+ machine EDN_discard;
934
+ include EDN_common;
935
+
936
+ write data;
937
+
938
+ begin_discard = '_';
939
+
940
+ action discard_value {
941
+ const char *np = parse_value(fpc, pe, v);
942
+ if (np == nullptr) { fhold; fbreak; } else {
943
+ // this token is to be discarded so store it in the
944
+ // discard stack - we really don't need to save it so this
945
+ // could be simplified
946
+ discard.push_back(v);
947
+ fexec np;
948
+ }
949
+ }
950
+
951
+ action discard_err {
952
+ std::stringstream s;
953
+ s << "discard sequence without element to discard";
954
+ error(__FUNCTION__, s.str());
955
+ fhold; fbreak;
956
+ }
957
+
958
+ main := begin_discard ignore* (
959
+ begin_value >discard_value
960
+ ) @err(discard_err) @exit;
927
961
  }%%
928
962
 
929
963
 
930
964
  const char* edn::Parser::parse_discard(const char *p, const char *pe)
931
965
  {
932
- int cs;
933
- VALUE v;
934
-
935
- %% write init;
936
- %% write exec;
937
-
938
- if (cs >= EDN_discard_first_final) {
939
- return p + 1;
940
- }
941
- else if (cs == EDN_discard_error) {
942
- error(__FUNCTION__, *p);
943
- return pe;
944
- }
945
- else if (cs == EDN_discard_en_main) {} // silence ragel warning
946
-
947
- return nullptr;
966
+ int cs;
967
+ VALUE v;
968
+
969
+ %% write init;
970
+ %% write exec;
971
+
972
+ if (cs >= EDN_discard_first_final) {
973
+ return p + 1;
974
+ }
975
+ else if (cs == EDN_discard_error) {
976
+ error(__FUNCTION__, *p);
977
+ return pe;
978
+ }
979
+ else if (cs == EDN_discard_en_main) {} // silence ragel warning
980
+ return nullptr;
948
981
  }
949
982
 
950
983
 
@@ -964,130 +997,127 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
964
997
  // 2. add parse checks for uuid and inst for better error reporting
965
998
  //
966
999
  %%{
967
- machine EDN_tagged;
968
- include EDN_common;
1000
+ machine EDN_tagged;
1001
+ include EDN_common;
969
1002
 
970
- write data;
1003
+ write data;
971
1004
 
972
- tag_symbol_chars_start = alpha;
973
- tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
974
- tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
1005
+ tag_symbol_chars_start = alpha;
1006
+ tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
1007
+ tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
975
1008
 
976
- tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
977
- tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
1009
+ tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
1010
+ tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
978
1011
 
979
- tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
1012
+ tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
980
1013
 
981
1014
  # inst = (string_delim [0-9+\-:\.TZ]* string_delim);
982
1015
  # uuid = (string_delim [a-f0-9\-]* string_delim);
983
1016
 
984
- action parse_tag {
985
- // parses the symbol portion of the pair
986
- const char *np = parse_symbol(fpc, pe, sym_name);
987
- if (np == nullptr) { fhold; fbreak; } else {
988
- sym_ok = true;
989
- fexec np;
990
- }
991
- }
992
- action parse_data {
993
- // parses the value portion
994
- const char *np = parse_value(fpc, pe, data);
995
- if (np == nullptr) { fhold; fbreak; } else {
996
- data_ok = true;
997
- fexec np;
998
- }
999
- }
1000
-
1001
- main := (
1002
- tag_symbol >parse_tag ignore+
1003
- begin_value >parse_data
1004
- ) @exit;
1017
+ action parse_tag {
1018
+ // parses the symbol portion of the pair
1019
+ const char *np = parse_symbol(fpc, pe, sym_name);
1020
+ if (np == nullptr) { fhold; fbreak; } else {
1021
+ sym_ok = true;
1022
+ fexec np;
1023
+ }
1024
+ }
1025
+ action parse_data {
1026
+ // parses the value portion
1027
+ const char *np = parse_value(fpc, pe, data);
1028
+ if (np == nullptr) { fhold; fbreak; } else {
1029
+ data_ok = true;
1030
+ fexec np;
1031
+ }
1032
+ }
1033
+
1034
+ main := (
1035
+ tag_symbol >parse_tag ignore+
1036
+ begin_value >parse_data
1037
+ ) @exit;
1005
1038
  }%%
1006
1039
 
1007
1040
 
1008
1041
  const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
1009
1042
  {
1010
- VALUE sym_name = Qnil;
1011
- VALUE data = Qnil;
1012
- bool sym_ok = false;
1013
- bool data_ok = false;
1043
+ VALUE sym_name = Qnil;
1044
+ VALUE data = Qnil;
1045
+ bool sym_ok = false;
1046
+ bool data_ok = false;
1014
1047
 
1015
- int cs;
1048
+ int cs;
1016
1049
 
1017
- %% write init;
1018
- %% write exec;
1050
+ %% write init;
1051
+ %% write exec;
1019
1052
 
1020
- if (cs >= EDN_tagged_first_final) {
1053
+ if (cs >= EDN_tagged_first_final) {
1021
1054
  //std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << data << std::endl;
1022
1055
 
1023
- if (!sym_ok || !data_ok) {
1024
- error(__FUNCTION__, "tagged element symbol error", *p);
1025
- v = EDN_EOF_CONST;
1026
- return nullptr;
1027
- }
1028
-
1029
- try {
1030
- // tagged_element makes a call to ruby which may throw an
1031
- // exception when parsing the data
1032
- v = edn::util::call_module_fn(rb_mEDN, EDN_TAGGED_ELEM_METHOD, sym_name, data);
1033
- return p + 1;
1034
- } catch (std::exception& e) {
1035
- error(__FUNCTION__, e.what());
1036
- return pe;
1037
- }
1038
- }
1039
- else if (cs == EDN_tagged_error) {
1040
- error(__FUNCTION__, "tagged element symbol error", *p);
1041
- }
1042
- else if (cs == EDN_tagged_en_main) {} // silence ragel warning
1043
- v = EDN_EOF_CONST;
1044
- return nullptr;
1056
+ if (!sym_ok || !data_ok) {
1057
+ error(__FUNCTION__, "tagged element symbol error", *p);
1058
+ v = EDN_EOF_CONST;
1059
+ return nullptr;
1060
+ }
1061
+
1062
+ try {
1063
+ // tagged_element makes a call to ruby which may throw an
1064
+ // exception when parsing the data
1065
+ v = edn::util::call_module_fn(rb_mEDN, EDN_TAGGED_ELEM_METHOD, sym_name, data);
1066
+ return p + 1;
1067
+ } catch (std::exception& e) {
1068
+ error(__FUNCTION__, e.what());
1069
+ return pe;
1070
+ }
1071
+ }
1072
+ else if (cs == EDN_tagged_error) {
1073
+ error(__FUNCTION__, "tagged element symbol error", *p);
1074
+ }
1075
+ else if (cs == EDN_tagged_en_main) {} // silence ragel warning
1076
+ v = EDN_EOF_CONST;
1077
+ return nullptr;
1045
1078
  }
1046
1079
 
1047
1080
 
1048
-
1049
-
1050
1081
  // ============================================================
1051
1082
  // metadata - looks like ruby just discards this but we'll track it
1052
1083
  // and provide a means to retrieve after each parse op - might be
1053
1084
  // useful?
1054
1085
  //
1055
1086
  %%{
1056
- machine EDN_meta;
1057
- include EDN_common;
1087
+ machine EDN_meta;
1088
+ include EDN_common;
1058
1089
 
1059
- write data;
1090
+ write data;
1060
1091
 
1061
- action parse_data {
1062
- const char *np = parse_value(fpc, pe, v);
1063
- if (np == nullptr) { fhold; fbreak; } else { fexec np; }
1064
- }
1092
+ action parse_data {
1093
+ const char *np = parse_value(fpc, pe, v);
1094
+ if (np == nullptr) { fhold; fbreak; } else { fexec np; }
1095
+ }
1065
1096
 
1066
- main := begin_meta (
1067
- begin_value >parse_data
1068
- ) @exit;
1097
+ main := begin_meta (
1098
+ begin_value >parse_data
1099
+ ) @exit;
1069
1100
  }%%
1070
1101
 
1071
1102
 
1072
1103
  const char* edn::Parser::parse_meta(const char *p, const char *pe)
1073
1104
  {
1074
- int cs;
1075
- VALUE v;
1076
-
1077
- %% write init;
1078
- %% write exec;
1079
-
1080
- if (cs >= EDN_meta_first_final) {
1081
- append_to_meta(v);
1082
- return p + 1;
1083
- }
1084
- else if (cs == EDN_meta_error) {
1085
- error(__FUNCTION__, *p);
1086
- return pe;
1087
- }
1088
- else if (cs == EDN_meta_en_main) {} // silence ragel warning
1089
-
1090
- return nullptr;
1105
+ int cs;
1106
+ VALUE v;
1107
+
1108
+ %% write init;
1109
+ %% write exec;
1110
+
1111
+ if (cs >= EDN_meta_first_final) {
1112
+ append_to_meta(v);
1113
+ return p + 1;
1114
+ }
1115
+ else if (cs == EDN_meta_error) {
1116
+ error(__FUNCTION__, *p);
1117
+ return pe;
1118
+ }
1119
+ else if (cs == EDN_meta_en_main) {} // silence ragel warning
1120
+ return nullptr;
1091
1121
  }
1092
1122
 
1093
1123
 
@@ -1097,55 +1127,55 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
1097
1127
  // top-level, therefore, does not tokenize source stream
1098
1128
  //
1099
1129
  %%{
1100
- machine EDN_parser;
1101
- include EDN_common;
1102
-
1103
- write data;
1104
-
1105
- action parse_elem {
1106
- // save the count of metadata items before we parse this value
1107
- // so we can determine if we've read another metadata value or
1108
- // an actual data item
1109
- std::size_t meta_sz = meta_size();
1110
- const char* np = parse_value(fpc, pe, result);
1111
- if (np == nullptr) { fexec pe; fbreak; } else {
1112
- // if we have metadata saved and it matches the count we
1113
- // saved before we parsed a value, then we must bind the
1114
- // metadata sequence to it
1115
- if (!meta_empty() && meta_size() == meta_sz) {
1116
- // this will empty the metadata sequence too
1117
- result = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
1118
- }
1119
- fexec np;
1120
- }
1121
- }
1122
-
1123
- element = begin_value >parse_elem;
1124
- next_element = ignore* element;
1125
- sequence = ((element ignore*) (next_element ignore*)*);
1126
-
1127
- main := ignore* sequence? ignore*;
1130
+ machine EDN_parser;
1131
+ include EDN_common;
1132
+
1133
+ write data;
1134
+
1135
+ action parse_elem {
1136
+ // save the count of metadata items before we parse this value
1137
+ // so we can determine if we've read another metadata value or
1138
+ // an actual data item
1139
+ std::size_t meta_sz = meta_size();
1140
+ const char* np = parse_value(fpc, pe, result);
1141
+ if (np == nullptr) { fexec pe; fbreak; } else {
1142
+ // if we have metadata saved and it matches the count we
1143
+ // saved before we parsed a value, then we must bind the
1144
+ // metadata sequence to it
1145
+ if (!meta_empty() && meta_size() == meta_sz) {
1146
+ // this will empty the metadata sequence too
1147
+ result = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
1148
+ }
1149
+ fexec np;
1150
+ }
1151
+ }
1152
+
1153
+ element = begin_value >parse_elem;
1154
+ next_element = ignore* element;
1155
+ sequence = ((element ignore*) (next_element ignore*)*);
1156
+
1157
+ main := ignore* sequence? ignore*;
1128
1158
  }%%
1129
1159
 
1130
1160
 
1131
1161
  VALUE edn::Parser::parse(const char* src, std::size_t len)
1132
1162
  {
1133
- int cs;
1134
- VALUE result = EDN_EOF_CONST;
1135
-
1136
- %% write init;
1137
- set_source(src, len);
1138
- %% write exec;
1139
-
1140
- if (cs == EDN_parser_error) {
1141
- error(__FUNCTION__, *p);
1142
- return EDN_EOF_CONST;
1143
- }
1144
- else if (cs == EDN_parser_first_final) {
1145
- p = pe = eof = nullptr;
1146
- }
1147
- else if (cs == EDN_parser_en_main) {} // silence ragel warning
1148
- return result;
1163
+ int cs;
1164
+ VALUE result = EDN_EOF_CONST;
1165
+
1166
+ %% write init;
1167
+ set_source(src, len);
1168
+ %% write exec;
1169
+
1170
+ if (cs == EDN_parser_error) {
1171
+ error(__FUNCTION__, *p);
1172
+ return EDN_EOF_CONST;
1173
+ }
1174
+ else if (cs == EDN_parser_first_final) {
1175
+ p = pe = eof = nullptr;
1176
+ }
1177
+ else if (cs == EDN_parser_en_main) {} // silence ragel warning
1178
+ return result;
1149
1179
  }
1150
1180
 
1151
1181
 
@@ -1153,43 +1183,43 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
1153
1183
  // token-by-token machine
1154
1184
  //
1155
1185
  %%{
1156
- machine EDN_tokens;
1157
- include EDN_common;
1158
-
1159
- write data nofinal noerror;
1160
-
1161
- action parse_token {
1162
- // we won't know if we've parsed a discard or a metadata until
1163
- // after parse_value() is done. Save the current number of
1164
- // elements in the metadata sequence; then we can check if it
1165
- // grew or if the discard sequence grew
1166
- meta_sz = meta_size();
1167
-
1168
- const char* np = parse_value(fpc, pe, value);
1169
- if (np == nullptr) { fhold; fbreak; } else {
1170
- if (!meta_empty()) {
1171
- // was an additional metadata entry read? if so, don't
1172
- // return a value
1173
- if (meta_size() > meta_sz) {
1174
- state = TOKEN_IS_META;
1175
- }
1176
- else {
1177
- // a value was read and there's a pending metadata
1178
- // sequence. Bind them.
1179
- value = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
1180
- state = TOKEN_OK;
1181
- }
1182
- } else if (!discard.empty()) {
1183
- // a discard read. Don't return a value
1184
- state = TOKEN_IS_DISCARD;
1185
- } else {
1186
- state = TOKEN_OK;
1186
+ machine EDN_tokens;
1187
+ include EDN_common;
1188
+
1189
+ write data nofinal noerror;
1190
+
1191
+ action parse_token {
1192
+ // we won't know if we've parsed a discard or a metadata until
1193
+ // after parse_value() is done. Save the current number of
1194
+ // elements in the metadata sequence; then we can check if it
1195
+ // grew or if the discard sequence grew
1196
+ meta_sz = meta_size();
1197
+
1198
+ const char* np = parse_value(fpc, pe, value);
1199
+ if (np == nullptr) { fhold; fbreak; } else {
1200
+ if (!meta_empty()) {
1201
+ // was an additional metadata entry read? if so, don't
1202
+ // return a value
1203
+ if (meta_size() > meta_sz) {
1204
+ state = TOKEN_IS_META;
1187
1205
  }
1188
- fexec np;
1189
- }
1190
- }
1191
-
1192
- main := ignore* begin_value >parse_token ignore*;
1206
+ else {
1207
+ // a value was read and there's a pending metadata
1208
+ // sequence. Bind them.
1209
+ value = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
1210
+ state = TOKEN_OK;
1211
+ }
1212
+ } else if (!discard.empty()) {
1213
+ // a discard read. Don't return a value
1214
+ state = TOKEN_IS_DISCARD;
1215
+ } else {
1216
+ state = TOKEN_OK;
1217
+ }
1218
+ fexec np;
1219
+ }
1220
+ }
1221
+
1222
+ main := ignore* begin_value >parse_token ignore*;
1193
1223
  }%%
1194
1224
 
1195
1225
 
@@ -1197,21 +1227,21 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
1197
1227
  //
1198
1228
  edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
1199
1229
  {
1200
- int cs;
1201
- eTokenState state = TOKEN_ERROR;
1202
- // need to track metadata read and bind it to the next value read
1203
- // - but must account for sequences of metadata values
1204
- std::size_t meta_sz;
1230
+ int cs;
1231
+ eTokenState state = TOKEN_ERROR;
1232
+ // need to track metadada read and bind it to the next value read
1233
+ // - but must account for sequences of metadata values
1234
+ std::size_t meta_sz;
1205
1235
 
1206
- // clear any previously saved discards; only track if read during
1207
- // this op
1208
- discard.clear();
1236
+ // clear any previously saved discards; only track if read during
1237
+ // this op
1238
+ discard.clear();
1209
1239
 
1210
- %% write init;
1211
- %% write exec;
1240
+ %% write init;
1241
+ %% write exec;
1212
1242
 
1213
- if (cs == EDN_tokens_en_main) {} // silence ragel warning
1214
- return state;
1243
+ if (cs == EDN_tokens_en_main) {} // silence ragel warning
1244
+ return state;
1215
1245
  }
1216
1246
 
1217
1247
  /*