edn_turbo 0.5.7 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,25 @@
1
+ // The MIT License (MIT)
2
+
3
+ // Copyright (c) 2015-2021 Ed Porras
4
+
5
+ // Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ // of this software and associated documentation files (the "Software"), to deal
7
+ // in the Software without restriction, including without limitation the rights
8
+ // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ // copies of the Software, and to permit persons to whom the Software is
10
+ // furnished to do so, subject to the following conditions:
11
+
12
+ // The above copyright notice and this permission notice shall be included in
13
+ // all copies or substantial portions of the Software.
14
+
15
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ // THE SOFTWARE.
22
+
1
23
  #include <iostream>
2
24
  #include <string>
3
25
  #include <sstream>
@@ -19,43 +41,43 @@
19
41
  //
20
42
 
21
43
  %%{
22
- machine EDN_common;
23
-
24
- cr = '\n';
25
- counter = ( cr @{ line_number++; } );
26
- cr_neg = [^\n];
27
- ws = [\t\v\f\r ] | ',' | counter;
28
- comment = ';' cr_neg* counter;
29
- ignore = ws | comment;
30
-
31
- operators = [/\.\*!_\?$%&<>\=+\-\'];
32
-
33
- begin_dispatch = '#';
34
- begin_keyword = ':';
35
- begin_char = '\\';
36
- begin_vector = '[';
37
- begin_map = '{';
38
- begin_list = '(';
39
- begin_meta = '^';
40
- string_delim = '"';
41
- begin_number = digit;
42
- begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
43
- begin_symbol = alpha;
44
-
45
- # int / decimal rules
46
- integer = ('0' | [1-9] digit*);
47
- exp = ([Ee] [+\-]? digit+);
48
-
49
-
50
- # common actions
51
- action close_err {
52
- std::stringstream s;
53
- s << "unterminated " << EDN_TYPE;
54
- error(__FUNCTION__, s.str());
55
- fhold; fbreak;
56
- }
57
-
58
- action exit { fhold; fbreak; }
44
+ machine EDN_common;
45
+
46
+ cr = '\n';
47
+ counter = ( cr @{ line_number++; } );
48
+ cr_neg = [^\n];
49
+ ws = [\t\v\f\r ] | ',' | counter;
50
+ comment = ';' cr_neg* counter;
51
+ ignore = ws | comment;
52
+
53
+ operators = [/\.\*!_\?$%&<>\=+\-\'];
54
+
55
+ begin_dispatch = '#';
56
+ begin_keyword = ':';
57
+ begin_char = '\\';
58
+ begin_vector = '[';
59
+ begin_map = '{';
60
+ begin_list = '(';
61
+ begin_meta = '^';
62
+ string_delim = '"';
63
+ begin_number = digit;
64
+ begin_value = alnum | [:\"\{\[\(\\\#^] | operators;
65
+ begin_symbol = alpha;
66
+
67
+ # int / decimal rules
68
+ integer = ('0' | [1-9] digit*);
69
+ exp = ([Ee] [+\-]? digit+);
70
+
71
+
72
+ # common actions
73
+ action close_err {
74
+ std::stringstream s;
75
+ s << "unterminated " << EDN_TYPE;
76
+ error(__FUNCTION__, s.str());
77
+ fhold; fbreak;
78
+ }
79
+
80
+ action exit { fhold; fbreak; }
59
81
  }%%
60
82
 
61
83
  // ============================================================
@@ -63,135 +85,140 @@
63
85
  //
64
86
 
65
87
  %%{
66
- machine EDN_value;
67
- include EDN_common;
68
-
69
- write data;
70
-
71
- action parse_val_string {
72
- // string types within double-quotes
73
- const char *np = parse_string(fpc, pe, v);
74
- if (np == NULL) { fhold; fbreak; } else fexec np;
75
- }
76
-
77
- action parse_val_keyword {
78
- // tokens with a leading ':'
79
- const char *np = parse_keyword(fpc, pe, v);
80
- if (np == NULL) { fhold; fbreak; } else fexec np;
81
- }
82
-
83
- action parse_val_number {
84
- // tokens w/ leading digits: non-negative integers & decimals.
85
- // try to parse a decimal first
86
- const char *np = parse_decimal(fpc, pe, v);
87
- if (np == NULL) {
88
- // if we can't, try to parse it as an int
88
+ machine EDN_value;
89
+ include EDN_common;
90
+
91
+ write data;
92
+
93
+ action parse_val_string {
94
+ // string types within double-quotes
95
+ const char *np = parse_string(fpc, pe, v);
96
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
97
+ }
98
+
99
+ action parse_val_keyword {
100
+ // tokens with a leading ':'
101
+ const char *np = parse_keyword(fpc, pe, v);
102
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
103
+ }
104
+
105
+ action parse_val_number {
106
+ // tokens w/ leading digits: non-negative integers & decimals.
107
+ // try to parse a decimal first
108
+ const char *np = parse_decimal(fpc, pe, v);
109
+ if (np == nullptr) {
110
+ // if we can't, try to parse it as a ratio
111
+ np = parse_ratio(fpc, pe, v);
112
+
113
+ // otherwise, an int
114
+ if (np == nullptr) {
89
115
  np = parse_integer(fpc, pe, v);
90
- }
91
-
92
- if (np) {
93
- fexec np;
94
- fhold;
95
- fbreak;
96
- }
97
- else {
98
- error(__FUNCTION__, "number format error", *p);
99
- fexec pe;
100
- }
101
- }
102
-
103
- action parse_val_operator {
104
- // stand-alone operators *, +, -, etc.
105
- const char *np = parse_operator(fpc, pe, v);
106
- if (np == NULL) { fhold; fbreak; } else fexec np;
107
- }
108
-
109
- action parse_val_char {
110
- // tokens w/ leading \ (escaped characters \newline, \c, etc.)
111
- const char *np = parse_esc_char(fpc, pe, v);
112
- if (np == NULL) { fhold; fbreak; } else fexec np;
113
- }
114
-
115
- action parse_val_symbol {
116
- // user identifiers and reserved keywords (true, false, nil)
117
- VALUE sym = Qnil;
118
- const char *np = parse_symbol(fpc, pe, sym);
119
- if (np == NULL) { fexec pe; } else {
120
- // parse_symbol will make 'sym' a ruby string
121
- if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
122
- else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
123
- else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
124
- else {
125
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
126
- }
127
- fexec np;
128
- }
129
- }
130
-
131
- action parse_val_vector {
132
- // [
133
- const char *np = parse_vector(fpc, pe, v);
134
- if (np == NULL) { fhold; fbreak; } else fexec np;
135
- }
136
-
137
- action parse_val_list {
138
- // (
139
- const char *np = parse_list(fpc, pe, v);
140
- if (np == NULL) { fhold; fbreak; } else fexec np;
141
- }
142
-
143
- action parse_val_map {
144
- // {
145
- const char *np = parse_map(fpc, pe, v);
146
- if (np == NULL) { fhold; fbreak; } else fexec np;
147
- }
148
-
149
- action parse_val_meta {
150
- // ^
151
- const char *np = parse_meta(fpc, pe);
152
- if (np == NULL) { fhold; fbreak; } else fexec np;
153
- }
154
-
155
- action parse_val_dispatch {
156
- // handles tokens w/ leading # ("#_", "#{", and tagged elems)
157
- const char *np = parse_dispatch(fpc + 1, pe, v);
158
- if (np == NULL) { fhold; fbreak; } else fexec np;
159
- }
160
-
161
-
162
- main := (
163
- string_delim >parse_val_string |
164
- begin_keyword >parse_val_keyword |
165
- begin_number >parse_val_number |
166
- operators >parse_val_operator |
167
- begin_char >parse_val_char |
168
- begin_symbol >parse_val_symbol |
169
- begin_vector >parse_val_vector |
170
- begin_list >parse_val_list |
171
- begin_map >parse_val_map |
172
- begin_meta >parse_val_meta |
173
- begin_dispatch >parse_val_dispatch
174
- ) %*exit;
116
+ }
117
+ }
118
+
119
+ if (np) {
120
+ fexec np;
121
+ fhold;
122
+ fbreak;
123
+ }
124
+ else {
125
+ error(__FUNCTION__, "number format error", *p);
126
+ fexec pe;
127
+ }
128
+ }
129
+
130
+ action parse_val_operator {
131
+ // stand-alone operators *, +, -, etc.
132
+ const char *np = parse_operator(fpc, pe, v);
133
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
134
+ }
135
+
136
+ action parse_val_char {
137
+ // tokens w/ leading \ (escaped characters \newline, \c, etc.)
138
+ const char *np = parse_esc_char(fpc, pe, v);
139
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
140
+ }
141
+
142
+ action parse_val_symbol {
143
+ // user identifiers and reserved keywords (true, false, nil)
144
+ VALUE sym = Qnil;
145
+ const char *np = parse_symbol(fpc, pe, sym);
146
+ if (np == nullptr) { fexec pe; } else {
147
+ // parse_symbol will make 'sym' a ruby string
148
+ if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
149
+ else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
150
+ else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
151
+ else {
152
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
153
+ }
154
+ fexec np;
155
+ }
156
+ }
157
+
158
+ action parse_val_vector {
159
+ // [
160
+ const char *np = parse_vector(fpc, pe, v);
161
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
162
+ }
163
+
164
+ action parse_val_list {
165
+ // (
166
+ const char *np = parse_list(fpc, pe, v);
167
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
168
+ }
169
+
170
+ action parse_val_map {
171
+ // {
172
+ const char *np = parse_map(fpc, pe, v);
173
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
174
+ }
175
+
176
+ action parse_val_meta {
177
+ // ^
178
+ const char *np = parse_meta(fpc, pe);
179
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
180
+ }
181
+
182
+ action parse_val_dispatch {
183
+ // handles tokens w/ leading # ("#_", "#{", and tagged elems)
184
+ const char *np = parse_dispatch(fpc + 1, pe, v);
185
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
186
+ }
187
+
188
+
189
+ main := (
190
+ string_delim >parse_val_string |
191
+ begin_keyword >parse_val_keyword |
192
+ begin_number >parse_val_number |
193
+ operators >parse_val_operator |
194
+ begin_char >parse_val_char |
195
+ begin_symbol >parse_val_symbol |
196
+ begin_vector >parse_val_vector |
197
+ begin_list >parse_val_list |
198
+ begin_map >parse_val_map |
199
+ begin_meta >parse_val_meta |
200
+ begin_dispatch >parse_val_dispatch
201
+ ) %*exit;
175
202
  }%%
176
203
 
177
204
 
178
205
  const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
179
206
  {
180
- // std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
181
- int cs;
182
-
183
- %% write init;
184
- %% write exec;
185
-
186
- if (cs >= EDN_value_first_final) {
187
- return p;
188
- }
189
- else if (cs == EDN_value_error) {
190
- error(__FUNCTION__, "token error", *p);
191
- return pe;
192
- }
193
- else if (cs == EDN_value_en_main) {} // silence ragel warning
194
- return NULL;
207
+ // std::cerr << __FUNCTION__ << "() p: \"" << p << "\"" << std::endl;
208
+ int cs;
209
+
210
+ %% write init;
211
+ %% write exec;
212
+
213
+ if (cs >= EDN_value_first_final) {
214
+ return p;
215
+ }
216
+ else if (cs == EDN_value_error) {
217
+ error(__FUNCTION__, "token error", *p);
218
+ return pe;
219
+ }
220
+ else if (cs == EDN_value_en_main) {} // silence ragel warning
221
+ return nullptr;
195
222
  }
196
223
 
197
224
 
@@ -203,51 +230,51 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
203
230
  // ascii range is found.
204
231
  //
205
232
  %%{
206
- machine EDN_string;
207
- include EDN_common;
208
-
209
- write data;
210
-
211
- action parse_chars {
212
- if (edn::util::parse_byte_stream(p_save + 1, p, v, encode)) {
213
- fexec p + 1;
214
- } else {
215
- fhold; fbreak;
216
- }
217
- }
218
-
219
- action mark_for_encoding {
220
- encode = true;
221
- }
222
-
223
- main := string_delim (
224
- (^([\"\\] | 0xc2..0xf5) |
225
- ((0xc2..0xf5) |
226
- '\\'[\"\\/bfnrt] |
227
- '\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
228
- '\\'^([\"\\/bfnrtu]))* %parse_chars
229
- ) :>> string_delim @err(close_err) @exit;
233
+ machine EDN_string;
234
+ include EDN_common;
235
+
236
+ write data;
237
+
238
+ action parse_chars {
239
+ if (edn::util::parse_byte_stream(p_save + 1, p, v, encode)) {
240
+ fexec p + 1;
241
+ } else {
242
+ fhold; fbreak;
243
+ }
244
+ }
245
+
246
+ action mark_for_encoding {
247
+ encode = true;
248
+ }
249
+
250
+ main := string_delim (
251
+ (^([\"\\] | 0xc2..0xf5) |
252
+ ((0xc2..0xf5) |
253
+ '\\'[\"\\/bfnrt] |
254
+ '\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
255
+ '\\'^([\"\\/bfnrtu]))* %parse_chars
256
+ ) :>> string_delim @err(close_err) @exit;
230
257
  }%%
231
258
 
232
259
 
233
260
  const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
234
261
  {
235
- static const char* EDN_TYPE = "string";
236
- int cs;
237
- bool encode = false;
238
-
239
- %% write init;
240
- const char* p_save = p;
241
- %% write exec;
242
-
243
- if (cs >= EDN_string_first_final) {
244
- return p + 1;
245
- }
246
- else if (cs == EDN_string_error) {
247
- return pe;
248
- }
249
- else if (cs == EDN_string_en_main) {} // silence ragel warning
250
- return NULL;
262
+ static const char* EDN_TYPE = "string";
263
+ int cs;
264
+ bool encode = false;
265
+
266
+ %% write init;
267
+ const char* p_save = p;
268
+ %% write exec;
269
+
270
+ if (cs >= EDN_string_first_final) {
271
+ return p + 1;
272
+ }
273
+ else if (cs == EDN_string_error) {
274
+ return pe;
275
+ }
276
+ else if (cs == EDN_string_en_main) {} // silence ragel warning
277
+ return nullptr;
251
278
  }
252
279
 
253
280
 
@@ -256,79 +283,78 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
256
283
  // keyword parsing
257
284
  //
258
285
  %%{
259
- machine EDN_keyword;
260
- include EDN_common;
286
+ machine EDN_keyword;
287
+ include EDN_common;
261
288
 
262
- keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
263
- keyword_chars = (keyword_start | digit | ':');
289
+ keyword_start = alpha | [\.\*!_\?$%&<>\=+\-\'\#];
290
+ keyword_chars = (keyword_start | digit | ':');
264
291
 
265
- keyword_name = keyword_start keyword_chars*;
266
- keyword = keyword_name ('/' keyword_chars*)?;
292
+ keyword_name = keyword_start keyword_chars*;
293
+ keyword = keyword_name ('/' keyword_chars*)?;
267
294
 
268
- write data;
295
+ write data;
269
296
 
270
297
 
271
- main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
298
+ main := begin_keyword keyword (^(keyword_chars | '/')? @exit);
272
299
  }%%
273
300
 
274
301
 
275
302
  const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
276
303
  {
277
- int cs;
278
-
279
- %% write init;
280
- const char* p_save = p;
281
- %% write exec;
282
-
283
- if (cs >= EDN_keyword_first_final) {
284
- std::string buf;
285
- uintmax_t len = p - p_save;
286
- // don't include leading ':' because the ruby symbol will handle it
287
- buf.append(p_save + 1, len - 1);
288
- v = ID2SYM(rb_intern(buf.c_str()));
289
- return p;
290
- }
291
- else if (cs == EDN_keyword_error) {
292
- error(__FUNCTION__, "invalid keyword", *p);
293
- return pe;
294
- }
295
- else if (cs == EDN_keyword_en_main) {} // silence ragel warning
296
- return NULL;
304
+ int cs;
305
+
306
+ %% write init;
307
+ const char* p_save = p;
308
+ %% write exec;
309
+
310
+ if (cs >= EDN_keyword_first_final) {
311
+ std::string buf;
312
+ uintmax_t len = p - p_save;
313
+ // don't include leading ':' because the ruby symbol will handle it
314
+ buf.append(p_save + 1, len - 1);
315
+ v = ID2SYM(rb_intern(buf.c_str()));
316
+ return p;
317
+ }
318
+ else if (cs == EDN_keyword_error) {
319
+ error(__FUNCTION__, "invalid keyword", *p);
320
+ return pe;
321
+ }
322
+ else if (cs == EDN_keyword_en_main) {} // silence ragel warning
323
+ return nullptr;
297
324
  }
298
325
 
299
326
 
300
-
301
327
  // ============================================================
302
328
  // decimal parsing machine
303
329
  //
304
330
  %%{
305
- machine EDN_decimal;
306
- include EDN_common;
331
+ machine EDN_decimal;
332
+ include EDN_common;
307
333
 
308
- write data noerror;
334
+ write data noerror;
309
335
 
310
336
 
311
- main := ('-'|'+')? (
312
- (integer '.' digit* (exp? [M]?)) |
313
- (integer exp)
314
- ) (^[0-9Ee.+\-M]? @exit );
337
+ main := ('-'|'+')? (
338
+ (integer '.' digit* (exp? [M]?)) |
339
+ (integer exp)
340
+ ) (^[0-9Ee.+\-M]? @exit );
315
341
  }%%
316
342
 
317
343
 
318
344
  const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
319
345
  {
320
- int cs;
321
-
322
- %% write init;
323
- const char* p_save = p;
324
- %% write exec;
325
-
326
- if (cs >= EDN_decimal_first_final) {
327
- v = edn::util::float_to_ruby(p_save, p - p_save);
328
- return p + 1;
329
- }
330
- else if (cs == EDN_decimal_en_main) {} // silence ragel warning
331
- return NULL;
346
+ int cs;
347
+
348
+ %% write init;
349
+ const char* p_save = p;
350
+ %% write exec;
351
+
352
+ if (cs >= EDN_decimal_first_final) {
353
+ v = edn::util::float_to_ruby(p_save, p - p_save);
354
+ return p + 1;
355
+ }
356
+ else if (cs == EDN_decimal_en_main) {} // silence ragel warning
357
+ return nullptr;
332
358
  }
333
359
 
334
360
 
@@ -336,34 +362,65 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
336
362
  // integer parsing machine - M suffix will return a BigNum
337
363
  //
338
364
  %%{
339
- machine EDN_integer;
340
- include EDN_common;
365
+ machine EDN_integer;
366
+ include EDN_common;
341
367
 
342
- write data noerror;
368
+ write data noerror;
343
369
 
344
370
 
345
- main := (
346
- ('-'|'+')? (integer [MN]?)
347
- ) (^[0-9MN+\-]? @exit);
371
+ main := (
372
+ ('-'|'+')? (integer [MN]?)
373
+ ) (^[0-9MN+\-]? @exit);
348
374
  }%%
349
375
 
350
376
  const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
351
377
  {
352
- int cs;
353
-
354
- %% write init;
355
- const char* p_save = p;
356
- %% write exec;
357
-
358
- if (cs >= EDN_integer_first_final) {
359
- v = edn::util::integer_to_ruby(p_save, p - p_save);
360
- return p + 1;
361
- }
362
- else if (cs == EDN_integer_en_main) {} // silence ragel warning
363
- return NULL;
378
+ int cs;
379
+
380
+ %% write init;
381
+ const char* p_save = p;
382
+ %% write exec;
383
+
384
+ if (cs >= EDN_integer_first_final) {
385
+ v = edn::util::integer_to_ruby(p_save, p - p_save);
386
+ return p + 1;
387
+ }
388
+ else if (cs == EDN_integer_en_main) {} // silence ragel warning
389
+ return nullptr;
364
390
  }
365
391
 
366
392
 
393
+ // ============================================================
394
+ // ratio parsing machine
395
+ //
396
+ %%{
397
+ machine EDN_ratio;
398
+ include EDN_common;
399
+
400
+ write data noerror;
401
+
402
+
403
+ main := (
404
+ ('-'|'+')? (integer '/' integer)
405
+ ) (^[0-9+\-\/]? @exit);
406
+ }%%
407
+
408
+
409
+ const char* edn::Parser::parse_ratio(const char *p, const char *pe, VALUE& v)
410
+ {
411
+ int cs;
412
+
413
+ %% write init;
414
+ const char* p_save = p;
415
+ %% write exec;
416
+
417
+ if (cs >= EDN_ratio_first_final) {
418
+ v = edn::util::ratio_to_ruby(p_save, p - p_save);
419
+ return p + 1;
420
+ }
421
+ else if (cs == EDN_ratio_en_main) {} // silence ragel warning
422
+ return nullptr;
423
+ }
367
424
 
368
425
  // ============================================================
369
426
  // operator parsing - handles tokens w/ a leading operator:
@@ -373,81 +430,86 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
373
430
  // 3. stand-alone operators: +, -, /, *, etc.
374
431
  //
375
432
  %%{
376
- machine EDN_operator;
377
- include EDN_common;
378
-
379
- write data;
380
-
381
- action parse_op_symbol {
382
- // parse a symbol including the leading operator (-, +, .)
383
- VALUE sym = Qnil;
384
- const char *np = parse_symbol(p_save, pe, sym);
385
- if (np == NULL) { fexec pe; } else {
386
- if (sym != Qnil)
387
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
388
- fexec np;
389
- }
390
- }
391
-
392
- action parse_op_number {
393
- // parse a number with the leading symbol - this is slightly
394
- // different than the one within EDN_value since it includes
395
- // the leading - or +
396
- //
397
- // try to parse a decimal first
398
- const char *np = parse_decimal(p_save, pe, v);
399
- if (np == NULL) {
400
- // if we can't, try to parse it as an int
433
+ machine EDN_operator;
434
+ include EDN_common;
435
+
436
+ write data;
437
+
438
+ action parse_op_symbol {
439
+ // parse a symbol including the leading operator (-, +, .)
440
+ VALUE sym = Qnil;
441
+ const char *np = parse_symbol(p_save, pe, sym);
442
+ if (np == nullptr) { fexec pe; } else {
443
+ if (sym != Qnil)
444
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
445
+ fexec np;
446
+ }
447
+ }
448
+
449
+ action parse_op_number {
450
+ // parse a number with the leading symbol - this is slightly
451
+ // different than the one within EDN_value since it includes
452
+ // the leading - or +
453
+ //
454
+ // try to parse a decimal first
455
+ const char *np = parse_decimal(p_save, pe, v);
456
+ if (np == nullptr) {
457
+ // if we can't, try to parse it as a ratio
458
+ np = parse_ratio(p_save, pe, v);
459
+
460
+ if (np == nullptr) {
461
+ // again, if we can't, try to parse it as an int
401
462
  np = parse_integer(p_save, pe, v);
402
- }
403
-
404
- if (np) {
405
- fexec np;
406
- fhold;
407
- fbreak;
408
- }
409
- else {
410
- error(__FUNCTION__, "number format error", *p);
411
- fexec pe;
412
- }
413
- }
414
-
415
- action parse_op {
416
- // stand-alone operators (-, +, /, ... etc)
417
- char op[2] = { *p_save, 0 };
418
- VALUE sym = rb_str_new2(op);
419
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
420
- }
421
-
422
- valid_non_numeric_chars = alpha|operators|':'|'#';
423
- valid_chars = valid_non_numeric_chars | digit;
424
-
425
- main := (
426
- ('-'|'+') begin_number >parse_op_number |
427
- (operators - [\-\+\.]) valid_chars >parse_op_symbol |
428
- [\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
429
- operators ignore* >parse_op
430
- ) ^(valid_chars)? @exit;
463
+ }
464
+ }
465
+
466
+ if (np) {
467
+ fexec np;
468
+ fhold;
469
+ fbreak;
470
+ }
471
+ else {
472
+ error(__FUNCTION__, "number format error", *p);
473
+ fexec pe;
474
+ }
475
+ }
476
+
477
+ action parse_op {
478
+ // stand-alone operators (-, +, /, ... etc)
479
+ char op[2] = { *p_save, 0 };
480
+ VALUE sym = rb_str_new2(op);
481
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym);
482
+ }
483
+
484
+ valid_non_numeric_chars = alpha|operators|':'|'#';
485
+ valid_chars = valid_non_numeric_chars | digit;
486
+
487
+ main := (
488
+ ('-'|'+') begin_number >parse_op_number |
489
+ (operators - [\-\+\.]) valid_chars >parse_op_symbol |
490
+ [\-\+\.] valid_non_numeric_chars valid_chars* >parse_op_symbol |
491
+ operators ignore* >parse_op
492
+ ) ^(valid_chars)? @exit;
431
493
  }%%
432
494
 
433
495
 
434
496
  const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
435
497
  {
436
- int cs;
437
-
438
- %% write init;
439
- const char* p_save = p;
440
- %% write exec;
441
-
442
- if (cs >= EDN_operator_first_final) {
443
- return p;
444
- }
445
- else if (cs == EDN_operator_error) {
446
- error(__FUNCTION__, "symbol syntax error", *p);
447
- return pe;
448
- }
449
- else if (cs == EDN_operator_en_main) {} // silence ragel warning
450
- return NULL;
498
+ int cs;
499
+
500
+ %% write init;
501
+ const char* p_save = p;
502
+ %% write exec;
503
+
504
+ if (cs >= EDN_operator_first_final) {
505
+ return p;
506
+ }
507
+ else if (cs == EDN_operator_error) {
508
+ error(__FUNCTION__, "symbol syntax error", *p);
509
+ return pe;
510
+ }
511
+ else if (cs == EDN_operator_en_main) {} // silence ragel warning
512
+ return nullptr;
451
513
  }
452
514
 
453
515
 
@@ -456,47 +518,46 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
456
518
  // escaped char parsing - handles \c, \newline, \formfeed, etc.
457
519
  //
458
520
  %%{
459
- machine EDN_escaped_char;
460
- include EDN_common;
521
+ machine EDN_escaped_char;
522
+ include EDN_common;
461
523
 
462
- write data;
524
+ write data;
463
525
 
464
- valid_chars = extend;
526
+ valid_chars = extend;
465
527
 
466
528
 
467
- main := begin_char (
468
- 'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
469
- valid_chars
470
- ) (ignore* | [\\\]\}\)])? @exit;
529
+ main := begin_char (
530
+ 'space' | 'newline' | 'tab' | 'return' | 'formfeed' | 'backspace' |
531
+ valid_chars
532
+ ) (ignore* | [\\\]\}\)])? @exit;
471
533
  }%%
472
534
 
473
535
 
474
536
  const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
475
537
  {
476
- int cs;
477
-
478
- %% write init;
479
- const char* p_save = p;
480
- %% write exec;
481
-
482
- if (cs >= EDN_escaped_char_first_final) {
483
- // convert the escaped value to a character
484
- if (!edn::util::parse_escaped_char(p_save + 1, p, v)) {
485
- return pe;
486
- }
487
- return p;
488
- }
489
- else if (cs == EDN_escaped_char_error) {
490
- error(__FUNCTION__, "unexpected value", *p);
491
- return pe;
492
- }
493
- else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
494
- return NULL;
538
+ int cs;
539
+
540
+ %% write init;
541
+ const char* p_save = p;
542
+ %% write exec;
543
+
544
+ if (cs >= EDN_escaped_char_first_final) {
545
+ // convert the escaped value to a character
546
+ if (!edn::util::parse_escaped_char(p_save + 1, p, v)) {
547
+ return pe;
548
+ }
549
+ return p;
550
+ }
551
+ else if (cs == EDN_escaped_char_error) {
552
+ error(__FUNCTION__, "unexpected value", *p);
553
+ return pe;
554
+ }
555
+ else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
556
+ return nullptr;
495
557
  }
496
558
 
497
559
 
498
560
 
499
-
500
561
  // ============================================================
501
562
  // symbol parsing - handles identifiers that begin with an alpha
502
563
  // character and an optional leading operator (name, -today,
@@ -504,58 +565,57 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
504
565
  //
505
566
  //
506
567
  %%{
507
- machine EDN_symbol;
508
- include EDN_common;
568
+ machine EDN_symbol;
569
+ include EDN_common;
509
570
 
510
- write data;
571
+ write data;
511
572
 
512
- symbol_ops_1 = [\.\-\+];
513
- symbol_ops_2 = [\*!_\?$%&<>\=\'];
514
- symbol_ops_3 = [:\#];
573
+ symbol_ops_1 = [\.\-\+];
574
+ symbol_ops_2 = [\*!_\?$%&<>\=\'];
575
+ symbol_ops_3 = [:\#];
515
576
 
516
- symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
577
+ symbol_start = alpha | symbol_ops_1 | symbol_ops_2;
517
578
 
518
- symbol_chars = symbol_start | digit | symbol_ops_3;
579
+ symbol_chars = symbol_start | digit | symbol_ops_3;
519
580
 
520
- symbol_name = (
521
- (alpha symbol_chars*) |
522
- (symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
523
- (symbol_start symbol_chars+) |
524
- operators{1}
525
- );
526
- symbol = '/' | (symbol_name ('/' symbol_name)?);
581
+ symbol_name = (
582
+ (alpha symbol_chars*) |
583
+ (symbol_ops_1 (symbol_start | symbol_ops_3) symbol_chars*) |
584
+ (symbol_start symbol_chars+) |
585
+ operators{1}
586
+ );
587
+ symbol = '/' | (symbol_name ('/' symbol_name)?);
527
588
 
528
589
 
529
- main := (
530
- symbol
531
- ) ignore* (^(symbol_chars | '/')? @exit);
590
+ main := (
591
+ symbol
592
+ ) ignore* (^(symbol_chars | '/')? @exit);
532
593
  }%%
533
594
 
534
595
 
535
596
  const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
536
597
  {
537
- int cs;
538
-
539
- %% write init;
540
- const char* p_save = p;
541
- %% write exec;
542
-
543
- if (cs >= EDN_symbol_first_final) {
544
- // copy the symbol text
545
- if (s == Qnil)
546
- s = rb_str_new2("");
547
- rb_str_cat(s, p_save, p - p_save);
548
- return p;
549
- }
550
- else if (cs == EDN_symbol_error) {
551
- error(__FUNCTION__, "invalid symbol sequence", *p);
552
- }
553
- else if (cs == EDN_symbol_en_main) {} // silence ragel warning
554
- return NULL;
598
+ int cs;
599
+
600
+ %% write init;
601
+ const char* p_save = p;
602
+ %% write exec;
603
+
604
+ if (cs >= EDN_symbol_first_final) {
605
+ // copy the symbol text
606
+ if (s == Qnil)
607
+ s = rb_str_new2("");
608
+ rb_str_cat(s, p_save, p - p_save);
609
+ return p;
610
+ }
611
+ else if (cs == EDN_symbol_error) {
612
+ error(__FUNCTION__, "invalid symbol sequence", *p);
613
+ }
614
+ else if (cs == EDN_symbol_en_main) {} // silence ragel warning
615
+ return nullptr;
555
616
  }
556
617
 
557
618
 
558
-
559
619
  // ============================================================
560
620
  // EDN_sequence_common is used to parse EDN containers - elements are
561
621
  // initially stored in an array and then the final corresponding
@@ -563,75 +623,75 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
563
623
  // sets the same array is used)
564
624
  //
565
625
  %%{
566
- machine EDN_sequence_common;
567
- include EDN_common;
568
-
569
- action open_seq {
570
- // sequences store elements in an array, then process it to
571
- // convert it to a list, set, or map as needed once the
572
- // sequence end is reached
573
- elems = rb_ary_new();
574
- // additionally, metadata for elements in the sequence may be
575
- // carried so we must push a new level in the metadata stack
576
- new_meta_list();
577
- }
578
-
579
- action close_seq {
580
- // remove the current metadata level
581
- del_top_meta_list();
582
- }
583
-
584
- action parse_item {
585
- // reads an item within a sequence (vector, list, map, or
586
- // set). Regardless of the sequence type, an array of the
587
- // items is built. Once done, the sequence parser will convert
588
- // if needed
589
- VALUE e;
590
- std::size_t meta_sz = meta_size();
591
- const char *np = parse_value(fpc, pe, e);
592
- if (np == NULL) { fhold; fbreak; } else {
593
- // if there's an entry in the discard list, the current
594
- // object is not meant to be kept due to a #_ so don't
595
- // push it into the list of elements
596
- if (!discard.empty()) {
597
- discard.pop_back();
626
+ machine EDN_sequence_common;
627
+ include EDN_common;
628
+
629
+ action open_seq {
630
+ // sequences store elements in an array, then process it to
631
+ // convert it to a list, set, or map as needed once the
632
+ // sequence end is reached
633
+ elems = rb_ary_new();
634
+ // additionally, metadata for elements in the sequence may be
635
+ // carried so we must push a new level in the metadata stack
636
+ new_meta_list();
637
+ }
638
+
639
+ action close_seq {
640
+ // remove the current metadata level
641
+ del_top_meta_list();
642
+ }
643
+
644
+ action parse_item {
645
+ // reads an item within a sequence (vector, list, map, or
646
+ // set). Regardless of the sequence type, an array of the
647
+ // items is built. Once done, the sequence parser will convert
648
+ // if needed
649
+ VALUE e;
650
+ std::size_t meta_sz = meta_size();
651
+ const char *np = parse_value(fpc, pe, e);
652
+ if (np == nullptr) { fhold; fbreak; } else {
653
+ // if there's an entry in the discard list, the current
654
+ // object is not meant to be kept due to a #_ so don't
655
+ // push it into the list of elements
656
+ if (!discard.empty()) {
657
+ discard.pop_back();
658
+ }
659
+ else if (!meta_empty()) {
660
+ // check if parse_value added metadata
661
+ if (meta_size() == meta_sz) {
662
+ // there's metadata and it didn't increase so
663
+ // parse_value() read an element we care
664
+ // about. Bind the metadata to it and add it to
665
+ // the sequence
666
+ e = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
667
+ rb_ary_push(elems, e);
598
668
  }
599
- else if (!meta_empty()) {
600
- // check if parse_value added metadata
601
- if (meta_size() == meta_sz) {
602
- // there's metadata and it didn't increase so
603
- // parse_value() read an element we care
604
- // about. Bind the metadata to it and add it to
605
- // the sequence
606
- e = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, e, ruby_meta());
607
- rb_ary_push(elems, e);
608
- }
609
- } else {
610
- // no metadata.. just push it
611
- rb_ary_push(elems, e);
612
- }
613
- fexec np;
614
- }
615
- }
616
-
617
- element = begin_value >parse_item;
618
- next_element = ignore* element;
619
- sequence = ((element ignore*) (next_element ignore*)*);
669
+ } else {
670
+ // no metadata.. just push it
671
+ rb_ary_push(elems, e);
672
+ }
673
+ fexec np;
674
+ }
675
+ }
676
+
677
+ element = begin_value >parse_item;
678
+ next_element = ignore* element;
679
+ sequence = ((element ignore*) (next_element ignore*)*);
620
680
  }%%
621
681
 
622
682
  //
623
683
  // vector-specific machine
624
684
  %%{
625
- machine EDN_vector;
626
- include EDN_sequence_common;
685
+ machine EDN_vector;
686
+ include EDN_sequence_common;
627
687
 
628
- end_vector = ']';
688
+ end_vector = ']';
629
689
 
630
- write data;
690
+ write data;
631
691
 
632
- main := begin_vector @open_seq (
633
- ignore* sequence? :>> end_vector @close_seq
634
- ) @err(close_err) @exit;
692
+ main := begin_vector @open_seq (
693
+ ignore* sequence? :>> end_vector @close_seq
694
+ ) @err(close_err) @exit;
635
695
  }%%
636
696
 
637
697
 
@@ -640,24 +700,24 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
640
700
  //
641
701
  const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
642
702
  {
643
- static const char* EDN_TYPE = "vector";
644
-
645
- int cs;
646
- VALUE elems; // will store the vector's elements - allocated in @open_seq
647
-
648
- %% write init;
649
- %% write exec;
650
-
651
- if (cs >= EDN_vector_first_final) {
652
- v = elems;
653
- return p + 1;
654
- }
655
- else if (cs == EDN_vector_error) {
656
- error(__FUNCTION__, "vector format error", *p);
657
- return pe;
658
- }
659
- else if (cs == EDN_vector_en_main) {} // silence ragel warning
660
- return NULL;
703
+ static const char* EDN_TYPE = "vector";
704
+
705
+ int cs;
706
+ VALUE elems; // will store the vector's elements - allocated in @open_seq
707
+
708
+ %% write init;
709
+ %% write exec;
710
+
711
+ if (cs >= EDN_vector_first_final) {
712
+ v = elems;
713
+ return p + 1;
714
+ }
715
+ else if (cs == EDN_vector_error) {
716
+ error(__FUNCTION__, "vector format error", *p);
717
+ return pe;
718
+ }
719
+ else if (cs == EDN_vector_en_main) {} // silence ragel warning
720
+ return nullptr;
661
721
  }
662
722
 
663
723
 
@@ -666,16 +726,16 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
666
726
  // list parsing machine
667
727
  //
668
728
  %%{
669
- machine EDN_list;
670
- include EDN_sequence_common;
729
+ machine EDN_list;
730
+ include EDN_sequence_common;
671
731
 
672
- end_list = ')';
732
+ end_list = ')';
673
733
 
674
- write data;
734
+ write data;
675
735
 
676
- main := begin_list @open_seq (
677
- ignore* sequence? :>> end_list @close_seq
678
- ) @err(close_err) @exit;
736
+ main := begin_list @open_seq (
737
+ ignore* sequence? :>> end_list @close_seq
738
+ ) @err(close_err) @exit;
679
739
  }%%
680
740
 
681
741
  //
@@ -683,26 +743,24 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
683
743
  //
684
744
  const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
685
745
  {
686
- static const char* EDN_TYPE = "list";
687
-
688
- int cs;
689
- VALUE elems; // stores the list's elements - allocated in @open_seq
690
-
691
- %% write init;
692
- %% write exec;
693
-
694
- if (cs >= EDN_list_first_final) {
695
- v = elems;
696
- // TODO: replace with this but first figure out why array is not unrolled by EDN::list()
697
- // v = edn::util::call_module_fn(EDN_MAKE_LIST_METHOD, elems);
698
- return p + 1;
699
- }
700
- else if (cs == EDN_list_error) {
701
- error(__FUNCTION__, *p);
702
- return pe;
703
- }
704
- else if (cs == EDN_list_en_main) {} // silence ragel warning
705
- return NULL;
746
+ static const char* EDN_TYPE = "list";
747
+
748
+ int cs;
749
+ VALUE elems; // stores the list's elements - allocated in @open_seq
750
+
751
+ %% write init;
752
+ %% write exec;
753
+
754
+ if (cs >= EDN_list_first_final) {
755
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_LIST_METHOD, elems);
756
+ return p + 1;
757
+ }
758
+ else if (cs == EDN_list_error) {
759
+ error(__FUNCTION__, *p);
760
+ return pe;
761
+ }
762
+ else if (cs == EDN_list_en_main) {} // silence ragel warning
763
+ return nullptr;
706
764
  }
707
765
 
708
766
 
@@ -711,56 +769,55 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
711
769
  // hash parsing
712
770
  //
713
771
  %%{
714
- machine EDN_map;
715
- include EDN_sequence_common;
772
+ machine EDN_map;
773
+ include EDN_sequence_common;
716
774
 
717
- end_map = '}';
775
+ end_map = '}';
718
776
 
719
- write data;
777
+ write data;
720
778
 
721
779
 
722
- main := begin_map @open_seq (
723
- ignore* (sequence)? :>> end_map @close_seq
724
- ) @err(close_err) @exit;
780
+ main := begin_map @open_seq (
781
+ ignore* (sequence)? :>> end_map @close_seq
782
+ ) @err(close_err) @exit;
725
783
  }%%
726
784
 
727
785
 
728
786
  const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
729
787
  {
730
- static const char* EDN_TYPE = "map";
731
-
732
- int cs;
733
- // since we don't know whether we're looking at a key or value,
734
- // initially store all elements in an array (allocated in @open_seq)
735
- VALUE elems;
736
-
737
- %% write init;
738
- %% write exec;
739
-
740
- if (cs >= EDN_map_first_final) {
741
-
742
- // hash parsing is done. Make sure we have an even count
743
- if ((RARRAY_LEN(elems) % 2) != 0) {
744
- error(__FUNCTION__, "odd number of elements in map");
745
- return pe;
746
- }
747
-
748
- // now convert the sequence to a hash
749
- VALUE rslt = rb_hash_new();
750
- while (RARRAY_LEN(elems) > 0)
751
- {
752
- VALUE k = rb_ary_shift(elems);
753
- rb_hash_aset(rslt, k, rb_ary_shift(elems));
754
- }
755
-
756
- v = rslt;
757
- return p + 1;
758
- }
759
- else if (cs == EDN_map_error) {
760
- return pe;
761
- }
762
- else if (cs == EDN_map_en_main) {} // silence ragel warning
763
- return NULL;
788
+ static const char* EDN_TYPE = "map";
789
+
790
+ int cs;
791
+ // since we don't know whether we're looking at a key or value,
792
+ // initially store all elements in an array (allocated in @open_seq)
793
+ VALUE elems;
794
+
795
+ %% write init;
796
+ %% write exec;
797
+
798
+ if (cs >= EDN_map_first_final) {
799
+ // hash parsing is done. Make sure we have an even count
800
+ if ((RARRAY_LEN(elems) % 2) != 0) {
801
+ error(__FUNCTION__, "odd number of elements in map");
802
+ return pe;
803
+ }
804
+
805
+ // now convert the sequence to a hash
806
+ VALUE rslt = rb_hash_new();
807
+ while (RARRAY_LEN(elems) > 0)
808
+ {
809
+ VALUE k = rb_ary_shift(elems);
810
+ rb_hash_aset(rslt, k, rb_ary_shift(elems));
811
+ }
812
+
813
+ v = rslt;
814
+ return p + 1;
815
+ }
816
+ else if (cs == EDN_map_error) {
817
+ return pe;
818
+ }
819
+ else if (cs == EDN_map_en_main) {} // silence ragel warning
820
+ return nullptr;
764
821
  }
765
822
 
766
823
 
@@ -771,55 +828,73 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
771
828
  // the remaining data to the correct parser
772
829
  //
773
830
  %%{
774
- machine EDN_dispatch;
775
- include EDN_common;
776
-
777
- write data;
778
-
779
- action parse_disp_set {
780
- // #{ }
781
- const char *np = parse_set(fpc, pe, v);
782
- if (np == NULL) { fhold; fbreak; } else fexec np;
783
- }
784
-
785
- action parse_disp_discard {
786
- // discard token #_
787
- const char *np = parse_discard(fpc, pe);
788
- if (np == NULL) { fhold; fbreak; } else fexec np;
789
- }
790
-
791
- action parse_disp_tagged {
792
- // #inst, #uuid, or #user/tag
793
- const char *np = parse_tagged(fpc, pe, v);
794
- if (np == NULL) { fhold; fbreak; } else fexec np;
795
- }
796
-
797
-
798
- main := (
799
- ('{' >parse_disp_set |
800
- '_' >parse_disp_discard |
801
- alpha >parse_disp_tagged)
802
- ) @exit;
831
+ machine EDN_dispatch;
832
+ include EDN_common;
833
+
834
+ write data;
835
+
836
+ action parse_disp_set {
837
+ // #{ }
838
+ const char *np = parse_set(fpc, pe, v);
839
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
840
+ }
841
+
842
+ action parse_disp_discard {
843
+ // discard token #_
844
+ const char *np = parse_discard(fpc, pe);
845
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
846
+ }
847
+
848
+ action parse_disp_tagged {
849
+ // #inst, #uuid, or #user/tag
850
+ const char *np = parse_tagged(fpc, pe, v);
851
+ if (np == nullptr) { fhold; fbreak; } else fexec np;
852
+ }
853
+
854
+ action parse_disp_symbol {
855
+ // ##Inf, ##NaN, etc.: this action fires on the '#' matched by this machine, so the symbol text starts at fpc+1
856
+ VALUE sym = Qnil;
857
+ const char *np = parse_symbol(fpc+1, pe, sym); // sym receives the parsed symbol; np is where scanning resumes
858
+ if (np == nullptr) { fhold; fbreak; } else { // nullptr from parse_symbol(): leave this machine without assigning v
859
+ if (std::strcmp(RSTRING_PTR(sym), "NaN") == 0) { // NOTE(review): strcmp needs sym to be a NUL-terminated Ruby String - confirm parse_symbol() guarantees that
860
+ v = RUBY_NAN_CONST;
861
+ }
862
+ else if (std::strcmp(RSTRING_PTR(sym), "Inf") == 0) {
863
+ v = RUBY_INF_CONST;
864
+ }
865
+ else {
866
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SYMBOL_METHOD, sym); // any other name goes through the EDN module function named by EDN_MAKE_SYMBOL_METHOD
867
+ }
868
+
869
+ fexec np; // continue the machine from the position parse_symbol() returned
870
+ }
871
+ }
872
+
873
+ main := (
874
+ ('{' >parse_disp_set |
875
+ '_' >parse_disp_discard |
876
+ '#' >parse_disp_symbol |
877
+ alpha >parse_disp_tagged)
878
+ ) @exit;
803
879
  }%%
804
880
 
805
881
 
806
882
  const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
807
883
  {
808
- int cs;
809
-
810
- %% write init;
811
- %% write exec;
812
-
813
- if (cs >= EDN_dispatch_first_final) {
814
- return p + 1;
815
- }
816
- else if (cs == EDN_dispatch_error) {
817
- error(__FUNCTION__, "dispatch extend error", *p);
818
- return pe;
819
- }
820
- else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
821
-
822
- return NULL;
884
+ int cs;
885
+
886
+ %% write init;
887
+ %% write exec;
888
+
889
+ if (cs >= EDN_dispatch_first_final) {
890
+ return p + 1;
891
+ }
892
+ else if (cs == EDN_dispatch_error) {
893
+ error(__FUNCTION__, "dispatch extend error", *p);
894
+ return pe;
895
+ }
896
+ else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
897
+ return nullptr;
823
898
  }
824
899
 
825
900
 
@@ -827,17 +902,17 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
827
902
  // set parsing machine
828
903
  //
829
904
  %%{
830
- machine EDN_set;
831
- include EDN_sequence_common;
905
+ machine EDN_set;
906
+ include EDN_sequence_common;
832
907
 
833
- write data;
908
+ write data;
834
909
 
835
- begin_set = '{';
836
- end_set = '}';
910
+ begin_set = '{';
911
+ end_set = '}';
837
912
 
838
- main := begin_set @open_seq (
839
- ignore* sequence? :>> end_set @close_seq
840
- ) @err(close_err) @exit;
913
+ main := begin_set @open_seq (
914
+ ignore* sequence? :>> end_set @close_seq
915
+ ) @err(close_err) @exit;
841
916
  }%%
842
917
 
843
918
  //
@@ -845,25 +920,25 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
845
920
  //
846
921
  const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
847
922
  {
848
- static const char* EDN_TYPE = "set";
849
-
850
- int cs;
851
- VALUE elems; // holds the set's elements as an array allocated in @open_seq
852
-
853
- %% write init;
854
- %% write exec;
855
-
856
- if (cs >= EDN_set_first_final) {
857
- // all elements collected; now convert to a set
858
- v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SET_METHOD, elems);
859
- return p + 1;
860
- }
861
- else if (cs == EDN_set_error) {
862
- error(__FUNCTION__, *p);
863
- return pe;
864
- }
865
- else if (cs == EDN_set_en_main) {} // silence ragel warning
866
- return NULL;
923
+ static const char* EDN_TYPE = "set";
924
+
925
+ int cs;
926
+ VALUE elems; // holds the set's elements as an array allocated in @open_seq
927
+
928
+ %% write init;
929
+ %% write exec;
930
+
931
+ if (cs >= EDN_set_first_final) {
932
+ // all elements collected; now convert to a set
933
+ v = edn::util::call_module_fn(rb_mEDN, EDN_MAKE_SET_METHOD, elems);
934
+ return p + 1;
935
+ }
936
+ else if (cs == EDN_set_error) {
937
+ error(__FUNCTION__, *p);
938
+ return pe;
939
+ }
940
+ else if (cs == EDN_set_en_main) {} // silence ragel warning
941
+ return nullptr;
867
942
  }
868
943
 
869
944
 
@@ -874,55 +949,54 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
874
949
  // defining a machine to consume items within container delimiters
875
950
  //
876
951
  %%{
877
- machine EDN_discard;
878
- include EDN_common;
879
-
880
- write data;
881
-
882
- begin_discard = '_';
883
-
884
- action discard_value {
885
- const char *np = parse_value(fpc, pe, v);
886
- if (np == NULL) { fhold; fbreak; } else {
887
- // this token is to be discarded so store it in the
888
- // discard stack - we really don't need to save it so this
889
- // could be simplified
890
- discard.push_back(v);
891
- fexec np;
892
- }
893
- }
894
-
895
- action discard_err {
896
- std::stringstream s;
897
- s << "discard sequence without element to discard";
898
- error(__FUNCTION__, s.str());
899
- fhold; fbreak;
900
- }
901
-
902
- main := begin_discard ignore* (
903
- begin_value >discard_value
904
- ) @err(discard_err) @exit;
952
+ machine EDN_discard;
953
+ include EDN_common;
954
+
955
+ write data;
956
+
957
+ begin_discard = '_';
958
+
959
+ action discard_value {
960
+ const char *np = parse_value(fpc, pe, v);
961
+ if (np == nullptr) { fhold; fbreak; } else {
962
+ // this token is to be discarded so store it in the
963
+ // discard stack - we really don't need to save it so this
964
+ // could be simplified
965
+ discard.push_back(v);
966
+ fexec np;
967
+ }
968
+ }
969
+
970
+ action discard_err {
971
+ std::stringstream s;
972
+ s << "discard sequence without element to discard";
973
+ error(__FUNCTION__, s.str());
974
+ fhold; fbreak;
975
+ }
976
+
977
+ main := begin_discard ignore* (
978
+ begin_value >discard_value
979
+ ) @err(discard_err) @exit;
905
980
  }%%
906
981
 
907
982
 
908
983
  const char* edn::Parser::parse_discard(const char *p, const char *pe)
909
984
  {
910
- int cs;
911
- VALUE v;
912
-
913
- %% write init;
914
- %% write exec;
915
-
916
- if (cs >= EDN_discard_first_final) {
917
- return p + 1;
918
- }
919
- else if (cs == EDN_discard_error) {
920
- error(__FUNCTION__, *p);
921
- return pe;
922
- }
923
- else if (cs == EDN_discard_en_main) {} // silence ragel warning
924
-
925
- return NULL;
985
+ int cs;
986
+ VALUE v;
987
+
988
+ %% write init;
989
+ %% write exec;
990
+
991
+ if (cs >= EDN_discard_first_final) {
992
+ return p + 1;
993
+ }
994
+ else if (cs == EDN_discard_error) {
995
+ error(__FUNCTION__, *p);
996
+ return pe;
997
+ }
998
+ else if (cs == EDN_discard_en_main) {} // silence ragel warning
999
+ return nullptr;
926
1000
  }
927
1001
 
928
1002
 
@@ -942,130 +1016,127 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
942
1016
  // 2. add parse checks for uuid and inst for better error reporting
943
1017
  //
944
1018
  %%{
945
- machine EDN_tagged;
946
- include EDN_common;
1019
+ machine EDN_tagged;
1020
+ include EDN_common;
947
1021
 
948
- write data;
1022
+ write data;
949
1023
 
950
- tag_symbol_chars_start = alpha;
951
- tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
952
- tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
1024
+ tag_symbol_chars_start = alpha;
1025
+ tag_symbol_chars_non_numeric = tag_symbol_chars_start | [\.\*!_\?$%&<>\=+\-\'\:\#];
1026
+ tag_symbol_chars = tag_symbol_chars_non_numeric | digit;
953
1027
 
954
- tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
955
- tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
1028
+ tag_symbol_namespace = tag_symbol_chars_start (tag_symbol_chars)*;
1029
+ tag_symbol_name = tag_symbol_chars_non_numeric (tag_symbol_chars)*;
956
1030
 
957
- tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
1031
+ tag_symbol = (tag_symbol_namespace ('/' tag_symbol_name)?);
958
1032
 
959
1033
  # inst = (string_delim [0-9+\-:\.TZ]* string_delim);
960
1034
  # uuid = (string_delim [a-f0-9\-]* string_delim);
961
1035
 
962
- action parse_tag {
963
- // parses the symbol portion of the pair
964
- const char *np = parse_symbol(fpc, pe, sym_name);
965
- if (np == NULL) { fhold; fbreak; } else {
966
- sym_ok = true;
967
- fexec np;
968
- }
969
- }
970
- action parse_data {
971
- // parses the value portion
972
- const char *np = parse_value(fpc, pe, data);
973
- if (np == NULL) { fhold; fbreak; } else {
974
- data_ok = true;
975
- fexec np;
976
- }
977
- }
978
-
979
- main := (
980
- tag_symbol >parse_tag ignore+
981
- begin_value >parse_data
982
- ) @exit;
1036
+ action parse_tag {
1037
+ // parses the symbol portion of the pair
1038
+ const char *np = parse_symbol(fpc, pe, sym_name);
1039
+ if (np == nullptr) { fhold; fbreak; } else {
1040
+ sym_ok = true;
1041
+ fexec np;
1042
+ }
1043
+ }
1044
+ action parse_data {
1045
+ // parses the value portion
1046
+ const char *np = parse_value(fpc, pe, data);
1047
+ if (np == nullptr) { fhold; fbreak; } else {
1048
+ data_ok = true;
1049
+ fexec np;
1050
+ }
1051
+ }
1052
+
1053
+ main := (
1054
+ tag_symbol >parse_tag ignore+
1055
+ begin_value >parse_data
1056
+ ) @exit;
983
1057
  }%%
984
1058
 
985
1059
 
986
1060
  const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
987
1061
  {
988
- VALUE sym_name = Qnil;
989
- VALUE data = Qnil;
990
- bool sym_ok = false;
991
- bool data_ok = false;
1062
+ VALUE sym_name = Qnil;
1063
+ VALUE data = Qnil;
1064
+ bool sym_ok = false;
1065
+ bool data_ok = false;
992
1066
 
993
- int cs;
1067
+ int cs;
994
1068
 
995
- %% write init;
996
- %% write exec;
1069
+ %% write init;
1070
+ %% write exec;
997
1071
 
998
- if (cs >= EDN_tagged_first_final) {
1072
+ if (cs >= EDN_tagged_first_final) {
999
1073
  //std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << data << std::endl;
1000
1074
 
1001
- if (!sym_ok || !data_ok) {
1002
- error(__FUNCTION__, "tagged element symbol error", *p);
1003
- v = EDN_EOF_CONST;
1004
- return NULL;
1005
- }
1006
-
1007
- try {
1008
- // tagged_element makes a call to ruby which may throw an
1009
- // exception when parsing the data
1010
- v = edn::util::call_module_fn(rb_mEDN, EDN_TAGGED_ELEM_METHOD, sym_name, data);
1011
- return p + 1;
1012
- } catch (std::exception& e) {
1013
- error(__FUNCTION__, e.what());
1014
- return pe;
1015
- }
1016
- }
1017
- else if (cs == EDN_tagged_error) {
1018
- error(__FUNCTION__, "tagged element symbol error", *p);
1019
- }
1020
- else if (cs == EDN_tagged_en_main) {} // silence ragel warning
1021
- v = EDN_EOF_CONST;
1022
- return NULL;
1075
+ if (!sym_ok || !data_ok) {
1076
+ error(__FUNCTION__, "tagged element symbol error", *p);
1077
+ v = EDN_EOF_CONST;
1078
+ return nullptr;
1079
+ }
1080
+
1081
+ try {
1082
+ // tagged_element makes a call to ruby which may throw an
1083
+ // exception when parsing the data
1084
+ v = edn::util::call_module_fn(rb_mEDN, EDN_TAGGED_ELEM_METHOD, sym_name, data);
1085
+ return p + 1;
1086
+ } catch (std::exception& e) {
1087
+ error(__FUNCTION__, e.what());
1088
+ return pe;
1089
+ }
1090
+ }
1091
+ else if (cs == EDN_tagged_error) {
1092
+ error(__FUNCTION__, "tagged element symbol error", *p);
1093
+ }
1094
+ else if (cs == EDN_tagged_en_main) {} // silence ragel warning
1095
+ v = EDN_EOF_CONST;
1096
+ return nullptr;
1023
1097
  }
1024
1098
 
1025
1099
 
1026
-
1027
-
1028
1100
  // ============================================================
1029
1101
  // metadata - looks like ruby just discards this but we'll track it
1030
1102
  // and provide a means to retrieve it after each parse op - might be
1031
1103
  // useful?
1032
1104
  //
1033
1105
  %%{
1034
- machine EDN_meta;
1035
- include EDN_common;
1106
+ machine EDN_meta;
1107
+ include EDN_common;
1036
1108
 
1037
- write data;
1109
+ write data;
1038
1110
 
1039
- action parse_data {
1040
- const char *np = parse_value(fpc, pe, v);
1041
- if (np == NULL) { fhold; fbreak; } else { fexec np; }
1042
- }
1111
+ action parse_data {
1112
+ const char *np = parse_value(fpc, pe, v);
1113
+ if (np == nullptr) { fhold; fbreak; } else { fexec np; }
1114
+ }
1043
1115
 
1044
- main := begin_meta (
1045
- begin_value >parse_data
1046
- ) @exit;
1116
+ main := begin_meta (
1117
+ begin_value >parse_data
1118
+ ) @exit;
1047
1119
  }%%
1048
1120
 
1049
1121
 
1050
1122
  const char* edn::Parser::parse_meta(const char *p, const char *pe)
1051
1123
  {
1052
- int cs;
1053
- VALUE v;
1054
-
1055
- %% write init;
1056
- %% write exec;
1057
-
1058
- if (cs >= EDN_meta_first_final) {
1059
- append_to_meta(v);
1060
- return p + 1;
1061
- }
1062
- else if (cs == EDN_meta_error) {
1063
- error(__FUNCTION__, *p);
1064
- return pe;
1065
- }
1066
- else if (cs == EDN_meta_en_main) {} // silence ragel warning
1067
-
1068
- return NULL;
1124
+ int cs;
1125
+ VALUE v;
1126
+
1127
+ %% write init;
1128
+ %% write exec;
1129
+
1130
+ if (cs >= EDN_meta_first_final) {
1131
+ append_to_meta(v);
1132
+ return p + 1;
1133
+ }
1134
+ else if (cs == EDN_meta_error) {
1135
+ error(__FUNCTION__, *p);
1136
+ return pe;
1137
+ }
1138
+ else if (cs == EDN_meta_en_main) {} // silence ragel warning
1139
+ return nullptr;
1069
1140
  }
1070
1141
 
1071
1142
 
@@ -1075,55 +1146,55 @@ const char* edn::Parser::parse_meta(const char *p, const char *pe)
1075
1146
  // top-level, therefore, does not tokenize source stream
1076
1147
  //
1077
1148
  %%{
1078
- machine EDN_parser;
1079
- include EDN_common;
1080
-
1081
- write data;
1082
-
1083
- action parse_elem {
1084
- // save the count of metadata items before we parse this value
1085
- // so we can determine if we've read another metadata value or
1086
- // an actual data item
1087
- std::size_t meta_sz = meta_size();
1088
- const char* np = parse_value(fpc, pe, result);
1089
- if (np == NULL) { fexec pe; fbreak; } else {
1090
- // if we have metadata saved and it matches the count we
1091
- // saved before we parsed a value, then we must bind the
1092
- // metadata sequence to it
1093
- if (!meta_empty() && meta_size() == meta_sz) {
1094
- // this will empty the metadata sequence too
1095
- result = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
1096
- }
1097
- fexec np;
1098
- }
1099
- }
1100
-
1101
- element = begin_value >parse_elem;
1102
- next_element = ignore* element;
1103
- sequence = ((element ignore*) (next_element ignore*)*);
1104
-
1105
- main := ignore* sequence? ignore*;
1149
+ machine EDN_parser;
1150
+ include EDN_common;
1151
+
1152
+ write data;
1153
+
1154
+ action parse_elem {
1155
+ // save the count of metadata items before we parse this value
1156
+ // so we can determine if we've read another metadata value or
1157
+ // an actual data item
1158
+ std::size_t meta_sz = meta_size();
1159
+ const char* np = parse_value(fpc, pe, result);
1160
+ if (np == nullptr) { fexec pe; fbreak; } else {
1161
+ // if we have metadata saved and it matches the count we
1162
+ // saved before we parsed a value, then we must bind the
1163
+ // metadata sequence to it
1164
+ if (!meta_empty() && meta_size() == meta_sz) {
1165
+ // this will empty the metadata sequence too
1166
+ result = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, result, ruby_meta());
1167
+ }
1168
+ fexec np;
1169
+ }
1170
+ }
1171
+
1172
+ element = begin_value >parse_elem;
1173
+ next_element = ignore* element;
1174
+ sequence = ((element ignore*) (next_element ignore*)*);
1175
+
1176
+ main := ignore* sequence? ignore*;
1106
1177
  }%%
1107
1178
 
1108
1179
 
1109
1180
  VALUE edn::Parser::parse(const char* src, std::size_t len)
1110
1181
  {
1111
- int cs;
1112
- VALUE result = EDN_EOF_CONST;
1113
-
1114
- %% write init;
1115
- set_source(src, len);
1116
- %% write exec;
1117
-
1118
- if (cs == EDN_parser_error) {
1119
- error(__FUNCTION__, *p);
1120
- return EDN_EOF_CONST;
1121
- }
1122
- else if (cs == EDN_parser_first_final) {
1123
- p = pe = eof = NULL;
1124
- }
1125
- else if (cs == EDN_parser_en_main) {} // silence ragel warning
1126
- return result;
1182
+ int cs;
1183
+ VALUE result = EDN_EOF_CONST;
1184
+
1185
+ %% write init;
1186
+ set_source(src, len);
1187
+ %% write exec;
1188
+
1189
+ if (cs == EDN_parser_error) {
1190
+ error(__FUNCTION__, *p);
1191
+ return EDN_EOF_CONST;
1192
+ }
1193
+ else if (cs == EDN_parser_first_final) {
1194
+ p = pe = eof = nullptr;
1195
+ }
1196
+ else if (cs == EDN_parser_en_main) {} // silence ragel warning
1197
+ return result;
1127
1198
  }
1128
1199
 
1129
1200
 
@@ -1131,43 +1202,43 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
1131
1202
  // token-by-token machine
1132
1203
  //
1133
1204
  %%{
1134
- machine EDN_tokens;
1135
- include EDN_common;
1136
-
1137
- write data nofinal noerror;
1138
-
1139
- action parse_token {
1140
- // we won't know if we've parsed a discard or a metadata until
1141
- // after parse_value() is done. Save the current number of
1142
- // elements in the metadata sequence; then we can check if it
1143
- // grew or if the discard sequence grew
1144
- meta_sz = meta_size();
1145
-
1146
- const char* np = parse_value(fpc, pe, value);
1147
- if (np == NULL) { fhold; fbreak; } else {
1148
- if (!meta_empty()) {
1149
- // was an additional metadata entry read? if so, don't
1150
- // return a value
1151
- if (meta_size() > meta_sz) {
1152
- state = TOKEN_IS_META;
1153
- }
1154
- else {
1155
- // a value was read and there's a pending metadata
1156
- // sequence. Bind them.
1157
- value = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
1158
- state = TOKEN_OK;
1159
- }
1160
- } else if (!discard.empty()) {
1161
- // a discard read. Don't return a value
1162
- state = TOKEN_IS_DISCARD;
1163
- } else {
1164
- state = TOKEN_OK;
1205
+ machine EDN_tokens;
1206
+ include EDN_common;
1207
+
1208
+ write data nofinal noerror;
1209
+
1210
+ action parse_token {
1211
+ // we won't know if we've parsed a discard or a metadata until
1212
+ // after parse_value() is done. Save the current number of
1213
+ // elements in the metadata sequence; then we can check if it
1214
+ // grew or if the discard sequence grew
1215
+ meta_sz = meta_size();
1216
+
1217
+ const char* np = parse_value(fpc, pe, value);
1218
+ if (np == nullptr) { fhold; fbreak; } else {
1219
+ if (!meta_empty()) {
1220
+ // was an additional metadata entry read? if so, don't
1221
+ // return a value
1222
+ if (meta_size() > meta_sz) {
1223
+ state = TOKEN_IS_META;
1165
1224
  }
1166
- fexec np;
1167
- }
1168
- }
1169
-
1170
- main := ignore* begin_value >parse_token ignore*;
1225
+ else {
1226
+ // a value was read and there's a pending metadata
1227
+ // sequence. Bind them.
1228
+ value = edn::util::call_module_fn(rb_mEDNT, EDNT_EXTENDED_VALUE_METHOD, value, ruby_meta());
1229
+ state = TOKEN_OK;
1230
+ }
1231
+ } else if (!discard.empty()) {
1232
+ // a discard read. Don't return a value
1233
+ state = TOKEN_IS_DISCARD;
1234
+ } else {
1235
+ state = TOKEN_OK;
1236
+ }
1237
+ fexec np;
1238
+ }
1239
+ }
1240
+
1241
+ main := ignore* begin_value >parse_token ignore*;
1171
1242
  }%%
1172
1243
 
1173
1244
 
@@ -1175,21 +1246,21 @@ VALUE edn::Parser::parse(const char* src, std::size_t len)
1175
1246
  //
1176
1247
  edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
1177
1248
  {
1178
- int cs;
1179
- eTokenState state = TOKEN_ERROR;
1180
- // need to track metadada read and bind it to the next value read
1181
- // - but must account for sequences of metadata values
1182
- std::size_t meta_sz;
1249
+ int cs;
1250
+ eTokenState state = TOKEN_ERROR;
1251
+ // need to track metadata read and bind it to the next value read
1252
+ // - but must account for sequences of metadata values
1253
+ std::size_t meta_sz;
1183
1254
 
1184
- // clear any previously saved discards; only track if read during
1185
- // this op
1186
- discard.clear();
1255
+ // clear any previously saved discards; only track if read during
1256
+ // this op
1257
+ discard.clear();
1187
1258
 
1188
- %% write init;
1189
- %% write exec;
1259
+ %% write init;
1260
+ %% write exec;
1190
1261
 
1191
- if (cs == EDN_tokens_en_main) {} // silence ragel warning
1192
- return state;
1262
+ if (cs == EDN_tokens_en_main) {} // silence ragel warning
1263
+ return state;
1193
1264
  }
1194
1265
 
1195
1266
  /*