edn_turbo 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -6
- data/Rakefile +6 -3
- data/ext/edn_turbo/edn_parser.cc +1140 -414
- data/ext/edn_turbo/edn_parser.h +24 -17
- data/ext/edn_turbo/edn_parser.rl +208 -85
- data/ext/edn_turbo/edn_parser_def.cc +61 -14
- data/ext/edn_turbo/main.cc +4 -0
- data/lib/edn_turbo/version.rb +2 -2
- data/lib/edn_turbo.rb +14 -0
- data/test/test_output_diff.rb +8 -8
- metadata +2 -2
data/ext/edn_turbo/edn_parser.h
CHANGED
@@ -20,6 +20,8 @@ namespace edn
|
|
20
20
|
extern VALUE EDNT_MAKE_EDN_SYMBOL;
|
21
21
|
extern VALUE EDNT_MAKE_SET_METHOD;
|
22
22
|
extern VALUE EDNT_TAGGED_ELEM;
|
23
|
+
extern VALUE EDNT_STR_INT_TO_BIGNUM;
|
24
|
+
extern VALUE EDNT_STR_DBL_TO_BIGNUM;
|
23
25
|
|
24
26
|
//
|
25
27
|
// C-extension EDN Parser class representation
|
@@ -34,24 +36,25 @@ namespace edn
|
|
34
36
|
|
35
37
|
Rice::Object parse(const char* s, std::size_t len);
|
36
38
|
|
37
|
-
const char*
|
38
|
-
const char*
|
39
|
-
const char*
|
40
|
-
const char*
|
41
|
-
const char*
|
42
|
-
const char*
|
43
|
-
const char*
|
44
|
-
const char*
|
45
|
-
const char*
|
46
|
-
const char*
|
47
|
-
const char*
|
39
|
+
const char* parse_value (const char *p, const char *pe, Rice::Object& o);
|
40
|
+
const char* parse_operator(const char *p, const char *pe, Rice::Object& o);
|
41
|
+
const char* parse_esc_char(const char *p, const char *pe, Rice::Object& o);
|
42
|
+
const char* parse_symbol (const char *p, const char *pe, std::string& s);
|
43
|
+
const char* parse_keyword (const char *p, const char *pe, Rice::Object& o);
|
44
|
+
const char* parse_string (const char *p, const char *pe, Rice::Object& o);
|
45
|
+
const char* parse_decimal (const char *p, const char *pe, Rice::Object& o);
|
46
|
+
const char* parse_integer (const char *p, const char *pe, Rice::Object& o);
|
47
|
+
const char* parse_vector (const char *p, const char *pe, Rice::Object& o);
|
48
|
+
const char* parse_list (const char *p, const char *pe, Rice::Object& o);
|
49
|
+
const char* parse_set (const char *p, const char *pe, Rice::Object& o);
|
50
|
+
const char* parse_map (const char *p, const char *pe, Rice::Object& o);
|
51
|
+
const char* parse_tagged (const char *p, const char *pe, Rice::Object& o);
|
52
|
+
const char* parse_discard (const char *p, const char *pe);
|
48
53
|
const char* parse_dispatch(const char *p, const char *pe, Rice::Object& o);
|
49
54
|
|
50
|
-
|
51
|
-
|
52
|
-
static bool
|
53
|
-
static bool parse_escaped_char(char c, Rice::String& s);
|
54
|
-
static bool unicode_to_utf8(const char *s, std::size_t len, std::string& rslt);
|
55
|
+
static bool parse_byte_stream (const char *p, const char *pe, Rice::String& s);
|
56
|
+
static bool parse_escaped_char(const char *p, const char *pe, Rice::Object& s);
|
57
|
+
static bool unicode_to_utf8 (const char *s, std::size_t len, std::string& rslt);
|
55
58
|
|
56
59
|
static Rice::Object make_edn_symbol(const std::string& name);
|
57
60
|
static Rice::Object make_ruby_set(const Rice::Array& elems);
|
@@ -64,13 +67,17 @@ namespace edn
|
|
64
67
|
// utility method to convert a primitive in string form to a
|
65
68
|
// ruby type
|
66
69
|
template <class T>
|
67
|
-
static Rice::Object buftotype(const char* p,
|
70
|
+
static Rice::Object buftotype(const char* p, std::size_t len) {
|
71
|
+
T val;
|
68
72
|
std::string buf;
|
69
73
|
buf.append(p, len);
|
70
74
|
std::istringstream(buf) >> val;
|
71
75
|
return to_ruby<T>(val);
|
72
76
|
}
|
73
77
|
|
78
|
+
static Rice::Object integer_to_ruby(const char* str, std::size_t len);
|
79
|
+
static Rice::Object float_to_ruby(const char* str, std::size_t len);
|
80
|
+
|
74
81
|
public:
|
75
82
|
Parser() : line_number(1), p_save(NULL), eof(NULL) { }
|
76
83
|
|
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -26,34 +26,26 @@
|
|
26
26
|
comment = ';' cr_neg* counter;
|
27
27
|
ignore = ws | comment;
|
28
28
|
|
29
|
-
operators = [/\.\*!_
|
30
|
-
|
29
|
+
operators = [/\.\*!_\?$%&<>\=+\-];
|
30
|
+
symbol_start = alpha;
|
31
|
+
symbol_chars = symbol_start | digit | [\#:_\-\.];
|
31
32
|
|
32
|
-
symbol_first_c = symbol_chars - [0-9\#\:]; # non-numeric, no '#' or ':'
|
33
|
-
|
34
|
-
k_nil = 'nil';
|
35
|
-
k_true = 'true';
|
36
|
-
k_false = 'false';
|
37
33
|
begin_dispatch = '#';
|
38
34
|
begin_keyword = ':';
|
39
35
|
begin_char = '\\';
|
40
36
|
begin_value = alnum | [:\"\{\[\(\\\#] | operators;
|
41
|
-
|
42
|
-
begin_symbol = symbol_first_c - ('-'|'+');
|
37
|
+
begin_symbol = symbol_start;
|
43
38
|
begin_vector = '[';
|
44
|
-
end_vector = ']';
|
45
39
|
begin_map = '{';
|
46
40
|
begin_list = '(';
|
47
|
-
end_list = ')';
|
48
41
|
string_delim = '"';
|
49
|
-
begin_number = digit
|
50
|
-
|
51
|
-
symbol_name = [\-\+\.]? symbol_first_c (symbol_chars)*;
|
42
|
+
begin_number = digit;
|
52
43
|
|
53
|
-
|
44
|
+
symbol_name = symbol_start (symbol_chars)*;
|
45
|
+
symbol = (symbol_name ('/' symbol_name)?);
|
54
46
|
|
55
47
|
# int / decimal rules
|
56
|
-
integer =
|
48
|
+
integer = ('0' | [1-9] digit*);
|
57
49
|
exp = ([Ee] [+\-]? digit+);
|
58
50
|
|
59
51
|
|
@@ -61,7 +53,7 @@
|
|
61
53
|
std::stringstream s;
|
62
54
|
s << "unterminated " << EDN_TYPE;
|
63
55
|
error(__FUNCTION__, s.str());
|
64
|
-
|
56
|
+
fhold; fbreak;
|
65
57
|
}
|
66
58
|
}%%
|
67
59
|
|
@@ -75,6 +67,31 @@
|
|
75
67
|
|
76
68
|
write data;
|
77
69
|
|
70
|
+
action parse_dispatch {
|
71
|
+
const char *np = parse_dispatch(fpc + 1, pe, o);
|
72
|
+
if (np == NULL) { fhold; fbreak; } else fexec np;
|
73
|
+
}
|
74
|
+
|
75
|
+
action parse_char {
|
76
|
+
const char *np = parse_esc_char(fpc, pe, o);
|
77
|
+
if (np == NULL) { fhold; fbreak; } else fexec np;
|
78
|
+
}
|
79
|
+
|
80
|
+
action parse_string {
|
81
|
+
const char *np = parse_string(fpc, pe, o);
|
82
|
+
if (np == NULL) { fhold; fbreak; } else fexec np;
|
83
|
+
}
|
84
|
+
|
85
|
+
action parse_keyword {
|
86
|
+
const char *np = parse_keyword(fpc, pe, o);
|
87
|
+
if (np == NULL) { fhold; fbreak; } else fexec np;
|
88
|
+
}
|
89
|
+
|
90
|
+
action parse_operator {
|
91
|
+
const char *np = parse_operator(fpc, pe, o);
|
92
|
+
if (np == NULL) { fhold; fbreak; } else fexec np;
|
93
|
+
}
|
94
|
+
|
78
95
|
action parse_symbol {
|
79
96
|
std::string sym;
|
80
97
|
const char *np = parse_symbol(fpc, pe, sym);
|
@@ -89,26 +106,6 @@
|
|
89
106
|
}
|
90
107
|
}
|
91
108
|
|
92
|
-
action parse_keyword {
|
93
|
-
const char *np = parse_keyword(fpc, pe, o);
|
94
|
-
if (np == NULL) { fhold; fbreak; } else fexec np;
|
95
|
-
}
|
96
|
-
|
97
|
-
action parse_char {
|
98
|
-
Rice::String s;
|
99
|
-
|
100
|
-
if (!parse_escaped_char(*fpc, s)) {
|
101
|
-
fhold; fbreak;
|
102
|
-
} else {
|
103
|
-
o = s;
|
104
|
-
}
|
105
|
-
}
|
106
|
-
|
107
|
-
action parse_string {
|
108
|
-
const char *np = parse_string(fpc, pe, o);
|
109
|
-
if (np == NULL) { fhold; fbreak; } else fexec np;
|
110
|
-
}
|
111
|
-
|
112
109
|
action parse_number {
|
113
110
|
// try to parse a decimal first
|
114
111
|
const char *np = parse_decimal(fpc, pe, o);
|
@@ -143,19 +140,15 @@
|
|
143
140
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
144
141
|
}
|
145
142
|
|
146
|
-
action parse_dispatch {
|
147
|
-
const char *np = parse_dispatch(fpc + 1, pe, o);
|
148
|
-
if (np == NULL) { fhold; fbreak; } else fexec np;
|
149
|
-
}
|
150
|
-
|
151
143
|
action exit { fhold; fbreak; }
|
152
144
|
|
153
145
|
main := (
|
154
146
|
begin_dispatch >parse_dispatch |
|
155
|
-
begin_char
|
147
|
+
begin_char >parse_char |
|
156
148
|
string_delim >parse_string |
|
157
|
-
begin_symbol >parse_symbol |
|
158
149
|
begin_keyword >parse_keyword |
|
150
|
+
operators >parse_operator |
|
151
|
+
begin_symbol >parse_symbol |
|
159
152
|
begin_number >parse_number |
|
160
153
|
begin_vector >parse_vector |
|
161
154
|
begin_list >parse_list |
|
@@ -166,7 +159,7 @@
|
|
166
159
|
|
167
160
|
const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object& o)
|
168
161
|
{
|
169
|
-
//
|
162
|
+
//std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
170
163
|
int cs;
|
171
164
|
|
172
165
|
%% write init;
|
@@ -185,6 +178,133 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
|
|
185
178
|
|
186
179
|
|
187
180
|
|
181
|
+
// ============================================================
|
182
|
+
// operator parsing
|
183
|
+
//
|
184
|
+
%%{
|
185
|
+
machine EDN_operator;
|
186
|
+
include EDN_common;
|
187
|
+
|
188
|
+
write data;
|
189
|
+
|
190
|
+
action parse_symbol {
|
191
|
+
// parse a symbol including the leading operator (-, +, .)
|
192
|
+
std::string sym;
|
193
|
+
const char *np = parse_symbol(p_save, pe, sym);
|
194
|
+
if (np == NULL) { fhold; fbreak; } else {
|
195
|
+
o = Parser::make_edn_symbol(sym);
|
196
|
+
fexec np;
|
197
|
+
}
|
198
|
+
}
|
199
|
+
|
200
|
+
action parse_number {
|
201
|
+
// parse a number with the leading symbol - this is slightly
|
202
|
+
// different than the one within EDN_value since it includes
|
203
|
+
// the leading - or +
|
204
|
+
//
|
205
|
+
// try to parse a decimal first
|
206
|
+
const char *np = parse_decimal(p_save, pe, o);
|
207
|
+
if (np == NULL) {
|
208
|
+
// if we can't, try to parse it as an int
|
209
|
+
np = parse_integer(p_save, pe, o);
|
210
|
+
}
|
211
|
+
|
212
|
+
if (np) {
|
213
|
+
fexec np;
|
214
|
+
fhold;
|
215
|
+
fbreak;
|
216
|
+
}
|
217
|
+
else {
|
218
|
+
error(__FUNCTION__, *p);
|
219
|
+
fexec pe;
|
220
|
+
}
|
221
|
+
}
|
222
|
+
|
223
|
+
action parse_operator {
|
224
|
+
// stand-alone operators (-, +, /, ... etc)
|
225
|
+
std::string sym;
|
226
|
+
sym += *(fpc - 1);
|
227
|
+
o = Parser::make_edn_symbol(sym);
|
228
|
+
}
|
229
|
+
|
230
|
+
action exit { fhold; fbreak; }
|
231
|
+
|
232
|
+
main := (
|
233
|
+
('-'|'+'|'.') alpha >parse_symbol |
|
234
|
+
('-'|'+') begin_number >parse_number |
|
235
|
+
operators ignore* >parse_operator
|
236
|
+
) ^(operators|alpha|digit)? @exit;
|
237
|
+
}%%
|
238
|
+
|
239
|
+
|
240
|
+
const char* edn::Parser::parse_operator(const char *p, const char *pe, Rice::Object& o)
|
241
|
+
{
|
242
|
+
// std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
243
|
+
int cs;
|
244
|
+
std::string op;
|
245
|
+
|
246
|
+
%% write init;
|
247
|
+
p_save = p;
|
248
|
+
%% write exec;
|
249
|
+
|
250
|
+
if (cs >= EDN_operator_first_final) {
|
251
|
+
return p;
|
252
|
+
}
|
253
|
+
else if (cs == EDN_operator_error) {
|
254
|
+
error(__FUNCTION__, *p);
|
255
|
+
return pe;
|
256
|
+
}
|
257
|
+
else if (cs == EDN_operator_en_main) {} // silence ragel warning
|
258
|
+
return NULL;
|
259
|
+
}
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
// ============================================================
|
264
|
+
// escaped char parsing
|
265
|
+
//
|
266
|
+
%%{
|
267
|
+
machine EDN_escaped_char;
|
268
|
+
include EDN_common;
|
269
|
+
|
270
|
+
write data;
|
271
|
+
|
272
|
+
valid_chars = alpha;
|
273
|
+
|
274
|
+
action exit { fhold; fbreak; }
|
275
|
+
|
276
|
+
main := (
|
277
|
+
begin_char valid_chars+ ignore*
|
278
|
+
) (^(valid_chars | '\\')? @exit);
|
279
|
+
}%%
|
280
|
+
|
281
|
+
|
282
|
+
const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Object& o)
|
283
|
+
{
|
284
|
+
//std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
285
|
+
int cs;
|
286
|
+
|
287
|
+
%% write init;
|
288
|
+
p_save = p;
|
289
|
+
%% write exec;
|
290
|
+
|
291
|
+
if (cs >= EDN_escaped_char_first_final) {
|
292
|
+
if (!Parser::parse_escaped_char(p_save + 1, p, o)) {
|
293
|
+
return pe;
|
294
|
+
}
|
295
|
+
return p;
|
296
|
+
}
|
297
|
+
else if (cs == EDN_escaped_char_error) {
|
298
|
+
error(__FUNCTION__, *p);
|
299
|
+
return pe;
|
300
|
+
}
|
301
|
+
else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
|
302
|
+
return NULL;
|
303
|
+
}
|
304
|
+
|
305
|
+
|
306
|
+
|
307
|
+
|
188
308
|
// ============================================================
|
189
309
|
// symbol parsing
|
190
310
|
//
|
@@ -197,14 +317,15 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
|
|
197
317
|
action exit { fhold; fbreak; }
|
198
318
|
|
199
319
|
main := (
|
200
|
-
symbol
|
201
|
-
|
320
|
+
operators? symbol |
|
321
|
+
operators
|
322
|
+
) ignore* (^(symbol_chars | operators)? @exit);
|
202
323
|
}%%
|
203
324
|
|
204
325
|
|
205
326
|
const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string& s)
|
206
327
|
{
|
207
|
-
//std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
328
|
+
// std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
208
329
|
int cs;
|
209
330
|
|
210
331
|
%% write init;
|
@@ -237,8 +358,8 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
237
358
|
machine EDN_keyword;
|
238
359
|
include EDN_common;
|
239
360
|
|
240
|
-
keyword_chars = symbol_chars;
|
241
|
-
keyword_start =
|
361
|
+
keyword_chars = symbol_chars | operators;
|
362
|
+
keyword_start = symbol_start | [\#\./];
|
242
363
|
|
243
364
|
keyword_name = keyword_start (keyword_chars)*;
|
244
365
|
|
@@ -253,6 +374,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
253
374
|
const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Object& o)
|
254
375
|
{
|
255
376
|
int cs;
|
377
|
+
// std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
256
378
|
|
257
379
|
%% write init;
|
258
380
|
p_save = p;
|
@@ -285,7 +407,7 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
|
|
285
407
|
write data;
|
286
408
|
|
287
409
|
action parse_string {
|
288
|
-
if (!parse_byte_stream(p_save + 1, p, s)) {
|
410
|
+
if (!Parser::parse_byte_stream(p_save + 1, p, s)) {
|
289
411
|
fhold;
|
290
412
|
fbreak;
|
291
413
|
} else {
|
@@ -299,9 +421,8 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
|
|
299
421
|
(^([\"\\] | 0..0x1f) |
|
300
422
|
'\\'[\"\\/bfnrt] |
|
301
423
|
'\\u'[0-9a-fA-F]{4} |
|
302
|
-
'\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string
|
303
|
-
|
304
|
-
@exit;
|
424
|
+
'\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string
|
425
|
+
) :>> string_delim @err(close_err) @exit;
|
305
426
|
}%%
|
306
427
|
|
307
428
|
|
@@ -339,11 +460,10 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Objec
|
|
339
460
|
|
340
461
|
action exit { fhold; fbreak; }
|
341
462
|
|
342
|
-
main := (
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
(^[0-9Ee.+\-M]? @exit );
|
463
|
+
main := ('-'|'+')? (
|
464
|
+
(integer '.' digit* (exp? [M]?)) |
|
465
|
+
(integer exp)
|
466
|
+
) (^[0-9Ee.+\-M]? @exit );
|
347
467
|
}%%
|
348
468
|
|
349
469
|
|
@@ -356,8 +476,7 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
|
|
356
476
|
%% write exec;
|
357
477
|
|
358
478
|
if (cs >= EDN_decimal_first_final) {
|
359
|
-
|
360
|
-
o = Parser::buftotype<double>(p_save, p - p_save, value);
|
479
|
+
o = Parser::float_to_ruby(p_save, p - p_save);
|
361
480
|
return p + 1;
|
362
481
|
}
|
363
482
|
else if (cs == EDN_decimal_en_main) {} // silence ragel warning
|
@@ -376,7 +495,9 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
|
|
376
495
|
|
377
496
|
action exit { fhold; fbreak; }
|
378
497
|
|
379
|
-
main := (
|
498
|
+
main := (
|
499
|
+
('-'|'+')? (integer [MN]?)
|
500
|
+
) (^[0-9MN+\-]? @exit);
|
380
501
|
}%%
|
381
502
|
|
382
503
|
const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Object& o)
|
@@ -388,8 +509,7 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
388
509
|
%% write exec;
|
389
510
|
|
390
511
|
if (cs >= EDN_integer_first_final) {
|
391
|
-
|
392
|
-
o = Parser::buftotype<int>(p_save, p - p_save, value);
|
512
|
+
o = Parser::integer_to_ruby(p_save, p - p_save);
|
393
513
|
return p + 1;
|
394
514
|
}
|
395
515
|
else if (cs == EDN_integer_en_main) {} // silence ragel warning
|
@@ -439,10 +559,14 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
439
559
|
machine EDN_vector;
|
440
560
|
include EDN_sequence_common;
|
441
561
|
|
562
|
+
end_vector = ']';
|
563
|
+
|
442
564
|
write data;
|
443
565
|
|
444
|
-
main := begin_vector
|
445
|
-
|
566
|
+
main := begin_vector (
|
567
|
+
ignore* sequence? :>> end_vector
|
568
|
+
)
|
569
|
+
@err(close_err) @exit;
|
446
570
|
}%%
|
447
571
|
|
448
572
|
|
@@ -481,10 +605,13 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Objec
|
|
481
605
|
machine EDN_list;
|
482
606
|
include EDN_sequence_common;
|
483
607
|
|
608
|
+
end_list = ')';
|
609
|
+
|
484
610
|
write data;
|
485
611
|
|
486
|
-
main := begin_list
|
487
|
-
|
612
|
+
main := begin_list (
|
613
|
+
ignore* sequence? :>> end_list
|
614
|
+
) @err(close_err) @exit;
|
488
615
|
}%%
|
489
616
|
|
490
617
|
//
|
@@ -526,7 +653,9 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object&
|
|
526
653
|
begin_set = '{';
|
527
654
|
end_set = '}';
|
528
655
|
|
529
|
-
main := begin_set
|
656
|
+
main := begin_set (
|
657
|
+
ignore* sequence? :>> end_set
|
658
|
+
) @err(close_err) @exit;
|
530
659
|
}%%
|
531
660
|
|
532
661
|
//
|
@@ -544,7 +673,7 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
|
|
544
673
|
%% write exec;
|
545
674
|
|
546
675
|
if (cs >= EDN_set_first_final) {
|
547
|
-
o = make_ruby_set(arr);
|
676
|
+
o = Parser::make_ruby_set(arr);
|
548
677
|
return p + 1;
|
549
678
|
}
|
550
679
|
else if (cs == EDN_set_error) {
|
@@ -574,7 +703,9 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
|
|
574
703
|
fexec pe;
|
575
704
|
}
|
576
705
|
|
577
|
-
main := begin_map
|
706
|
+
main := begin_map (
|
707
|
+
ignore* (sequence)? :>> end_map
|
708
|
+
) @err(close_err) @exit;
|
578
709
|
}%%
|
579
710
|
|
580
711
|
|
@@ -620,14 +751,9 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
620
751
|
machine EDN_tagged;
|
621
752
|
include EDN_common;
|
622
753
|
|
623
|
-
# inst = (string_delim [0-9
|
754
|
+
# inst = (string_delim [0-9+\-:\.TZ]* string_delim);
|
624
755
|
# uuid = (string_delim [a-f0-9\-]* string_delim);
|
625
756
|
|
626
|
-
# tags
|
627
|
-
tagged_symbol = alpha [a-zA-z0-9]*;
|
628
|
-
built_in_tag = tagged_symbol;
|
629
|
-
user_tag = tagged_symbol '/' tagged_symbol;
|
630
|
-
|
631
757
|
write data;
|
632
758
|
|
633
759
|
action parse_symbol {
|
@@ -647,7 +773,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
647
773
|
|
648
774
|
const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Object& o)
|
649
775
|
{
|
650
|
-
//
|
776
|
+
// std::cerr << __FUNCTION__ << " p '" << p << "'" << std::endl;
|
651
777
|
std::string sym_name;
|
652
778
|
Rice::Object object;
|
653
779
|
|
@@ -657,7 +783,7 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
|
|
657
783
|
%% write exec;
|
658
784
|
|
659
785
|
if (cs >= EDN_tagged_first_final) {
|
660
|
-
//
|
786
|
+
//std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << object << std::endl;
|
661
787
|
o = Parser::tagged_element(sym_name, object);
|
662
788
|
return p + 1;
|
663
789
|
}
|
@@ -689,9 +815,9 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
|
|
689
815
|
fhold; fbreak;
|
690
816
|
}
|
691
817
|
|
692
|
-
main :=
|
693
|
-
|
694
|
-
|
818
|
+
main := begin_discard ignore* (
|
819
|
+
begin_value >discard_value
|
820
|
+
) @exit;
|
695
821
|
}%%
|
696
822
|
|
697
823
|
|
@@ -728,19 +854,16 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
|
728
854
|
write data;
|
729
855
|
|
730
856
|
action parse_discard {
|
731
|
-
// std::cerr << "--- DISPATCH DISCARD: fpc is '" << fpc << "'" << std::endl;
|
732
857
|
const char *np = parse_discard(fpc, pe);
|
733
858
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
734
859
|
}
|
735
860
|
|
736
861
|
action parse_set {
|
737
|
-
// std::cerr << "--- DISPATCH SET: fpc is '" << fpc << "'" << std::endl;
|
738
862
|
const char *np = parse_set(fpc, pe, o);
|
739
863
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
740
864
|
}
|
741
865
|
|
742
866
|
action parse_tagged {
|
743
|
-
// std::cerr << "--- DISPATCH TAGGED: fpc is '" << fpc << "'" << std::endl;
|
744
867
|
const char *np = parse_tagged(fpc, pe, o);
|
745
868
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
746
869
|
}
|
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <iostream>
|
2
|
+
#include <iomanip>
|
2
3
|
#include <string>
|
4
|
+
#include <limits>
|
3
5
|
|
4
6
|
#include <rice/String.hpp>
|
5
7
|
#include <rice/Array.hpp>
|
@@ -11,6 +13,47 @@
|
|
11
13
|
|
12
14
|
namespace edn
|
13
15
|
{
|
16
|
+
template <typename T>
|
17
|
+
static std::size_t get_max_chars(T)
|
18
|
+
{
|
19
|
+
std::stringstream s;
|
20
|
+
s << std::fixed << std::numeric_limits<T>::max();
|
21
|
+
return s.str().length();
|
22
|
+
}
|
23
|
+
|
24
|
+
static const std::size_t LL_max_chars = get_max_chars<>((long long) 1);
|
25
|
+
static const std::size_t LD_max_chars = get_max_chars<>((long double) 1);
|
26
|
+
|
27
|
+
//
|
28
|
+
// convert to int.. if string rep has more digits than long can
|
29
|
+
// hold, call into ruby to get a big num
|
30
|
+
Rice::Object Parser::integer_to_ruby(const char* str, std::size_t len)
|
31
|
+
{
|
32
|
+
if (len < LL_max_chars)
|
33
|
+
{
|
34
|
+
return buftotype<long>(str, len);
|
35
|
+
}
|
36
|
+
|
37
|
+
// value is outside of range of long type. Use ruby to convert it
|
38
|
+
VALUE rb_s = Rice::protect(rb_str_new2, str);
|
39
|
+
return Rice::protect(rb_funcall, rb_mEDNT, EDNT_STR_INT_TO_BIGNUM, 1, rb_s);
|
40
|
+
}
|
41
|
+
|
42
|
+
//
|
43
|
+
// as above.. TODO: check exponential
|
44
|
+
Rice::Object Parser::float_to_ruby(const char* str, std::size_t len)
|
45
|
+
{
|
46
|
+
if (len < LD_max_chars)
|
47
|
+
{
|
48
|
+
return buftotype<double>(str, len);
|
49
|
+
}
|
50
|
+
|
51
|
+
// value is outside of range of long type. Use ruby to convert it
|
52
|
+
VALUE rb_s = Rice::protect(rb_str_new2, str);
|
53
|
+
return Rice::protect(rb_funcall, rb_mEDNT, EDNT_STR_DBL_TO_BIGNUM, 1, rb_s);
|
54
|
+
}
|
55
|
+
|
56
|
+
|
14
57
|
//
|
15
58
|
// copies the string data, unescaping any present values that need to be replaced
|
16
59
|
//
|
@@ -18,9 +61,8 @@ namespace edn
|
|
18
61
|
{
|
19
62
|
if (p_end > p_start) {
|
20
63
|
std::string buf;
|
21
|
-
std::size_t len = p_end - p_start;
|
22
64
|
|
23
|
-
if (unicode_to_utf8(p_start,
|
65
|
+
if (unicode_to_utf8(p_start, p_end - p_start, buf))
|
24
66
|
{
|
25
67
|
// utf-8 encode
|
26
68
|
VALUE vs = Rice::protect( rb_str_new2, buf.c_str() );
|
@@ -34,22 +76,27 @@ namespace edn
|
|
34
76
|
}
|
35
77
|
|
36
78
|
//
|
37
|
-
// handles things like \c, \
|
79
|
+
// handles things like \c, \newline
|
38
80
|
//
|
39
|
-
bool Parser::parse_escaped_char(char
|
81
|
+
bool Parser::parse_escaped_char(const char *p, const char *pe, Rice::Object& o)
|
40
82
|
{
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
83
|
+
std::string buf;
|
84
|
+
std::size_t len = pe - p;
|
85
|
+
buf.append(p, len);
|
86
|
+
|
87
|
+
if (len > 1) {
|
88
|
+
if (buf == "newline") buf = "\\n";
|
89
|
+
else if (buf == "tab") buf = "\\t";
|
90
|
+
else if (buf == "return") buf = "\\r";
|
91
|
+
else if (buf == "space") buf = " ";
|
92
|
+
else if (buf == "formfeed") buf = "\\f";
|
93
|
+
else if (buf == "backspace") buf = "\\b";
|
94
|
+
// TODO: is this supported?
|
95
|
+
else if (buf == "verticaltab") buf = "\\v";
|
96
|
+
else return false;
|
50
97
|
}
|
51
98
|
|
52
|
-
|
99
|
+
o = Rice::String(buf);
|
53
100
|
return true;
|
54
101
|
}
|
55
102
|
|