edn_turbo 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -6
- data/Rakefile +6 -3
- data/ext/edn_turbo/edn_parser.cc +1140 -414
- data/ext/edn_turbo/edn_parser.h +24 -17
- data/ext/edn_turbo/edn_parser.rl +208 -85
- data/ext/edn_turbo/edn_parser_def.cc +61 -14
- data/ext/edn_turbo/main.cc +4 -0
- data/lib/edn_turbo/version.rb +2 -2
- data/lib/edn_turbo.rb +14 -0
- data/test/test_output_diff.rb +8 -8
- metadata +2 -2
data/ext/edn_turbo/edn_parser.h
CHANGED
@@ -20,6 +20,8 @@ namespace edn
|
|
20
20
|
extern VALUE EDNT_MAKE_EDN_SYMBOL;
|
21
21
|
extern VALUE EDNT_MAKE_SET_METHOD;
|
22
22
|
extern VALUE EDNT_TAGGED_ELEM;
|
23
|
+
extern VALUE EDNT_STR_INT_TO_BIGNUM;
|
24
|
+
extern VALUE EDNT_STR_DBL_TO_BIGNUM;
|
23
25
|
|
24
26
|
//
|
25
27
|
// C-extension EDN Parser class representation
|
@@ -34,24 +36,25 @@ namespace edn
|
|
34
36
|
|
35
37
|
Rice::Object parse(const char* s, std::size_t len);
|
36
38
|
|
37
|
-
const char*
|
38
|
-
const char*
|
39
|
-
const char*
|
40
|
-
const char*
|
41
|
-
const char*
|
42
|
-
const char*
|
43
|
-
const char*
|
44
|
-
const char*
|
45
|
-
const char*
|
46
|
-
const char*
|
47
|
-
const char*
|
39
|
+
const char* parse_value (const char *p, const char *pe, Rice::Object& o);
|
40
|
+
const char* parse_operator(const char *p, const char *pe, Rice::Object& o);
|
41
|
+
const char* parse_esc_char(const char *p, const char *pe, Rice::Object& o);
|
42
|
+
const char* parse_symbol (const char *p, const char *pe, std::string& s);
|
43
|
+
const char* parse_keyword (const char *p, const char *pe, Rice::Object& o);
|
44
|
+
const char* parse_string (const char *p, const char *pe, Rice::Object& o);
|
45
|
+
const char* parse_decimal (const char *p, const char *pe, Rice::Object& o);
|
46
|
+
const char* parse_integer (const char *p, const char *pe, Rice::Object& o);
|
47
|
+
const char* parse_vector (const char *p, const char *pe, Rice::Object& o);
|
48
|
+
const char* parse_list (const char *p, const char *pe, Rice::Object& o);
|
49
|
+
const char* parse_set (const char *p, const char *pe, Rice::Object& o);
|
50
|
+
const char* parse_map (const char *p, const char *pe, Rice::Object& o);
|
51
|
+
const char* parse_tagged (const char *p, const char *pe, Rice::Object& o);
|
52
|
+
const char* parse_discard (const char *p, const char *pe);
|
48
53
|
const char* parse_dispatch(const char *p, const char *pe, Rice::Object& o);
|
49
54
|
|
50
|
-
|
51
|
-
|
52
|
-
static bool
|
53
|
-
static bool parse_escaped_char(char c, Rice::String& s);
|
54
|
-
static bool unicode_to_utf8(const char *s, std::size_t len, std::string& rslt);
|
55
|
+
static bool parse_byte_stream (const char *p, const char *pe, Rice::String& s);
|
56
|
+
static bool parse_escaped_char(const char *p, const char *pe, Rice::Object& s);
|
57
|
+
static bool unicode_to_utf8 (const char *s, std::size_t len, std::string& rslt);
|
55
58
|
|
56
59
|
static Rice::Object make_edn_symbol(const std::string& name);
|
57
60
|
static Rice::Object make_ruby_set(const Rice::Array& elems);
|
@@ -64,13 +67,17 @@ namespace edn
|
|
64
67
|
// utility method to convert a primitive in string form to a
|
65
68
|
// ruby type
|
66
69
|
template <class T>
|
67
|
-
static Rice::Object buftotype(const char* p,
|
70
|
+
static Rice::Object buftotype(const char* p, std::size_t len) {
|
71
|
+
T val;
|
68
72
|
std::string buf;
|
69
73
|
buf.append(p, len);
|
70
74
|
std::istringstream(buf) >> val;
|
71
75
|
return to_ruby<T>(val);
|
72
76
|
}
|
73
77
|
|
78
|
+
static Rice::Object integer_to_ruby(const char* str, std::size_t len);
|
79
|
+
static Rice::Object float_to_ruby(const char* str, std::size_t len);
|
80
|
+
|
74
81
|
public:
|
75
82
|
// Default constructor: line counting starts at 1; p_save and eof start out as NULL.
Parser() : line_number(1), p_save(NULL), eof(NULL) { }
|
76
83
|
|
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -26,34 +26,26 @@
|
|
26
26
|
comment = ';' cr_neg* counter;
|
27
27
|
ignore = ws | comment;
|
28
28
|
|
29
|
-
operators = [/\.\*!_
|
30
|
-
|
29
|
+
operators = [/\.\*!_\?$%&<>\=+\-];
|
30
|
+
symbol_start = alpha;
|
31
|
+
symbol_chars = symbol_start | digit | [\#:_\-\.];
|
31
32
|
|
32
|
-
symbol_first_c = symbol_chars - [0-9\#\:]; # non-numeric, no '#' or ':'
|
33
|
-
|
34
|
-
k_nil = 'nil';
|
35
|
-
k_true = 'true';
|
36
|
-
k_false = 'false';
|
37
33
|
begin_dispatch = '#';
|
38
34
|
begin_keyword = ':';
|
39
35
|
begin_char = '\\';
|
40
36
|
begin_value = alnum | [:\"\{\[\(\\\#] | operators;
|
41
|
-
|
42
|
-
begin_symbol = symbol_first_c - ('-'|'+');
|
37
|
+
begin_symbol = symbol_start;
|
43
38
|
begin_vector = '[';
|
44
|
-
end_vector = ']';
|
45
39
|
begin_map = '{';
|
46
40
|
begin_list = '(';
|
47
|
-
end_list = ')';
|
48
41
|
string_delim = '"';
|
49
|
-
begin_number = digit
|
50
|
-
|
51
|
-
symbol_name = [\-\+\.]? symbol_first_c (symbol_chars)*;
|
42
|
+
begin_number = digit;
|
52
43
|
|
53
|
-
|
44
|
+
symbol_name = symbol_start (symbol_chars)*;
|
45
|
+
symbol = (symbol_name ('/' symbol_name)?);
|
54
46
|
|
55
47
|
# int / decimal rules
|
56
|
-
integer =
|
48
|
+
integer = ('0' | [1-9] digit*);
|
57
49
|
exp = ([Ee] [+\-]? digit+);
|
58
50
|
|
59
51
|
|
@@ -61,7 +53,7 @@
|
|
61
53
|
std::stringstream s;
|
62
54
|
s << "unterminated " << EDN_TYPE;
|
63
55
|
error(__FUNCTION__, s.str());
|
64
|
-
|
56
|
+
fhold; fbreak;
|
65
57
|
}
|
66
58
|
}%%
|
67
59
|
|
@@ -75,6 +67,31 @@
|
|
75
67
|
|
76
68
|
write data;
|
77
69
|
|
70
|
+
action parse_dispatch {
    // fpc sits on the '#'; the dispatch parser starts one character later
    const char *next_p = parse_dispatch(fpc + 1, pe, o);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}

action parse_char {
    // hand the input at fpc to the escaped-character parser
    const char *next_p = parse_esc_char(fpc, pe, o);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}

action parse_string {
    // hand the input at fpc to the string parser
    const char *next_p = parse_string(fpc, pe, o);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}

action parse_keyword {
    // hand the input at fpc to the keyword parser
    const char *next_p = parse_keyword(fpc, pe, o);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}

action parse_operator {
    // hand the input at fpc to the operator parser
    const char *next_p = parse_operator(fpc, pe, o);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}
|
94
|
+
|
78
95
|
action parse_symbol {
|
79
96
|
std::string sym;
|
80
97
|
const char *np = parse_symbol(fpc, pe, sym);
|
@@ -89,26 +106,6 @@
|
|
89
106
|
}
|
90
107
|
}
|
91
108
|
|
92
|
-
action parse_keyword {
|
93
|
-
const char *np = parse_keyword(fpc, pe, o);
|
94
|
-
if (np == NULL) { fhold; fbreak; } else fexec np;
|
95
|
-
}
|
96
|
-
|
97
|
-
action parse_char {
|
98
|
-
Rice::String s;
|
99
|
-
|
100
|
-
if (!parse_escaped_char(*fpc, s)) {
|
101
|
-
fhold; fbreak;
|
102
|
-
} else {
|
103
|
-
o = s;
|
104
|
-
}
|
105
|
-
}
|
106
|
-
|
107
|
-
action parse_string {
|
108
|
-
const char *np = parse_string(fpc, pe, o);
|
109
|
-
if (np == NULL) { fhold; fbreak; } else fexec np;
|
110
|
-
}
|
111
|
-
|
112
109
|
action parse_number {
|
113
110
|
// try to parse a decimal first
|
114
111
|
const char *np = parse_decimal(fpc, pe, o);
|
@@ -143,19 +140,15 @@
|
|
143
140
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
144
141
|
}
|
145
142
|
|
146
|
-
action parse_dispatch {
|
147
|
-
const char *np = parse_dispatch(fpc + 1, pe, o);
|
148
|
-
if (np == NULL) { fhold; fbreak; } else fexec np;
|
149
|
-
}
|
150
|
-
|
151
143
|
action exit { fhold; fbreak; }
|
152
144
|
|
153
145
|
main := (
|
154
146
|
begin_dispatch >parse_dispatch |
|
155
|
-
begin_char
|
147
|
+
begin_char >parse_char |
|
156
148
|
string_delim >parse_string |
|
157
|
-
begin_symbol >parse_symbol |
|
158
149
|
begin_keyword >parse_keyword |
|
150
|
+
operators >parse_operator |
|
151
|
+
begin_symbol >parse_symbol |
|
159
152
|
begin_number >parse_number |
|
160
153
|
begin_vector >parse_vector |
|
161
154
|
begin_list >parse_list |
|
@@ -166,7 +159,7 @@
|
|
166
159
|
|
167
160
|
const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object& o)
|
168
161
|
{
|
169
|
-
//
|
162
|
+
//std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
170
163
|
int cs;
|
171
164
|
|
172
165
|
%% write init;
|
@@ -185,6 +178,133 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
|
|
185
178
|
|
186
179
|
|
187
180
|
|
181
|
+
// ============================================================
|
182
|
+
// operator parsing
|
183
|
+
//
|
184
|
+
%%{
    machine EDN_operator;
    include EDN_common;

    write data;

    action parse_symbol {
        // a symbol that begins with an operator character (-, +, .):
        // re-parse from p_save so the leading operator is part of the name
        std::string name;
        const char *next_p = parse_symbol(p_save, pe, name);
        if (next_p != NULL) {
            o = Parser::make_edn_symbol(name);
            fexec next_p;
        } else {
            fhold; fbreak;
        }
    }

    action parse_number {
        // a number with a leading sign - unlike the number action in
        // EDN_value, parsing restarts at p_save so the - or + is included
        //
        // decimals are tried first, integers second
        const char *next_p = parse_decimal(p_save, pe, o);
        if (next_p == NULL) {
            next_p = parse_integer(p_save, pe, o);
        }

        if (next_p == NULL) {
            error(__FUNCTION__, *p);
            fexec pe;
        }
        else {
            fexec next_p;
            fhold;
            fbreak;
        }
    }

    action parse_operator {
        // stand-alone operators (-, +, /, ... etc): the symbol name is
        // the single character just before fpc
        const std::string name(1, *(fpc - 1));
        o = Parser::make_edn_symbol(name);
    }

    action exit { fhold; fbreak; }

    main := (
             ('-'|'+'|'.') alpha >parse_symbol |
             ('-'|'+') begin_number >parse_number |
             operators ignore* >parse_operator
             ) ^(operators|alpha|digit)? @exit;
}%%
|
238
|
+
|
239
|
+
|
240
|
+
//
// Runs the EDN_operator machine over [p, pe).
//
// p_save is set to the starting position before the machine runs; the
// machine's actions re-parse from it and store their result in `o`.
//
// Returns the machine's final position when it ends in a final state,
// `pe` after reporting an error when it ends in the error state, and
// NULL otherwise.
const char* edn::Parser::parse_operator(const char *p, const char *pe, Rice::Object& o)
{
    // std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
    int cs;

    %% write init;
    p_save = p;
    %% write exec;

    if (cs >= EDN_operator_first_final) {
        return p;
    }
    else if (cs == EDN_operator_error) {
        error(__FUNCTION__, *p);
        return pe;
    }
    else if (cs == EDN_operator_en_main) {} // silence ragel warning
    return NULL;
}
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
// ============================================================
|
264
|
+
// escaped char parsing
|
265
|
+
//
|
266
|
+
%%{
    machine EDN_escaped_char;
    include EDN_common;

    write data;

    # characters accepted after the leading backslash
    valid_chars = alpha;

    action exit { fhold; fbreak; }

    # backslash, one or more letters, optional trailing ignorable input;
    # the first character that is neither a letter nor a backslash ends
    # the match
    main := (
             begin_char valid_chars+ ignore*
             ) (^(valid_chars | '\\')? @exit);
}%%
|
280
|
+
|
281
|
+
|
282
|
+
//
// Runs the EDN_escaped_char machine over [p, pe) and, on a match,
// converts the text between the backslash and the stopping position
// into `o` via Parser::parse_escaped_char.
//
// Returns the stopping position on success, `pe` when the conversion
// fails or the machine ends in its error state, and NULL otherwise.
const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Object& o)
{
    //std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
    int cs;

    %% write init;
    p_save = p;
    %% write exec;

    if (cs >= EDN_escaped_char_first_final) {
        // p_save + 1 skips the leading backslash
        const bool converted = Parser::parse_escaped_char(p_save + 1, p, o);
        return converted ? p : pe;
    }

    if (cs == EDN_escaped_char_error) {
        error(__FUNCTION__, *p);
        return pe;
    }

    if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
    return NULL;
}
|
304
|
+
|
305
|
+
|
306
|
+
|
307
|
+
|
188
308
|
// ============================================================
|
189
309
|
// symbol parsing
|
190
310
|
//
|
@@ -197,14 +317,15 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
|
|
197
317
|
action exit { fhold; fbreak; }
|
198
318
|
|
199
319
|
main := (
|
200
|
-
symbol
|
201
|
-
|
320
|
+
operators? symbol |
|
321
|
+
operators
|
322
|
+
) ignore* (^(symbol_chars | operators)? @exit);
|
202
323
|
}%%
|
203
324
|
|
204
325
|
|
205
326
|
const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string& s)
|
206
327
|
{
|
207
|
-
//std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
328
|
+
// std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
208
329
|
int cs;
|
209
330
|
|
210
331
|
%% write init;
|
@@ -237,8 +358,8 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
237
358
|
machine EDN_keyword;
|
238
359
|
include EDN_common;
|
239
360
|
|
240
|
-
keyword_chars = symbol_chars;
|
241
|
-
keyword_start =
|
361
|
+
keyword_chars = symbol_chars | operators;
|
362
|
+
keyword_start = symbol_start | [\#\./];
|
242
363
|
|
243
364
|
keyword_name = keyword_start (keyword_chars)*;
|
244
365
|
|
@@ -253,6 +374,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
253
374
|
const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Object& o)
|
254
375
|
{
|
255
376
|
int cs;
|
377
|
+
// std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
256
378
|
|
257
379
|
%% write init;
|
258
380
|
p_save = p;
|
@@ -285,7 +407,7 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
|
|
285
407
|
write data;
|
286
408
|
|
287
409
|
action parse_string {
|
288
|
-
if (!parse_byte_stream(p_save + 1, p, s)) {
|
410
|
+
if (!Parser::parse_byte_stream(p_save + 1, p, s)) {
|
289
411
|
fhold;
|
290
412
|
fbreak;
|
291
413
|
} else {
|
@@ -299,9 +421,8 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
|
|
299
421
|
(^([\"\\] | 0..0x1f) |
|
300
422
|
'\\'[\"\\/bfnrt] |
|
301
423
|
'\\u'[0-9a-fA-F]{4} |
|
302
|
-
'\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string
|
303
|
-
|
304
|
-
@exit;
|
424
|
+
'\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string
|
425
|
+
) :>> string_delim @err(close_err) @exit;
|
305
426
|
}%%
|
306
427
|
|
307
428
|
|
@@ -339,11 +460,10 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Objec
|
|
339
460
|
|
340
461
|
action exit { fhold; fbreak; }
|
341
462
|
|
342
|
-
main := (
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
(^[0-9Ee.+\-M]? @exit );
|
463
|
+
main := ('-'|'+')? (
|
464
|
+
(integer '.' digit* (exp? [M]?)) |
|
465
|
+
(integer exp)
|
466
|
+
) (^[0-9Ee.+\-M]? @exit );
|
347
467
|
}%%
|
348
468
|
|
349
469
|
|
@@ -356,8 +476,7 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
|
|
356
476
|
%% write exec;
|
357
477
|
|
358
478
|
if (cs >= EDN_decimal_first_final) {
|
359
|
-
|
360
|
-
o = Parser::buftotype<double>(p_save, p - p_save, value);
|
479
|
+
o = Parser::float_to_ruby(p_save, p - p_save);
|
361
480
|
return p + 1;
|
362
481
|
}
|
363
482
|
else if (cs == EDN_decimal_en_main) {} // silence ragel warning
|
@@ -376,7 +495,9 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
|
|
376
495
|
|
377
496
|
action exit { fhold; fbreak; }
|
378
497
|
|
379
|
-
main := (
|
498
|
+
main := (
|
499
|
+
('-'|'+')? (integer [MN]?)
|
500
|
+
) (^[0-9MN+\-]? @exit);
|
380
501
|
}%%
|
381
502
|
|
382
503
|
const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Object& o)
|
@@ -388,8 +509,7 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
388
509
|
%% write exec;
|
389
510
|
|
390
511
|
if (cs >= EDN_integer_first_final) {
|
391
|
-
|
392
|
-
o = Parser::buftotype<int>(p_save, p - p_save, value);
|
512
|
+
o = Parser::integer_to_ruby(p_save, p - p_save);
|
393
513
|
return p + 1;
|
394
514
|
}
|
395
515
|
else if (cs == EDN_integer_en_main) {} // silence ragel warning
|
@@ -439,10 +559,14 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
439
559
|
machine EDN_vector;
|
440
560
|
include EDN_sequence_common;
|
441
561
|
|
562
|
+
end_vector = ']';
|
563
|
+
|
442
564
|
write data;
|
443
565
|
|
444
|
-
main := begin_vector
|
445
|
-
|
566
|
+
main := begin_vector (
|
567
|
+
ignore* sequence? :>> end_vector
|
568
|
+
)
|
569
|
+
@err(close_err) @exit;
|
446
570
|
}%%
|
447
571
|
|
448
572
|
|
@@ -481,10 +605,13 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Objec
|
|
481
605
|
machine EDN_list;
|
482
606
|
include EDN_sequence_common;
|
483
607
|
|
608
|
+
end_list = ')';
|
609
|
+
|
484
610
|
write data;
|
485
611
|
|
486
|
-
main := begin_list
|
487
|
-
|
612
|
+
main := begin_list (
|
613
|
+
ignore* sequence? :>> end_list
|
614
|
+
) @err(close_err) @exit;
|
488
615
|
}%%
|
489
616
|
|
490
617
|
//
|
@@ -526,7 +653,9 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object&
|
|
526
653
|
begin_set = '{';
|
527
654
|
end_set = '}';
|
528
655
|
|
529
|
-
main := begin_set
|
656
|
+
main := begin_set (
|
657
|
+
ignore* sequence? :>> end_set
|
658
|
+
) @err(close_err) @exit;
|
530
659
|
}%%
|
531
660
|
|
532
661
|
//
|
@@ -544,7 +673,7 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
|
|
544
673
|
%% write exec;
|
545
674
|
|
546
675
|
if (cs >= EDN_set_first_final) {
|
547
|
-
o = make_ruby_set(arr);
|
676
|
+
o = Parser::make_ruby_set(arr);
|
548
677
|
return p + 1;
|
549
678
|
}
|
550
679
|
else if (cs == EDN_set_error) {
|
@@ -574,7 +703,9 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
|
|
574
703
|
fexec pe;
|
575
704
|
}
|
576
705
|
|
577
|
-
main := begin_map
|
706
|
+
main := begin_map (
|
707
|
+
ignore* (sequence)? :>> end_map
|
708
|
+
) @err(close_err) @exit;
|
578
709
|
}%%
|
579
710
|
|
580
711
|
|
@@ -620,14 +751,9 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
620
751
|
machine EDN_tagged;
|
621
752
|
include EDN_common;
|
622
753
|
|
623
|
-
# inst = (string_delim [0-9
|
754
|
+
# inst = (string_delim [0-9+\-:\.TZ]* string_delim);
|
624
755
|
# uuid = (string_delim [a-f0-9\-]* string_delim);
|
625
756
|
|
626
|
-
# tags
|
627
|
-
tagged_symbol = alpha [a-zA-z0-9]*;
|
628
|
-
built_in_tag = tagged_symbol;
|
629
|
-
user_tag = tagged_symbol '/' tagged_symbol;
|
630
|
-
|
631
757
|
write data;
|
632
758
|
|
633
759
|
action parse_symbol {
|
@@ -647,7 +773,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
647
773
|
|
648
774
|
const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Object& o)
|
649
775
|
{
|
650
|
-
//
|
776
|
+
// std::cerr << __FUNCTION__ << " p '" << p << "'" << std::endl;
|
651
777
|
std::string sym_name;
|
652
778
|
Rice::Object object;
|
653
779
|
|
@@ -657,7 +783,7 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
|
|
657
783
|
%% write exec;
|
658
784
|
|
659
785
|
if (cs >= EDN_tagged_first_final) {
|
660
|
-
//
|
786
|
+
//std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << object << std::endl;
|
661
787
|
o = Parser::tagged_element(sym_name, object);
|
662
788
|
return p + 1;
|
663
789
|
}
|
@@ -689,9 +815,9 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
|
|
689
815
|
fhold; fbreak;
|
690
816
|
}
|
691
817
|
|
692
|
-
main :=
|
693
|
-
|
694
|
-
|
818
|
+
main := begin_discard ignore* (
|
819
|
+
begin_value >discard_value
|
820
|
+
) @exit;
|
695
821
|
}%%
|
696
822
|
|
697
823
|
|
@@ -728,19 +854,16 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
|
728
854
|
write data;
|
729
855
|
|
730
856
|
action parse_discard {
    // hand the input at fpc to the discard parser (produces no value)
    const char *next_p = parse_discard(fpc, pe);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}

action parse_set {
    // hand the input at fpc to the set parser
    const char *next_p = parse_set(fpc, pe, o);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}

action parse_tagged {
    // hand the input at fpc to the tagged-element parser
    const char *next_p = parse_tagged(fpc, pe, o);
    if (next_p != NULL) {
        fexec next_p;
    } else {
        fhold; fbreak;
    }
}
|
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <iostream>
|
2
|
+
#include <iomanip>
|
2
3
|
#include <string>
|
4
|
+
#include <limits>
|
3
5
|
|
4
6
|
#include <rice/String.hpp>
|
5
7
|
#include <rice/Array.hpp>
|
@@ -11,6 +13,47 @@
|
|
11
13
|
|
12
14
|
namespace edn
|
13
15
|
{
|
16
|
+
//
// Returns the number of characters needed to print the largest value
// of T in fixed (non-scientific) notation. The argument only selects
// T; its value is ignored.
template <typename T>
static std::size_t get_max_chars(T)
{
    std::stringstream s;
    s << std::fixed << std::numeric_limits<T>::max();
    return s.str().length();
}

// Length thresholds below which a numeric token is converted directly.
// They are derived from the types the direct conversions actually use
// (buftotype<long> and buftotype<double>), so a token that passes the
// length check cannot be wider than the type it is read into.
static const std::size_t LL_max_chars = get_max_chars<>(static_cast<long>(1));
static const std::size_t LD_max_chars = get_max_chars<>(static_cast<double>(1));
|
26
|
+
|
27
|
+
//
|
28
|
+
// convert to int.. if string rep has more digits than long can
|
29
|
+
// hold, call into ruby to get a big num
|
30
|
+
// str: start of the integer token (not NUL-terminated at `len`)
// len: number of bytes in the token
Rice::Object Parser::integer_to_ruby(const char* str, std::size_t len)
{
    // short enough to be converted directly
    if (len < LL_max_chars)
    {
        return buftotype<long>(str, len);
    }

    // Too many characters for the direct conversion: let ruby build the
    // number. rb_str_new2 reads up to the first NUL, so copy exactly the
    // `len` bytes of this token into a terminated buffer first instead
    // of handing over everything that follows it in the input.
    const std::string token(str, len);
    VALUE rb_s = Rice::protect(rb_str_new2, token.c_str());
    return Rice::protect(rb_funcall, rb_mEDNT, EDNT_STR_INT_TO_BIGNUM, 1, rb_s);
}
|
41
|
+
|
42
|
+
//
|
43
|
+
// as above.. TODO: check exponential
|
44
|
+
// str: start of the decimal token (not NUL-terminated at `len`)
// len: number of bytes in the token
Rice::Object Parser::float_to_ruby(const char* str, std::size_t len)
{
    // short enough to be converted directly
    if (len < LD_max_chars)
    {
        return buftotype<double>(str, len);
    }

    // Too many characters for the direct conversion: let ruby build the
    // number. rb_str_new2 reads up to the first NUL, so copy exactly the
    // `len` bytes of this token into a terminated buffer first instead
    // of handing over everything that follows it in the input.
    const std::string token(str, len);
    VALUE rb_s = Rice::protect(rb_str_new2, token.c_str());
    return Rice::protect(rb_funcall, rb_mEDNT, EDNT_STR_DBL_TO_BIGNUM, 1, rb_s);
}
|
55
|
+
|
56
|
+
|
14
57
|
//
|
15
58
|
// copies the string data, unescaping any present values that need to be replaced
|
16
59
|
//
|
@@ -18,9 +61,8 @@ namespace edn
|
|
18
61
|
{
|
19
62
|
if (p_end > p_start) {
|
20
63
|
std::string buf;
|
21
|
-
std::size_t len = p_end - p_start;
|
22
64
|
|
23
|
-
if (unicode_to_utf8(p_start,
|
65
|
+
if (unicode_to_utf8(p_start, p_end - p_start, buf))
|
24
66
|
{
|
25
67
|
// utf-8 encode
|
26
68
|
VALUE vs = Rice::protect( rb_str_new2, buf.c_str() );
|
@@ -34,22 +76,27 @@ namespace edn
|
|
34
76
|
}
|
35
77
|
|
36
78
|
//
|
37
|
-
// handles things like \c, \
|
79
|
+
// handles things like \c, \newline
|
38
80
|
//
|
39
|
-
bool Parser::parse_escaped_char(char
|
81
|
+
bool Parser::parse_escaped_char(const char *p, const char *pe, Rice::Object& o)
|
40
82
|
{
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
83
|
+
std::string buf;
|
84
|
+
std::size_t len = pe - p;
|
85
|
+
buf.append(p, len);
|
86
|
+
|
87
|
+
if (len > 1) {
|
88
|
+
if (buf == "newline") buf = "\\n";
|
89
|
+
else if (buf == "tab") buf = "\\t";
|
90
|
+
else if (buf == "return") buf = "\\r";
|
91
|
+
else if (buf == "space") buf = " ";
|
92
|
+
else if (buf == "formfeed") buf = "\\f";
|
93
|
+
else if (buf == "backspace") buf = "\\b";
|
94
|
+
// TODO: is this supported?
|
95
|
+
else if (buf == "verticaltab") buf = "\\v";
|
96
|
+
else return false;
|
50
97
|
}
|
51
98
|
|
52
|
-
|
99
|
+
o = Rice::String(buf);
|
53
100
|
return true;
|
54
101
|
}
|
55
102
|
|