edn_turbo 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +54 -9
- data/ext/edn_turbo/edn_parser.cc +493 -370
- data/ext/edn_turbo/edn_parser.h +48 -47
- data/ext/edn_turbo/edn_parser.rl +138 -107
- data/ext/edn_turbo/edn_parser_util.cc +122 -28
- data/ext/edn_turbo/extconf.rb +1 -1
- data/ext/edn_turbo/main.cc +90 -15
- data/lib/edn_turbo.rb +1 -1
- data/lib/edn_turbo/edn_parser.rb +19 -2
- data/lib/edn_turbo/version.rb +2 -2
- data/test/test_output_diff.rb +8 -8
- metadata +2 -16
data/ext/edn_turbo/edn_parser.h
CHANGED
@@ -5,18 +5,12 @@
|
|
5
5
|
#include <sstream>
|
6
6
|
#include <stack>
|
7
7
|
|
8
|
-
#include <
|
9
|
-
#include <rice/Object.hpp>
|
10
|
-
#include <rice/Array.hpp>
|
11
|
-
#include <rice/String.hpp>
|
12
|
-
#include <rice/to_from_ruby.hpp>
|
13
|
-
|
14
|
-
typedef unsigned char ui8;
|
8
|
+
#include <ruby/ruby.h>
|
15
9
|
|
16
10
|
|
17
11
|
namespace edn
|
18
12
|
{
|
19
|
-
extern
|
13
|
+
extern VALUE rb_mEDNT;
|
20
14
|
extern VALUE EDNT_MAKE_EDN_SYMBOL;
|
21
15
|
extern VALUE EDNT_MAKE_SET_METHOD;
|
22
16
|
extern VALUE EDNT_TAGGED_ELEM;
|
@@ -27,65 +21,72 @@ namespace edn
|
|
27
21
|
// C-extension EDN Parser class representation
|
28
22
|
class Parser
|
29
23
|
{
|
30
|
-
|
24
|
+
public:
|
25
|
+
Parser() : p(NULL), pe(NULL), eof(NULL), line_number(1) { }
|
31
26
|
|
32
|
-
std::size_t
|
33
|
-
|
27
|
+
void set_source(const char* src, std::size_t len);
|
28
|
+
|
29
|
+
bool is_eof() const { return (p != NULL && p == pe); }
|
30
|
+
VALUE parse(const char* s, std::size_t len);
|
31
|
+
// VALUE read(const std::string& data) { return parse(data.c_str(), data.length()); }
|
32
|
+
VALUE next();
|
33
|
+
|
34
|
+
static void throw_error(int error);
|
35
|
+
|
36
|
+
private:
|
37
|
+
// ragel needs these
|
38
|
+
const char* p;
|
39
|
+
const char* pe;
|
34
40
|
const char* eof;
|
35
|
-
std::
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
const char*
|
41
|
-
const char*
|
42
|
-
const char*
|
43
|
-
const char*
|
44
|
-
const char*
|
45
|
-
const char*
|
46
|
-
const char*
|
47
|
-
const char*
|
48
|
-
const char*
|
49
|
-
const char*
|
50
|
-
const char*
|
51
|
-
const char*
|
41
|
+
std::size_t line_number;
|
42
|
+
std::stack<VALUE> discard;
|
43
|
+
|
44
|
+
void reset();
|
45
|
+
|
46
|
+
const char* parse_value (const char *p, const char *pe, VALUE& v);
|
47
|
+
const char* parse_string (const char *p, const char *pe, VALUE& v);
|
48
|
+
const char* parse_keyword (const char *p, const char *pe, VALUE& v);
|
49
|
+
const char* parse_decimal (const char *p, const char *pe, VALUE& v);
|
50
|
+
const char* parse_integer (const char *p, const char *pe, VALUE& v);
|
51
|
+
const char* parse_operator(const char *p, const char *pe, VALUE& v);
|
52
|
+
const char* parse_esc_char(const char *p, const char *pe, VALUE& v);
|
53
|
+
const char* parse_symbol (const char *p, const char *pe, VALUE& v);
|
54
|
+
const char* parse_vector (const char *p, const char *pe, VALUE& v);
|
55
|
+
const char* parse_list (const char *p, const char *pe, VALUE& v);
|
56
|
+
const char* parse_map (const char *p, const char *pe, VALUE& v);
|
57
|
+
const char* parse_dispatch(const char *p, const char *pe, VALUE& v);
|
58
|
+
const char* parse_set (const char *p, const char *pe, VALUE& v);
|
52
59
|
const char* parse_discard (const char *p, const char *pe);
|
53
|
-
const char* parse_tagged (const char *p, const char *pe,
|
60
|
+
const char* parse_tagged (const char *p, const char *pe, VALUE& v);
|
54
61
|
|
55
62
|
// defined in edn_parser_unicode.cc
|
56
63
|
static bool to_utf8(const char *s, std::size_t len, std::string& rslt);
|
57
64
|
|
58
65
|
// defined in edn_parser_util.cc
|
59
|
-
static
|
60
|
-
static
|
66
|
+
static VALUE integer_to_ruby(const char* str, std::size_t len);
|
67
|
+
static VALUE float_to_ruby (const char* str, std::size_t len);
|
61
68
|
|
62
|
-
static bool parse_byte_stream (const char *p, const char *pe,
|
63
|
-
static bool parse_escaped_char(const char *p, const char *pe,
|
69
|
+
static bool parse_byte_stream (const char *p, const char *pe, VALUE& rslt, bool encode);
|
70
|
+
static bool parse_escaped_char(const char *p, const char *pe, VALUE& rslt);
|
64
71
|
|
65
|
-
static
|
66
|
-
static
|
67
|
-
static
|
68
|
-
|
69
|
-
void error(const std::string& f, const std::string& err, char c) const;
|
70
|
-
void error(const std::string& f, char err_c) const { error(f, "", err_c); }
|
71
|
-
void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
|
72
|
+
static VALUE make_edn_symbol(VALUE sym);
|
73
|
+
static VALUE make_ruby_set(VALUE elems);
|
74
|
+
static VALUE tagged_element(VALUE name, VALUE data);
|
72
75
|
|
73
76
|
// utility method to convert a primitive in string form to a
|
74
77
|
// ruby type
|
75
78
|
template <class T>
|
76
|
-
static
|
79
|
+
static inline T buftotype(const char* p, std::size_t len) {
|
77
80
|
T val;
|
78
81
|
std::string buf;
|
79
82
|
buf.append(p, len);
|
80
83
|
std::istringstream(buf) >> val;
|
81
|
-
return
|
84
|
+
return val;
|
82
85
|
}
|
83
86
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
Rice::Object process(const std::string& data) { return parse(data.c_str(), data.length()); }
|
88
|
-
|
87
|
+
void error(const std::string& f, const std::string& err, char c) const;
|
88
|
+
void error(const std::string& f, char err_c) const { error(f, "", err_c); }
|
89
|
+
void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
|
89
90
|
}; // Engine
|
90
91
|
|
91
92
|
} // namespace
|
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -1,11 +1,7 @@
|
|
1
1
|
#include <iostream>
|
2
2
|
#include <string>
|
3
3
|
#include <stack>
|
4
|
-
|
5
|
-
#include <rice/Hash.hpp>
|
6
|
-
#include <rice/Array.hpp>
|
7
|
-
#include <rice/to_from_ruby.hpp>
|
8
|
-
#include <rice/Exception.hpp>
|
4
|
+
#include <exception>
|
9
5
|
|
10
6
|
#include "edn_parser.h"
|
11
7
|
|
@@ -73,23 +69,23 @@
|
|
73
69
|
|
74
70
|
action parse_string {
|
75
71
|
// string types within double-quotes
|
76
|
-
const char *np = parse_string(fpc, pe,
|
72
|
+
const char *np = parse_string(fpc, pe, v);
|
77
73
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
78
74
|
}
|
79
75
|
|
80
76
|
action parse_keyword {
|
81
77
|
// tokens with a leading ':'
|
82
|
-
const char *np = parse_keyword(fpc, pe,
|
78
|
+
const char *np = parse_keyword(fpc, pe, v);
|
83
79
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
84
80
|
}
|
85
81
|
|
86
82
|
action parse_number {
|
87
83
|
// tokens w/ leading digits: non-negative integers & decimals.
|
88
84
|
// try to parse a decimal first
|
89
|
-
const char *np = parse_decimal(fpc, pe,
|
85
|
+
const char *np = parse_decimal(fpc, pe, v);
|
90
86
|
if (np == NULL) {
|
91
87
|
// if we can't, try to parse it as an int
|
92
|
-
np = parse_integer(fpc, pe,
|
88
|
+
np = parse_integer(fpc, pe, v);
|
93
89
|
}
|
94
90
|
|
95
91
|
if (np) {
|
@@ -105,26 +101,27 @@
|
|
105
101
|
|
106
102
|
action parse_operator {
|
107
103
|
// stand-alone operators *, +, -, etc.
|
108
|
-
const char *np = parse_operator(fpc, pe,
|
104
|
+
const char *np = parse_operator(fpc, pe, v);
|
109
105
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
110
106
|
}
|
111
107
|
|
112
108
|
action parse_char {
|
113
109
|
// tokens w/ leading \ (escaped characters \newline, \c, etc.)
|
114
|
-
const char *np = parse_esc_char(fpc, pe,
|
110
|
+
const char *np = parse_esc_char(fpc, pe, v);
|
115
111
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
116
112
|
}
|
117
113
|
|
118
114
|
action parse_symbol {
|
119
115
|
// user identifiers and reserved keywords (true, false, nil)
|
120
|
-
|
116
|
+
VALUE sym = Qnil;
|
121
117
|
const char *np = parse_symbol(fpc, pe, sym);
|
122
118
|
if (np == NULL) { fhold; fbreak; } else {
|
123
|
-
|
124
|
-
|
125
|
-
else if (sym
|
119
|
+
// parse_symbol will make 'sym' a ruby string
|
120
|
+
if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
|
121
|
+
else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
|
122
|
+
else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
|
126
123
|
else {
|
127
|
-
|
124
|
+
v = Parser::make_edn_symbol(sym);
|
128
125
|
}
|
129
126
|
fexec np;
|
130
127
|
}
|
@@ -132,25 +129,25 @@
|
|
132
129
|
|
133
130
|
action parse_vector {
|
134
131
|
// [
|
135
|
-
const char *np = parse_vector(fpc, pe,
|
132
|
+
const char *np = parse_vector(fpc, pe, v);
|
136
133
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
137
134
|
}
|
138
135
|
|
139
136
|
action parse_list {
|
140
137
|
// (
|
141
|
-
const char *np = parse_list(fpc, pe,
|
138
|
+
const char *np = parse_list(fpc, pe, v);
|
142
139
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
143
140
|
}
|
144
141
|
|
145
142
|
action parse_map {
|
146
143
|
// {
|
147
|
-
const char *np = parse_map(fpc, pe,
|
144
|
+
const char *np = parse_map(fpc, pe, v);
|
148
145
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
149
146
|
}
|
150
147
|
|
151
148
|
action parse_dispatch {
|
152
149
|
// handles tokens w/ leading # ("#_", "#{", and tagged elems)
|
153
|
-
const char *np = parse_dispatch(fpc + 1, pe,
|
150
|
+
const char *np = parse_dispatch(fpc + 1, pe, v);
|
154
151
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
155
152
|
}
|
156
153
|
|
@@ -170,7 +167,7 @@
|
|
170
167
|
}%%
|
171
168
|
|
172
169
|
|
173
|
-
const char *edn::Parser::parse_value(const char *p, const char *pe,
|
170
|
+
const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
174
171
|
{
|
175
172
|
int cs;
|
176
173
|
|
@@ -203,7 +200,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
|
|
203
200
|
write data;
|
204
201
|
|
205
202
|
action parse_string {
|
206
|
-
if (Parser::parse_byte_stream(p_save + 1, p,
|
203
|
+
if (Parser::parse_byte_stream(p_save + 1, p, v, encode)) {
|
207
204
|
fexec p + 1;
|
208
205
|
} else {
|
209
206
|
fhold; fbreak;
|
@@ -224,21 +221,18 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
|
|
224
221
|
}%%
|
225
222
|
|
226
223
|
|
227
|
-
const char* edn::Parser::parse_string(const char *p, const char *pe,
|
224
|
+
const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
|
228
225
|
{
|
229
226
|
// std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
|
230
227
|
static const char* EDN_TYPE = "string";
|
231
228
|
int cs;
|
232
229
|
bool encode = false;
|
233
|
-
const char *eof = pe;
|
234
|
-
Rice::String s;
|
235
230
|
|
236
231
|
%% write init;
|
237
|
-
p_save = p;
|
232
|
+
const char* p_save = p;
|
238
233
|
%% write exec;
|
239
234
|
|
240
235
|
if (cs >= EDN_string_first_final) {
|
241
|
-
o = s;
|
242
236
|
return p + 1;
|
243
237
|
}
|
244
238
|
else if (cs == EDN_string_error) {
|
@@ -269,20 +263,20 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Objec
|
|
269
263
|
}%%
|
270
264
|
|
271
265
|
|
272
|
-
const char* edn::Parser::parse_keyword(const char *p, const char *pe,
|
266
|
+
const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
|
273
267
|
{
|
274
268
|
int cs;
|
275
269
|
|
276
270
|
%% write init;
|
277
|
-
p_save = p;
|
271
|
+
const char* p_save = p;
|
278
272
|
%% write exec;
|
279
273
|
|
280
274
|
if (cs >= EDN_keyword_first_final) {
|
281
275
|
std::string buf;
|
282
276
|
uint32_t len = p - p_save;
|
283
|
-
// don't include leading ':' because
|
277
|
+
// don't include leading ':' because the ruby symbol will handle it
|
284
278
|
buf.append(p_save + 1, len - 1);
|
285
|
-
|
279
|
+
v = ID2SYM(rb_intern(buf.c_str()));
|
286
280
|
return p;
|
287
281
|
}
|
288
282
|
else if (cs == EDN_keyword_error) {
|
@@ -312,16 +306,16 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
|
|
312
306
|
}%%
|
313
307
|
|
314
308
|
|
315
|
-
const char* edn::Parser::parse_decimal(const char *p, const char *pe,
|
309
|
+
const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
|
316
310
|
{
|
317
311
|
int cs;
|
318
312
|
|
319
313
|
%% write init;
|
320
|
-
p_save = p;
|
314
|
+
const char* p_save = p;
|
321
315
|
%% write exec;
|
322
316
|
|
323
317
|
if (cs >= EDN_decimal_first_final) {
|
324
|
-
|
318
|
+
v = Parser::float_to_ruby(p_save, p - p_save);
|
325
319
|
return p + 1;
|
326
320
|
}
|
327
321
|
else if (cs == EDN_decimal_en_main) {} // silence ragel warning
|
@@ -344,16 +338,16 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
|
|
344
338
|
) (^[0-9MN+\-]? @exit);
|
345
339
|
}%%
|
346
340
|
|
347
|
-
const char* edn::Parser::parse_integer(const char *p, const char *pe,
|
341
|
+
const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
|
348
342
|
{
|
349
343
|
int cs;
|
350
344
|
|
351
345
|
%% write init;
|
352
|
-
p_save = p;
|
346
|
+
const char* p_save = p;
|
353
347
|
%% write exec;
|
354
348
|
|
355
349
|
if (cs >= EDN_integer_first_final) {
|
356
|
-
|
350
|
+
v = Parser::integer_to_ruby(p_save, p - p_save);
|
357
351
|
return p + 1;
|
358
352
|
}
|
359
353
|
else if (cs == EDN_integer_en_main) {} // silence ragel warning
|
@@ -377,10 +371,10 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
377
371
|
|
378
372
|
action parse_symbol {
|
379
373
|
// parse a symbol including the leading operator (-, +, .)
|
380
|
-
|
374
|
+
VALUE sym = Qnil;
|
381
375
|
const char *np = parse_symbol(p_save, pe, sym);
|
382
376
|
if (np == NULL) { fhold; fbreak; } else {
|
383
|
-
|
377
|
+
v = Parser::make_edn_symbol(sym);
|
384
378
|
fexec np;
|
385
379
|
}
|
386
380
|
}
|
@@ -391,10 +385,10 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
391
385
|
// the leading - or +
|
392
386
|
//
|
393
387
|
// try to parse a decimal first
|
394
|
-
const char *np = parse_decimal(p_save, pe,
|
388
|
+
const char *np = parse_decimal(p_save, pe, v);
|
395
389
|
if (np == NULL) {
|
396
390
|
// if we can't, try to parse it as an int
|
397
|
-
np = parse_integer(p_save, pe,
|
391
|
+
np = parse_integer(p_save, pe, v);
|
398
392
|
}
|
399
393
|
|
400
394
|
if (np) {
|
@@ -410,9 +404,9 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
410
404
|
|
411
405
|
action parse_operator {
|
412
406
|
// stand-alone operators (-, +, /, ... etc)
|
413
|
-
|
414
|
-
sym
|
415
|
-
|
407
|
+
char op[2] = { *p_save, 0 };
|
408
|
+
VALUE sym = rb_str_new2(op);
|
409
|
+
v = Parser::make_edn_symbol(sym);
|
416
410
|
}
|
417
411
|
|
418
412
|
|
@@ -424,12 +418,12 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
|
|
424
418
|
}%%
|
425
419
|
|
426
420
|
|
427
|
-
const char* edn::Parser::parse_operator(const char *p, const char *pe,
|
421
|
+
const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
|
428
422
|
{
|
429
423
|
int cs;
|
430
424
|
|
431
425
|
%% write init;
|
432
|
-
p_save = p;
|
426
|
+
const char* p_save = p;
|
433
427
|
%% write exec;
|
434
428
|
|
435
429
|
if (cs >= EDN_operator_first_final) {
|
@@ -463,17 +457,17 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, Rice::Obj
|
|
463
457
|
}%%
|
464
458
|
|
465
459
|
|
466
|
-
const char* edn::Parser::parse_esc_char(const char *p, const char *pe,
|
460
|
+
const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
|
467
461
|
{
|
468
462
|
int cs;
|
469
463
|
|
470
464
|
%% write init;
|
471
|
-
p_save = p;
|
465
|
+
const char* p_save = p;
|
472
466
|
%% write exec;
|
473
467
|
|
474
468
|
if (cs >= EDN_escaped_char_first_final) {
|
475
469
|
// convert the escaped value to a character
|
476
|
-
if (!Parser::parse_escaped_char(p_save + 1, p,
|
470
|
+
if (!Parser::parse_escaped_char(p_save + 1, p, v)) {
|
477
471
|
return pe;
|
478
472
|
}
|
479
473
|
return p;
|
@@ -494,6 +488,7 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Obj
|
|
494
488
|
// character and an optional leading operator (name, -today,
|
495
489
|
// .yesterday)
|
496
490
|
//
|
491
|
+
//
|
497
492
|
%%{
|
498
493
|
machine EDN_symbol;
|
499
494
|
include EDN_common;
|
@@ -507,18 +502,19 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Obj
|
|
507
502
|
}%%
|
508
503
|
|
509
504
|
|
510
|
-
const char* edn::Parser::parse_symbol(const char *p, const char *pe,
|
505
|
+
const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
|
511
506
|
{
|
512
507
|
int cs;
|
513
508
|
|
514
509
|
%% write init;
|
515
|
-
p_save = p;
|
510
|
+
const char* p_save = p;
|
516
511
|
%% write exec;
|
517
512
|
|
518
513
|
if (cs >= EDN_symbol_first_final) {
|
519
514
|
// copy the symbol text
|
520
|
-
|
521
|
-
|
515
|
+
if (s == Qnil)
|
516
|
+
s = rb_str_new2("");
|
517
|
+
rb_str_cat(s, p_save, p - p_save);
|
522
518
|
return p;
|
523
519
|
}
|
524
520
|
else if (cs == EDN_symbol_error) {
|
@@ -533,7 +529,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
533
529
|
|
534
530
|
// ============================================================
|
535
531
|
// EDN_sequence_common is used to parse EDN containers - elements are
|
536
|
-
// initially stored in
|
532
|
+
// initially stored in an array and then the final corresponding
|
537
533
|
// container is built from the list (although, for vectors, lists, and
|
538
534
|
// sets the same array is used)
|
539
535
|
//
|
@@ -546,7 +542,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
546
542
|
// set). Regardless of the sequence type, an array of the
|
547
543
|
// items is built. Once done, the sequence parser will convert
|
548
544
|
// if needed
|
549
|
-
|
545
|
+
VALUE e;
|
550
546
|
const char *np = parse_value(fpc, pe, e);
|
551
547
|
if (np == NULL) {
|
552
548
|
fhold; fbreak;
|
@@ -560,7 +556,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
560
556
|
else {
|
561
557
|
// otherwise we add it to the list of elements for the
|
562
558
|
// corresponding container
|
563
|
-
elems
|
559
|
+
rb_ary_push(elems, e);
|
564
560
|
}
|
565
561
|
fexec np;
|
566
562
|
}
|
@@ -591,18 +587,18 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
|
|
591
587
|
//
|
592
588
|
// vector parsing
|
593
589
|
//
|
594
|
-
const char* edn::Parser::parse_vector(const char *p, const char *pe,
|
590
|
+
const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
|
595
591
|
{
|
596
592
|
static const char* EDN_TYPE = "vector";
|
597
593
|
|
598
594
|
int cs;
|
599
|
-
|
595
|
+
VALUE elems = rb_ary_new(); // will store the vector's elements
|
600
596
|
|
601
597
|
%% write init;
|
602
598
|
%% write exec;
|
603
599
|
|
604
600
|
if (cs >= EDN_vector_first_final) {
|
605
|
-
|
601
|
+
v = elems;
|
606
602
|
return p + 1;
|
607
603
|
}
|
608
604
|
else if (cs == EDN_vector_error) {
|
@@ -634,18 +630,18 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Objec
|
|
634
630
|
//
|
635
631
|
// list parsing
|
636
632
|
//
|
637
|
-
const char* edn::Parser::parse_list(const char *p, const char *pe,
|
633
|
+
const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
|
638
634
|
{
|
639
635
|
static const char* EDN_TYPE = "list";
|
640
636
|
|
641
637
|
int cs;
|
642
|
-
|
638
|
+
VALUE elems = rb_ary_new();
|
643
639
|
|
644
640
|
%% write init;
|
645
641
|
%% write exec;
|
646
642
|
|
647
643
|
if (cs >= EDN_list_first_final) {
|
648
|
-
|
644
|
+
v = elems;
|
649
645
|
return p + 1;
|
650
646
|
}
|
651
647
|
else if (cs == EDN_list_error) {
|
@@ -676,14 +672,14 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object&
|
|
676
672
|
}%%
|
677
673
|
|
678
674
|
|
679
|
-
const char* edn::Parser::parse_map(const char *p, const char *pe,
|
675
|
+
const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
|
680
676
|
{
|
681
677
|
static const char* EDN_TYPE = "map";
|
682
678
|
|
683
679
|
int cs;
|
684
680
|
// since we don't know whether we're looking at a key or value,
|
685
681
|
// initially store all elements in a list
|
686
|
-
|
682
|
+
VALUE elems = rb_ary_new();
|
687
683
|
|
688
684
|
%% write init;
|
689
685
|
%% write exec;
|
@@ -691,20 +687,20 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
691
687
|
if (cs >= EDN_map_first_final) {
|
692
688
|
|
693
689
|
// hash parsing is done. Make sure we have an even count
|
694
|
-
if ((elems
|
690
|
+
if ((RARRAY_LEN(elems) % 2) != 0) {
|
695
691
|
error(__FUNCTION__, "odd number of elements in map");
|
696
692
|
return pe;
|
697
693
|
}
|
698
694
|
|
699
695
|
// now convert the sequence to a hash
|
700
|
-
|
701
|
-
while (elems
|
696
|
+
VALUE rslt = rb_hash_new();
|
697
|
+
while (RARRAY_LEN(elems) > 0)
|
702
698
|
{
|
703
|
-
|
704
|
-
rslt
|
699
|
+
VALUE k = rb_ary_shift(elems);
|
700
|
+
rb_hash_aset(rslt, k, rb_ary_shift(elems));
|
705
701
|
}
|
706
702
|
|
707
|
-
|
703
|
+
v = rslt;
|
708
704
|
return p + 1;
|
709
705
|
}
|
710
706
|
else if (cs == EDN_map_error) {
|
@@ -729,7 +725,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
729
725
|
|
730
726
|
action parse_set {
|
731
727
|
// #{ }
|
732
|
-
const char *np = parse_set(fpc, pe,
|
728
|
+
const char *np = parse_set(fpc, pe, v);
|
733
729
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
734
730
|
}
|
735
731
|
|
@@ -741,7 +737,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
741
737
|
|
742
738
|
action parse_tagged {
|
743
739
|
// #inst, #uuid, or #user/tag
|
744
|
-
const char *np = parse_tagged(fpc, pe,
|
740
|
+
const char *np = parse_tagged(fpc, pe, v);
|
745
741
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
746
742
|
}
|
747
743
|
|
@@ -754,7 +750,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
|
|
754
750
|
}%%
|
755
751
|
|
756
752
|
|
757
|
-
const char* edn::Parser::parse_dispatch(const char *p, const char *pe,
|
753
|
+
const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
|
758
754
|
{
|
759
755
|
int cs;
|
760
756
|
|
@@ -794,19 +790,19 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, Rice::Obj
|
|
794
790
|
//
|
795
791
|
// set parsing
|
796
792
|
//
|
797
|
-
const char* edn::Parser::parse_set(const char *p, const char *pe,
|
793
|
+
const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
|
798
794
|
{
|
799
795
|
static const char* EDN_TYPE = "set";
|
800
796
|
|
801
797
|
int cs;
|
802
|
-
|
798
|
+
VALUE elems = rb_ary_new(); // stored as an array
|
803
799
|
|
804
800
|
%% write init;
|
805
801
|
%% write exec;
|
806
802
|
|
807
803
|
if (cs >= EDN_set_first_final) {
|
808
804
|
// all elements collected; now convert to a set
|
809
|
-
|
805
|
+
v = Parser::make_ruby_set(elems);
|
810
806
|
return p + 1;
|
811
807
|
}
|
812
808
|
else if (cs == EDN_set_error) {
|
@@ -821,9 +817,8 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
|
|
821
817
|
|
822
818
|
// ============================================================
|
823
819
|
// discard - consume the discard token and parse the next value to
|
824
|
-
// discard. TODO: perhaps optimize this so no object data is built
|
825
|
-
//
|
826
|
-
// delimiters
|
820
|
+
// discard. TODO: perhaps optimize this so no object data is built by
|
821
|
+
// defining a machine to consume items within container delimiters
|
827
822
|
//
|
828
823
|
%%{
|
829
824
|
machine EDN_discard;
|
@@ -834,12 +829,12 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
|
|
834
829
|
begin_discard = '_';
|
835
830
|
|
836
831
|
action discard_value {
|
837
|
-
const char *np = parse_value(fpc, pe,
|
832
|
+
const char *np = parse_value(fpc, pe, v);
|
838
833
|
if (np) {
|
839
834
|
// this token is to be discard it so store it in the
|
840
835
|
// discard stack - we really don't need to save it so this
|
841
836
|
// could be simplified
|
842
|
-
discard.push(
|
837
|
+
discard.push(v);
|
843
838
|
fexec np;
|
844
839
|
} else {
|
845
840
|
fhold; fbreak;
|
@@ -856,7 +851,7 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
|
|
856
851
|
const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
857
852
|
{
|
858
853
|
int cs;
|
859
|
-
|
854
|
+
VALUE v;
|
860
855
|
|
861
856
|
%% write init;
|
862
857
|
%% write exec;
|
@@ -914,10 +909,10 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
|
|
914
909
|
}%%
|
915
910
|
|
916
911
|
|
917
|
-
const char* edn::Parser::parse_tagged(const char *p, const char *pe,
|
912
|
+
const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
|
918
913
|
{
|
919
|
-
|
920
|
-
|
914
|
+
VALUE sym_name = Qnil;
|
915
|
+
VALUE data = Qnil;
|
921
916
|
|
922
917
|
int cs;
|
923
918
|
|
@@ -930,9 +925,9 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
|
|
930
925
|
try {
|
931
926
|
// tagged_element makes a call to ruby which may throw an
|
932
927
|
// exception when parsing the data
|
933
|
-
|
934
|
-
} catch (
|
935
|
-
error(__FUNCTION__, e.
|
928
|
+
v = Parser::tagged_element(sym_name, data);
|
929
|
+
} catch (std::exception& e) {
|
930
|
+
error(__FUNCTION__, e.what());
|
936
931
|
return pe;
|
937
932
|
}
|
938
933
|
return p + 1;
|
@@ -948,13 +943,14 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
|
|
948
943
|
|
949
944
|
|
950
945
|
// ============================================================
|
951
|
-
//
|
946
|
+
// parses entire input but expects single valid token at the
|
947
|
+
// top-level, therefore, does not tokenize source stream
|
952
948
|
//
|
953
949
|
%%{
|
954
|
-
machine
|
950
|
+
machine EDN_parser;
|
955
951
|
include EDN_common;
|
956
952
|
|
957
|
-
write data
|
953
|
+
write data;
|
958
954
|
|
959
955
|
action parse_value {
|
960
956
|
const char* np = parse_value(fpc, pe, result);
|
@@ -968,36 +964,71 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
|
|
968
964
|
main := ignore* sequence? ignore*;
|
969
965
|
}%%
|
970
966
|
|
971
|
-
|
972
|
-
|
973
|
-
// but EDN's Reader allows token by token parsing
|
974
|
-
Rice::Object edn::Parser::parse(const char* buf, std::size_t len)
|
967
|
+
|
968
|
+
VALUE edn::Parser::parse(const char* src, std::size_t len)
|
975
969
|
{
|
976
970
|
int cs;
|
977
|
-
|
978
|
-
const char *pe;
|
979
|
-
Rice::Object result;
|
971
|
+
VALUE result = Qnil;
|
980
972
|
|
981
|
-
//
|
982
|
-
|
983
|
-
p_save = NULL;
|
984
|
-
while (!discard.empty())
|
985
|
-
discard.pop();
|
973
|
+
// reset line counter & discard stack
|
974
|
+
reset();
|
986
975
|
|
987
976
|
%% write init;
|
988
|
-
p =
|
977
|
+
p = src;
|
989
978
|
pe = p + len;
|
990
|
-
eof = pe;
|
979
|
+
eof = pe;
|
991
980
|
%% write exec;
|
992
981
|
|
993
|
-
if (cs ==
|
982
|
+
if (cs == EDN_parser_error) {
|
994
983
|
error(__FUNCTION__, *p);
|
995
984
|
return Qnil;
|
996
985
|
}
|
997
|
-
else if (cs ==
|
986
|
+
else if (cs == EDN_parser_first_final) {
|
987
|
+
// whole source is parsed so reset
|
988
|
+
p = pe = eof = NULL;
|
989
|
+
reset();
|
990
|
+
}
|
991
|
+
else if (cs == EDN_parser_en_main) {} // silence ragel warning
|
992
|
+
return result;
|
993
|
+
}
|
994
|
+
|
995
|
+
|
996
|
+
// ============================================================
|
997
|
+
// token-by-token machine
|
998
|
+
//
|
999
|
+
%%{
|
1000
|
+
machine EDN_tokens;
|
1001
|
+
include EDN_common;
|
1002
|
+
|
1003
|
+
write data noerror nofinal;
|
1004
|
+
|
1005
|
+
action parse_value {
|
1006
|
+
const char* np = parse_value(fpc, pe, result);
|
1007
|
+
if (np == NULL) { fhold; fbreak; } else { fexec np; }
|
1008
|
+
}
|
1009
|
+
|
1010
|
+
element = begin_value >parse_value;
|
1011
|
+
|
1012
|
+
main := ignore* element ignore*;
|
1013
|
+
}%%
|
1014
|
+
|
1015
|
+
|
1016
|
+
//
|
1017
|
+
//
|
1018
|
+
VALUE edn::Parser::next()
|
1019
|
+
{
|
1020
|
+
VALUE result = Qnil;
|
1021
|
+
int cs;
|
1022
|
+
|
1023
|
+
%% write init;
|
1024
|
+
%% write exec;
|
1025
|
+
|
1026
|
+
if (cs == EDN_tokens_en_main) {} // silence ragel warning
|
1027
|
+
|
998
1028
|
return result;
|
999
1029
|
}
|
1000
1030
|
|
1031
|
+
|
1001
1032
|
/*
|
1002
1033
|
* Local variables:
|
1003
1034
|
* mode: c
|