edn_turbo 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,18 +5,12 @@
5
5
  #include <sstream>
6
6
  #include <stack>
7
7
 
8
- #include <rice/Module.hpp>
9
- #include <rice/Object.hpp>
10
- #include <rice/Array.hpp>
11
- #include <rice/String.hpp>
12
- #include <rice/to_from_ruby.hpp>
13
-
14
- typedef unsigned char ui8;
8
+ #include <ruby/ruby.h>
15
9
 
16
10
 
17
11
  namespace edn
18
12
  {
19
- extern Rice::Module rb_mEDNT;
13
+ extern VALUE rb_mEDNT;
20
14
  extern VALUE EDNT_MAKE_EDN_SYMBOL;
21
15
  extern VALUE EDNT_MAKE_SET_METHOD;
22
16
  extern VALUE EDNT_TAGGED_ELEM;
@@ -27,65 +21,72 @@ namespace edn
27
21
  // C-extension EDN Parser class representation
28
22
  class Parser
29
23
  {
30
- private:
24
+ public:
25
+ Parser() : p(NULL), pe(NULL), eof(NULL), line_number(1) { }
31
26
 
32
- std::size_t line_number;
33
- const char* p_save;
27
+ void set_source(const char* src, std::size_t len);
28
+
29
+ bool is_eof() const { return (p != NULL && p == pe); }
30
+ VALUE parse(const char* s, std::size_t len);
31
+ // VALUE read(const std::string& data) { return parse(data.c_str(), data.length()); }
32
+ VALUE next();
33
+
34
+ static void throw_error(int error);
35
+
36
+ private:
37
+ // ragel needs these
38
+ const char* p;
39
+ const char* pe;
34
40
  const char* eof;
35
- std::stack<Rice::Object> discard;
36
-
37
- Rice::Object parse(const char* s, std::size_t len);
38
-
39
- const char* parse_value (const char *p, const char *pe, Rice::Object& o);
40
- const char* parse_string (const char *p, const char *pe, Rice::Object& o);
41
- const char* parse_keyword (const char *p, const char *pe, Rice::Object& o);
42
- const char* parse_decimal (const char *p, const char *pe, Rice::Object& o);
43
- const char* parse_integer (const char *p, const char *pe, Rice::Object& o);
44
- const char* parse_operator(const char *p, const char *pe, Rice::Object& o);
45
- const char* parse_esc_char(const char *p, const char *pe, Rice::Object& o);
46
- const char* parse_symbol (const char *p, const char *pe, std::string& s);
47
- const char* parse_vector (const char *p, const char *pe, Rice::Object& o);
48
- const char* parse_list (const char *p, const char *pe, Rice::Object& o);
49
- const char* parse_map (const char *p, const char *pe, Rice::Object& o);
50
- const char* parse_dispatch(const char *p, const char *pe, Rice::Object& o);
51
- const char* parse_set (const char *p, const char *pe, Rice::Object& o);
41
+ std::size_t line_number;
42
+ std::stack<VALUE> discard;
43
+
44
+ void reset();
45
+
46
+ const char* parse_value (const char *p, const char *pe, VALUE& v);
47
+ const char* parse_string (const char *p, const char *pe, VALUE& v);
48
+ const char* parse_keyword (const char *p, const char *pe, VALUE& v);
49
+ const char* parse_decimal (const char *p, const char *pe, VALUE& v);
50
+ const char* parse_integer (const char *p, const char *pe, VALUE& v);
51
+ const char* parse_operator(const char *p, const char *pe, VALUE& v);
52
+ const char* parse_esc_char(const char *p, const char *pe, VALUE& v);
53
+ const char* parse_symbol (const char *p, const char *pe, VALUE& v);
54
+ const char* parse_vector (const char *p, const char *pe, VALUE& v);
55
+ const char* parse_list (const char *p, const char *pe, VALUE& v);
56
+ const char* parse_map (const char *p, const char *pe, VALUE& v);
57
+ const char* parse_dispatch(const char *p, const char *pe, VALUE& v);
58
+ const char* parse_set (const char *p, const char *pe, VALUE& v);
52
59
  const char* parse_discard (const char *p, const char *pe);
53
- const char* parse_tagged (const char *p, const char *pe, Rice::Object& o);
60
+ const char* parse_tagged (const char *p, const char *pe, VALUE& v);
54
61
 
55
62
  // defined in edn_parser_unicode.cc
56
63
  static bool to_utf8(const char *s, std::size_t len, std::string& rslt);
57
64
 
58
65
  // defined in edn_parser_util.cc
59
- static Rice::Object integer_to_ruby(const char* str, std::size_t len);
60
- static Rice::Object float_to_ruby (const char* str, std::size_t len);
66
+ static VALUE integer_to_ruby(const char* str, std::size_t len);
67
+ static VALUE float_to_ruby (const char* str, std::size_t len);
61
68
 
62
- static bool parse_byte_stream (const char *p, const char *pe, Rice::String& s, bool encode);
63
- static bool parse_escaped_char(const char *p, const char *pe, Rice::Object& s);
69
+ static bool parse_byte_stream (const char *p, const char *pe, VALUE& rslt, bool encode);
70
+ static bool parse_escaped_char(const char *p, const char *pe, VALUE& rslt);
64
71
 
65
- static Rice::Object make_edn_symbol(const std::string& name);
66
- static Rice::Object make_ruby_set(const Rice::Array& elems);
67
- static Rice::Object tagged_element(const std::string& name, const Rice::Object& data);
68
-
69
- void error(const std::string& f, const std::string& err, char c) const;
70
- void error(const std::string& f, char err_c) const { error(f, "", err_c); }
71
- void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
72
+ static VALUE make_edn_symbol(VALUE sym);
73
+ static VALUE make_ruby_set(VALUE elems);
74
+ static VALUE tagged_element(VALUE name, VALUE data);
72
75
 
73
76
  // utility method to convert a primitive in string form to a
74
77
  // ruby type
75
78
  template <class T>
76
- static Rice::Object buftotype(const char* p, std::size_t len) {
79
+ static inline T buftotype(const char* p, std::size_t len) {
77
80
  T val;
78
81
  std::string buf;
79
82
  buf.append(p, len);
80
83
  std::istringstream(buf) >> val;
81
- return to_ruby<T>(val);
84
+ return val;
82
85
  }
83
86
 
84
- public:
85
- Parser() : line_number(1), p_save(NULL), eof(NULL) { }
86
-
87
- Rice::Object process(const std::string& data) { return parse(data.c_str(), data.length()); }
88
-
87
+ void error(const std::string& f, const std::string& err, char c) const;
88
+ void error(const std::string& f, char err_c) const { error(f, "", err_c); }
89
+ void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
89
90
  }; // Engine
90
91
 
91
92
  } // namespace
@@ -1,11 +1,7 @@
1
1
  #include <iostream>
2
2
  #include <string>
3
3
  #include <stack>
4
-
5
- #include <rice/Hash.hpp>
6
- #include <rice/Array.hpp>
7
- #include <rice/to_from_ruby.hpp>
8
- #include <rice/Exception.hpp>
4
+ #include <exception>
9
5
 
10
6
  #include "edn_parser.h"
11
7
 
@@ -73,23 +69,23 @@
73
69
 
74
70
  action parse_string {
75
71
  // string types within double-quotes
76
- const char *np = parse_string(fpc, pe, o);
72
+ const char *np = parse_string(fpc, pe, v);
77
73
  if (np == NULL) { fhold; fbreak; } else fexec np;
78
74
  }
79
75
 
80
76
  action parse_keyword {
81
77
  // tokens with a leading ':'
82
- const char *np = parse_keyword(fpc, pe, o);
78
+ const char *np = parse_keyword(fpc, pe, v);
83
79
  if (np == NULL) { fhold; fbreak; } else fexec np;
84
80
  }
85
81
 
86
82
  action parse_number {
87
83
  // tokens w/ leading digits: non-negative integers & decimals.
88
84
  // try to parse a decimal first
89
- const char *np = parse_decimal(fpc, pe, o);
85
+ const char *np = parse_decimal(fpc, pe, v);
90
86
  if (np == NULL) {
91
87
  // if we can't, try to parse it as an int
92
- np = parse_integer(fpc, pe, o);
88
+ np = parse_integer(fpc, pe, v);
93
89
  }
94
90
 
95
91
  if (np) {
@@ -105,26 +101,27 @@
105
101
 
106
102
  action parse_operator {
107
103
  // stand-alone operators *, +, -, etc.
108
- const char *np = parse_operator(fpc, pe, o);
104
+ const char *np = parse_operator(fpc, pe, v);
109
105
  if (np == NULL) { fhold; fbreak; } else fexec np;
110
106
  }
111
107
 
112
108
  action parse_char {
113
109
  // tokens w/ leading \ (escaped characters \newline, \c, etc.)
114
- const char *np = parse_esc_char(fpc, pe, o);
110
+ const char *np = parse_esc_char(fpc, pe, v);
115
111
  if (np == NULL) { fhold; fbreak; } else fexec np;
116
112
  }
117
113
 
118
114
  action parse_symbol {
119
115
  // user identifiers and reserved keywords (true, false, nil)
120
- std::string sym;
116
+ VALUE sym = Qnil;
121
117
  const char *np = parse_symbol(fpc, pe, sym);
122
118
  if (np == NULL) { fhold; fbreak; } else {
123
- if (sym == "true") { o = Qtrue; }
124
- else if (sym == "false") { o = Qfalse; }
125
- else if (sym == "nil") { o = Qnil; }
119
+ // parse_symbol will make 'sym' a ruby string
120
+ if (std::strcmp(RSTRING_PTR(sym), "true") == 0) { v = Qtrue; }
121
+ else if (std::strcmp(RSTRING_PTR(sym), "false") == 0) { v = Qfalse; }
122
+ else if (std::strcmp(RSTRING_PTR(sym), "nil") == 0) { v = Qnil; }
126
123
  else {
127
- o = Parser::make_edn_symbol(sym);
124
+ v = Parser::make_edn_symbol(sym);
128
125
  }
129
126
  fexec np;
130
127
  }
@@ -132,25 +129,25 @@
132
129
 
133
130
  action parse_vector {
134
131
  // [
135
- const char *np = parse_vector(fpc, pe, o);
132
+ const char *np = parse_vector(fpc, pe, v);
136
133
  if (np == NULL) { fhold; fbreak; } else fexec np;
137
134
  }
138
135
 
139
136
  action parse_list {
140
137
  // (
141
- const char *np = parse_list(fpc, pe, o);
138
+ const char *np = parse_list(fpc, pe, v);
142
139
  if (np == NULL) { fhold; fbreak; } else fexec np;
143
140
  }
144
141
 
145
142
  action parse_map {
146
143
  // {
147
- const char *np = parse_map(fpc, pe, o);
144
+ const char *np = parse_map(fpc, pe, v);
148
145
  if (np == NULL) { fhold; fbreak; } else fexec np;
149
146
  }
150
147
 
151
148
  action parse_dispatch {
152
149
  // handles tokens w/ leading # ("#_", "#{", and tagged elems)
153
- const char *np = parse_dispatch(fpc + 1, pe, o);
150
+ const char *np = parse_dispatch(fpc + 1, pe, v);
154
151
  if (np == NULL) { fhold; fbreak; } else fexec np;
155
152
  }
156
153
 
@@ -170,7 +167,7 @@
170
167
  }%%
171
168
 
172
169
 
173
- const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object& o)
170
+ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
174
171
  {
175
172
  int cs;
176
173
 
@@ -203,7 +200,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
203
200
  write data;
204
201
 
205
202
  action parse_string {
206
- if (Parser::parse_byte_stream(p_save + 1, p, s, encode)) {
203
+ if (Parser::parse_byte_stream(p_save + 1, p, v, encode)) {
207
204
  fexec p + 1;
208
205
  } else {
209
206
  fhold; fbreak;
@@ -224,21 +221,18 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
224
221
  }%%
225
222
 
226
223
 
227
- const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Object& o)
224
+ const char* edn::Parser::parse_string(const char *p, const char *pe, VALUE& v)
228
225
  {
229
226
  // std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
230
227
  static const char* EDN_TYPE = "string";
231
228
  int cs;
232
229
  bool encode = false;
233
- const char *eof = pe;
234
- Rice::String s;
235
230
 
236
231
  %% write init;
237
- p_save = p;
232
+ const char* p_save = p;
238
233
  %% write exec;
239
234
 
240
235
  if (cs >= EDN_string_first_final) {
241
- o = s;
242
236
  return p + 1;
243
237
  }
244
238
  else if (cs == EDN_string_error) {
@@ -269,20 +263,20 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Objec
269
263
  }%%
270
264
 
271
265
 
272
- const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Object& o)
266
+ const char* edn::Parser::parse_keyword(const char *p, const char *pe, VALUE& v)
273
267
  {
274
268
  int cs;
275
269
 
276
270
  %% write init;
277
- p_save = p;
271
+ const char* p_save = p;
278
272
  %% write exec;
279
273
 
280
274
  if (cs >= EDN_keyword_first_final) {
281
275
  std::string buf;
282
276
  uint32_t len = p - p_save;
283
- // don't include leading ':' because Rice::Symbol will handle it
277
+ // don't include leading ':' because the ruby symbol will handle it
284
278
  buf.append(p_save + 1, len - 1);
285
- o = Rice::Symbol(buf);
279
+ v = ID2SYM(rb_intern(buf.c_str()));
286
280
  return p;
287
281
  }
288
282
  else if (cs == EDN_keyword_error) {
@@ -312,16 +306,16 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
312
306
  }%%
313
307
 
314
308
 
315
- const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Object& o)
309
+ const char* edn::Parser::parse_decimal(const char *p, const char *pe, VALUE& v)
316
310
  {
317
311
  int cs;
318
312
 
319
313
  %% write init;
320
- p_save = p;
314
+ const char* p_save = p;
321
315
  %% write exec;
322
316
 
323
317
  if (cs >= EDN_decimal_first_final) {
324
- o = Parser::float_to_ruby(p_save, p - p_save);
318
+ v = Parser::float_to_ruby(p_save, p - p_save);
325
319
  return p + 1;
326
320
  }
327
321
  else if (cs == EDN_decimal_en_main) {} // silence ragel warning
@@ -344,16 +338,16 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
344
338
  ) (^[0-9MN+\-]? @exit);
345
339
  }%%
346
340
 
347
- const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Object& o)
341
+ const char* edn::Parser::parse_integer(const char *p, const char *pe, VALUE& v)
348
342
  {
349
343
  int cs;
350
344
 
351
345
  %% write init;
352
- p_save = p;
346
+ const char* p_save = p;
353
347
  %% write exec;
354
348
 
355
349
  if (cs >= EDN_integer_first_final) {
356
- o = Parser::integer_to_ruby(p_save, p - p_save);
350
+ v = Parser::integer_to_ruby(p_save, p - p_save);
357
351
  return p + 1;
358
352
  }
359
353
  else if (cs == EDN_integer_en_main) {} // silence ragel warning
@@ -377,10 +371,10 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
377
371
 
378
372
  action parse_symbol {
379
373
  // parse a symbol including the leading operator (-, +, .)
380
- std::string sym;
374
+ VALUE sym = Qnil;
381
375
  const char *np = parse_symbol(p_save, pe, sym);
382
376
  if (np == NULL) { fhold; fbreak; } else {
383
- o = Parser::make_edn_symbol(sym);
377
+ v = Parser::make_edn_symbol(sym);
384
378
  fexec np;
385
379
  }
386
380
  }
@@ -391,10 +385,10 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
391
385
  // the leading - or +
392
386
  //
393
387
  // try to parse a decimal first
394
- const char *np = parse_decimal(p_save, pe, o);
388
+ const char *np = parse_decimal(p_save, pe, v);
395
389
  if (np == NULL) {
396
390
  // if we can't, try to parse it as an int
397
- np = parse_integer(p_save, pe, o);
391
+ np = parse_integer(p_save, pe, v);
398
392
  }
399
393
 
400
394
  if (np) {
@@ -410,9 +404,9 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
410
404
 
411
405
  action parse_operator {
412
406
  // stand-alone operators (-, +, /, ... etc)
413
- std::string sym;
414
- sym += *(p_save);
415
- o = Parser::make_edn_symbol(sym);
407
+ char op[2] = { *p_save, 0 };
408
+ VALUE sym = rb_str_new2(op);
409
+ v = Parser::make_edn_symbol(sym);
416
410
  }
417
411
 
418
412
 
@@ -424,12 +418,12 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
424
418
  }%%
425
419
 
426
420
 
427
- const char* edn::Parser::parse_operator(const char *p, const char *pe, Rice::Object& o)
421
+ const char* edn::Parser::parse_operator(const char *p, const char *pe, VALUE& v)
428
422
  {
429
423
  int cs;
430
424
 
431
425
  %% write init;
432
- p_save = p;
426
+ const char* p_save = p;
433
427
  %% write exec;
434
428
 
435
429
  if (cs >= EDN_operator_first_final) {
@@ -463,17 +457,17 @@ const char* edn::Parser::parse_operator(const char *p, const char *pe, Rice::Obj
463
457
  }%%
464
458
 
465
459
 
466
- const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Object& o)
460
+ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, VALUE& v)
467
461
  {
468
462
  int cs;
469
463
 
470
464
  %% write init;
471
- p_save = p;
465
+ const char* p_save = p;
472
466
  %% write exec;
473
467
 
474
468
  if (cs >= EDN_escaped_char_first_final) {
475
469
  // convert the escaped value to a character
476
- if (!Parser::parse_escaped_char(p_save + 1, p, o)) {
470
+ if (!Parser::parse_escaped_char(p_save + 1, p, v)) {
477
471
  return pe;
478
472
  }
479
473
  return p;
@@ -494,6 +488,7 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Obj
494
488
  // character and an optional leading operator (name, -today,
495
489
  // .yesterday)
496
490
  //
491
+ //
497
492
  %%{
498
493
  machine EDN_symbol;
499
494
  include EDN_common;
@@ -507,18 +502,19 @@ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Obj
507
502
  }%%
508
503
 
509
504
 
510
- const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string& sym)
505
+ const char* edn::Parser::parse_symbol(const char *p, const char *pe, VALUE& s)
511
506
  {
512
507
  int cs;
513
508
 
514
509
  %% write init;
515
- p_save = p;
510
+ const char* p_save = p;
516
511
  %% write exec;
517
512
 
518
513
  if (cs >= EDN_symbol_first_final) {
519
514
  // copy the symbol text
520
- sym.clear();
521
- sym.append(p_save, p - p_save);
515
+ if (s == Qnil)
516
+ s = rb_str_new2("");
517
+ rb_str_cat(s, p_save, p - p_save);
522
518
  return p;
523
519
  }
524
520
  else if (cs == EDN_symbol_error) {
@@ -533,7 +529,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
533
529
 
534
530
  // ============================================================
535
531
  // EDN_sequence_common is used to parse EDN containers - elements are
536
- // initially stored in a rice array and then the final corresponding
532
+ // initially stored in an array and then the final corresponding
537
533
  // container is built from the list (although, for vectors, lists, and
538
534
  // sets the same array is used)
539
535
  //
@@ -546,7 +542,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
546
542
  // set). Regardless of the sequence type, an array of the
547
543
  // items is built. Once done, the sequence parser will convert
548
544
  // if needed
549
- Rice::Object e;
545
+ VALUE e;
550
546
  const char *np = parse_value(fpc, pe, e);
551
547
  if (np == NULL) {
552
548
  fhold; fbreak;
@@ -560,7 +556,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
560
556
  else {
561
557
  // otherwise we add it to the list of elements for the
562
558
  // corresponding container
563
- elems.push(e);
559
+ rb_ary_push(elems, e);
564
560
  }
565
561
  fexec np;
566
562
  }
@@ -591,18 +587,18 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
591
587
  //
592
588
  // vector parsing
593
589
  //
594
- const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Object& o)
590
+ const char* edn::Parser::parse_vector(const char *p, const char *pe, VALUE& v)
595
591
  {
596
592
  static const char* EDN_TYPE = "vector";
597
593
 
598
594
  int cs;
599
- Rice::Array elems; // will store the vector's elements
595
+ VALUE elems = rb_ary_new(); // will store the vector's elements
600
596
 
601
597
  %% write init;
602
598
  %% write exec;
603
599
 
604
600
  if (cs >= EDN_vector_first_final) {
605
- o = elems;
601
+ v = elems;
606
602
  return p + 1;
607
603
  }
608
604
  else if (cs == EDN_vector_error) {
@@ -634,18 +630,18 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Objec
634
630
  //
635
631
  // list parsing
636
632
  //
637
- const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object& o)
633
+ const char* edn::Parser::parse_list(const char *p, const char *pe, VALUE& v)
638
634
  {
639
635
  static const char* EDN_TYPE = "list";
640
636
 
641
637
  int cs;
642
- Rice::Array elems;
638
+ VALUE elems = rb_ary_new();
643
639
 
644
640
  %% write init;
645
641
  %% write exec;
646
642
 
647
643
  if (cs >= EDN_list_first_final) {
648
- o = elems;
644
+ v = elems;
649
645
  return p + 1;
650
646
  }
651
647
  else if (cs == EDN_list_error) {
@@ -676,14 +672,14 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object&
676
672
  }%%
677
673
 
678
674
 
679
- const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object& o)
675
+ const char* edn::Parser::parse_map(const char *p, const char *pe, VALUE& v)
680
676
  {
681
677
  static const char* EDN_TYPE = "map";
682
678
 
683
679
  int cs;
684
680
  // since we don't know whether we're looking at a key or value,
685
681
  // initially store all elements in a list
686
- Rice::Array elems;
682
+ VALUE elems = rb_ary_new();
687
683
 
688
684
  %% write init;
689
685
  %% write exec;
@@ -691,20 +687,20 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
691
687
  if (cs >= EDN_map_first_final) {
692
688
 
693
689
  // hash parsing is done. Make sure we have an even count
694
- if ((elems.size() % 2) != 0) {
690
+ if ((RARRAY_LEN(elems) % 2) != 0) {
695
691
  error(__FUNCTION__, "odd number of elements in map");
696
692
  return pe;
697
693
  }
698
694
 
699
695
  // now convert the sequence to a hash
700
- Rice::Hash rslt;
701
- while (elems.size())
696
+ VALUE rslt = rb_hash_new();
697
+ while (RARRAY_LEN(elems) > 0)
702
698
  {
703
- Rice::Object k = elems.shift();
704
- rslt[k] = elems.shift();
699
+ VALUE k = rb_ary_shift(elems);
700
+ rb_hash_aset(rslt, k, rb_ary_shift(elems));
705
701
  }
706
702
 
707
- o = rslt;
703
+ v = rslt;
708
704
  return p + 1;
709
705
  }
710
706
  else if (cs == EDN_map_error) {
@@ -729,7 +725,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
729
725
 
730
726
  action parse_set {
731
727
  // #{ }
732
- const char *np = parse_set(fpc, pe, o);
728
+ const char *np = parse_set(fpc, pe, v);
733
729
  if (np == NULL) { fhold; fbreak; } else fexec np;
734
730
  }
735
731
 
@@ -741,7 +737,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
741
737
 
742
738
  action parse_tagged {
743
739
  // #inst, #uuid, or #user/tag
744
- const char *np = parse_tagged(fpc, pe, o);
740
+ const char *np = parse_tagged(fpc, pe, v);
745
741
  if (np == NULL) { fhold; fbreak; } else fexec np;
746
742
  }
747
743
 
@@ -754,7 +750,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
754
750
  }%%
755
751
 
756
752
 
757
- const char* edn::Parser::parse_dispatch(const char *p, const char *pe, Rice::Object& o)
753
+ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, VALUE& v)
758
754
  {
759
755
  int cs;
760
756
 
@@ -794,19 +790,19 @@ const char* edn::Parser::parse_dispatch(const char *p, const char *pe, Rice::Obj
794
790
  //
795
791
  // set parsing
796
792
  //
797
- const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object& o)
793
+ const char* edn::Parser::parse_set(const char *p, const char *pe, VALUE& v)
798
794
  {
799
795
  static const char* EDN_TYPE = "set";
800
796
 
801
797
  int cs;
802
- Rice::Array elems; // stored as a vector
798
+ VALUE elems = rb_ary_new(); // stored as an array
803
799
 
804
800
  %% write init;
805
801
  %% write exec;
806
802
 
807
803
  if (cs >= EDN_set_first_final) {
808
804
  // all elements collected; now convert to a set
809
- o = Parser::make_ruby_set(elems);
805
+ v = Parser::make_ruby_set(elems);
810
806
  return p + 1;
811
807
  }
812
808
  else if (cs == EDN_set_error) {
@@ -821,9 +817,8 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
821
817
 
822
818
  // ============================================================
823
819
  // discard - consume the discard token and parse the next value to
824
- // discard. TODO: perhaps optimize this so no object data is built
825
- // by defining a new machine(s) to consume items within container
826
- // delimiters
820
+ // discard. TODO: perhaps optimize this so no object data is built by
821
+ // defining a machine to consume items within container delimiters
827
822
  //
828
823
  %%{
829
824
  machine EDN_discard;
@@ -834,12 +829,12 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
834
829
  begin_discard = '_';
835
830
 
836
831
  action discard_value {
837
- const char *np = parse_value(fpc, pe, o);
832
+ const char *np = parse_value(fpc, pe, v);
838
833
  if (np) {
839
834
  // this token is to be discard it so store it in the
840
835
  // discard stack - we really don't need to save it so this
841
836
  // could be simplified
842
- discard.push(o);
837
+ discard.push(v);
843
838
  fexec np;
844
839
  } else {
845
840
  fhold; fbreak;
@@ -856,7 +851,7 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
856
851
  const char* edn::Parser::parse_discard(const char *p, const char *pe)
857
852
  {
858
853
  int cs;
859
- Rice::Object o;
854
+ VALUE v;
860
855
 
861
856
  %% write init;
862
857
  %% write exec;
@@ -914,10 +909,10 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
914
909
  }%%
915
910
 
916
911
 
917
- const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Object& o)
912
+ const char* edn::Parser::parse_tagged(const char *p, const char *pe, VALUE& v)
918
913
  {
919
- std::string sym_name;
920
- Rice::Object data;
914
+ VALUE sym_name = Qnil;
915
+ VALUE data = Qnil;
921
916
 
922
917
  int cs;
923
918
 
@@ -930,9 +925,9 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
930
925
  try {
931
926
  // tagged_element makes a call to ruby which may throw an
932
927
  // exception when parsing the data
933
- o = Parser::tagged_element(sym_name, data);
934
- } catch (Rice::Exception& e) {
935
- error(__FUNCTION__, e.message().str());
928
+ v = Parser::tagged_element(sym_name, data);
929
+ } catch (std::exception& e) {
930
+ error(__FUNCTION__, e.what());
936
931
  return pe;
937
932
  }
938
933
  return p + 1;
@@ -948,13 +943,14 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
948
943
 
949
944
 
950
945
  // ============================================================
951
- // main parsing machine
946
+ // parses entire input but expects single valid token at the
947
+ // top-level, therefore, does not tokenize source stream
952
948
  //
953
949
  %%{
954
- machine EDN;
950
+ machine EDN_parser;
955
951
  include EDN_common;
956
952
 
957
- write data nofinal;
953
+ write data;
958
954
 
959
955
  action parse_value {
960
956
  const char* np = parse_value(fpc, pe, result);
@@ -968,36 +964,71 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
968
964
  main := ignore* sequence? ignore*;
969
965
  }%%
970
966
 
971
- //
972
- // TODO: Currently using a sequence to handle cases with a discard
973
- // but EDN's Reader allows token by token parsing
974
- Rice::Object edn::Parser::parse(const char* buf, std::size_t len)
967
+
968
+ VALUE edn::Parser::parse(const char* src, std::size_t len)
975
969
  {
976
970
  int cs;
977
- const char *p;
978
- const char *pe;
979
- Rice::Object result;
971
+ VALUE result = Qnil;
980
972
 
981
- // init
982
- line_number = 1;
983
- p_save = NULL;
984
- while (!discard.empty())
985
- discard.pop();
973
+ // reset line counter & discard stack
974
+ reset();
986
975
 
987
976
  %% write init;
988
- p = &buf[0];
977
+ p = src;
989
978
  pe = p + len;
990
- eof = pe; // eof defined in Parser class
979
+ eof = pe;
991
980
  %% write exec;
992
981
 
993
- if (cs == EDN_error) {
982
+ if (cs == EDN_parser_error) {
994
983
  error(__FUNCTION__, *p);
995
984
  return Qnil;
996
985
  }
997
- else if (cs == EDN_en_main) {} // silence ragel warning
986
+ else if (cs == EDN_parser_first_final) {
987
+ // whole source is parsed so reset
988
+ p = pe = eof = NULL;
989
+ reset();
990
+ }
991
+ else if (cs == EDN_parser_en_main) {} // silence ragel warning
992
+ return result;
993
+ }
994
+
995
+
996
+ // ============================================================
997
+ // token-by-token machine
998
+ //
999
+ %%{
1000
+ machine EDN_tokens;
1001
+ include EDN_common;
1002
+
1003
+ write data noerror nofinal;
1004
+
1005
+ action parse_value {
1006
+ const char* np = parse_value(fpc, pe, result);
1007
+ if (np == NULL) { fhold; fbreak; } else { fexec np; }
1008
+ }
1009
+
1010
+ element = begin_value >parse_value;
1011
+
1012
+ main := ignore* element ignore*;
1013
+ }%%
1014
+
1015
+
1016
+ //
1017
+ //
1018
+ VALUE edn::Parser::next()
1019
+ {
1020
+ VALUE result = Qnil;
1021
+ int cs;
1022
+
1023
+ %% write init;
1024
+ %% write exec;
1025
+
1026
+ if (cs == EDN_tokens_en_main) {} // silence ragel warning
1027
+
998
1028
  return result;
999
1029
  }
1000
1030
 
1031
+
1001
1032
  /*
1002
1033
  * Local variables:
1003
1034
  * mode: c