edn_turbo 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -20,6 +20,8 @@ namespace edn
20
20
  extern VALUE EDNT_MAKE_EDN_SYMBOL;
21
21
  extern VALUE EDNT_MAKE_SET_METHOD;
22
22
  extern VALUE EDNT_TAGGED_ELEM;
23
+ extern VALUE EDNT_STR_INT_TO_BIGNUM;
24
+ extern VALUE EDNT_STR_DBL_TO_BIGNUM;
23
25
 
24
26
  //
25
27
  // C-extension EDN Parser class representation
@@ -34,24 +36,25 @@ namespace edn
34
36
 
35
37
  Rice::Object parse(const char* s, std::size_t len);
36
38
 
37
- const char* parse_decimal(const char *p, const char *pe, Rice::Object& o);
38
- const char* parse_integer(const char *p, const char *pe, Rice::Object& o);
39
- const char* parse_symbol (const char *p, const char *pe, std::string& s);
40
- const char* parse_keyword(const char *p, const char *pe, Rice::Object& o);
41
- const char* parse_string (const char *p, const char *pe, Rice::Object& o);
42
- const char* parse_value (const char *p, const char *pe, Rice::Object& o);
43
- const char* parse_vector (const char *p, const char *pe, Rice::Object& o);
44
- const char* parse_map (const char *p, const char *pe, Rice::Object& o);
45
- const char* parse_list (const char *p, const char *pe, Rice::Object& o);
46
- const char* parse_set (const char *p, const char *pe, Rice::Object& o);
47
- const char* parse_tagged (const char *p, const char *pe, Rice::Object& o);
39
+ const char* parse_value (const char *p, const char *pe, Rice::Object& o);
40
+ const char* parse_operator(const char *p, const char *pe, Rice::Object& o);
41
+ const char* parse_esc_char(const char *p, const char *pe, Rice::Object& o);
42
+ const char* parse_symbol (const char *p, const char *pe, std::string& s);
43
+ const char* parse_keyword (const char *p, const char *pe, Rice::Object& o);
44
+ const char* parse_string (const char *p, const char *pe, Rice::Object& o);
45
+ const char* parse_decimal (const char *p, const char *pe, Rice::Object& o);
46
+ const char* parse_integer (const char *p, const char *pe, Rice::Object& o);
47
+ const char* parse_vector (const char *p, const char *pe, Rice::Object& o);
48
+ const char* parse_list (const char *p, const char *pe, Rice::Object& o);
49
+ const char* parse_set (const char *p, const char *pe, Rice::Object& o);
50
+ const char* parse_map (const char *p, const char *pe, Rice::Object& o);
51
+ const char* parse_tagged (const char *p, const char *pe, Rice::Object& o);
52
+ const char* parse_discard (const char *p, const char *pe);
48
53
  const char* parse_dispatch(const char *p, const char *pe, Rice::Object& o);
49
54
 
50
- const char* parse_discard(const char *p, const char *pe);
51
-
52
- static bool parse_byte_stream(const char *p, const char *pe, Rice::String& s);
53
- static bool parse_escaped_char(char c, Rice::String& s);
54
- static bool unicode_to_utf8(const char *s, std::size_t len, std::string& rslt);
55
+ static bool parse_byte_stream (const char *p, const char *pe, Rice::String& s);
56
+ static bool parse_escaped_char(const char *p, const char *pe, Rice::Object& s);
57
+ static bool unicode_to_utf8 (const char *s, std::size_t len, std::string& rslt);
55
58
 
56
59
  static Rice::Object make_edn_symbol(const std::string& name);
57
60
  static Rice::Object make_ruby_set(const Rice::Array& elems);
@@ -64,13 +67,17 @@ namespace edn
64
67
  // utility method to convert a primitive in string form to a
65
68
  // ruby type
66
69
  template <class T>
67
- static Rice::Object buftotype(const char* p, long len, T& val) {
70
+ static Rice::Object buftotype(const char* p, std::size_t len) {
71
+ T val;
68
72
  std::string buf;
69
73
  buf.append(p, len);
70
74
  std::istringstream(buf) >> val;
71
75
  return to_ruby<T>(val);
72
76
  }
73
77
 
78
+ static Rice::Object integer_to_ruby(const char* str, std::size_t len);
79
+ static Rice::Object float_to_ruby(const char* str, std::size_t len);
80
+
74
81
  public:
75
82
  Parser() : line_number(1), p_save(NULL), eof(NULL) { }
76
83
 
@@ -26,34 +26,26 @@
26
26
  comment = ';' cr_neg* counter;
27
27
  ignore = ws | comment;
28
28
 
29
- operators = [/\.\*!_\?$%&<>\=\-\+];
30
- symbol_chars = [a-zA-Z0-9\#:] | operators;
29
+ operators = [/\.\*!_\?$%&<>\=+\-];
30
+ symbol_start = alpha;
31
+ symbol_chars = symbol_start | digit | [\#:_\-\.];
31
32
 
32
- symbol_first_c = symbol_chars - [0-9\#\:]; # non-numeric, no '#' or ':'
33
-
34
- k_nil = 'nil';
35
- k_true = 'true';
36
- k_false = 'false';
37
33
  begin_dispatch = '#';
38
34
  begin_keyword = ':';
39
35
  begin_char = '\\';
40
36
  begin_value = alnum | [:\"\{\[\(\\\#] | operators;
41
- # TODO: support - and + symbols. Currently conflicting with numeric values
42
- begin_symbol = symbol_first_c - ('-'|'+');
37
+ begin_symbol = symbol_start;
43
38
  begin_vector = '[';
44
- end_vector = ']';
45
39
  begin_map = '{';
46
40
  begin_list = '(';
47
- end_list = ')';
48
41
  string_delim = '"';
49
- begin_number = digit | '-';
50
-
51
- symbol_name = [\-\+\.]? symbol_first_c (symbol_chars)*;
42
+ begin_number = digit;
52
43
 
53
- symbol = (operators | (symbol_name ('/' symbol_name)?));
44
+ symbol_name = symbol_start (symbol_chars)*;
45
+ symbol = (symbol_name ('/' symbol_name)?);
54
46
 
55
47
  # int / decimal rules
56
- integer = '-'? ('0' | [1-9] digit*);
48
+ integer = ('0' | [1-9] digit*);
57
49
  exp = ([Ee] [+\-]? digit+);
58
50
 
59
51
 
@@ -61,7 +53,7 @@
61
53
  std::stringstream s;
62
54
  s << "unterminated " << EDN_TYPE;
63
55
  error(__FUNCTION__, s.str());
64
- exit(-1);
56
+ fhold; fbreak;
65
57
  }
66
58
  }%%
67
59
 
@@ -75,6 +67,31 @@
75
67
 
76
68
  write data;
77
69
 
70
+ action parse_dispatch {
71
+ const char *np = parse_dispatch(fpc + 1, pe, o);
72
+ if (np == NULL) { fhold; fbreak; } else fexec np;
73
+ }
74
+
75
+ action parse_char {
76
+ const char *np = parse_esc_char(fpc, pe, o);
77
+ if (np == NULL) { fhold; fbreak; } else fexec np;
78
+ }
79
+
80
+ action parse_string {
81
+ const char *np = parse_string(fpc, pe, o);
82
+ if (np == NULL) { fhold; fbreak; } else fexec np;
83
+ }
84
+
85
+ action parse_keyword {
86
+ const char *np = parse_keyword(fpc, pe, o);
87
+ if (np == NULL) { fhold; fbreak; } else fexec np;
88
+ }
89
+
90
+ action parse_operator {
91
+ const char *np = parse_operator(fpc, pe, o);
92
+ if (np == NULL) { fhold; fbreak; } else fexec np;
93
+ }
94
+
78
95
  action parse_symbol {
79
96
  std::string sym;
80
97
  const char *np = parse_symbol(fpc, pe, sym);
@@ -89,26 +106,6 @@
89
106
  }
90
107
  }
91
108
 
92
- action parse_keyword {
93
- const char *np = parse_keyword(fpc, pe, o);
94
- if (np == NULL) { fhold; fbreak; } else fexec np;
95
- }
96
-
97
- action parse_char {
98
- Rice::String s;
99
-
100
- if (!parse_escaped_char(*fpc, s)) {
101
- fhold; fbreak;
102
- } else {
103
- o = s;
104
- }
105
- }
106
-
107
- action parse_string {
108
- const char *np = parse_string(fpc, pe, o);
109
- if (np == NULL) { fhold; fbreak; } else fexec np;
110
- }
111
-
112
109
  action parse_number {
113
110
  // try to parse a decimal first
114
111
  const char *np = parse_decimal(fpc, pe, o);
@@ -143,19 +140,15 @@
143
140
  if (np == NULL) { fhold; fbreak; } else fexec np;
144
141
  }
145
142
 
146
- action parse_dispatch {
147
- const char *np = parse_dispatch(fpc + 1, pe, o);
148
- if (np == NULL) { fhold; fbreak; } else fexec np;
149
- }
150
-
151
143
  action exit { fhold; fbreak; }
152
144
 
153
145
  main := (
154
146
  begin_dispatch >parse_dispatch |
155
- begin_char (alnum|punct) >parse_char |
147
+ begin_char >parse_char |
156
148
  string_delim >parse_string |
157
- begin_symbol >parse_symbol |
158
149
  begin_keyword >parse_keyword |
150
+ operators >parse_operator |
151
+ begin_symbol >parse_symbol |
159
152
  begin_number >parse_number |
160
153
  begin_vector >parse_vector |
161
154
  begin_list >parse_list |
@@ -166,7 +159,7 @@
166
159
 
167
160
  const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object& o)
168
161
  {
169
- // std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
162
+ //std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
170
163
  int cs;
171
164
 
172
165
  %% write init;
@@ -185,6 +178,133 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
185
178
 
186
179
 
187
180
 
181
+ // ============================================================
182
+ // operator parsing
183
+ //
184
+ %%{
185
+ machine EDN_operator;
186
+ include EDN_common;
187
+
188
+ write data;
189
+
190
+ action parse_symbol {
191
+ // parse a symbol including the leading operator (-, +, .)
192
+ std::string sym;
193
+ const char *np = parse_symbol(p_save, pe, sym);
194
+ if (np == NULL) { fhold; fbreak; } else {
195
+ o = Parser::make_edn_symbol(sym);
196
+ fexec np;
197
+ }
198
+ }
199
+
200
+ action parse_number {
201
+ // parse a number with the leading symbol - this is slightly
202
+ // different than the one within EDN_value since it includes
203
+ // the leading - or +
204
+ //
205
+ // try to parse a decimal first
206
+ const char *np = parse_decimal(p_save, pe, o);
207
+ if (np == NULL) {
208
+ // if we can't, try to parse it as an int
209
+ np = parse_integer(p_save, pe, o);
210
+ }
211
+
212
+ if (np) {
213
+ fexec np;
214
+ fhold;
215
+ fbreak;
216
+ }
217
+ else {
218
+ error(__FUNCTION__, *p);
219
+ fexec pe;
220
+ }
221
+ }
222
+
223
+ action parse_operator {
224
+ // stand-alone operators (-, +, /, ... etc)
225
+ std::string sym;
226
+ sym += *(fpc - 1);
227
+ o = Parser::make_edn_symbol(sym);
228
+ }
229
+
230
+ action exit { fhold; fbreak; }
231
+
232
+ main := (
233
+ ('-'|'+'|'.') alpha >parse_symbol |
234
+ ('-'|'+') begin_number >parse_number |
235
+ operators ignore* >parse_operator
236
+ ) ^(operators|alpha|digit)? @exit;
237
+ }%%
238
+
239
+
240
+ const char* edn::Parser::parse_operator(const char *p, const char *pe, Rice::Object& o)
241
+ {
242
+ // std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
243
+ int cs;
244
+ std::string op;
245
+
246
+ %% write init;
247
+ p_save = p;
248
+ %% write exec;
249
+
250
+ if (cs >= EDN_operator_first_final) {
251
+ return p;
252
+ }
253
+ else if (cs == EDN_operator_error) {
254
+ error(__FUNCTION__, *p);
255
+ return pe;
256
+ }
257
+ else if (cs == EDN_operator_en_main) {} // silence ragel warning
258
+ return NULL;
259
+ }
260
+
261
+
262
+
263
+ // ============================================================
264
+ // escaped char parsing
265
+ //
266
+ %%{
267
+ machine EDN_escaped_char;
268
+ include EDN_common;
269
+
270
+ write data;
271
+
272
+ valid_chars = alpha;
273
+
274
+ action exit { fhold; fbreak; }
275
+
276
+ main := (
277
+ begin_char valid_chars+ ignore*
278
+ ) (^(valid_chars | '\\')? @exit);
279
+ }%%
280
+
281
+
282
+ const char* edn::Parser::parse_esc_char(const char *p, const char *pe, Rice::Object& o)
283
+ {
284
+ //std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
285
+ int cs;
286
+
287
+ %% write init;
288
+ p_save = p;
289
+ %% write exec;
290
+
291
+ if (cs >= EDN_escaped_char_first_final) {
292
+ if (!Parser::parse_escaped_char(p_save + 1, p, o)) {
293
+ return pe;
294
+ }
295
+ return p;
296
+ }
297
+ else if (cs == EDN_escaped_char_error) {
298
+ error(__FUNCTION__, *p);
299
+ return pe;
300
+ }
301
+ else if (cs == EDN_escaped_char_en_main) {} // silence ragel warning
302
+ return NULL;
303
+ }
304
+
305
+
306
+
307
+
188
308
  // ============================================================
189
309
  // symbol parsing
190
310
  //
@@ -197,14 +317,15 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object
197
317
  action exit { fhold; fbreak; }
198
318
 
199
319
  main := (
200
- symbol
201
- ) ignore* (^symbol_chars? @exit);
320
+ operators? symbol |
321
+ operators
322
+ ) ignore* (^(symbol_chars | operators)? @exit);
202
323
  }%%
203
324
 
204
325
 
205
326
  const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string& s)
206
327
  {
207
- //std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
328
+ // std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
208
329
  int cs;
209
330
 
210
331
  %% write init;
@@ -237,8 +358,8 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
237
358
  machine EDN_keyword;
238
359
  include EDN_common;
239
360
 
240
- keyword_chars = symbol_chars;
241
- keyword_start = symbol_first_c | '#'; # keywords can have '#' after ':'
361
+ keyword_chars = symbol_chars | operators;
362
+ keyword_start = symbol_start | [\#\./];
242
363
 
243
364
  keyword_name = keyword_start (keyword_chars)*;
244
365
 
@@ -253,6 +374,7 @@ const char* edn::Parser::parse_symbol(const char *p, const char *pe, std::string
253
374
  const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Object& o)
254
375
  {
255
376
  int cs;
377
+ // std::cerr << __FUNCTION__ << " - p: '" << p << "'" << std::endl;
256
378
 
257
379
  %% write init;
258
380
  p_save = p;
@@ -285,7 +407,7 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
285
407
  write data;
286
408
 
287
409
  action parse_string {
288
- if (!parse_byte_stream(p_save + 1, p, s)) {
410
+ if (!Parser::parse_byte_stream(p_save + 1, p, s)) {
289
411
  fhold;
290
412
  fbreak;
291
413
  } else {
@@ -299,9 +421,8 @@ const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Obje
299
421
  (^([\"\\] | 0..0x1f) |
300
422
  '\\'[\"\\/bfnrt] |
301
423
  '\\u'[0-9a-fA-F]{4} |
302
- '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string)
303
- string_delim @err(close_err)
304
- @exit;
424
+ '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string
425
+ ) :>> string_delim @err(close_err) @exit;
305
426
  }%%
306
427
 
307
428
 
@@ -339,11 +460,10 @@ const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Objec
339
460
 
340
461
  action exit { fhold; fbreak; }
341
462
 
342
- main := (
343
- (integer '.' digit* (exp? [M]?)) |
344
- (integer exp)
345
- )
346
- (^[0-9Ee.+\-M]? @exit );
463
+ main := ('-'|'+')? (
464
+ (integer '.' digit* (exp? [M]?)) |
465
+ (integer exp)
466
+ ) (^[0-9Ee.+\-M]? @exit );
347
467
  }%%
348
468
 
349
469
 
@@ -356,8 +476,7 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
356
476
  %% write exec;
357
477
 
358
478
  if (cs >= EDN_decimal_first_final) {
359
- double value;
360
- o = Parser::buftotype<double>(p_save, p - p_save, value);
479
+ o = Parser::float_to_ruby(p_save, p - p_save);
361
480
  return p + 1;
362
481
  }
363
482
  else if (cs == EDN_decimal_en_main) {} // silence ragel warning
@@ -376,7 +495,9 @@ const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Obje
376
495
 
377
496
  action exit { fhold; fbreak; }
378
497
 
379
- main := (integer [M|N]?) (^[0-9MN]? @exit);
498
+ main := (
499
+ ('-'|'+')? (integer [MN]?)
500
+ ) (^[0-9MN+\-]? @exit);
380
501
  }%%
381
502
 
382
503
  const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Object& o)
@@ -388,8 +509,7 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
388
509
  %% write exec;
389
510
 
390
511
  if (cs >= EDN_integer_first_final) {
391
- int value;
392
- o = Parser::buftotype<int>(p_save, p - p_save, value);
512
+ o = Parser::integer_to_ruby(p_save, p - p_save);
393
513
  return p + 1;
394
514
  }
395
515
  else if (cs == EDN_integer_en_main) {} // silence ragel warning
@@ -439,10 +559,14 @@ const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Obje
439
559
  machine EDN_vector;
440
560
  include EDN_sequence_common;
441
561
 
562
+ end_vector = ']';
563
+
442
564
  write data;
443
565
 
444
- main := begin_vector ignore* sequence? end_vector @err(close_err)
445
- @exit;
566
+ main := begin_vector (
567
+ ignore* sequence? :>> end_vector
568
+ )
569
+ @err(close_err) @exit;
446
570
  }%%
447
571
 
448
572
 
@@ -481,10 +605,13 @@ const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Objec
481
605
  machine EDN_list;
482
606
  include EDN_sequence_common;
483
607
 
608
+ end_list = ')';
609
+
484
610
  write data;
485
611
 
486
- main := begin_list ignore* sequence? end_list @err(close_err)
487
- @exit;
612
+ main := begin_list (
613
+ ignore* sequence? :>> end_list
614
+ ) @err(close_err) @exit;
488
615
  }%%
489
616
 
490
617
  //
@@ -526,7 +653,9 @@ const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object&
526
653
  begin_set = '{';
527
654
  end_set = '}';
528
655
 
529
- main := begin_set ignore* sequence? end_set @err(close_err) @exit;
656
+ main := begin_set (
657
+ ignore* sequence? :>> end_set
658
+ ) @err(close_err) @exit;
530
659
  }%%
531
660
 
532
661
  //
@@ -544,7 +673,7 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
544
673
  %% write exec;
545
674
 
546
675
  if (cs >= EDN_set_first_final) {
547
- o = make_ruby_set(arr);
676
+ o = Parser::make_ruby_set(arr);
548
677
  return p + 1;
549
678
  }
550
679
  else if (cs == EDN_set_error) {
@@ -574,7 +703,9 @@ const char* edn::Parser::parse_set(const char *p, const char *pe, Rice::Object&
574
703
  fexec pe;
575
704
  }
576
705
 
577
- main := begin_map ignore* (sequence)? :>> end_map @err(close_err) @exit;
706
+ main := begin_map (
707
+ ignore* (sequence)? :>> end_map
708
+ ) @err(close_err) @exit;
578
709
  }%%
579
710
 
580
711
 
@@ -620,14 +751,9 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
620
751
  machine EDN_tagged;
621
752
  include EDN_common;
622
753
 
623
- # inst = (string_delim [0-9\-\+:\.TZ]* string_delim);
754
+ # inst = (string_delim [0-9+\-:\.TZ]* string_delim);
624
755
  # uuid = (string_delim [a-f0-9\-]* string_delim);
625
756
 
626
- # tags
627
- tagged_symbol = alpha [a-zA-z0-9]*;
628
- built_in_tag = tagged_symbol;
629
- user_tag = tagged_symbol '/' tagged_symbol;
630
-
631
757
  write data;
632
758
 
633
759
  action parse_symbol {
@@ -647,7 +773,7 @@ const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object&
647
773
 
648
774
  const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Object& o)
649
775
  {
650
- // std::cerr << __FUNCTION__ << " p '" << p << "'" << std::endl;
776
+ // std::cerr << __FUNCTION__ << " p '" << p << "'" << std::endl;
651
777
  std::string sym_name;
652
778
  Rice::Object object;
653
779
 
@@ -657,7 +783,7 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
657
783
  %% write exec;
658
784
 
659
785
  if (cs >= EDN_tagged_first_final) {
660
- // std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << object << std::endl;
786
+ //std::cerr << __FUNCTION__ << " parse symbol name as '" << sym_name << "', value is: " << object << std::endl;
661
787
  o = Parser::tagged_element(sym_name, object);
662
788
  return p + 1;
663
789
  }
@@ -689,9 +815,9 @@ const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Objec
689
815
  fhold; fbreak;
690
816
  }
691
817
 
692
- main := '_' ignore* (
693
- begin_value >discard_value
694
- ) @exit;
818
+ main := begin_discard ignore* (
819
+ begin_value >discard_value
820
+ ) @exit;
695
821
  }%%
696
822
 
697
823
 
@@ -728,19 +854,16 @@ const char* edn::Parser::parse_discard(const char *p, const char *pe)
728
854
  write data;
729
855
 
730
856
  action parse_discard {
731
- // std::cerr << "--- DISPATCH DISCARD: fpc is '" << fpc << "'" << std::endl;
732
857
  const char *np = parse_discard(fpc, pe);
733
858
  if (np == NULL) { fhold; fbreak; } else fexec np;
734
859
  }
735
860
 
736
861
  action parse_set {
737
- // std::cerr << "--- DISPATCH SET: fpc is '" << fpc << "'" << std::endl;
738
862
  const char *np = parse_set(fpc, pe, o);
739
863
  if (np == NULL) { fhold; fbreak; } else fexec np;
740
864
  }
741
865
 
742
866
  action parse_tagged {
743
- // std::cerr << "--- DISPATCH TAGGED: fpc is '" << fpc << "'" << std::endl;
744
867
  const char *np = parse_tagged(fpc, pe, o);
745
868
  if (np == NULL) { fhold; fbreak; } else fexec np;
746
869
  }
@@ -1,5 +1,7 @@
1
1
  #include <iostream>
2
+ #include <iomanip>
2
3
  #include <string>
4
+ #include <limits>
3
5
 
4
6
  #include <rice/String.hpp>
5
7
  #include <rice/Array.hpp>
@@ -11,6 +13,47 @@
11
13
 
12
14
  namespace edn
13
15
  {
16
+ template <typename T>
17
+ static std::size_t get_max_chars(T)
18
+ {
19
+ std::stringstream s;
20
+ s << std::fixed << std::numeric_limits<T>::max();
21
+ return s.str().length();
22
+ }
23
+
24
+ static const std::size_t LL_max_chars = get_max_chars<>((long long) 1);
25
+ static const std::size_t LD_max_chars = get_max_chars<>((long double) 1);
26
+
27
+ //
28
+ // convert to int.. if string rep has more digits than long can
29
+ // hold, call into ruby to get a big num
30
+ Rice::Object Parser::integer_to_ruby(const char* str, std::size_t len)
31
+ {
32
+ if (len < LL_max_chars)
33
+ {
34
+ return buftotype<long>(str, len);
35
+ }
36
+
37
+ // value is outside of range of long type. Use ruby to convert it
38
+ VALUE rb_s = Rice::protect(rb_str_new2, str);
39
+ return Rice::protect(rb_funcall, rb_mEDNT, EDNT_STR_INT_TO_BIGNUM, 1, rb_s);
40
+ }
41
+
42
+ //
43
+ // as above.. TODO: check exponential
44
+ Rice::Object Parser::float_to_ruby(const char* str, std::size_t len)
45
+ {
46
+ if (len < LD_max_chars)
47
+ {
48
+ return buftotype<double>(str, len);
49
+ }
50
+
51
+ // value is outside of range of long type. Use ruby to convert it
52
+ VALUE rb_s = Rice::protect(rb_str_new2, str);
53
+ return Rice::protect(rb_funcall, rb_mEDNT, EDNT_STR_DBL_TO_BIGNUM, 1, rb_s);
54
+ }
55
+
56
+
14
57
  //
15
58
  // copies the string data, unescaping any present values that need to be replaced
16
59
  //
@@ -18,9 +61,8 @@ namespace edn
18
61
  {
19
62
  if (p_end > p_start) {
20
63
  std::string buf;
21
- std::size_t len = p_end - p_start;
22
64
 
23
- if (unicode_to_utf8(p_start, len, buf))
65
+ if (unicode_to_utf8(p_start, p_end - p_start, buf))
24
66
  {
25
67
  // utf-8 encode
26
68
  VALUE vs = Rice::protect( rb_str_new2, buf.c_str() );
@@ -34,22 +76,27 @@ namespace edn
34
76
  }
35
77
 
36
78
  //
37
- // handles things like \c, \n
79
+ // handles things like \c, \newline
38
80
  //
39
- bool Parser::parse_escaped_char(char c, Rice::String& s)
81
+ bool Parser::parse_escaped_char(const char *p, const char *pe, Rice::Object& o)
40
82
  {
41
- char str[2] = { c, 0 };
42
-
43
- switch (c) {
44
- case 'n': str[0] = '\n'; break;
45
- case 't': str[0] = '\t'; break;
46
- case 'r': str[0] = '\r'; break;
47
- case 'v': str[0] = '\v'; break;
48
- case 'f': str[0] = '\f'; break;
49
- default: break;
83
+ std::string buf;
84
+ std::size_t len = pe - p;
85
+ buf.append(p, len);
86
+
87
+ if (len > 1) {
88
+ if (buf == "newline") buf = "\\n";
89
+ else if (buf == "tab") buf = "\\t";
90
+ else if (buf == "return") buf = "\\r";
91
+ else if (buf == "space") buf = " ";
92
+ else if (buf == "formfeed") buf = "\\f";
93
+ else if (buf == "backspace") buf = "\\b";
94
+ // TODO: is this supported?
95
+ else if (buf == "verticaltab") buf = "\\v";
96
+ else return false;
50
97
  }
51
98
 
52
- s = str;
99
+ o = Rice::String(buf);
53
100
  return true;
54
101
  }
55
102