edn_turbo 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +52 -12
- data/ext/edn_turbo/edn_parser.cc +504 -576
- data/ext/edn_turbo/edn_parser.h +12 -14
- data/ext/edn_turbo/edn_parser.rl +103 -175
- data/ext/edn_turbo/edn_parser_def.cc +22 -24
- data/ext/edn_turbo/edn_parser_unicode.cc +29 -0
- data/ext/edn_turbo/extconf.rb +23 -0
- data/ext/edn_turbo/main.cc +0 -1
- data/lib/edn_turbo/version.rb +1 -1
- data/test/test_output_diff.rb +9 -0
- metadata +2 -1
data/ext/edn_turbo/edn_parser.h
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#define EDN_RUBY_EXT_PARSER_H
|
3
3
|
|
4
4
|
#include <string>
|
5
|
-
#include <
|
5
|
+
#include <sstream>
|
6
6
|
#include <rice/Object.hpp>
|
7
7
|
#include <rice/to_from_ruby.hpp>
|
8
8
|
|
@@ -21,17 +21,18 @@ namespace edn
|
|
21
21
|
|
22
22
|
Rice::Object parse(const char* s, std::size_t len);
|
23
23
|
|
24
|
-
const char*
|
25
|
-
const char*
|
26
|
-
const char*
|
27
|
-
const char*
|
28
|
-
const char*
|
29
|
-
const char*
|
30
|
-
const char*
|
31
|
-
const char*
|
32
|
-
const char*
|
24
|
+
const char* parse_decimal(const char *p, const char *pe, Rice::Object& o);
|
25
|
+
const char* parse_integer(const char *p, const char *pe, Rice::Object& o);
|
26
|
+
const char* parse_keyword(const char *p, const char *pe, Rice::Object& o);
|
27
|
+
const char* parse_tagged (const char *p, const char *pe, Rice::Object& o, bool& dicard);
|
28
|
+
const char* parse_string (const char *p, const char *pe, Rice::Object& o);
|
29
|
+
const char* parse_value (const char *p, const char *pe, Rice::Object& o);
|
30
|
+
const char* parse_vector (const char *p, const char *pe, Rice::Object& o);
|
31
|
+
const char* parse_map (const char *p, const char *pe, Rice::Object& o);
|
32
|
+
const char* parse_list (const char *p, const char *pe, Rice::Object& o);
|
33
33
|
|
34
|
-
bool
|
34
|
+
static bool parse_byte_stream(const char *p, const char *pe, Rice::String& s);
|
35
|
+
static bool unicode_to_utf8(const char *s, std::size_t len, std::string& rslt);
|
35
36
|
|
36
37
|
void error(const std::string& err, char c) const;
|
37
38
|
void error(char err_c) const { error("", err_c); }
|
@@ -52,9 +53,6 @@ namespace edn
|
|
52
53
|
|
53
54
|
Rice::Object process(const std::string& data) { return parse(data.c_str(), data.length()); }
|
54
55
|
|
55
|
-
// handle file read from the c-side
|
56
|
-
Rice::Object open(const std::string& file);
|
57
|
-
|
58
56
|
}; // Engine
|
59
57
|
|
60
58
|
} // namespace
|
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
#include <iostream>
|
2
2
|
#include <string>
|
3
3
|
|
4
|
-
#include <ruby/ruby.h>
|
5
|
-
#include <ruby/encoding.h>
|
6
|
-
|
7
4
|
#include <rice/Hash.hpp>
|
8
5
|
#include <rice/Array.hpp>
|
9
6
|
#include <rice/to_from_ruby.hpp>
|
@@ -13,6 +10,10 @@
|
|
13
10
|
//
|
14
11
|
// EDN spec at: https://github.com/edn-format/edn
|
15
12
|
//
|
13
|
+
//
|
14
|
+
// many thanks to Florian Frank for json-ruby which was essential in
|
15
|
+
// helping me learn about ragel
|
16
|
+
//
|
16
17
|
|
17
18
|
%%{
|
18
19
|
machine EDN_common;
|
@@ -67,21 +68,21 @@
|
|
67
68
|
}
|
68
69
|
|
69
70
|
action parse_keyword {
|
70
|
-
const char *np =
|
71
|
+
const char *np = parse_keyword(fpc, pe, o);
|
71
72
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
72
73
|
}
|
73
74
|
|
74
75
|
action parse_string {
|
75
|
-
const char *np =
|
76
|
+
const char *np = parse_string(fpc, pe, o);
|
76
77
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
77
78
|
}
|
78
79
|
|
79
80
|
action parse_number {
|
80
81
|
// try to parse a decimal first
|
81
|
-
const char *np =
|
82
|
+
const char *np = parse_decimal(fpc, pe, o);
|
82
83
|
if (np == NULL) {
|
83
84
|
// if we can't, try to parse it as an int
|
84
|
-
np =
|
85
|
+
np = parse_integer(fpc, pe, o);
|
85
86
|
}
|
86
87
|
|
87
88
|
if (np) {
|
@@ -96,17 +97,17 @@
|
|
96
97
|
}
|
97
98
|
|
98
99
|
action parse_vector {
|
99
|
-
const char *np =
|
100
|
+
const char *np = parse_vector(fpc, pe, o);
|
100
101
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
101
102
|
}
|
102
103
|
|
103
104
|
action parse_list {
|
104
|
-
const char *np =
|
105
|
+
const char *np = parse_list(fpc, pe, o);
|
105
106
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
106
107
|
}
|
107
108
|
|
108
109
|
action parse_map {
|
109
|
-
const char *np =
|
110
|
+
const char *np = parse_map(fpc, pe, o);
|
110
111
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
111
112
|
}
|
112
113
|
|
@@ -126,7 +127,7 @@
|
|
126
127
|
}%%
|
127
128
|
|
128
129
|
|
129
|
-
const char *edn::Parser::
|
130
|
+
const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object& o)
|
130
131
|
{
|
131
132
|
int cs;
|
132
133
|
|
@@ -145,80 +146,6 @@ const char *edn::Parser::EDN_parse_value(const char *p, const char *pe, Rice::Ob
|
|
145
146
|
}
|
146
147
|
|
147
148
|
|
148
|
-
// ============================================================
|
149
|
-
// tagged element parsing - any of #uuid, #inst, #{, #(some symbol)
|
150
|
-
// discard (#_ <ident>) is handled by the top-level machine
|
151
|
-
//
|
152
|
-
%%{
|
153
|
-
machine EDN_dispatch;
|
154
|
-
include EDN_common;
|
155
|
-
|
156
|
-
begin_discard = '_';
|
157
|
-
begin_set = '{';
|
158
|
-
end_set = '}';
|
159
|
-
|
160
|
-
write data;
|
161
|
-
|
162
|
-
action exit { fhold; fbreak; }
|
163
|
-
|
164
|
-
main := begin_dispatch (
|
165
|
-
(begin_discard (space)? ([a-zA-Z0-9\-\.]*)) |
|
166
|
-
('inst ' string_delim ([0-9\-+:\.TZ])* string_delim) |
|
167
|
-
('uuid ' string_delim ([a-f0-9\-]* string_delim))
|
168
|
-
)
|
169
|
-
(^[a-zA-Z0-9:\.\-+ ]* @exit);
|
170
|
-
}%%
|
171
|
-
|
172
|
-
|
173
|
-
const char* edn::Parser::EDN_parse_tagged(const char *p, const char *pe, Rice::Object& o, bool& discard)
|
174
|
-
{
|
175
|
-
int cs;
|
176
|
-
Rice::String str;
|
177
|
-
|
178
|
-
%% write init;
|
179
|
-
p_save = p;
|
180
|
-
%% write exec;
|
181
|
-
|
182
|
-
if (cs >= EDN_dispatch_first_final) {
|
183
|
-
|
184
|
-
//is it a discard? if so, just drop the following token
|
185
|
-
if (*(p_save + 1) == '_')
|
186
|
-
{
|
187
|
-
discard = true;
|
188
|
-
return p + 1;
|
189
|
-
}
|
190
|
-
|
191
|
-
std::size_t len = p - p_save;
|
192
|
-
std::string buf;
|
193
|
-
buf.reserve(len);
|
194
|
-
|
195
|
-
if (len > 10)
|
196
|
-
{
|
197
|
-
// there's enough room to be #inst or #uuid, copy the
|
198
|
-
// string portion
|
199
|
-
if (std::strncmp(p_save + 1, "inst", 4) == 0) {
|
200
|
-
buf.append(p_save + 7, len - 8);
|
201
|
-
} else if (std::strncmp(p_save + 1, "uuid", 4) == 0) {
|
202
|
-
buf.append(p_save + 7, len - 8);
|
203
|
-
}
|
204
|
-
|
205
|
-
o = Rice::String(buf);
|
206
|
-
return p;
|
207
|
-
}
|
208
|
-
|
209
|
-
// tagged element
|
210
|
-
o = Rice::String(buf);
|
211
|
-
return p;
|
212
|
-
}
|
213
|
-
else if (cs == EDN_dispatch_error) {
|
214
|
-
error(*p);
|
215
|
-
return pe;
|
216
|
-
}
|
217
|
-
else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
|
218
|
-
return NULL;
|
219
|
-
}
|
220
|
-
|
221
|
-
|
222
149
|
|
223
150
|
// ============================================================
|
224
151
|
// keyword parsing
|
@@ -237,7 +164,7 @@ const char* edn::Parser::EDN_parse_tagged(const char *p, const char *pe, Rice::O
|
|
237
164
|
}%%
|
238
165
|
|
239
166
|
|
240
|
-
const char* edn::Parser::
|
167
|
+
const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Object& o)
|
241
168
|
{
|
242
169
|
int cs;
|
243
170
|
|
@@ -272,7 +199,7 @@ const char* edn::Parser::EDN_parse_keyword(const char *p, const char *pe, Rice::
|
|
272
199
|
write data;
|
273
200
|
|
274
201
|
action parse_string {
|
275
|
-
if (!
|
202
|
+
if (!parse_byte_stream(p_save + 1, p, s)) {
|
276
203
|
fhold;
|
277
204
|
fbreak;
|
278
205
|
} else {
|
@@ -292,82 +219,7 @@ const char* edn::Parser::EDN_parse_keyword(const char *p, const char *pe, Rice::
|
|
292
219
|
}%%
|
293
220
|
|
294
221
|
|
295
|
-
|
296
|
-
// copies the string data, unescaping any present values that need to be replaced
|
297
|
-
//
|
298
|
-
bool edn::Parser::EDN_parse_byte_stream(const char *p, const char *pe, Rice::String& s)
|
299
|
-
{
|
300
|
-
if (pe > p) {
|
301
|
-
std::string buf;
|
302
|
-
std::size_t len = pe - p;
|
303
|
-
|
304
|
-
// pre-allocate storage needed
|
305
|
-
buf.reserve(len);
|
306
|
-
|
307
|
-
const char* cp = p;
|
308
|
-
std::size_t pos = 0;
|
309
|
-
char c, replacement;
|
310
|
-
|
311
|
-
while (cp < pe)
|
312
|
-
{
|
313
|
-
// append any other character that is not the escaping slash
|
314
|
-
if (*cp != '\\') {
|
315
|
-
buf.replace(pos++, 1, 1, *cp++);
|
316
|
-
continue;
|
317
|
-
}
|
318
|
-
|
319
|
-
// looking at a '\' - check what it escapes if there's a
|
320
|
-
// following character
|
321
|
-
if (++cp == pe)
|
322
|
-
break;
|
323
|
-
|
324
|
-
c = *cp++;
|
325
|
-
replacement = '?';
|
326
|
-
|
327
|
-
switch (c)
|
328
|
-
{
|
329
|
-
case 't':
|
330
|
-
replacement = '\t';
|
331
|
-
break;
|
332
|
-
case 'n':
|
333
|
-
replacement = '\n';
|
334
|
-
break;
|
335
|
-
case 'r':
|
336
|
-
replacement = '\r';
|
337
|
-
break;
|
338
|
-
case '\"':
|
339
|
-
replacement = '\"';
|
340
|
-
break;
|
341
|
-
case '\\':
|
342
|
-
replacement = '\\';
|
343
|
-
break;
|
344
|
-
/* TODO: add support for this!
|
345
|
-
case 'u':
|
346
|
-
replacement = '\u';
|
347
|
-
break;
|
348
|
-
*/
|
349
|
-
default:
|
350
|
-
std::cerr << "value must be unescaped but case is unhandled: '" << c << "'" << std::endl;
|
351
|
-
break;
|
352
|
-
}
|
353
|
-
|
354
|
-
// substitute the escaped walue
|
355
|
-
if (replacement != '?')
|
356
|
-
buf.replace(pos++, 1, 1, replacement);
|
357
|
-
}
|
358
|
-
|
359
|
-
// utf-8 encode
|
360
|
-
VALUE vs = Rice::protect( rb_str_new2, buf.c_str() );
|
361
|
-
VALUE s_utf8 = Rice::protect( rb_enc_associate, vs, rb_utf8_encoding() );
|
362
|
-
s = Rice::String(s_utf8);
|
363
|
-
return true;
|
364
|
-
}
|
365
|
-
|
366
|
-
return false;
|
367
|
-
}
|
368
|
-
|
369
|
-
|
370
|
-
const char* edn::Parser::EDN_parse_string(const char *p, const char *pe, Rice::Object& o)
|
222
|
+
const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Object& o)
|
371
223
|
{
|
372
224
|
static const char* EDN_TYPE = "string";
|
373
225
|
int cs;
|
@@ -410,7 +262,7 @@ const char* edn::Parser::EDN_parse_string(const char *p, const char *pe, Rice::O
|
|
410
262
|
}%%
|
411
263
|
|
412
264
|
|
413
|
-
const char* edn::Parser::
|
265
|
+
const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Object& o)
|
414
266
|
{
|
415
267
|
int cs;
|
416
268
|
|
@@ -441,7 +293,7 @@ const char* edn::Parser::EDN_parse_decimal(const char *p, const char *pe, Rice::
|
|
441
293
|
main := '-'? ('0' | [1-9][0-9]* [M]?) (^[0-9M]? @exit);
|
442
294
|
}%%
|
443
295
|
|
444
|
-
const char* edn::Parser::
|
296
|
+
const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Object& o)
|
445
297
|
{
|
446
298
|
int cs;
|
447
299
|
|
@@ -469,7 +321,7 @@ const char* edn::Parser::EDN_parse_integer(const char *p, const char *pe, Rice::
|
|
469
321
|
|
470
322
|
action parse_value {
|
471
323
|
Rice::Object v;
|
472
|
-
const char *np =
|
324
|
+
const char *np = parse_value(fpc, pe, v);
|
473
325
|
if (np == NULL) {
|
474
326
|
fhold; fbreak;
|
475
327
|
} else {
|
@@ -481,7 +333,7 @@ const char* edn::Parser::EDN_parse_integer(const char *p, const char *pe, Rice::
|
|
481
333
|
action parse_dispatch {
|
482
334
|
bool discard = false;
|
483
335
|
Rice::Object v;
|
484
|
-
const char *np =
|
336
|
+
const char *np = parse_tagged(fpc, pe, v, discard);
|
485
337
|
if (np == NULL) {
|
486
338
|
fhold; fbreak;
|
487
339
|
} else {
|
@@ -520,7 +372,7 @@ const char* edn::Parser::EDN_parse_integer(const char *p, const char *pe, Rice::
|
|
520
372
|
//
|
521
373
|
// vector parsing
|
522
374
|
//
|
523
|
-
const char* edn::Parser::
|
375
|
+
const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Object& o)
|
524
376
|
{
|
525
377
|
static const char* EDN_TYPE = "vector";
|
526
378
|
|
@@ -563,7 +415,7 @@ const char* edn::Parser::EDN_parse_vector(const char *p, const char *pe, Rice::O
|
|
563
415
|
//
|
564
416
|
// list parsing
|
565
417
|
//
|
566
|
-
const char* edn::Parser::
|
418
|
+
const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object& o)
|
567
419
|
{
|
568
420
|
static const char* EDN_TYPE = "list";
|
569
421
|
|
@@ -597,7 +449,7 @@ const char* edn::Parser::EDN_parse_list(const char *p, const char *pe, Rice::Obj
|
|
597
449
|
write data;
|
598
450
|
|
599
451
|
action parse_key {
|
600
|
-
const char *np =
|
452
|
+
const char *np = parse_value(fpc, pe, k);
|
601
453
|
if (np == NULL) {
|
602
454
|
fhold; fbreak;
|
603
455
|
} else {
|
@@ -606,7 +458,7 @@ const char* edn::Parser::EDN_parse_list(const char *p, const char *pe, Rice::Obj
|
|
606
458
|
}
|
607
459
|
|
608
460
|
action parse_value {
|
609
|
-
const char *np =
|
461
|
+
const char *np = parse_value(fpc, pe, v);
|
610
462
|
if (np == NULL) {
|
611
463
|
fhold; fbreak;
|
612
464
|
} else {
|
@@ -634,7 +486,7 @@ const char* edn::Parser::EDN_parse_list(const char *p, const char *pe, Rice::Obj
|
|
634
486
|
}%%
|
635
487
|
|
636
488
|
|
637
|
-
const char* edn::Parser::
|
489
|
+
const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object& o)
|
638
490
|
{
|
639
491
|
static const char* EDN_TYPE = "map";
|
640
492
|
|
@@ -659,6 +511,82 @@ const char* edn::Parser::EDN_parse_map(const char *p, const char *pe, Rice::Obje
|
|
659
511
|
|
660
512
|
|
661
513
|
|
514
|
+
// ============================================================
|
515
|
+
// tagged element parsing - any of #uuid, #inst, #{, #(some symbol)
|
516
|
+
// discard (#_ <ident>) is handled by the top-level machine
|
517
|
+
//
|
518
|
+
// NOTE: this is not fully implemented yet
|
519
|
+
//
|
520
|
+
%%{
|
521
|
+
machine EDN_dispatch;
|
522
|
+
include EDN_common;
|
523
|
+
|
524
|
+
begin_discard = '_';
|
525
|
+
begin_set = '{';
|
526
|
+
end_set = '}';
|
527
|
+
|
528
|
+
write data;
|
529
|
+
|
530
|
+
action exit { fhold; fbreak; }
|
531
|
+
|
532
|
+
main := begin_dispatch (
|
533
|
+
(begin_discard (space)? ([a-zA-Z0-9\-\.]*)) |
|
534
|
+
('inst ' string_delim ([0-9\-+:\.TZ])* string_delim) |
|
535
|
+
('uuid ' string_delim ([a-f0-9\-]* string_delim))
|
536
|
+
)
|
537
|
+
(^[a-zA-Z0-9:\.\-+ ]* @exit);
|
538
|
+
}%%
|
539
|
+
|
540
|
+
|
541
|
+
const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Object& o, bool& discard)
|
542
|
+
{
|
543
|
+
int cs;
|
544
|
+
Rice::String str;
|
545
|
+
|
546
|
+
%% write init;
|
547
|
+
p_save = p;
|
548
|
+
%% write exec;
|
549
|
+
|
550
|
+
if (cs >= EDN_dispatch_first_final) {
|
551
|
+
|
552
|
+
//is it a discard? if so, just drop the following token
|
553
|
+
if (*(p_save + 1) == '_')
|
554
|
+
{
|
555
|
+
discard = true;
|
556
|
+
return p + 1;
|
557
|
+
}
|
558
|
+
|
559
|
+
std::size_t len = p - p_save;
|
560
|
+
std::string buf;
|
561
|
+
buf.reserve(len);
|
562
|
+
|
563
|
+
if (len > 10)
|
564
|
+
{
|
565
|
+
// there's enough room to be #inst or #uuid, copy the
|
566
|
+
// string portion
|
567
|
+
if (std::strncmp(p_save + 1, "inst", 4) == 0) {
|
568
|
+
buf.append(p_save + 7, len - 8);
|
569
|
+
} else if (std::strncmp(p_save + 1, "uuid", 4) == 0) {
|
570
|
+
buf.append(p_save + 7, len - 8);
|
571
|
+
}
|
572
|
+
|
573
|
+
o = Rice::String(buf);
|
574
|
+
return p;
|
575
|
+
}
|
576
|
+
|
577
|
+
// tagged element
|
578
|
+
o = Rice::String(buf);
|
579
|
+
return p;
|
580
|
+
}
|
581
|
+
else if (cs == EDN_dispatch_error) {
|
582
|
+
error(*p);
|
583
|
+
return pe;
|
584
|
+
}
|
585
|
+
else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
|
586
|
+
return NULL;
|
587
|
+
}
|
588
|
+
|
589
|
+
|
662
590
|
// ============================================================
|
663
591
|
// main parsing machine
|
664
592
|
//
|
@@ -669,17 +597,17 @@ const char* edn::Parser::EDN_parse_map(const char *p, const char *pe, Rice::Obje
|
|
669
597
|
write data nofinal;
|
670
598
|
|
671
599
|
action parse_vector {
|
672
|
-
const char* np =
|
600
|
+
const char* np = parse_vector(fpc, pe, result);
|
673
601
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
674
602
|
}
|
675
603
|
|
676
604
|
action parse_map {
|
677
|
-
const char *np =
|
605
|
+
const char *np = parse_map(fpc, pe, result);
|
678
606
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
679
607
|
}
|
680
608
|
|
681
609
|
action parse_list {
|
682
|
-
const char *np =
|
610
|
+
const char *np = parse_list(fpc, pe, result);
|
683
611
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
684
612
|
}
|
685
613
|
|
@@ -1,41 +1,39 @@
|
|
1
1
|
#include <iostream>
|
2
2
|
#include <string>
|
3
3
|
#include <fstream>
|
4
|
+
|
4
5
|
#include <rice/Object.hpp>
|
5
6
|
|
7
|
+
#include <ruby/ruby.h>
|
8
|
+
#include <ruby/encoding.h>
|
9
|
+
|
6
10
|
#include "edn_parser.h"
|
7
11
|
|
8
12
|
namespace edn
|
9
13
|
{
|
10
|
-
// ============================================================
|
11
|
-
// reads the contents of a file and begins the parsing process
|
12
14
|
//
|
13
|
-
|
15
|
+
// copies the string data, unescaping any present values that need to be replaced
|
16
|
+
//
|
17
|
+
bool Parser::parse_byte_stream(const char *p_start, const char *p_end, Rice::String& s)
|
14
18
|
{
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
char* buf = new char[len];
|
28
|
-
f.read(buf, len);
|
29
|
-
f.close();
|
30
|
-
|
31
|
-
// parse the buffer
|
32
|
-
rslt = parse(buf, len);
|
33
|
-
|
34
|
-
delete [] buf;
|
19
|
+
if (p_end > p_start) {
|
20
|
+
std::string buf;
|
21
|
+
std::size_t len = p_end - p_start;
|
22
|
+
|
23
|
+
if (unicode_to_utf8(p_start, len, buf))
|
24
|
+
{
|
25
|
+
// utf-8 encode
|
26
|
+
VALUE vs = Rice::protect( rb_str_new2, buf.c_str() );
|
27
|
+
VALUE s_utf8 = Rice::protect( rb_enc_associate, vs, rb_utf8_encoding() );
|
28
|
+
s = Rice::String(s_utf8);
|
29
|
+
return true;
|
30
|
+
}
|
35
31
|
}
|
36
|
-
|
32
|
+
|
33
|
+
return false;
|
37
34
|
}
|
38
35
|
|
36
|
+
|
39
37
|
//
|
40
38
|
// error reporting
|
41
39
|
void Parser::error(const std::string& err, char c) const
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#include <string>
|
2
|
+
|
3
|
+
//
|
4
|
+
// needed to define this in its own file because icu and ruby have
|
5
|
+
// differing definitions for Uchar and the compiler complains
|
6
|
+
//
|
7
|
+
#include <unicode/utypes.h>
|
8
|
+
#include <unicode/ustring.h>
|
9
|
+
#include <unicode/ucnv.h>
|
10
|
+
|
11
|
+
#include "edn_parser.h"
|
12
|
+
|
13
|
+
namespace edn
|
14
|
+
{
|
15
|
+
//
|
16
|
+
// unescapes any values that need to be replaced, saves it to utf8
|
17
|
+
//
|
18
|
+
bool Parser::unicode_to_utf8(const char *s, std::size_t len, std::string& rslt)
|
19
|
+
{
|
20
|
+
icu::UnicodeString ustr(s, len);
|
21
|
+
|
22
|
+
if (ustr.isBogus()) {
|
23
|
+
return false;
|
24
|
+
}
|
25
|
+
|
26
|
+
ustr.unescape().toUTF8String(rslt);
|
27
|
+
return true;
|
28
|
+
}
|
29
|
+
}
|
data/ext/edn_turbo/extconf.rb
CHANGED
@@ -1,3 +1,26 @@
|
|
1
1
|
require 'mkmf-rice'
|
2
2
|
|
3
|
+
HEADER_DIRS = [
|
4
|
+
'/usr/local/include',
|
5
|
+
'/usr/local/opt/icu4c/include',
|
6
|
+
'/usr/include'
|
7
|
+
]
|
8
|
+
|
9
|
+
LIB_DIRS = [
|
10
|
+
'/usr/local/lib', # must be the first entry; add others after it
|
11
|
+
'/usr/local/opt/icu4c/lib'
|
12
|
+
]
|
13
|
+
|
14
|
+
unless find_header('unicode/uversion.h', *HEADER_DIRS)
|
15
|
+
abort "icu4c headers missing"
|
16
|
+
end
|
17
|
+
|
18
|
+
# haven't figured out how this ever works so..
|
19
|
+
#unless have_library('icuuc', 'uconv_close', *LIB_DIRS)
|
20
|
+
# abort "ic4c lib missing"
|
21
|
+
#end
|
22
|
+
|
23
|
+
# do this instead. sigh
|
24
|
+
$LOCAL_LIBS="-L#{LIB_DIRS[1]} -licuuc"
|
25
|
+
|
3
26
|
create_makefile("edn_turbo/edn_turbo")
|
data/ext/edn_turbo/main.cc
CHANGED
@@ -41,7 +41,6 @@ void Init_edn_turbo(void)
|
|
41
41
|
Rice::define_class_under<edn::Parser>(rb_mEDNT, "Parser")
|
42
42
|
.define_constructor(Rice::Constructor<edn::Parser>())
|
43
43
|
.define_method("ext_read", &edn::Parser::process, (Rice::Arg("data")))
|
44
|
-
.define_method("ext_open", &edn::Parser::open, (Rice::Arg("file")))
|
45
44
|
;
|
46
45
|
|
47
46
|
// import whatever else we've defined in the ruby side
|
data/lib/edn_turbo/version.rb
CHANGED
data/test/test_output_diff.rb
CHANGED
@@ -38,6 +38,15 @@ class EDNT_Test < Minitest::Test
|
|
38
38
|
)
|
39
39
|
end
|
40
40
|
|
41
|
+
def test_unicode
|
42
|
+
|
43
|
+
check_file('test/unicode.edn',
|
44
|
+
[:text,
|
45
|
+
"Page \u0018, October 2009 TechTIPS",
|
46
|
+
"This should be an unfilled star: ☆"]
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
41
50
|
def test_vector
|
42
51
|
|
43
52
|
check_file('test/vector_1.edn',
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: edn_turbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ed Porras
|
@@ -70,6 +70,7 @@ files:
|
|
70
70
|
- ext/edn_turbo/edn_parser.h
|
71
71
|
- ext/edn_turbo/edn_parser.rl
|
72
72
|
- ext/edn_turbo/edn_parser_def.cc
|
73
|
+
- ext/edn_turbo/edn_parser_unicode.cc
|
73
74
|
- ext/edn_turbo/extconf.rb
|
74
75
|
- ext/edn_turbo/main.cc
|
75
76
|
- lib/edn_turbo.rb
|