edn_turbo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +52 -12
- data/ext/edn_turbo/edn_parser.cc +504 -576
- data/ext/edn_turbo/edn_parser.h +12 -14
- data/ext/edn_turbo/edn_parser.rl +103 -175
- data/ext/edn_turbo/edn_parser_def.cc +22 -24
- data/ext/edn_turbo/edn_parser_unicode.cc +29 -0
- data/ext/edn_turbo/extconf.rb +23 -0
- data/ext/edn_turbo/main.cc +0 -1
- data/lib/edn_turbo/version.rb +1 -1
- data/test/test_output_diff.rb +9 -0
- metadata +2 -1
data/ext/edn_turbo/edn_parser.h
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#define EDN_RUBY_EXT_PARSER_H
|
3
3
|
|
4
4
|
#include <string>
|
5
|
-
#include <
|
5
|
+
#include <sstream>
|
6
6
|
#include <rice/Object.hpp>
|
7
7
|
#include <rice/to_from_ruby.hpp>
|
8
8
|
|
@@ -21,17 +21,18 @@ namespace edn
|
|
21
21
|
|
22
22
|
Rice::Object parse(const char* s, std::size_t len);
|
23
23
|
|
24
|
-
const char*
|
25
|
-
const char*
|
26
|
-
const char*
|
27
|
-
const char*
|
28
|
-
const char*
|
29
|
-
const char*
|
30
|
-
const char*
|
31
|
-
const char*
|
32
|
-
const char*
|
24
|
+
const char* parse_decimal(const char *p, const char *pe, Rice::Object& o);
|
25
|
+
const char* parse_integer(const char *p, const char *pe, Rice::Object& o);
|
26
|
+
const char* parse_keyword(const char *p, const char *pe, Rice::Object& o);
|
27
|
+
const char* parse_tagged (const char *p, const char *pe, Rice::Object& o, bool& dicard);
|
28
|
+
const char* parse_string (const char *p, const char *pe, Rice::Object& o);
|
29
|
+
const char* parse_value (const char *p, const char *pe, Rice::Object& o);
|
30
|
+
const char* parse_vector (const char *p, const char *pe, Rice::Object& o);
|
31
|
+
const char* parse_map (const char *p, const char *pe, Rice::Object& o);
|
32
|
+
const char* parse_list (const char *p, const char *pe, Rice::Object& o);
|
33
33
|
|
34
|
-
bool
|
34
|
+
static bool parse_byte_stream(const char *p, const char *pe, Rice::String& s);
|
35
|
+
static bool unicode_to_utf8(const char *s, std::size_t len, std::string& rslt);
|
35
36
|
|
36
37
|
void error(const std::string& err, char c) const;
|
37
38
|
void error(char err_c) const { error("", err_c); }
|
@@ -52,9 +53,6 @@ namespace edn
|
|
52
53
|
|
53
54
|
Rice::Object process(const std::string& data) { return parse(data.c_str(), data.length()); }
|
54
55
|
|
55
|
-
// handle file read from the c-side
|
56
|
-
Rice::Object open(const std::string& file);
|
57
|
-
|
58
56
|
}; // Engine
|
59
57
|
|
60
58
|
} // namespace
|
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -1,9 +1,6 @@
|
|
1
1
|
#include <iostream>
|
2
2
|
#include <string>
|
3
3
|
|
4
|
-
#include <ruby/ruby.h>
|
5
|
-
#include <ruby/encoding.h>
|
6
|
-
|
7
4
|
#include <rice/Hash.hpp>
|
8
5
|
#include <rice/Array.hpp>
|
9
6
|
#include <rice/to_from_ruby.hpp>
|
@@ -13,6 +10,10 @@
|
|
13
10
|
//
|
14
11
|
// EDN spec at: https://github.com/edn-format/edn
|
15
12
|
//
|
13
|
+
//
|
14
|
+
// many thanks to Florian Frank for json-ruby which was essential in
|
15
|
+
// helping me learn about ragel
|
16
|
+
//
|
16
17
|
|
17
18
|
%%{
|
18
19
|
machine EDN_common;
|
@@ -67,21 +68,21 @@
|
|
67
68
|
}
|
68
69
|
|
69
70
|
action parse_keyword {
|
70
|
-
const char *np =
|
71
|
+
const char *np = parse_keyword(fpc, pe, o);
|
71
72
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
72
73
|
}
|
73
74
|
|
74
75
|
action parse_string {
|
75
|
-
const char *np =
|
76
|
+
const char *np = parse_string(fpc, pe, o);
|
76
77
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
77
78
|
}
|
78
79
|
|
79
80
|
action parse_number {
|
80
81
|
// try to parse a decimal first
|
81
|
-
const char *np =
|
82
|
+
const char *np = parse_decimal(fpc, pe, o);
|
82
83
|
if (np == NULL) {
|
83
84
|
// if we can't, try to parse it as an int
|
84
|
-
np =
|
85
|
+
np = parse_integer(fpc, pe, o);
|
85
86
|
}
|
86
87
|
|
87
88
|
if (np) {
|
@@ -96,17 +97,17 @@
|
|
96
97
|
}
|
97
98
|
|
98
99
|
action parse_vector {
|
99
|
-
const char *np =
|
100
|
+
const char *np = parse_vector(fpc, pe, o);
|
100
101
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
101
102
|
}
|
102
103
|
|
103
104
|
action parse_list {
|
104
|
-
const char *np =
|
105
|
+
const char *np = parse_list(fpc, pe, o);
|
105
106
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
106
107
|
}
|
107
108
|
|
108
109
|
action parse_map {
|
109
|
-
const char *np =
|
110
|
+
const char *np = parse_map(fpc, pe, o);
|
110
111
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
111
112
|
}
|
112
113
|
|
@@ -126,7 +127,7 @@
|
|
126
127
|
}%%
|
127
128
|
|
128
129
|
|
129
|
-
const char *edn::Parser::
|
130
|
+
const char *edn::Parser::parse_value(const char *p, const char *pe, Rice::Object& o)
|
130
131
|
{
|
131
132
|
int cs;
|
132
133
|
|
@@ -145,80 +146,6 @@ const char *edn::Parser::EDN_parse_value(const char *p, const char *pe, Rice::Ob
|
|
145
146
|
}
|
146
147
|
|
147
148
|
|
148
|
-
// ============================================================
|
149
|
-
// tagged element parsing - any of #uuid, #inst, #{, #(some symbol)
|
150
|
-
// discard (#_ <ident>) is handled by the top-level machine
|
151
|
-
//
|
152
|
-
%%{
|
153
|
-
machine EDN_dispatch;
|
154
|
-
include EDN_common;
|
155
|
-
|
156
|
-
begin_discard = '_';
|
157
|
-
begin_set = '{';
|
158
|
-
end_set = '}';
|
159
|
-
|
160
|
-
write data;
|
161
|
-
|
162
|
-
action exit { fhold; fbreak; }
|
163
|
-
|
164
|
-
main := begin_dispatch (
|
165
|
-
(begin_discard (space)? ([a-zA-Z0-9\-\.]*)) |
|
166
|
-
('inst ' string_delim ([0-9\-+:\.TZ])* string_delim) |
|
167
|
-
('uuid ' string_delim ([a-f0-9\-]* string_delim))
|
168
|
-
)
|
169
|
-
(^[a-zA-Z0-9:\.\-+ ]* @exit);
|
170
|
-
}%%
|
171
|
-
|
172
|
-
|
173
|
-
const char* edn::Parser::EDN_parse_tagged(const char *p, const char *pe, Rice::Object& o, bool& discard)
|
174
|
-
{
|
175
|
-
int cs;
|
176
|
-
Rice::String str;
|
177
|
-
|
178
|
-
%% write init;
|
179
|
-
p_save = p;
|
180
|
-
%% write exec;
|
181
|
-
|
182
|
-
if (cs >= EDN_dispatch_first_final) {
|
183
|
-
|
184
|
-
//is it a discard? if so, just drop the following token
|
185
|
-
if (*(p_save + 1) == '_')
|
186
|
-
{
|
187
|
-
discard = true;
|
188
|
-
return p + 1;
|
189
|
-
}
|
190
|
-
|
191
|
-
std::size_t len = p - p_save;
|
192
|
-
std::string buf;
|
193
|
-
buf.reserve(len);
|
194
|
-
|
195
|
-
if (len > 10)
|
196
|
-
{
|
197
|
-
// there's enough room to be #inst or #uuid, copy the
|
198
|
-
// string portion
|
199
|
-
if (std::strncmp(p_save + 1, "inst", 4) == 0) {
|
200
|
-
buf.append(p_save + 7, len - 8);
|
201
|
-
} else if (std::strncmp(p_save + 1, "uuid", 4) == 0) {
|
202
|
-
buf.append(p_save + 7, len - 8);
|
203
|
-
}
|
204
|
-
|
205
|
-
o = Rice::String(buf);
|
206
|
-
return p;
|
207
|
-
}
|
208
|
-
|
209
|
-
// tagged element
|
210
|
-
o = Rice::String(buf);
|
211
|
-
return p;
|
212
|
-
}
|
213
|
-
else if (cs == EDN_dispatch_error) {
|
214
|
-
error(*p);
|
215
|
-
return pe;
|
216
|
-
}
|
217
|
-
else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
|
218
|
-
return NULL;
|
219
|
-
}
|
220
|
-
|
221
|
-
|
222
149
|
|
223
150
|
// ============================================================
|
224
151
|
// keyword parsing
|
@@ -237,7 +164,7 @@ const char* edn::Parser::EDN_parse_tagged(const char *p, const char *pe, Rice::O
|
|
237
164
|
}%%
|
238
165
|
|
239
166
|
|
240
|
-
const char* edn::Parser::
|
167
|
+
const char* edn::Parser::parse_keyword(const char *p, const char *pe, Rice::Object& o)
|
241
168
|
{
|
242
169
|
int cs;
|
243
170
|
|
@@ -272,7 +199,7 @@ const char* edn::Parser::EDN_parse_keyword(const char *p, const char *pe, Rice::
|
|
272
199
|
write data;
|
273
200
|
|
274
201
|
action parse_string {
|
275
|
-
if (!
|
202
|
+
if (!parse_byte_stream(p_save + 1, p, s)) {
|
276
203
|
fhold;
|
277
204
|
fbreak;
|
278
205
|
} else {
|
@@ -292,82 +219,7 @@ const char* edn::Parser::EDN_parse_keyword(const char *p, const char *pe, Rice::
|
|
292
219
|
}%%
|
293
220
|
|
294
221
|
|
295
|
-
|
296
|
-
// copies the string data, unescaping any present values that need to be replaced
|
297
|
-
//
|
298
|
-
bool edn::Parser::EDN_parse_byte_stream(const char *p, const char *pe, Rice::String& s)
|
299
|
-
{
|
300
|
-
if (pe > p) {
|
301
|
-
std::string buf;
|
302
|
-
std::size_t len = pe - p;
|
303
|
-
|
304
|
-
// pre-allocate storage needed
|
305
|
-
buf.reserve(len);
|
306
|
-
|
307
|
-
const char* cp = p;
|
308
|
-
std::size_t pos = 0;
|
309
|
-
char c, replacement;
|
310
|
-
|
311
|
-
while (cp < pe)
|
312
|
-
{
|
313
|
-
// append any other character that is not the escaping slash
|
314
|
-
if (*cp != '\\') {
|
315
|
-
buf.replace(pos++, 1, 1, *cp++);
|
316
|
-
continue;
|
317
|
-
}
|
318
|
-
|
319
|
-
// looking at a '\' - check what it escapes if there's a
|
320
|
-
// following character
|
321
|
-
if (++cp == pe)
|
322
|
-
break;
|
323
|
-
|
324
|
-
c = *cp++;
|
325
|
-
replacement = '?';
|
326
|
-
|
327
|
-
switch (c)
|
328
|
-
{
|
329
|
-
case 't':
|
330
|
-
replacement = '\t';
|
331
|
-
break;
|
332
|
-
case 'n':
|
333
|
-
replacement = '\n';
|
334
|
-
break;
|
335
|
-
case 'r':
|
336
|
-
replacement = '\r';
|
337
|
-
break;
|
338
|
-
case '\"':
|
339
|
-
replacement = '\"';
|
340
|
-
break;
|
341
|
-
case '\\':
|
342
|
-
replacement = '\\';
|
343
|
-
break;
|
344
|
-
/* TODO: add support for this!
|
345
|
-
case 'u':
|
346
|
-
replacement = '\u';
|
347
|
-
break;
|
348
|
-
*/
|
349
|
-
default:
|
350
|
-
std::cerr << "value must be unescaped but case is unhandled: '" << c << "'" << std::endl;
|
351
|
-
break;
|
352
|
-
}
|
353
|
-
|
354
|
-
// substitute the escaped walue
|
355
|
-
if (replacement != '?')
|
356
|
-
buf.replace(pos++, 1, 1, replacement);
|
357
|
-
}
|
358
|
-
|
359
|
-
// utf-8 encode
|
360
|
-
VALUE vs = Rice::protect( rb_str_new2, buf.c_str() );
|
361
|
-
VALUE s_utf8 = Rice::protect( rb_enc_associate, vs, rb_utf8_encoding() );
|
362
|
-
s = Rice::String(s_utf8);
|
363
|
-
return true;
|
364
|
-
}
|
365
|
-
|
366
|
-
return false;
|
367
|
-
}
|
368
|
-
|
369
|
-
|
370
|
-
const char* edn::Parser::EDN_parse_string(const char *p, const char *pe, Rice::Object& o)
|
222
|
+
const char* edn::Parser::parse_string(const char *p, const char *pe, Rice::Object& o)
|
371
223
|
{
|
372
224
|
static const char* EDN_TYPE = "string";
|
373
225
|
int cs;
|
@@ -410,7 +262,7 @@ const char* edn::Parser::EDN_parse_string(const char *p, const char *pe, Rice::O
|
|
410
262
|
}%%
|
411
263
|
|
412
264
|
|
413
|
-
const char* edn::Parser::
|
265
|
+
const char* edn::Parser::parse_decimal(const char *p, const char *pe, Rice::Object& o)
|
414
266
|
{
|
415
267
|
int cs;
|
416
268
|
|
@@ -441,7 +293,7 @@ const char* edn::Parser::EDN_parse_decimal(const char *p, const char *pe, Rice::
|
|
441
293
|
main := '-'? ('0' | [1-9][0-9]* [M]?) (^[0-9M]? @exit);
|
442
294
|
}%%
|
443
295
|
|
444
|
-
const char* edn::Parser::
|
296
|
+
const char* edn::Parser::parse_integer(const char *p, const char *pe, Rice::Object& o)
|
445
297
|
{
|
446
298
|
int cs;
|
447
299
|
|
@@ -469,7 +321,7 @@ const char* edn::Parser::EDN_parse_integer(const char *p, const char *pe, Rice::
|
|
469
321
|
|
470
322
|
action parse_value {
|
471
323
|
Rice::Object v;
|
472
|
-
const char *np =
|
324
|
+
const char *np = parse_value(fpc, pe, v);
|
473
325
|
if (np == NULL) {
|
474
326
|
fhold; fbreak;
|
475
327
|
} else {
|
@@ -481,7 +333,7 @@ const char* edn::Parser::EDN_parse_integer(const char *p, const char *pe, Rice::
|
|
481
333
|
action parse_dispatch {
|
482
334
|
bool discard = false;
|
483
335
|
Rice::Object v;
|
484
|
-
const char *np =
|
336
|
+
const char *np = parse_tagged(fpc, pe, v, discard);
|
485
337
|
if (np == NULL) {
|
486
338
|
fhold; fbreak;
|
487
339
|
} else {
|
@@ -520,7 +372,7 @@ const char* edn::Parser::EDN_parse_integer(const char *p, const char *pe, Rice::
|
|
520
372
|
//
|
521
373
|
// vector parsing
|
522
374
|
//
|
523
|
-
const char* edn::Parser::
|
375
|
+
const char* edn::Parser::parse_vector(const char *p, const char *pe, Rice::Object& o)
|
524
376
|
{
|
525
377
|
static const char* EDN_TYPE = "vector";
|
526
378
|
|
@@ -563,7 +415,7 @@ const char* edn::Parser::EDN_parse_vector(const char *p, const char *pe, Rice::O
|
|
563
415
|
//
|
564
416
|
// list parsing
|
565
417
|
//
|
566
|
-
const char* edn::Parser::
|
418
|
+
const char* edn::Parser::parse_list(const char *p, const char *pe, Rice::Object& o)
|
567
419
|
{
|
568
420
|
static const char* EDN_TYPE = "list";
|
569
421
|
|
@@ -597,7 +449,7 @@ const char* edn::Parser::EDN_parse_list(const char *p, const char *pe, Rice::Obj
|
|
597
449
|
write data;
|
598
450
|
|
599
451
|
action parse_key {
|
600
|
-
const char *np =
|
452
|
+
const char *np = parse_value(fpc, pe, k);
|
601
453
|
if (np == NULL) {
|
602
454
|
fhold; fbreak;
|
603
455
|
} else {
|
@@ -606,7 +458,7 @@ const char* edn::Parser::EDN_parse_list(const char *p, const char *pe, Rice::Obj
|
|
606
458
|
}
|
607
459
|
|
608
460
|
action parse_value {
|
609
|
-
const char *np =
|
461
|
+
const char *np = parse_value(fpc, pe, v);
|
610
462
|
if (np == NULL) {
|
611
463
|
fhold; fbreak;
|
612
464
|
} else {
|
@@ -634,7 +486,7 @@ const char* edn::Parser::EDN_parse_list(const char *p, const char *pe, Rice::Obj
|
|
634
486
|
}%%
|
635
487
|
|
636
488
|
|
637
|
-
const char* edn::Parser::
|
489
|
+
const char* edn::Parser::parse_map(const char *p, const char *pe, Rice::Object& o)
|
638
490
|
{
|
639
491
|
static const char* EDN_TYPE = "map";
|
640
492
|
|
@@ -659,6 +511,82 @@ const char* edn::Parser::EDN_parse_map(const char *p, const char *pe, Rice::Obje
|
|
659
511
|
|
660
512
|
|
661
513
|
|
514
|
+
// ============================================================
|
515
|
+
// tagged element parsing - any of #uuid, #inst, #{, #(some symbol)
|
516
|
+
// discard (#_ <ident>) is handled by the top-level machine
|
517
|
+
//
|
518
|
+
// NOTE: this is not fully implemented yet
|
519
|
+
//
|
520
|
+
%%{
|
521
|
+
machine EDN_dispatch;
|
522
|
+
include EDN_common;
|
523
|
+
|
524
|
+
begin_discard = '_';
|
525
|
+
begin_set = '{';
|
526
|
+
end_set = '}';
|
527
|
+
|
528
|
+
write data;
|
529
|
+
|
530
|
+
action exit { fhold; fbreak; }
|
531
|
+
|
532
|
+
main := begin_dispatch (
|
533
|
+
(begin_discard (space)? ([a-zA-Z0-9\-\.]*)) |
|
534
|
+
('inst ' string_delim ([0-9\-+:\.TZ])* string_delim) |
|
535
|
+
('uuid ' string_delim ([a-f0-9\-]* string_delim))
|
536
|
+
)
|
537
|
+
(^[a-zA-Z0-9:\.\-+ ]* @exit);
|
538
|
+
}%%
|
539
|
+
|
540
|
+
|
541
|
+
const char* edn::Parser::parse_tagged(const char *p, const char *pe, Rice::Object& o, bool& discard)
|
542
|
+
{
|
543
|
+
int cs;
|
544
|
+
Rice::String str;
|
545
|
+
|
546
|
+
%% write init;
|
547
|
+
p_save = p;
|
548
|
+
%% write exec;
|
549
|
+
|
550
|
+
if (cs >= EDN_dispatch_first_final) {
|
551
|
+
|
552
|
+
//is it a discard? if so, just drop the following token
|
553
|
+
if (*(p_save + 1) == '_')
|
554
|
+
{
|
555
|
+
discard = true;
|
556
|
+
return p + 1;
|
557
|
+
}
|
558
|
+
|
559
|
+
std::size_t len = p - p_save;
|
560
|
+
std::string buf;
|
561
|
+
buf.reserve(len);
|
562
|
+
|
563
|
+
if (len > 10)
|
564
|
+
{
|
565
|
+
// there's enough room to be #inst or #uuid, copy the
|
566
|
+
// string portion
|
567
|
+
if (std::strncmp(p_save + 1, "inst", 4) == 0) {
|
568
|
+
buf.append(p_save + 7, len - 8);
|
569
|
+
} else if (std::strncmp(p_save + 1, "uuid", 4) == 0) {
|
570
|
+
buf.append(p_save + 7, len - 8);
|
571
|
+
}
|
572
|
+
|
573
|
+
o = Rice::String(buf);
|
574
|
+
return p;
|
575
|
+
}
|
576
|
+
|
577
|
+
// tagged element
|
578
|
+
o = Rice::String(buf);
|
579
|
+
return p;
|
580
|
+
}
|
581
|
+
else if (cs == EDN_dispatch_error) {
|
582
|
+
error(*p);
|
583
|
+
return pe;
|
584
|
+
}
|
585
|
+
else if (cs == EDN_dispatch_en_main) {} // silence ragel warning
|
586
|
+
return NULL;
|
587
|
+
}
|
588
|
+
|
589
|
+
|
662
590
|
// ============================================================
|
663
591
|
// main parsing machine
|
664
592
|
//
|
@@ -669,17 +597,17 @@ const char* edn::Parser::EDN_parse_map(const char *p, const char *pe, Rice::Obje
|
|
669
597
|
write data nofinal;
|
670
598
|
|
671
599
|
action parse_vector {
|
672
|
-
const char* np =
|
600
|
+
const char* np = parse_vector(fpc, pe, result);
|
673
601
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
674
602
|
}
|
675
603
|
|
676
604
|
action parse_map {
|
677
|
-
const char *np =
|
605
|
+
const char *np = parse_map(fpc, pe, result);
|
678
606
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
679
607
|
}
|
680
608
|
|
681
609
|
action parse_list {
|
682
|
-
const char *np =
|
610
|
+
const char *np = parse_list(fpc, pe, result);
|
683
611
|
if (np == NULL) { fhold; fbreak; } else fexec np;
|
684
612
|
}
|
685
613
|
|
data/ext/edn_turbo/edn_parser_def.cc
CHANGED
@@ -1,41 +1,39 @@
|
|
1
1
|
#include <iostream>
|
2
2
|
#include <string>
|
3
3
|
#include <fstream>
|
4
|
+
|
4
5
|
#include <rice/Object.hpp>
|
5
6
|
|
7
|
+
#include <ruby/ruby.h>
|
8
|
+
#include <ruby/encoding.h>
|
9
|
+
|
6
10
|
#include "edn_parser.h"
|
7
11
|
|
8
12
|
namespace edn
|
9
13
|
{
|
10
|
-
// ============================================================
|
11
|
-
// reads the contents of a file and begins the parsing process
|
12
14
|
//
|
13
|
-
|
15
|
+
// copies the string data, unescaping any present values that need to be replaced
|
16
|
+
//
|
17
|
+
bool Parser::parse_byte_stream(const char *p_start, const char *p_end, Rice::String& s)
|
14
18
|
{
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
char* buf = new char[len];
|
28
|
-
f.read(buf, len);
|
29
|
-
f.close();
|
30
|
-
|
31
|
-
// parse the buffer
|
32
|
-
rslt = parse(buf, len);
|
33
|
-
|
34
|
-
delete [] buf;
|
19
|
+
if (p_end > p_start) {
|
20
|
+
std::string buf;
|
21
|
+
std::size_t len = p_end - p_start;
|
22
|
+
|
23
|
+
if (unicode_to_utf8(p_start, len, buf))
|
24
|
+
{
|
25
|
+
// utf-8 encode
|
26
|
+
VALUE vs = Rice::protect( rb_str_new2, buf.c_str() );
|
27
|
+
VALUE s_utf8 = Rice::protect( rb_enc_associate, vs, rb_utf8_encoding() );
|
28
|
+
s = Rice::String(s_utf8);
|
29
|
+
return true;
|
30
|
+
}
|
35
31
|
}
|
36
|
-
|
32
|
+
|
33
|
+
return false;
|
37
34
|
}
|
38
35
|
|
36
|
+
|
39
37
|
//
|
40
38
|
// error reporting
|
41
39
|
void Parser::error(const std::string& err, char c) const
|
data/ext/edn_turbo/edn_parser_unicode.cc
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#include <string>
|
2
|
+
|
3
|
+
//
|
4
|
+
// needed to define this in its own file because icu and ruby have
|
5
|
+
// differing definitions for Uchar and the compiler complains
|
6
|
+
//
|
7
|
+
#include <unicode/utypes.h>
|
8
|
+
#include <unicode/ustring.h>
|
9
|
+
#include <unicode/ucnv.h>
|
10
|
+
|
11
|
+
#include "edn_parser.h"
|
12
|
+
|
13
|
+
namespace edn
|
14
|
+
{
|
15
|
+
//
|
16
|
+
// unescapes any values that need to be replaced, saves it to utf8
|
17
|
+
//
|
18
|
+
bool Parser::unicode_to_utf8(const char *s, std::size_t len, std::string& rslt)
|
19
|
+
{
|
20
|
+
icu::UnicodeString ustr(s, len);
|
21
|
+
|
22
|
+
if (ustr.isBogus()) {
|
23
|
+
return false;
|
24
|
+
}
|
25
|
+
|
26
|
+
ustr.unescape().toUTF8String(rslt);
|
27
|
+
return true;
|
28
|
+
}
|
29
|
+
}
|
data/ext/edn_turbo/extconf.rb
CHANGED
@@ -1,3 +1,26 @@
|
|
1
1
|
require 'mkmf-rice'
|
2
2
|
|
3
|
+
HEADER_DIRS = [
|
4
|
+
'/usr/local/include',
|
5
|
+
'/usr/local/opt/icu4c/include',
|
6
|
+
'/usr/include'
|
7
|
+
]
|
8
|
+
|
9
|
+
LIB_DIRS = [
|
10
|
+
'/usr/local/lib', # must be the first entry; add others after it
|
11
|
+
'/usr/local/opt/icu4c/lib'
|
12
|
+
]
|
13
|
+
|
14
|
+
unless find_header('unicode/uversion.h', *HEADER_DIRS)
|
15
|
+
abort "icu4c headers missing"
|
16
|
+
end
|
17
|
+
|
18
|
+
# haven't figured out how this ever works so..
|
19
|
+
#unless have_library('icuuc', 'uconv_close', *LIB_DIRS)
|
20
|
+
# abort "ic4c lib missing"
|
21
|
+
#end
|
22
|
+
|
23
|
+
# do this instead. sigh
|
24
|
+
$LOCAL_LIBS="-L#{LIB_DIRS[1]} -licuuc"
|
25
|
+
|
3
26
|
create_makefile("edn_turbo/edn_turbo")
|
data/ext/edn_turbo/main.cc
CHANGED
@@ -41,7 +41,6 @@ void Init_edn_turbo(void)
|
|
41
41
|
Rice::define_class_under<edn::Parser>(rb_mEDNT, "Parser")
|
42
42
|
.define_constructor(Rice::Constructor<edn::Parser>())
|
43
43
|
.define_method("ext_read", &edn::Parser::process, (Rice::Arg("data")))
|
44
|
-
.define_method("ext_open", &edn::Parser::open, (Rice::Arg("file")))
|
45
44
|
;
|
46
45
|
|
47
46
|
// import whatever else we've defined in the ruby side
|
data/lib/edn_turbo/version.rb
CHANGED
data/test/test_output_diff.rb
CHANGED
@@ -38,6 +38,15 @@ class EDNT_Test < Minitest::Test
|
|
38
38
|
)
|
39
39
|
end
|
40
40
|
|
41
|
+
def test_unicode
|
42
|
+
|
43
|
+
check_file('test/unicode.edn',
|
44
|
+
[:text,
|
45
|
+
"Page \u0018, October 2009 TechTIPS",
|
46
|
+
"This should be an unfilled star: ☆"]
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
41
50
|
def test_vector
|
42
51
|
|
43
52
|
check_file('test/vector_1.edn',
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: edn_turbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ed Porras
|
@@ -70,6 +70,7 @@ files:
|
|
70
70
|
- ext/edn_turbo/edn_parser.h
|
71
71
|
- ext/edn_turbo/edn_parser.rl
|
72
72
|
- ext/edn_turbo/edn_parser_def.cc
|
73
|
+
- ext/edn_turbo/edn_parser_unicode.cc
|
73
74
|
- ext/edn_turbo/extconf.rb
|
74
75
|
- ext/edn_turbo/main.cc
|
75
76
|
- lib/edn_turbo.rb
|