edn_turbo 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/edn_turbo/depend +3 -0
- data/ext/edn_turbo/edn_parser.cc +274 -275
- data/ext/edn_turbo/edn_parser.h +16 -12
- data/ext/edn_turbo/edn_parser.rl +8 -9
- data/ext/edn_turbo/edn_parser_util.cc +105 -1
- data/ext/edn_turbo/edn_parser_util.h +11 -0
- data/ext/edn_turbo/edn_parser_util_unicode.cc +32 -0
- data/ext/edn_turbo/main.cc +57 -14
- data/lib/edn_turbo/version.rb +2 -2
- data/test/test_output_diff.rb +169 -129
- metadata +6 -4
- data/ext/edn_turbo/edn_parser_unicode.cc +0 -29
data/ext/edn_turbo/edn_parser.h
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
#
|
2
|
-
#define EDN_RAGEL_PARSER_H
|
1
|
+
#pragma once
|
3
2
|
|
4
3
|
#include <string>
|
5
4
|
#include <sstream>
|
6
5
|
#include <vector>
|
7
6
|
#include <stack>
|
8
7
|
|
9
|
-
#include <ruby/ruby.h>
|
10
|
-
|
11
8
|
|
12
9
|
namespace edn
|
13
10
|
{
|
@@ -22,6 +19,7 @@ namespace edn
|
|
22
19
|
|
23
20
|
extern VALUE EDNT_STRING_TO_I_METHOD;
|
24
21
|
extern VALUE EDNT_STRING_TO_F_METHOD;
|
22
|
+
extern VALUE EDNT_READ_METHOD;
|
25
23
|
|
26
24
|
extern VALUE EDNT_EOF_CONST;
|
27
25
|
|
@@ -31,13 +29,18 @@ namespace edn
|
|
31
29
|
class Parser
|
32
30
|
{
|
33
31
|
public:
|
34
|
-
Parser() : p(NULL), pe(NULL), eof(NULL),
|
32
|
+
Parser() : p(NULL), pe(NULL), eof(NULL),
|
33
|
+
core_io(NULL), read_io(Qnil),
|
34
|
+
io_buffer(NULL), io_buffer_len(0),
|
35
|
+
line_number(1) {
|
35
36
|
new_meta_list();
|
36
37
|
}
|
37
|
-
~Parser()
|
38
|
+
~Parser();
|
38
39
|
|
39
40
|
// change input source
|
40
41
|
void set_source(const char* src, std::size_t len);
|
42
|
+
void set_source(FILE* fp);
|
43
|
+
void set_source(VALUE string_io);
|
41
44
|
|
42
45
|
bool is_eof() const { return (p == eof); }
|
43
46
|
|
@@ -54,11 +57,16 @@ namespace edn
|
|
54
57
|
const char* p;
|
55
58
|
const char* pe;
|
56
59
|
const char* eof;
|
60
|
+
FILE* core_io; // for IO streams
|
61
|
+
VALUE read_io; // for non-core IO that responds to read()
|
62
|
+
char* io_buffer;
|
63
|
+
uintmax_t io_buffer_len;
|
57
64
|
std::size_t line_number;
|
58
65
|
std::vector<VALUE> discard;
|
59
66
|
std::stack<std::vector<VALUE>* > metadata;
|
60
67
|
|
61
68
|
void reset_state();
|
69
|
+
void fill_buf();
|
62
70
|
|
63
71
|
const char* parse_value (const char *p, const char *pe, VALUE& v);
|
64
72
|
const char* parse_string (const char *p, const char *pe, VALUE& v);
|
@@ -81,12 +89,10 @@ namespace edn
|
|
81
89
|
|
82
90
|
eTokenState parse_next(VALUE& value);
|
83
91
|
|
84
|
-
// defined in edn_parser_unicode.cc
|
85
|
-
static bool to_utf8(const char *s, std::size_t len, std::string& rslt);
|
86
|
-
|
87
92
|
// defined in edn_parser_util.cc
|
88
93
|
static VALUE integer_to_ruby(const char* str, std::size_t len);
|
89
94
|
static VALUE float_to_ruby (const char* str, std::size_t len);
|
95
|
+
static VALUE ruby_io_read(VALUE io);
|
90
96
|
|
91
97
|
static bool parse_byte_stream (const char *p, const char *pe, VALUE& rslt, bool encode);
|
92
98
|
static bool parse_escaped_char(const char *p, const char *pe, VALUE& rslt);
|
@@ -117,8 +123,6 @@ namespace edn
|
|
117
123
|
void error(const std::string& f, const std::string& err, char c) const;
|
118
124
|
void error(const std::string& f, char err_c) const { error(f, "", err_c); }
|
119
125
|
void error(const std::string& f, const std::string& err_msg) const { error(f, err_msg, '\0'); }
|
120
|
-
}; //
|
126
|
+
}; // Parser
|
121
127
|
|
122
128
|
} // namespace
|
123
|
-
|
124
|
-
#endif
|
data/ext/edn_turbo/edn_parser.rl
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
#include <string>
|
3
3
|
#include <vector>
|
4
4
|
#include <exception>
|
5
|
-
|
6
5
|
#include <cstring>
|
7
6
|
|
7
|
+
#include <ruby/ruby.h>
|
8
|
+
|
8
9
|
#include "edn_parser.h"
|
9
10
|
|
10
11
|
//
|
@@ -222,7 +223,7 @@ const char *edn::Parser::parse_value(const char *p, const char *pe, VALUE& v)
|
|
222
223
|
((0xc2..0xf5) |
|
223
224
|
'\\'[\"\\/bfnrt] |
|
224
225
|
'\\u'[0-9a-fA-F]{4}) $mark_for_encoding |
|
225
|
-
|
226
|
+
'\\'^([\"\\/bfnrtu]))* %parse_chars
|
226
227
|
) :>> string_delim @err(close_err) @exit;
|
227
228
|
}%%
|
228
229
|
|
@@ -1189,11 +1190,9 @@ edn::Parser::eTokenState edn::Parser::parse_next(VALUE& value)
|
|
1189
1190
|
return state;
|
1190
1191
|
}
|
1191
1192
|
|
1192
|
-
|
1193
1193
|
/*
|
1194
|
-
* Local variables:
|
1195
|
-
* mode: c
|
1196
|
-
*
|
1197
|
-
*
|
1198
|
-
|
1199
|
-
*/
|
1194
|
+
- * Local variables:
|
1195
|
+
- * mode: c
|
1196
|
+
- * indent-tabs-mode: nil
|
1197
|
+
- * End:
|
1198
|
+
- */
|
@@ -12,6 +12,7 @@
|
|
12
12
|
#include <ruby/encoding.h>
|
13
13
|
|
14
14
|
#include "edn_parser.h"
|
15
|
+
#include "edn_parser_util.h"
|
15
16
|
|
16
17
|
namespace edn
|
17
18
|
{
|
@@ -29,6 +30,18 @@ namespace edn
|
|
29
30
|
static const std::size_t LD_max_chars = get_max_chars<>((double) 1);
|
30
31
|
|
31
32
|
|
33
|
+
// parser destructor
|
34
|
+
//
|
35
|
+
// Tears the parser down: runs reset_state(), deletes the top metadata
// list, then hands io_buffer back to the C allocator.
Parser::~Parser()
{
    reset_state();
    del_top_meta_list();

    // free() is a no-op on a null pointer, so no explicit guard is needed
    free(io_buffer);
}
|
44
|
+
|
32
45
|
// =================================================================
|
33
46
|
// for token-by-token parsing. If a discard or metadata is parsed,
|
34
47
|
// attempt to get the following value
|
@@ -37,6 +50,11 @@ namespace edn
|
|
37
50
|
{
|
38
51
|
VALUE token = EDNT_EOF_CONST;
|
39
52
|
|
53
|
+
// buffer if reading from an IO
|
54
|
+
if (core_io || (read_io != Qnil)) {
|
55
|
+
fill_buf();
|
56
|
+
}
|
57
|
+
|
40
58
|
while (!is_eof())
|
41
59
|
{
|
42
60
|
// fetch a token. If it's metadata or discard
|
@@ -70,6 +88,11 @@ namespace edn
|
|
70
88
|
}
|
71
89
|
// but clear any metadata on the first
|
72
90
|
metadata.top()->clear();
|
91
|
+
|
92
|
+
// clean up
|
93
|
+
core_io = NULL;
|
94
|
+
read_io = Qnil;
|
95
|
+
p = pe = eof = NULL;
|
73
96
|
}
|
74
97
|
|
75
98
|
//
|
@@ -83,6 +106,78 @@ namespace edn
|
|
83
106
|
eof = pe;
|
84
107
|
}
|
85
108
|
|
109
|
+
void Parser::set_source(FILE* fp)
|
110
|
+
{
|
111
|
+
reset_state();
|
112
|
+
core_io = fp;
|
113
|
+
}
|
114
|
+
|
115
|
+
// Selects a ruby object as the input source. Nothing is read here; the
// object is only remembered in read_io for fill_buf() to consume.
void Parser::set_source(VALUE str_io)
{
    // reset first, then record the object (same order as before)
    reset_state();
    this->read_io = str_io;
}
|
120
|
+
|
121
|
+
//
|
122
|
+
// for IO sources, read and fill a buffer
|
123
|
+
void Parser::fill_buf()
|
124
|
+
{
|
125
|
+
std::string str_buf;
|
126
|
+
|
127
|
+
// read as much data available
|
128
|
+
if (core_io) {
|
129
|
+
// ruby core IO types
|
130
|
+
char c;
|
131
|
+
while (1)
|
132
|
+
{
|
133
|
+
c = fgetc(core_io);
|
134
|
+
if (c == EOF) {
|
135
|
+
break;
|
136
|
+
}
|
137
|
+
str_buf += c;
|
138
|
+
}
|
139
|
+
|
140
|
+
} else if (read_io != Qnil) {
|
141
|
+
// StringIO, etc. Call read() from ruby side
|
142
|
+
VALUE v = ruby_io_read(read_io);
|
143
|
+
if (TYPE(v) == T_STRING) {
|
144
|
+
str_buf.assign( StringValuePtr(v), RSTRING_LEN(v));
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
// set the buffer to read from
|
149
|
+
if (str_buf.length() > 0) {
|
150
|
+
// first time when io_buffer is NULL, pe & p = 0
|
151
|
+
uintmax_t new_length = ((uintmax_t) (pe - p)) + str_buf.length();
|
152
|
+
char* start = NULL;
|
153
|
+
|
154
|
+
// allocate or extend storage needed
|
155
|
+
if (!io_buffer) {
|
156
|
+
io_buffer = reinterpret_cast<char*>(malloc(new_length));
|
157
|
+
start = io_buffer;
|
158
|
+
} else if (io_buffer_len < new_length) {
|
159
|
+
// resize the buffer
|
160
|
+
realloc(reinterpret_cast<void*>(io_buffer), new_length);
|
161
|
+
}
|
162
|
+
|
163
|
+
if (!start) {
|
164
|
+
// appending to the buffer but move the data not yet
|
165
|
+
// parsed first to the front
|
166
|
+
memmove(io_buffer, p, pe - p);
|
167
|
+
start = io_buffer + (pe - p);
|
168
|
+
}
|
169
|
+
|
170
|
+
// and copy
|
171
|
+
memcpy(start, str_buf.c_str(), str_buf.length());
|
172
|
+
io_buffer_len = new_length;
|
173
|
+
|
174
|
+
// set ragel state
|
175
|
+
p = io_buffer;
|
176
|
+
pe = p + new_length;
|
177
|
+
eof = pe;
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
86
181
|
|
87
182
|
// =================================================================
|
88
183
|
// work-around for idiotic rb_protect convention in order to avoid
|
@@ -188,6 +283,15 @@ namespace edn
|
|
188
283
|
}
|
189
284
|
|
190
285
|
|
286
|
+
//
|
287
|
+
// read from a StringIO - expensive!!!
|
288
|
+
//
|
289
|
+
// Calls the method named by EDNT_READ_METHOD on the given ruby object via
// edn_prot_rb_funcall / edn_wrap_funcall2 and returns that call's result.
VALUE Parser::ruby_io_read(VALUE io)
{
    prot_args read_call(io, EDNT_READ_METHOD);

    // the argument bundle travels through the helper as an opaque VALUE
    const VALUE packed = reinterpret_cast<VALUE>(&read_call);
    return edn_prot_rb_funcall( edn_wrap_funcall2, packed );
}
|
294
|
+
|
191
295
|
//
|
192
296
|
// copies the string data, unescaping any present values that need to be replaced
|
193
297
|
//
|
@@ -198,7 +302,7 @@ namespace edn
|
|
198
302
|
std::string buf;
|
199
303
|
|
200
304
|
if (encode) {
|
201
|
-
if (!to_utf8(p_start, p_end - p_start, buf))
|
305
|
+
if (!util::to_utf8(p_start, p_end - p_start, buf))
|
202
306
|
return false;
|
203
307
|
}
|
204
308
|
else {
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#include <string>
|
2
|
+
|
3
|
+
//
|
4
|
+
// needed to define this in its own file because icu and ruby have
|
5
|
+
// conflicting definitions of UChar, which the compiler rejects
|
6
|
+
//
|
7
|
+
#include <unicode/utypes.h>
|
8
|
+
#include <unicode/ustring.h>
|
9
|
+
#include <unicode/ucnv.h>
|
10
|
+
|
11
|
+
#include "edn_parser_util.h"
|
12
|
+
|
13
|
+
namespace edn
|
14
|
+
{
|
15
|
+
namespace util
|
16
|
+
{
|
17
|
+
//
|
18
|
+
// unescapes any values that need to be replaced, saves it to utf8
|
19
|
+
//
|
20
|
+
bool to_utf8(const char *s, std::size_t len, std::string& rslt)
|
21
|
+
{
|
22
|
+
icu::UnicodeString ustr(s, len);
|
23
|
+
|
24
|
+
if (ustr.isBogus()) {
|
25
|
+
return false;
|
26
|
+
}
|
27
|
+
|
28
|
+
ustr.unescape().toUTF8String(rslt);
|
29
|
+
return true;
|
30
|
+
}
|
31
|
+
}
|
32
|
+
}
|
data/ext/edn_turbo/main.cc
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
#include <signal.h>
|
2
2
|
#include <iostream>
|
3
3
|
#include <clocale>
|
4
|
-
|
5
4
|
#include <cstring>
|
6
5
|
|
6
|
+
#include <ruby/ruby.h>
|
7
|
+
#include <ruby/io.h>
|
8
|
+
|
7
9
|
#include "edn_parser.h"
|
8
10
|
|
9
|
-
#include <ruby/ruby.h>
|
10
11
|
|
11
12
|
|
12
13
|
namespace edn {
|
@@ -25,6 +26,7 @@ namespace edn {
|
|
25
26
|
|
26
27
|
VALUE EDNT_STRING_TO_I_METHOD = Qnil;
|
27
28
|
VALUE EDNT_STRING_TO_F_METHOD = Qnil;
|
29
|
+
VALUE EDNT_READ_METHOD = Qnil;
|
28
30
|
|
29
31
|
// returned when EOF - defined as a constant in EDN module
|
30
32
|
VALUE EDNT_EOF_CONST = Qnil;
|
@@ -49,7 +51,7 @@ namespace edn {
|
|
49
51
|
Data_Get_Struct( self, edn::Parser, p );
|
50
52
|
return p;
|
51
53
|
}
|
52
|
-
|
54
|
+
static VALUE set_source(VALUE self, VALUE data);
|
53
55
|
|
54
56
|
//
|
55
57
|
// Called by the constructor - sets the source if passed.
|
@@ -57,11 +59,8 @@ namespace edn {
|
|
57
59
|
{
|
58
60
|
Parser* p = get_parser(self);
|
59
61
|
|
60
|
-
if (argc > 0)
|
61
|
-
|
62
|
-
const char* stream = StringValueCStr(argv[0]);
|
63
|
-
if (stream)
|
64
|
-
p->set_source( stream, std::strlen(stream) );
|
62
|
+
if (argc > 0) {
|
63
|
+
set_source( self, argv[0] );
|
65
64
|
}
|
66
65
|
return self;
|
67
66
|
}
|
@@ -72,9 +71,49 @@ namespace edn {
|
|
72
71
|
{
|
73
72
|
Parser* p = get_parser(self);
|
74
73
|
|
75
|
-
|
76
|
-
|
77
|
-
|
74
|
+
switch (TYPE(data))
|
75
|
+
{
|
76
|
+
case T_STRING:
|
77
|
+
{
|
78
|
+
const char* stream = StringValueCStr(data);
|
79
|
+
if (stream) {
|
80
|
+
p->set_source( stream, std::strlen(stream) );
|
81
|
+
}
|
82
|
+
break;
|
83
|
+
}
|
84
|
+
case T_FILE:
|
85
|
+
{
|
86
|
+
// extract the stream pointer
|
87
|
+
rb_io_t* fptr = RFILE(data)->fptr;
|
88
|
+
if (!fptr) {
|
89
|
+
rb_raise(rb_eRuntimeError, "Ruby IO - fptr is NULL");
|
90
|
+
}
|
91
|
+
|
92
|
+
rb_io_check_char_readable(fptr);
|
93
|
+
|
94
|
+
FILE* fp = rb_io_stdio_file(fptr);
|
95
|
+
if (!fp) {
|
96
|
+
rb_raise(rb_eRuntimeError, "Ruby IO - fptr->fp is NULL");
|
97
|
+
}
|
98
|
+
|
99
|
+
p->set_source(fp);
|
100
|
+
break;
|
101
|
+
}
|
102
|
+
case T_DATA:
|
103
|
+
{
|
104
|
+
// StringIO or some other IO not part of the ruby core -
|
105
|
+
// this is very inefficient as it'll require read()
|
106
|
+
// calls from the ruby side (involves a lot of data
|
107
|
+
// wrapping, etc)
|
108
|
+
if (rb_respond_to(data, EDNT_READ_METHOD)) {
|
109
|
+
p->set_source(data);
|
110
|
+
break;
|
111
|
+
}
|
112
|
+
}
|
113
|
+
default:
|
114
|
+
rb_raise(rb_eRuntimeError, "set_source expected String, core IO, or IO that responds to read()");
|
115
|
+
break;
|
116
|
+
}
|
78
117
|
|
79
118
|
return self;
|
80
119
|
}
|
@@ -90,9 +129,13 @@ namespace edn {
|
|
90
129
|
// parses an entire stream
|
91
130
|
static VALUE read(VALUE self, VALUE data)
|
92
131
|
{
|
132
|
+
if (TYPE(data) != T_STRING) {
|
133
|
+
rb_raise(rb_eTypeError, "Expected String data");
|
134
|
+
}
|
93
135
|
const char* stream = StringValueCStr(data);
|
94
|
-
if (stream)
|
136
|
+
if (stream) {
|
95
137
|
return get_parser(self)->parse(stream, std::strlen(stream) );
|
138
|
+
}
|
96
139
|
return Qnil;
|
97
140
|
}
|
98
141
|
|
@@ -125,8 +168,7 @@ void Init_edn_turbo(void)
|
|
125
168
|
|
126
169
|
// pass things back as utf-8
|
127
170
|
if (!setlocale( LC_ALL, "" )) {
|
128
|
-
|
129
|
-
return;
|
171
|
+
rb_raise(rb_eRuntimeError, "Extension init error calling setlocale() - It appears your system's locale is not configured correctly.\n");
|
130
172
|
}
|
131
173
|
|
132
174
|
edn::rb_mEDNT = rb_define_module("EDNT");
|
@@ -152,6 +194,7 @@ void Init_edn_turbo(void)
|
|
152
194
|
|
153
195
|
edn::EDNT_STRING_TO_I_METHOD = rb_intern("to_i");
|
154
196
|
edn::EDNT_STRING_TO_F_METHOD = rb_intern("to_f");
|
197
|
+
edn::EDNT_READ_METHOD = rb_intern("read");
|
155
198
|
|
156
199
|
// so we can return EOF directly
|
157
200
|
VALUE edn_module = rb_const_get(rb_cObject, edn::EDN_MODULE_SYMBOL);
|
data/lib/edn_turbo/version.rb
CHANGED