edn_turbo 0.5.7 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/CHANGELOG.md +15 -0
- data/Dockerfile +34 -0
- data/LICENSE +1 -1
- data/README.md +8 -22
- data/Rakefile +22 -19
- data/bin/build_docker_image.sh +11 -0
- data/bin/console.sh +5 -0
- data/docker-compose.yml +10 -0
- data/ext/edn_turbo/edn_parser.cc +336 -314
- data/ext/edn_turbo/edn_parser.rl +63 -41
- data/ext/edn_turbo/extconf.rb +24 -1
- data/ext/edn_turbo/main.cc +189 -166
- data/ext/edn_turbo/parser.h +104 -76
- data/ext/edn_turbo/parser_def.cc +204 -182
- data/ext/edn_turbo/util.cc +241 -219
- data/ext/edn_turbo/util.h +48 -26
- data/ext/edn_turbo/util_unicode.cc +41 -19
- data/ext/edn_turbo/util_unicode.h +29 -7
- data/lib/edn_turbo.rb +22 -0
- data/lib/edn_turbo/edn_parser.rb +22 -0
- data/lib/edn_turbo/version.rb +23 -3
- data/spec/edn_turbo/edn_parser_spec.rb +384 -0
- data/spec/spec_helper.rb +96 -0
- metadata +42 -11
- data/test/test_output_diff.rb +0 -408
data/ext/edn_turbo/util.cc
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
// The MIT License (MIT)
|
2
|
+
|
3
|
+
// Copyright (c) 2015-2019 Ed Porras
|
4
|
+
|
5
|
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
// of this software and associated documentation files (the "Software"), to deal
|
7
|
+
// in the Software without restriction, including without limitation the rights
|
8
|
+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
// copies of the Software, and to permit persons to whom the Software is
|
10
|
+
// furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
// The above copyright notice and this permission notice shall be included in
|
13
|
+
// all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
// THE SOFTWARE.
|
22
|
+
|
1
23
|
#include <iostream>
|
2
24
|
#include <string>
|
3
25
|
#include <sstream>
|
@@ -11,230 +33,230 @@
|
|
11
33
|
|
12
34
|
namespace edn
|
13
35
|
{
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
127
|
-
}
|
128
|
-
|
129
|
-
return LONG2NUM(buftotype<long>(str, len));
|
130
|
-
}
|
131
|
-
|
132
|
-
//
|
133
|
-
// as above.. TODO: check exponential..
|
134
|
-
VALUE float_to_ruby(const char* str, std::size_t len)
|
135
|
-
{
|
136
|
-
// if big decimal is needed, call into ruby side to get
|
137
|
-
// the correct value
|
138
|
-
if (str[len-1] == 'M' || len >= LD_max_chars)
|
139
|
-
{
|
140
|
-
std::string buf(str, len);
|
141
|
-
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
142
|
-
|
143
|
-
if (str[len-1] == 'M') {
|
144
|
-
return call_module_fn(rb_mEDN, EDN_MAKE_BIG_DECIMAL_METHOD, vs);
|
145
|
-
}
|
146
|
-
|
147
|
-
prot_args args(vs, RUBY_STRING_TO_F_METHOD);
|
148
|
-
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
149
|
-
}
|
150
|
-
|
151
|
-
return rb_float_new(buftotype<double>(str, len));
|
152
|
-
}
|
153
|
-
|
154
|
-
|
155
|
-
//
|
156
|
-
// read from a StringIO - expensive!!!
|
157
|
-
//
|
158
|
-
VALUE ruby_io_read(VALUE io)
|
159
|
-
{
|
160
|
-
prot_args args(io, RUBY_READ_METHOD);
|
36
|
+
//
// Returns how many characters the largest value representable by T
// occupies when streamed in fixed (non-scientific) notation. The
// argument only serves to deduce T; its value is ignored.
template <typename T>
static std::size_t get_max_chars(T)
{
    std::ostringstream rendered;
    rendered << std::fixed;
    rendered << std::numeric_limits<T>::max();

    const std::string text = rendered.str();
    return text.size();
}
|
45
|
+
|
46
|
+
static const std::size_t LL_max_chars = get_max_chars<>(1l);
|
47
|
+
static const std::size_t LD_max_chars = get_max_chars<>(1.0);
|
48
|
+
|
49
|
+
//
|
50
|
+
// throw runtime error
|
51
|
+
static void throw_error(int error)
|
52
|
+
{
|
53
|
+
if (error == 0)
|
54
|
+
return;
|
55
|
+
|
56
|
+
VALUE err = rb_errinfo();
|
57
|
+
rb_raise(CLASS_OF(err), "%s", RSTRING_PTR(rb_obj_as_string(err)));
|
58
|
+
}
|
59
|
+
|
60
|
+
// =================================================================
|
61
|
+
// work-around for idiotic rb_protect convention in order to avoid
|
62
|
+
// using ruby/rice
|
63
|
+
//
|
64
|
+
typedef VALUE (edn_rb_f_type)( VALUE arg );
|
65
|
+
|
66
|
+
// we're using at most 2 args
|
67
|
+
struct prot_args {
|
68
|
+
prot_args(VALUE r, ID m) :
|
69
|
+
receiver(r), method(m), count(0) {
|
70
|
+
}
|
71
|
+
prot_args(VALUE r, ID m, VALUE arg) :
|
72
|
+
receiver(r), method(m), count(1) {
|
73
|
+
args[0] = arg;
|
74
|
+
}
|
75
|
+
prot_args(VALUE r, ID m, VALUE arg1, VALUE arg2) :
|
76
|
+
receiver(r), method(m), count(2) {
|
77
|
+
args[0] = arg1;
|
78
|
+
args[1] = arg2;
|
79
|
+
}
|
80
|
+
|
81
|
+
VALUE call() const {
|
82
|
+
return ((count == 0) ?
|
83
|
+
rb_funcall( receiver, method, 0 ) :
|
84
|
+
rb_funcall2( receiver, method, count, args ));
|
85
|
+
}
|
86
|
+
|
87
|
+
private:
|
88
|
+
VALUE receiver;
|
89
|
+
ID method;
|
90
|
+
int count;
|
91
|
+
VALUE args[2];
|
92
|
+
};
|
93
|
+
|
94
|
+
// this allows us to wrap with rb_protect()
|
95
|
+
static inline VALUE edn_wrap_funcall2( VALUE arg ) {
|
96
|
+
const prot_args* a = reinterpret_cast<const prot_args*>(arg);
|
97
|
+
if (a)
|
98
|
+
return a->call();
|
99
|
+
return Qnil;
|
100
|
+
}
|
101
|
+
|
102
|
+
// Runs func(args) under rb_protect(). A non-zero state reported by
// rb_protect() is forwarded to throw_error(); otherwise the callback's
// result is returned.
static inline VALUE edn_prot_rb_funcall( edn_rb_f_type func, VALUE args )
{
    int state;
    const VALUE result = rb_protect( func, args, &state );
    if (state != 0) {
        throw_error(state);
    }
    return result;
}
|
108
|
+
|
109
|
+
// Creates a ruby string from the C string `str` by running
// rb_str_new_cstr under rb_protect(); a non-zero state is forwarded to
// throw_error().
static inline VALUE edn_prot_rb_new_str(const char* str)
{
    typedef VALUE (*protected_fn)(VALUE);

    // rb_protect() expects a VALUE(VALUE) callback, so both the function
    // and its pointer argument are cast to fit that shape
    const protected_fn make_str = reinterpret_cast<protected_fn>(rb_str_new_cstr);
    const VALUE packed = reinterpret_cast<VALUE>(str);

    int state;
    const VALUE result = rb_protect( make_str, packed, &state );
    if (state != 0) {
        throw_error(state);
    }
    return result;
}
|
116
|
+
|
117
|
+
// Associates `str` with the encoding returned by rb_utf8_encoding() and
// returns the result of rb_enc_associate(). Takes and returns a single
// VALUE so it can be passed to rb_protect() as-is.
static inline VALUE edn_rb_enc_associate_utf8(VALUE str)
{
    const VALUE tagged = rb_enc_associate( str, rb_utf8_encoding() );
    return tagged;
}
|
120
|
+
|
121
|
+
// =================================================================
|
122
|
+
// utils
|
123
|
+
namespace util
|
124
|
+
{
|
125
|
+
// Parses the first `len` characters at `p` as a T using stream
// extraction and returns the result. Returns a value-initialized T
// (zero for arithmetic types) for an empty buffer.
template <class T>
static inline T buftotype(const char* p, std::size_t len) {
    // value-initialize: extraction from an empty stream fails before
    // anything is stored, which used to leave the result indeterminate
    T val = T();
    if (len == 0)
        return val;

    const std::string buf(p, len);
    std::istringstream(buf) >> val;
    return val;
}
|
135
|
+
|
136
|
+
//
|
137
|
+
// convert to int.. if string rep has more digits than long can
|
138
|
+
// hold, call into ruby to get a big num
|
139
|
+
VALUE integer_to_ruby(const char* str, std::size_t len)
|
140
|
+
{
|
141
|
+
// if something bigger than a long is needed, call into
|
142
|
+
// ruby side to get the correct value
|
143
|
+
if (str[len-1] == 'M' || len >= LL_max_chars)
|
144
|
+
{
|
145
|
+
std::string buf(str, len);
|
146
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
147
|
+
prot_args args(vs, RUBY_STRING_TO_I_METHOD);
|
161
148
|
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
// utf-8 encode
|
181
|
-
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
182
|
-
int error;
|
183
|
-
v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
|
184
|
-
if (error) throw_error(error);
|
185
|
-
return true;
|
186
|
-
} else if (p_end == p_start) {
|
187
|
-
v_utf8 = rb_str_new("", 0);
|
188
|
-
return true;
|
149
|
+
}
|
150
|
+
|
151
|
+
return LONG2NUM(buftotype<long>(str, len));
|
152
|
+
}
|
153
|
+
|
154
|
+
//
|
155
|
+
// as above.. TODO: check exponential..
|
156
|
+
VALUE float_to_ruby(const char* str, std::size_t len)
|
157
|
+
{
|
158
|
+
// if big decimal is needed, call into ruby side to get
|
159
|
+
// the correct value
|
160
|
+
if (str[len-1] == 'M' || len >= LD_max_chars)
|
161
|
+
{
|
162
|
+
std::string buf(str, len);
|
163
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
164
|
+
|
165
|
+
if (str[len-1] == 'M') {
|
166
|
+
return call_module_fn(rb_mEDN, EDN_MAKE_BIG_DECIMAL_METHOD, vs);
|
189
167
|
}
|
190
168
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
169
|
+
prot_args args(vs, RUBY_STRING_TO_F_METHOD);
|
170
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
171
|
+
}
|
172
|
+
|
173
|
+
return rb_float_new(buftotype<double>(str, len));
|
174
|
+
}
|
175
|
+
|
176
|
+
|
177
|
+
//
|
178
|
+
// read from a StringIO - handled from ruby side
|
179
|
+
//
|
180
|
+
VALUE ruby_io_read(VALUE io)
|
181
|
+
{
|
182
|
+
prot_args args(io, RUBY_READ_METHOD);
|
183
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
184
|
+
}
|
185
|
+
|
186
|
+
//
|
187
|
+
// copies the string data, unescaping any present values that need to be replaced
|
188
|
+
//
|
189
|
+
bool parse_byte_stream(const char *p_start, const char *p_end, VALUE& v_utf8, bool encode)
|
190
|
+
{
|
191
|
+
if (p_end > p_start) {
|
199
192
|
std::string buf;
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
else if (buf == "space") buf = ' ';
|
208
|
-
else if (buf == "formfeed") buf = '\f';
|
209
|
-
else if (buf == "backspace") buf = '\b';
|
210
|
-
// TODO: is this supported?
|
211
|
-
else if (buf == "verticaltab") buf = '\v';
|
212
|
-
else return false;
|
193
|
+
|
194
|
+
if (encode) {
|
195
|
+
if (!util::unicode::to_utf8(p_start, static_cast<uint32_t>(p_end - p_start), buf))
|
196
|
+
return false;
|
197
|
+
}
|
198
|
+
else {
|
199
|
+
buf.append(p_start, p_end - p_start);
|
213
200
|
}
|
214
201
|
|
215
|
-
|
202
|
+
// utf-8 encode
|
203
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
204
|
+
int error;
|
205
|
+
v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
|
206
|
+
if (error) throw_error(error);
|
216
207
|
return true;
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
208
|
+
} else if (p_end == p_start) {
|
209
|
+
v_utf8 = rb_str_new("", 0);
|
210
|
+
return true;
|
211
|
+
}
|
212
|
+
|
213
|
+
return false;
|
214
|
+
}
|
215
|
+
|
216
|
+
//
|
217
|
+
// handles things like \c, \newline
|
218
|
+
//
|
219
|
+
bool parse_escaped_char(const char *p, const char *pe, VALUE& v)
|
220
|
+
{
|
221
|
+
std::string buf;
|
222
|
+
std::size_t len = pe - p;
|
223
|
+
buf.append(p, len);
|
224
|
+
|
225
|
+
if (len > 1) {
|
226
|
+
if (buf == "newline") buf = '\n';
|
227
|
+
else if (buf == "tab") buf = '\t';
|
228
|
+
else if (buf == "return") buf = '\r';
|
229
|
+
else if (buf == "space") buf = ' ';
|
230
|
+
else if (buf == "formfeed") buf = '\f';
|
231
|
+
else if (buf == "backspace") buf = '\b';
|
232
|
+
// TODO: is this supported?
|
233
|
+
else if (buf == "verticaltab") buf = '\v';
|
234
|
+
else return false;
|
235
|
+
}
|
236
|
+
|
237
|
+
v = edn_prot_rb_new_str( buf.c_str() );
|
238
|
+
return true;
|
239
|
+
}
|
240
|
+
|
241
|
+
|
242
|
+
//
|
243
|
+
// get a set representation from the ruby side. See edn_turbo.rb
|
244
|
+
VALUE call_module_fn(VALUE module, ID method)
|
245
|
+
{
|
246
|
+
prot_args args(module, method);
|
247
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
248
|
+
}
|
249
|
+
|
250
|
+
// Calls `method` on `module` with `value` as its only argument, under
// rb_protect(), and returns the ruby result.
VALUE call_module_fn(VALUE module, ID method, VALUE value)
{
    prot_args invocation(module, method, value);
    const VALUE packed = reinterpret_cast<VALUE>(&invocation);
    return edn_prot_rb_funcall( edn_wrap_funcall2, packed );
}
|
255
|
+
|
256
|
+
// Calls `method` on `module` with the two arguments `name` and `data`,
// under rb_protect(), and returns the ruby result.
VALUE call_module_fn(VALUE module, ID method, VALUE name, VALUE data)
{
    prot_args invocation(module, method, name, data);
    const VALUE packed = reinterpret_cast<VALUE>(&invocation);
    return edn_prot_rb_funcall( edn_wrap_funcall2, packed );
}
|
261
|
+
}
|
240
262
|
}
|