edn_turbo 0.5.7 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rspec +1 -0
- data/CHANGELOG.md +15 -0
- data/Dockerfile +34 -0
- data/LICENSE +1 -1
- data/README.md +8 -22
- data/Rakefile +22 -19
- data/bin/build_docker_image.sh +11 -0
- data/bin/console.sh +5 -0
- data/docker-compose.yml +10 -0
- data/ext/edn_turbo/edn_parser.cc +336 -314
- data/ext/edn_turbo/edn_parser.rl +63 -41
- data/ext/edn_turbo/extconf.rb +24 -1
- data/ext/edn_turbo/main.cc +189 -166
- data/ext/edn_turbo/parser.h +104 -76
- data/ext/edn_turbo/parser_def.cc +204 -182
- data/ext/edn_turbo/util.cc +241 -219
- data/ext/edn_turbo/util.h +48 -26
- data/ext/edn_turbo/util_unicode.cc +41 -19
- data/ext/edn_turbo/util_unicode.h +29 -7
- data/lib/edn_turbo.rb +22 -0
- data/lib/edn_turbo/edn_parser.rb +22 -0
- data/lib/edn_turbo/version.rb +23 -3
- data/spec/edn_turbo/edn_parser_spec.rb +384 -0
- data/spec/spec_helper.rb +96 -0
- metadata +42 -11
- data/test/test_output_diff.rb +0 -408
data/ext/edn_turbo/util.cc
CHANGED
@@ -1,3 +1,25 @@
|
|
1
|
+
// The MIT License (MIT)
|
2
|
+
|
3
|
+
// Copyright (c) 2015-2019 Ed Porras
|
4
|
+
|
5
|
+
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
// of this software and associated documentation files (the "Software"), to deal
|
7
|
+
// in the Software without restriction, including without limitation the rights
|
8
|
+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
// copies of the Software, and to permit persons to whom the Software is
|
10
|
+
// furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
// The above copyright notice and this permission notice shall be included in
|
13
|
+
// all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
// THE SOFTWARE.
|
22
|
+
|
1
23
|
#include <iostream>
|
2
24
|
#include <string>
|
3
25
|
#include <sstream>
|
@@ -11,230 +33,230 @@
|
|
11
33
|
|
12
34
|
namespace edn
|
13
35
|
{
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
127
|
-
}
|
128
|
-
|
129
|
-
return LONG2NUM(buftotype<long>(str, len));
|
130
|
-
}
|
131
|
-
|
132
|
-
//
|
133
|
-
// as above.. TODO: check exponential..
|
134
|
-
VALUE float_to_ruby(const char* str, std::size_t len)
|
135
|
-
{
|
136
|
-
// if big decimal is needed, call into ruby side to get
|
137
|
-
// the correct value
|
138
|
-
if (str[len-1] == 'M' || len >= LD_max_chars)
|
139
|
-
{
|
140
|
-
std::string buf(str, len);
|
141
|
-
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
142
|
-
|
143
|
-
if (str[len-1] == 'M') {
|
144
|
-
return call_module_fn(rb_mEDN, EDN_MAKE_BIG_DECIMAL_METHOD, vs);
|
145
|
-
}
|
146
|
-
|
147
|
-
prot_args args(vs, RUBY_STRING_TO_F_METHOD);
|
148
|
-
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
149
|
-
}
|
150
|
-
|
151
|
-
return rb_float_new(buftotype<double>(str, len));
|
152
|
-
}
|
153
|
-
|
154
|
-
|
155
|
-
//
|
156
|
-
// read from a StringIO - expensive!!!
|
157
|
-
//
|
158
|
-
VALUE ruby_io_read(VALUE io)
|
159
|
-
{
|
160
|
-
prot_args args(io, RUBY_READ_METHOD);
|
36
|
+
//
// Number of characters needed to print the largest value of T in fixed
// (non-scientific) notation. The argument only serves to deduce T; its
// value is never read.
template <typename T>
static std::size_t get_max_chars(T)
{
    std::ostringstream rendered;
    rendered.setf(std::ios_base::fixed, std::ios_base::floatfield);
    rendered << std::numeric_limits<T>::max();
    const std::string text = rendered.str();
    return text.size();
}

// literal lengths at or above which the number conversions further down
// hand the text to ruby instead of parsing it natively
static const std::size_t LL_max_chars = get_max_chars<long>(1l);
static const std::size_t LD_max_chars = get_max_chars<double>(1.0);
|
48
|
+
|
49
|
+
//
|
50
|
+
// throw runtime error
|
51
|
+
static void throw_error(int error)
|
52
|
+
{
|
53
|
+
if (error == 0)
|
54
|
+
return;
|
55
|
+
|
56
|
+
VALUE err = rb_errinfo();
|
57
|
+
rb_raise(CLASS_OF(err), "%s", RSTRING_PTR(rb_obj_as_string(err)));
|
58
|
+
}
|
59
|
+
|
60
|
+
// =================================================================
|
61
|
+
// work-around for rb_protect's single-VALUE callback convention in order to avoid
|
62
|
+
// using ruby/rice
|
63
|
+
//
|
64
|
+
typedef VALUE (edn_rb_f_type)( VALUE arg );
|
65
|
+
|
66
|
+
// we're using at most 2 args
|
67
|
+
struct prot_args {
|
68
|
+
prot_args(VALUE r, ID m) :
|
69
|
+
receiver(r), method(m), count(0) {
|
70
|
+
}
|
71
|
+
prot_args(VALUE r, ID m, VALUE arg) :
|
72
|
+
receiver(r), method(m), count(1) {
|
73
|
+
args[0] = arg;
|
74
|
+
}
|
75
|
+
prot_args(VALUE r, ID m, VALUE arg1, VALUE arg2) :
|
76
|
+
receiver(r), method(m), count(2) {
|
77
|
+
args[0] = arg1;
|
78
|
+
args[1] = arg2;
|
79
|
+
}
|
80
|
+
|
81
|
+
VALUE call() const {
|
82
|
+
return ((count == 0) ?
|
83
|
+
rb_funcall( receiver, method, 0 ) :
|
84
|
+
rb_funcall2( receiver, method, count, args ));
|
85
|
+
}
|
86
|
+
|
87
|
+
private:
|
88
|
+
VALUE receiver;
|
89
|
+
ID method;
|
90
|
+
int count;
|
91
|
+
VALUE args[2];
|
92
|
+
};
|
93
|
+
|
94
|
+
// this allows us to wrap with rb_protect()
|
95
|
+
static inline VALUE edn_wrap_funcall2( VALUE arg ) {
|
96
|
+
const prot_args* a = reinterpret_cast<const prot_args*>(arg);
|
97
|
+
if (a)
|
98
|
+
return a->call();
|
99
|
+
return Qnil;
|
100
|
+
}
|
101
|
+
|
102
|
+
// Runs func(args) under rb_protect() and returns its result. A non-zero
// protect state is handed to throw_error().
static inline VALUE edn_prot_rb_funcall( edn_rb_f_type func, VALUE args ) {
    int state;
    const VALUE result = rb_protect( func, args, &state );
    if (state != 0) {
        throw_error(state);
    }
    return result;
}
|
108
|
+
|
109
|
+
// rb_protect() callback: `arg` is a NUL-terminated C string pointer
// carried in a VALUE.
static VALUE edn_new_str_from_cstr( VALUE arg ) {
    return rb_str_new_cstr( reinterpret_cast<const char*>(arg) );
}

// Builds a ruby String from the NUL-terminated C string `str` under
// rb_protect() and returns it. A non-zero protect state is handed to
// throw_error().
//
// The string constructor is reached through a trampoline that has the
// exact callback signature rb_protect() expects; calling
// rb_str_new_cstr through a function pointer cast to a different
// signature is undefined behaviour.
static inline VALUE edn_prot_rb_new_str(const char* str) {
    int error;
    VALUE s = rb_protect( edn_new_str_from_cstr,
                          reinterpret_cast<VALUE>(str), &error );
    if (error) throw_error(error);
    return s;
}
|
116
|
+
|
117
|
+
// Single-VALUE wrapper (usable as an rb_protect() callback) that passes
// `str` and the UTF-8 encoding to rb_enc_associate() and returns its result.
static inline VALUE edn_rb_enc_associate_utf8(VALUE str) {
    rb_encoding* const utf8 = rb_utf8_encoding();
    return rb_enc_associate(str, utf8);
}
|
120
|
+
|
121
|
+
// =================================================================
|
122
|
+
// utils
|
123
|
+
namespace util
|
124
|
+
{
|
125
|
+
// Parses the first `len` characters at `p` as a T using stream
// extraction and returns the native value; callers wrap the result in a
// ruby object. Characters after the parsed number are ignored.
//
// Returns T() when nothing can be extracted. The value is initialized up
// front because extraction from an empty or whitespace-only buffer never
// writes to it.
template <class T>
static inline T buftotype(const char* p, std::size_t len) {
    T val = T();
    const std::string buf(p, len);
    std::istringstream stream(buf);
    stream >> val;
    return val;
}
|
135
|
+
|
136
|
+
//
|
137
|
+
// convert to int.. if string rep has more digits than long can
|
138
|
+
// hold, call into ruby to get a big num
|
139
|
+
VALUE integer_to_ruby(const char* str, std::size_t len)
|
140
|
+
{
|
141
|
+
// if something bigger than a long is needed, call into
|
142
|
+
// ruby side to get the correct value
|
143
|
+
if (str[len-1] == 'M' || len >= LL_max_chars)
|
144
|
+
{
|
145
|
+
std::string buf(str, len);
|
146
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
147
|
+
prot_args args(vs, RUBY_STRING_TO_I_METHOD);
|
161
148
|
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
// utf-8 encode
|
181
|
-
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
182
|
-
int error;
|
183
|
-
v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
|
184
|
-
if (error) throw_error(error);
|
185
|
-
return true;
|
186
|
-
} else if (p_end == p_start) {
|
187
|
-
v_utf8 = rb_str_new("", 0);
|
188
|
-
return true;
|
149
|
+
}
|
150
|
+
|
151
|
+
return LONG2NUM(buftotype<long>(str, len));
|
152
|
+
}
|
153
|
+
|
154
|
+
//
|
155
|
+
// as above.. TODO: check exponential..
|
156
|
+
VALUE float_to_ruby(const char* str, std::size_t len)
|
157
|
+
{
|
158
|
+
// if big decimal is needed, call into ruby side to get
|
159
|
+
// the correct value
|
160
|
+
if (str[len-1] == 'M' || len >= LD_max_chars)
|
161
|
+
{
|
162
|
+
std::string buf(str, len);
|
163
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
164
|
+
|
165
|
+
if (str[len-1] == 'M') {
|
166
|
+
return call_module_fn(rb_mEDN, EDN_MAKE_BIG_DECIMAL_METHOD, vs);
|
189
167
|
}
|
190
168
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
169
|
+
prot_args args(vs, RUBY_STRING_TO_F_METHOD);
|
170
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
171
|
+
}
|
172
|
+
|
173
|
+
return rb_float_new(buftotype<double>(str, len));
|
174
|
+
}
|
175
|
+
|
176
|
+
|
177
|
+
//
|
178
|
+
// read from a StringIO - handled from ruby side
|
179
|
+
//
|
180
|
+
VALUE ruby_io_read(VALUE io)
|
181
|
+
{
|
182
|
+
prot_args args(io, RUBY_READ_METHOD);
|
183
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
184
|
+
}
|
185
|
+
|
186
|
+
//
|
187
|
+
// copies the string data, unescaping any present values that need to be replaced
|
188
|
+
//
|
189
|
+
bool parse_byte_stream(const char *p_start, const char *p_end, VALUE& v_utf8, bool encode)
|
190
|
+
{
|
191
|
+
if (p_end > p_start) {
|
199
192
|
std::string buf;
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
else if (buf == "space") buf = ' ';
|
208
|
-
else if (buf == "formfeed") buf = '\f';
|
209
|
-
else if (buf == "backspace") buf = '\b';
|
210
|
-
// TODO: is this supported?
|
211
|
-
else if (buf == "verticaltab") buf = '\v';
|
212
|
-
else return false;
|
193
|
+
|
194
|
+
if (encode) {
|
195
|
+
if (!util::unicode::to_utf8(p_start, static_cast<uint32_t>(p_end - p_start), buf))
|
196
|
+
return false;
|
197
|
+
}
|
198
|
+
else {
|
199
|
+
buf.append(p_start, p_end - p_start);
|
213
200
|
}
|
214
201
|
|
215
|
-
|
202
|
+
// utf-8 encode
|
203
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
204
|
+
int error;
|
205
|
+
v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
|
206
|
+
if (error) throw_error(error);
|
216
207
|
return true;
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
208
|
+
} else if (p_end == p_start) {
|
209
|
+
v_utf8 = rb_str_new("", 0);
|
210
|
+
return true;
|
211
|
+
}
|
212
|
+
|
213
|
+
return false;
|
214
|
+
}
|
215
|
+
|
216
|
+
//
|
217
|
+
// handles things like \c, \newline
|
218
|
+
//
|
219
|
+
bool parse_escaped_char(const char *p, const char *pe, VALUE& v)
|
220
|
+
{
|
221
|
+
std::string buf;
|
222
|
+
std::size_t len = pe - p;
|
223
|
+
buf.append(p, len);
|
224
|
+
|
225
|
+
if (len > 1) {
|
226
|
+
if (buf == "newline") buf = '\n';
|
227
|
+
else if (buf == "tab") buf = '\t';
|
228
|
+
else if (buf == "return") buf = '\r';
|
229
|
+
else if (buf == "space") buf = ' ';
|
230
|
+
else if (buf == "formfeed") buf = '\f';
|
231
|
+
else if (buf == "backspace") buf = '\b';
|
232
|
+
// TODO: is this supported?
|
233
|
+
else if (buf == "verticaltab") buf = '\v';
|
234
|
+
else return false;
|
235
|
+
}
|
236
|
+
|
237
|
+
v = edn_prot_rb_new_str( buf.c_str() );
|
238
|
+
return true;
|
239
|
+
}
|
240
|
+
|
241
|
+
|
242
|
+
//
|
243
|
+
// get a set representation from the ruby side. See edn_turbo.rb
|
244
|
+
VALUE call_module_fn(VALUE module, ID method)
|
245
|
+
{
|
246
|
+
prot_args args(module, method);
|
247
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
248
|
+
}
|
249
|
+
|
250
|
+
// Invokes `method` on `module` with the single argument `value`, under
// rb_protect(), and returns the call's result.
VALUE call_module_fn(VALUE module, ID method, VALUE value)
{
    prot_args invocation(module, method, value);
    const VALUE packed = reinterpret_cast<VALUE>(&invocation);
    return edn_prot_rb_funcall( edn_wrap_funcall2, packed );
}
|
255
|
+
|
256
|
+
// Invokes `method` on `module` with the two arguments `name` and `data`,
// under rb_protect(), and returns the call's result.
VALUE call_module_fn(VALUE module, ID method, VALUE name, VALUE data)
{
    prot_args invocation(module, method, name, data);
    const VALUE packed = reinterpret_cast<VALUE>(&invocation);
    return edn_prot_rb_funcall( edn_wrap_funcall2, packed );
}
|
261
|
+
}
|
240
262
|
}
|