edn_turbo 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +54 -9
- data/ext/edn_turbo/edn_parser.cc +493 -370
- data/ext/edn_turbo/edn_parser.h +48 -47
- data/ext/edn_turbo/edn_parser.rl +138 -107
- data/ext/edn_turbo/edn_parser_util.cc +122 -28
- data/ext/edn_turbo/extconf.rb +1 -1
- data/ext/edn_turbo/main.cc +90 -15
- data/lib/edn_turbo.rb +1 -1
- data/lib/edn_turbo/edn_parser.rb +19 -2
- data/lib/edn_turbo/version.rb +2 -2
- data/test/test_output_diff.rb +8 -8
- metadata +2 -16
@@ -2,9 +2,7 @@
|
|
2
2
|
#include <iomanip>
|
3
3
|
#include <string>
|
4
4
|
#include <limits>
|
5
|
-
|
6
|
-
#include <rice/String.hpp>
|
7
|
-
#include <rice/Array.hpp>
|
5
|
+
#include <exception>
|
8
6
|
|
9
7
|
#include <ruby/ruby.h>
|
10
8
|
#include <ruby/encoding.h>
|
@@ -23,43 +21,121 @@ namespace edn
|
|
23
21
|
return s.str().length();
|
24
22
|
}
|
25
23
|
|
26
|
-
static const std::size_t LL_max_chars = get_max_chars<>((long
|
27
|
-
static const std::size_t LD_max_chars = get_max_chars<>((
|
24
|
+
static const std::size_t LL_max_chars = get_max_chars<>((long) 1);
|
25
|
+
static const std::size_t LD_max_chars = get_max_chars<>((double) 1);
|
26
|
+
|
27
|
+
|
28
|
+
// =================================================================
|
29
|
+
// reset parsing state
|
30
|
+
//
|
31
|
+
void Parser::reset()
|
32
|
+
{
|
33
|
+
line_number = 1;
|
34
|
+
while (!discard.empty())
|
35
|
+
discard.pop();
|
36
|
+
}
|
37
|
+
|
38
|
+
//
|
39
|
+
// set a new source
|
40
|
+
void Parser::set_source(const char* src, std::size_t len)
|
41
|
+
{
|
42
|
+
reset();
|
43
|
+
// set ragel state
|
44
|
+
p = src;
|
45
|
+
pe = src + len;
|
46
|
+
eof = pe;
|
47
|
+
}
|
48
|
+
|
49
|
+
|
50
|
+
// =================================================================
|
51
|
+
// work-around for idiotic rb_protect convention in order to avoid
|
52
|
+
// using ruby/rice
|
53
|
+
//
|
54
|
+
typedef VALUE (edn_rb_f_type)( VALUE arg );
|
55
|
+
|
56
|
+
// we're using at most 2 args
|
57
|
+
struct prot_args {
|
58
|
+
prot_args(ID m, VALUE arg) :
|
59
|
+
method(m), count(1) {
|
60
|
+
args[0] = arg;
|
61
|
+
}
|
62
|
+
prot_args(ID m, VALUE arg1, VALUE arg2) :
|
63
|
+
method(m), count(2) {
|
64
|
+
args[0] = arg1;
|
65
|
+
args[1] = arg2;
|
66
|
+
}
|
67
|
+
|
68
|
+
ID method;
|
69
|
+
VALUE count;
|
70
|
+
VALUE args[2];
|
71
|
+
};
|
72
|
+
|
73
|
+
// this allows us to wrap with rb_protect()
|
74
|
+
static inline VALUE edn_wrap_funcall2( VALUE arg )
|
75
|
+
{
|
76
|
+
prot_args* a = reinterpret_cast<prot_args*>(arg);
|
77
|
+
return rb_funcall2( edn::rb_mEDNT, a->method, a->count, a->args );
|
78
|
+
}
|
79
|
+
|
80
|
+
static inline VALUE edn_prot_rb_funcall( edn_rb_f_type func, VALUE args )
|
81
|
+
{
|
82
|
+
int error;
|
83
|
+
VALUE s = rb_protect( func, args, &error );
|
84
|
+
if (error) Parser::throw_error(error);
|
85
|
+
return s;
|
86
|
+
}
|
87
|
+
|
88
|
+
static inline VALUE edn_prot_rb_new_str(const char* str) {
|
89
|
+
int error;
|
90
|
+
VALUE s = rb_protect( reinterpret_cast<VALUE (*)(VALUE)>(rb_str_new_cstr),
|
91
|
+
reinterpret_cast<VALUE>(str), &error );
|
92
|
+
if (error) Parser::throw_error(error);
|
93
|
+
return s;
|
94
|
+
}
|
95
|
+
|
96
|
+
static inline VALUE edn_rb_enc_associate_utf8(VALUE str)
|
97
|
+
{
|
98
|
+
return rb_enc_associate(str, rb_utf8_encoding() );
|
99
|
+
}
|
100
|
+
|
101
|
+
// =================================================================
|
102
|
+
// utils
|
28
103
|
|
29
104
|
//
|
30
105
|
// convert to int.. if string rep has more digits than long can
|
31
106
|
// hold, call into ruby to get a big num
|
32
|
-
|
107
|
+
VALUE Parser::integer_to_ruby(const char* str, std::size_t len)
|
33
108
|
{
|
34
109
|
if (len < LL_max_chars)
|
35
110
|
{
|
36
|
-
return buftotype<long>(str, len);
|
111
|
+
return LONG2NUM(buftotype<long>(str, len));
|
37
112
|
}
|
38
113
|
|
39
114
|
// value is outside of range of long type. Use ruby to convert it
|
40
|
-
VALUE rb_s =
|
41
|
-
|
115
|
+
VALUE rb_s = edn_prot_rb_new_str( str );
|
116
|
+
prot_args args(EDNT_STR_INT_TO_BIGNUM, rb_s);
|
117
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
42
118
|
}
|
43
119
|
|
44
120
|
//
|
45
|
-
// as above.. TODO: check exponential
|
46
|
-
|
121
|
+
// as above.. TODO: check exponential..
|
122
|
+
VALUE Parser::float_to_ruby(const char* str, std::size_t len)
|
47
123
|
{
|
48
124
|
if (len < LD_max_chars)
|
49
125
|
{
|
50
|
-
return buftotype<double>(str, len);
|
126
|
+
return rb_float_new(buftotype<double>(str, len));
|
51
127
|
}
|
52
128
|
|
53
129
|
// string rep is too long to convert directly as a double. Use ruby to convert it
|
54
|
-
|
55
|
-
return
|
130
|
+
prot_args args(EDNT_STR_DBL_TO_BIGNUM, edn_prot_rb_new_str(str));
|
131
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
56
132
|
}
|
57
133
|
|
58
134
|
|
59
135
|
//
|
60
136
|
// copies the string data, unescaping any present values that need to be replaced
|
61
137
|
//
|
62
|
-
bool Parser::parse_byte_stream(const char *p_start, const char *p_end,
|
138
|
+
bool Parser::parse_byte_stream(const char *p_start, const char *p_end, VALUE& v_utf8,
|
63
139
|
bool encode)
|
64
140
|
{
|
65
141
|
if (p_end > p_start) {
|
@@ -74,9 +150,13 @@ namespace edn
|
|
74
150
|
}
|
75
151
|
|
76
152
|
// utf-8 encode
|
77
|
-
VALUE vs =
|
78
|
-
|
79
|
-
|
153
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
154
|
+
int error;
|
155
|
+
v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
|
156
|
+
if (error) Parser::throw_error(error);
|
157
|
+
return true;
|
158
|
+
} else if (p_end == p_start) {
|
159
|
+
v_utf8 = rb_str_new("", 0);
|
80
160
|
return true;
|
81
161
|
}
|
82
162
|
|
@@ -86,7 +166,7 @@ namespace edn
|
|
86
166
|
//
|
87
167
|
// handles things like \c, \newline
|
88
168
|
//
|
89
|
-
bool Parser::parse_escaped_char(const char *p, const char *pe,
|
169
|
+
bool Parser::parse_escaped_char(const char *p, const char *pe, VALUE& v)
|
90
170
|
{
|
91
171
|
std::string buf;
|
92
172
|
std::size_t len = pe - p;
|
@@ -104,37 +184,51 @@ namespace edn
|
|
104
184
|
else return false;
|
105
185
|
}
|
106
186
|
|
107
|
-
|
187
|
+
v = edn_prot_rb_new_str( buf.c_str() );
|
108
188
|
return true;
|
109
189
|
}
|
110
190
|
|
111
191
|
|
112
192
|
//
|
113
193
|
// get a symbol representation from the ruby side. See edn_turbo.rb
|
114
|
-
|
194
|
+
VALUE Parser::make_edn_symbol(VALUE sym)
|
115
195
|
{
|
116
|
-
|
117
|
-
return
|
196
|
+
prot_args args(edn::EDNT_MAKE_EDN_SYMBOL, sym);
|
197
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
118
198
|
}
|
119
199
|
|
120
200
|
//
|
121
201
|
// get a set representation from the ruby side. See edn_turbo.rb
|
122
|
-
|
202
|
+
VALUE Parser::make_ruby_set(VALUE elems)
|
123
203
|
{
|
124
|
-
|
204
|
+
prot_args args(edn::EDNT_MAKE_SET_METHOD, elems);
|
205
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
125
206
|
}
|
126
207
|
|
127
208
|
//
|
128
209
|
// get an object representation from the ruby side using the given symbol name
|
129
|
-
|
210
|
+
VALUE Parser::tagged_element(VALUE name, VALUE data)
|
130
211
|
{
|
131
|
-
|
132
|
-
return
|
212
|
+
prot_args args(edn::EDNT_TAGGED_ELEM, name, data);
|
213
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
133
214
|
}
|
134
215
|
|
135
216
|
|
136
217
|
//
|
137
218
|
// error reporting
|
219
|
+
void Parser::throw_error(int error)
|
220
|
+
{
|
221
|
+
if (error == 0)
|
222
|
+
return;
|
223
|
+
|
224
|
+
VALUE err = rb_errinfo();
|
225
|
+
VALUE klass = rb_class_path(CLASS_OF(err));
|
226
|
+
VALUE message = rb_obj_as_string(err);
|
227
|
+
std::stringstream msg;
|
228
|
+
msg << RSTRING_PTR(klass) << " exception: " << RSTRING_PTR(message);
|
229
|
+
throw std::runtime_error(msg.str());
|
230
|
+
}
|
231
|
+
|
138
232
|
void Parser::error(const std::string& func, const std::string& err, char c) const
|
139
233
|
{
|
140
234
|
std::cerr << "Parse error "
|
data/ext/edn_turbo/extconf.rb
CHANGED
data/ext/edn_turbo/main.cc
CHANGED
@@ -2,16 +2,14 @@
|
|
2
2
|
#include <iostream>
|
3
3
|
#include <clocale>
|
4
4
|
|
5
|
-
// always include rice headers before ruby.h
|
6
|
-
#include <rice/Data_Type.hpp>
|
7
|
-
#include <rice/Constructor.hpp>
|
8
|
-
|
9
5
|
#include "edn_parser.h"
|
10
6
|
|
7
|
+
#include <ruby/ruby.h>
|
8
|
+
|
11
9
|
|
12
10
|
namespace edn {
|
13
11
|
|
14
|
-
|
12
|
+
VALUE rb_mEDNT;
|
15
13
|
|
16
14
|
// methods on the ruby side we'll call from here
|
17
15
|
VALUE EDNT_MAKE_EDN_SYMBOL = Qnil;
|
@@ -20,8 +18,83 @@ namespace edn {
|
|
20
18
|
VALUE EDNT_STR_INT_TO_BIGNUM = Qnil;
|
21
19
|
VALUE EDNT_STR_DBL_TO_BIGNUM = Qnil;
|
22
20
|
|
21
|
+
//
|
22
|
+
// wrappers to hook the class w/ the C-api
|
23
|
+
template<class T>
|
24
|
+
static void delete_obj(T *ptr) {
|
25
|
+
delete ptr;
|
26
|
+
}
|
27
|
+
|
28
|
+
template<class T>
|
29
|
+
static VALUE wrap_ptr(VALUE klass, T* ptr) {
|
30
|
+
return Data_Wrap_Struct(klass, 0, delete_obj<T>, ptr);
|
31
|
+
}
|
23
32
|
|
24
|
-
|
33
|
+
static VALUE alloc_obj(VALUE self){
|
34
|
+
return wrap_ptr<edn::Parser>(self, new Parser());
|
35
|
+
}
|
36
|
+
|
37
|
+
static inline Parser* get_parser(VALUE self)
|
38
|
+
{
|
39
|
+
Parser *p;
|
40
|
+
Data_Get_Struct( self, edn::Parser, p );
|
41
|
+
return p;
|
42
|
+
}
|
43
|
+
|
44
|
+
|
45
|
+
//
|
46
|
+
// called by the constructor - sets the source if passed
|
47
|
+
static VALUE initialize(int argc, VALUE* argv, VALUE self)
|
48
|
+
{
|
49
|
+
Parser* p = get_parser(self);
|
50
|
+
|
51
|
+
if (argc > 0)
|
52
|
+
{
|
53
|
+
const char* stream = StringValueCStr(argv[0]);
|
54
|
+
if (stream)
|
55
|
+
p->set_source( stream, std::strlen(stream) );
|
56
|
+
}
|
57
|
+
return self;
|
58
|
+
}
|
59
|
+
|
60
|
+
//
|
61
|
+
// change the input source
|
62
|
+
static VALUE set_source(VALUE self, VALUE data)
|
63
|
+
{
|
64
|
+
Parser* p = get_parser(self);
|
65
|
+
|
66
|
+
const char* stream = StringValueCStr(data);
|
67
|
+
if (stream)
|
68
|
+
p->set_source( stream, std::strlen(stream) );
|
69
|
+
|
70
|
+
return self;
|
71
|
+
}
|
72
|
+
|
73
|
+
//
|
74
|
+
// eof?
|
75
|
+
static VALUE eof(VALUE self, VALUE data)
|
76
|
+
{
|
77
|
+
return get_parser(self)->is_eof();
|
78
|
+
}
|
79
|
+
|
80
|
+
//
|
81
|
+
// parses an entire stream
|
82
|
+
static VALUE read(VALUE self, VALUE data)
|
83
|
+
{
|
84
|
+
const char* stream = StringValueCStr(data);
|
85
|
+
return get_parser(self)->parse(stream, std::strlen(stream) );
|
86
|
+
}
|
87
|
+
|
88
|
+
//
|
89
|
+
// gets the next token in the current stream
|
90
|
+
static VALUE next(VALUE self, VALUE data)
|
91
|
+
{
|
92
|
+
return get_parser(self)->next();
|
93
|
+
}
|
94
|
+
|
95
|
+
//
|
96
|
+
// signal handler
|
97
|
+
static void die(int sig)
|
25
98
|
{
|
26
99
|
exit(-1);
|
27
100
|
}
|
@@ -45,22 +118,24 @@ void Init_edn_turbo(void)
|
|
45
118
|
return;
|
46
119
|
}
|
47
120
|
|
48
|
-
edn::rb_mEDNT =
|
121
|
+
edn::rb_mEDNT = rb_define_module("EDNT");
|
49
122
|
|
50
|
-
// bind
|
123
|
+
// bind the ruby Parser class to the C++ one
|
124
|
+
VALUE rb_cParser = rb_define_class_under(edn::rb_mEDNT, "Parser", rb_cObject);
|
125
|
+
rb_define_alloc_func(rb_cParser, edn::alloc_obj);
|
126
|
+
rb_define_method(rb_cParser, "initialize", (VALUE(*)(ANYARGS)) &edn::initialize, -1 );
|
127
|
+
rb_define_method(rb_cParser, "ext_set_stream", (VALUE(*)(ANYARGS)) &edn::set_source, 1 );
|
128
|
+
rb_define_method(rb_cParser, "ext_eof", (VALUE(*)(ANYARGS)) &edn::eof, 0 );
|
129
|
+
rb_define_method(rb_cParser, "ext_read", (VALUE(*)(ANYARGS)) &edn::read, 1 );
|
130
|
+
rb_define_method(rb_cParser, "ext_next", (VALUE(*)(ANYARGS)) &edn::next, 0 );
|
131
|
+
|
132
|
+
// bind ruby methods we'll call - these should be defined in edn_turbo.rb
|
51
133
|
edn::EDNT_MAKE_EDN_SYMBOL = rb_intern("make_edn_symbol");
|
52
134
|
edn::EDNT_MAKE_SET_METHOD = rb_intern("make_set");
|
53
135
|
edn::EDNT_TAGGED_ELEM = rb_intern("tagged_element");
|
54
136
|
edn::EDNT_STR_INT_TO_BIGNUM = rb_intern("string_int_to_bignum");
|
55
137
|
edn::EDNT_STR_DBL_TO_BIGNUM = rb_intern("string_double_to_bignum");
|
56
138
|
|
57
|
-
// bind the ruby Parser class to the C++ one
|
58
|
-
Rice::Data_Type<edn::Parser> rb_cParser =
|
59
|
-
Rice::define_class_under<edn::Parser>(edn::rb_mEDNT, "Parser")
|
60
|
-
.define_constructor(Rice::Constructor<edn::Parser>())
|
61
|
-
.define_method("ext_read", &edn::Parser::process, (Rice::Arg("data")))
|
62
|
-
;
|
63
|
-
|
64
139
|
// import whatever else we've defined in the ruby side
|
65
140
|
rb_require("edn_turbo/edn_parser");
|
66
141
|
}
|
data/lib/edn_turbo.rb
CHANGED
data/lib/edn_turbo/edn_parser.rb
CHANGED
@@ -1,10 +1,27 @@
|
|
1
1
|
module EDNT
|
2
2
|
|
3
|
+
EOF = Object.new
|
4
|
+
|
3
5
|
class Parser
|
4
|
-
|
5
|
-
|
6
|
+
|
7
|
+
# initialize() is defined in the c-side (main.cc)
|
8
|
+
|
9
|
+
# call the c-side method
|
10
|
+
def set_input(data)
|
11
|
+
ext_set_stream(data)
|
12
|
+
end
|
13
|
+
|
14
|
+
# token-by-token read
|
15
|
+
def read
|
16
|
+
return EOF if ext_eof
|
17
|
+
ext_next
|
18
|
+
end
|
19
|
+
|
20
|
+
# entire stream read
|
21
|
+
def parse(data)
|
6
22
|
ext_read(data)
|
7
23
|
end
|
24
|
+
|
8
25
|
end
|
9
26
|
|
10
27
|
end
|
data/lib/edn_turbo/version.rb
CHANGED
data/test/test_output_diff.rb
CHANGED
@@ -13,17 +13,17 @@ class EDNT_Test < Minitest::Test
|
|
13
13
|
|
14
14
|
def check_file(file, expected_output)
|
15
15
|
File.open(file) { |file|
|
16
|
-
assert_equal(expected_output, @parser.
|
16
|
+
assert_equal(expected_output, @parser.parse(file.read))
|
17
17
|
}
|
18
18
|
end
|
19
19
|
|
20
20
|
def test_basic
|
21
21
|
|
22
|
-
assert_equal(false, @parser.
|
23
|
-
assert_equal(true, @parser.
|
24
|
-
assert_equal("a string", @parser.
|
25
|
-
assert_equal(:"namespace.of.some.length/keyword-name", @parser.
|
26
|
-
assert_equal(:'/', @parser.
|
22
|
+
assert_equal(false, @parser.parse('false'))
|
23
|
+
assert_equal(true, @parser.parse('true'))
|
24
|
+
assert_equal("a string", @parser.parse('"a string"'))
|
25
|
+
assert_equal(:"namespace.of.some.length/keyword-name", @parser.parse(':namespace.of.some.length/keyword-name'))
|
26
|
+
assert_equal(:'/', @parser.parse(':/'))
|
27
27
|
end
|
28
28
|
|
29
29
|
def test_number
|
@@ -103,7 +103,7 @@ class EDNT_Test < Minitest::Test
|
|
103
103
|
def test_read
|
104
104
|
|
105
105
|
# check read for using string
|
106
|
-
assert_equal({:a=>1, :b=>2}, @parser.
|
106
|
+
assert_equal({:a=>1, :b=>2}, @parser.parse('{:a 1 :b 2}'))
|
107
107
|
|
108
108
|
end
|
109
109
|
|
@@ -148,7 +148,7 @@ class EDNT_Test < Minitest::Test
|
|
148
148
|
Tagged.new(data).to_s
|
149
149
|
end
|
150
150
|
|
151
|
-
assert_equal([345, :a], @parser.
|
151
|
+
assert_equal([345, :a], @parser.parse('#edn_turbo/test_tagged { :item 345 :other :a }'))
|
152
152
|
end
|
153
153
|
|
154
154
|
def test_symbols
|