edn_turbo 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +54 -9
- data/ext/edn_turbo/edn_parser.cc +493 -370
- data/ext/edn_turbo/edn_parser.h +48 -47
- data/ext/edn_turbo/edn_parser.rl +138 -107
- data/ext/edn_turbo/edn_parser_util.cc +122 -28
- data/ext/edn_turbo/extconf.rb +1 -1
- data/ext/edn_turbo/main.cc +90 -15
- data/lib/edn_turbo.rb +1 -1
- data/lib/edn_turbo/edn_parser.rb +19 -2
- data/lib/edn_turbo/version.rb +2 -2
- data/test/test_output_diff.rb +8 -8
- metadata +2 -16
@@ -2,9 +2,7 @@
|
|
2
2
|
#include <iomanip>
|
3
3
|
#include <string>
|
4
4
|
#include <limits>
|
5
|
-
|
6
|
-
#include <rice/String.hpp>
|
7
|
-
#include <rice/Array.hpp>
|
5
|
+
#include <exception>
|
8
6
|
|
9
7
|
#include <ruby/ruby.h>
|
10
8
|
#include <ruby/encoding.h>
|
@@ -23,43 +21,121 @@ namespace edn
|
|
23
21
|
return s.str().length();
|
24
22
|
}
|
25
23
|
|
26
|
-
static const std::size_t LL_max_chars = get_max_chars<>((long
|
27
|
-
static const std::size_t LD_max_chars = get_max_chars<>((
|
24
|
+
static const std::size_t LL_max_chars = get_max_chars<>((long) 1);
|
25
|
+
static const std::size_t LD_max_chars = get_max_chars<>((double) 1);
|
26
|
+
|
27
|
+
|
28
|
+
// =================================================================
|
29
|
+
// reset parsing state
|
30
|
+
//
|
31
|
+
void Parser::reset()
|
32
|
+
{
|
33
|
+
line_number = 1;
|
34
|
+
while (!discard.empty())
|
35
|
+
discard.pop();
|
36
|
+
}
|
37
|
+
|
38
|
+
//
|
39
|
+
// set a new source
|
40
|
+
void Parser::set_source(const char* src, std::size_t len)
|
41
|
+
{
|
42
|
+
reset();
|
43
|
+
// set ragel state
|
44
|
+
p = src;
|
45
|
+
pe = src + len;
|
46
|
+
eof = pe;
|
47
|
+
}
|
48
|
+
|
49
|
+
|
50
|
+
// =================================================================
|
51
|
+
// work-around for idiotic rb_protect convention in order to avoid
|
52
|
+
// using ruby/rice
|
53
|
+
//
|
54
|
+
typedef VALUE (edn_rb_f_type)( VALUE arg );
|
55
|
+
|
56
|
+
// we're using at most 2 args
|
57
|
+
struct prot_args {
|
58
|
+
prot_args(ID m, VALUE arg) :
|
59
|
+
method(m), count(1) {
|
60
|
+
args[0] = arg;
|
61
|
+
}
|
62
|
+
prot_args(ID m, VALUE arg1, VALUE arg2) :
|
63
|
+
method(m), count(2) {
|
64
|
+
args[0] = arg1;
|
65
|
+
args[1] = arg2;
|
66
|
+
}
|
67
|
+
|
68
|
+
ID method;
|
69
|
+
VALUE count;
|
70
|
+
VALUE args[2];
|
71
|
+
};
|
72
|
+
|
73
|
+
// this allows us to wrap with rb_protect()
|
74
|
+
static inline VALUE edn_wrap_funcall2( VALUE arg )
|
75
|
+
{
|
76
|
+
prot_args* a = reinterpret_cast<prot_args*>(arg);
|
77
|
+
return rb_funcall2( edn::rb_mEDNT, a->method, a->count, a->args );
|
78
|
+
}
|
79
|
+
|
80
|
+
static inline VALUE edn_prot_rb_funcall( edn_rb_f_type func, VALUE args )
|
81
|
+
{
|
82
|
+
int error;
|
83
|
+
VALUE s = rb_protect( func, args, &error );
|
84
|
+
if (error) Parser::throw_error(error);
|
85
|
+
return s;
|
86
|
+
}
|
87
|
+
|
88
|
+
static inline VALUE edn_prot_rb_new_str(const char* str) {
|
89
|
+
int error;
|
90
|
+
VALUE s = rb_protect( reinterpret_cast<VALUE (*)(VALUE)>(rb_str_new_cstr),
|
91
|
+
reinterpret_cast<VALUE>(str), &error );
|
92
|
+
if (error) Parser::throw_error(error);
|
93
|
+
return s;
|
94
|
+
}
|
95
|
+
|
96
|
+
static inline VALUE edn_rb_enc_associate_utf8(VALUE str)
|
97
|
+
{
|
98
|
+
return rb_enc_associate(str, rb_utf8_encoding() );
|
99
|
+
}
|
100
|
+
|
101
|
+
// =================================================================
|
102
|
+
// utils
|
28
103
|
|
29
104
|
//
|
30
105
|
// convert to int.. if string rep has more digits than long can
|
31
106
|
// hold, call into ruby to get a big num
|
32
|
-
|
107
|
+
VALUE Parser::integer_to_ruby(const char* str, std::size_t len)
|
33
108
|
{
|
34
109
|
if (len < LL_max_chars)
|
35
110
|
{
|
36
|
-
return buftotype<long>(str, len);
|
111
|
+
return LONG2NUM(buftotype<long>(str, len));
|
37
112
|
}
|
38
113
|
|
39
114
|
// value is outside of range of long type. Use ruby to convert it
|
40
|
-
VALUE rb_s =
|
41
|
-
|
115
|
+
VALUE rb_s = edn_prot_rb_new_str( str );
|
116
|
+
prot_args args(EDNT_STR_INT_TO_BIGNUM, rb_s);
|
117
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
42
118
|
}
|
43
119
|
|
44
120
|
//
|
45
|
-
// as above.. TODO: check exponential
|
46
|
-
|
121
|
+
// as above.. TODO: check exponential..
|
122
|
+
VALUE Parser::float_to_ruby(const char* str, std::size_t len)
|
47
123
|
{
|
48
124
|
if (len < LD_max_chars)
|
49
125
|
{
|
50
|
-
return buftotype<double>(str, len);
|
126
|
+
return rb_float_new(buftotype<double>(str, len));
|
51
127
|
}
|
52
128
|
|
53
129
|
// value is outside of range of double type. Use ruby to convert it
|
54
|
-
|
55
|
-
return
|
130
|
+
prot_args args(EDNT_STR_DBL_TO_BIGNUM, edn_prot_rb_new_str(str));
|
131
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
56
132
|
}
|
57
133
|
|
58
134
|
|
59
135
|
//
|
60
136
|
// copies the string data, unescaping any present values that need to be replaced
|
61
137
|
//
|
62
|
-
bool Parser::parse_byte_stream(const char *p_start, const char *p_end,
|
138
|
+
bool Parser::parse_byte_stream(const char *p_start, const char *p_end, VALUE& v_utf8,
|
63
139
|
bool encode)
|
64
140
|
{
|
65
141
|
if (p_end > p_start) {
|
@@ -74,9 +150,13 @@ namespace edn
|
|
74
150
|
}
|
75
151
|
|
76
152
|
// utf-8 encode
|
77
|
-
VALUE vs =
|
78
|
-
|
79
|
-
|
153
|
+
VALUE vs = edn_prot_rb_new_str(buf.c_str());
|
154
|
+
int error;
|
155
|
+
v_utf8 = rb_protect( edn_rb_enc_associate_utf8, vs, &error);
|
156
|
+
if (error) Parser::throw_error(error);
|
157
|
+
return true;
|
158
|
+
} else if (p_end == p_start) {
|
159
|
+
v_utf8 = rb_str_new("", 0);
|
80
160
|
return true;
|
81
161
|
}
|
82
162
|
|
@@ -86,7 +166,7 @@ namespace edn
|
|
86
166
|
//
|
87
167
|
// handles things like \c, \newline
|
88
168
|
//
|
89
|
-
bool Parser::parse_escaped_char(const char *p, const char *pe,
|
169
|
+
bool Parser::parse_escaped_char(const char *p, const char *pe, VALUE& v)
|
90
170
|
{
|
91
171
|
std::string buf;
|
92
172
|
std::size_t len = pe - p;
|
@@ -104,37 +184,51 @@ namespace edn
|
|
104
184
|
else return false;
|
105
185
|
}
|
106
186
|
|
107
|
-
|
187
|
+
v = edn_prot_rb_new_str( buf.c_str() );
|
108
188
|
return true;
|
109
189
|
}
|
110
190
|
|
111
191
|
|
112
192
|
//
|
113
193
|
// get an EDN symbol representation from the ruby side. See edn_turbo.rb
|
114
|
-
|
194
|
+
VALUE Parser::make_edn_symbol(VALUE sym)
|
115
195
|
{
|
116
|
-
|
117
|
-
return
|
196
|
+
prot_args args(edn::EDNT_MAKE_EDN_SYMBOL, sym);
|
197
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
118
198
|
}
|
119
199
|
|
120
200
|
//
|
121
201
|
// get a set representation from the ruby side. See edn_turbo.rb
|
122
|
-
|
202
|
+
VALUE Parser::make_ruby_set(VALUE elems)
|
123
203
|
{
|
124
|
-
|
204
|
+
prot_args args(edn::EDNT_MAKE_SET_METHOD, elems);
|
205
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
125
206
|
}
|
126
207
|
|
127
208
|
//
|
128
209
|
// get an object representation from the ruby side using the given symbol name
|
129
|
-
|
210
|
+
VALUE Parser::tagged_element(VALUE name, VALUE data)
|
130
211
|
{
|
131
|
-
|
132
|
-
return
|
212
|
+
prot_args args(edn::EDNT_TAGGED_ELEM, name, data);
|
213
|
+
return edn_prot_rb_funcall( edn_wrap_funcall2, reinterpret_cast<VALUE>(&args) );
|
133
214
|
}
|
134
215
|
|
135
216
|
|
136
217
|
//
|
137
218
|
// error reporting
|
219
|
+
void Parser::throw_error(int error)
|
220
|
+
{
|
221
|
+
if (error == 0)
|
222
|
+
return;
|
223
|
+
|
224
|
+
VALUE err = rb_errinfo();
|
225
|
+
VALUE klass = rb_class_path(CLASS_OF(err));
|
226
|
+
VALUE message = rb_obj_as_string(err);
|
227
|
+
std::stringstream msg;
|
228
|
+
msg << RSTRING_PTR(klass) << " exception: " << RSTRING_PTR(message);
|
229
|
+
throw std::runtime_error(msg.str());
|
230
|
+
}
|
231
|
+
|
138
232
|
void Parser::error(const std::string& func, const std::string& err, char c) const
|
139
233
|
{
|
140
234
|
std::cerr << "Parse error "
|
data/ext/edn_turbo/extconf.rb
CHANGED
data/ext/edn_turbo/main.cc
CHANGED
@@ -2,16 +2,14 @@
|
|
2
2
|
#include <iostream>
|
3
3
|
#include <clocale>
|
4
4
|
|
5
|
-
// always include rice headers before ruby.h
|
6
|
-
#include <rice/Data_Type.hpp>
|
7
|
-
#include <rice/Constructor.hpp>
|
8
|
-
|
9
5
|
#include "edn_parser.h"
|
10
6
|
|
7
|
+
#include <ruby/ruby.h>
|
8
|
+
|
11
9
|
|
12
10
|
namespace edn {
|
13
11
|
|
14
|
-
|
12
|
+
VALUE rb_mEDNT;
|
15
13
|
|
16
14
|
// methods on the ruby side we'll call from here
|
17
15
|
VALUE EDNT_MAKE_EDN_SYMBOL = Qnil;
|
@@ -20,8 +18,83 @@ namespace edn {
|
|
20
18
|
VALUE EDNT_STR_INT_TO_BIGNUM = Qnil;
|
21
19
|
VALUE EDNT_STR_DBL_TO_BIGNUM = Qnil;
|
22
20
|
|
21
|
+
//
|
22
|
+
// wrappers to hook the class w/ the C-api
|
23
|
+
template<class T>
|
24
|
+
static void delete_obj(T *ptr) {
|
25
|
+
delete ptr;
|
26
|
+
}
|
27
|
+
|
28
|
+
template<class T>
|
29
|
+
static VALUE wrap_ptr(VALUE klass, T* ptr) {
|
30
|
+
return Data_Wrap_Struct(klass, 0, delete_obj<T>, ptr);
|
31
|
+
}
|
23
32
|
|
24
|
-
|
33
|
+
static VALUE alloc_obj(VALUE self){
|
34
|
+
return wrap_ptr<edn::Parser>(self, new Parser());
|
35
|
+
}
|
36
|
+
|
37
|
+
static inline Parser* get_parser(VALUE self)
|
38
|
+
{
|
39
|
+
Parser *p;
|
40
|
+
Data_Get_Struct( self, edn::Parser, p );
|
41
|
+
return p;
|
42
|
+
}
|
43
|
+
|
44
|
+
|
45
|
+
//
|
46
|
+
// called by the constructor - sets the source if passed
|
47
|
+
static VALUE initialize(int argc, VALUE* argv, VALUE self)
|
48
|
+
{
|
49
|
+
Parser* p = get_parser(self);
|
50
|
+
|
51
|
+
if (argc > 0)
|
52
|
+
{
|
53
|
+
const char* stream = StringValueCStr(argv[0]);
|
54
|
+
if (stream)
|
55
|
+
p->set_source( stream, std::strlen(stream) );
|
56
|
+
}
|
57
|
+
return self;
|
58
|
+
}
|
59
|
+
|
60
|
+
//
|
61
|
+
// change the input source
|
62
|
+
static VALUE set_source(VALUE self, VALUE data)
|
63
|
+
{
|
64
|
+
Parser* p = get_parser(self);
|
65
|
+
|
66
|
+
const char* stream = StringValueCStr(data);
|
67
|
+
if (stream)
|
68
|
+
p->set_source( stream, std::strlen(stream) );
|
69
|
+
|
70
|
+
return self;
|
71
|
+
}
|
72
|
+
|
73
|
+
//
|
74
|
+
// eof?
|
75
|
+
static VALUE eof(VALUE self, VALUE data)
|
76
|
+
{
|
77
|
+
return get_parser(self)->is_eof();
|
78
|
+
}
|
79
|
+
|
80
|
+
//
|
81
|
+
// parses an entire stream
|
82
|
+
static VALUE read(VALUE self, VALUE data)
|
83
|
+
{
|
84
|
+
const char* stream = StringValueCStr(data);
|
85
|
+
return get_parser(self)->parse(stream, std::strlen(stream) );
|
86
|
+
}
|
87
|
+
|
88
|
+
//
|
89
|
+
// gets the next token in the current stream
|
90
|
+
static VALUE next(VALUE self, VALUE data)
|
91
|
+
{
|
92
|
+
return get_parser(self)->next();
|
93
|
+
}
|
94
|
+
|
95
|
+
//
|
96
|
+
// signal handler
|
97
|
+
static void die(int sig)
|
25
98
|
{
|
26
99
|
exit(-1);
|
27
100
|
}
|
@@ -45,22 +118,24 @@ void Init_edn_turbo(void)
|
|
45
118
|
return;
|
46
119
|
}
|
47
120
|
|
48
|
-
edn::rb_mEDNT =
|
121
|
+
edn::rb_mEDNT = rb_define_module("EDNT");
|
49
122
|
|
50
|
-
// bind
|
123
|
+
// bind the ruby Parser class to the C++ one
|
124
|
+
VALUE rb_cParser = rb_define_class_under(edn::rb_mEDNT, "Parser", rb_cObject);
|
125
|
+
rb_define_alloc_func(rb_cParser, edn::alloc_obj);
|
126
|
+
rb_define_method(rb_cParser, "initialize", (VALUE(*)(ANYARGS)) &edn::initialize, -1 );
|
127
|
+
rb_define_method(rb_cParser, "ext_set_stream", (VALUE(*)(ANYARGS)) &edn::set_source, 1 );
|
128
|
+
rb_define_method(rb_cParser, "ext_eof", (VALUE(*)(ANYARGS)) &edn::eof, 0 );
|
129
|
+
rb_define_method(rb_cParser, "ext_read", (VALUE(*)(ANYARGS)) &edn::read, 1 );
|
130
|
+
rb_define_method(rb_cParser, "ext_next", (VALUE(*)(ANYARGS)) &edn::next, 0 );
|
131
|
+
|
132
|
+
// bind ruby methods we'll call - these should be defined in edn_turbo.rb
|
51
133
|
edn::EDNT_MAKE_EDN_SYMBOL = rb_intern("make_edn_symbol");
|
52
134
|
edn::EDNT_MAKE_SET_METHOD = rb_intern("make_set");
|
53
135
|
edn::EDNT_TAGGED_ELEM = rb_intern("tagged_element");
|
54
136
|
edn::EDNT_STR_INT_TO_BIGNUM = rb_intern("string_int_to_bignum");
|
55
137
|
edn::EDNT_STR_DBL_TO_BIGNUM = rb_intern("string_double_to_bignum");
|
56
138
|
|
57
|
-
// bind the ruby Parser class to the C++ one
|
58
|
-
Rice::Data_Type<edn::Parser> rb_cParser =
|
59
|
-
Rice::define_class_under<edn::Parser>(edn::rb_mEDNT, "Parser")
|
60
|
-
.define_constructor(Rice::Constructor<edn::Parser>())
|
61
|
-
.define_method("ext_read", &edn::Parser::process, (Rice::Arg("data")))
|
62
|
-
;
|
63
|
-
|
64
139
|
// import whatever else we've defined in the ruby side
|
65
140
|
rb_require("edn_turbo/edn_parser");
|
66
141
|
}
|
data/lib/edn_turbo.rb
CHANGED
data/lib/edn_turbo/edn_parser.rb
CHANGED
@@ -1,10 +1,27 @@
|
|
1
1
|
module EDNT
|
2
2
|
|
3
|
+
EOF = Object.new
|
4
|
+
|
3
5
|
class Parser
|
4
|
-
|
5
|
-
|
6
|
+
|
7
|
+
# initialize() is defined in the c-side (main.cc)
|
8
|
+
|
9
|
+
# call the c-side method
|
10
|
+
def set_input(data)
|
11
|
+
ext_set_stream(data)
|
12
|
+
end
|
13
|
+
|
14
|
+
# token-by-token read
|
15
|
+
def read
|
16
|
+
return EOF if ext_eof
|
17
|
+
ext_next
|
18
|
+
end
|
19
|
+
|
20
|
+
# entire stream read
|
21
|
+
def parse(data)
|
6
22
|
ext_read(data)
|
7
23
|
end
|
24
|
+
|
8
25
|
end
|
9
26
|
|
10
27
|
end
|
data/lib/edn_turbo/version.rb
CHANGED
data/test/test_output_diff.rb
CHANGED
@@ -13,17 +13,17 @@ class EDNT_Test < Minitest::Test
|
|
13
13
|
|
14
14
|
def check_file(file, expected_output)
|
15
15
|
File.open(file) { |file|
|
16
|
-
assert_equal(expected_output, @parser.
|
16
|
+
assert_equal(expected_output, @parser.parse(file.read))
|
17
17
|
}
|
18
18
|
end
|
19
19
|
|
20
20
|
def test_basic
|
21
21
|
|
22
|
-
assert_equal(false, @parser.
|
23
|
-
assert_equal(true, @parser.
|
24
|
-
assert_equal("a string", @parser.
|
25
|
-
assert_equal(:"namespace.of.some.length/keyword-name", @parser.
|
26
|
-
assert_equal(:'/', @parser.
|
22
|
+
assert_equal(false, @parser.parse('false'))
|
23
|
+
assert_equal(true, @parser.parse('true'))
|
24
|
+
assert_equal("a string", @parser.parse('"a string"'))
|
25
|
+
assert_equal(:"namespace.of.some.length/keyword-name", @parser.parse(':namespace.of.some.length/keyword-name'))
|
26
|
+
assert_equal(:'/', @parser.parse(':/'))
|
27
27
|
end
|
28
28
|
|
29
29
|
def test_number
|
@@ -103,7 +103,7 @@ class EDNT_Test < Minitest::Test
|
|
103
103
|
def test_read
|
104
104
|
|
105
105
|
# check read for using string
|
106
|
-
assert_equal({:a=>1, :b=>2}, @parser.
|
106
|
+
assert_equal({:a=>1, :b=>2}, @parser.parse('{:a 1 :b 2}'))
|
107
107
|
|
108
108
|
end
|
109
109
|
|
@@ -148,7 +148,7 @@ class EDNT_Test < Minitest::Test
|
|
148
148
|
Tagged.new(data).to_s
|
149
149
|
end
|
150
150
|
|
151
|
-
assert_equal([345, :a], @parser.
|
151
|
+
assert_equal([345, :a], @parser.parse('#edn_turbo/test_tagged { :item 345 :other :a }'))
|
152
152
|
end
|
153
153
|
|
154
154
|
def test_symbols
|