wankel 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/LICENSE +20 -0
- data/README.md +43 -0
- data/Rakefile +63 -0
- data/benchmark/subjects/item.json +1 -0
- data/benchmark/subjects/ohai.json +1216 -0
- data/benchmark/subjects/twitter_search.json +1 -0
- data/benchmark/subjects/twitter_stream.json +430 -0
- data/ext/wankel/extconf.rb +15 -0
- data/ext/wankel/wankel.c +50 -0
- data/ext/wankel/wankel.h +17 -0
- data/ext/wankel/wankel_encoder.c +232 -0
- data/ext/wankel/wankel_encoder.h +13 -0
- data/ext/wankel/wankel_parser.c +345 -0
- data/ext/wankel/wankel_parser.h +26 -0
- data/ext/wankel/wankel_sax_encoder.c +290 -0
- data/ext/wankel/wankel_sax_encoder.h +13 -0
- data/ext/wankel/wankel_sax_parser.c +232 -0
- data/ext/wankel/wankel_sax_parser.h +23 -0
- data/ext/wankel/yajl_helpers.c +124 -0
- data/ext/wankel/yajl_helpers.h +22 -0
- data/lib/wankel/ex_sax_parser.rb +75 -0
- data/lib/wankel.rb +19 -0
- data/logo.png +0 -0
- data/test/encoding/encoding_test.rb +230 -0
- data/test/encoding/sax_encoder_test.rb +89 -0
- data/test/parsing/active_support_test.rb +66 -0
- data/test/parsing/fixtures/fail.15.json +1 -0
- data/test/parsing/fixtures/fail.16.json +1 -0
- data/test/parsing/fixtures/fail.17.json +1 -0
- data/test/parsing/fixtures/fail.26.json +1 -0
- data/test/parsing/fixtures/fail11.json +1 -0
- data/test/parsing/fixtures/fail12.json +1 -0
- data/test/parsing/fixtures/fail13.json +1 -0
- data/test/parsing/fixtures/fail14.json +1 -0
- data/test/parsing/fixtures/fail19.json +1 -0
- data/test/parsing/fixtures/fail20.json +1 -0
- data/test/parsing/fixtures/fail21.json +1 -0
- data/test/parsing/fixtures/fail22.json +1 -0
- data/test/parsing/fixtures/fail23.json +1 -0
- data/test/parsing/fixtures/fail24.json +1 -0
- data/test/parsing/fixtures/fail25.json +1 -0
- data/test/parsing/fixtures/fail27.json +2 -0
- data/test/parsing/fixtures/fail28.json +2 -0
- data/test/parsing/fixtures/fail3.json +1 -0
- data/test/parsing/fixtures/fail4.json +1 -0
- data/test/parsing/fixtures/fail5.json +1 -0
- data/test/parsing/fixtures/fail6.json +1 -0
- data/test/parsing/fixtures/fail9.json +1 -0
- data/test/parsing/fixtures/pass.array.json +6 -0
- data/test/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
- data/test/parsing/fixtures/pass.contacts.json +1 -0
- data/test/parsing/fixtures/pass.db100.xml.json +1 -0
- data/test/parsing/fixtures/pass.db1000.xml.json +1 -0
- data/test/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
- data/test/parsing/fixtures/pass.deep_arrays.json +1 -0
- data/test/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
- data/test/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
- data/test/parsing/fixtures/pass.doubles.json +1 -0
- data/test/parsing/fixtures/pass.empty_array.json +1 -0
- data/test/parsing/fixtures/pass.empty_string.json +1 -0
- data/test/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
- data/test/parsing/fixtures/pass.escaped_foobar.json +1 -0
- data/test/parsing/fixtures/pass.item.json +1 -0
- data/test/parsing/fixtures/pass.json-org-sample1.json +23 -0
- data/test/parsing/fixtures/pass.json-org-sample2.json +11 -0
- data/test/parsing/fixtures/pass.json-org-sample3.json +26 -0
- data/test/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
- data/test/parsing/fixtures/pass.json-org-sample4.json +89 -0
- data/test/parsing/fixtures/pass.json-org-sample5.json +27 -0
- data/test/parsing/fixtures/pass.map-spain.xml.json +1 -0
- data/test/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
- data/test/parsing/fixtures/pass.ns-soap.xml.json +1 -0
- data/test/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
- data/test/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
- data/test/parsing/fixtures/pass.numbers-int-4k.json +11 -0
- data/test/parsing/fixtures/pass.numbers-int-64k.json +154 -0
- data/test/parsing/fixtures/pass.twitter-search.json +1 -0
- data/test/parsing/fixtures/pass.twitter-search2.json +1 -0
- data/test/parsing/fixtures/pass.unicode.json +3315 -0
- data/test/parsing/fixtures/pass.yelp.json +1 -0
- data/test/parsing/fixtures/pass1.json +56 -0
- data/test/parsing/fixtures/pass2.json +1 -0
- data/test/parsing/fixtures/pass3.json +6 -0
- data/test/parsing/fixtures_test.rb +43 -0
- data/test/parsing/multiple_values_test.rb +100 -0
- data/test/parsing/one_off_test.rb +65 -0
- data/test/parsing/sax_parser_test.rb +125 -0
- data/test/performance.rb +135 -0
- data/test/test_helper.rb +36 -0
- data/test/wankel_test.rb +53 -0
- data/wankel.gemspec +23 -0
- metadata +259 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'mkmf'
require 'rbconfig'

# mkmf keeps $CFLAGS as a single space-separated string, so every flag
# appended here needs its own leading space; without it the flag fuses with
# whatever flag precedes it.
$CFLAGS << ' -Wall'

# Passing --coverage on the command line builds the extension with gcov
# instrumentation.
if ARGV.include?('--coverage')
  $CFLAGS << " -fprofile-arcs -ftest-coverage"
  $DLDFLAGS << ' --coverage'
end

# Only generate a Makefile when libyajl can be linked against.
if have_library('yajl')
  create_makefile('wankel/wankel')
else
  puts "Couldn't find YAJL library"
end
|
data/ext/wankel/wankel.c
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#include "wankel.h"
|
2
|
+
|
3
|
+
static ID intern_new, intern_parse, intern_encode;
|
4
|
+
|
5
|
+
static VALUE c_wankel, c_wankelParser, c_wankelEncoder, e_parseError, e_encodeError;
|
6
|
+
|
7
|
+
// Class Methods =============================================================
|
8
|
+
/*
 * Wankel.parse(input[, options][, &block]) -- also registered as Wankel.load.
 *
 * Instantiates the parser class with the given options and forwards the
 * input to its "parse" method. A block, when given, is captured by
 * rb_scan_args and handed over as a second positional argument; options and
 * the block are nil when omitted.
 */
static VALUE wankel_parse(int argc, VALUE * argv, VALUE klass) {
    VALUE input, options, callback;
    VALUE parser;

    rb_scan_args(argc, argv, "11&", &input, &options, &callback);
    parser = rb_funcall(c_wankelParser, intern_new, 1, options);

    return rb_funcall(parser, intern_parse, 2, input, callback);
}
|
15
|
+
|
16
|
+
/*
 * Wankel.encode(input[, output][, options]) -- also registered as Wankel.dump.
 *
 * The second argument is overloaded: a Hash there is treated as the options
 * hash and "encode" is called with the input only; anything else (including
 * nil) is forwarded to "encode" as the output target, with the third argument
 * used as options.
 */
static VALUE wankel_encode(int argc, VALUE * argv, VALUE klass) {
    VALUE input, output, options;
    VALUE encoder;

    rb_scan_args(argc, argv, "12", &input, &output, &options);

    if (TYPE(output) != T_HASH) {
        encoder = rb_funcall(c_wankelEncoder, intern_new, 1, options);
        return rb_funcall(encoder, intern_encode, 2, input, output);
    }

    /* Hash in the second slot: it is the options, not an output target. */
    encoder = rb_funcall(c_wankelEncoder, intern_new, 1, output);
    return rb_funcall(encoder, intern_encode, 1, input);
}
|
29
|
+
|
30
|
+
void Init_wankel() {
|
31
|
+
c_wankel = rb_define_class("Wankel", rb_cObject);
|
32
|
+
e_parseError = rb_define_class_under(c_wankel, "ParseError", rb_eStandardError);
|
33
|
+
e_encodeError = rb_define_class_under(c_wankel, "EncodeError", rb_eStandardError);
|
34
|
+
|
35
|
+
intern_new = rb_intern("new");
|
36
|
+
intern_parse = rb_intern("parse");
|
37
|
+
intern_encode = rb_intern("encode");
|
38
|
+
|
39
|
+
rb_define_singleton_method(c_wankel, "parse", wankel_parse, -1);
|
40
|
+
rb_define_singleton_method(c_wankel, "encode", wankel_encode, -1);
|
41
|
+
|
42
|
+
rb_define_singleton_method(c_wankel, "load", wankel_parse, -1);
|
43
|
+
rb_define_singleton_method(c_wankel, "dump", wankel_encode, -1);
|
44
|
+
|
45
|
+
c_wankelParser = Init_wankel_parser();
|
46
|
+
c_wankelEncoder = Init_wankel_encoder();
|
47
|
+
Init_wankel_sax_parser();
|
48
|
+
Init_wankel_sax_encoder();
|
49
|
+
Init_yajl_helpers();
|
50
|
+
}
|
data/ext/wankel/wankel.h
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#ifndef WANKEL
|
2
|
+
#define WANKEL
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
#include <ruby/encoding.h>
|
6
|
+
#include <yajl/yajl_common.h>
|
7
|
+
#include <yajl/yajl_parse.h>
|
8
|
+
|
9
|
+
#include "wankel_parser.h"
|
10
|
+
#include "wankel_encoder.h"
|
11
|
+
#include "wankel_sax_parser.h"
|
12
|
+
#include "wankel_sax_encoder.h"
|
13
|
+
#include "yajl_helpers.h"
|
14
|
+
|
15
|
+
void Init_wankel();
|
16
|
+
|
17
|
+
#endif
|
@@ -0,0 +1,232 @@
|
|
1
|
+
// TODO: i should use the Wankel::SaxEncoder?
|
2
|
+
#include "wankel_encoder.h"
|
3
|
+
|
4
|
+
static VALUE c_wankel, c_wankelEncoder, e_encodeError;
|
5
|
+
|
6
|
+
static ID intern_to_s, intern_keys, intern_io_write, intern_to_json, intern_clone, intern_merge, intern_DEFAULTS;
|
7
|
+
|
8
|
+
static ID sym_beautify, sym_indent_string, sym_validate_utf8, sym_escape_solidus;
|
9
|
+
|
10
|
+
|
11
|
+
static void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size);
|
12
|
+
static void yajl_encode_part(yajl_gen g, VALUE obj, VALUE io, int write_buffer_size);
|
13
|
+
static void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size);
|
14
|
+
|
15
|
+
/*
|
16
|
+
* Document-method: new
|
17
|
+
*
|
18
|
+
* call-seq: new([options])
|
19
|
+
*
|
20
|
+
* +:beautify+ generate indented (beautiful) output. Default `false`.
|
21
|
+
*
|
22
|
+
* +:indent_string+ Set an indent string which is used when yajl_gen_beautify
|
23
|
+
* is enabled. Maybe something like \\t or some number of
|
24
|
+
* spaces. The default is four spaces ' '.
|
25
|
+
*
|
26
|
+
* +:validate_utf8+ Normally the generator does not validate that strings you
|
27
|
+
* pass to it are valid UTF8. Enabling this option will cause
|
28
|
+
* it to do so.
|
29
|
+
*
|
30
|
+
* +:escape_solidus+ the forward solidus (slash or '/' in human) is not required
|
31
|
+
* to be escaped in json text. By default, YAJL will not escape
|
32
|
+
* it in the interest of saving bytes. Setting this flag will
|
33
|
+
* cause YAJL to always escape '/' in generated JSON strings.
|
34
|
+
*/
|
35
|
+
static VALUE wankelEncoder_initialize(int argc, VALUE * argv, VALUE self) {
    VALUE options, effective;
    VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);

    rb_scan_args(argc, argv, "01", &options);

    if (NIL_P(options)) {
        /* No overrides: work on a private copy of the defaults. */
        effective = rb_funcall(defaults, intern_clone, 0);
    } else {
        /* Caller-supplied values win over the defaults. */
        Check_Type(options, T_HASH);
        effective = rb_funcall(defaults, intern_merge, 1, options);
    }

    rb_iv_set(self, "@options", effective);
    return self;
}
|
49
|
+
|
50
|
+
/*
|
51
|
+
* Document-method: encode
|
52
|
+
*
|
53
|
+
* call-seq: encode(obj[, io])
|
54
|
+
*
|
55
|
+
* +obj+ is the Ruby object to encode to JSON
|
56
|
+
*
|
57
|
+
* +io+ is an optional IO used to stream the encoded JSON string to. If no io
|
58
|
+
* is specified the resulting JSON string is returned. If io is specified,
|
59
|
+
* this method returns nil
|
60
|
+
*/
|
61
|
+
static VALUE wankelEncoder_encode(int argc, VALUE * argv, VALUE self) {
|
62
|
+
VALUE obj, io, options;
|
63
|
+
yajl_gen g;
|
64
|
+
yajl_alloc_funcs alloc_funcs;
|
65
|
+
yajl_gen_status status;
|
66
|
+
int write_buffer_size;
|
67
|
+
const unsigned char * buffer;
|
68
|
+
size_t len;
|
69
|
+
|
70
|
+
rb_scan_args(argc, argv, "11", &obj, &io);
|
71
|
+
options = rb_iv_get(self, "@options");
|
72
|
+
|
73
|
+
alloc_funcs.malloc = yajl_helper_malloc;
|
74
|
+
alloc_funcs.realloc = yajl_helper_realloc;
|
75
|
+
alloc_funcs.free = yajl_helper_free;
|
76
|
+
g = yajl_gen_alloc(&alloc_funcs);
|
77
|
+
|
78
|
+
yajl_gen_configure(g, options);
|
79
|
+
|
80
|
+
if (io != Qnil && !rb_respond_to(io, intern_io_write)) {
|
81
|
+
rb_raise(e_encodeError, "output must be a an IO");
|
82
|
+
}
|
83
|
+
|
84
|
+
write_buffer_size = FIX2INT(rb_hash_aref(options, ID2SYM(rb_intern("write_buffer_size"))));
|
85
|
+
|
86
|
+
yajl_encode_part(g, obj, io, write_buffer_size);
|
87
|
+
|
88
|
+
// TODO: add terminator here if desired
|
89
|
+
if (io == Qnil) {
|
90
|
+
status = yajl_gen_get_buf(g, &buffer, &len);
|
91
|
+
yajl_helper_check_gen_status(status);
|
92
|
+
io = rb_str_new((const char *)buffer, len);
|
93
|
+
rb_enc_associate(io, rb_utf8_encoding());
|
94
|
+
yajl_gen_clear(g);
|
95
|
+
yajl_gen_free(g);
|
96
|
+
return io;
|
97
|
+
} else {
|
98
|
+
wankelEncoder_flush(g, io, 1);
|
99
|
+
yajl_gen_free(g);
|
100
|
+
return Qnil;
|
101
|
+
}
|
102
|
+
return self;
|
103
|
+
}
|
104
|
+
|
105
|
+
/*
 * Writes the generator's buffered output to io once at least
 * write_buffer_size bytes have accumulated, then clears the generator
 * buffer. Does nothing when io is nil (output is collected in memory).
 */
void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size) {
    const unsigned char * pending;
    size_t pending_len;
    yajl_gen_status status;
    VALUE chunk;

    if (io == Qnil) {
        return;
    }

    status = yajl_gen_get_buf(g, &pending, &pending_len);
    yajl_helper_check_gen_status(status);

    /* Below the threshold: keep buffering. */
    if (pending_len < (size_t)write_buffer_size) {
        return;
    }

    chunk = rb_str_new((const char *)pending, pending_len);
    rb_enc_associate(chunk, rb_utf8_encoding());
    rb_io_write(io, chunk);
    yajl_gen_clear(g);
}
|
123
|
+
|
124
|
+
/*
 * Returns non-zero when the textual number starts with "NaN", "Infinity" or
 * "-Infinity". The length is checked first so memcmp never reads past the
 * end of short strings such as "1".
 */
static int wankel_number_is_non_finite(const char * text, size_t len) {
    static const char * const banned[] = { "NaN", "Infinity", "-Infinity" };
    size_t i;

    for (i = 0; i < sizeof(banned) / sizeof(banned[0]); i++) {
        size_t banned_len = strlen(banned[i]);
        if (len >= banned_len && memcmp(text, banned[i], banned_len) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Recursively feeds obj into the yajl generator g.
 *
 *  - Hash:   emitted as a map; each key is converted with to_s first.
 *  - Array:  emitted as an array.
 *  - nil / true / false: emitted as null / booleans.
 *  - Fixnum / Float / Bignum: emitted verbatim from to_s; NaN and
 *    +/-Infinity raise Wankel::EncodeError.
 *  - String: emitted as a JSON string.
 *  - anything else: the result of to_json is inserted verbatim if the object
 *    responds to it, otherwise to_s is emitted as a JSON string.
 *
 * After each value the generator buffer is offered to wankelEncoder_flush so
 * output can be streamed to io in write_buffer_size chunks.
 */
void yajl_encode_part(yajl_gen g, VALUE obj, VALUE io, int write_buffer_size) {
    size_t len;
    long idx;
    VALUE keys, entry, str;
    const char * cptr;
    yajl_gen_status status;

    switch (TYPE(obj)) {
        case T_HASH:
            status = yajl_gen_map_open(g);
            yajl_helper_check_gen_status(status);

            keys = rb_funcall(obj, intern_keys, 0);
            for (idx = 0; idx < RARRAY_LEN(keys); idx++) {
                entry = rb_ary_entry(keys, idx);
                str = rb_funcall(entry, intern_to_s, 0); /* key must be a string */
                /* the key */
                yajl_encode_part(g, str, io, write_buffer_size);
                /* the value */
                yajl_encode_part(g, rb_hash_aref(obj, entry), io, write_buffer_size);
            }

            status = yajl_gen_map_close(g);
            yajl_helper_check_gen_status(status);
            break;
        case T_ARRAY:
            status = yajl_gen_array_open(g);
            yajl_helper_check_gen_status(status);

            for (idx = 0; idx < RARRAY_LEN(obj); idx++) {
                yajl_encode_part(g, rb_ary_entry(obj, idx), io, write_buffer_size);
            }
            status = yajl_gen_array_close(g);
            yajl_helper_check_gen_status(status);
            break;
        case T_NIL:
            status = yajl_gen_null(g);
            yajl_helper_check_gen_status(status);
            break;
        case T_TRUE:
            status = yajl_gen_bool(g, 1);
            yajl_helper_check_gen_status(status);
            break;
        case T_FALSE:
            status = yajl_gen_bool(g, 0);
            yajl_helper_check_gen_status(status);
            break;
        case T_FIXNUM:
        case T_FLOAT:
        case T_BIGNUM:
            str = rb_funcall(obj, intern_to_s, 0);
            cptr = RSTRING_PTR(str);
            len = RSTRING_LEN(str);
            if (wankel_number_is_non_finite(cptr, len)) {
                rb_raise(e_encodeError, "'%s' is an invalid number", cptr);
            }
            status = yajl_gen_number(g, cptr, len);
            /* Keep the temporary string alive while yajl reads from cptr. */
            RB_GC_GUARD(str);
            yajl_helper_check_gen_status(status);
            break;
        case T_STRING:
            cptr = RSTRING_PTR(obj);
            len = RSTRING_LEN(obj);
            status = yajl_gen_string(g, (const unsigned char *)cptr, len);
            yajl_helper_check_gen_status(status);
            break;
        default:
            if (rb_respond_to(obj, intern_to_json)) {
                str = rb_funcall(obj, intern_to_json, 0);
                Check_Type(str, T_STRING);
                cptr = RSTRING_PTR(str);
                len = RSTRING_LEN(str);
                /* yajl_gen_number copies the text unescaped, which is what
                 * lets pre-rendered JSON be spliced in here. */
                status = yajl_gen_number(g, cptr, len);
                RB_GC_GUARD(str);
                yajl_helper_check_gen_status(status);
            } else {
                str = rb_funcall(obj, intern_to_s, 0);
                Check_Type(str, T_STRING);
                cptr = RSTRING_PTR(str);
                len = RSTRING_LEN(str);
                status = yajl_gen_string(g, (const unsigned char *)cptr, len);
                RB_GC_GUARD(str);
                yajl_helper_check_gen_status(status);
            }
            break;
    }

    wankelEncoder_flush(g, io, write_buffer_size);
}
|
210
|
+
|
211
|
+
ID Init_wankel_encoder() {
|
212
|
+
c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
|
213
|
+
c_wankelEncoder = rb_define_class_under(c_wankel, "Encoder", rb_cObject);
|
214
|
+
e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
|
215
|
+
|
216
|
+
rb_define_method(c_wankelEncoder, "initialize", wankelEncoder_initialize, -1);
|
217
|
+
rb_define_method(c_wankelEncoder, "encode", wankelEncoder_encode, -1);
|
218
|
+
|
219
|
+
intern_to_s = rb_intern("to_s");
|
220
|
+
intern_io_write = rb_intern("write");
|
221
|
+
intern_to_json = rb_intern("to_json");
|
222
|
+
intern_keys = rb_intern("keys");
|
223
|
+
intern_clone = rb_intern("clone");
|
224
|
+
intern_merge = rb_intern("merge");
|
225
|
+
intern_DEFAULTS = rb_intern("DEFAULTS");
|
226
|
+
sym_beautify = ID2SYM(rb_intern("beautify"));
|
227
|
+
sym_indent_string = ID2SYM(rb_intern("indent_string"));
|
228
|
+
sym_validate_utf8 = ID2SYM(rb_intern("validate_utf8"));
|
229
|
+
sym_escape_solidus = ID2SYM(rb_intern("escape_solidus"));
|
230
|
+
|
231
|
+
return c_wankelEncoder;
|
232
|
+
}
|
@@ -0,0 +1,345 @@
|
|
1
|
+
#include "wankel_parser.h"
|
2
|
+
|
3
|
+
// Callbacks =================================================================
|
4
|
+
static int wankel_parse_callback_on_null(void *ctx);
|
5
|
+
static int wankel_parse_callback_on_boolean(void *ctx, int boolVal);
|
6
|
+
// static int wankel_parse_callback_on_integer(void *ctx, long long integerVal);
|
7
|
+
// static int wankel_parse_callback_on_double(void *ctx, double doubleVal);
|
8
|
+
static int wankel_parse_callback_on_number(void *ctx, const char * numberVal, size_t numberLen);
|
9
|
+
static int wankel_parse_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen);
|
10
|
+
static int wankel_parse_callback_on_map_start(void *ctx);
|
11
|
+
static int wankel_parse_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen);
|
12
|
+
static int wankel_parse_callback_on_map_end(void *ctx);
|
13
|
+
static int wankel_parse_callback_on_array_start(void *ctx);
|
14
|
+
static int wankel_parse_callback_on_array_end(void *ctx);
|
15
|
+
|
16
|
+
static yajl_callbacks callbacks = {
|
17
|
+
wankel_parse_callback_on_null,
|
18
|
+
wankel_parse_callback_on_boolean,
|
19
|
+
NULL,
|
20
|
+
NULL,
|
21
|
+
wankel_parse_callback_on_number,
|
22
|
+
wankel_parse_callback_on_string,
|
23
|
+
wankel_parse_callback_on_map_start,
|
24
|
+
wankel_parse_callback_on_map_key,
|
25
|
+
wankel_parse_callback_on_map_end,
|
26
|
+
wankel_parse_callback_on_array_start,
|
27
|
+
wankel_parse_callback_on_array_end
|
28
|
+
};
|
29
|
+
|
30
|
+
// Ruby GC ===================================================================
|
31
|
+
static VALUE wankel_alloc(VALUE klass);
|
32
|
+
static void wankel_free(void * handle);
|
33
|
+
static void wankel_mark(void * handle);
|
34
|
+
|
35
|
+
static ID intern_io_read, intern_clone, intern_merge, intern_call,
|
36
|
+
intern_DEFAULTS, sym_multiple_values;
|
37
|
+
|
38
|
+
static ID sym_read_buffer_size, sym_symbolize_keys;
|
39
|
+
|
40
|
+
static VALUE c_wankel, c_wankelParser, e_parseError, e_encodeError;
|
41
|
+
|
42
|
+
/*
|
43
|
+
* Document-method: new
|
44
|
+
*
|
45
|
+
* call-seq: new([options])
|
46
|
+
*
|
47
|
+
* +:symbolize_keys+ will turn hash keys into Ruby symbols, defaults to false.
|
48
|
+
* Default `false`.
|
49
|
+
*
|
50
|
+
* +:allow_comments+ will ignore javascript style comments in JSON input.
|
51
|
+
* Default `false`.
|
52
|
+
*
|
53
|
+
* +:validate_strings+ will verify that all strings in JSON input are valid UTF8
|
54
|
+
* and will emit a parse error if this is not so. This option
|
55
|
+
* makes parsing slightly more expensive (~7% depending on
|
56
|
+
* processor and compiler in use). Default `false`.
|
57
|
+
*
|
58
|
+
* +:allow_trailing_garbage+ when `false` the parser ensures the entire input text
|
59
|
+
* was consumed and raises an error otherwise; set to `true` to skip that check. Default `false`.
|
60
|
+
*
|
61
|
+
* +:multiple_values+ allow multiple values to be parsed by a single parser. The
|
62
|
+
* entire text must be valid JSON, and values can be separated
|
63
|
+
* by any kind of whitespace. Default `false`.
|
64
|
+
*
|
65
|
+
* +:allow_partial_values+ when `false` the parser checks that the top level value was
|
66
|
+
* completely consumed; set to `true` to skip that check. Default `false`.
|
67
|
+
*
|
68
|
+
*
|
69
|
+
* +:read_buffer_size+ is the size of chunk that will be parsed off the input
|
70
|
+
* (if it's an IO) for each loop of the parsing process.
|
71
|
+
* 8092 is a good balance between the different types of
|
72
|
+
* streams (off disk, off a socket, etc...), but this option
|
73
|
+
* is here so the caller can better tune their parsing depending
|
74
|
+
* on the type of stream being passed. A larger read buffer
|
75
|
+
* will perform better for files off disk, where as a smaller
|
76
|
+
* size may be more efficient for reading off of a socket
|
77
|
+
* directly.
|
78
|
+
*/
|
79
|
+
static VALUE wankelParser_initialize(int argc, VALUE * argv, VALUE self) {
    wankel_parser * p;
    VALUE options, merged, rbufsize;
    VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);

    rb_scan_args(argc, argv, "01", &options);

    if (NIL_P(options)) {
        /* No overrides: use a private copy of the defaults. */
        merged = rb_funcall(defaults, intern_clone, 0);
    } else {
        Check_Type(options, T_HASH);
        merged = rb_funcall(defaults, intern_merge, 1, options);
    }
    rb_iv_set(self, "@options", merged);

    /* The read buffer size is kept as a Fixnum VALUE because it is later
     * passed straight to the input's "read" method. */
    rbufsize = rb_hash_aref(merged, sym_read_buffer_size);
    Check_Type(rbufsize, T_FIXNUM);

    Data_Get_Struct(self, wankel_parser, p);
    p->rbufsize = rbufsize;
    p->symbolize_keys = (rb_hash_aref(merged, sym_symbolize_keys) == Qtrue) ? 1 : 0;

    p->alloc_funcs.malloc  = yajl_helper_malloc;
    p->alloc_funcs.realloc = yajl_helper_realloc;
    p->alloc_funcs.free    = yajl_helper_free;

    return self;
}
|
109
|
+
|
110
|
+
/*
|
111
|
+
* Document-method: parse
|
112
|
+
*
|
113
|
+
* call-seq: parse(input[, &block])
|
114
|
+
*
|
115
|
+
* input can either be a String or an IO object
|
116
|
+
*
|
117
|
+
* If a block is passed, it is called when the input is finished parsing. If
|
118
|
+
* parsing multiple json values in an input it is called once for each value
|
119
|
+
*
|
120
|
+
*/
|
121
|
+
static VALUE wankelParser_parse(int argc, VALUE * argv, VALUE self) {
    const char * cptr;
    size_t len; /* yajl_parse takes a size_t; unsigned int truncated big inputs */
    yajl_status status;
    wankel_parser * p;
    VALUE input, callback;
    VALUE options = rb_iv_get(self, "@options");
    rb_scan_args(argc, argv, "11", &input, &callback); // Hack, cuz i'm not sure how to call a method with a block from c

    /* The callback may arrive as a positional Proc (from Wankel.parse) or as
     * a real block. */
    if(callback == Qnil && rb_block_given_p()) {
        callback = rb_block_proc();
    }

    Data_Get_Struct(self, wankel_parser, p);

    /* A parser object can be reused; release the handle left over from a
     * previous parse (successful or aborted) so it is not leaked. */
    if (p->h) {
        yajl_free(p->h);
        p->h = 0;
    }

    p->h = yajl_alloc(&callbacks, &p->alloc_funcs, (void *)p);
    yajl_configure(p->h, options);
    p->callback = callback;
    p->stack = rb_ary_new();
    p->stack_index = 0;

    if (TYPE(input) == T_STRING) {
        cptr = RSTRING_PTR(input);
        len = (size_t)RSTRING_LEN(input);
        status = yajl_parse(p->h, (const unsigned char*)cptr, len);
        yajl_helper_check_status(p->h, status, 1, (const unsigned char*)cptr, len);
    } else if (rb_respond_to(input, intern_io_read)) {
        /* Reuse one buffer string; "read" refills it on every iteration and
         * returns nil at end of input. */
        VALUE chunk = rb_str_new(0, NUM2LONG(p->rbufsize));
        while (rb_funcall(input, intern_io_read, 2, p->rbufsize, chunk) != Qnil) {
            cptr = RSTRING_PTR(chunk);
            len = (size_t)RSTRING_LEN(chunk);
            status = yajl_parse(p->h, (const unsigned char*)cptr, len);
            yajl_helper_check_status(p->h, status, 1, (const unsigned char*)cptr, len);
        }
    } else {
        rb_raise(e_parseError, "input must be a string or an IO");
    }

    status = yajl_complete_parse(p->h);
    if(status != yajl_status_ok) {
        rb_raise(e_parseError, "Error completing parse");
    }

    if(rb_block_given_p()) {
        return Qnil;
    } else if(rb_hash_aref(options, sym_multiple_values) == Qtrue) {
        return p->stack;
    } else {
        return rb_ary_pop(p->stack);
    }
}
|
170
|
+
|
171
|
+
ID Init_wankel_parser() {
|
172
|
+
c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
|
173
|
+
c_wankelParser = rb_define_class_under(c_wankel, "Parser", rb_cObject);
|
174
|
+
e_parseError = rb_const_get(c_wankel, rb_intern("ParseError"));
|
175
|
+
e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
|
176
|
+
|
177
|
+
rb_define_alloc_func(c_wankelParser, wankel_alloc);
|
178
|
+
rb_define_method(c_wankelParser, "initialize", wankelParser_initialize, -1);
|
179
|
+
rb_define_method(c_wankelParser, "parse", wankelParser_parse, -1);
|
180
|
+
|
181
|
+
intern_io_read = rb_intern("read");
|
182
|
+
intern_clone = rb_intern("clone");
|
183
|
+
intern_merge = rb_intern("merge");
|
184
|
+
intern_call = rb_intern("call");
|
185
|
+
intern_DEFAULTS = rb_intern("DEFAULTS");
|
186
|
+
sym_read_buffer_size = ID2SYM(rb_intern("read_buffer_size"));
|
187
|
+
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
188
|
+
sym_multiple_values = ID2SYM(rb_intern("multiple_values"));
|
189
|
+
|
190
|
+
return c_wankelParser;
|
191
|
+
}
|
192
|
+
// Ruby GC ===================================================================
|
193
|
+
/*
 * Allocator for Wankel::Parser: wraps a new wankel_parser struct in a Ruby
 * object, registering wankel_mark / wankel_free as its GC hooks. The yajl
 * handle starts out empty.
 */
static VALUE wankel_alloc(VALUE klass) {
    wankel_parser * parser;
    VALUE wrapper = Data_Make_Struct(klass, wankel_parser, wankel_mark, wankel_free, parser);

    parser->h = 0;
    return wrapper;
}
|
200
|
+
|
201
|
+
static void wankel_free(void * handle) {
|
202
|
+
wankel_parser * p = handle;
|
203
|
+
if (p->h){
|
204
|
+
yajl_free(p->h);
|
205
|
+
}
|
206
|
+
}
|
207
|
+
|
208
|
+
static void wankel_mark(void * handle) {
|
209
|
+
wankel_parser * p = handle;
|
210
|
+
rb_gc_mark(p->stack);
|
211
|
+
rb_gc_mark(p->rbufsize);
|
212
|
+
}
|
213
|
+
|
214
|
+
// Parse Builder =============================================================
|
215
|
+
/*
 * Adds a freshly parsed value to the structure being built.
 *
 * p->stack holds the chain of open containers, plus a pending hash key while
 * a map value is awaited. What happens depends on the entry on top:
 *   - nothing open (stack_index <= 0): val is pushed as a new top-level entry;
 *   - Array: val is appended; a container val is also pushed as the new top;
 *   - Hash:  val is a key -- it is stored with a nil placeholder and pushed
 *            so the next value can be attached to it;
 *   - String/Symbol (a pending key): val is stored under that key in the Hash
 *            below it, the key is popped, and a container val is pushed.
 */
static void wankel_builder_push(void *ctx, VALUE val) {
    wankel_parser * p = ctx;
    VALUE top, owner;
    int depth, top_type, val_is_container;

    if (p->stack_index <= 0) {
        rb_ary_push(p->stack, val);
        p->stack_index++;
        return;
    }

    depth = (int)RARRAY_LEN(p->stack);
    top = rb_ary_entry(p->stack, depth - 1);
    top_type = TYPE(top);
    val_is_container = (TYPE(val) == T_HASH || TYPE(val) == T_ARRAY);

    if (top_type == T_ARRAY) {
        rb_ary_push(top, val);
        if (val_is_container) {
            rb_ary_push(p->stack, val);
            p->stack_index++;
        }
    } else if (top_type == T_HASH) {
        rb_hash_aset(top, val, Qnil);
        rb_ary_push(p->stack, val);
        p->stack_index++;
    } else if (top_type == T_STRING || top_type == T_SYMBOL) {
        owner = rb_ary_entry(p->stack, depth - 2);
        if (TYPE(owner) != T_HASH) {
            return;
        }
        rb_hash_aset(owner, top, val);
        rb_ary_pop(p->stack);
        p->stack_index--;
        if (val_is_container) {
            rb_ary_push(p->stack, val);
            p->stack_index++;
        }
    }
}
|
255
|
+
|
256
|
+
static int wankel_parse_callback_on_null(void *ctx) {
|
257
|
+
wankel_builder_push(ctx, Qnil);
|
258
|
+
return 1;
|
259
|
+
}
|
260
|
+
|
261
|
+
static int wankel_parse_callback_on_boolean(void *ctx, int boolVal) {
|
262
|
+
wankel_builder_push(ctx, boolVal ? Qtrue : Qfalse);
|
263
|
+
return 1;
|
264
|
+
}
|
265
|
+
|
266
|
+
// static int wankel_parse_callback_on_integer(void *ctx, long long integerVal) {
|
267
|
+
// wankel_builder_push(ctx, LL2NUM(integerVal));
|
268
|
+
// return 1;
|
269
|
+
// }
|
270
|
+
// static int wankel_parse_callback_on_double(void *ctx, double doubleVal) {
|
271
|
+
// wankel_builder_push(ctx, rb_float_new(doubleVal));
|
272
|
+
// return 1;
|
273
|
+
// }
|
274
|
+
static int wankel_parse_callback_on_number(void * ctx, const char * numberVal, size_t numberLen){
|
275
|
+
char buf[numberLen+1];
|
276
|
+
buf[numberLen] = 0;
|
277
|
+
memcpy(buf, numberVal, numberLen);
|
278
|
+
|
279
|
+
if (memchr(buf, '.', numberLen) || memchr(buf, 'e', numberLen) || memchr(buf, 'E', numberLen)) {
|
280
|
+
wankel_builder_push(ctx, rb_float_new(strtod(buf, NULL)));
|
281
|
+
} else {
|
282
|
+
wankel_builder_push(ctx, rb_cstr2inum(buf, 10));
|
283
|
+
}
|
284
|
+
return 1;
|
285
|
+
}
|
286
|
+
static int wankel_parse_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen) {
|
287
|
+
VALUE str = rb_str_new((const char *)stringVal, stringLen);
|
288
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
289
|
+
rb_enc_associate(str, rb_utf8_encoding());
|
290
|
+
if (default_internal_enc) {
|
291
|
+
str = rb_str_export_to_enc(str, default_internal_enc);
|
292
|
+
}
|
293
|
+
|
294
|
+
wankel_builder_push(ctx, str);
|
295
|
+
return 1;
|
296
|
+
}
|
297
|
+
static int wankel_parse_callback_on_map_start(void *ctx) {
|
298
|
+
wankel_builder_push(ctx, rb_hash_new());
|
299
|
+
return 1;
|
300
|
+
}
|
301
|
+
static int wankel_parse_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen) {
|
302
|
+
wankel_parser * p = ctx;
|
303
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
304
|
+
VALUE str = rb_str_new((const char *)key, keyLen);
|
305
|
+
rb_enc_associate(str, rb_utf8_encoding());
|
306
|
+
if (default_internal_enc) {
|
307
|
+
str = rb_str_export_to_enc(str, default_internal_enc);
|
308
|
+
}
|
309
|
+
|
310
|
+
if(p->symbolize_keys) {
|
311
|
+
wankel_builder_push(ctx, ID2SYM(rb_to_id(str)) );
|
312
|
+
} else {
|
313
|
+
wankel_builder_push(ctx, str);
|
314
|
+
}
|
315
|
+
|
316
|
+
return 1;
|
317
|
+
}
|
318
|
+
static int wankel_parse_callback_on_map_end(void *ctx) {
|
319
|
+
wankel_parser * p = ctx;
|
320
|
+
p->stack_index--;
|
321
|
+
|
322
|
+
if(p->stack_index > 0) {
|
323
|
+
rb_ary_pop(p->stack);
|
324
|
+
} else if(p->stack_index == 0 && p->callback != Qnil) {
|
325
|
+
rb_funcall(p->callback, intern_call, 1, rb_ary_pop(p->stack));
|
326
|
+
}
|
327
|
+
|
328
|
+
return 1;
|
329
|
+
}
|
330
|
+
static int wankel_parse_callback_on_array_start(void *ctx) {
|
331
|
+
wankel_builder_push(ctx, rb_ary_new());
|
332
|
+
return 1;
|
333
|
+
}
|
334
|
+
static int wankel_parse_callback_on_array_end(void *ctx) {
|
335
|
+
wankel_parser * p = ctx;
|
336
|
+
p->stack_index--;
|
337
|
+
|
338
|
+
if(p->stack_index > 0) {
|
339
|
+
rb_ary_pop(p->stack);
|
340
|
+
} else if(p->stack_index == 0 && p->callback != Qnil) {
|
341
|
+
rb_funcall(p->callback, intern_call, 1, rb_ary_pop(p->stack));
|
342
|
+
}
|
343
|
+
|
344
|
+
return 1;
|
345
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#ifndef WANKEL_PARSER
|
2
|
+
#define WANKEL_PARSER
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
#include <ruby/encoding.h>
|
6
|
+
#include <yajl/yajl_common.h>
|
7
|
+
#include <yajl/yajl_parse.h>
|
8
|
+
|
9
|
+
#include "yajl_helpers.h"
|
10
|
+
|
11
|
+
ID Init_wankel_parser();
|
12
|
+
|
13
|
+
typedef struct {
|
14
|
+
yajl_handle h;
|
15
|
+
yajl_alloc_funcs alloc_funcs;
|
16
|
+
VALUE stack;
|
17
|
+
int stack_index;
|
18
|
+
int symbolize_keys;
|
19
|
+
VALUE rbufsize;
|
20
|
+
VALUE callback;
|
21
|
+
VALUE last_entry;
|
22
|
+
} wankel_parser;
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
#endif
|