wankel 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/LICENSE +20 -0
- data/README.md +43 -0
- data/Rakefile +63 -0
- data/benchmark/subjects/item.json +1 -0
- data/benchmark/subjects/ohai.json +1216 -0
- data/benchmark/subjects/twitter_search.json +1 -0
- data/benchmark/subjects/twitter_stream.json +430 -0
- data/ext/wankel/extconf.rb +15 -0
- data/ext/wankel/wankel.c +50 -0
- data/ext/wankel/wankel.h +17 -0
- data/ext/wankel/wankel_encoder.c +232 -0
- data/ext/wankel/wankel_encoder.h +13 -0
- data/ext/wankel/wankel_parser.c +345 -0
- data/ext/wankel/wankel_parser.h +26 -0
- data/ext/wankel/wankel_sax_encoder.c +290 -0
- data/ext/wankel/wankel_sax_encoder.h +13 -0
- data/ext/wankel/wankel_sax_parser.c +232 -0
- data/ext/wankel/wankel_sax_parser.h +23 -0
- data/ext/wankel/yajl_helpers.c +124 -0
- data/ext/wankel/yajl_helpers.h +22 -0
- data/lib/wankel/ex_sax_parser.rb +75 -0
- data/lib/wankel.rb +19 -0
- data/logo.png +0 -0
- data/test/encoding/encoding_test.rb +230 -0
- data/test/encoding/sax_encoder_test.rb +89 -0
- data/test/parsing/active_support_test.rb +66 -0
- data/test/parsing/fixtures/fail.15.json +1 -0
- data/test/parsing/fixtures/fail.16.json +1 -0
- data/test/parsing/fixtures/fail.17.json +1 -0
- data/test/parsing/fixtures/fail.26.json +1 -0
- data/test/parsing/fixtures/fail11.json +1 -0
- data/test/parsing/fixtures/fail12.json +1 -0
- data/test/parsing/fixtures/fail13.json +1 -0
- data/test/parsing/fixtures/fail14.json +1 -0
- data/test/parsing/fixtures/fail19.json +1 -0
- data/test/parsing/fixtures/fail20.json +1 -0
- data/test/parsing/fixtures/fail21.json +1 -0
- data/test/parsing/fixtures/fail22.json +1 -0
- data/test/parsing/fixtures/fail23.json +1 -0
- data/test/parsing/fixtures/fail24.json +1 -0
- data/test/parsing/fixtures/fail25.json +1 -0
- data/test/parsing/fixtures/fail27.json +2 -0
- data/test/parsing/fixtures/fail28.json +2 -0
- data/test/parsing/fixtures/fail3.json +1 -0
- data/test/parsing/fixtures/fail4.json +1 -0
- data/test/parsing/fixtures/fail5.json +1 -0
- data/test/parsing/fixtures/fail6.json +1 -0
- data/test/parsing/fixtures/fail9.json +1 -0
- data/test/parsing/fixtures/pass.array.json +6 -0
- data/test/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
- data/test/parsing/fixtures/pass.contacts.json +1 -0
- data/test/parsing/fixtures/pass.db100.xml.json +1 -0
- data/test/parsing/fixtures/pass.db1000.xml.json +1 -0
- data/test/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
- data/test/parsing/fixtures/pass.deep_arrays.json +1 -0
- data/test/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
- data/test/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
- data/test/parsing/fixtures/pass.doubles.json +1 -0
- data/test/parsing/fixtures/pass.empty_array.json +1 -0
- data/test/parsing/fixtures/pass.empty_string.json +1 -0
- data/test/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
- data/test/parsing/fixtures/pass.escaped_foobar.json +1 -0
- data/test/parsing/fixtures/pass.item.json +1 -0
- data/test/parsing/fixtures/pass.json-org-sample1.json +23 -0
- data/test/parsing/fixtures/pass.json-org-sample2.json +11 -0
- data/test/parsing/fixtures/pass.json-org-sample3.json +26 -0
- data/test/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
- data/test/parsing/fixtures/pass.json-org-sample4.json +89 -0
- data/test/parsing/fixtures/pass.json-org-sample5.json +27 -0
- data/test/parsing/fixtures/pass.map-spain.xml.json +1 -0
- data/test/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
- data/test/parsing/fixtures/pass.ns-soap.xml.json +1 -0
- data/test/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
- data/test/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
- data/test/parsing/fixtures/pass.numbers-int-4k.json +11 -0
- data/test/parsing/fixtures/pass.numbers-int-64k.json +154 -0
- data/test/parsing/fixtures/pass.twitter-search.json +1 -0
- data/test/parsing/fixtures/pass.twitter-search2.json +1 -0
- data/test/parsing/fixtures/pass.unicode.json +3315 -0
- data/test/parsing/fixtures/pass.yelp.json +1 -0
- data/test/parsing/fixtures/pass1.json +56 -0
- data/test/parsing/fixtures/pass2.json +1 -0
- data/test/parsing/fixtures/pass3.json +6 -0
- data/test/parsing/fixtures_test.rb +43 -0
- data/test/parsing/multiple_values_test.rb +100 -0
- data/test/parsing/one_off_test.rb +65 -0
- data/test/parsing/sax_parser_test.rb +125 -0
- data/test/performance.rb +135 -0
- data/test/test_helper.rb +36 -0
- data/test/wankel_test.rb +53 -0
- data/wankel.gemspec +23 -0
- metadata +259 -0
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
#include "wankel_sax_encoder.h"
|
|
2
|
+
|
|
3
|
+
typedef struct {
|
|
4
|
+
yajl_gen g;
|
|
5
|
+
VALUE output;
|
|
6
|
+
int write_buffer_size;
|
|
7
|
+
} wankel_encoder;
|
|
8
|
+
|
|
9
|
+
static VALUE wankelSaxEncoder_initialize(int argc, VALUE * argv, VALUE self);
|
|
10
|
+
static VALUE wankelSaxEncoder_number(VALUE self, VALUE number);
|
|
11
|
+
static VALUE wankelSaxEncoder_string(VALUE self, VALUE string);
|
|
12
|
+
static VALUE wankelSaxEncoder_null(VALUE self);
|
|
13
|
+
static VALUE wankelSaxEncoder_bool(VALUE self, VALUE b);
|
|
14
|
+
static VALUE wankelSaxEncoder_map_open(VALUE self);
|
|
15
|
+
static VALUE wankelSaxEncoder_map_close(VALUE self);
|
|
16
|
+
static VALUE wankelSaxEncoder_array_open(VALUE self);
|
|
17
|
+
static VALUE wankelSaxEncoder_array_close(VALUE self);
|
|
18
|
+
static VALUE wankelSaxEncoder_complete(VALUE self);
|
|
19
|
+
static void wankelSaxEncoder_flush(wankel_encoder * p);
|
|
20
|
+
static VALUE wankel_sax_encoder_alloc(VALUE klass);
|
|
21
|
+
static void wankel_sax_encoder_free(void * handle);
|
|
22
|
+
static void wankel_sax_encoder_mark(void * handle);
|
|
23
|
+
|
|
24
|
+
static VALUE c_wankel, c_wankelSaxEncoder, e_encodeError;
|
|
25
|
+
|
|
26
|
+
static ID intern_to_s, intern_keys, intern_io_write, intern_to_json, intern_clone, intern_merge, intern_DEFAULTS;
|
|
27
|
+
|
|
28
|
+
static ID sym_beautify, sym_indent_string, sym_validate_utf8, sym_escape_solidus;
|
|
29
|
+
|
|
30
|
+
/*
|
|
31
|
+
* Document-method: new
|
|
32
|
+
*
|
|
33
|
+
* call-seq: new([options])
|
|
34
|
+
*
|
|
35
|
+
* +:beautify+ generate indented (beautiful) output. Default `false`.
|
|
36
|
+
*
|
|
37
|
+
* +:indent_string+ Set an indent string which is used when yajl_gen_beautify
|
|
38
|
+
* is enabled. Maybe something like \\t or some number of
|
|
39
|
+
* spaces. The default is four spaces ' '.
|
|
40
|
+
*
|
|
41
|
+
* +:validate_utf8+ Normally the generator does not validate that strings you
|
|
42
|
+
* pass to it are valid UTF8. Enabling this option will cause
|
|
43
|
+
* it to do so.
|
|
44
|
+
*
|
|
45
|
+
* +:escape_solidus+ the forward solidus (slash or '/' in human) is not required
|
|
46
|
+
* to be escaped in json text. By default, YAJL will not escape
|
|
47
|
+
* it in the iterest of saving bytes. Setting this flag will
|
|
48
|
+
* cause YAJL to always escape '/' in generated JSON strings.
|
|
49
|
+
*/
|
|
50
|
+
static VALUE wankelSaxEncoder_initialize(int argc, VALUE * argv, VALUE self) {
|
|
51
|
+
VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
|
|
52
|
+
VALUE io, options;
|
|
53
|
+
wankel_encoder * p;
|
|
54
|
+
yajl_alloc_funcs alloc_funcs;
|
|
55
|
+
|
|
56
|
+
rb_scan_args(argc, argv, "11", &io, &options);
|
|
57
|
+
|
|
58
|
+
if(options == Qnil) {
|
|
59
|
+
rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0) );
|
|
60
|
+
} else {
|
|
61
|
+
Check_Type(options, T_HASH);
|
|
62
|
+
rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options) );
|
|
63
|
+
}
|
|
64
|
+
options = rb_iv_get(self, "@options");
|
|
65
|
+
|
|
66
|
+
if (!rb_respond_to(io, intern_io_write)) {
|
|
67
|
+
rb_raise(e_encodeError, "output must be a an IO");
|
|
68
|
+
}
|
|
69
|
+
Data_Get_Struct(self, wankel_encoder, p);
|
|
70
|
+
p->output = io;
|
|
71
|
+
|
|
72
|
+
alloc_funcs.malloc = yajl_helper_malloc;
|
|
73
|
+
alloc_funcs.realloc = yajl_helper_realloc;
|
|
74
|
+
alloc_funcs.free = yajl_helper_free;
|
|
75
|
+
p->g = yajl_gen_alloc(&alloc_funcs);
|
|
76
|
+
yajl_gen_configure(p->g, options);
|
|
77
|
+
|
|
78
|
+
p->write_buffer_size = FIX2INT(rb_hash_aref(options, ID2SYM(rb_intern("write_buffer_size"))));
|
|
79
|
+
|
|
80
|
+
return self;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/*
 * call-seq: number(value)
 *
 * Emits +value+ as a JSON number using the text returned by value.to_s.
 * Raises Wankel::EncodeError when that text starts with NaN, Infinity or
 * -Infinity, which JSON cannot represent. Returns nil.
 */
static VALUE wankelSaxEncoder_number(VALUE self, VALUE number) {
    static const char * const invalid_prefixes[] = { "NaN", "Infinity", "-Infinity" };
    size_t i;
    size_t len;
    const char * cptr;
    wankel_encoder * p;
    yajl_gen_status status;
    VALUE str = rb_funcall(number, intern_to_s, 0);

    /* to_s is user-overridable; make sure we really got a String back. */
    Check_Type(str, T_STRING);

    Data_Get_Struct(self, wankel_encoder, p);
    cptr = RSTRING_PTR(str);
    len = RSTRING_LEN(str);

    /* Only compare when the text is at least as long as the prefix, so the
     * comparison never reads past the end of the string buffer. */
    for (i = 0; i < sizeof(invalid_prefixes) / sizeof(invalid_prefixes[0]); i++) {
        size_t prefix_len = strlen(invalid_prefixes[i]);
        if (len >= prefix_len && memcmp(cptr, invalid_prefixes[i], prefix_len) == 0) {
            rb_raise(e_encodeError, "'%s' is an invalid number", cptr);
        }
    }

    status = yajl_gen_number(p->g, cptr, len);
    yajl_helper_check_gen_status(status);

    wankelSaxEncoder_flush(p);

    /* Keep the temporary String alive while cptr is in use. */
    RB_GC_GUARD(str);
    return Qnil;
}
|
|
105
|
+
|
|
106
|
+
/*
 * call-seq: string(str)
 *
 * Emits +str+ as a JSON string. Raises TypeError when +str+ is not a String.
 * Returns nil.
 */
static VALUE wankelSaxEncoder_string(VALUE self, VALUE string) {
    wankel_encoder * enc;
    const unsigned char * bytes;
    size_t nbytes;

    Check_Type(string, T_STRING);
    Data_Get_Struct(self, wankel_encoder, enc);

    bytes = (const unsigned char *)RSTRING_PTR(string);
    nbytes = RSTRING_LEN(string);
    yajl_helper_check_gen_status(yajl_gen_string(enc->g, bytes, nbytes));

    /* Write buffered output once it has reached the configured size. */
    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
|
125
|
+
|
|
126
|
+
/* call-seq: null -- emits a JSON null and returns nil. */
static VALUE wankelSaxEncoder_null(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_null(enc->g));
    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
|
138
|
+
|
|
139
|
+
/* call-seq: bool(value) -- emits JSON true when +value+ is truthy, else false. Returns nil. */
static VALUE wankelSaxEncoder_bool(VALUE self, VALUE b) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_bool(enc->g, RTEST(b)));
    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
|
151
|
+
|
|
152
|
+
/* call-seq: map_open -- begins a JSON object and returns nil. */
static VALUE wankelSaxEncoder_map_open(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_map_open(enc->g));
    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
|
164
|
+
|
|
165
|
+
/* call-seq: map_close -- ends the current JSON object and returns nil. */
static VALUE wankelSaxEncoder_map_close(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_map_close(enc->g));
    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
|
177
|
+
|
|
178
|
+
/* call-seq: array_open -- begins a JSON array and returns nil. */
static VALUE wankelSaxEncoder_array_open(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_array_open(enc->g));
    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
|
190
|
+
|
|
191
|
+
/* call-seq: array_close -- ends the current JSON array and returns nil. */
static VALUE wankelSaxEncoder_array_close(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_array_close(enc->g));
    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
|
203
|
+
|
|
204
|
+
/*
 * call-seq: complete
 *
 * Writes whatever is still buffered in the generator to the output object,
 * regardless of the write buffer size, then clears the buffer. Returns nil.
 */
static VALUE wankelSaxEncoder_complete(VALUE self) {
    wankel_encoder * enc;
    const unsigned char * pending;
    size_t pending_len;
    VALUE chunk;

    Data_Get_Struct(self, wankel_encoder, enc);

    yajl_helper_check_gen_status(yajl_gen_get_buf(enc->g, &pending, &pending_len));

    /* Copy the generator buffer into a Ruby String tagged as UTF-8. */
    chunk = rb_str_new((const char *)pending, pending_len);
    rb_enc_associate(chunk, rb_utf8_encoding());
    rb_io_write(enc->output, chunk);

    yajl_gen_clear(enc->g);
    return Qnil;
}
|
|
222
|
+
|
|
223
|
+
/*
 * Writes the generator's buffered output to p->output and clears the buffer,
 * but only once at least p->write_buffer_size bytes have accumulated.
 */
void wankelSaxEncoder_flush(wankel_encoder * p) {
    const unsigned char * pending;
    size_t pending_len;
    VALUE chunk;

    yajl_helper_check_gen_status(yajl_gen_get_buf(p->g, &pending, &pending_len));

    /* Below the threshold: keep buffering. */
    if (pending_len < (size_t)p->write_buffer_size) {
        return;
    }

    chunk = rb_str_new((const char *)pending, pending_len);
    rb_enc_associate(chunk, rb_utf8_encoding());
    rb_io_write(p->output, chunk);
    yajl_gen_clear(p->g);
}
|
|
239
|
+
|
|
240
|
+
void Init_wankel_sax_encoder() {
|
|
241
|
+
c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
|
|
242
|
+
c_wankelSaxEncoder = rb_define_class_under(c_wankel, "SaxEncoder", rb_cObject);
|
|
243
|
+
e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
|
|
244
|
+
|
|
245
|
+
rb_define_alloc_func(c_wankelSaxEncoder, wankel_sax_encoder_alloc);
|
|
246
|
+
rb_define_method(c_wankelSaxEncoder, "initialize", wankelSaxEncoder_initialize, -1);
|
|
247
|
+
rb_define_method(c_wankelSaxEncoder, "number", wankelSaxEncoder_number, 1);
|
|
248
|
+
rb_define_method(c_wankelSaxEncoder, "string", wankelSaxEncoder_string, 1);
|
|
249
|
+
rb_define_method(c_wankelSaxEncoder, "null", wankelSaxEncoder_null, 0);
|
|
250
|
+
rb_define_method(c_wankelSaxEncoder, "bool", wankelSaxEncoder_bool, 1);
|
|
251
|
+
rb_define_method(c_wankelSaxEncoder, "map_open", wankelSaxEncoder_map_open, 0);
|
|
252
|
+
rb_define_method(c_wankelSaxEncoder, "map_close", wankelSaxEncoder_map_close, 0);
|
|
253
|
+
rb_define_method(c_wankelSaxEncoder, "array_open", wankelSaxEncoder_array_open, 0);
|
|
254
|
+
rb_define_method(c_wankelSaxEncoder, "array_close", wankelSaxEncoder_array_close, 0);
|
|
255
|
+
rb_define_method(c_wankelSaxEncoder, "complete", wankelSaxEncoder_complete, 0);
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
intern_to_s = rb_intern("to_s");
|
|
259
|
+
intern_io_write = rb_intern("write");
|
|
260
|
+
intern_to_json = rb_intern("to_json");
|
|
261
|
+
intern_keys = rb_intern("keys");
|
|
262
|
+
intern_clone = rb_intern("clone");
|
|
263
|
+
intern_merge = rb_intern("merge");
|
|
264
|
+
intern_DEFAULTS = rb_intern("DEFAULTS");
|
|
265
|
+
sym_beautify = ID2SYM(rb_intern("beautify"));
|
|
266
|
+
sym_indent_string = ID2SYM(rb_intern("indent_string"));
|
|
267
|
+
sym_validate_utf8 = ID2SYM(rb_intern("validate_utf8"));
|
|
268
|
+
sym_escape_solidus = ID2SYM(rb_intern("escape_solidus"));
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// Ruby GC ===================================================================
|
|
272
|
+
/*
 * Allocator for Wankel::SaxEncoder: wraps a new wankel_encoder in a Ruby
 * object with the mark/free hooks attached. The generator handle stays 0
 * until initialize creates it.
 */
static VALUE wankel_sax_encoder_alloc(VALUE klass) {
    wankel_encoder * enc;
    VALUE obj = Data_Make_Struct(klass, wankel_encoder, wankel_sax_encoder_mark, wankel_sax_encoder_free, enc);

    enc->g = 0;
    return obj;
}
|
|
279
|
+
|
|
280
|
+
static void wankel_sax_encoder_free(void * handle) {
|
|
281
|
+
wankel_encoder * p = handle;
|
|
282
|
+
if (p->g){
|
|
283
|
+
yajl_gen_free(p->g);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
static void wankel_sax_encoder_mark(void * handle) {
|
|
288
|
+
wankel_encoder * p = handle;
|
|
289
|
+
rb_gc_mark(p->output);
|
|
290
|
+
}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
#include "wankel_sax_parser.h"
|
|
2
|
+
|
|
3
|
+
static VALUE sax_parser_initialize(int argc, VALUE * argv, VALUE self);
|
|
4
|
+
|
|
5
|
+
static ID sym_read_buffer_size, sym_symbolize_keys;
|
|
6
|
+
|
|
7
|
+
static ID intern_io_read, intern_merge, intern_clone, intern_DEFAULTS;
|
|
8
|
+
|
|
9
|
+
static ID intern_on_null, intern_on_boolean, intern_on_integer,
|
|
10
|
+
intern_on_double, intern_on_string, intern_on_map_start, intern_on_map_key,
|
|
11
|
+
intern_on_map_end, intern_on_array_start, intern_on_array_end;
|
|
12
|
+
|
|
13
|
+
static VALUE c_wankel, c_wankelParser, c_saxParser, e_parseError, e_encodeError;
|
|
14
|
+
|
|
15
|
+
// Callbacks =================================================================
|
|
16
|
+
yajl_callbacks sax_parser_callbacks(VALUE self);
|
|
17
|
+
int sax_parser_callback_on_null(void *ctx);
|
|
18
|
+
int sax_parser_callback_on_boolean(void *ctx, int boolVal);
|
|
19
|
+
int sax_parser_callback_on_number(void *ctx, const char * numberVal, size_t numberLen);
|
|
20
|
+
int sax_parser_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen);
|
|
21
|
+
int sax_parser_callback_on_map_start(void *ctx);
|
|
22
|
+
int sax_parser_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen);
|
|
23
|
+
int sax_parser_callback_on_map_end(void *ctx);
|
|
24
|
+
int sax_parser_callback_on_array_start(void *ctx);
|
|
25
|
+
int sax_parser_callback_on_array_end(void *ctx);
|
|
26
|
+
|
|
27
|
+
// Ruby GC ===================================================================
|
|
28
|
+
VALUE sax_parser_alloc(VALUE);
|
|
29
|
+
//void sax_parser_mark(void *);
|
|
30
|
+
void sax_parser_free(void * parser);
|
|
31
|
+
|
|
32
|
+
/*
 * call-seq: new([options])
 *
 * Builds the effective options (Wankel::DEFAULTS, overridden by the
 * receiver class's own DEFAULTS constant when defined, overridden by
 * +options+), stores them in @options and creates the yajl parser handle.
 *
 * Callbacks are installed only for the on_* methods the receiver responds
 * to at this point.
 *
 * Raises TypeError when +options+ is not a Hash or when :read_buffer_size
 * is not a Fixnum.
 */
VALUE sax_parser_initialize(int argc, VALUE * argv, VALUE self) {
    VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
    VALUE klass = rb_funcall(self, rb_intern("class"), 0);
    VALUE options, rbufsize;
    sax_parser * p;

    rb_scan_args(argc, argv, "01", &options);
    if (rb_const_defined(klass, intern_DEFAULTS)) {
        defaults = rb_funcall(defaults, intern_merge, 1, rb_const_get(klass, intern_DEFAULTS));
    }
    if (options == Qnil) {
        rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0));
    } else {
        Check_Type(options, T_HASH);
        rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options));
    }
    options = rb_iv_get(self, "@options");

    Data_Get_Struct(self, sax_parser, p);

    /* initialize may be invoked more than once; do not leak the old handle. */
    if (p->h) {
        yajl_free(p->h);
        p->h = NULL;
    }

    /* callbacks/alloc_funcs live in the struct and are passed by address,
     * so they outlive this call together with the handle. */
    p->callbacks = sax_parser_callbacks(self);
    p->alloc_funcs.malloc = yajl_helper_malloc;
    p->alloc_funcs.realloc = yajl_helper_realloc;
    p->alloc_funcs.free = yajl_helper_free;
    p->h = yajl_alloc(&p->callbacks, &p->alloc_funcs, (void *)self);

    yajl_configure(p->h, options);

    /* Kept as a Ruby Fixnum because parse passes it straight to io.read. */
    rbufsize = rb_hash_aref(options, sym_read_buffer_size);
    Check_Type(rbufsize, T_FIXNUM);
    p->rbufsize = rbufsize;

    p->symbolize_keys = (rb_hash_aref(options, sym_symbolize_keys) == Qtrue) ? 1 : 0;

    return self;
}
|
|
71
|
+
|
|
72
|
+
/*
 * call-seq: parse(input)
 *
 * Feeds +input+ to yajl, invoking the receiver's on_* callbacks as tokens
 * are recognised. +input+ is either a String (parsed in one go) or an
 * object responding to the read method (consumed in read-buffer-sized
 * chunks until read returns nil). Anything else raises Wankel::ParseError.
 * Returns nil.
 */
static VALUE sax_parser_parse(int argc, VALUE * argv, VALUE self) {
    sax_parser * parser;
    VALUE input;
    yajl_status status;

    Data_Get_Struct(self, sax_parser, parser);
    rb_scan_args(argc, argv, "10", &input);

    if (TYPE(input) == T_STRING) {
        const char * text = RSTRING_PTR(input);
        size_t text_len = RSTRING_LEN(input);

        status = yajl_parse(parser->h, (const unsigned char*)text, text_len);
        yajl_helper_check_status(parser->h, status, 1, (const unsigned char*)text, text_len);
    } else if (rb_respond_to(input, intern_io_read)) {
        /* One buffer is reused for every read(size, buffer) call. */
        VALUE chunk = rb_str_new(0, FIX2LONG(parser->rbufsize));

        for (;;) {
            const char * data;
            size_t data_len;

            if (rb_funcall(input, intern_io_read, 2, parser->rbufsize, chunk) == Qnil) {
                break;
            }
            data = RSTRING_PTR(chunk);
            data_len = RSTRING_LEN(chunk);
            status = yajl_parse(parser->h, (const unsigned char*)data, data_len);
            yajl_helper_check_status(parser->h, status, 1, (const unsigned char*)data, data_len);
        }
    } else {
        rb_raise(e_parseError, "input must be a string or an IO");
    }

    /* Tell yajl the input is finished so trailing state is validated. */
    status = yajl_complete_parse(parser->h);
    yajl_helper_check_status(parser->h, status, 0, NULL, 0);

    return Qnil;
}
|
|
103
|
+
|
|
104
|
+
void Init_wankel_sax_parser() {
|
|
105
|
+
c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
|
|
106
|
+
c_wankelParser = rb_const_get(c_wankel, rb_intern("Parser"));
|
|
107
|
+
c_saxParser = rb_define_class_under(c_wankel, "SaxParser", rb_cObject);
|
|
108
|
+
e_parseError = rb_const_get(c_wankel, rb_intern("ParseError"));
|
|
109
|
+
e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
|
|
110
|
+
|
|
111
|
+
rb_define_alloc_func(c_saxParser, sax_parser_alloc);
|
|
112
|
+
rb_define_method(c_saxParser, "initialize", sax_parser_initialize, -1);
|
|
113
|
+
rb_define_method(c_saxParser, "parse", sax_parser_parse, -1);
|
|
114
|
+
|
|
115
|
+
intern_merge = rb_intern("merge");
|
|
116
|
+
intern_clone = rb_intern("clone");
|
|
117
|
+
intern_DEFAULTS = rb_intern("DEFAULTS");
|
|
118
|
+
|
|
119
|
+
sym_read_buffer_size = ID2SYM(rb_intern("read_buffer_size"));
|
|
120
|
+
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
|
121
|
+
|
|
122
|
+
intern_on_null = rb_intern("on_null");
|
|
123
|
+
intern_on_boolean = rb_intern("on_boolean");
|
|
124
|
+
intern_on_integer = rb_intern("on_integer");
|
|
125
|
+
intern_on_double = rb_intern("on_double");
|
|
126
|
+
intern_on_string = rb_intern("on_string");
|
|
127
|
+
intern_on_map_start = rb_intern("on_map_start");
|
|
128
|
+
intern_on_map_key = rb_intern("on_map_key");
|
|
129
|
+
intern_on_map_end = rb_intern("on_map_end");
|
|
130
|
+
intern_on_array_start = rb_intern("on_array_start");
|
|
131
|
+
intern_on_array_end = rb_intern("on_array_end");
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// Callbacks =================================================================
|
|
135
|
+
/*
 * Builds the yajl callback table for +self+: a C callback is installed only
 * for each on_* method the object responds to; every other slot stays NULL.
 * The number callback is installed when either on_integer or on_double is
 * available.
 */
yajl_callbacks sax_parser_callbacks(VALUE self) {
    yajl_callbacks table = { 0 };

    if (rb_respond_to(self, intern_on_null))
        table.yajl_null = sax_parser_callback_on_null;

    if (rb_respond_to(self, intern_on_boolean))
        table.yajl_boolean = sax_parser_callback_on_boolean;

    if (rb_respond_to(self, intern_on_integer) || rb_respond_to(self, intern_on_double))
        table.yajl_number = sax_parser_callback_on_number;

    if (rb_respond_to(self, intern_on_string))
        table.yajl_string = sax_parser_callback_on_string;

    if (rb_respond_to(self, intern_on_map_start))
        table.yajl_start_map = sax_parser_callback_on_map_start;

    if (rb_respond_to(self, intern_on_map_key))
        table.yajl_map_key = sax_parser_callback_on_map_key;

    if (rb_respond_to(self, intern_on_map_end))
        table.yajl_end_map = sax_parser_callback_on_map_end;

    if (rb_respond_to(self, intern_on_array_start))
        table.yajl_start_array = sax_parser_callback_on_array_start;

    if (rb_respond_to(self, intern_on_array_end))
        table.yajl_end_array = sax_parser_callback_on_array_end;

    return table;
}
|
|
169
|
+
|
|
170
|
+
int sax_parser_callback_on_null(void *ctx) {
|
|
171
|
+
rb_funcall((VALUE)ctx, intern_on_null, 0);
|
|
172
|
+
return 1;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
int sax_parser_callback_on_boolean(void *ctx, int boolVal) {
|
|
176
|
+
rb_funcall((VALUE)ctx, intern_on_boolean, 1, (boolVal ? Qtrue : Qfalse));
|
|
177
|
+
return 1;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
int sax_parser_callback_on_number(void *ctx, const char * numberVal, size_t numberLen) {
|
|
181
|
+
char buf[numberLen+1];
|
|
182
|
+
buf[numberLen] = 0;
|
|
183
|
+
memcpy(buf, numberVal, numberLen);
|
|
184
|
+
VALUE obj = (VALUE)ctx;
|
|
185
|
+
|
|
186
|
+
if (memchr(buf, '.', numberLen) || memchr(buf, 'e', numberLen) || memchr(buf, 'E', numberLen)) {
|
|
187
|
+
if (rb_respond_to(obj, intern_on_double)) {
|
|
188
|
+
rb_funcall((VALUE)ctx, intern_on_double, 1, rb_float_new(strtod(buf, NULL)));
|
|
189
|
+
}
|
|
190
|
+
} else {
|
|
191
|
+
if (rb_respond_to(obj, intern_on_integer)) {
|
|
192
|
+
rb_funcall((VALUE)ctx, intern_on_integer, 1, rb_cstr2inum(buf, 10));
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
return 1;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
int sax_parser_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen) {
|
|
199
|
+
rb_funcall((VALUE)ctx, intern_on_string, 1, rb_str_new((const char *) stringVal, stringLen));
|
|
200
|
+
return 1;
|
|
201
|
+
}
|
|
202
|
+
int sax_parser_callback_on_map_start(void *ctx) {
|
|
203
|
+
rb_funcall((VALUE)ctx, intern_on_map_start, 0);
|
|
204
|
+
return 1;
|
|
205
|
+
}
|
|
206
|
+
int sax_parser_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen) {
|
|
207
|
+
rb_funcall((VALUE)ctx, intern_on_map_key, 1, rb_str_new((const char *)key, keyLen));
|
|
208
|
+
return 1;
|
|
209
|
+
}
|
|
210
|
+
int sax_parser_callback_on_map_end(void *ctx) {
|
|
211
|
+
rb_funcall((VALUE)ctx, intern_on_map_end, 0);
|
|
212
|
+
return 1;
|
|
213
|
+
}
|
|
214
|
+
int sax_parser_callback_on_array_start(void *ctx) {
|
|
215
|
+
rb_funcall((VALUE)ctx, intern_on_array_start, 0);
|
|
216
|
+
return 1;
|
|
217
|
+
}
|
|
218
|
+
int sax_parser_callback_on_array_end(void *ctx) {
|
|
219
|
+
rb_funcall((VALUE)ctx, intern_on_array_end, 0);
|
|
220
|
+
return 1;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Ruby GC ===================================================================
|
|
224
|
+
/*
 * Allocator for Wankel::SaxParser: wraps a new sax_parser struct in a Ruby
 * object with sax_parser_free as its free hook and no mark hook.
 */
VALUE sax_parser_alloc(VALUE klass) {
    sax_parser * state;
    VALUE obj = Data_Make_Struct(klass, sax_parser, 0, sax_parser_free, state);

    return obj;
}
|
|
228
|
+
|
|
229
|
+
void sax_parser_free(void * handle) {
|
|
230
|
+
sax_parser * p = handle;
|
|
231
|
+
yajl_free(p->h);
|
|
232
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#ifndef WANKEL_SAX_PARSER
|
|
2
|
+
#define WANKEL_SAX_PARSER
|
|
3
|
+
|
|
4
|
+
#include <ruby.h>
|
|
5
|
+
#include <ruby/encoding.h>
|
|
6
|
+
#include <yajl/yajl_common.h>
|
|
7
|
+
#include <yajl/yajl_parse.h>
|
|
8
|
+
|
|
9
|
+
#include "wankel.h"
|
|
10
|
+
#include "yajl_helpers.h"
|
|
11
|
+
|
|
12
|
+
void Init_wankel_sax_parser();
|
|
13
|
+
|
|
14
|
+
typedef struct {
|
|
15
|
+
yajl_handle h;
|
|
16
|
+
yajl_callbacks callbacks;
|
|
17
|
+
yajl_alloc_funcs alloc_funcs;
|
|
18
|
+
int symbolize_keys;
|
|
19
|
+
VALUE rbufsize;
|
|
20
|
+
} sax_parser;
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
#endif
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
#include "wankel.h"
|
|
2
|
+
#include "yajl_helpers.h"
|
|
3
|
+
|
|
4
|
+
static VALUE sym_allow_comments;
|
|
5
|
+
static VALUE sym_validate_strings;
|
|
6
|
+
static VALUE sym_trailing_garbage;
|
|
7
|
+
static VALUE sym_multiple_values;
|
|
8
|
+
static VALUE sym_partial_values;
|
|
9
|
+
static VALUE sym_beautify;
|
|
10
|
+
static VALUE sym_indent_string;
|
|
11
|
+
static VALUE sym_validate_utf8;
|
|
12
|
+
static VALUE sym_escape_solidus;
|
|
13
|
+
|
|
14
|
+
void Init_yajl_helpers() {
|
|
15
|
+
sym_allow_comments = ID2SYM(rb_intern("allow_comments")); rb_gc_register_address(&sym_allow_comments);
|
|
16
|
+
sym_validate_strings = ID2SYM(rb_intern("validate_strings")); rb_gc_register_address(&sym_validate_strings);
|
|
17
|
+
sym_trailing_garbage = ID2SYM(rb_intern("trailing_garbage")); rb_gc_register_address(&sym_trailing_garbage);
|
|
18
|
+
sym_multiple_values = ID2SYM(rb_intern("multiple_values")); rb_gc_register_address(&sym_multiple_values);
|
|
19
|
+
sym_partial_values = ID2SYM(rb_intern("partial_values")); rb_gc_register_address(&sym_partial_values);
|
|
20
|
+
sym_beautify = ID2SYM(rb_intern("beautify")); rb_gc_register_address(&sym_beautify);
|
|
21
|
+
sym_indent_string = ID2SYM(rb_intern("indent_string")); rb_gc_register_address(&sym_indent_string);
|
|
22
|
+
sym_validate_utf8 = ID2SYM(rb_intern("validate_utf8")); rb_gc_register_address(&sym_validate_utf8);
|
|
23
|
+
sym_escape_solidus = ID2SYM(rb_intern("escape_solidus")); rb_gc_register_address(&sym_escape_solidus);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
// Yajl Helpers ==============================================================
|
|
27
|
+
/*
 * Raises Wankel::ParseError carrying yajl's error description when +status+
 * is anything other than yajl_status_ok; otherwise does nothing.
 *
 * handle          parser the status came from
 * verbose         forwarded to yajl_get_error
 * jsonText/Length the text that was being parsed, forwarded to yajl_get_error
 */
void yajl_helper_check_status(yajl_handle handle, yajl_status status, int verbose, const unsigned char * jsonText, size_t jsonTextLength) {
    VALUE error_class;
    VALUE exception;
    unsigned char * message;

    if (status == yajl_status_ok) {
        return;
    }

    /* Resolve the class first so a failed lookup cannot leak the message. */
    error_class = rb_const_get(rb_const_get(rb_cObject, rb_intern("Wankel")), rb_intern("ParseError"));

    message = yajl_get_error(handle, verbose, jsonText, jsonTextLength);
    if (message == NULL) {
        rb_raise(error_class, "unknown parse error");
    }

    /* Copy the text into the exception *before* releasing yajl's buffer, and
     * never use it as a printf format string: it contains input-derived text. */
    exception = rb_exc_new(error_class, (const char *)message, (long)strlen((const char *)message));
    yajl_free_error(handle, message);

    rb_exc_raise(exception);
}
|
|
34
|
+
|
|
35
|
+
/*
 * Raises Wankel::EncodeError with a descriptive message unless +status+ is
 * yajl_gen_status_ok or yajl_gen_generation_complete.
 */
void yajl_helper_check_gen_status(yajl_gen_status status) {
    VALUE error;

    /* Success path first: this runs after every generator call, so avoid
     * the constant lookups unless an error actually has to be raised. */
    if (status == yajl_gen_status_ok || status == yajl_gen_generation_complete) {
        return;
    }

    error = rb_const_get(rb_const_get(rb_cObject, rb_intern("Wankel")), rb_intern("EncodeError"));

    switch (status) {
    case yajl_gen_keys_must_be_strings:
        rb_raise(error, "at a point where a map key is generated, a function other than yajl_gen_string was called");
        break;
    case yajl_max_depth_exceeded:
        rb_raise(error, "YAJL's maximum generation depth was exceeded. see YAJL_MAX_DEPTH");
        break;
    case yajl_gen_in_error_state:
        rb_raise(error, "A generator function (yajl_gen_XXX) was called while in an error state");
        break;
    case yajl_gen_invalid_number:
        rb_raise(error, "yajl_gen_double was passed an invalid floating point value (infinity or NaN).");
        break;
    case yajl_gen_no_buf:
        rb_raise(error, "A print callback was passed in, so there is no internal buffer to get from");
        break;
    case yajl_gen_invalid_string:
        rb_raise(error, "returned from yajl_gen_string() when the yajl_gen_validate_utf8 option is enabled and an invalid was passed by client code.");
        break;
    default:
        rb_raise(error, "unkown yajl_gen_status error");
        break;
    }
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
// Memory funcs
|
|
61
|
+
/* yajl allocation hook backed by Ruby's xmalloc; ctx is not used. */
void * yajl_helper_malloc(void *ctx, size_t size) {
    void * block = xmalloc(size);

    return block;
}
|
|
64
|
+
/* yajl reallocation hook backed by Ruby's xrealloc; ctx is not used. */
void * yajl_helper_realloc(void *ctx, void *ptr, size_t size) {
    void * block = xrealloc(ptr, size);

    return block;
}
|
|
67
|
+
/* yajl deallocation hook backed by Ruby's xfree; ctx is not used. */
void yajl_helper_free(void *ctx, void *ptr) {
    /* No `return <expr>;` here: returning a void expression from a void
     * function is not valid ISO C. */
    xfree(ptr);
}
|
|
70
|
+
|
|
71
|
+
// Configure
|
|
72
|
+
/* Returns 1 when options[key] is exactly true, 0 for any other value. */
static int yajl_option_is_true(VALUE options, VALUE key) {
    return rb_hash_aref(options, key) == Qtrue ? 1 : 0;
}

/*
 * Applies the parser-related entries of the +options+ Hash to +handle+.
 * Each yajl flag is switched on only when its option is exactly true and
 * explicitly switched off otherwise. :validate_strings is inverted because
 * the yajl flag is "don't validate".
 */
void yajl_configure(yajl_handle handle, VALUE options) {
    yajl_config(handle, yajl_allow_comments,
                yajl_option_is_true(options, sym_allow_comments));

    yajl_config(handle, yajl_dont_validate_strings,
                !yajl_option_is_true(options, sym_validate_strings));

    yajl_config(handle, yajl_allow_trailing_garbage,
                yajl_option_is_true(options, sym_trailing_garbage));

    yajl_config(handle, yajl_allow_multiple_values,
                yajl_option_is_true(options, sym_multiple_values));

    yajl_config(handle, yajl_allow_partial_values,
                yajl_option_is_true(options, sym_partial_values));
}
|
|
103
|
+
|
|
104
|
+
/*
 * Applies the generator-related entries of the +options+ Hash to +g+.
 *
 * :beautify, :validate_utf8 and :escape_solidus are enabled only when their
 * value is exactly true and explicitly disabled otherwise. :indent_string
 * is applied only when it is a String; for nil or any other type the
 * generator's own default indent is left in place.
 */
void yajl_gen_configure(yajl_gen g, VALUE options) {
    VALUE indent = rb_hash_aref(options, sym_indent_string);

    yajl_gen_config(g, yajl_gen_beautify,
                    rb_hash_aref(options, sym_beautify) == Qtrue ? 1 : 0);

    /* RSTRING_PTR on a non-String (e.g. nil) reads arbitrary memory.
     * NOTE(review): yajl appears to keep this pointer rather than copy the
     * text, so the String must stay alive and unmodified for the lifetime
     * of the generator -- confirm against yajl_gen.c. */
    if (TYPE(indent) == T_STRING) {
        yajl_gen_config(g, yajl_gen_indent_string, RSTRING_PTR(indent));
    }

    yajl_gen_config(g, yajl_gen_validate_utf8,
                    rb_hash_aref(options, sym_validate_utf8) == Qtrue ? 1 : 0);

    yajl_gen_config(g, yajl_gen_escape_solidus,
                    rb_hash_aref(options, sym_escape_solidus) == Qtrue ? 1 : 0);
}
|