wankel 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/LICENSE +20 -0
- data/README.md +43 -0
- data/Rakefile +63 -0
- data/benchmark/subjects/item.json +1 -0
- data/benchmark/subjects/ohai.json +1216 -0
- data/benchmark/subjects/twitter_search.json +1 -0
- data/benchmark/subjects/twitter_stream.json +430 -0
- data/ext/wankel/extconf.rb +15 -0
- data/ext/wankel/wankel.c +50 -0
- data/ext/wankel/wankel.h +17 -0
- data/ext/wankel/wankel_encoder.c +232 -0
- data/ext/wankel/wankel_encoder.h +13 -0
- data/ext/wankel/wankel_parser.c +345 -0
- data/ext/wankel/wankel_parser.h +26 -0
- data/ext/wankel/wankel_sax_encoder.c +290 -0
- data/ext/wankel/wankel_sax_encoder.h +13 -0
- data/ext/wankel/wankel_sax_parser.c +232 -0
- data/ext/wankel/wankel_sax_parser.h +23 -0
- data/ext/wankel/yajl_helpers.c +124 -0
- data/ext/wankel/yajl_helpers.h +22 -0
- data/lib/wankel/ex_sax_parser.rb +75 -0
- data/lib/wankel.rb +19 -0
- data/logo.png +0 -0
- data/test/encoding/encoding_test.rb +230 -0
- data/test/encoding/sax_encoder_test.rb +89 -0
- data/test/parsing/active_support_test.rb +66 -0
- data/test/parsing/fixtures/fail.15.json +1 -0
- data/test/parsing/fixtures/fail.16.json +1 -0
- data/test/parsing/fixtures/fail.17.json +1 -0
- data/test/parsing/fixtures/fail.26.json +1 -0
- data/test/parsing/fixtures/fail11.json +1 -0
- data/test/parsing/fixtures/fail12.json +1 -0
- data/test/parsing/fixtures/fail13.json +1 -0
- data/test/parsing/fixtures/fail14.json +1 -0
- data/test/parsing/fixtures/fail19.json +1 -0
- data/test/parsing/fixtures/fail20.json +1 -0
- data/test/parsing/fixtures/fail21.json +1 -0
- data/test/parsing/fixtures/fail22.json +1 -0
- data/test/parsing/fixtures/fail23.json +1 -0
- data/test/parsing/fixtures/fail24.json +1 -0
- data/test/parsing/fixtures/fail25.json +1 -0
- data/test/parsing/fixtures/fail27.json +2 -0
- data/test/parsing/fixtures/fail28.json +2 -0
- data/test/parsing/fixtures/fail3.json +1 -0
- data/test/parsing/fixtures/fail4.json +1 -0
- data/test/parsing/fixtures/fail5.json +1 -0
- data/test/parsing/fixtures/fail6.json +1 -0
- data/test/parsing/fixtures/fail9.json +1 -0
- data/test/parsing/fixtures/pass.array.json +6 -0
- data/test/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
- data/test/parsing/fixtures/pass.contacts.json +1 -0
- data/test/parsing/fixtures/pass.db100.xml.json +1 -0
- data/test/parsing/fixtures/pass.db1000.xml.json +1 -0
- data/test/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
- data/test/parsing/fixtures/pass.deep_arrays.json +1 -0
- data/test/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
- data/test/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
- data/test/parsing/fixtures/pass.doubles.json +1 -0
- data/test/parsing/fixtures/pass.empty_array.json +1 -0
- data/test/parsing/fixtures/pass.empty_string.json +1 -0
- data/test/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
- data/test/parsing/fixtures/pass.escaped_foobar.json +1 -0
- data/test/parsing/fixtures/pass.item.json +1 -0
- data/test/parsing/fixtures/pass.json-org-sample1.json +23 -0
- data/test/parsing/fixtures/pass.json-org-sample2.json +11 -0
- data/test/parsing/fixtures/pass.json-org-sample3.json +26 -0
- data/test/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
- data/test/parsing/fixtures/pass.json-org-sample4.json +89 -0
- data/test/parsing/fixtures/pass.json-org-sample5.json +27 -0
- data/test/parsing/fixtures/pass.map-spain.xml.json +1 -0
- data/test/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
- data/test/parsing/fixtures/pass.ns-soap.xml.json +1 -0
- data/test/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
- data/test/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
- data/test/parsing/fixtures/pass.numbers-int-4k.json +11 -0
- data/test/parsing/fixtures/pass.numbers-int-64k.json +154 -0
- data/test/parsing/fixtures/pass.twitter-search.json +1 -0
- data/test/parsing/fixtures/pass.twitter-search2.json +1 -0
- data/test/parsing/fixtures/pass.unicode.json +3315 -0
- data/test/parsing/fixtures/pass.yelp.json +1 -0
- data/test/parsing/fixtures/pass1.json +56 -0
- data/test/parsing/fixtures/pass2.json +1 -0
- data/test/parsing/fixtures/pass3.json +6 -0
- data/test/parsing/fixtures_test.rb +43 -0
- data/test/parsing/multiple_values_test.rb +100 -0
- data/test/parsing/one_off_test.rb +65 -0
- data/test/parsing/sax_parser_test.rb +125 -0
- data/test/performance.rb +135 -0
- data/test/test_helper.rb +36 -0
- data/test/wankel_test.rb +53 -0
- data/wankel.gemspec +23 -0
- metadata +259 -0
@@ -0,0 +1,290 @@
|
|
1
|
+
#include "wankel_sax_encoder.h"
|
2
|
+
|
3
|
+
typedef struct {
|
4
|
+
yajl_gen g;
|
5
|
+
VALUE output;
|
6
|
+
int write_buffer_size;
|
7
|
+
} wankel_encoder;
|
8
|
+
|
9
|
+
static VALUE wankelSaxEncoder_initialize(int argc, VALUE * argv, VALUE self);
|
10
|
+
static VALUE wankelSaxEncoder_number(VALUE self, VALUE number);
|
11
|
+
static VALUE wankelSaxEncoder_string(VALUE self, VALUE string);
|
12
|
+
static VALUE wankelSaxEncoder_null(VALUE self);
|
13
|
+
static VALUE wankelSaxEncoder_bool(VALUE self, VALUE b);
|
14
|
+
static VALUE wankelSaxEncoder_map_open(VALUE self);
|
15
|
+
static VALUE wankelSaxEncoder_map_close(VALUE self);
|
16
|
+
static VALUE wankelSaxEncoder_array_open(VALUE self);
|
17
|
+
static VALUE wankelSaxEncoder_array_close(VALUE self);
|
18
|
+
static VALUE wankelSaxEncoder_complete(VALUE self);
|
19
|
+
static void wankelSaxEncoder_flush(wankel_encoder * p);
|
20
|
+
static VALUE wankel_sax_encoder_alloc(VALUE klass);
|
21
|
+
static void wankel_sax_encoder_free(void * handle);
|
22
|
+
static void wankel_sax_encoder_mark(void * handle);
|
23
|
+
|
24
|
+
static VALUE c_wankel, c_wankelSaxEncoder, e_encodeError;
|
25
|
+
|
26
|
+
static ID intern_to_s, intern_keys, intern_io_write, intern_to_json, intern_clone, intern_merge, intern_DEFAULTS;
|
27
|
+
|
28
|
+
static ID sym_beautify, sym_indent_string, sym_validate_utf8, sym_escape_solidus;
|
29
|
+
|
30
|
+
/*
|
31
|
+
* Document-method: new
|
32
|
+
*
|
33
|
+
* call-seq: new([options])
|
34
|
+
*
|
35
|
+
* +:beautify+ generate indented (beautiful) output. Default `false`.
|
36
|
+
*
|
37
|
+
* +:indent_string+ Set an indent string which is used when yajl_gen_beautify
|
38
|
+
* is enabled. Maybe something like \\t or some number of
|
39
|
+
* spaces. The default is four spaces ' '.
|
40
|
+
*
|
41
|
+
* +:validate_utf8+ Normally the generator does not validate that strings you
|
42
|
+
* pass to it are valid UTF8. Enabling this option will cause
|
43
|
+
* it to do so.
|
44
|
+
*
|
45
|
+
* +:escape_solidus+ the forward solidus (slash or '/' in human) is not required
|
46
|
+
* to be escaped in json text. By default, YAJL will not escape
|
47
|
+
* it in the iterest of saving bytes. Setting this flag will
|
48
|
+
* cause YAJL to always escape '/' in generated JSON strings.
|
49
|
+
*/
|
50
|
+
static VALUE wankelSaxEncoder_initialize(int argc, VALUE * argv, VALUE self) {
|
51
|
+
VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
|
52
|
+
VALUE io, options;
|
53
|
+
wankel_encoder * p;
|
54
|
+
yajl_alloc_funcs alloc_funcs;
|
55
|
+
|
56
|
+
rb_scan_args(argc, argv, "11", &io, &options);
|
57
|
+
|
58
|
+
if(options == Qnil) {
|
59
|
+
rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0) );
|
60
|
+
} else {
|
61
|
+
Check_Type(options, T_HASH);
|
62
|
+
rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options) );
|
63
|
+
}
|
64
|
+
options = rb_iv_get(self, "@options");
|
65
|
+
|
66
|
+
if (!rb_respond_to(io, intern_io_write)) {
|
67
|
+
rb_raise(e_encodeError, "output must be a an IO");
|
68
|
+
}
|
69
|
+
Data_Get_Struct(self, wankel_encoder, p);
|
70
|
+
p->output = io;
|
71
|
+
|
72
|
+
alloc_funcs.malloc = yajl_helper_malloc;
|
73
|
+
alloc_funcs.realloc = yajl_helper_realloc;
|
74
|
+
alloc_funcs.free = yajl_helper_free;
|
75
|
+
p->g = yajl_gen_alloc(&alloc_funcs);
|
76
|
+
yajl_gen_configure(p->g, options);
|
77
|
+
|
78
|
+
p->write_buffer_size = FIX2INT(rb_hash_aref(options, ID2SYM(rb_intern("write_buffer_size"))));
|
79
|
+
|
80
|
+
return self;
|
81
|
+
}
|
82
|
+
|
83
|
+
/*
 * Emits +number+ as a JSON number, using the text returned by number.to_s.
 *
 * Raises Wankel::EncodeError when that text starts with "NaN", "Infinity" or
 * "-Infinity", and TypeError when to_s does not return a String.
 * Returns nil.
 */
static VALUE wankelSaxEncoder_number(VALUE self, VALUE number) {
    size_t len;
    const char * cptr;
    wankel_encoder * p;
    yajl_gen_status status;
    VALUE str = rb_funcall(number, intern_to_s, 0);

    /* to_s is user-overridable; make sure we really got a String before
     * touching its buffer. */
    Check_Type(str, T_STRING);

    Data_Get_Struct(self, wankel_encoder, p);
    cptr = RSTRING_PTR(str);
    len = RSTRING_LEN(str);

    /* Check the length first: memcmp may read all N bytes, which would run
     * past the end of short strings such as "1". */
    if ((len >= 3 && memcmp(cptr, "NaN", 3) == 0) ||
        (len >= 8 && memcmp(cptr, "Infinity", 8) == 0) ||
        (len >= 9 && memcmp(cptr, "-Infinity", 9) == 0)) {
        rb_raise(e_encodeError, "'%s' is an invalid number", cptr);
    }

    status = yajl_gen_number(p->g, cptr, len);
    /* keep the temporary String alive while yajl reads from its buffer */
    RB_GC_GUARD(str);
    yajl_helper_check_gen_status(status);

    wankelSaxEncoder_flush(p);

    return Qnil;
}
|
105
|
+
|
106
|
+
/*
 * Emits +string+ (which must be a String, otherwise TypeError is raised) as a
 * JSON string, then flushes the buffer if it has grown large enough.
 * Returns nil.
 */
static VALUE wankelSaxEncoder_string(VALUE self, VALUE string) {
    wankel_encoder * enc;
    const unsigned char * bytes;
    size_t nbytes;

    Check_Type(string, T_STRING);
    Data_Get_Struct(self, wankel_encoder, enc);

    bytes = (const unsigned char *)RSTRING_PTR(string);
    nbytes = RSTRING_LEN(string);
    yajl_helper_check_gen_status(yajl_gen_string(enc->g, bytes, nbytes));

    wankelSaxEncoder_flush(enc);
    return Qnil;
}
|
125
|
+
|
126
|
+
/* Emits a JSON null and flushes when the buffer threshold is reached. Returns nil. */
static VALUE wankelSaxEncoder_null(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_null(enc->g));
    wankelSaxEncoder_flush(enc);

    return Qnil;
}
|
138
|
+
|
139
|
+
/*
 * Emits a JSON boolean: true when +b+ is truthy in the Ruby sense, false
 * otherwise. Returns nil.
 */
static VALUE wankelSaxEncoder_bool(VALUE self, VALUE b) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_bool(enc->g, RTEST(b)));
    wankelSaxEncoder_flush(enc);

    return Qnil;
}
|
151
|
+
|
152
|
+
/* Opens a JSON object ('{') and flushes when the buffer threshold is reached. Returns nil. */
static VALUE wankelSaxEncoder_map_open(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_map_open(enc->g));
    wankelSaxEncoder_flush(enc);

    return Qnil;
}
|
164
|
+
|
165
|
+
/* Closes the current JSON object ('}') and flushes when the buffer threshold is reached. Returns nil. */
static VALUE wankelSaxEncoder_map_close(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_map_close(enc->g));
    wankelSaxEncoder_flush(enc);

    return Qnil;
}
|
177
|
+
|
178
|
+
/* Opens a JSON array ('[') and flushes when the buffer threshold is reached. Returns nil. */
static VALUE wankelSaxEncoder_array_open(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_array_open(enc->g));
    wankelSaxEncoder_flush(enc);

    return Qnil;
}
|
190
|
+
|
191
|
+
/* Closes the current JSON array (']') and flushes when the buffer threshold is reached. Returns nil. */
static VALUE wankelSaxEncoder_array_close(VALUE self) {
    wankel_encoder * enc;

    Data_Get_Struct(self, wankel_encoder, enc);
    yajl_helper_check_gen_status(yajl_gen_array_close(enc->g));
    wankelSaxEncoder_flush(enc);

    return Qnil;
}
|
203
|
+
|
204
|
+
/*
 * Writes whatever is currently buffered in the generator to the output
 * object, regardless of the write buffer threshold, and clears the buffer.
 * Returns nil.
 */
static VALUE wankelSaxEncoder_complete(VALUE self) {
    wankel_encoder * enc;
    const unsigned char * bytes;
    size_t nbytes;
    VALUE pending;

    Data_Get_Struct(self, wankel_encoder, enc);

    yajl_helper_check_gen_status(yajl_gen_get_buf(enc->g, &bytes, &nbytes));

    /* copy the generator buffer into a UTF-8 tagged Ruby string before writing */
    pending = rb_str_new((const char *)bytes, nbytes);
    rb_enc_associate(pending, rb_utf8_encoding());
    rb_io_write(enc->output, pending);
    yajl_gen_clear(enc->g);

    return Qnil;
}
|
222
|
+
|
223
|
+
/*
 * Writes the generator buffer to p->output once it holds at least
 * p->write_buffer_size bytes, then clears it. Does nothing while the buffer
 * is still below that threshold.
 */
void wankelSaxEncoder_flush(wankel_encoder * p) {
    const unsigned char * bytes;
    size_t nbytes;
    VALUE chunk;

    yajl_helper_check_gen_status(yajl_gen_get_buf(p->g, &bytes, &nbytes));

    if (nbytes < (size_t)p->write_buffer_size) {
        return;
    }

    chunk = rb_str_new((const char *)bytes, nbytes);
    rb_enc_associate(chunk, rb_utf8_encoding());
    rb_io_write(p->output, chunk);
    yajl_gen_clear(p->g);
}
|
239
|
+
|
240
|
+
void Init_wankel_sax_encoder() {
|
241
|
+
c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
|
242
|
+
c_wankelSaxEncoder = rb_define_class_under(c_wankel, "SaxEncoder", rb_cObject);
|
243
|
+
e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
|
244
|
+
|
245
|
+
rb_define_alloc_func(c_wankelSaxEncoder, wankel_sax_encoder_alloc);
|
246
|
+
rb_define_method(c_wankelSaxEncoder, "initialize", wankelSaxEncoder_initialize, -1);
|
247
|
+
rb_define_method(c_wankelSaxEncoder, "number", wankelSaxEncoder_number, 1);
|
248
|
+
rb_define_method(c_wankelSaxEncoder, "string", wankelSaxEncoder_string, 1);
|
249
|
+
rb_define_method(c_wankelSaxEncoder, "null", wankelSaxEncoder_null, 0);
|
250
|
+
rb_define_method(c_wankelSaxEncoder, "bool", wankelSaxEncoder_bool, 1);
|
251
|
+
rb_define_method(c_wankelSaxEncoder, "map_open", wankelSaxEncoder_map_open, 0);
|
252
|
+
rb_define_method(c_wankelSaxEncoder, "map_close", wankelSaxEncoder_map_close, 0);
|
253
|
+
rb_define_method(c_wankelSaxEncoder, "array_open", wankelSaxEncoder_array_open, 0);
|
254
|
+
rb_define_method(c_wankelSaxEncoder, "array_close", wankelSaxEncoder_array_close, 0);
|
255
|
+
rb_define_method(c_wankelSaxEncoder, "complete", wankelSaxEncoder_complete, 0);
|
256
|
+
|
257
|
+
|
258
|
+
intern_to_s = rb_intern("to_s");
|
259
|
+
intern_io_write = rb_intern("write");
|
260
|
+
intern_to_json = rb_intern("to_json");
|
261
|
+
intern_keys = rb_intern("keys");
|
262
|
+
intern_clone = rb_intern("clone");
|
263
|
+
intern_merge = rb_intern("merge");
|
264
|
+
intern_DEFAULTS = rb_intern("DEFAULTS");
|
265
|
+
sym_beautify = ID2SYM(rb_intern("beautify"));
|
266
|
+
sym_indent_string = ID2SYM(rb_intern("indent_string"));
|
267
|
+
sym_validate_utf8 = ID2SYM(rb_intern("validate_utf8"));
|
268
|
+
sym_escape_solidus = ID2SYM(rb_intern("escape_solidus"));
|
269
|
+
}
|
270
|
+
|
271
|
+
// Ruby GC ===================================================================
|
272
|
+
/*
 * Allocator for Wankel::SaxEncoder: wraps a fresh wankel_encoder struct in a
 * Ruby object with the mark/free hooks attached. The generator handle starts
 * out empty and is created later by initialize.
 */
static VALUE wankel_sax_encoder_alloc(VALUE klass) {
    wankel_encoder * enc;
    VALUE obj = Data_Make_Struct(klass, wankel_encoder, wankel_sax_encoder_mark, wankel_sax_encoder_free, enc);

    enc->g = 0;

    return obj;
}
|
279
|
+
|
280
|
+
static void wankel_sax_encoder_free(void * handle) {
|
281
|
+
wankel_encoder * p = handle;
|
282
|
+
if (p->g){
|
283
|
+
yajl_gen_free(p->g);
|
284
|
+
}
|
285
|
+
}
|
286
|
+
|
287
|
+
static void wankel_sax_encoder_mark(void * handle) {
|
288
|
+
wankel_encoder * p = handle;
|
289
|
+
rb_gc_mark(p->output);
|
290
|
+
}
|
@@ -0,0 +1,232 @@
|
|
1
|
+
#include "wankel_sax_parser.h"
|
2
|
+
|
3
|
+
static VALUE sax_parser_initialize(int argc, VALUE * argv, VALUE self);
|
4
|
+
|
5
|
+
static ID sym_read_buffer_size, sym_symbolize_keys;
|
6
|
+
|
7
|
+
static ID intern_io_read, intern_merge, intern_clone, intern_DEFAULTS;
|
8
|
+
|
9
|
+
static ID intern_on_null, intern_on_boolean, intern_on_integer,
|
10
|
+
intern_on_double, intern_on_string, intern_on_map_start, intern_on_map_key,
|
11
|
+
intern_on_map_end, intern_on_array_start, intern_on_array_end;
|
12
|
+
|
13
|
+
static VALUE c_wankel, c_wankelParser, c_saxParser, e_parseError, e_encodeError;
|
14
|
+
|
15
|
+
// Callbacks =================================================================
|
16
|
+
yajl_callbacks sax_parser_callbacks(VALUE self);
|
17
|
+
int sax_parser_callback_on_null(void *ctx);
|
18
|
+
int sax_parser_callback_on_boolean(void *ctx, int boolVal);
|
19
|
+
int sax_parser_callback_on_number(void *ctx, const char * numberVal, size_t numberLen);
|
20
|
+
int sax_parser_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen);
|
21
|
+
int sax_parser_callback_on_map_start(void *ctx);
|
22
|
+
int sax_parser_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen);
|
23
|
+
int sax_parser_callback_on_map_end(void *ctx);
|
24
|
+
int sax_parser_callback_on_array_start(void *ctx);
|
25
|
+
int sax_parser_callback_on_array_end(void *ctx);
|
26
|
+
|
27
|
+
// Ruby GC ===================================================================
|
28
|
+
VALUE sax_parser_alloc(VALUE);
|
29
|
+
//void sax_parser_mark(void *);
|
30
|
+
void sax_parser_free(void * parser);
|
31
|
+
|
32
|
+
/*
 * call-seq: new([options])
 *
 * Builds the effective options (Wankel::DEFAULTS, overlaid with the receiver
 * class's own DEFAULTS constant when it has one, overlaid with +options+),
 * stores them in @options, and creates the yajl parser handle with one
 * callback installed for every on_* method the receiver responds to.
 *
 * Raises TypeError when +options+ is not a Hash or when :read_buffer_size is
 * not a Fixnum.
 */
VALUE sax_parser_initialize(int argc, VALUE * argv, VALUE self) {
    VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
    VALUE klass = rb_funcall(self, rb_intern("class"), 0);
    VALUE options, rbufsize;
    sax_parser * p;

    rb_scan_args(argc, argv, "01", &options);
    if (rb_const_defined(klass, intern_DEFAULTS)) {
        defaults = rb_funcall(defaults, intern_merge, 1, rb_const_get(klass, intern_DEFAULTS));
    }
    if (options == Qnil) {
        rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0));
    } else {
        Check_Type(options, T_HASH);
        rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options));
    }
    options = rb_iv_get(self, "@options");

    Data_Get_Struct(self, sax_parser, p);

    /* initialize may be invoked more than once on the same object; release
     * the handle from the previous run instead of leaking it. */
    if (p->h) {
        yajl_free(p->h);
        p->h = NULL;
    }

    /* callbacks and alloc_funcs live in the struct because the handle is
     * given pointers to them. */
    p->callbacks = sax_parser_callbacks(self);
    p->alloc_funcs.malloc = yajl_helper_malloc;
    p->alloc_funcs.realloc = yajl_helper_realloc;
    p->alloc_funcs.free = yajl_helper_free;
    p->alloc_funcs.ctx = NULL;
    p->h = yajl_alloc(&p->callbacks, &p->alloc_funcs, (void *)self);

    yajl_configure(p->h, options);

    rbufsize = rb_hash_aref(options, sym_read_buffer_size);
    Check_Type(rbufsize, T_FIXNUM);
    p->rbufsize = rbufsize;

    p->symbolize_keys = (rb_hash_aref(options, sym_symbolize_keys) == Qtrue) ? 1 : 0;

    return self;
}
|
71
|
+
|
72
|
+
/*
 * call-seq: parse(input)
 *
 * Feeds +input+ to the yajl parser. A String is parsed in one go; any other
 * object that responds to the read method is consumed in chunks of
 * read_buffer_size bytes until read returns nil. Anything else raises
 * Wankel::ParseError. Parse failures are reported through
 * yajl_helper_check_status. Returns nil.
 */
static VALUE sax_parser_parse(int argc, VALUE * argv, VALUE self) {
    sax_parser * parser;
    VALUE input;
    yajl_status rc;
    const unsigned char * bytes;
    size_t nbytes;

    Data_Get_Struct(self, sax_parser, parser);
    rb_scan_args(argc, argv, "10", &input);

    if (TYPE(input) == T_STRING) {
        bytes = (const unsigned char *)RSTRING_PTR(input);
        nbytes = RSTRING_LEN(input);
        rc = yajl_parse(parser->h, bytes, nbytes);
        yajl_helper_check_status(parser->h, rc, 1, bytes, nbytes);
    } else if (rb_respond_to(input, intern_io_read)) {
        /* one reusable buffer string that read fills on every iteration */
        VALUE chunk = rb_str_new(0, FIX2LONG(parser->rbufsize));

        for (;;) {
            if (rb_funcall(input, intern_io_read, 2, parser->rbufsize, chunk) == Qnil) {
                break;
            }
            bytes = (const unsigned char *)RSTRING_PTR(chunk);
            nbytes = RSTRING_LEN(chunk);
            rc = yajl_parse(parser->h, bytes, nbytes);
            yajl_helper_check_status(parser->h, rc, 1, bytes, nbytes);
        }
    } else {
        rb_raise(e_parseError, "input must be a string or an IO");
    }

    /* tell yajl that no more input is coming so incomplete documents are detected */
    rc = yajl_complete_parse(parser->h);
    yajl_helper_check_status(parser->h, rc, 0, NULL, 0);

    return Qnil;
}
|
103
|
+
|
104
|
+
void Init_wankel_sax_parser() {
|
105
|
+
c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
|
106
|
+
c_wankelParser = rb_const_get(c_wankel, rb_intern("Parser"));
|
107
|
+
c_saxParser = rb_define_class_under(c_wankel, "SaxParser", rb_cObject);
|
108
|
+
e_parseError = rb_const_get(c_wankel, rb_intern("ParseError"));
|
109
|
+
e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
|
110
|
+
|
111
|
+
rb_define_alloc_func(c_saxParser, sax_parser_alloc);
|
112
|
+
rb_define_method(c_saxParser, "initialize", sax_parser_initialize, -1);
|
113
|
+
rb_define_method(c_saxParser, "parse", sax_parser_parse, -1);
|
114
|
+
|
115
|
+
intern_merge = rb_intern("merge");
|
116
|
+
intern_clone = rb_intern("clone");
|
117
|
+
intern_DEFAULTS = rb_intern("DEFAULTS");
|
118
|
+
|
119
|
+
sym_read_buffer_size = ID2SYM(rb_intern("read_buffer_size"));
|
120
|
+
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
121
|
+
|
122
|
+
intern_on_null = rb_intern("on_null");
|
123
|
+
intern_on_boolean = rb_intern("on_boolean");
|
124
|
+
intern_on_integer = rb_intern("on_integer");
|
125
|
+
intern_on_double = rb_intern("on_double");
|
126
|
+
intern_on_string = rb_intern("on_string");
|
127
|
+
intern_on_map_start = rb_intern("on_map_start");
|
128
|
+
intern_on_map_key = rb_intern("on_map_key");
|
129
|
+
intern_on_map_end = rb_intern("on_map_end");
|
130
|
+
intern_on_array_start = rb_intern("on_array_start");
|
131
|
+
intern_on_array_end = rb_intern("on_array_end");
|
132
|
+
}
|
133
|
+
|
134
|
+
// Callbacks =================================================================
|
135
|
+
/*
 * Builds the yajl callback table for +self+. A callback is installed only for
 * events whose on_* handler +self+ responds to; every other slot stays NULL.
 * The number callback is installed when either on_integer or on_double exists.
 */
yajl_callbacks sax_parser_callbacks(VALUE self) {
    yajl_callbacks table = { 0 };

    /* scalar events */
    if (rb_respond_to(self, intern_on_null))
        table.yajl_null = sax_parser_callback_on_null;
    if (rb_respond_to(self, intern_on_boolean))
        table.yajl_boolean = sax_parser_callback_on_boolean;
    if (rb_respond_to(self, intern_on_integer) || rb_respond_to(self, intern_on_double))
        table.yajl_number = sax_parser_callback_on_number;
    if (rb_respond_to(self, intern_on_string))
        table.yajl_string = sax_parser_callback_on_string;

    /* container events */
    if (rb_respond_to(self, intern_on_map_start))
        table.yajl_start_map = sax_parser_callback_on_map_start;
    if (rb_respond_to(self, intern_on_map_key))
        table.yajl_map_key = sax_parser_callback_on_map_key;
    if (rb_respond_to(self, intern_on_map_end))
        table.yajl_end_map = sax_parser_callback_on_map_end;
    if (rb_respond_to(self, intern_on_array_start))
        table.yajl_start_array = sax_parser_callback_on_array_start;
    if (rb_respond_to(self, intern_on_array_end))
        table.yajl_end_array = sax_parser_callback_on_array_end;

    return table;
}
|
169
|
+
|
170
|
+
int sax_parser_callback_on_null(void *ctx) {
|
171
|
+
rb_funcall((VALUE)ctx, intern_on_null, 0);
|
172
|
+
return 1;
|
173
|
+
}
|
174
|
+
|
175
|
+
int sax_parser_callback_on_boolean(void *ctx, int boolVal) {
|
176
|
+
rb_funcall((VALUE)ctx, intern_on_boolean, 1, (boolVal ? Qtrue : Qfalse));
|
177
|
+
return 1;
|
178
|
+
}
|
179
|
+
|
180
|
+
int sax_parser_callback_on_number(void *ctx, const char * numberVal, size_t numberLen) {
|
181
|
+
char buf[numberLen+1];
|
182
|
+
buf[numberLen] = 0;
|
183
|
+
memcpy(buf, numberVal, numberLen);
|
184
|
+
VALUE obj = (VALUE)ctx;
|
185
|
+
|
186
|
+
if (memchr(buf, '.', numberLen) || memchr(buf, 'e', numberLen) || memchr(buf, 'E', numberLen)) {
|
187
|
+
if (rb_respond_to(obj, intern_on_double)) {
|
188
|
+
rb_funcall((VALUE)ctx, intern_on_double, 1, rb_float_new(strtod(buf, NULL)));
|
189
|
+
}
|
190
|
+
} else {
|
191
|
+
if (rb_respond_to(obj, intern_on_integer)) {
|
192
|
+
rb_funcall((VALUE)ctx, intern_on_integer, 1, rb_cstr2inum(buf, 10));
|
193
|
+
}
|
194
|
+
}
|
195
|
+
return 1;
|
196
|
+
}
|
197
|
+
|
198
|
+
int sax_parser_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen) {
|
199
|
+
rb_funcall((VALUE)ctx, intern_on_string, 1, rb_str_new((const char *) stringVal, stringLen));
|
200
|
+
return 1;
|
201
|
+
}
|
202
|
+
int sax_parser_callback_on_map_start(void *ctx) {
|
203
|
+
rb_funcall((VALUE)ctx, intern_on_map_start, 0);
|
204
|
+
return 1;
|
205
|
+
}
|
206
|
+
int sax_parser_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen) {
|
207
|
+
rb_funcall((VALUE)ctx, intern_on_map_key, 1, rb_str_new((const char *)key, keyLen));
|
208
|
+
return 1;
|
209
|
+
}
|
210
|
+
int sax_parser_callback_on_map_end(void *ctx) {
|
211
|
+
rb_funcall((VALUE)ctx, intern_on_map_end, 0);
|
212
|
+
return 1;
|
213
|
+
}
|
214
|
+
int sax_parser_callback_on_array_start(void *ctx) {
|
215
|
+
rb_funcall((VALUE)ctx, intern_on_array_start, 0);
|
216
|
+
return 1;
|
217
|
+
}
|
218
|
+
int sax_parser_callback_on_array_end(void *ctx) {
|
219
|
+
rb_funcall((VALUE)ctx, intern_on_array_end, 0);
|
220
|
+
return 1;
|
221
|
+
}
|
222
|
+
|
223
|
+
// Ruby GC ===================================================================
|
224
|
+
/*
 * Allocator for Wankel::SaxParser: wraps a fresh sax_parser struct in a Ruby
 * object with sax_parser_free as its free hook and no mark hook.
 */
VALUE sax_parser_alloc(VALUE klass) {
    sax_parser * state;
    VALUE obj = Data_Make_Struct(klass, sax_parser, 0, sax_parser_free, state);

    return obj;
}
|
228
|
+
|
229
|
+
void sax_parser_free(void * handle) {
|
230
|
+
sax_parser * p = handle;
|
231
|
+
yajl_free(p->h);
|
232
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#ifndef WANKEL_SAX_PARSER
|
2
|
+
#define WANKEL_SAX_PARSER
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
#include <ruby/encoding.h>
|
6
|
+
#include <yajl/yajl_common.h>
|
7
|
+
#include <yajl/yajl_parse.h>
|
8
|
+
|
9
|
+
#include "wankel.h"
|
10
|
+
#include "yajl_helpers.h"
|
11
|
+
|
12
|
+
void Init_wankel_sax_parser();
|
13
|
+
|
14
|
+
typedef struct {
|
15
|
+
yajl_handle h;
|
16
|
+
yajl_callbacks callbacks;
|
17
|
+
yajl_alloc_funcs alloc_funcs;
|
18
|
+
int symbolize_keys;
|
19
|
+
VALUE rbufsize;
|
20
|
+
} sax_parser;
|
21
|
+
|
22
|
+
|
23
|
+
#endif
|
@@ -0,0 +1,124 @@
|
|
1
|
+
#include "wankel.h"
|
2
|
+
#include "yajl_helpers.h"
|
3
|
+
|
4
|
+
static VALUE sym_allow_comments;
|
5
|
+
static VALUE sym_validate_strings;
|
6
|
+
static VALUE sym_trailing_garbage;
|
7
|
+
static VALUE sym_multiple_values;
|
8
|
+
static VALUE sym_partial_values;
|
9
|
+
static VALUE sym_beautify;
|
10
|
+
static VALUE sym_indent_string;
|
11
|
+
static VALUE sym_validate_utf8;
|
12
|
+
static VALUE sym_escape_solidus;
|
13
|
+
|
14
|
+
void Init_yajl_helpers() {
|
15
|
+
sym_allow_comments = ID2SYM(rb_intern("allow_comments")); rb_gc_register_address(&sym_allow_comments);
|
16
|
+
sym_validate_strings = ID2SYM(rb_intern("validate_strings")); rb_gc_register_address(&sym_validate_strings);
|
17
|
+
sym_trailing_garbage = ID2SYM(rb_intern("trailing_garbage")); rb_gc_register_address(&sym_trailing_garbage);
|
18
|
+
sym_multiple_values = ID2SYM(rb_intern("multiple_values")); rb_gc_register_address(&sym_multiple_values);
|
19
|
+
sym_partial_values = ID2SYM(rb_intern("partial_values")); rb_gc_register_address(&sym_partial_values);
|
20
|
+
sym_beautify = ID2SYM(rb_intern("beautify")); rb_gc_register_address(&sym_beautify);
|
21
|
+
sym_indent_string = ID2SYM(rb_intern("indent_string")); rb_gc_register_address(&sym_indent_string);
|
22
|
+
sym_validate_utf8 = ID2SYM(rb_intern("validate_utf8")); rb_gc_register_address(&sym_validate_utf8);
|
23
|
+
sym_escape_solidus = ID2SYM(rb_intern("escape_solidus")); rb_gc_register_address(&sym_escape_solidus);
|
24
|
+
}
|
25
|
+
|
26
|
+
// Yajl Helpers ==============================================================
|
27
|
+
/*
 * Raises Wankel::ParseError carrying yajl's error description when +status+
 * is anything other than yajl_status_ok; otherwise returns normally.
 *
 * +verbose+, +jsonText+ and +jsonTextLength+ are passed through to
 * yajl_get_error.
 *
 * Two defects of the previous version are fixed here:
 *  - the error text was released with yajl_free_error and then still read
 *    by rb_raise (use after free);
 *  - the error text, which can quote the offending input, was used as the
 *    printf-style format string of rb_raise.
 * The text is now copied into the exception object before it is released.
 */
void yajl_helper_check_status(yajl_handle handle, yajl_status status, int verbose, const unsigned char * jsonText, size_t jsonTextLength) {
    VALUE klass, exc;
    unsigned char * msg;

    if (status == yajl_status_ok) {
        return;
    }

    /* look the class up first so a failing lookup cannot leak the error text */
    klass = rb_const_get(rb_const_get(rb_cObject, rb_intern("Wankel")), rb_intern("ParseError"));

    msg = yajl_get_error(handle, verbose, jsonText, jsonTextLength);
    if (msg == NULL) {
        rb_raise(klass, "unknown parse error");
    }

    exc = rb_exc_new(klass, (const char *)msg, (long)strlen((const char *)msg));
    yajl_free_error(handle, msg);
    rb_exc_raise(exc);
}
|
34
|
+
|
35
|
+
/*
 * Raises Wankel::EncodeError with a description of +status+ unless it is
 * yajl_gen_status_ok or yajl_gen_generation_complete, in which case the
 * function simply returns.
 *
 * This runs after every generator call, so the success check comes first and
 * the exception class is only looked up when something actually went wrong.
 */
void yajl_helper_check_gen_status(yajl_gen_status status) {
    VALUE error;

    if (status == yajl_gen_status_ok || status == yajl_gen_generation_complete) {
        return;
    }

    error = rb_const_get(rb_const_get(rb_cObject, rb_intern("Wankel")), rb_intern("EncodeError"));

    switch (status) {
    case yajl_gen_keys_must_be_strings:
        rb_raise(error, "at a point where a map key is generated, a function other than yajl_gen_string was called");
        break;
    case yajl_max_depth_exceeded:
        rb_raise(error, "YAJL's maximum generation depth was exceeded. see YAJL_MAX_DEPTH");
        break;
    case yajl_gen_in_error_state:
        rb_raise(error, "A generator function (yajl_gen_XXX) was called while in an error state");
        break;
    case yajl_gen_invalid_number:
        rb_raise(error, "yajl_gen_double was passed an invalid floating point value (infinity or NaN).");
        break;
    case yajl_gen_no_buf:
        rb_raise(error, "A print callback was passed in, so there is no internal buffer to get from");
        break;
    case yajl_gen_invalid_string:
        rb_raise(error, "returned from yajl_gen_string() when the yajl_gen_validate_utf8 option is enabled and an invalid was passed by client code.");
        break;
    default:
        rb_raise(error, "unkown yajl_gen_status error");
        break;
    }
}
|
58
|
+
|
59
|
+
|
60
|
+
// Memory funcs
|
61
|
+
/* yajl malloc hook backed by Ruby's xmalloc; ctx is not used. */
void * yajl_helper_malloc(void *ctx, size_t size) {
    void * block = xmalloc(size);

    return block;
}
|
64
|
+
/* yajl realloc hook backed by Ruby's xrealloc; ctx is not used. */
void * yajl_helper_realloc(void *ctx, void *ptr, size_t size) {
    void * block = xrealloc(ptr, size);

    return block;
}
|
67
|
+
/*
 * yajl free hook backed by Ruby's xfree; ctx is not used.
 *
 * The previous body was `return xfree(ptr);`. A return statement with an
 * expression is not allowed in a function returning void in ISO C, so the
 * call is now a plain statement.
 */
void yajl_helper_free(void *ctx, void *ptr) {
    (void)ctx;
    xfree(ptr);
}
|
70
|
+
|
71
|
+
// Configure
|
72
|
+
/*
 * Applies the parser-related entries of +options+ to +handle+. An option is
 * switched on only when its value is exactly true; anything else (including
 * a missing key) switches it off. Note that :validate_strings is inverted
 * when mapped onto yajl's yajl_dont_validate_strings flag.
 */
void yajl_configure(yajl_handle handle, VALUE options) {
    int comments  = (rb_hash_aref(options, sym_allow_comments) == Qtrue) ? 1 : 0;
    yajl_config(handle, yajl_allow_comments, comments);

    int skip_validation = (rb_hash_aref(options, sym_validate_strings) == Qtrue) ? 0 : 1;
    yajl_config(handle, yajl_dont_validate_strings, skip_validation);

    int garbage = (rb_hash_aref(options, sym_trailing_garbage) == Qtrue) ? 1 : 0;
    yajl_config(handle, yajl_allow_trailing_garbage, garbage);

    int multiple = (rb_hash_aref(options, sym_multiple_values) == Qtrue) ? 1 : 0;
    yajl_config(handle, yajl_allow_multiple_values, multiple);

    int partial = (rb_hash_aref(options, sym_partial_values) == Qtrue) ? 1 : 0;
    yajl_config(handle, yajl_allow_partial_values, partial);
}
|
103
|
+
|
104
|
+
/*
 * Applies the generator-related entries of +options+ to +g+.
 *
 * :beautify, :validate_utf8 and :escape_solidus are switched on only when
 * their value is exactly true. :indent_string is applied when present; it
 * must be a String without embedded NUL bytes, otherwise TypeError /
 * ArgumentError is raised. Previously the value was passed to RSTRING_PTR
 * unchecked, which crashes for nil or any non-String value.
 *
 * NOTE(review): the generator is given a pointer into the Ruby String's
 * buffer. If yajl keeps that pointer rather than copying the text, the String
 * must stay alive and unmodified for as long as the generator is used -
 * confirm against the yajl sources.
 */
void yajl_gen_configure(yajl_gen g, VALUE options) {
    VALUE indent = rb_hash_aref(options, sym_indent_string);

    yajl_gen_config(g, yajl_gen_beautify,
                    (rb_hash_aref(options, sym_beautify) == Qtrue) ? 1 : 0);

    if (indent != Qnil) {
        /* Require a real String: an implicit to_str conversion would create
         * a temporary that nothing keeps alive. */
        Check_Type(indent, T_STRING);
        yajl_gen_config(g, yajl_gen_indent_string, StringValueCStr(indent));
    }

    yajl_gen_config(g, yajl_gen_validate_utf8,
                    (rb_hash_aref(options, sym_validate_utf8) == Qtrue) ? 1 : 0);

    yajl_gen_config(g, yajl_gen_escape_solidus,
                    (rb_hash_aref(options, sym_escape_solidus) == Qtrue) ? 1 : 0);
}
|