wankel 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/LICENSE +20 -0
  4. data/README.md +43 -0
  5. data/Rakefile +63 -0
  6. data/benchmark/subjects/item.json +1 -0
  7. data/benchmark/subjects/ohai.json +1216 -0
  8. data/benchmark/subjects/twitter_search.json +1 -0
  9. data/benchmark/subjects/twitter_stream.json +430 -0
  10. data/ext/wankel/extconf.rb +15 -0
  11. data/ext/wankel/wankel.c +50 -0
  12. data/ext/wankel/wankel.h +17 -0
  13. data/ext/wankel/wankel_encoder.c +232 -0
  14. data/ext/wankel/wankel_encoder.h +13 -0
  15. data/ext/wankel/wankel_parser.c +345 -0
  16. data/ext/wankel/wankel_parser.h +26 -0
  17. data/ext/wankel/wankel_sax_encoder.c +290 -0
  18. data/ext/wankel/wankel_sax_encoder.h +13 -0
  19. data/ext/wankel/wankel_sax_parser.c +232 -0
  20. data/ext/wankel/wankel_sax_parser.h +23 -0
  21. data/ext/wankel/yajl_helpers.c +124 -0
  22. data/ext/wankel/yajl_helpers.h +22 -0
  23. data/lib/wankel/ex_sax_parser.rb +75 -0
  24. data/lib/wankel.rb +19 -0
  25. data/logo.png +0 -0
  26. data/test/encoding/encoding_test.rb +230 -0
  27. data/test/encoding/sax_encoder_test.rb +89 -0
  28. data/test/parsing/active_support_test.rb +66 -0
  29. data/test/parsing/fixtures/fail.15.json +1 -0
  30. data/test/parsing/fixtures/fail.16.json +1 -0
  31. data/test/parsing/fixtures/fail.17.json +1 -0
  32. data/test/parsing/fixtures/fail.26.json +1 -0
  33. data/test/parsing/fixtures/fail11.json +1 -0
  34. data/test/parsing/fixtures/fail12.json +1 -0
  35. data/test/parsing/fixtures/fail13.json +1 -0
  36. data/test/parsing/fixtures/fail14.json +1 -0
  37. data/test/parsing/fixtures/fail19.json +1 -0
  38. data/test/parsing/fixtures/fail20.json +1 -0
  39. data/test/parsing/fixtures/fail21.json +1 -0
  40. data/test/parsing/fixtures/fail22.json +1 -0
  41. data/test/parsing/fixtures/fail23.json +1 -0
  42. data/test/parsing/fixtures/fail24.json +1 -0
  43. data/test/parsing/fixtures/fail25.json +1 -0
  44. data/test/parsing/fixtures/fail27.json +2 -0
  45. data/test/parsing/fixtures/fail28.json +2 -0
  46. data/test/parsing/fixtures/fail3.json +1 -0
  47. data/test/parsing/fixtures/fail4.json +1 -0
  48. data/test/parsing/fixtures/fail5.json +1 -0
  49. data/test/parsing/fixtures/fail6.json +1 -0
  50. data/test/parsing/fixtures/fail9.json +1 -0
  51. data/test/parsing/fixtures/pass.array.json +6 -0
  52. data/test/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
  53. data/test/parsing/fixtures/pass.contacts.json +1 -0
  54. data/test/parsing/fixtures/pass.db100.xml.json +1 -0
  55. data/test/parsing/fixtures/pass.db1000.xml.json +1 -0
  56. data/test/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
  57. data/test/parsing/fixtures/pass.deep_arrays.json +1 -0
  58. data/test/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
  59. data/test/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
  60. data/test/parsing/fixtures/pass.doubles.json +1 -0
  61. data/test/parsing/fixtures/pass.empty_array.json +1 -0
  62. data/test/parsing/fixtures/pass.empty_string.json +1 -0
  63. data/test/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
  64. data/test/parsing/fixtures/pass.escaped_foobar.json +1 -0
  65. data/test/parsing/fixtures/pass.item.json +1 -0
  66. data/test/parsing/fixtures/pass.json-org-sample1.json +23 -0
  67. data/test/parsing/fixtures/pass.json-org-sample2.json +11 -0
  68. data/test/parsing/fixtures/pass.json-org-sample3.json +26 -0
  69. data/test/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
  70. data/test/parsing/fixtures/pass.json-org-sample4.json +89 -0
  71. data/test/parsing/fixtures/pass.json-org-sample5.json +27 -0
  72. data/test/parsing/fixtures/pass.map-spain.xml.json +1 -0
  73. data/test/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
  74. data/test/parsing/fixtures/pass.ns-soap.xml.json +1 -0
  75. data/test/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
  76. data/test/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
  77. data/test/parsing/fixtures/pass.numbers-int-4k.json +11 -0
  78. data/test/parsing/fixtures/pass.numbers-int-64k.json +154 -0
  79. data/test/parsing/fixtures/pass.twitter-search.json +1 -0
  80. data/test/parsing/fixtures/pass.twitter-search2.json +1 -0
  81. data/test/parsing/fixtures/pass.unicode.json +3315 -0
  82. data/test/parsing/fixtures/pass.yelp.json +1 -0
  83. data/test/parsing/fixtures/pass1.json +56 -0
  84. data/test/parsing/fixtures/pass2.json +1 -0
  85. data/test/parsing/fixtures/pass3.json +6 -0
  86. data/test/parsing/fixtures_test.rb +43 -0
  87. data/test/parsing/multiple_values_test.rb +100 -0
  88. data/test/parsing/one_off_test.rb +65 -0
  89. data/test/parsing/sax_parser_test.rb +125 -0
  90. data/test/performance.rb +135 -0
  91. data/test/test_helper.rb +36 -0
  92. data/test/wankel_test.rb +53 -0
  93. data/wankel.gemspec +23 -0
  94. metadata +259 -0
@@ -0,0 +1,15 @@
1
# Build configuration for the wankel C extension: generates the Makefile via mkmf.
require 'mkmf'
require 'rbconfig'

# $CFLAGS is one flat string of flags, so the leading space is required;
# without it '-Wall' is glued onto whatever flag precedes it.
$CFLAGS << ' -Wall'

# Pass --coverage to extconf.rb to build with gcov instrumentation.
if ARGV.include?('--coverage')
  $CFLAGS << " -fprofile-arcs -ftest-coverage"
  $DLDFLAGS << ' --coverage'
end

# A Makefile is only produced when libyajl can be linked against.
if have_library('yajl')
  create_makefile('wankel/wankel')
else
  puts "Couldn't find YAJL library"
end
@@ -0,0 +1,50 @@
1
+ #include "wankel.h"
2
+
3
+ static ID intern_new, intern_parse, intern_encode; /* method-name IDs, interned once in Init_wankel */
4
+
5
+ static VALUE c_wankel, c_wankelParser, c_wankelEncoder, e_parseError, e_encodeError; /* class and exception handles, assigned in Init_wankel */
6
+
7
+ // Class Methods =============================================================
8
+ static VALUE wankel_parse(int argc, VALUE * argv, VALUE klass) {
9
+ VALUE parser, input, options, callback;
10
+ rb_scan_args(argc, argv, "11&", &input, &options, &callback);
11
+
12
+ parser = rb_funcall(c_wankelParser, intern_new, 1, options);
13
+ return rb_funcall(parser, intern_parse, 2, input, callback);
14
+ }
15
+
16
+ static VALUE wankel_encode(int argc, VALUE * argv, VALUE klass) {
17
+ VALUE encoder, input, output, options;
18
+ rb_scan_args(argc, argv, "12", &input, &output, &options);
19
+
20
+
21
+ if (TYPE(output) == T_HASH) {
22
+ encoder = rb_funcall(c_wankelEncoder, intern_new, 1, output);
23
+ return rb_funcall(encoder, intern_encode, 1, input);
24
+ } else {
25
+ encoder = rb_funcall(c_wankelEncoder, intern_new, 1, options);
26
+ return rb_funcall(encoder, intern_encode, 2, input, output);
27
+ }
28
+ }
29
+
30
+ void Init_wankel() {
31
+ c_wankel = rb_define_class("Wankel", rb_cObject);
32
+ e_parseError = rb_define_class_under(c_wankel, "ParseError", rb_eStandardError);
33
+ e_encodeError = rb_define_class_under(c_wankel, "EncodeError", rb_eStandardError);
34
+
35
+ intern_new = rb_intern("new");
36
+ intern_parse = rb_intern("parse");
37
+ intern_encode = rb_intern("encode");
38
+
39
+ rb_define_singleton_method(c_wankel, "parse", wankel_parse, -1);
40
+ rb_define_singleton_method(c_wankel, "encode", wankel_encode, -1);
41
+
42
+ rb_define_singleton_method(c_wankel, "load", wankel_parse, -1);
43
+ rb_define_singleton_method(c_wankel, "dump", wankel_encode, -1);
44
+
45
+ c_wankelParser = Init_wankel_parser();
46
+ c_wankelEncoder = Init_wankel_encoder();
47
+ Init_wankel_sax_parser();
48
+ Init_wankel_sax_encoder();
49
+ Init_yajl_helpers();
50
+ }
@@ -0,0 +1,17 @@
1
+ #ifndef WANKEL
2
+ #define WANKEL
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+ #include <yajl/yajl_common.h>
7
+ #include <yajl/yajl_parse.h>
8
+
9
+ #include "wankel_parser.h"
10
+ #include "wankel_encoder.h"
11
+ #include "wankel_sax_parser.h"
12
+ #include "wankel_sax_encoder.h"
13
+ #include "yajl_helpers.h"
14
+
15
+ void Init_wankel();
16
+
17
+ #endif
@@ -0,0 +1,232 @@
1
+ // TODO: i should use the Wankel::SaxEncoder?
2
+ #include "wankel_encoder.h"
3
+
4
+ static VALUE c_wankel, c_wankelEncoder, e_encodeError;
5
+
6
+ static ID intern_to_s, intern_keys, intern_io_write, intern_to_json, intern_clone, intern_merge, intern_DEFAULTS;
7
+
8
+ static ID sym_beautify, sym_indent_string, sym_validate_utf8, sym_escape_solidus;
9
+
10
+
11
+ static void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size);
12
+ static void yajl_encode_part(yajl_gen g, VALUE obj, VALUE io, int write_buffer_size);
13
+ static void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size);
14
+
15
+ /*
16
+ * Document-method: new
17
+ *
18
+ * call-seq: new([options])
19
+ *
20
+ * +:beautify+ generate indented (beautiful) output. Default `false`.
21
+ *
22
+ * +:indent_string+ Set an indent string which is used when yajl_gen_beautify
23
+ * is enabled. Maybe something like \\t or some number of
24
+ * spaces. The default is four spaces ' '.
25
+ *
26
+ * +:validate_utf8+ Normally the generator does not validate that strings you
27
+ * pass to it are valid UTF8. Enabling this option will cause
28
+ * it to do so.
29
+ *
30
+ * +:escape_solidus+ the forward solidus (slash or '/' in human) is not required
31
+ * to be escaped in json text. By default, YAJL will not escape
32
+ * it in the iterest of saving bytes. Setting this flag will
33
+ * cause YAJL to always escape '/' in generated JSON strings.
34
+ */
35
+ static VALUE wankelEncoder_initialize(int argc, VALUE * argv, VALUE self) {
36
+ VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
37
+ VALUE options;
38
+
39
+ rb_scan_args(argc, argv, "01", &options);
40
+ if(options == Qnil) {
41
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0) );
42
+ } else {
43
+ Check_Type(options, T_HASH);
44
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options) );
45
+ }
46
+
47
+ return self;
48
+ }
49
+
50
+ /*
51
+ * Document-method: encode
52
+ *
53
+ * call-seq: encode(obj[, io])
54
+ *
55
+ * +obj+ is the Ruby object to encode to JSON
56
+ *
57
+ * +io+ is an optional IO used to stream the encoded JSON string to. If no io
58
+ * is specified the resulting JSON string is returned. If io is specified,
59
+ * this method returns nil
60
+ */
61
+ static VALUE wankelEncoder_encode(int argc, VALUE * argv, VALUE self) {
62
+ VALUE obj, io, options;
63
+ yajl_gen g;
64
+ yajl_alloc_funcs alloc_funcs;
65
+ yajl_gen_status status;
66
+ int write_buffer_size;
67
+ const unsigned char * buffer;
68
+ size_t len;
69
+
70
+ rb_scan_args(argc, argv, "11", &obj, &io);
71
+ options = rb_iv_get(self, "@options");
72
+
73
+ alloc_funcs.malloc = yajl_helper_malloc;
74
+ alloc_funcs.realloc = yajl_helper_realloc;
75
+ alloc_funcs.free = yajl_helper_free;
76
+ g = yajl_gen_alloc(&alloc_funcs);
77
+
78
+ yajl_gen_configure(g, options);
79
+
80
+ if (io != Qnil && !rb_respond_to(io, intern_io_write)) {
81
+ rb_raise(e_encodeError, "output must be a an IO");
82
+ }
83
+
84
+ write_buffer_size = FIX2INT(rb_hash_aref(options, ID2SYM(rb_intern("write_buffer_size"))));
85
+
86
+ yajl_encode_part(g, obj, io, write_buffer_size);
87
+
88
+ // TODO: add terminator here if desired
89
+ if (io == Qnil) {
90
+ status = yajl_gen_get_buf(g, &buffer, &len);
91
+ yajl_helper_check_gen_status(status);
92
+ io = rb_str_new((const char *)buffer, len);
93
+ rb_enc_associate(io, rb_utf8_encoding());
94
+ yajl_gen_clear(g);
95
+ yajl_gen_free(g);
96
+ return io;
97
+ } else {
98
+ wankelEncoder_flush(g, io, 1);
99
+ yajl_gen_free(g);
100
+ return Qnil;
101
+ }
102
+ return self;
103
+ }
104
+
105
+ void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size) {
106
+ VALUE rbBuffer;
107
+ yajl_gen_status status;
108
+ const unsigned char * buffer;
109
+ size_t len;
110
+
111
+ if (io != Qnil) {
112
+ status = yajl_gen_get_buf(g, &buffer, &len);
113
+ yajl_helper_check_gen_status(status);
114
+
115
+ if (len >= (size_t)write_buffer_size) {
116
+ rbBuffer = rb_str_new((const char *)buffer, len);
117
+ rb_enc_associate(rbBuffer, rb_utf8_encoding());
118
+ rb_io_write(io, rbBuffer);
119
+ yajl_gen_clear(g);
120
+ }
121
+ }
122
+ }
123
+
124
+ void yajl_encode_part(yajl_gen g, VALUE obj, VALUE io, int write_buffer_size) {
125
+ size_t len;
126
+ int idx = 0;
127
+ VALUE keys, entry, str;
128
+ const char * cptr;
129
+ yajl_gen_status status;
130
+
131
+ switch (TYPE(obj)) {
132
+ case T_HASH:
133
+ status = yajl_gen_map_open(g);
134
+ yajl_helper_check_gen_status(status);
135
+
136
+ keys = rb_funcall(obj, intern_keys, 0);
137
+ for(idx = 0; idx < RARRAY_LEN(keys); idx++) {
138
+ entry = rb_ary_entry(keys, idx);
139
+ str = rb_funcall(entry, intern_to_s, 0); /* key must be a string */
140
+ /* the key */
141
+ yajl_encode_part(g, str, io, write_buffer_size);
142
+ /* the value */
143
+ yajl_encode_part(g, rb_hash_aref(obj, entry), io, write_buffer_size);
144
+ }
145
+
146
+ status = yajl_gen_map_close(g);
147
+ yajl_helper_check_gen_status(status);
148
+ break;
149
+ case T_ARRAY:
150
+ status = yajl_gen_array_open(g);
151
+ yajl_helper_check_gen_status(status);
152
+
153
+ for(idx = 0; idx < RARRAY_LEN(obj); idx++) {
154
+ yajl_encode_part(g, rb_ary_entry(obj, idx), io, write_buffer_size);
155
+ }
156
+ status = yajl_gen_array_close(g);
157
+ yajl_helper_check_gen_status(status);
158
+ break;
159
+ case T_NIL:
160
+ status = yajl_gen_null(g);
161
+ yajl_helper_check_gen_status(status);
162
+ break;
163
+ case T_TRUE:
164
+ status = yajl_gen_bool(g, 1);
165
+ yajl_helper_check_gen_status(status);
166
+ break;
167
+ case T_FALSE:
168
+ status = yajl_gen_bool(g, 0);
169
+ yajl_helper_check_gen_status(status);
170
+ break;
171
+ case T_FIXNUM:
172
+ case T_FLOAT:
173
+ case T_BIGNUM:
174
+ str = rb_funcall(obj, intern_to_s, 0);
175
+ cptr = RSTRING_PTR(str);
176
+ len = RSTRING_LEN(str);
177
+ if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) {
178
+ rb_raise(e_encodeError, "'%s' is an invalid number", cptr);
179
+ }
180
+ status = yajl_gen_number(g, cptr, len);
181
+ yajl_helper_check_gen_status(status);
182
+ break;
183
+ case T_STRING:
184
+ cptr = RSTRING_PTR(obj);
185
+ len = RSTRING_LEN(obj);
186
+ status = yajl_gen_string(g, (const unsigned char *)cptr, len);
187
+ yajl_helper_check_gen_status(status);
188
+ break;
189
+ default:
190
+ if (rb_respond_to(obj, intern_to_json)) {
191
+ str = rb_funcall(obj, intern_to_json, 0);
192
+ Check_Type(str, T_STRING);
193
+ cptr = RSTRING_PTR(str);
194
+ len = RSTRING_LEN(str);
195
+ status = yajl_gen_number(g, cptr, len);
196
+ yajl_helper_check_gen_status(status);
197
+ } else {
198
+ str = rb_funcall(obj, intern_to_s, 0);
199
+ Check_Type(str, T_STRING);
200
+ cptr = RSTRING_PTR(str);
201
+ len = RSTRING_LEN(str);
202
+ status = yajl_gen_string(g, (const unsigned char *)cptr, len);
203
+ yajl_helper_check_gen_status(status);
204
+ }
205
+ break;
206
+ }
207
+
208
+ wankelEncoder_flush(g, io, write_buffer_size);
209
+ }
210
+
211
+ ID Init_wankel_encoder() {
212
+ c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
213
+ c_wankelEncoder = rb_define_class_under(c_wankel, "Encoder", rb_cObject);
214
+ e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
215
+
216
+ rb_define_method(c_wankelEncoder, "initialize", wankelEncoder_initialize, -1);
217
+ rb_define_method(c_wankelEncoder, "encode", wankelEncoder_encode, -1);
218
+
219
+ intern_to_s = rb_intern("to_s");
220
+ intern_io_write = rb_intern("write");
221
+ intern_to_json = rb_intern("to_json");
222
+ intern_keys = rb_intern("keys");
223
+ intern_clone = rb_intern("clone");
224
+ intern_merge = rb_intern("merge");
225
+ intern_DEFAULTS = rb_intern("DEFAULTS");
226
+ sym_beautify = ID2SYM(rb_intern("beautify"));
227
+ sym_indent_string = ID2SYM(rb_intern("indent_string"));
228
+ sym_validate_utf8 = ID2SYM(rb_intern("validate_utf8"));
229
+ sym_escape_solidus = ID2SYM(rb_intern("escape_solidus"));
230
+
231
+ return c_wankelEncoder;
232
+ }
@@ -0,0 +1,13 @@
1
+ #ifndef WANKEL_ENCODER
2
+ #define WANKEL_ENCODER
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+ #include <yajl/yajl_common.h>
7
+ #include <yajl/yajl_gen.h>
8
+
9
+ #include "yajl_helpers.h"
10
+
11
+ ID Init_wankel_encoder();
12
+
13
+ #endif
@@ -0,0 +1,345 @@
1
+ #include "wankel_parser.h"
2
+
3
+ // Callbacks =================================================================
4
+ static int wankel_parse_callback_on_null(void *ctx);
5
+ static int wankel_parse_callback_on_boolean(void *ctx, int boolVal);
6
+ // static int wankel_parse_callback_on_integer(void *ctx, long long integerVal);
7
+ // static int wankel_parse_callback_on_double(void *ctx, double doubleVal);
8
+ static int wankel_parse_callback_on_number(void *ctx, const char * numberVal, size_t numberLen);
9
+ static int wankel_parse_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen);
10
+ static int wankel_parse_callback_on_map_start(void *ctx);
11
+ static int wankel_parse_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen);
12
+ static int wankel_parse_callback_on_map_end(void *ctx);
13
+ static int wankel_parse_callback_on_array_start(void *ctx);
14
+ static int wankel_parse_callback_on_array_end(void *ctx);
15
+
16
+ static yajl_callbacks callbacks = {
17
+ wankel_parse_callback_on_null,
18
+ wankel_parse_callback_on_boolean,
19
+ NULL,
20
+ NULL,
21
+ wankel_parse_callback_on_number,
22
+ wankel_parse_callback_on_string,
23
+ wankel_parse_callback_on_map_start,
24
+ wankel_parse_callback_on_map_key,
25
+ wankel_parse_callback_on_map_end,
26
+ wankel_parse_callback_on_array_start,
27
+ wankel_parse_callback_on_array_end
28
+ };
29
+
30
+ // Ruby GC ===================================================================
31
+ static VALUE wankel_alloc(VALUE klass);
32
+ static void wankel_free(void * handle);
33
+ static void wankel_mark(void * handle);
34
+
35
+ static ID intern_io_read, intern_clone, intern_merge, intern_call,
36
+ intern_DEFAULTS, sym_multiple_values;
37
+
38
+ static ID sym_read_buffer_size, sym_symbolize_keys;
39
+
40
+ static VALUE c_wankel, c_wankelParser, e_parseError, e_encodeError;
41
+
42
+ /*
43
+ * Document-method: new
44
+ *
45
+ * call-seq: new([options])
46
+ *
47
+ * +:symbolize_keys+ will turn hash keys into Ruby symbols, defaults to false.
48
+ * Default `false`.
49
+ *
50
+ * +:allow_comments+ will ignore javascript style comments in JSON input.
51
+ * Default `false`.
52
+ *
53
+ * +:validate_strings+ will verify that all strings in JSON input are valid UTF8
54
+ * and will emit a parse error if this is not so. This option
55
+ * makes parsing slightly more expensive (~7% depending on
56
+ * processor and compiler in use). Default `false`.
57
+ *
58
+ * +:allow_trailing_garbage+ will ensure the entire input text was consumed and
59
+ * will raise an error otherwise. Default `false`.
60
+ *
61
+ * +:multiple_values+ allow multiple values to be parsed by a single parser. The
62
+ * entire text must be valid JSON, and values can be seperated
63
+ * by any kind of whitespace. Default `false`.
64
+ *
65
+ * +:allow_partial_values+ check that the top level value was completely consumed/
66
+ * Default `false`.
67
+ *
68
+ *
69
+ * +:read_buffer_size+ is the size of chunk that will be parsed off the input
70
+ * (if it's an IO) for each loop of the parsing process.
71
+ * 8092 is a good balance between the different types of
72
+ * streams (off disk, off a socket, etc...), but this option
73
+ * is here so the caller can better tune their parsing depending
74
+ * on the type of stream being passed. A larger read buffer
75
+ * will perform better for files off disk, where as a smaller
76
+ * size may be more efficient for reading off of a socket
77
+ * directly.
78
+ */
79
+ static VALUE wankelParser_initialize(int argc, VALUE * argv, VALUE self) {
80
+ VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
81
+ VALUE options, rbufsize;
82
+ wankel_parser * p;
83
+
84
+ rb_scan_args(argc, argv, "01", &options);
85
+ if(options == Qnil) {
86
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0) );
87
+ } else {
88
+ Check_Type(options, T_HASH);
89
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options) );
90
+ }
91
+ options = rb_iv_get(self, "@options");
92
+ rbufsize = rb_hash_aref(options, sym_read_buffer_size);
93
+ Check_Type(rbufsize, T_FIXNUM);
94
+
95
+ Data_Get_Struct(self, wankel_parser, p);
96
+ p->alloc_funcs.malloc = yajl_helper_malloc;
97
+ p->alloc_funcs.realloc = yajl_helper_realloc;
98
+ p->alloc_funcs.free = yajl_helper_free;
99
+ p->rbufsize = rbufsize;
100
+
101
+ if(rb_hash_aref(options, sym_symbolize_keys) == Qtrue) {
102
+ p->symbolize_keys = 1;
103
+ } else {
104
+ p->symbolize_keys = 0;
105
+ }
106
+
107
+ return self;
108
+ }
109
+
110
+ /*
111
+ * Document-method: parse
112
+ *
113
+ * call-seq: parse(input[, &block])
114
+ *
115
+ * input can either be a String or and IO Object
116
+ *
117
+ * If a block is passed, it is called when the input is finished parsing. If
118
+ * parsing multiple json values in an input it is called once for each value
119
+ *
120
+ */
121
+ static VALUE wankelParser_parse(int argc, VALUE * argv, VALUE self) {
122
+ const char * cptr;
123
+ unsigned int len;
124
+ yajl_status status;
125
+ wankel_parser * p;
126
+ VALUE input, callback;
127
+ VALUE options = rb_iv_get(self, "@options");
128
+ rb_scan_args(argc, argv, "11", &input, &callback); // Hack, cuz i'm not sure how to call a method with a block from c
129
+
130
+ if(callback == Qnil && rb_block_given_p()) {
131
+ callback = rb_block_proc();
132
+ }
133
+
134
+ Data_Get_Struct(self, wankel_parser, p);
135
+ p->h = yajl_alloc(&callbacks, &p->alloc_funcs, (void *)p);
136
+ yajl_configure(p->h, options);
137
+ p->callback = callback;
138
+ p->stack = rb_ary_new();
139
+ p->stack_index = 0;
140
+ if (TYPE(input) == T_STRING) {
141
+ cptr = RSTRING_PTR(input);
142
+ len = (unsigned int)RSTRING_LEN(input);
143
+ status = yajl_parse(p->h, (const unsigned char*)cptr, len);
144
+ yajl_helper_check_status(p->h, status, 1, (const unsigned char*)cptr, len);
145
+ } else if (rb_respond_to(input, intern_io_read)) {
146
+ VALUE chunk = rb_str_new(0, NUM2LONG(p->rbufsize));
147
+ while (rb_funcall(input, intern_io_read, 2, p->rbufsize, chunk) != Qnil) {
148
+ cptr = RSTRING_PTR(chunk);
149
+ len = (unsigned int)RSTRING_LEN(chunk);
150
+ status = yajl_parse(p->h, (const unsigned char*)cptr, len);
151
+ yajl_helper_check_status(p->h, status, 1, (const unsigned char*)cptr, len);
152
+ }
153
+ } else {
154
+ rb_raise(e_parseError, "input must be a string or an IO");
155
+ }
156
+
157
+ status = yajl_complete_parse(p->h);
158
+ if(status != yajl_status_ok) {
159
+ rb_raise(e_parseError, "Error completing parse");
160
+ }
161
+
162
+ if(rb_block_given_p()) {
163
+ return Qnil;
164
+ } else if(rb_hash_aref(options, sym_multiple_values) == Qtrue) {
165
+ return p->stack;
166
+ } else {
167
+ return rb_ary_pop(p->stack);
168
+ }
169
+ }
170
+
171
+ ID Init_wankel_parser() {
172
+ c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
173
+ c_wankelParser = rb_define_class_under(c_wankel, "Parser", rb_cObject);
174
+ e_parseError = rb_const_get(c_wankel, rb_intern("ParseError"));
175
+ e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
176
+
177
+ rb_define_alloc_func(c_wankelParser, wankel_alloc);
178
+ rb_define_method(c_wankelParser, "initialize", wankelParser_initialize, -1);
179
+ rb_define_method(c_wankelParser, "parse", wankelParser_parse, -1);
180
+
181
+ intern_io_read = rb_intern("read");
182
+ intern_clone = rb_intern("clone");
183
+ intern_merge = rb_intern("merge");
184
+ intern_call = rb_intern("call");
185
+ intern_DEFAULTS = rb_intern("DEFAULTS");
186
+ sym_read_buffer_size = ID2SYM(rb_intern("read_buffer_size"));
187
+ sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
188
+ sym_multiple_values = ID2SYM(rb_intern("multiple_values"));
189
+
190
+ return c_wankelParser;
191
+ }
192
+ // Ruby GC ===================================================================
193
+ static VALUE wankel_alloc(VALUE klass) {
194
+ VALUE self;
195
+ wankel_parser * p;
196
+ self = Data_Make_Struct(klass, wankel_parser, wankel_mark, wankel_free, p);
197
+ p->h = 0;
198
+ return self;
199
+ }
200
+
201
+ static void wankel_free(void * handle) {
202
+ wankel_parser * p = handle;
203
+ if (p->h){
204
+ yajl_free(p->h);
205
+ }
206
+ }
207
+
208
+ static void wankel_mark(void * handle) {
209
+ wankel_parser * p = handle;
210
+ rb_gc_mark(p->stack);
211
+ rb_gc_mark(p->rbufsize);
212
+ }
213
+
214
+ // Parse Builder =============================================================
215
+ static void wankel_builder_push(void *ctx, VALUE val) {
216
+ int len;
217
+ wankel_parser * p = ctx;
218
+ VALUE lastEntry, hash;
219
+
220
+ if (p->stack_index > 0) {
221
+ len = (int)RARRAY_LEN(p->stack);
222
+ lastEntry = rb_ary_entry(p->stack, len-1);
223
+ switch (TYPE(lastEntry)) {
224
+ case T_ARRAY:
225
+ rb_ary_push(lastEntry, val);
226
+ if (TYPE(val) == T_HASH || TYPE(val) == T_ARRAY) {
227
+ rb_ary_push(p->stack, val);
228
+ p->stack_index++;
229
+ }
230
+ break;
231
+ case T_HASH:
232
+ rb_hash_aset(lastEntry, val, Qnil);
233
+ rb_ary_push(p->stack, val);
234
+ p->stack_index++;
235
+ break;
236
+ case T_STRING:
237
+ case T_SYMBOL:
238
+ hash = rb_ary_entry(p->stack, len-2);
239
+ if (TYPE(hash) == T_HASH) {
240
+ rb_hash_aset(hash, lastEntry, val);
241
+ rb_ary_pop(p->stack);
242
+ p->stack_index--;
243
+ if (TYPE(val) == T_HASH || TYPE(val) == T_ARRAY) {
244
+ rb_ary_push(p->stack, val);
245
+ p->stack_index++;
246
+ }
247
+ }
248
+ break;
249
+ }
250
+ } else {
251
+ rb_ary_push(p->stack, val);
252
+ p->stack_index++;
253
+ }
254
+ }
255
+
256
+ static int wankel_parse_callback_on_null(void *ctx) {
257
+ wankel_builder_push(ctx, Qnil);
258
+ return 1;
259
+ }
260
+
261
+ static int wankel_parse_callback_on_boolean(void *ctx, int boolVal) {
262
+ wankel_builder_push(ctx, boolVal ? Qtrue : Qfalse);
263
+ return 1;
264
+ }
265
+
266
+ // static int wankel_parse_callback_on_integer(void *ctx, long long integerVal) {
267
+ // wankel_builder_push(ctx, LL2NUM(integerVal));
268
+ // return 1;
269
+ // }
270
+ // static int wankel_parse_callback_on_double(void *ctx, double doubleVal) {
271
+ // wankel_builder_push(ctx, rb_float_new(doubleVal));
272
+ // return 1;
273
+ // }
274
+ static int wankel_parse_callback_on_number(void * ctx, const char * numberVal, size_t numberLen){
275
+ char buf[numberLen+1];
276
+ buf[numberLen] = 0;
277
+ memcpy(buf, numberVal, numberLen);
278
+
279
+ if (memchr(buf, '.', numberLen) || memchr(buf, 'e', numberLen) || memchr(buf, 'E', numberLen)) {
280
+ wankel_builder_push(ctx, rb_float_new(strtod(buf, NULL)));
281
+ } else {
282
+ wankel_builder_push(ctx, rb_cstr2inum(buf, 10));
283
+ }
284
+ return 1;
285
+ }
286
+ static int wankel_parse_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen) {
287
+ VALUE str = rb_str_new((const char *)stringVal, stringLen);
288
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
289
+ rb_enc_associate(str, rb_utf8_encoding());
290
+ if (default_internal_enc) {
291
+ str = rb_str_export_to_enc(str, default_internal_enc);
292
+ }
293
+
294
+ wankel_builder_push(ctx, str);
295
+ return 1;
296
+ }
297
+ static int wankel_parse_callback_on_map_start(void *ctx) {
298
+ wankel_builder_push(ctx, rb_hash_new());
299
+ return 1;
300
+ }
301
+ static int wankel_parse_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen) {
302
+ wankel_parser * p = ctx;
303
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
304
+ VALUE str = rb_str_new((const char *)key, keyLen);
305
+ rb_enc_associate(str, rb_utf8_encoding());
306
+ if (default_internal_enc) {
307
+ str = rb_str_export_to_enc(str, default_internal_enc);
308
+ }
309
+
310
+ if(p->symbolize_keys) {
311
+ wankel_builder_push(ctx, ID2SYM(rb_to_id(str)) );
312
+ } else {
313
+ wankel_builder_push(ctx, str);
314
+ }
315
+
316
+ return 1;
317
+ }
318
+ static int wankel_parse_callback_on_map_end(void *ctx) {
319
+ wankel_parser * p = ctx;
320
+ p->stack_index--;
321
+
322
+ if(p->stack_index > 0) {
323
+ rb_ary_pop(p->stack);
324
+ } else if(p->stack_index == 0 && p->callback != Qnil) {
325
+ rb_funcall(p->callback, intern_call, 1, rb_ary_pop(p->stack));
326
+ }
327
+
328
+ return 1;
329
+ }
330
+ static int wankel_parse_callback_on_array_start(void *ctx) {
331
+ wankel_builder_push(ctx, rb_ary_new());
332
+ return 1;
333
+ }
334
+ static int wankel_parse_callback_on_array_end(void *ctx) {
335
+ wankel_parser * p = ctx;
336
+ p->stack_index--;
337
+
338
+ if(p->stack_index > 0) {
339
+ rb_ary_pop(p->stack);
340
+ } else if(p->stack_index == 0 && p->callback != Qnil) {
341
+ rb_funcall(p->callback, intern_call, 1, rb_ary_pop(p->stack));
342
+ }
343
+
344
+ return 1;
345
+ }
@@ -0,0 +1,26 @@
1
+ #ifndef WANKEL_PARSER
2
+ #define WANKEL_PARSER
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+ #include <yajl/yajl_common.h>
7
+ #include <yajl/yajl_parse.h>
8
+
9
+ #include "yajl_helpers.h"
10
+
11
+ ID Init_wankel_parser();
12
+
13
+ typedef struct { /* per-instance state of a Wankel::Parser, wrapped via Data_Make_Struct */
14
+ yajl_handle h; /* yajl parser handle; 0 until #parse allocates one */
15
+ yajl_alloc_funcs alloc_funcs; /* allocator hooks passed to yajl_alloc, filled in by #initialize */
16
+ VALUE stack; /* Ruby Array of open containers / pending keys; holds the results after a parse */
17
+ int stack_index; /* number of entries currently open on the stack */
18
+ int symbolize_keys; /* 1 when hash keys are pushed as Symbols, 0 for Strings */
19
+ VALUE rbufsize; /* Fixnum: chunk size requested from IO#read */
20
+ VALUE callback; /* Proc called with each completed top-level object/array, or nil */
21
+ VALUE last_entry; /* NOTE(review): not referenced by the parser code in view */
22
+ } wankel_parser;
23
+
24
+
25
+
26
+ #endif