wankel 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/LICENSE +20 -0
  4. data/README.md +43 -0
  5. data/Rakefile +63 -0
  6. data/benchmark/subjects/item.json +1 -0
  7. data/benchmark/subjects/ohai.json +1216 -0
  8. data/benchmark/subjects/twitter_search.json +1 -0
  9. data/benchmark/subjects/twitter_stream.json +430 -0
  10. data/ext/wankel/extconf.rb +15 -0
  11. data/ext/wankel/wankel.c +50 -0
  12. data/ext/wankel/wankel.h +17 -0
  13. data/ext/wankel/wankel_encoder.c +232 -0
  14. data/ext/wankel/wankel_encoder.h +13 -0
  15. data/ext/wankel/wankel_parser.c +345 -0
  16. data/ext/wankel/wankel_parser.h +26 -0
  17. data/ext/wankel/wankel_sax_encoder.c +290 -0
  18. data/ext/wankel/wankel_sax_encoder.h +13 -0
  19. data/ext/wankel/wankel_sax_parser.c +232 -0
  20. data/ext/wankel/wankel_sax_parser.h +23 -0
  21. data/ext/wankel/yajl_helpers.c +124 -0
  22. data/ext/wankel/yajl_helpers.h +22 -0
  23. data/lib/wankel/ex_sax_parser.rb +75 -0
  24. data/lib/wankel.rb +19 -0
  25. data/logo.png +0 -0
  26. data/test/encoding/encoding_test.rb +230 -0
  27. data/test/encoding/sax_encoder_test.rb +89 -0
  28. data/test/parsing/active_support_test.rb +66 -0
  29. data/test/parsing/fixtures/fail.15.json +1 -0
  30. data/test/parsing/fixtures/fail.16.json +1 -0
  31. data/test/parsing/fixtures/fail.17.json +1 -0
  32. data/test/parsing/fixtures/fail.26.json +1 -0
  33. data/test/parsing/fixtures/fail11.json +1 -0
  34. data/test/parsing/fixtures/fail12.json +1 -0
  35. data/test/parsing/fixtures/fail13.json +1 -0
  36. data/test/parsing/fixtures/fail14.json +1 -0
  37. data/test/parsing/fixtures/fail19.json +1 -0
  38. data/test/parsing/fixtures/fail20.json +1 -0
  39. data/test/parsing/fixtures/fail21.json +1 -0
  40. data/test/parsing/fixtures/fail22.json +1 -0
  41. data/test/parsing/fixtures/fail23.json +1 -0
  42. data/test/parsing/fixtures/fail24.json +1 -0
  43. data/test/parsing/fixtures/fail25.json +1 -0
  44. data/test/parsing/fixtures/fail27.json +2 -0
  45. data/test/parsing/fixtures/fail28.json +2 -0
  46. data/test/parsing/fixtures/fail3.json +1 -0
  47. data/test/parsing/fixtures/fail4.json +1 -0
  48. data/test/parsing/fixtures/fail5.json +1 -0
  49. data/test/parsing/fixtures/fail6.json +1 -0
  50. data/test/parsing/fixtures/fail9.json +1 -0
  51. data/test/parsing/fixtures/pass.array.json +6 -0
  52. data/test/parsing/fixtures/pass.codepoints_from_unicode_org.json +1 -0
  53. data/test/parsing/fixtures/pass.contacts.json +1 -0
  54. data/test/parsing/fixtures/pass.db100.xml.json +1 -0
  55. data/test/parsing/fixtures/pass.db1000.xml.json +1 -0
  56. data/test/parsing/fixtures/pass.dc_simple_with_comments.json +11 -0
  57. data/test/parsing/fixtures/pass.deep_arrays.json +1 -0
  58. data/test/parsing/fixtures/pass.difficult_json_c_test_case.json +1 -0
  59. data/test/parsing/fixtures/pass.difficult_json_c_test_case_with_comments.json +1 -0
  60. data/test/parsing/fixtures/pass.doubles.json +1 -0
  61. data/test/parsing/fixtures/pass.empty_array.json +1 -0
  62. data/test/parsing/fixtures/pass.empty_string.json +1 -0
  63. data/test/parsing/fixtures/pass.escaped_bulgarian.json +4 -0
  64. data/test/parsing/fixtures/pass.escaped_foobar.json +1 -0
  65. data/test/parsing/fixtures/pass.item.json +1 -0
  66. data/test/parsing/fixtures/pass.json-org-sample1.json +23 -0
  67. data/test/parsing/fixtures/pass.json-org-sample2.json +11 -0
  68. data/test/parsing/fixtures/pass.json-org-sample3.json +26 -0
  69. data/test/parsing/fixtures/pass.json-org-sample4-nows.json +88 -0
  70. data/test/parsing/fixtures/pass.json-org-sample4.json +89 -0
  71. data/test/parsing/fixtures/pass.json-org-sample5.json +27 -0
  72. data/test/parsing/fixtures/pass.map-spain.xml.json +1 -0
  73. data/test/parsing/fixtures/pass.ns-invoice100.xml.json +1 -0
  74. data/test/parsing/fixtures/pass.ns-soap.xml.json +1 -0
  75. data/test/parsing/fixtures/pass.numbers-fp-4k.json +6 -0
  76. data/test/parsing/fixtures/pass.numbers-fp-64k.json +61 -0
  77. data/test/parsing/fixtures/pass.numbers-int-4k.json +11 -0
  78. data/test/parsing/fixtures/pass.numbers-int-64k.json +154 -0
  79. data/test/parsing/fixtures/pass.twitter-search.json +1 -0
  80. data/test/parsing/fixtures/pass.twitter-search2.json +1 -0
  81. data/test/parsing/fixtures/pass.unicode.json +3315 -0
  82. data/test/parsing/fixtures/pass.yelp.json +1 -0
  83. data/test/parsing/fixtures/pass1.json +56 -0
  84. data/test/parsing/fixtures/pass2.json +1 -0
  85. data/test/parsing/fixtures/pass3.json +6 -0
  86. data/test/parsing/fixtures_test.rb +43 -0
  87. data/test/parsing/multiple_values_test.rb +100 -0
  88. data/test/parsing/one_off_test.rb +65 -0
  89. data/test/parsing/sax_parser_test.rb +125 -0
  90. data/test/performance.rb +135 -0
  91. data/test/test_helper.rb +36 -0
  92. data/test/wankel_test.rb +53 -0
  93. data/wankel.gemspec +23 -0
  94. metadata +259 -0
@@ -0,0 +1,15 @@
1
# Build configuration for the wankel C extension, which links against libyajl.
require 'mkmf'
require 'rbconfig'

# The leading space matters: $CFLAGS already holds the flags inherited from
# rbconfig, and appending without a separator would fuse '-Wall' onto the
# previous flag.
$CFLAGS << ' -Wall'

# `ruby extconf.rb --coverage` builds the extension with gcov instrumentation.
if ARGV.include?('--coverage')
  $CFLAGS << ' -fprofile-arcs -ftest-coverage'
  $DLDFLAGS << ' --coverage'
end

# Stop with a non-zero exit status when libyajl is missing; otherwise the
# install would continue without a Makefile and fail later with a less
# helpful message.
abort "Couldn't find YAJL library" unless have_library('yajl')

create_makefile('wankel/wankel')
@@ -0,0 +1,50 @@
1
+ #include "wankel.h"
2
+
3
+ static ID intern_new, intern_parse, intern_encode;
4
+
5
+ static VALUE c_wankel, c_wankelParser, c_wankelEncoder, e_parseError, e_encodeError;
6
+
7
+ // Class Methods =============================================================
8
+ static VALUE wankel_parse(int argc, VALUE * argv, VALUE klass) {
9
+ VALUE parser, input, options, callback;
10
+ rb_scan_args(argc, argv, "11&", &input, &options, &callback);
11
+
12
+ parser = rb_funcall(c_wankelParser, intern_new, 1, options);
13
+ return rb_funcall(parser, intern_parse, 2, input, callback);
14
+ }
15
+
16
+ static VALUE wankel_encode(int argc, VALUE * argv, VALUE klass) {
17
+ VALUE encoder, input, output, options;
18
+ rb_scan_args(argc, argv, "12", &input, &output, &options);
19
+
20
+
21
+ if (TYPE(output) == T_HASH) {
22
+ encoder = rb_funcall(c_wankelEncoder, intern_new, 1, output);
23
+ return rb_funcall(encoder, intern_encode, 1, input);
24
+ } else {
25
+ encoder = rb_funcall(c_wankelEncoder, intern_new, 1, options);
26
+ return rb_funcall(encoder, intern_encode, 2, input, output);
27
+ }
28
+ }
29
+
30
+ void Init_wankel() {
31
+ c_wankel = rb_define_class("Wankel", rb_cObject);
32
+ e_parseError = rb_define_class_under(c_wankel, "ParseError", rb_eStandardError);
33
+ e_encodeError = rb_define_class_under(c_wankel, "EncodeError", rb_eStandardError);
34
+
35
+ intern_new = rb_intern("new");
36
+ intern_parse = rb_intern("parse");
37
+ intern_encode = rb_intern("encode");
38
+
39
+ rb_define_singleton_method(c_wankel, "parse", wankel_parse, -1);
40
+ rb_define_singleton_method(c_wankel, "encode", wankel_encode, -1);
41
+
42
+ rb_define_singleton_method(c_wankel, "load", wankel_parse, -1);
43
+ rb_define_singleton_method(c_wankel, "dump", wankel_encode, -1);
44
+
45
+ c_wankelParser = Init_wankel_parser();
46
+ c_wankelEncoder = Init_wankel_encoder();
47
+ Init_wankel_sax_parser();
48
+ Init_wankel_sax_encoder();
49
+ Init_yajl_helpers();
50
+ }
@@ -0,0 +1,17 @@
1
+ #ifndef WANKEL
2
+ #define WANKEL
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+ #include <yajl/yajl_common.h>
7
+ #include <yajl/yajl_parse.h>
8
+
9
+ #include "wankel_parser.h"
10
+ #include "wankel_encoder.h"
11
+ #include "wankel_sax_parser.h"
12
+ #include "wankel_sax_encoder.h"
13
+ #include "yajl_helpers.h"
14
+
15
+ void Init_wankel();
16
+
17
+ #endif
@@ -0,0 +1,232 @@
1
+ // TODO: i should use the Wankel::SaxEncoder?
2
+ #include "wankel_encoder.h"
3
+
4
+ static VALUE c_wankel, c_wankelEncoder, e_encodeError;
5
+
6
+ static ID intern_to_s, intern_keys, intern_io_write, intern_to_json, intern_clone, intern_merge, intern_DEFAULTS;
7
+
8
+ static ID sym_beautify, sym_indent_string, sym_validate_utf8, sym_escape_solidus;
9
+
10
+
11
+ static void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size);
12
+ static void yajl_encode_part(yajl_gen g, VALUE obj, VALUE io, int write_buffer_size);
13
+ static void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size);
14
+
15
+ /*
16
+ * Document-method: new
17
+ *
18
+ * call-seq: new([options])
19
+ *
20
+ * +:beautify+ generate indented (beautiful) output. Default `false`.
21
+ *
22
+ * +:indent_string+ Set an indent string which is used when yajl_gen_beautify
23
+ * is enabled. Maybe something like \\t or some number of
24
+ * spaces. The default is four spaces ' '.
25
+ *
26
+ * +:validate_utf8+ Normally the generator does not validate that strings you
27
+ * pass to it are valid UTF8. Enabling this option will cause
28
+ * it to do so.
29
+ *
30
+ * +:escape_solidus+ the forward solidus (slash or '/' in human) is not required
31
+ * to be escaped in json text. By default, YAJL will not escape
32
+ * it in the interest of saving bytes. Setting this flag will
33
+ * cause YAJL to always escape '/' in generated JSON strings.
34
+ */
35
+ static VALUE wankelEncoder_initialize(int argc, VALUE * argv, VALUE self) {
36
+ VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
37
+ VALUE options;
38
+
39
+ rb_scan_args(argc, argv, "01", &options);
40
+ if(options == Qnil) {
41
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0) );
42
+ } else {
43
+ Check_Type(options, T_HASH);
44
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options) );
45
+ }
46
+
47
+ return self;
48
+ }
49
+
50
+ /*
51
+ * Document-method: encode
52
+ *
53
+ * call-seq: encode(obj[, io])
54
+ *
55
+ * +obj+ is the Ruby object to encode to JSON
56
+ *
57
+ * +io+ is an optional IO used to stream the encoded JSON string to. If no io
58
+ * is specified the resulting JSON string is returned. If io is specified,
59
+ * this method returns nil
60
+ */
61
+ static VALUE wankelEncoder_encode(int argc, VALUE * argv, VALUE self) {
62
+ VALUE obj, io, options;
63
+ yajl_gen g;
64
+ yajl_alloc_funcs alloc_funcs;
65
+ yajl_gen_status status;
66
+ int write_buffer_size;
67
+ const unsigned char * buffer;
68
+ size_t len;
69
+
70
+ rb_scan_args(argc, argv, "11", &obj, &io);
71
+ options = rb_iv_get(self, "@options");
72
+
73
+ alloc_funcs.malloc = yajl_helper_malloc;
74
+ alloc_funcs.realloc = yajl_helper_realloc;
75
+ alloc_funcs.free = yajl_helper_free;
76
+ g = yajl_gen_alloc(&alloc_funcs);
77
+
78
+ yajl_gen_configure(g, options);
79
+
80
+ if (io != Qnil && !rb_respond_to(io, intern_io_write)) {
81
+ rb_raise(e_encodeError, "output must be a an IO");
82
+ }
83
+
84
+ write_buffer_size = FIX2INT(rb_hash_aref(options, ID2SYM(rb_intern("write_buffer_size"))));
85
+
86
+ yajl_encode_part(g, obj, io, write_buffer_size);
87
+
88
+ // TODO: add terminator here if desired
89
+ if (io == Qnil) {
90
+ status = yajl_gen_get_buf(g, &buffer, &len);
91
+ yajl_helper_check_gen_status(status);
92
+ io = rb_str_new((const char *)buffer, len);
93
+ rb_enc_associate(io, rb_utf8_encoding());
94
+ yajl_gen_clear(g);
95
+ yajl_gen_free(g);
96
+ return io;
97
+ } else {
98
+ wankelEncoder_flush(g, io, 1);
99
+ yajl_gen_free(g);
100
+ return Qnil;
101
+ }
102
+ return self;
103
+ }
104
+
105
+ void wankelEncoder_flush(yajl_gen g, VALUE io, int write_buffer_size) {
106
+ VALUE rbBuffer;
107
+ yajl_gen_status status;
108
+ const unsigned char * buffer;
109
+ size_t len;
110
+
111
+ if (io != Qnil) {
112
+ status = yajl_gen_get_buf(g, &buffer, &len);
113
+ yajl_helper_check_gen_status(status);
114
+
115
+ if (len >= (size_t)write_buffer_size) {
116
+ rbBuffer = rb_str_new((const char *)buffer, len);
117
+ rb_enc_associate(rbBuffer, rb_utf8_encoding());
118
+ rb_io_write(io, rbBuffer);
119
+ yajl_gen_clear(g);
120
+ }
121
+ }
122
+ }
123
+
124
+ void yajl_encode_part(yajl_gen g, VALUE obj, VALUE io, int write_buffer_size) {
125
+ size_t len;
126
+ int idx = 0;
127
+ VALUE keys, entry, str;
128
+ const char * cptr;
129
+ yajl_gen_status status;
130
+
131
+ switch (TYPE(obj)) {
132
+ case T_HASH:
133
+ status = yajl_gen_map_open(g);
134
+ yajl_helper_check_gen_status(status);
135
+
136
+ keys = rb_funcall(obj, intern_keys, 0);
137
+ for(idx = 0; idx < RARRAY_LEN(keys); idx++) {
138
+ entry = rb_ary_entry(keys, idx);
139
+ str = rb_funcall(entry, intern_to_s, 0); /* key must be a string */
140
+ /* the key */
141
+ yajl_encode_part(g, str, io, write_buffer_size);
142
+ /* the value */
143
+ yajl_encode_part(g, rb_hash_aref(obj, entry), io, write_buffer_size);
144
+ }
145
+
146
+ status = yajl_gen_map_close(g);
147
+ yajl_helper_check_gen_status(status);
148
+ break;
149
+ case T_ARRAY:
150
+ status = yajl_gen_array_open(g);
151
+ yajl_helper_check_gen_status(status);
152
+
153
+ for(idx = 0; idx < RARRAY_LEN(obj); idx++) {
154
+ yajl_encode_part(g, rb_ary_entry(obj, idx), io, write_buffer_size);
155
+ }
156
+ status = yajl_gen_array_close(g);
157
+ yajl_helper_check_gen_status(status);
158
+ break;
159
+ case T_NIL:
160
+ status = yajl_gen_null(g);
161
+ yajl_helper_check_gen_status(status);
162
+ break;
163
+ case T_TRUE:
164
+ status = yajl_gen_bool(g, 1);
165
+ yajl_helper_check_gen_status(status);
166
+ break;
167
+ case T_FALSE:
168
+ status = yajl_gen_bool(g, 0);
169
+ yajl_helper_check_gen_status(status);
170
+ break;
171
+ case T_FIXNUM:
172
+ case T_FLOAT:
173
+ case T_BIGNUM:
174
+ str = rb_funcall(obj, intern_to_s, 0);
175
+ cptr = RSTRING_PTR(str);
176
+ len = RSTRING_LEN(str);
177
+ if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) {
178
+ rb_raise(e_encodeError, "'%s' is an invalid number", cptr);
179
+ }
180
+ status = yajl_gen_number(g, cptr, len);
181
+ yajl_helper_check_gen_status(status);
182
+ break;
183
+ case T_STRING:
184
+ cptr = RSTRING_PTR(obj);
185
+ len = RSTRING_LEN(obj);
186
+ status = yajl_gen_string(g, (const unsigned char *)cptr, len);
187
+ yajl_helper_check_gen_status(status);
188
+ break;
189
+ default:
190
+ if (rb_respond_to(obj, intern_to_json)) {
191
+ str = rb_funcall(obj, intern_to_json, 0);
192
+ Check_Type(str, T_STRING);
193
+ cptr = RSTRING_PTR(str);
194
+ len = RSTRING_LEN(str);
195
+ status = yajl_gen_number(g, cptr, len);
196
+ yajl_helper_check_gen_status(status);
197
+ } else {
198
+ str = rb_funcall(obj, intern_to_s, 0);
199
+ Check_Type(str, T_STRING);
200
+ cptr = RSTRING_PTR(str);
201
+ len = RSTRING_LEN(str);
202
+ status = yajl_gen_string(g, (const unsigned char *)cptr, len);
203
+ yajl_helper_check_gen_status(status);
204
+ }
205
+ break;
206
+ }
207
+
208
+ wankelEncoder_flush(g, io, write_buffer_size);
209
+ }
210
+
211
+ ID Init_wankel_encoder() {
212
+ c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
213
+ c_wankelEncoder = rb_define_class_under(c_wankel, "Encoder", rb_cObject);
214
+ e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
215
+
216
+ rb_define_method(c_wankelEncoder, "initialize", wankelEncoder_initialize, -1);
217
+ rb_define_method(c_wankelEncoder, "encode", wankelEncoder_encode, -1);
218
+
219
+ intern_to_s = rb_intern("to_s");
220
+ intern_io_write = rb_intern("write");
221
+ intern_to_json = rb_intern("to_json");
222
+ intern_keys = rb_intern("keys");
223
+ intern_clone = rb_intern("clone");
224
+ intern_merge = rb_intern("merge");
225
+ intern_DEFAULTS = rb_intern("DEFAULTS");
226
+ sym_beautify = ID2SYM(rb_intern("beautify"));
227
+ sym_indent_string = ID2SYM(rb_intern("indent_string"));
228
+ sym_validate_utf8 = ID2SYM(rb_intern("validate_utf8"));
229
+ sym_escape_solidus = ID2SYM(rb_intern("escape_solidus"));
230
+
231
+ return c_wankelEncoder;
232
+ }
@@ -0,0 +1,13 @@
1
+ #ifndef WANKEL_ENCODER
2
+ #define WANKEL_ENCODER
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+ #include <yajl/yajl_common.h>
7
+ #include <yajl/yajl_gen.h>
8
+
9
+ #include "yajl_helpers.h"
10
+
11
+ ID Init_wankel_encoder();
12
+
13
+ #endif
@@ -0,0 +1,345 @@
1
+ #include "wankel_parser.h"
2
+
3
+ // Callbacks =================================================================
4
+ static int wankel_parse_callback_on_null(void *ctx);
5
+ static int wankel_parse_callback_on_boolean(void *ctx, int boolVal);
6
+ // static int wankel_parse_callback_on_integer(void *ctx, long long integerVal);
7
+ // static int wankel_parse_callback_on_double(void *ctx, double doubleVal);
8
+ static int wankel_parse_callback_on_number(void *ctx, const char * numberVal, size_t numberLen);
9
+ static int wankel_parse_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen);
10
+ static int wankel_parse_callback_on_map_start(void *ctx);
11
+ static int wankel_parse_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen);
12
+ static int wankel_parse_callback_on_map_end(void *ctx);
13
+ static int wankel_parse_callback_on_array_start(void *ctx);
14
+ static int wankel_parse_callback_on_array_end(void *ctx);
15
+
16
+ static yajl_callbacks callbacks = {
17
+ wankel_parse_callback_on_null,
18
+ wankel_parse_callback_on_boolean,
19
+ NULL,
20
+ NULL,
21
+ wankel_parse_callback_on_number,
22
+ wankel_parse_callback_on_string,
23
+ wankel_parse_callback_on_map_start,
24
+ wankel_parse_callback_on_map_key,
25
+ wankel_parse_callback_on_map_end,
26
+ wankel_parse_callback_on_array_start,
27
+ wankel_parse_callback_on_array_end
28
+ };
29
+
30
+ // Ruby GC ===================================================================
31
+ static VALUE wankel_alloc(VALUE klass);
32
+ static void wankel_free(void * handle);
33
+ static void wankel_mark(void * handle);
34
+
35
+ static ID intern_io_read, intern_clone, intern_merge, intern_call,
36
+ intern_DEFAULTS, sym_multiple_values;
37
+
38
+ static ID sym_read_buffer_size, sym_symbolize_keys;
39
+
40
+ static VALUE c_wankel, c_wankelParser, e_parseError, e_encodeError;
41
+
42
+ /*
43
+ * Document-method: new
44
+ *
45
+ * call-seq: new([options])
46
+ *
47
+ * +:symbolize_keys+ will turn hash keys into Ruby symbols, defaults to false.
48
+ * Default `false`.
49
+ *
50
+ * +:allow_comments+ will ignore javascript style comments in JSON input.
51
+ * Default `false`.
52
+ *
53
+ * +:validate_strings+ will verify that all strings in JSON input are valid UTF8
54
+ * and will emit a parse error if this is not so. This option
55
+ * makes parsing slightly more expensive (~7% depending on
56
+ * processor and compiler in use). Default `false`.
57
+ *
58
+ * +:allow_trailing_garbage+ will ensure the entire input text was consumed and
59
+ * will raise an error otherwise. Default `false`.
60
+ *
61
+ * +:multiple_values+ allow multiple values to be parsed by a single parser. The
62
+ * entire text must be valid JSON, and values can be separated
63
+ * by any kind of whitespace. Default `false`.
64
+ *
65
+ * +:allow_partial_values+ check that the top level value was completely consumed/
66
+ * Default `false`.
67
+ *
68
+ *
69
+ * +:read_buffer_size+ is the size of chunk that will be parsed off the input
70
+ * (if it's an IO) for each loop of the parsing process.
71
+ * 8092 is a good balance between the different types of
72
+ * streams (off disk, off a socket, etc...), but this option
73
+ * is here so the caller can better tune their parsing depending
74
+ * on the type of stream being passed. A larger read buffer
75
+ * will perform better for files off disk, where as a smaller
76
+ * size may be more efficient for reading off of a socket
77
+ * directly.
78
+ */
79
+ static VALUE wankelParser_initialize(int argc, VALUE * argv, VALUE self) {
80
+ VALUE defaults = rb_const_get(c_wankel, intern_DEFAULTS);
81
+ VALUE options, rbufsize;
82
+ wankel_parser * p;
83
+
84
+ rb_scan_args(argc, argv, "01", &options);
85
+ if(options == Qnil) {
86
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_clone, 0) );
87
+ } else {
88
+ Check_Type(options, T_HASH);
89
+ rb_iv_set(self, "@options", rb_funcall(defaults, intern_merge, 1, options) );
90
+ }
91
+ options = rb_iv_get(self, "@options");
92
+ rbufsize = rb_hash_aref(options, sym_read_buffer_size);
93
+ Check_Type(rbufsize, T_FIXNUM);
94
+
95
+ Data_Get_Struct(self, wankel_parser, p);
96
+ p->alloc_funcs.malloc = yajl_helper_malloc;
97
+ p->alloc_funcs.realloc = yajl_helper_realloc;
98
+ p->alloc_funcs.free = yajl_helper_free;
99
+ p->rbufsize = rbufsize;
100
+
101
+ if(rb_hash_aref(options, sym_symbolize_keys) == Qtrue) {
102
+ p->symbolize_keys = 1;
103
+ } else {
104
+ p->symbolize_keys = 0;
105
+ }
106
+
107
+ return self;
108
+ }
109
+
110
+ /*
111
+ * Document-method: parse
112
+ *
113
+ * call-seq: parse(input[, &block])
114
+ *
115
+ * input can either be a String or an IO object
116
+ *
117
+ * If a block is passed, it is called when the input is finished parsing. If
118
+ * parsing multiple json values in an input it is called once for each value
119
+ *
120
+ */
121
+ static VALUE wankelParser_parse(int argc, VALUE * argv, VALUE self) {
122
+ const char * cptr;
123
+ unsigned int len;
124
+ yajl_status status;
125
+ wankel_parser * p;
126
+ VALUE input, callback;
127
+ VALUE options = rb_iv_get(self, "@options");
128
+ rb_scan_args(argc, argv, "11", &input, &callback); // Hack, cuz i'm not sure how to call a method with a block from c
129
+
130
+ if(callback == Qnil && rb_block_given_p()) {
131
+ callback = rb_block_proc();
132
+ }
133
+
134
+ Data_Get_Struct(self, wankel_parser, p);
135
+ p->h = yajl_alloc(&callbacks, &p->alloc_funcs, (void *)p);
136
+ yajl_configure(p->h, options);
137
+ p->callback = callback;
138
+ p->stack = rb_ary_new();
139
+ p->stack_index = 0;
140
+ if (TYPE(input) == T_STRING) {
141
+ cptr = RSTRING_PTR(input);
142
+ len = (unsigned int)RSTRING_LEN(input);
143
+ status = yajl_parse(p->h, (const unsigned char*)cptr, len);
144
+ yajl_helper_check_status(p->h, status, 1, (const unsigned char*)cptr, len);
145
+ } else if (rb_respond_to(input, intern_io_read)) {
146
+ VALUE chunk = rb_str_new(0, NUM2LONG(p->rbufsize));
147
+ while (rb_funcall(input, intern_io_read, 2, p->rbufsize, chunk) != Qnil) {
148
+ cptr = RSTRING_PTR(chunk);
149
+ len = (unsigned int)RSTRING_LEN(chunk);
150
+ status = yajl_parse(p->h, (const unsigned char*)cptr, len);
151
+ yajl_helper_check_status(p->h, status, 1, (const unsigned char*)cptr, len);
152
+ }
153
+ } else {
154
+ rb_raise(e_parseError, "input must be a string or an IO");
155
+ }
156
+
157
+ status = yajl_complete_parse(p->h);
158
+ if(status != yajl_status_ok) {
159
+ rb_raise(e_parseError, "Error completing parse");
160
+ }
161
+
162
+ if(rb_block_given_p()) {
163
+ return Qnil;
164
+ } else if(rb_hash_aref(options, sym_multiple_values) == Qtrue) {
165
+ return p->stack;
166
+ } else {
167
+ return rb_ary_pop(p->stack);
168
+ }
169
+ }
170
+
171
+ ID Init_wankel_parser() {
172
+ c_wankel = rb_const_get(rb_cObject, rb_intern("Wankel"));
173
+ c_wankelParser = rb_define_class_under(c_wankel, "Parser", rb_cObject);
174
+ e_parseError = rb_const_get(c_wankel, rb_intern("ParseError"));
175
+ e_encodeError = rb_const_get(c_wankel, rb_intern("EncodeError"));
176
+
177
+ rb_define_alloc_func(c_wankelParser, wankel_alloc);
178
+ rb_define_method(c_wankelParser, "initialize", wankelParser_initialize, -1);
179
+ rb_define_method(c_wankelParser, "parse", wankelParser_parse, -1);
180
+
181
+ intern_io_read = rb_intern("read");
182
+ intern_clone = rb_intern("clone");
183
+ intern_merge = rb_intern("merge");
184
+ intern_call = rb_intern("call");
185
+ intern_DEFAULTS = rb_intern("DEFAULTS");
186
+ sym_read_buffer_size = ID2SYM(rb_intern("read_buffer_size"));
187
+ sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
188
+ sym_multiple_values = ID2SYM(rb_intern("multiple_values"));
189
+
190
+ return c_wankelParser;
191
+ }
192
+ // Ruby GC ===================================================================
193
+ static VALUE wankel_alloc(VALUE klass) {
194
+ VALUE self;
195
+ wankel_parser * p;
196
+ self = Data_Make_Struct(klass, wankel_parser, wankel_mark, wankel_free, p);
197
+ p->h = 0;
198
+ return self;
199
+ }
200
+
201
+ static void wankel_free(void * handle) {
202
+ wankel_parser * p = handle;
203
+ if (p->h){
204
+ yajl_free(p->h);
205
+ }
206
+ }
207
+
208
+ static void wankel_mark(void * handle) {
209
+ wankel_parser * p = handle;
210
+ rb_gc_mark(p->stack);
211
+ rb_gc_mark(p->rbufsize);
212
+ }
213
+
214
+ // Parse Builder =============================================================
215
+ static void wankel_builder_push(void *ctx, VALUE val) {
216
+ int len;
217
+ wankel_parser * p = ctx;
218
+ VALUE lastEntry, hash;
219
+
220
+ if (p->stack_index > 0) {
221
+ len = (int)RARRAY_LEN(p->stack);
222
+ lastEntry = rb_ary_entry(p->stack, len-1);
223
+ switch (TYPE(lastEntry)) {
224
+ case T_ARRAY:
225
+ rb_ary_push(lastEntry, val);
226
+ if (TYPE(val) == T_HASH || TYPE(val) == T_ARRAY) {
227
+ rb_ary_push(p->stack, val);
228
+ p->stack_index++;
229
+ }
230
+ break;
231
+ case T_HASH:
232
+ rb_hash_aset(lastEntry, val, Qnil);
233
+ rb_ary_push(p->stack, val);
234
+ p->stack_index++;
235
+ break;
236
+ case T_STRING:
237
+ case T_SYMBOL:
238
+ hash = rb_ary_entry(p->stack, len-2);
239
+ if (TYPE(hash) == T_HASH) {
240
+ rb_hash_aset(hash, lastEntry, val);
241
+ rb_ary_pop(p->stack);
242
+ p->stack_index--;
243
+ if (TYPE(val) == T_HASH || TYPE(val) == T_ARRAY) {
244
+ rb_ary_push(p->stack, val);
245
+ p->stack_index++;
246
+ }
247
+ }
248
+ break;
249
+ }
250
+ } else {
251
+ rb_ary_push(p->stack, val);
252
+ p->stack_index++;
253
+ }
254
+ }
255
+
256
+ static int wankel_parse_callback_on_null(void *ctx) {
257
+ wankel_builder_push(ctx, Qnil);
258
+ return 1;
259
+ }
260
+
261
+ static int wankel_parse_callback_on_boolean(void *ctx, int boolVal) {
262
+ wankel_builder_push(ctx, boolVal ? Qtrue : Qfalse);
263
+ return 1;
264
+ }
265
+
266
+ // static int wankel_parse_callback_on_integer(void *ctx, long long integerVal) {
267
+ // wankel_builder_push(ctx, LL2NUM(integerVal));
268
+ // return 1;
269
+ // }
270
+ // static int wankel_parse_callback_on_double(void *ctx, double doubleVal) {
271
+ // wankel_builder_push(ctx, rb_float_new(doubleVal));
272
+ // return 1;
273
+ // }
274
+ static int wankel_parse_callback_on_number(void * ctx, const char * numberVal, size_t numberLen){
275
+ char buf[numberLen+1];
276
+ buf[numberLen] = 0;
277
+ memcpy(buf, numberVal, numberLen);
278
+
279
+ if (memchr(buf, '.', numberLen) || memchr(buf, 'e', numberLen) || memchr(buf, 'E', numberLen)) {
280
+ wankel_builder_push(ctx, rb_float_new(strtod(buf, NULL)));
281
+ } else {
282
+ wankel_builder_push(ctx, rb_cstr2inum(buf, 10));
283
+ }
284
+ return 1;
285
+ }
286
+ static int wankel_parse_callback_on_string(void *ctx, const unsigned char * stringVal, size_t stringLen) {
287
+ VALUE str = rb_str_new((const char *)stringVal, stringLen);
288
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
289
+ rb_enc_associate(str, rb_utf8_encoding());
290
+ if (default_internal_enc) {
291
+ str = rb_str_export_to_enc(str, default_internal_enc);
292
+ }
293
+
294
+ wankel_builder_push(ctx, str);
295
+ return 1;
296
+ }
297
+ static int wankel_parse_callback_on_map_start(void *ctx) {
298
+ wankel_builder_push(ctx, rb_hash_new());
299
+ return 1;
300
+ }
301
+ static int wankel_parse_callback_on_map_key(void *ctx, const unsigned char * key, size_t keyLen) {
302
+ wankel_parser * p = ctx;
303
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
304
+ VALUE str = rb_str_new((const char *)key, keyLen);
305
+ rb_enc_associate(str, rb_utf8_encoding());
306
+ if (default_internal_enc) {
307
+ str = rb_str_export_to_enc(str, default_internal_enc);
308
+ }
309
+
310
+ if(p->symbolize_keys) {
311
+ wankel_builder_push(ctx, ID2SYM(rb_to_id(str)) );
312
+ } else {
313
+ wankel_builder_push(ctx, str);
314
+ }
315
+
316
+ return 1;
317
+ }
318
+ static int wankel_parse_callback_on_map_end(void *ctx) {
319
+ wankel_parser * p = ctx;
320
+ p->stack_index--;
321
+
322
+ if(p->stack_index > 0) {
323
+ rb_ary_pop(p->stack);
324
+ } else if(p->stack_index == 0 && p->callback != Qnil) {
325
+ rb_funcall(p->callback, intern_call, 1, rb_ary_pop(p->stack));
326
+ }
327
+
328
+ return 1;
329
+ }
330
+ static int wankel_parse_callback_on_array_start(void *ctx) {
331
+ wankel_builder_push(ctx, rb_ary_new());
332
+ return 1;
333
+ }
334
+ static int wankel_parse_callback_on_array_end(void *ctx) {
335
+ wankel_parser * p = ctx;
336
+ p->stack_index--;
337
+
338
+ if(p->stack_index > 0) {
339
+ rb_ary_pop(p->stack);
340
+ } else if(p->stack_index == 0 && p->callback != Qnil) {
341
+ rb_funcall(p->callback, intern_call, 1, rb_ary_pop(p->stack));
342
+ }
343
+
344
+ return 1;
345
+ }
@@ -0,0 +1,26 @@
1
+ #ifndef WANKEL_PARSER
2
+ #define WANKEL_PARSER
3
+
4
+ #include <ruby.h>
5
+ #include <ruby/encoding.h>
6
+ #include <yajl/yajl_common.h>
7
+ #include <yajl/yajl_parse.h>
8
+
9
+ #include "yajl_helpers.h"
10
+
11
+ ID Init_wankel_parser();
12
+
13
+ typedef struct {
14
+ yajl_handle h;
15
+ yajl_alloc_funcs alloc_funcs;
16
+ VALUE stack;
17
+ int stack_index;
18
+ int symbolize_keys;
19
+ VALUE rbufsize;
20
+ VALUE callback;
21
+ VALUE last_entry;
22
+ } wankel_parser;
23
+
24
+
25
+
26
+ #endif