oj 1.4.7 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of oj might be problematic. Click here for more details.

data/README.md CHANGED
@@ -32,9 +32,11 @@ A fast JSON parser and Object marshaller as a Ruby gem.
32
32
 
33
33
  ## <a name="release">Release Notes</a>
34
34
 
35
- ### Release 1.4.7
35
+ ### Release 2.0.0
36
36
 
37
- - In compat mode non-String keys are converted to Strings instead of raising an error. (issue #52)
37
+ - Thanks to yuki24, Floats are now output with a decimal point even if they are an integer value.
38
+
39
+ - <b>The Simple API for JSON (SAJ) API has been added. Read more about it on the [Oj::Saj page](http://www.ohler.com/oj/Oj/Saj.html).</b>
38
40
 
39
41
  ## <a name="description">Description</a>
40
42
 
@@ -71,6 +73,15 @@ build with extensions enabled. Check the documenation for JRuby installs in your
71
73
  Oj is also compatible with Rails. Just make sure the Oj gem is installed and
72
74
  [multi_json](https://github.com/intridea/multi_json) will pick it up and use it.
73
75
 
76
+ Oj offers two alternative APIs for processing JSON. The fastest one is the Oj::Doc API. The Oj::Doc API takes a
77
+ completely different approach by opening a JSON document and providing calls to navigate around the JSON while it is
78
+ open. With this approach JSON access can be well over 20 times faster than conventional JSON parsing.
79
+
80
+ Another API, the Oj::Saj API, follows an XML SAX model: it walks the JSON document depth first and makes callbacks for
81
+ each element. The Oj::Saj API is useful when only portions of the JSON are of interest. Performance up to 20 times
82
+ faster than conventional JSON parsing is possible. The API is simple to use but does require a different approach than the
83
+ parse-then-access approach used by conventional JSON parsing.
84
+
74
85
  ## <a name="compare">Comparisons</a>
75
86
 
76
87
  ### Fast Oj::Doc parser comparisons
@@ -51,7 +51,6 @@ oj_cache_new(Cache *cache) {
51
51
  }
52
52
  (*cache)->key = 0;
53
53
  (*cache)->value = Qundef;
54
- //bzero((*cache)->slots, sizeof((*cache)->slots));
55
54
  memset((*cache)->slots, 0, sizeof((*cache)->slots));
56
55
  }
57
56
 
@@ -436,9 +436,11 @@ dump_float(VALUE obj, Out out) {
436
436
  } else if (-INFINITY == d) {
437
437
  strcpy(buf, "-Infinity");
438
438
  cnt = 9;
439
+ } else if (d == (double)(long long int)d) {
440
+ cnt = sprintf(buf, "%.1f", d); // used sprintf due to bug in snprintf
439
441
  } else {
440
442
  cnt = sprintf(buf, "%0.16g", d); // used sprintf due to bug in snprintf
441
- }
443
+ }
442
444
  if (out->end - out->cur <= (long)cnt) {
443
445
  grow(out, cnt);
444
446
  }
@@ -47,8 +47,14 @@ void Init_oj();
47
47
 
48
48
  VALUE Oj = Qnil;
49
49
 
50
+ ID oj_add_value_id;
51
+ ID oj_array_end_id;
52
+ ID oj_array_start_id;
50
53
  ID oj_as_json_id;
54
+ ID oj_error_id;
51
55
  ID oj_fileno_id;
56
+ ID oj_hash_end_id;
57
+ ID oj_hash_start_id;
52
58
  ID oj_instance_variables_id;
53
59
  ID oj_json_create_id;
54
60
  ID oj_new_id;
@@ -543,6 +549,85 @@ to_file(int argc, VALUE *argv, VALUE self) {
543
549
  return Qnil;
544
550
  }
545
551
 
552
+ /* call-seq: saj_parse(handler, io)
553
+ *
554
+ * Parses an IO stream or file containing an JSON document. Raises an exception
555
+ * if the JSON is malformed.
556
+ * @param [Oj::Saj] handler SAJ (responds to Oj::Saj methods) like handler
557
+ * @param [IO|String] io IO Object to read from
558
+ */
559
+ static VALUE
560
+ saj_parse(int argc, VALUE *argv, VALUE self) {
561
+ struct _Options copts = oj_default_options;
562
+ char *json;
563
+ size_t len;
564
+ VALUE input = argv[1];
565
+
566
+ if (argc < 2) {
567
+ rb_raise(rb_eArgError, "Wrong number of arguments to saj_parse.\n");
568
+ }
569
+ if (rb_type(input) == T_STRING) {
570
+ // the json string gets modified so make a copy of it
571
+ len = RSTRING_LEN(input) + 1;
572
+ if (copts.max_stack < len) {
573
+ json = ALLOC_N(char, len);
574
+ } else {
575
+ json = ALLOCA_N(char, len);
576
+ }
577
+ strcpy(json, StringValuePtr(input));
578
+ } else {
579
+ VALUE clas = rb_obj_class(input);
580
+ VALUE s;
581
+
582
+ if (oj_stringio_class == clas) {
583
+ s = rb_funcall2(input, oj_string_id, 0, 0);
584
+ len = RSTRING_LEN(s) + 1;
585
+ if (copts.max_stack < len) {
586
+ json = ALLOC_N(char, len);
587
+ } else {
588
+ json = ALLOCA_N(char, len);
589
+ }
590
+ strcpy(json, StringValuePtr(s));
591
+ #ifndef JRUBY_RUBY
592
+ #if !IS_WINDOWS
593
+ // JRuby gets confused with what is the real fileno.
594
+ } else if (rb_respond_to(input, oj_fileno_id) && Qnil != (s = rb_funcall(input, oj_fileno_id, 0))) {
595
+ int fd = FIX2INT(s);
596
+ ssize_t cnt;
597
+
598
+ len = lseek(fd, 0, SEEK_END);
599
+ lseek(fd, 0, SEEK_SET);
600
+ if (copts.max_stack < len) {
601
+ json = ALLOC_N(char, len + 1);
602
+ } else {
603
+ json = ALLOCA_N(char, len + 1);
604
+ }
605
+ if (0 >= (cnt = read(fd, json, len)) || cnt != (ssize_t)len) {
606
+ rb_raise(rb_eIOError, "failed to read from IO Object.");
607
+ }
608
+ json[len] = '\0';
609
+ #endif
610
+ #endif
611
+ } else if (rb_respond_to(input, oj_read_id)) {
612
+ s = rb_funcall2(input, oj_read_id, 0, 0);
613
+ len = RSTRING_LEN(s) + 1;
614
+ if (copts.max_stack < len) {
615
+ json = ALLOC_N(char, len);
616
+ } else {
617
+ json = ALLOCA_N(char, len);
618
+ }
619
+ strcpy(json, StringValuePtr(s));
620
+ } else {
621
+ rb_raise(rb_eArgError, "saj_parse() expected a String or IO Object.");
622
+ }
623
+ }
624
+ oj_saj_parse(*argv, json);
625
+ if (copts.max_stack < len) {
626
+ xfree(json);
627
+ }
628
+ return Qnil;
629
+ }
630
+
546
631
  // Mimic JSON section
547
632
 
548
633
  static VALUE
@@ -880,8 +965,16 @@ void Init_oj() {
880
965
  rb_define_module_function(Oj, "dump", dump, -1);
881
966
  rb_define_module_function(Oj, "to_file", to_file, -1);
882
967
 
968
+ rb_define_module_function(Oj, "saj_parse", saj_parse, -1);
969
+
970
+ oj_add_value_id = rb_intern("add_value");
971
+ oj_array_end_id = rb_intern("array_end");
972
+ oj_array_start_id = rb_intern("array_start");
883
973
  oj_as_json_id = rb_intern("as_json");
974
+ oj_error_id = rb_intern("error");
884
975
  oj_fileno_id = rb_intern("fileno");
976
+ oj_hash_end_id = rb_intern("hash_end");
977
+ oj_hash_start_id = rb_intern("hash_start");
885
978
  oj_instance_variables_id = rb_intern("instance_variables");
886
979
  oj_json_create_id = rb_intern("json_create");
887
980
  oj_new_id = rb_intern("new");
@@ -144,6 +144,8 @@ typedef struct _Leaf {
144
144
  } *Leaf;
145
145
 
146
146
  extern VALUE oj_parse(char *json, Options options);
147
+ extern void oj_saj_parse(VALUE handler, char *json);
148
+
147
149
  extern char* oj_write_obj_to_str(VALUE obj, Options copts);
148
150
  extern void oj_write_obj_to_file(VALUE obj, const char *path, Options copts);
149
151
  extern char* oj_write_leaf_to_str(Leaf leaf, Options copts);
@@ -164,13 +166,20 @@ extern rb_encoding *oj_utf8_encoding;
164
166
  extern VALUE oj_bag_class;
165
167
  extern VALUE oj_bigdecimal_class;
166
168
  extern VALUE oj_doc_class;
169
+ extern VALUE oj_parse_error_class;
167
170
  extern VALUE oj_stringio_class;
168
171
  extern VALUE oj_struct_class;
169
172
  extern VALUE oj_time_class;
170
173
 
171
174
  extern VALUE oj_slash_string;
172
175
 
176
+ extern ID oj_add_value_id;
177
+ extern ID oj_array_end_id;
178
+ extern ID oj_array_start_id;
173
179
  extern ID oj_as_json_id;
180
+ extern ID oj_error_id;
181
+ extern ID oj_hash_end_id;
182
+ extern ID oj_hash_start_id;
174
183
  extern ID oj_instance_variables_id;
175
184
  extern ID oj_json_create_id;
176
185
  extern ID oj_new_id;
@@ -0,0 +1,812 @@
1
+ /* saj.c
2
+ * Copyright (c) 2012, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #if !IS_WINDOWS
32
+ #include <sys/resource.h> /* for getrlimit() on linux */
33
+ #endif
34
+ #include <stdlib.h>
35
+ #include <stdio.h>
36
+ #include <string.h>
37
+ #include <math.h>
38
+
39
+ /* Workaround: */
40
+ #ifndef INFINITY
41
+ #define INFINITY (1.0/0.0)
42
+ #endif
43
+
44
+ #include "oj.h"
45
+
46
+ typedef struct _CX {
47
+ VALUE *cur;
48
+ VALUE *end;
49
+ VALUE stack[1024];
50
+ } *CX;
51
+
52
+ typedef struct _ParseInfo {
53
+ char *str; /* buffer being read from */
54
+ char *s; /* current position in buffer */
55
+ void *stack_min;
56
+ VALUE handler;
57
+ int has_hash_start;
58
+ int has_hash_end;
59
+ int has_array_start;
60
+ int has_array_end;
61
+ int has_add_value;
62
+ int has_error;
63
+ } *ParseInfo;
64
+
65
+ static void read_next(ParseInfo pi, const char *key);
66
+ static void read_hash(ParseInfo pi, const char *key);
67
+ static void read_array(ParseInfo pi, const char *key);
68
+ static void read_str(ParseInfo pi, const char *key);
69
+ static void read_num(ParseInfo pi, const char *key);
70
+ static void read_true(ParseInfo pi, const char *key);
71
+ static void read_false(ParseInfo pi, const char *key);
72
+ static void read_nil(ParseInfo pi, const char *key);
73
+ static void next_non_white(ParseInfo pi);
74
+ static char* read_quoted_value(ParseInfo pi);
75
+ static void skip_comment(ParseInfo pi);
76
+
77
+ /* This JSON parser is a single pass, destructive, callback parser. It is a
78
+ * single pass parser since it only makes one pass over the characters in the
79
+ * JSON document string. It is destructive because it re-uses the content of
80
+ * the string for values in the callback and places \0 characters at various
81
+ * places to mark the end of tokens and strings. It is a callback parser like
82
+ * a SAX parser because it uses callback when document elements are
83
+ * encountered.
84
+ *
85
+ * Parsing is very tolerant. Lack of headers and even misspelled element
86
+ * endings are passed over without raising an error. A best attempt is made in
87
+ * all cases to parse the string.
88
+ */
89
+
90
+ inline static void
91
+ call_error(const char *msg, ParseInfo pi, const char* file, int line) {
92
+ char buf[128];
93
+ const char *s = pi->s;
94
+ int jline = 1;
95
+ int col = 1;
96
+
97
+ for (; pi->str < s && '\n' != *s; s--) {
98
+ col++;
99
+ }
100
+ for (; pi->str < s; s--) {
101
+ if ('\n' == *s) {
102
+ jline++;
103
+ }
104
+ }
105
+ sprintf(buf, "%s at line %d, column %d [%s:%d]", msg, jline, col, file, line);
106
+ rb_funcall(pi->handler, oj_error_id, 3, rb_str_new2(buf), LONG2NUM(jline), LONG2NUM(col));
107
+ }
108
+
109
+ inline static void
110
+ next_non_white(ParseInfo pi) {
111
+ for (; 1; pi->s++) {
112
+ switch(*pi->s) {
113
+ case ' ':
114
+ case '\t':
115
+ case '\f':
116
+ case '\n':
117
+ case '\r':
118
+ break;
119
+ case '/':
120
+ skip_comment(pi);
121
+ break;
122
+ default:
123
+ return;
124
+ }
125
+ }
126
+ }
127
+
128
+ inline static void
129
+ next_white(ParseInfo pi) {
130
+ for (; 1; pi->s++) {
131
+ switch(*pi->s) {
132
+ case ' ':
133
+ case '\t':
134
+ case '\f':
135
+ case '\n':
136
+ case '\r':
137
+ case '\0':
138
+ return;
139
+ default:
140
+ break;
141
+ }
142
+ }
143
+ }
144
+
145
+ inline static void
146
+ call_add_value(VALUE handler, VALUE value, const char *key) {
147
+ VALUE k;
148
+
149
+ if (0 == key) {
150
+ k = Qnil;
151
+ } else {
152
+ k = rb_str_new2(key);
153
+ #if HAS_ENCODING_SUPPORT
154
+ rb_enc_associate(k, oj_utf8_encoding);
155
+ #endif
156
+ }
157
+ rb_funcall(handler, oj_add_value_id, 2, value, k);
158
+ }
159
+
160
+ inline static void
161
+ call_no_value(VALUE handler, ID method, const char *key) {
162
+ VALUE k;
163
+
164
+ if (0 == key) {
165
+ k = Qnil;
166
+ } else {
167
+ k = rb_str_new2(key);
168
+ #if HAS_ENCODING_SUPPORT
169
+ rb_enc_associate(k, oj_utf8_encoding);
170
+ #endif
171
+ }
172
+ rb_funcall(handler, method, 1, k);
173
+ }
174
+
175
+ static void
176
+ skip_comment(ParseInfo pi) {
177
+ pi->s++; /* skip first / */
178
+ if ('*' == *pi->s) {
179
+ pi->s++;
180
+ for (; '\0' != *pi->s; pi->s++) {
181
+ if ('*' == *pi->s && '/' == *(pi->s + 1)) {
182
+ pi->s++;
183
+ return;
184
+ } else if ('\0' == *pi->s) {
185
+ if (pi->has_error) {
186
+ call_error("comment not terminated", pi, __FILE__, __LINE__);
187
+ } else {
188
+ raise_error("comment not terminated", pi->str, pi->s);
189
+ }
190
+ }
191
+ }
192
+ } else if ('/' == *pi->s) {
193
+ for (; 1; pi->s++) {
194
+ switch (*pi->s) {
195
+ case '\n':
196
+ case '\r':
197
+ case '\f':
198
+ case '\0':
199
+ return;
200
+ default:
201
+ break;
202
+ }
203
+ }
204
+ } else {
205
+ if (pi->has_error) {
206
+ call_error("invalid comment", pi, __FILE__, __LINE__);
207
+ } else {
208
+ raise_error("invalid comment", pi->str, pi->s);
209
+ }
210
+ }
211
+ }
212
+
213
+ static void
214
+ read_next(ParseInfo pi, const char *key) {
215
+ VALUE obj;
216
+
217
+ if ((void*)&obj < pi->stack_min) {
218
+ rb_raise(rb_eSysStackError, "JSON is too deeply nested");
219
+ }
220
+ next_non_white(pi); /* skip white space */
221
+ switch (*pi->s) {
222
+ case '{':
223
+ read_hash(pi, key);
224
+ break;
225
+ case '[':
226
+ read_array(pi, key);
227
+ break;
228
+ case '"':
229
+ read_str(pi, key);
230
+ break;
231
+ case '+':
232
+ case '-':
233
+ case '0':
234
+ case '1':
235
+ case '2':
236
+ case '3':
237
+ case '4':
238
+ case '5':
239
+ case '6':
240
+ case '7':
241
+ case '8':
242
+ case '9':
243
+ read_num(pi, key);
244
+ break;
245
+ case 'I':
246
+ read_num(pi, key);
247
+ break;
248
+ case 't':
249
+ read_true(pi, key);
250
+ break;
251
+ case 'f':
252
+ read_false(pi, key);
253
+ break;
254
+ case 'n':
255
+ read_nil(pi, key);
256
+ break;
257
+ case '\0':
258
+ return;
259
+ default:
260
+ return;
261
+ }
262
+ }
263
+
264
+ static void
265
+ read_hash(ParseInfo pi, const char *key) {
266
+ const char *ks;
267
+
268
+ if (pi->has_hash_start) {
269
+ call_no_value(pi->handler, oj_hash_start_id, key);
270
+ }
271
+ pi->s++;
272
+ next_non_white(pi);
273
+ if ('}' == *pi->s) {
274
+ pi->s++;
275
+ } else {
276
+ while (1) {
277
+ next_non_white(pi);
278
+ ks = read_quoted_value(pi);
279
+ next_non_white(pi);
280
+ if (':' == *pi->s) {
281
+ pi->s++;
282
+ } else {
283
+ if (pi->has_error) {
284
+ call_error("invalid format, expected :", pi, __FILE__, __LINE__);
285
+ }
286
+ raise_error("invalid format, expected :", pi->str, pi->s);
287
+ }
288
+ read_next(pi, ks);
289
+ next_non_white(pi);
290
+ if ('}' == *pi->s) {
291
+ pi->s++;
292
+ break;
293
+ } else if (',' == *pi->s) {
294
+ pi->s++;
295
+ } else {
296
+ if (pi->has_error) {
297
+ call_error("invalid format, expected , or } while in an object", pi, __FILE__, __LINE__);
298
+ }
299
+ raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
300
+ }
301
+ }
302
+ }
303
+ if (pi->has_hash_end) {
304
+ call_no_value(pi->handler, oj_hash_end_id, key);
305
+ }
306
+ }
307
+
308
+ static void
309
+ read_array(ParseInfo pi, const char *key) {
310
+ if (pi->has_array_start) {
311
+ call_no_value(pi->handler, oj_array_start_id, key);
312
+ }
313
+ pi->s++;
314
+ next_non_white(pi);
315
+ if (']' == *pi->s) {
316
+ pi->s++;
317
+ } else {
318
+ while (1) {
319
+ read_next(pi, 0);
320
+ next_non_white(pi);
321
+ if (',' == *pi->s) {
322
+ pi->s++;
323
+ } else if (']' == *pi->s) {
324
+ pi->s++;
325
+ break;
326
+ } else {
327
+ if (pi->has_error) {
328
+ call_error("invalid format, expected , or ] while in an array", pi, __FILE__, __LINE__);
329
+ }
330
+ raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
331
+ }
332
+ }
333
+ }
334
+ if (pi->has_array_end) {
335
+ call_no_value(pi->handler, oj_array_end_id, key);
336
+ }
337
+ }
338
+
339
+ static void
340
+ read_str(ParseInfo pi, const char *key) {
341
+ char *text;
342
+
343
+ text = read_quoted_value(pi);
344
+ if (pi->has_add_value) {
345
+ VALUE s = rb_str_new2(text);
346
+
347
+ #if HAS_ENCODING_SUPPORT
348
+ rb_enc_associate(s, oj_utf8_encoding);
349
+ #endif
350
+ call_add_value(pi->handler, s, key);
351
+ }
352
+ }
353
+
354
+ #ifdef RUBINIUS_RUBY
355
+ #define NUM_MAX 0x07FFFFFF
356
+ #else
357
+ #define NUM_MAX (FIXNUM_MAX >> 8)
358
+ #endif
359
+
360
+ static void
361
+ read_num(ParseInfo pi, const char *key) {
362
+ char *start = pi->s;
363
+ int64_t n = 0;
364
+ long a = 0;
365
+ long div = 1;
366
+ long e = 0;
367
+ int neg = 0;
368
+ int eneg = 0;
369
+ int big = 0;
370
+
371
+ if ('-' == *pi->s) {
372
+ pi->s++;
373
+ neg = 1;
374
+ } else if ('+' == *pi->s) {
375
+ pi->s++;
376
+ }
377
+ if ('I' == *pi->s) {
378
+ if (0 != strncmp("Infinity", pi->s, 8)) {
379
+ if (pi->has_error) {
380
+ call_error("number or other value", pi, __FILE__, __LINE__);
381
+ }
382
+ raise_error("number or other value", pi->str, pi->s);
383
+ }
384
+ pi->s += 8;
385
+ if (neg) {
386
+ if (pi->has_add_value) {
387
+ call_add_value(pi->handler, rb_float_new(-INFINITY), key);
388
+ }
389
+ } else {
390
+ if (pi->has_add_value) {
391
+ call_add_value(pi->handler, rb_float_new(INFINITY), key);
392
+ }
393
+ }
394
+ return;
395
+ }
396
+ for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
397
+ if (big) {
398
+ big++;
399
+ } else {
400
+ n = n * 10 + (*pi->s - '0');
401
+ if (NUM_MAX <= n) {
402
+ big = 1;
403
+ }
404
+ }
405
+ }
406
+ if ('.' == *pi->s) {
407
+ pi->s++;
408
+ for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
409
+ a = a * 10 + (*pi->s - '0');
410
+ div *= 10;
411
+ if (NUM_MAX <= div) {
412
+ big = 1;
413
+ }
414
+ }
415
+ }
416
+ if ('e' == *pi->s || 'E' == *pi->s) {
417
+ pi->s++;
418
+ if ('-' == *pi->s) {
419
+ pi->s++;
420
+ eneg = 1;
421
+ } else if ('+' == *pi->s) {
422
+ pi->s++;
423
+ }
424
+ for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
425
+ e = e * 10 + (*pi->s - '0');
426
+ if (NUM_MAX <= e) {
427
+ big = 1;
428
+ }
429
+ }
430
+ }
431
+ if (0 == e && 0 == a && 1 == div) {
432
+ if (big) {
433
+ char c = *pi->s;
434
+
435
+ *pi->s = '\0';
436
+ if (pi->has_add_value) {
437
+ call_add_value(pi->handler, rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(start)), key);
438
+ }
439
+ *pi->s = c;
440
+ } else {
441
+ if (neg) {
442
+ n = -n;
443
+ }
444
+ if (pi->has_add_value) {
445
+ call_add_value(pi->handler, LONG2NUM(n), key);
446
+ }
447
+ }
448
+ return;
449
+ } else { /* decimal */
450
+ if (big) {
451
+ char c = *pi->s;
452
+
453
+ *pi->s = '\0';
454
+ if (pi->has_add_value) {
455
+ call_add_value(pi->handler, rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(start)), key);
456
+ }
457
+ *pi->s = c;
458
+ } else {
459
+ double d = (double)n + (double)a / (double)div;
460
+
461
+ if (neg) {
462
+ d = -d;
463
+ }
464
+ if (1 < big) {
465
+ e += big - 1;
466
+ }
467
+ if (0 != e) {
468
+ if (eneg) {
469
+ e = -e;
470
+ }
471
+ d *= pow(10.0, e);
472
+ }
473
+ if (pi->has_add_value) {
474
+ call_add_value(pi->handler, rb_float_new(d), key);
475
+ }
476
+ }
477
+ }
478
+ }
479
+
480
+ static void
481
+ read_true(ParseInfo pi, const char *key) {
482
+ pi->s++;
483
+ if ('r' != *pi->s || 'u' != *(pi->s + 1) || 'e' != *(pi->s + 2)) {
484
+ if (pi->has_error) {
485
+ call_error("invalid format, expected 'true'", pi, __FILE__, __LINE__);
486
+ }
487
+ raise_error("invalid format, expected 'true'", pi->str, pi->s);
488
+ }
489
+ pi->s += 3;
490
+ if (pi->has_add_value) {
491
+ call_add_value(pi->handler, Qtrue, key);
492
+ }
493
+ }
494
+
495
+ static void
496
+ read_false(ParseInfo pi, const char *key) {
497
+ pi->s++;
498
+ if ('a' != *pi->s || 'l' != *(pi->s + 1) || 's' != *(pi->s + 2) || 'e' != *(pi->s + 3)) {
499
+ if (pi->has_error) {
500
+ call_error("invalid format, expected 'false'", pi, __FILE__, __LINE__);
501
+ }
502
+ raise_error("invalid format, expected 'false'", pi->str, pi->s);
503
+ }
504
+ pi->s += 4;
505
+ if (pi->has_add_value) {
506
+ call_add_value(pi->handler, Qfalse, key);
507
+ }
508
+ }
509
+
510
+ static void
511
+ read_nil(ParseInfo pi, const char *key) {
512
+ pi->s++;
513
+ if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
514
+ if (pi->has_error) {
515
+ call_error("invalid format, expected 'null'", pi, __FILE__, __LINE__);
516
+ }
517
+ raise_error("invalid format, expected 'null'", pi->str, pi->s);
518
+ }
519
+ pi->s += 3;
520
+ if (pi->has_add_value) {
521
+ call_add_value(pi->handler, Qnil, key);
522
+ }
523
+ }
524
+
525
+ static uint32_t
526
+ read_hex(ParseInfo pi, char *h) {
527
+ uint32_t b = 0;
528
+ int i;
529
+
530
+ /* TBD this can be made faster with a table */
531
+ for (i = 0; i < 4; i++, h++) {
532
+ b = b << 4;
533
+ if ('0' <= *h && *h <= '9') {
534
+ b += *h - '0';
535
+ } else if ('A' <= *h && *h <= 'F') {
536
+ b += *h - 'A' + 10;
537
+ } else if ('a' <= *h && *h <= 'f') {
538
+ b += *h - 'a' + 10;
539
+ } else {
540
+ pi->s = h;
541
+ if (pi->has_error) {
542
+ call_error("invalid hex character", pi, __FILE__, __LINE__);
543
+ }
544
+ raise_error("invalid hex character", pi->str, pi->s);
545
+ }
546
+ }
547
+ return b;
548
+ }
549
+
550
+ static char*
551
+ unicode_to_chars(ParseInfo pi, char *t, uint32_t code) {
552
+ if (0x0000007F >= code) {
553
+ *t = (char)code;
554
+ } else if (0x000007FF >= code) {
555
+ *t++ = 0xC0 | (code >> 6);
556
+ *t = 0x80 | (0x3F & code);
557
+ } else if (0x0000FFFF >= code) {
558
+ *t++ = 0xE0 | (code >> 12);
559
+ *t++ = 0x80 | ((code >> 6) & 0x3F);
560
+ *t = 0x80 | (0x3F & code);
561
+ } else if (0x001FFFFF >= code) {
562
+ *t++ = 0xF0 | (code >> 18);
563
+ *t++ = 0x80 | ((code >> 12) & 0x3F);
564
+ *t++ = 0x80 | ((code >> 6) & 0x3F);
565
+ *t = 0x80 | (0x3F & code);
566
+ } else if (0x03FFFFFF >= code) {
567
+ *t++ = 0xF8 | (code >> 24);
568
+ *t++ = 0x80 | ((code >> 18) & 0x3F);
569
+ *t++ = 0x80 | ((code >> 12) & 0x3F);
570
+ *t++ = 0x80 | ((code >> 6) & 0x3F);
571
+ *t = 0x80 | (0x3F & code);
572
+ } else if (0x7FFFFFFF >= code) {
573
+ *t++ = 0xFC | (code >> 30);
574
+ *t++ = 0x80 | ((code >> 24) & 0x3F);
575
+ *t++ = 0x80 | ((code >> 18) & 0x3F);
576
+ *t++ = 0x80 | ((code >> 12) & 0x3F);
577
+ *t++ = 0x80 | ((code >> 6) & 0x3F);
578
+ *t = 0x80 | (0x3F & code);
579
+ } else {
580
+ if (pi->has_error) {
581
+ call_error("invalid Unicode", pi, __FILE__, __LINE__);
582
+ }
583
+ raise_error("invalid Unicode", pi->str, pi->s);
584
+ }
585
+ return t;
586
+ }
587
+
588
+ /* Assume the value starts immediately and goes until the quote character is
589
+ * reached again. Do not read the character after the terminating quote.
590
+ */
591
+ static char*
592
+ read_quoted_value(ParseInfo pi) {
593
+ char *value = 0;
594
+ char *h = pi->s; /* head */
595
+ char *t = h; /* tail */
596
+ uint32_t code;
597
+
598
+ h++; /* skip quote character */
599
+ t++;
600
+ value = h;
601
+ for (; '"' != *h; h++, t++) {
602
+ if ('\0' == *h) {
603
+ pi->s = h;
604
+ raise_error("quoted string not terminated", pi->str, pi->s);
605
+ } else if ('\\' == *h) {
606
+ h++;
607
+ switch (*h) {
608
+ case 'n': *t = '\n'; break;
609
+ case 'r': *t = '\r'; break;
610
+ case 't': *t = '\t'; break;
611
+ case 'f': *t = '\f'; break;
612
+ case 'b': *t = '\b'; break;
613
+ case '"': *t = '"'; break;
614
+ case '/': *t = '/'; break;
615
+ case '\\': *t = '\\'; break;
616
+ case 'u':
617
+ h++;
618
+ code = read_hex(pi, h);
619
+ h += 3;
620
+ if (0x0000D800 <= code && code <= 0x0000DFFF) {
621
+ uint32_t c1 = (code - 0x0000D800) & 0x000003FF;
622
+ uint32_t c2;
623
+
624
+ h++;
625
+ if ('\\' != *h || 'u' != *(h + 1)) {
626
+ pi->s = h;
627
+ if (pi->has_error) {
628
+ call_error("invalid escaped character", pi, __FILE__, __LINE__);
629
+ }
630
+ raise_error("invalid escaped character", pi->str, pi->s);
631
+ }
632
+ h += 2;
633
+ c2 = read_hex(pi, h);
634
+ h += 3;
635
+ c2 = (c2 - 0x0000DC00) & 0x000003FF;
636
+ code = ((c1 << 10) | c2) + 0x00010000;
637
+ }
638
+ t = unicode_to_chars(pi, t, code);
639
+ break;
640
+ default:
641
+ pi->s = h;
642
+ if (pi->has_error) {
643
+ call_error("invalid escaped character", pi, __FILE__, __LINE__);
644
+ }
645
+ raise_error("invalid escaped character", pi->str, pi->s);
646
+ break;
647
+ }
648
+ } else if (t != h) {
649
+ *t = *h;
650
+ }
651
+ }
652
+ *t = '\0'; /* terminate value */
653
+ pi->s = h + 1;
654
+
655
+ return value;
656
+ }
657
+
658
+ inline static int
659
+ respond_to(VALUE obj, ID method) {
660
+ #ifdef JRUBY_RUBY
661
+ /* There is a bug in JRuby where rb_respond_to() returns true (1) even if
662
+ * a method is private. */
663
+ {
664
+ VALUE args[1];
665
+
666
+ *args = ID2SYM(method);
667
+ return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args));
668
+ }
669
+ #else
670
+ return rb_respond_to(obj, method);
671
+ #endif
672
+ }
673
+
674
+ void
675
+ oj_saj_parse(VALUE handler, char *json) {
676
+ VALUE obj = Qnil;
677
+ struct _ParseInfo pi;
678
+
679
+ if (0 == json) {
680
+ if (pi.has_error) {
681
+ call_error("Invalid arg, xml string can not be null", &pi, __FILE__, __LINE__);
682
+ }
683
+ raise_error("Invalid arg, xml string can not be null", json, 0);
684
+ }
685
+ /* skip UTF-8 BOM if present */
686
+ if (0xEF == (uint8_t)*json && 0xBB == (uint8_t)json[1] && 0xBF == (uint8_t)json[2]) {
687
+ json += 3;
688
+ }
689
+ /* initialize parse info */
690
+ pi.str = json;
691
+ pi.s = json;
692
+ #if IS_WINDOWS
693
+ pi.stack_min = (void*)((char*)&obj - (512 * 1024)); /* assume a 1M stack and give half to ruby */
694
+ #else
695
+ {
696
+ struct rlimit lim;
697
+
698
+ if (0 == getrlimit(RLIMIT_STACK, &lim)) {
699
+ pi.stack_min = (void*)((char*)&obj - (lim.rlim_cur / 4 * 3)); /* let 3/4ths of the stack be used only */
700
+ } else {
701
+ pi.stack_min = 0; /* indicates not to check stack limit */
702
+ }
703
+ }
704
+ #endif
705
+ pi.handler = handler;
706
+ pi.has_hash_start = respond_to(handler, oj_hash_start_id);
707
+ pi.has_hash_end = respond_to(handler, oj_hash_end_id);
708
+ pi.has_array_start = respond_to(handler, oj_array_start_id);
709
+ pi.has_array_end = respond_to(handler, oj_array_end_id);
710
+ pi.has_add_value = respond_to(handler, oj_add_value_id);
711
+ pi.has_error = respond_to(handler, oj_error_id);
712
+ read_next(&pi, 0);
713
+ next_non_white(&pi);
714
+ if ('\0' != *pi.s) {
715
+ if (pi.has_error) {
716
+ call_error("invalid format, extra characters", &pi, __FILE__, __LINE__);
717
+ } else {
718
+ raise_error("invalid format, extra characters", pi.str, pi.s);
719
+ }
720
+ }
721
+ }
722
+
723
+
724
#if 0
/* Disabled code: everything up to the matching #endif is compiled out. These
 * helpers build a Ruby object tree on a CX stack from parser callbacks. */

/* Stores obj in the container on top of the stack: under key when a key is
 * given, appended otherwise. With an empty stack obj becomes the first
 * entry. */
static void
cx_add(CX cx, VALUE obj, const char *key) {
    if (0 == cx->cur) {
        cx->cur = cx->stack;
        *cx->cur = obj;
        return;
    }
    if (0 == key) {
        rb_ary_push(*cx->cur, obj);
    } else {
        VALUE   rkey = rb_str_new2(key);
#if HAS_ENCODING_SUPPORT
        rb_enc_associate(rkey, oj_utf8_encoding);
#endif
        rb_hash_aset(*cx->cur, rkey, obj);
    }
}

/* Adds obj to the current container and then makes obj the new top of the
 * stack. Raises a parse error when the stack is full. */
static void
cx_push(CX cx, VALUE obj, const char *key) {
    if (0 != cx->cur) {
        if (cx->end <= cx->cur) {
            rb_raise(oj_parse_error_class, "too deeply nested");
        }
        cx_add(cx, obj, key);
        cx->cur++;
    } else {
        cx->cur = cx->stack;
    }
    *cx->cur = obj;
}

/* Callback: a hash begins. */
static void
hash_start(void *context, const char *key) {
    CX  cx = (CX)context;

    cx_push(cx, rb_hash_new(), key);
}

/* Callback: a hash or array ends; pops the stack. */
static void
col_end(void *context, const char *key) {
    CX  cx = (CX)context;

    cx->cur--;
}

/* Callback: an array begins. */
static void
array_start(void *context, const char *key) {
    CX  cx = (CX)context;

    cx_push(cx, rb_ary_new(), key);
}

/* Callback: a string value. */
static void
add_str(void *context, const char *str, const char *key) {
    VALUE   rstr = rb_str_new2(str);

#if HAS_ENCODING_SUPPORT
    rb_enc_associate(rstr, oj_utf8_encoding);
#endif
    cx_add((CX)context, rstr, key);
}

/* Callback: a number too large for native types, given as text. */
static void
add_big(void *context, const char *str, const char *key) {
    VALUE   big = rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(str));

    cx_add((CX)context, big, key);
}

/* Callback: a floating point value. */
static void
add_float(void *context, double num, const char *key) {
    cx_add((CX)context, rb_float_new(num), key);
}

/* Callback: an integer value. */
static void
add_fixnum(void *context, int64_t num, const char *key) {
    cx_add((CX)context, LONG2NUM(num), key);
}

/* Callback: true. */
static void
add_true(void *context, const char *key) {
    cx_add((CX)context, Qtrue, key);
}

/* Callback: false. */
static void
add_false(void *context, const char *key) {
    cx_add((CX)context, Qfalse, key);
}

/* Callback: null. */
static void
add_nil(void *context, const char *key) {
    cx_add((CX)context, Qnil, key);
}
#endif