oj 1.4.7 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of oj might be problematic. Click here for more details.
- data/README.md +13 -2
- data/ext/oj/cache.c +0 -1
- data/ext/oj/dump.c +3 -1
- data/ext/oj/oj.c +93 -0
- data/ext/oj/oj.h +9 -0
- data/ext/oj/saj.c +812 -0
- data/lib/oj.rb +1 -0
- data/lib/oj/saj.rb +63 -0
- data/lib/oj/version.rb +1 -1
- data/test/a.rb +38 -0
- data/test/perf_saj.rb +109 -0
- data/test/test_fast.rb +0 -1
- data/test/test_saj.rb +186 -0
- metadata +7 -2
data/README.md
CHANGED
@@ -32,9 +32,11 @@ A fast JSON parser and Object marshaller as a Ruby gem.
|
|
32
32
|
|
33
33
|
## <a name="release">Release Notes</a>
|
34
34
|
|
35
|
-
### Release
|
35
|
+
### Release 2.0.0
|
36
36
|
|
37
|
-
-
|
37
|
+
- Thanks to yuki24, Floats are now output with a decimal point even if they have an integer value.
|
38
|
+
|
39
|
+
- <b>The Simple API for JSON (SAJ) API has been added. Read more about it on the [Oj::Saj page](http://www.ohler.com/oj/Oj/Saj.html).</b>
|
38
40
|
|
39
41
|
## <a name="description">Description</a>
|
40
42
|
|
@@ -71,6 +73,15 @@ build with extensions enabled. Check the documenation for JRuby installs in your
|
|
71
73
|
Oj is also compatible with Rails. Just make sure the Oj gem is installed and
|
72
74
|
[multi_json](https://github.com/intridea/multi_json) will pick it up and use it.
|
73
75
|
|
76
|
+
Oj offers two alternative APIs for processing JSON. The fastest one is the Oj::Doc API. The Oj::Doc API takes a
|
77
|
+
completely different approach by opening a JSON document and providing calls to navigate around the JSON while it is
|
78
|
+
open. With this approach JSON access can be well over 20 times faster than conventional JSON parsing.
|
79
|
+
|
80
|
+
Another API, the Oj::Saj API follows an XML SAX model and walks the JSON document depth first and makes callbacks for
|
81
|
+
each element. The Oj::Saj API is useful when only portions of the JSON are of interest. Performance up to 20 times
|
82
|
+
faster than conventional JSON parsing is possible. The API is simple to use but does require a different approach than the
|
83
|
+
conventional parse followed by access approach used by conventional JSON parsing.
|
84
|
+
|
74
85
|
## <a name="compare">Comparisons</a>
|
75
86
|
|
76
87
|
### Fast Oj::Doc parser comparisons
|
data/ext/oj/cache.c
CHANGED
data/ext/oj/dump.c
CHANGED
@@ -436,9 +436,11 @@ dump_float(VALUE obj, Out out) {
|
|
436
436
|
} else if (-INFINITY == d) {
|
437
437
|
strcpy(buf, "-Infinity");
|
438
438
|
cnt = 9;
|
439
|
+
} else if (d == (double)(long long int)d) {
|
440
|
+
cnt = sprintf(buf, "%.1f", d); // used sprintf due to bug in snprintf
|
439
441
|
} else {
|
440
442
|
cnt = sprintf(buf, "%0.16g", d); // used sprintf due to bug in snprintf
|
441
|
-
}
|
443
|
+
}
|
442
444
|
if (out->end - out->cur <= (long)cnt) {
|
443
445
|
grow(out, cnt);
|
444
446
|
}
|
data/ext/oj/oj.c
CHANGED
@@ -47,8 +47,14 @@ void Init_oj();
|
|
47
47
|
|
48
48
|
VALUE Oj = Qnil;
|
49
49
|
|
50
|
+
ID oj_add_value_id;
|
51
|
+
ID oj_array_end_id;
|
52
|
+
ID oj_array_start_id;
|
50
53
|
ID oj_as_json_id;
|
54
|
+
ID oj_error_id;
|
51
55
|
ID oj_fileno_id;
|
56
|
+
ID oj_hash_end_id;
|
57
|
+
ID oj_hash_start_id;
|
52
58
|
ID oj_instance_variables_id;
|
53
59
|
ID oj_json_create_id;
|
54
60
|
ID oj_new_id;
|
@@ -543,6 +549,85 @@ to_file(int argc, VALUE *argv, VALUE self) {
|
|
543
549
|
return Qnil;
|
544
550
|
}
|
545
551
|
|
552
|
+
/* call-seq: saj_parse(handler, io)
|
553
|
+
*
|
554
|
+
* Parses an IO stream or file containing an JSON document. Raises an exception
|
555
|
+
* if the JSON is malformed.
|
556
|
+
* @param [Oj::Saj] handler SAJ (responds to Oj::Saj methods) like handler
|
557
|
+
* @param [IO|String] io IO Object to read from
|
558
|
+
*/
|
559
|
+
static VALUE
|
560
|
+
saj_parse(int argc, VALUE *argv, VALUE self) {
|
561
|
+
struct _Options copts = oj_default_options;
|
562
|
+
char *json;
|
563
|
+
size_t len;
|
564
|
+
VALUE input = argv[1];
|
565
|
+
|
566
|
+
if (argc < 2) {
|
567
|
+
rb_raise(rb_eArgError, "Wrong number of arguments to saj_parse.\n");
|
568
|
+
}
|
569
|
+
if (rb_type(input) == T_STRING) {
|
570
|
+
// the json string gets modified so make a copy of it
|
571
|
+
len = RSTRING_LEN(input) + 1;
|
572
|
+
if (copts.max_stack < len) {
|
573
|
+
json = ALLOC_N(char, len);
|
574
|
+
} else {
|
575
|
+
json = ALLOCA_N(char, len);
|
576
|
+
}
|
577
|
+
strcpy(json, StringValuePtr(input));
|
578
|
+
} else {
|
579
|
+
VALUE clas = rb_obj_class(input);
|
580
|
+
VALUE s;
|
581
|
+
|
582
|
+
if (oj_stringio_class == clas) {
|
583
|
+
s = rb_funcall2(input, oj_string_id, 0, 0);
|
584
|
+
len = RSTRING_LEN(s) + 1;
|
585
|
+
if (copts.max_stack < len) {
|
586
|
+
json = ALLOC_N(char, len);
|
587
|
+
} else {
|
588
|
+
json = ALLOCA_N(char, len);
|
589
|
+
}
|
590
|
+
strcpy(json, StringValuePtr(s));
|
591
|
+
#ifndef JRUBY_RUBY
|
592
|
+
#if !IS_WINDOWS
|
593
|
+
// JRuby gets confused with what is the real fileno.
|
594
|
+
} else if (rb_respond_to(input, oj_fileno_id) && Qnil != (s = rb_funcall(input, oj_fileno_id, 0))) {
|
595
|
+
int fd = FIX2INT(s);
|
596
|
+
ssize_t cnt;
|
597
|
+
|
598
|
+
len = lseek(fd, 0, SEEK_END);
|
599
|
+
lseek(fd, 0, SEEK_SET);
|
600
|
+
if (copts.max_stack < len) {
|
601
|
+
json = ALLOC_N(char, len + 1);
|
602
|
+
} else {
|
603
|
+
json = ALLOCA_N(char, len + 1);
|
604
|
+
}
|
605
|
+
if (0 >= (cnt = read(fd, json, len)) || cnt != (ssize_t)len) {
|
606
|
+
rb_raise(rb_eIOError, "failed to read from IO Object.");
|
607
|
+
}
|
608
|
+
json[len] = '\0';
|
609
|
+
#endif
|
610
|
+
#endif
|
611
|
+
} else if (rb_respond_to(input, oj_read_id)) {
|
612
|
+
s = rb_funcall2(input, oj_read_id, 0, 0);
|
613
|
+
len = RSTRING_LEN(s) + 1;
|
614
|
+
if (copts.max_stack < len) {
|
615
|
+
json = ALLOC_N(char, len);
|
616
|
+
} else {
|
617
|
+
json = ALLOCA_N(char, len);
|
618
|
+
}
|
619
|
+
strcpy(json, StringValuePtr(s));
|
620
|
+
} else {
|
621
|
+
rb_raise(rb_eArgError, "saj_parse() expected a String or IO Object.");
|
622
|
+
}
|
623
|
+
}
|
624
|
+
oj_saj_parse(*argv, json);
|
625
|
+
if (copts.max_stack < len) {
|
626
|
+
xfree(json);
|
627
|
+
}
|
628
|
+
return Qnil;
|
629
|
+
}
|
630
|
+
|
546
631
|
// Mimic JSON section
|
547
632
|
|
548
633
|
static VALUE
|
@@ -880,8 +965,16 @@ void Init_oj() {
|
|
880
965
|
rb_define_module_function(Oj, "dump", dump, -1);
|
881
966
|
rb_define_module_function(Oj, "to_file", to_file, -1);
|
882
967
|
|
968
|
+
rb_define_module_function(Oj, "saj_parse", saj_parse, -1);
|
969
|
+
|
970
|
+
oj_add_value_id = rb_intern("add_value");
|
971
|
+
oj_array_end_id = rb_intern("array_end");
|
972
|
+
oj_array_start_id = rb_intern("array_start");
|
883
973
|
oj_as_json_id = rb_intern("as_json");
|
974
|
+
oj_error_id = rb_intern("error");
|
884
975
|
oj_fileno_id = rb_intern("fileno");
|
976
|
+
oj_hash_end_id = rb_intern("hash_end");
|
977
|
+
oj_hash_start_id = rb_intern("hash_start");
|
885
978
|
oj_instance_variables_id = rb_intern("instance_variables");
|
886
979
|
oj_json_create_id = rb_intern("json_create");
|
887
980
|
oj_new_id = rb_intern("new");
|
data/ext/oj/oj.h
CHANGED
@@ -144,6 +144,8 @@ typedef struct _Leaf {
|
|
144
144
|
} *Leaf;
|
145
145
|
|
146
146
|
extern VALUE oj_parse(char *json, Options options);
|
147
|
+
extern void oj_saj_parse(VALUE handler, char *json);
|
148
|
+
|
147
149
|
extern char* oj_write_obj_to_str(VALUE obj, Options copts);
|
148
150
|
extern void oj_write_obj_to_file(VALUE obj, const char *path, Options copts);
|
149
151
|
extern char* oj_write_leaf_to_str(Leaf leaf, Options copts);
|
@@ -164,13 +166,20 @@ extern rb_encoding *oj_utf8_encoding;
|
|
164
166
|
extern VALUE oj_bag_class;
|
165
167
|
extern VALUE oj_bigdecimal_class;
|
166
168
|
extern VALUE oj_doc_class;
|
169
|
+
extern VALUE oj_parse_error_class;
|
167
170
|
extern VALUE oj_stringio_class;
|
168
171
|
extern VALUE oj_struct_class;
|
169
172
|
extern VALUE oj_time_class;
|
170
173
|
|
171
174
|
extern VALUE oj_slash_string;
|
172
175
|
|
176
|
+
extern ID oj_add_value_id;
|
177
|
+
extern ID oj_array_end_id;
|
178
|
+
extern ID oj_array_start_id;
|
173
179
|
extern ID oj_as_json_id;
|
180
|
+
extern ID oj_error_id;
|
181
|
+
extern ID oj_hash_end_id;
|
182
|
+
extern ID oj_hash_start_id;
|
174
183
|
extern ID oj_instance_variables_id;
|
175
184
|
extern ID oj_json_create_id;
|
176
185
|
extern ID oj_new_id;
|
data/ext/oj/saj.c
ADDED
@@ -0,0 +1,812 @@
|
|
1
|
+
/* saj.c
|
2
|
+
* Copyright (c) 2012, Peter Ohler
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* Redistribution and use in source and binary forms, with or without
|
6
|
+
* modification, are permitted provided that the following conditions are met:
|
7
|
+
*
|
8
|
+
* - Redistributions of source code must retain the above copyright notice, this
|
9
|
+
* list of conditions and the following disclaimer.
|
10
|
+
*
|
11
|
+
* - Redistributions in binary form must reproduce the above copyright notice,
|
12
|
+
* this list of conditions and the following disclaimer in the documentation
|
13
|
+
* and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* - Neither the name of Peter Ohler nor the names of its contributors may be
|
16
|
+
* used to endorse or promote products derived from this software without
|
17
|
+
* specific prior written permission.
|
18
|
+
*
|
19
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23
|
+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26
|
+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27
|
+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
*/
|
30
|
+
|
31
|
+
#if !IS_WINDOWS
|
32
|
+
#include <sys/resource.h> /* for getrlimit() on linux */
|
33
|
+
#endif
|
34
|
+
#include <stdlib.h>
|
35
|
+
#include <stdio.h>
|
36
|
+
#include <string.h>
|
37
|
+
#include <math.h>
|
38
|
+
|
39
|
+
/* Workaround: */
|
40
|
+
#ifndef INFINITY
|
41
|
+
#define INFINITY (1.0/0.0)
|
42
|
+
#endif
|
43
|
+
|
44
|
+
#include "oj.h"
|
45
|
+
|
46
|
+
/* Fixed size stack of Ruby values. In this file it is only referenced by the
 * cx_* helpers that are compiled out with #if 0. */
typedef struct _CX {
    VALUE	*cur;		/* current stack slot, 0 until the first push */
    VALUE	*end;		/* limit compared against cur before pushing */
    VALUE	stack[1024];	/* storage for the stacked values */
} *CX;
|
51
|
+
|
52
|
+
/* State shared by all the read_* functions during a single SAJ parse. */
typedef struct _ParseInfo {
    char	*str;		/* buffer being read from */
    char	*s;		/* current position in buffer */
    void	*stack_min;	/* read_next() raises when a local's address is below this, 0 disables the check */
    VALUE	handler;	/* object that receives the callbacks */
    int		has_hash_start;	/* non-zero if handler responds to hash_start */
    int		has_hash_end;	/* non-zero if handler responds to hash_end */
    int		has_array_start;/* non-zero if handler responds to array_start */
    int		has_array_end;	/* non-zero if handler responds to array_end */
    int		has_add_value;	/* non-zero if handler responds to add_value */
    int		has_error;	/* non-zero if handler responds to error */
} *ParseInfo;
|
64
|
+
|
65
|
+
static void read_next(ParseInfo pi, const char *key);
|
66
|
+
static void read_hash(ParseInfo pi, const char *key);
|
67
|
+
static void read_array(ParseInfo pi, const char *key);
|
68
|
+
static void read_str(ParseInfo pi, const char *key);
|
69
|
+
static void read_num(ParseInfo pi, const char *key);
|
70
|
+
static void read_true(ParseInfo pi, const char *key);
|
71
|
+
static void read_false(ParseInfo pi, const char *key);
|
72
|
+
static void read_nil(ParseInfo pi, const char *key);
|
73
|
+
static void next_non_white(ParseInfo pi);
|
74
|
+
static char* read_quoted_value(ParseInfo pi);
|
75
|
+
static void skip_comment(ParseInfo pi);
|
76
|
+
|
77
|
+
/* This JSON parser is a single pass, destructive, callback parser. It is a
|
78
|
+
* single pass parse since it only make one pass over the characters in the
|
79
|
+
* JSON document string. It is destructive because it re-uses the content of
|
80
|
+
* the string for values in the callback and places \0 characters at various
|
81
|
+
* places to mark the end of tokens and strings. It is a callback parser like
|
82
|
+
* a SAX parser because it uses callback when document elements are
|
83
|
+
* encountered.
|
84
|
+
*
|
85
|
+
* Parsing is very tolerant. Lack of headers and even misspelled element
|
86
|
+
* endings are passed over without raising an error. A best attempt is made in
|
87
|
+
* all cases to parse the string.
|
88
|
+
*/
|
89
|
+
|
90
|
+
inline static void
|
91
|
+
call_error(const char *msg, ParseInfo pi, const char* file, int line) {
|
92
|
+
char buf[128];
|
93
|
+
const char *s = pi->s;
|
94
|
+
int jline = 1;
|
95
|
+
int col = 1;
|
96
|
+
|
97
|
+
for (; pi->str < s && '\n' != *s; s--) {
|
98
|
+
col++;
|
99
|
+
}
|
100
|
+
for (; pi->str < s; s--) {
|
101
|
+
if ('\n' == *s) {
|
102
|
+
jline++;
|
103
|
+
}
|
104
|
+
}
|
105
|
+
sprintf(buf, "%s at line %d, column %d [%s:%d]", msg, jline, col, file, line);
|
106
|
+
rb_funcall(pi->handler, oj_error_id, 3, rb_str_new2(buf), LONG2NUM(jline), LONG2NUM(col));
|
107
|
+
}
|
108
|
+
|
109
|
+
inline static void
|
110
|
+
next_non_white(ParseInfo pi) {
|
111
|
+
for (; 1; pi->s++) {
|
112
|
+
switch(*pi->s) {
|
113
|
+
case ' ':
|
114
|
+
case '\t':
|
115
|
+
case '\f':
|
116
|
+
case '\n':
|
117
|
+
case '\r':
|
118
|
+
break;
|
119
|
+
case '/':
|
120
|
+
skip_comment(pi);
|
121
|
+
break;
|
122
|
+
default:
|
123
|
+
return;
|
124
|
+
}
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
inline static void
|
129
|
+
next_white(ParseInfo pi) {
|
130
|
+
for (; 1; pi->s++) {
|
131
|
+
switch(*pi->s) {
|
132
|
+
case ' ':
|
133
|
+
case '\t':
|
134
|
+
case '\f':
|
135
|
+
case '\n':
|
136
|
+
case '\r':
|
137
|
+
case '\0':
|
138
|
+
return;
|
139
|
+
default:
|
140
|
+
break;
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
inline static void
|
146
|
+
call_add_value(VALUE handler, VALUE value, const char *key) {
|
147
|
+
VALUE k;
|
148
|
+
|
149
|
+
if (0 == key) {
|
150
|
+
k = Qnil;
|
151
|
+
} else {
|
152
|
+
k = rb_str_new2(key);
|
153
|
+
#if HAS_ENCODING_SUPPORT
|
154
|
+
rb_enc_associate(k, oj_utf8_encoding);
|
155
|
+
#endif
|
156
|
+
}
|
157
|
+
rb_funcall(handler, oj_add_value_id, 2, value, k);
|
158
|
+
}
|
159
|
+
|
160
|
+
inline static void
|
161
|
+
call_no_value(VALUE handler, ID method, const char *key) {
|
162
|
+
VALUE k;
|
163
|
+
|
164
|
+
if (0 == key) {
|
165
|
+
k = Qnil;
|
166
|
+
} else {
|
167
|
+
k = rb_str_new2(key);
|
168
|
+
#if HAS_ENCODING_SUPPORT
|
169
|
+
rb_enc_associate(k, oj_utf8_encoding);
|
170
|
+
#endif
|
171
|
+
}
|
172
|
+
rb_funcall(handler, method, 1, k);
|
173
|
+
}
|
174
|
+
|
175
|
+
static void
|
176
|
+
skip_comment(ParseInfo pi) {
|
177
|
+
pi->s++; /* skip first / */
|
178
|
+
if ('*' == *pi->s) {
|
179
|
+
pi->s++;
|
180
|
+
for (; '\0' != *pi->s; pi->s++) {
|
181
|
+
if ('*' == *pi->s && '/' == *(pi->s + 1)) {
|
182
|
+
pi->s++;
|
183
|
+
return;
|
184
|
+
} else if ('\0' == *pi->s) {
|
185
|
+
if (pi->has_error) {
|
186
|
+
call_error("comment not terminated", pi, __FILE__, __LINE__);
|
187
|
+
} else {
|
188
|
+
raise_error("comment not terminated", pi->str, pi->s);
|
189
|
+
}
|
190
|
+
}
|
191
|
+
}
|
192
|
+
} else if ('/' == *pi->s) {
|
193
|
+
for (; 1; pi->s++) {
|
194
|
+
switch (*pi->s) {
|
195
|
+
case '\n':
|
196
|
+
case '\r':
|
197
|
+
case '\f':
|
198
|
+
case '\0':
|
199
|
+
return;
|
200
|
+
default:
|
201
|
+
break;
|
202
|
+
}
|
203
|
+
}
|
204
|
+
} else {
|
205
|
+
if (pi->has_error) {
|
206
|
+
call_error("invalid comment", pi, __FILE__, __LINE__);
|
207
|
+
} else {
|
208
|
+
raise_error("invalid comment", pi->str, pi->s);
|
209
|
+
}
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
213
|
+
static void
|
214
|
+
read_next(ParseInfo pi, const char *key) {
|
215
|
+
VALUE obj;
|
216
|
+
|
217
|
+
if ((void*)&obj < pi->stack_min) {
|
218
|
+
rb_raise(rb_eSysStackError, "JSON is too deeply nested");
|
219
|
+
}
|
220
|
+
next_non_white(pi); /* skip white space */
|
221
|
+
switch (*pi->s) {
|
222
|
+
case '{':
|
223
|
+
read_hash(pi, key);
|
224
|
+
break;
|
225
|
+
case '[':
|
226
|
+
read_array(pi, key);
|
227
|
+
break;
|
228
|
+
case '"':
|
229
|
+
read_str(pi, key);
|
230
|
+
break;
|
231
|
+
case '+':
|
232
|
+
case '-':
|
233
|
+
case '0':
|
234
|
+
case '1':
|
235
|
+
case '2':
|
236
|
+
case '3':
|
237
|
+
case '4':
|
238
|
+
case '5':
|
239
|
+
case '6':
|
240
|
+
case '7':
|
241
|
+
case '8':
|
242
|
+
case '9':
|
243
|
+
read_num(pi, key);
|
244
|
+
break;
|
245
|
+
case 'I':
|
246
|
+
read_num(pi, key);
|
247
|
+
break;
|
248
|
+
case 't':
|
249
|
+
read_true(pi, key);
|
250
|
+
break;
|
251
|
+
case 'f':
|
252
|
+
read_false(pi, key);
|
253
|
+
break;
|
254
|
+
case 'n':
|
255
|
+
read_nil(pi, key);
|
256
|
+
break;
|
257
|
+
case '\0':
|
258
|
+
return;
|
259
|
+
default:
|
260
|
+
return;
|
261
|
+
}
|
262
|
+
}
|
263
|
+
|
264
|
+
static void
|
265
|
+
read_hash(ParseInfo pi, const char *key) {
|
266
|
+
const char *ks;
|
267
|
+
|
268
|
+
if (pi->has_hash_start) {
|
269
|
+
call_no_value(pi->handler, oj_hash_start_id, key);
|
270
|
+
}
|
271
|
+
pi->s++;
|
272
|
+
next_non_white(pi);
|
273
|
+
if ('}' == *pi->s) {
|
274
|
+
pi->s++;
|
275
|
+
} else {
|
276
|
+
while (1) {
|
277
|
+
next_non_white(pi);
|
278
|
+
ks = read_quoted_value(pi);
|
279
|
+
next_non_white(pi);
|
280
|
+
if (':' == *pi->s) {
|
281
|
+
pi->s++;
|
282
|
+
} else {
|
283
|
+
if (pi->has_error) {
|
284
|
+
call_error("invalid format, expected :", pi, __FILE__, __LINE__);
|
285
|
+
}
|
286
|
+
raise_error("invalid format, expected :", pi->str, pi->s);
|
287
|
+
}
|
288
|
+
read_next(pi, ks);
|
289
|
+
next_non_white(pi);
|
290
|
+
if ('}' == *pi->s) {
|
291
|
+
pi->s++;
|
292
|
+
break;
|
293
|
+
} else if (',' == *pi->s) {
|
294
|
+
pi->s++;
|
295
|
+
} else {
|
296
|
+
if (pi->has_error) {
|
297
|
+
call_error("invalid format, expected , or } while in an object", pi, __FILE__, __LINE__);
|
298
|
+
}
|
299
|
+
raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
|
300
|
+
}
|
301
|
+
}
|
302
|
+
}
|
303
|
+
if (pi->has_hash_end) {
|
304
|
+
call_no_value(pi->handler, oj_hash_end_id, key);
|
305
|
+
}
|
306
|
+
}
|
307
|
+
|
308
|
+
static void
|
309
|
+
read_array(ParseInfo pi, const char *key) {
|
310
|
+
if (pi->has_array_start) {
|
311
|
+
call_no_value(pi->handler, oj_array_start_id, key);
|
312
|
+
}
|
313
|
+
pi->s++;
|
314
|
+
next_non_white(pi);
|
315
|
+
if (']' == *pi->s) {
|
316
|
+
pi->s++;
|
317
|
+
} else {
|
318
|
+
while (1) {
|
319
|
+
read_next(pi, 0);
|
320
|
+
next_non_white(pi);
|
321
|
+
if (',' == *pi->s) {
|
322
|
+
pi->s++;
|
323
|
+
} else if (']' == *pi->s) {
|
324
|
+
pi->s++;
|
325
|
+
break;
|
326
|
+
} else {
|
327
|
+
if (pi->has_error) {
|
328
|
+
call_error("invalid format, expected , or ] while in an array", pi, __FILE__, __LINE__);
|
329
|
+
}
|
330
|
+
raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
|
331
|
+
}
|
332
|
+
}
|
333
|
+
}
|
334
|
+
if (pi->has_array_end) {
|
335
|
+
call_no_value(pi->handler, oj_array_end_id, key);
|
336
|
+
}
|
337
|
+
}
|
338
|
+
|
339
|
+
static void
|
340
|
+
read_str(ParseInfo pi, const char *key) {
|
341
|
+
char *text;
|
342
|
+
|
343
|
+
text = read_quoted_value(pi);
|
344
|
+
if (pi->has_add_value) {
|
345
|
+
VALUE s = rb_str_new2(text);
|
346
|
+
|
347
|
+
#if HAS_ENCODING_SUPPORT
|
348
|
+
rb_enc_associate(s, oj_utf8_encoding);
|
349
|
+
#endif
|
350
|
+
call_add_value(pi->handler, s, key);
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
354
|
+
#ifdef RUBINIUS_RUBY
|
355
|
+
#define NUM_MAX 0x07FFFFFF
|
356
|
+
#else
|
357
|
+
#define NUM_MAX (FIXNUM_MAX >> 8)
|
358
|
+
#endif
|
359
|
+
|
360
|
+
static void
|
361
|
+
read_num(ParseInfo pi, const char *key) {
|
362
|
+
char *start = pi->s;
|
363
|
+
int64_t n = 0;
|
364
|
+
long a = 0;
|
365
|
+
long div = 1;
|
366
|
+
long e = 0;
|
367
|
+
int neg = 0;
|
368
|
+
int eneg = 0;
|
369
|
+
int big = 0;
|
370
|
+
|
371
|
+
if ('-' == *pi->s) {
|
372
|
+
pi->s++;
|
373
|
+
neg = 1;
|
374
|
+
} else if ('+' == *pi->s) {
|
375
|
+
pi->s++;
|
376
|
+
}
|
377
|
+
if ('I' == *pi->s) {
|
378
|
+
if (0 != strncmp("Infinity", pi->s, 8)) {
|
379
|
+
if (pi->has_error) {
|
380
|
+
call_error("number or other value", pi, __FILE__, __LINE__);
|
381
|
+
}
|
382
|
+
raise_error("number or other value", pi->str, pi->s);
|
383
|
+
}
|
384
|
+
pi->s += 8;
|
385
|
+
if (neg) {
|
386
|
+
if (pi->has_add_value) {
|
387
|
+
call_add_value(pi->handler, rb_float_new(-INFINITY), key);
|
388
|
+
}
|
389
|
+
} else {
|
390
|
+
if (pi->has_add_value) {
|
391
|
+
call_add_value(pi->handler, rb_float_new(INFINITY), key);
|
392
|
+
}
|
393
|
+
}
|
394
|
+
return;
|
395
|
+
}
|
396
|
+
for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
|
397
|
+
if (big) {
|
398
|
+
big++;
|
399
|
+
} else {
|
400
|
+
n = n * 10 + (*pi->s - '0');
|
401
|
+
if (NUM_MAX <= n) {
|
402
|
+
big = 1;
|
403
|
+
}
|
404
|
+
}
|
405
|
+
}
|
406
|
+
if ('.' == *pi->s) {
|
407
|
+
pi->s++;
|
408
|
+
for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
|
409
|
+
a = a * 10 + (*pi->s - '0');
|
410
|
+
div *= 10;
|
411
|
+
if (NUM_MAX <= div) {
|
412
|
+
big = 1;
|
413
|
+
}
|
414
|
+
}
|
415
|
+
}
|
416
|
+
if ('e' == *pi->s || 'E' == *pi->s) {
|
417
|
+
pi->s++;
|
418
|
+
if ('-' == *pi->s) {
|
419
|
+
pi->s++;
|
420
|
+
eneg = 1;
|
421
|
+
} else if ('+' == *pi->s) {
|
422
|
+
pi->s++;
|
423
|
+
}
|
424
|
+
for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
|
425
|
+
e = e * 10 + (*pi->s - '0');
|
426
|
+
if (NUM_MAX <= e) {
|
427
|
+
big = 1;
|
428
|
+
}
|
429
|
+
}
|
430
|
+
}
|
431
|
+
if (0 == e && 0 == a && 1 == div) {
|
432
|
+
if (big) {
|
433
|
+
char c = *pi->s;
|
434
|
+
|
435
|
+
*pi->s = '\0';
|
436
|
+
if (pi->has_add_value) {
|
437
|
+
call_add_value(pi->handler, rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(start)), key);
|
438
|
+
}
|
439
|
+
*pi->s = c;
|
440
|
+
} else {
|
441
|
+
if (neg) {
|
442
|
+
n = -n;
|
443
|
+
}
|
444
|
+
if (pi->has_add_value) {
|
445
|
+
call_add_value(pi->handler, LONG2NUM(n), key);
|
446
|
+
}
|
447
|
+
}
|
448
|
+
return;
|
449
|
+
} else { /* decimal */
|
450
|
+
if (big) {
|
451
|
+
char c = *pi->s;
|
452
|
+
|
453
|
+
*pi->s = '\0';
|
454
|
+
if (pi->has_add_value) {
|
455
|
+
call_add_value(pi->handler, rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(start)), key);
|
456
|
+
}
|
457
|
+
*pi->s = c;
|
458
|
+
} else {
|
459
|
+
double d = (double)n + (double)a / (double)div;
|
460
|
+
|
461
|
+
if (neg) {
|
462
|
+
d = -d;
|
463
|
+
}
|
464
|
+
if (1 < big) {
|
465
|
+
e += big - 1;
|
466
|
+
}
|
467
|
+
if (0 != e) {
|
468
|
+
if (eneg) {
|
469
|
+
e = -e;
|
470
|
+
}
|
471
|
+
d *= pow(10.0, e);
|
472
|
+
}
|
473
|
+
if (pi->has_add_value) {
|
474
|
+
call_add_value(pi->handler, rb_float_new(d), key);
|
475
|
+
}
|
476
|
+
}
|
477
|
+
}
|
478
|
+
}
|
479
|
+
|
480
|
+
static void
|
481
|
+
read_true(ParseInfo pi, const char *key) {
|
482
|
+
pi->s++;
|
483
|
+
if ('r' != *pi->s || 'u' != *(pi->s + 1) || 'e' != *(pi->s + 2)) {
|
484
|
+
if (pi->has_error) {
|
485
|
+
call_error("invalid format, expected 'true'", pi, __FILE__, __LINE__);
|
486
|
+
}
|
487
|
+
raise_error("invalid format, expected 'true'", pi->str, pi->s);
|
488
|
+
}
|
489
|
+
pi->s += 3;
|
490
|
+
if (pi->has_add_value) {
|
491
|
+
call_add_value(pi->handler, Qtrue, key);
|
492
|
+
}
|
493
|
+
}
|
494
|
+
|
495
|
+
static void
|
496
|
+
read_false(ParseInfo pi, const char *key) {
|
497
|
+
pi->s++;
|
498
|
+
if ('a' != *pi->s || 'l' != *(pi->s + 1) || 's' != *(pi->s + 2) || 'e' != *(pi->s + 3)) {
|
499
|
+
if (pi->has_error) {
|
500
|
+
call_error("invalid format, expected 'false'", pi, __FILE__, __LINE__);
|
501
|
+
}
|
502
|
+
raise_error("invalid format, expected 'false'", pi->str, pi->s);
|
503
|
+
}
|
504
|
+
pi->s += 4;
|
505
|
+
if (pi->has_add_value) {
|
506
|
+
call_add_value(pi->handler, Qfalse, key);
|
507
|
+
}
|
508
|
+
}
|
509
|
+
|
510
|
+
static void
|
511
|
+
read_nil(ParseInfo pi, const char *key) {
|
512
|
+
pi->s++;
|
513
|
+
if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
|
514
|
+
if (pi->has_error) {
|
515
|
+
call_error("invalid format, expected 'null'", pi, __FILE__, __LINE__);
|
516
|
+
}
|
517
|
+
raise_error("invalid format, expected 'null'", pi->str, pi->s);
|
518
|
+
}
|
519
|
+
pi->s += 3;
|
520
|
+
if (pi->has_add_value) {
|
521
|
+
call_add_value(pi->handler, Qnil, key);
|
522
|
+
}
|
523
|
+
}
|
524
|
+
|
525
|
+
static uint32_t
|
526
|
+
read_hex(ParseInfo pi, char *h) {
|
527
|
+
uint32_t b = 0;
|
528
|
+
int i;
|
529
|
+
|
530
|
+
/* TBD this can be made faster with a table */
|
531
|
+
for (i = 0; i < 4; i++, h++) {
|
532
|
+
b = b << 4;
|
533
|
+
if ('0' <= *h && *h <= '9') {
|
534
|
+
b += *h - '0';
|
535
|
+
} else if ('A' <= *h && *h <= 'F') {
|
536
|
+
b += *h - 'A' + 10;
|
537
|
+
} else if ('a' <= *h && *h <= 'f') {
|
538
|
+
b += *h - 'a' + 10;
|
539
|
+
} else {
|
540
|
+
pi->s = h;
|
541
|
+
if (pi->has_error) {
|
542
|
+
call_error("invalid hex character", pi, __FILE__, __LINE__);
|
543
|
+
}
|
544
|
+
raise_error("invalid hex character", pi->str, pi->s);
|
545
|
+
}
|
546
|
+
}
|
547
|
+
return b;
|
548
|
+
}
|
549
|
+
|
550
|
+
static char*
|
551
|
+
unicode_to_chars(ParseInfo pi, char *t, uint32_t code) {
|
552
|
+
if (0x0000007F >= code) {
|
553
|
+
*t = (char)code;
|
554
|
+
} else if (0x000007FF >= code) {
|
555
|
+
*t++ = 0xC0 | (code >> 6);
|
556
|
+
*t = 0x80 | (0x3F & code);
|
557
|
+
} else if (0x0000FFFF >= code) {
|
558
|
+
*t++ = 0xE0 | (code >> 12);
|
559
|
+
*t++ = 0x80 | ((code >> 6) & 0x3F);
|
560
|
+
*t = 0x80 | (0x3F & code);
|
561
|
+
} else if (0x001FFFFF >= code) {
|
562
|
+
*t++ = 0xF0 | (code >> 18);
|
563
|
+
*t++ = 0x80 | ((code >> 12) & 0x3F);
|
564
|
+
*t++ = 0x80 | ((code >> 6) & 0x3F);
|
565
|
+
*t = 0x80 | (0x3F & code);
|
566
|
+
} else if (0x03FFFFFF >= code) {
|
567
|
+
*t++ = 0xF8 | (code >> 24);
|
568
|
+
*t++ = 0x80 | ((code >> 18) & 0x3F);
|
569
|
+
*t++ = 0x80 | ((code >> 12) & 0x3F);
|
570
|
+
*t++ = 0x80 | ((code >> 6) & 0x3F);
|
571
|
+
*t = 0x80 | (0x3F & code);
|
572
|
+
} else if (0x7FFFFFFF >= code) {
|
573
|
+
*t++ = 0xFC | (code >> 30);
|
574
|
+
*t++ = 0x80 | ((code >> 24) & 0x3F);
|
575
|
+
*t++ = 0x80 | ((code >> 18) & 0x3F);
|
576
|
+
*t++ = 0x80 | ((code >> 12) & 0x3F);
|
577
|
+
*t++ = 0x80 | ((code >> 6) & 0x3F);
|
578
|
+
*t = 0x80 | (0x3F & code);
|
579
|
+
} else {
|
580
|
+
if (pi->has_error) {
|
581
|
+
call_error("invalid Unicode", pi, __FILE__, __LINE__);
|
582
|
+
}
|
583
|
+
raise_error("invalid Unicode", pi->str, pi->s);
|
584
|
+
}
|
585
|
+
return t;
|
586
|
+
}
|
587
|
+
|
588
|
+
/* Assume the value starts immediately and goes until the quote character is
|
589
|
+
* reached again. Do not read the character after the terminating quote.
|
590
|
+
*/
|
591
|
+
static char*
|
592
|
+
read_quoted_value(ParseInfo pi) {
|
593
|
+
char *value = 0;
|
594
|
+
char *h = pi->s; /* head */
|
595
|
+
char *t = h; /* tail */
|
596
|
+
uint32_t code;
|
597
|
+
|
598
|
+
h++; /* skip quote character */
|
599
|
+
t++;
|
600
|
+
value = h;
|
601
|
+
for (; '"' != *h; h++, t++) {
|
602
|
+
if ('\0' == *h) {
|
603
|
+
pi->s = h;
|
604
|
+
raise_error("quoted string not terminated", pi->str, pi->s);
|
605
|
+
} else if ('\\' == *h) {
|
606
|
+
h++;
|
607
|
+
switch (*h) {
|
608
|
+
case 'n': *t = '\n'; break;
|
609
|
+
case 'r': *t = '\r'; break;
|
610
|
+
case 't': *t = '\t'; break;
|
611
|
+
case 'f': *t = '\f'; break;
|
612
|
+
case 'b': *t = '\b'; break;
|
613
|
+
case '"': *t = '"'; break;
|
614
|
+
case '/': *t = '/'; break;
|
615
|
+
case '\\': *t = '\\'; break;
|
616
|
+
case 'u':
|
617
|
+
h++;
|
618
|
+
code = read_hex(pi, h);
|
619
|
+
h += 3;
|
620
|
+
if (0x0000D800 <= code && code <= 0x0000DFFF) {
|
621
|
+
uint32_t c1 = (code - 0x0000D800) & 0x000003FF;
|
622
|
+
uint32_t c2;
|
623
|
+
|
624
|
+
h++;
|
625
|
+
if ('\\' != *h || 'u' != *(h + 1)) {
|
626
|
+
pi->s = h;
|
627
|
+
if (pi->has_error) {
|
628
|
+
call_error("invalid escaped character", pi, __FILE__, __LINE__);
|
629
|
+
}
|
630
|
+
raise_error("invalid escaped character", pi->str, pi->s);
|
631
|
+
}
|
632
|
+
h += 2;
|
633
|
+
c2 = read_hex(pi, h);
|
634
|
+
h += 3;
|
635
|
+
c2 = (c2 - 0x0000DC00) & 0x000003FF;
|
636
|
+
code = ((c1 << 10) | c2) + 0x00010000;
|
637
|
+
}
|
638
|
+
t = unicode_to_chars(pi, t, code);
|
639
|
+
break;
|
640
|
+
default:
|
641
|
+
pi->s = h;
|
642
|
+
if (pi->has_error) {
|
643
|
+
call_error("invalid escaped character", pi, __FILE__, __LINE__);
|
644
|
+
}
|
645
|
+
raise_error("invalid escaped character", pi->str, pi->s);
|
646
|
+
break;
|
647
|
+
}
|
648
|
+
} else if (t != h) {
|
649
|
+
*t = *h;
|
650
|
+
}
|
651
|
+
}
|
652
|
+
*t = '\0'; /* terminate value */
|
653
|
+
pi->s = h + 1;
|
654
|
+
|
655
|
+
return value;
|
656
|
+
}
|
657
|
+
|
658
|
+
inline static int
|
659
|
+
respond_to(VALUE obj, ID method) {
|
660
|
+
#ifdef JRUBY_RUBY
|
661
|
+
/* There is a bug in JRuby where rb_respond_to() returns true (1) even if
|
662
|
+
* a method is private. */
|
663
|
+
{
|
664
|
+
VALUE args[1];
|
665
|
+
|
666
|
+
*args = ID2SYM(method);
|
667
|
+
return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args));
|
668
|
+
}
|
669
|
+
#else
|
670
|
+
return rb_respond_to(obj, method);
|
671
|
+
#endif
|
672
|
+
}
|
673
|
+
|
674
|
+
void
|
675
|
+
oj_saj_parse(VALUE handler, char *json) {
|
676
|
+
VALUE obj = Qnil;
|
677
|
+
struct _ParseInfo pi;
|
678
|
+
|
679
|
+
if (0 == json) {
|
680
|
+
if (pi.has_error) {
|
681
|
+
call_error("Invalid arg, xml string can not be null", &pi, __FILE__, __LINE__);
|
682
|
+
}
|
683
|
+
raise_error("Invalid arg, xml string can not be null", json, 0);
|
684
|
+
}
|
685
|
+
/* skip UTF-8 BOM if present */
|
686
|
+
if (0xEF == (uint8_t)*json && 0xBB == (uint8_t)json[1] && 0xBF == (uint8_t)json[2]) {
|
687
|
+
json += 3;
|
688
|
+
}
|
689
|
+
/* initialize parse info */
|
690
|
+
pi.str = json;
|
691
|
+
pi.s = json;
|
692
|
+
#if IS_WINDOWS
|
693
|
+
pi.stack_min = (void*)((char*)&obj - (512 * 1024)); /* assume a 1M stack and give half to ruby */
|
694
|
+
#else
|
695
|
+
{
|
696
|
+
struct rlimit lim;
|
697
|
+
|
698
|
+
if (0 == getrlimit(RLIMIT_STACK, &lim)) {
|
699
|
+
pi.stack_min = (void*)((char*)&obj - (lim.rlim_cur / 4 * 3)); /* let 3/4ths of the stack be used only */
|
700
|
+
} else {
|
701
|
+
pi.stack_min = 0; /* indicates not to check stack limit */
|
702
|
+
}
|
703
|
+
}
|
704
|
+
#endif
|
705
|
+
pi.handler = handler;
|
706
|
+
pi.has_hash_start = respond_to(handler, oj_hash_start_id);
|
707
|
+
pi.has_hash_end = respond_to(handler, oj_hash_end_id);
|
708
|
+
pi.has_array_start = respond_to(handler, oj_array_start_id);
|
709
|
+
pi.has_array_end = respond_to(handler, oj_array_end_id);
|
710
|
+
pi.has_add_value = respond_to(handler, oj_add_value_id);
|
711
|
+
pi.has_error = respond_to(handler, oj_error_id);
|
712
|
+
read_next(&pi, 0);
|
713
|
+
next_non_white(&pi);
|
714
|
+
if ('\0' != *pi.s) {
|
715
|
+
if (pi.has_error) {
|
716
|
+
call_error("invalid format, extra characters", &pi, __FILE__, __LINE__);
|
717
|
+
} else {
|
718
|
+
raise_error("invalid format, extra characters", pi.str, pi.s);
|
719
|
+
}
|
720
|
+
}
|
721
|
+
}
|
722
|
+
|
723
|
+
|
724
|
+
#if 0
|
725
|
+
static void
|
726
|
+
cx_add(CX cx, VALUE obj, const char *key) {
|
727
|
+
if (0 == cx->cur) {
|
728
|
+
cx->cur = cx->stack;
|
729
|
+
*cx->cur = obj;
|
730
|
+
} else {
|
731
|
+
if (0 != key) {
|
732
|
+
VALUE ks = rb_str_new2(key);
|
733
|
+
#if HAS_ENCODING_SUPPORT
|
734
|
+
rb_enc_associate(ks, oj_utf8_encoding);
|
735
|
+
#endif
|
736
|
+
rb_hash_aset(*cx->cur, ks, obj);
|
737
|
+
} else {
|
738
|
+
rb_ary_push(*cx->cur, obj);
|
739
|
+
}
|
740
|
+
}
|
741
|
+
}
|
742
|
+
|
743
|
+
static void
|
744
|
+
cx_push(CX cx, VALUE obj, const char *key) {
|
745
|
+
if (0 == cx->cur) {
|
746
|
+
cx->cur = cx->stack;
|
747
|
+
} else {
|
748
|
+
if (cx->end <= cx->cur) {
|
749
|
+
rb_raise(oj_parse_error_class, "too deeply nested");
|
750
|
+
}
|
751
|
+
cx_add(cx, obj, key);
|
752
|
+
cx->cur++;
|
753
|
+
}
|
754
|
+
*cx->cur = obj;
|
755
|
+
}
|
756
|
+
|
757
|
+
static void
|
758
|
+
hash_start(void *context, const char *key) {
|
759
|
+
cx_push((CX)context, rb_hash_new(), key);
|
760
|
+
}
|
761
|
+
|
762
|
+
static void
|
763
|
+
col_end(void *context, const char *key) {
|
764
|
+
((CX)context)->cur--;
|
765
|
+
}
|
766
|
+
|
767
|
+
static void
|
768
|
+
array_start(void *context, const char *key) {
|
769
|
+
cx_push((CX)context, rb_ary_new(), key);
|
770
|
+
}
|
771
|
+
|
772
|
+
static void
|
773
|
+
add_str(void *context, const char *str, const char *key) {
|
774
|
+
VALUE s;
|
775
|
+
|
776
|
+
s = rb_str_new2(str);
|
777
|
+
#if HAS_ENCODING_SUPPORT
|
778
|
+
rb_enc_associate(s, oj_utf8_encoding);
|
779
|
+
#endif
|
780
|
+
cx_add((CX)context, s, key);
|
781
|
+
}
|
782
|
+
|
783
|
+
static void
|
784
|
+
add_big(void *context, const char *str, const char *key) {
|
785
|
+
cx_add((CX)context, rb_funcall(oj_bigdecimal_class, oj_new_id, 1, rb_str_new2(str)), key);
|
786
|
+
}
|
787
|
+
|
788
|
+
static void
|
789
|
+
add_float(void *context, double num, const char *key) {
|
790
|
+
cx_add((CX)context, rb_float_new(num), key);
|
791
|
+
}
|
792
|
+
|
793
|
+
static void
|
794
|
+
add_fixnum(void *context, int64_t num, const char *key) {
|
795
|
+
cx_add((CX)context, LONG2NUM(num), key);
|
796
|
+
}
|
797
|
+
|
798
|
+
static void
|
799
|
+
add_true(void *context, const char *key) {
|
800
|
+
cx_add((CX)context, Qtrue, key);
|
801
|
+
}
|
802
|
+
|
803
|
+
static void
|
804
|
+
add_false(void *context, const char *key) {
|
805
|
+
cx_add((CX)context, Qfalse, key);
|
806
|
+
}
|
807
|
+
|
808
|
+
static void
|
809
|
+
add_nil(void *context, const char *key) {
|
810
|
+
cx_add((CX)context, Qnil, key);
|
811
|
+
}
|
812
|
+
#endif
|