oj_windows 3.16.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE +21 -0
- data/README.md +164 -0
- data/ext/oj_windows/buf.h +85 -0
- data/ext/oj_windows/cache.c +339 -0
- data/ext/oj_windows/cache.h +22 -0
- data/ext/oj_windows/cache8.c +105 -0
- data/ext/oj_windows/cache8.h +21 -0
- data/ext/oj_windows/circarray.c +64 -0
- data/ext/oj_windows/circarray.h +22 -0
- data/ext/oj_windows/code.c +214 -0
- data/ext/oj_windows/code.h +40 -0
- data/ext/oj_windows/compat.c +239 -0
- data/ext/oj_windows/custom.c +1074 -0
- data/ext/oj_windows/debug.c +126 -0
- data/ext/oj_windows/dump.c +1556 -0
- data/ext/oj_windows/dump.h +110 -0
- data/ext/oj_windows/dump_compat.c +901 -0
- data/ext/oj_windows/dump_leaf.c +162 -0
- data/ext/oj_windows/dump_object.c +710 -0
- data/ext/oj_windows/dump_strict.c +405 -0
- data/ext/oj_windows/encode.h +16 -0
- data/ext/oj_windows/err.c +57 -0
- data/ext/oj_windows/err.h +67 -0
- data/ext/oj_windows/extconf.rb +77 -0
- data/ext/oj_windows/fast.c +1710 -0
- data/ext/oj_windows/intern.c +325 -0
- data/ext/oj_windows/intern.h +22 -0
- data/ext/oj_windows/mem.c +320 -0
- data/ext/oj_windows/mem.h +53 -0
- data/ext/oj_windows/mimic_json.c +919 -0
- data/ext/oj_windows/object.c +726 -0
- data/ext/oj_windows/odd.c +245 -0
- data/ext/oj_windows/odd.h +43 -0
- data/ext/oj_windows/oj.c +2097 -0
- data/ext/oj_windows/oj.h +420 -0
- data/ext/oj_windows/parse.c +1317 -0
- data/ext/oj_windows/parse.h +113 -0
- data/ext/oj_windows/parser.c +1600 -0
- data/ext/oj_windows/parser.h +103 -0
- data/ext/oj_windows/rails.c +1484 -0
- data/ext/oj_windows/rails.h +18 -0
- data/ext/oj_windows/reader.c +222 -0
- data/ext/oj_windows/reader.h +137 -0
- data/ext/oj_windows/resolve.c +80 -0
- data/ext/oj_windows/resolve.h +12 -0
- data/ext/oj_windows/rxclass.c +144 -0
- data/ext/oj_windows/rxclass.h +26 -0
- data/ext/oj_windows/saj.c +675 -0
- data/ext/oj_windows/saj2.c +584 -0
- data/ext/oj_windows/saj2.h +23 -0
- data/ext/oj_windows/scp.c +187 -0
- data/ext/oj_windows/simd.h +47 -0
- data/ext/oj_windows/sparse.c +946 -0
- data/ext/oj_windows/stream_writer.c +329 -0
- data/ext/oj_windows/strict.c +189 -0
- data/ext/oj_windows/string_writer.c +517 -0
- data/ext/oj_windows/trace.c +72 -0
- data/ext/oj_windows/trace.h +55 -0
- data/ext/oj_windows/usual.c +1218 -0
- data/ext/oj_windows/usual.h +69 -0
- data/ext/oj_windows/util.c +136 -0
- data/ext/oj_windows/util.h +20 -0
- data/ext/oj_windows/val_stack.c +101 -0
- data/ext/oj_windows/val_stack.h +151 -0
- data/ext/oj_windows/validate.c +46 -0
- data/ext/oj_windows/wab.c +584 -0
- data/lib/oj/active_support_helper.rb +39 -0
- data/lib/oj/bag.rb +95 -0
- data/lib/oj/easy_hash.rb +52 -0
- data/lib/oj/error.rb +21 -0
- data/lib/oj/json.rb +188 -0
- data/lib/oj/mimic.rb +301 -0
- data/lib/oj/saj.rb +80 -0
- data/lib/oj/schandler.rb +143 -0
- data/lib/oj/state.rb +135 -0
- data/lib/oj/version.rb +4 -0
- data/lib/oj_windows/active_support_helper.rb +39 -0
- data/lib/oj_windows/bag.rb +95 -0
- data/lib/oj_windows/easy_hash.rb +52 -0
- data/lib/oj_windows/error.rb +21 -0
- data/lib/oj_windows/json.rb +188 -0
- data/lib/oj_windows/mimic.rb +301 -0
- data/lib/oj_windows/saj.rb +80 -0
- data/lib/oj_windows/schandler.rb +143 -0
- data/lib/oj_windows/state.rb +135 -0
- data/lib/oj_windows/version.rb +4 -0
- data/lib/oj_windows.rb +15 -0
- data/pages/Advanced.md +38 -0
- data/pages/Compatibility.md +49 -0
- data/pages/Custom.md +37 -0
- data/pages/Encoding.md +61 -0
- data/pages/InstallOptions.md +20 -0
- data/pages/JsonGem.md +60 -0
- data/pages/Modes.md +94 -0
- data/pages/Options.md +339 -0
- data/pages/Parser.md +134 -0
- data/pages/Rails.md +85 -0
- data/pages/Security.md +43 -0
- data/pages/WAB.md +12 -0
- metadata +242 -0
|
@@ -0,0 +1,1317 @@
|
|
|
1
|
+
// Copyright (c) 2013 Peter Ohler. All rights reserved.
|
|
2
|
+
// Licensed under the MIT License. See LICENSE file in the project root for license details.
|
|
3
|
+
|
|
4
|
+
#include "parse.h"
|
|
5
|
+
|
|
6
|
+
#include <math.h>
|
|
7
|
+
#include <ruby/util.h>
|
|
8
|
+
#include <stdio.h>
|
|
9
|
+
#include <stdlib.h>
|
|
10
|
+
#include <string.h>
|
|
11
|
+
#if !IS_WINDOWS
|
|
12
|
+
#include <unistd.h>
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
#include "buf.h"
|
|
16
|
+
#include "encode.h"
|
|
17
|
+
#include "mem.h"
|
|
18
|
+
#include "oj.h"
|
|
19
|
+
#include "rxclass.h"
|
|
20
|
+
#include "simd.h"
|
|
21
|
+
#include "val_stack.h"
|
|
22
|
+
|
|
23
|
+
// Workaround in case INFINITY is not defined in math.h or if the OS is CentOS
|
|
24
|
+
#ifdef _MSC_VER
#define OJ_INFINITY HUGE_VAL
#ifndef NAN
#include <float.h>
/*
 * Fallback used only when <math.h> did not already provide NAN. The original
 * note here records that spelling the value as 0.0 / 0.0 triggers an error
 * with this compiler, so the value is taken from a fixed bit pattern instead.
 *
 * The pattern is stored in a union so the double is read with the alignment
 * of a double and without casting a pointer to an unrelated type. The bits
 * are the same as the former {0xffffffff, 0x7fffffff} word pair on a
 * little-endian target: sign clear, exponent all ones, mantissa all ones.
 */
static const union {
    unsigned long long bits;
    double             value;
} oj_nan_fallback = {0x7fffffffffffffffULL};
#define NAN (oj_nan_fallback.value)
#endif
#else
#define OJ_INFINITY (1.0 / 0.0)
#endif
|
|
38
|
+
|
|
39
|
+
// #define EXP_MAX 1023
|
|
40
|
+
#define EXP_MAX 100000
|
|
41
|
+
#define DEC_MAX 15
|
|
42
|
+
|
|
43
|
+
/*
 * Advances pi->cur past any run of space, tab, form feed, newline and
 * carriage return characters. On return pi->cur rests on the first byte that
 * is none of those (which includes a terminating '\0').
 */
static void next_non_white(ParseInfo pi) {
    for (;;) {
        const char c = *pi->cur;

        if (' ' != c && '\t' != c && '\f' != c && '\n' != c && '\r' != c) {
            return;
        }
        pi->cur++;
    }
}
|
|
55
|
+
|
|
56
|
+
/*
 * Skips a comment. On entry pi->cur is on the character that decides the
 * comment style (presumably the caller has already consumed the leading '/'
 * -- confirm against the call site):
 *
 *   '*'  block comment; pi->cur is left just past the closing star-slash.
 *   '/'  line comment; pi->cur is left on the '\n', '\r', '\f' or '\0' that
 *        ends it.
 *
 * Anything else is reported as "invalid comment format". A block comment
 * that reaches pi->end without being closed is reported as "comment not
 * terminated".
 */
static void skip_comment(ParseInfo pi) {
    if ('*' == *pi->cur) {
        pi->cur++;
        for (; pi->cur < pi->end; pi->cur++) {
            if ('*' == *pi->cur && '/' == *(pi->cur + 1)) {
                pi->cur += 2;
                return;
            }
        }
        // The loop only falls through when the input ran out before the
        // closing marker was seen. The check used to sit inside the loop body
        // where its condition could never be true, so the error was never
        // raised.
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "comment not terminated");
    } else if ('/' == *pi->cur) {
        for (; 1; pi->cur++) {
            switch (*pi->cur) {
            case '\n':
            case '\r':
            case '\f':
            case '\0': return;
            default: break;
            }
        }
    } else {
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid comment format");
    }
}
|
|
82
|
+
|
|
83
|
+
/*
 * Hands a completed value to the callback that matches the current parse
 * position.
 *
 *   - empty stack: pi->add_value (top level value).
 *   - top of stack expects an array element: pi->array_append_value, after
 *     which a comma (or close) is expected.
 *   - top of stack expects a hash value: pi->hash_set_value, after which a
 *     comma (or close) is expected. A key that lies outside the part of the
 *     JSON buffer consumed so far is released and cleared.
 *   - any other state: parse error naming what was expected instead.
 */
static void add_value(ParseInfo pi, VALUE rval) {
    Val top = stack_peek(&pi->stack);

    if (0 == top) {
        pi->add_value(pi, rval);
        return;
    }
    if (NEXT_ARRAY_NEW == top->next || NEXT_ARRAY_ELEMENT == top->next) {
        pi->array_append_value(pi, rval);
        top->next = NEXT_ARRAY_COMMA;
    } else if (NEXT_HASH_VALUE == top->next) {
        pi->hash_set_value(pi, top, rval);
        // Only keys that do not point into [pi->json, pi->cur] are freed here.
        if (0 != top->key && 0 < top->klen && (top->key < pi->json || pi->cur < top->key)) {
            OJ_R_FREE((char *)top->key);
            top->key = 0;
        }
        top->next = NEXT_HASH_COMMA;
    } else {
        oj_set_error_at(pi,
                        oj_parse_error_class,
                        __FILE__,
                        __LINE__,
                        "expected %s",
                        oj_stack_next_string(top->next));
    }
}
|
|
120
|
+
|
|
121
|
+
/*
 * Matches the remainder of the literal "null" at pi->cur (the three
 * characters "ull") and adds nil on success. One character is consumed per
 * comparison, including the one that fails to match.
 */
static void read_null(ParseInfo pi) {
    const char *rest = "ull";

    for (; '\0' != *rest; rest++) {
        if (*rest != *pi->cur++) {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected null");
            return;
        }
    }
    add_value(pi, Qnil);
}
|
|
128
|
+
|
|
129
|
+
/*
 * Matches the remainder of the literal "true" at pi->cur (the three
 * characters "rue") and adds true on success. One character is consumed per
 * comparison, including the one that fails to match.
 */
static void read_true(ParseInfo pi) {
    const char *rest = "rue";

    for (; '\0' != *rest; rest++) {
        if (*rest != *pi->cur++) {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected true");
            return;
        }
    }
    add_value(pi, Qtrue);
}
|
|
136
|
+
|
|
137
|
+
/*
 * Matches the remainder of the literal "false" at pi->cur (the four
 * characters "alse") and adds false on success. One character is consumed
 * per comparison, including the one that fails to match.
 */
static void read_false(ParseInfo pi) {
    const char *rest = "alse";

    for (; '\0' != *rest; rest++) {
        if (*rest != *pi->cur++) {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "expected false");
            return;
        }
    }
    add_value(pi, Qfalse);
}
|
|
144
|
+
|
|
145
|
+
/*
 * Converts the four hexadecimal digits at h (either case) into a number.
 *
 * Returns the value, or 0 after recording an "invalid hex character" parse
 * error on pi when a non-hex character is met. Because 0 is also a valid
 * result, callers have to inspect the error state to tell the cases apart.
 * Reading stops at the first invalid character.
 */
static uint32_t read_hex(ParseInfo pi, const char *h) {
    uint32_t    value = 0;
    const char *stop  = h + 4;

    while (h < stop) {
        const char c = *h++;
        uint32_t   nibble;

        if ('0' <= c && c <= '9') {
            nibble = c - '0';
        } else if ('A' <= c && c <= 'F') {
            nibble = c - 'A' + 10;
        } else if ('a' <= c && c <= 'f') {
            nibble = c - 'a' + 10;
        } else {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid hex character");
            return 0;
        }
        value = (value << 4) + nibble;
    }
    return value;
}
|
|
164
|
+
|
|
165
|
+
/*
 * Appends code to buf as a multi-byte sequence: one lead byte carrying the
 * high bits followed by continuation bytes of six bits each (0x80 | bits).
 * Values up to 0x7F are appended as a single byte. Values up to 0x7FFFFFFF
 * are accepted (up to six bytes); anything larger records an "invalid
 * Unicode character" parse error and appends nothing.
 */
static void unicode_to_chars(ParseInfo pi, Buf buf, uint32_t code) {
    uint32_t lead;   // marker bits of the first byte
    int      extra;  // number of continuation bytes
    int      shift;

    if (code <= 0x0000007F) {
        buf_append(buf, (char)code);
        return;
    }
    if (code <= 0x000007FF) {
        lead  = 0xC0;
        extra = 1;
    } else if (code <= 0x0000FFFF) {
        lead  = 0xE0;
        extra = 2;
    } else if (code <= 0x001FFFFF) {
        lead  = 0xF0;
        extra = 3;
    } else if (code <= 0x03FFFFFF) {
        lead  = 0xF8;
        extra = 4;
    } else if (code <= 0x7FFFFFFF) {
        lead  = 0xFC;
        extra = 5;
    } else {
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid Unicode character");
        return;
    }
    buf_append(buf, lead | (code >> (6 * extra)));
    for (shift = 6 * (extra - 1); 0 <= shift; shift -= 6) {
        buf_append(buf, 0x80 | ((code >> shift) & 0x3F));
    }
}
|
|
197
|
+
|
|
198
|
+
// Lookup table indexed by byte value: 1 for the three bytes that end a plain
// run inside a quoted string ('\0', '"' and '\\'), 0 for everything else.
static const unsigned char end_of_scan_string[256] = {
    ['\0'] = 1,
    ['"']  = 1,
    ['\\'] = 1,
};

/*
 * Byte-at-a-time scanner. Returns a pointer to the first '\0', '"' or '\\'
 * in [str, end), or a pointer equal to end (or to str itself when str is
 * already at or beyond end) if there is none.
 */
static inline const char *scan_string_noSIMD(const char *str, const char *end) {
    while (str < end && !end_of_scan_string[(unsigned char)*str]) {
        str++;
    }
    return str;
}
|
|
216
|
+
|
|
217
|
+
#ifdef HAVE_SIMD_SSE4_2
|
|
218
|
+
// Optimized SIMD string scanner using SSE4.2 instructions
|
|
219
|
+
// Uses prefetching and processes multiple chunks in parallel to reduce latency
|
|
220
|
+
static inline const char *scan_string_SSE42(const char *str, const char *end) {
|
|
221
|
+
static const char chars[16] = "\x00\\\"";
|
|
222
|
+
const __m128i terminate = _mm_loadu_si128((const __m128i *)&chars[0]);
|
|
223
|
+
const char *safe_end_64 = end - 64;
|
|
224
|
+
const char *safe_end_16 = end - 16;
|
|
225
|
+
|
|
226
|
+
// Process 64 bytes at a time with parallel SIMD operations
|
|
227
|
+
// This reduces pipeline stalls and improves instruction-level parallelism
|
|
228
|
+
while (str <= safe_end_64) {
|
|
229
|
+
// Prefetch next cache line for better memory throughput
|
|
230
|
+
__builtin_prefetch(str + 64, 0, 0);
|
|
231
|
+
|
|
232
|
+
// Load and compare 4 chunks in parallel
|
|
233
|
+
const __m128i chunk0 = _mm_loadu_si128((const __m128i *)(str));
|
|
234
|
+
const __m128i chunk1 = _mm_loadu_si128((const __m128i *)(str + 16));
|
|
235
|
+
const __m128i chunk2 = _mm_loadu_si128((const __m128i *)(str + 32));
|
|
236
|
+
const __m128i chunk3 = _mm_loadu_si128((const __m128i *)(str + 48));
|
|
237
|
+
|
|
238
|
+
const int r0 = _mm_cmpestri(terminate,
|
|
239
|
+
3,
|
|
240
|
+
chunk0,
|
|
241
|
+
16,
|
|
242
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
243
|
+
if (__builtin_expect(r0 != 16, 0))
|
|
244
|
+
return str + r0;
|
|
245
|
+
|
|
246
|
+
const int r1 = _mm_cmpestri(terminate,
|
|
247
|
+
3,
|
|
248
|
+
chunk1,
|
|
249
|
+
16,
|
|
250
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
251
|
+
if (__builtin_expect(r1 != 16, 0))
|
|
252
|
+
return str + 16 + r1;
|
|
253
|
+
|
|
254
|
+
const int r2 = _mm_cmpestri(terminate,
|
|
255
|
+
3,
|
|
256
|
+
chunk2,
|
|
257
|
+
16,
|
|
258
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
259
|
+
if (__builtin_expect(r2 != 16, 0))
|
|
260
|
+
return str + 32 + r2;
|
|
261
|
+
|
|
262
|
+
const int r3 = _mm_cmpestri(terminate,
|
|
263
|
+
3,
|
|
264
|
+
chunk3,
|
|
265
|
+
16,
|
|
266
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
267
|
+
if (__builtin_expect(r3 != 16, 0))
|
|
268
|
+
return str + 48 + r3;
|
|
269
|
+
|
|
270
|
+
str += 64;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// Handle remaining 16-byte chunks
|
|
274
|
+
for (; str <= safe_end_16; str += 16) {
|
|
275
|
+
const __m128i string = _mm_loadu_si128((const __m128i *)str);
|
|
276
|
+
const int r = _mm_cmpestri(terminate,
|
|
277
|
+
3,
|
|
278
|
+
string,
|
|
279
|
+
16,
|
|
280
|
+
_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT);
|
|
281
|
+
if (r != 16)
|
|
282
|
+
return str + r;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
return scan_string_noSIMD(str, end);
|
|
286
|
+
}
|
|
287
|
+
#endif
|
|
288
|
+
|
|
289
|
+
#ifdef HAVE_SIMD_SSE2
|
|
290
|
+
// Optimized SSE2 string scanner (fallback for older x86_64 CPUs)
|
|
291
|
+
// Uses SSE2 instructions with prefetching and parallel processing
|
|
292
|
+
static inline const char *scan_string_SSE2(const char *str, const char *end) {
|
|
293
|
+
const char *safe_end_64 = end - 64;
|
|
294
|
+
const char *safe_end_16 = end - 16;
|
|
295
|
+
|
|
296
|
+
// Create comparison vectors for our three special characters
|
|
297
|
+
const __m128i null_char = _mm_setzero_si128();
|
|
298
|
+
const __m128i backslash = _mm_set1_epi8('\\');
|
|
299
|
+
const __m128i quote = _mm_set1_epi8('"');
|
|
300
|
+
|
|
301
|
+
// Process 64 bytes at a time for better throughput
|
|
302
|
+
while (str <= safe_end_64) {
|
|
303
|
+
__builtin_prefetch(str + 64, 0, 0);
|
|
304
|
+
|
|
305
|
+
// Load 4 chunks
|
|
306
|
+
const __m128i chunk0 = _mm_loadu_si128((const __m128i *)(str));
|
|
307
|
+
const __m128i chunk1 = _mm_loadu_si128((const __m128i *)(str + 16));
|
|
308
|
+
const __m128i chunk2 = _mm_loadu_si128((const __m128i *)(str + 32));
|
|
309
|
+
const __m128i chunk3 = _mm_loadu_si128((const __m128i *)(str + 48));
|
|
310
|
+
|
|
311
|
+
// Compare all chunks (allows CPU to parallelize)
|
|
312
|
+
const __m128i cmp0 = _mm_or_si128(
|
|
313
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk0, null_char), _mm_cmpeq_epi8(chunk0, backslash)),
|
|
314
|
+
_mm_cmpeq_epi8(chunk0, quote));
|
|
315
|
+
const __m128i cmp1 = _mm_or_si128(
|
|
316
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk1, null_char), _mm_cmpeq_epi8(chunk1, backslash)),
|
|
317
|
+
_mm_cmpeq_epi8(chunk1, quote));
|
|
318
|
+
const __m128i cmp2 = _mm_or_si128(
|
|
319
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk2, null_char), _mm_cmpeq_epi8(chunk2, backslash)),
|
|
320
|
+
_mm_cmpeq_epi8(chunk2, quote));
|
|
321
|
+
const __m128i cmp3 = _mm_or_si128(
|
|
322
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk3, null_char), _mm_cmpeq_epi8(chunk3, backslash)),
|
|
323
|
+
_mm_cmpeq_epi8(chunk3, quote));
|
|
324
|
+
|
|
325
|
+
// Convert to masks
|
|
326
|
+
int mask0 = _mm_movemask_epi8(cmp0);
|
|
327
|
+
if (__builtin_expect(mask0 != 0, 0))
|
|
328
|
+
return str + __builtin_ctz(mask0);
|
|
329
|
+
|
|
330
|
+
int mask1 = _mm_movemask_epi8(cmp1);
|
|
331
|
+
if (__builtin_expect(mask1 != 0, 0))
|
|
332
|
+
return str + 16 + __builtin_ctz(mask1);
|
|
333
|
+
|
|
334
|
+
int mask2 = _mm_movemask_epi8(cmp2);
|
|
335
|
+
if (__builtin_expect(mask2 != 0, 0))
|
|
336
|
+
return str + 32 + __builtin_ctz(mask2);
|
|
337
|
+
|
|
338
|
+
int mask3 = _mm_movemask_epi8(cmp3);
|
|
339
|
+
if (__builtin_expect(mask3 != 0, 0))
|
|
340
|
+
return str + 48 + __builtin_ctz(mask3);
|
|
341
|
+
|
|
342
|
+
str += 64;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Handle remaining 16-byte chunks
|
|
346
|
+
for (; str <= safe_end_16; str += 16) {
|
|
347
|
+
const __m128i chunk = _mm_loadu_si128((const __m128i *)str);
|
|
348
|
+
const __m128i matches = _mm_or_si128(
|
|
349
|
+
_mm_or_si128(_mm_cmpeq_epi8(chunk, null_char), _mm_cmpeq_epi8(chunk, backslash)),
|
|
350
|
+
_mm_cmpeq_epi8(chunk, quote));
|
|
351
|
+
int mask = _mm_movemask_epi8(matches);
|
|
352
|
+
if (mask != 0)
|
|
353
|
+
return str + __builtin_ctz(mask);
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
return scan_string_noSIMD(str, end);
|
|
357
|
+
}
|
|
358
|
+
#endif
|
|
359
|
+
|
|
360
|
+
static const char *(*scan_func)(const char *str, const char *end) = scan_string_noSIMD;
|
|
361
|
+
|
|
362
|
+
/*
 * Selects the string scanner used through scan_func. The choice is made at
 * compile time: the SSE4.2 scanner when HAVE_SIMD_SSE4_2 is defined, else the
 * SSE2 scanner when HAVE_SIMD_SSE2 is defined, else scan_func is left
 * untouched.
 */
void oj_scanner_init(void) {
#if defined(HAVE_SIMD_SSE4_2)
    scan_func = scan_string_SSE42;
#elif defined(HAVE_SIMD_SSE2)
    scan_func = scan_string_SSE2;
#endif
    // Note: ARM NEON string scanning would be added here if needed
}
|
|
370
|
+
|
|
371
|
+
// entered at /
|
|
372
|
+
static void read_escaped_str(ParseInfo pi, const char *start) {
|
|
373
|
+
struct _buf buf;
|
|
374
|
+
const char *s;
|
|
375
|
+
int cnt = (int)(pi->cur - start);
|
|
376
|
+
uint32_t code;
|
|
377
|
+
Val parent = stack_peek(&pi->stack);
|
|
378
|
+
|
|
379
|
+
buf_init(&buf);
|
|
380
|
+
buf_append_string(&buf, start, cnt);
|
|
381
|
+
|
|
382
|
+
for (s = pi->cur; '"' != *s;) {
|
|
383
|
+
const char *scanned = scan_func(s, pi->end);
|
|
384
|
+
if (scanned >= pi->end || '\0' == *scanned) {
|
|
385
|
+
// if (scanned >= pi->end) {
|
|
386
|
+
oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
|
|
387
|
+
buf_cleanup(&buf);
|
|
388
|
+
return;
|
|
389
|
+
}
|
|
390
|
+
buf_append_string(&buf, s, (size_t)(scanned - s));
|
|
391
|
+
s = scanned;
|
|
392
|
+
|
|
393
|
+
if ('\\' == *s) {
|
|
394
|
+
s++;
|
|
395
|
+
switch (*s) {
|
|
396
|
+
case 'n': buf_append(&buf, '\n'); break;
|
|
397
|
+
case 'r': buf_append(&buf, '\r'); break;
|
|
398
|
+
case 't': buf_append(&buf, '\t'); break;
|
|
399
|
+
case 'f': buf_append(&buf, '\f'); break;
|
|
400
|
+
case 'b': buf_append(&buf, '\b'); break;
|
|
401
|
+
case '"': buf_append(&buf, '"'); break;
|
|
402
|
+
case '/': buf_append(&buf, '/'); break;
|
|
403
|
+
case '\\': buf_append(&buf, '\\'); break;
|
|
404
|
+
case 'u':
|
|
405
|
+
s++;
|
|
406
|
+
if (0 == (code = read_hex(pi, s)) && err_has(&pi->err)) {
|
|
407
|
+
buf_cleanup(&buf);
|
|
408
|
+
return;
|
|
409
|
+
}
|
|
410
|
+
s += 3;
|
|
411
|
+
if (0x0000D800 <= code && code <= 0x0000DFFF) {
|
|
412
|
+
uint32_t c1 = (code - 0x0000D800) & 0x000003FF;
|
|
413
|
+
uint32_t c2;
|
|
414
|
+
|
|
415
|
+
s++;
|
|
416
|
+
if ('\\' != *s || 'u' != *(s + 1)) {
|
|
417
|
+
if (Yes == pi->options.allow_invalid) {
|
|
418
|
+
s--;
|
|
419
|
+
unicode_to_chars(pi, &buf, code);
|
|
420
|
+
break;
|
|
421
|
+
}
|
|
422
|
+
pi->cur = s;
|
|
423
|
+
oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
|
|
424
|
+
buf_cleanup(&buf);
|
|
425
|
+
return;
|
|
426
|
+
}
|
|
427
|
+
s += 2;
|
|
428
|
+
if (0 == (c2 = read_hex(pi, s)) && err_has(&pi->err)) {
|
|
429
|
+
buf_cleanup(&buf);
|
|
430
|
+
return;
|
|
431
|
+
}
|
|
432
|
+
s += 3;
|
|
433
|
+
c2 = (c2 - 0x0000DC00) & 0x000003FF;
|
|
434
|
+
code = ((c1 << 10) | c2) + 0x00010000;
|
|
435
|
+
}
|
|
436
|
+
unicode_to_chars(pi, &buf, code);
|
|
437
|
+
if (err_has(&pi->err)) {
|
|
438
|
+
buf_cleanup(&buf);
|
|
439
|
+
return;
|
|
440
|
+
}
|
|
441
|
+
break;
|
|
442
|
+
default:
|
|
443
|
+
// The json gem claims this is not an error despite the
|
|
444
|
+
// ECMA-404 indicating it is not valid.
|
|
445
|
+
if (CompatMode == pi->options.mode) {
|
|
446
|
+
buf_append(&buf, *s);
|
|
447
|
+
break;
|
|
448
|
+
}
|
|
449
|
+
pi->cur = s;
|
|
450
|
+
oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "invalid escaped character");
|
|
451
|
+
buf_cleanup(&buf);
|
|
452
|
+
return;
|
|
453
|
+
}
|
|
454
|
+
s++;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
if (0 == parent) {
|
|
458
|
+
pi->add_cstr(pi, buf.head, buf_len(&buf), start);
|
|
459
|
+
} else {
|
|
460
|
+
switch (parent->next) {
|
|
461
|
+
case NEXT_ARRAY_NEW:
|
|
462
|
+
case NEXT_ARRAY_ELEMENT:
|
|
463
|
+
pi->array_append_cstr(pi, buf.head, buf_len(&buf), start);
|
|
464
|
+
parent->next = NEXT_ARRAY_COMMA;
|
|
465
|
+
break;
|
|
466
|
+
case NEXT_HASH_NEW:
|
|
467
|
+
case NEXT_HASH_KEY:
|
|
468
|
+
if (Qundef == (parent->key_val = pi->hash_key(pi, buf.head, buf_len(&buf)))) {
|
|
469
|
+
parent->klen = buf_len(&buf);
|
|
470
|
+
parent->key = OJ_MALLOC(parent->klen + 1);
|
|
471
|
+
memcpy((char *)parent->key, buf.head, parent->klen);
|
|
472
|
+
*(char *)(parent->key + parent->klen) = '\0';
|
|
473
|
+
} else {
|
|
474
|
+
parent->key = "";
|
|
475
|
+
parent->klen = 0;
|
|
476
|
+
}
|
|
477
|
+
parent->k1 = *start;
|
|
478
|
+
parent->next = NEXT_HASH_COLON;
|
|
479
|
+
break;
|
|
480
|
+
case NEXT_HASH_VALUE:
|
|
481
|
+
pi->hash_set_cstr(pi, parent, buf.head, buf_len(&buf), start);
|
|
482
|
+
if (0 != parent->key && 0 < parent->klen && (parent->key < pi->json || pi->cur < parent->key)) {
|
|
483
|
+
OJ_R_FREE((char *)parent->key);
|
|
484
|
+
parent->key = 0;
|
|
485
|
+
}
|
|
486
|
+
parent->next = NEXT_HASH_COMMA;
|
|
487
|
+
break;
|
|
488
|
+
case NEXT_HASH_COMMA:
|
|
489
|
+
case NEXT_NONE:
|
|
490
|
+
case NEXT_ARRAY_COMMA:
|
|
491
|
+
case NEXT_HASH_COLON:
|
|
492
|
+
default:
|
|
493
|
+
oj_set_error_at(pi,
|
|
494
|
+
oj_parse_error_class,
|
|
495
|
+
__FILE__,
|
|
496
|
+
__LINE__,
|
|
497
|
+
"expected %s, not a string",
|
|
498
|
+
oj_stack_next_string(parent->next));
|
|
499
|
+
break;
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
pi->cur = s + 1;
|
|
503
|
+
buf_cleanup(&buf);
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
/*
 * Reads a quoted string whose content starts at pi->cur.
 *
 * The scanner is run up to the first '\0', '\\' or '"'. Running out of input
 * or meeting a '\0' is a parse error. A backslash hands the whole string to
 * read_escaped_str. Otherwise the text between the starting position and the
 * closing quote is delivered, without copying, through the callback that
 * matches the parser state, and pi->cur is moved past the closing quote.
 */
static void read_str(ParseInfo pi) {
    const char *begin = pi->cur;
    Val         top   = stack_peek(&pi->stack);

    pi->cur = scan_func(pi->cur, pi->end);
    if (RB_UNLIKELY(pi->end <= pi->cur)) {
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "quoted string not terminated");
        return;
    }
    if (RB_UNLIKELY('\0' == *pi->cur)) {
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "NULL byte in string");
        return;
    }
    if ('\\' == *pi->cur) {
        read_escaped_str(pi, begin);
        return;
    }

    if (0 == top) {
        // Top level value.
        pi->add_cstr(pi, begin, pi->cur - begin, begin);
    } else if (NEXT_ARRAY_NEW == top->next || NEXT_ARRAY_ELEMENT == top->next) {
        pi->array_append_cstr(pi, begin, pi->cur - begin, begin);
        top->next = NEXT_ARRAY_COMMA;
    } else if (NEXT_HASH_NEW == top->next || NEXT_HASH_KEY == top->next) {
        top->key_val = pi->hash_key(pi, begin, pi->cur - begin);
        if (Qundef == top->key_val) {
            // No key object was produced: remember where the key text sits
            // in the JSON buffer instead.
            top->key  = begin;
            top->klen = pi->cur - begin;
        } else {
            top->key  = "";
            top->klen = 0;
        }
        top->k1   = *begin;
        top->next = NEXT_HASH_COLON;
    } else if (NEXT_HASH_VALUE == top->next) {
        pi->hash_set_cstr(pi, top, begin, pi->cur - begin, begin);
        // Only keys that do not point into [pi->json, pi->cur] are freed.
        if (0 != top->key && 0 < top->klen && (top->key < pi->json || pi->cur < top->key)) {
            OJ_R_FREE((char *)top->key);
            top->key = 0;
        }
        top->next = NEXT_HASH_COMMA;
    } else {
        oj_set_error_at(pi,
                        oj_parse_error_class,
                        __FILE__,
                        __LINE__,
                        "expected %s, not a string",
                        oj_stack_next_string(top->next));
    }
    pi->cur++;  // move past "
}
|
|
569
|
+
|
|
570
|
+
/*
 * Scans a numeric token starting at pi->cur into a struct _numInfo and hands
 * it to the callback that matches the parser state.
 *
 * Accepted forms: an optional '-' (or '+' outside strict mode), then either
 * "Infinity" (only when allow_nan is not No), "NaN"/"nan"-style spellings, or
 * digits with an optional fraction and an optional exponent. The integer
 * part, fraction digits, fraction divisor, fraction length and exponent are
 * accumulated in ni; ni.big is raised when the token is too long or too
 * large for the fixed-width fields, or when the options ask for big decimals
 * outright.
 */
static void read_num(ParseInfo pi) {
    struct _numInfo ni;
    Val             top = stack_peek(&pi->stack);

    ni.pi       = pi;
    ni.str      = pi->cur;
    ni.i        = 0;
    ni.num      = 0;
    ni.div      = 1;
    ni.di       = 0;
    ni.len      = 0;
    ni.exp      = 0;
    ni.big      = 0;
    ni.infinity = 0;
    ni.nan      = 0;
    ni.neg      = 0;
    ni.has_exp  = 0;
    if (CompatMode == pi->options.mode) {
        ni.no_big      = !pi->options.compat_bigdec;
        ni.bigdec_load = pi->options.compat_bigdec;
    } else {
        ni.no_big = (FloatDec == pi->options.bigdec_load || FastDec == pi->options.bigdec_load ||
                     RubyDec == pi->options.bigdec_load);
        ni.bigdec_load = pi->options.bigdec_load;
    }

    // Optional sign. A leading '+' is rejected in strict mode.
    switch (*pi->cur) {
    case '-':
        pi->cur++;
        ni.neg = 1;
        break;
    case '+':
        if (StrictMode == pi->options.mode) {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
            return;
        }
        pi->cur++;
        break;
    default: break;
    }

    if ('I' == *pi->cur) {
        if (No == pi->options.allow_nan || 0 != strncmp("Infinity", pi->cur, 8)) {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
            return;
        }
        pi->cur += 8;
        ni.infinity = 1;
    } else if ('N' == *pi->cur || 'n' == *pi->cur) {
        if ('a' != pi->cur[1] || ('N' != pi->cur[2] && 'n' != pi->cur[2])) {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number or other value");
            return;
        }
        pi->cur += 3;
        ni.nan = 1;
    } else {
        int  digit_cnt = 0;      // significant digits seen so far
        bool lead_zero = false;  // at least one leading '0' was skipped

        // Skip leading zeros.
        while ('0' == *pi->cur) {
            lead_zero = true;
            pi->cur++;
        }

        // Integer part. The first non-zero digit is not counted, so
        // digit_cnt is one less than the number of integer digits.
        while ('0' <= *pi->cur && *pi->cur <= '9') {
            int digit = (*pi->cur - '0');

            if (RB_LIKELY(0 != ni.i)) {
                digit_cnt++;
            }
            ni.i = ni.i * 10 + digit;
            pi->cur++;
        }
        // Compat mode rejects a non-zero integer written with leading zeros.
        if (RB_UNLIKELY(0 != ni.i && lead_zero && CompatMode == pi->options.mode)) {
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
            return;
        }
        if (INT64_MAX <= ni.i || DEC_MAX < digit_cnt) {
            ni.big = true;
        }

        if ('.' == *pi->cur) {
            pi->cur++;
            // A trailing . is not a valid decimal but if encountered allow it
            // except when mimicking the JSON gem or in strict mode.
            if (StrictMode == pi->options.mode || CompatMode == pi->options.mode) {
                int pos = (int)(pi->cur - ni.str);

                // Reject a '.' with no digit before it (".5", "-.5") and a
                // '.' with no digit after it ("1.").
                if (1 == pos || (2 == pos && ni.neg) || *pi->cur < '0' || '9' < *pi->cur) {
                    oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "not a number");
                    return;
                }
            }
            // Fraction digits: ni.num collects the digits, ni.div the
            // matching power of ten and ni.di the digit count.
            while ('0' <= *pi->cur && *pi->cur <= '9') {
                int digit = (*pi->cur - '0');

                if (RB_LIKELY(0 != ni.num || 0 != ni.i)) {
                    digit_cnt++;
                }
                ni.num = ni.num * 10 + digit;
                ni.div *= 10;
                ni.di++;
                pi->cur++;
            }
        }
        // Too many digits for the fixed-width fields: go big unless the
        // selected decimal loader forbids it.
        if ((INT64_MAX <= ni.div || DEC_MAX < digit_cnt) && !ni.no_big) {
            ni.big = true;
        }

        if ('e' == *pi->cur || 'E' == *pi->cur) {
            int exp_neg = 0;

            ni.has_exp = 1;
            pi->cur++;
            if ('-' == *pi->cur) {
                pi->cur++;
                exp_neg = 1;
            } else if ('+' == *pi->cur) {
                pi->cur++;
            }
            while ('0' <= *pi->cur && *pi->cur <= '9') {
                ni.exp = ni.exp * 10 + (*pi->cur - '0');
                if (EXP_MAX <= ni.exp) {
                    ni.big = true;
                }
                pi->cur++;
            }
            if (exp_neg) {
                ni.exp = -ni.exp;
            }
        }
        // Length is only recorded for digit tokens; it stays 0 for the
        // Infinity and NaN spellings handled above.
        ni.len = pi->cur - ni.str;
    }
    // Check for special reserved values for Infinity and NaN.
    if (ni.big) {
        if (0 == strcasecmp(INF_VAL, ni.str)) {
            ni.infinity = 1;
        } else if (0 == strcasecmp(NINF_VAL, ni.str)) {
            ni.infinity = 1;
            ni.neg      = 1;
        } else if (0 == strcasecmp(NAN_VAL, ni.str)) {
            ni.nan = 1;
        }
    }
    // Options that force every number to be treated as big.
    if (CompatMode == pi->options.mode) {
        if (pi->options.compat_bigdec) {
            ni.big = 1;
        }
    } else if (BigDec == pi->options.bigdec_load) {
        ni.big = 1;
    }

    if (0 == top) {
        pi->add_num(pi, &ni);
        return;
    }
    if (NEXT_ARRAY_NEW == top->next || NEXT_ARRAY_ELEMENT == top->next) {
        pi->array_append_num(pi, &ni);
        top->next = NEXT_ARRAY_COMMA;
    } else if (NEXT_HASH_VALUE == top->next) {
        pi->hash_set_num(pi, top, &ni);
        // Only keys that do not point into [pi->json, pi->cur] are freed.
        if (0 != top->key && 0 < top->klen && (top->key < pi->json || pi->cur < top->key)) {
            OJ_R_FREE((char *)top->key);
            top->key = 0;
        }
        top->next = NEXT_HASH_COMMA;
    } else {
        oj_set_error_at(pi,
                        oj_parse_error_class,
                        __FILE__,
                        __LINE__,
                        "expected %s",
                        oj_stack_next_string(top->next));
    }
}
|
|
747
|
+
|
|
748
|
+
// Handles '[': obtains the new array value from the start_array callback and
// pushes it on the parse stack in the NEXT_ARRAY_NEW state.
static void array_start(ParseInfo pi) {
    VALUE container = pi->start_array(pi);

    stack_push(&pi->stack, container, NEXT_ARRAY_NEW);
}
|
|
753
|
+
|
|
754
|
+
// Handles ']': pops the top of the parse stack and, when that entry is an
// array that may legally close here (just opened, or after an element),
// invokes the end_array callback and hands the array value to add_value().
// Otherwise a parse error is recorded on pi.
static void array_end(ParseInfo pi) {
    Val top = stack_pop(&pi->stack);

    if (0 == top) {
        // Nothing was open.
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected array close");
        return;
    }
    if (NEXT_ARRAY_COMMA == top->next || NEXT_ARRAY_NEW == top->next) {
        pi->end_array(pi);
        add_value(pi, top->val);
        return;
    }
    // The open container was waiting for something other than a close.
    oj_set_error_at(pi,
                    oj_parse_error_class,
                    __FILE__,
                    __LINE__,
                    "expected %s, not an array close",
                    oj_stack_next_string(top->next));
}
|
|
771
|
+
|
|
772
|
+
// Handles '{': obtains the new hash value from the start_hash callback and
// pushes it on the parse stack in the NEXT_HASH_NEW state.
static void hash_start(ParseInfo pi) {
    VALUE container = pi->start_hash(pi);

    stack_push(&pi->stack, container, NEXT_HASH_NEW);
}
|
|
777
|
+
|
|
778
|
+
// Handles '}'. The hash entry is only peeked at first so that it is still on
// the stack while the end_hash callback runs; it is popped right after the
// callback and its value is then passed to add_value(). A close that is not
// legal in the current state records a parse error on pi instead.
static void hash_end(ParseInfo pi) {
    Val top = stack_peek(&pi->stack);

    if (0 == top) {
        // Nothing was open.
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected hash close");
        return;
    }
    if (NEXT_HASH_COMMA != top->next && NEXT_HASH_NEW != top->next) {
        oj_set_error_at(pi,
                        oj_parse_error_class,
                        __FILE__,
                        __LINE__,
                        "expected %s, not a hash close",
                        oj_stack_next_string(top->next));
        return;
    }
    pi->end_hash(pi);
    stack_pop(&pi->stack);
    add_value(pi, top->val);
}
|
|
797
|
+
|
|
798
|
+
// Handles ','. A comma is only legal directly after an array element or a
// hash value; it moves the enclosing container to the state that expects the
// next element or key. Anywhere else it records an "unexpected comma" error.
static void comma(ParseInfo pi) {
    Val parent = stack_peek(&pi->stack);

    if (0 != parent) {
        if (NEXT_ARRAY_COMMA == parent->next) {
            parent->next = NEXT_ARRAY_ELEMENT;
            return;
        }
        if (NEXT_HASH_COMMA == parent->next) {
            parent->next = NEXT_HASH_KEY;
            return;
        }
    }
    oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected comma");
}
|
|
811
|
+
|
|
812
|
+
// Handles ':'. Legal only when the enclosing container is waiting for the
// colon after a hash key; the container then expects a value. Any other
// position records an "unexpected colon" error.
static void colon(ParseInfo pi) {
    Val parent = stack_peek(&pi->stack);

    if (0 == parent || NEXT_HASH_COLON != parent->next) {
        oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected colon");
        return;
    }
    parent->next = NEXT_HASH_VALUE;
}
|
|
821
|
+
|
|
822
|
+
// Main loop of the in-memory parser. Starting at pi->json it repeatedly skips
// white space, dispatches on the next character to the matching token
// handler, and stops at the terminating NUL or as soon as an error has been
// recorded in pi->err.
//
// Each time the value stack becomes empty a top-level document is complete.
// If a proc or block is in effect (pi->proc is not Qundef) it is called with
// the document value, its start offset and its length. Otherwise, when no
// callbacks are registered, `first` is cleared so that any further
// non-NUL input is reported as trailing characters.
void oj_parse2(ParseInfo pi) {
    int  first = 1;
    long start = 0;

    pi->cur = pi->json;
    err_init(&pi->err);
    for (;;) {
        if (RB_UNLIKELY(0 < pi->max_depth && pi->max_depth <= pi->stack.tail - pi->stack.head - 1)) {
            VALUE nesting_error = oj_get_json_err_class("NestingError");

            oj_set_error_at(pi, nesting_error, __FILE__, __LINE__, "Too deeply nested.");
            pi->err_class = nesting_error;
            return;
        }
        next_non_white(pi);
        if (!first) {
            if (RB_UNLIKELY('\0' != *pi->cur)) {
                oj_set_error_at(pi,
                                oj_parse_error_class,
                                __FILE__,
                                __LINE__,
                                "unexpected characters after the JSON document");
            }
        } else if (RB_UNLIKELY('\0' == *pi->cur && No == pi->options.empty_string)) {
            // If no tokens are consumed (i.e. empty string), throw a parse error
            // this is the behavior of JSON.parse in both Ruby and JS.
            oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
        }

        switch (*pi->cur++) {
        case '{': hash_start(pi); break;
        case '}': hash_end(pi); break;
        case ':': colon(pi); break;
        case '[': array_start(pi); break;
        case ']': array_end(pi); break;
        case ',': comma(pi); break;
        case '"': read_str(pi); break;
        case '+':
            // A leading '+' is rejected in compat mode, otherwise it is
            // treated like any other start of a number.
            if (CompatMode == pi->options.mode) {
                oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
                return;
            }
            // fall through
        case '-':
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            // read_num() wants to see the first character itself.
            pi->cur--;
            read_num(pi);
            break;
        case 'I':
        case 'N':
            if (Yes != pi->options.allow_nan) {
                oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character");
            } else {
                pi->cur--;
                read_num(pi);
            }
            break;
        case 't': read_true(pi); break;
        case 'f': read_false(pi); break;
        case 'n':
            // "nu..." is taken as null, any other 'n' token is handed to
            // the number reader.
            if ('u' != *pi->cur) {
                pi->cur--;
                read_num(pi);
            } else {
                read_null(pi);
            }
            break;
        case '/':
            skip_comment(pi);
            if (first) {
                // Go straight to the next token without running the
                // error/completion checks below.
                continue;
            }
            break;
        case '\0':
            // Leave cur on the terminator.
            pi->cur--;
            return;
        default: oj_set_error_at(pi, oj_parse_error_class, __FILE__, __LINE__, "unexpected character"); return;
        }
        if (RB_UNLIKELY(err_has(&pi->err))) {
            return;
        }
        if (!stack_empty(&pi->stack)) {
            continue;
        }
        // A complete top-level value has just been parsed.
        if (Qundef != pi->proc) {
            VALUE args[3];
            long  len = (pi->cur - pi->json) - start;

            args[0] = stack_head_val(&pi->stack);
            args[1] = LONG2NUM(start);
            args[2] = LONG2NUM(len);

            if (Qnil == pi->proc) {
                // nil stands for "yield to the block".
                rb_yield_values2(3, args);
            } else {
                rb_proc_call_with_block(pi->proc, 3, args, Qnil);
            }
        } else if (!pi->has_callbacks) {
            first = 0;
        }
        start = pi->cur - pi->json;
    }
}
|
|
935
|
+
|
|
936
|
+
// Rescue handler paired with parse_big_decimal() in rb_rescue2(): whatever
// went wrong is re-raised as an Oj parse error. Neither argument is used.
static VALUE rescue_big_decimal(VALUE str, VALUE ignore) {
    (void)str;
    (void)ignore;
    rb_raise(oj_parse_error_class, "Invalid value for BigDecimal()");

    return Qnil;  // only here to satisfy the return type
}
|
|
940
|
+
|
|
941
|
+
// Body function for rb_rescue2(): calls the method identified by
// oj_bigdecimal_id on Object with the number text as its single argument and
// returns the result.
static VALUE parse_big_decimal(VALUE str) {
    VALUE big = rb_funcall(rb_cObject, oj_bigdecimal_id, 1, str);

    return big;
}
|
|
944
|
+
|
|
945
|
+
// Powers of ten indexed by exponent: exp_plus[n] == 10^n for n in 0..49.
// Used as a lookup table so small exponents do not need a call to powl().
static long double exp_plus[] = {
    1.0,    1.0e1,  1.0e2,  1.0e3,  1.0e4,  1.0e5,  1.0e6,  1.0e7,  1.0e8,  1.0e9,
    1.0e10, 1.0e11, 1.0e12, 1.0e13, 1.0e14, 1.0e15, 1.0e16, 1.0e17, 1.0e18, 1.0e19,
    1.0e20, 1.0e21, 1.0e22, 1.0e23, 1.0e24, 1.0e25, 1.0e26, 1.0e27, 1.0e28, 1.0e29,
    1.0e30, 1.0e31, 1.0e32, 1.0e33, 1.0e34, 1.0e35, 1.0e36, 1.0e37, 1.0e38, 1.0e39,
    1.0e40, 1.0e41, 1.0e42, 1.0e43, 1.0e44, 1.0e45, 1.0e46, 1.0e47, 1.0e48, 1.0e49,
};
|
|
951
|
+
|
|
952
|
+
VALUE
|
|
953
|
+
oj_num_as_value(NumInfo ni) {
|
|
954
|
+
VALUE rnum = Qnil;
|
|
955
|
+
|
|
956
|
+
if (ni->infinity) {
|
|
957
|
+
if (ni->neg) {
|
|
958
|
+
rnum = rb_float_new(-OJ_INFINITY);
|
|
959
|
+
} else {
|
|
960
|
+
rnum = rb_float_new(OJ_INFINITY);
|
|
961
|
+
}
|
|
962
|
+
} else if (ni->nan) {
|
|
963
|
+
rnum = rb_float_new(NAN);
|
|
964
|
+
} else if (1 == ni->div && 0 == ni->exp && !ni->has_exp) { // fixnum
|
|
965
|
+
if (ni->big) {
|
|
966
|
+
if (256 > ni->len) {
|
|
967
|
+
char buf[256];
|
|
968
|
+
|
|
969
|
+
memcpy(buf, ni->str, ni->len);
|
|
970
|
+
buf[ni->len] = '\0';
|
|
971
|
+
rnum = rb_cstr_to_inum(buf, 10, 0);
|
|
972
|
+
} else {
|
|
973
|
+
char *buf = OJ_R_ALLOC_N(char, ni->len + 1);
|
|
974
|
+
|
|
975
|
+
memcpy(buf, ni->str, ni->len);
|
|
976
|
+
buf[ni->len] = '\0';
|
|
977
|
+
rnum = rb_cstr_to_inum(buf, 10, 0);
|
|
978
|
+
OJ_R_FREE(buf);
|
|
979
|
+
}
|
|
980
|
+
} else {
|
|
981
|
+
if (ni->neg) {
|
|
982
|
+
rnum = rb_ll2inum(-ni->i);
|
|
983
|
+
} else {
|
|
984
|
+
rnum = rb_ll2inum(ni->i);
|
|
985
|
+
}
|
|
986
|
+
}
|
|
987
|
+
} else { // decimal
|
|
988
|
+
if (ni->big) {
|
|
989
|
+
VALUE bd = rb_str_new(ni->str, ni->len);
|
|
990
|
+
|
|
991
|
+
rnum = rb_rescue2(parse_big_decimal, bd, rescue_big_decimal, bd, rb_eException, 0);
|
|
992
|
+
if (ni->no_big) {
|
|
993
|
+
rnum = rb_funcall(rnum, rb_intern("to_f"), 0);
|
|
994
|
+
}
|
|
995
|
+
} else if (FastDec == ni->bigdec_load) {
|
|
996
|
+
long double ld = (long double)ni->i * (long double)ni->div + (long double)ni->num;
|
|
997
|
+
int x = (int)((int64_t)ni->exp - ni->di);
|
|
998
|
+
|
|
999
|
+
if (0 < x) {
|
|
1000
|
+
if (x < (int)(sizeof(exp_plus) / sizeof(*exp_plus))) {
|
|
1001
|
+
ld *= exp_plus[x];
|
|
1002
|
+
} else {
|
|
1003
|
+
ld *= powl(10.0, x);
|
|
1004
|
+
}
|
|
1005
|
+
} else if (x < 0) {
|
|
1006
|
+
if (-x < (int)(sizeof(exp_plus) / sizeof(*exp_plus))) {
|
|
1007
|
+
ld /= exp_plus[-x];
|
|
1008
|
+
} else {
|
|
1009
|
+
ld /= powl(10.0, -x);
|
|
1010
|
+
}
|
|
1011
|
+
}
|
|
1012
|
+
if (ni->neg) {
|
|
1013
|
+
ld = -ld;
|
|
1014
|
+
}
|
|
1015
|
+
rnum = rb_float_new((double)ld);
|
|
1016
|
+
} else if (RubyDec == ni->bigdec_load) {
|
|
1017
|
+
VALUE sv = rb_str_new(ni->str, ni->len);
|
|
1018
|
+
|
|
1019
|
+
rnum = rb_funcall(sv, rb_intern("to_f"), 0);
|
|
1020
|
+
} else {
|
|
1021
|
+
char *end;
|
|
1022
|
+
double d = strtod(ni->str, &end);
|
|
1023
|
+
|
|
1024
|
+
if ((long)ni->len != (long)(end - ni->str)) {
|
|
1025
|
+
if (Qnil == ni->pi->err_class) {
|
|
1026
|
+
rb_raise(oj_parse_error_class, "Invalid float");
|
|
1027
|
+
} else {
|
|
1028
|
+
rb_raise(ni->pi->err_class, "Invalid float");
|
|
1029
|
+
}
|
|
1030
|
+
}
|
|
1031
|
+
rnum = rb_float_new(d);
|
|
1032
|
+
}
|
|
1033
|
+
}
|
|
1034
|
+
return rnum;
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
// Records a parse error on pi.
//
// The printf-style format and arguments are rendered into a fixed 256 byte
// buffer (silently truncated when too long). When space allows, a path to
// the current position is appended in the form " (after key.key[n])", built
// from the keys and array lengths found on the value stack. The finished
// message is then stored in pi->err together with err_clas and the location:
// line/column from pi->rd when there is no in-memory JSON (pi->json is 0),
// otherwise derived from pi->json and pi->cur.
//
//   pi        parse state that receives the error
//   err_clas  Ruby exception class for the error
//   file/line C source location of the caller, included in the message
//   format    printf-style format followed by its arguments
void oj_set_error_at(ParseInfo pi, VALUE err_clas, const char *file, int line, const char *format, ...) {
    static const char after_prefix[] = " (after ";

    va_list ap;
    char    msg[256];
    char   *p   = msg;
    char   *end = p + sizeof(msg) - 2;  // last position that may hold the terminator
    char   *start;
    Val     vp;
    int     mlen;

    va_start(ap, format);
    mlen = vsnprintf(msg, sizeof(msg) - 1, format, ap);
    va_end(ap);
    if (0 < mlen) {
        if (sizeof(msg) - 2 < (size_t)mlen) {
            // Output was truncated; cut the message short.
            p = end - 2;
        } else {
            p += mlen;
        }
    }
    pi->err.clas = err_clas;
    // Only add the path when the whole " (after " prefix and the closing ')'
    // fit. sizeof(after_prefix) counts the prefix characters plus one, which
    // is exactly the room needed for prefix + ')'. Testing for less than that
    // would let the prefix run past the end of msg for messages that nearly
    // fill the buffer.
    if (sizeof(after_prefix) <= (size_t)(end - p)) {
        memcpy(p, after_prefix, sizeof(after_prefix) - 1);
        p += sizeof(after_prefix) - 1;
        start = p;
        for (vp = pi->stack.head; vp < pi->stack.tail; vp++) {
            if (end <= p + 1 + vp->klen) {
                break;
            }
            if (NULL != vp->key) {
                if (start < p) {
                    *p++ = '.';
                }
                memcpy(p, vp->key, vp->klen);
                p += vp->klen;
            } else if (RUBY_T_ARRAY == rb_type(vp->val)) {
                int n;

                if (end <= p + 12) {
                    break;
                }
                n = snprintf(p, (size_t)(end - p), "[%ld]", RARRAY_LEN(vp->val));
                if (n < 0 || end - p <= n) {
                    // Error or truncated output: do not advance past what
                    // was actually written.
                    break;
                }
                p += n;
            }
        }
        *p++ = ')';
    }
    *p = '\0';
    if (0 == pi->json) {
        oj_err_set(&pi->err, err_clas, "%s at line %d, column %d [%s:%d]", msg, pi->rd.line, pi->rd.col, file, line);
    } else {
        _oj_err_set_with_location(&pi->err, err_clas, msg, pi->json, pi->cur - 1, file, line);
    }
}
|
|
1095
|
+
|
|
1096
|
+
// Adapter with the signature rb_protect() expects: the argument is really a
// ParseInfo pointer carried in a VALUE. Runs the parser and returns nil.
static VALUE protect_parse(VALUE pip) {
    ParseInfo pi = (ParseInfo)pip;

    oj_parse2(pi);

    return Qnil;
}
|
|
1101
|
+
|
|
1102
|
+
extern int oj_utf8_index;
|
|
1103
|
+
|
|
1104
|
+
// Points pi->json / pi->end at the bytes of the Ruby string *inputp. When the
// string's encoding index is not the UTF-8 one it is converted with
// rb_str_conv_enc() first and *inputp is updated to the converted string, so
// the caller's variable refers to the string whose bytes are being parsed.
static void oj_pi_set_input_str(ParseInfo pi, VALUE *inputp) {
    VALUE str     = *inputp;
    int   enc_idx = RB_ENCODING_GET(str);

    if (oj_utf8_encoding_index != enc_idx) {
        str     = rb_str_conv_enc(str, rb_enc_from_index(enc_idx), oj_utf8_encoding);
        *inputp = str;
    }
    pi->json = RSTRING_PTR(str);
    pi->end  = pi->json + RSTRING_LEN(str);
}
|
|
1114
|
+
|
|
1115
|
+
VALUE
|
|
1116
|
+
oj_pi_parse(int argc, VALUE *argv, ParseInfo pi, char *json, size_t len, int yieldOk) {
|
|
1117
|
+
char *buf = 0;
|
|
1118
|
+
VALUE input;
|
|
1119
|
+
VALUE wrapped_stack;
|
|
1120
|
+
VALUE result = Qnil;
|
|
1121
|
+
int line = 0;
|
|
1122
|
+
int free_json = 0;
|
|
1123
|
+
|
|
1124
|
+
if (argc < 1) {
|
|
1125
|
+
rb_raise(rb_eArgError, "Wrong number of arguments to parse.");
|
|
1126
|
+
}
|
|
1127
|
+
input = argv[0];
|
|
1128
|
+
if (2 <= argc) {
|
|
1129
|
+
if (T_HASH == rb_type(argv[1])) {
|
|
1130
|
+
oj_parse_options(argv[1], &pi->options);
|
|
1131
|
+
} else if (3 <= argc && T_HASH == rb_type(argv[2])) {
|
|
1132
|
+
oj_parse_options(argv[2], &pi->options);
|
|
1133
|
+
}
|
|
1134
|
+
}
|
|
1135
|
+
if (yieldOk && rb_block_given_p()) {
|
|
1136
|
+
pi->proc = Qnil;
|
|
1137
|
+
} else {
|
|
1138
|
+
pi->proc = Qundef;
|
|
1139
|
+
}
|
|
1140
|
+
if (0 != json) {
|
|
1141
|
+
pi->json = json;
|
|
1142
|
+
pi->end = json + len;
|
|
1143
|
+
free_json = 1;
|
|
1144
|
+
} else if (T_STRING == rb_type(input)) {
|
|
1145
|
+
if (CompatMode == pi->options.mode) {
|
|
1146
|
+
if (No == pi->options.nilnil && 0 == RSTRING_LEN(input)) {
|
|
1147
|
+
rb_raise(oj_json_parser_error_class, "An empty string is not a valid JSON string.");
|
|
1148
|
+
}
|
|
1149
|
+
}
|
|
1150
|
+
oj_pi_set_input_str(pi, &input);
|
|
1151
|
+
} else if (Qnil == input) {
|
|
1152
|
+
if (Yes == pi->options.nilnil) {
|
|
1153
|
+
return Qnil;
|
|
1154
|
+
} else {
|
|
1155
|
+
rb_raise(rb_eTypeError, "Nil is not a valid JSON source.");
|
|
1156
|
+
}
|
|
1157
|
+
} else {
|
|
1158
|
+
VALUE clas = rb_obj_class(input);
|
|
1159
|
+
VALUE s;
|
|
1160
|
+
|
|
1161
|
+
if (oj_stringio_class == clas) {
|
|
1162
|
+
s = rb_funcall2(input, oj_string_id, 0, 0);
|
|
1163
|
+
oj_pi_set_input_str(pi, &s);
|
|
1164
|
+
#if !IS_WINDOWS
|
|
1165
|
+
} else if (rb_cFile == clas && 0 == FIX2INT(rb_funcall(input, oj_pos_id, 0))) {
|
|
1166
|
+
int fd = FIX2INT(rb_funcall(input, oj_fileno_id, 0));
|
|
1167
|
+
ssize_t cnt;
|
|
1168
|
+
size_t len = lseek(fd, 0, SEEK_END);
|
|
1169
|
+
|
|
1170
|
+
lseek(fd, 0, SEEK_SET);
|
|
1171
|
+
buf = OJ_R_ALLOC_N(char, len + 1);
|
|
1172
|
+
pi->json = buf;
|
|
1173
|
+
pi->end = buf + len;
|
|
1174
|
+
if (0 >= (cnt = read(fd, (char *)pi->json, len)) || cnt != (ssize_t)len) {
|
|
1175
|
+
if (0 != buf) {
|
|
1176
|
+
OJ_R_FREE(buf);
|
|
1177
|
+
}
|
|
1178
|
+
rb_raise(rb_eIOError, "failed to read from IO Object.");
|
|
1179
|
+
}
|
|
1180
|
+
((char *)pi->json)[len] = '\0';
|
|
1181
|
+
/* skip UTF-8 BOM if present */
|
|
1182
|
+
if (0xEF == (uint8_t)*pi->json && 0xBB == (uint8_t)pi->json[1] && 0xBF == (uint8_t)pi->json[2]) {
|
|
1183
|
+
pi->cur += 3;
|
|
1184
|
+
}
|
|
1185
|
+
#endif
|
|
1186
|
+
} else if (rb_respond_to(input, oj_read_id)) {
|
|
1187
|
+
// use stream parser instead
|
|
1188
|
+
return oj_pi_sparse(argc, argv, pi, 0);
|
|
1189
|
+
} else {
|
|
1190
|
+
rb_raise(rb_eArgError, "parse() expected a String or IO Object.");
|
|
1191
|
+
}
|
|
1192
|
+
}
|
|
1193
|
+
if (Yes == pi->options.circular) {
|
|
1194
|
+
pi->circ_array = oj_circ_array_new();
|
|
1195
|
+
} else {
|
|
1196
|
+
pi->circ_array = 0;
|
|
1197
|
+
}
|
|
1198
|
+
if (No == pi->options.allow_gc) {
|
|
1199
|
+
rb_gc_disable();
|
|
1200
|
+
}
|
|
1201
|
+
// GC can run at any time. When it runs any Object created by C will be
|
|
1202
|
+
// freed. We protect against this by wrapping the value stack in a ruby
|
|
1203
|
+
// data object and poviding a mark function for ruby objects on the
|
|
1204
|
+
// value stack (while it is in scope).
|
|
1205
|
+
wrapped_stack = oj_stack_init(&pi->stack);
|
|
1206
|
+
rb_protect(protect_parse, (VALUE)pi, &line);
|
|
1207
|
+
if (Qundef == pi->stack.head->val && !empty_ok(&pi->options)) {
|
|
1208
|
+
if (No == pi->options.nilnil || (CompatMode == pi->options.mode && 0 < pi->cur - pi->json)) {
|
|
1209
|
+
oj_set_error_at(pi, oj_json_parser_error_class, __FILE__, __LINE__, "Empty input");
|
|
1210
|
+
}
|
|
1211
|
+
}
|
|
1212
|
+
result = stack_head_val(&pi->stack);
|
|
1213
|
+
DATA_PTR(wrapped_stack) = 0;
|
|
1214
|
+
if (No == pi->options.allow_gc) {
|
|
1215
|
+
rb_gc_enable();
|
|
1216
|
+
}
|
|
1217
|
+
if (!err_has(&pi->err)) {
|
|
1218
|
+
// If the stack is not empty then the JSON terminated early.
|
|
1219
|
+
Val v;
|
|
1220
|
+
VALUE err_class = oj_parse_error_class;
|
|
1221
|
+
|
|
1222
|
+
if (0 != line) {
|
|
1223
|
+
VALUE ec = rb_obj_class(rb_errinfo());
|
|
1224
|
+
|
|
1225
|
+
if (rb_eArgError != ec && 0 != ec) {
|
|
1226
|
+
err_class = ec;
|
|
1227
|
+
}
|
|
1228
|
+
if (rb_eIOError != ec) {
|
|
1229
|
+
goto CLEANUP;
|
|
1230
|
+
}
|
|
1231
|
+
}
|
|
1232
|
+
if (NULL != (v = stack_peek(&pi->stack))) {
|
|
1233
|
+
switch (v->next) {
|
|
1234
|
+
case NEXT_ARRAY_NEW:
|
|
1235
|
+
case NEXT_ARRAY_ELEMENT:
|
|
1236
|
+
case NEXT_ARRAY_COMMA: oj_set_error_at(pi, err_class, __FILE__, __LINE__, "Array not terminated"); break;
|
|
1237
|
+
case NEXT_HASH_NEW:
|
|
1238
|
+
case NEXT_HASH_KEY:
|
|
1239
|
+
case NEXT_HASH_COLON:
|
|
1240
|
+
case NEXT_HASH_VALUE:
|
|
1241
|
+
case NEXT_HASH_COMMA:
|
|
1242
|
+
oj_set_error_at(pi, err_class, __FILE__, __LINE__, "Hash/Object not terminated");
|
|
1243
|
+
break;
|
|
1244
|
+
default: oj_set_error_at(pi, err_class, __FILE__, __LINE__, "not terminated");
|
|
1245
|
+
}
|
|
1246
|
+
}
|
|
1247
|
+
}
|
|
1248
|
+
CLEANUP:
|
|
1249
|
+
// proceed with cleanup
|
|
1250
|
+
if (0 != pi->circ_array) {
|
|
1251
|
+
oj_circ_array_free(pi->circ_array);
|
|
1252
|
+
}
|
|
1253
|
+
if (0 != buf) {
|
|
1254
|
+
OJ_R_FREE(buf);
|
|
1255
|
+
} else if (free_json) {
|
|
1256
|
+
OJ_R_FREE(json);
|
|
1257
|
+
}
|
|
1258
|
+
stack_cleanup(&pi->stack);
|
|
1259
|
+
if (pi->str_rx.head != oj_default_options.str_rx.head) {
|
|
1260
|
+
oj_rxclass_cleanup(&pi->str_rx);
|
|
1261
|
+
}
|
|
1262
|
+
if (err_has(&pi->err)) {
|
|
1263
|
+
rb_set_errinfo(Qnil);
|
|
1264
|
+
if (Qnil != pi->err_class) {
|
|
1265
|
+
pi->err.clas = pi->err_class;
|
|
1266
|
+
}
|
|
1267
|
+
if ((CompatMode == pi->options.mode || RailsMode == pi->options.mode) && Yes != pi->options.safe) {
|
|
1268
|
+
// The json gem requires the error message be UTF-8 encoded. In
|
|
1269
|
+
// additional the complete JSON source must be returned. There
|
|
1270
|
+
// does not seem to be a size limit.
|
|
1271
|
+
VALUE msg = rb_utf8_str_new_cstr(pi->err.msg);
|
|
1272
|
+
VALUE args[1];
|
|
1273
|
+
|
|
1274
|
+
if (NULL != pi->json) {
|
|
1275
|
+
msg = rb_str_append(msg, rb_utf8_str_new_cstr(" in '"));
|
|
1276
|
+
msg = rb_str_append(msg, rb_utf8_str_new_cstr(pi->json));
|
|
1277
|
+
}
|
|
1278
|
+
args[0] = msg;
|
|
1279
|
+
if (pi->err.clas == oj_parse_error_class) {
|
|
1280
|
+
// The error was an Oj::ParseError so change to a JSON::ParserError.
|
|
1281
|
+
pi->err.clas = oj_json_parser_error_class;
|
|
1282
|
+
}
|
|
1283
|
+
rb_exc_raise(rb_class_new_instance(1, args, pi->err.clas));
|
|
1284
|
+
} else {
|
|
1285
|
+
oj_err_raise(&pi->err);
|
|
1286
|
+
}
|
|
1287
|
+
} else if (0 != line) {
|
|
1288
|
+
rb_jump_tag(line);
|
|
1289
|
+
}
|
|
1290
|
+
if (pi->options.quirks_mode == No) {
|
|
1291
|
+
switch (rb_type(result)) {
|
|
1292
|
+
case T_NIL:
|
|
1293
|
+
case T_TRUE:
|
|
1294
|
+
case T_FALSE:
|
|
1295
|
+
case T_FIXNUM:
|
|
1296
|
+
case T_FLOAT:
|
|
1297
|
+
case T_CLASS:
|
|
1298
|
+
case T_STRING:
|
|
1299
|
+
case T_SYMBOL: {
|
|
1300
|
+
struct _err err;
|
|
1301
|
+
|
|
1302
|
+
if (Qnil == pi->err_class) {
|
|
1303
|
+
err.clas = oj_parse_error_class;
|
|
1304
|
+
} else {
|
|
1305
|
+
err.clas = pi->err_class;
|
|
1306
|
+
}
|
|
1307
|
+
snprintf(err.msg, sizeof(err.msg), "unexpected non-document value");
|
|
1308
|
+
oj_err_raise(&err);
|
|
1309
|
+
break;
|
|
1310
|
+
}
|
|
1311
|
+
default:
|
|
1312
|
+
// okay
|
|
1313
|
+
break;
|
|
1314
|
+
}
|
|
1315
|
+
}
|
|
1316
|
+
return result;
|
|
1317
|
+
}
|