json-extended 2.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/BSDL +22 -0
- data/CHANGES.md +798 -0
- data/COPYING +56 -0
- data/LEGAL +20 -0
- data/README.md +308 -0
- data/ext/json/ext/fbuffer/fbuffer.h +259 -0
- data/ext/json/ext/generator/extconf.rb +19 -0
- data/ext/json/ext/generator/generator.c +1997 -0
- data/ext/json/ext/json.h +179 -0
- data/ext/json/ext/parser/extconf.rb +53 -0
- data/ext/json/ext/parser/parser.c +2827 -0
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +208 -0
- data/ext/json/ext/vendor/fast_float_parser.h +814 -0
- data/ext/json/ext/vendor/fpconv.c +480 -0
- data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
- data/json.gemspec +62 -0
- data/lib/json/add/bigdecimal.rb +58 -0
- data/lib/json/add/complex.rb +51 -0
- data/lib/json/add/core.rb +13 -0
- data/lib/json/add/date.rb +54 -0
- data/lib/json/add/date_time.rb +67 -0
- data/lib/json/add/exception.rb +49 -0
- data/lib/json/add/ostruct.rb +54 -0
- data/lib/json/add/range.rb +54 -0
- data/lib/json/add/rational.rb +49 -0
- data/lib/json/add/regexp.rb +48 -0
- data/lib/json/add/set.rb +48 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/add/struct.rb +52 -0
- data/lib/json/add/symbol.rb +52 -0
- data/lib/json/add/time.rb +52 -0
- data/lib/json/common.rb +1173 -0
- data/lib/json/ext/generator/state.rb +103 -0
- data/lib/json/ext.rb +45 -0
- data/lib/json/generic_object.rb +67 -0
- data/lib/json/truffle_ruby/generator.rb +755 -0
- data/lib/json/version.rb +5 -0
- data/lib/json.rb +689 -0
- metadata +90 -0
|
@@ -0,0 +1,2827 @@
|
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/fast_float_parser.h"
|
|
3
|
+
#include "../simd/simd.h"
|
|
4
|
+
|
|
5
|
+
static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8;
|
|
6
|
+
static VALUE CNaN, CInfinity, CMinusInfinity, JSON_empty_string;
|
|
7
|
+
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column;
|
|
9
|
+
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments,
|
|
11
|
+
sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names,
|
|
12
|
+
sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
13
|
+
|
|
14
|
+
static int binary_encindex;
|
|
15
|
+
static int utf8_encindex;
|
|
16
|
+
|
|
17
|
+
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
18
|
+
// For TruffleRuby
|
|
19
|
+
static void
|
|
20
|
+
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
21
|
+
{
|
|
22
|
+
long index = 0;
|
|
23
|
+
while (index < count) {
|
|
24
|
+
VALUE name = pairs[index++];
|
|
25
|
+
VALUE value = pairs[index++];
|
|
26
|
+
rb_hash_aset(hash, name, value);
|
|
27
|
+
}
|
|
28
|
+
RB_GC_GUARD(hash);
|
|
29
|
+
}
|
|
30
|
+
#endif
|
|
31
|
+
|
|
32
|
+
#ifndef HAVE_RB_HASH_NEW_CAPA
|
|
33
|
+
#define rb_hash_new_capa(n) rb_hash_new()
|
|
34
|
+
#endif
|
|
35
|
+
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
// Fallback for Rubies without rb_str_to_interned_str: freezes `str`, then
// returns the result of calling the method identified by i_uminus on it.
static VALUE rb_str_to_interned_str(VALUE str)
{
    VALUE frozen = rb_str_freeze(str);
    return rb_funcall(frozen, i_uminus, 0);
}
|
|
41
|
+
#endif
|
|
42
|
+
|
|
43
|
+
/* name cache */
|
|
44
|
+
|
|
45
|
+
#include <string.h>
|
|
46
|
+
#include <ctype.h>
|
|
47
|
+
|
|
48
|
+
// Object names are likely to be repeated, and are frozen.
|
|
49
|
+
// As such we can re-use them if we keep a cache of the ones we've seen so far,
|
|
50
|
+
// and save much more expensive lookups into the global fstring table.
|
|
51
|
+
// This cache implementation is deliberately simple, as we're optimizing for compactness,
|
|
52
|
+
// to be able to fit safely on the stack.
|
|
53
|
+
// As such, binary search into a sorted array gives a good tradeoff between compactness and
|
|
54
|
+
// performance.
|
|
55
|
+
#define JSON_RVALUE_CACHE_CAPA 63
|
|
56
|
+
typedef struct rvalue_cache_struct {
|
|
57
|
+
int length;
|
|
58
|
+
VALUE entries[JSON_RVALUE_CACHE_CAPA];
|
|
59
|
+
} rvalue_cache;
|
|
60
|
+
|
|
61
|
+
// GC mark hook helper: reports every in-use cache entry as reachable
// through rb_gc_mark_movable.
static void rvalue_cache_mark(rvalue_cache *cache)
{
    int slot = 0;
    while (slot < cache->length) {
        rb_gc_mark_movable(cache->entries[slot]);
        slot++;
    }
}
|
|
67
|
+
|
|
68
|
+
// GC compaction hook helper: replaces every in-use cache entry with the
// address rb_gc_location reports for it.
static void rvalue_cache_compact(rvalue_cache *cache)
{
    int slot = 0;
    while (slot < cache->length) {
        VALUE *entry = &cache->entries[slot];
        *entry = rb_gc_location(*entry);
        slot++;
    }
}
|
|
74
|
+
|
|
75
|
+
static rb_encoding *enc_utf8;
|
|
76
|
+
|
|
77
|
+
#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
|
|
78
|
+
|
|
79
|
+
static inline VALUE build_interned_string(const char *str, const long length)
|
|
80
|
+
{
|
|
81
|
+
# ifdef HAVE_RB_ENC_INTERNED_STR
|
|
82
|
+
return rb_enc_interned_str(str, length, enc_utf8);
|
|
83
|
+
# else
|
|
84
|
+
VALUE rstring = rb_utf8_str_new(str, length);
|
|
85
|
+
return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
|
|
86
|
+
# endif
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// Builds the Symbol whose name is the `length` bytes at `str`, going through
// an interned string first.
static inline VALUE build_symbol(const char *str, const long length)
{
    VALUE name = build_interned_string(str, length);
    return rb_str_intern(name);
}
|
|
93
|
+
|
|
94
|
+
// Inserts `rstring` at position `index`, shifting the entries from `index`
// onwards one slot to the right. The caller is responsible for checking
// that the cache still has room (see the capacity check in the fetch functions).
static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
{
    VALUE *slot = &cache->entries[index];
    const int shifted = cache->length - index;

    MEMMOVE(slot + 1, slot, VALUE, shifted);
    *slot = rstring;
    cache->length++;
}
|
|
100
|
+
|
|
101
|
+
// By default name comparisons simply use the libc memcmp.
#define rstring_cache_memcmp memcmp

#if JSON_CPU_LITTLE_ENDIAN_64BITS
#if __has_builtin(__builtin_bswap64)
#undef rstring_cache_memcmp
// Inlined replacement for memcmp(str, rptr, length).
//
// The libc memcmp has numerous complex optimizations, but in this particular case,
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
// inline a simpler memcmp outperforms calling the libc version.
//
// Returns -1, 0 or 1. Bytes are always ordered as unsigned values, both in
// the 8-byte chunks and in the tail, so the ordering matches memcmp
// regardless of whether plain `char` is signed on the target.
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
{
    long i = 0;

    // Compare 8 bytes at a time. On a little-endian CPU the first byte in
    // memory is the least significant one, so the words are byte-swapped
    // before being ordered to get a lexicographic result.
    for (; i + 8 <= length; i += 8) {
        uint64_t a, b;
        memcpy(&a, str + i, 8);
        memcpy(&b, rptr + i, 8);
        if (a != b) {
            a = __builtin_bswap64(a);
            b = __builtin_bswap64(b);
            return (a < b) ? -1 : 1;
        }
    }

    // Remaining (< 8) bytes, compared as unsigned like the chunks above.
    for (; i < length; i++) {
        const unsigned char lhs = (unsigned char)str[i];
        const unsigned char rhs = (unsigned char)rptr[i];
        if (lhs != rhs) {
            return (lhs < rhs) ? -1 : 1;
        }
    }

    return 0;
}
#endif
#endif
|
|
134
|
+
|
|
135
|
+
// Orders the raw name (`str`, `length`) against the Ruby string `rstring`.
// Names of different lengths are ordered by length alone; names of equal
// length are ordered by their bytes. Returns 0 when both are identical,
// a negative value when the raw name sorts first, a positive one otherwise.
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
{
    const char *cached_ptr;
    long cached_length;

    RSTRING_GETMEM(rstring, cached_ptr, cached_length);

    if (length != cached_length) {
        return (int)(length - cached_length);
    }
    return rstring_cache_memcmp(str, cached_ptr, length);
}
|
|
148
|
+
|
|
149
|
+
// Returns the interned string for the `length` bytes at `str`.
// The cache is binary searched first; on a miss the string is built and,
// if the cache is not full yet, inserted at the position that keeps the
// entries ordered.
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
{
    int lo = 0;
    int hi = cache->length - 1;

    while (lo <= hi) {
        const int probe = (hi + lo) >> 1;
        const VALUE candidate = cache->entries[probe];
        const int order = rstring_cache_cmp(str, length, candidate);

        if (order == 0) {
            return candidate;
        }
        if (order > 0) {
            lo = probe + 1;
        } else {
            hi = probe - 1;
        }
    }

    // Miss: `lo` is now the insertion point.
    VALUE built = build_interned_string(str, length);
    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
        rvalue_cache_insert_at(cache, lo, built);
    }
    return built;
}
|
|
175
|
+
|
|
176
|
+
// Symbol flavour of rstring_cache_fetch: the cache entries are Symbols and
// are compared through their string form (rb_sym2str). On a miss the Symbol
// is built and, if the cache is not full yet, inserted in order.
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
{
    int lo = 0;
    int hi = cache->length - 1;

    while (lo <= hi) {
        const int probe = (hi + lo) >> 1;
        const VALUE candidate = cache->entries[probe];
        const int order = rstring_cache_cmp(str, length, rb_sym2str(candidate));

        if (order == 0) {
            return candidate;
        }
        if (order > 0) {
            lo = probe + 1;
        } else {
            hi = probe - 1;
        }
    }

    // Miss: `lo` is now the insertion point.
    VALUE built = build_symbol(str, length);
    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
        rvalue_cache_insert_at(cache, lo, built);
    }
    return built;
}
|
|
202
|
+
|
|
203
|
+
/* rvalue stack */
|
|
204
|
+
|
|
205
|
+
#define RVALUE_STACK_INITIAL_CAPA 128
|
|
206
|
+
|
|
207
|
+
// Tells the stack helpers how to grow a stack: a stack tagged
// RVALUE_STACK_STACK_ALLOCATED is spilled to a new heap-backed stack,
// a RVALUE_STACK_HEAP_ALLOCATED one has its buffer reallocated.
enum rvalue_stack_type {
    RVALUE_STACK_HEAP_ALLOCATED = 0,
    RVALUE_STACK_STACK_ALLOCATED = 1,
};
|
|
211
|
+
|
|
212
|
+
typedef struct rvalue_stack_struct {
|
|
213
|
+
enum rvalue_stack_type type;
|
|
214
|
+
long capa;
|
|
215
|
+
long head;
|
|
216
|
+
VALUE *ptr;
|
|
217
|
+
} rvalue_stack;
|
|
218
|
+
|
|
219
|
+
static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
|
|
220
|
+
|
|
221
|
+
// Makes room for more values and returns the stack to keep using.
// A stack-allocated stack is spilled (a different rvalue_stack is returned
// and published through `handle` / `stack_ref`); a heap-allocated one has
// its buffer reallocated to twice its capacity, or to
// RVALUE_STACK_INITIAL_CAPA when its capacity is 0.
static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
{
    if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
        return rvalue_stack_spill(stack, handle, stack_ref);
    }

    const long required = stack->capa ? stack->capa * 2 : RVALUE_STACK_INITIAL_CAPA;
    JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa);
    stack->capa = required;
    return stack;
}
|
|
233
|
+
|
|
234
|
+
// Pushes `value` on top of the stack, growing it first when it is full,
// and returns `value`. Growing may switch to a different rvalue_stack, in
// which case `*handle` and `*stack_ref` are updated by the grow path.
static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
{
    // A stack-allocated stack needs a handle to spill into.
    JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || handle);

    if (RB_UNLIKELY(stack->head >= stack->capa)) {
        stack = rvalue_stack_grow(stack, handle, stack_ref);
    }

    stack->ptr[stack->head++] = value;
    return value;
}
|
|
247
|
+
|
|
248
|
+
// Returns a pointer to the `count` topmost values (oldest of them first).
static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
{
    const long first = stack->head - count;
    return stack->ptr + first;
}
|
|
252
|
+
|
|
253
|
+
// Drops the `count` topmost values by lowering `head`; the buffer is untouched.
static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
{
    stack->head = stack->head - count;
}
|
|
257
|
+
|
|
258
|
+
static void rvalue_stack_mark(void *ptr)
|
|
259
|
+
{
|
|
260
|
+
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
261
|
+
long index;
|
|
262
|
+
if (stack && stack->ptr) {
|
|
263
|
+
for (index = 0; index < stack->head; index++) {
|
|
264
|
+
rb_gc_mark_movable(stack->ptr[index]);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// Releases the slot buffer and clears `ptr`; the rvalue_stack struct itself
// is left alone.
static void rvalue_stack_free_buffer(rvalue_stack *stack)
{
    JSON_SIZED_FREE_N(stack->ptr, stack->capa);
    stack->ptr = NULL;
}
|
|
274
|
+
|
|
275
|
+
static void rvalue_stack_free(void *ptr)
|
|
276
|
+
{
|
|
277
|
+
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
278
|
+
if (stack) {
|
|
279
|
+
rvalue_stack_free_buffer(stack);
|
|
280
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
281
|
+
JSON_SIZED_FREE(stack);
|
|
282
|
+
#endif
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
static size_t rvalue_stack_memsize(const void *ptr)
|
|
287
|
+
{
|
|
288
|
+
const rvalue_stack *stack = (const rvalue_stack *)ptr;
|
|
289
|
+
size_t memsize = sizeof(VALUE) * stack->capa;
|
|
290
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
291
|
+
memsize += sizeof(rvalue_stack);
|
|
292
|
+
#endif
|
|
293
|
+
return memsize;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
static void rvalue_stack_compact(void *ptr)
|
|
297
|
+
{
|
|
298
|
+
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
299
|
+
long index;
|
|
300
|
+
if (stack && stack->ptr) {
|
|
301
|
+
for (index = 0; index < stack->head; index++) {
|
|
302
|
+
stack->ptr[index] = rb_gc_location(stack->ptr[index]);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
|
|
308
|
+
.wrap_struct_name = "JSON::Ext::Parser/rvalue_stack",
|
|
309
|
+
.function = {
|
|
310
|
+
.dmark = rvalue_stack_mark,
|
|
311
|
+
.dfree = rvalue_stack_free,
|
|
312
|
+
.dsize = rvalue_stack_memsize,
|
|
313
|
+
.dcompact = rvalue_stack_compact,
|
|
314
|
+
},
|
|
315
|
+
// We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED
|
|
316
|
+
// because it churns a lot of values so trigering write barriers every time is very costly.
|
|
317
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
318
|
+
};
|
|
319
|
+
|
|
320
|
+
// Moves a stack-allocated rvalue_stack into a GC-managed, heap-backed one
// with twice the capacity.
//
// old_stack: the stack to copy from; it is left untouched.
// handle:    receives the TypedData object that owns the new stack.
// stack_ref: receives the new stack pointer, only once it is fully built.
// Returns the new stack.
//
// The new buffer is allocated *before* anything is stored in the wrapper
// struct: TypedData_Make_Struct hands back a zeroed struct, so if ALLOC_N
// raises (or triggers a GC) the wrapper holds no pointer to the caller's
// stack-allocated buffer, and neither the mark nor the free callback can
// touch memory the wrapper doesn't own.
static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
{
    rvalue_stack *stack;
    *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);

    // Same growth rule as rvalue_stack_grow, so a zero capacity can't yield
    // a zero-sized buffer.
    const long capa = old_stack->capa ? old_stack->capa << 1 : RVALUE_STACK_INITIAL_CAPA;
    VALUE *buffer = ALLOC_N(VALUE, capa);
    MEMCPY(buffer, old_stack->ptr, VALUE, old_stack->head);

    stack->type = RVALUE_STACK_HEAP_ALLOCATED;
    stack->capa = capa;
    stack->ptr = buffer;
    stack->head = old_stack->head;

    *stack_ref = stack;
    return stack;
}
|
|
333
|
+
|
|
334
|
+
// Frees the memory behind a spilled stack right away instead of waiting for
// the GC to collect `handle`. A zero `handle` (nothing was spilled) is a no-op.
static void rvalue_stack_eagerly_release(VALUE handle)
{
    if (!handle) {
        return;
    }

    rvalue_stack *stack;
    TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
    // Only the buffer is released here; the struct is not freed in this configuration.
    rvalue_stack_free_buffer(stack);
#else
    rvalue_stack_free(stack);
    // Detach the freed struct so the dfree callback gets NULL later.
    RTYPEDDATA_DATA(handle) = NULL;
#endif
}
|
|
347
|
+
|
|
348
|
+
/* frame stack */
|
|
349
|
+
|
|
350
|
+
// Iterative (non-recursive) parsing keeps an explicit stack of the containers
|
|
351
|
+
// currently being built, instead of relying on the C call stack. Each frame
|
|
352
|
+
// only needs enough bookkeeping to close its container: which kind it is, the
|
|
353
|
+
// rvalue_stack position where its children start (so we know how many to pop),
|
|
354
|
+
// and the cursor at its opening brace (used to rewind for duplicate key
|
|
355
|
+
// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses
|
|
356
|
+
// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that
|
|
357
|
+
// it's freed even if parsing raises.
|
|
358
|
+
//
|
|
359
|
+
// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release
|
|
360
|
+
// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts
|
|
361
|
+
// -- the element type and the absence of a mark function are the only real
|
|
362
|
+
// differences. Keep the two in sync: a fix to the spill/release or
|
|
363
|
+
// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the
|
|
364
|
+
// other.
|
|
365
|
+
#define JSON_FRAME_STACK_INITIAL_CAPA 32
|
|
366
|
+
|
|
367
|
+
// Kind of container a frame is building. Each value equals the phase the
// frame is in right after one of its values completed.
enum json_frame_type {
    JSON_FRAME_ROOT,   // == JSON_PHASE_DONE
    JSON_FRAME_ARRAY,  // == JSON_PHASE_ARRAY_COMMA
    JSON_FRAME_OBJECT, // == JSON_PHASE_OBJECT_COMMA
};
|
|
372
|
+
|
|
373
|
+
// Where a frame is within its container's grammar. This is the entirety of the
|
|
374
|
+
// parser's "what to do next" state: json_parse_any dispatches on the top
|
|
375
|
+
// frame's phase and holds no resume state in C locals, so a parse can stop at
|
|
376
|
+
// any value boundary and be resumed purely from the (persistable) frame stack.
|
|
377
|
+
//
|
|
378
|
+
// The first three phases are deliberately equal to the corresponding json_frame_type
|
|
379
|
+
// to simplify the transition of phase in json_value_completed.
|
|
380
|
+
enum json_frame_phase {
|
|
381
|
+
JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed
|
|
382
|
+
JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']'
|
|
383
|
+
JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}'
|
|
384
|
+
JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':')
|
|
385
|
+
JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',')
|
|
386
|
+
JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':'
|
|
387
|
+
};
|
|
388
|
+
|
|
389
|
+
typedef struct json_frame_struct {
|
|
390
|
+
enum json_frame_type type;
|
|
391
|
+
enum json_frame_phase phase;
|
|
392
|
+
long value_stack_head; // rvalue_stack->head when this container opened
|
|
393
|
+
size_t start_offset; // object frames only (the '{'); NULL otherwise
|
|
394
|
+
} json_frame;
|
|
395
|
+
|
|
396
|
+
typedef struct json_frame_stack_struct {
|
|
397
|
+
enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated
|
|
398
|
+
long capa;
|
|
399
|
+
long head;
|
|
400
|
+
json_frame *ptr;
|
|
401
|
+
} json_frame_stack;
|
|
402
|
+
|
|
403
|
+
// What to do about a construct that is on a deprecation path.
// JSON_DEPRECATED is 0, so it is what a zero-initialized config holds.
enum deprecatable_action {
    JSON_DEPRECATED = 0, // accept, but emit a deprecation warning (see json_eat_comments)
    JSON_IGNORE,         // neither warn nor raise in json_eat_comments
    JSON_RAISE,          // reject with a syntax error (see json_eat_comments)
};
|
|
408
|
+
|
|
409
|
+
typedef struct JSON_ParserStruct {
|
|
410
|
+
VALUE on_load_proc;
|
|
411
|
+
VALUE decimal_class;
|
|
412
|
+
ID decimal_method_id;
|
|
413
|
+
enum deprecatable_action on_duplicate_key;
|
|
414
|
+
enum deprecatable_action on_comment;
|
|
415
|
+
int max_nesting;
|
|
416
|
+
bool allow_nan;
|
|
417
|
+
bool allow_trailing_comma;
|
|
418
|
+
bool allow_control_characters;
|
|
419
|
+
bool allow_invalid_escape;
|
|
420
|
+
bool symbolize_names;
|
|
421
|
+
bool freeze;
|
|
422
|
+
} JSON_ParserConfig;
|
|
423
|
+
|
|
424
|
+
typedef struct JSON_ParserStateStruct {
|
|
425
|
+
VALUE *value_stack_handle;
|
|
426
|
+
VALUE *frame_stack_handle;
|
|
427
|
+
const char *start;
|
|
428
|
+
const char *cursor;
|
|
429
|
+
const char *end;
|
|
430
|
+
rvalue_stack *value_stack;
|
|
431
|
+
json_frame_stack *frames;
|
|
432
|
+
rvalue_cache name_cache;
|
|
433
|
+
int in_array;
|
|
434
|
+
int current_nesting;
|
|
435
|
+
unsigned int emitted_deprecations;
|
|
436
|
+
VALUE parser;
|
|
437
|
+
} JSON_ParserState;
|
|
438
|
+
|
|
439
|
+
static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref);
|
|
440
|
+
|
|
441
|
+
// Makes room for more frames and returns the stack to keep using.
// A stack-allocated stack is spilled (a different json_frame_stack is
// returned and published through `handle` / `stack_ref`); a heap-allocated
// one has its buffer reallocated to twice its capacity, or to
// JSON_FRAME_STACK_INITIAL_CAPA when its capacity is 0.
static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref)
{
    if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
        return json_frame_stack_spill(stack, handle, stack_ref);
    }

    const long required = stack->capa ? stack->capa * 2 : JSON_FRAME_STACK_INITIAL_CAPA;
    JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa);
    stack->capa = required;
    return stack;
}
|
|
453
|
+
|
|
454
|
+
// Pushes a copy of `frame` on the parser's frame stack, growing the stack
// first when it is full, and returns a pointer to the stored copy.
// Growing may replace state->frames.
static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame)
{
    json_frame_stack *frames = state->frames;

    // A stack-allocated stack needs a handle to spill into.
    JSON_ASSERT(frames->type != RVALUE_STACK_STACK_ALLOCATED || state->frame_stack_handle);

    if (RB_UNLIKELY(frames->head >= frames->capa)) {
        frames = json_frame_stack_grow(frames, state->frame_stack_handle, &state->frames);
    }

    json_frame *slot = frames->ptr + frames->head;
    frames->head++;
    *slot = frame;
    return slot;
}
|
|
468
|
+
|
|
469
|
+
// Returns the topmost frame. There is no emptiness check here.
static inline json_frame *json_frame_stack_peek(json_frame_stack *stack)
{
    const long top = stack->head - 1;
    return stack->ptr + top;
}
|
|
473
|
+
|
|
474
|
+
// Drops the topmost frame by lowering `head`; the buffer is untouched.
static inline void json_frame_stack_pop(json_frame_stack *stack)
{
    stack->head -= 1;
}
|
|
478
|
+
|
|
479
|
+
// Releases the frame buffer and clears `ptr`; the json_frame_stack struct
// itself is left alone.
static void json_frame_stack_free_buffer(json_frame_stack *stack)
{
    JSON_SIZED_FREE_N(stack->ptr, stack->capa);
    stack->ptr = NULL;
}
|
|
484
|
+
|
|
485
|
+
static void json_frame_stack_free(void *ptr)
|
|
486
|
+
{
|
|
487
|
+
json_frame_stack *stack = (json_frame_stack *)ptr;
|
|
488
|
+
if (stack) {
|
|
489
|
+
json_frame_stack_free_buffer(stack);
|
|
490
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
491
|
+
JSON_SIZED_FREE(stack);
|
|
492
|
+
#endif
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
static size_t json_frame_stack_memsize(const void *ptr)
|
|
497
|
+
{
|
|
498
|
+
const json_frame_stack *stack = (const json_frame_stack *)ptr;
|
|
499
|
+
|
|
500
|
+
size_t memsize = sizeof(json_frame) * stack->capa;
|
|
501
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
502
|
+
memsize += sizeof(json_frame_stack);
|
|
503
|
+
#endif
|
|
504
|
+
return memsize;
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
static const rb_data_type_t JSON_Parser_frame_stack_type = {
|
|
508
|
+
.wrap_struct_name = "JSON::Ext::Parser/frame_stack",
|
|
509
|
+
.function = {
|
|
510
|
+
.dmark = NULL,
|
|
511
|
+
.dfree = json_frame_stack_free,
|
|
512
|
+
.dsize = json_frame_stack_memsize,
|
|
513
|
+
},
|
|
514
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE,
|
|
515
|
+
};
|
|
516
|
+
|
|
517
|
+
// Moves a stack-allocated json_frame_stack into a GC-managed, heap-backed
// one with twice the capacity.
//
// old_stack: the stack to copy from; it is left untouched.
// handle:    receives the TypedData object that owns the new stack.
// stack_ref: receives the new stack pointer, only once it is fully built.
// Returns the new stack.
//
// The new buffer is allocated *before* anything is stored in the wrapper
// struct: TypedData_Make_Struct hands back a zeroed struct, so if ALLOC_N
// raises the wrapper holds no pointer to the caller's stack-allocated
// buffer and its free callback can't release memory it doesn't own.
static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref)
{
    json_frame_stack *stack;
    *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack);

    // Same growth rule as json_frame_stack_grow, so a zero capacity can't
    // yield a zero-sized buffer.
    const long capa = old_stack->capa ? old_stack->capa << 1 : JSON_FRAME_STACK_INITIAL_CAPA;
    json_frame *buffer = ALLOC_N(json_frame, capa);
    MEMCPY(buffer, old_stack->ptr, json_frame, old_stack->head);

    stack->type = RVALUE_STACK_HEAP_ALLOCATED;
    stack->capa = capa;
    stack->ptr = buffer;
    stack->head = old_stack->head;

    *stack_ref = stack;
    return stack;
}
|
|
530
|
+
|
|
531
|
+
// Frees the memory behind a spilled frame stack right away instead of
// waiting for the GC to collect `handle`. A zero `handle` (nothing was
// spilled) is a no-op.
static void json_frame_stack_eagerly_release(VALUE handle)
{
    if (!handle) {
        return;
    }

    json_frame_stack *stack;
    TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
    // Only the buffer is released here; the struct is not freed in this configuration.
    json_frame_stack_free_buffer(stack);
#else
    json_frame_stack_free(stack);
    // Detach the freed struct so the dfree callback gets NULL later.
    RTYPEDDATA_DATA(handle) = NULL;
#endif
}
|
|
544
|
+
|
|
545
|
+
// Encodes the code point `ch` as UTF-8 into `buf` (which must have room for
// 4 bytes) and returns the number of bytes written (1 to 4).
// Values above 0x1FFFFF cannot be encoded: a single '?' is written instead.
// No NUL terminator is appended.
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
{
    if (ch <= 0x7F) {
        buf[0] = (char) ch;
        return 1;
    }

    if (ch <= 0x07FF) {
        buf[0] = (char) ((ch >> 6) | 0xC0);
        buf[1] = (char) ((ch & 0x3F) | 0x80);
        return 2;
    }

    if (ch <= 0xFFFF) {
        buf[0] = (char) ((ch >> 12) | 0xE0);
        buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[2] = (char) ((ch & 0x3F) | 0x80);
        return 3;
    }

    if (ch <= 0x1fffff) {
        buf[0] = (char) ((ch >> 18) | 0xF0);
        buf[1] = (char) (((ch >> 12) & 0x3F) | 0x80);
        buf[2] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[3] = (char) ((ch & 0x3F) | 0x80);
        return 4;
    }

    buf[0] = '?';
    return 1;
}
|
|
570
|
+
|
|
571
|
+
// Number of bytes between the cursor and the end of the input.
static inline size_t rest(JSON_ParserState *state)
{
    const char *from = state->cursor;
    return state->end - from;
}
|
|
574
|
+
|
|
575
|
+
// True once the cursor has reached (or passed) the end of the input.
static inline bool eos(JSON_ParserState *state)
{
    return !(state->cursor < state->end);
}
|
|
578
|
+
|
|
579
|
+
// Returns the byte under the cursor without consuming it, or 0 when the
// cursor is at the end of the input.
static inline char peek(JSON_ParserState *state)
{
    return RB_UNLIKELY(eos(state)) ? 0 : *state->cursor;
}
|
|
586
|
+
|
|
587
|
+
// Computes the position of the cursor for error and warning messages.
//
// line_out:   receives 1 + the number of '\n' bytes found between the start
//             of the input and the cursor (cursor byte included).
// column_out: receives the number of bytes walked backwards from the cursor
//             (cursor byte included) before reaching a '\n' or the start of
//             the input.
//
// A cursor beyond `end` is clamped to `end` (and written back to the state).
//
// The walk uses indexes rather than a decrementing pointer so that no
// pointer before `start` is ever formed, and the byte at `end` is never
// read: a cursor sitting at `end` simply counts as one non-newline column.
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
{
    JSON_ASSERT(state->cursor <= state->end);

    // Redundant but helpful for hardening
    if (RB_UNLIKELY(state->cursor > state->end)) {
        state->cursor = state->end;
    }

    const char *start = state->start;
    const long end_index = (long)(state->end - start);
    long index = (long)(state->cursor - start);
    long column = 0;
    long line = 1;

    // First pass: count columns back to the closest newline.
    for (; index >= 0; index--) {
        if (index < end_index && start[index] == '\n') {
            line++;
            index--; // step over the newline before the second pass
            break;
        }
        column++;
    }

    // Second pass: count the remaining newlines up to the start of the input.
    for (; index >= 0; index--) {
        if (start[index] == '\n') {
            line++;
        }
    }

    *line_out = line;
    *column_out = column;
}
|
|
616
|
+
|
|
617
|
+
static const unsigned int MAX_DEPRECATIONS = 5;
|
|
618
|
+
|
|
619
|
+
// Sends "<message> at line L column C" (position of the cursor) to
// JSON.deprecation_warning.
static void emit_parse_warning(const char *message, JSON_ParserState *state)
{
    long line, column;
    cursor_position(state, &line, &column);

    ID warn_method = rb_intern("deprecation_warning");
    VALUE text = rb_sprintf("%s at line %ld column %ld", message, line, column);
    rb_funcall(mJSON, warn_method, 1, text);
}
|
|
627
|
+
|
|
628
|
+
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
629
|
+
|
|
630
|
+
// Builds the error message for `format`, which is expanded with a single
// string argument describing what sits at the cursor:
//   - "EOF" when the cursor is NULL or at/after the end of the input;
//   - otherwise the quoted token at the cursor: up to
//     PARSE_ERROR_FRAGMENT_LEN bytes, stopping at the first NUL, newline,
//     space, tab or carriage return, with trailing UTF-8 continuation bytes
//     and a trailing lead byte trimmed so a multibyte character is never
//     cut in half.
// Returns a new UTF-8 Ruby string.
//
// The scan is bounded by state->end so it never reads past the input, even
// if the input isn't followed by a NUL byte.
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state)
{
    // Opening quote + fragment + closing quote + NUL.
    unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];

    const char *ptr = "EOF";
    if (state->cursor && state->cursor < state->end) {
        ptr = state->cursor;

        const size_t available = (size_t)(state->end - state->cursor);
        const size_t limit = available < PARSE_ERROR_FRAGMENT_LEN ? available : PARSE_ERROR_FRAGMENT_LEN;

        size_t len = 0;
        while (len < limit) {
            char ch = ptr[len];
            if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
                break;
            }
            len++;
        }

        // NOTE(review): when len is 0 (cursor on whitespace or NUL), `ptr`
        // still points into the input and is formatted as a C string below.
        if (len) {
            buffer[0] = '\'';
            MEMCPY(buffer + 1, ptr, char, len);

            // buffer[len] is the last copied byte; buffer[0] is the quote,
            // which stops both trims below.
            while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
                len--;
            }

            if (buffer[len] >= 0xC0) { // multibyte character start
                len--;
            }

            buffer[len + 1] = '\'';
            buffer[len + 2] = '\0';
            ptr = (const char *)buffer;
        }
    }

    return rb_enc_sprintf(enc_utf8, format, ptr);
}
|
|
666
|
+
|
|
667
|
+
// Creates (without raising) an eParserError carrying `message`, and records
// `line` and `column` in the instance variables identified by i_at_line and
// i_at_column. `state` and `eos` are currently unused.
static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, long column, bool eos)
{
    VALUE error = rb_exc_new_str(eParserError, message);

    VALUE at_line = LONG2NUM(line);
    rb_ivar_set(error, i_at_line, at_line);

    VALUE at_column = LONG2NUM(column);
    rb_ivar_set(error, i_at_column, at_column);

    return error;
}
|
|
674
|
+
|
|
675
|
+
// Reports a parse error at the cursor; never returns.
//
// Regular parse (state->parser is zero): raises an eParserError whose
// message is `format` expanded with the offending token, followed by
// " at line L column C".
//
// Resumable parse (state->parser is set):
//   - eos == true: throws state->parser (tag and value) instead of raising;
//   - eos == false: raises an eParserError with line and column set to 0.
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state, bool eos)
{
    if (!state->parser) {
        long line, column;
        VALUE message = build_parse_error_message(format, state);
        cursor_position(state, &line, &column);
        rb_str_catf(message, " at line %ld column %ld", line, column);
        rb_exc_raise(parse_error_new(state, message, line, column, eos));
    }

    if (eos) {
        // the error will be swallowed by ResumableParser#parse, so no
        // point building a message or backtrace.
        rb_throw_obj(state->parser, state->parser);
    }

    // line and columns can't be accurate in resumable
    rb_exc_raise(parse_error_new(state, build_parse_error_message(format, state), 0, 0, eos));
}
|
|
694
|
+
|
|
695
|
+
// Reports an error caused by the input ending too early, at the cursor.
NORETURN(static) void raise_eos_error(const char *format, JSON_ParserState *state)
{
    const bool at_eos = true;
    raise_parse_error(format, state, at_eos);
}
|
|
699
|
+
|
|
700
|
+
// Reports an error caused by invalid input (not by its end), at the cursor.
NORETURN(static) void raise_syntax_error(const char *format, JSON_ParserState *state)
{
    const bool at_eos = false;
    raise_parse_error(format, state, at_eos);
}
|
|
704
|
+
|
|
705
|
+
// Same as raise_parse_error, but first moves the cursor to `at` so that the
// reported token and position are those of `at`.
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at, bool eos)
{
    const char *error_position = at;
    state->cursor = error_position;
    raise_parse_error(format, state, eos);
}
|
|
710
|
+
|
|
711
|
+
// Reports an error caused by the input ending too early, located at `at`.
NORETURN(static) void raise_eos_error_at(const char *format, JSON_ParserState *state, const char *at)
{
    const bool at_eos = true;
    raise_parse_error_at(format, state, at, at_eos);
}
|
|
715
|
+
|
|
716
|
+
// Reports an error caused by invalid input (not by its end), located at `at`.
NORETURN(static) void raise_syntax_error_at(const char *format, JSON_ParserState *state, const char *at)
{
    const bool at_eos = false;
    raise_parse_error_at(format, state, at, at_eos);
}
|
|
720
|
+
|
|
721
|
+
/* unicode */
|
|
722
|
+
|
|
723
|
+
// Maps a byte to the value of the hexadecimal digit it represents
// ('0'-'9', 'A'-'F', 'a'-'f'), or -1 when the byte is not a hex digit.
// Laid out 16 entries per row; the row's first index is given on the right.
static const signed char digit_values[256] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x00
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x10
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x20
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 0x30: '0'-'9'
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x40: 'A'-'F'
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x50
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x60: 'a'-'f'
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x70
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x80
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x90
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xA0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xB0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xC0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xD0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xE0
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  // 0xF0
};
|
|
739
|
+
|
|
740
|
+
// Decodes the 4 hexadecimal digits starting at `sp` into a 16-bit value.
//
// sp:  first hex digit (errors are reported at `sp - 2`).
// spe: end of the readable range; fewer than 4 bytes between `sp` and `spe`
//      is reported as an end-of-stream error.
// A byte that is not a hex digit is reported as a syntax error.
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
{
    if (RB_UNLIKELY(sp > spe - 4)) {
        raise_eos_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
    }

    const unsigned char *hex = (const unsigned char *)sp;
    const signed char nibbles[4] = {
        digit_values[hex[0]],
        digit_values[hex[1]],
        digit_values[hex[2]],
        digit_values[hex[3]],
    };

    // Valid digits are 0..15 and invalid ones are -1, so the OR of the four
    // is negative exactly when at least one of them is invalid.
    if (RB_UNLIKELY((signed char)(nibbles[0] | nibbles[1] | nibbles[2] | nibbles[3]) < 0)) {
        raise_syntax_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
    }

    uint32_t code_unit = 0;
    for (int position = 0; position < 4; position++) {
        code_unit = (code_unit << 4) | (uint32_t)nibbles[position];
    }
    return code_unit;
}
|
|
759
|
+
|
|
760
|
+
#define GET_PARSER_CONFIG \
|
|
761
|
+
JSON_ParserConfig *config; \
|
|
762
|
+
TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
|
|
763
|
+
|
|
764
|
+
static const rb_data_type_t JSON_ParserConfig_type;
|
|
765
|
+
|
|
766
|
+
const char *COMMENT_DEPRECATION_MESSAGE = "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`";
|
|
767
|
+
NOINLINE(static) void
|
|
768
|
+
json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
769
|
+
{
|
|
770
|
+
if (config->on_comment == JSON_RAISE) {
|
|
771
|
+
raise_syntax_error("unexpected token %s", state);
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
const char *start = state->cursor;
|
|
775
|
+
state->cursor++;
|
|
776
|
+
|
|
777
|
+
switch (peek(state)) {
|
|
778
|
+
case '/': {
|
|
779
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
780
|
+
if (!state->cursor) {
|
|
781
|
+
state->cursor = state->end;
|
|
782
|
+
} else {
|
|
783
|
+
state->cursor++;
|
|
784
|
+
}
|
|
785
|
+
break;
|
|
786
|
+
}
|
|
787
|
+
case '*': {
|
|
788
|
+
state->cursor++;
|
|
789
|
+
|
|
790
|
+
while (true) {
|
|
791
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
792
|
+
if (!next_match) {
|
|
793
|
+
raise_eos_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
794
|
+
}
|
|
795
|
+
|
|
796
|
+
state->cursor = next_match + 1;
|
|
797
|
+
if (peek(state) == '/') {
|
|
798
|
+
state->cursor++;
|
|
799
|
+
break;
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
break;
|
|
803
|
+
}
|
|
804
|
+
default:
|
|
805
|
+
raise_parse_error_at("unexpected token %s", state, start, eos(state));
|
|
806
|
+
break;
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
if (config->on_comment == JSON_DEPRECATED && state->emitted_deprecations < MAX_DEPRECATIONS) {
|
|
810
|
+
state->emitted_deprecations++;
|
|
811
|
+
emit_parse_warning(COMMENT_DEPRECATION_MESSAGE, state);
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
|
|
815
|
+
ALWAYS_INLINE(static) void
|
|
816
|
+
json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config, bool include_comments)
|
|
817
|
+
{
|
|
818
|
+
while (true) {
|
|
819
|
+
switch (peek(state)) {
|
|
820
|
+
case ' ':
|
|
821
|
+
state->cursor++;
|
|
822
|
+
break;
|
|
823
|
+
case '\n':
|
|
824
|
+
state->cursor++;
|
|
825
|
+
|
|
826
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
827
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
828
|
+
while (rest(state) > 8) {
|
|
829
|
+
uint64_t chunk;
|
|
830
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
831
|
+
if (chunk == 0x2020202020202020) {
|
|
832
|
+
state->cursor += 8;
|
|
833
|
+
continue;
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
837
|
+
state->cursor += consecutive_spaces;
|
|
838
|
+
break;
|
|
839
|
+
}
|
|
840
|
+
#endif
|
|
841
|
+
break;
|
|
842
|
+
case '\t':
|
|
843
|
+
case '\r':
|
|
844
|
+
state->cursor++;
|
|
845
|
+
break;
|
|
846
|
+
case '/':
|
|
847
|
+
if (!include_comments) {
|
|
848
|
+
return;
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
json_eat_comments(state, config);
|
|
852
|
+
break;
|
|
853
|
+
|
|
854
|
+
default:
|
|
855
|
+
return;
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
|
|
861
|
+
{
|
|
862
|
+
if (symbolize) {
|
|
863
|
+
intern = true;
|
|
864
|
+
}
|
|
865
|
+
VALUE result;
|
|
866
|
+
# ifdef HAVE_RB_ENC_INTERNED_STR
|
|
867
|
+
if (intern) {
|
|
868
|
+
result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
|
|
869
|
+
} else {
|
|
870
|
+
result = rb_utf8_str_new(start, (long)(end - start));
|
|
871
|
+
}
|
|
872
|
+
# else
|
|
873
|
+
result = rb_utf8_str_new(start, (long)(end - start));
|
|
874
|
+
if (intern) {
|
|
875
|
+
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
|
|
876
|
+
}
|
|
877
|
+
# endif
|
|
878
|
+
|
|
879
|
+
if (symbolize) {
|
|
880
|
+
result = rb_str_intern(result);
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
return result;
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
887
|
+
{
|
|
888
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
889
|
+
// Simple heuristics:
|
|
890
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
891
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
892
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
/*
 * Build the Ruby value for a string that json_parse_string() found to
 * contain no backslash, spanning [string, stringEnd).
 *
 * Object keys (is_name) read while state->in_array is set are first looked
 * up in state->name_cache when json_string_cacheable_p() allows it; a cache
 * hit is returned as-is. Otherwise the value comes from build_string(),
 * interned for keys or when config->freeze is set, and symbolized for keys
 * when config->symbolize_names is set.
 */
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
{
    const bool symbolize = is_name && config->symbolize_names;
    const bool intern = is_name || config->freeze;
    const size_t length = stringEnd - string;

    if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, length))) {
        VALUE cached = RB_UNLIKELY(symbolize)
            ? rsymbol_cache_fetch(&state->name_cache, string, length)
            : rstring_cache_fetch(&state->name_cache, string, length);

        if (RB_LIKELY(cached)) {
            return cached;
        }
    }

    return build_string(string, stringEnd, intern, symbolize);
}
|
|
916
|
+
|
|
917
|
+
// Number of backslash positions remembered exactly while scanning a string.
#define JSON_MAX_UNESCAPE_POSITIONS 16

/*
 * Backslash positions collected while scanning a string.
 *
 * size                   - number of entries still unread in `positions`.
 * positions              - cursor into the array of recorded backslashes.
 * additional_backslashes - count of backslashes seen after the array filled
 *                          up; these have to be found again with memchr().
 */
typedef struct _json_unescape_positions {
    long size;
    const char **positions;
    unsigned long additional_backslashes;
} JSON_UnescapePositions;

/*
 * Return the next backslash at or after pe, or NULL when none is left.
 *
 * Recorded positions are consumed first; entries that lie before pe are
 * dropped. Once the recorded list is empty, each remaining
 * additional_backslashes credit pays for one memchr() over [pe, stringEnd).
 */
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
{
    for (; positions->size != 0; ) {
        const char *candidate = *positions->positions;
        positions->positions++;
        positions->size--;

        if (candidate >= pe) {
            return candidate;
        }
    }

    if (positions->additional_backslashes == 0) {
        return NULL;
    }

    positions->additional_backslashes--;
    return memchr(pe, '\\', stringEnd - pe);
}
|
|
942
|
+
|
|
943
|
+
/*
 * Build the Ruby value for a string containing backslash escapes.
 *
 * string / stringEnd - raw bytes between the quotes.
 * is_name            - the string is an object key: it is interned, and
 *                      symbolized when config->symbolize_names is set.
 * positions          - backslash locations recorded during scanning,
 *                      consumed through json_next_backslash().
 *
 * The output buffer is allocated with the raw length as its capacity.
 * Returns Qundef when a backslash is followed by a NUL byte; malformed
 * escapes are reported through raise_syntax_error_at().
 */
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
{
    const bool intern = is_name || config->freeze;
    const bool symbolize = is_name && config->symbolize_names;
    const size_t capacity = stringEnd - string;

    VALUE result = rb_str_buf_new(capacity);
    rb_enc_associate_index(result, utf8_encindex);

    char *out = RSTRING_PTR(result);
    const char *const out_start = out;

    // `pending` is the start of the literal bytes not yet copied to `out`;
    // `scan` is the read position.
    const char *pending = string;
    const char *scan = string;

    while (scan < stringEnd && (scan = json_next_backslash(scan, stringEnd, positions))) {
        // Flush the literal run that precedes this backslash.
        if (scan > pending) {
            MEMCPY(out, pending, char, scan - pending);
            out += scan - pending;
        }

        scan++;
        switch (*scan) {
          case '"':
          case '/':
            // The character stands for itself: restart the literal run on it
            // so the next flush copies it, which drops only the backslash.
            pending = scan;
            break;
          case '\\':
            *out++ = '\\';
            pending = ++scan;
            break;
          case 'n':
            *out++ = '\n';
            pending = ++scan;
            break;
          case 'r':
            *out++ = '\r';
            pending = ++scan;
            break;
          case 't':
            *out++ = '\t';
            pending = ++scan;
            break;
          case 'b':
            *out++ = '\b';
            pending = ++scan;
            break;
          case 'f':
            *out++ = '\f';
            pending = ++scan;
            break;
          case 'u': {
            uint32_t codepoint = unescape_unicode(state, ++scan, stringEnd);
            scan += 3;
            /* Values above U+FFFF arrive as two \uXXXX escapes: a high
             * surrogate (U+D800..U+DBFF) followed by a low surrogate
             * (U+DC00..U+DFFF). The low 10 bits of each form a 20-bit
             * number, to which 0x10000 is added to get the codepoint.
             *
             * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling Surrogate
             * Pairs in UTF-16", and 23.6 "Surrogates Area".
             */
            if ((codepoint & 0xFC00) == 0xD800) {
                scan++;
                const bool low_follows = (scan <= stringEnd - 6) && memcmp(scan, "\\u", 2) == 0;
                if (RB_UNLIKELY(!low_follows)) {
                    raise_syntax_error_at("incomplete surrogate pair at %s", state, pending);
                    break;
                }

                uint32_t low = unescape_unicode(state, scan + 2, stringEnd);
                if (RB_UNLIKELY((low & 0xFC00) != 0xDC00)) {
                    raise_syntax_error_at("invalid surrogate pair at %s", state, pending);
                }

                codepoint = (((codepoint & 0x3F) << 10) | ((((codepoint >> 6) & 0xF) + 1) << 16) | (low & 0x3FF));
                scan += 5;
            }

            const int written = convert_UTF32_to_UTF8(out, codepoint);
            out += written;
            pending = ++scan;
            break;
          }
          case 0:
            return Qundef;
          default:
            if ((unsigned char)*scan < 0x20 && !config->allow_control_characters) {
                if (*scan == '\n') {
                    raise_syntax_error_at("Invalid unescaped newline character (\\n) in string: %s", state, scan - 1);
                }
                raise_syntax_error_at("invalid ASCII control character in string: %s", state, scan - 1);
            }

            if (!config->allow_invalid_escape) {
                raise_syntax_error_at("invalid escape character in string: %s", state, scan - 1);
            } else {
                // Lenient mode: keep the escaped character, drop the backslash.
                *out++ = *scan;
                pending = ++scan;
            }
            break;
        }
    }

    // Copy whatever literal text follows the last escape.
    if (stringEnd > pending) {
        MEMCPY(out, pending, char, stringEnd - pending);
        out += stringEnd - pending;
    }
    rb_str_set_len(result, out - out_start);

    if (symbolize) {
        return rb_str_intern(result);
    }
    if (intern) {
        return rb_str_to_interned_str(result);
    }
    return result;
}
|
|
1057
|
+
|
|
1058
|
+
#define MAX_FAST_INTEGER_SIZE 18
|
|
1059
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
1060
|
+
|
|
1061
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
1062
|
+
|
|
1063
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
1064
|
+
{
|
|
1065
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
1066
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
1067
|
+
MEMCPY(buffer, start, char, len);
|
|
1068
|
+
buffer[len] = '\0';
|
|
1069
|
+
return func(buffer);
|
|
1070
|
+
} else {
|
|
1071
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
1072
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
1073
|
+
MEMCPY(buffer, start, char, len);
|
|
1074
|
+
buffer[len] = '\0';
|
|
1075
|
+
VALUE number = func(buffer);
|
|
1076
|
+
RB_GC_GUARD(buffer_v);
|
|
1077
|
+
return number;
|
|
1078
|
+
}
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
// json_number_decode_func_t adapter: parse a NUL-terminated literal as a
// base-10 integer with rb_cstr2inum().
static VALUE json_decode_inum(const char *buffer)
{
    const int base = 10;
    return rb_cstr2inum(buffer, base);
}
|
|
1085
|
+
|
|
1086
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
1087
|
+
{
|
|
1088
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
1092
|
+
{
|
|
1093
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
1094
|
+
if (negative) {
|
|
1095
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
1096
|
+
}
|
|
1097
|
+
return UINT64T2NUM(mantissa);
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
return json_decode_large_integer(start, end - start);
|
|
1101
|
+
}
|
|
1102
|
+
|
|
1103
|
+
// json_number_decode_func_t adapter: parse a NUL-terminated literal with
// rb_cstr_to_dbl() (second argument 1) and box the double as a Ruby Float.
static VALUE json_decode_dnum(const char *buffer)
{
    const double parsed = rb_cstr_to_dbl(buffer, 1);
    return DBL2NUM(parsed);
}
|
|
1107
|
+
|
|
1108
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
1109
|
+
{
|
|
1110
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
1111
|
+
}
|
|
1112
|
+
|
|
1113
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
1114
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
1115
|
+
*/
|
|
1116
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative,
|
|
1117
|
+
const char *start, const char *end)
|
|
1118
|
+
{
|
|
1119
|
+
if (RB_UNLIKELY(config->decimal_class)) {
|
|
1120
|
+
VALUE text = rb_str_new(start, end - start);
|
|
1121
|
+
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
1122
|
+
}
|
|
1123
|
+
|
|
1124
|
+
if (RB_UNLIKELY(exponent > INT32_MAX)) {
|
|
1125
|
+
return negative ? CMinusInfinity : CInfinity;
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
if (RB_UNLIKELY(exponent < INT32_MIN)) {
|
|
1129
|
+
return rb_float_new(negative ? -0.0 : 0.0);
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
if (RB_UNLIKELY(mantissa_digits > 18 || mantissa_digits + exponent < -307)) {
|
|
1133
|
+
return json_decode_large_float(start, end - start);
|
|
1134
|
+
}
|
|
1135
|
+
|
|
1136
|
+
return DBL2NUM(ffp_s2d(exponent, mantissa, negative));
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
/*
 * Build a Ruby Array from the top `count` entries of state->value_stack and
 * pop them. The array is frozen when config->freeze is set.
 */
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
{
    const VALUE *elements = rvalue_stack_peek(state->value_stack, count);
    VALUE array = rb_ary_new_from_values(count, elements);
    rvalue_stack_pop(state->value_stack, count);

    if (!config->freeze) {
        return array;
    }

    RB_OBJ_FREEZE(array);
    return array;
}
|
|
1150
|
+
|
|
1151
|
+
static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
1152
|
+
{
|
|
1153
|
+
VALUE set = rb_hash_new_capa(count / 2);
|
|
1154
|
+
for (size_t index = 0; index < count; index += 2) {
|
|
1155
|
+
size_t before = RHASH_SIZE(set);
|
|
1156
|
+
VALUE key = pairs[index];
|
|
1157
|
+
rb_hash_aset(set, key, Qtrue);
|
|
1158
|
+
if (RHASH_SIZE(set) == before) {
|
|
1159
|
+
if (RB_SYMBOL_P(key)) {
|
|
1160
|
+
return rb_sym2str(key);
|
|
1161
|
+
}
|
|
1162
|
+
return key;
|
|
1163
|
+
}
|
|
1164
|
+
}
|
|
1165
|
+
return Qfalse;
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
// Emit the duplicate-key deprecation warning for `duplicate_key` (shown via
// rb_inspect) through emit_parse_warning().
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
{
    VALUE inspected_key = rb_inspect(duplicate_key);
    VALUE message = rb_sprintf(
        "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
        inspected_key
    );

    emit_parse_warning(RSTRING_PTR(message), state);
    // Keep `message` alive while its C string is in use above.
    RB_GC_GUARD(message);
}
|
|
1178
|
+
|
|
1179
|
+
/*
 * Raise the parse error for a duplicated object key.
 *
 * The message is "duplicate key <inspect>" followed by whatever
 * build_parse_error_message("", state) returns. When state->parser is set,
 * line and column are reported as 0 because they cannot be accurate in
 * resumable mode; otherwise the cursor position is computed and appended to
 * the message.
 */
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
{
    VALUE message = rb_sprintf(
        "duplicate key %"PRIsVALUE,
        rb_inspect(duplicate_key)
    );
    rb_str_concat(message, build_parse_error_message("", state));

    long line = 0;
    long column = 0;
    if (!state->parser) {
        cursor_position(state, &line, &column);
        rb_str_catf(message, " at line %ld column %ld", line, column);
    }

    rb_exc_raise(parse_error_new(state, message, line, column, false));
}
|
|
1196
|
+
|
|
1197
|
+
/*
 * Apply config->on_duplicate_key after an object was found to contain a
 * repeated key. `pairs` holds `count` interleaved keys and values.
 *
 *  JSON_IGNORE     - do nothing.
 *  JSON_DEPRECATED - warn, for at most MAX_DEPRECATIONS warnings per parser
 *                    state, to avoid spamming.
 *  JSON_RAISE      - raise through raise_duplicate_key_error().
 */
NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs)
{
    if (config->on_duplicate_key == JSON_IGNORE) {
        return;
    }

    if (config->on_duplicate_key == JSON_DEPRECATED) {
        if (state->emitted_deprecations < MAX_DEPRECATIONS) {
            state->emitted_deprecations++;
            emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
        }
        return;
    }

    if (config->on_duplicate_key == JSON_RAISE) {
        raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
        return;
    }

    UNREACHABLE;
}
|
|
1217
|
+
|
|
1218
|
+
/*
 * Build a Ruby Hash from the top `count` entries of state->value_stack
 * (interleaved keys and values) and pop them.
 *
 * If the hash ends up with fewer entries than count / 2, a key was repeated
 * and json_on_duplicate_key() is invoked before the entries are popped. The
 * hash is frozen when config->freeze is set.
 */
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
{
    const size_t expected_entries = count / 2;

    VALUE object = rb_hash_new_capa(expected_entries);
    const VALUE *pairs = rvalue_stack_peek(state->value_stack, count);
    rb_hash_bulk_insert(count, pairs, object);

    const bool has_duplicates = RHASH_SIZE(object) < expected_entries;
    if (RB_UNLIKELY(has_duplicates)) {
        json_on_duplicate_key(state, config, count, pairs);
    }

    rvalue_stack_pop(state->value_stack, count);

    if (config->freeze) {
        RB_OBJ_FREEZE(object);
    }
    return object;
}
|
|
1237
|
+
|
|
1238
|
+
/*
 * Push `value` onto state->value_stack and return what was pushed.
 *
 * When config->on_load_proc is set, the proc is called with `value` and its
 * result is pushed and returned in place of the original.
 */
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
{
    VALUE pushed = value;

    if (RB_UNLIKELY(config->on_load_proc)) {
        pushed = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
    }

    rvalue_stack_push(state->value_stack, pushed, state->value_stack_handle, &state->value_stack);
    return pushed;
}
|
|
1246
|
+
|
|
1247
|
+
// Bytes at which string_scan() stops: the ASCII control characters
// (0x00..0x1F), the double quote and the backslash. Every other entry,
// including 0x80..0xFF, is implicitly zero.
static const bool string_scan_table[256] = {
    // ASCII control characters 0x00..0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // Printable ASCII: only the two characters with a meaning inside strings
    ['"'] = 1,
    ['\\'] = 1,
};
|
|
1259
|
+
|
|
1260
|
+
#ifdef HAVE_SIMD
|
|
1261
|
+
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
1262
|
+
#endif /* HAVE_SIMD */
|
|
1263
|
+
|
|
1264
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
1265
|
+
{
|
|
1266
|
+
#ifdef HAVE_SIMD
|
|
1267
|
+
#if defined(HAVE_SIMD_NEON)
|
|
1268
|
+
|
|
1269
|
+
uint64_t mask = 0;
|
|
1270
|
+
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
1271
|
+
state->cursor += trailing_zeros64(mask) >> 2;
|
|
1272
|
+
return true;
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1275
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
1276
|
+
if (simd_impl == SIMD_SSE2) {
|
|
1277
|
+
int mask = 0;
|
|
1278
|
+
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
1279
|
+
state->cursor += trailing_zeros(mask);
|
|
1280
|
+
return true;
|
|
1281
|
+
}
|
|
1282
|
+
}
|
|
1283
|
+
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
1284
|
+
#endif /* HAVE_SIMD */
|
|
1285
|
+
|
|
1286
|
+
while (!eos(state)) {
|
|
1287
|
+
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
1288
|
+
return true;
|
|
1289
|
+
}
|
|
1290
|
+
state->cursor++;
|
|
1291
|
+
}
|
|
1292
|
+
|
|
1293
|
+
// If the string ended with an unterminated escape sequence, we might
|
|
1294
|
+
// have gone past the end.
|
|
1295
|
+
if (RB_UNLIKELY(state->cursor > state->end)) {
|
|
1296
|
+
state->cursor = state->end;
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
return false;
|
|
1300
|
+
}
|
|
1301
|
+
|
|
1302
|
+
/*
 * Continue scanning a string after the first byte that is not a plain
 * character, i.e. one flagged in string_scan_table other than the closing
 * quote. `start` is the first byte after the opening quote.
 *
 * Up to JSON_MAX_UNESCAPE_POSITIONS backslash positions are recorded; any
 * further ones are only counted. When the closing quote is found the string
 * is built with json_string_unescape() and the cursor moves past the quote.
 * Returns Qundef when the input ends before a closing quote.
 */
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
{
    const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
    JSON_UnescapePositions positions = {
        .size = 0,
        .positions = backslashes,
        .additional_backslashes = 0,
    };

    do {
        const char current = *state->cursor;

        if (current == '"') {
            VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
            state->cursor++;
            return string;
        }

        if (current == '\\') {
            if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
                backslashes[positions.size] = state->cursor;
                positions.size++;
            } else {
                positions.additional_backslashes++;
            }
            // Step over the backslash here; the increment below then skips
            // the escaped character so it is never taken for a terminator.
            state->cursor++;
        } else if (!config->allow_control_characters) {
            raise_syntax_error("invalid ASCII control character in string: %s", state);
        }

        state->cursor++;
    } while (string_scan(state));

    return Qundef;
}
|
|
1340
|
+
|
|
1341
|
+
/*
 * Parse a JSON string. On entry state->cursor is on the opening quote.
 *
 * If the first byte string_scan() stops on is the closing quote, the string
 * is built by json_string_fastpath(); otherwise parsing continues in
 * json_parse_escaped_string(). Returns Qundef when the input ends before
 * the string is closed.
 */
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
    state->cursor++;
    const char *content_start = state->cursor;

    if (RB_UNLIKELY(!string_scan(state))) {
        return Qundef;
    }

    if (RB_UNLIKELY(*state->cursor != '"')) {
        return json_parse_escaped_string(state, config, is_name, content_start);
    }

    VALUE string = json_string_fastpath(state, config, content_start, state->cursor, is_name);
    state->cursor++;
    return string;
}
|
|
1361
|
+
|
|
1362
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1363
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1364
|
+
// Additional References:
|
|
1365
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1366
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1367
|
+
/*
 * SWAR conversion of eight ASCII digits packed in a 64-bit word (first digit
 * in the least significant byte) to their numeric value. The caller must
 * already have checked that all eight bytes are digits.
 */
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
    const uint64_t pair_mask = 0x000000FF000000FF;
    const uint64_t scale_pairs_0_2 = 100 + (1000000ULL << 32);
    const uint64_t scale_pairs_1_3 = 1 + (10000ULL << 32);

    // '0'..'9' -> 0..9 in every byte.
    val -= 0x3030303030303030;
    // Every even byte now holds a two-digit value (first * 10 + second).
    val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;

    // Weight the four two-digit pairs by 10^6, 10^4, 10^2 and 1; the sum
    // lands in the upper 32 bits.
    const uint64_t pairs_0_2 = (val & pair_mask) * scale_pairs_0_2;
    const uint64_t pairs_1_3 = ((val >> 16) & pair_mask) * scale_pairs_1_3;
    return (pairs_0_2 + pairs_1_3) >> 32;
}
|
|
1376
|
+
|
|
1377
|
+
/*
 * SWAR conversion of four ASCII digits packed in a 32-bit word (first digit
 * in the least significant byte) to their numeric value. The caller must
 * already have checked that all four bytes are digits.
 */
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
    const uint32_t byte_mask = 0x000000FF;
    const uint32_t hundreds = 100;

    // '0'..'9' -> 0..9 in every byte.
    val -= 0x30303030;
    // Bytes 0 and 2 now hold the two-digit pairs (first * 10 + second).
    val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;

    const uint32_t high_pair = val & byte_mask;
    const uint32_t low_pair = (val >> 16) & byte_mask;
    val = (high_pair * hundreds) + low_pair;
    return val;
}
|
|
1385
|
+
#endif
|
|
1386
|
+
|
|
1387
|
+
/*
 * Consume the run of ASCII digits at state->cursor, folding each one into
 * *accumulator (accumulator = accumulator * 10 + digit).
 *
 * Returns the number of digits consumed, which may be zero. *accumulator is
 * an unsigned 64-bit value and wraps on long runs; callers use the returned
 * digit count to detect that case.
 */
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
    const char *digits_start = state->cursor;

#if JSON_CPU_LITTLE_ENDIAN_64BITS
    while (rest(state) >= sizeof(uint64_t)) {
        uint64_t word;
        memcpy(&word, state->cursor, sizeof(uint64_t));

        // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
        // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
        // A byte of `digit_map` equals 0x33 exactly when the input byte is '0'..'9'.
        const uint64_t digit_map = (word & 0xF0F0F0F0F0F0F0F0) | (((word + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);

        if (digit_map == 0x3333333333333333) { // 8 consecutive digits
            *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(word);
            state->cursor += 8;
            continue;
        }

        // Fewer than eight: count the digits before the first non-digit byte.
        uint32_t remaining = trailing_zeros64(digit_map ^ 0x3333333333333333) / CHAR_BIT;

        if (remaining >= 4) {
            *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)word);
            state->cursor += 4;
            remaining -= 4;
        }

        for (; remaining; remaining--) {
            *accumulator = *accumulator * 10 + (*state->cursor - '0');
            state->cursor++;
        }

        return (int)(state->cursor - digits_start);
    }
#endif

    // Byte-at-a-time path: the tail of the input, or every input on
    // platforms without the SWAR path.
    char digit;
    while (rb_isdigit(digit = peek(state))) {
        *accumulator = *accumulator * 10 + (digit - '0');
        state->cursor++;
    }
    return (int)(state->cursor - digits_start);
}
|
|
1431
|
+
|
|
1432
|
+
/*
 * Parse a JSON number. On entry state->cursor is on the first digit.
 *
 * negative - a '-' sign was already consumed by the caller.
 * start    - first byte of the literal (sign included); the text
 *            [start, state->cursor) is what the slow decoding paths re-parse.
 *
 * Returns an Integer when the literal has neither fraction nor exponent,
 * otherwise whatever json_decode_float() produces. Returns Qundef for a
 * malformed literal: a leading zero followed by more digits, a sign with no
 * digits, a '.' with no digits after it, or an exponent marker with no
 * digits.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    bool integer = true;
    const char first_digit = *state->cursor;

    // Mantissa digits and decimal exponent are extracted while scanning so
    // the common case needs no second pass over the text.
    int64_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        return Qundef;
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            return Qundef;
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            return Qundef;
        }

        // Saturate exponents that do not fit in int64_t; json_decode_float()
        // maps the extremes to infinity / zero.
        if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) {
            exponent = negative_exponent ? INT64_MIN : INT64_MAX;
        } else {
            exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent;
        }
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position. The subtraction
    // saturates: the exponent may already be INT64_MIN (or close to it), and
    // subtracting from that would be signed overflow. That is undefined
    // behavior, and on a wrapping build it would turn a vanishingly small
    // number into a huge exponent.
    if (decimal_point_pos >= 0) {
        const int64_t fraction_len = (int64_t)mantissa_digits - decimal_point_pos;
        if (RB_UNLIKELY(exponent < INT64_MIN + fraction_len)) {
            exponent = INT64_MIN;
        } else {
            exponent -= fraction_len;
        }
    }

    return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
|
|
1500
|
+
|
|
1501
|
+
// How many values (array elements, or interleaved object keys+values) have been
|
|
1502
|
+
// pushed onto the rvalue stack since this container opened. Used to size the
|
|
1503
|
+
// bulk decode on close, and to tell the first key/colon from later ones.
|
|
1504
|
+
static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack)
|
|
1505
|
+
{
|
|
1506
|
+
return value_stack->head - frame->value_stack_head;
|
|
1507
|
+
}
|
|
1508
|
+
|
|
1509
|
+
// A complete value now sits on top of the rvalue stack. Advance the frame that
|
|
1510
|
+
// was waiting for it: the root document is done, or the enclosing container
|
|
1511
|
+
// moves on to expecting a ',' or its closing bracket. The caller passes the
|
|
1512
|
+
// frame it already has in hand -- the one that was expecting the value -- which
|
|
1513
|
+
// after a container close is the freshly re-exposed parent.
|
|
1514
|
+
static inline enum json_frame_phase json_value_completed(json_frame *frame)
|
|
1515
|
+
{
|
|
1516
|
+
JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT);
|
|
1517
|
+
JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY);
|
|
1518
|
+
JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT);
|
|
1519
|
+
|
|
1520
|
+
return frame->phase = (enum json_frame_phase) frame->type;
|
|
1521
|
+
}
|
|
1522
|
+
|
|
1523
|
+
/*
 * Consume the literal `keyword` at state->cursor or raise a parse error.
 *
 * offset - number of leading bytes to leave out of the comparison.
 *
 * `keyword` is always a string literal, so the compiler is expected to fold
 * strlen() and much of the rest. A memcmp() of a small power-of-two length
 * against a literal compiles to a single integer comparison, which is why
 * some call sites compare from the first byte and others from the second.
 *
 * When the input ends in the middle of a prefix of the keyword, the error is
 * raised with the end-of-stream flag set.
 */
ALWAYS_INLINE(static) void json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
{
    const size_t len = strlen(keyword);

    const bool matched = rest(state) >= len
        && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0);

    if (!matched) {
        const bool truncated = rest(state) < len && memcmp(state->cursor, keyword, rest(state)) == 0;
        raise_parse_error("unexpected token %s", state, truncated);
        return;
    }

    state->cursor += len;
}
|
|
1540
|
+
|
|
1541
|
+
// Parse an arbitrary JSON value iteratively. This is a state machine driven
// entirely by the top frame's phase so it can stop at any value boundary and
// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
// bottom of the stack, so the stack is never empty mid-parse and the document
// itself is just another frame whose value, once parsed, leaves its phase DONE.
// When invoked in resumable mode, it returns true after parsing a complete document.
// If reaching EOS without having parsed a complete document, it either returns false
// or raises a JSON::ParserError tagged with `@eos=true`.
//
// Parameters:
//   state     - cursor, value stack and frame stack of the parse in progress.
//   config    - parsing options (allow_nan, max_nesting, allow_trailing_comma, ...).
//   resumable - when true, hitting EOS in the middle of a token rewinds the cursor
//               to the start of that token and returns false instead of raising.
ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config, bool resumable)
{
    json_frame *frame = json_frame_stack_peek(state->frames);

    // Entry dispatch: jump to the label matching the phase recorded in the top frame.
    switch (frame->phase) {
        case JSON_PHASE_DONE: JSON_UNREACHABLE_RETURN(false);
        case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
        case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
        case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
        case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY;
        case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
    }
    JSON_UNREACHABLE_RETURN(false);

    // Expecting a value: a scalar, or the opening of an array / object.
    JSON_PHASE_VALUE: {
        json_eat_whitespace(state, config, true);

        VALUE value;
        // Remembered so the cursor can be rewound to the start of the token,
        // either to retry later (resumable mode) or to report an error position.
        const char *value_start = state->cursor;

        switch (peek(state)) {
            case 'n':
                json_match_keyword(state, "null", 0);
                value = Qnil;
                break;

            case 't':
                json_match_keyword(state, "true", 0);
                value = Qtrue;
                break;

            case 'f':
                json_match_keyword(state, "false", 1);
                value = Qfalse;
                break;

            case 'N':
                if (!config->allow_nan) {
                    raise_syntax_error("unexpected token %s", state);
                }

                json_match_keyword(state, "NaN", 1);
                value = CNaN;
                break;

            case 'I':
                if (!config->allow_nan) {
                    raise_syntax_error("unexpected token %s", state);
                }

                json_match_keyword(state, "Infinity", 0);
                value = CInfinity;
                break;

            case '-': {
                state->cursor++;

                value = json_parse_number(state, config, true, value_start);

                // Not a number: with allow_nan, "-Infinity" is still acceptable.
                // Rewind so the keyword is matched from the leading '-'.
                if (RB_UNLIKELY(UNDEF_P(value) && config->allow_nan && peek(state) == 'I')) {
                    state->cursor = value_start;
                    json_match_keyword(state, "-Infinity", 1);
                    value = CMinusInfinity;
                    break;
                }

                // Top level numbers are ambiguous when parsing streams, we can't
                // know if we parsed all the digits if we hit EOS.
                if (RB_UNLIKELY(resumable && eos(state))) {
                    state->cursor = value_start;
                    return false;
                }

                if (RB_UNLIKELY(UNDEF_P(value))) {
                    raise_syntax_error_at("invalid number: %s", state, value_start);
                }
                break;
            }

            case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
                value = json_parse_number(state, config, false, value_start);

                // Top level numbers are ambiguous when parsing streams, we can't
                // know if we parsed all the digits if we hit EOS.
                if (RB_UNLIKELY(resumable && eos(state))) {
                    state->cursor = value_start;
                    return false;
                }

                if (RB_UNLIKELY(UNDEF_P(value))) {
                    raise_syntax_error_at("invalid number: %s", state, value_start);
                }
                break;
            }

            case '"': {
                // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
                value = json_parse_string(state, config, false);

                if (RB_UNLIKELY(UNDEF_P(value))) {
                    bool is_eos = eos(state);
                    // Resumable mode: rewind to the opening quote and wait for more input.
                    if (resumable && is_eos) {
                        state->cursor = value_start;
                        return false;
                    }
                    raise_parse_error("unexpected end of input, expected closing \"", state, is_eos);
                }
                break;
            }

            case '[': {
                state->cursor++;
                json_eat_whitespace(state, config, true);

                const char next = peek(state);
                if (next == ']') {
                    // Empty array: decoded right away, no frame is pushed.
                    state->cursor++;
                    value = json_decode_array(state, config, 0);
                    break;
                } else if (resumable && eos(state)) {
                    // Nothing after '[' yet: rewind so the '[' is read again on resume.
                    state->cursor = value_start;
                    return false;
                }

                state->current_nesting++;
                if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
                    rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
                }
                state->in_array++;

                // Phase stays VALUE: the next iteration reads the first element.
                frame = json_frame_stack_push(state, (json_frame){
                    .type = JSON_FRAME_ARRAY,
                    .phase = JSON_PHASE_VALUE,
                    .value_stack_head = state->value_stack->head,
                });
                goto JSON_PHASE_VALUE;
            }

            case '{': {
                state->cursor++;
                json_eat_whitespace(state, config, true);

                if (peek(state) == '}') {
                    // Empty object: decoded right away, no frame is pushed.
                    state->cursor++;
                    value = json_decode_object(state, config, 0);
                    break;
                } else if (resumable && eos(state)) {
                    // Nothing after '{' yet: rewind so the '{' is read again on resume.
                    state->cursor = value_start;
                    return false;
                }

                state->current_nesting++;
                if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
                    rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
                }

                // Phase KEY: the next iteration reads the first key.
                // start_offset records where the object began; it is used when the
                // object is closed to temporarily rewind the cursor around decoding.
                frame = json_frame_stack_push(state, (json_frame){
                    .type = JSON_FRAME_OBJECT,
                    .phase = JSON_PHASE_OBJECT_KEY,
                    .value_stack_head = state->value_stack->head,
                    .start_offset = value_start - state->start,
                });
                goto JSON_PHASE_OBJECT_KEY;
            }

            case 0:
                // peek() returns 0 both at end-of-stream and for a literal NUL byte in the
                // buffer. Only a genuine EOS means "feed me more"; a NUL byte that is not at
                // EOS is just an invalid character.
                if (eos(state)) {
                    return false;
                } else {
                    raise_syntax_error("unexpected NULL byte: %s", state);
                }
            default:
                raise_syntax_error("unexpected character: %s", state);
        }

        // A complete value was produced: push it, then let the enclosing frame
        // decide what comes next.
        // NOTE(review): json_value_completed is defined outside this chunk; it
        // presumably updates frame->phase based on the frame type - confirm.
        json_push_value(state, config, value);
        json_value_completed(frame);

        switch (frame->phase) {
            case JSON_PHASE_DONE: return true;
            case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
            case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
            case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
            case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
            case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
        }
        JSON_UNREACHABLE_RETURN(false);
    }

    // Expecting an object key (a string).
    JSON_PHASE_OBJECT_KEY: {
        JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);

        json_eat_whitespace(state, config, true);

        const char *start = state->cursor;

        if (RB_LIKELY(peek(state) == '"')) {
            VALUE string = json_parse_string(state, config, true);
            if (UNDEF_P(string)) {
                if (resumable) {
                    // Rewind to the opening quote so the key is re-parsed on resume.
                    state->cursor = start;
                    return false;
                } else {
                    raise_syntax_error("unexpected end of input, expected closing \"", state);
                }
            }
            json_push_value(state, config, string);
            frame->phase = JSON_PHASE_OBJECT_COLON;
            goto JSON_PHASE_OBJECT_COLON;
        } else if (resumable && eos(state)) {
            return false;
        } else {
            // The message differs for the first key vs. a key after a
            // ',': the first is the only one reached with nothing pushed
            // for this object yet.
            if (json_frame_entry_count(frame, state->value_stack) == 0) {
                raise_syntax_error("expected object key, got %s", state);
            } else {
                raise_syntax_error("expected object key, got: %s", state);
            }
        }
        JSON_UNREACHABLE_RETURN(false);
    }

    // Expecting the ':' separating a key from its value.
    JSON_PHASE_OBJECT_COLON: {
        JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);

        json_eat_whitespace(state, config, true);

        if (RB_LIKELY(peek(state) == ':')) {
            state->cursor++;
            frame->phase = JSON_PHASE_VALUE;
            goto JSON_PHASE_VALUE;
        } else if (resumable && eos(state)) {
            return false;
        } else {
            // First colon (only the first pair's key is pushed, nothing
            // else) vs. a later one.
            if (json_frame_entry_count(frame, state->value_stack) == 1) {
                raise_syntax_error("expected ':' after object key", state);
            } else {
                raise_syntax_error("expected ':' after object key, got: %s", state);
            }
        }
        JSON_UNREACHABLE_RETURN(false);
    }

    // After an array element: expecting ',' or the closing ']'.
    JSON_PHASE_ARRAY_COMMA: {
        JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);

        json_eat_whitespace(state, config, true);

        const char next_char = peek(state);

        if (RB_LIKELY(next_char == ',')) {
            state->cursor++;
            if (config->allow_trailing_comma) {
                json_eat_whitespace(state, config, true);
                if (peek(state) == ']') {
                    // Trailing comma: stay in COMMA to close on the next iteration.
                    goto JSON_PHASE_ARRAY_COMMA;
                }
            }
            frame->phase = JSON_PHASE_VALUE;
            goto JSON_PHASE_VALUE;
        } else if (next_char == ']') {
            state->cursor++;
            long count = json_frame_entry_count(frame, state->value_stack);
            state->current_nesting--;
            state->in_array--;

            // Build the array from the entries pushed for this frame, then drop
            // the frame and continue in the parent frame.
            json_push_value(state, config, json_decode_array(state, config, count));
            json_frame_stack_pop(state->frames);
            frame = json_frame_stack_peek(state->frames);

            json_value_completed(frame);

            switch (frame->phase) {
                case JSON_PHASE_DONE: return true;
                case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
                case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
                case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
                case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
                case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
            }
        } else if (resumable && eos(state)) {
            return false;
        } else {
            raise_syntax_error("expected ',' or ']' after array value", state);
        }
        JSON_UNREACHABLE_RETURN(false);
    }

    // After an object value: expecting ',' or the closing '}'.
    JSON_PHASE_OBJECT_COMMA: {
        JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);

        json_eat_whitespace(state, config, true);
        const char next_char = peek(state);

        if (RB_LIKELY(next_char == ',')) {
            state->cursor++;
            json_eat_whitespace(state, config, true);

            if (config->allow_trailing_comma) {
                if (peek(state) == '}') {
                    // Trailing comma: stay in COMMA to close on the next iteration.
                    goto JSON_PHASE_OBJECT_COMMA;
                }
            }

            frame->phase = JSON_PHASE_OBJECT_KEY;
            goto JSON_PHASE_OBJECT_KEY;
        } else if (next_char == '}') {
            state->cursor++;
            state->current_nesting--;
            size_t count = json_frame_entry_count(frame, state->value_stack);

            // Temporary rewind cursor in case an error is raised
            const char *final_cursor = state->cursor;
            state->cursor = state->start + frame->start_offset;
            VALUE object = json_decode_object(state, config, count);
            state->cursor = final_cursor;

            // Replace the frame's entries with the decoded object and continue
            // in the parent frame.
            json_push_value(state, config, object);
            json_frame_stack_pop(state->frames);
            frame = json_frame_stack_peek(state->frames);
            json_value_completed(frame);

            switch (frame->phase) {
                case JSON_PHASE_DONE: return true;
                case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
                case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
                case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
                case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
                case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
            }
        } else if (resumable && eos(state)) {
            return false;
        } else {
            raise_syntax_error("expected ',' or '}' after object value, got: %s", state);
        }
        JSON_UNREACHABLE_RETURN(false);
    }

    JSON_UNREACHABLE_RETURN(false);
}
|
|
1900
|
+
|
|
1901
|
+
static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config)
{
    // Only whitespace (as handled by json_eat_whitespace) may follow the
    // document; anything else left in the input is a syntax error.
    json_eat_whitespace(state, config, true);

    if (eos(state)) {
        return;
    }

    raise_syntax_error("unexpected token at end of stream %s", state);
}
|
|
1908
|
+
|
|
1909
|
+
/*
|
|
1910
|
+
* Document-class: JSON::Ext::Parser
|
|
1911
|
+
*
|
|
1912
|
+
* This is the JSON parser implemented as a C extension. It can be configured
|
|
1913
|
+
* to be used by setting
|
|
1914
|
+
*
|
|
1915
|
+
* JSON.parser = JSON::Ext::Parser
|
|
1916
|
+
*
|
|
1917
|
+
* with the method parser= in JSON.
|
|
1918
|
+
*
|
|
1919
|
+
*/
|
|
1920
|
+
|
|
1921
|
+
// Returns `source` as a UTF-8 encoded String.
//
// - UTF-8 input is returned as is.
// - Binary input is duplicated and the copy is re-tagged as UTF-8.
// - Any other encoding is transcoded by calling `encode` on the string.
static VALUE convert_encoding(VALUE source)
{
    StringValue(source);
    const int source_encindex = RB_ENCODING_GET(source);

    if (RB_UNLIKELY(source_encindex != utf8_encindex)) {
        if (source_encindex == binary_encindex) {
            // For historical reasons, binary strings are silently reinterpreted as UTF-8.
            return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
        }

        source = rb_funcall(source, i_encode, 1, Encoding_UTF_8);
        StringValue(source);
    }

    return source;
}
|
|
1939
|
+
|
|
1940
|
+
// Context handed to parser_config_init_i through rb_hash_foreach while the
// options hash is being applied to a JSON_ParserConfig.
struct parser_config_init_args {
    // Configuration being filled in.
    JSON_ParserConfig *config;
    // Object owning `config`, passed to parser_config_wb_write; may be false
    // (0) when the config is not owned by a Ruby object.
    VALUE self;
    // Lazily created hidden Array collecting unrecognized option keys;
    // stays 0 while no unknown key has been seen.
    VALUE unknown_keywords;
    // When true, unrecognized keys are collected in `unknown_keywords`
    // instead of being silently ignored.
    bool strict;
};
|
|
1946
|
+
|
|
1947
|
+
// Stores `val` into `*dest` and, when the config is owned by a Ruby object
// (`self` is non-zero), notifies the GC of the new reference held by `self`.
static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val)
{
    *dest = val;

    if (!self) {
        return;
    }

    RB_OBJ_WRITTEN(self, Qundef, val);
}
|
|
1952
|
+
|
|
1953
|
+
static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
1954
|
+
{
|
|
1955
|
+
struct parser_config_init_args *args = (struct parser_config_init_args *)data;
|
|
1956
|
+
JSON_ParserConfig *config = args->config;
|
|
1957
|
+
VALUE self = args->self;
|
|
1958
|
+
|
|
1959
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1960
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1961
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1962
|
+
else if (key == sym_allow_comments) { config->on_comment = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1963
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1964
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1965
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1966
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1967
|
+
else if (key == sym_on_load) { parser_config_wb_write(self, &config->on_load_proc, RTEST(val) ? val : Qfalse); }
|
|
1968
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1969
|
+
else if (key == sym_decimal_class) {
|
|
1970
|
+
if (RTEST(val)) {
|
|
1971
|
+
if (rb_respond_to(val, i_try_convert)) {
|
|
1972
|
+
parser_config_wb_write(self, &config->decimal_class, val);
|
|
1973
|
+
config->decimal_method_id = i_try_convert;
|
|
1974
|
+
} else if (rb_respond_to(val, i_new)) {
|
|
1975
|
+
parser_config_wb_write(self, &config->decimal_class, val);
|
|
1976
|
+
config->decimal_method_id = i_new;
|
|
1977
|
+
} else if (RB_TYPE_P(val, T_CLASS)) {
|
|
1978
|
+
VALUE name = rb_class_name(val);
|
|
1979
|
+
const char *name_cstr = RSTRING_PTR(name);
|
|
1980
|
+
const char *last_colon = strrchr(name_cstr, ':');
|
|
1981
|
+
if (last_colon) {
|
|
1982
|
+
const char *mod_path_end = last_colon - 1;
|
|
1983
|
+
VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
|
|
1984
|
+
parser_config_wb_write(self, &config->decimal_class, rb_path_to_class(mod_path));
|
|
1985
|
+
|
|
1986
|
+
const char *method_name_beg = last_colon + 1;
|
|
1987
|
+
long before_len = method_name_beg - name_cstr;
|
|
1988
|
+
long len = RSTRING_LEN(name) - before_len;
|
|
1989
|
+
VALUE method_name = rb_str_substr(name, before_len, len);
|
|
1990
|
+
config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
|
|
1991
|
+
} else {
|
|
1992
|
+
parser_config_wb_write(self, &config->decimal_class, rb_mKernel);
|
|
1993
|
+
config->decimal_method_id = SYM2ID(rb_str_intern(name));
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1996
|
+
}
|
|
1997
|
+
}
|
|
1998
|
+
else if (args->strict) {
|
|
1999
|
+
if (!args->unknown_keywords) {
|
|
2000
|
+
args->unknown_keywords = rb_obj_hide(rb_ary_new());
|
|
2001
|
+
}
|
|
2002
|
+
rb_ary_push(args->unknown_keywords, key);
|
|
2003
|
+
}
|
|
2004
|
+
|
|
2005
|
+
return ST_CONTINUE;
|
|
2006
|
+
}
|
|
2007
|
+
|
|
2008
|
+
// Fills `config` from the `opts` Hash (or leaves the defaults when `opts` is
// nil or empty). `self` is the Ruby object owning the config, if any.
// With `strict`, unrecognized option keys raise an ArgumentError.
static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self, bool strict)
{
    // Default nesting limit, applied even when no options are given.
    config->max_nesting = 100;

    if (NIL_P(opts)) {
        return;
    }

    Check_Type(opts, T_HASH);
    if (RHASH_SIZE(opts) == 0) {
        return;
    }

    struct parser_config_init_args init_args = {
        .config = config,
        .self = self,
        .strict = strict,
    };

    // Few keys are expected to be set in most cases, so iterating over the
    // provided keys is faster than probing for every possible key.
    rb_hash_foreach(opts, parser_config_init_i, (VALUE)&init_args);

    if (RB_UNLIKELY(init_args.unknown_keywords)) {
        if (RARRAY_LEN(init_args.unknown_keywords) == 1) {
            rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, RARRAY_AREF(init_args.unknown_keywords, 0));
        } else {
            VALUE keywords = rb_ary_join(init_args.unknown_keywords, rb_utf8_str_new_cstr(", "));
            rb_raise(rb_eArgError, "unknown keywords: %" PRIsVALUE, keywords);
        }
    }
}
|
|
2036
|
+
|
|
2037
|
+
/*
|
|
2038
|
+
* call-seq: new(opts => {})
|
|
2039
|
+
*
|
|
2040
|
+
* Creates a new JSON::Ext::ParserConfig instance.
|
|
2041
|
+
*
|
|
2042
|
+
* Argument +opts+, if given, contains a \Hash of options for the parsing.
|
|
2043
|
+
* See {Parsing Options}[#module-JSON-label-Parsing+Options].
|
|
2044
|
+
*
|
|
2045
|
+
*/
|
|
2046
|
+
// Implementation of ParserConfig#initialize: applies `opts` to the config
// wrapped by `self` (non-strict mode: unknown option keys are ignored).
// Raises if `self` is frozen. Returns `self`.
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
{
    rb_check_frozen(self);
    // Macro defined elsewhere in this file; it provides the `config` variable used below.
    GET_PARSER_CONFIG;

    parser_config_init(config, opts, self, false);

    return self;
}
|
|
2055
|
+
|
|
2056
|
+
// Parses `src` as a single, complete JSON document using `config` and returns
// the resulting Ruby value. Raises if the input ends before the document is
// complete, or if anything other than whitespace follows the document.
static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
{
    VALUE Vsource = convert_encoding(src);

    // Ensure the string isn't mutated under us.
    // The classic API to use is `rb_str_locktmp`, but then we'd
    // need to use `rb_protect` to make sure we always unlock.
    // (When convert_encoding returned a new string there is nothing to protect.)
    if (Vsource == src) {
        Vsource = rb_str_new_frozen(Vsource);
    }

    // The value stack starts out backed by a buffer on the C stack.
    VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
    rvalue_stack value_stack = {
        .type = RVALUE_STACK_STACK_ALLOCATED,
        .ptr = rvalue_stack_buffer,
        .capa = RVALUE_STACK_INITIAL_CAPA,
    };

    // Seed the frame stack with the root frame, establishing the invariant that
    // json_parse_any always has a top frame to dispatch on (so the stack is never
    // empty mid-parse).
    json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA];
    frame_stack_buffer[0] = (json_frame){
        .type = JSON_FRAME_ROOT,
        .phase = JSON_PHASE_VALUE,
    };
    json_frame_stack frames = {
        .type = RVALUE_STACK_STACK_ALLOCATED,
        .ptr = frame_stack_buffer,
        .capa = JSON_FRAME_STACK_INITIAL_CAPA,
        .head = 1,
    };

    long len;
    const char *start;

    RSTRING_GETMEM(Vsource, start, len);

    // NOTE(review): the handles presumably get set to GC-managed wrappers if
    // the stacks outgrow their initial buffers - confirm against the stack helpers.
    VALUE value_stack_handle = 0;
    VALUE frame_stack_handle = 0;
    JSON_ParserState _state = {
        .start = start,
        .cursor = start,
        .end = start + len,
        .value_stack = &value_stack,
        .value_stack_handle = &value_stack_handle,
        .frames = &frames,
        .frame_stack_handle = &frame_stack_handle,
    };
    JSON_ParserState *state = &_state;

    // Non-resumable parse: a false return means the input ended too early.
    bool complete = json_parse_any(state, config, false);

    // The root document value is parsed; it is the lone survivor on
    // the rvalue stack.
    // It must be read before the stacks are released below.
    VALUE result = complete ? *rvalue_stack_peek(state->value_stack, 1) : Qundef;

    // This may be skipped in case of exception, but
    // it won't cause a leak.
    rvalue_stack_eagerly_release(value_stack_handle);
    json_frame_stack_eagerly_release(frame_stack_handle);
    RB_GC_GUARD(value_stack_handle);
    RB_GC_GUARD(frame_stack_handle);
    RB_GC_GUARD(Vsource);

    if (complete) {
        // Only trailing whitespace is allowed after the document.
        json_ensure_eof(state, config);
    } else {
        raise_eos_error("unexpected end of input", state);
    }

    return result;
}
|
|
2129
|
+
|
|
2130
|
+
/*
|
|
2131
|
+
* call-seq: parse(source)
|
|
2132
|
+
*
|
|
2133
|
+
* Parses the current JSON text _source_ and returns the complete data
|
|
2134
|
+
* structure as a result.
|
|
2135
|
+
* It raises JSON::ParserError if it fails to parse.
|
|
2136
|
+
*/
|
|
2137
|
+
// Implementation of ParserConfig#parse: parses `Vsource` with the
// configuration wrapped by `self` and returns the parsed value.
static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
{
    // Macro defined elsewhere in this file; it provides the `config` variable used below.
    GET_PARSER_CONFIG;
    return cParser_parse(config, Vsource);
}
|
|
2142
|
+
|
|
2143
|
+
// One-shot parse: builds a temporary, stack-allocated configuration from
// `opts` (no owning Ruby object, non-strict) and parses `Vsource` with it.
static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
{
    JSON_ParserConfig local_config = {0};

    parser_config_init(&local_config, opts, Qfalse, false);

    return cParser_parse(&local_config, Vsource);
}
|
|
2151
|
+
|
|
2152
|
+
static void JSON_ParserConfig_mark(void *ptr)
|
|
2153
|
+
{
|
|
2154
|
+
JSON_ParserConfig *config = ptr;
|
|
2155
|
+
rb_gc_mark_movable(config->on_load_proc);
|
|
2156
|
+
rb_gc_mark_movable(config->decimal_class);
|
|
2157
|
+
}
|
|
2158
|
+
|
|
2159
|
+
// Memory size callback for JSON_ParserConfig. `ptr` is not inspected.
static size_t JSON_ParserConfig_memsize(const void *ptr)
{
#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
    // NOTE(review): presumably 0 because an embeddable struct lives inside
    // the Ruby object slot and is already accounted for - confirm.
    return 0;
#else
    return sizeof(JSON_ParserConfig);
#endif
}
|
|
2167
|
+
|
|
2168
|
+
static void JSON_ParserConfig_compact(void *ptr)
|
|
2169
|
+
{
|
|
2170
|
+
JSON_ParserConfig *config = ptr;
|
|
2171
|
+
config->on_load_proc = rb_gc_location(config->on_load_proc);
|
|
2172
|
+
config->decimal_class = rb_gc_location(config->decimal_class);
|
|
2173
|
+
}
|
|
2174
|
+
|
|
2175
|
+
// TypedData descriptor for objects wrapping a JSON_ParserConfig.
static const rb_data_type_t JSON_ParserConfig_type = {
    .wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
    .function = {
        .dmark = JSON_ParserConfig_mark,
        // The struct owns no separately allocated memory, so the default free is enough.
        .dfree = RUBY_DEFAULT_FREE,
        .dsize = JSON_ParserConfig_memsize,
        .dcompact = JSON_ParserConfig_compact,
    },
    // WB_PROTECTED: references are stored through parser_config_wb_write,
    // which notifies the GC of the write.
    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
};
|
|
2185
|
+
|
|
2186
|
+
// Allocator: creates a new instance of `klass` wrapping a JSON_ParserConfig.
static VALUE cJSON_parser_s_allocate(VALUE klass)
{
    // Assigned by TypedData_Make_Struct; not used otherwise.
    JSON_ParserConfig *config;
    return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
}
|
|
2191
|
+
|
|
2192
|
+
// Empties `str` in place by replacing its content with JSON_empty_string.
// Frozen strings are left untouched.
static void json_str_clear(VALUE str)
{
    if (!RB_OBJ_FROZEN_RAW(str)) {
        rb_str_replace(str, JSON_empty_string);
    }
}
|
|
2199
|
+
|
|
2200
|
+
// State of a JSON::ResumableParser instance. Everything a parse needs to be
// suspended and resumed later is embedded in this single struct.
typedef struct JSON_ResumableParserStruct {
    // Parsing options, filled in by parser_config_init at initialization.
    JSON_ParserConfig config;
    // Parser state: cursor into the buffer, name cache, back-reference to the
    // owning Ruby object (`state.parser`).
    JSON_ParserState state;
    // Stack of parsed values; marked, compacted and freed with the parser.
    rvalue_stack value_stack;
    // Stack of frames; its buffer is freed with the parser.
    json_frame_stack frames;
    // String holding the input being parsed; pinned by the mark function.
    VALUE buffer;
    // NOTE(review): the fields below are not used in the part of the file
    // reviewed here; meanings are inferred from their names - confirm.
    size_t parsed_bytes;      // presumably bytes consumed so far
    size_t incomplete_bytes;  // presumably bytes buffered but not yet parsed
    bool complete;            // presumably whether the last parse produced a document
    bool in_use;              // presumably a re-entrancy guard
} JSON_ResumableParser;
|
|
2211
|
+
|
|
2212
|
+
static void JSON_ResumableParser_mark(void *ptr)
|
|
2213
|
+
{
|
|
2214
|
+
JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
|
|
2215
|
+
JSON_ParserConfig_mark(&parser->config);
|
|
2216
|
+
rvalue_stack_mark(&parser->value_stack);
|
|
2217
|
+
rvalue_cache_mark(&parser->state.name_cache);
|
|
2218
|
+
rb_gc_mark(parser->buffer); // pin the buffer
|
|
2219
|
+
rb_gc_mark_movable(parser->state.parser);
|
|
2220
|
+
}
|
|
2221
|
+
|
|
2222
|
+
static void JSON_ResumableParser_free(void *ptr)
|
|
2223
|
+
{
|
|
2224
|
+
JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
|
|
2225
|
+
rvalue_stack_free_buffer(&parser->value_stack);
|
|
2226
|
+
json_frame_stack_free_buffer(&parser->frames);
|
|
2227
|
+
}
|
|
2228
|
+
|
|
2229
|
+
static size_t JSON_ResumableParser_memsize(const void *ptr)
|
|
2230
|
+
{
|
|
2231
|
+
const JSON_ResumableParser *parser = (const JSON_ResumableParser *)ptr;
|
|
2232
|
+
size_t memsize = JSON_ParserConfig_memsize(&parser->config);
|
|
2233
|
+
memsize += rvalue_stack_memsize(&parser->value_stack);
|
|
2234
|
+
memsize += json_frame_stack_memsize(&parser->frames);
|
|
2235
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
2236
|
+
memsize += (
|
|
2237
|
+
sizeof(JSON_ResumableParser)
|
|
2238
|
+
- sizeof(JSON_ParserState)
|
|
2239
|
+
- sizeof(JSON_ParserConfig)
|
|
2240
|
+
- sizeof(rvalue_stack)
|
|
2241
|
+
- sizeof(json_frame_stack)
|
|
2242
|
+
);
|
|
2243
|
+
#endif
|
|
2244
|
+
return memsize;
|
|
2245
|
+
}
|
|
2246
|
+
|
|
2247
|
+
static void JSON_ResumableParser_compact(void *ptr)
|
|
2248
|
+
{
|
|
2249
|
+
JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
|
|
2250
|
+
JSON_ParserConfig_compact(&parser->config);
|
|
2251
|
+
rvalue_stack_compact(&parser->value_stack);
|
|
2252
|
+
rvalue_cache_compact(&parser->state.name_cache);
|
|
2253
|
+
parser->buffer = rb_gc_location(parser->buffer);
|
|
2254
|
+
parser->state.parser = rb_gc_location(parser->state.parser);
|
|
2255
|
+
}
|
|
2256
|
+
|
|
2257
|
+
static const rb_data_type_t JSON_ResumableParser_type = {
|
|
2258
|
+
.wrap_struct_name = "JSON::Ext::ResumableParser",
|
|
2259
|
+
.function = {
|
|
2260
|
+
JSON_ResumableParser_mark,
|
|
2261
|
+
JSON_ResumableParser_free,
|
|
2262
|
+
JSON_ResumableParser_memsize,
|
|
2263
|
+
JSON_ResumableParser_compact,
|
|
2264
|
+
},
|
|
2265
|
+
// RUBY_TYPED_WB_PROTECTED is deliberately not declared because
|
|
2266
|
+
// this is a superset of JSON_Parser_rvalue_stack_type, so we'd need
|
|
2267
|
+
// to trigger a lot of write barriers.
|
|
2268
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
2269
|
+
};
|
|
2270
|
+
|
|
2271
|
+
// Allocator: creates a new instance of `klass` wrapping a JSON_ResumableParser.
static VALUE cResumableParser_allocate(VALUE klass)
{
    JSON_ResumableParser *parser;
    VALUE obj = TypedData_Make_Struct(klass, JSON_ResumableParser, &JSON_ResumableParser_type, parser);
    // NOTE(review): in_array is bumped once at allocation, so it never sits at its
    // initial value for a resumable parser; the reason is not visible here - confirm.
    parser->state.in_array++;
    // Back-reference from the parser state to the Ruby object wrapping it.
    parser->state.parser = obj;
    return obj;
}
|
|
2279
|
+
|
|
2280
|
+
// Returns the JSON_ResumableParser wrapped by `self`, after checking it
// against JSON_ResumableParser_type.
static inline JSON_ResumableParser *cResumableParser_get(VALUE self)
{
    JSON_ResumableParser *parser;
    TypedData_Get_Struct(self, JSON_ResumableParser, &JSON_ResumableParser_type, parser);
    return parser;
}
|
|
2286
|
+
|
|
2287
|
+
/*
|
|
2288
|
+
* call-seq: new(opts => {})
|
|
2289
|
+
*
|
|
2290
|
+
* Creates a new JSON::ResumableParser instance.
|
|
2291
|
+
*
|
|
2292
|
+
* Argument +opts+, if given, contains a \Hash of options for the parsing.
|
|
2293
|
+
* See {Parsing Options}[#module-JSON-label-Parsing+Options].
|
|
2294
|
+
*
|
|
2295
|
+
* A ResumableParser is able to parse partial documents and resume parsing later
|
|
2296
|
+
* when more of the document is provided:
|
|
2297
|
+
*
|
|
2298
|
+
* parser = JSON::ResumableParser.new
|
|
2299
|
+
* parser << '{"user": "george", "role": "ad'
|
|
2300
|
+
* parser.parse # => false
|
|
2301
|
+
* parser.eos? # => true
|
|
2302
|
+
* parser.partial_value # => { "user" => "george", "role" => nil }
|
|
2303
|
+
* parser.rest # => '"ad'
|
|
2304
|
+
*
|
|
2305
|
+
* parser << 'min" }[1, 2, 3]'
|
|
2306
|
+
* parser.parse # => true
|
|
2307
|
+
* parser.value # => { "user" => "george", "role" => "admin" }
|
|
2308
|
+
*
|
|
2309
|
+
* parser.parse # => true
|
|
2310
|
+
* parser.value # => [1, 2, 3]
|
|
2311
|
+
*
|
|
2312
|
+
* === Limitations
|
|
2313
|
+
*
|
|
2314
|
+
* While ResumableParser is able to parse streams of documents without any
|
|
2315
|
+
* explicit separators between them, it is highly recommended to separate documents
|
|
2316
|
+
* by either spaces or newlines, as otherwise the \JSON syntax for numbers may be ambiguous.
|
|
2317
|
+
* When parsing a number, ResumableParser will not consider the number complete until something follows:
|
|
2318
|
+
*
|
|
2319
|
+
* parser << '123'
|
|
2320
|
+
* parser.parse # => false
|
|
2321
|
+
* parser << ' '
|
|
2322
|
+
* parser.parse # => true
|
|
2323
|
+
* parser.value # => 123
|
|
2324
|
+
*
|
|
2325
|
+
* === Security
|
|
2326
|
+
*
|
|
2327
|
+
* An incomplete document is buffered in full and there is no size limit, so when reading
|
|
2328
|
+
* from an untrusted source the caller is responsible for bounding how much data is fed.
|
|
2329
|
+
* For example:
|
|
2330
|
+
*
|
|
2331
|
+
* loop do
|
|
2332
|
+
* if parser.parsed_bytes > DOCUMENT_MAX_SIZE
|
|
2333
|
+
* raise "document too large"
|
|
2334
|
+
* end
|
|
2335
|
+
*
|
|
2336
|
+
* parser << read_chunk
|
|
2337
|
+
* while parser.parse
|
|
2338
|
+
* process(parser.value)
|
|
2339
|
+
* end
|
|
2340
|
+
* end
|
|
2341
|
+
*/
|
|
2342
|
+
static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
{
    rb_check_frozen(self);

    // The scanned value is discarded: this call only checks the argument list
    // against the "0:" spec. The options actually used are read from argv below.
    VALUE scanned_opts = Qfalse;
    rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &scanned_opts);

    JSON_ResumableParser *parser = cResumableParser_get(self);

    VALUE opts = Qnil;
    if (argc > 0) {
        opts = argv[0];
    }
    parser_config_init(&parser->config, opts, self, true);

    return self;
}
|
|
2355
|
+
|
|
2356
|
+
static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock);
|
|
2357
|
+
|
|
2358
|
+
/*
|
|
2359
|
+
* call-seq: self << string -> self
|
|
2360
|
+
*
|
|
2361
|
+
* Appends the given string to the parser's buffer.
|
|
2362
|
+
*/
|
|
2363
|
+
static VALUE cResumableParser_feed(VALUE self, VALUE str)
{
    rb_check_frozen(self);

    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    str = convert_encoding(str);
    if (RSTRING_LEN(str) == 0) {
        // Nothing to append: buffer and cursor stay as they are.
        return self;
    }

    // Cursor position relative to the buffer start; reset to 0 whenever the
    // consumed prefix is dropped below.
    size_t cursor_offset = parser->state.cursor - parser->state.start;
    const size_t unread = parser->state.end - parser->state.cursor;

    if (unread == 0) {
        // Everything buffered so far was consumed: replace the buffer outright.
        if (parser->buffer) {
            json_str_clear(parser->buffer);
        }

        if (RB_OBJ_FROZEN_RAW(str)) {
            parser->buffer = str;
        } else {
            parser->buffer = rb_obj_hide(rb_str_new_shared(str));
        }
        cursor_offset = 0;
    } else {
        JSON_ASSERT(parser->buffer);

        const size_t buffered = parser->state.end - parser->state.start;
        const size_t consumed = buffered - unread;

        if (RB_OBJ_FROZEN_RAW(parser->buffer)) {
            // A frozen buffer is not appended to: move the unread tail into a
            // fresh hidden UTF-8 string sized for the tail plus the new chunk.
            VALUE fresh = rb_obj_hide(rb_str_buf_new(unread + RSTRING_LEN(str)));
            rb_enc_associate_index(fresh, utf8_encindex);

            char *src = RSTRING_PTR(parser->buffer);
            memcpy(RSTRING_PTR(fresh), src + consumed, unread);
            rb_str_set_len(fresh, unread);

            parser->buffer = fresh;
            cursor_offset = 0;
        } else if (consumed > (buffered / 2) && buffered >= 512) {
            // More than half of a buffer of at least 512 bytes is already
            // consumed: slide the unread tail to the front before appending.
            rb_str_modify(parser->buffer);
            char *base = RSTRING_PTR(parser->buffer);
            memmove(base, base + consumed, unread);
            rb_str_set_len(parser->buffer, unread);
            cursor_offset = 0;
        }

        rb_str_append(parser->buffer, str);
    }

    // The buffer may have been replaced or resized, so recompute every pointer
    // into it.
    long buffer_len;
    const char *buffer_ptr;
    RSTRING_GETMEM(parser->buffer, buffer_ptr, buffer_len);
    parser->state.start = buffer_ptr;
    parser->state.end = buffer_ptr + buffer_len;
    parser->state.cursor = parser->state.start + cursor_offset;

    return self;
}
|
|
2417
|
+
|
|
2418
|
+
struct json_parse_any_args {
|
|
2419
|
+
JSON_ParserState *state;
|
|
2420
|
+
JSON_ParserConfig *config;
|
|
2421
|
+
VALUE parser;
|
|
2422
|
+
};
|
|
2423
|
+
|
|
2424
|
+
// rb_catch_obj callback: unpacks the argument bundle and runs json_parse_any
// with its last argument set to true.
static VALUE json_parse_any_resumable_safe0(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, _args))
{
    struct json_parse_any_args *parse_args = (struct json_parse_any_args *)_args;
    JSON_ParserState *state = parse_args->state;
    JSON_ParserConfig *config = parse_args->config;

    return (VALUE)json_parse_any(state, config, true);
}
|
|
2429
|
+
|
|
2430
|
+
// rb_protect callback: runs the parse inside a catch block tagged with the
// parser object. Getting the tag itself back is reported as Qfalse; any other
// result is passed through unchanged.
static VALUE json_parse_any_resumable_safe(VALUE _args)
{
    struct json_parse_any_args *parse_args = (struct json_parse_any_args *)_args;
    VALUE outcome = rb_catch_obj(parse_args->parser, json_parse_any_resumable_safe0, _args);

    if (outcome == parse_args->parser) {
        return Qfalse;
    }
    return outcome;
}
|
|
2436
|
+
|
|
2437
|
+
// Fetches the parser struct behind `self`, refusing access while another call
// holds it (in_use set). When `lock` is true the parser is marked in use and
// the caller is responsible for resetting in_use afterwards.
static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock)
{
    JSON_ResumableParser *parser = cResumableParser_get(self);

    if (parser->in_use) {
        rb_raise(rb_eArgError, "ResumableParser can't be used recursively");
    }

    // self may have moved, so we need to update all pointers
    // Investigate: We might be better off keeping JSON_ParserState on the stack
    // and only persist what we need.
    parser->state.frames = &parser->frames;
    parser->state.value_stack = &parser->value_stack;

    if (lock) {
        parser->in_use = true;
    }

    return parser;
}
|
|
2457
|
+
|
|
2458
|
+
/*
|
|
2459
|
+
* call-seq: parse -> true or false
|
|
2460
|
+
*
|
|
2461
|
+
* Attempts to parse a JSON document from the internal buffer.
|
|
2462
|
+
* Returns whether a complete document could be parsed.
|
|
2463
|
+
*
|
|
2464
|
+
* It does raise +JSON::ParserError+ when encountering invalid \JSON syntax.
|
|
2465
|
+
*
|
|
2466
|
+
* The parsed object can be retrieved by calling #value
|
|
2467
|
+
*/
|
|
2468
|
+
static VALUE cResumableParser_parse(VALUE self)
{
    JSON_ResumableParser *parser = ResumableParser_acquire(self, true);

    // The previous attempt ended a document (or raised): restart byte accounting.
    if (parser->complete) {
        parser->complete = false;
        parser->parsed_bytes = 0;
        parser->incomplete_bytes = 0;
    }

    // Nothing buffered: release the lock taken above and report "not complete".
    if (!parser->buffer) {
        parser->in_use = false;
        return Qfalse;
    }

    if (parser->frames.head == 0) {
        json_frame root_frame = {
            .type = JSON_FRAME_ROOT,
            .phase = JSON_PHASE_VALUE,
        };
        json_frame_stack_push(&parser->state, root_frame);
    }

    VALUE buffer_guard = parser->buffer; // Prevent compaction

    json_frame *top = json_frame_stack_peek(&parser->frames);
    if (top->phase == JSON_PHASE_DONE) {
        // A finished value was never fetched through #value (which would have
        // popped this frame): drop it and reuse the root frame.
        JSON_ASSERT(parser->value_stack.head == 1);
        JSON_ASSERT(parser->frames.head == 1);

        top->phase = JSON_PHASE_VALUE;
        rvalue_stack_pop(parser->state.value_stack, 1);
    }

    struct json_parse_any_args parse_args = {
        .state = &parser->state,
        .config = &parser->config,
        .parser = self,
    };

    int status;
    const char *cursor_before = parser->state.cursor;
    parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&parse_args, &status);

    if (status) {
        parser->complete = true; // a parse error is considered complete
    }

    parser->parsed_bytes += parser->state.cursor - cursor_before;
    if (parser->complete) {
        parser->incomplete_bytes = 0;
    } else {
        parser->incomplete_bytes = parser->state.end - parser->state.cursor;
    }

    // Skip whitespace after the cursor; if that exhausts the input, drop the buffer.
    json_eat_whitespace(&parser->state, &parser->config, false);
    if (eos(&parser->state)) {
        json_str_clear(parser->buffer);
        parser->buffer = Qfalse;
    }

    // Unlock before a possible re-raise so the parser stays usable afterwards.
    parser->in_use = false;

    if (status) {
        rb_jump_tag(status); // reraise
    }
    RB_GC_GUARD(buffer_guard);
    return parser->complete ? Qtrue : Qfalse;
}
|
|
2531
|
+
|
|
2532
|
+
/*
|
|
2533
|
+
* call-seq: value? -> true or false
|
|
2534
|
+
*
|
|
2535
|
+
* Returns whether a parsed value is available.
|
|
2536
|
+
*/
|
|
2537
|
+
static VALUE cResumableParser_value_p(VALUE self)
{
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    // A value is ready when the value stack is non-empty and the top frame is
    // in the DONE phase. The frame is only inspected when the stack is non-empty.
    const bool ready = parser->value_stack.head > 0
        && json_frame_stack_peek(&parser->frames)->phase == JSON_PHASE_DONE;

    return ready ? Qtrue : Qfalse;
}
|
|
2549
|
+
|
|
2550
|
+
/*
|
|
2551
|
+
* call-seq: value -> object
|
|
2552
|
+
*
|
|
2553
|
+
* Returns and consumes the last parsed value.
|
|
2554
|
+
* Raises ArgumentError if there is no parsed value or if it was already retrieved:
|
|
2555
|
+
* parser << '[1][2]'
|
|
2556
|
+
* parser.value # ArgumentError no ready value
|
|
2557
|
+
* parser.parse # => true
|
|
2558
|
+
* parser.value # => [1]
|
|
2559
|
+
* parser.value # ArgumentError no ready value
|
|
2560
|
+
*/
|
|
2561
|
+
static VALUE cResumableParser_value(VALUE self)
{
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    // The top frame is only inspected when the frame stack is non-empty.
    const bool ready = parser->frames.head > 0
        && json_frame_stack_peek(&parser->frames)->phase == JSON_PHASE_DONE;

    if (!ready) {
        rb_raise(rb_eArgError, "no ready value");
    }

    // Consume the value: pop both it and its frame so a second call raises.
    VALUE result = *rvalue_stack_peek(parser->state.value_stack, 1);
    rvalue_stack_pop(parser->state.value_stack, 1);
    json_frame_stack_pop(parser->state.frames);
    return result;
}
|
|
2577
|
+
|
|
2578
|
+
/*
|
|
2579
|
+
* call-seq: clear -> self
|
|
2580
|
+
*
|
|
2581
|
+
* Entirely resets the parser state and buffer.
|
|
2582
|
+
*/
|
|
2583
|
+
static VALUE cResumableParser_clear(VALUE self)
{
    JSON_ResumableParser *parser = ResumableParser_acquire(self, false);

    // Drop the buffer reference and every pointer into it.
    parser->buffer = 0;
    parser->state.start = NULL;
    parser->state.cursor = NULL;
    parser->state.end = NULL;

    // Reset byte accounting.
    parser->complete = true;
    parser->parsed_bytes = 0;
    parser->incomplete_bytes = 0;

    // Empty both stacks and reset the remaining per-parse state.
    parser->frames.head = 0;
    parser->value_stack.head = 0;
    parser->state.name_cache.length = 0;
    parser->state.current_nesting = 0;
    parser->state.in_array = 1;
    parser->state.emitted_deprecations = 0;

    return self;
}
|
|
2599
|
+
|
|
2600
|
+
// Builds the value parsed so far without disturbing the real parser state.
//
// Works on a by-value copy of the parser struct with a private copy of the
// value stack, then closes every open frame from the innermost outwards until
// the root frame is reached. Returns Qnil when no value has been pushed yet.
// Called through rb_protect by cResumableParser_partial_value.
static VALUE cResumableParser_partial_value_body(VALUE self)
{
    JSON_ResumableParser *original_parser = cResumableParser_get(self);
    JSON_ResumableParser parser = *original_parser;

    // The copied state must point at the copied stacks, not the original ones.
    parser.state.frames = &parser.frames;
    parser.state.value_stack = &parser.value_stack;

    if (parser.value_stack.head == 0) {
        return Qnil;
    }

    // An object frame still waiting for a value (or its colon) has a key with
    // no value yet; a nil placeholder is pushed below to pair with it.
    json_frame *frame = json_frame_stack_peek(parser.state.frames);
    long missing_object_value = 0;
    if (frame->type == JSON_FRAME_OBJECT && (frame->phase == JSON_PHASE_VALUE || frame->phase == JSON_PHASE_OBJECT_COLON)) {
        missing_object_value = 1;
    }

    // Copy the value stack as we need to mutate it.
    // Only the `head` live entries are copied: the source buffer is not
    // guaranteed to hold more than that, so copying the enlarged capacity
    // could read one VALUE past its end.
    // NOTE(review): the copy has no spare capacity beyond the placeholder, so
    // any later push that exceeds it relies on the stack growth logic handling
    // this temporary buffer correctly — confirm.
    const long live_count = parser.value_stack.head;
    const long capa = live_count + missing_object_value;
    VALUE tmpbuf, *value_stack_buffer = ALLOCV_N(VALUE, tmpbuf, capa);
    MEMCPY(value_stack_buffer, parser.value_stack.ptr, VALUE, live_count);
    parser.value_stack.ptr = value_stack_buffer;
    parser.value_stack.capa = capa;

    JSON_ParserState *state = &parser.state;
    JSON_ParserConfig *config = &parser.config;

    if (missing_object_value) {
        rvalue_stack_push(state->value_stack, Qnil, NULL, &state->value_stack);
    }

    VALUE partial_result = Qundef;

    while (UNDEF_P(partial_result)) {
        frame = json_frame_stack_peek(state->frames);

        switch (frame->type) {
          case JSON_FRAME_ROOT: {
            // All containers are closed: the top of the stack is the result.
            partial_result = *rvalue_stack_peek(state->value_stack, 1);
            break;
          }

          case JSON_FRAME_ARRAY: {
            long count = json_frame_entry_count(frame, state->value_stack);
            json_push_value(state, config, json_decode_array(state, config, count));
            json_frame_stack_pop(state->frames);
            break;
          }

          case JSON_FRAME_OBJECT: {
            long count = json_frame_entry_count(frame, state->value_stack);
            json_push_value(state, config, json_decode_object(state, config, count));
            json_frame_stack_pop(state->frames);
            break;
          }

          default: {
            JSON_UNREACHABLE_RETURN(Qundef);
            break;
          }
        }
    }

    ALLOCV_END(tmpbuf);
    return partial_result;
}
|
|
2668
|
+
|
|
2669
|
+
/*
|
|
2670
|
+
* call-seq: partial_value -> object
|
|
2671
|
+
*
|
|
2672
|
+
* Returns the Ruby objects parsed up to this point:
|
|
2673
|
+
* parser << '[1, [2, 3,'
|
|
2674
|
+
* parser.parse # => false
|
|
2675
|
+
* parser.value # ArgumentError no ready value
|
|
2676
|
+
* parser.partial_value # => [1, [2, 3]]
|
|
2677
|
+
*/
|
|
2678
|
+
static VALUE cResumableParser_partial_value(VALUE self)
{
    // Hold the parser lock while the body runs.
    JSON_ResumableParser *parser = ResumableParser_acquire(self, true);

    int status;
    VALUE partial = rb_protect(cResumableParser_partial_value_body, self, &status);

    // Always unlock, then re-raise whatever the body raised.
    parser->in_use = false;
    if (status) {
        rb_jump_tag(status);
    }

    return partial;
}
|
|
2690
|
+
|
|
2691
|
+
/*
|
|
2692
|
+
* call-seq: rest -> string
|
|
2693
|
+
*
|
|
2694
|
+
* Returns a string containing what remains to be parsed in the buffer
|
|
2695
|
+
* parser << '{ "message": "unterminated message'
|
|
2696
|
+
* parser.parse # => false
|
|
2697
|
+
* parser.rest # => '"unterminated message'
|
|
2698
|
+
*/
|
|
2699
|
+
static VALUE cResumableParser_rest(VALUE self)
{
    JSON_ResumableParser *parser = cResumableParser_get(self);

    if (!parser->buffer) {
        // No buffer: nothing is left to parse.
        return rb_utf8_str_new("", 0);
    }

    // Copy everything from the cursor to the end of the buffer into a new string.
    size_t consumed = parser->state.cursor - parser->state.start;
    const char *buffer_ptr = RSTRING_PTR(parser->buffer);
    long buffer_len = RSTRING_LEN(parser->buffer);

    return rb_utf8_str_new(buffer_ptr + consumed, buffer_len - consumed);
}
|
|
2713
|
+
|
|
2714
|
+
/*
|
|
2715
|
+
* call-seq: eos? -> true or false
|
|
2716
|
+
*
|
|
2717
|
+
* Returns whether the internal buffer has been entirely consumed.
|
|
2718
|
+
*/
|
|
2719
|
+
static VALUE cResumableParser_eos_p(VALUE self)
{
    JSON_ResumableParser *parser = cResumableParser_get(self);

    if (eos(&parser->state)) {
        return Qtrue;
    }
    return Qfalse;
}
|
|
2724
|
+
|
|
2725
|
+
/*
|
|
2726
|
+
* call-seq: parsed_bytes -> integer
|
|
2727
|
+
*
|
|
2728
|
+
* Returns the number of bytes parsed since the start of the current partial value.
|
|
2729
|
+
* This is intended to be used for securing against untrusted input:
|
|
2730
|
+
*
|
|
2731
|
+
* loop do
|
|
2732
|
+
* if parser.parsed_bytes > DOCUMENT_MAX_SIZE
|
|
2733
|
+
* raise "document too large"
|
|
2734
|
+
* end
|
|
2735
|
+
*
|
|
2736
|
+
* parser << read_chunk
|
|
2737
|
+
* while parser.parse
|
|
2738
|
+
* process(parser.value)
|
|
2739
|
+
* end
|
|
2740
|
+
* end
|
|
2741
|
+
*/
|
|
2742
|
+
static VALUE cResumableParser_parsed_bytes(VALUE self)
{
    JSON_ResumableParser *parser = cResumableParser_get(self);

    // Bytes already consumed plus bytes still buffered for the unfinished document.
    unsigned long long total = parser->parsed_bytes + parser->incomplete_bytes;
    return ULL2NUM(total);
}
|
|
2747
|
+
|
|
2748
|
+
void Init_parser(void)
|
|
2749
|
+
{
|
|
2750
|
+
#ifdef HAVE_RB_EXT_RACTOR_SAFE
|
|
2751
|
+
rb_ext_ractor_safe(true);
|
|
2752
|
+
#endif
|
|
2753
|
+
|
|
2754
|
+
#undef rb_intern
|
|
2755
|
+
rb_require("json/common");
|
|
2756
|
+
mJSON = rb_define_module("JSON");
|
|
2757
|
+
VALUE mExt = rb_define_module_under(mJSON, "Ext");
|
|
2758
|
+
VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
|
|
2759
|
+
|
|
2760
|
+
rb_global_variable(&eParserError);
|
|
2761
|
+
eParserError = rb_path2class("JSON::ParserError");
|
|
2762
|
+
|
|
2763
|
+
rb_global_variable(&eNestingError);
|
|
2764
|
+
eNestingError = rb_path2class("JSON::NestingError");
|
|
2765
|
+
|
|
2766
|
+
rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
|
|
2767
|
+
rb_define_private_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
|
|
2768
|
+
rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
|
|
2769
|
+
|
|
2770
|
+
VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
|
|
2771
|
+
rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
|
|
2772
|
+
|
|
2773
|
+
VALUE cResumableParser = rb_define_class_under(mJSON, "ResumableParser", rb_cObject);
|
|
2774
|
+
rb_define_alloc_func(cResumableParser, cResumableParser_allocate);
|
|
2775
|
+
rb_define_private_method(cResumableParser, "initialize", cResumableParser_initialize, -1);
|
|
2776
|
+
rb_define_method(cResumableParser, "<<", cResumableParser_feed, 1);
|
|
2777
|
+
rb_define_method(cResumableParser, "parse", cResumableParser_parse, 0);
|
|
2778
|
+
rb_define_method(cResumableParser, "value", cResumableParser_value, 0);
|
|
2779
|
+
rb_define_method(cResumableParser, "value?", cResumableParser_value_p, 0);
|
|
2780
|
+
rb_define_method(cResumableParser, "partial_value", cResumableParser_partial_value, 0);
|
|
2781
|
+
rb_define_method(cResumableParser, "clear", cResumableParser_clear, 0);
|
|
2782
|
+
rb_define_method(cResumableParser, "rest", cResumableParser_rest, 0);
|
|
2783
|
+
rb_define_method(cResumableParser, "eos?", cResumableParser_eos_p, 0);
|
|
2784
|
+
rb_define_method(cResumableParser, "parsed_bytes", cResumableParser_parsed_bytes, 0);
|
|
2785
|
+
|
|
2786
|
+
rb_global_variable(&CNaN);
|
|
2787
|
+
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
|
|
2788
|
+
|
|
2789
|
+
rb_global_variable(&CInfinity);
|
|
2790
|
+
CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
|
|
2791
|
+
|
|
2792
|
+
rb_global_variable(&CMinusInfinity);
|
|
2793
|
+
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
|
|
2794
|
+
|
|
2795
|
+
rb_global_variable(&Encoding_UTF_8);
|
|
2796
|
+
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
|
|
2797
|
+
|
|
2798
|
+
rb_global_variable(&JSON_empty_string);
|
|
2799
|
+
JSON_empty_string = rb_obj_hide(rb_utf8_str_new("", 0));
|
|
2800
|
+
|
|
2801
|
+
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
2802
|
+
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
2803
|
+
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
2804
|
+
sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
|
|
2805
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
2806
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
2807
|
+
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
2808
|
+
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
2809
|
+
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
2810
|
+
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
2811
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
2812
|
+
|
|
2813
|
+
i_new = rb_intern("new");
|
|
2814
|
+
i_try_convert = rb_intern("try_convert");
|
|
2815
|
+
i_uminus = rb_intern("-@");
|
|
2816
|
+
i_encode = rb_intern("encode");
|
|
2817
|
+
i_at_line = rb_intern("@line");
|
|
2818
|
+
i_at_column = rb_intern("@column");
|
|
2819
|
+
|
|
2820
|
+
binary_encindex = rb_ascii8bit_encindex();
|
|
2821
|
+
utf8_encindex = rb_utf8_encindex();
|
|
2822
|
+
enc_utf8 = rb_utf8_encoding();
|
|
2823
|
+
|
|
2824
|
+
#ifdef HAVE_SIMD
|
|
2825
|
+
simd_impl = find_simd_implementation();
|
|
2826
|
+
#endif
|
|
2827
|
+
}
|