json 2.13.2 → 2.19.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +114 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +47 -66
- data/ext/json/ext/generator/extconf.rb +3 -1
- data/ext/json/ext/generator/generator.c +393 -563
- data/ext/json/ext/json.h +116 -0
- data/ext/json/ext/parser/extconf.rb +6 -1
- data/ext/json/ext/parser/parser.c +665 -502
- data/ext/json/ext/simd/simd.h +42 -22
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +101 -33
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +133 -71
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +58 -3
- metadata +6 -3
|
@@ -1,50 +1,22 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
40
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
41
13
|
|
|
42
14
|
static int binary_encindex;
|
|
43
15
|
static int utf8_encindex;
|
|
44
16
|
|
|
45
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
18
|
// For TruffleRuby
|
|
47
|
-
void
|
|
19
|
+
static void
|
|
48
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
21
|
{
|
|
50
22
|
long index = 0;
|
|
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
34
|
#endif
|
|
63
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
64
42
|
|
|
65
43
|
/* name cache */
|
|
66
44
|
|
|
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
84
|
cache->entries[index] = rstring;
|
|
107
85
|
}
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
93
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
116
108
|
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
117
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
118
120
|
|
|
119
|
-
static
|
|
121
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
122
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
125
|
+
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
127
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
132
132
|
}
|
|
133
|
+
}
|
|
133
134
|
|
|
135
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
134
137
|
int low = 0;
|
|
135
138
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
139
|
|
|
139
140
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
141
142
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
144
|
|
|
144
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
145
146
|
return entry;
|
|
146
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
147
148
|
low = mid + 1;
|
|
148
149
|
} else {
|
|
149
150
|
high = mid - 1;
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
154
|
VALUE rstring = build_interned_string(str, length);
|
|
160
155
|
|
|
161
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
158
|
}
|
|
168
159
|
return rstring;
|
|
169
160
|
}
|
|
170
161
|
|
|
171
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
163
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
164
|
int low = 0;
|
|
187
165
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
166
|
|
|
191
167
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
193
169
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
171
|
|
|
196
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
197
173
|
return entry;
|
|
198
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
199
175
|
low = mid + 1;
|
|
200
176
|
} else {
|
|
201
177
|
high = mid - 1;
|
|
202
178
|
}
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
182
|
|
|
213
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
185
|
}
|
|
220
186
|
return rsymbol;
|
|
221
187
|
}
|
|
@@ -275,17 +241,27 @@ static void rvalue_stack_mark(void *ptr)
|
|
|
275
241
|
{
|
|
276
242
|
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
277
243
|
long index;
|
|
278
|
-
|
|
279
|
-
|
|
244
|
+
if (stack && stack->ptr) {
|
|
245
|
+
for (index = 0; index < stack->head; index++) {
|
|
246
|
+
rb_gc_mark(stack->ptr[index]);
|
|
247
|
+
}
|
|
280
248
|
}
|
|
281
249
|
}
|
|
282
250
|
|
|
251
|
+
static void rvalue_stack_free_buffer(rvalue_stack *stack)
|
|
252
|
+
{
|
|
253
|
+
ruby_xfree(stack->ptr);
|
|
254
|
+
stack->ptr = NULL;
|
|
255
|
+
}
|
|
256
|
+
|
|
283
257
|
static void rvalue_stack_free(void *ptr)
|
|
284
258
|
{
|
|
285
259
|
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
286
260
|
if (stack) {
|
|
287
|
-
|
|
261
|
+
rvalue_stack_free_buffer(stack);
|
|
262
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
288
263
|
ruby_xfree(stack);
|
|
264
|
+
#endif
|
|
289
265
|
}
|
|
290
266
|
}
|
|
291
267
|
|
|
@@ -296,14 +272,13 @@ static size_t rvalue_stack_memsize(const void *ptr)
|
|
|
296
272
|
}
|
|
297
273
|
|
|
298
274
|
static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
|
|
299
|
-
"JSON::Ext::Parser/rvalue_stack",
|
|
300
|
-
{
|
|
275
|
+
.wrap_struct_name = "JSON::Ext::Parser/rvalue_stack",
|
|
276
|
+
.function = {
|
|
301
277
|
.dmark = rvalue_stack_mark,
|
|
302
278
|
.dfree = rvalue_stack_free,
|
|
303
279
|
.dsize = rvalue_stack_memsize,
|
|
304
280
|
},
|
|
305
|
-
|
|
306
|
-
RUBY_TYPED_FREE_IMMEDIATELY,
|
|
281
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
307
282
|
};
|
|
308
283
|
|
|
309
284
|
static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
|
|
@@ -325,20 +300,15 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
325
300
|
if (handle) {
|
|
326
301
|
rvalue_stack *stack;
|
|
327
302
|
TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
|
|
328
|
-
|
|
303
|
+
#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
304
|
+
rvalue_stack_free_buffer(stack);
|
|
305
|
+
#else
|
|
329
306
|
rvalue_stack_free(stack);
|
|
307
|
+
RTYPEDDATA_DATA(handle) = NULL;
|
|
308
|
+
#endif
|
|
330
309
|
}
|
|
331
310
|
}
|
|
332
311
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
312
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
313
|
{
|
|
344
314
|
int len = 1;
|
|
@@ -379,13 +349,14 @@ typedef struct JSON_ParserStruct {
|
|
|
379
349
|
int max_nesting;
|
|
380
350
|
bool allow_nan;
|
|
381
351
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
352
|
+
bool allow_control_characters;
|
|
353
|
+
bool allow_invalid_escape;
|
|
383
354
|
bool symbolize_names;
|
|
384
355
|
bool freeze;
|
|
385
356
|
} JSON_ParserConfig;
|
|
386
357
|
|
|
387
358
|
typedef struct JSON_ParserStateStruct {
|
|
388
|
-
VALUE stack_handle;
|
|
359
|
+
VALUE *stack_handle;
|
|
389
360
|
const char *start;
|
|
390
361
|
const char *cursor;
|
|
391
362
|
const char *end;
|
|
@@ -393,8 +364,25 @@ typedef struct JSON_ParserStateStruct {
|
|
|
393
364
|
rvalue_cache name_cache;
|
|
394
365
|
int in_array;
|
|
395
366
|
int current_nesting;
|
|
367
|
+
unsigned int emitted_deprecations;
|
|
396
368
|
} JSON_ParserState;
|
|
397
369
|
|
|
370
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
371
|
+
return state->end - state->cursor;
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
375
|
+
return state->cursor >= state->end;
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
static inline char peek(JSON_ParserState *state)
|
|
379
|
+
{
|
|
380
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
381
|
+
return 0;
|
|
382
|
+
}
|
|
383
|
+
return *state->cursor;
|
|
384
|
+
}
|
|
385
|
+
|
|
398
386
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
387
|
{
|
|
400
388
|
const char *cursor = state->cursor;
|
|
@@ -428,14 +416,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
428
416
|
|
|
429
417
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
430
418
|
|
|
431
|
-
|
|
432
|
-
RBIMPL_ATTR_NORETURN()
|
|
433
|
-
#endif
|
|
434
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
419
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
|
|
435
420
|
{
|
|
436
421
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
437
|
-
long line, column;
|
|
438
|
-
cursor_position(state, &line, &column);
|
|
439
422
|
|
|
440
423
|
const char *ptr = "EOF";
|
|
441
424
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -467,20 +450,28 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
467
450
|
}
|
|
468
451
|
}
|
|
469
452
|
|
|
470
|
-
VALUE
|
|
471
|
-
|
|
472
|
-
|
|
453
|
+
VALUE message = rb_enc_sprintf(enc_utf8, format, ptr);
|
|
454
|
+
rb_str_catf(message, " at line %ld column %ld", line, column);
|
|
455
|
+
return message;
|
|
456
|
+
}
|
|
473
457
|
|
|
458
|
+
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
459
|
+
{
|
|
474
460
|
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
|
475
461
|
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
|
476
462
|
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
|
477
|
-
|
|
463
|
+
return exc;
|
|
478
464
|
}
|
|
479
465
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
466
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
467
|
+
{
|
|
468
|
+
long line, column;
|
|
469
|
+
cursor_position(state, &line, &column);
|
|
470
|
+
VALUE message = build_parse_error_message(format, state, line, column);
|
|
471
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
484
475
|
{
|
|
485
476
|
state->cursor = at;
|
|
486
477
|
raise_parse_error(format, state);
|
|
@@ -505,23 +496,24 @@ static const signed char digit_values[256] = {
|
|
|
505
496
|
-1, -1, -1, -1, -1, -1, -1
|
|
506
497
|
};
|
|
507
498
|
|
|
508
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
509
|
-
{
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
499
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
500
|
+
{
|
|
501
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
502
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
506
|
+
|
|
507
|
+
const signed char b0 = digit_values[p[0]];
|
|
508
|
+
const signed char b1 = digit_values[p[1]];
|
|
509
|
+
const signed char b2 = digit_values[p[2]];
|
|
510
|
+
const signed char b3 = digit_values[p[3]];
|
|
511
|
+
|
|
512
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
513
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
525
517
|
}
|
|
526
518
|
|
|
527
519
|
#define GET_PARSER_CONFIG \
|
|
@@ -530,61 +522,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
522
|
|
|
531
523
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
524
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
525
|
static void
|
|
542
526
|
json_eat_comments(JSON_ParserState *state)
|
|
543
527
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
528
|
+
const char *start = state->cursor;
|
|
529
|
+
state->cursor++;
|
|
530
|
+
|
|
531
|
+
switch (peek(state)) {
|
|
532
|
+
case '/': {
|
|
533
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
534
|
+
if (!state->cursor) {
|
|
535
|
+
state->cursor = state->end;
|
|
536
|
+
} else {
|
|
537
|
+
state->cursor++;
|
|
554
538
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
539
|
+
break;
|
|
540
|
+
}
|
|
541
|
+
case '*': {
|
|
542
|
+
state->cursor++;
|
|
543
|
+
|
|
544
|
+
while (true) {
|
|
545
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
546
|
+
if (!next_match) {
|
|
547
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
state->cursor = next_match + 1;
|
|
551
|
+
if (peek(state) == '/') {
|
|
552
|
+
state->cursor++;
|
|
553
|
+
break;
|
|
568
554
|
}
|
|
569
|
-
break;
|
|
570
555
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
556
|
+
break;
|
|
574
557
|
}
|
|
575
|
-
|
|
576
|
-
|
|
558
|
+
default:
|
|
559
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
560
|
+
break;
|
|
577
561
|
}
|
|
578
562
|
}
|
|
579
563
|
|
|
580
|
-
static
|
|
564
|
+
ALWAYS_INLINE(static) void
|
|
581
565
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
566
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
567
|
+
while (true) {
|
|
568
|
+
switch (peek(state)) {
|
|
569
|
+
case ' ':
|
|
570
|
+
state->cursor++;
|
|
571
|
+
break;
|
|
572
|
+
case '\n':
|
|
573
|
+
state->cursor++;
|
|
574
|
+
|
|
575
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
576
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
577
|
+
while (rest(state) > 8) {
|
|
578
|
+
uint64_t chunk;
|
|
579
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
580
|
+
if (chunk == 0x2020202020202020) {
|
|
581
|
+
state->cursor += 8;
|
|
582
|
+
continue;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
586
|
+
state->cursor += consecutive_spaces;
|
|
587
|
+
break;
|
|
588
|
+
}
|
|
589
|
+
#endif
|
|
590
|
+
break;
|
|
591
|
+
case '\t':
|
|
592
|
+
case '\r':
|
|
593
|
+
state->cursor++;
|
|
594
|
+
break;
|
|
595
|
+
case '/':
|
|
596
|
+
json_eat_comments(state);
|
|
597
|
+
break;
|
|
598
|
+
|
|
599
|
+
default:
|
|
600
|
+
return;
|
|
588
601
|
}
|
|
589
602
|
}
|
|
590
603
|
}
|
|
@@ -615,11 +628,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
628
|
return result;
|
|
616
629
|
}
|
|
617
630
|
|
|
618
|
-
static inline
|
|
631
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
619
632
|
{
|
|
633
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
634
|
+
// Simple heuristics:
|
|
635
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
636
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
637
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
641
|
+
{
|
|
642
|
+
bool intern = is_name || config->freeze;
|
|
643
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
644
|
size_t bufferSize = stringEnd - string;
|
|
621
645
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
646
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
647
|
VALUE cached_key;
|
|
624
648
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
649
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,104 +659,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
659
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
660
|
}
|
|
637
661
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
662
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
663
|
+
typedef struct _json_unescape_positions {
|
|
664
|
+
long size;
|
|
665
|
+
const char **positions;
|
|
666
|
+
unsigned long additional_backslashes;
|
|
667
|
+
} JSON_UnescapePositions;
|
|
645
668
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
669
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
670
|
+
{
|
|
671
|
+
while (positions->size) {
|
|
672
|
+
positions->size--;
|
|
673
|
+
const char *next_position = positions->positions[0];
|
|
674
|
+
positions->positions++;
|
|
675
|
+
if (next_position >= pe) {
|
|
676
|
+
return next_position;
|
|
652
677
|
}
|
|
678
|
+
}
|
|
653
679
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
680
|
+
if (positions->additional_backslashes) {
|
|
681
|
+
positions->additional_backslashes--;
|
|
682
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
683
|
}
|
|
658
684
|
|
|
685
|
+
return NULL;
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
689
|
+
{
|
|
690
|
+
bool intern = is_name || config->freeze;
|
|
691
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
692
|
+
size_t bufferSize = stringEnd - string;
|
|
693
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
694
|
+
char *buffer;
|
|
695
|
+
|
|
659
696
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
697
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
698
|
buffer = RSTRING_PTR(result);
|
|
662
699
|
bufferStart = buffer;
|
|
663
700
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
701
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
702
|
+
|
|
703
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
704
|
if (pe > p) {
|
|
668
705
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
706
|
buffer += pe - p;
|
|
670
707
|
}
|
|
671
708
|
switch (*++pe) {
|
|
709
|
+
case '"':
|
|
710
|
+
case '/':
|
|
711
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
712
|
+
break;
|
|
713
|
+
case '\\':
|
|
714
|
+
APPEND_CHAR('\\');
|
|
715
|
+
break;
|
|
672
716
|
case 'n':
|
|
673
|
-
|
|
717
|
+
APPEND_CHAR('\n');
|
|
674
718
|
break;
|
|
675
719
|
case 'r':
|
|
676
|
-
|
|
720
|
+
APPEND_CHAR('\r');
|
|
677
721
|
break;
|
|
678
722
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
723
|
+
APPEND_CHAR('\t');
|
|
686
724
|
break;
|
|
687
725
|
case 'b':
|
|
688
|
-
|
|
726
|
+
APPEND_CHAR('\b');
|
|
689
727
|
break;
|
|
690
728
|
case 'f':
|
|
691
|
-
|
|
729
|
+
APPEND_CHAR('\f');
|
|
692
730
|
break;
|
|
693
|
-
case 'u':
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if (
|
|
712
|
-
raise_parse_error_at("
|
|
713
|
-
}
|
|
714
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
716
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
717
|
-
| (sur & 0x3FF));
|
|
718
|
-
pe += 5;
|
|
719
|
-
} else {
|
|
720
|
-
unescape = (char *) "?";
|
|
721
|
-
break;
|
|
731
|
+
case 'u': {
|
|
732
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
733
|
+
pe += 3;
|
|
734
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
735
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
736
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
737
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
738
|
+
* final codepoint.
|
|
739
|
+
*
|
|
740
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
741
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
742
|
+
* Area".
|
|
743
|
+
*/
|
|
744
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
745
|
+
pe++;
|
|
746
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
747
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
748
|
+
|
|
749
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
750
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
722
751
|
}
|
|
752
|
+
|
|
753
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
754
|
+
pe += 5;
|
|
755
|
+
} else {
|
|
756
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
757
|
+
break;
|
|
723
758
|
}
|
|
724
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
725
|
-
unescape = buf;
|
|
726
759
|
}
|
|
760
|
+
|
|
761
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
762
|
+
buffer += unescape_len;
|
|
763
|
+
p = ++pe;
|
|
727
764
|
break;
|
|
765
|
+
}
|
|
728
766
|
default:
|
|
729
|
-
|
|
730
|
-
|
|
767
|
+
if ((unsigned char)*pe < 0x20) {
|
|
768
|
+
if (!config->allow_control_characters) {
|
|
769
|
+
if (*pe == '\n') {
|
|
770
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
771
|
+
}
|
|
772
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
|
|
776
|
+
if (config->allow_invalid_escape) {
|
|
777
|
+
APPEND_CHAR(*pe);
|
|
778
|
+
} else {
|
|
779
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
780
|
+
}
|
|
781
|
+
break;
|
|
731
782
|
}
|
|
732
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
733
|
-
buffer += unescape_len;
|
|
734
|
-
p = ++pe;
|
|
735
783
|
}
|
|
784
|
+
#undef APPEND_CHAR
|
|
736
785
|
|
|
737
786
|
if (stringEnd > p) {
|
|
738
787
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -743,81 +792,93 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
743
792
|
if (symbolize) {
|
|
744
793
|
result = rb_str_intern(result);
|
|
745
794
|
} else if (intern) {
|
|
746
|
-
result =
|
|
795
|
+
result = rb_str_to_interned_str(result);
|
|
747
796
|
}
|
|
748
797
|
|
|
749
798
|
return result;
|
|
750
799
|
}
|
|
751
800
|
|
|
752
801
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
753
|
-
|
|
754
|
-
{
|
|
755
|
-
bool negative = false;
|
|
756
|
-
if (*p == '-') {
|
|
757
|
-
negative = true;
|
|
758
|
-
p++;
|
|
759
|
-
}
|
|
802
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
760
803
|
|
|
761
|
-
|
|
762
|
-
while (p < pe) {
|
|
763
|
-
memo *= 10;
|
|
764
|
-
memo += *p - '0';
|
|
765
|
-
p++;
|
|
766
|
-
}
|
|
804
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
767
805
|
|
|
768
|
-
|
|
769
|
-
|
|
806
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
807
|
+
{
|
|
808
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
809
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
810
|
+
MEMCPY(buffer, start, char, len);
|
|
811
|
+
buffer[len] = '\0';
|
|
812
|
+
return func(buffer);
|
|
813
|
+
} else {
|
|
814
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
815
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
816
|
+
MEMCPY(buffer, start, char, len);
|
|
817
|
+
buffer[len] = '\0';
|
|
818
|
+
VALUE number = func(buffer);
|
|
819
|
+
RB_GC_GUARD(buffer_v);
|
|
820
|
+
return number;
|
|
770
821
|
}
|
|
771
|
-
return LL2NUM(memo);
|
|
772
822
|
}
|
|
773
823
|
|
|
774
|
-
static VALUE
|
|
824
|
+
static VALUE json_decode_inum(const char *buffer)
|
|
775
825
|
{
|
|
776
|
-
|
|
777
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
778
|
-
MEMCPY(buffer, start, char, len);
|
|
779
|
-
buffer[len] = '\0';
|
|
780
|
-
VALUE number = rb_cstr2inum(buffer, 10);
|
|
781
|
-
RB_ALLOCV_END(buffer_v);
|
|
782
|
-
return number;
|
|
826
|
+
return rb_cstr2inum(buffer, 10);
|
|
783
827
|
}
|
|
784
828
|
|
|
785
|
-
static
|
|
786
|
-
json_decode_integer(const char *start, const char *end)
|
|
829
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
787
830
|
{
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
831
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
835
|
+
{
|
|
836
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
837
|
+
if (negative) {
|
|
838
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
791
839
|
}
|
|
792
|
-
return
|
|
840
|
+
return UINT64T2NUM(mantissa);
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
return json_decode_large_integer(start, end - start);
|
|
793
844
|
}
|
|
794
845
|
|
|
795
|
-
static VALUE
|
|
846
|
+
static VALUE json_decode_dnum(const char *buffer)
|
|
796
847
|
{
|
|
797
|
-
|
|
798
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
799
|
-
MEMCPY(buffer, start, char, len);
|
|
800
|
-
buffer[len] = '\0';
|
|
801
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
802
|
-
RB_ALLOCV_END(buffer_v);
|
|
803
|
-
return number;
|
|
848
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
804
849
|
}
|
|
805
850
|
|
|
806
|
-
static VALUE
|
|
851
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
807
852
|
{
|
|
808
|
-
|
|
853
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
854
|
+
}
|
|
809
855
|
|
|
856
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
857
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
858
|
+
*/
|
|
859
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative,
|
|
860
|
+
const char *start, const char *end)
|
|
861
|
+
{
|
|
810
862
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
811
|
-
VALUE text = rb_str_new(start,
|
|
863
|
+
VALUE text = rb_str_new(start, end - start);
|
|
812
864
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
813
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
814
|
-
char buffer[64];
|
|
815
|
-
MEMCPY(buffer, start, char, len);
|
|
816
|
-
buffer[len] = '\0';
|
|
817
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
-
} else {
|
|
819
|
-
return json_decode_large_float(start, len);
|
|
820
865
|
}
|
|
866
|
+
|
|
867
|
+
if (RB_UNLIKELY(exponent > INT32_MAX)) {
|
|
868
|
+
return negative ? CMinusInfinity : CInfinity;
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
if (RB_UNLIKELY(exponent < INT32_MIN)) {
|
|
872
|
+
return rb_float_new(negative ? -0.0 : 0.0);
|
|
873
|
+
}
|
|
874
|
+
|
|
875
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
876
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
877
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
878
|
+
return json_decode_large_float(start, end - start);
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative));
|
|
821
882
|
}
|
|
822
883
|
|
|
823
884
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -849,7 +910,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
|
849
910
|
return Qfalse;
|
|
850
911
|
}
|
|
851
912
|
|
|
852
|
-
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
913
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
853
914
|
{
|
|
854
915
|
VALUE message = rb_sprintf(
|
|
855
916
|
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
@@ -860,18 +921,17 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
|
|
|
860
921
|
RB_GC_GUARD(message);
|
|
861
922
|
}
|
|
862
923
|
|
|
863
|
-
|
|
864
|
-
RBIMPL_ATTR_NORETURN()
|
|
865
|
-
#endif
|
|
866
|
-
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
924
|
+
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
867
925
|
{
|
|
868
926
|
VALUE message = rb_sprintf(
|
|
869
927
|
"duplicate key %"PRIsVALUE,
|
|
870
928
|
rb_inspect(duplicate_key)
|
|
871
929
|
);
|
|
872
930
|
|
|
873
|
-
|
|
874
|
-
|
|
931
|
+
long line, column;
|
|
932
|
+
cursor_position(state, &line, &column);
|
|
933
|
+
rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
|
|
934
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
875
935
|
}
|
|
876
936
|
|
|
877
937
|
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
|
@@ -886,7 +946,12 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
886
946
|
case JSON_IGNORE:
|
|
887
947
|
break;
|
|
888
948
|
case JSON_DEPRECATED:
|
|
889
|
-
|
|
949
|
+
// Only emit the first few deprecations to avoid spamming.
|
|
950
|
+
if (state->emitted_deprecations < 5) {
|
|
951
|
+
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
|
|
952
|
+
state->emitted_deprecations++;
|
|
953
|
+
}
|
|
954
|
+
|
|
890
955
|
break;
|
|
891
956
|
case JSON_RAISE:
|
|
892
957
|
raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
|
|
@@ -903,26 +968,12 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
903
968
|
return object;
|
|
904
969
|
}
|
|
905
970
|
|
|
906
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
907
|
-
{
|
|
908
|
-
VALUE string;
|
|
909
|
-
bool intern = is_name || config->freeze;
|
|
910
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
911
|
-
if (escaped) {
|
|
912
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
913
|
-
} else {
|
|
914
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
915
|
-
}
|
|
916
|
-
|
|
917
|
-
return string;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
971
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
921
972
|
{
|
|
922
973
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
923
974
|
value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
|
|
924
975
|
}
|
|
925
|
-
rvalue_stack_push(state->stack, value,
|
|
976
|
+
rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack);
|
|
926
977
|
return value;
|
|
927
978
|
}
|
|
928
979
|
|
|
@@ -939,17 +990,11 @@ static const bool string_scan_table[256] = {
|
|
|
939
990
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
940
991
|
};
|
|
941
992
|
|
|
942
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
943
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
944
|
-
#else
|
|
945
|
-
#define FORCE_INLINE
|
|
946
|
-
#endif
|
|
947
|
-
|
|
948
993
|
#ifdef HAVE_SIMD
|
|
949
994
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
950
995
|
#endif /* HAVE_SIMD */
|
|
951
996
|
|
|
952
|
-
static
|
|
997
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
953
998
|
{
|
|
954
999
|
#ifdef HAVE_SIMD
|
|
955
1000
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -957,7 +1002,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
957
1002
|
uint64_t mask = 0;
|
|
958
1003
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
959
1004
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
960
|
-
return
|
|
1005
|
+
return true;
|
|
961
1006
|
}
|
|
962
1007
|
|
|
963
1008
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -965,64 +1010,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
965
1010
|
int mask = 0;
|
|
966
1011
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
967
1012
|
state->cursor += trailing_zeros(mask);
|
|
968
|
-
return
|
|
1013
|
+
return true;
|
|
969
1014
|
}
|
|
970
1015
|
}
|
|
971
1016
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
972
1017
|
#endif /* HAVE_SIMD */
|
|
973
1018
|
|
|
974
|
-
while (state
|
|
1019
|
+
while (!eos(state)) {
|
|
975
1020
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
976
|
-
return
|
|
1021
|
+
return true;
|
|
977
1022
|
}
|
|
978
|
-
|
|
1023
|
+
state->cursor++;
|
|
979
1024
|
}
|
|
980
|
-
return
|
|
1025
|
+
return false;
|
|
981
1026
|
}
|
|
982
1027
|
|
|
983
|
-
static
|
|
1028
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
984
1029
|
{
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
1030
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
1031
|
+
JSON_UnescapePositions positions = {
|
|
1032
|
+
.size = 0,
|
|
1033
|
+
.positions = backslashes,
|
|
1034
|
+
.additional_backslashes = 0,
|
|
1035
|
+
};
|
|
988
1036
|
|
|
989
|
-
|
|
1037
|
+
do {
|
|
990
1038
|
switch (*state->cursor) {
|
|
991
1039
|
case '"': {
|
|
992
|
-
VALUE string =
|
|
1040
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
993
1041
|
state->cursor++;
|
|
994
1042
|
return json_push_value(state, config, string);
|
|
995
1043
|
}
|
|
996
1044
|
case '\\': {
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1045
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1046
|
+
backslashes[positions.size] = state->cursor;
|
|
1047
|
+
positions.size++;
|
|
1048
|
+
} else {
|
|
1049
|
+
positions.additional_backslashes++;
|
|
1001
1050
|
}
|
|
1051
|
+
state->cursor++;
|
|
1002
1052
|
break;
|
|
1003
1053
|
}
|
|
1004
1054
|
default:
|
|
1005
|
-
|
|
1055
|
+
if (!config->allow_control_characters) {
|
|
1056
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1057
|
+
}
|
|
1006
1058
|
break;
|
|
1007
1059
|
}
|
|
1008
1060
|
|
|
1009
1061
|
state->cursor++;
|
|
1010
|
-
}
|
|
1062
|
+
} while (string_scan(state));
|
|
1011
1063
|
|
|
1012
1064
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1013
1065
|
return Qfalse;
|
|
1014
1066
|
}
|
|
1015
1067
|
|
|
1068
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1069
|
+
{
|
|
1070
|
+
state->cursor++;
|
|
1071
|
+
const char *start = state->cursor;
|
|
1072
|
+
|
|
1073
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1074
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1078
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1079
|
+
state->cursor++;
|
|
1080
|
+
return json_push_value(state, config, string);
|
|
1081
|
+
}
|
|
1082
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1083
|
+
}
|
|
1084
|
+
|
|
1085
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1086
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1087
|
+
// Additional References:
|
|
1088
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1089
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1090
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1091
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1092
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1093
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1094
|
+
val -= 0x3030303030303030;
|
|
1095
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1096
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1097
|
+
return val;
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1101
|
+
const uint32_t mask = 0x000000FF;
|
|
1102
|
+
const uint32_t mul1 = 100;
|
|
1103
|
+
val -= 0x30303030;
|
|
1104
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1105
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1106
|
+
return val;
|
|
1107
|
+
}
|
|
1108
|
+
#endif
|
|
1109
|
+
|
|
1110
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1111
|
+
{
|
|
1112
|
+
const char *start = state->cursor;
|
|
1113
|
+
|
|
1114
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1115
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1116
|
+
uint64_t next_8bytes;
|
|
1117
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1118
|
+
|
|
1119
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1120
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1121
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1122
|
+
|
|
1123
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1124
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1125
|
+
state->cursor += 8;
|
|
1126
|
+
continue;
|
|
1127
|
+
}
|
|
1128
|
+
|
|
1129
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1130
|
+
|
|
1131
|
+
if (consecutive_digits >= 4) {
|
|
1132
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1133
|
+
state->cursor += 4;
|
|
1134
|
+
consecutive_digits -= 4;
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
while (consecutive_digits) {
|
|
1138
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1139
|
+
consecutive_digits--;
|
|
1140
|
+
state->cursor++;
|
|
1141
|
+
}
|
|
1142
|
+
|
|
1143
|
+
return (int)(state->cursor - start);
|
|
1144
|
+
}
|
|
1145
|
+
#endif
|
|
1146
|
+
|
|
1147
|
+
char next_char;
|
|
1148
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1149
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1150
|
+
state->cursor++;
|
|
1151
|
+
}
|
|
1152
|
+
return (int)(state->cursor - start);
|
|
1153
|
+
}
|
|
1154
|
+
|
|
1155
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1156
|
+
{
|
|
1157
|
+
bool integer = true;
|
|
1158
|
+
const char first_digit = *state->cursor;
|
|
1159
|
+
|
|
1160
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1161
|
+
int64_t exponent = 0;
|
|
1162
|
+
int decimal_point_pos = -1;
|
|
1163
|
+
uint64_t mantissa = 0;
|
|
1164
|
+
|
|
1165
|
+
// Parse integer part and extract mantissa digits
|
|
1166
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1167
|
+
|
|
1168
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1169
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
// Parse fractional part
|
|
1173
|
+
if (peek(state) == '.') {
|
|
1174
|
+
integer = false;
|
|
1175
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1176
|
+
state->cursor++;
|
|
1177
|
+
|
|
1178
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1179
|
+
mantissa_digits += fractional_digits;
|
|
1180
|
+
|
|
1181
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1182
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1183
|
+
}
|
|
1184
|
+
}
|
|
1185
|
+
|
|
1186
|
+
// Parse exponent
|
|
1187
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1188
|
+
integer = false;
|
|
1189
|
+
state->cursor++;
|
|
1190
|
+
|
|
1191
|
+
bool negative_exponent = false;
|
|
1192
|
+
const char next_char = peek(state);
|
|
1193
|
+
if (next_char == '-' || next_char == '+') {
|
|
1194
|
+
negative_exponent = next_char == '-';
|
|
1195
|
+
state->cursor++;
|
|
1196
|
+
}
|
|
1197
|
+
|
|
1198
|
+
uint64_t abs_exponent = 0;
|
|
1199
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1200
|
+
|
|
1201
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1202
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
exponent = negative_exponent ? -abs_exponent : abs_exponent;
|
|
1206
|
+
}
|
|
1207
|
+
|
|
1208
|
+
if (integer) {
|
|
1209
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1210
|
+
}
|
|
1211
|
+
|
|
1212
|
+
// Adjust exponent based on decimal point position
|
|
1213
|
+
if (decimal_point_pos >= 0) {
|
|
1214
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1215
|
+
}
|
|
1216
|
+
|
|
1217
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1221
|
+
{
|
|
1222
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1223
|
+
}
|
|
1224
|
+
|
|
1225
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1226
|
+
{
|
|
1227
|
+
const char *start = state->cursor;
|
|
1228
|
+
state->cursor++;
|
|
1229
|
+
return json_parse_number(state, config, true, start);
|
|
1230
|
+
}
|
|
1231
|
+
|
|
1016
1232
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1017
1233
|
{
|
|
1018
1234
|
json_eat_whitespace(state);
|
|
1019
|
-
if (state->cursor >= state->end) {
|
|
1020
|
-
raise_parse_error("unexpected end of input", state);
|
|
1021
|
-
}
|
|
1022
1235
|
|
|
1023
|
-
switch (
|
|
1236
|
+
switch (peek(state)) {
|
|
1024
1237
|
case 'n':
|
|
1025
|
-
if ((state
|
|
1238
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1026
1239
|
state->cursor += 4;
|
|
1027
1240
|
return json_push_value(state, config, Qnil);
|
|
1028
1241
|
}
|
|
@@ -1030,7 +1243,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1030
1243
|
raise_parse_error("unexpected token %s", state);
|
|
1031
1244
|
break;
|
|
1032
1245
|
case 't':
|
|
1033
|
-
if ((state
|
|
1246
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1034
1247
|
state->cursor += 4;
|
|
1035
1248
|
return json_push_value(state, config, Qtrue);
|
|
1036
1249
|
}
|
|
@@ -1039,7 +1252,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1039
1252
|
break;
|
|
1040
1253
|
case 'f':
|
|
1041
1254
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1042
|
-
if ((state
|
|
1255
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1043
1256
|
state->cursor += 5;
|
|
1044
1257
|
return json_push_value(state, config, Qfalse);
|
|
1045
1258
|
}
|
|
@@ -1048,7 +1261,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1048
1261
|
break;
|
|
1049
1262
|
case 'N':
|
|
1050
1263
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1051
|
-
if (config->allow_nan && (state
|
|
1264
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1052
1265
|
state->cursor += 3;
|
|
1053
1266
|
return json_push_value(state, config, CNaN);
|
|
1054
1267
|
}
|
|
@@ -1056,16 +1269,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1056
1269
|
raise_parse_error("unexpected token %s", state);
|
|
1057
1270
|
break;
|
|
1058
1271
|
case 'I':
|
|
1059
|
-
if (config->allow_nan && (state
|
|
1272
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1060
1273
|
state->cursor += 8;
|
|
1061
1274
|
return json_push_value(state, config, CInfinity);
|
|
1062
1275
|
}
|
|
1063
1276
|
|
|
1064
1277
|
raise_parse_error("unexpected token %s", state);
|
|
1065
1278
|
break;
|
|
1066
|
-
case '-':
|
|
1279
|
+
case '-': {
|
|
1067
1280
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1068
|
-
if ((state
|
|
1281
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1069
1282
|
if (config->allow_nan) {
|
|
1070
1283
|
state->cursor += 9;
|
|
1071
1284
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1073,62 +1286,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1286
|
raise_parse_error("unexpected token %s", state);
|
|
1074
1287
|
}
|
|
1075
1288
|
}
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
bool integer = true;
|
|
1079
|
-
|
|
1080
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1081
|
-
const char *start = state->cursor;
|
|
1082
|
-
state->cursor++;
|
|
1083
|
-
|
|
1084
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1085
|
-
state->cursor++;
|
|
1086
|
-
}
|
|
1087
|
-
|
|
1088
|
-
long integer_length = state->cursor - start;
|
|
1089
|
-
|
|
1090
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1091
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1092
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1093
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1094
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1095
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1096
|
-
}
|
|
1097
|
-
|
|
1098
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1099
|
-
integer = false;
|
|
1100
|
-
state->cursor++;
|
|
1101
|
-
|
|
1102
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1103
|
-
raise_parse_error("invalid number: %s", state);
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1107
|
-
state->cursor++;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1112
|
-
integer = false;
|
|
1113
|
-
state->cursor++;
|
|
1114
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1115
|
-
state->cursor++;
|
|
1116
|
-
}
|
|
1117
|
-
|
|
1118
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1119
|
-
raise_parse_error("invalid number: %s", state);
|
|
1120
|
-
}
|
|
1121
|
-
|
|
1122
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1123
|
-
state->cursor++;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
if (integer) {
|
|
1128
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1129
|
-
}
|
|
1130
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1289
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1290
|
+
break;
|
|
1131
1291
|
}
|
|
1292
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1293
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1294
|
+
break;
|
|
1132
1295
|
case '"': {
|
|
1133
1296
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1134
1297
|
return json_parse_string(state, config, false);
|
|
@@ -1139,7 +1302,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1139
1302
|
json_eat_whitespace(state);
|
|
1140
1303
|
long stack_head = state->stack->head;
|
|
1141
1304
|
|
|
1142
|
-
if ((state
|
|
1305
|
+
if (peek(state) == ']') {
|
|
1143
1306
|
state->cursor++;
|
|
1144
1307
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1145
1308
|
} else {
|
|
@@ -1154,26 +1317,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1154
1317
|
while (true) {
|
|
1155
1318
|
json_eat_whitespace(state);
|
|
1156
1319
|
|
|
1157
|
-
|
|
1158
|
-
if (*state->cursor == ']') {
|
|
1159
|
-
state->cursor++;
|
|
1160
|
-
long count = state->stack->head - stack_head;
|
|
1161
|
-
state->current_nesting--;
|
|
1162
|
-
state->in_array--;
|
|
1163
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1164
|
-
}
|
|
1320
|
+
const char next_char = peek(state);
|
|
1165
1321
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
}
|
|
1322
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1323
|
+
state->cursor++;
|
|
1324
|
+
if (config->allow_trailing_comma) {
|
|
1325
|
+
json_eat_whitespace(state);
|
|
1326
|
+
if (peek(state) == ']') {
|
|
1327
|
+
continue;
|
|
1173
1328
|
}
|
|
1174
|
-
json_parse_any(state, config);
|
|
1175
|
-
continue;
|
|
1176
1329
|
}
|
|
1330
|
+
json_parse_any(state, config);
|
|
1331
|
+
continue;
|
|
1332
|
+
}
|
|
1333
|
+
|
|
1334
|
+
if (next_char == ']') {
|
|
1335
|
+
state->cursor++;
|
|
1336
|
+
long count = state->stack->head - stack_head;
|
|
1337
|
+
state->current_nesting--;
|
|
1338
|
+
state->in_array--;
|
|
1339
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1177
1340
|
}
|
|
1178
1341
|
|
|
1179
1342
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1187,7 +1350,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1187
1350
|
json_eat_whitespace(state);
|
|
1188
1351
|
long stack_head = state->stack->head;
|
|
1189
1352
|
|
|
1190
|
-
if ((state
|
|
1353
|
+
if (peek(state) == '}') {
|
|
1191
1354
|
state->cursor++;
|
|
1192
1355
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1193
1356
|
} else {
|
|
@@ -1196,13 +1359,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1196
1359
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1197
1360
|
}
|
|
1198
1361
|
|
|
1199
|
-
if (
|
|
1362
|
+
if (peek(state) != '"') {
|
|
1200
1363
|
raise_parse_error("expected object key, got %s", state);
|
|
1201
1364
|
}
|
|
1202
1365
|
json_parse_string(state, config, true);
|
|
1203
1366
|
|
|
1204
1367
|
json_eat_whitespace(state);
|
|
1205
|
-
if ((state
|
|
1368
|
+
if (peek(state) != ':') {
|
|
1206
1369
|
raise_parse_error("expected ':' after object key", state);
|
|
1207
1370
|
}
|
|
1208
1371
|
state->cursor++;
|
|
@@ -1213,46 +1376,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1213
1376
|
while (true) {
|
|
1214
1377
|
json_eat_whitespace(state);
|
|
1215
1378
|
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1379
|
+
const char next_char = peek(state);
|
|
1380
|
+
if (next_char == '}') {
|
|
1381
|
+
state->cursor++;
|
|
1382
|
+
state->current_nesting--;
|
|
1383
|
+
size_t count = state->stack->head - stack_head;
|
|
1221
1384
|
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1385
|
+
// Temporary rewind cursor in case an error is raised
|
|
1386
|
+
const char *final_cursor = state->cursor;
|
|
1387
|
+
state->cursor = object_start_cursor;
|
|
1388
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1389
|
+
state->cursor = final_cursor;
|
|
1227
1390
|
|
|
1228
|
-
|
|
1229
|
-
|
|
1391
|
+
return json_push_value(state, config, object);
|
|
1392
|
+
}
|
|
1230
1393
|
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1394
|
+
if (next_char == ',') {
|
|
1395
|
+
state->cursor++;
|
|
1396
|
+
json_eat_whitespace(state);
|
|
1234
1397
|
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
}
|
|
1398
|
+
if (config->allow_trailing_comma) {
|
|
1399
|
+
if (peek(state) == '}') {
|
|
1400
|
+
continue;
|
|
1239
1401
|
}
|
|
1402
|
+
}
|
|
1240
1403
|
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1404
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1405
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1406
|
+
}
|
|
1407
|
+
json_parse_string(state, config, true);
|
|
1245
1408
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1409
|
+
json_eat_whitespace(state);
|
|
1410
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1411
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1412
|
+
}
|
|
1413
|
+
state->cursor++;
|
|
1251
1414
|
|
|
1252
|
-
|
|
1415
|
+
json_parse_any(state, config);
|
|
1253
1416
|
|
|
1254
|
-
|
|
1255
|
-
}
|
|
1417
|
+
continue;
|
|
1256
1418
|
}
|
|
1257
1419
|
|
|
1258
1420
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1260,18 +1422,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1260
1422
|
break;
|
|
1261
1423
|
}
|
|
1262
1424
|
|
|
1425
|
+
case 0:
|
|
1426
|
+
raise_parse_error("unexpected end of input", state);
|
|
1427
|
+
break;
|
|
1428
|
+
|
|
1263
1429
|
default:
|
|
1264
1430
|
raise_parse_error("unexpected character: %s", state);
|
|
1265
1431
|
break;
|
|
1266
1432
|
}
|
|
1267
1433
|
|
|
1268
|
-
raise_parse_error("
|
|
1434
|
+
raise_parse_error("unreachable: %s", state);
|
|
1435
|
+
return Qundef;
|
|
1269
1436
|
}
|
|
1270
1437
|
|
|
1271
1438
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1272
1439
|
{
|
|
1273
1440
|
json_eat_whitespace(state);
|
|
1274
|
-
if (state
|
|
1441
|
+
if (!eos(state)) {
|
|
1275
1442
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1276
1443
|
}
|
|
1277
1444
|
}
|
|
@@ -1308,14 +1475,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1308
1475
|
{
|
|
1309
1476
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1310
1477
|
|
|
1311
|
-
if (key == sym_max_nesting)
|
|
1312
|
-
else if (key == sym_allow_nan)
|
|
1313
|
-
else if (key == sym_allow_trailing_comma)
|
|
1314
|
-
else if (key ==
|
|
1315
|
-
else if (key ==
|
|
1316
|
-
else if (key ==
|
|
1317
|
-
else if (key ==
|
|
1318
|
-
else if (key ==
|
|
1478
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1479
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1480
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1481
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1482
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1483
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1484
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1485
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1486
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1487
|
+
else if (key == sym_decimal_class) {
|
|
1319
1488
|
if (RTEST(val)) {
|
|
1320
1489
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1321
1490
|
config->decimal_class = val;
|
|
@@ -1388,6 +1557,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1388
1557
|
*/
|
|
1389
1558
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1390
1559
|
{
|
|
1560
|
+
rb_check_frozen(self);
|
|
1391
1561
|
GET_PARSER_CONFIG;
|
|
1392
1562
|
|
|
1393
1563
|
parser_config_init(config, opts);
|
|
@@ -1413,11 +1583,13 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
|
|
|
1413
1583
|
const char *start;
|
|
1414
1584
|
RSTRING_GETMEM(Vsource, start, len);
|
|
1415
1585
|
|
|
1586
|
+
VALUE stack_handle = 0;
|
|
1416
1587
|
JSON_ParserState _state = {
|
|
1417
1588
|
.start = start,
|
|
1418
1589
|
.cursor = start,
|
|
1419
1590
|
.end = start + len,
|
|
1420
1591
|
.stack = &stack,
|
|
1592
|
+
.stack_handle = &stack_handle,
|
|
1421
1593
|
};
|
|
1422
1594
|
JSON_ParserState *state = &_state;
|
|
1423
1595
|
|
|
@@ -1425,8 +1597,8 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
|
|
|
1425
1597
|
|
|
1426
1598
|
// This may be skipped in case of exception, but
|
|
1427
1599
|
// it won't cause a leak.
|
|
1428
|
-
rvalue_stack_eagerly_release(
|
|
1429
|
-
|
|
1600
|
+
rvalue_stack_eagerly_release(stack_handle);
|
|
1601
|
+
RB_GC_GUARD(stack_handle);
|
|
1430
1602
|
json_ensure_eof(state);
|
|
1431
1603
|
|
|
1432
1604
|
return result;
|
|
@@ -1464,26 +1636,19 @@ static void JSON_ParserConfig_mark(void *ptr)
|
|
|
1464
1636
|
rb_gc_mark(config->decimal_class);
|
|
1465
1637
|
}
|
|
1466
1638
|
|
|
1467
|
-
static void JSON_ParserConfig_free(void *ptr)
|
|
1468
|
-
{
|
|
1469
|
-
JSON_ParserConfig *config = ptr;
|
|
1470
|
-
ruby_xfree(config);
|
|
1471
|
-
}
|
|
1472
|
-
|
|
1473
1639
|
static size_t JSON_ParserConfig_memsize(const void *ptr)
|
|
1474
1640
|
{
|
|
1475
1641
|
return sizeof(JSON_ParserConfig);
|
|
1476
1642
|
}
|
|
1477
1643
|
|
|
1478
1644
|
static const rb_data_type_t JSON_ParserConfig_type = {
|
|
1479
|
-
"JSON::Ext::Parser/ParserConfig",
|
|
1480
|
-
{
|
|
1645
|
+
.wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
|
|
1646
|
+
.function = {
|
|
1481
1647
|
JSON_ParserConfig_mark,
|
|
1482
|
-
|
|
1648
|
+
RUBY_DEFAULT_FREE,
|
|
1483
1649
|
JSON_ParserConfig_memsize,
|
|
1484
1650
|
},
|
|
1485
|
-
|
|
1486
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1651
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
|
|
1487
1652
|
};
|
|
1488
1653
|
|
|
1489
1654
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1527,16 +1692,14 @@ void Init_parser(void)
|
|
|
1527
1692
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1528
1693
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1529
1694
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1695
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1696
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1530
1697
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1531
1698
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1532
1699
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1533
1700
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1534
1701
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1535
1702
|
|
|
1536
|
-
i_chr = rb_intern("chr");
|
|
1537
|
-
i_aset = rb_intern("[]=");
|
|
1538
|
-
i_aref = rb_intern("[]");
|
|
1539
|
-
i_leftshift = rb_intern("<<");
|
|
1540
1703
|
i_new = rb_intern("new");
|
|
1541
1704
|
i_try_convert = rb_intern("try_convert");
|
|
1542
1705
|
i_uminus = rb_intern("-@");
|