json 2.14.1 → 2.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +68 -1
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +32 -77
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +295 -471
- data/ext/json/ext/json.h +105 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +617 -477
- data/ext/json/ext/simd/simd.h +42 -22
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/common.rb +69 -26
- data/lib/json/ext/generator/state.rb +5 -1
- data/lib/json/truffle_ruby/generator.rb +86 -34
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +33 -0
- metadata +4 -2
|
@@ -1,50 +1,22 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
40
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
41
13
|
|
|
42
14
|
static int binary_encindex;
|
|
43
15
|
static int utf8_encindex;
|
|
44
16
|
|
|
45
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
18
|
// For TruffleRuby
|
|
47
|
-
void
|
|
19
|
+
static void
|
|
48
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
21
|
{
|
|
50
22
|
long index = 0;
|
|
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
34
|
#endif
|
|
63
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
64
42
|
|
|
65
43
|
/* name cache */
|
|
66
44
|
|
|
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
84
|
cache->entries[index] = rstring;
|
|
107
85
|
}
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
93
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
116
114
|
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
117
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
118
120
|
|
|
119
|
-
static
|
|
121
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
122
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
125
|
+
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
127
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
132
132
|
}
|
|
133
|
+
}
|
|
133
134
|
|
|
135
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
134
137
|
int low = 0;
|
|
135
138
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
139
|
|
|
139
140
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
141
142
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
144
|
|
|
144
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
145
146
|
return entry;
|
|
146
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
147
148
|
low = mid + 1;
|
|
148
149
|
} else {
|
|
149
150
|
high = mid - 1;
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
154
|
VALUE rstring = build_interned_string(str, length);
|
|
160
155
|
|
|
161
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
158
|
}
|
|
168
159
|
return rstring;
|
|
169
160
|
}
|
|
170
161
|
|
|
171
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
163
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
164
|
int low = 0;
|
|
187
165
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
166
|
|
|
191
167
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
193
169
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
171
|
|
|
196
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
197
173
|
return entry;
|
|
198
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
199
175
|
low = mid + 1;
|
|
200
176
|
} else {
|
|
201
177
|
high = mid - 1;
|
|
202
178
|
}
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
182
|
|
|
213
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
185
|
}
|
|
220
186
|
return rsymbol;
|
|
221
187
|
}
|
|
@@ -330,15 +296,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
296
|
}
|
|
331
297
|
}
|
|
332
298
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
299
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
300
|
{
|
|
344
301
|
int len = 1;
|
|
@@ -379,7 +336,8 @@ typedef struct JSON_ParserStruct {
|
|
|
379
336
|
int max_nesting;
|
|
380
337
|
bool allow_nan;
|
|
381
338
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
339
|
+
bool allow_control_characters;
|
|
340
|
+
bool allow_invalid_escape;
|
|
383
341
|
bool symbolize_names;
|
|
384
342
|
bool freeze;
|
|
385
343
|
} JSON_ParserConfig;
|
|
@@ -395,6 +353,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
353
|
int current_nesting;
|
|
396
354
|
} JSON_ParserState;
|
|
397
355
|
|
|
356
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
357
|
+
return state->end - state->cursor;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
361
|
+
return state->cursor >= state->end;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
static inline char peek(JSON_ParserState *state)
|
|
365
|
+
{
|
|
366
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
367
|
+
return 0;
|
|
368
|
+
}
|
|
369
|
+
return *state->cursor;
|
|
370
|
+
}
|
|
371
|
+
|
|
398
372
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
373
|
{
|
|
400
374
|
const char *cursor = state->cursor;
|
|
@@ -428,14 +402,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
428
402
|
|
|
429
403
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
430
404
|
|
|
431
|
-
|
|
432
|
-
RBIMPL_ATTR_NORETURN()
|
|
433
|
-
#endif
|
|
434
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
405
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
|
|
435
406
|
{
|
|
436
407
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
437
|
-
long line, column;
|
|
438
|
-
cursor_position(state, &line, &column);
|
|
439
408
|
|
|
440
409
|
const char *ptr = "EOF";
|
|
441
410
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -470,17 +439,26 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
470
439
|
VALUE msg = rb_sprintf(format, ptr);
|
|
471
440
|
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
|
472
441
|
RB_GC_GUARD(msg);
|
|
442
|
+
return message;
|
|
443
|
+
}
|
|
473
444
|
|
|
445
|
+
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
446
|
+
{
|
|
474
447
|
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
|
475
448
|
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
|
476
449
|
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
|
477
|
-
|
|
450
|
+
return exc;
|
|
478
451
|
}
|
|
479
452
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
453
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
454
|
+
{
|
|
455
|
+
long line, column;
|
|
456
|
+
cursor_position(state, &line, &column);
|
|
457
|
+
VALUE message = build_parse_error_message(format, state, line, column);
|
|
458
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
484
462
|
{
|
|
485
463
|
state->cursor = at;
|
|
486
464
|
raise_parse_error(format, state);
|
|
@@ -505,23 +483,24 @@ static const signed char digit_values[256] = {
|
|
|
505
483
|
-1, -1, -1, -1, -1, -1, -1
|
|
506
484
|
};
|
|
507
485
|
|
|
508
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
509
|
-
{
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
486
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
487
|
+
{
|
|
488
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
489
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
493
|
+
|
|
494
|
+
const signed char b0 = digit_values[p[0]];
|
|
495
|
+
const signed char b1 = digit_values[p[1]];
|
|
496
|
+
const signed char b2 = digit_values[p[2]];
|
|
497
|
+
const signed char b3 = digit_values[p[3]];
|
|
498
|
+
|
|
499
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
500
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
525
504
|
}
|
|
526
505
|
|
|
527
506
|
#define GET_PARSER_CONFIG \
|
|
@@ -530,61 +509,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
509
|
|
|
531
510
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
511
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
512
|
static void
|
|
542
513
|
json_eat_comments(JSON_ParserState *state)
|
|
543
514
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
515
|
+
const char *start = state->cursor;
|
|
516
|
+
state->cursor++;
|
|
517
|
+
|
|
518
|
+
switch (peek(state)) {
|
|
519
|
+
case '/': {
|
|
520
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
521
|
+
if (!state->cursor) {
|
|
522
|
+
state->cursor = state->end;
|
|
523
|
+
} else {
|
|
524
|
+
state->cursor++;
|
|
554
525
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
526
|
+
break;
|
|
527
|
+
}
|
|
528
|
+
case '*': {
|
|
529
|
+
state->cursor++;
|
|
530
|
+
|
|
531
|
+
while (true) {
|
|
532
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
533
|
+
if (!next_match) {
|
|
534
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
state->cursor = next_match + 1;
|
|
538
|
+
if (peek(state) == '/') {
|
|
539
|
+
state->cursor++;
|
|
540
|
+
break;
|
|
568
541
|
}
|
|
569
|
-
break;
|
|
570
542
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
543
|
+
break;
|
|
574
544
|
}
|
|
575
|
-
|
|
576
|
-
|
|
545
|
+
default:
|
|
546
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
547
|
+
break;
|
|
577
548
|
}
|
|
578
549
|
}
|
|
579
550
|
|
|
580
|
-
static
|
|
551
|
+
ALWAYS_INLINE(static) void
|
|
581
552
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
553
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
554
|
+
while (true) {
|
|
555
|
+
switch (peek(state)) {
|
|
556
|
+
case ' ':
|
|
557
|
+
state->cursor++;
|
|
558
|
+
break;
|
|
559
|
+
case '\n':
|
|
560
|
+
state->cursor++;
|
|
561
|
+
|
|
562
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
563
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
564
|
+
while (rest(state) > 8) {
|
|
565
|
+
uint64_t chunk;
|
|
566
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
567
|
+
if (chunk == 0x2020202020202020) {
|
|
568
|
+
state->cursor += 8;
|
|
569
|
+
continue;
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
573
|
+
state->cursor += consecutive_spaces;
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
#endif
|
|
577
|
+
break;
|
|
578
|
+
case '\t':
|
|
579
|
+
case '\r':
|
|
580
|
+
state->cursor++;
|
|
581
|
+
break;
|
|
582
|
+
case '/':
|
|
583
|
+
json_eat_comments(state);
|
|
584
|
+
break;
|
|
585
|
+
|
|
586
|
+
default:
|
|
587
|
+
return;
|
|
588
588
|
}
|
|
589
589
|
}
|
|
590
590
|
}
|
|
@@ -615,11 +615,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
615
|
return result;
|
|
616
616
|
}
|
|
617
617
|
|
|
618
|
-
static inline
|
|
618
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
619
|
+
{
|
|
620
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
621
|
+
// Simple heuristics:
|
|
622
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
623
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
624
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
619
628
|
{
|
|
629
|
+
bool intern = is_name || config->freeze;
|
|
630
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
631
|
size_t bufferSize = stringEnd - string;
|
|
621
632
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
633
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
634
|
VALUE cached_key;
|
|
624
635
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
636
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,109 +646,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
646
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
647
|
}
|
|
637
648
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
649
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
650
|
+
typedef struct _json_unescape_positions {
|
|
651
|
+
long size;
|
|
652
|
+
const char **positions;
|
|
653
|
+
unsigned long additional_backslashes;
|
|
654
|
+
} JSON_UnescapePositions;
|
|
645
655
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
656
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
657
|
+
{
|
|
658
|
+
while (positions->size) {
|
|
659
|
+
positions->size--;
|
|
660
|
+
const char *next_position = positions->positions[0];
|
|
661
|
+
positions->positions++;
|
|
662
|
+
if (next_position >= pe) {
|
|
663
|
+
return next_position;
|
|
652
664
|
}
|
|
665
|
+
}
|
|
653
666
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
667
|
+
if (positions->additional_backslashes) {
|
|
668
|
+
positions->additional_backslashes--;
|
|
669
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
670
|
}
|
|
658
671
|
|
|
672
|
+
return NULL;
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
676
|
+
{
|
|
677
|
+
bool intern = is_name || config->freeze;
|
|
678
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
679
|
+
size_t bufferSize = stringEnd - string;
|
|
680
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
681
|
+
char *buffer;
|
|
682
|
+
|
|
659
683
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
684
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
685
|
buffer = RSTRING_PTR(result);
|
|
662
686
|
bufferStart = buffer;
|
|
663
687
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
688
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
689
|
+
|
|
690
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
691
|
if (pe > p) {
|
|
668
692
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
693
|
buffer += pe - p;
|
|
670
694
|
}
|
|
671
695
|
switch (*++pe) {
|
|
696
|
+
case '"':
|
|
697
|
+
case '/':
|
|
698
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
699
|
+
break;
|
|
700
|
+
case '\\':
|
|
701
|
+
APPEND_CHAR('\\');
|
|
702
|
+
break;
|
|
672
703
|
case 'n':
|
|
673
|
-
|
|
704
|
+
APPEND_CHAR('\n');
|
|
674
705
|
break;
|
|
675
706
|
case 'r':
|
|
676
|
-
|
|
707
|
+
APPEND_CHAR('\r');
|
|
677
708
|
break;
|
|
678
709
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
710
|
+
APPEND_CHAR('\t');
|
|
686
711
|
break;
|
|
687
712
|
case 'b':
|
|
688
|
-
|
|
713
|
+
APPEND_CHAR('\b');
|
|
689
714
|
break;
|
|
690
715
|
case 'f':
|
|
691
|
-
|
|
716
|
+
APPEND_CHAR('\f');
|
|
692
717
|
break;
|
|
693
|
-
case 'u':
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if (
|
|
712
|
-
raise_parse_error_at("
|
|
713
|
-
}
|
|
714
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
716
|
-
|
|
717
|
-
if ((sur & 0xFC00) != 0xDC00) {
|
|
718
|
-
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
719
|
-
}
|
|
720
|
-
|
|
721
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
722
|
-
| (sur & 0x3FF));
|
|
723
|
-
pe += 5;
|
|
724
|
-
} else {
|
|
725
|
-
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
726
|
-
break;
|
|
718
|
+
case 'u': {
|
|
719
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
720
|
+
pe += 3;
|
|
721
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
722
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
723
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
724
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
725
|
+
* final codepoint.
|
|
726
|
+
*
|
|
727
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
728
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
729
|
+
* Area".
|
|
730
|
+
*/
|
|
731
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
732
|
+
pe++;
|
|
733
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
734
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
735
|
+
|
|
736
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
737
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
727
738
|
}
|
|
739
|
+
|
|
740
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
741
|
+
pe += 5;
|
|
742
|
+
} else {
|
|
743
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
744
|
+
break;
|
|
728
745
|
}
|
|
729
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
730
|
-
unescape = buf;
|
|
731
746
|
}
|
|
747
|
+
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
749
|
+
buffer += unescape_len;
|
|
750
|
+
p = ++pe;
|
|
732
751
|
break;
|
|
752
|
+
}
|
|
733
753
|
default:
|
|
734
|
-
|
|
735
|
-
|
|
754
|
+
if ((unsigned char)*pe < 0x20) {
|
|
755
|
+
if (!config->allow_control_characters) {
|
|
756
|
+
if (*pe == '\n') {
|
|
757
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
758
|
+
}
|
|
759
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
if (config->allow_invalid_escape) {
|
|
764
|
+
APPEND_CHAR(*pe);
|
|
765
|
+
} else {
|
|
766
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
767
|
+
}
|
|
768
|
+
break;
|
|
736
769
|
}
|
|
737
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
738
|
-
buffer += unescape_len;
|
|
739
|
-
p = ++pe;
|
|
740
770
|
}
|
|
771
|
+
#undef APPEND_CHAR
|
|
741
772
|
|
|
742
773
|
if (stringEnd > p) {
|
|
743
774
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -748,81 +779,85 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
748
779
|
if (symbolize) {
|
|
749
780
|
result = rb_str_intern(result);
|
|
750
781
|
} else if (intern) {
|
|
751
|
-
result =
|
|
782
|
+
result = rb_str_to_interned_str(result);
|
|
752
783
|
}
|
|
753
784
|
|
|
754
785
|
return result;
|
|
755
786
|
}
|
|
756
787
|
|
|
757
788
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
758
|
-
|
|
759
|
-
{
|
|
760
|
-
bool negative = false;
|
|
761
|
-
if (*p == '-') {
|
|
762
|
-
negative = true;
|
|
763
|
-
p++;
|
|
764
|
-
}
|
|
789
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
765
790
|
|
|
766
|
-
|
|
767
|
-
while (p < pe) {
|
|
768
|
-
memo *= 10;
|
|
769
|
-
memo += *p - '0';
|
|
770
|
-
p++;
|
|
771
|
-
}
|
|
791
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
772
792
|
|
|
773
|
-
|
|
774
|
-
|
|
793
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
794
|
+
{
|
|
795
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
796
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
797
|
+
MEMCPY(buffer, start, char, len);
|
|
798
|
+
buffer[len] = '\0';
|
|
799
|
+
return func(buffer);
|
|
800
|
+
} else {
|
|
801
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
802
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
803
|
+
MEMCPY(buffer, start, char, len);
|
|
804
|
+
buffer[len] = '\0';
|
|
805
|
+
VALUE number = func(buffer);
|
|
806
|
+
RB_GC_GUARD(buffer_v);
|
|
807
|
+
return number;
|
|
775
808
|
}
|
|
776
|
-
return LL2NUM(memo);
|
|
777
809
|
}
|
|
778
810
|
|
|
779
|
-
static VALUE
|
|
811
|
+
static VALUE json_decode_inum(const char *buffer)
|
|
812
|
+
{
|
|
813
|
+
return rb_cstr2inum(buffer, 10);
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
780
817
|
{
|
|
781
|
-
|
|
782
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
783
|
-
MEMCPY(buffer, start, char, len);
|
|
784
|
-
buffer[len] = '\0';
|
|
785
|
-
VALUE number = rb_cstr2inum(buffer, 10);
|
|
786
|
-
RB_ALLOCV_END(buffer_v);
|
|
787
|
-
return number;
|
|
818
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
788
819
|
}
|
|
789
820
|
|
|
790
|
-
static inline VALUE
|
|
791
|
-
json_decode_integer(const char *start, const char *end)
|
|
821
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
792
822
|
{
|
|
793
|
-
|
|
794
|
-
if (
|
|
795
|
-
return
|
|
823
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
824
|
+
if (negative) {
|
|
825
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
796
826
|
}
|
|
797
|
-
return
|
|
827
|
+
return UINT64T2NUM(mantissa);
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
return json_decode_large_integer(start, end - start);
|
|
798
831
|
}
|
|
799
832
|
|
|
800
|
-
static VALUE
|
|
833
|
+
static VALUE json_decode_dnum(const char *buffer)
|
|
801
834
|
{
|
|
802
|
-
|
|
803
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
804
|
-
MEMCPY(buffer, start, char, len);
|
|
805
|
-
buffer[len] = '\0';
|
|
806
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
807
|
-
RB_ALLOCV_END(buffer_v);
|
|
808
|
-
return number;
|
|
835
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
809
836
|
}
|
|
810
837
|
|
|
811
|
-
static VALUE
|
|
838
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
812
839
|
{
|
|
813
|
-
|
|
840
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
841
|
+
}
|
|
814
842
|
|
|
843
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
844
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
845
|
+
*/
|
|
846
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
847
|
+
const char *start, const char *end)
|
|
848
|
+
{
|
|
815
849
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
816
|
-
VALUE text = rb_str_new(start,
|
|
850
|
+
VALUE text = rb_str_new(start, end - start);
|
|
817
851
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
818
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
819
|
-
char buffer[64];
|
|
820
|
-
MEMCPY(buffer, start, char, len);
|
|
821
|
-
buffer[len] = '\0';
|
|
822
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
823
|
-
} else {
|
|
824
|
-
return json_decode_large_float(start, len);
|
|
825
852
|
}
|
|
853
|
+
|
|
854
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
855
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
856
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
857
|
+
return json_decode_large_float(start, end - start);
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
826
861
|
}
|
|
827
862
|
|
|
828
863
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -854,7 +889,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
|
854
889
|
return Qfalse;
|
|
855
890
|
}
|
|
856
891
|
|
|
857
|
-
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
892
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
858
893
|
{
|
|
859
894
|
VALUE message = rb_sprintf(
|
|
860
895
|
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
@@ -865,16 +900,18 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
|
|
|
865
900
|
RB_GC_GUARD(message);
|
|
866
901
|
}
|
|
867
902
|
|
|
868
|
-
|
|
869
|
-
RBIMPL_ATTR_NORETURN()
|
|
870
|
-
#endif
|
|
871
|
-
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
903
|
+
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
872
904
|
{
|
|
873
905
|
VALUE message = rb_sprintf(
|
|
874
906
|
"duplicate key %"PRIsVALUE,
|
|
875
907
|
rb_inspect(duplicate_key)
|
|
876
908
|
);
|
|
877
909
|
|
|
910
|
+
long line, column;
|
|
911
|
+
cursor_position(state, &line, &column);
|
|
912
|
+
rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
|
|
913
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
914
|
+
|
|
878
915
|
raise_parse_error(RSTRING_PTR(message), state);
|
|
879
916
|
RB_GC_GUARD(message);
|
|
880
917
|
}
|
|
@@ -908,20 +945,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
908
945
|
return object;
|
|
909
946
|
}
|
|
910
947
|
|
|
911
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
912
|
-
{
|
|
913
|
-
VALUE string;
|
|
914
|
-
bool intern = is_name || config->freeze;
|
|
915
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
916
|
-
if (escaped) {
|
|
917
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
918
|
-
} else {
|
|
919
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
return string;
|
|
923
|
-
}
|
|
924
|
-
|
|
925
948
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
926
949
|
{
|
|
927
950
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -944,17 +967,11 @@ static const bool string_scan_table[256] = {
|
|
|
944
967
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
945
968
|
};
|
|
946
969
|
|
|
947
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
948
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
949
|
-
#else
|
|
950
|
-
#define FORCE_INLINE
|
|
951
|
-
#endif
|
|
952
|
-
|
|
953
970
|
#ifdef HAVE_SIMD
|
|
954
971
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
955
972
|
#endif /* HAVE_SIMD */
|
|
956
973
|
|
|
957
|
-
static
|
|
974
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
958
975
|
{
|
|
959
976
|
#ifdef HAVE_SIMD
|
|
960
977
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -962,7 +979,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
962
979
|
uint64_t mask = 0;
|
|
963
980
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
964
981
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
965
|
-
return
|
|
982
|
+
return true;
|
|
966
983
|
}
|
|
967
984
|
|
|
968
985
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -970,64 +987,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
970
987
|
int mask = 0;
|
|
971
988
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
972
989
|
state->cursor += trailing_zeros(mask);
|
|
973
|
-
return
|
|
990
|
+
return true;
|
|
974
991
|
}
|
|
975
992
|
}
|
|
976
993
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
977
994
|
#endif /* HAVE_SIMD */
|
|
978
995
|
|
|
979
|
-
while (state
|
|
996
|
+
while (!eos(state)) {
|
|
980
997
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
981
|
-
return
|
|
998
|
+
return true;
|
|
982
999
|
}
|
|
983
1000
|
state->cursor++;
|
|
984
1001
|
}
|
|
985
|
-
return
|
|
1002
|
+
return false;
|
|
986
1003
|
}
|
|
987
1004
|
|
|
988
|
-
static
|
|
1005
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
989
1006
|
{
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
1007
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
1008
|
+
JSON_UnescapePositions positions = {
|
|
1009
|
+
.size = 0,
|
|
1010
|
+
.positions = backslashes,
|
|
1011
|
+
.additional_backslashes = 0,
|
|
1012
|
+
};
|
|
993
1013
|
|
|
994
|
-
|
|
1014
|
+
do {
|
|
995
1015
|
switch (*state->cursor) {
|
|
996
1016
|
case '"': {
|
|
997
|
-
VALUE string =
|
|
1017
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
998
1018
|
state->cursor++;
|
|
999
1019
|
return json_push_value(state, config, string);
|
|
1000
1020
|
}
|
|
1001
1021
|
case '\\': {
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1022
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1023
|
+
backslashes[positions.size] = state->cursor;
|
|
1024
|
+
positions.size++;
|
|
1025
|
+
} else {
|
|
1026
|
+
positions.additional_backslashes++;
|
|
1006
1027
|
}
|
|
1028
|
+
state->cursor++;
|
|
1007
1029
|
break;
|
|
1008
1030
|
}
|
|
1009
1031
|
default:
|
|
1010
|
-
|
|
1032
|
+
if (!config->allow_control_characters) {
|
|
1033
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1034
|
+
}
|
|
1011
1035
|
break;
|
|
1012
1036
|
}
|
|
1013
1037
|
|
|
1014
1038
|
state->cursor++;
|
|
1015
|
-
}
|
|
1039
|
+
} while (string_scan(state));
|
|
1016
1040
|
|
|
1017
1041
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1018
1042
|
return Qfalse;
|
|
1019
1043
|
}
|
|
1020
1044
|
|
|
1045
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1046
|
+
{
|
|
1047
|
+
state->cursor++;
|
|
1048
|
+
const char *start = state->cursor;
|
|
1049
|
+
|
|
1050
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1051
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1055
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1056
|
+
state->cursor++;
|
|
1057
|
+
return json_push_value(state, config, string);
|
|
1058
|
+
}
|
|
1059
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1062
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1063
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1064
|
+
// Additional References:
|
|
1065
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1066
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1067
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1068
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1069
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1070
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1071
|
+
val -= 0x3030303030303030;
|
|
1072
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1073
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1074
|
+
return val;
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1078
|
+
const uint32_t mask = 0x000000FF;
|
|
1079
|
+
const uint32_t mul1 = 100;
|
|
1080
|
+
val -= 0x30303030;
|
|
1081
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1082
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1083
|
+
return val;
|
|
1084
|
+
}
|
|
1085
|
+
#endif
|
|
1086
|
+
|
|
1087
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1088
|
+
{
|
|
1089
|
+
const char *start = state->cursor;
|
|
1090
|
+
|
|
1091
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1092
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1093
|
+
uint64_t next_8bytes;
|
|
1094
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1095
|
+
|
|
1096
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1097
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1098
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1099
|
+
|
|
1100
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1101
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1102
|
+
state->cursor += 8;
|
|
1103
|
+
continue;
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1107
|
+
|
|
1108
|
+
if (consecutive_digits >= 4) {
|
|
1109
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1110
|
+
state->cursor += 4;
|
|
1111
|
+
consecutive_digits -= 4;
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
while (consecutive_digits) {
|
|
1115
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1116
|
+
consecutive_digits--;
|
|
1117
|
+
state->cursor++;
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
return (int)(state->cursor - start);
|
|
1121
|
+
}
|
|
1122
|
+
#endif
|
|
1123
|
+
|
|
1124
|
+
char next_char;
|
|
1125
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1126
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1127
|
+
state->cursor++;
|
|
1128
|
+
}
|
|
1129
|
+
return (int)(state->cursor - start);
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1133
|
+
{
|
|
1134
|
+
bool integer = true;
|
|
1135
|
+
const char first_digit = *state->cursor;
|
|
1136
|
+
|
|
1137
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1138
|
+
int32_t exponent = 0;
|
|
1139
|
+
int decimal_point_pos = -1;
|
|
1140
|
+
uint64_t mantissa = 0;
|
|
1141
|
+
|
|
1142
|
+
// Parse integer part and extract mantissa digits
|
|
1143
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1144
|
+
|
|
1145
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1146
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1147
|
+
}
|
|
1148
|
+
|
|
1149
|
+
// Parse fractional part
|
|
1150
|
+
if (peek(state) == '.') {
|
|
1151
|
+
integer = false;
|
|
1152
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1153
|
+
state->cursor++;
|
|
1154
|
+
|
|
1155
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1156
|
+
mantissa_digits += fractional_digits;
|
|
1157
|
+
|
|
1158
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1159
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
1162
|
+
|
|
1163
|
+
// Parse exponent
|
|
1164
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1165
|
+
integer = false;
|
|
1166
|
+
state->cursor++;
|
|
1167
|
+
|
|
1168
|
+
bool negative_exponent = false;
|
|
1169
|
+
const char next_char = peek(state);
|
|
1170
|
+
if (next_char == '-' || next_char == '+') {
|
|
1171
|
+
negative_exponent = next_char == '-';
|
|
1172
|
+
state->cursor++;
|
|
1173
|
+
}
|
|
1174
|
+
|
|
1175
|
+
uint64_t abs_exponent = 0;
|
|
1176
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1177
|
+
|
|
1178
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1179
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1180
|
+
}
|
|
1181
|
+
|
|
1182
|
+
exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
|
|
1183
|
+
}
|
|
1184
|
+
|
|
1185
|
+
if (integer) {
|
|
1186
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1189
|
+
// Adjust exponent based on decimal point position
|
|
1190
|
+
if (decimal_point_pos >= 0) {
|
|
1191
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1198
|
+
{
|
|
1199
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1202
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1203
|
+
{
|
|
1204
|
+
const char *start = state->cursor;
|
|
1205
|
+
state->cursor++;
|
|
1206
|
+
return json_parse_number(state, config, true, start);
|
|
1207
|
+
}
|
|
1208
|
+
|
|
1021
1209
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1022
1210
|
{
|
|
1023
1211
|
json_eat_whitespace(state);
|
|
1024
|
-
if (state->cursor >= state->end) {
|
|
1025
|
-
raise_parse_error("unexpected end of input", state);
|
|
1026
|
-
}
|
|
1027
1212
|
|
|
1028
|
-
switch (
|
|
1213
|
+
switch (peek(state)) {
|
|
1029
1214
|
case 'n':
|
|
1030
|
-
if ((state
|
|
1215
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1031
1216
|
state->cursor += 4;
|
|
1032
1217
|
return json_push_value(state, config, Qnil);
|
|
1033
1218
|
}
|
|
@@ -1035,7 +1220,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1035
1220
|
raise_parse_error("unexpected token %s", state);
|
|
1036
1221
|
break;
|
|
1037
1222
|
case 't':
|
|
1038
|
-
if ((state
|
|
1223
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1039
1224
|
state->cursor += 4;
|
|
1040
1225
|
return json_push_value(state, config, Qtrue);
|
|
1041
1226
|
}
|
|
@@ -1044,7 +1229,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1044
1229
|
break;
|
|
1045
1230
|
case 'f':
|
|
1046
1231
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1047
|
-
if ((state
|
|
1232
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1048
1233
|
state->cursor += 5;
|
|
1049
1234
|
return json_push_value(state, config, Qfalse);
|
|
1050
1235
|
}
|
|
@@ -1053,7 +1238,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1053
1238
|
break;
|
|
1054
1239
|
case 'N':
|
|
1055
1240
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1056
|
-
if (config->allow_nan && (state
|
|
1241
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1057
1242
|
state->cursor += 3;
|
|
1058
1243
|
return json_push_value(state, config, CNaN);
|
|
1059
1244
|
}
|
|
@@ -1061,16 +1246,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1061
1246
|
raise_parse_error("unexpected token %s", state);
|
|
1062
1247
|
break;
|
|
1063
1248
|
case 'I':
|
|
1064
|
-
if (config->allow_nan && (state
|
|
1249
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1065
1250
|
state->cursor += 8;
|
|
1066
1251
|
return json_push_value(state, config, CInfinity);
|
|
1067
1252
|
}
|
|
1068
1253
|
|
|
1069
1254
|
raise_parse_error("unexpected token %s", state);
|
|
1070
1255
|
break;
|
|
1071
|
-
case '-':
|
|
1256
|
+
case '-': {
|
|
1072
1257
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1073
|
-
if ((state
|
|
1258
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1074
1259
|
if (config->allow_nan) {
|
|
1075
1260
|
state->cursor += 9;
|
|
1076
1261
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1078,62 +1263,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1078
1263
|
raise_parse_error("unexpected token %s", state);
|
|
1079
1264
|
}
|
|
1080
1265
|
}
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
bool integer = true;
|
|
1084
|
-
|
|
1085
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1086
|
-
const char *start = state->cursor;
|
|
1087
|
-
state->cursor++;
|
|
1088
|
-
|
|
1089
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1090
|
-
state->cursor++;
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
long integer_length = state->cursor - start;
|
|
1094
|
-
|
|
1095
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1096
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1097
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1098
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1099
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1100
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1104
|
-
integer = false;
|
|
1105
|
-
state->cursor++;
|
|
1106
|
-
|
|
1107
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1108
|
-
raise_parse_error("invalid number: %s", state);
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1112
|
-
state->cursor++;
|
|
1113
|
-
}
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1117
|
-
integer = false;
|
|
1118
|
-
state->cursor++;
|
|
1119
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1120
|
-
state->cursor++;
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1124
|
-
raise_parse_error("invalid number: %s", state);
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1128
|
-
state->cursor++;
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
if (integer) {
|
|
1133
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1134
|
-
}
|
|
1135
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1266
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1267
|
+
break;
|
|
1136
1268
|
}
|
|
1269
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1270
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1271
|
+
break;
|
|
1137
1272
|
case '"': {
|
|
1138
1273
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1139
1274
|
return json_parse_string(state, config, false);
|
|
@@ -1144,7 +1279,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1144
1279
|
json_eat_whitespace(state);
|
|
1145
1280
|
long stack_head = state->stack->head;
|
|
1146
1281
|
|
|
1147
|
-
if ((state
|
|
1282
|
+
if (peek(state) == ']') {
|
|
1148
1283
|
state->cursor++;
|
|
1149
1284
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1150
1285
|
} else {
|
|
@@ -1159,26 +1294,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1159
1294
|
while (true) {
|
|
1160
1295
|
json_eat_whitespace(state);
|
|
1161
1296
|
|
|
1162
|
-
|
|
1163
|
-
if (*state->cursor == ']') {
|
|
1164
|
-
state->cursor++;
|
|
1165
|
-
long count = state->stack->head - stack_head;
|
|
1166
|
-
state->current_nesting--;
|
|
1167
|
-
state->in_array--;
|
|
1168
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1169
|
-
}
|
|
1297
|
+
const char next_char = peek(state);
|
|
1170
1298
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
}
|
|
1299
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1300
|
+
state->cursor++;
|
|
1301
|
+
if (config->allow_trailing_comma) {
|
|
1302
|
+
json_eat_whitespace(state);
|
|
1303
|
+
if (peek(state) == ']') {
|
|
1304
|
+
continue;
|
|
1178
1305
|
}
|
|
1179
|
-
json_parse_any(state, config);
|
|
1180
|
-
continue;
|
|
1181
1306
|
}
|
|
1307
|
+
json_parse_any(state, config);
|
|
1308
|
+
continue;
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
if (next_char == ']') {
|
|
1312
|
+
state->cursor++;
|
|
1313
|
+
long count = state->stack->head - stack_head;
|
|
1314
|
+
state->current_nesting--;
|
|
1315
|
+
state->in_array--;
|
|
1316
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1182
1317
|
}
|
|
1183
1318
|
|
|
1184
1319
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1192,7 +1327,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1192
1327
|
json_eat_whitespace(state);
|
|
1193
1328
|
long stack_head = state->stack->head;
|
|
1194
1329
|
|
|
1195
|
-
if ((state
|
|
1330
|
+
if (peek(state) == '}') {
|
|
1196
1331
|
state->cursor++;
|
|
1197
1332
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1198
1333
|
} else {
|
|
@@ -1201,13 +1336,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1201
1336
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1202
1337
|
}
|
|
1203
1338
|
|
|
1204
|
-
if (
|
|
1339
|
+
if (peek(state) != '"') {
|
|
1205
1340
|
raise_parse_error("expected object key, got %s", state);
|
|
1206
1341
|
}
|
|
1207
1342
|
json_parse_string(state, config, true);
|
|
1208
1343
|
|
|
1209
1344
|
json_eat_whitespace(state);
|
|
1210
|
-
if ((state
|
|
1345
|
+
if (peek(state) != ':') {
|
|
1211
1346
|
raise_parse_error("expected ':' after object key", state);
|
|
1212
1347
|
}
|
|
1213
1348
|
state->cursor++;
|
|
@@ -1218,46 +1353,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1218
1353
|
while (true) {
|
|
1219
1354
|
json_eat_whitespace(state);
|
|
1220
1355
|
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1356
|
+
const char next_char = peek(state);
|
|
1357
|
+
if (next_char == '}') {
|
|
1358
|
+
state->cursor++;
|
|
1359
|
+
state->current_nesting--;
|
|
1360
|
+
size_t count = state->stack->head - stack_head;
|
|
1226
1361
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1362
|
+
// Temporary rewind cursor in case an error is raised
|
|
1363
|
+
const char *final_cursor = state->cursor;
|
|
1364
|
+
state->cursor = object_start_cursor;
|
|
1365
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1366
|
+
state->cursor = final_cursor;
|
|
1232
1367
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1368
|
+
return json_push_value(state, config, object);
|
|
1369
|
+
}
|
|
1235
1370
|
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1371
|
+
if (next_char == ',') {
|
|
1372
|
+
state->cursor++;
|
|
1373
|
+
json_eat_whitespace(state);
|
|
1239
1374
|
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
}
|
|
1375
|
+
if (config->allow_trailing_comma) {
|
|
1376
|
+
if (peek(state) == '}') {
|
|
1377
|
+
continue;
|
|
1244
1378
|
}
|
|
1379
|
+
}
|
|
1245
1380
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1381
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1382
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1383
|
+
}
|
|
1384
|
+
json_parse_string(state, config, true);
|
|
1250
1385
|
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1386
|
+
json_eat_whitespace(state);
|
|
1387
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1388
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1389
|
+
}
|
|
1390
|
+
state->cursor++;
|
|
1256
1391
|
|
|
1257
|
-
|
|
1392
|
+
json_parse_any(state, config);
|
|
1258
1393
|
|
|
1259
|
-
|
|
1260
|
-
}
|
|
1394
|
+
continue;
|
|
1261
1395
|
}
|
|
1262
1396
|
|
|
1263
1397
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1265,18 +1399,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1265
1399
|
break;
|
|
1266
1400
|
}
|
|
1267
1401
|
|
|
1402
|
+
case 0:
|
|
1403
|
+
raise_parse_error("unexpected end of input", state);
|
|
1404
|
+
break;
|
|
1405
|
+
|
|
1268
1406
|
default:
|
|
1269
1407
|
raise_parse_error("unexpected character: %s", state);
|
|
1270
1408
|
break;
|
|
1271
1409
|
}
|
|
1272
1410
|
|
|
1273
1411
|
raise_parse_error("unreachable: %s", state);
|
|
1412
|
+
return Qundef;
|
|
1274
1413
|
}
|
|
1275
1414
|
|
|
1276
1415
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1277
1416
|
{
|
|
1278
1417
|
json_eat_whitespace(state);
|
|
1279
|
-
if (state
|
|
1418
|
+
if (!eos(state)) {
|
|
1280
1419
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1281
1420
|
}
|
|
1282
1421
|
}
|
|
@@ -1313,14 +1452,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1313
1452
|
{
|
|
1314
1453
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1315
1454
|
|
|
1316
|
-
if (key == sym_max_nesting)
|
|
1317
|
-
else if (key == sym_allow_nan)
|
|
1318
|
-
else if (key == sym_allow_trailing_comma)
|
|
1319
|
-
else if (key ==
|
|
1320
|
-
else if (key ==
|
|
1321
|
-
else if (key ==
|
|
1322
|
-
else if (key ==
|
|
1323
|
-
else if (key ==
|
|
1455
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1456
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1457
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1458
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1459
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1460
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1461
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1462
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1463
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1464
|
+
else if (key == sym_decimal_class) {
|
|
1324
1465
|
if (RTEST(val)) {
|
|
1325
1466
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1326
1467
|
config->decimal_class = val;
|
|
@@ -1393,6 +1534,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1393
1534
|
*/
|
|
1394
1535
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1395
1536
|
{
|
|
1537
|
+
rb_check_frozen(self);
|
|
1396
1538
|
GET_PARSER_CONFIG;
|
|
1397
1539
|
|
|
1398
1540
|
parser_config_init(config, opts);
|
|
@@ -1488,7 +1630,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1488
1630
|
JSON_ParserConfig_memsize,
|
|
1489
1631
|
},
|
|
1490
1632
|
0, 0,
|
|
1491
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1633
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1492
1634
|
};
|
|
1493
1635
|
|
|
1494
1636
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1532,16 +1674,14 @@ void Init_parser(void)
|
|
|
1532
1674
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1533
1675
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1534
1676
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1677
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1678
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1535
1679
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1536
1680
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1537
1681
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1538
1682
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1539
1683
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1540
1684
|
|
|
1541
|
-
i_chr = rb_intern("chr");
|
|
1542
|
-
i_aset = rb_intern("[]=");
|
|
1543
|
-
i_aref = rb_intern("[]");
|
|
1544
|
-
i_leftshift = rb_intern("<<");
|
|
1545
1685
|
i_new = rb_intern("new");
|
|
1546
1686
|
i_try_convert = rb_intern("try_convert");
|
|
1547
1687
|
i_uminus = rb_intern("-@");
|