json 2.15.1 → 2.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +46 -1
- data/LEGAL +12 -0
- data/README.md +17 -1
- data/ext/json/ext/fbuffer/fbuffer.h +30 -77
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +288 -472
- data/ext/json/ext/json.h +105 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +597 -474
- data/ext/json/ext/simd/simd.h +42 -22
- data/ext/json/ext/vendor/fpconv.c +3 -3
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/common.rb +69 -26
- data/lib/json/ext/generator/state.rb +5 -1
- data/lib/json/truffle_ruby/generator.rb +66 -22
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +33 -0
- metadata +4 -2
|
@@ -1,50 +1,22 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
40
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
41
13
|
|
|
42
14
|
static int binary_encindex;
|
|
43
15
|
static int utf8_encindex;
|
|
44
16
|
|
|
45
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
18
|
// For TruffleRuby
|
|
47
|
-
void
|
|
19
|
+
static void
|
|
48
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
21
|
{
|
|
50
22
|
long index = 0;
|
|
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
34
|
#endif
|
|
63
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
64
42
|
|
|
65
43
|
/* name cache */
|
|
66
44
|
|
|
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
84
|
cache->entries[index] = rstring;
|
|
107
85
|
}
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
93
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
116
114
|
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
117
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
118
120
|
|
|
119
|
-
static
|
|
121
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
122
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
// cache names above an arbitrary threshold.
|
|
124
|
-
return Qfalse;
|
|
125
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
126
125
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
127
|
+
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
132
132
|
}
|
|
133
|
+
}
|
|
133
134
|
|
|
135
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
134
137
|
int low = 0;
|
|
135
138
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
139
|
|
|
139
140
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
141
142
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
144
|
|
|
144
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
145
146
|
return entry;
|
|
146
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
147
148
|
low = mid + 1;
|
|
148
149
|
} else {
|
|
149
150
|
high = mid - 1;
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
154
|
VALUE rstring = build_interned_string(str, length);
|
|
160
155
|
|
|
161
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
158
|
}
|
|
168
159
|
return rstring;
|
|
169
160
|
}
|
|
170
161
|
|
|
171
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
163
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
164
|
int low = 0;
|
|
187
165
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
166
|
|
|
191
167
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
193
169
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
171
|
|
|
196
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
197
173
|
return entry;
|
|
198
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
199
175
|
low = mid + 1;
|
|
200
176
|
} else {
|
|
201
177
|
high = mid - 1;
|
|
202
178
|
}
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
182
|
|
|
213
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
185
|
}
|
|
220
186
|
return rsymbol;
|
|
221
187
|
}
|
|
@@ -330,15 +296,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
296
|
}
|
|
331
297
|
}
|
|
332
298
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
299
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
300
|
{
|
|
344
301
|
int len = 1;
|
|
@@ -379,7 +336,8 @@ typedef struct JSON_ParserStruct {
|
|
|
379
336
|
int max_nesting;
|
|
380
337
|
bool allow_nan;
|
|
381
338
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
339
|
+
bool allow_control_characters;
|
|
340
|
+
bool allow_invalid_escape;
|
|
383
341
|
bool symbolize_names;
|
|
384
342
|
bool freeze;
|
|
385
343
|
} JSON_ParserConfig;
|
|
@@ -395,6 +353,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
353
|
int current_nesting;
|
|
396
354
|
} JSON_ParserState;
|
|
397
355
|
|
|
356
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
357
|
+
return state->end - state->cursor;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
361
|
+
return state->cursor >= state->end;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
static inline char peek(JSON_ParserState *state)
|
|
365
|
+
{
|
|
366
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
367
|
+
return 0;
|
|
368
|
+
}
|
|
369
|
+
return *state->cursor;
|
|
370
|
+
}
|
|
371
|
+
|
|
398
372
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
373
|
{
|
|
400
374
|
const char *cursor = state->cursor;
|
|
@@ -428,10 +402,7 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
428
402
|
|
|
429
403
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
430
404
|
|
|
431
|
-
|
|
432
|
-
RBIMPL_ATTR_NORETURN()
|
|
433
|
-
#endif
|
|
434
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
405
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
435
406
|
{
|
|
436
407
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
437
408
|
long line, column;
|
|
@@ -477,10 +448,7 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
477
448
|
rb_exc_raise(exc);
|
|
478
449
|
}
|
|
479
450
|
|
|
480
|
-
|
|
481
|
-
RBIMPL_ATTR_NORETURN()
|
|
482
|
-
#endif
|
|
483
|
-
static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
451
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
484
452
|
{
|
|
485
453
|
state->cursor = at;
|
|
486
454
|
raise_parse_error(format, state);
|
|
@@ -505,23 +473,24 @@ static const signed char digit_values[256] = {
|
|
|
505
473
|
-1, -1, -1, -1, -1, -1, -1
|
|
506
474
|
};
|
|
507
475
|
|
|
508
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
509
|
-
{
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
476
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
477
|
+
{
|
|
478
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
479
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
483
|
+
|
|
484
|
+
const signed char b0 = digit_values[p[0]];
|
|
485
|
+
const signed char b1 = digit_values[p[1]];
|
|
486
|
+
const signed char b2 = digit_values[p[2]];
|
|
487
|
+
const signed char b3 = digit_values[p[3]];
|
|
488
|
+
|
|
489
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
490
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
525
494
|
}
|
|
526
495
|
|
|
527
496
|
#define GET_PARSER_CONFIG \
|
|
@@ -530,61 +499,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
499
|
|
|
531
500
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
501
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
502
|
static void
|
|
542
503
|
json_eat_comments(JSON_ParserState *state)
|
|
543
504
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
505
|
+
const char *start = state->cursor;
|
|
506
|
+
state->cursor++;
|
|
507
|
+
|
|
508
|
+
switch (peek(state)) {
|
|
509
|
+
case '/': {
|
|
510
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
511
|
+
if (!state->cursor) {
|
|
512
|
+
state->cursor = state->end;
|
|
513
|
+
} else {
|
|
514
|
+
state->cursor++;
|
|
554
515
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
516
|
+
break;
|
|
517
|
+
}
|
|
518
|
+
case '*': {
|
|
519
|
+
state->cursor++;
|
|
520
|
+
|
|
521
|
+
while (true) {
|
|
522
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
523
|
+
if (!next_match) {
|
|
524
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
state->cursor = next_match + 1;
|
|
528
|
+
if (peek(state) == '/') {
|
|
529
|
+
state->cursor++;
|
|
530
|
+
break;
|
|
568
531
|
}
|
|
569
|
-
break;
|
|
570
532
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
533
|
+
break;
|
|
574
534
|
}
|
|
575
|
-
|
|
576
|
-
|
|
535
|
+
default:
|
|
536
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
537
|
+
break;
|
|
577
538
|
}
|
|
578
539
|
}
|
|
579
540
|
|
|
580
|
-
static
|
|
541
|
+
ALWAYS_INLINE(static) void
|
|
581
542
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
543
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
544
|
+
while (true) {
|
|
545
|
+
switch (peek(state)) {
|
|
546
|
+
case ' ':
|
|
547
|
+
state->cursor++;
|
|
548
|
+
break;
|
|
549
|
+
case '\n':
|
|
550
|
+
state->cursor++;
|
|
551
|
+
|
|
552
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
553
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
554
|
+
while (rest(state) > 8) {
|
|
555
|
+
uint64_t chunk;
|
|
556
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
557
|
+
if (chunk == 0x2020202020202020) {
|
|
558
|
+
state->cursor += 8;
|
|
559
|
+
continue;
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
563
|
+
state->cursor += consecutive_spaces;
|
|
564
|
+
break;
|
|
565
|
+
}
|
|
566
|
+
#endif
|
|
567
|
+
break;
|
|
568
|
+
case '\t':
|
|
569
|
+
case '\r':
|
|
570
|
+
state->cursor++;
|
|
571
|
+
break;
|
|
572
|
+
case '/':
|
|
573
|
+
json_eat_comments(state);
|
|
574
|
+
break;
|
|
575
|
+
|
|
576
|
+
default:
|
|
577
|
+
return;
|
|
588
578
|
}
|
|
589
579
|
}
|
|
590
580
|
}
|
|
@@ -615,11 +605,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
605
|
return result;
|
|
616
606
|
}
|
|
617
607
|
|
|
618
|
-
static inline
|
|
608
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
619
609
|
{
|
|
610
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
611
|
+
// Simple heuristics:
|
|
612
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
613
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
614
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
618
|
+
{
|
|
619
|
+
bool intern = is_name || config->freeze;
|
|
620
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
621
|
size_t bufferSize = stringEnd - string;
|
|
621
622
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
623
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
624
|
VALUE cached_key;
|
|
624
625
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
626
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,109 +636,127 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
636
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
637
|
}
|
|
637
638
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
639
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
640
|
+
typedef struct _json_unescape_positions {
|
|
641
|
+
long size;
|
|
642
|
+
const char **positions;
|
|
643
|
+
unsigned long additional_backslashes;
|
|
644
|
+
} JSON_UnescapePositions;
|
|
645
645
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
646
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
647
|
+
{
|
|
648
|
+
while (positions->size) {
|
|
649
|
+
positions->size--;
|
|
650
|
+
const char *next_position = positions->positions[0];
|
|
651
|
+
positions->positions++;
|
|
652
|
+
if (next_position >= pe) {
|
|
653
|
+
return next_position;
|
|
652
654
|
}
|
|
655
|
+
}
|
|
653
656
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
+
if (positions->additional_backslashes) {
|
|
658
|
+
positions->additional_backslashes--;
|
|
659
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
660
|
}
|
|
658
661
|
|
|
662
|
+
return NULL;
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
666
|
+
{
|
|
667
|
+
bool intern = is_name || config->freeze;
|
|
668
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
669
|
+
size_t bufferSize = stringEnd - string;
|
|
670
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
671
|
+
char *buffer;
|
|
672
|
+
|
|
659
673
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
674
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
675
|
buffer = RSTRING_PTR(result);
|
|
662
676
|
bufferStart = buffer;
|
|
663
677
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
678
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
679
|
+
|
|
680
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
681
|
if (pe > p) {
|
|
668
682
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
683
|
buffer += pe - p;
|
|
670
684
|
}
|
|
671
685
|
switch (*++pe) {
|
|
686
|
+
case '"':
|
|
687
|
+
case '/':
|
|
688
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
689
|
+
break;
|
|
690
|
+
case '\\':
|
|
691
|
+
APPEND_CHAR('\\');
|
|
692
|
+
break;
|
|
672
693
|
case 'n':
|
|
673
|
-
|
|
694
|
+
APPEND_CHAR('\n');
|
|
674
695
|
break;
|
|
675
696
|
case 'r':
|
|
676
|
-
|
|
697
|
+
APPEND_CHAR('\r');
|
|
677
698
|
break;
|
|
678
699
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
700
|
+
APPEND_CHAR('\t');
|
|
686
701
|
break;
|
|
687
702
|
case 'b':
|
|
688
|
-
|
|
703
|
+
APPEND_CHAR('\b');
|
|
689
704
|
break;
|
|
690
705
|
case 'f':
|
|
691
|
-
|
|
706
|
+
APPEND_CHAR('\f');
|
|
692
707
|
break;
|
|
693
|
-
case 'u':
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if (
|
|
712
|
-
raise_parse_error_at("
|
|
713
|
-
}
|
|
714
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
716
|
-
|
|
717
|
-
if ((sur & 0xFC00) != 0xDC00) {
|
|
718
|
-
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
719
|
-
}
|
|
720
|
-
|
|
721
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
722
|
-
| (sur & 0x3FF));
|
|
723
|
-
pe += 5;
|
|
724
|
-
} else {
|
|
725
|
-
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
726
|
-
break;
|
|
708
|
+
case 'u': {
|
|
709
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
710
|
+
pe += 3;
|
|
711
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
712
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
713
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
714
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
715
|
+
* final codepoint.
|
|
716
|
+
*
|
|
717
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
718
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
719
|
+
* Area".
|
|
720
|
+
*/
|
|
721
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
722
|
+
pe++;
|
|
723
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
724
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
725
|
+
|
|
726
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
727
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
727
728
|
}
|
|
729
|
+
|
|
730
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
731
|
+
pe += 5;
|
|
732
|
+
} else {
|
|
733
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
734
|
+
break;
|
|
728
735
|
}
|
|
729
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
730
|
-
unescape = buf;
|
|
731
736
|
}
|
|
737
|
+
|
|
738
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
739
|
+
buffer += unescape_len;
|
|
740
|
+
p = ++pe;
|
|
732
741
|
break;
|
|
742
|
+
}
|
|
733
743
|
default:
|
|
734
|
-
|
|
735
|
-
|
|
744
|
+
if ((unsigned char)*pe < 0x20) {
|
|
745
|
+
if (!config->allow_control_characters) {
|
|
746
|
+
if (*pe == '\n') {
|
|
747
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
748
|
+
}
|
|
749
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
750
|
+
}
|
|
751
|
+
} else if (config->allow_invalid_escape) {
|
|
752
|
+
APPEND_CHAR(*pe);
|
|
753
|
+
} else {
|
|
754
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
755
|
+
}
|
|
756
|
+
break;
|
|
736
757
|
}
|
|
737
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
738
|
-
buffer += unescape_len;
|
|
739
|
-
p = ++pe;
|
|
740
758
|
}
|
|
759
|
+
#undef APPEND_CHAR
|
|
741
760
|
|
|
742
761
|
if (stringEnd > p) {
|
|
743
762
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -748,81 +767,85 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
748
767
|
if (symbolize) {
|
|
749
768
|
result = rb_str_intern(result);
|
|
750
769
|
} else if (intern) {
|
|
751
|
-
result =
|
|
770
|
+
result = rb_str_to_interned_str(result);
|
|
752
771
|
}
|
|
753
772
|
|
|
754
773
|
return result;
|
|
755
774
|
}
|
|
756
775
|
|
|
757
776
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
758
|
-
|
|
759
|
-
{
|
|
760
|
-
bool negative = false;
|
|
761
|
-
if (*p == '-') {
|
|
762
|
-
negative = true;
|
|
763
|
-
p++;
|
|
764
|
-
}
|
|
777
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
765
778
|
|
|
766
|
-
|
|
767
|
-
while (p < pe) {
|
|
768
|
-
memo *= 10;
|
|
769
|
-
memo += *p - '0';
|
|
770
|
-
p++;
|
|
771
|
-
}
|
|
779
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
772
780
|
|
|
773
|
-
|
|
774
|
-
|
|
781
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
782
|
+
{
|
|
783
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
784
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
785
|
+
MEMCPY(buffer, start, char, len);
|
|
786
|
+
buffer[len] = '\0';
|
|
787
|
+
return func(buffer);
|
|
788
|
+
} else {
|
|
789
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
790
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
791
|
+
MEMCPY(buffer, start, char, len);
|
|
792
|
+
buffer[len] = '\0';
|
|
793
|
+
VALUE number = func(buffer);
|
|
794
|
+
RB_GC_GUARD(buffer_v);
|
|
795
|
+
return number;
|
|
775
796
|
}
|
|
776
|
-
return LL2NUM(memo);
|
|
777
797
|
}
|
|
778
798
|
|
|
779
|
-
static VALUE
|
|
799
|
+
static VALUE json_decode_inum(const char *buffer)
|
|
780
800
|
{
|
|
781
|
-
|
|
782
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
783
|
-
MEMCPY(buffer, start, char, len);
|
|
784
|
-
buffer[len] = '\0';
|
|
785
|
-
VALUE number = rb_cstr2inum(buffer, 10);
|
|
786
|
-
RB_ALLOCV_END(buffer_v);
|
|
787
|
-
return number;
|
|
801
|
+
return rb_cstr2inum(buffer, 10);
|
|
788
802
|
}
|
|
789
803
|
|
|
790
|
-
static
|
|
791
|
-
json_decode_integer(const char *start, const char *end)
|
|
804
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
792
805
|
{
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
806
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
810
|
+
{
|
|
811
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
812
|
+
if (negative) {
|
|
813
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
796
814
|
}
|
|
797
|
-
return
|
|
815
|
+
return UINT64T2NUM(mantissa);
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
return json_decode_large_integer(start, end - start);
|
|
798
819
|
}
|
|
799
820
|
|
|
800
|
-
static VALUE
|
|
821
|
+
static VALUE json_decode_dnum(const char *buffer)
|
|
801
822
|
{
|
|
802
|
-
|
|
803
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
804
|
-
MEMCPY(buffer, start, char, len);
|
|
805
|
-
buffer[len] = '\0';
|
|
806
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
807
|
-
RB_ALLOCV_END(buffer_v);
|
|
808
|
-
return number;
|
|
823
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
809
824
|
}
|
|
810
825
|
|
|
811
|
-
static VALUE
|
|
826
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
812
827
|
{
|
|
813
|
-
|
|
828
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
829
|
+
}
|
|
814
830
|
|
|
831
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
832
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
833
|
+
*/
|
|
834
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
835
|
+
const char *start, const char *end)
|
|
836
|
+
{
|
|
815
837
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
816
|
-
VALUE text = rb_str_new(start,
|
|
838
|
+
VALUE text = rb_str_new(start, end - start);
|
|
817
839
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
818
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
819
|
-
char buffer[64];
|
|
820
|
-
MEMCPY(buffer, start, char, len);
|
|
821
|
-
buffer[len] = '\0';
|
|
822
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
823
|
-
} else {
|
|
824
|
-
return json_decode_large_float(start, len);
|
|
825
840
|
}
|
|
841
|
+
|
|
842
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
843
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
844
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
845
|
+
return json_decode_large_float(start, end - start);
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
826
849
|
}
|
|
827
850
|
|
|
828
851
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -854,7 +877,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
|
854
877
|
return Qfalse;
|
|
855
878
|
}
|
|
856
879
|
|
|
857
|
-
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
880
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
858
881
|
{
|
|
859
882
|
VALUE message = rb_sprintf(
|
|
860
883
|
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
@@ -865,10 +888,7 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
|
|
|
865
888
|
RB_GC_GUARD(message);
|
|
866
889
|
}
|
|
867
890
|
|
|
868
|
-
|
|
869
|
-
RBIMPL_ATTR_NORETURN()
|
|
870
|
-
#endif
|
|
871
|
-
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
891
|
+
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
872
892
|
{
|
|
873
893
|
VALUE message = rb_sprintf(
|
|
874
894
|
"duplicate key %"PRIsVALUE,
|
|
@@ -908,20 +928,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
908
928
|
return object;
|
|
909
929
|
}
|
|
910
930
|
|
|
911
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
912
|
-
{
|
|
913
|
-
VALUE string;
|
|
914
|
-
bool intern = is_name || config->freeze;
|
|
915
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
916
|
-
if (escaped) {
|
|
917
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
918
|
-
} else {
|
|
919
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
return string;
|
|
923
|
-
}
|
|
924
|
-
|
|
925
931
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
926
932
|
{
|
|
927
933
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -944,17 +950,11 @@ static const bool string_scan_table[256] = {
|
|
|
944
950
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
945
951
|
};
|
|
946
952
|
|
|
947
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
948
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
949
|
-
#else
|
|
950
|
-
#define FORCE_INLINE
|
|
951
|
-
#endif
|
|
952
|
-
|
|
953
953
|
#ifdef HAVE_SIMD
|
|
954
954
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
955
955
|
#endif /* HAVE_SIMD */
|
|
956
956
|
|
|
957
|
-
static
|
|
957
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
958
958
|
{
|
|
959
959
|
#ifdef HAVE_SIMD
|
|
960
960
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -962,7 +962,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
962
962
|
uint64_t mask = 0;
|
|
963
963
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
964
964
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
965
|
-
return
|
|
965
|
+
return true;
|
|
966
966
|
}
|
|
967
967
|
|
|
968
968
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -970,64 +970,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
970
970
|
int mask = 0;
|
|
971
971
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
972
972
|
state->cursor += trailing_zeros(mask);
|
|
973
|
-
return
|
|
973
|
+
return true;
|
|
974
974
|
}
|
|
975
975
|
}
|
|
976
976
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
977
977
|
#endif /* HAVE_SIMD */
|
|
978
978
|
|
|
979
|
-
while (state
|
|
979
|
+
while (!eos(state)) {
|
|
980
980
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
981
|
-
return
|
|
981
|
+
return true;
|
|
982
982
|
}
|
|
983
983
|
state->cursor++;
|
|
984
984
|
}
|
|
985
|
-
return
|
|
985
|
+
return false;
|
|
986
986
|
}
|
|
987
987
|
|
|
988
|
-
static
|
|
988
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
989
989
|
{
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
990
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
991
|
+
JSON_UnescapePositions positions = {
|
|
992
|
+
.size = 0,
|
|
993
|
+
.positions = backslashes,
|
|
994
|
+
.additional_backslashes = 0,
|
|
995
|
+
};
|
|
993
996
|
|
|
994
|
-
|
|
997
|
+
do {
|
|
995
998
|
switch (*state->cursor) {
|
|
996
999
|
case '"': {
|
|
997
|
-
VALUE string =
|
|
1000
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
998
1001
|
state->cursor++;
|
|
999
1002
|
return json_push_value(state, config, string);
|
|
1000
1003
|
}
|
|
1001
1004
|
case '\\': {
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1005
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1006
|
+
backslashes[positions.size] = state->cursor;
|
|
1007
|
+
positions.size++;
|
|
1008
|
+
} else {
|
|
1009
|
+
positions.additional_backslashes++;
|
|
1006
1010
|
}
|
|
1011
|
+
state->cursor++;
|
|
1007
1012
|
break;
|
|
1008
1013
|
}
|
|
1009
1014
|
default:
|
|
1010
|
-
|
|
1015
|
+
if (!config->allow_control_characters) {
|
|
1016
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1017
|
+
}
|
|
1011
1018
|
break;
|
|
1012
1019
|
}
|
|
1013
1020
|
|
|
1014
1021
|
state->cursor++;
|
|
1015
|
-
}
|
|
1022
|
+
} while (string_scan(state));
|
|
1016
1023
|
|
|
1017
1024
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1018
1025
|
return Qfalse;
|
|
1019
1026
|
}
|
|
1020
1027
|
|
|
1028
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1029
|
+
{
|
|
1030
|
+
state->cursor++;
|
|
1031
|
+
const char *start = state->cursor;
|
|
1032
|
+
|
|
1033
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1034
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1038
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1039
|
+
state->cursor++;
|
|
1040
|
+
return json_push_value(state, config, string);
|
|
1041
|
+
}
|
|
1042
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1043
|
+
}
|
|
1044
|
+
|
|
1045
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1046
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1047
|
+
// Additional References:
|
|
1048
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1049
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1050
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1051
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1052
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1053
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1054
|
+
val -= 0x3030303030303030;
|
|
1055
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1056
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1057
|
+
return val;
|
|
1058
|
+
}
|
|
1059
|
+
|
|
1060
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1061
|
+
const uint32_t mask = 0x000000FF;
|
|
1062
|
+
const uint32_t mul1 = 100;
|
|
1063
|
+
val -= 0x30303030;
|
|
1064
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1065
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1066
|
+
return val;
|
|
1067
|
+
}
|
|
1068
|
+
#endif
|
|
1069
|
+
|
|
1070
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1071
|
+
{
|
|
1072
|
+
const char *start = state->cursor;
|
|
1073
|
+
|
|
1074
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1075
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1076
|
+
uint64_t next_8bytes;
|
|
1077
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1078
|
+
|
|
1079
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1080
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1081
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1082
|
+
|
|
1083
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1084
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1085
|
+
state->cursor += 8;
|
|
1086
|
+
continue;
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1090
|
+
|
|
1091
|
+
if (consecutive_digits >= 4) {
|
|
1092
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1093
|
+
state->cursor += 4;
|
|
1094
|
+
consecutive_digits -= 4;
|
|
1095
|
+
}
|
|
1096
|
+
|
|
1097
|
+
while (consecutive_digits) {
|
|
1098
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1099
|
+
consecutive_digits--;
|
|
1100
|
+
state->cursor++;
|
|
1101
|
+
}
|
|
1102
|
+
|
|
1103
|
+
return (int)(state->cursor - start);
|
|
1104
|
+
}
|
|
1105
|
+
#endif
|
|
1106
|
+
|
|
1107
|
+
char next_char;
|
|
1108
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1109
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1110
|
+
state->cursor++;
|
|
1111
|
+
}
|
|
1112
|
+
return (int)(state->cursor - start);
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1116
|
+
{
|
|
1117
|
+
bool integer = true;
|
|
1118
|
+
const char first_digit = *state->cursor;
|
|
1119
|
+
|
|
1120
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1121
|
+
int32_t exponent = 0;
|
|
1122
|
+
int decimal_point_pos = -1;
|
|
1123
|
+
uint64_t mantissa = 0;
|
|
1124
|
+
|
|
1125
|
+
// Parse integer part and extract mantissa digits
|
|
1126
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1127
|
+
|
|
1128
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1129
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1130
|
+
}
|
|
1131
|
+
|
|
1132
|
+
// Parse fractional part
|
|
1133
|
+
if (peek(state) == '.') {
|
|
1134
|
+
integer = false;
|
|
1135
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1136
|
+
state->cursor++;
|
|
1137
|
+
|
|
1138
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1139
|
+
mantissa_digits += fractional_digits;
|
|
1140
|
+
|
|
1141
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1142
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
|
|
1146
|
+
// Parse exponent
|
|
1147
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1148
|
+
integer = false;
|
|
1149
|
+
state->cursor++;
|
|
1150
|
+
|
|
1151
|
+
bool negative_exponent = false;
|
|
1152
|
+
const char next_char = peek(state);
|
|
1153
|
+
if (next_char == '-' || next_char == '+') {
|
|
1154
|
+
negative_exponent = next_char == '-';
|
|
1155
|
+
state->cursor++;
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
uint64_t abs_exponent = 0;
|
|
1159
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1160
|
+
|
|
1161
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1162
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
if (integer) {
|
|
1169
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1170
|
+
}
|
|
1171
|
+
|
|
1172
|
+
// Adjust exponent based on decimal point position
|
|
1173
|
+
if (decimal_point_pos >= 0) {
|
|
1174
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1175
|
+
}
|
|
1176
|
+
|
|
1177
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1181
|
+
{
|
|
1182
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1183
|
+
}
|
|
1184
|
+
|
|
1185
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1186
|
+
{
|
|
1187
|
+
const char *start = state->cursor;
|
|
1188
|
+
state->cursor++;
|
|
1189
|
+
return json_parse_number(state, config, true, start);
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1021
1192
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1022
1193
|
{
|
|
1023
1194
|
json_eat_whitespace(state);
|
|
1024
|
-
if (state->cursor >= state->end) {
|
|
1025
|
-
raise_parse_error("unexpected end of input", state);
|
|
1026
|
-
}
|
|
1027
1195
|
|
|
1028
|
-
switch (
|
|
1196
|
+
switch (peek(state)) {
|
|
1029
1197
|
case 'n':
|
|
1030
|
-
if ((state
|
|
1198
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1031
1199
|
state->cursor += 4;
|
|
1032
1200
|
return json_push_value(state, config, Qnil);
|
|
1033
1201
|
}
|
|
@@ -1035,7 +1203,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1035
1203
|
raise_parse_error("unexpected token %s", state);
|
|
1036
1204
|
break;
|
|
1037
1205
|
case 't':
|
|
1038
|
-
if ((state
|
|
1206
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1039
1207
|
state->cursor += 4;
|
|
1040
1208
|
return json_push_value(state, config, Qtrue);
|
|
1041
1209
|
}
|
|
@@ -1044,7 +1212,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1044
1212
|
break;
|
|
1045
1213
|
case 'f':
|
|
1046
1214
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1047
|
-
if ((state
|
|
1215
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1048
1216
|
state->cursor += 5;
|
|
1049
1217
|
return json_push_value(state, config, Qfalse);
|
|
1050
1218
|
}
|
|
@@ -1053,7 +1221,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1053
1221
|
break;
|
|
1054
1222
|
case 'N':
|
|
1055
1223
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1056
|
-
if (config->allow_nan && (state
|
|
1224
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1057
1225
|
state->cursor += 3;
|
|
1058
1226
|
return json_push_value(state, config, CNaN);
|
|
1059
1227
|
}
|
|
@@ -1061,16 +1229,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1061
1229
|
raise_parse_error("unexpected token %s", state);
|
|
1062
1230
|
break;
|
|
1063
1231
|
case 'I':
|
|
1064
|
-
if (config->allow_nan && (state
|
|
1232
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1065
1233
|
state->cursor += 8;
|
|
1066
1234
|
return json_push_value(state, config, CInfinity);
|
|
1067
1235
|
}
|
|
1068
1236
|
|
|
1069
1237
|
raise_parse_error("unexpected token %s", state);
|
|
1070
1238
|
break;
|
|
1071
|
-
case '-':
|
|
1239
|
+
case '-': {
|
|
1072
1240
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1073
|
-
if ((state
|
|
1241
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1074
1242
|
if (config->allow_nan) {
|
|
1075
1243
|
state->cursor += 9;
|
|
1076
1244
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1078,62 +1246,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1078
1246
|
raise_parse_error("unexpected token %s", state);
|
|
1079
1247
|
}
|
|
1080
1248
|
}
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
bool integer = true;
|
|
1084
|
-
|
|
1085
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1086
|
-
const char *start = state->cursor;
|
|
1087
|
-
state->cursor++;
|
|
1088
|
-
|
|
1089
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1090
|
-
state->cursor++;
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
long integer_length = state->cursor - start;
|
|
1094
|
-
|
|
1095
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1096
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1097
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1098
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1099
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1100
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1104
|
-
integer = false;
|
|
1105
|
-
state->cursor++;
|
|
1106
|
-
|
|
1107
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1108
|
-
raise_parse_error("invalid number: %s", state);
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1112
|
-
state->cursor++;
|
|
1113
|
-
}
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1117
|
-
integer = false;
|
|
1118
|
-
state->cursor++;
|
|
1119
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1120
|
-
state->cursor++;
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1124
|
-
raise_parse_error("invalid number: %s", state);
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1128
|
-
state->cursor++;
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
if (integer) {
|
|
1133
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1134
|
-
}
|
|
1135
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1249
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1250
|
+
break;
|
|
1136
1251
|
}
|
|
1252
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1253
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1254
|
+
break;
|
|
1137
1255
|
case '"': {
|
|
1138
1256
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1139
1257
|
return json_parse_string(state, config, false);
|
|
@@ -1144,7 +1262,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1144
1262
|
json_eat_whitespace(state);
|
|
1145
1263
|
long stack_head = state->stack->head;
|
|
1146
1264
|
|
|
1147
|
-
if ((state
|
|
1265
|
+
if (peek(state) == ']') {
|
|
1148
1266
|
state->cursor++;
|
|
1149
1267
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1150
1268
|
} else {
|
|
@@ -1159,26 +1277,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1159
1277
|
while (true) {
|
|
1160
1278
|
json_eat_whitespace(state);
|
|
1161
1279
|
|
|
1162
|
-
|
|
1163
|
-
if (*state->cursor == ']') {
|
|
1164
|
-
state->cursor++;
|
|
1165
|
-
long count = state->stack->head - stack_head;
|
|
1166
|
-
state->current_nesting--;
|
|
1167
|
-
state->in_array--;
|
|
1168
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1169
|
-
}
|
|
1280
|
+
const char next_char = peek(state);
|
|
1170
1281
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
}
|
|
1282
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1283
|
+
state->cursor++;
|
|
1284
|
+
if (config->allow_trailing_comma) {
|
|
1285
|
+
json_eat_whitespace(state);
|
|
1286
|
+
if (peek(state) == ']') {
|
|
1287
|
+
continue;
|
|
1178
1288
|
}
|
|
1179
|
-
json_parse_any(state, config);
|
|
1180
|
-
continue;
|
|
1181
1289
|
}
|
|
1290
|
+
json_parse_any(state, config);
|
|
1291
|
+
continue;
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
if (next_char == ']') {
|
|
1295
|
+
state->cursor++;
|
|
1296
|
+
long count = state->stack->head - stack_head;
|
|
1297
|
+
state->current_nesting--;
|
|
1298
|
+
state->in_array--;
|
|
1299
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1182
1300
|
}
|
|
1183
1301
|
|
|
1184
1302
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1192,7 +1310,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1192
1310
|
json_eat_whitespace(state);
|
|
1193
1311
|
long stack_head = state->stack->head;
|
|
1194
1312
|
|
|
1195
|
-
if ((state
|
|
1313
|
+
if (peek(state) == '}') {
|
|
1196
1314
|
state->cursor++;
|
|
1197
1315
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1198
1316
|
} else {
|
|
@@ -1201,13 +1319,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1201
1319
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1202
1320
|
}
|
|
1203
1321
|
|
|
1204
|
-
if (
|
|
1322
|
+
if (peek(state) != '"') {
|
|
1205
1323
|
raise_parse_error("expected object key, got %s", state);
|
|
1206
1324
|
}
|
|
1207
1325
|
json_parse_string(state, config, true);
|
|
1208
1326
|
|
|
1209
1327
|
json_eat_whitespace(state);
|
|
1210
|
-
if ((state
|
|
1328
|
+
if (peek(state) != ':') {
|
|
1211
1329
|
raise_parse_error("expected ':' after object key", state);
|
|
1212
1330
|
}
|
|
1213
1331
|
state->cursor++;
|
|
@@ -1218,46 +1336,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1218
1336
|
while (true) {
|
|
1219
1337
|
json_eat_whitespace(state);
|
|
1220
1338
|
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1339
|
+
const char next_char = peek(state);
|
|
1340
|
+
if (next_char == '}') {
|
|
1341
|
+
state->cursor++;
|
|
1342
|
+
state->current_nesting--;
|
|
1343
|
+
size_t count = state->stack->head - stack_head;
|
|
1226
1344
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1345
|
+
// Temporary rewind cursor in case an error is raised
|
|
1346
|
+
const char *final_cursor = state->cursor;
|
|
1347
|
+
state->cursor = object_start_cursor;
|
|
1348
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1349
|
+
state->cursor = final_cursor;
|
|
1232
1350
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1351
|
+
return json_push_value(state, config, object);
|
|
1352
|
+
}
|
|
1235
1353
|
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1354
|
+
if (next_char == ',') {
|
|
1355
|
+
state->cursor++;
|
|
1356
|
+
json_eat_whitespace(state);
|
|
1239
1357
|
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
}
|
|
1358
|
+
if (config->allow_trailing_comma) {
|
|
1359
|
+
if (peek(state) == '}') {
|
|
1360
|
+
continue;
|
|
1244
1361
|
}
|
|
1362
|
+
}
|
|
1245
1363
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1364
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1365
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1366
|
+
}
|
|
1367
|
+
json_parse_string(state, config, true);
|
|
1250
1368
|
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1369
|
+
json_eat_whitespace(state);
|
|
1370
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1371
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1372
|
+
}
|
|
1373
|
+
state->cursor++;
|
|
1256
1374
|
|
|
1257
|
-
|
|
1375
|
+
json_parse_any(state, config);
|
|
1258
1376
|
|
|
1259
|
-
|
|
1260
|
-
}
|
|
1377
|
+
continue;
|
|
1261
1378
|
}
|
|
1262
1379
|
|
|
1263
1380
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1265,18 +1382,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1265
1382
|
break;
|
|
1266
1383
|
}
|
|
1267
1384
|
|
|
1385
|
+
case 0:
|
|
1386
|
+
raise_parse_error("unexpected end of input", state);
|
|
1387
|
+
break;
|
|
1388
|
+
|
|
1268
1389
|
default:
|
|
1269
1390
|
raise_parse_error("unexpected character: %s", state);
|
|
1270
1391
|
break;
|
|
1271
1392
|
}
|
|
1272
1393
|
|
|
1273
1394
|
raise_parse_error("unreachable: %s", state);
|
|
1395
|
+
return Qundef;
|
|
1274
1396
|
}
|
|
1275
1397
|
|
|
1276
1398
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1277
1399
|
{
|
|
1278
1400
|
json_eat_whitespace(state);
|
|
1279
|
-
if (state
|
|
1401
|
+
if (!eos(state)) {
|
|
1280
1402
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1281
1403
|
}
|
|
1282
1404
|
}
|
|
@@ -1313,14 +1435,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1313
1435
|
{
|
|
1314
1436
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1315
1437
|
|
|
1316
|
-
if (key == sym_max_nesting)
|
|
1317
|
-
else if (key == sym_allow_nan)
|
|
1318
|
-
else if (key == sym_allow_trailing_comma)
|
|
1319
|
-
else if (key ==
|
|
1320
|
-
else if (key ==
|
|
1321
|
-
else if (key ==
|
|
1322
|
-
else if (key ==
|
|
1323
|
-
else if (key ==
|
|
1438
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1439
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1440
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1441
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1442
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1443
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1444
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1445
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1446
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1447
|
+
else if (key == sym_decimal_class) {
|
|
1324
1448
|
if (RTEST(val)) {
|
|
1325
1449
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1326
1450
|
config->decimal_class = val;
|
|
@@ -1393,6 +1517,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1393
1517
|
*/
|
|
1394
1518
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1395
1519
|
{
|
|
1520
|
+
rb_check_frozen(self);
|
|
1396
1521
|
GET_PARSER_CONFIG;
|
|
1397
1522
|
|
|
1398
1523
|
parser_config_init(config, opts);
|
|
@@ -1488,7 +1613,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1488
1613
|
JSON_ParserConfig_memsize,
|
|
1489
1614
|
},
|
|
1490
1615
|
0, 0,
|
|
1491
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1616
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1492
1617
|
};
|
|
1493
1618
|
|
|
1494
1619
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1532,16 +1657,14 @@ void Init_parser(void)
|
|
|
1532
1657
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1533
1658
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1534
1659
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1660
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1661
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1535
1662
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1536
1663
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1537
1664
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1538
1665
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1539
1666
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1540
1667
|
|
|
1541
|
-
i_chr = rb_intern("chr");
|
|
1542
|
-
i_aset = rb_intern("[]=");
|
|
1543
|
-
i_aref = rb_intern("[]");
|
|
1544
|
-
i_leftshift = rb_intern("<<");
|
|
1545
1668
|
i_new = rb_intern("new");
|
|
1546
1669
|
i_try_convert = rb_intern("try_convert");
|
|
1547
1670
|
i_uminus = rb_intern("-@");
|