json 2.15.2 → 2.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +49 -1
- data/LEGAL +12 -0
- data/README.md +17 -1
- data/ext/json/ext/fbuffer/fbuffer.h +32 -77
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +281 -494
- data/ext/json/ext/json.h +105 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +615 -477
- data/ext/json/ext/simd/simd.h +42 -22
- data/ext/json/ext/vendor/fpconv.c +3 -3
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/common.rb +69 -26
- data/lib/json/ext/generator/state.rb +5 -1
- data/lib/json/truffle_ruby/generator.rb +65 -25
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +33 -0
- metadata +4 -2
|
@@ -1,50 +1,22 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
40
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
41
13
|
|
|
42
14
|
static int binary_encindex;
|
|
43
15
|
static int utf8_encindex;
|
|
44
16
|
|
|
45
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
18
|
// For TruffleRuby
|
|
47
|
-
void
|
|
19
|
+
static void
|
|
48
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
21
|
{
|
|
50
22
|
long index = 0;
|
|
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
34
|
#endif
|
|
63
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
64
42
|
|
|
65
43
|
/* name cache */
|
|
66
44
|
|
|
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
84
|
cache->entries[index] = rstring;
|
|
107
85
|
}
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
93
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
116
114
|
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
117
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
118
120
|
|
|
119
|
-
static
|
|
121
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
122
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
125
|
+
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
127
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
132
132
|
}
|
|
133
|
+
}
|
|
133
134
|
|
|
135
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
134
137
|
int low = 0;
|
|
135
138
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
139
|
|
|
139
140
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
141
142
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
144
|
|
|
144
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
145
146
|
return entry;
|
|
146
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
147
148
|
low = mid + 1;
|
|
148
149
|
} else {
|
|
149
150
|
high = mid - 1;
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
154
|
VALUE rstring = build_interned_string(str, length);
|
|
160
155
|
|
|
161
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
158
|
}
|
|
168
159
|
return rstring;
|
|
169
160
|
}
|
|
170
161
|
|
|
171
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
163
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
164
|
int low = 0;
|
|
187
165
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
166
|
|
|
191
167
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
193
169
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
171
|
|
|
196
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
197
173
|
return entry;
|
|
198
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
199
175
|
low = mid + 1;
|
|
200
176
|
} else {
|
|
201
177
|
high = mid - 1;
|
|
202
178
|
}
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
182
|
|
|
213
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
185
|
}
|
|
220
186
|
return rsymbol;
|
|
221
187
|
}
|
|
@@ -330,15 +296,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
296
|
}
|
|
331
297
|
}
|
|
332
298
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
299
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
300
|
{
|
|
344
301
|
int len = 1;
|
|
@@ -379,7 +336,8 @@ typedef struct JSON_ParserStruct {
|
|
|
379
336
|
int max_nesting;
|
|
380
337
|
bool allow_nan;
|
|
381
338
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
339
|
+
bool allow_control_characters;
|
|
340
|
+
bool allow_invalid_escape;
|
|
383
341
|
bool symbolize_names;
|
|
384
342
|
bool freeze;
|
|
385
343
|
} JSON_ParserConfig;
|
|
@@ -395,6 +353,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
353
|
int current_nesting;
|
|
396
354
|
} JSON_ParserState;
|
|
397
355
|
|
|
356
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
357
|
+
return state->end - state->cursor;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
361
|
+
return state->cursor >= state->end;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
static inline char peek(JSON_ParserState *state)
|
|
365
|
+
{
|
|
366
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
367
|
+
return 0;
|
|
368
|
+
}
|
|
369
|
+
return *state->cursor;
|
|
370
|
+
}
|
|
371
|
+
|
|
398
372
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
373
|
{
|
|
400
374
|
const char *cursor = state->cursor;
|
|
@@ -428,14 +402,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
428
402
|
|
|
429
403
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
430
404
|
|
|
431
|
-
|
|
432
|
-
RBIMPL_ATTR_NORETURN()
|
|
433
|
-
#endif
|
|
434
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
405
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
|
|
435
406
|
{
|
|
436
407
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
437
|
-
long line, column;
|
|
438
|
-
cursor_position(state, &line, &column);
|
|
439
408
|
|
|
440
409
|
const char *ptr = "EOF";
|
|
441
410
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -470,17 +439,26 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
470
439
|
VALUE msg = rb_sprintf(format, ptr);
|
|
471
440
|
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
|
472
441
|
RB_GC_GUARD(msg);
|
|
442
|
+
return message;
|
|
443
|
+
}
|
|
473
444
|
|
|
445
|
+
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
446
|
+
{
|
|
474
447
|
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
|
475
448
|
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
|
476
449
|
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
|
477
|
-
|
|
450
|
+
return exc;
|
|
478
451
|
}
|
|
479
452
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
453
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
454
|
+
{
|
|
455
|
+
long line, column;
|
|
456
|
+
cursor_position(state, &line, &column);
|
|
457
|
+
VALUE message = build_parse_error_message(format, state, line, column);
|
|
458
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
484
462
|
{
|
|
485
463
|
state->cursor = at;
|
|
486
464
|
raise_parse_error(format, state);
|
|
@@ -505,23 +483,24 @@ static const signed char digit_values[256] = {
|
|
|
505
483
|
-1, -1, -1, -1, -1, -1, -1
|
|
506
484
|
};
|
|
507
485
|
|
|
508
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
509
|
-
{
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
486
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
487
|
+
{
|
|
488
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
489
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
493
|
+
|
|
494
|
+
const signed char b0 = digit_values[p[0]];
|
|
495
|
+
const signed char b1 = digit_values[p[1]];
|
|
496
|
+
const signed char b2 = digit_values[p[2]];
|
|
497
|
+
const signed char b3 = digit_values[p[3]];
|
|
498
|
+
|
|
499
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
500
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
525
504
|
}
|
|
526
505
|
|
|
527
506
|
#define GET_PARSER_CONFIG \
|
|
@@ -530,61 +509,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
509
|
|
|
531
510
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
511
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
512
|
static void
|
|
542
513
|
json_eat_comments(JSON_ParserState *state)
|
|
543
514
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
515
|
+
const char *start = state->cursor;
|
|
516
|
+
state->cursor++;
|
|
517
|
+
|
|
518
|
+
switch (peek(state)) {
|
|
519
|
+
case '/': {
|
|
520
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
521
|
+
if (!state->cursor) {
|
|
522
|
+
state->cursor = state->end;
|
|
523
|
+
} else {
|
|
524
|
+
state->cursor++;
|
|
554
525
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
526
|
+
break;
|
|
527
|
+
}
|
|
528
|
+
case '*': {
|
|
529
|
+
state->cursor++;
|
|
530
|
+
|
|
531
|
+
while (true) {
|
|
532
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
533
|
+
if (!next_match) {
|
|
534
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
state->cursor = next_match + 1;
|
|
538
|
+
if (peek(state) == '/') {
|
|
539
|
+
state->cursor++;
|
|
540
|
+
break;
|
|
568
541
|
}
|
|
569
|
-
break;
|
|
570
542
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
543
|
+
break;
|
|
574
544
|
}
|
|
575
|
-
|
|
576
|
-
|
|
545
|
+
default:
|
|
546
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
547
|
+
break;
|
|
577
548
|
}
|
|
578
549
|
}
|
|
579
550
|
|
|
580
|
-
static
|
|
551
|
+
ALWAYS_INLINE(static) void
|
|
581
552
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
553
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
554
|
+
while (true) {
|
|
555
|
+
switch (peek(state)) {
|
|
556
|
+
case ' ':
|
|
557
|
+
state->cursor++;
|
|
558
|
+
break;
|
|
559
|
+
case '\n':
|
|
560
|
+
state->cursor++;
|
|
561
|
+
|
|
562
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
563
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
564
|
+
while (rest(state) > 8) {
|
|
565
|
+
uint64_t chunk;
|
|
566
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
567
|
+
if (chunk == 0x2020202020202020) {
|
|
568
|
+
state->cursor += 8;
|
|
569
|
+
continue;
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
573
|
+
state->cursor += consecutive_spaces;
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
#endif
|
|
577
|
+
break;
|
|
578
|
+
case '\t':
|
|
579
|
+
case '\r':
|
|
580
|
+
state->cursor++;
|
|
581
|
+
break;
|
|
582
|
+
case '/':
|
|
583
|
+
json_eat_comments(state);
|
|
584
|
+
break;
|
|
585
|
+
|
|
586
|
+
default:
|
|
587
|
+
return;
|
|
588
588
|
}
|
|
589
589
|
}
|
|
590
590
|
}
|
|
@@ -615,11 +615,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
615
|
return result;
|
|
616
616
|
}
|
|
617
617
|
|
|
618
|
-
static inline
|
|
618
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
619
|
+
{
|
|
620
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
621
|
+
// Simple heuristics:
|
|
622
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
623
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
624
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
619
628
|
{
|
|
629
|
+
bool intern = is_name || config->freeze;
|
|
630
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
631
|
size_t bufferSize = stringEnd - string;
|
|
621
632
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
633
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
634
|
VALUE cached_key;
|
|
624
635
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
636
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,109 +646,127 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
646
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
647
|
}
|
|
637
648
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
649
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
650
|
+
typedef struct _json_unescape_positions {
|
|
651
|
+
long size;
|
|
652
|
+
const char **positions;
|
|
653
|
+
unsigned long additional_backslashes;
|
|
654
|
+
} JSON_UnescapePositions;
|
|
645
655
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
656
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
657
|
+
{
|
|
658
|
+
while (positions->size) {
|
|
659
|
+
positions->size--;
|
|
660
|
+
const char *next_position = positions->positions[0];
|
|
661
|
+
positions->positions++;
|
|
662
|
+
if (next_position >= pe) {
|
|
663
|
+
return next_position;
|
|
652
664
|
}
|
|
665
|
+
}
|
|
653
666
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
667
|
+
if (positions->additional_backslashes) {
|
|
668
|
+
positions->additional_backslashes--;
|
|
669
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
670
|
}
|
|
658
671
|
|
|
672
|
+
return NULL;
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
676
|
+
{
|
|
677
|
+
bool intern = is_name || config->freeze;
|
|
678
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
679
|
+
size_t bufferSize = stringEnd - string;
|
|
680
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
681
|
+
char *buffer;
|
|
682
|
+
|
|
659
683
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
684
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
685
|
buffer = RSTRING_PTR(result);
|
|
662
686
|
bufferStart = buffer;
|
|
663
687
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
688
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
689
|
+
|
|
690
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
691
|
if (pe > p) {
|
|
668
692
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
693
|
buffer += pe - p;
|
|
670
694
|
}
|
|
671
695
|
switch (*++pe) {
|
|
696
|
+
case '"':
|
|
697
|
+
case '/':
|
|
698
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
699
|
+
break;
|
|
700
|
+
case '\\':
|
|
701
|
+
APPEND_CHAR('\\');
|
|
702
|
+
break;
|
|
672
703
|
case 'n':
|
|
673
|
-
|
|
704
|
+
APPEND_CHAR('\n');
|
|
674
705
|
break;
|
|
675
706
|
case 'r':
|
|
676
|
-
|
|
707
|
+
APPEND_CHAR('\r');
|
|
677
708
|
break;
|
|
678
709
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
710
|
+
APPEND_CHAR('\t');
|
|
686
711
|
break;
|
|
687
712
|
case 'b':
|
|
688
|
-
|
|
713
|
+
APPEND_CHAR('\b');
|
|
689
714
|
break;
|
|
690
715
|
case 'f':
|
|
691
|
-
|
|
716
|
+
APPEND_CHAR('\f');
|
|
692
717
|
break;
|
|
693
|
-
case 'u':
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if (
|
|
712
|
-
raise_parse_error_at("
|
|
713
|
-
}
|
|
714
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
716
|
-
|
|
717
|
-
if ((sur & 0xFC00) != 0xDC00) {
|
|
718
|
-
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
719
|
-
}
|
|
720
|
-
|
|
721
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
722
|
-
| (sur & 0x3FF));
|
|
723
|
-
pe += 5;
|
|
724
|
-
} else {
|
|
725
|
-
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
726
|
-
break;
|
|
718
|
+
case 'u': {
|
|
719
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
720
|
+
pe += 3;
|
|
721
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
722
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
723
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
724
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
725
|
+
* final codepoint.
|
|
726
|
+
*
|
|
727
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
728
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
729
|
+
* Area".
|
|
730
|
+
*/
|
|
731
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
732
|
+
pe++;
|
|
733
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
734
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
735
|
+
|
|
736
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
737
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
727
738
|
}
|
|
739
|
+
|
|
740
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
741
|
+
pe += 5;
|
|
742
|
+
} else {
|
|
743
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
744
|
+
break;
|
|
728
745
|
}
|
|
729
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
730
|
-
unescape = buf;
|
|
731
746
|
}
|
|
747
|
+
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
749
|
+
buffer += unescape_len;
|
|
750
|
+
p = ++pe;
|
|
732
751
|
break;
|
|
752
|
+
}
|
|
733
753
|
default:
|
|
734
|
-
|
|
735
|
-
|
|
754
|
+
if ((unsigned char)*pe < 0x20) {
|
|
755
|
+
if (!config->allow_control_characters) {
|
|
756
|
+
if (*pe == '\n') {
|
|
757
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
758
|
+
}
|
|
759
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
760
|
+
}
|
|
761
|
+
} else if (config->allow_invalid_escape) {
|
|
762
|
+
APPEND_CHAR(*pe);
|
|
763
|
+
} else {
|
|
764
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
765
|
+
}
|
|
766
|
+
break;
|
|
736
767
|
}
|
|
737
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
738
|
-
buffer += unescape_len;
|
|
739
|
-
p = ++pe;
|
|
740
768
|
}
|
|
769
|
+
#undef APPEND_CHAR
|
|
741
770
|
|
|
742
771
|
if (stringEnd > p) {
|
|
743
772
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -748,81 +777,85 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
748
777
|
if (symbolize) {
|
|
749
778
|
result = rb_str_intern(result);
|
|
750
779
|
} else if (intern) {
|
|
751
|
-
result =
|
|
780
|
+
result = rb_str_to_interned_str(result);
|
|
752
781
|
}
|
|
753
782
|
|
|
754
783
|
return result;
|
|
755
784
|
}
|
|
756
785
|
|
|
757
786
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
758
|
-
|
|
759
|
-
{
|
|
760
|
-
bool negative = false;
|
|
761
|
-
if (*p == '-') {
|
|
762
|
-
negative = true;
|
|
763
|
-
p++;
|
|
764
|
-
}
|
|
787
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
765
788
|
|
|
766
|
-
|
|
767
|
-
while (p < pe) {
|
|
768
|
-
memo *= 10;
|
|
769
|
-
memo += *p - '0';
|
|
770
|
-
p++;
|
|
771
|
-
}
|
|
789
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
772
790
|
|
|
773
|
-
|
|
774
|
-
|
|
791
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
792
|
+
{
|
|
793
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
794
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
795
|
+
MEMCPY(buffer, start, char, len);
|
|
796
|
+
buffer[len] = '\0';
|
|
797
|
+
return func(buffer);
|
|
798
|
+
} else {
|
|
799
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
800
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
801
|
+
MEMCPY(buffer, start, char, len);
|
|
802
|
+
buffer[len] = '\0';
|
|
803
|
+
VALUE number = func(buffer);
|
|
804
|
+
RB_GC_GUARD(buffer_v);
|
|
805
|
+
return number;
|
|
775
806
|
}
|
|
776
|
-
return LL2NUM(memo);
|
|
777
807
|
}
|
|
778
808
|
|
|
779
|
-
static VALUE
|
|
809
|
+
static VALUE json_decode_inum(const char *buffer)
|
|
780
810
|
{
|
|
781
|
-
|
|
782
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
783
|
-
MEMCPY(buffer, start, char, len);
|
|
784
|
-
buffer[len] = '\0';
|
|
785
|
-
VALUE number = rb_cstr2inum(buffer, 10);
|
|
786
|
-
RB_ALLOCV_END(buffer_v);
|
|
787
|
-
return number;
|
|
811
|
+
return rb_cstr2inum(buffer, 10);
|
|
788
812
|
}
|
|
789
813
|
|
|
790
|
-
static
|
|
791
|
-
json_decode_integer(const char *start, const char *end)
|
|
814
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
792
815
|
{
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
816
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
820
|
+
{
|
|
821
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
822
|
+
if (negative) {
|
|
823
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
796
824
|
}
|
|
797
|
-
return
|
|
825
|
+
return UINT64T2NUM(mantissa);
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
return json_decode_large_integer(start, end - start);
|
|
798
829
|
}
|
|
799
830
|
|
|
800
|
-
static VALUE
|
|
831
|
+
static VALUE json_decode_dnum(const char *buffer)
|
|
801
832
|
{
|
|
802
|
-
|
|
803
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
804
|
-
MEMCPY(buffer, start, char, len);
|
|
805
|
-
buffer[len] = '\0';
|
|
806
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
807
|
-
RB_ALLOCV_END(buffer_v);
|
|
808
|
-
return number;
|
|
833
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
809
834
|
}
|
|
810
835
|
|
|
811
|
-
static VALUE
|
|
836
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
812
837
|
{
|
|
813
|
-
|
|
838
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
839
|
+
}
|
|
814
840
|
|
|
841
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
842
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
843
|
+
*/
|
|
844
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
845
|
+
const char *start, const char *end)
|
|
846
|
+
{
|
|
815
847
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
816
|
-
VALUE text = rb_str_new(start,
|
|
848
|
+
VALUE text = rb_str_new(start, end - start);
|
|
817
849
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
818
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
819
|
-
char buffer[64];
|
|
820
|
-
MEMCPY(buffer, start, char, len);
|
|
821
|
-
buffer[len] = '\0';
|
|
822
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
823
|
-
} else {
|
|
824
|
-
return json_decode_large_float(start, len);
|
|
825
850
|
}
|
|
851
|
+
|
|
852
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
853
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
854
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
855
|
+
return json_decode_large_float(start, end - start);
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
826
859
|
}
|
|
827
860
|
|
|
828
861
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -854,7 +887,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
|
854
887
|
return Qfalse;
|
|
855
888
|
}
|
|
856
889
|
|
|
857
|
-
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
890
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
858
891
|
{
|
|
859
892
|
VALUE message = rb_sprintf(
|
|
860
893
|
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
@@ -865,16 +898,18 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
|
|
|
865
898
|
RB_GC_GUARD(message);
|
|
866
899
|
}
|
|
867
900
|
|
|
868
|
-
|
|
869
|
-
RBIMPL_ATTR_NORETURN()
|
|
870
|
-
#endif
|
|
871
|
-
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
901
|
+
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
872
902
|
{
|
|
873
903
|
VALUE message = rb_sprintf(
|
|
874
904
|
"duplicate key %"PRIsVALUE,
|
|
875
905
|
rb_inspect(duplicate_key)
|
|
876
906
|
);
|
|
877
907
|
|
|
908
|
+
long line, column;
|
|
909
|
+
cursor_position(state, &line, &column);
|
|
910
|
+
rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
|
|
911
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
912
|
+
|
|
878
913
|
raise_parse_error(RSTRING_PTR(message), state);
|
|
879
914
|
RB_GC_GUARD(message);
|
|
880
915
|
}
|
|
@@ -908,20 +943,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
908
943
|
return object;
|
|
909
944
|
}
|
|
910
945
|
|
|
911
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
912
|
-
{
|
|
913
|
-
VALUE string;
|
|
914
|
-
bool intern = is_name || config->freeze;
|
|
915
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
916
|
-
if (escaped) {
|
|
917
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
918
|
-
} else {
|
|
919
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
return string;
|
|
923
|
-
}
|
|
924
|
-
|
|
925
946
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
926
947
|
{
|
|
927
948
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -944,17 +965,11 @@ static const bool string_scan_table[256] = {
|
|
|
944
965
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
945
966
|
};
|
|
946
967
|
|
|
947
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
948
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
949
|
-
#else
|
|
950
|
-
#define FORCE_INLINE
|
|
951
|
-
#endif
|
|
952
|
-
|
|
953
968
|
#ifdef HAVE_SIMD
|
|
954
969
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
955
970
|
#endif /* HAVE_SIMD */
|
|
956
971
|
|
|
957
|
-
static
|
|
972
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
958
973
|
{
|
|
959
974
|
#ifdef HAVE_SIMD
|
|
960
975
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -962,7 +977,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
962
977
|
uint64_t mask = 0;
|
|
963
978
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
964
979
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
965
|
-
return
|
|
980
|
+
return true;
|
|
966
981
|
}
|
|
967
982
|
|
|
968
983
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -970,64 +985,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
970
985
|
int mask = 0;
|
|
971
986
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
972
987
|
state->cursor += trailing_zeros(mask);
|
|
973
|
-
return
|
|
988
|
+
return true;
|
|
974
989
|
}
|
|
975
990
|
}
|
|
976
991
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
977
992
|
#endif /* HAVE_SIMD */
|
|
978
993
|
|
|
979
|
-
while (state
|
|
994
|
+
while (!eos(state)) {
|
|
980
995
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
981
|
-
return
|
|
996
|
+
return true;
|
|
982
997
|
}
|
|
983
998
|
state->cursor++;
|
|
984
999
|
}
|
|
985
|
-
return
|
|
1000
|
+
return false;
|
|
986
1001
|
}
|
|
987
1002
|
|
|
988
|
-
static
|
|
1003
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
989
1004
|
{
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
1005
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
1006
|
+
JSON_UnescapePositions positions = {
|
|
1007
|
+
.size = 0,
|
|
1008
|
+
.positions = backslashes,
|
|
1009
|
+
.additional_backslashes = 0,
|
|
1010
|
+
};
|
|
993
1011
|
|
|
994
|
-
|
|
1012
|
+
do {
|
|
995
1013
|
switch (*state->cursor) {
|
|
996
1014
|
case '"': {
|
|
997
|
-
VALUE string =
|
|
1015
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
998
1016
|
state->cursor++;
|
|
999
1017
|
return json_push_value(state, config, string);
|
|
1000
1018
|
}
|
|
1001
1019
|
case '\\': {
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1020
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1021
|
+
backslashes[positions.size] = state->cursor;
|
|
1022
|
+
positions.size++;
|
|
1023
|
+
} else {
|
|
1024
|
+
positions.additional_backslashes++;
|
|
1006
1025
|
}
|
|
1026
|
+
state->cursor++;
|
|
1007
1027
|
break;
|
|
1008
1028
|
}
|
|
1009
1029
|
default:
|
|
1010
|
-
|
|
1030
|
+
if (!config->allow_control_characters) {
|
|
1031
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1032
|
+
}
|
|
1011
1033
|
break;
|
|
1012
1034
|
}
|
|
1013
1035
|
|
|
1014
1036
|
state->cursor++;
|
|
1015
|
-
}
|
|
1037
|
+
} while (string_scan(state));
|
|
1016
1038
|
|
|
1017
1039
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1018
1040
|
return Qfalse;
|
|
1019
1041
|
}
|
|
1020
1042
|
|
|
1043
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1044
|
+
{
|
|
1045
|
+
state->cursor++;
|
|
1046
|
+
const char *start = state->cursor;
|
|
1047
|
+
|
|
1048
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1049
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1053
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1054
|
+
state->cursor++;
|
|
1055
|
+
return json_push_value(state, config, string);
|
|
1056
|
+
}
|
|
1057
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1058
|
+
}
|
|
1059
|
+
|
|
1060
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1061
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1062
|
+
// Additional References:
|
|
1063
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1064
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1065
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1066
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1067
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1068
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1069
|
+
val -= 0x3030303030303030;
|
|
1070
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1071
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1072
|
+
return val;
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1076
|
+
const uint32_t mask = 0x000000FF;
|
|
1077
|
+
const uint32_t mul1 = 100;
|
|
1078
|
+
val -= 0x30303030;
|
|
1079
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1080
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1081
|
+
return val;
|
|
1082
|
+
}
|
|
1083
|
+
#endif
|
|
1084
|
+
|
|
1085
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1086
|
+
{
|
|
1087
|
+
const char *start = state->cursor;
|
|
1088
|
+
|
|
1089
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1090
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1091
|
+
uint64_t next_8bytes;
|
|
1092
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1093
|
+
|
|
1094
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1095
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1096
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1097
|
+
|
|
1098
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1099
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1100
|
+
state->cursor += 8;
|
|
1101
|
+
continue;
|
|
1102
|
+
}
|
|
1103
|
+
|
|
1104
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1105
|
+
|
|
1106
|
+
if (consecutive_digits >= 4) {
|
|
1107
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1108
|
+
state->cursor += 4;
|
|
1109
|
+
consecutive_digits -= 4;
|
|
1110
|
+
}
|
|
1111
|
+
|
|
1112
|
+
while (consecutive_digits) {
|
|
1113
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1114
|
+
consecutive_digits--;
|
|
1115
|
+
state->cursor++;
|
|
1116
|
+
}
|
|
1117
|
+
|
|
1118
|
+
return (int)(state->cursor - start);
|
|
1119
|
+
}
|
|
1120
|
+
#endif
|
|
1121
|
+
|
|
1122
|
+
char next_char;
|
|
1123
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1124
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1125
|
+
state->cursor++;
|
|
1126
|
+
}
|
|
1127
|
+
return (int)(state->cursor - start);
|
|
1128
|
+
}
|
|
1129
|
+
|
|
1130
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1131
|
+
{
|
|
1132
|
+
bool integer = true;
|
|
1133
|
+
const char first_digit = *state->cursor;
|
|
1134
|
+
|
|
1135
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1136
|
+
int32_t exponent = 0;
|
|
1137
|
+
int decimal_point_pos = -1;
|
|
1138
|
+
uint64_t mantissa = 0;
|
|
1139
|
+
|
|
1140
|
+
// Parse integer part and extract mantissa digits
|
|
1141
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1142
|
+
|
|
1143
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1144
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
// Parse fractional part
|
|
1148
|
+
if (peek(state) == '.') {
|
|
1149
|
+
integer = false;
|
|
1150
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1151
|
+
state->cursor++;
|
|
1152
|
+
|
|
1153
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1154
|
+
mantissa_digits += fractional_digits;
|
|
1155
|
+
|
|
1156
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1157
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1158
|
+
}
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1161
|
+
// Parse exponent
|
|
1162
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1163
|
+
integer = false;
|
|
1164
|
+
state->cursor++;
|
|
1165
|
+
|
|
1166
|
+
bool negative_exponent = false;
|
|
1167
|
+
const char next_char = peek(state);
|
|
1168
|
+
if (next_char == '-' || next_char == '+') {
|
|
1169
|
+
negative_exponent = next_char == '-';
|
|
1170
|
+
state->cursor++;
|
|
1171
|
+
}
|
|
1172
|
+
|
|
1173
|
+
uint64_t abs_exponent = 0;
|
|
1174
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1175
|
+
|
|
1176
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1177
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1183
|
+
if (integer) {
|
|
1184
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1185
|
+
}
|
|
1186
|
+
|
|
1187
|
+
// Adjust exponent based on decimal point position
|
|
1188
|
+
if (decimal_point_pos >= 0) {
|
|
1189
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1196
|
+
{
|
|
1197
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1201
|
+
{
|
|
1202
|
+
const char *start = state->cursor;
|
|
1203
|
+
state->cursor++;
|
|
1204
|
+
return json_parse_number(state, config, true, start);
|
|
1205
|
+
}
|
|
1206
|
+
|
|
1021
1207
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1022
1208
|
{
|
|
1023
1209
|
json_eat_whitespace(state);
|
|
1024
|
-
if (state->cursor >= state->end) {
|
|
1025
|
-
raise_parse_error("unexpected end of input", state);
|
|
1026
|
-
}
|
|
1027
1210
|
|
|
1028
|
-
switch (
|
|
1211
|
+
switch (peek(state)) {
|
|
1029
1212
|
case 'n':
|
|
1030
|
-
if ((state
|
|
1213
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1031
1214
|
state->cursor += 4;
|
|
1032
1215
|
return json_push_value(state, config, Qnil);
|
|
1033
1216
|
}
|
|
@@ -1035,7 +1218,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1035
1218
|
raise_parse_error("unexpected token %s", state);
|
|
1036
1219
|
break;
|
|
1037
1220
|
case 't':
|
|
1038
|
-
if ((state
|
|
1221
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1039
1222
|
state->cursor += 4;
|
|
1040
1223
|
return json_push_value(state, config, Qtrue);
|
|
1041
1224
|
}
|
|
@@ -1044,7 +1227,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1044
1227
|
break;
|
|
1045
1228
|
case 'f':
|
|
1046
1229
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1047
|
-
if ((state
|
|
1230
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1048
1231
|
state->cursor += 5;
|
|
1049
1232
|
return json_push_value(state, config, Qfalse);
|
|
1050
1233
|
}
|
|
@@ -1053,7 +1236,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1053
1236
|
break;
|
|
1054
1237
|
case 'N':
|
|
1055
1238
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1056
|
-
if (config->allow_nan && (state
|
|
1239
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1057
1240
|
state->cursor += 3;
|
|
1058
1241
|
return json_push_value(state, config, CNaN);
|
|
1059
1242
|
}
|
|
@@ -1061,16 +1244,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1061
1244
|
raise_parse_error("unexpected token %s", state);
|
|
1062
1245
|
break;
|
|
1063
1246
|
case 'I':
|
|
1064
|
-
if (config->allow_nan && (state
|
|
1247
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1065
1248
|
state->cursor += 8;
|
|
1066
1249
|
return json_push_value(state, config, CInfinity);
|
|
1067
1250
|
}
|
|
1068
1251
|
|
|
1069
1252
|
raise_parse_error("unexpected token %s", state);
|
|
1070
1253
|
break;
|
|
1071
|
-
case '-':
|
|
1254
|
+
case '-': {
|
|
1072
1255
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1073
|
-
if ((state
|
|
1256
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1074
1257
|
if (config->allow_nan) {
|
|
1075
1258
|
state->cursor += 9;
|
|
1076
1259
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1078,62 +1261,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1078
1261
|
raise_parse_error("unexpected token %s", state);
|
|
1079
1262
|
}
|
|
1080
1263
|
}
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
bool integer = true;
|
|
1084
|
-
|
|
1085
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1086
|
-
const char *start = state->cursor;
|
|
1087
|
-
state->cursor++;
|
|
1088
|
-
|
|
1089
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1090
|
-
state->cursor++;
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
long integer_length = state->cursor - start;
|
|
1094
|
-
|
|
1095
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1096
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1097
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1098
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1099
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1100
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1104
|
-
integer = false;
|
|
1105
|
-
state->cursor++;
|
|
1106
|
-
|
|
1107
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1108
|
-
raise_parse_error("invalid number: %s", state);
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1112
|
-
state->cursor++;
|
|
1113
|
-
}
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1117
|
-
integer = false;
|
|
1118
|
-
state->cursor++;
|
|
1119
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1120
|
-
state->cursor++;
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1124
|
-
raise_parse_error("invalid number: %s", state);
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1128
|
-
state->cursor++;
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
if (integer) {
|
|
1133
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1134
|
-
}
|
|
1135
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1264
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1265
|
+
break;
|
|
1136
1266
|
}
|
|
1267
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1268
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1269
|
+
break;
|
|
1137
1270
|
case '"': {
|
|
1138
1271
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1139
1272
|
return json_parse_string(state, config, false);
|
|
@@ -1144,7 +1277,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1144
1277
|
json_eat_whitespace(state);
|
|
1145
1278
|
long stack_head = state->stack->head;
|
|
1146
1279
|
|
|
1147
|
-
if ((state
|
|
1280
|
+
if (peek(state) == ']') {
|
|
1148
1281
|
state->cursor++;
|
|
1149
1282
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1150
1283
|
} else {
|
|
@@ -1159,26 +1292,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1159
1292
|
while (true) {
|
|
1160
1293
|
json_eat_whitespace(state);
|
|
1161
1294
|
|
|
1162
|
-
|
|
1163
|
-
if (*state->cursor == ']') {
|
|
1164
|
-
state->cursor++;
|
|
1165
|
-
long count = state->stack->head - stack_head;
|
|
1166
|
-
state->current_nesting--;
|
|
1167
|
-
state->in_array--;
|
|
1168
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1169
|
-
}
|
|
1295
|
+
const char next_char = peek(state);
|
|
1170
1296
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
}
|
|
1297
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1298
|
+
state->cursor++;
|
|
1299
|
+
if (config->allow_trailing_comma) {
|
|
1300
|
+
json_eat_whitespace(state);
|
|
1301
|
+
if (peek(state) == ']') {
|
|
1302
|
+
continue;
|
|
1178
1303
|
}
|
|
1179
|
-
json_parse_any(state, config);
|
|
1180
|
-
continue;
|
|
1181
1304
|
}
|
|
1305
|
+
json_parse_any(state, config);
|
|
1306
|
+
continue;
|
|
1307
|
+
}
|
|
1308
|
+
|
|
1309
|
+
if (next_char == ']') {
|
|
1310
|
+
state->cursor++;
|
|
1311
|
+
long count = state->stack->head - stack_head;
|
|
1312
|
+
state->current_nesting--;
|
|
1313
|
+
state->in_array--;
|
|
1314
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1182
1315
|
}
|
|
1183
1316
|
|
|
1184
1317
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1192,7 +1325,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1192
1325
|
json_eat_whitespace(state);
|
|
1193
1326
|
long stack_head = state->stack->head;
|
|
1194
1327
|
|
|
1195
|
-
if ((state
|
|
1328
|
+
if (peek(state) == '}') {
|
|
1196
1329
|
state->cursor++;
|
|
1197
1330
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1198
1331
|
} else {
|
|
@@ -1201,13 +1334,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1201
1334
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1202
1335
|
}
|
|
1203
1336
|
|
|
1204
|
-
if (
|
|
1337
|
+
if (peek(state) != '"') {
|
|
1205
1338
|
raise_parse_error("expected object key, got %s", state);
|
|
1206
1339
|
}
|
|
1207
1340
|
json_parse_string(state, config, true);
|
|
1208
1341
|
|
|
1209
1342
|
json_eat_whitespace(state);
|
|
1210
|
-
if ((state
|
|
1343
|
+
if (peek(state) != ':') {
|
|
1211
1344
|
raise_parse_error("expected ':' after object key", state);
|
|
1212
1345
|
}
|
|
1213
1346
|
state->cursor++;
|
|
@@ -1218,46 +1351,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1218
1351
|
while (true) {
|
|
1219
1352
|
json_eat_whitespace(state);
|
|
1220
1353
|
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1354
|
+
const char next_char = peek(state);
|
|
1355
|
+
if (next_char == '}') {
|
|
1356
|
+
state->cursor++;
|
|
1357
|
+
state->current_nesting--;
|
|
1358
|
+
size_t count = state->stack->head - stack_head;
|
|
1226
1359
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1360
|
+
// Temporary rewind cursor in case an error is raised
|
|
1361
|
+
const char *final_cursor = state->cursor;
|
|
1362
|
+
state->cursor = object_start_cursor;
|
|
1363
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1364
|
+
state->cursor = final_cursor;
|
|
1232
1365
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1366
|
+
return json_push_value(state, config, object);
|
|
1367
|
+
}
|
|
1235
1368
|
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1369
|
+
if (next_char == ',') {
|
|
1370
|
+
state->cursor++;
|
|
1371
|
+
json_eat_whitespace(state);
|
|
1239
1372
|
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
}
|
|
1373
|
+
if (config->allow_trailing_comma) {
|
|
1374
|
+
if (peek(state) == '}') {
|
|
1375
|
+
continue;
|
|
1244
1376
|
}
|
|
1377
|
+
}
|
|
1245
1378
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1379
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1380
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1381
|
+
}
|
|
1382
|
+
json_parse_string(state, config, true);
|
|
1250
1383
|
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1384
|
+
json_eat_whitespace(state);
|
|
1385
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1386
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1387
|
+
}
|
|
1388
|
+
state->cursor++;
|
|
1256
1389
|
|
|
1257
|
-
|
|
1390
|
+
json_parse_any(state, config);
|
|
1258
1391
|
|
|
1259
|
-
|
|
1260
|
-
}
|
|
1392
|
+
continue;
|
|
1261
1393
|
}
|
|
1262
1394
|
|
|
1263
1395
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1265,18 +1397,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1265
1397
|
break;
|
|
1266
1398
|
}
|
|
1267
1399
|
|
|
1400
|
+
case 0:
|
|
1401
|
+
raise_parse_error("unexpected end of input", state);
|
|
1402
|
+
break;
|
|
1403
|
+
|
|
1268
1404
|
default:
|
|
1269
1405
|
raise_parse_error("unexpected character: %s", state);
|
|
1270
1406
|
break;
|
|
1271
1407
|
}
|
|
1272
1408
|
|
|
1273
1409
|
raise_parse_error("unreachable: %s", state);
|
|
1410
|
+
return Qundef;
|
|
1274
1411
|
}
|
|
1275
1412
|
|
|
1276
1413
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1277
1414
|
{
|
|
1278
1415
|
json_eat_whitespace(state);
|
|
1279
|
-
if (state
|
|
1416
|
+
if (!eos(state)) {
|
|
1280
1417
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1281
1418
|
}
|
|
1282
1419
|
}
|
|
@@ -1313,14 +1450,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1313
1450
|
{
|
|
1314
1451
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1315
1452
|
|
|
1316
|
-
if (key == sym_max_nesting)
|
|
1317
|
-
else if (key == sym_allow_nan)
|
|
1318
|
-
else if (key == sym_allow_trailing_comma)
|
|
1319
|
-
else if (key ==
|
|
1320
|
-
else if (key ==
|
|
1321
|
-
else if (key ==
|
|
1322
|
-
else if (key ==
|
|
1323
|
-
else if (key ==
|
|
1453
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1454
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1455
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1456
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1457
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1458
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1459
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1460
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1461
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1462
|
+
else if (key == sym_decimal_class) {
|
|
1324
1463
|
if (RTEST(val)) {
|
|
1325
1464
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1326
1465
|
config->decimal_class = val;
|
|
@@ -1393,6 +1532,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1393
1532
|
*/
|
|
1394
1533
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1395
1534
|
{
|
|
1535
|
+
rb_check_frozen(self);
|
|
1396
1536
|
GET_PARSER_CONFIG;
|
|
1397
1537
|
|
|
1398
1538
|
parser_config_init(config, opts);
|
|
@@ -1488,7 +1628,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1488
1628
|
JSON_ParserConfig_memsize,
|
|
1489
1629
|
},
|
|
1490
1630
|
0, 0,
|
|
1491
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1631
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1492
1632
|
};
|
|
1493
1633
|
|
|
1494
1634
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1532,16 +1672,14 @@ void Init_parser(void)
|
|
|
1532
1672
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1533
1673
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1534
1674
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1675
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1676
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1535
1677
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1536
1678
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1537
1679
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1538
1680
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1539
1681
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1540
1682
|
|
|
1541
|
-
i_chr = rb_intern("chr");
|
|
1542
|
-
i_aset = rb_intern("[]=");
|
|
1543
|
-
i_aref = rb_intern("[]");
|
|
1544
|
-
i_leftshift = rb_intern("<<");
|
|
1545
1683
|
i_new = rb_intern("new");
|
|
1546
1684
|
i_try_convert = rb_intern("try_convert");
|
|
1547
1685
|
i_uminus = rb_intern("-@");
|