json 2.13.2 → 2.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +98 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +47 -66
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +375 -552
- data/ext/json/ext/json.h +105 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +619 -474
- data/ext/json/ext/simd/simd.h +42 -22
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +101 -33
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +126 -64
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +56 -1
- metadata +6 -3
|
@@ -1,50 +1,22 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
40
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
41
13
|
|
|
42
14
|
static int binary_encindex;
|
|
43
15
|
static int utf8_encindex;
|
|
44
16
|
|
|
45
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
18
|
// For TruffleRuby
|
|
47
|
-
void
|
|
19
|
+
static void
|
|
48
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
21
|
{
|
|
50
22
|
long index = 0;
|
|
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
34
|
#endif
|
|
63
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
64
42
|
|
|
65
43
|
/* name cache */
|
|
66
44
|
|
|
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
84
|
cache->entries[index] = rstring;
|
|
107
85
|
}
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
93
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
116
114
|
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
117
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
118
120
|
|
|
119
|
-
static
|
|
121
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
122
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
125
|
+
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
127
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
132
132
|
}
|
|
133
|
+
}
|
|
133
134
|
|
|
135
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
134
137
|
int low = 0;
|
|
135
138
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
139
|
|
|
139
140
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
141
142
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
144
|
|
|
144
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
145
146
|
return entry;
|
|
146
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
147
148
|
low = mid + 1;
|
|
148
149
|
} else {
|
|
149
150
|
high = mid - 1;
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
154
|
VALUE rstring = build_interned_string(str, length);
|
|
160
155
|
|
|
161
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
158
|
}
|
|
168
159
|
return rstring;
|
|
169
160
|
}
|
|
170
161
|
|
|
171
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
163
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
164
|
int low = 0;
|
|
187
165
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
166
|
|
|
191
167
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
193
169
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
171
|
|
|
196
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
197
173
|
return entry;
|
|
198
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
199
175
|
low = mid + 1;
|
|
200
176
|
} else {
|
|
201
177
|
high = mid - 1;
|
|
202
178
|
}
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
182
|
|
|
213
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
185
|
}
|
|
220
186
|
return rsymbol;
|
|
221
187
|
}
|
|
@@ -330,15 +296,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
296
|
}
|
|
331
297
|
}
|
|
332
298
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
299
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
300
|
{
|
|
344
301
|
int len = 1;
|
|
@@ -379,7 +336,8 @@ typedef struct JSON_ParserStruct {
|
|
|
379
336
|
int max_nesting;
|
|
380
337
|
bool allow_nan;
|
|
381
338
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
339
|
+
bool allow_control_characters;
|
|
340
|
+
bool allow_invalid_escape;
|
|
383
341
|
bool symbolize_names;
|
|
384
342
|
bool freeze;
|
|
385
343
|
} JSON_ParserConfig;
|
|
@@ -395,6 +353,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
353
|
int current_nesting;
|
|
396
354
|
} JSON_ParserState;
|
|
397
355
|
|
|
356
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
357
|
+
return state->end - state->cursor;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
361
|
+
return state->cursor >= state->end;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
static inline char peek(JSON_ParserState *state)
|
|
365
|
+
{
|
|
366
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
367
|
+
return 0;
|
|
368
|
+
}
|
|
369
|
+
return *state->cursor;
|
|
370
|
+
}
|
|
371
|
+
|
|
398
372
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
373
|
{
|
|
400
374
|
const char *cursor = state->cursor;
|
|
@@ -428,14 +402,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
428
402
|
|
|
429
403
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
430
404
|
|
|
431
|
-
|
|
432
|
-
RBIMPL_ATTR_NORETURN()
|
|
433
|
-
#endif
|
|
434
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
405
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
|
|
435
406
|
{
|
|
436
407
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
437
|
-
long line, column;
|
|
438
|
-
cursor_position(state, &line, &column);
|
|
439
408
|
|
|
440
409
|
const char *ptr = "EOF";
|
|
441
410
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -470,17 +439,26 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
470
439
|
VALUE msg = rb_sprintf(format, ptr);
|
|
471
440
|
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
|
472
441
|
RB_GC_GUARD(msg);
|
|
442
|
+
return message;
|
|
443
|
+
}
|
|
473
444
|
|
|
445
|
+
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
446
|
+
{
|
|
474
447
|
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
|
475
448
|
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
|
476
449
|
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
|
477
|
-
|
|
450
|
+
return exc;
|
|
478
451
|
}
|
|
479
452
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
453
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
454
|
+
{
|
|
455
|
+
long line, column;
|
|
456
|
+
cursor_position(state, &line, &column);
|
|
457
|
+
VALUE message = build_parse_error_message(format, state, line, column);
|
|
458
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
484
462
|
{
|
|
485
463
|
state->cursor = at;
|
|
486
464
|
raise_parse_error(format, state);
|
|
@@ -505,23 +483,24 @@ static const signed char digit_values[256] = {
|
|
|
505
483
|
-1, -1, -1, -1, -1, -1, -1
|
|
506
484
|
};
|
|
507
485
|
|
|
508
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
509
|
-
{
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
486
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
487
|
+
{
|
|
488
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
489
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
493
|
+
|
|
494
|
+
const signed char b0 = digit_values[p[0]];
|
|
495
|
+
const signed char b1 = digit_values[p[1]];
|
|
496
|
+
const signed char b2 = digit_values[p[2]];
|
|
497
|
+
const signed char b3 = digit_values[p[3]];
|
|
498
|
+
|
|
499
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
500
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
525
504
|
}
|
|
526
505
|
|
|
527
506
|
#define GET_PARSER_CONFIG \
|
|
@@ -530,61 +509,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
509
|
|
|
531
510
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
511
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
512
|
static void
|
|
542
513
|
json_eat_comments(JSON_ParserState *state)
|
|
543
514
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
515
|
+
const char *start = state->cursor;
|
|
516
|
+
state->cursor++;
|
|
517
|
+
|
|
518
|
+
switch (peek(state)) {
|
|
519
|
+
case '/': {
|
|
520
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
521
|
+
if (!state->cursor) {
|
|
522
|
+
state->cursor = state->end;
|
|
523
|
+
} else {
|
|
524
|
+
state->cursor++;
|
|
554
525
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
526
|
+
break;
|
|
527
|
+
}
|
|
528
|
+
case '*': {
|
|
529
|
+
state->cursor++;
|
|
530
|
+
|
|
531
|
+
while (true) {
|
|
532
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
533
|
+
if (!next_match) {
|
|
534
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
state->cursor = next_match + 1;
|
|
538
|
+
if (peek(state) == '/') {
|
|
539
|
+
state->cursor++;
|
|
540
|
+
break;
|
|
568
541
|
}
|
|
569
|
-
break;
|
|
570
542
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
543
|
+
break;
|
|
574
544
|
}
|
|
575
|
-
|
|
576
|
-
|
|
545
|
+
default:
|
|
546
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
547
|
+
break;
|
|
577
548
|
}
|
|
578
549
|
}
|
|
579
550
|
|
|
580
|
-
static
|
|
551
|
+
ALWAYS_INLINE(static) void
|
|
581
552
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
553
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
554
|
+
while (true) {
|
|
555
|
+
switch (peek(state)) {
|
|
556
|
+
case ' ':
|
|
557
|
+
state->cursor++;
|
|
558
|
+
break;
|
|
559
|
+
case '\n':
|
|
560
|
+
state->cursor++;
|
|
561
|
+
|
|
562
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
563
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
564
|
+
while (rest(state) > 8) {
|
|
565
|
+
uint64_t chunk;
|
|
566
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
567
|
+
if (chunk == 0x2020202020202020) {
|
|
568
|
+
state->cursor += 8;
|
|
569
|
+
continue;
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
573
|
+
state->cursor += consecutive_spaces;
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
#endif
|
|
577
|
+
break;
|
|
578
|
+
case '\t':
|
|
579
|
+
case '\r':
|
|
580
|
+
state->cursor++;
|
|
581
|
+
break;
|
|
582
|
+
case '/':
|
|
583
|
+
json_eat_comments(state);
|
|
584
|
+
break;
|
|
585
|
+
|
|
586
|
+
default:
|
|
587
|
+
return;
|
|
588
588
|
}
|
|
589
589
|
}
|
|
590
590
|
}
|
|
@@ -615,11 +615,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
615
|
return result;
|
|
616
616
|
}
|
|
617
617
|
|
|
618
|
-
static inline
|
|
618
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
619
|
+
{
|
|
620
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
621
|
+
// Simple heuristics:
|
|
622
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
623
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
624
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
619
628
|
{
|
|
629
|
+
bool intern = is_name || config->freeze;
|
|
630
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
631
|
size_t bufferSize = stringEnd - string;
|
|
621
632
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
633
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
634
|
VALUE cached_key;
|
|
624
635
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
636
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,104 +646,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
646
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
647
|
}
|
|
637
648
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
649
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
650
|
+
typedef struct _json_unescape_positions {
|
|
651
|
+
long size;
|
|
652
|
+
const char **positions;
|
|
653
|
+
unsigned long additional_backslashes;
|
|
654
|
+
} JSON_UnescapePositions;
|
|
645
655
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
656
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
657
|
+
{
|
|
658
|
+
while (positions->size) {
|
|
659
|
+
positions->size--;
|
|
660
|
+
const char *next_position = positions->positions[0];
|
|
661
|
+
positions->positions++;
|
|
662
|
+
if (next_position >= pe) {
|
|
663
|
+
return next_position;
|
|
652
664
|
}
|
|
665
|
+
}
|
|
653
666
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
667
|
+
if (positions->additional_backslashes) {
|
|
668
|
+
positions->additional_backslashes--;
|
|
669
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
670
|
}
|
|
658
671
|
|
|
672
|
+
return NULL;
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
676
|
+
{
|
|
677
|
+
bool intern = is_name || config->freeze;
|
|
678
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
679
|
+
size_t bufferSize = stringEnd - string;
|
|
680
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
681
|
+
char *buffer;
|
|
682
|
+
|
|
659
683
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
684
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
685
|
buffer = RSTRING_PTR(result);
|
|
662
686
|
bufferStart = buffer;
|
|
663
687
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
688
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
689
|
+
|
|
690
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
691
|
if (pe > p) {
|
|
668
692
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
693
|
buffer += pe - p;
|
|
670
694
|
}
|
|
671
695
|
switch (*++pe) {
|
|
696
|
+
case '"':
|
|
697
|
+
case '/':
|
|
698
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
699
|
+
break;
|
|
700
|
+
case '\\':
|
|
701
|
+
APPEND_CHAR('\\');
|
|
702
|
+
break;
|
|
672
703
|
case 'n':
|
|
673
|
-
|
|
704
|
+
APPEND_CHAR('\n');
|
|
674
705
|
break;
|
|
675
706
|
case 'r':
|
|
676
|
-
|
|
707
|
+
APPEND_CHAR('\r');
|
|
677
708
|
break;
|
|
678
709
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
710
|
+
APPEND_CHAR('\t');
|
|
686
711
|
break;
|
|
687
712
|
case 'b':
|
|
688
|
-
|
|
713
|
+
APPEND_CHAR('\b');
|
|
689
714
|
break;
|
|
690
715
|
case 'f':
|
|
691
|
-
|
|
716
|
+
APPEND_CHAR('\f');
|
|
692
717
|
break;
|
|
693
|
-
case 'u':
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if (
|
|
712
|
-
raise_parse_error_at("
|
|
713
|
-
}
|
|
714
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
716
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
717
|
-
| (sur & 0x3FF));
|
|
718
|
-
pe += 5;
|
|
719
|
-
} else {
|
|
720
|
-
unescape = (char *) "?";
|
|
721
|
-
break;
|
|
718
|
+
case 'u': {
|
|
719
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
720
|
+
pe += 3;
|
|
721
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
722
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
723
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
724
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
725
|
+
* final codepoint.
|
|
726
|
+
*
|
|
727
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
728
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
729
|
+
* Area".
|
|
730
|
+
*/
|
|
731
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
732
|
+
pe++;
|
|
733
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
734
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
735
|
+
|
|
736
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
737
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
722
738
|
}
|
|
739
|
+
|
|
740
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
741
|
+
pe += 5;
|
|
742
|
+
} else {
|
|
743
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
744
|
+
break;
|
|
723
745
|
}
|
|
724
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
725
|
-
unescape = buf;
|
|
726
746
|
}
|
|
747
|
+
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
749
|
+
buffer += unescape_len;
|
|
750
|
+
p = ++pe;
|
|
727
751
|
break;
|
|
752
|
+
}
|
|
728
753
|
default:
|
|
729
|
-
|
|
730
|
-
|
|
754
|
+
if ((unsigned char)*pe < 0x20) {
|
|
755
|
+
if (!config->allow_control_characters) {
|
|
756
|
+
if (*pe == '\n') {
|
|
757
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
758
|
+
}
|
|
759
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
if (config->allow_invalid_escape) {
|
|
764
|
+
APPEND_CHAR(*pe);
|
|
765
|
+
} else {
|
|
766
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
767
|
+
}
|
|
768
|
+
break;
|
|
731
769
|
}
|
|
732
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
733
|
-
buffer += unescape_len;
|
|
734
|
-
p = ++pe;
|
|
735
770
|
}
|
|
771
|
+
#undef APPEND_CHAR
|
|
736
772
|
|
|
737
773
|
if (stringEnd > p) {
|
|
738
774
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -743,81 +779,85 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
743
779
|
if (symbolize) {
|
|
744
780
|
result = rb_str_intern(result);
|
|
745
781
|
} else if (intern) {
|
|
746
|
-
result =
|
|
782
|
+
result = rb_str_to_interned_str(result);
|
|
747
783
|
}
|
|
748
784
|
|
|
749
785
|
return result;
|
|
750
786
|
}
|
|
751
787
|
|
|
752
788
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
753
|
-
|
|
754
|
-
{
|
|
755
|
-
bool negative = false;
|
|
756
|
-
if (*p == '-') {
|
|
757
|
-
negative = true;
|
|
758
|
-
p++;
|
|
759
|
-
}
|
|
789
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
760
790
|
|
|
761
|
-
|
|
762
|
-
while (p < pe) {
|
|
763
|
-
memo *= 10;
|
|
764
|
-
memo += *p - '0';
|
|
765
|
-
p++;
|
|
766
|
-
}
|
|
791
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
767
792
|
|
|
768
|
-
|
|
769
|
-
|
|
793
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
794
|
+
{
|
|
795
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
796
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
797
|
+
MEMCPY(buffer, start, char, len);
|
|
798
|
+
buffer[len] = '\0';
|
|
799
|
+
return func(buffer);
|
|
800
|
+
} else {
|
|
801
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
802
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
803
|
+
MEMCPY(buffer, start, char, len);
|
|
804
|
+
buffer[len] = '\0';
|
|
805
|
+
VALUE number = func(buffer);
|
|
806
|
+
RB_GC_GUARD(buffer_v);
|
|
807
|
+
return number;
|
|
770
808
|
}
|
|
771
|
-
return LL2NUM(memo);
|
|
772
809
|
}
|
|
773
810
|
|
|
774
|
-
static VALUE
|
|
811
|
+
static VALUE json_decode_inum(const char *buffer)
|
|
812
|
+
{
|
|
813
|
+
return rb_cstr2inum(buffer, 10);
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
775
817
|
{
|
|
776
|
-
|
|
777
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
778
|
-
MEMCPY(buffer, start, char, len);
|
|
779
|
-
buffer[len] = '\0';
|
|
780
|
-
VALUE number = rb_cstr2inum(buffer, 10);
|
|
781
|
-
RB_ALLOCV_END(buffer_v);
|
|
782
|
-
return number;
|
|
818
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
783
819
|
}
|
|
784
820
|
|
|
785
|
-
static inline VALUE
|
|
786
|
-
json_decode_integer(const char *start, const char *end)
|
|
821
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
787
822
|
{
|
|
788
|
-
|
|
789
|
-
if (
|
|
790
|
-
return
|
|
823
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
824
|
+
if (negative) {
|
|
825
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
791
826
|
}
|
|
792
|
-
return
|
|
827
|
+
return UINT64T2NUM(mantissa);
|
|
828
|
+
}
|
|
829
|
+
|
|
830
|
+
return json_decode_large_integer(start, end - start);
|
|
793
831
|
}
|
|
794
832
|
|
|
795
|
-
static VALUE
|
|
833
|
+
static VALUE json_decode_dnum(const char *buffer)
|
|
796
834
|
{
|
|
797
|
-
|
|
798
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
799
|
-
MEMCPY(buffer, start, char, len);
|
|
800
|
-
buffer[len] = '\0';
|
|
801
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
802
|
-
RB_ALLOCV_END(buffer_v);
|
|
803
|
-
return number;
|
|
835
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
804
836
|
}
|
|
805
837
|
|
|
806
|
-
static VALUE
|
|
838
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
807
839
|
{
|
|
808
|
-
|
|
840
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
841
|
+
}
|
|
809
842
|
|
|
843
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
844
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
845
|
+
*/
|
|
846
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
847
|
+
const char *start, const char *end)
|
|
848
|
+
{
|
|
810
849
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
811
|
-
VALUE text = rb_str_new(start,
|
|
850
|
+
VALUE text = rb_str_new(start, end - start);
|
|
812
851
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
813
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
814
|
-
char buffer[64];
|
|
815
|
-
MEMCPY(buffer, start, char, len);
|
|
816
|
-
buffer[len] = '\0';
|
|
817
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
-
} else {
|
|
819
|
-
return json_decode_large_float(start, len);
|
|
820
852
|
}
|
|
853
|
+
|
|
854
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
855
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
856
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
857
|
+
return json_decode_large_float(start, end - start);
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
821
861
|
}
|
|
822
862
|
|
|
823
863
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -849,7 +889,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
|
849
889
|
return Qfalse;
|
|
850
890
|
}
|
|
851
891
|
|
|
852
|
-
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
892
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
853
893
|
{
|
|
854
894
|
VALUE message = rb_sprintf(
|
|
855
895
|
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
@@ -860,16 +900,18 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
|
|
|
860
900
|
RB_GC_GUARD(message);
|
|
861
901
|
}
|
|
862
902
|
|
|
863
|
-
|
|
864
|
-
RBIMPL_ATTR_NORETURN()
|
|
865
|
-
#endif
|
|
866
|
-
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
903
|
+
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
867
904
|
{
|
|
868
905
|
VALUE message = rb_sprintf(
|
|
869
906
|
"duplicate key %"PRIsVALUE,
|
|
870
907
|
rb_inspect(duplicate_key)
|
|
871
908
|
);
|
|
872
909
|
|
|
910
|
+
long line, column;
|
|
911
|
+
cursor_position(state, &line, &column);
|
|
912
|
+
rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
|
|
913
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
914
|
+
|
|
873
915
|
raise_parse_error(RSTRING_PTR(message), state);
|
|
874
916
|
RB_GC_GUARD(message);
|
|
875
917
|
}
|
|
@@ -903,20 +945,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
903
945
|
return object;
|
|
904
946
|
}
|
|
905
947
|
|
|
906
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
907
|
-
{
|
|
908
|
-
VALUE string;
|
|
909
|
-
bool intern = is_name || config->freeze;
|
|
910
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
911
|
-
if (escaped) {
|
|
912
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
913
|
-
} else {
|
|
914
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
915
|
-
}
|
|
916
|
-
|
|
917
|
-
return string;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
948
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
921
949
|
{
|
|
922
950
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -939,17 +967,11 @@ static const bool string_scan_table[256] = {
|
|
|
939
967
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
940
968
|
};
|
|
941
969
|
|
|
942
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
943
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
944
|
-
#else
|
|
945
|
-
#define FORCE_INLINE
|
|
946
|
-
#endif
|
|
947
|
-
|
|
948
970
|
#ifdef HAVE_SIMD
|
|
949
971
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
950
972
|
#endif /* HAVE_SIMD */
|
|
951
973
|
|
|
952
|
-
static
|
|
974
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
953
975
|
{
|
|
954
976
|
#ifdef HAVE_SIMD
|
|
955
977
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -957,7 +979,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
957
979
|
uint64_t mask = 0;
|
|
958
980
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
959
981
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
960
|
-
return
|
|
982
|
+
return true;
|
|
961
983
|
}
|
|
962
984
|
|
|
963
985
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -965,64 +987,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
965
987
|
int mask = 0;
|
|
966
988
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
967
989
|
state->cursor += trailing_zeros(mask);
|
|
968
|
-
return
|
|
990
|
+
return true;
|
|
969
991
|
}
|
|
970
992
|
}
|
|
971
993
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
972
994
|
#endif /* HAVE_SIMD */
|
|
973
995
|
|
|
974
|
-
while (state
|
|
996
|
+
while (!eos(state)) {
|
|
975
997
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
976
|
-
return
|
|
998
|
+
return true;
|
|
977
999
|
}
|
|
978
|
-
|
|
1000
|
+
state->cursor++;
|
|
979
1001
|
}
|
|
980
|
-
return
|
|
1002
|
+
return false;
|
|
981
1003
|
}
|
|
982
1004
|
|
|
983
|
-
static
|
|
1005
|
+
/*
 * Slow path of string parsing, used once the scan has stopped on something
 * other than the closing quote.
 *
 * state   - parser state; state->cursor points at the character the scan
 *           stopped on.
 * config  - parser configuration (allow_control_characters is consulted here).
 * is_name - forwarded to json_string_unescape.
 * start   - first byte of the string contents (just after the opening quote).
 *
 * The first JSON_MAX_UNESCAPE_POSITIONS backslash positions are recorded on
 * the stack; any further backslashes are only counted. Returns the result of
 * json_push_value for the unescaped string, or raises when the input ends
 * before a closing quote.
 */
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
{
    const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
    JSON_UnescapePositions positions = {0};
    positions.positions = backslashes;

    for (;;) {
        const char current = *state->cursor;

        if (current == '"') {
            VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
            state->cursor++;
            return json_push_value(state, config, string);
        }

        if (current == '\\') {
            if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
                backslashes[positions.size++] = state->cursor;
            } else {
                positions.additional_backslashes++;
            }
            // Step over the backslash; the shared increment below then steps
            // over the character it escapes.
            state->cursor++;
        } else if (!config->allow_control_characters) {
            raise_parse_error("invalid ASCII control character in string: %s", state);
        }

        state->cursor++;

        if (!string_scan(state)) {
            break;
        }
    }

    raise_parse_error("unexpected end of input, expected closing \"", state);
    return Qfalse;
}
|
|
1015
1044
|
|
|
1045
|
+
/*
 * Parse a JSON string. On entry state->cursor points at the opening quote.
 *
 * If the scan stops directly on the closing quote, the string is built via
 * json_string_fastpath. Otherwise parsing is delegated to
 * json_parse_escaped_string. Raises when the input ends before the scan finds
 * a stopping character.
 *
 * Returns the result of json_push_value for the decoded string.
 */
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
    // Skip the opening quote.
    state->cursor++;
    const char *start = state->cursor;

    const bool stopped = string_scan(state);
    if (RB_UNLIKELY(!stopped)) {
        raise_parse_error("unexpected end of input, expected closing \"", state);
    }

    if (RB_UNLIKELY(*state->cursor != '"')) {
        return json_parse_escaped_string(state, config, is_name, start);
    }

    VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
    state->cursor++;
    return json_push_value(state, config, string);
}
|
|
1061
|
+
|
|
1062
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1063
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1064
|
+
// Additional References:
|
|
1065
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1066
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1067
|
+
/*
 * Convert eight ASCII digits packed in a 64-bit word into their numeric value.
 * The least significant byte of `val` holds the most significant digit, i.e.
 * the word as loaded from memory on a little-endian machine. The caller must
 * have verified that every byte is an ASCII digit.
 */
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
    const uint64_t lane_mask = 0x000000FF000000FF;
    const uint64_t weights_hi = 0x000F424000000064; // 100 + (1000000ULL << 32)
    const uint64_t weights_lo = 0x0000271000000001; // 1 + (10000ULL << 32)

    // '0'..'9' -> 0..9 in every byte.
    const uint64_t digits = val - 0x3030303030303030;
    // Every even byte now holds a two-digit number (10 * d[i] + d[i + 1]).
    const uint64_t pairs = (digits * 10) + (digits >> 8);

    const uint64_t even_pairs = (pairs & lane_mask) * weights_hi;
    const uint64_t odd_pairs = ((pairs >> 16) & lane_mask) * weights_lo;

    // The combined value accumulates in the upper 32 bits.
    return (even_pairs + odd_pairs) >> 32;
}
|
|
1076
|
+
|
|
1077
|
+
/*
 * Convert four ASCII digits packed in a 32-bit word into their numeric value.
 * The least significant byte of `val` holds the most significant digit. The
 * caller must have verified that every byte is an ASCII digit.
 */
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
    const uint32_t byte_mask = 0x000000FF;

    // '0'..'9' -> 0..9 in every byte.
    const uint32_t digits = val - 0x30303030;
    // Bytes 0 and 2 now hold the two two-digit halves.
    const uint32_t pairs = (digits * 10) + (digits >> 8); // (digits * 2561) >> 8

    const uint32_t upper_half = (pairs & byte_mask) * 100;
    const uint32_t lower_half = (pairs >> 16) & byte_mask;
    const uint32_t value = upper_half + lower_half;
    return value;
}
|
|
1085
|
+
#endif
|
|
1086
|
+
|
|
1087
|
+
/*
 * Consume a run of ASCII digits starting at state->cursor and fold them into
 * *accumulator (accumulator = accumulator * 10 + digit, in unsigned 64-bit
 * arithmetic, so very long runs wrap around).
 *
 * Returns the number of digits consumed, which may be 0. The cursor is left on
 * the first non-digit character.
 */
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
    const char *const first = state->cursor;

#if JSON_CPU_LITTLE_ENDIAN_64BITS
    while (rest(state) >= sizeof(uint64_t)) {
        uint64_t chunk;
        memcpy(&chunk, state->cursor, sizeof(uint64_t));

        // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
        // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
        // A byte of `match` equals 0x33 exactly when the corresponding input byte is a digit.
        const uint64_t high_nibbles = chunk & 0xF0F0F0F0F0F0F0F0;
        const uint64_t carried = ((chunk + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4;
        const uint64_t match = high_nibbles | carried;

        if (match != 0x3333333333333333) {
            // Fewer than 8 digits ahead: consume the leading run and stop.
            uint32_t run_length = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;

            if (run_length >= 4) {
                *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)chunk);
                state->cursor += 4;
                run_length -= 4;
            }

            for (; run_length; run_length--) {
                *accumulator = *accumulator * 10 + (*state->cursor - '0');
                state->cursor++;
            }

            return (int)(state->cursor - first);
        }

        // 8 consecutive digits
        *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(chunk);
        state->cursor += 8;
    }
#endif

    // Byte-at-a-time loop: the only path on other platforms, and the tail
    // handler when fewer than 8 bytes remain.
    for (;;) {
        const char current = peek(state);
        if (!rb_isdigit(current)) {
            break;
        }
        *accumulator = *accumulator * 10 + (current - '0');
        state->cursor++;
    }

    return (int)(state->cursor - first);
}
|
|
1131
|
+
|
|
1132
|
+
/*
 * Parse a JSON number: /(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
 *
 * state    - parser state; state->cursor points at the first digit (any
 *            leading '-' has already been consumed by the caller).
 * config   - parser configuration, forwarded to json_decode_float.
 * negative - true when the number was preceded by '-'.
 * start    - first byte of the number including the sign; used for error
 *            reporting and passed to the decoders.
 *
 * Digits are accumulated into a 64-bit mantissa while scanning, together with
 * a digit count and a decimal exponent, and handed to json_decode_integer or
 * json_decode_float. Raises a parse error on leading zeros, a lone '-', a '.'
 * not followed by a digit, or an exponent without digits.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    bool integer = true;
    const char first_digit = *state->cursor;

    // Variables for Ryu optimization - extract digits during parsing
    int32_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        uint64_t abs_exponent = 0;
        const char *exponent_start = state->cursor;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // The accumulator is a uint64_t that may already have wrapped, and it
        // is about to be narrowed to int32_t. Without a guard a huge exponent
        // such as `1e4294967297` would be truncated to a small one. Saturate
        // the magnitude instead: any exponent with more than 9 significant
        // digits is far outside the range of a double, so the saturated value
        // decodes to the same overflow/underflow result.
        if (RB_UNLIKELY(exponent_digits > 9)) {
            const int32_t saturated_exponent = 999999999;
            const char *significant = exponent_start;
            // Leading zeros do not contribute to the magnitude.
            while (significant < state->cursor && *significant == '0') {
                significant++;
            }
            if (state->cursor - significant > 9) {
                abs_exponent = (uint64_t)saturated_exponent;
            }
        }

        exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
|
|
1196
|
+
|
|
1197
|
+
/*
 * Parse an unsigned number. state->cursor points at its first digit, which is
 * also where the number starts for error reporting.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *number_start = state->cursor;
    return json_parse_number(state, config, false, number_start);
}
|
|
1201
|
+
|
|
1202
|
+
/*
 * Parse a number introduced by '-'. state->cursor points at the minus sign;
 * that position is kept as the number's start while the cursor is advanced
 * onto the digits before delegating to json_parse_number.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *sign_position = state->cursor++;
    return json_parse_number(state, config, true, sign_position);
}
|
|
1208
|
+
|
|
1016
1209
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1017
1210
|
{
|
|
1018
1211
|
json_eat_whitespace(state);
|
|
1019
|
-
if (state->cursor >= state->end) {
|
|
1020
|
-
raise_parse_error("unexpected end of input", state);
|
|
1021
|
-
}
|
|
1022
1212
|
|
|
1023
|
-
switch (
|
|
1213
|
+
switch (peek(state)) {
|
|
1024
1214
|
case 'n':
|
|
1025
|
-
if ((state
|
|
1215
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1026
1216
|
state->cursor += 4;
|
|
1027
1217
|
return json_push_value(state, config, Qnil);
|
|
1028
1218
|
}
|
|
@@ -1030,7 +1220,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1030
1220
|
raise_parse_error("unexpected token %s", state);
|
|
1031
1221
|
break;
|
|
1032
1222
|
case 't':
|
|
1033
|
-
if ((state
|
|
1223
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1034
1224
|
state->cursor += 4;
|
|
1035
1225
|
return json_push_value(state, config, Qtrue);
|
|
1036
1226
|
}
|
|
@@ -1039,7 +1229,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1039
1229
|
break;
|
|
1040
1230
|
case 'f':
|
|
1041
1231
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1042
|
-
if ((state
|
|
1232
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1043
1233
|
state->cursor += 5;
|
|
1044
1234
|
return json_push_value(state, config, Qfalse);
|
|
1045
1235
|
}
|
|
@@ -1048,7 +1238,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1048
1238
|
break;
|
|
1049
1239
|
case 'N':
|
|
1050
1240
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1051
|
-
if (config->allow_nan && (state
|
|
1241
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1052
1242
|
state->cursor += 3;
|
|
1053
1243
|
return json_push_value(state, config, CNaN);
|
|
1054
1244
|
}
|
|
@@ -1056,16 +1246,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1056
1246
|
raise_parse_error("unexpected token %s", state);
|
|
1057
1247
|
break;
|
|
1058
1248
|
case 'I':
|
|
1059
|
-
if (config->allow_nan && (state
|
|
1249
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1060
1250
|
state->cursor += 8;
|
|
1061
1251
|
return json_push_value(state, config, CInfinity);
|
|
1062
1252
|
}
|
|
1063
1253
|
|
|
1064
1254
|
raise_parse_error("unexpected token %s", state);
|
|
1065
1255
|
break;
|
|
1066
|
-
case '-':
|
|
1256
|
+
case '-': {
|
|
1067
1257
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1068
|
-
if ((state
|
|
1258
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1069
1259
|
if (config->allow_nan) {
|
|
1070
1260
|
state->cursor += 9;
|
|
1071
1261
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1073,62 +1263,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1263
|
raise_parse_error("unexpected token %s", state);
|
|
1074
1264
|
}
|
|
1075
1265
|
}
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
bool integer = true;
|
|
1079
|
-
|
|
1080
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1081
|
-
const char *start = state->cursor;
|
|
1082
|
-
state->cursor++;
|
|
1083
|
-
|
|
1084
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1085
|
-
state->cursor++;
|
|
1086
|
-
}
|
|
1087
|
-
|
|
1088
|
-
long integer_length = state->cursor - start;
|
|
1089
|
-
|
|
1090
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1091
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1092
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1093
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1094
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1095
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1096
|
-
}
|
|
1097
|
-
|
|
1098
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1099
|
-
integer = false;
|
|
1100
|
-
state->cursor++;
|
|
1101
|
-
|
|
1102
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1103
|
-
raise_parse_error("invalid number: %s", state);
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1107
|
-
state->cursor++;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1112
|
-
integer = false;
|
|
1113
|
-
state->cursor++;
|
|
1114
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1115
|
-
state->cursor++;
|
|
1116
|
-
}
|
|
1117
|
-
|
|
1118
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1119
|
-
raise_parse_error("invalid number: %s", state);
|
|
1120
|
-
}
|
|
1121
|
-
|
|
1122
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1123
|
-
state->cursor++;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
if (integer) {
|
|
1128
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1129
|
-
}
|
|
1130
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1266
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1267
|
+
break;
|
|
1131
1268
|
}
|
|
1269
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1270
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1271
|
+
break;
|
|
1132
1272
|
case '"': {
|
|
1133
1273
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1134
1274
|
return json_parse_string(state, config, false);
|
|
@@ -1139,7 +1279,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1139
1279
|
json_eat_whitespace(state);
|
|
1140
1280
|
long stack_head = state->stack->head;
|
|
1141
1281
|
|
|
1142
|
-
if ((state
|
|
1282
|
+
if (peek(state) == ']') {
|
|
1143
1283
|
state->cursor++;
|
|
1144
1284
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1145
1285
|
} else {
|
|
@@ -1154,26 +1294,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1154
1294
|
while (true) {
|
|
1155
1295
|
json_eat_whitespace(state);
|
|
1156
1296
|
|
|
1157
|
-
|
|
1158
|
-
if (*state->cursor == ']') {
|
|
1159
|
-
state->cursor++;
|
|
1160
|
-
long count = state->stack->head - stack_head;
|
|
1161
|
-
state->current_nesting--;
|
|
1162
|
-
state->in_array--;
|
|
1163
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1164
|
-
}
|
|
1297
|
+
const char next_char = peek(state);
|
|
1165
1298
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
}
|
|
1299
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1300
|
+
state->cursor++;
|
|
1301
|
+
if (config->allow_trailing_comma) {
|
|
1302
|
+
json_eat_whitespace(state);
|
|
1303
|
+
if (peek(state) == ']') {
|
|
1304
|
+
continue;
|
|
1173
1305
|
}
|
|
1174
|
-
json_parse_any(state, config);
|
|
1175
|
-
continue;
|
|
1176
1306
|
}
|
|
1307
|
+
json_parse_any(state, config);
|
|
1308
|
+
continue;
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
if (next_char == ']') {
|
|
1312
|
+
state->cursor++;
|
|
1313
|
+
long count = state->stack->head - stack_head;
|
|
1314
|
+
state->current_nesting--;
|
|
1315
|
+
state->in_array--;
|
|
1316
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1177
1317
|
}
|
|
1178
1318
|
|
|
1179
1319
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1187,7 +1327,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1187
1327
|
json_eat_whitespace(state);
|
|
1188
1328
|
long stack_head = state->stack->head;
|
|
1189
1329
|
|
|
1190
|
-
if ((state
|
|
1330
|
+
if (peek(state) == '}') {
|
|
1191
1331
|
state->cursor++;
|
|
1192
1332
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1193
1333
|
} else {
|
|
@@ -1196,13 +1336,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1196
1336
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1197
1337
|
}
|
|
1198
1338
|
|
|
1199
|
-
if (
|
|
1339
|
+
if (peek(state) != '"') {
|
|
1200
1340
|
raise_parse_error("expected object key, got %s", state);
|
|
1201
1341
|
}
|
|
1202
1342
|
json_parse_string(state, config, true);
|
|
1203
1343
|
|
|
1204
1344
|
json_eat_whitespace(state);
|
|
1205
|
-
if ((state
|
|
1345
|
+
if (peek(state) != ':') {
|
|
1206
1346
|
raise_parse_error("expected ':' after object key", state);
|
|
1207
1347
|
}
|
|
1208
1348
|
state->cursor++;
|
|
@@ -1213,46 +1353,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1213
1353
|
while (true) {
|
|
1214
1354
|
json_eat_whitespace(state);
|
|
1215
1355
|
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1356
|
+
const char next_char = peek(state);
|
|
1357
|
+
if (next_char == '}') {
|
|
1358
|
+
state->cursor++;
|
|
1359
|
+
state->current_nesting--;
|
|
1360
|
+
size_t count = state->stack->head - stack_head;
|
|
1221
1361
|
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1362
|
+
// Temporary rewind cursor in case an error is raised
|
|
1363
|
+
const char *final_cursor = state->cursor;
|
|
1364
|
+
state->cursor = object_start_cursor;
|
|
1365
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1366
|
+
state->cursor = final_cursor;
|
|
1227
1367
|
|
|
1228
|
-
|
|
1229
|
-
|
|
1368
|
+
return json_push_value(state, config, object);
|
|
1369
|
+
}
|
|
1230
1370
|
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1371
|
+
if (next_char == ',') {
|
|
1372
|
+
state->cursor++;
|
|
1373
|
+
json_eat_whitespace(state);
|
|
1234
1374
|
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
}
|
|
1375
|
+
if (config->allow_trailing_comma) {
|
|
1376
|
+
if (peek(state) == '}') {
|
|
1377
|
+
continue;
|
|
1239
1378
|
}
|
|
1379
|
+
}
|
|
1240
1380
|
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1381
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1382
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1383
|
+
}
|
|
1384
|
+
json_parse_string(state, config, true);
|
|
1245
1385
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1386
|
+
json_eat_whitespace(state);
|
|
1387
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1388
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1389
|
+
}
|
|
1390
|
+
state->cursor++;
|
|
1251
1391
|
|
|
1252
|
-
|
|
1392
|
+
json_parse_any(state, config);
|
|
1253
1393
|
|
|
1254
|
-
|
|
1255
|
-
}
|
|
1394
|
+
continue;
|
|
1256
1395
|
}
|
|
1257
1396
|
|
|
1258
1397
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1260,18 +1399,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1260
1399
|
break;
|
|
1261
1400
|
}
|
|
1262
1401
|
|
|
1402
|
+
case 0:
|
|
1403
|
+
raise_parse_error("unexpected end of input", state);
|
|
1404
|
+
break;
|
|
1405
|
+
|
|
1263
1406
|
default:
|
|
1264
1407
|
raise_parse_error("unexpected character: %s", state);
|
|
1265
1408
|
break;
|
|
1266
1409
|
}
|
|
1267
1410
|
|
|
1268
|
-
raise_parse_error("
|
|
1411
|
+
raise_parse_error("unreachable: %s", state);
|
|
1412
|
+
return Qundef;
|
|
1269
1413
|
}
|
|
1270
1414
|
|
|
1271
1415
|
/*
 * Verify that nothing is left in the input after the parsed document.
 * Calls json_eat_whitespace first; raises a parse error if the end of the
 * stream has still not been reached afterwards.
 */
static void json_ensure_eof(JSON_ParserState *state)
{
    json_eat_whitespace(state);

    if (eos(state)) {
        return;
    }

    raise_parse_error("unexpected token at end of stream %s", state);
}
|
|
@@ -1308,14 +1452,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1308
1452
|
{
|
|
1309
1453
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1310
1454
|
|
|
1311
|
-
if (key == sym_max_nesting)
|
|
1312
|
-
else if (key == sym_allow_nan)
|
|
1313
|
-
else if (key == sym_allow_trailing_comma)
|
|
1314
|
-
else if (key ==
|
|
1315
|
-
else if (key ==
|
|
1316
|
-
else if (key ==
|
|
1317
|
-
else if (key ==
|
|
1318
|
-
else if (key ==
|
|
1455
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1456
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1457
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1458
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1459
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1460
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1461
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1462
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1463
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1464
|
+
else if (key == sym_decimal_class) {
|
|
1319
1465
|
if (RTEST(val)) {
|
|
1320
1466
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1321
1467
|
config->decimal_class = val;
|
|
@@ -1388,6 +1534,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1388
1534
|
*/
|
|
1389
1535
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1390
1536
|
{
|
|
1537
|
+
rb_check_frozen(self);
|
|
1391
1538
|
GET_PARSER_CONFIG;
|
|
1392
1539
|
|
|
1393
1540
|
parser_config_init(config, opts);
|
|
@@ -1483,7 +1630,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1483
1630
|
JSON_ParserConfig_memsize,
|
|
1484
1631
|
},
|
|
1485
1632
|
0, 0,
|
|
1486
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1633
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1487
1634
|
};
|
|
1488
1635
|
|
|
1489
1636
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1527,16 +1674,14 @@ void Init_parser(void)
|
|
|
1527
1674
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1528
1675
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1529
1676
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1677
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1678
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1530
1679
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1531
1680
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1532
1681
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1533
1682
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1534
1683
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1535
1684
|
|
|
1536
|
-
i_chr = rb_intern("chr");
|
|
1537
|
-
i_aset = rb_intern("[]=");
|
|
1538
|
-
i_aref = rb_intern("[]");
|
|
1539
|
-
i_leftshift = rb_intern("<<");
|
|
1540
1685
|
i_new = rb_intern("new");
|
|
1541
1686
|
i_try_convert = rb_intern("try_convert");
|
|
1542
1687
|
i_uminus = rb_intern("-@");
|