json 2.12.2 → 2.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +95 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +35 -56
- data/ext/json/ext/generator/extconf.rb +2 -26
- data/ext/json/ext/generator/generator.c +395 -359
- data/ext/json/ext/json.h +101 -0
- data/ext/json/ext/parser/extconf.rb +7 -2
- data/ext/json/ext/parser/parser.c +701 -445
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +218 -0
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/json.gemspec +2 -3
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +78 -40
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +113 -63
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +76 -0
- metadata +8 -4
- data/ext/json/ext/generator/simd.h +0 -112
|
@@ -1,48 +1,21 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
23
|
-
#ifndef RB_UNLIKELY
|
|
24
|
-
#define RB_UNLIKELY(expr) expr
|
|
25
|
-
#endif
|
|
26
|
-
|
|
27
|
-
#ifndef RB_LIKELY
|
|
28
|
-
#define RB_LIKELY(expr) expr
|
|
29
|
-
#endif
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
3
|
+
#include "../simd/simd.h"
|
|
30
4
|
|
|
31
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
32
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
33
7
|
|
|
34
|
-
static ID
|
|
35
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
36
9
|
|
|
37
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
38
|
-
sym_decimal_class, sym_on_load;
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
|
|
11
|
+
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
39
12
|
|
|
40
13
|
static int binary_encindex;
|
|
41
14
|
static int utf8_encindex;
|
|
42
15
|
|
|
43
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
44
17
|
// For TruffleRuby
|
|
45
|
-
void
|
|
18
|
+
static void
|
|
46
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
47
20
|
{
|
|
48
21
|
long index = 0;
|
|
@@ -59,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
59
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
60
33
|
#endif
|
|
61
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
37
|
+
{
|
|
38
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
39
|
+
}
|
|
40
|
+
#endif
|
|
62
41
|
|
|
63
42
|
/* name cache */
|
|
64
43
|
|
|
@@ -104,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
104
83
|
cache->entries[index] = rstring;
|
|
105
84
|
}
|
|
106
85
|
|
|
107
|
-
|
|
86
|
+
#define rstring_cache_memcmp memcmp
|
|
87
|
+
|
|
88
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
89
|
+
#if __has_builtin(__builtin_bswap64)
|
|
90
|
+
#undef rstring_cache_memcmp
|
|
91
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
108
92
|
{
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
93
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
94
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
95
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
96
|
+
long i = 0;
|
|
97
|
+
|
|
98
|
+
for (; i + 8 <= length; i += 8) {
|
|
99
|
+
uint64_t a, b;
|
|
100
|
+
memcpy(&a, str + i, 8);
|
|
101
|
+
memcpy(&b, rptr + i, 8);
|
|
102
|
+
if (a != b) {
|
|
103
|
+
a = __builtin_bswap64(a);
|
|
104
|
+
b = __builtin_bswap64(b);
|
|
105
|
+
return (a < b) ? -1 : 1;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (; i < length; i++) {
|
|
110
|
+
if (str[i] != rptr[i]) {
|
|
111
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
112
|
+
}
|
|
114
113
|
}
|
|
114
|
+
|
|
115
|
+
return 0;
|
|
115
116
|
}
|
|
117
|
+
#endif
|
|
118
|
+
#endif
|
|
116
119
|
|
|
117
|
-
static
|
|
120
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
118
121
|
{
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
// cache names above an arbitrary threshold.
|
|
122
|
-
return Qfalse;
|
|
123
|
-
}
|
|
122
|
+
const char *rstring_ptr;
|
|
123
|
+
long rstring_length;
|
|
124
124
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
125
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
|
+
|
|
127
|
+
if (length == rstring_length) {
|
|
128
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
129
|
+
} else {
|
|
130
|
+
return (int)(length - rstring_length);
|
|
130
131
|
}
|
|
132
|
+
}
|
|
131
133
|
|
|
134
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
135
|
+
{
|
|
132
136
|
int low = 0;
|
|
133
137
|
int high = cache->length - 1;
|
|
134
|
-
int mid = 0;
|
|
135
|
-
int last_cmp = 0;
|
|
136
138
|
|
|
137
139
|
while (low <= high) {
|
|
138
|
-
mid = (high + low) >> 1;
|
|
140
|
+
int mid = (high + low) >> 1;
|
|
139
141
|
VALUE entry = cache->entries[mid];
|
|
140
|
-
|
|
142
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
141
143
|
|
|
142
|
-
if (
|
|
144
|
+
if (cmp == 0) {
|
|
143
145
|
return entry;
|
|
144
|
-
} else if (
|
|
146
|
+
} else if (cmp > 0) {
|
|
145
147
|
low = mid + 1;
|
|
146
148
|
} else {
|
|
147
149
|
high = mid - 1;
|
|
148
150
|
}
|
|
149
151
|
}
|
|
150
152
|
|
|
151
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
152
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
153
|
-
// But if they do, we have to fallback to the slow path.
|
|
154
|
-
return Qfalse;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
153
|
VALUE rstring = build_interned_string(str, length);
|
|
158
154
|
|
|
159
155
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
160
|
-
|
|
161
|
-
mid += 1;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
156
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
165
157
|
}
|
|
166
158
|
return rstring;
|
|
167
159
|
}
|
|
168
160
|
|
|
169
161
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
170
162
|
{
|
|
171
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
172
|
-
// Common names aren't likely to be very long. So we just don't
|
|
173
|
-
// cache names above an arbitrary threshold.
|
|
174
|
-
return Qfalse;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
178
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
179
|
-
// we're much less likely to see this string again.
|
|
180
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
181
|
-
return Qfalse;
|
|
182
|
-
}
|
|
183
|
-
|
|
184
163
|
int low = 0;
|
|
185
164
|
int high = cache->length - 1;
|
|
186
|
-
int mid = 0;
|
|
187
|
-
int last_cmp = 0;
|
|
188
165
|
|
|
189
166
|
while (low <= high) {
|
|
190
|
-
mid = (high + low) >> 1;
|
|
167
|
+
int mid = (high + low) >> 1;
|
|
191
168
|
VALUE entry = cache->entries[mid];
|
|
192
|
-
|
|
169
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
193
170
|
|
|
194
|
-
if (
|
|
171
|
+
if (cmp == 0) {
|
|
195
172
|
return entry;
|
|
196
|
-
} else if (
|
|
173
|
+
} else if (cmp > 0) {
|
|
197
174
|
low = mid + 1;
|
|
198
175
|
} else {
|
|
199
176
|
high = mid - 1;
|
|
200
177
|
}
|
|
201
178
|
}
|
|
202
179
|
|
|
203
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
204
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
205
|
-
// But if they do, we have to fallback to the slow path.
|
|
206
|
-
return Qfalse;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
180
|
VALUE rsymbol = build_symbol(str, length);
|
|
210
181
|
|
|
211
182
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
212
|
-
|
|
213
|
-
mid += 1;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
183
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
217
184
|
}
|
|
218
185
|
return rsymbol;
|
|
219
186
|
}
|
|
@@ -328,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
328
295
|
}
|
|
329
296
|
}
|
|
330
297
|
|
|
331
|
-
|
|
332
|
-
#ifndef HAVE_STRNLEN
|
|
333
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
334
|
-
{
|
|
335
|
-
char *p;
|
|
336
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
337
|
-
}
|
|
338
|
-
#endif
|
|
339
|
-
|
|
340
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
341
299
|
{
|
|
342
300
|
int len = 1;
|
|
@@ -363,14 +321,21 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
|
363
321
|
return len;
|
|
364
322
|
}
|
|
365
323
|
|
|
324
|
+
enum duplicate_key_action {
|
|
325
|
+
JSON_DEPRECATED = 0,
|
|
326
|
+
JSON_IGNORE,
|
|
327
|
+
JSON_RAISE,
|
|
328
|
+
};
|
|
329
|
+
|
|
366
330
|
typedef struct JSON_ParserStruct {
|
|
367
331
|
VALUE on_load_proc;
|
|
368
332
|
VALUE decimal_class;
|
|
369
333
|
ID decimal_method_id;
|
|
334
|
+
enum duplicate_key_action on_duplicate_key;
|
|
370
335
|
int max_nesting;
|
|
371
336
|
bool allow_nan;
|
|
372
337
|
bool allow_trailing_comma;
|
|
373
|
-
bool
|
|
338
|
+
bool allow_control_characters;
|
|
374
339
|
bool symbolize_names;
|
|
375
340
|
bool freeze;
|
|
376
341
|
} JSON_ParserConfig;
|
|
@@ -386,15 +351,24 @@ typedef struct JSON_ParserStateStruct {
|
|
|
386
351
|
int current_nesting;
|
|
387
352
|
} JSON_ParserState;
|
|
388
353
|
|
|
354
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
355
|
+
return state->end - state->cursor;
|
|
356
|
+
}
|
|
389
357
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
static
|
|
358
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
359
|
+
return state->cursor >= state->end;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
static inline char peek(JSON_ParserState *state)
|
|
395
363
|
{
|
|
396
|
-
|
|
364
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
365
|
+
return 0;
|
|
366
|
+
}
|
|
367
|
+
return *state->cursor;
|
|
368
|
+
}
|
|
397
369
|
|
|
370
|
+
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
371
|
+
{
|
|
398
372
|
const char *cursor = state->cursor;
|
|
399
373
|
long column = 0;
|
|
400
374
|
long line = 1;
|
|
@@ -411,6 +385,29 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
411
385
|
line++;
|
|
412
386
|
}
|
|
413
387
|
}
|
|
388
|
+
*line_out = line;
|
|
389
|
+
*column_out = column;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
393
|
+
{
|
|
394
|
+
long line, column;
|
|
395
|
+
cursor_position(state, &line, &column);
|
|
396
|
+
|
|
397
|
+
VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
|
|
398
|
+
rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
402
|
+
|
|
403
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
|
404
|
+
RBIMPL_ATTR_NORETURN()
|
|
405
|
+
#endif
|
|
406
|
+
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
407
|
+
{
|
|
408
|
+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
409
|
+
long line, column;
|
|
410
|
+
cursor_position(state, &line, &column);
|
|
414
411
|
|
|
415
412
|
const char *ptr = "EOF";
|
|
416
413
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -480,23 +477,24 @@ static const signed char digit_values[256] = {
|
|
|
480
477
|
-1, -1, -1, -1, -1, -1, -1
|
|
481
478
|
};
|
|
482
479
|
|
|
483
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
480
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
484
481
|
{
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
482
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
483
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
487
|
+
|
|
488
|
+
const signed char b0 = digit_values[p[0]];
|
|
489
|
+
const signed char b1 = digit_values[p[1]];
|
|
490
|
+
const signed char b2 = digit_values[p[2]];
|
|
491
|
+
const signed char b3 = digit_values[p[3]];
|
|
492
|
+
|
|
493
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
494
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
500
498
|
}
|
|
501
499
|
|
|
502
500
|
#define GET_PARSER_CONFIG \
|
|
@@ -505,61 +503,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
505
503
|
|
|
506
504
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
507
505
|
|
|
508
|
-
static const bool whitespace[256] = {
|
|
509
|
-
[' '] = 1,
|
|
510
|
-
['\t'] = 1,
|
|
511
|
-
['\n'] = 1,
|
|
512
|
-
['\r'] = 1,
|
|
513
|
-
['/'] = 1,
|
|
514
|
-
};
|
|
515
|
-
|
|
516
506
|
static void
|
|
517
507
|
json_eat_comments(JSON_ParserState *state)
|
|
518
508
|
{
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
509
|
+
const char *start = state->cursor;
|
|
510
|
+
state->cursor++;
|
|
511
|
+
|
|
512
|
+
switch (peek(state)) {
|
|
513
|
+
case '/': {
|
|
514
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
515
|
+
if (!state->cursor) {
|
|
516
|
+
state->cursor = state->end;
|
|
517
|
+
} else {
|
|
518
|
+
state->cursor++;
|
|
529
519
|
}
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
520
|
+
break;
|
|
521
|
+
}
|
|
522
|
+
case '*': {
|
|
523
|
+
state->cursor++;
|
|
524
|
+
|
|
525
|
+
while (true) {
|
|
526
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
527
|
+
if (!next_match) {
|
|
528
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
state->cursor = next_match + 1;
|
|
532
|
+
if (peek(state) == '/') {
|
|
533
|
+
state->cursor++;
|
|
534
|
+
break;
|
|
543
535
|
}
|
|
544
|
-
break;
|
|
545
536
|
}
|
|
546
|
-
|
|
547
|
-
raise_parse_error("unexpected token %s", state);
|
|
548
|
-
break;
|
|
537
|
+
break;
|
|
549
538
|
}
|
|
550
|
-
|
|
551
|
-
|
|
539
|
+
default:
|
|
540
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
541
|
+
break;
|
|
552
542
|
}
|
|
553
543
|
}
|
|
554
544
|
|
|
555
|
-
static
|
|
545
|
+
ALWAYS_INLINE(static) void
|
|
556
546
|
json_eat_whitespace(JSON_ParserState *state)
|
|
557
547
|
{
|
|
558
|
-
while (
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
548
|
+
while (true) {
|
|
549
|
+
switch (peek(state)) {
|
|
550
|
+
case ' ':
|
|
551
|
+
state->cursor++;
|
|
552
|
+
break;
|
|
553
|
+
case '\n':
|
|
554
|
+
state->cursor++;
|
|
555
|
+
|
|
556
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
557
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
558
|
+
while (rest(state) > 8) {
|
|
559
|
+
uint64_t chunk;
|
|
560
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
561
|
+
if (chunk == 0x2020202020202020) {
|
|
562
|
+
state->cursor += 8;
|
|
563
|
+
continue;
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
567
|
+
state->cursor += consecutive_spaces;
|
|
568
|
+
break;
|
|
569
|
+
}
|
|
570
|
+
#endif
|
|
571
|
+
break;
|
|
572
|
+
case '\t':
|
|
573
|
+
case '\r':
|
|
574
|
+
state->cursor++;
|
|
575
|
+
break;
|
|
576
|
+
case '/':
|
|
577
|
+
json_eat_comments(state);
|
|
578
|
+
break;
|
|
579
|
+
|
|
580
|
+
default:
|
|
581
|
+
return;
|
|
563
582
|
}
|
|
564
583
|
}
|
|
565
584
|
}
|
|
@@ -590,11 +609,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
590
609
|
return result;
|
|
591
610
|
}
|
|
592
611
|
|
|
593
|
-
static inline
|
|
612
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
594
613
|
{
|
|
614
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
615
|
+
// Simple heuristics:
|
|
616
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
617
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
618
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
622
|
+
{
|
|
623
|
+
bool intern = is_name || config->freeze;
|
|
624
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
595
625
|
size_t bufferSize = stringEnd - string;
|
|
596
626
|
|
|
597
|
-
if (is_name && state->in_array) {
|
|
627
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
598
628
|
VALUE cached_key;
|
|
599
629
|
if (RB_UNLIKELY(symbolize)) {
|
|
600
630
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -610,104 +640,125 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
610
640
|
return build_string(string, stringEnd, intern, symbolize);
|
|
611
641
|
}
|
|
612
642
|
|
|
613
|
-
|
|
614
|
-
{
|
|
615
|
-
|
|
616
|
-
const char
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
char buf[4];
|
|
643
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
644
|
+
typedef struct _json_unescape_positions {
|
|
645
|
+
long size;
|
|
646
|
+
const char **positions;
|
|
647
|
+
unsigned long additional_backslashes;
|
|
648
|
+
} JSON_UnescapePositions;
|
|
620
649
|
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
650
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
651
|
+
{
|
|
652
|
+
while (positions->size) {
|
|
653
|
+
positions->size--;
|
|
654
|
+
const char *next_position = positions->positions[0];
|
|
655
|
+
positions->positions++;
|
|
656
|
+
if (next_position >= pe) {
|
|
657
|
+
return next_position;
|
|
627
658
|
}
|
|
659
|
+
}
|
|
628
660
|
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
661
|
+
if (positions->additional_backslashes) {
|
|
662
|
+
positions->additional_backslashes--;
|
|
663
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
632
664
|
}
|
|
633
665
|
|
|
666
|
+
return NULL;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
670
|
+
{
|
|
671
|
+
bool intern = is_name || config->freeze;
|
|
672
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
673
|
+
size_t bufferSize = stringEnd - string;
|
|
674
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
675
|
+
char *buffer;
|
|
676
|
+
|
|
634
677
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
635
678
|
rb_enc_associate_index(result, utf8_encindex);
|
|
636
679
|
buffer = RSTRING_PTR(result);
|
|
637
680
|
bufferStart = buffer;
|
|
638
681
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
682
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
683
|
+
|
|
684
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
642
685
|
if (pe > p) {
|
|
643
686
|
MEMCPY(buffer, p, char, pe - p);
|
|
644
687
|
buffer += pe - p;
|
|
645
688
|
}
|
|
646
689
|
switch (*++pe) {
|
|
690
|
+
case '"':
|
|
691
|
+
case '/':
|
|
692
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
693
|
+
break;
|
|
694
|
+
case '\\':
|
|
695
|
+
APPEND_CHAR('\\');
|
|
696
|
+
break;
|
|
647
697
|
case 'n':
|
|
648
|
-
|
|
698
|
+
APPEND_CHAR('\n');
|
|
649
699
|
break;
|
|
650
700
|
case 'r':
|
|
651
|
-
|
|
701
|
+
APPEND_CHAR('\r');
|
|
652
702
|
break;
|
|
653
703
|
case 't':
|
|
654
|
-
|
|
655
|
-
break;
|
|
656
|
-
case '"':
|
|
657
|
-
unescape = (char *) "\"";
|
|
658
|
-
break;
|
|
659
|
-
case '\\':
|
|
660
|
-
unescape = (char *) "\\";
|
|
704
|
+
APPEND_CHAR('\t');
|
|
661
705
|
break;
|
|
662
706
|
case 'b':
|
|
663
|
-
|
|
707
|
+
APPEND_CHAR('\b');
|
|
664
708
|
break;
|
|
665
709
|
case 'f':
|
|
666
|
-
|
|
710
|
+
APPEND_CHAR('\f');
|
|
667
711
|
break;
|
|
668
|
-
case 'u':
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
if (
|
|
687
|
-
raise_parse_error_at("
|
|
688
|
-
}
|
|
689
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
690
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
691
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
692
|
-
| (sur & 0x3FF));
|
|
693
|
-
pe += 5;
|
|
694
|
-
} else {
|
|
695
|
-
unescape = (char *) "?";
|
|
696
|
-
break;
|
|
712
|
+
case 'u': {
|
|
713
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
714
|
+
pe += 3;
|
|
715
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
716
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
717
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
718
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
719
|
+
* final codepoint.
|
|
720
|
+
*
|
|
721
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
722
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
723
|
+
* Area".
|
|
724
|
+
*/
|
|
725
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
726
|
+
pe++;
|
|
727
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
728
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
729
|
+
|
|
730
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
731
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
697
732
|
}
|
|
733
|
+
|
|
734
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
735
|
+
pe += 5;
|
|
736
|
+
} else {
|
|
737
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
738
|
+
break;
|
|
698
739
|
}
|
|
699
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
700
|
-
unescape = buf;
|
|
701
740
|
}
|
|
741
|
+
|
|
742
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
743
|
+
buffer += unescape_len;
|
|
744
|
+
p = ++pe;
|
|
702
745
|
break;
|
|
746
|
+
}
|
|
703
747
|
default:
|
|
704
|
-
|
|
705
|
-
|
|
748
|
+
if ((unsigned char)*pe < 0x20) {
|
|
749
|
+
if (!config->allow_control_characters) {
|
|
750
|
+
if (*pe == '\n') {
|
|
751
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
752
|
+
}
|
|
753
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
754
|
+
}
|
|
755
|
+
} else {
|
|
756
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
757
|
+
}
|
|
758
|
+
break;
|
|
706
759
|
}
|
|
707
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
708
|
-
buffer += unescape_len;
|
|
709
|
-
p = ++pe;
|
|
710
760
|
}
|
|
761
|
+
#undef APPEND_CHAR
|
|
711
762
|
|
|
712
763
|
if (stringEnd > p) {
|
|
713
764
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -718,33 +769,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
718
769
|
if (symbolize) {
|
|
719
770
|
result = rb_str_intern(result);
|
|
720
771
|
} else if (intern) {
|
|
721
|
-
result =
|
|
772
|
+
result = rb_str_to_interned_str(result);
|
|
722
773
|
}
|
|
723
774
|
|
|
724
775
|
return result;
|
|
725
776
|
}
|
|
726
777
|
|
|
727
778
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
728
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
729
|
-
{
|
|
730
|
-
bool negative = false;
|
|
731
|
-
if (*p == '-') {
|
|
732
|
-
negative = true;
|
|
733
|
-
p++;
|
|
734
|
-
}
|
|
735
|
-
|
|
736
|
-
long long memo = 0;
|
|
737
|
-
while (p < pe) {
|
|
738
|
-
memo *= 10;
|
|
739
|
-
memo += *p - '0';
|
|
740
|
-
p++;
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
if (negative) {
|
|
744
|
-
memo = -memo;
|
|
745
|
-
}
|
|
746
|
-
return LL2NUM(memo);
|
|
747
|
-
}
|
|
748
779
|
|
|
749
780
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
750
781
|
{
|
|
@@ -758,17 +789,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
758
789
|
}
|
|
759
790
|
|
|
760
791
|
static inline VALUE
|
|
761
|
-
json_decode_integer(const char *start, const char *end)
|
|
792
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
762
793
|
{
|
|
763
|
-
|
|
764
|
-
if (
|
|
765
|
-
return
|
|
794
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
795
|
+
if (negative) {
|
|
796
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
766
797
|
}
|
|
767
|
-
return
|
|
798
|
+
return UINT64T2NUM(mantissa);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
return json_decode_large_integer(start, end - start);
|
|
768
802
|
}
|
|
769
803
|
|
|
770
804
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
771
805
|
{
|
|
806
|
+
if (RB_LIKELY(len < 64)) {
|
|
807
|
+
char buffer[64];
|
|
808
|
+
MEMCPY(buffer, start, char, len);
|
|
809
|
+
buffer[len] = '\0';
|
|
810
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
811
|
+
}
|
|
812
|
+
|
|
772
813
|
VALUE buffer_v;
|
|
773
814
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
774
815
|
MEMCPY(buffer, start, char, len);
|
|
@@ -778,21 +819,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
778
819
|
return number;
|
|
779
820
|
}
|
|
780
821
|
|
|
781
|
-
|
|
822
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
823
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
824
|
+
*/
|
|
825
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
826
|
+
const char *start, const char *end)
|
|
782
827
|
{
|
|
783
|
-
long len = end - start;
|
|
784
|
-
|
|
785
828
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
786
|
-
VALUE text = rb_str_new(start,
|
|
829
|
+
VALUE text = rb_str_new(start, end - start);
|
|
787
830
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
788
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
789
|
-
char buffer[64];
|
|
790
|
-
MEMCPY(buffer, start, char, len);
|
|
791
|
-
buffer[len] = '\0';
|
|
792
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
793
|
-
} else {
|
|
794
|
-
return json_decode_large_float(start, len);
|
|
795
831
|
}
|
|
832
|
+
|
|
833
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
834
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
835
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
836
|
+
return json_decode_large_float(start, end - start);
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
796
840
|
}
|
|
797
841
|
|
|
798
842
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -807,32 +851,75 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
|
|
|
807
851
|
return array;
|
|
808
852
|
}
|
|
809
853
|
|
|
810
|
-
static
|
|
854
|
+
static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
811
855
|
{
|
|
812
|
-
VALUE
|
|
813
|
-
|
|
856
|
+
VALUE set = rb_hash_new_capa(count / 2);
|
|
857
|
+
for (size_t index = 0; index < count; index += 2) {
|
|
858
|
+
size_t before = RHASH_SIZE(set);
|
|
859
|
+
VALUE key = pairs[index];
|
|
860
|
+
rb_hash_aset(set, key, Qtrue);
|
|
861
|
+
if (RHASH_SIZE(set) == before) {
|
|
862
|
+
if (RB_SYMBOL_P(key)) {
|
|
863
|
+
return rb_sym2str(key);
|
|
864
|
+
}
|
|
865
|
+
return key;
|
|
866
|
+
}
|
|
867
|
+
}
|
|
868
|
+
return Qfalse;
|
|
869
|
+
}
|
|
814
870
|
|
|
815
|
-
|
|
871
|
+
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
872
|
+
{
|
|
873
|
+
VALUE message = rb_sprintf(
|
|
874
|
+
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
875
|
+
rb_inspect(duplicate_key)
|
|
876
|
+
);
|
|
816
877
|
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
878
|
+
emit_parse_warning(RSTRING_PTR(message), state);
|
|
879
|
+
RB_GC_GUARD(message);
|
|
880
|
+
}
|
|
820
881
|
|
|
821
|
-
|
|
882
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
|
883
|
+
RBIMPL_ATTR_NORETURN()
|
|
884
|
+
#endif
|
|
885
|
+
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
886
|
+
{
|
|
887
|
+
VALUE message = rb_sprintf(
|
|
888
|
+
"duplicate key %"PRIsVALUE,
|
|
889
|
+
rb_inspect(duplicate_key)
|
|
890
|
+
);
|
|
891
|
+
|
|
892
|
+
raise_parse_error(RSTRING_PTR(message), state);
|
|
893
|
+
RB_GC_GUARD(message);
|
|
822
894
|
}
|
|
823
895
|
|
|
824
|
-
static inline VALUE
|
|
896
|
+
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
|
825
897
|
{
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
898
|
+
size_t entries_count = count / 2;
|
|
899
|
+
VALUE object = rb_hash_new_capa(entries_count);
|
|
900
|
+
const VALUE *pairs = rvalue_stack_peek(state->stack, count);
|
|
901
|
+
rb_hash_bulk_insert(count, pairs, object);
|
|
902
|
+
|
|
903
|
+
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
|
|
904
|
+
switch (config->on_duplicate_key) {
|
|
905
|
+
case JSON_IGNORE:
|
|
906
|
+
break;
|
|
907
|
+
case JSON_DEPRECATED:
|
|
908
|
+
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
|
|
909
|
+
break;
|
|
910
|
+
case JSON_RAISE:
|
|
911
|
+
raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
|
|
912
|
+
break;
|
|
913
|
+
}
|
|
833
914
|
}
|
|
834
915
|
|
|
835
|
-
|
|
916
|
+
rvalue_stack_pop(state->stack, count);
|
|
917
|
+
|
|
918
|
+
if (config->freeze) {
|
|
919
|
+
RB_OBJ_FREEZE(object);
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
return object;
|
|
836
923
|
}
|
|
837
924
|
|
|
838
925
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
@@ -844,7 +931,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
|
|
|
844
931
|
return value;
|
|
845
932
|
}
|
|
846
933
|
|
|
847
|
-
static const bool
|
|
934
|
+
static const bool string_scan_table[256] = {
|
|
848
935
|
// ASCII Control Characters
|
|
849
936
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
850
937
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
@@ -857,51 +944,252 @@ static const bool string_scan[256] = {
|
|
|
857
944
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
858
945
|
};
|
|
859
946
|
|
|
860
|
-
|
|
947
|
+
#ifdef HAVE_SIMD
|
|
948
|
+
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
949
|
+
#endif /* HAVE_SIMD */
|
|
950
|
+
|
|
951
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
861
952
|
{
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
bool escaped = false;
|
|
953
|
+
#ifdef HAVE_SIMD
|
|
954
|
+
#if defined(HAVE_SIMD_NEON)
|
|
865
955
|
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
956
|
+
uint64_t mask = 0;
|
|
957
|
+
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
958
|
+
state->cursor += trailing_zeros64(mask) >> 2;
|
|
959
|
+
return true;
|
|
960
|
+
}
|
|
961
|
+
|
|
962
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
963
|
+
if (simd_impl == SIMD_SSE2) {
|
|
964
|
+
int mask = 0;
|
|
965
|
+
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
966
|
+
state->cursor += trailing_zeros(mask);
|
|
967
|
+
return true;
|
|
968
|
+
}
|
|
969
|
+
}
|
|
970
|
+
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
971
|
+
#endif /* HAVE_SIMD */
|
|
972
|
+
|
|
973
|
+
while (!eos(state)) {
|
|
974
|
+
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
975
|
+
return true;
|
|
976
|
+
}
|
|
977
|
+
state->cursor++;
|
|
978
|
+
}
|
|
979
|
+
return false;
|
|
980
|
+
}
|
|
981
|
+
|
|
982
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
983
|
+
{
|
|
984
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
985
|
+
JSON_UnescapePositions positions = {
|
|
986
|
+
.size = 0,
|
|
987
|
+
.positions = backslashes,
|
|
988
|
+
.additional_backslashes = 0,
|
|
989
|
+
};
|
|
990
|
+
|
|
991
|
+
do {
|
|
992
|
+
switch (*state->cursor) {
|
|
993
|
+
case '"': {
|
|
994
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
995
|
+
state->cursor++;
|
|
996
|
+
return json_push_value(state, config, string);
|
|
997
|
+
}
|
|
998
|
+
case '\\': {
|
|
999
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1000
|
+
backslashes[positions.size] = state->cursor;
|
|
1001
|
+
positions.size++;
|
|
1002
|
+
} else {
|
|
1003
|
+
positions.additional_backslashes++;
|
|
881
1004
|
}
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
break;
|
|
1005
|
+
state->cursor++;
|
|
1006
|
+
break;
|
|
885
1007
|
}
|
|
1008
|
+
default:
|
|
1009
|
+
if (!config->allow_control_characters) {
|
|
1010
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1011
|
+
}
|
|
1012
|
+
break;
|
|
886
1013
|
}
|
|
887
1014
|
|
|
888
1015
|
state->cursor++;
|
|
889
|
-
}
|
|
1016
|
+
} while (string_scan(state));
|
|
890
1017
|
|
|
891
1018
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
892
1019
|
return Qfalse;
|
|
893
1020
|
}
|
|
894
1021
|
|
|
1022
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1023
|
+
{
|
|
1024
|
+
state->cursor++;
|
|
1025
|
+
const char *start = state->cursor;
|
|
1026
|
+
|
|
1027
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1028
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1032
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1033
|
+
state->cursor++;
|
|
1034
|
+
return json_push_value(state, config, string);
|
|
1035
|
+
}
|
|
1036
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1040
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1041
|
+
// Additional References:
|
|
1042
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1043
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1044
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1045
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1046
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1047
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1048
|
+
val -= 0x3030303030303030;
|
|
1049
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1050
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1051
|
+
return val;
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1055
|
+
const uint32_t mask = 0x000000FF;
|
|
1056
|
+
const uint32_t mul1 = 100;
|
|
1057
|
+
val -= 0x30303030;
|
|
1058
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1059
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1060
|
+
return val;
|
|
1061
|
+
}
|
|
1062
|
+
#endif
|
|
1063
|
+
|
|
1064
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1065
|
+
{
|
|
1066
|
+
const char *start = state->cursor;
|
|
1067
|
+
|
|
1068
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1069
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1070
|
+
uint64_t next_8bytes;
|
|
1071
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1072
|
+
|
|
1073
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1074
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1075
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1076
|
+
|
|
1077
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1078
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1079
|
+
state->cursor += 8;
|
|
1080
|
+
continue;
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1084
|
+
|
|
1085
|
+
if (consecutive_digits >= 4) {
|
|
1086
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1087
|
+
state->cursor += 4;
|
|
1088
|
+
consecutive_digits -= 4;
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
while (consecutive_digits) {
|
|
1092
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1093
|
+
consecutive_digits--;
|
|
1094
|
+
state->cursor++;
|
|
1095
|
+
}
|
|
1096
|
+
|
|
1097
|
+
return (int)(state->cursor - start);
|
|
1098
|
+
}
|
|
1099
|
+
#endif
|
|
1100
|
+
|
|
1101
|
+
char next_char;
|
|
1102
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1103
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1104
|
+
state->cursor++;
|
|
1105
|
+
}
|
|
1106
|
+
return (int)(state->cursor - start);
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1110
|
+
{
|
|
1111
|
+
bool integer = true;
|
|
1112
|
+
const char first_digit = *state->cursor;
|
|
1113
|
+
|
|
1114
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1115
|
+
int32_t exponent = 0;
|
|
1116
|
+
int decimal_point_pos = -1;
|
|
1117
|
+
uint64_t mantissa = 0;
|
|
1118
|
+
|
|
1119
|
+
// Parse integer part and extract mantissa digits
|
|
1120
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1121
|
+
|
|
1122
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1123
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
// Parse fractional part
|
|
1127
|
+
if (peek(state) == '.') {
|
|
1128
|
+
integer = false;
|
|
1129
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1130
|
+
state->cursor++;
|
|
1131
|
+
|
|
1132
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1133
|
+
mantissa_digits += fractional_digits;
|
|
1134
|
+
|
|
1135
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1136
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
|
|
1140
|
+
// Parse exponent
|
|
1141
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1142
|
+
integer = false;
|
|
1143
|
+
state->cursor++;
|
|
1144
|
+
|
|
1145
|
+
bool negative_exponent = false;
|
|
1146
|
+
const char next_char = peek(state);
|
|
1147
|
+
if (next_char == '-' || next_char == '+') {
|
|
1148
|
+
negative_exponent = next_char == '-';
|
|
1149
|
+
state->cursor++;
|
|
1150
|
+
}
|
|
1151
|
+
|
|
1152
|
+
uint64_t abs_exponent = 0;
|
|
1153
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1154
|
+
|
|
1155
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1156
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1157
|
+
}
|
|
1158
|
+
|
|
1159
|
+
exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
if (integer) {
|
|
1163
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1166
|
+
// Adjust exponent based on decimal point position
|
|
1167
|
+
if (decimal_point_pos >= 0) {
|
|
1168
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1175
|
+
{
|
|
1176
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1179
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1180
|
+
{
|
|
1181
|
+
const char *start = state->cursor;
|
|
1182
|
+
state->cursor++;
|
|
1183
|
+
return json_parse_number(state, config, true, start);
|
|
1184
|
+
}
|
|
1185
|
+
|
|
895
1186
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
896
1187
|
{
|
|
897
1188
|
json_eat_whitespace(state);
|
|
898
|
-
if (state->cursor >= state->end) {
|
|
899
|
-
raise_parse_error("unexpected end of input", state);
|
|
900
|
-
}
|
|
901
1189
|
|
|
902
|
-
switch (
|
|
1190
|
+
switch (peek(state)) {
|
|
903
1191
|
case 'n':
|
|
904
|
-
if ((state
|
|
1192
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
905
1193
|
state->cursor += 4;
|
|
906
1194
|
return json_push_value(state, config, Qnil);
|
|
907
1195
|
}
|
|
@@ -909,7 +1197,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
909
1197
|
raise_parse_error("unexpected token %s", state);
|
|
910
1198
|
break;
|
|
911
1199
|
case 't':
|
|
912
|
-
if ((state
|
|
1200
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
913
1201
|
state->cursor += 4;
|
|
914
1202
|
return json_push_value(state, config, Qtrue);
|
|
915
1203
|
}
|
|
@@ -918,7 +1206,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
918
1206
|
break;
|
|
919
1207
|
case 'f':
|
|
920
1208
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
921
|
-
if ((state
|
|
1209
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
922
1210
|
state->cursor += 5;
|
|
923
1211
|
return json_push_value(state, config, Qfalse);
|
|
924
1212
|
}
|
|
@@ -927,7 +1215,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
927
1215
|
break;
|
|
928
1216
|
case 'N':
|
|
929
1217
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
930
|
-
if (config->allow_nan && (state
|
|
1218
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
931
1219
|
state->cursor += 3;
|
|
932
1220
|
return json_push_value(state, config, CNaN);
|
|
933
1221
|
}
|
|
@@ -935,16 +1223,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
935
1223
|
raise_parse_error("unexpected token %s", state);
|
|
936
1224
|
break;
|
|
937
1225
|
case 'I':
|
|
938
|
-
if (config->allow_nan && (state
|
|
1226
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
939
1227
|
state->cursor += 8;
|
|
940
1228
|
return json_push_value(state, config, CInfinity);
|
|
941
1229
|
}
|
|
942
1230
|
|
|
943
1231
|
raise_parse_error("unexpected token %s", state);
|
|
944
1232
|
break;
|
|
945
|
-
case '-':
|
|
1233
|
+
case '-': {
|
|
946
1234
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
947
|
-
if ((state
|
|
1235
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
948
1236
|
if (config->allow_nan) {
|
|
949
1237
|
state->cursor += 9;
|
|
950
1238
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -952,62 +1240,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
952
1240
|
raise_parse_error("unexpected token %s", state);
|
|
953
1241
|
}
|
|
954
1242
|
}
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
bool integer = true;
|
|
958
|
-
|
|
959
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
960
|
-
const char *start = state->cursor;
|
|
961
|
-
state->cursor++;
|
|
962
|
-
|
|
963
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
964
|
-
state->cursor++;
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
long integer_length = state->cursor - start;
|
|
968
|
-
|
|
969
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
970
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
971
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
972
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
973
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
974
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
975
|
-
}
|
|
976
|
-
|
|
977
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
978
|
-
integer = false;
|
|
979
|
-
state->cursor++;
|
|
980
|
-
|
|
981
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
982
|
-
raise_parse_error("invalid number: %s", state);
|
|
983
|
-
}
|
|
984
|
-
|
|
985
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
986
|
-
state->cursor++;
|
|
987
|
-
}
|
|
988
|
-
}
|
|
989
|
-
|
|
990
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
991
|
-
integer = false;
|
|
992
|
-
state->cursor++;
|
|
993
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
994
|
-
state->cursor++;
|
|
995
|
-
}
|
|
996
|
-
|
|
997
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
998
|
-
raise_parse_error("invalid number: %s", state);
|
|
999
|
-
}
|
|
1000
|
-
|
|
1001
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1002
|
-
state->cursor++;
|
|
1003
|
-
}
|
|
1004
|
-
}
|
|
1005
|
-
|
|
1006
|
-
if (integer) {
|
|
1007
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1008
|
-
}
|
|
1009
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1243
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1244
|
+
break;
|
|
1010
1245
|
}
|
|
1246
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1247
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1248
|
+
break;
|
|
1011
1249
|
case '"': {
|
|
1012
1250
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1013
1251
|
return json_parse_string(state, config, false);
|
|
@@ -1018,7 +1256,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1018
1256
|
json_eat_whitespace(state);
|
|
1019
1257
|
long stack_head = state->stack->head;
|
|
1020
1258
|
|
|
1021
|
-
if ((state
|
|
1259
|
+
if (peek(state) == ']') {
|
|
1022
1260
|
state->cursor++;
|
|
1023
1261
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1024
1262
|
} else {
|
|
@@ -1033,26 +1271,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1033
1271
|
while (true) {
|
|
1034
1272
|
json_eat_whitespace(state);
|
|
1035
1273
|
|
|
1036
|
-
|
|
1037
|
-
if (*state->cursor == ']') {
|
|
1038
|
-
state->cursor++;
|
|
1039
|
-
long count = state->stack->head - stack_head;
|
|
1040
|
-
state->current_nesting--;
|
|
1041
|
-
state->in_array--;
|
|
1042
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1043
|
-
}
|
|
1274
|
+
const char next_char = peek(state);
|
|
1044
1275
|
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
}
|
|
1276
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1277
|
+
state->cursor++;
|
|
1278
|
+
if (config->allow_trailing_comma) {
|
|
1279
|
+
json_eat_whitespace(state);
|
|
1280
|
+
if (peek(state) == ']') {
|
|
1281
|
+
continue;
|
|
1052
1282
|
}
|
|
1053
|
-
json_parse_any(state, config);
|
|
1054
|
-
continue;
|
|
1055
1283
|
}
|
|
1284
|
+
json_parse_any(state, config);
|
|
1285
|
+
continue;
|
|
1286
|
+
}
|
|
1287
|
+
|
|
1288
|
+
if (next_char == ']') {
|
|
1289
|
+
state->cursor++;
|
|
1290
|
+
long count = state->stack->head - stack_head;
|
|
1291
|
+
state->current_nesting--;
|
|
1292
|
+
state->in_array--;
|
|
1293
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1056
1294
|
}
|
|
1057
1295
|
|
|
1058
1296
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1060,11 +1298,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1060
1298
|
break;
|
|
1061
1299
|
}
|
|
1062
1300
|
case '{': {
|
|
1301
|
+
const char *object_start_cursor = state->cursor;
|
|
1302
|
+
|
|
1063
1303
|
state->cursor++;
|
|
1064
1304
|
json_eat_whitespace(state);
|
|
1065
1305
|
long stack_head = state->stack->head;
|
|
1066
1306
|
|
|
1067
|
-
if ((state
|
|
1307
|
+
if (peek(state) == '}') {
|
|
1068
1308
|
state->cursor++;
|
|
1069
1309
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1070
1310
|
} else {
|
|
@@ -1073,13 +1313,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1313
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1074
1314
|
}
|
|
1075
1315
|
|
|
1076
|
-
if (
|
|
1316
|
+
if (peek(state) != '"') {
|
|
1077
1317
|
raise_parse_error("expected object key, got %s", state);
|
|
1078
1318
|
}
|
|
1079
1319
|
json_parse_string(state, config, true);
|
|
1080
1320
|
|
|
1081
1321
|
json_eat_whitespace(state);
|
|
1082
|
-
if ((state
|
|
1322
|
+
if (peek(state) != ':') {
|
|
1083
1323
|
raise_parse_error("expected ':' after object key", state);
|
|
1084
1324
|
}
|
|
1085
1325
|
state->cursor++;
|
|
@@ -1090,39 +1330,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1090
1330
|
while (true) {
|
|
1091
1331
|
json_eat_whitespace(state);
|
|
1092
1332
|
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
return json_push_value(state, config, json_decode_object(state, config, count));
|
|
1099
|
-
}
|
|
1333
|
+
const char next_char = peek(state);
|
|
1334
|
+
if (next_char == '}') {
|
|
1335
|
+
state->cursor++;
|
|
1336
|
+
state->current_nesting--;
|
|
1337
|
+
size_t count = state->stack->head - stack_head;
|
|
1100
1338
|
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1339
|
+
// Temporary rewind cursor in case an error is raised
|
|
1340
|
+
const char *final_cursor = state->cursor;
|
|
1341
|
+
state->cursor = object_start_cursor;
|
|
1342
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1343
|
+
state->cursor = final_cursor;
|
|
1104
1344
|
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
continue;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1345
|
+
return json_push_value(state, config, object);
|
|
1346
|
+
}
|
|
1110
1347
|
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
json_parse_string(state, config, true);
|
|
1348
|
+
if (next_char == ',') {
|
|
1349
|
+
state->cursor++;
|
|
1350
|
+
json_eat_whitespace(state);
|
|
1115
1351
|
|
|
1116
|
-
|
|
1117
|
-
if ((state
|
|
1118
|
-
|
|
1352
|
+
if (config->allow_trailing_comma) {
|
|
1353
|
+
if (peek(state) == '}') {
|
|
1354
|
+
continue;
|
|
1119
1355
|
}
|
|
1120
|
-
|
|
1356
|
+
}
|
|
1121
1357
|
|
|
1122
|
-
|
|
1358
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1359
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1360
|
+
}
|
|
1361
|
+
json_parse_string(state, config, true);
|
|
1123
1362
|
|
|
1124
|
-
|
|
1363
|
+
json_eat_whitespace(state);
|
|
1364
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1365
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1125
1366
|
}
|
|
1367
|
+
state->cursor++;
|
|
1368
|
+
|
|
1369
|
+
json_parse_any(state, config);
|
|
1370
|
+
|
|
1371
|
+
continue;
|
|
1126
1372
|
}
|
|
1127
1373
|
|
|
1128
1374
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1130,18 +1376,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1130
1376
|
break;
|
|
1131
1377
|
}
|
|
1132
1378
|
|
|
1379
|
+
case 0:
|
|
1380
|
+
raise_parse_error("unexpected end of input", state);
|
|
1381
|
+
break;
|
|
1382
|
+
|
|
1133
1383
|
default:
|
|
1134
1384
|
raise_parse_error("unexpected character: %s", state);
|
|
1135
1385
|
break;
|
|
1136
1386
|
}
|
|
1137
1387
|
|
|
1138
|
-
raise_parse_error("
|
|
1388
|
+
raise_parse_error("unreachable: %s", state);
|
|
1389
|
+
return Qundef;
|
|
1139
1390
|
}
|
|
1140
1391
|
|
|
1141
1392
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1142
1393
|
{
|
|
1143
1394
|
json_eat_whitespace(state);
|
|
1144
|
-
if (state
|
|
1395
|
+
if (!eos(state)) {
|
|
1145
1396
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1146
1397
|
}
|
|
1147
1398
|
}
|
|
@@ -1178,13 +1429,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1178
1429
|
{
|
|
1179
1430
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1180
1431
|
|
|
1181
|
-
if (key == sym_max_nesting)
|
|
1182
|
-
else if (key == sym_allow_nan)
|
|
1183
|
-
else if (key == sym_allow_trailing_comma)
|
|
1184
|
-
else if (key ==
|
|
1185
|
-
else if (key ==
|
|
1186
|
-
else if (key ==
|
|
1187
|
-
else if (key ==
|
|
1432
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1433
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1434
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1435
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1436
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1437
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1438
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1439
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1440
|
+
else if (key == sym_decimal_class) {
|
|
1188
1441
|
if (RTEST(val)) {
|
|
1189
1442
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1190
1443
|
config->decimal_class = val;
|
|
@@ -1257,6 +1510,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1257
1510
|
*/
|
|
1258
1511
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1259
1512
|
{
|
|
1513
|
+
rb_check_frozen(self);
|
|
1260
1514
|
GET_PARSER_CONFIG;
|
|
1261
1515
|
|
|
1262
1516
|
parser_config_init(config, opts);
|
|
@@ -1352,7 +1606,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1352
1606
|
JSON_ParserConfig_memsize,
|
|
1353
1607
|
},
|
|
1354
1608
|
0, 0,
|
|
1355
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1609
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1356
1610
|
};
|
|
1357
1611
|
|
|
1358
1612
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1396,15 +1650,13 @@ void Init_parser(void)
|
|
|
1396
1650
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1397
1651
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1398
1652
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1653
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1399
1654
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1400
1655
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1401
1656
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1402
1657
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1658
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1403
1659
|
|
|
1404
|
-
i_chr = rb_intern("chr");
|
|
1405
|
-
i_aset = rb_intern("[]=");
|
|
1406
|
-
i_aref = rb_intern("[]");
|
|
1407
|
-
i_leftshift = rb_intern("<<");
|
|
1408
1660
|
i_new = rb_intern("new");
|
|
1409
1661
|
i_try_convert = rb_intern("try_convert");
|
|
1410
1662
|
i_uminus = rb_intern("-@");
|
|
@@ -1413,4 +1665,8 @@ void Init_parser(void)
|
|
|
1413
1665
|
binary_encindex = rb_ascii8bit_encindex();
|
|
1414
1666
|
utf8_encindex = rb_utf8_encindex();
|
|
1415
1667
|
enc_utf8 = rb_utf8_encoding();
|
|
1668
|
+
|
|
1669
|
+
#ifdef HAVE_SIMD
|
|
1670
|
+
simd_impl = find_simd_implementation();
|
|
1671
|
+
#endif
|
|
1416
1672
|
}
|