json 2.13.1 → 2.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +98 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +47 -66
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +375 -552
- data/ext/json/ext/json.h +105 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +661 -473
- data/ext/json/ext/simd/simd.h +81 -60
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +118 -49
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +126 -64
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +56 -1
- metadata +6 -3
|
@@ -1,50 +1,22 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
40
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
41
13
|
|
|
42
14
|
static int binary_encindex;
|
|
43
15
|
static int utf8_encindex;
|
|
44
16
|
|
|
45
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
18
|
// For TruffleRuby
|
|
47
|
-
void
|
|
19
|
+
static void
|
|
48
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
21
|
{
|
|
50
22
|
long index = 0;
|
|
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
34
|
#endif
|
|
63
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
64
42
|
|
|
65
43
|
/* name cache */
|
|
66
44
|
|
|
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
84
|
cache->entries[index] = rstring;
|
|
107
85
|
}
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
93
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
116
108
|
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
117
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
118
120
|
|
|
119
|
-
static
|
|
121
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
122
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
125
|
+
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
127
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
132
132
|
}
|
|
133
|
+
}
|
|
133
134
|
|
|
135
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
134
137
|
int low = 0;
|
|
135
138
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
139
|
|
|
139
140
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
141
142
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
144
|
|
|
144
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
145
146
|
return entry;
|
|
146
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
147
148
|
low = mid + 1;
|
|
148
149
|
} else {
|
|
149
150
|
high = mid - 1;
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
154
|
VALUE rstring = build_interned_string(str, length);
|
|
160
155
|
|
|
161
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
158
|
}
|
|
168
159
|
return rstring;
|
|
169
160
|
}
|
|
170
161
|
|
|
171
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
163
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
164
|
int low = 0;
|
|
187
165
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
166
|
|
|
191
167
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
193
169
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
171
|
|
|
196
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
197
173
|
return entry;
|
|
198
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
199
175
|
low = mid + 1;
|
|
200
176
|
} else {
|
|
201
177
|
high = mid - 1;
|
|
202
178
|
}
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
182
|
|
|
213
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
185
|
}
|
|
220
186
|
return rsymbol;
|
|
221
187
|
}
|
|
@@ -330,15 +296,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
296
|
}
|
|
331
297
|
}
|
|
332
298
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
299
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
300
|
{
|
|
344
301
|
int len = 1;
|
|
@@ -379,7 +336,8 @@ typedef struct JSON_ParserStruct {
|
|
|
379
336
|
int max_nesting;
|
|
380
337
|
bool allow_nan;
|
|
381
338
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
339
|
+
bool allow_control_characters;
|
|
340
|
+
bool allow_invalid_escape;
|
|
383
341
|
bool symbolize_names;
|
|
384
342
|
bool freeze;
|
|
385
343
|
} JSON_ParserConfig;
|
|
@@ -395,6 +353,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
353
|
int current_nesting;
|
|
396
354
|
} JSON_ParserState;
|
|
397
355
|
|
|
356
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
357
|
+
return state->end - state->cursor;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
361
|
+
return state->cursor >= state->end;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
static inline char peek(JSON_ParserState *state)
|
|
365
|
+
{
|
|
366
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
367
|
+
return 0;
|
|
368
|
+
}
|
|
369
|
+
return *state->cursor;
|
|
370
|
+
}
|
|
371
|
+
|
|
398
372
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
373
|
{
|
|
400
374
|
const char *cursor = state->cursor;
|
|
@@ -422,18 +396,15 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
422
396
|
long line, column;
|
|
423
397
|
cursor_position(state, &line, &column);
|
|
424
398
|
|
|
425
|
-
|
|
399
|
+
VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
|
|
400
|
+
rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
|
|
426
401
|
}
|
|
427
402
|
|
|
428
403
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
#endif
|
|
432
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
404
|
+
|
|
405
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
|
|
433
406
|
{
|
|
434
407
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
435
|
-
long line, column;
|
|
436
|
-
cursor_position(state, &line, &column);
|
|
437
408
|
|
|
438
409
|
const char *ptr = "EOF";
|
|
439
410
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -468,17 +439,26 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
468
439
|
VALUE msg = rb_sprintf(format, ptr);
|
|
469
440
|
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
|
470
441
|
RB_GC_GUARD(msg);
|
|
442
|
+
return message;
|
|
443
|
+
}
|
|
471
444
|
|
|
445
|
+
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
446
|
+
{
|
|
472
447
|
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
|
473
448
|
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
|
474
449
|
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
|
475
|
-
|
|
450
|
+
return exc;
|
|
476
451
|
}
|
|
477
452
|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
453
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
454
|
+
{
|
|
455
|
+
long line, column;
|
|
456
|
+
cursor_position(state, &line, &column);
|
|
457
|
+
VALUE message = build_parse_error_message(format, state, line, column);
|
|
458
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
482
462
|
{
|
|
483
463
|
state->cursor = at;
|
|
484
464
|
raise_parse_error(format, state);
|
|
@@ -503,23 +483,24 @@ static const signed char digit_values[256] = {
|
|
|
503
483
|
-1, -1, -1, -1, -1, -1, -1
|
|
504
484
|
};
|
|
505
485
|
|
|
506
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
507
|
-
{
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
486
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
487
|
+
{
|
|
488
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
489
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
493
|
+
|
|
494
|
+
const signed char b0 = digit_values[p[0]];
|
|
495
|
+
const signed char b1 = digit_values[p[1]];
|
|
496
|
+
const signed char b2 = digit_values[p[2]];
|
|
497
|
+
const signed char b3 = digit_values[p[3]];
|
|
498
|
+
|
|
499
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
500
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
523
504
|
}
|
|
524
505
|
|
|
525
506
|
#define GET_PARSER_CONFIG \
|
|
@@ -528,61 +509,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
528
509
|
|
|
529
510
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
530
511
|
|
|
531
|
-
static const bool whitespace[256] = {
|
|
532
|
-
[' '] = 1,
|
|
533
|
-
['\t'] = 1,
|
|
534
|
-
['\n'] = 1,
|
|
535
|
-
['\r'] = 1,
|
|
536
|
-
['/'] = 1,
|
|
537
|
-
};
|
|
538
|
-
|
|
539
512
|
static void
|
|
540
513
|
json_eat_comments(JSON_ParserState *state)
|
|
541
514
|
{
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
515
|
+
const char *start = state->cursor;
|
|
516
|
+
state->cursor++;
|
|
517
|
+
|
|
518
|
+
switch (peek(state)) {
|
|
519
|
+
case '/': {
|
|
520
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
521
|
+
if (!state->cursor) {
|
|
522
|
+
state->cursor = state->end;
|
|
523
|
+
} else {
|
|
524
|
+
state->cursor++;
|
|
552
525
|
}
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
526
|
+
break;
|
|
527
|
+
}
|
|
528
|
+
case '*': {
|
|
529
|
+
state->cursor++;
|
|
530
|
+
|
|
531
|
+
while (true) {
|
|
532
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
533
|
+
if (!next_match) {
|
|
534
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
state->cursor = next_match + 1;
|
|
538
|
+
if (peek(state) == '/') {
|
|
539
|
+
state->cursor++;
|
|
540
|
+
break;
|
|
566
541
|
}
|
|
567
|
-
break;
|
|
568
542
|
}
|
|
569
|
-
|
|
570
|
-
raise_parse_error("unexpected token %s", state);
|
|
571
|
-
break;
|
|
543
|
+
break;
|
|
572
544
|
}
|
|
573
|
-
|
|
574
|
-
|
|
545
|
+
default:
|
|
546
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
547
|
+
break;
|
|
575
548
|
}
|
|
576
549
|
}
|
|
577
550
|
|
|
578
|
-
static
|
|
551
|
+
ALWAYS_INLINE(static) void
|
|
579
552
|
json_eat_whitespace(JSON_ParserState *state)
|
|
580
553
|
{
|
|
581
|
-
while (
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
554
|
+
while (true) {
|
|
555
|
+
switch (peek(state)) {
|
|
556
|
+
case ' ':
|
|
557
|
+
state->cursor++;
|
|
558
|
+
break;
|
|
559
|
+
case '\n':
|
|
560
|
+
state->cursor++;
|
|
561
|
+
|
|
562
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
563
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
564
|
+
while (rest(state) > 8) {
|
|
565
|
+
uint64_t chunk;
|
|
566
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
567
|
+
if (chunk == 0x2020202020202020) {
|
|
568
|
+
state->cursor += 8;
|
|
569
|
+
continue;
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
573
|
+
state->cursor += consecutive_spaces;
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
#endif
|
|
577
|
+
break;
|
|
578
|
+
case '\t':
|
|
579
|
+
case '\r':
|
|
580
|
+
state->cursor++;
|
|
581
|
+
break;
|
|
582
|
+
case '/':
|
|
583
|
+
json_eat_comments(state);
|
|
584
|
+
break;
|
|
585
|
+
|
|
586
|
+
default:
|
|
587
|
+
return;
|
|
586
588
|
}
|
|
587
589
|
}
|
|
588
590
|
}
|
|
@@ -613,11 +615,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
613
615
|
return result;
|
|
614
616
|
}
|
|
615
617
|
|
|
616
|
-
static inline
|
|
618
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
617
619
|
{
|
|
620
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
621
|
+
// Simple heuristics:
|
|
622
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
623
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
624
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
628
|
+
{
|
|
629
|
+
bool intern = is_name || config->freeze;
|
|
630
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
618
631
|
size_t bufferSize = stringEnd - string;
|
|
619
632
|
|
|
620
|
-
if (is_name && state->in_array) {
|
|
633
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
621
634
|
VALUE cached_key;
|
|
622
635
|
if (RB_UNLIKELY(symbolize)) {
|
|
623
636
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -633,104 +646,127 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
633
646
|
return build_string(string, stringEnd, intern, symbolize);
|
|
634
647
|
}
|
|
635
648
|
|
|
636
|
-
|
|
637
|
-
{
|
|
638
|
-
|
|
639
|
-
const char
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
char buf[4];
|
|
649
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
650
|
+
typedef struct _json_unescape_positions {
|
|
651
|
+
long size;
|
|
652
|
+
const char **positions;
|
|
653
|
+
unsigned long additional_backslashes;
|
|
654
|
+
} JSON_UnescapePositions;
|
|
643
655
|
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
656
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
657
|
+
{
|
|
658
|
+
while (positions->size) {
|
|
659
|
+
positions->size--;
|
|
660
|
+
const char *next_position = positions->positions[0];
|
|
661
|
+
positions->positions++;
|
|
662
|
+
if (next_position >= pe) {
|
|
663
|
+
return next_position;
|
|
650
664
|
}
|
|
665
|
+
}
|
|
651
666
|
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
667
|
+
if (positions->additional_backslashes) {
|
|
668
|
+
positions->additional_backslashes--;
|
|
669
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
655
670
|
}
|
|
656
671
|
|
|
672
|
+
return NULL;
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
676
|
+
{
|
|
677
|
+
bool intern = is_name || config->freeze;
|
|
678
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
679
|
+
size_t bufferSize = stringEnd - string;
|
|
680
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
681
|
+
char *buffer;
|
|
682
|
+
|
|
657
683
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
658
684
|
rb_enc_associate_index(result, utf8_encindex);
|
|
659
685
|
buffer = RSTRING_PTR(result);
|
|
660
686
|
bufferStart = buffer;
|
|
661
687
|
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
688
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
689
|
+
|
|
690
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
665
691
|
if (pe > p) {
|
|
666
692
|
MEMCPY(buffer, p, char, pe - p);
|
|
667
693
|
buffer += pe - p;
|
|
668
694
|
}
|
|
669
695
|
switch (*++pe) {
|
|
696
|
+
case '"':
|
|
697
|
+
case '/':
|
|
698
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
699
|
+
break;
|
|
700
|
+
case '\\':
|
|
701
|
+
APPEND_CHAR('\\');
|
|
702
|
+
break;
|
|
670
703
|
case 'n':
|
|
671
|
-
|
|
704
|
+
APPEND_CHAR('\n');
|
|
672
705
|
break;
|
|
673
706
|
case 'r':
|
|
674
|
-
|
|
707
|
+
APPEND_CHAR('\r');
|
|
675
708
|
break;
|
|
676
709
|
case 't':
|
|
677
|
-
|
|
678
|
-
break;
|
|
679
|
-
case '"':
|
|
680
|
-
unescape = (char *) "\"";
|
|
681
|
-
break;
|
|
682
|
-
case '\\':
|
|
683
|
-
unescape = (char *) "\\";
|
|
710
|
+
APPEND_CHAR('\t');
|
|
684
711
|
break;
|
|
685
712
|
case 'b':
|
|
686
|
-
|
|
713
|
+
APPEND_CHAR('\b');
|
|
687
714
|
break;
|
|
688
715
|
case 'f':
|
|
689
|
-
|
|
716
|
+
APPEND_CHAR('\f');
|
|
690
717
|
break;
|
|
691
|
-
case 'u':
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
if (
|
|
710
|
-
raise_parse_error_at("
|
|
711
|
-
}
|
|
712
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
713
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
714
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
715
|
-
| (sur & 0x3FF));
|
|
716
|
-
pe += 5;
|
|
717
|
-
} else {
|
|
718
|
-
unescape = (char *) "?";
|
|
719
|
-
break;
|
|
718
|
+
case 'u': {
|
|
719
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
720
|
+
pe += 3;
|
|
721
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
722
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
723
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
724
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
725
|
+
* final codepoint.
|
|
726
|
+
*
|
|
727
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
728
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
729
|
+
* Area".
|
|
730
|
+
*/
|
|
731
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
732
|
+
pe++;
|
|
733
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
734
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
735
|
+
|
|
736
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
737
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
720
738
|
}
|
|
739
|
+
|
|
740
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
741
|
+
pe += 5;
|
|
742
|
+
} else {
|
|
743
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
744
|
+
break;
|
|
721
745
|
}
|
|
722
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
723
|
-
unescape = buf;
|
|
724
746
|
}
|
|
747
|
+
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
749
|
+
buffer += unescape_len;
|
|
750
|
+
p = ++pe;
|
|
725
751
|
break;
|
|
752
|
+
}
|
|
726
753
|
default:
|
|
727
|
-
|
|
728
|
-
|
|
754
|
+
if ((unsigned char)*pe < 0x20) {
|
|
755
|
+
if (!config->allow_control_characters) {
|
|
756
|
+
if (*pe == '\n') {
|
|
757
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
758
|
+
}
|
|
759
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
760
|
+
}
|
|
761
|
+
} else if (config->allow_invalid_escape) {
|
|
762
|
+
APPEND_CHAR(*pe);
|
|
763
|
+
} else {
|
|
764
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
765
|
+
}
|
|
766
|
+
break;
|
|
729
767
|
}
|
|
730
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
731
|
-
buffer += unescape_len;
|
|
732
|
-
p = ++pe;
|
|
733
768
|
}
|
|
769
|
+
#undef APPEND_CHAR
|
|
734
770
|
|
|
735
771
|
if (stringEnd > p) {
|
|
736
772
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -741,81 +777,85 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
741
777
|
if (symbolize) {
|
|
742
778
|
result = rb_str_intern(result);
|
|
743
779
|
} else if (intern) {
|
|
744
|
-
result =
|
|
780
|
+
result = rb_str_to_interned_str(result);
|
|
745
781
|
}
|
|
746
782
|
|
|
747
783
|
return result;
|
|
748
784
|
}
|
|
749
785
|
|
|
750
786
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
751
|
-
|
|
752
|
-
{
|
|
753
|
-
bool negative = false;
|
|
754
|
-
if (*p == '-') {
|
|
755
|
-
negative = true;
|
|
756
|
-
p++;
|
|
757
|
-
}
|
|
787
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
758
788
|
|
|
759
|
-
|
|
760
|
-
while (p < pe) {
|
|
761
|
-
memo *= 10;
|
|
762
|
-
memo += *p - '0';
|
|
763
|
-
p++;
|
|
764
|
-
}
|
|
789
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
765
790
|
|
|
766
|
-
|
|
767
|
-
|
|
791
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
792
|
+
{
|
|
793
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
794
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
795
|
+
MEMCPY(buffer, start, char, len);
|
|
796
|
+
buffer[len] = '\0';
|
|
797
|
+
return func(buffer);
|
|
798
|
+
} else {
|
|
799
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
800
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
801
|
+
MEMCPY(buffer, start, char, len);
|
|
802
|
+
buffer[len] = '\0';
|
|
803
|
+
VALUE number = func(buffer);
|
|
804
|
+
RB_GC_GUARD(buffer_v);
|
|
805
|
+
return number;
|
|
768
806
|
}
|
|
769
|
-
return LL2NUM(memo);
|
|
770
807
|
}
|
|
771
808
|
|
|
772
|
-
static VALUE
|
|
809
|
+
static VALUE json_decode_inum(const char *buffer)
|
|
773
810
|
{
|
|
774
|
-
|
|
775
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
776
|
-
MEMCPY(buffer, start, char, len);
|
|
777
|
-
buffer[len] = '\0';
|
|
778
|
-
VALUE number = rb_cstr2inum(buffer, 10);
|
|
779
|
-
RB_ALLOCV_END(buffer_v);
|
|
780
|
-
return number;
|
|
811
|
+
return rb_cstr2inum(buffer, 10);
|
|
781
812
|
}
|
|
782
813
|
|
|
783
|
-
static
|
|
784
|
-
json_decode_integer(const char *start, const char *end)
|
|
814
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
785
815
|
{
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
816
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
820
|
+
{
|
|
821
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
822
|
+
if (negative) {
|
|
823
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
789
824
|
}
|
|
790
|
-
return
|
|
825
|
+
return UINT64T2NUM(mantissa);
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
return json_decode_large_integer(start, end - start);
|
|
791
829
|
}
|
|
792
830
|
|
|
793
|
-
static VALUE
|
|
831
|
+
static VALUE json_decode_dnum(const char *buffer)
|
|
794
832
|
{
|
|
795
|
-
|
|
796
|
-
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
797
|
-
MEMCPY(buffer, start, char, len);
|
|
798
|
-
buffer[len] = '\0';
|
|
799
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
800
|
-
RB_ALLOCV_END(buffer_v);
|
|
801
|
-
return number;
|
|
833
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
802
834
|
}
|
|
803
835
|
|
|
804
|
-
static VALUE
|
|
836
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
805
837
|
{
|
|
806
|
-
|
|
838
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
839
|
+
}
|
|
807
840
|
|
|
841
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
842
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
843
|
+
*/
|
|
844
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
845
|
+
const char *start, const char *end)
|
|
846
|
+
{
|
|
808
847
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
809
|
-
VALUE text = rb_str_new(start,
|
|
848
|
+
VALUE text = rb_str_new(start, end - start);
|
|
810
849
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
811
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
812
|
-
char buffer[64];
|
|
813
|
-
MEMCPY(buffer, start, char, len);
|
|
814
|
-
buffer[len] = '\0';
|
|
815
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
816
|
-
} else {
|
|
817
|
-
return json_decode_large_float(start, len);
|
|
818
850
|
}
|
|
851
|
+
|
|
852
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
853
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
854
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
855
|
+
return json_decode_large_float(start, end - start);
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
819
859
|
}
|
|
820
860
|
|
|
821
861
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -830,21 +870,66 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
|
|
|
830
870
|
return array;
|
|
831
871
|
}
|
|
832
872
|
|
|
873
|
+
static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
874
|
+
{
|
|
875
|
+
VALUE set = rb_hash_new_capa(count / 2);
|
|
876
|
+
for (size_t index = 0; index < count; index += 2) {
|
|
877
|
+
size_t before = RHASH_SIZE(set);
|
|
878
|
+
VALUE key = pairs[index];
|
|
879
|
+
rb_hash_aset(set, key, Qtrue);
|
|
880
|
+
if (RHASH_SIZE(set) == before) {
|
|
881
|
+
if (RB_SYMBOL_P(key)) {
|
|
882
|
+
return rb_sym2str(key);
|
|
883
|
+
}
|
|
884
|
+
return key;
|
|
885
|
+
}
|
|
886
|
+
}
|
|
887
|
+
return Qfalse;
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
891
|
+
{
|
|
892
|
+
VALUE message = rb_sprintf(
|
|
893
|
+
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
894
|
+
rb_inspect(duplicate_key)
|
|
895
|
+
);
|
|
896
|
+
|
|
897
|
+
emit_parse_warning(RSTRING_PTR(message), state);
|
|
898
|
+
RB_GC_GUARD(message);
|
|
899
|
+
}
|
|
900
|
+
|
|
901
|
+
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
902
|
+
{
|
|
903
|
+
VALUE message = rb_sprintf(
|
|
904
|
+
"duplicate key %"PRIsVALUE,
|
|
905
|
+
rb_inspect(duplicate_key)
|
|
906
|
+
);
|
|
907
|
+
|
|
908
|
+
long line, column;
|
|
909
|
+
cursor_position(state, &line, &column);
|
|
910
|
+
rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
|
|
911
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
912
|
+
|
|
913
|
+
raise_parse_error(RSTRING_PTR(message), state);
|
|
914
|
+
RB_GC_GUARD(message);
|
|
915
|
+
}
|
|
916
|
+
|
|
833
917
|
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
|
834
918
|
{
|
|
835
919
|
size_t entries_count = count / 2;
|
|
836
920
|
VALUE object = rb_hash_new_capa(entries_count);
|
|
837
|
-
|
|
921
|
+
const VALUE *pairs = rvalue_stack_peek(state->stack, count);
|
|
922
|
+
rb_hash_bulk_insert(count, pairs, object);
|
|
838
923
|
|
|
839
924
|
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
|
|
840
925
|
switch (config->on_duplicate_key) {
|
|
841
926
|
case JSON_IGNORE:
|
|
842
927
|
break;
|
|
843
928
|
case JSON_DEPRECATED:
|
|
844
|
-
|
|
929
|
+
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
|
|
845
930
|
break;
|
|
846
931
|
case JSON_RAISE:
|
|
847
|
-
|
|
932
|
+
raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
|
|
848
933
|
break;
|
|
849
934
|
}
|
|
850
935
|
}
|
|
@@ -858,20 +943,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
858
943
|
return object;
|
|
859
944
|
}
|
|
860
945
|
|
|
861
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
862
|
-
{
|
|
863
|
-
VALUE string;
|
|
864
|
-
bool intern = is_name || config->freeze;
|
|
865
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
866
|
-
if (escaped) {
|
|
867
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
868
|
-
} else {
|
|
869
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
870
|
-
}
|
|
871
|
-
|
|
872
|
-
return string;
|
|
873
|
-
}
|
|
874
|
-
|
|
875
946
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
876
947
|
{
|
|
877
948
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -894,17 +965,11 @@ static const bool string_scan_table[256] = {
|
|
|
894
965
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
895
966
|
};
|
|
896
967
|
|
|
897
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
898
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
899
|
-
#else
|
|
900
|
-
#define FORCE_INLINE
|
|
901
|
-
#endif
|
|
902
|
-
|
|
903
968
|
#ifdef HAVE_SIMD
|
|
904
969
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
905
970
|
#endif /* HAVE_SIMD */
|
|
906
971
|
|
|
907
|
-
static
|
|
972
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
908
973
|
{
|
|
909
974
|
#ifdef HAVE_SIMD
|
|
910
975
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -912,7 +977,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
912
977
|
uint64_t mask = 0;
|
|
913
978
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
914
979
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
915
|
-
return
|
|
980
|
+
return true;
|
|
916
981
|
}
|
|
917
982
|
|
|
918
983
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -920,64 +985,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
920
985
|
int mask = 0;
|
|
921
986
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
922
987
|
state->cursor += trailing_zeros(mask);
|
|
923
|
-
return
|
|
988
|
+
return true;
|
|
924
989
|
}
|
|
925
990
|
}
|
|
926
991
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
927
992
|
#endif /* HAVE_SIMD */
|
|
928
993
|
|
|
929
|
-
while (state
|
|
994
|
+
while (!eos(state)) {
|
|
930
995
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
931
|
-
return
|
|
996
|
+
return true;
|
|
932
997
|
}
|
|
933
|
-
|
|
998
|
+
state->cursor++;
|
|
934
999
|
}
|
|
935
|
-
return
|
|
1000
|
+
return false;
|
|
936
1001
|
}
|
|
937
1002
|
|
|
938
|
-
static
|
|
1003
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
939
1004
|
{
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
1005
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
1006
|
+
JSON_UnescapePositions positions = {
|
|
1007
|
+
.size = 0,
|
|
1008
|
+
.positions = backslashes,
|
|
1009
|
+
.additional_backslashes = 0,
|
|
1010
|
+
};
|
|
943
1011
|
|
|
944
|
-
|
|
1012
|
+
do {
|
|
945
1013
|
switch (*state->cursor) {
|
|
946
1014
|
case '"': {
|
|
947
|
-
VALUE string =
|
|
1015
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
948
1016
|
state->cursor++;
|
|
949
1017
|
return json_push_value(state, config, string);
|
|
950
1018
|
}
|
|
951
1019
|
case '\\': {
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
1020
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1021
|
+
backslashes[positions.size] = state->cursor;
|
|
1022
|
+
positions.size++;
|
|
1023
|
+
} else {
|
|
1024
|
+
positions.additional_backslashes++;
|
|
956
1025
|
}
|
|
1026
|
+
state->cursor++;
|
|
957
1027
|
break;
|
|
958
1028
|
}
|
|
959
1029
|
default:
|
|
960
|
-
|
|
1030
|
+
if (!config->allow_control_characters) {
|
|
1031
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1032
|
+
}
|
|
961
1033
|
break;
|
|
962
1034
|
}
|
|
963
1035
|
|
|
964
1036
|
state->cursor++;
|
|
965
|
-
}
|
|
1037
|
+
} while (string_scan(state));
|
|
966
1038
|
|
|
967
1039
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
968
1040
|
return Qfalse;
|
|
969
1041
|
}
|
|
970
1042
|
|
|
1043
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1044
|
+
{
|
|
1045
|
+
state->cursor++;
|
|
1046
|
+
const char *start = state->cursor;
|
|
1047
|
+
|
|
1048
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1049
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1053
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1054
|
+
state->cursor++;
|
|
1055
|
+
return json_push_value(state, config, string);
|
|
1056
|
+
}
|
|
1057
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1058
|
+
}
|
|
1059
|
+
|
|
1060
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1061
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1062
|
+
// Additional References:
|
|
1063
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1064
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1065
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1066
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1067
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1068
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1069
|
+
val -= 0x3030303030303030;
|
|
1070
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1071
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1072
|
+
return val;
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1076
|
+
const uint32_t mask = 0x000000FF;
|
|
1077
|
+
const uint32_t mul1 = 100;
|
|
1078
|
+
val -= 0x30303030;
|
|
1079
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1080
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1081
|
+
return val;
|
|
1082
|
+
}
|
|
1083
|
+
#endif
|
|
1084
|
+
|
|
1085
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1086
|
+
{
|
|
1087
|
+
const char *start = state->cursor;
|
|
1088
|
+
|
|
1089
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1090
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1091
|
+
uint64_t next_8bytes;
|
|
1092
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1093
|
+
|
|
1094
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1095
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1096
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1097
|
+
|
|
1098
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1099
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1100
|
+
state->cursor += 8;
|
|
1101
|
+
continue;
|
|
1102
|
+
}
|
|
1103
|
+
|
|
1104
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1105
|
+
|
|
1106
|
+
if (consecutive_digits >= 4) {
|
|
1107
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1108
|
+
state->cursor += 4;
|
|
1109
|
+
consecutive_digits -= 4;
|
|
1110
|
+
}
|
|
1111
|
+
|
|
1112
|
+
while (consecutive_digits) {
|
|
1113
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1114
|
+
consecutive_digits--;
|
|
1115
|
+
state->cursor++;
|
|
1116
|
+
}
|
|
1117
|
+
|
|
1118
|
+
return (int)(state->cursor - start);
|
|
1119
|
+
}
|
|
1120
|
+
#endif
|
|
1121
|
+
|
|
1122
|
+
char next_char;
|
|
1123
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1124
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1125
|
+
state->cursor++;
|
|
1126
|
+
}
|
|
1127
|
+
return (int)(state->cursor - start);
|
|
1128
|
+
}
|
|
1129
|
+
|
|
1130
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1131
|
+
{
|
|
1132
|
+
bool integer = true;
|
|
1133
|
+
const char first_digit = *state->cursor;
|
|
1134
|
+
|
|
1135
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1136
|
+
int32_t exponent = 0;
|
|
1137
|
+
int decimal_point_pos = -1;
|
|
1138
|
+
uint64_t mantissa = 0;
|
|
1139
|
+
|
|
1140
|
+
// Parse integer part and extract mantissa digits
|
|
1141
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1142
|
+
|
|
1143
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1144
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
// Parse fractional part
|
|
1148
|
+
if (peek(state) == '.') {
|
|
1149
|
+
integer = false;
|
|
1150
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1151
|
+
state->cursor++;
|
|
1152
|
+
|
|
1153
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1154
|
+
mantissa_digits += fractional_digits;
|
|
1155
|
+
|
|
1156
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1157
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1158
|
+
}
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1161
|
+
// Parse exponent
|
|
1162
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1163
|
+
integer = false;
|
|
1164
|
+
state->cursor++;
|
|
1165
|
+
|
|
1166
|
+
bool negative_exponent = false;
|
|
1167
|
+
const char next_char = peek(state);
|
|
1168
|
+
if (next_char == '-' || next_char == '+') {
|
|
1169
|
+
negative_exponent = next_char == '-';
|
|
1170
|
+
state->cursor++;
|
|
1171
|
+
}
|
|
1172
|
+
|
|
1173
|
+
uint64_t abs_exponent = 0;
|
|
1174
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1175
|
+
|
|
1176
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1177
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
|
|
1181
|
+
}
|
|
1182
|
+
|
|
1183
|
+
if (integer) {
|
|
1184
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1185
|
+
}
|
|
1186
|
+
|
|
1187
|
+
// Adjust exponent based on decimal point position
|
|
1188
|
+
if (decimal_point_pos >= 0) {
|
|
1189
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1196
|
+
{
|
|
1197
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1198
|
+
}
|
|
1199
|
+
|
|
1200
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1201
|
+
{
|
|
1202
|
+
const char *start = state->cursor;
|
|
1203
|
+
state->cursor++;
|
|
1204
|
+
return json_parse_number(state, config, true, start);
|
|
1205
|
+
}
|
|
1206
|
+
|
|
971
1207
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
972
1208
|
{
|
|
973
1209
|
json_eat_whitespace(state);
|
|
974
|
-
if (state->cursor >= state->end) {
|
|
975
|
-
raise_parse_error("unexpected end of input", state);
|
|
976
|
-
}
|
|
977
1210
|
|
|
978
|
-
switch (
|
|
1211
|
+
switch (peek(state)) {
|
|
979
1212
|
case 'n':
|
|
980
|
-
if ((state
|
|
1213
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
981
1214
|
state->cursor += 4;
|
|
982
1215
|
return json_push_value(state, config, Qnil);
|
|
983
1216
|
}
|
|
@@ -985,7 +1218,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
985
1218
|
raise_parse_error("unexpected token %s", state);
|
|
986
1219
|
break;
|
|
987
1220
|
case 't':
|
|
988
|
-
if ((state
|
|
1221
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
989
1222
|
state->cursor += 4;
|
|
990
1223
|
return json_push_value(state, config, Qtrue);
|
|
991
1224
|
}
|
|
@@ -994,7 +1227,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
994
1227
|
break;
|
|
995
1228
|
case 'f':
|
|
996
1229
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
997
|
-
if ((state
|
|
1230
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
998
1231
|
state->cursor += 5;
|
|
999
1232
|
return json_push_value(state, config, Qfalse);
|
|
1000
1233
|
}
|
|
@@ -1003,7 +1236,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1003
1236
|
break;
|
|
1004
1237
|
case 'N':
|
|
1005
1238
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1006
|
-
if (config->allow_nan && (state
|
|
1239
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1007
1240
|
state->cursor += 3;
|
|
1008
1241
|
return json_push_value(state, config, CNaN);
|
|
1009
1242
|
}
|
|
@@ -1011,16 +1244,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1011
1244
|
raise_parse_error("unexpected token %s", state);
|
|
1012
1245
|
break;
|
|
1013
1246
|
case 'I':
|
|
1014
|
-
if (config->allow_nan && (state
|
|
1247
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1015
1248
|
state->cursor += 8;
|
|
1016
1249
|
return json_push_value(state, config, CInfinity);
|
|
1017
1250
|
}
|
|
1018
1251
|
|
|
1019
1252
|
raise_parse_error("unexpected token %s", state);
|
|
1020
1253
|
break;
|
|
1021
|
-
case '-':
|
|
1254
|
+
case '-': {
|
|
1022
1255
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1023
|
-
if ((state
|
|
1256
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1024
1257
|
if (config->allow_nan) {
|
|
1025
1258
|
state->cursor += 9;
|
|
1026
1259
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1028,62 +1261,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1028
1261
|
raise_parse_error("unexpected token %s", state);
|
|
1029
1262
|
}
|
|
1030
1263
|
}
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
bool integer = true;
|
|
1034
|
-
|
|
1035
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1036
|
-
const char *start = state->cursor;
|
|
1037
|
-
state->cursor++;
|
|
1038
|
-
|
|
1039
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1040
|
-
state->cursor++;
|
|
1041
|
-
}
|
|
1042
|
-
|
|
1043
|
-
long integer_length = state->cursor - start;
|
|
1044
|
-
|
|
1045
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1046
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1047
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1048
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1049
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1050
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1051
|
-
}
|
|
1052
|
-
|
|
1053
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1054
|
-
integer = false;
|
|
1055
|
-
state->cursor++;
|
|
1056
|
-
|
|
1057
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1058
|
-
raise_parse_error("invalid number: %s", state);
|
|
1059
|
-
}
|
|
1060
|
-
|
|
1061
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1062
|
-
state->cursor++;
|
|
1063
|
-
}
|
|
1064
|
-
}
|
|
1065
|
-
|
|
1066
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1067
|
-
integer = false;
|
|
1068
|
-
state->cursor++;
|
|
1069
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1070
|
-
state->cursor++;
|
|
1071
|
-
}
|
|
1072
|
-
|
|
1073
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1074
|
-
raise_parse_error("invalid number: %s", state);
|
|
1075
|
-
}
|
|
1076
|
-
|
|
1077
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1078
|
-
state->cursor++;
|
|
1079
|
-
}
|
|
1080
|
-
}
|
|
1081
|
-
|
|
1082
|
-
if (integer) {
|
|
1083
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1084
|
-
}
|
|
1085
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1264
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1265
|
+
break;
|
|
1086
1266
|
}
|
|
1267
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1268
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1269
|
+
break;
|
|
1087
1270
|
case '"': {
|
|
1088
1271
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1089
1272
|
return json_parse_string(state, config, false);
|
|
@@ -1094,7 +1277,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1094
1277
|
json_eat_whitespace(state);
|
|
1095
1278
|
long stack_head = state->stack->head;
|
|
1096
1279
|
|
|
1097
|
-
if ((state
|
|
1280
|
+
if (peek(state) == ']') {
|
|
1098
1281
|
state->cursor++;
|
|
1099
1282
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1100
1283
|
} else {
|
|
@@ -1109,26 +1292,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1109
1292
|
while (true) {
|
|
1110
1293
|
json_eat_whitespace(state);
|
|
1111
1294
|
|
|
1112
|
-
|
|
1113
|
-
if (*state->cursor == ']') {
|
|
1114
|
-
state->cursor++;
|
|
1115
|
-
long count = state->stack->head - stack_head;
|
|
1116
|
-
state->current_nesting--;
|
|
1117
|
-
state->in_array--;
|
|
1118
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1119
|
-
}
|
|
1295
|
+
const char next_char = peek(state);
|
|
1120
1296
|
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
}
|
|
1297
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1298
|
+
state->cursor++;
|
|
1299
|
+
if (config->allow_trailing_comma) {
|
|
1300
|
+
json_eat_whitespace(state);
|
|
1301
|
+
if (peek(state) == ']') {
|
|
1302
|
+
continue;
|
|
1128
1303
|
}
|
|
1129
|
-
json_parse_any(state, config);
|
|
1130
|
-
continue;
|
|
1131
1304
|
}
|
|
1305
|
+
json_parse_any(state, config);
|
|
1306
|
+
continue;
|
|
1307
|
+
}
|
|
1308
|
+
|
|
1309
|
+
if (next_char == ']') {
|
|
1310
|
+
state->cursor++;
|
|
1311
|
+
long count = state->stack->head - stack_head;
|
|
1312
|
+
state->current_nesting--;
|
|
1313
|
+
state->in_array--;
|
|
1314
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1132
1315
|
}
|
|
1133
1316
|
|
|
1134
1317
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1142,7 +1325,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1142
1325
|
json_eat_whitespace(state);
|
|
1143
1326
|
long stack_head = state->stack->head;
|
|
1144
1327
|
|
|
1145
|
-
if ((state
|
|
1328
|
+
if (peek(state) == '}') {
|
|
1146
1329
|
state->cursor++;
|
|
1147
1330
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1148
1331
|
} else {
|
|
@@ -1151,13 +1334,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1151
1334
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1152
1335
|
}
|
|
1153
1336
|
|
|
1154
|
-
if (
|
|
1337
|
+
if (peek(state) != '"') {
|
|
1155
1338
|
raise_parse_error("expected object key, got %s", state);
|
|
1156
1339
|
}
|
|
1157
1340
|
json_parse_string(state, config, true);
|
|
1158
1341
|
|
|
1159
1342
|
json_eat_whitespace(state);
|
|
1160
|
-
if ((state
|
|
1343
|
+
if (peek(state) != ':') {
|
|
1161
1344
|
raise_parse_error("expected ':' after object key", state);
|
|
1162
1345
|
}
|
|
1163
1346
|
state->cursor++;
|
|
@@ -1168,46 +1351,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1168
1351
|
while (true) {
|
|
1169
1352
|
json_eat_whitespace(state);
|
|
1170
1353
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1354
|
+
const char next_char = peek(state);
|
|
1355
|
+
if (next_char == '}') {
|
|
1356
|
+
state->cursor++;
|
|
1357
|
+
state->current_nesting--;
|
|
1358
|
+
size_t count = state->stack->head - stack_head;
|
|
1176
1359
|
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1360
|
+
// Temporary rewind cursor in case an error is raised
|
|
1361
|
+
const char *final_cursor = state->cursor;
|
|
1362
|
+
state->cursor = object_start_cursor;
|
|
1363
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1364
|
+
state->cursor = final_cursor;
|
|
1182
1365
|
|
|
1183
|
-
|
|
1184
|
-
|
|
1366
|
+
return json_push_value(state, config, object);
|
|
1367
|
+
}
|
|
1185
1368
|
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1369
|
+
if (next_char == ',') {
|
|
1370
|
+
state->cursor++;
|
|
1371
|
+
json_eat_whitespace(state);
|
|
1189
1372
|
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
}
|
|
1373
|
+
if (config->allow_trailing_comma) {
|
|
1374
|
+
if (peek(state) == '}') {
|
|
1375
|
+
continue;
|
|
1194
1376
|
}
|
|
1377
|
+
}
|
|
1195
1378
|
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1379
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1380
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1381
|
+
}
|
|
1382
|
+
json_parse_string(state, config, true);
|
|
1200
1383
|
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1384
|
+
json_eat_whitespace(state);
|
|
1385
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1386
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1387
|
+
}
|
|
1388
|
+
state->cursor++;
|
|
1206
1389
|
|
|
1207
|
-
|
|
1390
|
+
json_parse_any(state, config);
|
|
1208
1391
|
|
|
1209
|
-
|
|
1210
|
-
}
|
|
1392
|
+
continue;
|
|
1211
1393
|
}
|
|
1212
1394
|
|
|
1213
1395
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1215,18 +1397,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1215
1397
|
break;
|
|
1216
1398
|
}
|
|
1217
1399
|
|
|
1400
|
+
case 0:
|
|
1401
|
+
raise_parse_error("unexpected end of input", state);
|
|
1402
|
+
break;
|
|
1403
|
+
|
|
1218
1404
|
default:
|
|
1219
1405
|
raise_parse_error("unexpected character: %s", state);
|
|
1220
1406
|
break;
|
|
1221
1407
|
}
|
|
1222
1408
|
|
|
1223
|
-
raise_parse_error("
|
|
1409
|
+
raise_parse_error("unreachable: %s", state);
|
|
1410
|
+
return Qundef;
|
|
1224
1411
|
}
|
|
1225
1412
|
|
|
1226
1413
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1227
1414
|
{
|
|
1228
1415
|
json_eat_whitespace(state);
|
|
1229
|
-
if (state
|
|
1416
|
+
if (!eos(state)) {
|
|
1230
1417
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1231
1418
|
}
|
|
1232
1419
|
}
|
|
@@ -1263,14 +1450,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1263
1450
|
{
|
|
1264
1451
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1265
1452
|
|
|
1266
|
-
if (key == sym_max_nesting)
|
|
1267
|
-
else if (key == sym_allow_nan)
|
|
1268
|
-
else if (key == sym_allow_trailing_comma)
|
|
1269
|
-
else if (key ==
|
|
1270
|
-
else if (key ==
|
|
1271
|
-
else if (key ==
|
|
1272
|
-
else if (key ==
|
|
1273
|
-
else if (key ==
|
|
1453
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1454
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1455
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1456
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1457
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1458
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1459
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1460
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1461
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1462
|
+
else if (key == sym_decimal_class) {
|
|
1274
1463
|
if (RTEST(val)) {
|
|
1275
1464
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1276
1465
|
config->decimal_class = val;
|
|
@@ -1343,6 +1532,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1343
1532
|
*/
|
|
1344
1533
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1345
1534
|
{
|
|
1535
|
+
rb_check_frozen(self);
|
|
1346
1536
|
GET_PARSER_CONFIG;
|
|
1347
1537
|
|
|
1348
1538
|
parser_config_init(config, opts);
|
|
@@ -1438,7 +1628,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1438
1628
|
JSON_ParserConfig_memsize,
|
|
1439
1629
|
},
|
|
1440
1630
|
0, 0,
|
|
1441
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1631
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1442
1632
|
};
|
|
1443
1633
|
|
|
1444
1634
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1482,16 +1672,14 @@ void Init_parser(void)
|
|
|
1482
1672
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1483
1673
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1484
1674
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1675
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1676
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1485
1677
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1486
1678
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1487
1679
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1488
1680
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1489
1681
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1490
1682
|
|
|
1491
|
-
i_chr = rb_intern("chr");
|
|
1492
|
-
i_aset = rb_intern("[]=");
|
|
1493
|
-
i_aref = rb_intern("[]");
|
|
1494
|
-
i_leftshift = rb_intern("<<");
|
|
1495
1683
|
i_new = rb_intern("new");
|
|
1496
1684
|
i_try_convert = rb_intern("try_convert");
|
|
1497
1685
|
i_uminus = rb_intern("-@");
|