json 2.12.2 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +70 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +26 -49
- data/ext/json/ext/generator/extconf.rb +1 -25
- data/ext/json/ext/generator/generator.c +347 -313
- data/ext/json/ext/json.h +92 -0
- data/ext/json/ext/parser/extconf.rb +7 -1
- data/ext/json/ext/parser/parser.c +557 -332
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +191 -0
- data/ext/json/ext/vendor/fpconv.c +12 -11
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/json.gemspec +2 -3
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +57 -36
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +96 -50
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +55 -0
- metadata +8 -4
- data/ext/json/ext/generator/simd.h +0 -112
|
@@ -1,32 +1,6 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
23
|
-
#ifndef RB_UNLIKELY
|
|
24
|
-
#define RB_UNLIKELY(expr) expr
|
|
25
|
-
#endif
|
|
26
|
-
|
|
27
|
-
#ifndef RB_LIKELY
|
|
28
|
-
#define RB_LIKELY(expr) expr
|
|
29
|
-
#endif
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
3
|
+
#include "../simd/simd.h"
|
|
30
4
|
|
|
31
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
32
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
@@ -35,14 +9,14 @@ static ID i_chr, i_aset, i_aref,
|
|
|
35
9
|
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
36
10
|
|
|
37
11
|
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
38
|
-
sym_decimal_class, sym_on_load;
|
|
12
|
+
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
39
13
|
|
|
40
14
|
static int binary_encindex;
|
|
41
15
|
static int utf8_encindex;
|
|
42
16
|
|
|
43
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
44
18
|
// For TruffleRuby
|
|
45
|
-
void
|
|
19
|
+
static void
|
|
46
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
47
21
|
{
|
|
48
22
|
long index = 0;
|
|
@@ -59,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
59
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
60
34
|
#endif
|
|
61
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
62
42
|
|
|
63
43
|
/* name cache */
|
|
64
44
|
|
|
@@ -104,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
104
84
|
cache->entries[index] = rstring;
|
|
105
85
|
}
|
|
106
86
|
|
|
107
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
static ALWAYS_INLINE() int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
108
93
|
{
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
114
114
|
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
115
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
116
120
|
|
|
117
|
-
static
|
|
121
|
+
static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
118
122
|
{
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
125
|
+
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
124
127
|
|
|
125
|
-
if (
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
return Qfalse;
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
130
132
|
}
|
|
133
|
+
}
|
|
131
134
|
|
|
135
|
+
static ALWAYS_INLINE() VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
132
137
|
int low = 0;
|
|
133
138
|
int high = cache->length - 1;
|
|
134
|
-
int mid = 0;
|
|
135
|
-
int last_cmp = 0;
|
|
136
139
|
|
|
137
140
|
while (low <= high) {
|
|
138
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
139
142
|
VALUE entry = cache->entries[mid];
|
|
140
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
141
144
|
|
|
142
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
143
146
|
return entry;
|
|
144
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
145
148
|
low = mid + 1;
|
|
146
149
|
} else {
|
|
147
150
|
high = mid - 1;
|
|
148
151
|
}
|
|
149
152
|
}
|
|
150
153
|
|
|
151
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
152
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
153
|
-
// But if they do, we have to fallback to the slow path.
|
|
154
|
-
return Qfalse;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
154
|
VALUE rstring = build_interned_string(str, length);
|
|
158
155
|
|
|
159
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
160
|
-
|
|
161
|
-
mid += 1;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
165
158
|
}
|
|
166
159
|
return rstring;
|
|
167
160
|
}
|
|
168
161
|
|
|
169
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
170
163
|
{
|
|
171
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
172
|
-
// Common names aren't likely to be very long. So we just don't
|
|
173
|
-
// cache names above an arbitrary threshold.
|
|
174
|
-
return Qfalse;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
178
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
179
|
-
// we're much less likely to see this string again.
|
|
180
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
181
|
-
return Qfalse;
|
|
182
|
-
}
|
|
183
|
-
|
|
184
164
|
int low = 0;
|
|
185
165
|
int high = cache->length - 1;
|
|
186
|
-
int mid = 0;
|
|
187
|
-
int last_cmp = 0;
|
|
188
166
|
|
|
189
167
|
while (low <= high) {
|
|
190
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
191
169
|
VALUE entry = cache->entries[mid];
|
|
192
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
193
171
|
|
|
194
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
195
173
|
return entry;
|
|
196
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
197
175
|
low = mid + 1;
|
|
198
176
|
} else {
|
|
199
177
|
high = mid - 1;
|
|
200
178
|
}
|
|
201
179
|
}
|
|
202
180
|
|
|
203
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
204
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
205
|
-
// But if they do, we have to fallback to the slow path.
|
|
206
|
-
return Qfalse;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
210
182
|
|
|
211
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
212
|
-
|
|
213
|
-
mid += 1;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
217
185
|
}
|
|
218
186
|
return rsymbol;
|
|
219
187
|
}
|
|
@@ -363,10 +331,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
|
363
331
|
return len;
|
|
364
332
|
}
|
|
365
333
|
|
|
334
|
+
enum duplicate_key_action {
|
|
335
|
+
JSON_DEPRECATED = 0,
|
|
336
|
+
JSON_IGNORE,
|
|
337
|
+
JSON_RAISE,
|
|
338
|
+
};
|
|
339
|
+
|
|
366
340
|
typedef struct JSON_ParserStruct {
|
|
367
341
|
VALUE on_load_proc;
|
|
368
342
|
VALUE decimal_class;
|
|
369
343
|
ID decimal_method_id;
|
|
344
|
+
enum duplicate_key_action on_duplicate_key;
|
|
370
345
|
int max_nesting;
|
|
371
346
|
bool allow_nan;
|
|
372
347
|
bool allow_trailing_comma;
|
|
@@ -386,15 +361,24 @@ typedef struct JSON_ParserStateStruct {
|
|
|
386
361
|
int current_nesting;
|
|
387
362
|
} JSON_ParserState;
|
|
388
363
|
|
|
364
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
365
|
+
return state->end - state->cursor;
|
|
366
|
+
}
|
|
389
367
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
static
|
|
368
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
369
|
+
return state->cursor >= state->end;
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
static inline char peek(JSON_ParserState *state)
|
|
395
373
|
{
|
|
396
|
-
|
|
374
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
375
|
+
return 0;
|
|
376
|
+
}
|
|
377
|
+
return *state->cursor;
|
|
378
|
+
}
|
|
397
379
|
|
|
380
|
+
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
381
|
+
{
|
|
398
382
|
const char *cursor = state->cursor;
|
|
399
383
|
long column = 0;
|
|
400
384
|
long line = 1;
|
|
@@ -411,6 +395,29 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
411
395
|
line++;
|
|
412
396
|
}
|
|
413
397
|
}
|
|
398
|
+
*line_out = line;
|
|
399
|
+
*column_out = column;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
403
|
+
{
|
|
404
|
+
long line, column;
|
|
405
|
+
cursor_position(state, &line, &column);
|
|
406
|
+
|
|
407
|
+
VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
|
|
408
|
+
rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
412
|
+
|
|
413
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
|
414
|
+
RBIMPL_ATTR_NORETURN()
|
|
415
|
+
#endif
|
|
416
|
+
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
417
|
+
{
|
|
418
|
+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
419
|
+
long line, column;
|
|
420
|
+
cursor_position(state, &line, &column);
|
|
414
421
|
|
|
415
422
|
const char *ptr = "EOF";
|
|
416
423
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -505,61 +512,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
505
512
|
|
|
506
513
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
507
514
|
|
|
508
|
-
static const bool whitespace[256] = {
|
|
509
|
-
[' '] = 1,
|
|
510
|
-
['\t'] = 1,
|
|
511
|
-
['\n'] = 1,
|
|
512
|
-
['\r'] = 1,
|
|
513
|
-
['/'] = 1,
|
|
514
|
-
};
|
|
515
|
-
|
|
516
515
|
static void
|
|
517
516
|
json_eat_comments(JSON_ParserState *state)
|
|
518
517
|
{
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
518
|
+
const char *start = state->cursor;
|
|
519
|
+
state->cursor++;
|
|
520
|
+
|
|
521
|
+
switch (peek(state)) {
|
|
522
|
+
case '/': {
|
|
523
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
524
|
+
if (!state->cursor) {
|
|
525
|
+
state->cursor = state->end;
|
|
526
|
+
} else {
|
|
527
|
+
state->cursor++;
|
|
529
528
|
}
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
529
|
+
break;
|
|
530
|
+
}
|
|
531
|
+
case '*': {
|
|
532
|
+
state->cursor++;
|
|
533
|
+
|
|
534
|
+
while (true) {
|
|
535
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
536
|
+
if (!next_match) {
|
|
537
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
state->cursor = next_match + 1;
|
|
541
|
+
if (peek(state) == '/') {
|
|
542
|
+
state->cursor++;
|
|
543
|
+
break;
|
|
543
544
|
}
|
|
544
|
-
break;
|
|
545
545
|
}
|
|
546
|
-
|
|
547
|
-
raise_parse_error("unexpected token %s", state);
|
|
548
|
-
break;
|
|
546
|
+
break;
|
|
549
547
|
}
|
|
550
|
-
|
|
551
|
-
|
|
548
|
+
default:
|
|
549
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
550
|
+
break;
|
|
552
551
|
}
|
|
553
552
|
}
|
|
554
553
|
|
|
555
|
-
static
|
|
554
|
+
static ALWAYS_INLINE() void
|
|
556
555
|
json_eat_whitespace(JSON_ParserState *state)
|
|
557
556
|
{
|
|
558
|
-
while (
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
557
|
+
while (true) {
|
|
558
|
+
switch (peek(state)) {
|
|
559
|
+
case ' ':
|
|
560
|
+
state->cursor++;
|
|
561
|
+
break;
|
|
562
|
+
case '\n':
|
|
563
|
+
state->cursor++;
|
|
564
|
+
|
|
565
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
566
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
567
|
+
while (rest(state) > 8) {
|
|
568
|
+
uint64_t chunk;
|
|
569
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
570
|
+
if (chunk == 0x2020202020202020) {
|
|
571
|
+
state->cursor += 8;
|
|
572
|
+
continue;
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
576
|
+
state->cursor += consecutive_spaces;
|
|
577
|
+
break;
|
|
578
|
+
}
|
|
579
|
+
#endif
|
|
580
|
+
break;
|
|
581
|
+
case '\t':
|
|
582
|
+
case '\r':
|
|
583
|
+
state->cursor++;
|
|
584
|
+
break;
|
|
585
|
+
case '/':
|
|
586
|
+
json_eat_comments(state);
|
|
587
|
+
break;
|
|
588
|
+
|
|
589
|
+
default:
|
|
590
|
+
return;
|
|
563
591
|
}
|
|
564
592
|
}
|
|
565
593
|
}
|
|
@@ -590,11 +618,20 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
590
618
|
return result;
|
|
591
619
|
}
|
|
592
620
|
|
|
621
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
622
|
+
{
|
|
623
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
624
|
+
// Simple heuristics:
|
|
625
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
626
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
627
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
628
|
+
}
|
|
629
|
+
|
|
593
630
|
static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
|
|
594
631
|
{
|
|
595
632
|
size_t bufferSize = stringEnd - string;
|
|
596
633
|
|
|
597
|
-
if (is_name && state->in_array) {
|
|
634
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
598
635
|
VALUE cached_key;
|
|
599
636
|
if (RB_UNLIKELY(symbolize)) {
|
|
600
637
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -618,19 +655,6 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
618
655
|
int unescape_len;
|
|
619
656
|
char buf[4];
|
|
620
657
|
|
|
621
|
-
if (is_name && state->in_array) {
|
|
622
|
-
VALUE cached_key;
|
|
623
|
-
if (RB_UNLIKELY(symbolize)) {
|
|
624
|
-
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
625
|
-
} else {
|
|
626
|
-
cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
|
|
627
|
-
}
|
|
628
|
-
|
|
629
|
-
if (RB_LIKELY(cached_key)) {
|
|
630
|
-
return cached_key;
|
|
631
|
-
}
|
|
632
|
-
}
|
|
633
|
-
|
|
634
658
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
635
659
|
rb_enc_associate_index(result, utf8_encindex);
|
|
636
660
|
buffer = RSTRING_PTR(result);
|
|
@@ -688,11 +712,16 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
688
712
|
}
|
|
689
713
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
690
714
|
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
715
|
+
|
|
716
|
+
if ((sur & 0xFC00) != 0xDC00) {
|
|
717
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
718
|
+
}
|
|
719
|
+
|
|
691
720
|
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
692
721
|
| (sur & 0x3FF));
|
|
693
722
|
pe += 5;
|
|
694
723
|
} else {
|
|
695
|
-
|
|
724
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
696
725
|
break;
|
|
697
726
|
}
|
|
698
727
|
}
|
|
@@ -718,33 +747,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
718
747
|
if (symbolize) {
|
|
719
748
|
result = rb_str_intern(result);
|
|
720
749
|
} else if (intern) {
|
|
721
|
-
result =
|
|
750
|
+
result = rb_str_to_interned_str(result);
|
|
722
751
|
}
|
|
723
752
|
|
|
724
753
|
return result;
|
|
725
754
|
}
|
|
726
755
|
|
|
727
756
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
728
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
729
|
-
{
|
|
730
|
-
bool negative = false;
|
|
731
|
-
if (*p == '-') {
|
|
732
|
-
negative = true;
|
|
733
|
-
p++;
|
|
734
|
-
}
|
|
735
|
-
|
|
736
|
-
long long memo = 0;
|
|
737
|
-
while (p < pe) {
|
|
738
|
-
memo *= 10;
|
|
739
|
-
memo += *p - '0';
|
|
740
|
-
p++;
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
if (negative) {
|
|
744
|
-
memo = -memo;
|
|
745
|
-
}
|
|
746
|
-
return LL2NUM(memo);
|
|
747
|
-
}
|
|
748
757
|
|
|
749
758
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
750
759
|
{
|
|
@@ -758,17 +767,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
758
767
|
}
|
|
759
768
|
|
|
760
769
|
static inline VALUE
|
|
761
|
-
json_decode_integer(const char *start, const char *end)
|
|
770
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
762
771
|
{
|
|
763
|
-
|
|
764
|
-
if (
|
|
765
|
-
return
|
|
772
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
773
|
+
if (negative) {
|
|
774
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
766
775
|
}
|
|
767
|
-
return
|
|
776
|
+
return UINT64T2NUM(mantissa);
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
return json_decode_large_integer(start, end - start);
|
|
768
780
|
}
|
|
769
781
|
|
|
770
782
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
771
783
|
{
|
|
784
|
+
if (RB_LIKELY(len < 64)) {
|
|
785
|
+
char buffer[64];
|
|
786
|
+
MEMCPY(buffer, start, char, len);
|
|
787
|
+
buffer[len] = '\0';
|
|
788
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
789
|
+
}
|
|
790
|
+
|
|
772
791
|
VALUE buffer_v;
|
|
773
792
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
774
793
|
MEMCPY(buffer, start, char, len);
|
|
@@ -778,21 +797,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
778
797
|
return number;
|
|
779
798
|
}
|
|
780
799
|
|
|
781
|
-
|
|
800
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
801
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
802
|
+
*/
|
|
803
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
804
|
+
const char *start, const char *end)
|
|
782
805
|
{
|
|
783
|
-
long len = end - start;
|
|
784
|
-
|
|
785
806
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
786
|
-
VALUE text = rb_str_new(start,
|
|
807
|
+
VALUE text = rb_str_new(start, end - start);
|
|
787
808
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
788
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
789
|
-
char buffer[64];
|
|
790
|
-
MEMCPY(buffer, start, char, len);
|
|
791
|
-
buffer[len] = '\0';
|
|
792
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
793
|
-
} else {
|
|
794
|
-
return json_decode_large_float(start, len);
|
|
795
809
|
}
|
|
810
|
+
|
|
811
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
812
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
813
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
814
|
+
return json_decode_large_float(start, end - start);
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
796
818
|
}
|
|
797
819
|
|
|
798
820
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -807,10 +829,67 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
|
|
|
807
829
|
return array;
|
|
808
830
|
}
|
|
809
831
|
|
|
810
|
-
static
|
|
832
|
+
static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
833
|
+
{
|
|
834
|
+
VALUE set = rb_hash_new_capa(count / 2);
|
|
835
|
+
for (size_t index = 0; index < count; index += 2) {
|
|
836
|
+
size_t before = RHASH_SIZE(set);
|
|
837
|
+
VALUE key = pairs[index];
|
|
838
|
+
rb_hash_aset(set, key, Qtrue);
|
|
839
|
+
if (RHASH_SIZE(set) == before) {
|
|
840
|
+
if (RB_SYMBOL_P(key)) {
|
|
841
|
+
return rb_sym2str(key);
|
|
842
|
+
}
|
|
843
|
+
return key;
|
|
844
|
+
}
|
|
845
|
+
}
|
|
846
|
+
return Qfalse;
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
850
|
+
{
|
|
851
|
+
VALUE message = rb_sprintf(
|
|
852
|
+
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
853
|
+
rb_inspect(duplicate_key)
|
|
854
|
+
);
|
|
855
|
+
|
|
856
|
+
emit_parse_warning(RSTRING_PTR(message), state);
|
|
857
|
+
RB_GC_GUARD(message);
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
|
861
|
+
RBIMPL_ATTR_NORETURN()
|
|
862
|
+
#endif
|
|
863
|
+
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
811
864
|
{
|
|
812
|
-
VALUE
|
|
813
|
-
|
|
865
|
+
VALUE message = rb_sprintf(
|
|
866
|
+
"duplicate key %"PRIsVALUE,
|
|
867
|
+
rb_inspect(duplicate_key)
|
|
868
|
+
);
|
|
869
|
+
|
|
870
|
+
raise_parse_error(RSTRING_PTR(message), state);
|
|
871
|
+
RB_GC_GUARD(message);
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
|
875
|
+
{
|
|
876
|
+
size_t entries_count = count / 2;
|
|
877
|
+
VALUE object = rb_hash_new_capa(entries_count);
|
|
878
|
+
const VALUE *pairs = rvalue_stack_peek(state->stack, count);
|
|
879
|
+
rb_hash_bulk_insert(count, pairs, object);
|
|
880
|
+
|
|
881
|
+
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
|
|
882
|
+
switch (config->on_duplicate_key) {
|
|
883
|
+
case JSON_IGNORE:
|
|
884
|
+
break;
|
|
885
|
+
case JSON_DEPRECATED:
|
|
886
|
+
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
|
|
887
|
+
break;
|
|
888
|
+
case JSON_RAISE:
|
|
889
|
+
raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
|
|
890
|
+
break;
|
|
891
|
+
}
|
|
892
|
+
}
|
|
814
893
|
|
|
815
894
|
rvalue_stack_pop(state->stack, count);
|
|
816
895
|
|
|
@@ -844,7 +923,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
|
|
|
844
923
|
return value;
|
|
845
924
|
}
|
|
846
925
|
|
|
847
|
-
static const bool
|
|
926
|
+
static const bool string_scan_table[256] = {
|
|
848
927
|
// ASCII Control Characters
|
|
849
928
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
850
929
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
@@ -857,32 +936,65 @@ static const bool string_scan[256] = {
|
|
|
857
936
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
858
937
|
};
|
|
859
938
|
|
|
939
|
+
#ifdef HAVE_SIMD
|
|
940
|
+
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
941
|
+
#endif /* HAVE_SIMD */
|
|
942
|
+
|
|
943
|
+
static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
|
|
944
|
+
{
|
|
945
|
+
#ifdef HAVE_SIMD
|
|
946
|
+
#if defined(HAVE_SIMD_NEON)
|
|
947
|
+
|
|
948
|
+
uint64_t mask = 0;
|
|
949
|
+
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
950
|
+
state->cursor += trailing_zeros64(mask) >> 2;
|
|
951
|
+
return 1;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
955
|
+
if (simd_impl == SIMD_SSE2) {
|
|
956
|
+
int mask = 0;
|
|
957
|
+
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
958
|
+
state->cursor += trailing_zeros(mask);
|
|
959
|
+
return 1;
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
963
|
+
#endif /* HAVE_SIMD */
|
|
964
|
+
|
|
965
|
+
while (!eos(state)) {
|
|
966
|
+
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
967
|
+
return 1;
|
|
968
|
+
}
|
|
969
|
+
state->cursor++;
|
|
970
|
+
}
|
|
971
|
+
return 0;
|
|
972
|
+
}
|
|
973
|
+
|
|
860
974
|
static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
861
975
|
{
|
|
862
976
|
state->cursor++;
|
|
863
977
|
const char *start = state->cursor;
|
|
864
978
|
bool escaped = false;
|
|
865
979
|
|
|
866
|
-
while (state
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
if ((unsigned char)*state->cursor < 0x20) {
|
|
878
|
-
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
879
|
-
}
|
|
880
|
-
break;
|
|
881
|
-
}
|
|
882
|
-
default:
|
|
980
|
+
while (RB_UNLIKELY(string_scan(state))) {
|
|
981
|
+
switch (*state->cursor) {
|
|
982
|
+
case '"': {
|
|
983
|
+
VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
|
|
984
|
+
state->cursor++;
|
|
985
|
+
return json_push_value(state, config, string);
|
|
986
|
+
}
|
|
987
|
+
case '\\': {
|
|
988
|
+
state->cursor++;
|
|
989
|
+
escaped = true;
|
|
990
|
+
if ((unsigned char)*state->cursor < 0x20) {
|
|
883
991
|
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
884
|
-
|
|
992
|
+
}
|
|
993
|
+
break;
|
|
885
994
|
}
|
|
995
|
+
default:
|
|
996
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
997
|
+
break;
|
|
886
998
|
}
|
|
887
999
|
|
|
888
1000
|
state->cursor++;
|
|
@@ -892,16 +1004,160 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
|
|
|
892
1004
|
return Qfalse;
|
|
893
1005
|
}
|
|
894
1006
|
|
|
1007
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1008
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1009
|
+
// Additional References:
|
|
1010
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1011
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1012
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1013
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1014
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1015
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1016
|
+
val -= 0x3030303030303030;
|
|
1017
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1018
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1019
|
+
return val;
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1023
|
+
const uint32_t mask = 0x000000FF;
|
|
1024
|
+
const uint32_t mul1 = 100;
|
|
1025
|
+
val -= 0x30303030;
|
|
1026
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1027
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1028
|
+
return val;
|
|
1029
|
+
}
|
|
1030
|
+
#endif
|
|
1031
|
+
|
|
1032
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1033
|
+
{
|
|
1034
|
+
const char *start = state->cursor;
|
|
1035
|
+
|
|
1036
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1037
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1038
|
+
uint64_t next_8bytes;
|
|
1039
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1040
|
+
|
|
1041
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1042
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1043
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1044
|
+
|
|
1045
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1046
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1047
|
+
state->cursor += 8;
|
|
1048
|
+
continue;
|
|
1049
|
+
}
|
|
1050
|
+
|
|
1051
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1052
|
+
|
|
1053
|
+
if (consecutive_digits >= 4) {
|
|
1054
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1055
|
+
state->cursor += 4;
|
|
1056
|
+
consecutive_digits -= 4;
|
|
1057
|
+
}
|
|
1058
|
+
|
|
1059
|
+
while (consecutive_digits) {
|
|
1060
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1061
|
+
consecutive_digits--;
|
|
1062
|
+
state->cursor++;
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
return (int)(state->cursor - start);
|
|
1066
|
+
}
|
|
1067
|
+
#endif
|
|
1068
|
+
|
|
1069
|
+
char next_char;
|
|
1070
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1071
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1072
|
+
state->cursor++;
|
|
1073
|
+
}
|
|
1074
|
+
return (int)(state->cursor - start);
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
/*
 * Parse a JSON number whose first digit is at state->cursor.
 *
 * Accepted shape: -?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?
 * The optional leading '-' has already been consumed by the caller and is
 * reported through `negative`.
 *
 * state    - parser state; state->cursor is advanced past the literal.
 * config   - parser configuration, forwarded to json_decode_float().
 * negative - true when the caller consumed a leading '-'.
 * start    - first byte of the literal (the '-' for negative numbers); used
 *            for error reporting and handed to the decoders along with the
 *            final cursor.
 *
 * Returns whatever json_decode_integer() / json_decode_float() produce.
 * Raises a parse error for a leading zero followed by more digits, a '-'
 * without digits, a '.' without digits, or an exponent without digits.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    bool integer = true;
    // Use peek() like the rest of this function rather than dereferencing the
    // cursor directly, so a '-' at the very end of the input is not read past.
    const char first_digit = peek(state);

    // Digits are collected while scanning so the decoders do not need to
    // re-read the text. The exponent is tracked in 64 bits and only narrowed
    // to int32_t once it has been clamped.
    int64_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Integer part.
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Fractional part.
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // digits seen before the '.'
        state->cursor++;

        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Exponent.
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        const char *exponent_text = state->cursor;
        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // The accumulator is 64-bit and may have wrapped on a very long
        // exponent, so its value is only trusted when the exponent has at
        // most 10 significant digits (9,999,999,999 fits comfortably).
        while (exponent_text < state->cursor && *exponent_text == '0') {
            exponent_text++;
        }

        // Saturate instead of truncating: a plain (int32_t) cast turned e.g.
        // "1e4294967297" into an exponent of 1.
        int64_t magnitude = INT32_MAX;
        if ((state->cursor - exponent_text) <= 10 && abs_exponent < (uint64_t)INT32_MAX) {
            magnitude = (int64_t)abs_exponent;
        }

        exponent = negative_exponent ? -magnitude : magnitude;
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // The mantissa was accumulated without its decimal point, so shift the
    // exponent by the number of fractional digits.
    if (decimal_point_pos >= 0) {
        exponent -= ((int64_t)mantissa_digits - decimal_point_pos);
    }

    // Keep the adjusted exponent representable in the int32_t parameter.
    if (exponent > INT32_MAX) {
        exponent = INT32_MAX;
    } else if (exponent < -(int64_t)INT32_MAX) {
        exponent = -(int64_t)INT32_MAX;
    }

    return json_decode_float(config, mantissa, mantissa_digits, (int32_t)exponent, negative, start, state->cursor);
}
|
|
1141
|
+
|
|
1142
|
+
/*
 * Parse a number literal that has no leading sign.
 * The literal starts at the current cursor, which is also what gets reported
 * as the start of the number to json_parse_number().
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *literal_start = state->cursor;

    return json_parse_number(state, config, false, literal_start);
}
|
|
1146
|
+
|
|
1147
|
+
/*
 * Parse a number literal that begins with '-'.
 * The position of the sign is kept as the start of the literal, and the
 * cursor is moved past it before the digits are handed to json_parse_number().
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *literal_start = state->cursor++;

    return json_parse_number(state, config, true, literal_start);
}
|
|
1153
|
+
|
|
895
1154
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
896
1155
|
{
|
|
897
1156
|
json_eat_whitespace(state);
|
|
898
|
-
if (state->cursor >= state->end) {
|
|
899
|
-
raise_parse_error("unexpected end of input", state);
|
|
900
|
-
}
|
|
901
1157
|
|
|
902
|
-
switch (
|
|
1158
|
+
switch (peek(state)) {
|
|
903
1159
|
case 'n':
|
|
904
|
-
if ((state
|
|
1160
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
905
1161
|
state->cursor += 4;
|
|
906
1162
|
return json_push_value(state, config, Qnil);
|
|
907
1163
|
}
|
|
@@ -909,7 +1165,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
909
1165
|
raise_parse_error("unexpected token %s", state);
|
|
910
1166
|
break;
|
|
911
1167
|
case 't':
|
|
912
|
-
if ((state
|
|
1168
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
913
1169
|
state->cursor += 4;
|
|
914
1170
|
return json_push_value(state, config, Qtrue);
|
|
915
1171
|
}
|
|
@@ -918,7 +1174,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
918
1174
|
break;
|
|
919
1175
|
case 'f':
|
|
920
1176
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
921
|
-
if ((state
|
|
1177
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
922
1178
|
state->cursor += 5;
|
|
923
1179
|
return json_push_value(state, config, Qfalse);
|
|
924
1180
|
}
|
|
@@ -927,7 +1183,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
927
1183
|
break;
|
|
928
1184
|
case 'N':
|
|
929
1185
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
930
|
-
if (config->allow_nan && (state
|
|
1186
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
931
1187
|
state->cursor += 3;
|
|
932
1188
|
return json_push_value(state, config, CNaN);
|
|
933
1189
|
}
|
|
@@ -935,16 +1191,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
935
1191
|
raise_parse_error("unexpected token %s", state);
|
|
936
1192
|
break;
|
|
937
1193
|
case 'I':
|
|
938
|
-
if (config->allow_nan && (state
|
|
1194
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
939
1195
|
state->cursor += 8;
|
|
940
1196
|
return json_push_value(state, config, CInfinity);
|
|
941
1197
|
}
|
|
942
1198
|
|
|
943
1199
|
raise_parse_error("unexpected token %s", state);
|
|
944
1200
|
break;
|
|
945
|
-
case '-':
|
|
1201
|
+
case '-': {
|
|
946
1202
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
947
|
-
if ((state
|
|
1203
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
948
1204
|
if (config->allow_nan) {
|
|
949
1205
|
state->cursor += 9;
|
|
950
1206
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -952,62 +1208,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
952
1208
|
raise_parse_error("unexpected token %s", state);
|
|
953
1209
|
}
|
|
954
1210
|
}
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
bool integer = true;
|
|
958
|
-
|
|
959
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
960
|
-
const char *start = state->cursor;
|
|
961
|
-
state->cursor++;
|
|
962
|
-
|
|
963
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
964
|
-
state->cursor++;
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
long integer_length = state->cursor - start;
|
|
968
|
-
|
|
969
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
970
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
971
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
972
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
973
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
974
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
975
|
-
}
|
|
976
|
-
|
|
977
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
978
|
-
integer = false;
|
|
979
|
-
state->cursor++;
|
|
980
|
-
|
|
981
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
982
|
-
raise_parse_error("invalid number: %s", state);
|
|
983
|
-
}
|
|
984
|
-
|
|
985
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
986
|
-
state->cursor++;
|
|
987
|
-
}
|
|
988
|
-
}
|
|
989
|
-
|
|
990
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
991
|
-
integer = false;
|
|
992
|
-
state->cursor++;
|
|
993
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
994
|
-
state->cursor++;
|
|
995
|
-
}
|
|
996
|
-
|
|
997
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
998
|
-
raise_parse_error("invalid number: %s", state);
|
|
999
|
-
}
|
|
1000
|
-
|
|
1001
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1002
|
-
state->cursor++;
|
|
1003
|
-
}
|
|
1004
|
-
}
|
|
1005
|
-
|
|
1006
|
-
if (integer) {
|
|
1007
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1008
|
-
}
|
|
1009
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1211
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1212
|
+
break;
|
|
1010
1213
|
}
|
|
1214
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1215
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1216
|
+
break;
|
|
1011
1217
|
case '"': {
|
|
1012
1218
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1013
1219
|
return json_parse_string(state, config, false);
|
|
@@ -1018,7 +1224,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1018
1224
|
json_eat_whitespace(state);
|
|
1019
1225
|
long stack_head = state->stack->head;
|
|
1020
1226
|
|
|
1021
|
-
if ((state
|
|
1227
|
+
if (peek(state) == ']') {
|
|
1022
1228
|
state->cursor++;
|
|
1023
1229
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1024
1230
|
} else {
|
|
@@ -1033,26 +1239,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1033
1239
|
while (true) {
|
|
1034
1240
|
json_eat_whitespace(state);
|
|
1035
1241
|
|
|
1036
|
-
|
|
1037
|
-
if (*state->cursor == ']') {
|
|
1038
|
-
state->cursor++;
|
|
1039
|
-
long count = state->stack->head - stack_head;
|
|
1040
|
-
state->current_nesting--;
|
|
1041
|
-
state->in_array--;
|
|
1042
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1043
|
-
}
|
|
1242
|
+
const char next_char = peek(state);
|
|
1044
1243
|
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
}
|
|
1244
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1245
|
+
state->cursor++;
|
|
1246
|
+
if (config->allow_trailing_comma) {
|
|
1247
|
+
json_eat_whitespace(state);
|
|
1248
|
+
if (peek(state) == ']') {
|
|
1249
|
+
continue;
|
|
1052
1250
|
}
|
|
1053
|
-
json_parse_any(state, config);
|
|
1054
|
-
continue;
|
|
1055
1251
|
}
|
|
1252
|
+
json_parse_any(state, config);
|
|
1253
|
+
continue;
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
if (next_char == ']') {
|
|
1257
|
+
state->cursor++;
|
|
1258
|
+
long count = state->stack->head - stack_head;
|
|
1259
|
+
state->current_nesting--;
|
|
1260
|
+
state->in_array--;
|
|
1261
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1056
1262
|
}
|
|
1057
1263
|
|
|
1058
1264
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1060,11 +1266,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1060
1266
|
break;
|
|
1061
1267
|
}
|
|
1062
1268
|
case '{': {
|
|
1269
|
+
const char *object_start_cursor = state->cursor;
|
|
1270
|
+
|
|
1063
1271
|
state->cursor++;
|
|
1064
1272
|
json_eat_whitespace(state);
|
|
1065
1273
|
long stack_head = state->stack->head;
|
|
1066
1274
|
|
|
1067
|
-
if ((state
|
|
1275
|
+
if (peek(state) == '}') {
|
|
1068
1276
|
state->cursor++;
|
|
1069
1277
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1070
1278
|
} else {
|
|
@@ -1073,13 +1281,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1281
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1074
1282
|
}
|
|
1075
1283
|
|
|
1076
|
-
if (
|
|
1284
|
+
if (peek(state) != '"') {
|
|
1077
1285
|
raise_parse_error("expected object key, got %s", state);
|
|
1078
1286
|
}
|
|
1079
1287
|
json_parse_string(state, config, true);
|
|
1080
1288
|
|
|
1081
1289
|
json_eat_whitespace(state);
|
|
1082
|
-
if ((state
|
|
1290
|
+
if (peek(state) != ':') {
|
|
1083
1291
|
raise_parse_error("expected ':' after object key", state);
|
|
1084
1292
|
}
|
|
1085
1293
|
state->cursor++;
|
|
@@ -1090,39 +1298,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1090
1298
|
while (true) {
|
|
1091
1299
|
json_eat_whitespace(state);
|
|
1092
1300
|
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
return json_push_value(state, config, json_decode_object(state, config, count));
|
|
1099
|
-
}
|
|
1301
|
+
const char next_char = peek(state);
|
|
1302
|
+
if (next_char == '}') {
|
|
1303
|
+
state->cursor++;
|
|
1304
|
+
state->current_nesting--;
|
|
1305
|
+
size_t count = state->stack->head - stack_head;
|
|
1100
1306
|
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1307
|
+
// Temporary rewind cursor in case an error is raised
|
|
1308
|
+
const char *final_cursor = state->cursor;
|
|
1309
|
+
state->cursor = object_start_cursor;
|
|
1310
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1311
|
+
state->cursor = final_cursor;
|
|
1104
1312
|
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
continue;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1313
|
+
return json_push_value(state, config, object);
|
|
1314
|
+
}
|
|
1110
1315
|
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
json_parse_string(state, config, true);
|
|
1316
|
+
if (next_char == ',') {
|
|
1317
|
+
state->cursor++;
|
|
1318
|
+
json_eat_whitespace(state);
|
|
1115
1319
|
|
|
1116
|
-
|
|
1117
|
-
if ((state
|
|
1118
|
-
|
|
1320
|
+
if (config->allow_trailing_comma) {
|
|
1321
|
+
if (peek(state) == '}') {
|
|
1322
|
+
continue;
|
|
1119
1323
|
}
|
|
1120
|
-
|
|
1324
|
+
}
|
|
1121
1325
|
|
|
1122
|
-
|
|
1326
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1327
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1328
|
+
}
|
|
1329
|
+
json_parse_string(state, config, true);
|
|
1123
1330
|
|
|
1124
|
-
|
|
1331
|
+
json_eat_whitespace(state);
|
|
1332
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1333
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1125
1334
|
}
|
|
1335
|
+
state->cursor++;
|
|
1336
|
+
|
|
1337
|
+
json_parse_any(state, config);
|
|
1338
|
+
|
|
1339
|
+
continue;
|
|
1126
1340
|
}
|
|
1127
1341
|
|
|
1128
1342
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1130,18 +1344,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1130
1344
|
break;
|
|
1131
1345
|
}
|
|
1132
1346
|
|
|
1347
|
+
case 0:
|
|
1348
|
+
raise_parse_error("unexpected end of input", state);
|
|
1349
|
+
break;
|
|
1350
|
+
|
|
1133
1351
|
default:
|
|
1134
1352
|
raise_parse_error("unexpected character: %s", state);
|
|
1135
1353
|
break;
|
|
1136
1354
|
}
|
|
1137
1355
|
|
|
1138
|
-
raise_parse_error("
|
|
1356
|
+
raise_parse_error("unreachable: %s", state);
|
|
1357
|
+
return Qundef;
|
|
1139
1358
|
}
|
|
1140
1359
|
|
|
1141
1360
|
/*
 * Verify that only whitespace remains in the input.
 * Skips trailing whitespace, then raises a parse error if the cursor has not
 * reached the end of the stream.
 */
static void json_ensure_eof(JSON_ParserState *state)
{
    json_eat_whitespace(state);

    if (eos(state)) {
        return;
    }

    raise_parse_error("unexpected token at end of stream %s", state);
}
|
|
@@ -1184,6 +1403,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1184
1403
|
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1185
1404
|
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1186
1405
|
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1406
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1187
1407
|
else if (key == sym_decimal_class) {
|
|
1188
1408
|
if (RTEST(val)) {
|
|
1189
1409
|
if (rb_respond_to(val, i_try_convert)) {
|
|
@@ -1400,6 +1620,7 @@ void Init_parser(void)
|
|
|
1400
1620
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1401
1621
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1402
1622
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1623
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1403
1624
|
|
|
1404
1625
|
i_chr = rb_intern("chr");
|
|
1405
1626
|
i_aset = rb_intern("[]=");
|
|
@@ -1413,4 +1634,8 @@ void Init_parser(void)
|
|
|
1413
1634
|
binary_encindex = rb_ascii8bit_encindex();
|
|
1414
1635
|
utf8_encindex = rb_utf8_encindex();
|
|
1415
1636
|
enc_utf8 = rb_utf8_encoding();
|
|
1637
|
+
|
|
1638
|
+
#ifdef HAVE_SIMD
|
|
1639
|
+
simd_impl = find_simd_implementation();
|
|
1640
|
+
#endif
|
|
1416
1641
|
}
|