json 2.15.2.1 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +9 -3
- data/LEGAL +12 -0
- data/README.md +17 -1
- data/ext/json/ext/fbuffer/fbuffer.h +3 -52
- data/ext/json/ext/generator/generator.c +157 -133
- data/ext/json/ext/json.h +92 -0
- data/ext/json/ext/parser/extconf.rb +2 -0
- data/ext/json/ext/parser/parser.c +407 -335
- data/ext/json/ext/simd/simd.h +15 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/common.rb +7 -12
- data/lib/json/ext/generator/state.rb +4 -0
- data/lib/json/truffle_ruby/generator.rb +31 -7
- data/lib/json/version.rb +1 -1
- metadata +4 -2
|
@@ -1,35 +1,7 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
@@ -44,7 +16,7 @@ static int utf8_encindex;
|
|
|
44
16
|
|
|
45
17
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
18
|
// For TruffleRuby
|
|
47
|
-
void
|
|
19
|
+
static void
|
|
48
20
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
21
|
{
|
|
50
22
|
long index = 0;
|
|
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
33
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
34
|
#endif
|
|
63
35
|
|
|
36
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
37
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
38
|
+
{
|
|
39
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
40
|
+
}
|
|
41
|
+
#endif
|
|
64
42
|
|
|
65
43
|
/* name cache */
|
|
66
44
|
|
|
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
84
|
cache->entries[index] = rstring;
|
|
107
85
|
}
|
|
108
86
|
|
|
109
|
-
|
|
87
|
+
#define rstring_cache_memcmp memcmp
|
|
88
|
+
|
|
89
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
|
+
#if __has_builtin(__builtin_bswap64)
|
|
91
|
+
#undef rstring_cache_memcmp
|
|
92
|
+
static ALWAYS_INLINE() int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
93
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
94
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
96
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
97
|
+
long i = 0;
|
|
98
|
+
|
|
99
|
+
for (; i + 8 <= length; i += 8) {
|
|
100
|
+
uint64_t a, b;
|
|
101
|
+
memcpy(&a, str + i, 8);
|
|
102
|
+
memcpy(&b, rptr + i, 8);
|
|
103
|
+
if (a != b) {
|
|
104
|
+
a = __builtin_bswap64(a);
|
|
105
|
+
b = __builtin_bswap64(b);
|
|
106
|
+
return (a < b) ? -1 : 1;
|
|
107
|
+
}
|
|
116
108
|
}
|
|
109
|
+
|
|
110
|
+
for (; i < length; i++) {
|
|
111
|
+
if (str[i] != rptr[i]) {
|
|
112
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return 0;
|
|
117
117
|
}
|
|
118
|
+
#endif
|
|
119
|
+
#endif
|
|
118
120
|
|
|
119
|
-
static
|
|
121
|
+
static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
122
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
// cache names above an arbitrary threshold.
|
|
124
|
-
return Qfalse;
|
|
125
|
-
}
|
|
123
|
+
const char *rstring_ptr;
|
|
124
|
+
long rstring_length;
|
|
126
125
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
126
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
127
|
+
|
|
128
|
+
if (length == rstring_length) {
|
|
129
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
130
|
+
} else {
|
|
131
|
+
return (int)(length - rstring_length);
|
|
132
132
|
}
|
|
133
|
+
}
|
|
133
134
|
|
|
135
|
+
static ALWAYS_INLINE() VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
|
+
{
|
|
134
137
|
int low = 0;
|
|
135
138
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
139
|
|
|
139
140
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
141
|
+
int mid = (high + low) >> 1;
|
|
141
142
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
143
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
144
|
|
|
144
|
-
if (
|
|
145
|
+
if (cmp == 0) {
|
|
145
146
|
return entry;
|
|
146
|
-
} else if (
|
|
147
|
+
} else if (cmp > 0) {
|
|
147
148
|
low = mid + 1;
|
|
148
149
|
} else {
|
|
149
150
|
high = mid - 1;
|
|
150
151
|
}
|
|
151
152
|
}
|
|
152
153
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
154
|
VALUE rstring = build_interned_string(str, length);
|
|
160
155
|
|
|
161
156
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
157
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
158
|
}
|
|
168
159
|
return rstring;
|
|
169
160
|
}
|
|
170
161
|
|
|
171
162
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
163
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
164
|
int low = 0;
|
|
187
165
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
166
|
|
|
191
167
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
168
|
+
int mid = (high + low) >> 1;
|
|
193
169
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
170
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
171
|
|
|
196
|
-
if (
|
|
172
|
+
if (cmp == 0) {
|
|
197
173
|
return entry;
|
|
198
|
-
} else if (
|
|
174
|
+
} else if (cmp > 0) {
|
|
199
175
|
low = mid + 1;
|
|
200
176
|
} else {
|
|
201
177
|
high = mid - 1;
|
|
202
178
|
}
|
|
203
179
|
}
|
|
204
180
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
181
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
182
|
|
|
213
183
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
184
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
185
|
}
|
|
220
186
|
return rsymbol;
|
|
221
187
|
}
|
|
@@ -395,6 +361,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
361
|
int current_nesting;
|
|
396
362
|
} JSON_ParserState;
|
|
397
363
|
|
|
364
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
365
|
+
return state->end - state->cursor;
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
369
|
+
return state->cursor >= state->end;
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
static inline char peek(JSON_ParserState *state)
|
|
373
|
+
{
|
|
374
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
375
|
+
return 0;
|
|
376
|
+
}
|
|
377
|
+
return *state->cursor;
|
|
378
|
+
}
|
|
379
|
+
|
|
398
380
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
381
|
{
|
|
400
382
|
const char *cursor = state->cursor;
|
|
@@ -428,9 +410,14 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
428
410
|
|
|
429
411
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
430
412
|
|
|
431
|
-
|
|
413
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
|
414
|
+
RBIMPL_ATTR_NORETURN()
|
|
415
|
+
#endif
|
|
416
|
+
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
432
417
|
{
|
|
433
418
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
419
|
+
long line, column;
|
|
420
|
+
cursor_position(state, &line, &column);
|
|
434
421
|
|
|
435
422
|
const char *ptr = "EOF";
|
|
436
423
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -465,23 +452,11 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta
|
|
|
465
452
|
VALUE msg = rb_sprintf(format, ptr);
|
|
466
453
|
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
|
467
454
|
RB_GC_GUARD(msg);
|
|
468
|
-
return message;
|
|
469
|
-
}
|
|
470
455
|
|
|
471
|
-
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
472
|
-
{
|
|
473
456
|
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
|
474
457
|
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
|
475
458
|
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
|
476
|
-
|
|
477
|
-
}
|
|
478
|
-
|
|
479
|
-
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
480
|
-
{
|
|
481
|
-
long line, column;
|
|
482
|
-
cursor_position(state, &line, &column);
|
|
483
|
-
VALUE message = build_parse_error_message(format, state, line, column);
|
|
484
|
-
rb_exc_raise(parse_error_new(message, line, column));
|
|
459
|
+
rb_exc_raise(exc);
|
|
485
460
|
}
|
|
486
461
|
|
|
487
462
|
#ifdef RBIMPL_ATTR_NORETURN
|
|
@@ -537,61 +512,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
537
512
|
|
|
538
513
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
539
514
|
|
|
540
|
-
static const bool whitespace[256] = {
|
|
541
|
-
[' '] = 1,
|
|
542
|
-
['\t'] = 1,
|
|
543
|
-
['\n'] = 1,
|
|
544
|
-
['\r'] = 1,
|
|
545
|
-
['/'] = 1,
|
|
546
|
-
};
|
|
547
|
-
|
|
548
515
|
static void
|
|
549
516
|
json_eat_comments(JSON_ParserState *state)
|
|
550
517
|
{
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
518
|
+
const char *start = state->cursor;
|
|
519
|
+
state->cursor++;
|
|
520
|
+
|
|
521
|
+
switch (peek(state)) {
|
|
522
|
+
case '/': {
|
|
523
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
524
|
+
if (!state->cursor) {
|
|
525
|
+
state->cursor = state->end;
|
|
526
|
+
} else {
|
|
527
|
+
state->cursor++;
|
|
561
528
|
}
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
529
|
+
break;
|
|
530
|
+
}
|
|
531
|
+
case '*': {
|
|
532
|
+
state->cursor++;
|
|
533
|
+
|
|
534
|
+
while (true) {
|
|
535
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
536
|
+
if (!next_match) {
|
|
537
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
state->cursor = next_match + 1;
|
|
541
|
+
if (peek(state) == '/') {
|
|
542
|
+
state->cursor++;
|
|
543
|
+
break;
|
|
575
544
|
}
|
|
576
|
-
break;
|
|
577
545
|
}
|
|
578
|
-
|
|
579
|
-
raise_parse_error("unexpected token %s", state);
|
|
580
|
-
break;
|
|
546
|
+
break;
|
|
581
547
|
}
|
|
582
|
-
|
|
583
|
-
|
|
548
|
+
default:
|
|
549
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
550
|
+
break;
|
|
584
551
|
}
|
|
585
552
|
}
|
|
586
553
|
|
|
587
|
-
static
|
|
554
|
+
static ALWAYS_INLINE() void
|
|
588
555
|
json_eat_whitespace(JSON_ParserState *state)
|
|
589
556
|
{
|
|
590
|
-
while (
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
557
|
+
while (true) {
|
|
558
|
+
switch (peek(state)) {
|
|
559
|
+
case ' ':
|
|
560
|
+
state->cursor++;
|
|
561
|
+
break;
|
|
562
|
+
case '\n':
|
|
563
|
+
state->cursor++;
|
|
564
|
+
|
|
565
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
566
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
567
|
+
while (rest(state) > 8) {
|
|
568
|
+
uint64_t chunk;
|
|
569
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
570
|
+
if (chunk == 0x2020202020202020) {
|
|
571
|
+
state->cursor += 8;
|
|
572
|
+
continue;
|
|
573
|
+
}
|
|
574
|
+
|
|
575
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
576
|
+
state->cursor += consecutive_spaces;
|
|
577
|
+
break;
|
|
578
|
+
}
|
|
579
|
+
#endif
|
|
580
|
+
break;
|
|
581
|
+
case '\t':
|
|
582
|
+
case '\r':
|
|
583
|
+
state->cursor++;
|
|
584
|
+
break;
|
|
585
|
+
case '/':
|
|
586
|
+
json_eat_comments(state);
|
|
587
|
+
break;
|
|
588
|
+
|
|
589
|
+
default:
|
|
590
|
+
return;
|
|
595
591
|
}
|
|
596
592
|
}
|
|
597
593
|
}
|
|
@@ -622,11 +618,20 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
622
618
|
return result;
|
|
623
619
|
}
|
|
624
620
|
|
|
621
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
622
|
+
{
|
|
623
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
624
|
+
// Simple heuristics:
|
|
625
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
626
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
627
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
628
|
+
}
|
|
629
|
+
|
|
625
630
|
static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
|
|
626
631
|
{
|
|
627
632
|
size_t bufferSize = stringEnd - string;
|
|
628
633
|
|
|
629
|
-
if (is_name && state->in_array) {
|
|
634
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
630
635
|
VALUE cached_key;
|
|
631
636
|
if (RB_UNLIKELY(symbolize)) {
|
|
632
637
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -650,19 +655,6 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
650
655
|
int unescape_len;
|
|
651
656
|
char buf[4];
|
|
652
657
|
|
|
653
|
-
if (is_name && state->in_array) {
|
|
654
|
-
VALUE cached_key;
|
|
655
|
-
if (RB_UNLIKELY(symbolize)) {
|
|
656
|
-
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
657
|
-
} else {
|
|
658
|
-
cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
|
|
659
|
-
}
|
|
660
|
-
|
|
661
|
-
if (RB_LIKELY(cached_key)) {
|
|
662
|
-
return cached_key;
|
|
663
|
-
}
|
|
664
|
-
}
|
|
665
|
-
|
|
666
658
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
667
659
|
rb_enc_associate_index(result, utf8_encindex);
|
|
668
660
|
buffer = RSTRING_PTR(result);
|
|
@@ -755,33 +747,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
755
747
|
if (symbolize) {
|
|
756
748
|
result = rb_str_intern(result);
|
|
757
749
|
} else if (intern) {
|
|
758
|
-
result =
|
|
750
|
+
result = rb_str_to_interned_str(result);
|
|
759
751
|
}
|
|
760
752
|
|
|
761
753
|
return result;
|
|
762
754
|
}
|
|
763
755
|
|
|
764
756
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
765
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
766
|
-
{
|
|
767
|
-
bool negative = false;
|
|
768
|
-
if (*p == '-') {
|
|
769
|
-
negative = true;
|
|
770
|
-
p++;
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
long long memo = 0;
|
|
774
|
-
while (p < pe) {
|
|
775
|
-
memo *= 10;
|
|
776
|
-
memo += *p - '0';
|
|
777
|
-
p++;
|
|
778
|
-
}
|
|
779
|
-
|
|
780
|
-
if (negative) {
|
|
781
|
-
memo = -memo;
|
|
782
|
-
}
|
|
783
|
-
return LL2NUM(memo);
|
|
784
|
-
}
|
|
785
757
|
|
|
786
758
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
787
759
|
{
|
|
@@ -795,17 +767,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
795
767
|
}
|
|
796
768
|
|
|
797
769
|
static inline VALUE
|
|
798
|
-
json_decode_integer(const char *start, const char *end)
|
|
770
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
799
771
|
{
|
|
800
|
-
|
|
801
|
-
if (
|
|
802
|
-
return
|
|
772
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
773
|
+
if (negative) {
|
|
774
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
803
775
|
}
|
|
804
|
-
return
|
|
776
|
+
return UINT64T2NUM(mantissa);
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
return json_decode_large_integer(start, end - start);
|
|
805
780
|
}
|
|
806
781
|
|
|
807
782
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
808
783
|
{
|
|
784
|
+
if (RB_LIKELY(len < 64)) {
|
|
785
|
+
char buffer[64];
|
|
786
|
+
MEMCPY(buffer, start, char, len);
|
|
787
|
+
buffer[len] = '\0';
|
|
788
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
789
|
+
}
|
|
790
|
+
|
|
809
791
|
VALUE buffer_v;
|
|
810
792
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
811
793
|
MEMCPY(buffer, start, char, len);
|
|
@@ -815,21 +797,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
815
797
|
return number;
|
|
816
798
|
}
|
|
817
799
|
|
|
818
|
-
|
|
800
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
801
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
802
|
+
*/
|
|
803
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
804
|
+
const char *start, const char *end)
|
|
819
805
|
{
|
|
820
|
-
long len = end - start;
|
|
821
|
-
|
|
822
806
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
823
|
-
VALUE text = rb_str_new(start,
|
|
807
|
+
VALUE text = rb_str_new(start, end - start);
|
|
824
808
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
825
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
826
|
-
char buffer[64];
|
|
827
|
-
MEMCPY(buffer, start, char, len);
|
|
828
|
-
buffer[len] = '\0';
|
|
829
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
830
|
-
} else {
|
|
831
|
-
return json_decode_large_float(start, len);
|
|
832
809
|
}
|
|
810
|
+
|
|
811
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
812
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
813
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
814
|
+
return json_decode_large_float(start, end - start);
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
833
818
|
}
|
|
834
819
|
|
|
835
820
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -882,11 +867,6 @@ static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_k
|
|
|
882
867
|
rb_inspect(duplicate_key)
|
|
883
868
|
);
|
|
884
869
|
|
|
885
|
-
long line, column;
|
|
886
|
-
cursor_position(state, &line, &column);
|
|
887
|
-
rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
|
|
888
|
-
rb_exc_raise(parse_error_new(message, line, column));
|
|
889
|
-
|
|
890
870
|
raise_parse_error(RSTRING_PTR(message), state);
|
|
891
871
|
RB_GC_GUARD(message);
|
|
892
872
|
}
|
|
@@ -956,17 +936,11 @@ static const bool string_scan_table[256] = {
|
|
|
956
936
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
957
937
|
};
|
|
958
938
|
|
|
959
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
960
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
961
|
-
#else
|
|
962
|
-
#define FORCE_INLINE
|
|
963
|
-
#endif
|
|
964
|
-
|
|
965
939
|
#ifdef HAVE_SIMD
|
|
966
940
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
967
941
|
#endif /* HAVE_SIMD */
|
|
968
942
|
|
|
969
|
-
static
|
|
943
|
+
static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
|
|
970
944
|
{
|
|
971
945
|
#ifdef HAVE_SIMD
|
|
972
946
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -988,7 +962,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
988
962
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
989
963
|
#endif /* HAVE_SIMD */
|
|
990
964
|
|
|
991
|
-
while (state
|
|
965
|
+
while (!eos(state)) {
|
|
992
966
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
993
967
|
return 1;
|
|
994
968
|
}
|
|
@@ -1030,16 +1004,160 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1030
1004
|
return Qfalse;
|
|
1031
1005
|
}
|
|
1032
1006
|
|
|
1007
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1008
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1009
|
+
// Additional References:
|
|
1010
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1011
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1012
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1013
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1014
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1015
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1016
|
+
val -= 0x3030303030303030;
|
|
1017
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1018
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1019
|
+
return val;
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1023
|
+
const uint32_t mask = 0x000000FF;
|
|
1024
|
+
const uint32_t mul1 = 100;
|
|
1025
|
+
val -= 0x30303030;
|
|
1026
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1027
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1028
|
+
return val;
|
|
1029
|
+
}
|
|
1030
|
+
#endif
|
|
1031
|
+
|
|
1032
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1033
|
+
{
|
|
1034
|
+
const char *start = state->cursor;
|
|
1035
|
+
|
|
1036
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1037
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1038
|
+
uint64_t next_8bytes;
|
|
1039
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1040
|
+
|
|
1041
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1042
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1043
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1044
|
+
|
|
1045
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1046
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1047
|
+
state->cursor += 8;
|
|
1048
|
+
continue;
|
|
1049
|
+
}
|
|
1050
|
+
|
|
1051
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1052
|
+
|
|
1053
|
+
if (consecutive_digits >= 4) {
|
|
1054
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1055
|
+
state->cursor += 4;
|
|
1056
|
+
consecutive_digits -= 4;
|
|
1057
|
+
}
|
|
1058
|
+
|
|
1059
|
+
while (consecutive_digits) {
|
|
1060
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1061
|
+
consecutive_digits--;
|
|
1062
|
+
state->cursor++;
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
return (int)(state->cursor - start);
|
|
1066
|
+
}
|
|
1067
|
+
#endif
|
|
1068
|
+
|
|
1069
|
+
char next_char;
|
|
1070
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1071
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1072
|
+
state->cursor++;
|
|
1073
|
+
}
|
|
1074
|
+
return (int)(state->cursor - start);
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1078
|
+
{
|
|
1079
|
+
bool integer = true;
|
|
1080
|
+
const char first_digit = *state->cursor;
|
|
1081
|
+
|
|
1082
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1083
|
+
int32_t exponent = 0;
|
|
1084
|
+
int decimal_point_pos = -1;
|
|
1085
|
+
uint64_t mantissa = 0;
|
|
1086
|
+
|
|
1087
|
+
// Parse integer part and extract mantissa digits
|
|
1088
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1089
|
+
|
|
1090
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1091
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
// Parse fractional part
|
|
1095
|
+
if (peek(state) == '.') {
|
|
1096
|
+
integer = false;
|
|
1097
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1098
|
+
state->cursor++;
|
|
1099
|
+
|
|
1100
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1101
|
+
mantissa_digits += fractional_digits;
|
|
1102
|
+
|
|
1103
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1104
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1105
|
+
}
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
// Parse exponent
|
|
1109
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1110
|
+
integer = false;
|
|
1111
|
+
state->cursor++;
|
|
1112
|
+
|
|
1113
|
+
bool negative_exponent = false;
|
|
1114
|
+
const char next_char = peek(state);
|
|
1115
|
+
if (next_char == '-' || next_char == '+') {
|
|
1116
|
+
negative_exponent = next_char == '-';
|
|
1117
|
+
state->cursor++;
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
uint64_t abs_exponent = 0;
|
|
1121
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1122
|
+
|
|
1123
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1124
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
|
|
1128
|
+
}
|
|
1129
|
+
|
|
1130
|
+
if (integer) {
|
|
1131
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
// Adjust exponent based on decimal point position
|
|
1135
|
+
if (decimal_point_pos >= 0) {
|
|
1136
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1140
|
+
}
|
|
1141
|
+
|
|
1142
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1143
|
+
{
|
|
1144
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1145
|
+
}
|
|
1146
|
+
|
|
1147
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1148
|
+
{
|
|
1149
|
+
const char *start = state->cursor;
|
|
1150
|
+
state->cursor++;
|
|
1151
|
+
return json_parse_number(state, config, true, start);
|
|
1152
|
+
}
|
|
1153
|
+
|
|
1033
1154
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1034
1155
|
{
|
|
1035
1156
|
json_eat_whitespace(state);
|
|
1036
|
-
if (state->cursor >= state->end) {
|
|
1037
|
-
raise_parse_error("unexpected end of input", state);
|
|
1038
|
-
}
|
|
1039
1157
|
|
|
1040
|
-
switch (
|
|
1158
|
+
switch (peek(state)) {
|
|
1041
1159
|
case 'n':
|
|
1042
|
-
if ((state
|
|
1160
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1043
1161
|
state->cursor += 4;
|
|
1044
1162
|
return json_push_value(state, config, Qnil);
|
|
1045
1163
|
}
|
|
@@ -1047,7 +1165,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1047
1165
|
raise_parse_error("unexpected token %s", state);
|
|
1048
1166
|
break;
|
|
1049
1167
|
case 't':
|
|
1050
|
-
if ((state
|
|
1168
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1051
1169
|
state->cursor += 4;
|
|
1052
1170
|
return json_push_value(state, config, Qtrue);
|
|
1053
1171
|
}
|
|
@@ -1056,7 +1174,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1056
1174
|
break;
|
|
1057
1175
|
case 'f':
|
|
1058
1176
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1059
|
-
if ((state
|
|
1177
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1060
1178
|
state->cursor += 5;
|
|
1061
1179
|
return json_push_value(state, config, Qfalse);
|
|
1062
1180
|
}
|
|
@@ -1065,7 +1183,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1065
1183
|
break;
|
|
1066
1184
|
case 'N':
|
|
1067
1185
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1068
|
-
if (config->allow_nan && (state
|
|
1186
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1069
1187
|
state->cursor += 3;
|
|
1070
1188
|
return json_push_value(state, config, CNaN);
|
|
1071
1189
|
}
|
|
@@ -1073,16 +1191,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1191
|
raise_parse_error("unexpected token %s", state);
|
|
1074
1192
|
break;
|
|
1075
1193
|
case 'I':
|
|
1076
|
-
if (config->allow_nan && (state
|
|
1194
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1077
1195
|
state->cursor += 8;
|
|
1078
1196
|
return json_push_value(state, config, CInfinity);
|
|
1079
1197
|
}
|
|
1080
1198
|
|
|
1081
1199
|
raise_parse_error("unexpected token %s", state);
|
|
1082
1200
|
break;
|
|
1083
|
-
case '-':
|
|
1201
|
+
case '-': {
|
|
1084
1202
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1085
|
-
if ((state
|
|
1203
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1086
1204
|
if (config->allow_nan) {
|
|
1087
1205
|
state->cursor += 9;
|
|
1088
1206
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1090,62 +1208,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1090
1208
|
raise_parse_error("unexpected token %s", state);
|
|
1091
1209
|
}
|
|
1092
1210
|
}
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
bool integer = true;
|
|
1096
|
-
|
|
1097
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1098
|
-
const char *start = state->cursor;
|
|
1099
|
-
state->cursor++;
|
|
1100
|
-
|
|
1101
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1102
|
-
state->cursor++;
|
|
1103
|
-
}
|
|
1104
|
-
|
|
1105
|
-
long integer_length = state->cursor - start;
|
|
1106
|
-
|
|
1107
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1108
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1109
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1110
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1111
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1112
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1113
|
-
}
|
|
1114
|
-
|
|
1115
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1116
|
-
integer = false;
|
|
1117
|
-
state->cursor++;
|
|
1118
|
-
|
|
1119
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1120
|
-
raise_parse_error("invalid number: %s", state);
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1124
|
-
state->cursor++;
|
|
1125
|
-
}
|
|
1126
|
-
}
|
|
1127
|
-
|
|
1128
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1129
|
-
integer = false;
|
|
1130
|
-
state->cursor++;
|
|
1131
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1132
|
-
state->cursor++;
|
|
1133
|
-
}
|
|
1134
|
-
|
|
1135
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1136
|
-
raise_parse_error("invalid number: %s", state);
|
|
1137
|
-
}
|
|
1138
|
-
|
|
1139
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1140
|
-
state->cursor++;
|
|
1141
|
-
}
|
|
1142
|
-
}
|
|
1143
|
-
|
|
1144
|
-
if (integer) {
|
|
1145
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1146
|
-
}
|
|
1147
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1211
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1212
|
+
break;
|
|
1148
1213
|
}
|
|
1214
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1215
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1216
|
+
break;
|
|
1149
1217
|
case '"': {
|
|
1150
1218
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1151
1219
|
return json_parse_string(state, config, false);
|
|
@@ -1156,7 +1224,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1156
1224
|
json_eat_whitespace(state);
|
|
1157
1225
|
long stack_head = state->stack->head;
|
|
1158
1226
|
|
|
1159
|
-
if ((state
|
|
1227
|
+
if (peek(state) == ']') {
|
|
1160
1228
|
state->cursor++;
|
|
1161
1229
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1162
1230
|
} else {
|
|
@@ -1171,26 +1239,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1171
1239
|
while (true) {
|
|
1172
1240
|
json_eat_whitespace(state);
|
|
1173
1241
|
|
|
1174
|
-
|
|
1175
|
-
if (*state->cursor == ']') {
|
|
1176
|
-
state->cursor++;
|
|
1177
|
-
long count = state->stack->head - stack_head;
|
|
1178
|
-
state->current_nesting--;
|
|
1179
|
-
state->in_array--;
|
|
1180
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1181
|
-
}
|
|
1242
|
+
const char next_char = peek(state);
|
|
1182
1243
|
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
}
|
|
1244
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1245
|
+
state->cursor++;
|
|
1246
|
+
if (config->allow_trailing_comma) {
|
|
1247
|
+
json_eat_whitespace(state);
|
|
1248
|
+
if (peek(state) == ']') {
|
|
1249
|
+
continue;
|
|
1190
1250
|
}
|
|
1191
|
-
json_parse_any(state, config);
|
|
1192
|
-
continue;
|
|
1193
1251
|
}
|
|
1252
|
+
json_parse_any(state, config);
|
|
1253
|
+
continue;
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
if (next_char == ']') {
|
|
1257
|
+
state->cursor++;
|
|
1258
|
+
long count = state->stack->head - stack_head;
|
|
1259
|
+
state->current_nesting--;
|
|
1260
|
+
state->in_array--;
|
|
1261
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1194
1262
|
}
|
|
1195
1263
|
|
|
1196
1264
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1204,7 +1272,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1204
1272
|
json_eat_whitespace(state);
|
|
1205
1273
|
long stack_head = state->stack->head;
|
|
1206
1274
|
|
|
1207
|
-
if ((state
|
|
1275
|
+
if (peek(state) == '}') {
|
|
1208
1276
|
state->cursor++;
|
|
1209
1277
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1210
1278
|
} else {
|
|
@@ -1213,13 +1281,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1213
1281
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1214
1282
|
}
|
|
1215
1283
|
|
|
1216
|
-
if (
|
|
1284
|
+
if (peek(state) != '"') {
|
|
1217
1285
|
raise_parse_error("expected object key, got %s", state);
|
|
1218
1286
|
}
|
|
1219
1287
|
json_parse_string(state, config, true);
|
|
1220
1288
|
|
|
1221
1289
|
json_eat_whitespace(state);
|
|
1222
|
-
if ((state
|
|
1290
|
+
if (peek(state) != ':') {
|
|
1223
1291
|
raise_parse_error("expected ':' after object key", state);
|
|
1224
1292
|
}
|
|
1225
1293
|
state->cursor++;
|
|
@@ -1230,46 +1298,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1230
1298
|
while (true) {
|
|
1231
1299
|
json_eat_whitespace(state);
|
|
1232
1300
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1301
|
+
const char next_char = peek(state);
|
|
1302
|
+
if (next_char == '}') {
|
|
1303
|
+
state->cursor++;
|
|
1304
|
+
state->current_nesting--;
|
|
1305
|
+
size_t count = state->stack->head - stack_head;
|
|
1238
1306
|
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1307
|
+
// Temporary rewind cursor in case an error is raised
|
|
1308
|
+
const char *final_cursor = state->cursor;
|
|
1309
|
+
state->cursor = object_start_cursor;
|
|
1310
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1311
|
+
state->cursor = final_cursor;
|
|
1244
1312
|
|
|
1245
|
-
|
|
1246
|
-
|
|
1313
|
+
return json_push_value(state, config, object);
|
|
1314
|
+
}
|
|
1247
1315
|
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1316
|
+
if (next_char == ',') {
|
|
1317
|
+
state->cursor++;
|
|
1318
|
+
json_eat_whitespace(state);
|
|
1251
1319
|
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
}
|
|
1320
|
+
if (config->allow_trailing_comma) {
|
|
1321
|
+
if (peek(state) == '}') {
|
|
1322
|
+
continue;
|
|
1256
1323
|
}
|
|
1324
|
+
}
|
|
1257
1325
|
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1326
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1327
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1328
|
+
}
|
|
1329
|
+
json_parse_string(state, config, true);
|
|
1262
1330
|
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1331
|
+
json_eat_whitespace(state);
|
|
1332
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1333
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1334
|
+
}
|
|
1335
|
+
state->cursor++;
|
|
1268
1336
|
|
|
1269
|
-
|
|
1337
|
+
json_parse_any(state, config);
|
|
1270
1338
|
|
|
1271
|
-
|
|
1272
|
-
}
|
|
1339
|
+
continue;
|
|
1273
1340
|
}
|
|
1274
1341
|
|
|
1275
1342
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1277,18 +1344,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1277
1344
|
break;
|
|
1278
1345
|
}
|
|
1279
1346
|
|
|
1347
|
+
case 0:
|
|
1348
|
+
raise_parse_error("unexpected end of input", state);
|
|
1349
|
+
break;
|
|
1350
|
+
|
|
1280
1351
|
default:
|
|
1281
1352
|
raise_parse_error("unexpected character: %s", state);
|
|
1282
1353
|
break;
|
|
1283
1354
|
}
|
|
1284
1355
|
|
|
1285
1356
|
raise_parse_error("unreachable: %s", state);
|
|
1357
|
+
return Qundef;
|
|
1286
1358
|
}
|
|
1287
1359
|
|
|
1288
1360
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1289
1361
|
{
|
|
1290
1362
|
json_eat_whitespace(state);
|
|
1291
|
-
if (state
|
|
1363
|
+
if (!eos(state)) {
|
|
1292
1364
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1293
1365
|
}
|
|
1294
1366
|
}
|