json 2.15.2 → 2.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +22 -0
- data/LEGAL +12 -0
- data/README.md +17 -1
- data/ext/json/ext/fbuffer/fbuffer.h +9 -58
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +191 -187
- data/ext/json/ext/json.h +97 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +497 -388
- data/ext/json/ext/simd/simd.h +15 -12
- data/ext/json/ext/vendor/fpconv.c +2 -2
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/common.rb +28 -16
- data/lib/json/ext/generator/state.rb +4 -0
- data/lib/json/truffle_ruby/generator.rb +52 -19
- data/lib/json/version.rb +1 -1
- metadata +3 -1
|
@@ -1,40 +1,11 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
10
|
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
40
11
|
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
@@ -44,7 +15,7 @@ static int utf8_encindex;
|
|
|
44
15
|
|
|
45
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
17
|
// For TruffleRuby
|
|
47
|
-
void
|
|
18
|
+
static void
|
|
48
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
20
|
{
|
|
50
21
|
long index = 0;
|
|
@@ -61,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
33
|
#endif
|
|
63
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
37
|
+
{
|
|
38
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
39
|
+
}
|
|
40
|
+
#endif
|
|
64
41
|
|
|
65
42
|
/* name cache */
|
|
66
43
|
|
|
@@ -106,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
83
|
cache->entries[index] = rstring;
|
|
107
84
|
}
|
|
108
85
|
|
|
109
|
-
|
|
86
|
+
#define rstring_cache_memcmp memcmp
|
|
87
|
+
|
|
88
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
89
|
+
#if __has_builtin(__builtin_bswap64)
|
|
90
|
+
#undef rstring_cache_memcmp
|
|
91
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
92
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
93
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
94
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
95
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
96
|
+
long i = 0;
|
|
97
|
+
|
|
98
|
+
for (; i + 8 <= length; i += 8) {
|
|
99
|
+
uint64_t a, b;
|
|
100
|
+
memcpy(&a, str + i, 8);
|
|
101
|
+
memcpy(&b, rptr + i, 8);
|
|
102
|
+
if (a != b) {
|
|
103
|
+
a = __builtin_bswap64(a);
|
|
104
|
+
b = __builtin_bswap64(b);
|
|
105
|
+
return (a < b) ? -1 : 1;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (; i < length; i++) {
|
|
110
|
+
if (str[i] != rptr[i]) {
|
|
111
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
112
|
+
}
|
|
116
113
|
}
|
|
114
|
+
|
|
115
|
+
return 0;
|
|
117
116
|
}
|
|
117
|
+
#endif
|
|
118
|
+
#endif
|
|
118
119
|
|
|
119
|
-
static
|
|
120
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
121
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
122
|
+
const char *rstring_ptr;
|
|
123
|
+
long rstring_length;
|
|
124
|
+
|
|
125
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
126
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
127
|
+
if (length == rstring_length) {
|
|
128
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
129
|
+
} else {
|
|
130
|
+
return (int)(length - rstring_length);
|
|
132
131
|
}
|
|
132
|
+
}
|
|
133
133
|
|
|
134
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
135
|
+
{
|
|
134
136
|
int low = 0;
|
|
135
137
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
138
|
|
|
139
139
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
140
|
+
int mid = (high + low) >> 1;
|
|
141
141
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
142
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
143
|
|
|
144
|
-
if (
|
|
144
|
+
if (cmp == 0) {
|
|
145
145
|
return entry;
|
|
146
|
-
} else if (
|
|
146
|
+
} else if (cmp > 0) {
|
|
147
147
|
low = mid + 1;
|
|
148
148
|
} else {
|
|
149
149
|
high = mid - 1;
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
153
|
VALUE rstring = build_interned_string(str, length);
|
|
160
154
|
|
|
161
155
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
156
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
157
|
}
|
|
168
158
|
return rstring;
|
|
169
159
|
}
|
|
170
160
|
|
|
171
161
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
162
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
163
|
int low = 0;
|
|
187
164
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
165
|
|
|
191
166
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
167
|
+
int mid = (high + low) >> 1;
|
|
193
168
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
169
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
170
|
|
|
196
|
-
if (
|
|
171
|
+
if (cmp == 0) {
|
|
197
172
|
return entry;
|
|
198
|
-
} else if (
|
|
173
|
+
} else if (cmp > 0) {
|
|
199
174
|
low = mid + 1;
|
|
200
175
|
} else {
|
|
201
176
|
high = mid - 1;
|
|
202
177
|
}
|
|
203
178
|
}
|
|
204
179
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
180
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
181
|
|
|
213
182
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
183
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
184
|
}
|
|
220
185
|
return rsymbol;
|
|
221
186
|
}
|
|
@@ -330,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
295
|
}
|
|
331
296
|
}
|
|
332
297
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
299
|
{
|
|
344
300
|
int len = 1;
|
|
@@ -379,7 +335,6 @@ typedef struct JSON_ParserStruct {
|
|
|
379
335
|
int max_nesting;
|
|
380
336
|
bool allow_nan;
|
|
381
337
|
bool allow_trailing_comma;
|
|
382
|
-
bool parsing_name;
|
|
383
338
|
bool symbolize_names;
|
|
384
339
|
bool freeze;
|
|
385
340
|
} JSON_ParserConfig;
|
|
@@ -395,6 +350,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
350
|
int current_nesting;
|
|
396
351
|
} JSON_ParserState;
|
|
397
352
|
|
|
353
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
354
|
+
return state->end - state->cursor;
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
358
|
+
return state->cursor >= state->end;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
static inline char peek(JSON_ParserState *state)
|
|
362
|
+
{
|
|
363
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
364
|
+
return 0;
|
|
365
|
+
}
|
|
366
|
+
return *state->cursor;
|
|
367
|
+
}
|
|
368
|
+
|
|
398
369
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
370
|
{
|
|
400
371
|
const char *cursor = state->cursor;
|
|
@@ -530,61 +501,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
501
|
|
|
531
502
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
503
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
504
|
static void
|
|
542
505
|
json_eat_comments(JSON_ParserState *state)
|
|
543
506
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
507
|
+
const char *start = state->cursor;
|
|
508
|
+
state->cursor++;
|
|
509
|
+
|
|
510
|
+
switch (peek(state)) {
|
|
511
|
+
case '/': {
|
|
512
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
513
|
+
if (!state->cursor) {
|
|
514
|
+
state->cursor = state->end;
|
|
515
|
+
} else {
|
|
516
|
+
state->cursor++;
|
|
554
517
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
518
|
+
break;
|
|
519
|
+
}
|
|
520
|
+
case '*': {
|
|
521
|
+
state->cursor++;
|
|
522
|
+
|
|
523
|
+
while (true) {
|
|
524
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
525
|
+
if (!next_match) {
|
|
526
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
state->cursor = next_match + 1;
|
|
530
|
+
if (peek(state) == '/') {
|
|
531
|
+
state->cursor++;
|
|
532
|
+
break;
|
|
568
533
|
}
|
|
569
|
-
break;
|
|
570
534
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
535
|
+
break;
|
|
574
536
|
}
|
|
575
|
-
|
|
576
|
-
|
|
537
|
+
default:
|
|
538
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
539
|
+
break;
|
|
577
540
|
}
|
|
578
541
|
}
|
|
579
542
|
|
|
580
|
-
static
|
|
543
|
+
ALWAYS_INLINE(static) void
|
|
581
544
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
545
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
546
|
+
while (true) {
|
|
547
|
+
switch (peek(state)) {
|
|
548
|
+
case ' ':
|
|
549
|
+
state->cursor++;
|
|
550
|
+
break;
|
|
551
|
+
case '\n':
|
|
552
|
+
state->cursor++;
|
|
553
|
+
|
|
554
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
555
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
556
|
+
while (rest(state) > 8) {
|
|
557
|
+
uint64_t chunk;
|
|
558
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
559
|
+
if (chunk == 0x2020202020202020) {
|
|
560
|
+
state->cursor += 8;
|
|
561
|
+
continue;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
565
|
+
state->cursor += consecutive_spaces;
|
|
566
|
+
break;
|
|
567
|
+
}
|
|
568
|
+
#endif
|
|
569
|
+
break;
|
|
570
|
+
case '\t':
|
|
571
|
+
case '\r':
|
|
572
|
+
state->cursor++;
|
|
573
|
+
break;
|
|
574
|
+
case '/':
|
|
575
|
+
json_eat_comments(state);
|
|
576
|
+
break;
|
|
577
|
+
|
|
578
|
+
default:
|
|
579
|
+
return;
|
|
588
580
|
}
|
|
589
581
|
}
|
|
590
582
|
}
|
|
@@ -615,11 +607,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
607
|
return result;
|
|
616
608
|
}
|
|
617
609
|
|
|
618
|
-
static inline
|
|
610
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
619
611
|
{
|
|
612
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
613
|
+
// Simple heuristics:
|
|
614
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
615
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
616
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
620
|
+
{
|
|
621
|
+
bool intern = is_name || config->freeze;
|
|
622
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
623
|
size_t bufferSize = stringEnd - string;
|
|
621
624
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
625
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
626
|
VALUE cached_key;
|
|
624
627
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
628
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,60 +638,71 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
638
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
639
|
}
|
|
637
640
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
641
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
642
|
+
typedef struct _json_unescape_positions {
|
|
643
|
+
long size;
|
|
644
|
+
const char **positions;
|
|
645
|
+
bool has_more;
|
|
646
|
+
} JSON_UnescapePositions;
|
|
645
647
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
648
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
649
|
+
{
|
|
650
|
+
while (positions->size) {
|
|
651
|
+
positions->size--;
|
|
652
|
+
const char *next_position = positions->positions[0];
|
|
653
|
+
positions->positions++;
|
|
654
|
+
return next_position;
|
|
655
|
+
}
|
|
653
656
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
}
|
|
657
|
+
if (positions->has_more) {
|
|
658
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
659
|
}
|
|
658
660
|
|
|
661
|
+
return NULL;
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
665
|
+
{
|
|
666
|
+
bool intern = is_name || config->freeze;
|
|
667
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
668
|
+
size_t bufferSize = stringEnd - string;
|
|
669
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
670
|
+
char *buffer;
|
|
671
|
+
|
|
659
672
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
673
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
674
|
buffer = RSTRING_PTR(result);
|
|
662
675
|
bufferStart = buffer;
|
|
663
676
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
677
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
678
|
+
|
|
679
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
680
|
if (pe > p) {
|
|
668
681
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
682
|
buffer += pe - p;
|
|
670
683
|
}
|
|
671
684
|
switch (*++pe) {
|
|
685
|
+
case '"':
|
|
686
|
+
case '/':
|
|
687
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
688
|
+
break;
|
|
689
|
+
case '\\':
|
|
690
|
+
APPEND_CHAR('\\');
|
|
691
|
+
break;
|
|
672
692
|
case 'n':
|
|
673
|
-
|
|
693
|
+
APPEND_CHAR('\n');
|
|
674
694
|
break;
|
|
675
695
|
case 'r':
|
|
676
|
-
|
|
696
|
+
APPEND_CHAR('\r');
|
|
677
697
|
break;
|
|
678
698
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
699
|
+
APPEND_CHAR('\t');
|
|
686
700
|
break;
|
|
687
701
|
case 'b':
|
|
688
|
-
|
|
702
|
+
APPEND_CHAR('\b');
|
|
689
703
|
break;
|
|
690
704
|
case 'f':
|
|
691
|
-
|
|
705
|
+
APPEND_CHAR('\f');
|
|
692
706
|
break;
|
|
693
707
|
case 'u':
|
|
694
708
|
if (pe > stringEnd - 5) {
|
|
@@ -726,18 +740,23 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
726
740
|
break;
|
|
727
741
|
}
|
|
728
742
|
}
|
|
729
|
-
|
|
730
|
-
|
|
743
|
+
|
|
744
|
+
char buf[4];
|
|
745
|
+
int unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
746
|
+
MEMCPY(buffer, buf, char, unescape_len);
|
|
747
|
+
buffer += unescape_len;
|
|
748
|
+
p = ++pe;
|
|
731
749
|
}
|
|
732
750
|
break;
|
|
733
751
|
default:
|
|
734
|
-
|
|
735
|
-
|
|
752
|
+
if ((unsigned char)*pe < 0x20) {
|
|
753
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
754
|
+
}
|
|
755
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
756
|
+
break;
|
|
736
757
|
}
|
|
737
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
738
|
-
buffer += unescape_len;
|
|
739
|
-
p = ++pe;
|
|
740
758
|
}
|
|
759
|
+
#undef APPEND_CHAR
|
|
741
760
|
|
|
742
761
|
if (stringEnd > p) {
|
|
743
762
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -748,33 +767,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
748
767
|
if (symbolize) {
|
|
749
768
|
result = rb_str_intern(result);
|
|
750
769
|
} else if (intern) {
|
|
751
|
-
result =
|
|
770
|
+
result = rb_str_to_interned_str(result);
|
|
752
771
|
}
|
|
753
772
|
|
|
754
773
|
return result;
|
|
755
774
|
}
|
|
756
775
|
|
|
757
776
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
758
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
759
|
-
{
|
|
760
|
-
bool negative = false;
|
|
761
|
-
if (*p == '-') {
|
|
762
|
-
negative = true;
|
|
763
|
-
p++;
|
|
764
|
-
}
|
|
765
|
-
|
|
766
|
-
long long memo = 0;
|
|
767
|
-
while (p < pe) {
|
|
768
|
-
memo *= 10;
|
|
769
|
-
memo += *p - '0';
|
|
770
|
-
p++;
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
if (negative) {
|
|
774
|
-
memo = -memo;
|
|
775
|
-
}
|
|
776
|
-
return LL2NUM(memo);
|
|
777
|
-
}
|
|
778
777
|
|
|
779
778
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
780
779
|
{
|
|
@@ -788,17 +787,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
788
787
|
}
|
|
789
788
|
|
|
790
789
|
static inline VALUE
|
|
791
|
-
json_decode_integer(const char *start, const char *end)
|
|
790
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
792
791
|
{
|
|
793
|
-
|
|
794
|
-
if (
|
|
795
|
-
return
|
|
792
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
793
|
+
if (negative) {
|
|
794
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
796
795
|
}
|
|
797
|
-
return
|
|
796
|
+
return UINT64T2NUM(mantissa);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
return json_decode_large_integer(start, end - start);
|
|
798
800
|
}
|
|
799
801
|
|
|
800
802
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
801
803
|
{
|
|
804
|
+
if (RB_LIKELY(len < 64)) {
|
|
805
|
+
char buffer[64];
|
|
806
|
+
MEMCPY(buffer, start, char, len);
|
|
807
|
+
buffer[len] = '\0';
|
|
808
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
809
|
+
}
|
|
810
|
+
|
|
802
811
|
VALUE buffer_v;
|
|
803
812
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
804
813
|
MEMCPY(buffer, start, char, len);
|
|
@@ -808,21 +817,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
808
817
|
return number;
|
|
809
818
|
}
|
|
810
819
|
|
|
811
|
-
|
|
820
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
821
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
822
|
+
*/
|
|
823
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
824
|
+
const char *start, const char *end)
|
|
812
825
|
{
|
|
813
|
-
long len = end - start;
|
|
814
|
-
|
|
815
826
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
816
|
-
VALUE text = rb_str_new(start,
|
|
827
|
+
VALUE text = rb_str_new(start, end - start);
|
|
817
828
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
818
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
819
|
-
char buffer[64];
|
|
820
|
-
MEMCPY(buffer, start, char, len);
|
|
821
|
-
buffer[len] = '\0';
|
|
822
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
823
|
-
} else {
|
|
824
|
-
return json_decode_large_float(start, len);
|
|
825
829
|
}
|
|
830
|
+
|
|
831
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
832
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
833
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
834
|
+
return json_decode_large_float(start, end - start);
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
826
838
|
}
|
|
827
839
|
|
|
828
840
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -908,20 +920,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
908
920
|
return object;
|
|
909
921
|
}
|
|
910
922
|
|
|
911
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
912
|
-
{
|
|
913
|
-
VALUE string;
|
|
914
|
-
bool intern = is_name || config->freeze;
|
|
915
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
916
|
-
if (escaped) {
|
|
917
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
918
|
-
} else {
|
|
919
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
return string;
|
|
923
|
-
}
|
|
924
|
-
|
|
925
923
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
926
924
|
{
|
|
927
925
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -944,17 +942,11 @@ static const bool string_scan_table[256] = {
|
|
|
944
942
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
945
943
|
};
|
|
946
944
|
|
|
947
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
948
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
949
|
-
#else
|
|
950
|
-
#define FORCE_INLINE
|
|
951
|
-
#endif
|
|
952
|
-
|
|
953
945
|
#ifdef HAVE_SIMD
|
|
954
946
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
955
947
|
#endif /* HAVE_SIMD */
|
|
956
948
|
|
|
957
|
-
static
|
|
949
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
958
950
|
{
|
|
959
951
|
#ifdef HAVE_SIMD
|
|
960
952
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -962,7 +954,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
962
954
|
uint64_t mask = 0;
|
|
963
955
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
964
956
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
965
|
-
return
|
|
957
|
+
return true;
|
|
966
958
|
}
|
|
967
959
|
|
|
968
960
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -970,40 +962,45 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
970
962
|
int mask = 0;
|
|
971
963
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
972
964
|
state->cursor += trailing_zeros(mask);
|
|
973
|
-
return
|
|
965
|
+
return true;
|
|
974
966
|
}
|
|
975
967
|
}
|
|
976
968
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
977
969
|
#endif /* HAVE_SIMD */
|
|
978
970
|
|
|
979
|
-
while (state
|
|
971
|
+
while (!eos(state)) {
|
|
980
972
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
981
|
-
return
|
|
973
|
+
return true;
|
|
982
974
|
}
|
|
983
975
|
state->cursor++;
|
|
984
976
|
}
|
|
985
|
-
return
|
|
977
|
+
return false;
|
|
986
978
|
}
|
|
987
979
|
|
|
988
|
-
static
|
|
980
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
989
981
|
{
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
982
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
983
|
+
JSON_UnescapePositions positions = {
|
|
984
|
+
.size = 0,
|
|
985
|
+
.positions = backslashes,
|
|
986
|
+
.has_more = false,
|
|
987
|
+
};
|
|
993
988
|
|
|
994
|
-
|
|
989
|
+
do {
|
|
995
990
|
switch (*state->cursor) {
|
|
996
991
|
case '"': {
|
|
997
|
-
VALUE string =
|
|
992
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
998
993
|
state->cursor++;
|
|
999
994
|
return json_push_value(state, config, string);
|
|
1000
995
|
}
|
|
1001
996
|
case '\\': {
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
997
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
998
|
+
backslashes[positions.size] = state->cursor;
|
|
999
|
+
positions.size++;
|
|
1000
|
+
} else {
|
|
1001
|
+
positions.has_more = true;
|
|
1006
1002
|
}
|
|
1003
|
+
state->cursor++;
|
|
1007
1004
|
break;
|
|
1008
1005
|
}
|
|
1009
1006
|
default:
|
|
@@ -1012,22 +1009,183 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1012
1009
|
}
|
|
1013
1010
|
|
|
1014
1011
|
state->cursor++;
|
|
1015
|
-
}
|
|
1012
|
+
} while (string_scan(state));
|
|
1016
1013
|
|
|
1017
1014
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1018
1015
|
return Qfalse;
|
|
1019
1016
|
}
|
|
1020
1017
|
|
|
1018
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1019
|
+
{
|
|
1020
|
+
state->cursor++;
|
|
1021
|
+
const char *start = state->cursor;
|
|
1022
|
+
|
|
1023
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1024
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1025
|
+
}
|
|
1026
|
+
|
|
1027
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1028
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1029
|
+
state->cursor++;
|
|
1030
|
+
return json_push_value(state, config, string);
|
|
1031
|
+
}
|
|
1032
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1033
|
+
}
|
|
1034
|
+
|
|
1035
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1036
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1037
|
+
// Additional References:
|
|
1038
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1039
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1040
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1041
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1042
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1043
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1044
|
+
val -= 0x3030303030303030;
|
|
1045
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1046
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1047
|
+
return val;
|
|
1048
|
+
}
|
|
1049
|
+
|
|
1050
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1051
|
+
const uint32_t mask = 0x000000FF;
|
|
1052
|
+
const uint32_t mul1 = 100;
|
|
1053
|
+
val -= 0x30303030;
|
|
1054
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1055
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1056
|
+
return val;
|
|
1057
|
+
}
|
|
1058
|
+
#endif
|
|
1059
|
+
|
|
1060
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1061
|
+
{
|
|
1062
|
+
const char *start = state->cursor;
|
|
1063
|
+
|
|
1064
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1065
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1066
|
+
uint64_t next_8bytes;
|
|
1067
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1068
|
+
|
|
1069
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1070
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1071
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1072
|
+
|
|
1073
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1074
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1075
|
+
state->cursor += 8;
|
|
1076
|
+
continue;
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1080
|
+
|
|
1081
|
+
if (consecutive_digits >= 4) {
|
|
1082
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1083
|
+
state->cursor += 4;
|
|
1084
|
+
consecutive_digits -= 4;
|
|
1085
|
+
}
|
|
1086
|
+
|
|
1087
|
+
while (consecutive_digits) {
|
|
1088
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1089
|
+
consecutive_digits--;
|
|
1090
|
+
state->cursor++;
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
return (int)(state->cursor - start);
|
|
1094
|
+
}
|
|
1095
|
+
#endif
|
|
1096
|
+
|
|
1097
|
+
char next_char;
|
|
1098
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1099
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1100
|
+
state->cursor++;
|
|
1101
|
+
}
|
|
1102
|
+
return (int)(state->cursor - start);
|
|
1103
|
+
}
|
|
1104
|
+
|
|
1105
|
+
/*
 * Parses the body of a JSON number (integer part, optional fraction, optional
 * exponent) starting at state->cursor and returns the decoded Ruby value.
 *
 * state    - parser state; state->cursor sits just after any leading '-' and
 *            is advanced past the whole literal.
 * config   - parser configuration, forwarded to json_decode_float().
 * negative - true when the caller already consumed a leading '-'.
 * start    - first byte of the literal (sign included); passed to the error
 *            reporter and to the decoders.
 *
 * Reports "invalid number: %s" through raise_parse_error_at() for: a leading
 * zero followed by more digits, a '-' with no digits, a '.' with no digits
 * after it, or an exponent marker with no digits after it.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    enum {
        // Largest count of significant exponent digits that is kept verbatim:
        // 9 digits is at most 999,999,999, which always fits an int32_t.
        JSON_EXPONENT_SAFE_DIGITS = 9,
        // Stand-in for every larger exponent. A double cannot represent
        // magnitudes anywhere near 10^(+/-1e9), so the exact value is irrelevant.
        // NOTE(review): confirm json_decode_float() accepts +/-1e9 as "huge".
        JSON_EXPONENT_SATURATION = 1000000000
    };

    bool integer = true;

    // Use peek() instead of dereferencing the cursor directly: after a lone
    // trailing '-' the cursor may already be at the end of the input.
    // NOTE(review): assumes peek() yields 0 at end of input, as the `case 0`
    // branch of json_parse_any suggests.
    const char first_digit = peek(state);

    // Variables for Ryu optimization - extract digits during parsing
    int32_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        // Fraction digits keep accumulating into the same mantissa.
        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // json_parse_digits() accumulates without any overflow check, and the
        // result is narrowed to int32_t below. Without this guard an exponent
        // such as 4294967296 would silently collapse to 0. Count the digits
        // that remain after leading zeros and saturate when they cannot fit.
        const char *exponent_text = state->cursor - exponent_digits;
        int significant_digits = exponent_digits;
        while (significant_digits > 0 && *exponent_text == '0') {
            exponent_text++;
            significant_digits--;
        }
        if (RB_UNLIKELY(significant_digits > JSON_EXPONENT_SAFE_DIGITS)) {
            abs_exponent = JSON_EXPONENT_SATURATION;
        }

        exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
|
|
1169
|
+
|
|
1170
|
+
/*
 * Parses an unsigned JSON number. The literal starts at the current cursor,
 * so that position doubles as the `start` pointer handed to
 * json_parse_number(). json_parse_any calls this for tokens that begin with
 * a digit.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *literal_start = state->cursor;

    return json_parse_number(state, config, false, literal_start);
}
|
|
1174
|
+
|
|
1175
|
+
/*
 * Parses a JSON number that begins with '-'. The sign's position is kept as
 * the literal start, the cursor is moved past the sign, and the rest is
 * delegated to json_parse_number() with `negative` set.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    // Remember where the literal begins, then step over the '-'.
    const char *literal_start = state->cursor++;

    return json_parse_number(state, config, true, literal_start);
}
|
|
1181
|
+
|
|
1021
1182
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1022
1183
|
{
|
|
1023
1184
|
json_eat_whitespace(state);
|
|
1024
|
-
if (state->cursor >= state->end) {
|
|
1025
|
-
raise_parse_error("unexpected end of input", state);
|
|
1026
|
-
}
|
|
1027
1185
|
|
|
1028
|
-
switch (
|
|
1186
|
+
switch (peek(state)) {
|
|
1029
1187
|
case 'n':
|
|
1030
|
-
if ((state
|
|
1188
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1031
1189
|
state->cursor += 4;
|
|
1032
1190
|
return json_push_value(state, config, Qnil);
|
|
1033
1191
|
}
|
|
@@ -1035,7 +1193,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1035
1193
|
raise_parse_error("unexpected token %s", state);
|
|
1036
1194
|
break;
|
|
1037
1195
|
case 't':
|
|
1038
|
-
if ((state
|
|
1196
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1039
1197
|
state->cursor += 4;
|
|
1040
1198
|
return json_push_value(state, config, Qtrue);
|
|
1041
1199
|
}
|
|
@@ -1044,7 +1202,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1044
1202
|
break;
|
|
1045
1203
|
case 'f':
|
|
1046
1204
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1047
|
-
if ((state
|
|
1205
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1048
1206
|
state->cursor += 5;
|
|
1049
1207
|
return json_push_value(state, config, Qfalse);
|
|
1050
1208
|
}
|
|
@@ -1053,7 +1211,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1053
1211
|
break;
|
|
1054
1212
|
case 'N':
|
|
1055
1213
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1056
|
-
if (config->allow_nan && (state
|
|
1214
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1057
1215
|
state->cursor += 3;
|
|
1058
1216
|
return json_push_value(state, config, CNaN);
|
|
1059
1217
|
}
|
|
@@ -1061,16 +1219,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1061
1219
|
raise_parse_error("unexpected token %s", state);
|
|
1062
1220
|
break;
|
|
1063
1221
|
case 'I':
|
|
1064
|
-
if (config->allow_nan && (state
|
|
1222
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1065
1223
|
state->cursor += 8;
|
|
1066
1224
|
return json_push_value(state, config, CInfinity);
|
|
1067
1225
|
}
|
|
1068
1226
|
|
|
1069
1227
|
raise_parse_error("unexpected token %s", state);
|
|
1070
1228
|
break;
|
|
1071
|
-
case '-':
|
|
1229
|
+
case '-': {
|
|
1072
1230
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1073
|
-
if ((state
|
|
1231
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1074
1232
|
if (config->allow_nan) {
|
|
1075
1233
|
state->cursor += 9;
|
|
1076
1234
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1078,62 +1236,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1078
1236
|
raise_parse_error("unexpected token %s", state);
|
|
1079
1237
|
}
|
|
1080
1238
|
}
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
bool integer = true;
|
|
1084
|
-
|
|
1085
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1086
|
-
const char *start = state->cursor;
|
|
1087
|
-
state->cursor++;
|
|
1088
|
-
|
|
1089
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1090
|
-
state->cursor++;
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
long integer_length = state->cursor - start;
|
|
1094
|
-
|
|
1095
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1096
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1097
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1098
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1099
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1100
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1104
|
-
integer = false;
|
|
1105
|
-
state->cursor++;
|
|
1106
|
-
|
|
1107
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1108
|
-
raise_parse_error("invalid number: %s", state);
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1112
|
-
state->cursor++;
|
|
1113
|
-
}
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1117
|
-
integer = false;
|
|
1118
|
-
state->cursor++;
|
|
1119
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1120
|
-
state->cursor++;
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1124
|
-
raise_parse_error("invalid number: %s", state);
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1128
|
-
state->cursor++;
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
if (integer) {
|
|
1133
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1134
|
-
}
|
|
1135
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1239
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1240
|
+
break;
|
|
1136
1241
|
}
|
|
1242
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1243
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1244
|
+
break;
|
|
1137
1245
|
case '"': {
|
|
1138
1246
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1139
1247
|
return json_parse_string(state, config, false);
|
|
@@ -1144,7 +1252,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1144
1252
|
json_eat_whitespace(state);
|
|
1145
1253
|
long stack_head = state->stack->head;
|
|
1146
1254
|
|
|
1147
|
-
if ((state
|
|
1255
|
+
if (peek(state) == ']') {
|
|
1148
1256
|
state->cursor++;
|
|
1149
1257
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1150
1258
|
} else {
|
|
@@ -1159,26 +1267,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1159
1267
|
while (true) {
|
|
1160
1268
|
json_eat_whitespace(state);
|
|
1161
1269
|
|
|
1162
|
-
|
|
1163
|
-
if (*state->cursor == ']') {
|
|
1164
|
-
state->cursor++;
|
|
1165
|
-
long count = state->stack->head - stack_head;
|
|
1166
|
-
state->current_nesting--;
|
|
1167
|
-
state->in_array--;
|
|
1168
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1169
|
-
}
|
|
1270
|
+
const char next_char = peek(state);
|
|
1170
1271
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
}
|
|
1272
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1273
|
+
state->cursor++;
|
|
1274
|
+
if (config->allow_trailing_comma) {
|
|
1275
|
+
json_eat_whitespace(state);
|
|
1276
|
+
if (peek(state) == ']') {
|
|
1277
|
+
continue;
|
|
1178
1278
|
}
|
|
1179
|
-
json_parse_any(state, config);
|
|
1180
|
-
continue;
|
|
1181
1279
|
}
|
|
1280
|
+
json_parse_any(state, config);
|
|
1281
|
+
continue;
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
if (next_char == ']') {
|
|
1285
|
+
state->cursor++;
|
|
1286
|
+
long count = state->stack->head - stack_head;
|
|
1287
|
+
state->current_nesting--;
|
|
1288
|
+
state->in_array--;
|
|
1289
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1182
1290
|
}
|
|
1183
1291
|
|
|
1184
1292
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1192,7 +1300,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1192
1300
|
json_eat_whitespace(state);
|
|
1193
1301
|
long stack_head = state->stack->head;
|
|
1194
1302
|
|
|
1195
|
-
if ((state
|
|
1303
|
+
if (peek(state) == '}') {
|
|
1196
1304
|
state->cursor++;
|
|
1197
1305
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1198
1306
|
} else {
|
|
@@ -1201,13 +1309,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1201
1309
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1202
1310
|
}
|
|
1203
1311
|
|
|
1204
|
-
if (
|
|
1312
|
+
if (peek(state) != '"') {
|
|
1205
1313
|
raise_parse_error("expected object key, got %s", state);
|
|
1206
1314
|
}
|
|
1207
1315
|
json_parse_string(state, config, true);
|
|
1208
1316
|
|
|
1209
1317
|
json_eat_whitespace(state);
|
|
1210
|
-
if ((state
|
|
1318
|
+
if (peek(state) != ':') {
|
|
1211
1319
|
raise_parse_error("expected ':' after object key", state);
|
|
1212
1320
|
}
|
|
1213
1321
|
state->cursor++;
|
|
@@ -1218,46 +1326,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1218
1326
|
while (true) {
|
|
1219
1327
|
json_eat_whitespace(state);
|
|
1220
1328
|
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1329
|
+
const char next_char = peek(state);
|
|
1330
|
+
if (next_char == '}') {
|
|
1331
|
+
state->cursor++;
|
|
1332
|
+
state->current_nesting--;
|
|
1333
|
+
size_t count = state->stack->head - stack_head;
|
|
1226
1334
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1335
|
+
// Temporary rewind cursor in case an error is raised
|
|
1336
|
+
const char *final_cursor = state->cursor;
|
|
1337
|
+
state->cursor = object_start_cursor;
|
|
1338
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1339
|
+
state->cursor = final_cursor;
|
|
1232
1340
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1341
|
+
return json_push_value(state, config, object);
|
|
1342
|
+
}
|
|
1235
1343
|
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1344
|
+
if (next_char == ',') {
|
|
1345
|
+
state->cursor++;
|
|
1346
|
+
json_eat_whitespace(state);
|
|
1239
1347
|
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
}
|
|
1348
|
+
if (config->allow_trailing_comma) {
|
|
1349
|
+
if (peek(state) == '}') {
|
|
1350
|
+
continue;
|
|
1244
1351
|
}
|
|
1352
|
+
}
|
|
1245
1353
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1354
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1355
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1356
|
+
}
|
|
1357
|
+
json_parse_string(state, config, true);
|
|
1250
1358
|
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1359
|
+
json_eat_whitespace(state);
|
|
1360
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1361
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1362
|
+
}
|
|
1363
|
+
state->cursor++;
|
|
1256
1364
|
|
|
1257
|
-
|
|
1365
|
+
json_parse_any(state, config);
|
|
1258
1366
|
|
|
1259
|
-
|
|
1260
|
-
}
|
|
1367
|
+
continue;
|
|
1261
1368
|
}
|
|
1262
1369
|
|
|
1263
1370
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1265,18 +1372,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1265
1372
|
break;
|
|
1266
1373
|
}
|
|
1267
1374
|
|
|
1375
|
+
case 0:
|
|
1376
|
+
raise_parse_error("unexpected end of input", state);
|
|
1377
|
+
break;
|
|
1378
|
+
|
|
1268
1379
|
default:
|
|
1269
1380
|
raise_parse_error("unexpected character: %s", state);
|
|
1270
1381
|
break;
|
|
1271
1382
|
}
|
|
1272
1383
|
|
|
1273
1384
|
raise_parse_error("unreachable: %s", state);
|
|
1385
|
+
return Qundef;
|
|
1274
1386
|
}
|
|
1275
1387
|
|
|
1276
1388
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1277
1389
|
{
|
|
1278
1390
|
json_eat_whitespace(state);
|
|
1279
|
-
if (state
|
|
1391
|
+
if (!eos(state)) {
|
|
1280
1392
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1281
1393
|
}
|
|
1282
1394
|
}
|
|
@@ -1393,6 +1505,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1393
1505
|
*/
|
|
1394
1506
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1395
1507
|
{
|
|
1508
|
+
rb_check_frozen(self);
|
|
1396
1509
|
GET_PARSER_CONFIG;
|
|
1397
1510
|
|
|
1398
1511
|
parser_config_init(config, opts);
|
|
@@ -1488,7 +1601,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1488
1601
|
JSON_ParserConfig_memsize,
|
|
1489
1602
|
},
|
|
1490
1603
|
0, 0,
|
|
1491
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1604
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1492
1605
|
};
|
|
1493
1606
|
|
|
1494
1607
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1538,10 +1651,6 @@ void Init_parser(void)
|
|
|
1538
1651
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1539
1652
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1540
1653
|
|
|
1541
|
-
i_chr = rb_intern("chr");
|
|
1542
|
-
i_aset = rb_intern("[]=");
|
|
1543
|
-
i_aref = rb_intern("[]");
|
|
1544
|
-
i_leftshift = rb_intern("<<");
|
|
1545
1654
|
i_new = rb_intern("new");
|
|
1546
1655
|
i_try_convert = rb_intern("try_convert");
|
|
1547
1656
|
i_uminus = rb_intern("-@");
|