json 2.15.1 → 2.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +31 -0
- data/LEGAL +12 -0
- data/README.md +17 -1
- data/ext/json/ext/fbuffer/fbuffer.h +9 -58
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +192 -159
- data/ext/json/ext/json.h +97 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +498 -387
- data/ext/json/ext/simd/simd.h +15 -12
- data/ext/json/ext/vendor/fpconv.c +2 -2
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/common.rb +28 -16
- data/lib/json/ext/generator/state.rb +4 -0
- data/lib/json/truffle_ruby/generator.rb +53 -16
- data/lib/json/version.rb +1 -1
- metadata +3 -1
|
@@ -1,40 +1,11 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
10
|
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
40
11
|
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
@@ -44,7 +15,7 @@ static int utf8_encindex;
|
|
|
44
15
|
|
|
45
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
17
|
// For TruffleRuby
|
|
47
|
-
void
|
|
18
|
+
static void
|
|
48
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
20
|
{
|
|
50
21
|
long index = 0;
|
|
@@ -61,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
33
|
#endif
|
|
63
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
37
|
+
{
|
|
38
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
39
|
+
}
|
|
40
|
+
#endif
|
|
64
41
|
|
|
65
42
|
/* name cache */
|
|
66
43
|
|
|
@@ -106,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
83
|
cache->entries[index] = rstring;
|
|
107
84
|
}
|
|
108
85
|
|
|
109
|
-
|
|
86
|
+
#define rstring_cache_memcmp memcmp
|
|
87
|
+
|
|
88
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
89
|
+
#if __has_builtin(__builtin_bswap64)
|
|
90
|
+
#undef rstring_cache_memcmp
|
|
91
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
92
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
93
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
94
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
95
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
96
|
+
long i = 0;
|
|
97
|
+
|
|
98
|
+
for (; i + 8 <= length; i += 8) {
|
|
99
|
+
uint64_t a, b;
|
|
100
|
+
memcpy(&a, str + i, 8);
|
|
101
|
+
memcpy(&b, rptr + i, 8);
|
|
102
|
+
if (a != b) {
|
|
103
|
+
a = __builtin_bswap64(a);
|
|
104
|
+
b = __builtin_bswap64(b);
|
|
105
|
+
return (a < b) ? -1 : 1;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (; i < length; i++) {
|
|
110
|
+
if (str[i] != rptr[i]) {
|
|
111
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
112
|
+
}
|
|
116
113
|
}
|
|
114
|
+
|
|
115
|
+
return 0;
|
|
117
116
|
}
|
|
117
|
+
#endif
|
|
118
|
+
#endif
|
|
118
119
|
|
|
119
|
-
static
|
|
120
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
121
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
122
|
+
const char *rstring_ptr;
|
|
123
|
+
long rstring_length;
|
|
124
|
+
|
|
125
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
126
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
127
|
+
if (length == rstring_length) {
|
|
128
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
129
|
+
} else {
|
|
130
|
+
return (int)(length - rstring_length);
|
|
132
131
|
}
|
|
132
|
+
}
|
|
133
133
|
|
|
134
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
135
|
+
{
|
|
134
136
|
int low = 0;
|
|
135
137
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
138
|
|
|
139
139
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
140
|
+
int mid = (high + low) >> 1;
|
|
141
141
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
142
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
143
|
|
|
144
|
-
if (
|
|
144
|
+
if (cmp == 0) {
|
|
145
145
|
return entry;
|
|
146
|
-
} else if (
|
|
146
|
+
} else if (cmp > 0) {
|
|
147
147
|
low = mid + 1;
|
|
148
148
|
} else {
|
|
149
149
|
high = mid - 1;
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
153
|
VALUE rstring = build_interned_string(str, length);
|
|
160
154
|
|
|
161
155
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
156
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
157
|
}
|
|
168
158
|
return rstring;
|
|
169
159
|
}
|
|
170
160
|
|
|
171
161
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
162
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
163
|
int low = 0;
|
|
187
164
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
165
|
|
|
191
166
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
167
|
+
int mid = (high + low) >> 1;
|
|
193
168
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
169
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
170
|
|
|
196
|
-
if (
|
|
171
|
+
if (cmp == 0) {
|
|
197
172
|
return entry;
|
|
198
|
-
} else if (
|
|
173
|
+
} else if (cmp > 0) {
|
|
199
174
|
low = mid + 1;
|
|
200
175
|
} else {
|
|
201
176
|
high = mid - 1;
|
|
202
177
|
}
|
|
203
178
|
}
|
|
204
179
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
180
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
181
|
|
|
213
182
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
183
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
184
|
}
|
|
220
185
|
return rsymbol;
|
|
221
186
|
}
|
|
@@ -330,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
295
|
}
|
|
331
296
|
}
|
|
332
297
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
299
|
{
|
|
344
300
|
int len = 1;
|
|
@@ -379,7 +335,6 @@ typedef struct JSON_ParserStruct {
|
|
|
379
335
|
int max_nesting;
|
|
380
336
|
bool allow_nan;
|
|
381
337
|
bool allow_trailing_comma;
|
|
382
|
-
bool parsing_name;
|
|
383
338
|
bool symbolize_names;
|
|
384
339
|
bool freeze;
|
|
385
340
|
} JSON_ParserConfig;
|
|
@@ -395,6 +350,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
350
|
int current_nesting;
|
|
396
351
|
} JSON_ParserState;
|
|
397
352
|
|
|
353
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
354
|
+
return state->end - state->cursor;
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
358
|
+
return state->cursor >= state->end;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
static inline char peek(JSON_ParserState *state)
|
|
362
|
+
{
|
|
363
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
364
|
+
return 0;
|
|
365
|
+
}
|
|
366
|
+
return *state->cursor;
|
|
367
|
+
}
|
|
368
|
+
|
|
398
369
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
370
|
{
|
|
400
371
|
const char *cursor = state->cursor;
|
|
@@ -530,61 +501,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
501
|
|
|
531
502
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
503
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
504
|
static void
|
|
542
505
|
json_eat_comments(JSON_ParserState *state)
|
|
543
506
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
507
|
+
const char *start = state->cursor;
|
|
508
|
+
state->cursor++;
|
|
509
|
+
|
|
510
|
+
switch (peek(state)) {
|
|
511
|
+
case '/': {
|
|
512
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
513
|
+
if (!state->cursor) {
|
|
514
|
+
state->cursor = state->end;
|
|
515
|
+
} else {
|
|
516
|
+
state->cursor++;
|
|
554
517
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
518
|
+
break;
|
|
519
|
+
}
|
|
520
|
+
case '*': {
|
|
521
|
+
state->cursor++;
|
|
522
|
+
|
|
523
|
+
while (true) {
|
|
524
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
525
|
+
if (!next_match) {
|
|
526
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
state->cursor = next_match + 1;
|
|
530
|
+
if (peek(state) == '/') {
|
|
531
|
+
state->cursor++;
|
|
532
|
+
break;
|
|
568
533
|
}
|
|
569
|
-
break;
|
|
570
534
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
535
|
+
break;
|
|
574
536
|
}
|
|
575
|
-
|
|
576
|
-
|
|
537
|
+
default:
|
|
538
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
539
|
+
break;
|
|
577
540
|
}
|
|
578
541
|
}
|
|
579
542
|
|
|
580
|
-
static
|
|
543
|
+
ALWAYS_INLINE(static) void
|
|
581
544
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
545
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
546
|
+
while (true) {
|
|
547
|
+
switch (peek(state)) {
|
|
548
|
+
case ' ':
|
|
549
|
+
state->cursor++;
|
|
550
|
+
break;
|
|
551
|
+
case '\n':
|
|
552
|
+
state->cursor++;
|
|
553
|
+
|
|
554
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
555
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
556
|
+
while (rest(state) > 8) {
|
|
557
|
+
uint64_t chunk;
|
|
558
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
559
|
+
if (chunk == 0x2020202020202020) {
|
|
560
|
+
state->cursor += 8;
|
|
561
|
+
continue;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
565
|
+
state->cursor += consecutive_spaces;
|
|
566
|
+
break;
|
|
567
|
+
}
|
|
568
|
+
#endif
|
|
569
|
+
break;
|
|
570
|
+
case '\t':
|
|
571
|
+
case '\r':
|
|
572
|
+
state->cursor++;
|
|
573
|
+
break;
|
|
574
|
+
case '/':
|
|
575
|
+
json_eat_comments(state);
|
|
576
|
+
break;
|
|
577
|
+
|
|
578
|
+
default:
|
|
579
|
+
return;
|
|
588
580
|
}
|
|
589
581
|
}
|
|
590
582
|
}
|
|
@@ -615,11 +607,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
607
|
return result;
|
|
616
608
|
}
|
|
617
609
|
|
|
618
|
-
static inline
|
|
610
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
619
611
|
{
|
|
612
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
613
|
+
// Simple heuristics:
|
|
614
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
615
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
616
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
620
|
+
{
|
|
621
|
+
bool intern = is_name || config->freeze;
|
|
622
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
623
|
size_t bufferSize = stringEnd - string;
|
|
621
624
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
625
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
626
|
VALUE cached_key;
|
|
624
627
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
628
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,60 +638,73 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
638
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
639
|
}
|
|
637
640
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
641
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
642
|
+
typedef struct _json_unescape_positions {
|
|
643
|
+
long size;
|
|
644
|
+
const char **positions;
|
|
645
|
+
bool has_more;
|
|
646
|
+
} JSON_UnescapePositions;
|
|
645
647
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
648
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
649
|
+
{
|
|
650
|
+
while (positions->size) {
|
|
651
|
+
positions->size--;
|
|
652
|
+
const char *next_position = positions->positions[0];
|
|
653
|
+
positions->positions++;
|
|
654
|
+
if (next_position >= pe) {
|
|
655
|
+
return next_position;
|
|
652
656
|
}
|
|
657
|
+
}
|
|
653
658
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
}
|
|
659
|
+
if (positions->has_more) {
|
|
660
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
661
|
}
|
|
658
662
|
|
|
663
|
+
return NULL;
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
667
|
+
{
|
|
668
|
+
bool intern = is_name || config->freeze;
|
|
669
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
670
|
+
size_t bufferSize = stringEnd - string;
|
|
671
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
672
|
+
char *buffer;
|
|
673
|
+
|
|
659
674
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
675
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
676
|
buffer = RSTRING_PTR(result);
|
|
662
677
|
bufferStart = buffer;
|
|
663
678
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
679
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
680
|
+
|
|
681
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
682
|
if (pe > p) {
|
|
668
683
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
684
|
buffer += pe - p;
|
|
670
685
|
}
|
|
671
686
|
switch (*++pe) {
|
|
687
|
+
case '"':
|
|
688
|
+
case '/':
|
|
689
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
690
|
+
break;
|
|
691
|
+
case '\\':
|
|
692
|
+
APPEND_CHAR('\\');
|
|
693
|
+
break;
|
|
672
694
|
case 'n':
|
|
673
|
-
|
|
695
|
+
APPEND_CHAR('\n');
|
|
674
696
|
break;
|
|
675
697
|
case 'r':
|
|
676
|
-
|
|
698
|
+
APPEND_CHAR('\r');
|
|
677
699
|
break;
|
|
678
700
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
701
|
+
APPEND_CHAR('\t');
|
|
686
702
|
break;
|
|
687
703
|
case 'b':
|
|
688
|
-
|
|
704
|
+
APPEND_CHAR('\b');
|
|
689
705
|
break;
|
|
690
706
|
case 'f':
|
|
691
|
-
|
|
707
|
+
APPEND_CHAR('\f');
|
|
692
708
|
break;
|
|
693
709
|
case 'u':
|
|
694
710
|
if (pe > stringEnd - 5) {
|
|
@@ -726,18 +742,23 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
726
742
|
break;
|
|
727
743
|
}
|
|
728
744
|
}
|
|
729
|
-
|
|
730
|
-
|
|
745
|
+
|
|
746
|
+
char buf[4];
|
|
747
|
+
int unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
748
|
+
MEMCPY(buffer, buf, char, unescape_len);
|
|
749
|
+
buffer += unescape_len;
|
|
750
|
+
p = ++pe;
|
|
731
751
|
}
|
|
732
752
|
break;
|
|
733
753
|
default:
|
|
734
|
-
|
|
735
|
-
|
|
754
|
+
if ((unsigned char)*pe < 0x20) {
|
|
755
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
756
|
+
}
|
|
757
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
758
|
+
break;
|
|
736
759
|
}
|
|
737
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
738
|
-
buffer += unescape_len;
|
|
739
|
-
p = ++pe;
|
|
740
760
|
}
|
|
761
|
+
#undef APPEND_CHAR
|
|
741
762
|
|
|
742
763
|
if (stringEnd > p) {
|
|
743
764
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -748,33 +769,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
748
769
|
if (symbolize) {
|
|
749
770
|
result = rb_str_intern(result);
|
|
750
771
|
} else if (intern) {
|
|
751
|
-
result =
|
|
772
|
+
result = rb_str_to_interned_str(result);
|
|
752
773
|
}
|
|
753
774
|
|
|
754
775
|
return result;
|
|
755
776
|
}
|
|
756
777
|
|
|
757
778
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
758
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
759
|
-
{
|
|
760
|
-
bool negative = false;
|
|
761
|
-
if (*p == '-') {
|
|
762
|
-
negative = true;
|
|
763
|
-
p++;
|
|
764
|
-
}
|
|
765
|
-
|
|
766
|
-
long long memo = 0;
|
|
767
|
-
while (p < pe) {
|
|
768
|
-
memo *= 10;
|
|
769
|
-
memo += *p - '0';
|
|
770
|
-
p++;
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
if (negative) {
|
|
774
|
-
memo = -memo;
|
|
775
|
-
}
|
|
776
|
-
return LL2NUM(memo);
|
|
777
|
-
}
|
|
778
779
|
|
|
779
780
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
780
781
|
{
|
|
@@ -788,17 +789,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
788
789
|
}
|
|
789
790
|
|
|
790
791
|
static inline VALUE
|
|
791
|
-
json_decode_integer(const char *start, const char *end)
|
|
792
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
792
793
|
{
|
|
793
|
-
|
|
794
|
-
if (
|
|
795
|
-
return
|
|
794
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
795
|
+
if (negative) {
|
|
796
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
796
797
|
}
|
|
797
|
-
return
|
|
798
|
+
return UINT64T2NUM(mantissa);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
return json_decode_large_integer(start, end - start);
|
|
798
802
|
}
|
|
799
803
|
|
|
800
804
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
801
805
|
{
|
|
806
|
+
if (RB_LIKELY(len < 64)) {
|
|
807
|
+
char buffer[64];
|
|
808
|
+
MEMCPY(buffer, start, char, len);
|
|
809
|
+
buffer[len] = '\0';
|
|
810
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
811
|
+
}
|
|
812
|
+
|
|
802
813
|
VALUE buffer_v;
|
|
803
814
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
804
815
|
MEMCPY(buffer, start, char, len);
|
|
@@ -808,21 +819,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
808
819
|
return number;
|
|
809
820
|
}
|
|
810
821
|
|
|
811
|
-
|
|
822
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
823
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
824
|
+
*/
|
|
825
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
826
|
+
const char *start, const char *end)
|
|
812
827
|
{
|
|
813
|
-
long len = end - start;
|
|
814
|
-
|
|
815
828
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
816
|
-
VALUE text = rb_str_new(start,
|
|
829
|
+
VALUE text = rb_str_new(start, end - start);
|
|
817
830
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
818
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
819
|
-
char buffer[64];
|
|
820
|
-
MEMCPY(buffer, start, char, len);
|
|
821
|
-
buffer[len] = '\0';
|
|
822
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
823
|
-
} else {
|
|
824
|
-
return json_decode_large_float(start, len);
|
|
825
831
|
}
|
|
832
|
+
|
|
833
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
834
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
835
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
836
|
+
return json_decode_large_float(start, end - start);
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
826
840
|
}
|
|
827
841
|
|
|
828
842
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -908,20 +922,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
908
922
|
return object;
|
|
909
923
|
}
|
|
910
924
|
|
|
911
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
912
|
-
{
|
|
913
|
-
VALUE string;
|
|
914
|
-
bool intern = is_name || config->freeze;
|
|
915
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
916
|
-
if (escaped) {
|
|
917
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
918
|
-
} else {
|
|
919
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
return string;
|
|
923
|
-
}
|
|
924
|
-
|
|
925
925
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
926
926
|
{
|
|
927
927
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -944,17 +944,11 @@ static const bool string_scan_table[256] = {
|
|
|
944
944
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
945
945
|
};
|
|
946
946
|
|
|
947
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
948
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
949
|
-
#else
|
|
950
|
-
#define FORCE_INLINE
|
|
951
|
-
#endif
|
|
952
|
-
|
|
953
947
|
#ifdef HAVE_SIMD
|
|
954
948
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
955
949
|
#endif /* HAVE_SIMD */
|
|
956
950
|
|
|
957
|
-
static
|
|
951
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
958
952
|
{
|
|
959
953
|
#ifdef HAVE_SIMD
|
|
960
954
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -962,7 +956,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
962
956
|
uint64_t mask = 0;
|
|
963
957
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
964
958
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
965
|
-
return
|
|
959
|
+
return true;
|
|
966
960
|
}
|
|
967
961
|
|
|
968
962
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -970,40 +964,45 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
970
964
|
int mask = 0;
|
|
971
965
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
972
966
|
state->cursor += trailing_zeros(mask);
|
|
973
|
-
return
|
|
967
|
+
return true;
|
|
974
968
|
}
|
|
975
969
|
}
|
|
976
970
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
977
971
|
#endif /* HAVE_SIMD */
|
|
978
972
|
|
|
979
|
-
while (state
|
|
973
|
+
while (!eos(state)) {
|
|
980
974
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
981
|
-
return
|
|
975
|
+
return true;
|
|
982
976
|
}
|
|
983
977
|
state->cursor++;
|
|
984
978
|
}
|
|
985
|
-
return
|
|
979
|
+
return false;
|
|
986
980
|
}
|
|
987
981
|
|
|
988
|
-
static
|
|
982
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
989
983
|
{
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
984
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
985
|
+
JSON_UnescapePositions positions = {
|
|
986
|
+
.size = 0,
|
|
987
|
+
.positions = backslashes,
|
|
988
|
+
.has_more = false,
|
|
989
|
+
};
|
|
993
990
|
|
|
994
|
-
|
|
991
|
+
do {
|
|
995
992
|
switch (*state->cursor) {
|
|
996
993
|
case '"': {
|
|
997
|
-
VALUE string =
|
|
994
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
998
995
|
state->cursor++;
|
|
999
996
|
return json_push_value(state, config, string);
|
|
1000
997
|
}
|
|
1001
998
|
case '\\': {
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
999
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1000
|
+
backslashes[positions.size] = state->cursor;
|
|
1001
|
+
positions.size++;
|
|
1002
|
+
} else {
|
|
1003
|
+
positions.has_more = true;
|
|
1006
1004
|
}
|
|
1005
|
+
state->cursor++;
|
|
1007
1006
|
break;
|
|
1008
1007
|
}
|
|
1009
1008
|
default:
|
|
@@ -1012,22 +1011,183 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1012
1011
|
}
|
|
1013
1012
|
|
|
1014
1013
|
state->cursor++;
|
|
1015
|
-
}
|
|
1014
|
+
} while (string_scan(state));
|
|
1016
1015
|
|
|
1017
1016
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1018
1017
|
return Qfalse;
|
|
1019
1018
|
}
|
|
1020
1019
|
|
|
1020
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1021
|
+
{
|
|
1022
|
+
state->cursor++;
|
|
1023
|
+
const char *start = state->cursor;
|
|
1024
|
+
|
|
1025
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1026
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1030
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1031
|
+
state->cursor++;
|
|
1032
|
+
return json_push_value(state, config, string);
|
|
1033
|
+
}
|
|
1034
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1038
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1039
|
+
// Additional References:
|
|
1040
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1041
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1042
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1043
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1044
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1045
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1046
|
+
val -= 0x3030303030303030;
|
|
1047
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1048
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1049
|
+
return val;
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1053
|
+
const uint32_t mask = 0x000000FF;
|
|
1054
|
+
const uint32_t mul1 = 100;
|
|
1055
|
+
val -= 0x30303030;
|
|
1056
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1057
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1058
|
+
return val;
|
|
1059
|
+
}
|
|
1060
|
+
#endif
|
|
1061
|
+
|
|
1062
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1063
|
+
{
|
|
1064
|
+
const char *start = state->cursor;
|
|
1065
|
+
|
|
1066
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1067
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1068
|
+
uint64_t next_8bytes;
|
|
1069
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1070
|
+
|
|
1071
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1072
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1073
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1074
|
+
|
|
1075
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1076
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1077
|
+
state->cursor += 8;
|
|
1078
|
+
continue;
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1082
|
+
|
|
1083
|
+
if (consecutive_digits >= 4) {
|
|
1084
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1085
|
+
state->cursor += 4;
|
|
1086
|
+
consecutive_digits -= 4;
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
while (consecutive_digits) {
|
|
1090
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1091
|
+
consecutive_digits--;
|
|
1092
|
+
state->cursor++;
|
|
1093
|
+
}
|
|
1094
|
+
|
|
1095
|
+
return (int)(state->cursor - start);
|
|
1096
|
+
}
|
|
1097
|
+
#endif
|
|
1098
|
+
|
|
1099
|
+
char next_char;
|
|
1100
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1101
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1102
|
+
state->cursor++;
|
|
1103
|
+
}
|
|
1104
|
+
return (int)(state->cursor - start);
|
|
1105
|
+
}
|
|
1106
|
+
|
|
1107
|
+
/*
 * Parse one JSON number and decode it into a Ruby value.
 *
 * Accepted shape (sign already consumed by the caller):
 *   (0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?
 *
 * state    - parser state; state->cursor must be on the first byte after the
 *            optional '-' and is left on the first byte after the number.
 * config   - parser configuration, forwarded to json_decode_float().
 * negative - true when the caller consumed a leading '-'.
 * start    - first byte of the number text, including the '-' if any; used
 *            for error reporting and forwarded to the decoders.
 *
 * Returns whatever json_decode_integer() (no fraction and no exponent) or
 * json_decode_float() (otherwise) returns.
 *
 * Raises through raise_parse_error_at() on a leading zero followed by more
 * digits, on a '-' with no digits, and on a '.' or exponent marker that is
 * not followed by at least one digit.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    // Largest exponent magnitude handed to json_decode_float(). Any decimal
    // exponent of this size is astronomically outside the range of a double,
    // so saturating here cannot turn one finite value into another.
    // NOTE(review): json_decode_float() is not visible from here; half of
    // INT32_MAX is used so it keeps headroom should it add a digit count to
    // the exponent - confirm against its implementation.
    const int64_t exponent_limit = INT32_MAX / 2;
    // An exponent with at most this many significant digits fits in the
    // 64-bit accumulator without wrapping; anything longer is saturated.
    const int max_exponent_digits = 10;
    const uint64_t saturated_exponent = UINT64_C(10000000000);

    bool integer = true;
    // peek() instead of a raw dereference: for an input ending right after
    // '-' the cursor is already at the end of the buffer.
    const char first_digit = peek(state);

    // Variables for Ryu optimization - extract digits during parsing
    int64_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        // Fraction digits keep accumulating into the same mantissa.
        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        const char *exponent_text = state->cursor;
        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // Leading zeros are legal in an exponent and carry no magnitude, so
        // only the digits after them decide whether the accumulator may have
        // wrapped. Previously the value was narrowed with a plain
        // (int32_t) cast, so e.g. an exponent of 4294967297 parsed as 1.
        const char *significant = exponent_text;
        while (significant < state->cursor && *significant == '0') {
            significant++;
        }
        if (RB_UNLIKELY(state->cursor - significant > max_exponent_digits)) {
            abs_exponent = saturated_exponent;
        }

        exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent;
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position. Done in 64 bits so a
    // very negative exponent combined with fraction digits cannot overflow.
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    // Saturate instead of truncating when narrowing for json_decode_float().
    if (exponent > exponent_limit) {
        exponent = exponent_limit;
    } else if (exponent < -exponent_limit) {
        exponent = -exponent_limit;
    }

    return json_decode_float(config, mantissa, mantissa_digits, (int32_t)exponent, negative, start, state->cursor);
}
|
|
1171
|
+
|
|
1172
|
+
/*
 * Parse an unsigned JSON number starting at state->cursor.
 *
 * The number text and its digits start at the same byte, so the current
 * cursor is passed to json_parse_number() as the start of the number.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *number_start = state->cursor;

    return json_parse_number(state, config, false, number_start);
}
|
|
1176
|
+
|
|
1177
|
+
/*
 * Parse a JSON number whose first byte, at state->cursor, is the sign.
 *
 * The sign byte is stepped over before delegating to json_parse_number(),
 * while its address is kept as the start of the number text.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    // Post-increment: remember where the sign is, then move past it.
    const char *const sign_position = state->cursor++;

    return json_parse_number(state, config, true, sign_position);
}
|
|
1183
|
+
|
|
1021
1184
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1022
1185
|
{
|
|
1023
1186
|
json_eat_whitespace(state);
|
|
1024
|
-
if (state->cursor >= state->end) {
|
|
1025
|
-
raise_parse_error("unexpected end of input", state);
|
|
1026
|
-
}
|
|
1027
1187
|
|
|
1028
|
-
switch (
|
|
1188
|
+
switch (peek(state)) {
|
|
1029
1189
|
case 'n':
|
|
1030
|
-
if ((state
|
|
1190
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1031
1191
|
state->cursor += 4;
|
|
1032
1192
|
return json_push_value(state, config, Qnil);
|
|
1033
1193
|
}
|
|
@@ -1035,7 +1195,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1035
1195
|
raise_parse_error("unexpected token %s", state);
|
|
1036
1196
|
break;
|
|
1037
1197
|
case 't':
|
|
1038
|
-
if ((state
|
|
1198
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1039
1199
|
state->cursor += 4;
|
|
1040
1200
|
return json_push_value(state, config, Qtrue);
|
|
1041
1201
|
}
|
|
@@ -1044,7 +1204,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1044
1204
|
break;
|
|
1045
1205
|
case 'f':
|
|
1046
1206
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1047
|
-
if ((state
|
|
1207
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1048
1208
|
state->cursor += 5;
|
|
1049
1209
|
return json_push_value(state, config, Qfalse);
|
|
1050
1210
|
}
|
|
@@ -1053,7 +1213,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1053
1213
|
break;
|
|
1054
1214
|
case 'N':
|
|
1055
1215
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1056
|
-
if (config->allow_nan && (state
|
|
1216
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1057
1217
|
state->cursor += 3;
|
|
1058
1218
|
return json_push_value(state, config, CNaN);
|
|
1059
1219
|
}
|
|
@@ -1061,16 +1221,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1061
1221
|
raise_parse_error("unexpected token %s", state);
|
|
1062
1222
|
break;
|
|
1063
1223
|
case 'I':
|
|
1064
|
-
if (config->allow_nan && (state
|
|
1224
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1065
1225
|
state->cursor += 8;
|
|
1066
1226
|
return json_push_value(state, config, CInfinity);
|
|
1067
1227
|
}
|
|
1068
1228
|
|
|
1069
1229
|
raise_parse_error("unexpected token %s", state);
|
|
1070
1230
|
break;
|
|
1071
|
-
case '-':
|
|
1231
|
+
case '-': {
|
|
1072
1232
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1073
|
-
if ((state
|
|
1233
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1074
1234
|
if (config->allow_nan) {
|
|
1075
1235
|
state->cursor += 9;
|
|
1076
1236
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1078,62 +1238,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1078
1238
|
raise_parse_error("unexpected token %s", state);
|
|
1079
1239
|
}
|
|
1080
1240
|
}
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
bool integer = true;
|
|
1084
|
-
|
|
1085
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1086
|
-
const char *start = state->cursor;
|
|
1087
|
-
state->cursor++;
|
|
1088
|
-
|
|
1089
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1090
|
-
state->cursor++;
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
long integer_length = state->cursor - start;
|
|
1094
|
-
|
|
1095
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1096
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1097
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1098
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1099
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1100
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1104
|
-
integer = false;
|
|
1105
|
-
state->cursor++;
|
|
1106
|
-
|
|
1107
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1108
|
-
raise_parse_error("invalid number: %s", state);
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1112
|
-
state->cursor++;
|
|
1113
|
-
}
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1117
|
-
integer = false;
|
|
1118
|
-
state->cursor++;
|
|
1119
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1120
|
-
state->cursor++;
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1124
|
-
raise_parse_error("invalid number: %s", state);
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1128
|
-
state->cursor++;
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
if (integer) {
|
|
1133
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1134
|
-
}
|
|
1135
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1241
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1242
|
+
break;
|
|
1136
1243
|
}
|
|
1244
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1245
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1246
|
+
break;
|
|
1137
1247
|
case '"': {
|
|
1138
1248
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1139
1249
|
return json_parse_string(state, config, false);
|
|
@@ -1144,7 +1254,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1144
1254
|
json_eat_whitespace(state);
|
|
1145
1255
|
long stack_head = state->stack->head;
|
|
1146
1256
|
|
|
1147
|
-
if ((state
|
|
1257
|
+
if (peek(state) == ']') {
|
|
1148
1258
|
state->cursor++;
|
|
1149
1259
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1150
1260
|
} else {
|
|
@@ -1159,26 +1269,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1159
1269
|
while (true) {
|
|
1160
1270
|
json_eat_whitespace(state);
|
|
1161
1271
|
|
|
1162
|
-
|
|
1163
|
-
if (*state->cursor == ']') {
|
|
1164
|
-
state->cursor++;
|
|
1165
|
-
long count = state->stack->head - stack_head;
|
|
1166
|
-
state->current_nesting--;
|
|
1167
|
-
state->in_array--;
|
|
1168
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1169
|
-
}
|
|
1272
|
+
const char next_char = peek(state);
|
|
1170
1273
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
}
|
|
1274
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1275
|
+
state->cursor++;
|
|
1276
|
+
if (config->allow_trailing_comma) {
|
|
1277
|
+
json_eat_whitespace(state);
|
|
1278
|
+
if (peek(state) == ']') {
|
|
1279
|
+
continue;
|
|
1178
1280
|
}
|
|
1179
|
-
json_parse_any(state, config);
|
|
1180
|
-
continue;
|
|
1181
1281
|
}
|
|
1282
|
+
json_parse_any(state, config);
|
|
1283
|
+
continue;
|
|
1284
|
+
}
|
|
1285
|
+
|
|
1286
|
+
if (next_char == ']') {
|
|
1287
|
+
state->cursor++;
|
|
1288
|
+
long count = state->stack->head - stack_head;
|
|
1289
|
+
state->current_nesting--;
|
|
1290
|
+
state->in_array--;
|
|
1291
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1182
1292
|
}
|
|
1183
1293
|
|
|
1184
1294
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1192,7 +1302,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1192
1302
|
json_eat_whitespace(state);
|
|
1193
1303
|
long stack_head = state->stack->head;
|
|
1194
1304
|
|
|
1195
|
-
if ((state
|
|
1305
|
+
if (peek(state) == '}') {
|
|
1196
1306
|
state->cursor++;
|
|
1197
1307
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1198
1308
|
} else {
|
|
@@ -1201,13 +1311,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1201
1311
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1202
1312
|
}
|
|
1203
1313
|
|
|
1204
|
-
if (
|
|
1314
|
+
if (peek(state) != '"') {
|
|
1205
1315
|
raise_parse_error("expected object key, got %s", state);
|
|
1206
1316
|
}
|
|
1207
1317
|
json_parse_string(state, config, true);
|
|
1208
1318
|
|
|
1209
1319
|
json_eat_whitespace(state);
|
|
1210
|
-
if ((state
|
|
1320
|
+
if (peek(state) != ':') {
|
|
1211
1321
|
raise_parse_error("expected ':' after object key", state);
|
|
1212
1322
|
}
|
|
1213
1323
|
state->cursor++;
|
|
@@ -1218,46 +1328,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1218
1328
|
while (true) {
|
|
1219
1329
|
json_eat_whitespace(state);
|
|
1220
1330
|
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1331
|
+
const char next_char = peek(state);
|
|
1332
|
+
if (next_char == '}') {
|
|
1333
|
+
state->cursor++;
|
|
1334
|
+
state->current_nesting--;
|
|
1335
|
+
size_t count = state->stack->head - stack_head;
|
|
1226
1336
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1337
|
+
// Temporary rewind cursor in case an error is raised
|
|
1338
|
+
const char *final_cursor = state->cursor;
|
|
1339
|
+
state->cursor = object_start_cursor;
|
|
1340
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1341
|
+
state->cursor = final_cursor;
|
|
1232
1342
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1343
|
+
return json_push_value(state, config, object);
|
|
1344
|
+
}
|
|
1235
1345
|
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1346
|
+
if (next_char == ',') {
|
|
1347
|
+
state->cursor++;
|
|
1348
|
+
json_eat_whitespace(state);
|
|
1239
1349
|
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
}
|
|
1350
|
+
if (config->allow_trailing_comma) {
|
|
1351
|
+
if (peek(state) == '}') {
|
|
1352
|
+
continue;
|
|
1244
1353
|
}
|
|
1354
|
+
}
|
|
1245
1355
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1356
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1357
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1358
|
+
}
|
|
1359
|
+
json_parse_string(state, config, true);
|
|
1250
1360
|
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1361
|
+
json_eat_whitespace(state);
|
|
1362
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1363
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1364
|
+
}
|
|
1365
|
+
state->cursor++;
|
|
1256
1366
|
|
|
1257
|
-
|
|
1367
|
+
json_parse_any(state, config);
|
|
1258
1368
|
|
|
1259
|
-
|
|
1260
|
-
}
|
|
1369
|
+
continue;
|
|
1261
1370
|
}
|
|
1262
1371
|
|
|
1263
1372
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1265,18 +1374,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1265
1374
|
break;
|
|
1266
1375
|
}
|
|
1267
1376
|
|
|
1377
|
+
case 0:
|
|
1378
|
+
raise_parse_error("unexpected end of input", state);
|
|
1379
|
+
break;
|
|
1380
|
+
|
|
1268
1381
|
default:
|
|
1269
1382
|
raise_parse_error("unexpected character: %s", state);
|
|
1270
1383
|
break;
|
|
1271
1384
|
}
|
|
1272
1385
|
|
|
1273
1386
|
raise_parse_error("unreachable: %s", state);
|
|
1387
|
+
return Qundef;
|
|
1274
1388
|
}
|
|
1275
1389
|
|
|
1276
1390
|
/*
 * Check that nothing but whitespace follows the value that was just parsed.
 *
 * Skips trailing whitespace, then reports a parse error through
 * raise_parse_error() unless eos() says the input is exhausted.
 */
static void json_ensure_eof(JSON_ParserState *state)
{
    json_eat_whitespace(state);

    if (eos(state)) {
        return;
    }

    raise_parse_error("unexpected token at end of stream %s", state);
}
|
|
@@ -1393,6 +1507,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1393
1507
|
*/
|
|
1394
1508
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1395
1509
|
{
|
|
1510
|
+
rb_check_frozen(self);
|
|
1396
1511
|
GET_PARSER_CONFIG;
|
|
1397
1512
|
|
|
1398
1513
|
parser_config_init(config, opts);
|
|
@@ -1488,7 +1603,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1488
1603
|
JSON_ParserConfig_memsize,
|
|
1489
1604
|
},
|
|
1490
1605
|
0, 0,
|
|
1491
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1606
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1492
1607
|
};
|
|
1493
1608
|
|
|
1494
1609
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1538,10 +1653,6 @@ void Init_parser(void)
|
|
|
1538
1653
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1539
1654
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1540
1655
|
|
|
1541
|
-
i_chr = rb_intern("chr");
|
|
1542
|
-
i_aset = rb_intern("[]=");
|
|
1543
|
-
i_aref = rb_intern("[]");
|
|
1544
|
-
i_leftshift = rb_intern("<<");
|
|
1545
1656
|
i_new = rb_intern("new");
|
|
1546
1657
|
i_try_convert = rb_intern("try_convert");
|
|
1547
1658
|
i_uminus = rb_intern("-@");
|