json 2.13.2 → 2.17.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +72 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +31 -54
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +279 -239
- data/ext/json/ext/json.h +97 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +507 -391
- data/ext/json/ext/simd/simd.h +15 -12
- data/ext/json/ext/vendor/fpconv.c +12 -11
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +60 -23
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +113 -58
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +23 -1
- metadata +6 -3
|
@@ -1,40 +1,11 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
10
|
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
40
11
|
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
@@ -44,7 +15,7 @@ static int utf8_encindex;
|
|
|
44
15
|
|
|
45
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
17
|
// For TruffleRuby
|
|
47
|
-
void
|
|
18
|
+
static void
|
|
48
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
20
|
{
|
|
50
21
|
long index = 0;
|
|
@@ -61,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
33
|
#endif
|
|
63
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
/*
 * Fallback used only when HAVE_RB_STR_TO_INTERNED_STR is not defined.
 *
 * Freezes `str` and then calls the method identified by `i_uminus` on it
 * (presumably String#-@ -- confirm where i_uminus is initialised), returning
 * whatever that call yields.
 */
static VALUE rb_str_to_interned_str(VALUE str)
{
    VALUE frozen = rb_str_freeze(str);

    return rb_funcall(frozen, i_uminus, 0);
}
|
|
40
|
+
#endif
|
|
64
41
|
|
|
65
42
|
/* name cache */
|
|
66
43
|
|
|
@@ -106,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
83
|
cache->entries[index] = rstring;
|
|
107
84
|
}
|
|
108
85
|
|
|
109
|
-
|
|
86
|
+
/* Unless the inline version below is compiled in, cache keys are compared with libc memcmp(). */
#define rstring_cache_memcmp memcmp

#if JSON_CPU_LITTLE_ENDIAN_64BITS
#if __has_builtin(__builtin_bswap64)
#undef rstring_cache_memcmp
/*
 * Inline stand-in for memcmp(str, rptr, length).
 *
 * The libc memcmp has numerous complex optimizations, but in this particular case,
 * we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
 * inline a simpler memcmp outperforms calling the libc version.
 *
 * Returns -1, 0 or 1 using the same ordering as memcmp(): bytes are compared as
 * unsigned values, the first differing byte decides.
 */
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
{
    long i = 0;

    // Compare 8 bytes at a time. The words are loaded little-endian, so they are
    // byte-swapped before the integer comparison to make the first byte in memory
    // the most significant one.
    for (; i + 8 <= length; i += 8) {
        uint64_t a, b;
        memcpy(&a, str + i, 8);
        memcpy(&b, rptr + i, 8);
        if (a != b) {
            a = __builtin_bswap64(a);
            b = __builtin_bswap64(b);
            return (a < b) ? -1 : 1;
        }
    }

    // Remaining 0..7 bytes. They must be compared as unsigned char: comparing plain
    // `char` would order bytes >= 0x80 (any multi-byte UTF-8 sequence) differently
    // from memcmp() and from the word-wise loop above wherever `char` is signed.
    for (; i < length; i++) {
        const unsigned char lhs = (unsigned char)str[i];
        const unsigned char rhs = (unsigned char)rptr[i];
        if (lhs != rhs) {
            return (lhs < rhs) ? -1 : 1;
        }
    }

    return 0;
}
#endif
#endif
|
|
118
119
|
|
|
119
|
-
static
|
|
120
|
+
/*
 * Three-way comparison between a raw key (`str`, `length`) and the Ruby string
 * `rstring`, used to keep the rvalue cache sorted.
 *
 * Keys are ordered by length first; keys of equal length are ordered by
 * rstring_cache_memcmp(). Returns a negative value, 0, or a positive value when
 * the raw key sorts before, equal to, or after `rstring`.
 */
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
{
    const char *rstring_ptr;
    long rstring_length;

    RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);

    if (length != rstring_length) {
        // Report only the sign. Narrowing the `long` difference to `int` could
        // wrap to 0 (a false match) or flip sign for very large length gaps.
        return (length < rstring_length) ? -1 : 1;
    }

    return rstring_cache_memcmp(str, rstring_ptr, length);
}
|
|
133
133
|
|
|
134
|
+
/*
 * Looks up the key (`str`, `length`) in the sorted string cache.
 *
 * On a hit the cached entry is returned. On a miss a string is built with
 * build_interned_string(), inserted at its sorted position while the cache
 * holds fewer than JSON_RVALUE_CACHE_CAPA entries, and returned.
 */
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
{
    // Binary search over the half-open range [first, limit).
    int first = 0;
    int limit = cache->length;

    while (first < limit) {
        const int probe = (first + limit - 1) / 2;
        VALUE candidate = cache->entries[probe];
        const int order = rstring_cache_cmp(str, length, candidate);

        if (order == 0) {
            return candidate;
        }

        if (order > 0) {
            first = probe + 1;
        } else {
            limit = probe;
        }
    }

    // Miss: `first` is now the index at which the key keeps the entries sorted.
    VALUE rstring = build_interned_string(str, length);

    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
        rvalue_cache_insert_at(cache, first, rstring);
    }

    return rstring;
}
|
|
170
160
|
|
|
171
161
|
/*
 * Symbol flavour of the cache lookup: entries are symbols and are compared
 * through rb_sym2str().
 *
 * On a hit the cached symbol is returned. On a miss a symbol is built with
 * build_symbol(), inserted at its sorted position while the cache holds fewer
 * than JSON_RVALUE_CACHE_CAPA entries, and returned.
 */
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
{
    // Binary search over the half-open range [first, limit).
    int first = 0;
    int limit = cache->length;

    while (first < limit) {
        const int probe = (first + limit - 1) / 2;
        VALUE candidate = cache->entries[probe];
        const int order = rstring_cache_cmp(str, length, rb_sym2str(candidate));

        if (order == 0) {
            return candidate;
        }

        if (order > 0) {
            first = probe + 1;
        } else {
            limit = probe;
        }
    }

    // Miss: `first` is now the index at which the key keeps the entries sorted.
    VALUE rsymbol = build_symbol(str, length);

    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
        rvalue_cache_insert_at(cache, first, rsymbol);
    }

    return rsymbol;
}
|
|
@@ -330,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
295
|
}
|
|
331
296
|
}
|
|
332
297
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
299
|
{
|
|
344
300
|
int len = 1;
|
|
@@ -379,7 +335,6 @@ typedef struct JSON_ParserStruct {
|
|
|
379
335
|
int max_nesting;
|
|
380
336
|
bool allow_nan;
|
|
381
337
|
bool allow_trailing_comma;
|
|
382
|
-
bool parsing_name;
|
|
383
338
|
bool symbolize_names;
|
|
384
339
|
bool freeze;
|
|
385
340
|
} JSON_ParserConfig;
|
|
@@ -395,6 +350,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
350
|
int current_nesting;
|
|
396
351
|
} JSON_ParserState;
|
|
397
352
|
|
|
353
|
+
/* Distance from the cursor to the end of the input, as a size_t. */
static inline size_t rest(JSON_ParserState *state) {
    const size_t remaining = state->end - state->cursor;

    return remaining;
}
|
|
356
|
+
|
|
357
|
+
/* True once the cursor has reached (or moved past) the end of the input. */
static inline bool eos(JSON_ParserState *state) {
    return !(state->cursor < state->end);
}
|
|
360
|
+
|
|
361
|
+
/*
 * Returns the byte under the cursor without consuming it, or 0 when the
 * cursor is at the end of the input.
 */
static inline char peek(JSON_ParserState *state)
{
    return RB_UNLIKELY(eos(state)) ? 0 : *state->cursor;
}
|
|
368
|
+
|
|
398
369
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
370
|
{
|
|
400
371
|
const char *cursor = state->cursor;
|
|
@@ -530,61 +501,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
501
|
|
|
531
502
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
503
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
504
|
static void
|
|
542
505
|
json_eat_comments(JSON_ParserState *state)
|
|
543
506
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
507
|
+
const char *start = state->cursor;
|
|
508
|
+
state->cursor++;
|
|
509
|
+
|
|
510
|
+
switch (peek(state)) {
|
|
511
|
+
case '/': {
|
|
512
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
513
|
+
if (!state->cursor) {
|
|
514
|
+
state->cursor = state->end;
|
|
515
|
+
} else {
|
|
516
|
+
state->cursor++;
|
|
554
517
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
518
|
+
break;
|
|
519
|
+
}
|
|
520
|
+
case '*': {
|
|
521
|
+
state->cursor++;
|
|
522
|
+
|
|
523
|
+
while (true) {
|
|
524
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
525
|
+
if (!next_match) {
|
|
526
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
state->cursor = next_match + 1;
|
|
530
|
+
if (peek(state) == '/') {
|
|
531
|
+
state->cursor++;
|
|
532
|
+
break;
|
|
568
533
|
}
|
|
569
|
-
break;
|
|
570
534
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
535
|
+
break;
|
|
574
536
|
}
|
|
575
|
-
|
|
576
|
-
|
|
537
|
+
default:
|
|
538
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
539
|
+
break;
|
|
577
540
|
}
|
|
578
541
|
}
|
|
579
542
|
|
|
580
|
-
static
|
|
543
|
+
ALWAYS_INLINE(static) void
|
|
581
544
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
545
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
546
|
+
while (true) {
|
|
547
|
+
switch (peek(state)) {
|
|
548
|
+
case ' ':
|
|
549
|
+
state->cursor++;
|
|
550
|
+
break;
|
|
551
|
+
case '\n':
|
|
552
|
+
state->cursor++;
|
|
553
|
+
|
|
554
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
555
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
556
|
+
while (rest(state) > 8) {
|
|
557
|
+
uint64_t chunk;
|
|
558
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
559
|
+
if (chunk == 0x2020202020202020) {
|
|
560
|
+
state->cursor += 8;
|
|
561
|
+
continue;
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
565
|
+
state->cursor += consecutive_spaces;
|
|
566
|
+
break;
|
|
567
|
+
}
|
|
568
|
+
#endif
|
|
569
|
+
break;
|
|
570
|
+
case '\t':
|
|
571
|
+
case '\r':
|
|
572
|
+
state->cursor++;
|
|
573
|
+
break;
|
|
574
|
+
case '/':
|
|
575
|
+
json_eat_comments(state);
|
|
576
|
+
break;
|
|
577
|
+
|
|
578
|
+
default:
|
|
579
|
+
return;
|
|
588
580
|
}
|
|
589
581
|
}
|
|
590
582
|
}
|
|
@@ -615,11 +607,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
607
|
return result;
|
|
616
608
|
}
|
|
617
609
|
|
|
618
|
-
static inline
|
|
610
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
611
|
+
{
|
|
612
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
613
|
+
// Simple heuristics:
|
|
614
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
615
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
616
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
619
620
|
{
|
|
621
|
+
bool intern = is_name || config->freeze;
|
|
622
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
623
|
size_t bufferSize = stringEnd - string;
|
|
621
624
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
625
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
626
|
VALUE cached_key;
|
|
624
627
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
628
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,60 +638,73 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
638
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
639
|
}
|
|
637
640
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
641
|
+
/* Upper bound on how many backslash positions are remembered per string. */
#define JSON_MAX_UNESCAPE_POSITIONS 16

/* Queue of backslash positions recorded while a string was being scanned. */
typedef struct _json_unescape_positions {
    long size;               // number of recorded positions not yet consumed
    const char **positions;  // next unconsumed position; advanced as entries are taken
    bool has_more;           // set when more backslashes were seen than could be recorded
} JSON_UnescapePositions;

/*
 * Returns the next backslash at or after `pe`, or NULL when there is none.
 *
 * Recorded positions are consumed front to back; entries that lie before `pe`
 * are discarded. Once the queue is empty, the rest of the string
 * [pe, stringEnd) is searched with memchr(), but only if `has_more` says
 * there were backslashes that did not fit in the queue.
 */
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
{
    while (positions->size != 0) {
        const char *candidate = *positions->positions++;
        positions->size--;

        if (candidate >= pe) {
            return candidate;
        }
    }

    return positions->has_more ? memchr(pe, '\\', stringEnd - pe) : NULL;
}
|
|
665
|
+
|
|
666
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
667
|
+
{
|
|
668
|
+
bool intern = is_name || config->freeze;
|
|
669
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
670
|
+
size_t bufferSize = stringEnd - string;
|
|
671
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
672
|
+
char *buffer;
|
|
673
|
+
|
|
659
674
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
675
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
676
|
buffer = RSTRING_PTR(result);
|
|
662
677
|
bufferStart = buffer;
|
|
663
678
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
679
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
680
|
+
|
|
681
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
682
|
if (pe > p) {
|
|
668
683
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
684
|
buffer += pe - p;
|
|
670
685
|
}
|
|
671
686
|
switch (*++pe) {
|
|
687
|
+
case '"':
|
|
688
|
+
case '/':
|
|
689
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
690
|
+
break;
|
|
691
|
+
case '\\':
|
|
692
|
+
APPEND_CHAR('\\');
|
|
693
|
+
break;
|
|
672
694
|
case 'n':
|
|
673
|
-
|
|
695
|
+
APPEND_CHAR('\n');
|
|
674
696
|
break;
|
|
675
697
|
case 'r':
|
|
676
|
-
|
|
698
|
+
APPEND_CHAR('\r');
|
|
677
699
|
break;
|
|
678
700
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
701
|
+
APPEND_CHAR('\t');
|
|
686
702
|
break;
|
|
687
703
|
case 'b':
|
|
688
|
-
|
|
704
|
+
APPEND_CHAR('\b');
|
|
689
705
|
break;
|
|
690
706
|
case 'f':
|
|
691
|
-
|
|
707
|
+
APPEND_CHAR('\f');
|
|
692
708
|
break;
|
|
693
709
|
case 'u':
|
|
694
710
|
if (pe > stringEnd - 5) {
|
|
@@ -713,26 +729,36 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
713
729
|
}
|
|
714
730
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
731
|
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
732
|
+
|
|
733
|
+
if ((sur & 0xFC00) != 0xDC00) {
|
|
734
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
735
|
+
}
|
|
736
|
+
|
|
716
737
|
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
717
738
|
| (sur & 0x3FF));
|
|
718
739
|
pe += 5;
|
|
719
740
|
} else {
|
|
720
|
-
|
|
741
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
721
742
|
break;
|
|
722
743
|
}
|
|
723
744
|
}
|
|
724
|
-
|
|
725
|
-
|
|
745
|
+
|
|
746
|
+
char buf[4];
|
|
747
|
+
int unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
748
|
+
MEMCPY(buffer, buf, char, unescape_len);
|
|
749
|
+
buffer += unescape_len;
|
|
750
|
+
p = ++pe;
|
|
726
751
|
}
|
|
727
752
|
break;
|
|
728
753
|
default:
|
|
729
|
-
|
|
730
|
-
|
|
754
|
+
if ((unsigned char)*pe < 0x20) {
|
|
755
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
756
|
+
}
|
|
757
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
758
|
+
break;
|
|
731
759
|
}
|
|
732
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
733
|
-
buffer += unescape_len;
|
|
734
|
-
p = ++pe;
|
|
735
760
|
}
|
|
761
|
+
#undef APPEND_CHAR
|
|
736
762
|
|
|
737
763
|
if (stringEnd > p) {
|
|
738
764
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -743,33 +769,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
743
769
|
if (symbolize) {
|
|
744
770
|
result = rb_str_intern(result);
|
|
745
771
|
} else if (intern) {
|
|
746
|
-
result =
|
|
772
|
+
result = rb_str_to_interned_str(result);
|
|
747
773
|
}
|
|
748
774
|
|
|
749
775
|
return result;
|
|
750
776
|
}
|
|
751
777
|
|
|
752
778
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
753
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
754
|
-
{
|
|
755
|
-
bool negative = false;
|
|
756
|
-
if (*p == '-') {
|
|
757
|
-
negative = true;
|
|
758
|
-
p++;
|
|
759
|
-
}
|
|
760
|
-
|
|
761
|
-
long long memo = 0;
|
|
762
|
-
while (p < pe) {
|
|
763
|
-
memo *= 10;
|
|
764
|
-
memo += *p - '0';
|
|
765
|
-
p++;
|
|
766
|
-
}
|
|
767
|
-
|
|
768
|
-
if (negative) {
|
|
769
|
-
memo = -memo;
|
|
770
|
-
}
|
|
771
|
-
return LL2NUM(memo);
|
|
772
|
-
}
|
|
773
779
|
|
|
774
780
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
775
781
|
{
|
|
@@ -783,17 +789,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
783
789
|
}
|
|
784
790
|
|
|
785
791
|
static inline VALUE
|
|
786
|
-
json_decode_integer(const char *start, const char *end)
|
|
792
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
787
793
|
{
|
|
788
|
-
|
|
789
|
-
if (
|
|
790
|
-
return
|
|
794
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
795
|
+
if (negative) {
|
|
796
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
791
797
|
}
|
|
792
|
-
return
|
|
798
|
+
return UINT64T2NUM(mantissa);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
return json_decode_large_integer(start, end - start);
|
|
793
802
|
}
|
|
794
803
|
|
|
795
804
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
796
805
|
{
|
|
806
|
+
if (RB_LIKELY(len < 64)) {
|
|
807
|
+
char buffer[64];
|
|
808
|
+
MEMCPY(buffer, start, char, len);
|
|
809
|
+
buffer[len] = '\0';
|
|
810
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
811
|
+
}
|
|
812
|
+
|
|
797
813
|
VALUE buffer_v;
|
|
798
814
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
799
815
|
MEMCPY(buffer, start, char, len);
|
|
@@ -803,21 +819,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
803
819
|
return number;
|
|
804
820
|
}
|
|
805
821
|
|
|
806
|
-
|
|
822
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
823
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
824
|
+
*/
|
|
825
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
826
|
+
const char *start, const char *end)
|
|
807
827
|
{
|
|
808
|
-
long len = end - start;
|
|
809
|
-
|
|
810
828
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
811
|
-
VALUE text = rb_str_new(start,
|
|
829
|
+
VALUE text = rb_str_new(start, end - start);
|
|
812
830
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
813
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
814
|
-
char buffer[64];
|
|
815
|
-
MEMCPY(buffer, start, char, len);
|
|
816
|
-
buffer[len] = '\0';
|
|
817
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
-
} else {
|
|
819
|
-
return json_decode_large_float(start, len);
|
|
820
831
|
}
|
|
832
|
+
|
|
833
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
834
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
835
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
836
|
+
return json_decode_large_float(start, end - start);
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
821
840
|
}
|
|
822
841
|
|
|
823
842
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -903,20 +922,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
903
922
|
return object;
|
|
904
923
|
}
|
|
905
924
|
|
|
906
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
907
|
-
{
|
|
908
|
-
VALUE string;
|
|
909
|
-
bool intern = is_name || config->freeze;
|
|
910
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
911
|
-
if (escaped) {
|
|
912
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
913
|
-
} else {
|
|
914
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
915
|
-
}
|
|
916
|
-
|
|
917
|
-
return string;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
925
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
921
926
|
{
|
|
922
927
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -939,17 +944,11 @@ static const bool string_scan_table[256] = {
|
|
|
939
944
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
940
945
|
};
|
|
941
946
|
|
|
942
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
943
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
944
|
-
#else
|
|
945
|
-
#define FORCE_INLINE
|
|
946
|
-
#endif
|
|
947
|
-
|
|
948
947
|
#ifdef HAVE_SIMD
|
|
949
948
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
950
949
|
#endif /* HAVE_SIMD */
|
|
951
950
|
|
|
952
|
-
static
|
|
951
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
953
952
|
{
|
|
954
953
|
#ifdef HAVE_SIMD
|
|
955
954
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -957,7 +956,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
957
956
|
uint64_t mask = 0;
|
|
958
957
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
959
958
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
960
|
-
return
|
|
959
|
+
return true;
|
|
961
960
|
}
|
|
962
961
|
|
|
963
962
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -965,40 +964,45 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
965
964
|
int mask = 0;
|
|
966
965
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
967
966
|
state->cursor += trailing_zeros(mask);
|
|
968
|
-
return
|
|
967
|
+
return true;
|
|
969
968
|
}
|
|
970
969
|
}
|
|
971
970
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
972
971
|
#endif /* HAVE_SIMD */
|
|
973
972
|
|
|
974
|
-
while (state
|
|
973
|
+
while (!eos(state)) {
|
|
975
974
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
976
|
-
return
|
|
975
|
+
return true;
|
|
977
976
|
}
|
|
978
|
-
|
|
977
|
+
state->cursor++;
|
|
979
978
|
}
|
|
980
|
-
return
|
|
979
|
+
return false;
|
|
981
980
|
}
|
|
982
981
|
|
|
983
|
-
static
|
|
982
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
984
983
|
{
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
984
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
985
|
+
JSON_UnescapePositions positions = {
|
|
986
|
+
.size = 0,
|
|
987
|
+
.positions = backslashes,
|
|
988
|
+
.has_more = false,
|
|
989
|
+
};
|
|
988
990
|
|
|
989
|
-
|
|
991
|
+
do {
|
|
990
992
|
switch (*state->cursor) {
|
|
991
993
|
case '"': {
|
|
992
|
-
VALUE string =
|
|
994
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
993
995
|
state->cursor++;
|
|
994
996
|
return json_push_value(state, config, string);
|
|
995
997
|
}
|
|
996
998
|
case '\\': {
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
999
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1000
|
+
backslashes[positions.size] = state->cursor;
|
|
1001
|
+
positions.size++;
|
|
1002
|
+
} else {
|
|
1003
|
+
positions.has_more = true;
|
|
1001
1004
|
}
|
|
1005
|
+
state->cursor++;
|
|
1002
1006
|
break;
|
|
1003
1007
|
}
|
|
1004
1008
|
default:
|
|
@@ -1007,22 +1011,183 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1007
1011
|
}
|
|
1008
1012
|
|
|
1009
1013
|
state->cursor++;
|
|
1010
|
-
}
|
|
1014
|
+
} while (string_scan(state));
|
|
1011
1015
|
|
|
1012
1016
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1013
1017
|
return Qfalse;
|
|
1014
1018
|
}
|
|
1015
1019
|
|
|
1020
|
+
/*
 * Parses a string literal. On entry the cursor is on the opening quote.
 *
 * string_scan() moves the cursor to the first byte that needs attention. If
 * that byte is the closing quote the string is built through
 * json_string_fastpath(); otherwise parsing continues in
 * json_parse_escaped_string(). Running out of input before any such byte is
 * reported as a parse error.
 */
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
    // Step over the opening quote; the content starts right after it.
    const char *start = ++state->cursor;

    if (RB_UNLIKELY(!string_scan(state))) {
        raise_parse_error("unexpected end of input, expected closing \"", state);
    }

    if (RB_UNLIKELY(*state->cursor != '"')) {
        return json_parse_escaped_string(state, config, is_name, start);
    }

    VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
    state->cursor++;
    return json_push_value(state, config, string);
}
|
|
1036
|
+
|
|
1037
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1038
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1039
|
+
// Additional References:
|
|
1040
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1041
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1042
|
+
/*
 * SWAR conversion of eight ASCII digits packed in a 64-bit word into their
 * numeric value. The first (most significant) digit sits in the lowest byte,
 * i.e. the word is the little-endian load of the text.
 *
 * The input is assumed to hold digits only; callers validate beforehand.
 */
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
    const uint64_t pair_mask  = 0x000000FF000000FF;
    const uint64_t scale_even = 0x000F424000000064; // 100 + (1000000ULL << 32)
    const uint64_t scale_odd  = 0x0000271000000001; // 1 + (10000ULL << 32)

    // ASCII -> 0..9 in every byte.
    val -= 0x3030303030303030;

    // Fold neighbouring digits: every even byte now holds a two-digit value.
    val = (val * 10) + (val >> 8);

    // Weight the four two-digit groups and sum them in the upper 32 bits.
    const uint64_t even_groups = (val & pair_mask) * scale_even;
    const uint64_t odd_groups  = ((val >> 16) & pair_mask) * scale_odd;

    return (even_groups + odd_groups) >> 32;
}
|
|
1051
|
+
|
|
1052
|
+
/*
 * SWAR conversion of four ASCII digits packed in a 32-bit word into their
 * numeric value. The first (most significant) digit sits in the lowest byte.
 *
 * The input is assumed to hold digits only; callers validate beforehand.
 */
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
    // ASCII -> 0..9 in every byte.
    val -= 0x30303030;

    // Fold neighbouring digits: bytes 0 and 2 now hold two-digit values.
    val = (val * 10) + (val >> 8);

    const uint32_t high_pair = val & 0x000000FF;
    const uint32_t low_pair  = (val >> 16) & 0x000000FF;

    return (high_pair * 100) + low_pair;
}
|
|
1060
|
+
#endif
|
|
1061
|
+
|
|
1062
|
+
/*
 * Consumes a run of ASCII digits at the cursor, folding each digit into
 * *accumulator (accumulator = accumulator * 10 + digit), and returns how many
 * digits were consumed. The accumulator is not reset first, so a caller can
 * chain calls to keep extending the same value.
 */
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
    const char *const first = state->cursor;

#if JSON_CPU_LITTLE_ENDIAN_64BITS
    while (rest(state) >= sizeof(uint64_t)) {
        uint64_t word;
        memcpy(&word, state->cursor, sizeof(uint64_t));

        // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
        // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
        // A byte of `match` equals 0x33 exactly when the input byte is '0'..'9'.
        const uint64_t match = (word & 0xF0F0F0F0F0F0F0F0) | (((word + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
        const uint64_t mismatch = match ^ 0x3333333333333333;

        if (mismatch == 0) { // 8 consecutive digits
            *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(word);
            state->cursor += 8;
            continue;
        }

        // The lowest set bit of `mismatch` lies in the first non-digit byte.
        uint32_t run = trailing_zeros64(mismatch) / CHAR_BIT;

        if (run >= 4) {
            *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)word);
            state->cursor += 4;
            run -= 4;
        }

        for (; run > 0; run--) {
            *accumulator = *accumulator * 10 + (*state->cursor - '0');
            state->cursor++;
        }

        // A non-digit was found inside this word, so the run is over.
        return (int)(state->cursor - first);
    }
#endif

    // Byte-at-a-time path: the only path without the SWAR code, and the tail
    // handler when fewer than 8 bytes of input remain.
    for (;;) {
        const char c = peek(state);
        if (!rb_isdigit(c)) {
            break;
        }
        *accumulator = *accumulator * 10 + (c - '0');
        state->cursor++;
    }

    return (int)(state->cursor - first);
}
|
|
1106
|
+
|
|
1107
|
+
/*
 * Parse a JSON number and return the decoded Ruby value.
 *
 * state    - parser state; state->cursor must be on the first character after
 *            the optional leading '-' and is advanced past the whole number.
 * config   - parser configuration, forwarded to json_decode_float.
 * negative - true when the caller already consumed a leading '-'.
 * start    - first character of the number text (including the '-', if any);
 *            used for error reporting and handed to the decoders.
 *
 * Accepted shape: digits, optionally followed by '.' and at least one digit,
 * optionally followed by 'e'/'E', an optional sign and at least one digit.
 * A parse error is raised (via raise_parse_error_at) when:
 *   - the integer part has more than one digit and starts with '0',
 *   - a '-' is not followed by any digit,
 *   - '.' is not followed by a digit,
 *   - the exponent marker is not followed by a digit.
 *
 * Numbers without fraction and exponent go to json_decode_integer, everything
 * else goes to json_decode_float together with the accumulated mantissa, the
 * number of mantissa digits and the decimal exponent to apply to it.
 *
 * The mantissa is accumulated without overflow detection (see
 * json_parse_digits); the decoders also receive the digit count and the
 * original text.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    // Decimal exponents of this magnitude are far outside the range of a double
    // (roughly 1e-324 .. 1e308), so anything at or beyond it is saturated to
    // this bound instead of being truncated when narrowed to int32_t.
    const int64_t exponent_limit = 1000000000;

    bool integer = true;

    // peek() rather than a raw dereference: for an input ending in a lone '-'
    // the cursor is already at the end of the buffer here.
    // NOTE(review): assumes peek() yields 0 at end of input, as the `case 0`
    // branch of json_parse_any suggests.
    const char first_digit = peek(state);

    // Variables for Ryu optimization - extract digits during parsing
    int64_t exponent = 0; // 64-bit so the adjustments below cannot overflow
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        const char *exponent_text = state->cursor;
        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // Up to 9 digits always fit below exponent_limit. With more digits the
        // value may exceed the limit, or may even have wrapped the 64-bit
        // accumulator, so count the digits that remain once leading zeros
        // (which do not contribute to the value) are skipped.
        if (RB_UNLIKELY(exponent_digits > 9)) {
            while (exponent_text < state->cursor && *exponent_text == '0') {
                exponent_text++;
            }
            if (state->cursor - exponent_text > 9) {
                abs_exponent = (uint64_t)exponent_limit;
            }
        }

        exponent = negative_exponent ? -(int64_t)abs_exponent : (int64_t)abs_exponent;
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    // Keep the value handed to the decoder within +/- exponent_limit so the
    // narrowing conversion below is always exact.
    if (exponent > exponent_limit) {
        exponent = exponent_limit;
    } else if (exponent < -exponent_limit) {
        exponent = -exponent_limit;
    }

    return json_decode_float(config, mantissa, mantissa_digits, (int32_t)exponent, negative, start, state->cursor);
}
|
|
1171
|
+
|
|
1172
|
+
/*
 * Parse an unsigned number. The number text starts at the current cursor,
 * which is also where json_parse_number begins reading digits.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *number_start = state->cursor;

    return json_parse_number(state, config, false, number_start);
}
|
|
1176
|
+
|
|
1177
|
+
/*
 * Parse a number introduced by '-'. The cursor is expected to be on the sign:
 * its position is remembered as the start of the number text, then the cursor
 * is moved past it before the digits are parsed.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *sign_position = state->cursor++;

    return json_parse_number(state, config, true, sign_position);
}
|
|
1183
|
+
|
|
1016
1184
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1017
1185
|
{
|
|
1018
1186
|
json_eat_whitespace(state);
|
|
1019
|
-
if (state->cursor >= state->end) {
|
|
1020
|
-
raise_parse_error("unexpected end of input", state);
|
|
1021
|
-
}
|
|
1022
1187
|
|
|
1023
|
-
switch (
|
|
1188
|
+
switch (peek(state)) {
|
|
1024
1189
|
case 'n':
|
|
1025
|
-
if ((state
|
|
1190
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1026
1191
|
state->cursor += 4;
|
|
1027
1192
|
return json_push_value(state, config, Qnil);
|
|
1028
1193
|
}
|
|
@@ -1030,7 +1195,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1030
1195
|
raise_parse_error("unexpected token %s", state);
|
|
1031
1196
|
break;
|
|
1032
1197
|
case 't':
|
|
1033
|
-
if ((state
|
|
1198
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1034
1199
|
state->cursor += 4;
|
|
1035
1200
|
return json_push_value(state, config, Qtrue);
|
|
1036
1201
|
}
|
|
@@ -1039,7 +1204,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1039
1204
|
break;
|
|
1040
1205
|
case 'f':
|
|
1041
1206
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1042
|
-
if ((state
|
|
1207
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1043
1208
|
state->cursor += 5;
|
|
1044
1209
|
return json_push_value(state, config, Qfalse);
|
|
1045
1210
|
}
|
|
@@ -1048,7 +1213,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1048
1213
|
break;
|
|
1049
1214
|
case 'N':
|
|
1050
1215
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1051
|
-
if (config->allow_nan && (state
|
|
1216
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1052
1217
|
state->cursor += 3;
|
|
1053
1218
|
return json_push_value(state, config, CNaN);
|
|
1054
1219
|
}
|
|
@@ -1056,16 +1221,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1056
1221
|
raise_parse_error("unexpected token %s", state);
|
|
1057
1222
|
break;
|
|
1058
1223
|
case 'I':
|
|
1059
|
-
if (config->allow_nan && (state
|
|
1224
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1060
1225
|
state->cursor += 8;
|
|
1061
1226
|
return json_push_value(state, config, CInfinity);
|
|
1062
1227
|
}
|
|
1063
1228
|
|
|
1064
1229
|
raise_parse_error("unexpected token %s", state);
|
|
1065
1230
|
break;
|
|
1066
|
-
case '-':
|
|
1231
|
+
case '-': {
|
|
1067
1232
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1068
|
-
if ((state
|
|
1233
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1069
1234
|
if (config->allow_nan) {
|
|
1070
1235
|
state->cursor += 9;
|
|
1071
1236
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1073,62 +1238,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1238
|
raise_parse_error("unexpected token %s", state);
|
|
1074
1239
|
}
|
|
1075
1240
|
}
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
bool integer = true;
|
|
1079
|
-
|
|
1080
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1081
|
-
const char *start = state->cursor;
|
|
1082
|
-
state->cursor++;
|
|
1083
|
-
|
|
1084
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1085
|
-
state->cursor++;
|
|
1086
|
-
}
|
|
1087
|
-
|
|
1088
|
-
long integer_length = state->cursor - start;
|
|
1089
|
-
|
|
1090
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1091
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1092
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1093
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1094
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1095
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1096
|
-
}
|
|
1097
|
-
|
|
1098
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1099
|
-
integer = false;
|
|
1100
|
-
state->cursor++;
|
|
1101
|
-
|
|
1102
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1103
|
-
raise_parse_error("invalid number: %s", state);
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1107
|
-
state->cursor++;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1112
|
-
integer = false;
|
|
1113
|
-
state->cursor++;
|
|
1114
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1115
|
-
state->cursor++;
|
|
1116
|
-
}
|
|
1117
|
-
|
|
1118
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1119
|
-
raise_parse_error("invalid number: %s", state);
|
|
1120
|
-
}
|
|
1121
|
-
|
|
1122
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1123
|
-
state->cursor++;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
if (integer) {
|
|
1128
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1129
|
-
}
|
|
1130
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1241
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1242
|
+
break;
|
|
1131
1243
|
}
|
|
1244
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1245
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1246
|
+
break;
|
|
1132
1247
|
case '"': {
|
|
1133
1248
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1134
1249
|
return json_parse_string(state, config, false);
|
|
@@ -1139,7 +1254,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1139
1254
|
json_eat_whitespace(state);
|
|
1140
1255
|
long stack_head = state->stack->head;
|
|
1141
1256
|
|
|
1142
|
-
if ((state
|
|
1257
|
+
if (peek(state) == ']') {
|
|
1143
1258
|
state->cursor++;
|
|
1144
1259
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1145
1260
|
} else {
|
|
@@ -1154,26 +1269,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1154
1269
|
while (true) {
|
|
1155
1270
|
json_eat_whitespace(state);
|
|
1156
1271
|
|
|
1157
|
-
|
|
1158
|
-
if (*state->cursor == ']') {
|
|
1159
|
-
state->cursor++;
|
|
1160
|
-
long count = state->stack->head - stack_head;
|
|
1161
|
-
state->current_nesting--;
|
|
1162
|
-
state->in_array--;
|
|
1163
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1164
|
-
}
|
|
1272
|
+
const char next_char = peek(state);
|
|
1165
1273
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
}
|
|
1274
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1275
|
+
state->cursor++;
|
|
1276
|
+
if (config->allow_trailing_comma) {
|
|
1277
|
+
json_eat_whitespace(state);
|
|
1278
|
+
if (peek(state) == ']') {
|
|
1279
|
+
continue;
|
|
1173
1280
|
}
|
|
1174
|
-
json_parse_any(state, config);
|
|
1175
|
-
continue;
|
|
1176
1281
|
}
|
|
1282
|
+
json_parse_any(state, config);
|
|
1283
|
+
continue;
|
|
1284
|
+
}
|
|
1285
|
+
|
|
1286
|
+
if (next_char == ']') {
|
|
1287
|
+
state->cursor++;
|
|
1288
|
+
long count = state->stack->head - stack_head;
|
|
1289
|
+
state->current_nesting--;
|
|
1290
|
+
state->in_array--;
|
|
1291
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1177
1292
|
}
|
|
1178
1293
|
|
|
1179
1294
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1187,7 +1302,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1187
1302
|
json_eat_whitespace(state);
|
|
1188
1303
|
long stack_head = state->stack->head;
|
|
1189
1304
|
|
|
1190
|
-
if ((state
|
|
1305
|
+
if (peek(state) == '}') {
|
|
1191
1306
|
state->cursor++;
|
|
1192
1307
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1193
1308
|
} else {
|
|
@@ -1196,13 +1311,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1196
1311
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1197
1312
|
}
|
|
1198
1313
|
|
|
1199
|
-
if (
|
|
1314
|
+
if (peek(state) != '"') {
|
|
1200
1315
|
raise_parse_error("expected object key, got %s", state);
|
|
1201
1316
|
}
|
|
1202
1317
|
json_parse_string(state, config, true);
|
|
1203
1318
|
|
|
1204
1319
|
json_eat_whitespace(state);
|
|
1205
|
-
if ((state
|
|
1320
|
+
if (peek(state) != ':') {
|
|
1206
1321
|
raise_parse_error("expected ':' after object key", state);
|
|
1207
1322
|
}
|
|
1208
1323
|
state->cursor++;
|
|
@@ -1213,46 +1328,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1213
1328
|
while (true) {
|
|
1214
1329
|
json_eat_whitespace(state);
|
|
1215
1330
|
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1331
|
+
const char next_char = peek(state);
|
|
1332
|
+
if (next_char == '}') {
|
|
1333
|
+
state->cursor++;
|
|
1334
|
+
state->current_nesting--;
|
|
1335
|
+
size_t count = state->stack->head - stack_head;
|
|
1221
1336
|
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1337
|
+
// Temporary rewind cursor in case an error is raised
|
|
1338
|
+
const char *final_cursor = state->cursor;
|
|
1339
|
+
state->cursor = object_start_cursor;
|
|
1340
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1341
|
+
state->cursor = final_cursor;
|
|
1227
1342
|
|
|
1228
|
-
|
|
1229
|
-
|
|
1343
|
+
return json_push_value(state, config, object);
|
|
1344
|
+
}
|
|
1230
1345
|
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1346
|
+
if (next_char == ',') {
|
|
1347
|
+
state->cursor++;
|
|
1348
|
+
json_eat_whitespace(state);
|
|
1234
1349
|
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
}
|
|
1350
|
+
if (config->allow_trailing_comma) {
|
|
1351
|
+
if (peek(state) == '}') {
|
|
1352
|
+
continue;
|
|
1239
1353
|
}
|
|
1354
|
+
}
|
|
1240
1355
|
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1356
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1357
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1358
|
+
}
|
|
1359
|
+
json_parse_string(state, config, true);
|
|
1245
1360
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1361
|
+
json_eat_whitespace(state);
|
|
1362
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1363
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1364
|
+
}
|
|
1365
|
+
state->cursor++;
|
|
1251
1366
|
|
|
1252
|
-
|
|
1367
|
+
json_parse_any(state, config);
|
|
1253
1368
|
|
|
1254
|
-
|
|
1255
|
-
}
|
|
1369
|
+
continue;
|
|
1256
1370
|
}
|
|
1257
1371
|
|
|
1258
1372
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1260,18 +1374,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1260
1374
|
break;
|
|
1261
1375
|
}
|
|
1262
1376
|
|
|
1377
|
+
case 0:
|
|
1378
|
+
raise_parse_error("unexpected end of input", state);
|
|
1379
|
+
break;
|
|
1380
|
+
|
|
1263
1381
|
default:
|
|
1264
1382
|
raise_parse_error("unexpected character: %s", state);
|
|
1265
1383
|
break;
|
|
1266
1384
|
}
|
|
1267
1385
|
|
|
1268
|
-
raise_parse_error("
|
|
1386
|
+
raise_parse_error("unreachable: %s", state);
|
|
1387
|
+
return Qundef;
|
|
1269
1388
|
}
|
|
1270
1389
|
|
|
1271
1390
|
/*
 * Skip any trailing whitespace, then raise a parse error unless eos() reports
 * that nothing is left to read.
 */
static void json_ensure_eof(JSON_ParserState *state)
{
    json_eat_whitespace(state);

    if (eos(state)) {
        return;
    }

    raise_parse_error("unexpected token at end of stream %s", state);
}
|
|
@@ -1314,7 +1433,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1314
1433
|
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1315
1434
|
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1316
1435
|
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1317
|
-
else if (key == sym_allow_duplicate_key)
|
|
1436
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1318
1437
|
else if (key == sym_decimal_class) {
|
|
1319
1438
|
if (RTEST(val)) {
|
|
1320
1439
|
if (rb_respond_to(val, i_try_convert)) {
|
|
@@ -1388,6 +1507,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1388
1507
|
*/
|
|
1389
1508
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1390
1509
|
{
|
|
1510
|
+
rb_check_frozen(self);
|
|
1391
1511
|
GET_PARSER_CONFIG;
|
|
1392
1512
|
|
|
1393
1513
|
parser_config_init(config, opts);
|
|
@@ -1483,7 +1603,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1483
1603
|
JSON_ParserConfig_memsize,
|
|
1484
1604
|
},
|
|
1485
1605
|
0, 0,
|
|
1486
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1606
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1487
1607
|
};
|
|
1488
1608
|
|
|
1489
1609
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1533,10 +1653,6 @@ void Init_parser(void)
|
|
|
1533
1653
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1534
1654
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1535
1655
|
|
|
1536
|
-
i_chr = rb_intern("chr");
|
|
1537
|
-
i_aset = rb_intern("[]=");
|
|
1538
|
-
i_aref = rb_intern("[]");
|
|
1539
|
-
i_leftshift = rb_intern("<<");
|
|
1540
1656
|
i_new = rb_intern("new");
|
|
1541
1657
|
i_try_convert = rb_intern("try_convert");
|
|
1542
1658
|
i_uminus = rb_intern("-@");
|