json 2.13.2 → 2.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +76 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +31 -54
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +279 -239
- data/ext/json/ext/json.h +97 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +527 -400
- data/ext/json/ext/simd/simd.h +15 -12
- data/ext/json/ext/vendor/fpconv.c +12 -11
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +60 -23
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +113 -63
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +23 -1
- metadata +6 -3
|
@@ -1,42 +1,13 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
|
|
40
11
|
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
41
12
|
|
|
42
13
|
static int binary_encindex;
|
|
@@ -44,7 +15,7 @@ static int utf8_encindex;
|
|
|
44
15
|
|
|
45
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
17
|
// For TruffleRuby
|
|
47
|
-
void
|
|
18
|
+
static void
|
|
48
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
20
|
{
|
|
50
21
|
long index = 0;
|
|
@@ -61,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
33
|
#endif
|
|
63
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
37
|
+
{
|
|
38
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
39
|
+
}
|
|
40
|
+
#endif
|
|
64
41
|
|
|
65
42
|
/* name cache */
|
|
66
43
|
|
|
@@ -106,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
83
|
cache->entries[index] = rstring;
|
|
107
84
|
}
|
|
108
85
|
|
|
109
|
-
|
|
86
|
+
#define rstring_cache_memcmp memcmp
|
|
87
|
+
|
|
88
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
89
|
+
#if __has_builtin(__builtin_bswap64)
|
|
90
|
+
#undef rstring_cache_memcmp
|
|
91
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
92
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
93
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
94
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
95
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
96
|
+
long i = 0;
|
|
97
|
+
|
|
98
|
+
for (; i + 8 <= length; i += 8) {
|
|
99
|
+
uint64_t a, b;
|
|
100
|
+
memcpy(&a, str + i, 8);
|
|
101
|
+
memcpy(&b, rptr + i, 8);
|
|
102
|
+
if (a != b) {
|
|
103
|
+
a = __builtin_bswap64(a);
|
|
104
|
+
b = __builtin_bswap64(b);
|
|
105
|
+
return (a < b) ? -1 : 1;
|
|
106
|
+
}
|
|
116
107
|
}
|
|
108
|
+
|
|
109
|
+
for (; i < length; i++) {
|
|
110
|
+
if (str[i] != rptr[i]) {
|
|
111
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
return 0;
|
|
117
116
|
}
|
|
117
|
+
#endif
|
|
118
|
+
#endif
|
|
118
119
|
|
|
119
|
-
static
|
|
120
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
121
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
122
|
+
const char *rstring_ptr;
|
|
123
|
+
long rstring_length;
|
|
124
|
+
|
|
125
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
126
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
127
|
+
if (length == rstring_length) {
|
|
128
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
129
|
+
} else {
|
|
130
|
+
return (int)(length - rstring_length);
|
|
132
131
|
}
|
|
132
|
+
}
|
|
133
133
|
|
|
134
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
135
|
+
{
|
|
134
136
|
int low = 0;
|
|
135
137
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
138
|
|
|
139
139
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
140
|
+
int mid = (high + low) >> 1;
|
|
141
141
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
142
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
143
|
|
|
144
|
-
if (
|
|
144
|
+
if (cmp == 0) {
|
|
145
145
|
return entry;
|
|
146
|
-
} else if (
|
|
146
|
+
} else if (cmp > 0) {
|
|
147
147
|
low = mid + 1;
|
|
148
148
|
} else {
|
|
149
149
|
high = mid - 1;
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
153
|
VALUE rstring = build_interned_string(str, length);
|
|
160
154
|
|
|
161
155
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
156
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
157
|
}
|
|
168
158
|
return rstring;
|
|
169
159
|
}
|
|
170
160
|
|
|
171
161
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
162
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
163
|
int low = 0;
|
|
187
164
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
165
|
|
|
191
166
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
167
|
+
int mid = (high + low) >> 1;
|
|
193
168
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
169
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
170
|
|
|
196
|
-
if (
|
|
171
|
+
if (cmp == 0) {
|
|
197
172
|
return entry;
|
|
198
|
-
} else if (
|
|
173
|
+
} else if (cmp > 0) {
|
|
199
174
|
low = mid + 1;
|
|
200
175
|
} else {
|
|
201
176
|
high = mid - 1;
|
|
202
177
|
}
|
|
203
178
|
}
|
|
204
179
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
180
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
181
|
|
|
213
182
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
183
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
184
|
}
|
|
220
185
|
return rsymbol;
|
|
221
186
|
}
|
|
@@ -330,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
295
|
}
|
|
331
296
|
}
|
|
332
297
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
299
|
{
|
|
344
300
|
int len = 1;
|
|
@@ -379,7 +335,7 @@ typedef struct JSON_ParserStruct {
|
|
|
379
335
|
int max_nesting;
|
|
380
336
|
bool allow_nan;
|
|
381
337
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
338
|
+
bool allow_control_characters;
|
|
383
339
|
bool symbolize_names;
|
|
384
340
|
bool freeze;
|
|
385
341
|
} JSON_ParserConfig;
|
|
@@ -395,6 +351,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
351
|
int current_nesting;
|
|
396
352
|
} JSON_ParserState;
|
|
397
353
|
|
|
354
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
355
|
+
return state->end - state->cursor;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
359
|
+
return state->cursor >= state->end;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
static inline char peek(JSON_ParserState *state)
|
|
363
|
+
{
|
|
364
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
365
|
+
return 0;
|
|
366
|
+
}
|
|
367
|
+
return *state->cursor;
|
|
368
|
+
}
|
|
369
|
+
|
|
398
370
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
371
|
{
|
|
400
372
|
const char *cursor = state->cursor;
|
|
@@ -530,61 +502,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
502
|
|
|
531
503
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
504
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
505
|
static void
|
|
542
506
|
json_eat_comments(JSON_ParserState *state)
|
|
543
507
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
508
|
+
const char *start = state->cursor;
|
|
509
|
+
state->cursor++;
|
|
510
|
+
|
|
511
|
+
switch (peek(state)) {
|
|
512
|
+
case '/': {
|
|
513
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
514
|
+
if (!state->cursor) {
|
|
515
|
+
state->cursor = state->end;
|
|
516
|
+
} else {
|
|
517
|
+
state->cursor++;
|
|
554
518
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
519
|
+
break;
|
|
520
|
+
}
|
|
521
|
+
case '*': {
|
|
522
|
+
state->cursor++;
|
|
523
|
+
|
|
524
|
+
while (true) {
|
|
525
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
526
|
+
if (!next_match) {
|
|
527
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
state->cursor = next_match + 1;
|
|
531
|
+
if (peek(state) == '/') {
|
|
532
|
+
state->cursor++;
|
|
533
|
+
break;
|
|
568
534
|
}
|
|
569
|
-
break;
|
|
570
535
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
536
|
+
break;
|
|
574
537
|
}
|
|
575
|
-
|
|
576
|
-
|
|
538
|
+
default:
|
|
539
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
540
|
+
break;
|
|
577
541
|
}
|
|
578
542
|
}
|
|
579
543
|
|
|
580
|
-
static
|
|
544
|
+
ALWAYS_INLINE(static) void
|
|
581
545
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
546
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
547
|
+
while (true) {
|
|
548
|
+
switch (peek(state)) {
|
|
549
|
+
case ' ':
|
|
550
|
+
state->cursor++;
|
|
551
|
+
break;
|
|
552
|
+
case '\n':
|
|
553
|
+
state->cursor++;
|
|
554
|
+
|
|
555
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
556
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
557
|
+
while (rest(state) > 8) {
|
|
558
|
+
uint64_t chunk;
|
|
559
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
560
|
+
if (chunk == 0x2020202020202020) {
|
|
561
|
+
state->cursor += 8;
|
|
562
|
+
continue;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
566
|
+
state->cursor += consecutive_spaces;
|
|
567
|
+
break;
|
|
568
|
+
}
|
|
569
|
+
#endif
|
|
570
|
+
break;
|
|
571
|
+
case '\t':
|
|
572
|
+
case '\r':
|
|
573
|
+
state->cursor++;
|
|
574
|
+
break;
|
|
575
|
+
case '/':
|
|
576
|
+
json_eat_comments(state);
|
|
577
|
+
break;
|
|
578
|
+
|
|
579
|
+
default:
|
|
580
|
+
return;
|
|
588
581
|
}
|
|
589
582
|
}
|
|
590
583
|
}
|
|
@@ -615,11 +608,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
608
|
return result;
|
|
616
609
|
}
|
|
617
610
|
|
|
618
|
-
static inline
|
|
611
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
612
|
+
{
|
|
613
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
614
|
+
// Simple heuristics:
|
|
615
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
616
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
617
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
619
621
|
{
|
|
622
|
+
bool intern = is_name || config->freeze;
|
|
623
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
624
|
size_t bufferSize = stringEnd - string;
|
|
621
625
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
626
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
627
|
VALUE cached_key;
|
|
624
628
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
629
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,60 +639,73 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
639
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
640
|
}
|
|
637
641
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
642
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
643
|
+
typedef struct _json_unescape_positions {
|
|
644
|
+
long size;
|
|
645
|
+
const char **positions;
|
|
646
|
+
bool has_more;
|
|
647
|
+
} JSON_UnescapePositions;
|
|
645
648
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
649
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
650
|
+
{
|
|
651
|
+
while (positions->size) {
|
|
652
|
+
positions->size--;
|
|
653
|
+
const char *next_position = positions->positions[0];
|
|
654
|
+
positions->positions++;
|
|
655
|
+
if (next_position >= pe) {
|
|
656
|
+
return next_position;
|
|
652
657
|
}
|
|
658
|
+
}
|
|
653
659
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
}
|
|
660
|
+
if (positions->has_more) {
|
|
661
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
662
|
}
|
|
658
663
|
|
|
664
|
+
return NULL;
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
668
|
+
{
|
|
669
|
+
bool intern = is_name || config->freeze;
|
|
670
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
671
|
+
size_t bufferSize = stringEnd - string;
|
|
672
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
673
|
+
char *buffer;
|
|
674
|
+
|
|
659
675
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
676
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
677
|
buffer = RSTRING_PTR(result);
|
|
662
678
|
bufferStart = buffer;
|
|
663
679
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
680
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
681
|
+
|
|
682
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
683
|
if (pe > p) {
|
|
668
684
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
685
|
buffer += pe - p;
|
|
670
686
|
}
|
|
671
687
|
switch (*++pe) {
|
|
688
|
+
case '"':
|
|
689
|
+
case '/':
|
|
690
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
691
|
+
break;
|
|
692
|
+
case '\\':
|
|
693
|
+
APPEND_CHAR('\\');
|
|
694
|
+
break;
|
|
672
695
|
case 'n':
|
|
673
|
-
|
|
696
|
+
APPEND_CHAR('\n');
|
|
674
697
|
break;
|
|
675
698
|
case 'r':
|
|
676
|
-
|
|
699
|
+
APPEND_CHAR('\r');
|
|
677
700
|
break;
|
|
678
701
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
702
|
+
APPEND_CHAR('\t');
|
|
686
703
|
break;
|
|
687
704
|
case 'b':
|
|
688
|
-
|
|
705
|
+
APPEND_CHAR('\b');
|
|
689
706
|
break;
|
|
690
707
|
case 'f':
|
|
691
|
-
|
|
708
|
+
APPEND_CHAR('\f');
|
|
692
709
|
break;
|
|
693
710
|
case 'u':
|
|
694
711
|
if (pe > stringEnd - 5) {
|
|
@@ -713,26 +730,42 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
713
730
|
}
|
|
714
731
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
732
|
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
733
|
+
|
|
734
|
+
if ((sur & 0xFC00) != 0xDC00) {
|
|
735
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
736
|
+
}
|
|
737
|
+
|
|
716
738
|
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
717
739
|
| (sur & 0x3FF));
|
|
718
740
|
pe += 5;
|
|
719
741
|
} else {
|
|
720
|
-
|
|
742
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
721
743
|
break;
|
|
722
744
|
}
|
|
723
745
|
}
|
|
724
|
-
|
|
725
|
-
|
|
746
|
+
|
|
747
|
+
char buf[4];
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
749
|
+
MEMCPY(buffer, buf, char, unescape_len);
|
|
750
|
+
buffer += unescape_len;
|
|
751
|
+
p = ++pe;
|
|
726
752
|
}
|
|
727
753
|
break;
|
|
728
754
|
default:
|
|
729
|
-
|
|
730
|
-
|
|
755
|
+
if ((unsigned char)*pe < 0x20) {
|
|
756
|
+
if (!config->allow_control_characters) {
|
|
757
|
+
if (*pe == '\n') {
|
|
758
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
759
|
+
}
|
|
760
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
761
|
+
}
|
|
762
|
+
} else {
|
|
763
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
764
|
+
}
|
|
765
|
+
break;
|
|
731
766
|
}
|
|
732
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
733
|
-
buffer += unescape_len;
|
|
734
|
-
p = ++pe;
|
|
735
767
|
}
|
|
768
|
+
#undef APPEND_CHAR
|
|
736
769
|
|
|
737
770
|
if (stringEnd > p) {
|
|
738
771
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -743,33 +776,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
743
776
|
if (symbolize) {
|
|
744
777
|
result = rb_str_intern(result);
|
|
745
778
|
} else if (intern) {
|
|
746
|
-
result =
|
|
779
|
+
result = rb_str_to_interned_str(result);
|
|
747
780
|
}
|
|
748
781
|
|
|
749
782
|
return result;
|
|
750
783
|
}
|
|
751
784
|
|
|
752
785
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
753
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
754
|
-
{
|
|
755
|
-
bool negative = false;
|
|
756
|
-
if (*p == '-') {
|
|
757
|
-
negative = true;
|
|
758
|
-
p++;
|
|
759
|
-
}
|
|
760
|
-
|
|
761
|
-
long long memo = 0;
|
|
762
|
-
while (p < pe) {
|
|
763
|
-
memo *= 10;
|
|
764
|
-
memo += *p - '0';
|
|
765
|
-
p++;
|
|
766
|
-
}
|
|
767
|
-
|
|
768
|
-
if (negative) {
|
|
769
|
-
memo = -memo;
|
|
770
|
-
}
|
|
771
|
-
return LL2NUM(memo);
|
|
772
|
-
}
|
|
773
786
|
|
|
774
787
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
775
788
|
{
|
|
@@ -783,17 +796,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
783
796
|
}
|
|
784
797
|
|
|
785
798
|
static inline VALUE
|
|
786
|
-
json_decode_integer(const char *start, const char *end)
|
|
799
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
787
800
|
{
|
|
788
|
-
|
|
789
|
-
if (
|
|
790
|
-
return
|
|
801
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
802
|
+
if (negative) {
|
|
803
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
791
804
|
}
|
|
792
|
-
return
|
|
805
|
+
return UINT64T2NUM(mantissa);
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
return json_decode_large_integer(start, end - start);
|
|
793
809
|
}
|
|
794
810
|
|
|
795
811
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
796
812
|
{
|
|
813
|
+
if (RB_LIKELY(len < 64)) {
|
|
814
|
+
char buffer[64];
|
|
815
|
+
MEMCPY(buffer, start, char, len);
|
|
816
|
+
buffer[len] = '\0';
|
|
817
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
+
}
|
|
819
|
+
|
|
797
820
|
VALUE buffer_v;
|
|
798
821
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
799
822
|
MEMCPY(buffer, start, char, len);
|
|
@@ -803,21 +826,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
803
826
|
return number;
|
|
804
827
|
}
|
|
805
828
|
|
|
806
|
-
|
|
829
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
830
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
831
|
+
*/
|
|
832
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
833
|
+
const char *start, const char *end)
|
|
807
834
|
{
|
|
808
|
-
long len = end - start;
|
|
809
|
-
|
|
810
835
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
811
|
-
VALUE text = rb_str_new(start,
|
|
836
|
+
VALUE text = rb_str_new(start, end - start);
|
|
812
837
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
813
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
814
|
-
char buffer[64];
|
|
815
|
-
MEMCPY(buffer, start, char, len);
|
|
816
|
-
buffer[len] = '\0';
|
|
817
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
-
} else {
|
|
819
|
-
return json_decode_large_float(start, len);
|
|
820
838
|
}
|
|
839
|
+
|
|
840
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
841
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
842
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
843
|
+
return json_decode_large_float(start, end - start);
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
821
847
|
}
|
|
822
848
|
|
|
823
849
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -903,20 +929,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
903
929
|
return object;
|
|
904
930
|
}
|
|
905
931
|
|
|
906
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
907
|
-
{
|
|
908
|
-
VALUE string;
|
|
909
|
-
bool intern = is_name || config->freeze;
|
|
910
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
911
|
-
if (escaped) {
|
|
912
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
913
|
-
} else {
|
|
914
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
915
|
-
}
|
|
916
|
-
|
|
917
|
-
return string;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
932
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
921
933
|
{
|
|
922
934
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -939,17 +951,11 @@ static const bool string_scan_table[256] = {
|
|
|
939
951
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
940
952
|
};
|
|
941
953
|
|
|
942
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
943
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
944
|
-
#else
|
|
945
|
-
#define FORCE_INLINE
|
|
946
|
-
#endif
|
|
947
|
-
|
|
948
954
|
#ifdef HAVE_SIMD
|
|
949
955
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
950
956
|
#endif /* HAVE_SIMD */
|
|
951
957
|
|
|
952
|
-
static
|
|
958
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
953
959
|
{
|
|
954
960
|
#ifdef HAVE_SIMD
|
|
955
961
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -957,7 +963,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
957
963
|
uint64_t mask = 0;
|
|
958
964
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
959
965
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
960
|
-
return
|
|
966
|
+
return true;
|
|
961
967
|
}
|
|
962
968
|
|
|
963
969
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -965,64 +971,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
965
971
|
int mask = 0;
|
|
966
972
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
967
973
|
state->cursor += trailing_zeros(mask);
|
|
968
|
-
return
|
|
974
|
+
return true;
|
|
969
975
|
}
|
|
970
976
|
}
|
|
971
977
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
972
978
|
#endif /* HAVE_SIMD */
|
|
973
979
|
|
|
974
|
-
while (state
|
|
980
|
+
while (!eos(state)) {
|
|
975
981
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
976
|
-
return
|
|
982
|
+
return true;
|
|
977
983
|
}
|
|
978
|
-
|
|
984
|
+
state->cursor++;
|
|
979
985
|
}
|
|
980
|
-
return
|
|
986
|
+
return false;
|
|
981
987
|
}
|
|
982
988
|
|
|
983
|
-
static
|
|
989
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
984
990
|
{
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
991
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
992
|
+
JSON_UnescapePositions positions = {
|
|
993
|
+
.size = 0,
|
|
994
|
+
.positions = backslashes,
|
|
995
|
+
.has_more = false,
|
|
996
|
+
};
|
|
988
997
|
|
|
989
|
-
|
|
998
|
+
do {
|
|
990
999
|
switch (*state->cursor) {
|
|
991
1000
|
case '"': {
|
|
992
|
-
VALUE string =
|
|
1001
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
993
1002
|
state->cursor++;
|
|
994
1003
|
return json_push_value(state, config, string);
|
|
995
1004
|
}
|
|
996
1005
|
case '\\': {
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1006
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1007
|
+
backslashes[positions.size] = state->cursor;
|
|
1008
|
+
positions.size++;
|
|
1009
|
+
} else {
|
|
1010
|
+
positions.has_more = true;
|
|
1001
1011
|
}
|
|
1012
|
+
state->cursor++;
|
|
1002
1013
|
break;
|
|
1003
1014
|
}
|
|
1004
1015
|
default:
|
|
1005
|
-
|
|
1016
|
+
if (!config->allow_control_characters) {
|
|
1017
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1018
|
+
}
|
|
1006
1019
|
break;
|
|
1007
1020
|
}
|
|
1008
1021
|
|
|
1009
1022
|
state->cursor++;
|
|
1010
|
-
}
|
|
1023
|
+
} while (string_scan(state));
|
|
1011
1024
|
|
|
1012
1025
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1013
1026
|
return Qfalse;
|
|
1014
1027
|
}
|
|
1015
1028
|
|
|
1029
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1030
|
+
{
|
|
1031
|
+
state->cursor++;
|
|
1032
|
+
const char *start = state->cursor;
|
|
1033
|
+
|
|
1034
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1035
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1039
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1040
|
+
state->cursor++;
|
|
1041
|
+
return json_push_value(state, config, string);
|
|
1042
|
+
}
|
|
1043
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1044
|
+
}
|
|
1045
|
+
|
|
1046
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1047
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1048
|
+
// Additional References:
|
|
1049
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1050
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1051
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1052
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1053
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1054
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1055
|
+
val -= 0x3030303030303030;
|
|
1056
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1057
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1058
|
+
return val;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1062
|
+
const uint32_t mask = 0x000000FF;
|
|
1063
|
+
const uint32_t mul1 = 100;
|
|
1064
|
+
val -= 0x30303030;
|
|
1065
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1066
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1067
|
+
return val;
|
|
1068
|
+
}
|
|
1069
|
+
#endif
|
|
1070
|
+
|
|
1071
|
+
/*
 * Consumes the run of ASCII decimal digits found at state->cursor.
 *
 * Each digit is appended to *accumulator (accumulator * 10 + digit); the
 * accumulator is not reset, so consecutive calls keep extending one value.
 * The cursor is left on the first non-digit byte.
 *
 * Returns the number of digits consumed, 0 when the cursor is not on a digit.
 * No overflow check is performed here: the unsigned arithmetic simply wraps
 * on long runs.
 */
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
    const char *run_start = state->cursor;

#if JSON_CPU_LITTLE_ENDIAN_64BITS
    while (rest(state) >= sizeof(uint64_t)) {
        uint64_t chunk;
        memcpy(&chunk, state->cursor, sizeof(uint64_t));

        // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
        // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
        // A byte is a digit when its high nibble is 3 and adding 6 does not
        // push it out of the 0x3_ range; a digit byte therefore maps to 0x33.
        const uint64_t high_nibbles = chunk & 0xF0F0F0F0F0F0F0F0;
        const uint64_t biased_nibbles = ((chunk + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4;
        const uint64_t signature = high_nibbles | biased_nibbles;

        if (signature != 0x3333333333333333) {
            // The lowest mismatching byte marks the end of the digit run.
            uint32_t digits_left = trailing_zeros64(signature ^ 0x3333333333333333) / CHAR_BIT;

            if (digits_left >= 4) {
                *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)chunk);
                state->cursor += 4;
                digits_left -= 4;
            }

            for (; digits_left; digits_left--) {
                *accumulator = *accumulator * 10 + (*state->cursor - '0');
                state->cursor++;
            }

            return (int)(state->cursor - run_start);
        }

        // 8 consecutive digits
        *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(chunk);
        state->cursor += 8;
    }
#endif

    // Byte-at-a-time path: the only path on other CPUs, and the tail handler
    // once fewer than 8 bytes remain.
    for (char current = peek(state); rb_isdigit(current); current = peek(state)) {
        *accumulator = *accumulator * 10 + (current - '0');
        state->cursor++;
    }

    return (int)(state->cursor - run_start);
}
|
|
1115
|
+
|
|
1116
|
+
/*
 * Parses a JSON number and returns the decoded Ruby value.
 *
 * On entry state->cursor points at the first digit (any leading '-' has
 * already been consumed by the caller). `negative` tells whether that sign was
 * present and `start` points at the first byte of the literal, sign included;
 * it is used for error reporting and handed to the decoders.
 *
 * Accepted shape: /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
 *
 * Raises a parse error ("invalid number: %s") for a leading zero followed by
 * more digits, a '-' without digits, a '.' without fractional digits or an
 * exponent marker without digits.
 *
 * Literals without fraction and exponent go to json_decode_integer, everything
 * else to json_decode_float together with the digits collected on the way
 * (mantissa, digit count, decimal exponent).
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    bool integer = true;
    const char first_digit = *state->cursor;

    // Variables for Ryu optimization - extract digits during parsing
    int32_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        // Fractional digits are appended to the same mantissa accumulator.
        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        const char *exponent_text = state->cursor;
        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // Up to 9 digits always fit an int32_t. Longer exponents may not: a
        // plain (int32_t) cast would turn 4294967296 into 0, and the 64-bit
        // accumulator itself wraps past 19 digits. Re-read the digits with
        // saturation so an out-of-range exponent stays out of range (leading
        // zeros are handled naturally). The limit leaves headroom for the
        // decimal point adjustment below.
        // NOTE(review): assumes json_decode_float treats any |exponent| this
        // large as overflow/underflow - confirm against its implementation.
        if (RB_UNLIKELY(exponent_digits > 9)) {
            const uint64_t exponent_limit = 1000000000;

            abs_exponent = 0;
            for (const char *digit = exponent_text; digit < state->cursor; digit++) {
                abs_exponent = abs_exponent * 10 + (uint64_t)(*digit - '0');
                if (abs_exponent >= exponent_limit) {
                    abs_exponent = exponent_limit;
                    break;
                }
            }
        }

        exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position: the mantissa was read
    // as one integer, so every fractional digit lowers the exponent by one.
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
|
|
1180
|
+
|
|
1181
|
+
/*
 * Parses an unsigned number: the literal starts right at the cursor, so the
 * current cursor position doubles as the start of the literal.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *literal_start = state->cursor;

    return json_parse_number(state, config, false, literal_start);
}
|
|
1185
|
+
|
|
1186
|
+
/*
 * Parses a number introduced by '-': remembers where the literal starts
 * (on the sign), steps over the sign and parses the digits as negative.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *sign_position = state->cursor++;

    return json_parse_number(state, config, true, sign_position);
}
|
|
1192
|
+
|
|
1016
1193
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1017
1194
|
{
|
|
1018
1195
|
json_eat_whitespace(state);
|
|
1019
|
-
if (state->cursor >= state->end) {
|
|
1020
|
-
raise_parse_error("unexpected end of input", state);
|
|
1021
|
-
}
|
|
1022
1196
|
|
|
1023
|
-
switch (
|
|
1197
|
+
switch (peek(state)) {
|
|
1024
1198
|
case 'n':
|
|
1025
|
-
if ((state
|
|
1199
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1026
1200
|
state->cursor += 4;
|
|
1027
1201
|
return json_push_value(state, config, Qnil);
|
|
1028
1202
|
}
|
|
@@ -1030,7 +1204,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1030
1204
|
raise_parse_error("unexpected token %s", state);
|
|
1031
1205
|
break;
|
|
1032
1206
|
case 't':
|
|
1033
|
-
if ((state
|
|
1207
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1034
1208
|
state->cursor += 4;
|
|
1035
1209
|
return json_push_value(state, config, Qtrue);
|
|
1036
1210
|
}
|
|
@@ -1039,7 +1213,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1039
1213
|
break;
|
|
1040
1214
|
case 'f':
|
|
1041
1215
|
// Note: memcmp with a small power-of-two length compiles to an integer comparison
|
|
1042
|
-
if ((state
|
|
1216
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1043
1217
|
state->cursor += 5;
|
|
1044
1218
|
return json_push_value(state, config, Qfalse);
|
|
1045
1219
|
}
|
|
@@ -1048,7 +1222,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1048
1222
|
break;
|
|
1049
1223
|
case 'N':
|
|
1050
1224
|
// Note: memcmp with a small power-of-two length compiles to an integer comparison
|
|
1051
|
-
if (config->allow_nan && (state
|
|
1225
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1052
1226
|
state->cursor += 3;
|
|
1053
1227
|
return json_push_value(state, config, CNaN);
|
|
1054
1228
|
}
|
|
@@ -1056,16 +1230,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1056
1230
|
raise_parse_error("unexpected token %s", state);
|
|
1057
1231
|
break;
|
|
1058
1232
|
case 'I':
|
|
1059
|
-
if (config->allow_nan && (state
|
|
1233
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1060
1234
|
state->cursor += 8;
|
|
1061
1235
|
return json_push_value(state, config, CInfinity);
|
|
1062
1236
|
}
|
|
1063
1237
|
|
|
1064
1238
|
raise_parse_error("unexpected token %s", state);
|
|
1065
1239
|
break;
|
|
1066
|
-
case '-':
|
|
1240
|
+
case '-': {
|
|
1067
1241
|
// Note: memcmp with a small power-of-two length compiles to an integer comparison
|
|
1068
|
-
if ((state
|
|
1242
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1069
1243
|
if (config->allow_nan) {
|
|
1070
1244
|
state->cursor += 9;
|
|
1071
1245
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1073,62 +1247,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1247
|
raise_parse_error("unexpected token %s", state);
|
|
1074
1248
|
}
|
|
1075
1249
|
}
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
bool integer = true;
|
|
1079
|
-
|
|
1080
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1081
|
-
const char *start = state->cursor;
|
|
1082
|
-
state->cursor++;
|
|
1083
|
-
|
|
1084
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1085
|
-
state->cursor++;
|
|
1086
|
-
}
|
|
1087
|
-
|
|
1088
|
-
long integer_length = state->cursor - start;
|
|
1089
|
-
|
|
1090
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1091
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1092
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1093
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1094
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1095
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1096
|
-
}
|
|
1097
|
-
|
|
1098
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1099
|
-
integer = false;
|
|
1100
|
-
state->cursor++;
|
|
1101
|
-
|
|
1102
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1103
|
-
raise_parse_error("invalid number: %s", state);
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1107
|
-
state->cursor++;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1112
|
-
integer = false;
|
|
1113
|
-
state->cursor++;
|
|
1114
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1115
|
-
state->cursor++;
|
|
1116
|
-
}
|
|
1117
|
-
|
|
1118
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1119
|
-
raise_parse_error("invalid number: %s", state);
|
|
1120
|
-
}
|
|
1121
|
-
|
|
1122
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1123
|
-
state->cursor++;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
if (integer) {
|
|
1128
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1129
|
-
}
|
|
1130
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1250
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1251
|
+
break;
|
|
1131
1252
|
}
|
|
1253
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1254
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1255
|
+
break;
|
|
1132
1256
|
case '"': {
|
|
1133
1257
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1134
1258
|
return json_parse_string(state, config, false);
|
|
@@ -1139,7 +1263,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1139
1263
|
json_eat_whitespace(state);
|
|
1140
1264
|
long stack_head = state->stack->head;
|
|
1141
1265
|
|
|
1142
|
-
if ((state
|
|
1266
|
+
if (peek(state) == ']') {
|
|
1143
1267
|
state->cursor++;
|
|
1144
1268
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1145
1269
|
} else {
|
|
@@ -1154,26 +1278,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1154
1278
|
while (true) {
|
|
1155
1279
|
json_eat_whitespace(state);
|
|
1156
1280
|
|
|
1157
|
-
|
|
1158
|
-
if (*state->cursor == ']') {
|
|
1159
|
-
state->cursor++;
|
|
1160
|
-
long count = state->stack->head - stack_head;
|
|
1161
|
-
state->current_nesting--;
|
|
1162
|
-
state->in_array--;
|
|
1163
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1164
|
-
}
|
|
1281
|
+
const char next_char = peek(state);
|
|
1165
1282
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
}
|
|
1283
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1284
|
+
state->cursor++;
|
|
1285
|
+
if (config->allow_trailing_comma) {
|
|
1286
|
+
json_eat_whitespace(state);
|
|
1287
|
+
if (peek(state) == ']') {
|
|
1288
|
+
continue;
|
|
1173
1289
|
}
|
|
1174
|
-
json_parse_any(state, config);
|
|
1175
|
-
continue;
|
|
1176
1290
|
}
|
|
1291
|
+
json_parse_any(state, config);
|
|
1292
|
+
continue;
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
if (next_char == ']') {
|
|
1296
|
+
state->cursor++;
|
|
1297
|
+
long count = state->stack->head - stack_head;
|
|
1298
|
+
state->current_nesting--;
|
|
1299
|
+
state->in_array--;
|
|
1300
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1177
1301
|
}
|
|
1178
1302
|
|
|
1179
1303
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1187,7 +1311,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1187
1311
|
json_eat_whitespace(state);
|
|
1188
1312
|
long stack_head = state->stack->head;
|
|
1189
1313
|
|
|
1190
|
-
if ((state
|
|
1314
|
+
if (peek(state) == '}') {
|
|
1191
1315
|
state->cursor++;
|
|
1192
1316
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1193
1317
|
} else {
|
|
@@ -1196,13 +1320,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1196
1320
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1197
1321
|
}
|
|
1198
1322
|
|
|
1199
|
-
if (
|
|
1323
|
+
if (peek(state) != '"') {
|
|
1200
1324
|
raise_parse_error("expected object key, got %s", state);
|
|
1201
1325
|
}
|
|
1202
1326
|
json_parse_string(state, config, true);
|
|
1203
1327
|
|
|
1204
1328
|
json_eat_whitespace(state);
|
|
1205
|
-
if ((state
|
|
1329
|
+
if (peek(state) != ':') {
|
|
1206
1330
|
raise_parse_error("expected ':' after object key", state);
|
|
1207
1331
|
}
|
|
1208
1332
|
state->cursor++;
|
|
@@ -1213,46 +1337,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1213
1337
|
while (true) {
|
|
1214
1338
|
json_eat_whitespace(state);
|
|
1215
1339
|
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1340
|
+
const char next_char = peek(state);
|
|
1341
|
+
if (next_char == '}') {
|
|
1342
|
+
state->cursor++;
|
|
1343
|
+
state->current_nesting--;
|
|
1344
|
+
size_t count = state->stack->head - stack_head;
|
|
1221
1345
|
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1346
|
+
// Temporary rewind cursor in case an error is raised
|
|
1347
|
+
const char *final_cursor = state->cursor;
|
|
1348
|
+
state->cursor = object_start_cursor;
|
|
1349
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1350
|
+
state->cursor = final_cursor;
|
|
1227
1351
|
|
|
1228
|
-
|
|
1229
|
-
|
|
1352
|
+
return json_push_value(state, config, object);
|
|
1353
|
+
}
|
|
1230
1354
|
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1355
|
+
if (next_char == ',') {
|
|
1356
|
+
state->cursor++;
|
|
1357
|
+
json_eat_whitespace(state);
|
|
1234
1358
|
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
}
|
|
1359
|
+
if (config->allow_trailing_comma) {
|
|
1360
|
+
if (peek(state) == '}') {
|
|
1361
|
+
continue;
|
|
1239
1362
|
}
|
|
1363
|
+
}
|
|
1240
1364
|
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1365
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1366
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1367
|
+
}
|
|
1368
|
+
json_parse_string(state, config, true);
|
|
1245
1369
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1370
|
+
json_eat_whitespace(state);
|
|
1371
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1372
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1373
|
+
}
|
|
1374
|
+
state->cursor++;
|
|
1251
1375
|
|
|
1252
|
-
|
|
1376
|
+
json_parse_any(state, config);
|
|
1253
1377
|
|
|
1254
|
-
|
|
1255
|
-
}
|
|
1378
|
+
continue;
|
|
1256
1379
|
}
|
|
1257
1380
|
|
|
1258
1381
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1260,18 +1383,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1260
1383
|
break;
|
|
1261
1384
|
}
|
|
1262
1385
|
|
|
1386
|
+
case 0:
|
|
1387
|
+
raise_parse_error("unexpected end of input", state);
|
|
1388
|
+
break;
|
|
1389
|
+
|
|
1263
1390
|
default:
|
|
1264
1391
|
raise_parse_error("unexpected character: %s", state);
|
|
1265
1392
|
break;
|
|
1266
1393
|
}
|
|
1267
1394
|
|
|
1268
|
-
raise_parse_error("
|
|
1395
|
+
raise_parse_error("unreachable: %s", state);
|
|
1396
|
+
return Qundef;
|
|
1269
1397
|
}
|
|
1270
1398
|
|
|
1271
1399
|
/*
 * Verifies that nothing but whitespace follows the parsed document.
 * Skips trailing whitespace, then raises a parse error if the cursor is not
 * at the end of the input.
 */
static void json_ensure_eof(JSON_ParserState *state)
{
    json_eat_whitespace(state);

    if (eos(state)) {
        return;
    }

    raise_parse_error("unexpected token at end of stream %s", state);
}
|
|
@@ -1308,14 +1436,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1308
1436
|
{
|
|
1309
1437
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1310
1438
|
|
|
1311
|
-
if (key == sym_max_nesting)
|
|
1312
|
-
else if (key == sym_allow_nan)
|
|
1313
|
-
else if (key == sym_allow_trailing_comma)
|
|
1314
|
-
else if (key ==
|
|
1315
|
-
else if (key ==
|
|
1316
|
-
else if (key ==
|
|
1317
|
-
else if (key ==
|
|
1318
|
-
else if (key ==
|
|
1439
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1440
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1441
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1442
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1443
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1444
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1445
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1446
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1447
|
+
else if (key == sym_decimal_class) {
|
|
1319
1448
|
if (RTEST(val)) {
|
|
1320
1449
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1321
1450
|
config->decimal_class = val;
|
|
@@ -1388,6 +1517,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1388
1517
|
*/
|
|
1389
1518
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1390
1519
|
{
|
|
1520
|
+
rb_check_frozen(self);
|
|
1391
1521
|
GET_PARSER_CONFIG;
|
|
1392
1522
|
|
|
1393
1523
|
parser_config_init(config, opts);
|
|
@@ -1483,7 +1613,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1483
1613
|
JSON_ParserConfig_memsize,
|
|
1484
1614
|
},
|
|
1485
1615
|
0, 0,
|
|
1486
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1616
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1487
1617
|
};
|
|
1488
1618
|
|
|
1489
1619
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1527,16 +1657,13 @@ void Init_parser(void)
|
|
|
1527
1657
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1528
1658
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1529
1659
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1660
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1530
1661
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1531
1662
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1532
1663
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1533
1664
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1534
1665
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1535
1666
|
|
|
1536
|
-
i_chr = rb_intern("chr");
|
|
1537
|
-
i_aset = rb_intern("[]=");
|
|
1538
|
-
i_aref = rb_intern("[]");
|
|
1539
|
-
i_leftshift = rb_intern("<<");
|
|
1540
1667
|
i_new = rb_intern("new");
|
|
1541
1668
|
i_try_convert = rb_intern("try_convert");
|
|
1542
1669
|
i_uminus = rb_intern("-@");
|