json 2.15.1 → 2.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +35 -0
- data/LEGAL +12 -0
- data/README.md +17 -1
- data/ext/json/ext/fbuffer/fbuffer.h +9 -58
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +192 -159
- data/ext/json/ext/json.h +97 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +519 -397
- data/ext/json/ext/simd/simd.h +15 -12
- data/ext/json/ext/vendor/fpconv.c +2 -2
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/common.rb +28 -16
- data/lib/json/ext/generator/state.rb +4 -0
- data/lib/json/truffle_ruby/generator.rb +53 -21
- data/lib/json/version.rb +1 -1
- metadata +3 -1
|
@@ -1,42 +1,13 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
|
|
40
11
|
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
41
12
|
|
|
42
13
|
static int binary_encindex;
|
|
@@ -44,7 +15,7 @@ static int utf8_encindex;
|
|
|
44
15
|
|
|
45
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
17
|
// For TruffleRuby
|
|
47
|
-
void
|
|
18
|
+
static void
|
|
48
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
20
|
{
|
|
50
21
|
long index = 0;
|
|
@@ -61,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
33
|
#endif
|
|
63
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
37
|
+
{
|
|
38
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
39
|
+
}
|
|
40
|
+
#endif
|
|
64
41
|
|
|
65
42
|
/* name cache */
|
|
66
43
|
|
|
@@ -106,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
83
|
cache->entries[index] = rstring;
|
|
107
84
|
}
|
|
108
85
|
|
|
109
|
-
|
|
86
|
+
#define rstring_cache_memcmp memcmp
|
|
87
|
+
|
|
88
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
89
|
+
#if __has_builtin(__builtin_bswap64)
|
|
90
|
+
#undef rstring_cache_memcmp
|
|
91
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
92
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
93
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
94
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
95
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
96
|
+
long i = 0;
|
|
97
|
+
|
|
98
|
+
for (; i + 8 <= length; i += 8) {
|
|
99
|
+
uint64_t a, b;
|
|
100
|
+
memcpy(&a, str + i, 8);
|
|
101
|
+
memcpy(&b, rptr + i, 8);
|
|
102
|
+
if (a != b) {
|
|
103
|
+
a = __builtin_bswap64(a);
|
|
104
|
+
b = __builtin_bswap64(b);
|
|
105
|
+
return (a < b) ? -1 : 1;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (; i < length; i++) {
|
|
110
|
+
if (str[i] != rptr[i]) {
|
|
111
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
112
|
+
}
|
|
116
113
|
}
|
|
114
|
+
|
|
115
|
+
return 0;
|
|
117
116
|
}
|
|
117
|
+
#endif
|
|
118
|
+
#endif
|
|
118
119
|
|
|
119
|
-
static
|
|
120
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
121
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
}
|
|
122
|
+
const char *rstring_ptr;
|
|
123
|
+
long rstring_length;
|
|
124
|
+
|
|
125
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
126
|
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
return Qfalse;
|
|
127
|
+
if (length == rstring_length) {
|
|
128
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
129
|
+
} else {
|
|
130
|
+
return (int)(length - rstring_length);
|
|
132
131
|
}
|
|
132
|
+
}
|
|
133
133
|
|
|
134
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
135
|
+
{
|
|
134
136
|
int low = 0;
|
|
135
137
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
138
|
|
|
139
139
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
140
|
+
int mid = (high + low) >> 1;
|
|
141
141
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
142
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
143
|
|
|
144
|
-
if (
|
|
144
|
+
if (cmp == 0) {
|
|
145
145
|
return entry;
|
|
146
|
-
} else if (
|
|
146
|
+
} else if (cmp > 0) {
|
|
147
147
|
low = mid + 1;
|
|
148
148
|
} else {
|
|
149
149
|
high = mid - 1;
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
153
|
VALUE rstring = build_interned_string(str, length);
|
|
160
154
|
|
|
161
155
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
156
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
157
|
}
|
|
168
158
|
return rstring;
|
|
169
159
|
}
|
|
170
160
|
|
|
171
161
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
162
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
163
|
int low = 0;
|
|
187
164
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
165
|
|
|
191
166
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
167
|
+
int mid = (high + low) >> 1;
|
|
193
168
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
169
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
170
|
|
|
196
|
-
if (
|
|
171
|
+
if (cmp == 0) {
|
|
197
172
|
return entry;
|
|
198
|
-
} else if (
|
|
173
|
+
} else if (cmp > 0) {
|
|
199
174
|
low = mid + 1;
|
|
200
175
|
} else {
|
|
201
176
|
high = mid - 1;
|
|
202
177
|
}
|
|
203
178
|
}
|
|
204
179
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
180
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
181
|
|
|
213
182
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
183
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
184
|
}
|
|
220
185
|
return rsymbol;
|
|
221
186
|
}
|
|
@@ -330,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
295
|
}
|
|
331
296
|
}
|
|
332
297
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
299
|
{
|
|
344
300
|
int len = 1;
|
|
@@ -379,7 +335,7 @@ typedef struct JSON_ParserStruct {
|
|
|
379
335
|
int max_nesting;
|
|
380
336
|
bool allow_nan;
|
|
381
337
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
338
|
+
bool allow_control_characters;
|
|
383
339
|
bool symbolize_names;
|
|
384
340
|
bool freeze;
|
|
385
341
|
} JSON_ParserConfig;
|
|
@@ -395,6 +351,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
351
|
int current_nesting;
|
|
396
352
|
} JSON_ParserState;
|
|
397
353
|
|
|
354
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
355
|
+
return state->end - state->cursor;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
359
|
+
return state->cursor >= state->end;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
static inline char peek(JSON_ParserState *state)
|
|
363
|
+
{
|
|
364
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
365
|
+
return 0;
|
|
366
|
+
}
|
|
367
|
+
return *state->cursor;
|
|
368
|
+
}
|
|
369
|
+
|
|
398
370
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
371
|
{
|
|
400
372
|
const char *cursor = state->cursor;
|
|
@@ -530,61 +502,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
502
|
|
|
531
503
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
504
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
505
|
static void
|
|
542
506
|
json_eat_comments(JSON_ParserState *state)
|
|
543
507
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
508
|
+
const char *start = state->cursor;
|
|
509
|
+
state->cursor++;
|
|
510
|
+
|
|
511
|
+
switch (peek(state)) {
|
|
512
|
+
case '/': {
|
|
513
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
514
|
+
if (!state->cursor) {
|
|
515
|
+
state->cursor = state->end;
|
|
516
|
+
} else {
|
|
517
|
+
state->cursor++;
|
|
554
518
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
519
|
+
break;
|
|
520
|
+
}
|
|
521
|
+
case '*': {
|
|
522
|
+
state->cursor++;
|
|
523
|
+
|
|
524
|
+
while (true) {
|
|
525
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
526
|
+
if (!next_match) {
|
|
527
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
state->cursor = next_match + 1;
|
|
531
|
+
if (peek(state) == '/') {
|
|
532
|
+
state->cursor++;
|
|
533
|
+
break;
|
|
568
534
|
}
|
|
569
|
-
break;
|
|
570
535
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
536
|
+
break;
|
|
574
537
|
}
|
|
575
|
-
|
|
576
|
-
|
|
538
|
+
default:
|
|
539
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
540
|
+
break;
|
|
577
541
|
}
|
|
578
542
|
}
|
|
579
543
|
|
|
580
|
-
static
|
|
544
|
+
ALWAYS_INLINE(static) void
|
|
581
545
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
546
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
547
|
+
while (true) {
|
|
548
|
+
switch (peek(state)) {
|
|
549
|
+
case ' ':
|
|
550
|
+
state->cursor++;
|
|
551
|
+
break;
|
|
552
|
+
case '\n':
|
|
553
|
+
state->cursor++;
|
|
554
|
+
|
|
555
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
556
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
557
|
+
while (rest(state) > 8) {
|
|
558
|
+
uint64_t chunk;
|
|
559
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
560
|
+
if (chunk == 0x2020202020202020) {
|
|
561
|
+
state->cursor += 8;
|
|
562
|
+
continue;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
566
|
+
state->cursor += consecutive_spaces;
|
|
567
|
+
break;
|
|
568
|
+
}
|
|
569
|
+
#endif
|
|
570
|
+
break;
|
|
571
|
+
case '\t':
|
|
572
|
+
case '\r':
|
|
573
|
+
state->cursor++;
|
|
574
|
+
break;
|
|
575
|
+
case '/':
|
|
576
|
+
json_eat_comments(state);
|
|
577
|
+
break;
|
|
578
|
+
|
|
579
|
+
default:
|
|
580
|
+
return;
|
|
588
581
|
}
|
|
589
582
|
}
|
|
590
583
|
}
|
|
@@ -615,11 +608,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
608
|
return result;
|
|
616
609
|
}
|
|
617
610
|
|
|
618
|
-
static inline
|
|
611
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
612
|
+
{
|
|
613
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
614
|
+
// Simple heuristics:
|
|
615
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
616
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
617
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
619
621
|
{
|
|
622
|
+
bool intern = is_name || config->freeze;
|
|
623
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
624
|
size_t bufferSize = stringEnd - string;
|
|
621
625
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
626
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
627
|
VALUE cached_key;
|
|
624
628
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
629
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,60 +639,73 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
639
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
640
|
}
|
|
637
641
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
642
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
643
|
+
typedef struct _json_unescape_positions {
|
|
644
|
+
long size;
|
|
645
|
+
const char **positions;
|
|
646
|
+
bool has_more;
|
|
647
|
+
} JSON_UnescapePositions;
|
|
645
648
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
649
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
650
|
+
{
|
|
651
|
+
while (positions->size) {
|
|
652
|
+
positions->size--;
|
|
653
|
+
const char *next_position = positions->positions[0];
|
|
654
|
+
positions->positions++;
|
|
655
|
+
if (next_position >= pe) {
|
|
656
|
+
return next_position;
|
|
652
657
|
}
|
|
658
|
+
}
|
|
653
659
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
}
|
|
660
|
+
if (positions->has_more) {
|
|
661
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
662
|
}
|
|
658
663
|
|
|
664
|
+
return NULL;
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
668
|
+
{
|
|
669
|
+
bool intern = is_name || config->freeze;
|
|
670
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
671
|
+
size_t bufferSize = stringEnd - string;
|
|
672
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
673
|
+
char *buffer;
|
|
674
|
+
|
|
659
675
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
676
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
677
|
buffer = RSTRING_PTR(result);
|
|
662
678
|
bufferStart = buffer;
|
|
663
679
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
680
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
681
|
+
|
|
682
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
683
|
if (pe > p) {
|
|
668
684
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
685
|
buffer += pe - p;
|
|
670
686
|
}
|
|
671
687
|
switch (*++pe) {
|
|
688
|
+
case '"':
|
|
689
|
+
case '/':
|
|
690
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
691
|
+
break;
|
|
692
|
+
case '\\':
|
|
693
|
+
APPEND_CHAR('\\');
|
|
694
|
+
break;
|
|
672
695
|
case 'n':
|
|
673
|
-
|
|
696
|
+
APPEND_CHAR('\n');
|
|
674
697
|
break;
|
|
675
698
|
case 'r':
|
|
676
|
-
|
|
699
|
+
APPEND_CHAR('\r');
|
|
677
700
|
break;
|
|
678
701
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
702
|
+
APPEND_CHAR('\t');
|
|
686
703
|
break;
|
|
687
704
|
case 'b':
|
|
688
|
-
|
|
705
|
+
APPEND_CHAR('\b');
|
|
689
706
|
break;
|
|
690
707
|
case 'f':
|
|
691
|
-
|
|
708
|
+
APPEND_CHAR('\f');
|
|
692
709
|
break;
|
|
693
710
|
case 'u':
|
|
694
711
|
if (pe > stringEnd - 5) {
|
|
@@ -726,18 +743,29 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
726
743
|
break;
|
|
727
744
|
}
|
|
728
745
|
}
|
|
729
|
-
|
|
730
|
-
|
|
746
|
+
|
|
747
|
+
char buf[4];
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
749
|
+
MEMCPY(buffer, buf, char, unescape_len);
|
|
750
|
+
buffer += unescape_len;
|
|
751
|
+
p = ++pe;
|
|
731
752
|
}
|
|
732
753
|
break;
|
|
733
754
|
default:
|
|
734
|
-
|
|
735
|
-
|
|
755
|
+
if ((unsigned char)*pe < 0x20) {
|
|
756
|
+
if (!config->allow_control_characters) {
|
|
757
|
+
if (*pe == '\n') {
|
|
758
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
759
|
+
}
|
|
760
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
761
|
+
}
|
|
762
|
+
} else {
|
|
763
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
764
|
+
}
|
|
765
|
+
break;
|
|
736
766
|
}
|
|
737
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
738
|
-
buffer += unescape_len;
|
|
739
|
-
p = ++pe;
|
|
740
767
|
}
|
|
768
|
+
#undef APPEND_CHAR
|
|
741
769
|
|
|
742
770
|
if (stringEnd > p) {
|
|
743
771
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -748,33 +776,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
748
776
|
if (symbolize) {
|
|
749
777
|
result = rb_str_intern(result);
|
|
750
778
|
} else if (intern) {
|
|
751
|
-
result =
|
|
779
|
+
result = rb_str_to_interned_str(result);
|
|
752
780
|
}
|
|
753
781
|
|
|
754
782
|
return result;
|
|
755
783
|
}
|
|
756
784
|
|
|
757
785
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
758
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
759
|
-
{
|
|
760
|
-
bool negative = false;
|
|
761
|
-
if (*p == '-') {
|
|
762
|
-
negative = true;
|
|
763
|
-
p++;
|
|
764
|
-
}
|
|
765
|
-
|
|
766
|
-
long long memo = 0;
|
|
767
|
-
while (p < pe) {
|
|
768
|
-
memo *= 10;
|
|
769
|
-
memo += *p - '0';
|
|
770
|
-
p++;
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
if (negative) {
|
|
774
|
-
memo = -memo;
|
|
775
|
-
}
|
|
776
|
-
return LL2NUM(memo);
|
|
777
|
-
}
|
|
778
786
|
|
|
779
787
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
780
788
|
{
|
|
@@ -788,17 +796,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
788
796
|
}
|
|
789
797
|
|
|
790
798
|
static inline VALUE
|
|
791
|
-
json_decode_integer(const char *start, const char *end)
|
|
799
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
792
800
|
{
|
|
793
|
-
|
|
794
|
-
if (
|
|
795
|
-
return
|
|
801
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
802
|
+
if (negative) {
|
|
803
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
796
804
|
}
|
|
797
|
-
return
|
|
805
|
+
return UINT64T2NUM(mantissa);
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
return json_decode_large_integer(start, end - start);
|
|
798
809
|
}
|
|
799
810
|
|
|
800
811
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
801
812
|
{
|
|
813
|
+
if (RB_LIKELY(len < 64)) {
|
|
814
|
+
char buffer[64];
|
|
815
|
+
MEMCPY(buffer, start, char, len);
|
|
816
|
+
buffer[len] = '\0';
|
|
817
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
+
}
|
|
819
|
+
|
|
802
820
|
VALUE buffer_v;
|
|
803
821
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
804
822
|
MEMCPY(buffer, start, char, len);
|
|
@@ -808,21 +826,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
808
826
|
return number;
|
|
809
827
|
}
|
|
810
828
|
|
|
811
|
-
|
|
829
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
830
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
831
|
+
*/
|
|
832
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
833
|
+
const char *start, const char *end)
|
|
812
834
|
{
|
|
813
|
-
long len = end - start;
|
|
814
|
-
|
|
815
835
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
816
|
-
VALUE text = rb_str_new(start,
|
|
836
|
+
VALUE text = rb_str_new(start, end - start);
|
|
817
837
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
818
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
819
|
-
char buffer[64];
|
|
820
|
-
MEMCPY(buffer, start, char, len);
|
|
821
|
-
buffer[len] = '\0';
|
|
822
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
823
|
-
} else {
|
|
824
|
-
return json_decode_large_float(start, len);
|
|
825
838
|
}
|
|
839
|
+
|
|
840
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
841
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
842
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
843
|
+
return json_decode_large_float(start, end - start);
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
826
847
|
}
|
|
827
848
|
|
|
828
849
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -908,20 +929,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
908
929
|
return object;
|
|
909
930
|
}
|
|
910
931
|
|
|
911
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
912
|
-
{
|
|
913
|
-
VALUE string;
|
|
914
|
-
bool intern = is_name || config->freeze;
|
|
915
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
916
|
-
if (escaped) {
|
|
917
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
918
|
-
} else {
|
|
919
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
920
|
-
}
|
|
921
|
-
|
|
922
|
-
return string;
|
|
923
|
-
}
|
|
924
|
-
|
|
925
932
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
926
933
|
{
|
|
927
934
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -944,17 +951,11 @@ static const bool string_scan_table[256] = {
|
|
|
944
951
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
945
952
|
};
|
|
946
953
|
|
|
947
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
948
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
949
|
-
#else
|
|
950
|
-
#define FORCE_INLINE
|
|
951
|
-
#endif
|
|
952
|
-
|
|
953
954
|
#ifdef HAVE_SIMD
|
|
954
955
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
955
956
|
#endif /* HAVE_SIMD */
|
|
956
957
|
|
|
957
|
-
static
|
|
958
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
958
959
|
{
|
|
959
960
|
#ifdef HAVE_SIMD
|
|
960
961
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -962,7 +963,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
962
963
|
uint64_t mask = 0;
|
|
963
964
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
964
965
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
965
|
-
return
|
|
966
|
+
return true;
|
|
966
967
|
}
|
|
967
968
|
|
|
968
969
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -970,64 +971,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
970
971
|
int mask = 0;
|
|
971
972
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
972
973
|
state->cursor += trailing_zeros(mask);
|
|
973
|
-
return
|
|
974
|
+
return true;
|
|
974
975
|
}
|
|
975
976
|
}
|
|
976
977
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
977
978
|
#endif /* HAVE_SIMD */
|
|
978
979
|
|
|
979
|
-
while (state
|
|
980
|
+
while (!eos(state)) {
|
|
980
981
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
981
|
-
return
|
|
982
|
+
return true;
|
|
982
983
|
}
|
|
983
984
|
state->cursor++;
|
|
984
985
|
}
|
|
985
|
-
return
|
|
986
|
+
return false;
|
|
986
987
|
}
|
|
987
988
|
|
|
988
|
-
static
|
|
989
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
989
990
|
{
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
991
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
992
|
+
JSON_UnescapePositions positions = {
|
|
993
|
+
.size = 0,
|
|
994
|
+
.positions = backslashes,
|
|
995
|
+
.has_more = false,
|
|
996
|
+
};
|
|
993
997
|
|
|
994
|
-
|
|
998
|
+
do {
|
|
995
999
|
switch (*state->cursor) {
|
|
996
1000
|
case '"': {
|
|
997
|
-
VALUE string =
|
|
1001
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
998
1002
|
state->cursor++;
|
|
999
1003
|
return json_push_value(state, config, string);
|
|
1000
1004
|
}
|
|
1001
1005
|
case '\\': {
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1007
|
+
backslashes[positions.size] = state->cursor;
|
|
1008
|
+
positions.size++;
|
|
1009
|
+
} else {
|
|
1010
|
+
positions.has_more = true;
|
|
1006
1011
|
}
|
|
1012
|
+
state->cursor++;
|
|
1007
1013
|
break;
|
|
1008
1014
|
}
|
|
1009
1015
|
default:
|
|
1010
|
-
|
|
1016
|
+
if (!config->allow_control_characters) {
|
|
1017
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1018
|
+
}
|
|
1011
1019
|
break;
|
|
1012
1020
|
}
|
|
1013
1021
|
|
|
1014
1022
|
state->cursor++;
|
|
1015
|
-
}
|
|
1023
|
+
} while (string_scan(state));
|
|
1016
1024
|
|
|
1017
1025
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1018
1026
|
return Qfalse;
|
|
1019
1027
|
}
|
|
1020
1028
|
|
|
1029
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1030
|
+
{
|
|
1031
|
+
state->cursor++;
|
|
1032
|
+
const char *start = state->cursor;
|
|
1033
|
+
|
|
1034
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1035
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1039
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1040
|
+
state->cursor++;
|
|
1041
|
+
return json_push_value(state, config, string);
|
|
1042
|
+
}
|
|
1043
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1044
|
+
}
|
|
1045
|
+
|
|
1046
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1047
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1048
|
+
// Additional References:
|
|
1049
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1050
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1051
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1052
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1053
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1054
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1055
|
+
val -= 0x3030303030303030;
|
|
1056
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1057
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1058
|
+
return val;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1062
|
+
const uint32_t mask = 0x000000FF;
|
|
1063
|
+
const uint32_t mul1 = 100;
|
|
1064
|
+
val -= 0x30303030;
|
|
1065
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1066
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1067
|
+
return val;
|
|
1068
|
+
}
|
|
1069
|
+
#endif
|
|
1070
|
+
|
|
1071
|
+
/*
 * Consume the run of ASCII decimal digits found at state->cursor.
 *
 * Every digit is folded into *accumulator (accumulator * 10 + digit), on top
 * of whatever value the caller already stored there, and the cursor is left
 * on the first non-digit byte. The accumulator is an unsigned 64-bit value,
 * so a long enough run wraps around silently.
 *
 * Returns the number of digits consumed, 0 when the cursor is not on a digit.
 */
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
    const char *const first = state->cursor;

#if JSON_CPU_LITTLE_ENDIAN_64BITS
    // Fast path: inspect 8 input bytes at a time while that many remain.
    while (rest(state) >= sizeof(uint64_t)) {
        uint64_t chunk;
        memcpy(&chunk, state->cursor, sizeof(uint64_t));

        // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
        // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
        // A byte is in '0'..'9' exactly when its high nibble is 3 both before
        // and after adding 6, which yields 0x33 in the matching lane below.
        const uint64_t high_nibbles = chunk & 0xF0F0F0F0F0F0F0F0;
        const uint64_t shifted_nibbles = ((chunk + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4;
        const uint64_t lanes = high_nibbles | shifted_nibbles;

        if (lanes != 0x3333333333333333) {
            // Fewer than 8 digits: the lowest differing bit locates the first
            // non-digit byte (presumably trailing_zeros64 counts trailing
            // zero bits -- it is defined outside this block).
            uint32_t pending = trailing_zeros64(lanes ^ 0x3333333333333333) / CHAR_BIT;

            if (pending >= 4) {
                *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)chunk);
                state->cursor += 4;
                pending -= 4;
            }

            // At most three digits are left; fold them one by one.
            for (; pending; pending--) {
                *accumulator = *accumulator * 10 + (*state->cursor - '0');
                state->cursor++;
            }

            return (int)(state->cursor - first);
        }

        // 8 consecutive digits: fold them all and look at the next chunk.
        *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(chunk);
        state->cursor += 8;
    }
#endif

    // Scalar path: used for the last few bytes of input, and for everything
    // when the 64-bit little-endian fast path is compiled out.
    for (char digit = peek(state); rb_isdigit(digit); digit = peek(state)) {
        *accumulator = *accumulator * 10 + (digit - '0');
        state->cursor++;
    }

    return (int)(state->cursor - first);
}
|
|
1115
|
+
|
|
1116
|
+
/*
 * Parse the digits, optional fraction and optional exponent of a JSON number.
 *
 * state    - parser state; state->cursor must be on the first digit (the
 *            caller has already stepped over a leading '-').
 * config   - parser configuration, forwarded to json_decode_float.
 * negative - true when the number was introduced by a '-' sign.
 * start    - first byte of the number text (the '-' for negative numbers);
 *            used for error reporting and handed to the decoders.
 *
 * Raises a parse error ("invalid number: %s") when the integer part has a
 * leading zero followed by more digits, when a '-' is not followed by any
 * digit, or when a '.' or an exponent marker is not followed by digits.
 *
 * Returns the value built by json_decode_integer when the text has neither a
 * fraction nor an exponent, otherwise the value built by json_decode_float.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    // Magnitude used for any exponent that has ten or more significant
    // digits. It fits in int32_t with room to spare for the decimal point
    // adjustment below.
    const uint64_t saturated_exponent = 1000000000;

    bool integer = true;
    // NOTE(review): reads the byte under the cursor directly rather than via
    // peek(); for a lone '-' at the very end of the input this relies on the
    // buffer being readable one byte past the last character -- confirm.
    const char first_digit = *state->cursor;

    // Variables for Ryu optimization - extract digits during parsing
    int32_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    // Reject "01", "-" and similar: a leading zero must stand alone, and a
    // sign must be followed by at least one digit.
    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        // Fraction digits keep accumulating into the same mantissa.
        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        const char *significant = state->cursor;
        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // The exponent may have any number of digits, so abs_exponent can
        // exceed what int32_t holds (or even wrap around 64 bits). Narrowing
        // such a value would silently produce an unrelated exponent, e.g.
        // "1e4294967296" would be treated as "1e0". Count the significant
        // digits (leading zeros carry no magnitude) and saturate instead:
        // up to nine digits always fit, anything longer is at least 1e9.
        while (significant < state->cursor && *significant == '0') {
            significant++;
        }
        if (RB_UNLIKELY(state->cursor - significant > 9)) {
            abs_exponent = saturated_exponent;
        }

        exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
|
|
1180
|
+
|
|
1181
|
+
/*
 * Parse an unsigned JSON number starting at the cursor.
 *
 * The number text begins at the current cursor position, which is also what
 * json_parse_number receives as the start of the number.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *number_start = state->cursor;

    return json_parse_number(state, config, false, number_start);
}
|
|
1185
|
+
|
|
1186
|
+
/*
 * Parse a JSON number introduced by a sign character at the cursor.
 *
 * The position of the sign is remembered as the start of the number text,
 * then the cursor steps over it so json_parse_number begins on the byte
 * that follows.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *sign_position = state->cursor++;

    return json_parse_number(state, config, true, sign_position);
}
|
|
1192
|
+
|
|
1021
1193
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1022
1194
|
{
|
|
1023
1195
|
json_eat_whitespace(state);
|
|
1024
|
-
if (state->cursor >= state->end) {
|
|
1025
|
-
raise_parse_error("unexpected end of input", state);
|
|
1026
|
-
}
|
|
1027
1196
|
|
|
1028
|
-
switch (
|
|
1197
|
+
switch (peek(state)) {
|
|
1029
1198
|
case 'n':
|
|
1030
|
-
if ((state
|
|
1199
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1031
1200
|
state->cursor += 4;
|
|
1032
1201
|
return json_push_value(state, config, Qnil);
|
|
1033
1202
|
}
|
|
@@ -1035,7 +1204,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1035
1204
|
raise_parse_error("unexpected token %s", state);
|
|
1036
1205
|
break;
|
|
1037
1206
|
case 't':
|
|
1038
|
-
if ((state
|
|
1207
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1039
1208
|
state->cursor += 4;
|
|
1040
1209
|
return json_push_value(state, config, Qtrue);
|
|
1041
1210
|
}
|
|
@@ -1044,7 +1213,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1044
1213
|
break;
|
|
1045
1214
|
case 'f':
|
|
1046
1215
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1047
|
-
if ((state
|
|
1216
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1048
1217
|
state->cursor += 5;
|
|
1049
1218
|
return json_push_value(state, config, Qfalse);
|
|
1050
1219
|
}
|
|
@@ -1053,7 +1222,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1053
1222
|
break;
|
|
1054
1223
|
case 'N':
|
|
1055
1224
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1056
|
-
if (config->allow_nan && (state
|
|
1225
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1057
1226
|
state->cursor += 3;
|
|
1058
1227
|
return json_push_value(state, config, CNaN);
|
|
1059
1228
|
}
|
|
@@ -1061,16 +1230,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1061
1230
|
raise_parse_error("unexpected token %s", state);
|
|
1062
1231
|
break;
|
|
1063
1232
|
case 'I':
|
|
1064
|
-
if (config->allow_nan && (state
|
|
1233
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1065
1234
|
state->cursor += 8;
|
|
1066
1235
|
return json_push_value(state, config, CInfinity);
|
|
1067
1236
|
}
|
|
1068
1237
|
|
|
1069
1238
|
raise_parse_error("unexpected token %s", state);
|
|
1070
1239
|
break;
|
|
1071
|
-
case '-':
|
|
1240
|
+
case '-': {
|
|
1072
1241
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1073
|
-
if ((state
|
|
1242
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1074
1243
|
if (config->allow_nan) {
|
|
1075
1244
|
state->cursor += 9;
|
|
1076
1245
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1078,62 +1247,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1078
1247
|
raise_parse_error("unexpected token %s", state);
|
|
1079
1248
|
}
|
|
1080
1249
|
}
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
bool integer = true;
|
|
1084
|
-
|
|
1085
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1086
|
-
const char *start = state->cursor;
|
|
1087
|
-
state->cursor++;
|
|
1088
|
-
|
|
1089
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1090
|
-
state->cursor++;
|
|
1091
|
-
}
|
|
1092
|
-
|
|
1093
|
-
long integer_length = state->cursor - start;
|
|
1094
|
-
|
|
1095
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1096
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1097
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1098
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1099
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1100
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1104
|
-
integer = false;
|
|
1105
|
-
state->cursor++;
|
|
1106
|
-
|
|
1107
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1108
|
-
raise_parse_error("invalid number: %s", state);
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1112
|
-
state->cursor++;
|
|
1113
|
-
}
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1117
|
-
integer = false;
|
|
1118
|
-
state->cursor++;
|
|
1119
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1120
|
-
state->cursor++;
|
|
1121
|
-
}
|
|
1122
|
-
|
|
1123
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1124
|
-
raise_parse_error("invalid number: %s", state);
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1128
|
-
state->cursor++;
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
if (integer) {
|
|
1133
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1134
|
-
}
|
|
1135
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1250
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1251
|
+
break;
|
|
1136
1252
|
}
|
|
1253
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1254
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1255
|
+
break;
|
|
1137
1256
|
case '"': {
|
|
1138
1257
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1139
1258
|
return json_parse_string(state, config, false);
|
|
@@ -1144,7 +1263,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1144
1263
|
json_eat_whitespace(state);
|
|
1145
1264
|
long stack_head = state->stack->head;
|
|
1146
1265
|
|
|
1147
|
-
if ((state
|
|
1266
|
+
if (peek(state) == ']') {
|
|
1148
1267
|
state->cursor++;
|
|
1149
1268
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1150
1269
|
} else {
|
|
@@ -1159,26 +1278,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1159
1278
|
while (true) {
|
|
1160
1279
|
json_eat_whitespace(state);
|
|
1161
1280
|
|
|
1162
|
-
|
|
1163
|
-
if (*state->cursor == ']') {
|
|
1164
|
-
state->cursor++;
|
|
1165
|
-
long count = state->stack->head - stack_head;
|
|
1166
|
-
state->current_nesting--;
|
|
1167
|
-
state->in_array--;
|
|
1168
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1169
|
-
}
|
|
1281
|
+
const char next_char = peek(state);
|
|
1170
1282
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
}
|
|
1283
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1284
|
+
state->cursor++;
|
|
1285
|
+
if (config->allow_trailing_comma) {
|
|
1286
|
+
json_eat_whitespace(state);
|
|
1287
|
+
if (peek(state) == ']') {
|
|
1288
|
+
continue;
|
|
1178
1289
|
}
|
|
1179
|
-
json_parse_any(state, config);
|
|
1180
|
-
continue;
|
|
1181
1290
|
}
|
|
1291
|
+
json_parse_any(state, config);
|
|
1292
|
+
continue;
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
if (next_char == ']') {
|
|
1296
|
+
state->cursor++;
|
|
1297
|
+
long count = state->stack->head - stack_head;
|
|
1298
|
+
state->current_nesting--;
|
|
1299
|
+
state->in_array--;
|
|
1300
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1182
1301
|
}
|
|
1183
1302
|
|
|
1184
1303
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1192,7 +1311,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1192
1311
|
json_eat_whitespace(state);
|
|
1193
1312
|
long stack_head = state->stack->head;
|
|
1194
1313
|
|
|
1195
|
-
if ((state
|
|
1314
|
+
if (peek(state) == '}') {
|
|
1196
1315
|
state->cursor++;
|
|
1197
1316
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1198
1317
|
} else {
|
|
@@ -1201,13 +1320,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1201
1320
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1202
1321
|
}
|
|
1203
1322
|
|
|
1204
|
-
if (
|
|
1323
|
+
if (peek(state) != '"') {
|
|
1205
1324
|
raise_parse_error("expected object key, got %s", state);
|
|
1206
1325
|
}
|
|
1207
1326
|
json_parse_string(state, config, true);
|
|
1208
1327
|
|
|
1209
1328
|
json_eat_whitespace(state);
|
|
1210
|
-
if ((state
|
|
1329
|
+
if (peek(state) != ':') {
|
|
1211
1330
|
raise_parse_error("expected ':' after object key", state);
|
|
1212
1331
|
}
|
|
1213
1332
|
state->cursor++;
|
|
@@ -1218,46 +1337,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1218
1337
|
while (true) {
|
|
1219
1338
|
json_eat_whitespace(state);
|
|
1220
1339
|
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1340
|
+
const char next_char = peek(state);
|
|
1341
|
+
if (next_char == '}') {
|
|
1342
|
+
state->cursor++;
|
|
1343
|
+
state->current_nesting--;
|
|
1344
|
+
size_t count = state->stack->head - stack_head;
|
|
1226
1345
|
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1346
|
+
// Temporary rewind cursor in case an error is raised
|
|
1347
|
+
const char *final_cursor = state->cursor;
|
|
1348
|
+
state->cursor = object_start_cursor;
|
|
1349
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1350
|
+
state->cursor = final_cursor;
|
|
1232
1351
|
|
|
1233
|
-
|
|
1234
|
-
|
|
1352
|
+
return json_push_value(state, config, object);
|
|
1353
|
+
}
|
|
1235
1354
|
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1355
|
+
if (next_char == ',') {
|
|
1356
|
+
state->cursor++;
|
|
1357
|
+
json_eat_whitespace(state);
|
|
1239
1358
|
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
}
|
|
1359
|
+
if (config->allow_trailing_comma) {
|
|
1360
|
+
if (peek(state) == '}') {
|
|
1361
|
+
continue;
|
|
1244
1362
|
}
|
|
1363
|
+
}
|
|
1245
1364
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1365
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1366
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1367
|
+
}
|
|
1368
|
+
json_parse_string(state, config, true);
|
|
1250
1369
|
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1370
|
+
json_eat_whitespace(state);
|
|
1371
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1372
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1373
|
+
}
|
|
1374
|
+
state->cursor++;
|
|
1256
1375
|
|
|
1257
|
-
|
|
1376
|
+
json_parse_any(state, config);
|
|
1258
1377
|
|
|
1259
|
-
|
|
1260
|
-
}
|
|
1378
|
+
continue;
|
|
1261
1379
|
}
|
|
1262
1380
|
|
|
1263
1381
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1265,18 +1383,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1265
1383
|
break;
|
|
1266
1384
|
}
|
|
1267
1385
|
|
|
1386
|
+
case 0:
|
|
1387
|
+
raise_parse_error("unexpected end of input", state);
|
|
1388
|
+
break;
|
|
1389
|
+
|
|
1268
1390
|
default:
|
|
1269
1391
|
raise_parse_error("unexpected character: %s", state);
|
|
1270
1392
|
break;
|
|
1271
1393
|
}
|
|
1272
1394
|
|
|
1273
1395
|
raise_parse_error("unreachable: %s", state);
|
|
1396
|
+
return Qundef;
|
|
1274
1397
|
}
|
|
1275
1398
|
|
|
1276
1399
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1277
1400
|
{
|
|
1278
1401
|
json_eat_whitespace(state);
|
|
1279
|
-
if (state
|
|
1402
|
+
if (!eos(state)) {
|
|
1280
1403
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1281
1404
|
}
|
|
1282
1405
|
}
|
|
@@ -1313,14 +1436,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1313
1436
|
{
|
|
1314
1437
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1315
1438
|
|
|
1316
|
-
if (key == sym_max_nesting)
|
|
1317
|
-
else if (key == sym_allow_nan)
|
|
1318
|
-
else if (key == sym_allow_trailing_comma)
|
|
1319
|
-
else if (key ==
|
|
1320
|
-
else if (key ==
|
|
1321
|
-
else if (key ==
|
|
1322
|
-
else if (key ==
|
|
1323
|
-
else if (key ==
|
|
1439
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1440
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1441
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1442
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1443
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1444
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1445
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1446
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1447
|
+
else if (key == sym_decimal_class) {
|
|
1324
1448
|
if (RTEST(val)) {
|
|
1325
1449
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1326
1450
|
config->decimal_class = val;
|
|
@@ -1393,6 +1517,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1393
1517
|
*/
|
|
1394
1518
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1395
1519
|
{
|
|
1520
|
+
rb_check_frozen(self);
|
|
1396
1521
|
GET_PARSER_CONFIG;
|
|
1397
1522
|
|
|
1398
1523
|
parser_config_init(config, opts);
|
|
@@ -1488,7 +1613,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1488
1613
|
JSON_ParserConfig_memsize,
|
|
1489
1614
|
},
|
|
1490
1615
|
0, 0,
|
|
1491
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1616
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1492
1617
|
};
|
|
1493
1618
|
|
|
1494
1619
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1532,16 +1657,13 @@ void Init_parser(void)
|
|
|
1532
1657
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1533
1658
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1534
1659
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1660
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1535
1661
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1536
1662
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1537
1663
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1538
1664
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1539
1665
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1540
1666
|
|
|
1541
|
-
i_chr = rb_intern("chr");
|
|
1542
|
-
i_aset = rb_intern("[]=");
|
|
1543
|
-
i_aref = rb_intern("[]");
|
|
1544
|
-
i_leftshift = rb_intern("<<");
|
|
1545
1667
|
i_new = rb_intern("new");
|
|
1546
1668
|
i_try_convert = rb_intern("try_convert");
|
|
1547
1669
|
i_uminus = rb_intern("-@");
|