json 2.13.2 → 2.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +81 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +35 -56
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +326 -264
- data/ext/json/ext/json.h +101 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +564 -444
- data/ext/json/ext/simd/simd.h +42 -12
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +60 -23
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +113 -63
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +44 -1
- metadata +6 -3
|
@@ -1,42 +1,13 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
23
3
|
#include "../simd/simd.h"
|
|
24
4
|
|
|
25
|
-
#ifndef RB_UNLIKELY
|
|
26
|
-
#define RB_UNLIKELY(expr) expr
|
|
27
|
-
#endif
|
|
28
|
-
|
|
29
|
-
#ifndef RB_LIKELY
|
|
30
|
-
#define RB_LIKELY(expr) expr
|
|
31
|
-
#endif
|
|
32
|
-
|
|
33
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
34
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
35
7
|
|
|
36
|
-
static ID
|
|
37
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
38
9
|
|
|
39
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
|
|
40
11
|
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
41
12
|
|
|
42
13
|
static int binary_encindex;
|
|
@@ -44,7 +15,7 @@ static int utf8_encindex;
|
|
|
44
15
|
|
|
45
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
46
17
|
// For TruffleRuby
|
|
47
|
-
void
|
|
18
|
+
static void
|
|
48
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
49
20
|
{
|
|
50
21
|
long index = 0;
|
|
@@ -61,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
61
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
62
33
|
#endif
|
|
63
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
37
|
+
{
|
|
38
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
39
|
+
}
|
|
40
|
+
#endif
|
|
64
41
|
|
|
65
42
|
/* name cache */
|
|
66
43
|
|
|
@@ -106,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
106
83
|
cache->entries[index] = rstring;
|
|
107
84
|
}
|
|
108
85
|
|
|
109
|
-
|
|
86
|
+
#define rstring_cache_memcmp memcmp
|
|
87
|
+
|
|
88
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
89
|
+
#if __has_builtin(__builtin_bswap64)
|
|
90
|
+
#undef rstring_cache_memcmp
|
|
91
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
110
92
|
{
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
93
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
94
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
95
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
96
|
+
long i = 0;
|
|
97
|
+
|
|
98
|
+
for (; i + 8 <= length; i += 8) {
|
|
99
|
+
uint64_t a, b;
|
|
100
|
+
memcpy(&a, str + i, 8);
|
|
101
|
+
memcpy(&b, rptr + i, 8);
|
|
102
|
+
if (a != b) {
|
|
103
|
+
a = __builtin_bswap64(a);
|
|
104
|
+
b = __builtin_bswap64(b);
|
|
105
|
+
return (a < b) ? -1 : 1;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (; i < length; i++) {
|
|
110
|
+
if (str[i] != rptr[i]) {
|
|
111
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
112
|
+
}
|
|
116
113
|
}
|
|
114
|
+
|
|
115
|
+
return 0;
|
|
117
116
|
}
|
|
117
|
+
#endif
|
|
118
|
+
#endif
|
|
118
119
|
|
|
119
|
-
static
|
|
120
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
120
121
|
{
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
// cache names above an arbitrary threshold.
|
|
124
|
-
return Qfalse;
|
|
125
|
-
}
|
|
122
|
+
const char *rstring_ptr;
|
|
123
|
+
long rstring_length;
|
|
126
124
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
125
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
|
+
|
|
127
|
+
if (length == rstring_length) {
|
|
128
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
129
|
+
} else {
|
|
130
|
+
return (int)(length - rstring_length);
|
|
132
131
|
}
|
|
132
|
+
}
|
|
133
133
|
|
|
134
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
135
|
+
{
|
|
134
136
|
int low = 0;
|
|
135
137
|
int high = cache->length - 1;
|
|
136
|
-
int mid = 0;
|
|
137
|
-
int last_cmp = 0;
|
|
138
138
|
|
|
139
139
|
while (low <= high) {
|
|
140
|
-
mid = (high + low) >> 1;
|
|
140
|
+
int mid = (high + low) >> 1;
|
|
141
141
|
VALUE entry = cache->entries[mid];
|
|
142
|
-
|
|
142
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
143
143
|
|
|
144
|
-
if (
|
|
144
|
+
if (cmp == 0) {
|
|
145
145
|
return entry;
|
|
146
|
-
} else if (
|
|
146
|
+
} else if (cmp > 0) {
|
|
147
147
|
low = mid + 1;
|
|
148
148
|
} else {
|
|
149
149
|
high = mid - 1;
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
154
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
155
|
-
// But if they do, we have to fallback to the slow path.
|
|
156
|
-
return Qfalse;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
153
|
VALUE rstring = build_interned_string(str, length);
|
|
160
154
|
|
|
161
155
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
162
|
-
|
|
163
|
-
mid += 1;
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
156
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
167
157
|
}
|
|
168
158
|
return rstring;
|
|
169
159
|
}
|
|
170
160
|
|
|
171
161
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
172
162
|
{
|
|
173
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
174
|
-
// Common names aren't likely to be very long. So we just don't
|
|
175
|
-
// cache names above an arbitrary threshold.
|
|
176
|
-
return Qfalse;
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
180
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
181
|
-
// we're much less likely to see this string again.
|
|
182
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
183
|
-
return Qfalse;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
163
|
int low = 0;
|
|
187
164
|
int high = cache->length - 1;
|
|
188
|
-
int mid = 0;
|
|
189
|
-
int last_cmp = 0;
|
|
190
165
|
|
|
191
166
|
while (low <= high) {
|
|
192
|
-
mid = (high + low) >> 1;
|
|
167
|
+
int mid = (high + low) >> 1;
|
|
193
168
|
VALUE entry = cache->entries[mid];
|
|
194
|
-
|
|
169
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
195
170
|
|
|
196
|
-
if (
|
|
171
|
+
if (cmp == 0) {
|
|
197
172
|
return entry;
|
|
198
|
-
} else if (
|
|
173
|
+
} else if (cmp > 0) {
|
|
199
174
|
low = mid + 1;
|
|
200
175
|
} else {
|
|
201
176
|
high = mid - 1;
|
|
202
177
|
}
|
|
203
178
|
}
|
|
204
179
|
|
|
205
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
206
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
207
|
-
// But if they do, we have to fallback to the slow path.
|
|
208
|
-
return Qfalse;
|
|
209
|
-
}
|
|
210
|
-
|
|
211
180
|
VALUE rsymbol = build_symbol(str, length);
|
|
212
181
|
|
|
213
182
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
214
|
-
|
|
215
|
-
mid += 1;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
183
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
219
184
|
}
|
|
220
185
|
return rsymbol;
|
|
221
186
|
}
|
|
@@ -330,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
330
295
|
}
|
|
331
296
|
}
|
|
332
297
|
|
|
333
|
-
|
|
334
|
-
#ifndef HAVE_STRNLEN
|
|
335
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
336
|
-
{
|
|
337
|
-
char *p;
|
|
338
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
339
|
-
}
|
|
340
|
-
#endif
|
|
341
|
-
|
|
342
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
343
299
|
{
|
|
344
300
|
int len = 1;
|
|
@@ -379,7 +335,7 @@ typedef struct JSON_ParserStruct {
|
|
|
379
335
|
int max_nesting;
|
|
380
336
|
bool allow_nan;
|
|
381
337
|
bool allow_trailing_comma;
|
|
382
|
-
bool
|
|
338
|
+
bool allow_control_characters;
|
|
383
339
|
bool symbolize_names;
|
|
384
340
|
bool freeze;
|
|
385
341
|
} JSON_ParserConfig;
|
|
@@ -395,6 +351,22 @@ typedef struct JSON_ParserStateStruct {
|
|
|
395
351
|
int current_nesting;
|
|
396
352
|
} JSON_ParserState;
|
|
397
353
|
|
|
354
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
355
|
+
return state->end - state->cursor;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
359
|
+
return state->cursor >= state->end;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
static inline char peek(JSON_ParserState *state)
|
|
363
|
+
{
|
|
364
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
365
|
+
return 0;
|
|
366
|
+
}
|
|
367
|
+
return *state->cursor;
|
|
368
|
+
}
|
|
369
|
+
|
|
398
370
|
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
399
371
|
{
|
|
400
372
|
const char *cursor = state->cursor;
|
|
@@ -505,23 +477,24 @@ static const signed char digit_values[256] = {
|
|
|
505
477
|
-1, -1, -1, -1, -1, -1, -1
|
|
506
478
|
};
|
|
507
479
|
|
|
508
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
480
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
509
481
|
{
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
482
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
483
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
487
|
+
|
|
488
|
+
const signed char b0 = digit_values[p[0]];
|
|
489
|
+
const signed char b1 = digit_values[p[1]];
|
|
490
|
+
const signed char b2 = digit_values[p[2]];
|
|
491
|
+
const signed char b3 = digit_values[p[3]];
|
|
492
|
+
|
|
493
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
494
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
525
498
|
}
|
|
526
499
|
|
|
527
500
|
#define GET_PARSER_CONFIG \
|
|
@@ -530,61 +503,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
530
503
|
|
|
531
504
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
532
505
|
|
|
533
|
-
static const bool whitespace[256] = {
|
|
534
|
-
[' '] = 1,
|
|
535
|
-
['\t'] = 1,
|
|
536
|
-
['\n'] = 1,
|
|
537
|
-
['\r'] = 1,
|
|
538
|
-
['/'] = 1,
|
|
539
|
-
};
|
|
540
|
-
|
|
541
506
|
static void
|
|
542
507
|
json_eat_comments(JSON_ParserState *state)
|
|
543
508
|
{
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
509
|
+
const char *start = state->cursor;
|
|
510
|
+
state->cursor++;
|
|
511
|
+
|
|
512
|
+
switch (peek(state)) {
|
|
513
|
+
case '/': {
|
|
514
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
515
|
+
if (!state->cursor) {
|
|
516
|
+
state->cursor = state->end;
|
|
517
|
+
} else {
|
|
518
|
+
state->cursor++;
|
|
554
519
|
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
520
|
+
break;
|
|
521
|
+
}
|
|
522
|
+
case '*': {
|
|
523
|
+
state->cursor++;
|
|
524
|
+
|
|
525
|
+
while (true) {
|
|
526
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
527
|
+
if (!next_match) {
|
|
528
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
state->cursor = next_match + 1;
|
|
532
|
+
if (peek(state) == '/') {
|
|
533
|
+
state->cursor++;
|
|
534
|
+
break;
|
|
568
535
|
}
|
|
569
|
-
break;
|
|
570
536
|
}
|
|
571
|
-
|
|
572
|
-
raise_parse_error("unexpected token %s", state);
|
|
573
|
-
break;
|
|
537
|
+
break;
|
|
574
538
|
}
|
|
575
|
-
|
|
576
|
-
|
|
539
|
+
default:
|
|
540
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
541
|
+
break;
|
|
577
542
|
}
|
|
578
543
|
}
|
|
579
544
|
|
|
580
|
-
static
|
|
545
|
+
ALWAYS_INLINE(static) void
|
|
581
546
|
json_eat_whitespace(JSON_ParserState *state)
|
|
582
547
|
{
|
|
583
|
-
while (
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
548
|
+
while (true) {
|
|
549
|
+
switch (peek(state)) {
|
|
550
|
+
case ' ':
|
|
551
|
+
state->cursor++;
|
|
552
|
+
break;
|
|
553
|
+
case '\n':
|
|
554
|
+
state->cursor++;
|
|
555
|
+
|
|
556
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
557
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
558
|
+
while (rest(state) > 8) {
|
|
559
|
+
uint64_t chunk;
|
|
560
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
561
|
+
if (chunk == 0x2020202020202020) {
|
|
562
|
+
state->cursor += 8;
|
|
563
|
+
continue;
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
567
|
+
state->cursor += consecutive_spaces;
|
|
568
|
+
break;
|
|
569
|
+
}
|
|
570
|
+
#endif
|
|
571
|
+
break;
|
|
572
|
+
case '\t':
|
|
573
|
+
case '\r':
|
|
574
|
+
state->cursor++;
|
|
575
|
+
break;
|
|
576
|
+
case '/':
|
|
577
|
+
json_eat_comments(state);
|
|
578
|
+
break;
|
|
579
|
+
|
|
580
|
+
default:
|
|
581
|
+
return;
|
|
588
582
|
}
|
|
589
583
|
}
|
|
590
584
|
}
|
|
@@ -615,11 +609,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
615
609
|
return result;
|
|
616
610
|
}
|
|
617
611
|
|
|
618
|
-
static inline
|
|
612
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
613
|
+
{
|
|
614
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
615
|
+
// Simple heuristics:
|
|
616
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
617
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
618
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
619
622
|
{
|
|
623
|
+
bool intern = is_name || config->freeze;
|
|
624
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
620
625
|
size_t bufferSize = stringEnd - string;
|
|
621
626
|
|
|
622
|
-
if (is_name && state->in_array) {
|
|
627
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
623
628
|
VALUE cached_key;
|
|
624
629
|
if (RB_UNLIKELY(symbolize)) {
|
|
625
630
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -635,104 +640,125 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
635
640
|
return build_string(string, stringEnd, intern, symbolize);
|
|
636
641
|
}
|
|
637
642
|
|
|
638
|
-
|
|
639
|
-
{
|
|
640
|
-
|
|
641
|
-
const char
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
char buf[4];
|
|
643
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
644
|
+
typedef struct _json_unescape_positions {
|
|
645
|
+
long size;
|
|
646
|
+
const char **positions;
|
|
647
|
+
unsigned long additional_backslashes;
|
|
648
|
+
} JSON_UnescapePositions;
|
|
645
649
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
650
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
651
|
+
{
|
|
652
|
+
while (positions->size) {
|
|
653
|
+
positions->size--;
|
|
654
|
+
const char *next_position = positions->positions[0];
|
|
655
|
+
positions->positions++;
|
|
656
|
+
if (next_position >= pe) {
|
|
657
|
+
return next_position;
|
|
652
658
|
}
|
|
659
|
+
}
|
|
653
660
|
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
661
|
+
if (positions->additional_backslashes) {
|
|
662
|
+
positions->additional_backslashes--;
|
|
663
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
657
664
|
}
|
|
658
665
|
|
|
666
|
+
return NULL;
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
670
|
+
{
|
|
671
|
+
bool intern = is_name || config->freeze;
|
|
672
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
673
|
+
size_t bufferSize = stringEnd - string;
|
|
674
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
675
|
+
char *buffer;
|
|
676
|
+
|
|
659
677
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
660
678
|
rb_enc_associate_index(result, utf8_encindex);
|
|
661
679
|
buffer = RSTRING_PTR(result);
|
|
662
680
|
bufferStart = buffer;
|
|
663
681
|
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
682
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
683
|
+
|
|
684
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
667
685
|
if (pe > p) {
|
|
668
686
|
MEMCPY(buffer, p, char, pe - p);
|
|
669
687
|
buffer += pe - p;
|
|
670
688
|
}
|
|
671
689
|
switch (*++pe) {
|
|
690
|
+
case '"':
|
|
691
|
+
case '/':
|
|
692
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
693
|
+
break;
|
|
694
|
+
case '\\':
|
|
695
|
+
APPEND_CHAR('\\');
|
|
696
|
+
break;
|
|
672
697
|
case 'n':
|
|
673
|
-
|
|
698
|
+
APPEND_CHAR('\n');
|
|
674
699
|
break;
|
|
675
700
|
case 'r':
|
|
676
|
-
|
|
701
|
+
APPEND_CHAR('\r');
|
|
677
702
|
break;
|
|
678
703
|
case 't':
|
|
679
|
-
|
|
680
|
-
break;
|
|
681
|
-
case '"':
|
|
682
|
-
unescape = (char *) "\"";
|
|
683
|
-
break;
|
|
684
|
-
case '\\':
|
|
685
|
-
unescape = (char *) "\\";
|
|
704
|
+
APPEND_CHAR('\t');
|
|
686
705
|
break;
|
|
687
706
|
case 'b':
|
|
688
|
-
|
|
707
|
+
APPEND_CHAR('\b');
|
|
689
708
|
break;
|
|
690
709
|
case 'f':
|
|
691
|
-
|
|
710
|
+
APPEND_CHAR('\f');
|
|
692
711
|
break;
|
|
693
|
-
case 'u':
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
if (
|
|
712
|
-
raise_parse_error_at("
|
|
713
|
-
}
|
|
714
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
715
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
716
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
717
|
-
| (sur & 0x3FF));
|
|
718
|
-
pe += 5;
|
|
719
|
-
} else {
|
|
720
|
-
unescape = (char *) "?";
|
|
721
|
-
break;
|
|
712
|
+
case 'u': {
|
|
713
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
714
|
+
pe += 3;
|
|
715
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
716
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
717
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
718
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
719
|
+
* final codepoint.
|
|
720
|
+
*
|
|
721
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
722
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
723
|
+
* Area".
|
|
724
|
+
*/
|
|
725
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
726
|
+
pe++;
|
|
727
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
728
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
729
|
+
|
|
730
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
731
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
722
732
|
}
|
|
733
|
+
|
|
734
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
735
|
+
pe += 5;
|
|
736
|
+
} else {
|
|
737
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
738
|
+
break;
|
|
723
739
|
}
|
|
724
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
725
|
-
unescape = buf;
|
|
726
740
|
}
|
|
741
|
+
|
|
742
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
743
|
+
buffer += unescape_len;
|
|
744
|
+
p = ++pe;
|
|
727
745
|
break;
|
|
746
|
+
}
|
|
728
747
|
default:
|
|
729
|
-
|
|
730
|
-
|
|
748
|
+
if ((unsigned char)*pe < 0x20) {
|
|
749
|
+
if (!config->allow_control_characters) {
|
|
750
|
+
if (*pe == '\n') {
|
|
751
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
752
|
+
}
|
|
753
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
754
|
+
}
|
|
755
|
+
} else {
|
|
756
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
757
|
+
}
|
|
758
|
+
break;
|
|
731
759
|
}
|
|
732
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
733
|
-
buffer += unescape_len;
|
|
734
|
-
p = ++pe;
|
|
735
760
|
}
|
|
761
|
+
#undef APPEND_CHAR
|
|
736
762
|
|
|
737
763
|
if (stringEnd > p) {
|
|
738
764
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -743,33 +769,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
743
769
|
if (symbolize) {
|
|
744
770
|
result = rb_str_intern(result);
|
|
745
771
|
} else if (intern) {
|
|
746
|
-
result =
|
|
772
|
+
result = rb_str_to_interned_str(result);
|
|
747
773
|
}
|
|
748
774
|
|
|
749
775
|
return result;
|
|
750
776
|
}
|
|
751
777
|
|
|
752
778
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
753
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
754
|
-
{
|
|
755
|
-
bool negative = false;
|
|
756
|
-
if (*p == '-') {
|
|
757
|
-
negative = true;
|
|
758
|
-
p++;
|
|
759
|
-
}
|
|
760
|
-
|
|
761
|
-
long long memo = 0;
|
|
762
|
-
while (p < pe) {
|
|
763
|
-
memo *= 10;
|
|
764
|
-
memo += *p - '0';
|
|
765
|
-
p++;
|
|
766
|
-
}
|
|
767
|
-
|
|
768
|
-
if (negative) {
|
|
769
|
-
memo = -memo;
|
|
770
|
-
}
|
|
771
|
-
return LL2NUM(memo);
|
|
772
|
-
}
|
|
773
779
|
|
|
774
780
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
775
781
|
{
|
|
@@ -783,17 +789,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
783
789
|
}
|
|
784
790
|
|
|
785
791
|
static inline VALUE
|
|
786
|
-
json_decode_integer(const char *start, const char *end)
|
|
792
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
787
793
|
{
|
|
788
|
-
|
|
789
|
-
if (
|
|
790
|
-
return
|
|
794
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
795
|
+
if (negative) {
|
|
796
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
791
797
|
}
|
|
792
|
-
return
|
|
798
|
+
return UINT64T2NUM(mantissa);
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
return json_decode_large_integer(start, end - start);
|
|
793
802
|
}
|
|
794
803
|
|
|
795
804
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
796
805
|
{
|
|
806
|
+
if (RB_LIKELY(len < 64)) {
|
|
807
|
+
char buffer[64];
|
|
808
|
+
MEMCPY(buffer, start, char, len);
|
|
809
|
+
buffer[len] = '\0';
|
|
810
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
811
|
+
}
|
|
812
|
+
|
|
797
813
|
VALUE buffer_v;
|
|
798
814
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
799
815
|
MEMCPY(buffer, start, char, len);
|
|
@@ -803,21 +819,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
803
819
|
return number;
|
|
804
820
|
}
|
|
805
821
|
|
|
806
|
-
|
|
822
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
823
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
824
|
+
*/
|
|
825
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
826
|
+
const char *start, const char *end)
|
|
807
827
|
{
|
|
808
|
-
long len = end - start;
|
|
809
|
-
|
|
810
828
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
811
|
-
VALUE text = rb_str_new(start,
|
|
829
|
+
VALUE text = rb_str_new(start, end - start);
|
|
812
830
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
813
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
814
|
-
char buffer[64];
|
|
815
|
-
MEMCPY(buffer, start, char, len);
|
|
816
|
-
buffer[len] = '\0';
|
|
817
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
-
} else {
|
|
819
|
-
return json_decode_large_float(start, len);
|
|
820
831
|
}
|
|
832
|
+
|
|
833
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
834
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
835
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
836
|
+
return json_decode_large_float(start, end - start);
|
|
837
|
+
}
|
|
838
|
+
|
|
839
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
821
840
|
}
|
|
822
841
|
|
|
823
842
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -903,20 +922,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
903
922
|
return object;
|
|
904
923
|
}
|
|
905
924
|
|
|
906
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
907
|
-
{
|
|
908
|
-
VALUE string;
|
|
909
|
-
bool intern = is_name || config->freeze;
|
|
910
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
911
|
-
if (escaped) {
|
|
912
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
913
|
-
} else {
|
|
914
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
915
|
-
}
|
|
916
|
-
|
|
917
|
-
return string;
|
|
918
|
-
}
|
|
919
|
-
|
|
920
925
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
921
926
|
{
|
|
922
927
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -939,17 +944,11 @@ static const bool string_scan_table[256] = {
|
|
|
939
944
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
940
945
|
};
|
|
941
946
|
|
|
942
|
-
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
943
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
944
|
-
#else
|
|
945
|
-
#define FORCE_INLINE
|
|
946
|
-
#endif
|
|
947
|
-
|
|
948
947
|
#ifdef HAVE_SIMD
|
|
949
948
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
950
949
|
#endif /* HAVE_SIMD */
|
|
951
950
|
|
|
952
|
-
static
|
|
951
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
953
952
|
{
|
|
954
953
|
#ifdef HAVE_SIMD
|
|
955
954
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -957,7 +956,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
957
956
|
uint64_t mask = 0;
|
|
958
957
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
959
958
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
960
|
-
return
|
|
959
|
+
return true;
|
|
961
960
|
}
|
|
962
961
|
|
|
963
962
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -965,64 +964,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
|
|
965
964
|
int mask = 0;
|
|
966
965
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
967
966
|
state->cursor += trailing_zeros(mask);
|
|
968
|
-
return
|
|
967
|
+
return true;
|
|
969
968
|
}
|
|
970
969
|
}
|
|
971
970
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
972
971
|
#endif /* HAVE_SIMD */
|
|
973
972
|
|
|
974
|
-
while (state
|
|
973
|
+
while (!eos(state)) {
|
|
975
974
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
976
|
-
return
|
|
975
|
+
return true;
|
|
977
976
|
}
|
|
978
|
-
|
|
977
|
+
state->cursor++;
|
|
979
978
|
}
|
|
980
|
-
return
|
|
979
|
+
return false;
|
|
981
980
|
}
|
|
982
981
|
|
|
983
|
-
static
|
|
982
|
+
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
|
|
984
983
|
{
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
984
|
+
const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
|
|
985
|
+
JSON_UnescapePositions positions = {
|
|
986
|
+
.size = 0,
|
|
987
|
+
.positions = backslashes,
|
|
988
|
+
.additional_backslashes = 0,
|
|
989
|
+
};
|
|
988
990
|
|
|
989
|
-
|
|
991
|
+
do {
|
|
990
992
|
switch (*state->cursor) {
|
|
991
993
|
case '"': {
|
|
992
|
-
VALUE string =
|
|
994
|
+
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
993
995
|
state->cursor++;
|
|
994
996
|
return json_push_value(state, config, string);
|
|
995
997
|
}
|
|
996
998
|
case '\\': {
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
999
|
+
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
1000
|
+
backslashes[positions.size] = state->cursor;
|
|
1001
|
+
positions.size++;
|
|
1002
|
+
} else {
|
|
1003
|
+
positions.additional_backslashes++;
|
|
1001
1004
|
}
|
|
1005
|
+
state->cursor++;
|
|
1002
1006
|
break;
|
|
1003
1007
|
}
|
|
1004
1008
|
default:
|
|
1005
|
-
|
|
1009
|
+
if (!config->allow_control_characters) {
|
|
1010
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
|
1011
|
+
}
|
|
1006
1012
|
break;
|
|
1007
1013
|
}
|
|
1008
1014
|
|
|
1009
1015
|
state->cursor++;
|
|
1010
|
-
}
|
|
1016
|
+
} while (string_scan(state));
|
|
1011
1017
|
|
|
1012
1018
|
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1013
1019
|
return Qfalse;
|
|
1014
1020
|
}
|
|
1015
1021
|
|
|
1022
|
+
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
1023
|
+
{
|
|
1024
|
+
state->cursor++;
|
|
1025
|
+
const char *start = state->cursor;
|
|
1026
|
+
|
|
1027
|
+
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1028
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1032
|
+
VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1033
|
+
state->cursor++;
|
|
1034
|
+
return json_push_value(state, config, string);
|
|
1035
|
+
}
|
|
1036
|
+
return json_parse_escaped_string(state, config, is_name, start);
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1040
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1041
|
+
// Additional References:
|
|
1042
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1043
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1044
|
+
static inline uint64_t decode_8digits_unrolled(uint64_t val) {
|
|
1045
|
+
const uint64_t mask = 0x000000FF000000FF;
|
|
1046
|
+
const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
|
|
1047
|
+
const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
|
|
1048
|
+
val -= 0x3030303030303030;
|
|
1049
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1050
|
+
val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
|
|
1051
|
+
return val;
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
static inline uint64_t decode_4digits_unrolled(uint32_t val) {
|
|
1055
|
+
const uint32_t mask = 0x000000FF;
|
|
1056
|
+
const uint32_t mul1 = 100;
|
|
1057
|
+
val -= 0x30303030;
|
|
1058
|
+
val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
|
|
1059
|
+
val = ((val & mask) * mul1) + (((val >> 16) & mask));
|
|
1060
|
+
return val;
|
|
1061
|
+
}
|
|
1062
|
+
#endif
|
|
1063
|
+
|
|
1064
|
+
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
|
|
1065
|
+
{
|
|
1066
|
+
const char *start = state->cursor;
|
|
1067
|
+
|
|
1068
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1069
|
+
while (rest(state) >= sizeof(uint64_t)) {
|
|
1070
|
+
uint64_t next_8bytes;
|
|
1071
|
+
memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
|
|
1072
|
+
|
|
1073
|
+
// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
|
|
1074
|
+
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
|
|
1075
|
+
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
|
|
1076
|
+
|
|
1077
|
+
if (match == 0x3333333333333333) { // 8 consecutive digits
|
|
1078
|
+
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
|
|
1079
|
+
state->cursor += 8;
|
|
1080
|
+
continue;
|
|
1081
|
+
}
|
|
1082
|
+
|
|
1083
|
+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
|
|
1084
|
+
|
|
1085
|
+
if (consecutive_digits >= 4) {
|
|
1086
|
+
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
|
|
1087
|
+
state->cursor += 4;
|
|
1088
|
+
consecutive_digits -= 4;
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
while (consecutive_digits) {
|
|
1092
|
+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
|
|
1093
|
+
consecutive_digits--;
|
|
1094
|
+
state->cursor++;
|
|
1095
|
+
}
|
|
1096
|
+
|
|
1097
|
+
return (int)(state->cursor - start);
|
|
1098
|
+
}
|
|
1099
|
+
#endif
|
|
1100
|
+
|
|
1101
|
+
char next_char;
|
|
1102
|
+
while (rb_isdigit(next_char = peek(state))) {
|
|
1103
|
+
*accumulator = *accumulator * 10 + (next_char - '0');
|
|
1104
|
+
state->cursor++;
|
|
1105
|
+
}
|
|
1106
|
+
return (int)(state->cursor - start);
|
|
1107
|
+
}
|
|
1108
|
+
|
|
1109
|
+
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
|
|
1110
|
+
{
|
|
1111
|
+
bool integer = true;
|
|
1112
|
+
const char first_digit = *state->cursor;
|
|
1113
|
+
|
|
1114
|
+
// Variables for Ryu optimization - extract digits during parsing
|
|
1115
|
+
int32_t exponent = 0;
|
|
1116
|
+
int decimal_point_pos = -1;
|
|
1117
|
+
uint64_t mantissa = 0;
|
|
1118
|
+
|
|
1119
|
+
// Parse integer part and extract mantissa digits
|
|
1120
|
+
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1121
|
+
|
|
1122
|
+
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1123
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
// Parse fractional part
|
|
1127
|
+
if (peek(state) == '.') {
|
|
1128
|
+
integer = false;
|
|
1129
|
+
decimal_point_pos = mantissa_digits; // Remember position of decimal point
|
|
1130
|
+
state->cursor++;
|
|
1131
|
+
|
|
1132
|
+
int fractional_digits = json_parse_digits(state, &mantissa);
|
|
1133
|
+
mantissa_digits += fractional_digits;
|
|
1134
|
+
|
|
1135
|
+
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1136
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1137
|
+
}
|
|
1138
|
+
}
|
|
1139
|
+
|
|
1140
|
+
// Parse exponent
|
|
1141
|
+
if (rb_tolower(peek(state)) == 'e') {
|
|
1142
|
+
integer = false;
|
|
1143
|
+
state->cursor++;
|
|
1144
|
+
|
|
1145
|
+
bool negative_exponent = false;
|
|
1146
|
+
const char next_char = peek(state);
|
|
1147
|
+
if (next_char == '-' || next_char == '+') {
|
|
1148
|
+
negative_exponent = next_char == '-';
|
|
1149
|
+
state->cursor++;
|
|
1150
|
+
}
|
|
1151
|
+
|
|
1152
|
+
uint64_t abs_exponent = 0;
|
|
1153
|
+
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1154
|
+
|
|
1155
|
+
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1156
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
|
1157
|
+
}
|
|
1158
|
+
|
|
1159
|
+
exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
if (integer) {
|
|
1163
|
+
return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1166
|
+
// Adjust exponent based on decimal point position
|
|
1167
|
+
if (decimal_point_pos >= 0) {
|
|
1168
|
+
exponent -= (mantissa_digits - decimal_point_pos);
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1172
|
+
}
|
|
1173
|
+
|
|
1174
|
+
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1175
|
+
{
|
|
1176
|
+
return json_parse_number(state, config, false, state->cursor);
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1179
|
+
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1180
|
+
{
|
|
1181
|
+
const char *start = state->cursor;
|
|
1182
|
+
state->cursor++;
|
|
1183
|
+
return json_parse_number(state, config, true, start);
|
|
1184
|
+
}
|
|
1185
|
+
|
|
1016
1186
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1017
1187
|
{
|
|
1018
1188
|
json_eat_whitespace(state);
|
|
1019
|
-
if (state->cursor >= state->end) {
|
|
1020
|
-
raise_parse_error("unexpected end of input", state);
|
|
1021
|
-
}
|
|
1022
1189
|
|
|
1023
|
-
switch (
|
|
1190
|
+
switch (peek(state)) {
|
|
1024
1191
|
case 'n':
|
|
1025
|
-
if ((state
|
|
1192
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1026
1193
|
state->cursor += 4;
|
|
1027
1194
|
return json_push_value(state, config, Qnil);
|
|
1028
1195
|
}
|
|
@@ -1030,7 +1197,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1030
1197
|
raise_parse_error("unexpected token %s", state);
|
|
1031
1198
|
break;
|
|
1032
1199
|
case 't':
|
|
1033
|
-
if ((state
|
|
1200
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
1034
1201
|
state->cursor += 4;
|
|
1035
1202
|
return json_push_value(state, config, Qtrue);
|
|
1036
1203
|
}
|
|
@@ -1039,7 +1206,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1039
1206
|
break;
|
|
1040
1207
|
case 'f':
|
|
1041
1208
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1042
|
-
if ((state
|
|
1209
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1043
1210
|
state->cursor += 5;
|
|
1044
1211
|
return json_push_value(state, config, Qfalse);
|
|
1045
1212
|
}
|
|
@@ -1048,7 +1215,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1048
1215
|
break;
|
|
1049
1216
|
case 'N':
|
|
1050
1217
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1051
|
-
if (config->allow_nan && (state
|
|
1218
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
1052
1219
|
state->cursor += 3;
|
|
1053
1220
|
return json_push_value(state, config, CNaN);
|
|
1054
1221
|
}
|
|
@@ -1056,16 +1223,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1056
1223
|
raise_parse_error("unexpected token %s", state);
|
|
1057
1224
|
break;
|
|
1058
1225
|
case 'I':
|
|
1059
|
-
if (config->allow_nan && (state
|
|
1226
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
1060
1227
|
state->cursor += 8;
|
|
1061
1228
|
return json_push_value(state, config, CInfinity);
|
|
1062
1229
|
}
|
|
1063
1230
|
|
|
1064
1231
|
raise_parse_error("unexpected token %s", state);
|
|
1065
1232
|
break;
|
|
1066
|
-
case '-':
|
|
1233
|
+
case '-': {
|
|
1067
1234
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1068
|
-
if ((state
|
|
1235
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1069
1236
|
if (config->allow_nan) {
|
|
1070
1237
|
state->cursor += 9;
|
|
1071
1238
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -1073,62 +1240,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1240
|
raise_parse_error("unexpected token %s", state);
|
|
1074
1241
|
}
|
|
1075
1242
|
}
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
bool integer = true;
|
|
1079
|
-
|
|
1080
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
1081
|
-
const char *start = state->cursor;
|
|
1082
|
-
state->cursor++;
|
|
1083
|
-
|
|
1084
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1085
|
-
state->cursor++;
|
|
1086
|
-
}
|
|
1087
|
-
|
|
1088
|
-
long integer_length = state->cursor - start;
|
|
1089
|
-
|
|
1090
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
1091
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1092
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
1093
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1094
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
1095
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
1096
|
-
}
|
|
1097
|
-
|
|
1098
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
1099
|
-
integer = false;
|
|
1100
|
-
state->cursor++;
|
|
1101
|
-
|
|
1102
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1103
|
-
raise_parse_error("invalid number: %s", state);
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1107
|
-
state->cursor++;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1110
|
-
|
|
1111
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
1112
|
-
integer = false;
|
|
1113
|
-
state->cursor++;
|
|
1114
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
1115
|
-
state->cursor++;
|
|
1116
|
-
}
|
|
1117
|
-
|
|
1118
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
1119
|
-
raise_parse_error("invalid number: %s", state);
|
|
1120
|
-
}
|
|
1121
|
-
|
|
1122
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1123
|
-
state->cursor++;
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
|
|
1127
|
-
if (integer) {
|
|
1128
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1129
|
-
}
|
|
1130
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1243
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1244
|
+
break;
|
|
1131
1245
|
}
|
|
1246
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1247
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1248
|
+
break;
|
|
1132
1249
|
case '"': {
|
|
1133
1250
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1134
1251
|
return json_parse_string(state, config, false);
|
|
@@ -1139,7 +1256,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1139
1256
|
json_eat_whitespace(state);
|
|
1140
1257
|
long stack_head = state->stack->head;
|
|
1141
1258
|
|
|
1142
|
-
if ((state
|
|
1259
|
+
if (peek(state) == ']') {
|
|
1143
1260
|
state->cursor++;
|
|
1144
1261
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1145
1262
|
} else {
|
|
@@ -1154,26 +1271,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1154
1271
|
while (true) {
|
|
1155
1272
|
json_eat_whitespace(state);
|
|
1156
1273
|
|
|
1157
|
-
|
|
1158
|
-
if (*state->cursor == ']') {
|
|
1159
|
-
state->cursor++;
|
|
1160
|
-
long count = state->stack->head - stack_head;
|
|
1161
|
-
state->current_nesting--;
|
|
1162
|
-
state->in_array--;
|
|
1163
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1164
|
-
}
|
|
1274
|
+
const char next_char = peek(state);
|
|
1165
1275
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
}
|
|
1276
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1277
|
+
state->cursor++;
|
|
1278
|
+
if (config->allow_trailing_comma) {
|
|
1279
|
+
json_eat_whitespace(state);
|
|
1280
|
+
if (peek(state) == ']') {
|
|
1281
|
+
continue;
|
|
1173
1282
|
}
|
|
1174
|
-
json_parse_any(state, config);
|
|
1175
|
-
continue;
|
|
1176
1283
|
}
|
|
1284
|
+
json_parse_any(state, config);
|
|
1285
|
+
continue;
|
|
1286
|
+
}
|
|
1287
|
+
|
|
1288
|
+
if (next_char == ']') {
|
|
1289
|
+
state->cursor++;
|
|
1290
|
+
long count = state->stack->head - stack_head;
|
|
1291
|
+
state->current_nesting--;
|
|
1292
|
+
state->in_array--;
|
|
1293
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1177
1294
|
}
|
|
1178
1295
|
|
|
1179
1296
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1187,7 +1304,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1187
1304
|
json_eat_whitespace(state);
|
|
1188
1305
|
long stack_head = state->stack->head;
|
|
1189
1306
|
|
|
1190
|
-
if ((state
|
|
1307
|
+
if (peek(state) == '}') {
|
|
1191
1308
|
state->cursor++;
|
|
1192
1309
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1193
1310
|
} else {
|
|
@@ -1196,13 +1313,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1196
1313
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1197
1314
|
}
|
|
1198
1315
|
|
|
1199
|
-
if (
|
|
1316
|
+
if (peek(state) != '"') {
|
|
1200
1317
|
raise_parse_error("expected object key, got %s", state);
|
|
1201
1318
|
}
|
|
1202
1319
|
json_parse_string(state, config, true);
|
|
1203
1320
|
|
|
1204
1321
|
json_eat_whitespace(state);
|
|
1205
|
-
if ((state
|
|
1322
|
+
if (peek(state) != ':') {
|
|
1206
1323
|
raise_parse_error("expected ':' after object key", state);
|
|
1207
1324
|
}
|
|
1208
1325
|
state->cursor++;
|
|
@@ -1213,46 +1330,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1213
1330
|
while (true) {
|
|
1214
1331
|
json_eat_whitespace(state);
|
|
1215
1332
|
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1333
|
+
const char next_char = peek(state);
|
|
1334
|
+
if (next_char == '}') {
|
|
1335
|
+
state->cursor++;
|
|
1336
|
+
state->current_nesting--;
|
|
1337
|
+
size_t count = state->stack->head - stack_head;
|
|
1221
1338
|
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1339
|
+
// Temporary rewind cursor in case an error is raised
|
|
1340
|
+
const char *final_cursor = state->cursor;
|
|
1341
|
+
state->cursor = object_start_cursor;
|
|
1342
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1343
|
+
state->cursor = final_cursor;
|
|
1227
1344
|
|
|
1228
|
-
|
|
1229
|
-
|
|
1345
|
+
return json_push_value(state, config, object);
|
|
1346
|
+
}
|
|
1230
1347
|
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1348
|
+
if (next_char == ',') {
|
|
1349
|
+
state->cursor++;
|
|
1350
|
+
json_eat_whitespace(state);
|
|
1234
1351
|
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
}
|
|
1352
|
+
if (config->allow_trailing_comma) {
|
|
1353
|
+
if (peek(state) == '}') {
|
|
1354
|
+
continue;
|
|
1239
1355
|
}
|
|
1356
|
+
}
|
|
1240
1357
|
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1358
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1359
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1360
|
+
}
|
|
1361
|
+
json_parse_string(state, config, true);
|
|
1245
1362
|
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1363
|
+
json_eat_whitespace(state);
|
|
1364
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1365
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1366
|
+
}
|
|
1367
|
+
state->cursor++;
|
|
1251
1368
|
|
|
1252
|
-
|
|
1369
|
+
json_parse_any(state, config);
|
|
1253
1370
|
|
|
1254
|
-
|
|
1255
|
-
}
|
|
1371
|
+
continue;
|
|
1256
1372
|
}
|
|
1257
1373
|
|
|
1258
1374
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1260,18 +1376,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1260
1376
|
break;
|
|
1261
1377
|
}
|
|
1262
1378
|
|
|
1379
|
+
case 0:
|
|
1380
|
+
raise_parse_error("unexpected end of input", state);
|
|
1381
|
+
break;
|
|
1382
|
+
|
|
1263
1383
|
default:
|
|
1264
1384
|
raise_parse_error("unexpected character: %s", state);
|
|
1265
1385
|
break;
|
|
1266
1386
|
}
|
|
1267
1387
|
|
|
1268
|
-
raise_parse_error("
|
|
1388
|
+
raise_parse_error("unreachable: %s", state);
|
|
1389
|
+
return Qundef;
|
|
1269
1390
|
}
|
|
1270
1391
|
|
|
1271
1392
|
static void json_ensure_eof(JSON_ParserState *state)
|
|
1272
1393
|
{
|
|
1273
1394
|
json_eat_whitespace(state);
|
|
1274
|
-
if (state
|
|
1395
|
+
if (!eos(state)) {
|
|
1275
1396
|
raise_parse_error("unexpected token at end of stream %s", state);
|
|
1276
1397
|
}
|
|
1277
1398
|
}
|
|
@@ -1308,14 +1429,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1308
1429
|
{
|
|
1309
1430
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1310
1431
|
|
|
1311
|
-
if (key == sym_max_nesting)
|
|
1312
|
-
else if (key == sym_allow_nan)
|
|
1313
|
-
else if (key == sym_allow_trailing_comma)
|
|
1314
|
-
else if (key ==
|
|
1315
|
-
else if (key ==
|
|
1316
|
-
else if (key ==
|
|
1317
|
-
else if (key ==
|
|
1318
|
-
else if (key ==
|
|
1432
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1433
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1434
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1435
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1436
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1437
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1438
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1439
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1440
|
+
else if (key == sym_decimal_class) {
|
|
1319
1441
|
if (RTEST(val)) {
|
|
1320
1442
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1321
1443
|
config->decimal_class = val;
|
|
@@ -1388,6 +1510,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1388
1510
|
*/
|
|
1389
1511
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1390
1512
|
{
|
|
1513
|
+
rb_check_frozen(self);
|
|
1391
1514
|
GET_PARSER_CONFIG;
|
|
1392
1515
|
|
|
1393
1516
|
parser_config_init(config, opts);
|
|
@@ -1483,7 +1606,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1483
1606
|
JSON_ParserConfig_memsize,
|
|
1484
1607
|
},
|
|
1485
1608
|
0, 0,
|
|
1486
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1609
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1487
1610
|
};
|
|
1488
1611
|
|
|
1489
1612
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1527,16 +1650,13 @@ void Init_parser(void)
|
|
|
1527
1650
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1528
1651
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1529
1652
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1653
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1530
1654
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1531
1655
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1532
1656
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1533
1657
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1534
1658
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1535
1659
|
|
|
1536
|
-
i_chr = rb_intern("chr");
|
|
1537
|
-
i_aset = rb_intern("[]=");
|
|
1538
|
-
i_aref = rb_intern("[]");
|
|
1539
|
-
i_leftshift = rb_intern("<<");
|
|
1540
1660
|
i_new = rb_intern("new");
|
|
1541
1661
|
i_try_convert = rb_intern("try_convert");
|
|
1542
1662
|
i_uminus = rb_intern("-@");
|