json 2.12.2 → 2.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +90 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +31 -54
- data/ext/json/ext/generator/extconf.rb +2 -26
- data/ext/json/ext/generator/generator.c +349 -335
- data/ext/json/ext/json.h +97 -0
- data/ext/json/ext/parser/extconf.rb +7 -2
- data/ext/json/ext/parser/parser.c +664 -401
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +191 -0
- data/ext/json/ext/vendor/fpconv.c +12 -11
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/json.gemspec +2 -3
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +78 -40
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +113 -63
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +55 -0
- metadata +8 -4
- data/ext/json/ext/generator/simd.h +0 -112
|
@@ -1,48 +1,21 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "
|
|
3
|
-
|
|
4
|
-
/* shims */
|
|
5
|
-
/* This is the fallback definition from Ruby 3.4 */
|
|
6
|
-
|
|
7
|
-
#ifndef RBIMPL_STDBOOL_H
|
|
8
|
-
#if defined(__cplusplus)
|
|
9
|
-
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
|
|
10
|
-
# include <cstdbool>
|
|
11
|
-
# endif
|
|
12
|
-
#elif defined(HAVE_STDBOOL_H)
|
|
13
|
-
# include <stdbool.h>
|
|
14
|
-
#elif !defined(HAVE__BOOL)
|
|
15
|
-
typedef unsigned char _Bool;
|
|
16
|
-
# define bool _Bool
|
|
17
|
-
# define true ((_Bool)+1)
|
|
18
|
-
# define false ((_Bool)+0)
|
|
19
|
-
# define __bool_true_false_are_defined
|
|
20
|
-
#endif
|
|
21
|
-
#endif
|
|
22
|
-
|
|
23
|
-
#ifndef RB_UNLIKELY
|
|
24
|
-
#define RB_UNLIKELY(expr) expr
|
|
25
|
-
#endif
|
|
26
|
-
|
|
27
|
-
#ifndef RB_LIKELY
|
|
28
|
-
#define RB_LIKELY(expr) expr
|
|
29
|
-
#endif
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
#include "../vendor/ryu.h"
|
|
3
|
+
#include "../simd/simd.h"
|
|
30
4
|
|
|
31
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
32
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
33
7
|
|
|
34
|
-
static ID
|
|
35
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
36
9
|
|
|
37
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
|
38
|
-
sym_decimal_class, sym_on_load;
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
|
|
11
|
+
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
39
12
|
|
|
40
13
|
static int binary_encindex;
|
|
41
14
|
static int utf8_encindex;
|
|
42
15
|
|
|
43
16
|
#ifndef HAVE_RB_HASH_BULK_INSERT
|
|
44
17
|
// For TruffleRuby
|
|
45
|
-
void
|
|
18
|
+
static void
|
|
46
19
|
rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
47
20
|
{
|
|
48
21
|
long index = 0;
|
|
@@ -59,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
|
|
|
59
32
|
#define rb_hash_new_capa(n) rb_hash_new()
|
|
60
33
|
#endif
|
|
61
34
|
|
|
35
|
+
#ifndef HAVE_RB_STR_TO_INTERNED_STR
|
|
36
|
+
static VALUE rb_str_to_interned_str(VALUE str)
|
|
37
|
+
{
|
|
38
|
+
return rb_funcall(rb_str_freeze(str), i_uminus, 0);
|
|
39
|
+
}
|
|
40
|
+
#endif
|
|
62
41
|
|
|
63
42
|
/* name cache */
|
|
64
43
|
|
|
@@ -104,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
104
83
|
cache->entries[index] = rstring;
|
|
105
84
|
}
|
|
106
85
|
|
|
107
|
-
|
|
86
|
+
#define rstring_cache_memcmp memcmp
|
|
87
|
+
|
|
88
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
89
|
+
#if __has_builtin(__builtin_bswap64)
|
|
90
|
+
#undef rstring_cache_memcmp
|
|
91
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
108
92
|
{
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
93
|
+
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
94
|
+
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
95
|
+
// inline a simpler memcmp outperforms calling the libc version.
|
|
96
|
+
long i = 0;
|
|
97
|
+
|
|
98
|
+
for (; i + 8 <= length; i += 8) {
|
|
99
|
+
uint64_t a, b;
|
|
100
|
+
memcpy(&a, str + i, 8);
|
|
101
|
+
memcpy(&b, rptr + i, 8);
|
|
102
|
+
if (a != b) {
|
|
103
|
+
a = __builtin_bswap64(a);
|
|
104
|
+
b = __builtin_bswap64(b);
|
|
105
|
+
return (a < b) ? -1 : 1;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
for (; i < length; i++) {
|
|
110
|
+
if (str[i] != rptr[i]) {
|
|
111
|
+
return (str[i] < rptr[i]) ? -1 : 1;
|
|
112
|
+
}
|
|
114
113
|
}
|
|
114
|
+
|
|
115
|
+
return 0;
|
|
115
116
|
}
|
|
117
|
+
#endif
|
|
118
|
+
#endif
|
|
116
119
|
|
|
117
|
-
static
|
|
120
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
118
121
|
{
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
// cache names above an arbitrary threshold.
|
|
122
|
-
return Qfalse;
|
|
123
|
-
}
|
|
122
|
+
const char *rstring_ptr;
|
|
123
|
+
long rstring_length;
|
|
124
124
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
125
|
+
RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
|
|
126
|
+
|
|
127
|
+
if (length == rstring_length) {
|
|
128
|
+
return rstring_cache_memcmp(str, rstring_ptr, length);
|
|
129
|
+
} else {
|
|
130
|
+
return (int)(length - rstring_length);
|
|
130
131
|
}
|
|
132
|
+
}
|
|
131
133
|
|
|
134
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
135
|
+
{
|
|
132
136
|
int low = 0;
|
|
133
137
|
int high = cache->length - 1;
|
|
134
|
-
int mid = 0;
|
|
135
|
-
int last_cmp = 0;
|
|
136
138
|
|
|
137
139
|
while (low <= high) {
|
|
138
|
-
mid = (high + low) >> 1;
|
|
140
|
+
int mid = (high + low) >> 1;
|
|
139
141
|
VALUE entry = cache->entries[mid];
|
|
140
|
-
|
|
142
|
+
int cmp = rstring_cache_cmp(str, length, entry);
|
|
141
143
|
|
|
142
|
-
if (
|
|
144
|
+
if (cmp == 0) {
|
|
143
145
|
return entry;
|
|
144
|
-
} else if (
|
|
146
|
+
} else if (cmp > 0) {
|
|
145
147
|
low = mid + 1;
|
|
146
148
|
} else {
|
|
147
149
|
high = mid - 1;
|
|
148
150
|
}
|
|
149
151
|
}
|
|
150
152
|
|
|
151
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
152
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
153
|
-
// But if they do, we have to fallback to the slow path.
|
|
154
|
-
return Qfalse;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
153
|
VALUE rstring = build_interned_string(str, length);
|
|
158
154
|
|
|
159
155
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
160
|
-
|
|
161
|
-
mid += 1;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
rvalue_cache_insert_at(cache, mid, rstring);
|
|
156
|
+
rvalue_cache_insert_at(cache, low, rstring);
|
|
165
157
|
}
|
|
166
158
|
return rstring;
|
|
167
159
|
}
|
|
168
160
|
|
|
169
161
|
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
170
162
|
{
|
|
171
|
-
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
|
|
172
|
-
// Common names aren't likely to be very long. So we just don't
|
|
173
|
-
// cache names above an arbitrary threshold.
|
|
174
|
-
return Qfalse;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
|
|
178
|
-
// Simple heuristic, if the first character isn't a letter,
|
|
179
|
-
// we're much less likely to see this string again.
|
|
180
|
-
// We mostly want to cache strings that are likely to be repeated.
|
|
181
|
-
return Qfalse;
|
|
182
|
-
}
|
|
183
|
-
|
|
184
163
|
int low = 0;
|
|
185
164
|
int high = cache->length - 1;
|
|
186
|
-
int mid = 0;
|
|
187
|
-
int last_cmp = 0;
|
|
188
165
|
|
|
189
166
|
while (low <= high) {
|
|
190
|
-
mid = (high + low) >> 1;
|
|
167
|
+
int mid = (high + low) >> 1;
|
|
191
168
|
VALUE entry = cache->entries[mid];
|
|
192
|
-
|
|
169
|
+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
|
193
170
|
|
|
194
|
-
if (
|
|
171
|
+
if (cmp == 0) {
|
|
195
172
|
return entry;
|
|
196
|
-
} else if (
|
|
173
|
+
} else if (cmp > 0) {
|
|
197
174
|
low = mid + 1;
|
|
198
175
|
} else {
|
|
199
176
|
high = mid - 1;
|
|
200
177
|
}
|
|
201
178
|
}
|
|
202
179
|
|
|
203
|
-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
|
|
204
|
-
// We assume the overwhelming majority of names don't need to be escaped.
|
|
205
|
-
// But if they do, we have to fallback to the slow path.
|
|
206
|
-
return Qfalse;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
180
|
VALUE rsymbol = build_symbol(str, length);
|
|
210
181
|
|
|
211
182
|
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
|
|
212
|
-
|
|
213
|
-
mid += 1;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
rvalue_cache_insert_at(cache, mid, rsymbol);
|
|
183
|
+
rvalue_cache_insert_at(cache, low, rsymbol);
|
|
217
184
|
}
|
|
218
185
|
return rsymbol;
|
|
219
186
|
}
|
|
@@ -328,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
328
295
|
}
|
|
329
296
|
}
|
|
330
297
|
|
|
331
|
-
|
|
332
|
-
#ifndef HAVE_STRNLEN
|
|
333
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
334
|
-
{
|
|
335
|
-
char *p;
|
|
336
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
337
|
-
}
|
|
338
|
-
#endif
|
|
339
|
-
|
|
340
298
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
341
299
|
{
|
|
342
300
|
int len = 1;
|
|
@@ -363,14 +321,21 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
|
363
321
|
return len;
|
|
364
322
|
}
|
|
365
323
|
|
|
324
|
+
enum duplicate_key_action {
|
|
325
|
+
JSON_DEPRECATED = 0,
|
|
326
|
+
JSON_IGNORE,
|
|
327
|
+
JSON_RAISE,
|
|
328
|
+
};
|
|
329
|
+
|
|
366
330
|
typedef struct JSON_ParserStruct {
|
|
367
331
|
VALUE on_load_proc;
|
|
368
332
|
VALUE decimal_class;
|
|
369
333
|
ID decimal_method_id;
|
|
334
|
+
enum duplicate_key_action on_duplicate_key;
|
|
370
335
|
int max_nesting;
|
|
371
336
|
bool allow_nan;
|
|
372
337
|
bool allow_trailing_comma;
|
|
373
|
-
bool
|
|
338
|
+
bool allow_control_characters;
|
|
374
339
|
bool symbolize_names;
|
|
375
340
|
bool freeze;
|
|
376
341
|
} JSON_ParserConfig;
|
|
@@ -386,15 +351,24 @@ typedef struct JSON_ParserStateStruct {
|
|
|
386
351
|
int current_nesting;
|
|
387
352
|
} JSON_ParserState;
|
|
388
353
|
|
|
354
|
+
static inline size_t rest(JSON_ParserState *state) {
|
|
355
|
+
return state->end - state->cursor;
|
|
356
|
+
}
|
|
389
357
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
static
|
|
358
|
+
static inline bool eos(JSON_ParserState *state) {
|
|
359
|
+
return state->cursor >= state->end;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
static inline char peek(JSON_ParserState *state)
|
|
395
363
|
{
|
|
396
|
-
|
|
364
|
+
if (RB_UNLIKELY(eos(state))) {
|
|
365
|
+
return 0;
|
|
366
|
+
}
|
|
367
|
+
return *state->cursor;
|
|
368
|
+
}
|
|
397
369
|
|
|
370
|
+
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
|
371
|
+
{
|
|
398
372
|
const char *cursor = state->cursor;
|
|
399
373
|
long column = 0;
|
|
400
374
|
long line = 1;
|
|
@@ -411,6 +385,29 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
411
385
|
line++;
|
|
412
386
|
}
|
|
413
387
|
}
|
|
388
|
+
*line_out = line;
|
|
389
|
+
*column_out = column;
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
393
|
+
{
|
|
394
|
+
long line, column;
|
|
395
|
+
cursor_position(state, &line, &column);
|
|
396
|
+
|
|
397
|
+
VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
|
|
398
|
+
rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
402
|
+
|
|
403
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
|
404
|
+
RBIMPL_ATTR_NORETURN()
|
|
405
|
+
#endif
|
|
406
|
+
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
407
|
+
{
|
|
408
|
+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
409
|
+
long line, column;
|
|
410
|
+
cursor_position(state, &line, &column);
|
|
414
411
|
|
|
415
412
|
const char *ptr = "EOF";
|
|
416
413
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -505,61 +502,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
|
|
|
505
502
|
|
|
506
503
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
507
504
|
|
|
508
|
-
static const bool whitespace[256] = {
|
|
509
|
-
[' '] = 1,
|
|
510
|
-
['\t'] = 1,
|
|
511
|
-
['\n'] = 1,
|
|
512
|
-
['\r'] = 1,
|
|
513
|
-
['/'] = 1,
|
|
514
|
-
};
|
|
515
|
-
|
|
516
505
|
static void
|
|
517
506
|
json_eat_comments(JSON_ParserState *state)
|
|
518
507
|
{
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
508
|
+
const char *start = state->cursor;
|
|
509
|
+
state->cursor++;
|
|
510
|
+
|
|
511
|
+
switch (peek(state)) {
|
|
512
|
+
case '/': {
|
|
513
|
+
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
|
514
|
+
if (!state->cursor) {
|
|
515
|
+
state->cursor = state->end;
|
|
516
|
+
} else {
|
|
517
|
+
state->cursor++;
|
|
529
518
|
}
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
519
|
+
break;
|
|
520
|
+
}
|
|
521
|
+
case '*': {
|
|
522
|
+
state->cursor++;
|
|
523
|
+
|
|
524
|
+
while (true) {
|
|
525
|
+
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
526
|
+
if (!next_match) {
|
|
527
|
+
raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
state->cursor = next_match + 1;
|
|
531
|
+
if (peek(state) == '/') {
|
|
532
|
+
state->cursor++;
|
|
533
|
+
break;
|
|
543
534
|
}
|
|
544
|
-
break;
|
|
545
535
|
}
|
|
546
|
-
|
|
547
|
-
raise_parse_error("unexpected token %s", state);
|
|
548
|
-
break;
|
|
536
|
+
break;
|
|
549
537
|
}
|
|
550
|
-
|
|
551
|
-
|
|
538
|
+
default:
|
|
539
|
+
raise_parse_error_at("unexpected token %s", state, start);
|
|
540
|
+
break;
|
|
552
541
|
}
|
|
553
542
|
}
|
|
554
543
|
|
|
555
|
-
static
|
|
544
|
+
ALWAYS_INLINE(static) void
|
|
556
545
|
json_eat_whitespace(JSON_ParserState *state)
|
|
557
546
|
{
|
|
558
|
-
while (
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
547
|
+
while (true) {
|
|
548
|
+
switch (peek(state)) {
|
|
549
|
+
case ' ':
|
|
550
|
+
state->cursor++;
|
|
551
|
+
break;
|
|
552
|
+
case '\n':
|
|
553
|
+
state->cursor++;
|
|
554
|
+
|
|
555
|
+
// Heuristic: if we see a newline, there is likely consecutive spaces after it.
|
|
556
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
557
|
+
while (rest(state) > 8) {
|
|
558
|
+
uint64_t chunk;
|
|
559
|
+
memcpy(&chunk, state->cursor, sizeof(uint64_t));
|
|
560
|
+
if (chunk == 0x2020202020202020) {
|
|
561
|
+
state->cursor += 8;
|
|
562
|
+
continue;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
|
|
566
|
+
state->cursor += consecutive_spaces;
|
|
567
|
+
break;
|
|
568
|
+
}
|
|
569
|
+
#endif
|
|
570
|
+
break;
|
|
571
|
+
case '\t':
|
|
572
|
+
case '\r':
|
|
573
|
+
state->cursor++;
|
|
574
|
+
break;
|
|
575
|
+
case '/':
|
|
576
|
+
json_eat_comments(state);
|
|
577
|
+
break;
|
|
578
|
+
|
|
579
|
+
default:
|
|
580
|
+
return;
|
|
563
581
|
}
|
|
564
582
|
}
|
|
565
583
|
}
|
|
@@ -590,11 +608,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
|
|
590
608
|
return result;
|
|
591
609
|
}
|
|
592
610
|
|
|
593
|
-
static inline
|
|
611
|
+
static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
612
|
+
{
|
|
613
|
+
// We mostly want to cache strings that are likely to be repeated.
|
|
614
|
+
// Simple heuristics:
|
|
615
|
+
// - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
|
|
616
|
+
// - If the first character isn't a letter, we're much less likely to see this string again.
|
|
617
|
+
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
594
621
|
{
|
|
622
|
+
bool intern = is_name || config->freeze;
|
|
623
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
595
624
|
size_t bufferSize = stringEnd - string;
|
|
596
625
|
|
|
597
|
-
if (is_name && state->in_array) {
|
|
626
|
+
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
598
627
|
VALUE cached_key;
|
|
599
628
|
if (RB_UNLIKELY(symbolize)) {
|
|
600
629
|
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
|
|
@@ -610,60 +639,73 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
610
639
|
return build_string(string, stringEnd, intern, symbolize);
|
|
611
640
|
}
|
|
612
641
|
|
|
613
|
-
|
|
614
|
-
{
|
|
615
|
-
|
|
616
|
-
const char
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
char buf[4];
|
|
642
|
+
#define JSON_MAX_UNESCAPE_POSITIONS 16
|
|
643
|
+
typedef struct _json_unescape_positions {
|
|
644
|
+
long size;
|
|
645
|
+
const char **positions;
|
|
646
|
+
bool has_more;
|
|
647
|
+
} JSON_UnescapePositions;
|
|
620
648
|
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
649
|
+
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
650
|
+
{
|
|
651
|
+
while (positions->size) {
|
|
652
|
+
positions->size--;
|
|
653
|
+
const char *next_position = positions->positions[0];
|
|
654
|
+
positions->positions++;
|
|
655
|
+
if (next_position >= pe) {
|
|
656
|
+
return next_position;
|
|
627
657
|
}
|
|
658
|
+
}
|
|
628
659
|
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
}
|
|
660
|
+
if (positions->has_more) {
|
|
661
|
+
return memchr(pe, '\\', stringEnd - pe);
|
|
632
662
|
}
|
|
633
663
|
|
|
664
|
+
return NULL;
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
668
|
+
{
|
|
669
|
+
bool intern = is_name || config->freeze;
|
|
670
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
671
|
+
size_t bufferSize = stringEnd - string;
|
|
672
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
673
|
+
char *buffer;
|
|
674
|
+
|
|
634
675
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
635
676
|
rb_enc_associate_index(result, utf8_encindex);
|
|
636
677
|
buffer = RSTRING_PTR(result);
|
|
637
678
|
bufferStart = buffer;
|
|
638
679
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
680
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
681
|
+
|
|
682
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
642
683
|
if (pe > p) {
|
|
643
684
|
MEMCPY(buffer, p, char, pe - p);
|
|
644
685
|
buffer += pe - p;
|
|
645
686
|
}
|
|
646
687
|
switch (*++pe) {
|
|
688
|
+
case '"':
|
|
689
|
+
case '/':
|
|
690
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
691
|
+
break;
|
|
692
|
+
case '\\':
|
|
693
|
+
APPEND_CHAR('\\');
|
|
694
|
+
break;
|
|
647
695
|
case 'n':
|
|
648
|
-
|
|
696
|
+
APPEND_CHAR('\n');
|
|
649
697
|
break;
|
|
650
698
|
case 'r':
|
|
651
|
-
|
|
699
|
+
APPEND_CHAR('\r');
|
|
652
700
|
break;
|
|
653
701
|
case 't':
|
|
654
|
-
|
|
655
|
-
break;
|
|
656
|
-
case '"':
|
|
657
|
-
unescape = (char *) "\"";
|
|
658
|
-
break;
|
|
659
|
-
case '\\':
|
|
660
|
-
unescape = (char *) "\\";
|
|
702
|
+
APPEND_CHAR('\t');
|
|
661
703
|
break;
|
|
662
704
|
case 'b':
|
|
663
|
-
|
|
705
|
+
APPEND_CHAR('\b');
|
|
664
706
|
break;
|
|
665
707
|
case 'f':
|
|
666
|
-
|
|
708
|
+
APPEND_CHAR('\f');
|
|
667
709
|
break;
|
|
668
710
|
case 'u':
|
|
669
711
|
if (pe > stringEnd - 5) {
|
|
@@ -688,26 +730,42 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
688
730
|
}
|
|
689
731
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
690
732
|
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
733
|
+
|
|
734
|
+
if ((sur & 0xFC00) != 0xDC00) {
|
|
735
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
736
|
+
}
|
|
737
|
+
|
|
691
738
|
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
692
739
|
| (sur & 0x3FF));
|
|
693
740
|
pe += 5;
|
|
694
741
|
} else {
|
|
695
|
-
|
|
742
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
696
743
|
break;
|
|
697
744
|
}
|
|
698
745
|
}
|
|
699
|
-
|
|
700
|
-
|
|
746
|
+
|
|
747
|
+
char buf[4];
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
749
|
+
MEMCPY(buffer, buf, char, unescape_len);
|
|
750
|
+
buffer += unescape_len;
|
|
751
|
+
p = ++pe;
|
|
701
752
|
}
|
|
702
753
|
break;
|
|
703
754
|
default:
|
|
704
|
-
|
|
705
|
-
|
|
755
|
+
if ((unsigned char)*pe < 0x20) {
|
|
756
|
+
if (!config->allow_control_characters) {
|
|
757
|
+
if (*pe == '\n') {
|
|
758
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
759
|
+
}
|
|
760
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
761
|
+
}
|
|
762
|
+
} else {
|
|
763
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
764
|
+
}
|
|
765
|
+
break;
|
|
706
766
|
}
|
|
707
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
708
|
-
buffer += unescape_len;
|
|
709
|
-
p = ++pe;
|
|
710
767
|
}
|
|
768
|
+
#undef APPEND_CHAR
|
|
711
769
|
|
|
712
770
|
if (stringEnd > p) {
|
|
713
771
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -718,33 +776,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
718
776
|
if (symbolize) {
|
|
719
777
|
result = rb_str_intern(result);
|
|
720
778
|
} else if (intern) {
|
|
721
|
-
result =
|
|
779
|
+
result = rb_str_to_interned_str(result);
|
|
722
780
|
}
|
|
723
781
|
|
|
724
782
|
return result;
|
|
725
783
|
}
|
|
726
784
|
|
|
727
785
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
728
|
-
static inline VALUE fast_decode_integer(const char *p, const char *pe)
|
|
729
|
-
{
|
|
730
|
-
bool negative = false;
|
|
731
|
-
if (*p == '-') {
|
|
732
|
-
negative = true;
|
|
733
|
-
p++;
|
|
734
|
-
}
|
|
735
|
-
|
|
736
|
-
long long memo = 0;
|
|
737
|
-
while (p < pe) {
|
|
738
|
-
memo *= 10;
|
|
739
|
-
memo += *p - '0';
|
|
740
|
-
p++;
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
if (negative) {
|
|
744
|
-
memo = -memo;
|
|
745
|
-
}
|
|
746
|
-
return LL2NUM(memo);
|
|
747
|
-
}
|
|
748
786
|
|
|
749
787
|
static VALUE json_decode_large_integer(const char *start, long len)
|
|
750
788
|
{
|
|
@@ -758,17 +796,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
|
|
|
758
796
|
}
|
|
759
797
|
|
|
760
798
|
static inline VALUE
|
|
761
|
-
json_decode_integer(const char *start, const char *end)
|
|
799
|
+
json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
762
800
|
{
|
|
763
|
-
|
|
764
|
-
if (
|
|
765
|
-
return
|
|
801
|
+
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
802
|
+
if (negative) {
|
|
803
|
+
return INT64T2NUM(-((int64_t)mantissa));
|
|
766
804
|
}
|
|
767
|
-
return
|
|
805
|
+
return UINT64T2NUM(mantissa);
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
return json_decode_large_integer(start, end - start);
|
|
768
809
|
}
|
|
769
810
|
|
|
770
811
|
static VALUE json_decode_large_float(const char *start, long len)
|
|
771
812
|
{
|
|
813
|
+
if (RB_LIKELY(len < 64)) {
|
|
814
|
+
char buffer[64];
|
|
815
|
+
MEMCPY(buffer, start, char, len);
|
|
816
|
+
buffer[len] = '\0';
|
|
817
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
+
}
|
|
819
|
+
|
|
772
820
|
VALUE buffer_v;
|
|
773
821
|
char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
|
|
774
822
|
MEMCPY(buffer, start, char, len);
|
|
@@ -778,21 +826,24 @@ static VALUE json_decode_large_float(const char *start, long len)
|
|
|
778
826
|
return number;
|
|
779
827
|
}
|
|
780
828
|
|
|
781
|
-
|
|
829
|
+
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
830
|
+
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
831
|
+
*/
|
|
832
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
|
|
833
|
+
const char *start, const char *end)
|
|
782
834
|
{
|
|
783
|
-
long len = end - start;
|
|
784
|
-
|
|
785
835
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
786
|
-
VALUE text = rb_str_new(start,
|
|
836
|
+
VALUE text = rb_str_new(start, end - start);
|
|
787
837
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
788
|
-
} else if (RB_LIKELY(len < 64)) {
|
|
789
|
-
char buffer[64];
|
|
790
|
-
MEMCPY(buffer, start, char, len);
|
|
791
|
-
buffer[len] = '\0';
|
|
792
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
793
|
-
} else {
|
|
794
|
-
return json_decode_large_float(start, len);
|
|
795
838
|
}
|
|
839
|
+
|
|
840
|
+
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
841
|
+
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
842
|
+
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
843
|
+
return json_decode_large_float(start, end - start);
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
796
847
|
}
|
|
797
848
|
|
|
798
849
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -807,32 +858,75 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
|
|
|
807
858
|
return array;
|
|
808
859
|
}
|
|
809
860
|
|
|
810
|
-
static
|
|
861
|
+
static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
811
862
|
{
|
|
812
|
-
VALUE
|
|
813
|
-
|
|
863
|
+
VALUE set = rb_hash_new_capa(count / 2);
|
|
864
|
+
for (size_t index = 0; index < count; index += 2) {
|
|
865
|
+
size_t before = RHASH_SIZE(set);
|
|
866
|
+
VALUE key = pairs[index];
|
|
867
|
+
rb_hash_aset(set, key, Qtrue);
|
|
868
|
+
if (RHASH_SIZE(set) == before) {
|
|
869
|
+
if (RB_SYMBOL_P(key)) {
|
|
870
|
+
return rb_sym2str(key);
|
|
871
|
+
}
|
|
872
|
+
return key;
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
return Qfalse;
|
|
876
|
+
}
|
|
814
877
|
|
|
815
|
-
|
|
878
|
+
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
879
|
+
{
|
|
880
|
+
VALUE message = rb_sprintf(
|
|
881
|
+
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
882
|
+
rb_inspect(duplicate_key)
|
|
883
|
+
);
|
|
816
884
|
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
885
|
+
emit_parse_warning(RSTRING_PTR(message), state);
|
|
886
|
+
RB_GC_GUARD(message);
|
|
887
|
+
}
|
|
820
888
|
|
|
821
|
-
|
|
889
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
|
890
|
+
RBIMPL_ATTR_NORETURN()
|
|
891
|
+
#endif
|
|
892
|
+
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
893
|
+
{
|
|
894
|
+
VALUE message = rb_sprintf(
|
|
895
|
+
"duplicate key %"PRIsVALUE,
|
|
896
|
+
rb_inspect(duplicate_key)
|
|
897
|
+
);
|
|
898
|
+
|
|
899
|
+
raise_parse_error(RSTRING_PTR(message), state);
|
|
900
|
+
RB_GC_GUARD(message);
|
|
822
901
|
}
|
|
823
902
|
|
|
824
|
-
static inline VALUE
|
|
903
|
+
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
|
825
904
|
{
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
905
|
+
size_t entries_count = count / 2;
|
|
906
|
+
VALUE object = rb_hash_new_capa(entries_count);
|
|
907
|
+
const VALUE *pairs = rvalue_stack_peek(state->stack, count);
|
|
908
|
+
rb_hash_bulk_insert(count, pairs, object);
|
|
909
|
+
|
|
910
|
+
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
|
|
911
|
+
switch (config->on_duplicate_key) {
|
|
912
|
+
case JSON_IGNORE:
|
|
913
|
+
break;
|
|
914
|
+
case JSON_DEPRECATED:
|
|
915
|
+
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
|
|
916
|
+
break;
|
|
917
|
+
case JSON_RAISE:
|
|
918
|
+
raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
|
|
919
|
+
break;
|
|
920
|
+
}
|
|
833
921
|
}
|
|
834
922
|
|
|
835
|
-
|
|
923
|
+
rvalue_stack_pop(state->stack, count);
|
|
924
|
+
|
|
925
|
+
if (config->freeze) {
|
|
926
|
+
RB_OBJ_FREEZE(object);
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
return object;
|
|
836
930
|
}
|
|
837
931
|
|
|
838
932
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
@@ -844,7 +938,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
|
|
|
844
938
|
return value;
|
|
845
939
|
}
|
|
846
940
|
|
|
847
|
-
static const bool
|
|
941
|
+
static const bool string_scan_table[256] = {
|
|
848
942
|
// ASCII Control Characters
|
|
849
943
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
850
944
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
@@ -857,51 +951,252 @@ static const bool string_scan[256] = {
|
|
|
857
951
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
858
952
|
};
|
|
859
953
|
|
|
860
|
-
|
|
954
|
+
#ifdef HAVE_SIMD
|
|
955
|
+
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
956
|
+
#endif /* HAVE_SIMD */
|
|
957
|
+
|
|
958
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
861
959
|
{
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
bool escaped = false;
|
|
960
|
+
#ifdef HAVE_SIMD
|
|
961
|
+
#if defined(HAVE_SIMD_NEON)
|
|
865
962
|
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
963
|
+
uint64_t mask = 0;
|
|
964
|
+
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
965
|
+
state->cursor += trailing_zeros64(mask) >> 2;
|
|
966
|
+
return true;
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
970
|
+
if (simd_impl == SIMD_SSE2) {
|
|
971
|
+
int mask = 0;
|
|
972
|
+
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
973
|
+
state->cursor += trailing_zeros(mask);
|
|
974
|
+
return true;
|
|
975
|
+
}
|
|
976
|
+
}
|
|
977
|
+
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
978
|
+
#endif /* HAVE_SIMD */
|
|
979
|
+
|
|
980
|
+
while (!eos(state)) {
|
|
981
|
+
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
982
|
+
return true;
|
|
983
|
+
}
|
|
984
|
+
state->cursor++;
|
|
985
|
+
}
|
|
986
|
+
return false;
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
/*
 * Slow path for string literals that contain something other than plain
 * characters. On entry state->cursor rests on the first byte that stopped
 * string_scan(); `start` points at the first byte of the string contents.
 *
 * Backslash positions are recorded (up to JSON_MAX_UNESCAPE_POSITIONS) and
 * handed to json_string_unescape() once the closing quote is found. Any
 * other stopping byte raises a parse error unless
 * config->allow_control_characters is set. Running out of input raises
 * "unexpected end of input".
 */
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
{
    const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
    JSON_UnescapePositions positions = {
        .size = 0,
        .positions = backslashes,
        .has_more = false,
    };

    do {
        const char stop_byte = *state->cursor;

        if (stop_byte == '"') {
            // Closing quote: decode everything between start and the quote.
            VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
            state->cursor++;
            return json_push_value(state, config, string);
        }

        if (stop_byte == '\\') {
            if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
                backslashes[positions.size] = state->cursor;
                positions.size++;
            } else {
                // Too many escapes to remember individually.
                positions.has_more = true;
            }
            // Step over the backslash; the shared increment below then
            // steps over the escaped character.
            state->cursor++;
        } else if (!config->allow_control_characters) {
            raise_parse_error("invalid ASCII control character in string: %s", state);
        }

        state->cursor++;
    } while (string_scan(state));

    raise_parse_error("unexpected end of input, expected closing \"", state);
    return Qfalse;
}
|
|
894
1028
|
|
|
1029
|
+
/*
 * Parse a string literal. On entry state->cursor rests on the opening quote.
 *
 * When the first byte that stops string_scan() is the closing quote the
 * contents go through json_string_fastpath(); otherwise the work is handed
 * to json_parse_escaped_string(). `is_name` is forwarded unchanged to
 * whichever decoder runs.
 */
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
    // Skip the opening quote and remember where the contents begin.
    const char *start = ++state->cursor;

    if (RB_UNLIKELY(!string_scan(state))) {
        raise_parse_error("unexpected end of input, expected closing \"", state);
    }

    if (RB_UNLIKELY(*state->cursor != '"')) {
        return json_parse_escaped_string(state, config, is_name, start);
    }

    VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
    state->cursor++;
    return json_push_value(state, config, string);
}
|
|
1045
|
+
|
|
1046
|
+
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1047
|
+
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1048
|
+
// Additional References:
|
|
1049
|
+
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
|
|
1050
|
+
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
|
|
1051
|
+
/*
 * SWAR conversion of eight ASCII digits packed in a 64-bit word into their
 * numeric value. The first (most significant) decimal digit is expected in
 * the least significant byte of `val`.
 *
 * The result is only meaningful when every byte of `val` is in '0'..'9'.
 */
static inline uint64_t decode_8digits_unrolled(uint64_t val)
{
    const uint64_t ascii_zeros = 0x3030303030303030;
    const uint64_t pair_mask = 0x000000FF000000FF;
    const uint64_t weights_a = 0x000F424000000064; // 100 + (1000000ULL << 32)
    const uint64_t weights_b = 0x0000271000000001; // 1 + (10000ULL << 32)

    // Turn each ASCII byte into its digit value.
    const uint64_t digits = val - ascii_zeros;

    // Every even byte now holds a two-digit number: 10 * d[i] + d[i + 1].
    const uint64_t pairs = (digits * 10) + (digits >> 8); // (digits * 2561) >> 8

    // Weight the four pairs so that their sum lands in the upper 32 bits.
    const uint64_t pairs_0_and_2 = (pairs & pair_mask) * weights_a;
    const uint64_t pairs_1_and_3 = ((pairs >> 16) & pair_mask) * weights_b;

    return (pairs_0_and_2 + pairs_1_and_3) >> 32;
}
|
|
1060
|
+
|
|
1061
|
+
/*
 * SWAR conversion of four ASCII digits packed in a 32-bit word into their
 * numeric value. The first (most significant) decimal digit is expected in
 * the least significant byte of `val`.
 *
 * The result is only meaningful when every byte of `val` is in '0'..'9'.
 */
static inline uint64_t decode_4digits_unrolled(uint32_t val)
{
    const uint32_t ascii_zeros = 0x30303030;
    const uint32_t byte_mask = 0x000000FF;
    const uint32_t hundreds = 100;

    // Turn each ASCII byte into its digit value.
    const uint32_t digits = val - ascii_zeros;

    // Bytes 0 and 2 now hold the two-digit numbers d0d1 and d2d3.
    const uint32_t pairs = (digits * 10) + (digits >> 8); // (digits * 2561) >> 8

    const uint32_t leading_pair = pairs & byte_mask;
    const uint32_t trailing_pair = (pairs >> 16) & byte_mask;

    return (leading_pair * hundreds) + trailing_pair;
}
|
|
1069
|
+
#endif
|
|
1070
|
+
|
|
1071
|
+
/*
 * Consume a run of ASCII digits at state->cursor, folding each one into
 * *accumulator (accumulator = accumulator * 10 + digit). The accumulator is
 * not reset here, so successive calls keep extending the same number, and
 * no overflow check is performed.
 *
 * Returns the number of digits consumed (0 when the cursor is not on a digit).
 */
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
    const char *const first = state->cursor;

#if JSON_CPU_LITTLE_ENDIAN_64BITS
    while (rest(state) >= sizeof(uint64_t)) {
        uint64_t chunk;
        memcpy(&chunk, state->cursor, sizeof(chunk));

        // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
        // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
        // A byte is an ASCII digit exactly when its high nibble is 3 and the
        // high nibble of (byte + 6) is still 3; both are packed into one byte.
        const uint64_t signature = (chunk & 0xF0F0F0F0F0F0F0F0) | (((chunk + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);

        if (signature != 0x3333333333333333) {
            // Fewer than 8 digits: the lowest mismatching byte marks the end of the run.
            uint32_t run = trailing_zeros64(signature ^ 0x3333333333333333) / CHAR_BIT;

            if (run >= 4) {
                *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)chunk);
                state->cursor += 4;
                run -= 4;
            }

            for (; run; run--) {
                *accumulator = *accumulator * 10 + (*state->cursor - '0');
                state->cursor++;
            }

            return (int)(state->cursor - first);
        }

        // 8 consecutive digits
        *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(chunk);
        state->cursor += 8;
    }
#endif

    // Byte-at-a-time path: the only path on other platforms, and the tail
    // (fewer than 8 bytes of input left) on little-endian 64-bit ones.
    char current = peek(state);
    while (rb_isdigit(current)) {
        *accumulator = *accumulator * 10 + (current - '0');
        state->cursor++;
        current = peek(state);
    }
    return (int)(state->cursor - first);
}
|
|
1115
|
+
|
|
1116
|
+
/*
 * Parse a JSON number whose first digit is expected at state->cursor.
 *
 * state    - parser state; the cursor is advanced past the number.
 * config   - parser configuration, forwarded to json_decode_float().
 * negative - true when the caller already consumed a leading '-'.
 * start    - first byte of the number text (the '-' when negative); used for
 *            error reporting and passed on to the decode helpers.
 *
 * Raises a parse error ("invalid number") for a leading zero followed by
 * more digits, a '-' with no digits, a '.' with no digits after it, or an
 * exponent marker with no digits after it.
 *
 * Returns the value produced by json_decode_integer() when the number has
 * neither fraction nor exponent, otherwise the one from json_decode_float().
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    bool integer = true;
    const char first_digit = *state->cursor;

    // Variables for Ryu optimization - extract digits during parsing
    int32_t exponent = 0;
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if (peek(state) == '.') {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        // Fraction digits keep extending the same mantissa accumulator.
        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if (rb_tolower(peek(state)) == 'e') {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        const char next_char = peek(state);
        if (next_char == '-' || next_char == '+') {
            negative_exponent = next_char == '-';
            state->cursor++;
        }

        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // Saturate the exponent magnitude before narrowing it to int32_t.
        // Without this, an exponent such as 4294967297 would be truncated to
        // 1 by the cast and silently produce a wrong finite value. The bound
        // is far beyond the double range and leaves headroom for the
        // decimal-point adjustment below.
        // NOTE(review): assumes json_decode_float() maps such magnitudes to
        // Infinity / zero - confirm against its implementation.
        const uint64_t max_abs_exponent = 1000000000;

        if (RB_UNLIKELY(exponent_digits > 19)) {
            // Up to 19 significant digits always fit in a uint64_t; beyond
            // that the accumulator may have wrapped. Leading zeros do not
            // count, so skip them before deciding.
            const char *digit = state->cursor - exponent_digits;
            int significant_digits = exponent_digits;
            while (significant_digits > 0 && *digit == '0') {
                digit++;
                significant_digits--;
            }
            if (significant_digits > 19) {
                abs_exponent = max_abs_exponent;
            }
        }

        if (RB_UNLIKELY(abs_exponent > max_abs_exponent)) {
            abs_exponent = max_abs_exponent;
        }

        exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
|
|
1180
|
+
|
|
1181
|
+
/*
 * Parse an unsigned number: the number text starts at the current cursor.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *number_start = state->cursor;
    return json_parse_number(state, config, false, number_start);
}
|
|
1185
|
+
|
|
1186
|
+
/*
 * Parse a number introduced by '-': the cursor is on the sign on entry.
 * The sign position is kept as the start of the number text and the cursor
 * is moved onto the first digit before delegating.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *sign_position = state->cursor++;
    return json_parse_number(state, config, true, sign_position);
}
|
|
1192
|
+
|
|
895
1193
|
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
896
1194
|
{
|
|
897
1195
|
json_eat_whitespace(state);
|
|
898
|
-
if (state->cursor >= state->end) {
|
|
899
|
-
raise_parse_error("unexpected end of input", state);
|
|
900
|
-
}
|
|
901
1196
|
|
|
902
|
-
switch (
|
|
1197
|
+
switch (peek(state)) {
|
|
903
1198
|
case 'n':
|
|
904
|
-
if ((state
|
|
1199
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
905
1200
|
state->cursor += 4;
|
|
906
1201
|
return json_push_value(state, config, Qnil);
|
|
907
1202
|
}
|
|
@@ -909,7 +1204,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
909
1204
|
raise_parse_error("unexpected token %s", state);
|
|
910
1205
|
break;
|
|
911
1206
|
case 't':
|
|
912
|
-
if ((state
|
|
1207
|
+
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
|
|
913
1208
|
state->cursor += 4;
|
|
914
1209
|
return json_push_value(state, config, Qtrue);
|
|
915
1210
|
}
|
|
@@ -918,7 +1213,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
918
1213
|
break;
|
|
919
1214
|
case 'f':
|
|
920
1215
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
921
|
-
if ((state
|
|
1216
|
+
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
922
1217
|
state->cursor += 5;
|
|
923
1218
|
return json_push_value(state, config, Qfalse);
|
|
924
1219
|
}
|
|
@@ -927,7 +1222,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
927
1222
|
break;
|
|
928
1223
|
case 'N':
|
|
929
1224
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
930
|
-
if (config->allow_nan && (state
|
|
1225
|
+
if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
|
|
931
1226
|
state->cursor += 3;
|
|
932
1227
|
return json_push_value(state, config, CNaN);
|
|
933
1228
|
}
|
|
@@ -935,16 +1230,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
935
1230
|
raise_parse_error("unexpected token %s", state);
|
|
936
1231
|
break;
|
|
937
1232
|
case 'I':
|
|
938
|
-
if (config->allow_nan && (state
|
|
1233
|
+
if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
|
939
1234
|
state->cursor += 8;
|
|
940
1235
|
return json_push_value(state, config, CInfinity);
|
|
941
1236
|
}
|
|
942
1237
|
|
|
943
1238
|
raise_parse_error("unexpected token %s", state);
|
|
944
1239
|
break;
|
|
945
|
-
case '-':
|
|
1240
|
+
case '-': {
|
|
946
1241
|
// Note: memcmp with a small power of two compile to an integer comparison
|
|
947
|
-
if ((state
|
|
1242
|
+
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
948
1243
|
if (config->allow_nan) {
|
|
949
1244
|
state->cursor += 9;
|
|
950
1245
|
return json_push_value(state, config, CMinusInfinity);
|
|
@@ -952,62 +1247,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
952
1247
|
raise_parse_error("unexpected token %s", state);
|
|
953
1248
|
}
|
|
954
1249
|
}
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
bool integer = true;
|
|
958
|
-
|
|
959
|
-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
|
|
960
|
-
const char *start = state->cursor;
|
|
961
|
-
state->cursor++;
|
|
962
|
-
|
|
963
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
964
|
-
state->cursor++;
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
long integer_length = state->cursor - start;
|
|
968
|
-
|
|
969
|
-
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
|
970
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
971
|
-
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
|
972
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
973
|
-
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
|
974
|
-
raise_parse_error_at("invalid number: %s", state, start);
|
|
975
|
-
}
|
|
976
|
-
|
|
977
|
-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
|
978
|
-
integer = false;
|
|
979
|
-
state->cursor++;
|
|
980
|
-
|
|
981
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
982
|
-
raise_parse_error("invalid number: %s", state);
|
|
983
|
-
}
|
|
984
|
-
|
|
985
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
986
|
-
state->cursor++;
|
|
987
|
-
}
|
|
988
|
-
}
|
|
989
|
-
|
|
990
|
-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
|
|
991
|
-
integer = false;
|
|
992
|
-
state->cursor++;
|
|
993
|
-
if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
|
|
994
|
-
state->cursor++;
|
|
995
|
-
}
|
|
996
|
-
|
|
997
|
-
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
|
998
|
-
raise_parse_error("invalid number: %s", state);
|
|
999
|
-
}
|
|
1000
|
-
|
|
1001
|
-
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
|
1002
|
-
state->cursor++;
|
|
1003
|
-
}
|
|
1004
|
-
}
|
|
1005
|
-
|
|
1006
|
-
if (integer) {
|
|
1007
|
-
return json_push_value(state, config, json_decode_integer(start, state->cursor));
|
|
1008
|
-
}
|
|
1009
|
-
return json_push_value(state, config, json_decode_float(config, start, state->cursor));
|
|
1250
|
+
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1251
|
+
break;
|
|
1010
1252
|
}
|
|
1253
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1254
|
+
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1255
|
+
break;
|
|
1011
1256
|
case '"': {
|
|
1012
1257
|
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1013
1258
|
return json_parse_string(state, config, false);
|
|
@@ -1018,7 +1263,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1018
1263
|
json_eat_whitespace(state);
|
|
1019
1264
|
long stack_head = state->stack->head;
|
|
1020
1265
|
|
|
1021
|
-
if ((state
|
|
1266
|
+
if (peek(state) == ']') {
|
|
1022
1267
|
state->cursor++;
|
|
1023
1268
|
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1024
1269
|
} else {
|
|
@@ -1033,26 +1278,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1033
1278
|
while (true) {
|
|
1034
1279
|
json_eat_whitespace(state);
|
|
1035
1280
|
|
|
1036
|
-
|
|
1037
|
-
if (*state->cursor == ']') {
|
|
1038
|
-
state->cursor++;
|
|
1039
|
-
long count = state->stack->head - stack_head;
|
|
1040
|
-
state->current_nesting--;
|
|
1041
|
-
state->in_array--;
|
|
1042
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1043
|
-
}
|
|
1281
|
+
const char next_char = peek(state);
|
|
1044
1282
|
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
}
|
|
1283
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1284
|
+
state->cursor++;
|
|
1285
|
+
if (config->allow_trailing_comma) {
|
|
1286
|
+
json_eat_whitespace(state);
|
|
1287
|
+
if (peek(state) == ']') {
|
|
1288
|
+
continue;
|
|
1052
1289
|
}
|
|
1053
|
-
json_parse_any(state, config);
|
|
1054
|
-
continue;
|
|
1055
1290
|
}
|
|
1291
|
+
json_parse_any(state, config);
|
|
1292
|
+
continue;
|
|
1293
|
+
}
|
|
1294
|
+
|
|
1295
|
+
if (next_char == ']') {
|
|
1296
|
+
state->cursor++;
|
|
1297
|
+
long count = state->stack->head - stack_head;
|
|
1298
|
+
state->current_nesting--;
|
|
1299
|
+
state->in_array--;
|
|
1300
|
+
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1056
1301
|
}
|
|
1057
1302
|
|
|
1058
1303
|
raise_parse_error("expected ',' or ']' after array value", state);
|
|
@@ -1060,11 +1305,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1060
1305
|
break;
|
|
1061
1306
|
}
|
|
1062
1307
|
case '{': {
|
|
1308
|
+
const char *object_start_cursor = state->cursor;
|
|
1309
|
+
|
|
1063
1310
|
state->cursor++;
|
|
1064
1311
|
json_eat_whitespace(state);
|
|
1065
1312
|
long stack_head = state->stack->head;
|
|
1066
1313
|
|
|
1067
|
-
if ((state
|
|
1314
|
+
if (peek(state) == '}') {
|
|
1068
1315
|
state->cursor++;
|
|
1069
1316
|
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1070
1317
|
} else {
|
|
@@ -1073,13 +1320,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1073
1320
|
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1074
1321
|
}
|
|
1075
1322
|
|
|
1076
|
-
if (
|
|
1323
|
+
if (peek(state) != '"') {
|
|
1077
1324
|
raise_parse_error("expected object key, got %s", state);
|
|
1078
1325
|
}
|
|
1079
1326
|
json_parse_string(state, config, true);
|
|
1080
1327
|
|
|
1081
1328
|
json_eat_whitespace(state);
|
|
1082
|
-
if ((state
|
|
1329
|
+
if (peek(state) != ':') {
|
|
1083
1330
|
raise_parse_error("expected ':' after object key", state);
|
|
1084
1331
|
}
|
|
1085
1332
|
state->cursor++;
|
|
@@ -1090,39 +1337,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1090
1337
|
while (true) {
|
|
1091
1338
|
json_eat_whitespace(state);
|
|
1092
1339
|
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
return json_push_value(state, config, json_decode_object(state, config, count));
|
|
1099
|
-
}
|
|
1340
|
+
const char next_char = peek(state);
|
|
1341
|
+
if (next_char == '}') {
|
|
1342
|
+
state->cursor++;
|
|
1343
|
+
state->current_nesting--;
|
|
1344
|
+
size_t count = state->stack->head - stack_head;
|
|
1100
1345
|
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1346
|
+
// Temporary rewind cursor in case an error is raised
|
|
1347
|
+
const char *final_cursor = state->cursor;
|
|
1348
|
+
state->cursor = object_start_cursor;
|
|
1349
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1350
|
+
state->cursor = final_cursor;
|
|
1104
1351
|
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
continue;
|
|
1108
|
-
}
|
|
1109
|
-
}
|
|
1352
|
+
return json_push_value(state, config, object);
|
|
1353
|
+
}
|
|
1110
1354
|
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
json_parse_string(state, config, true);
|
|
1355
|
+
if (next_char == ',') {
|
|
1356
|
+
state->cursor++;
|
|
1357
|
+
json_eat_whitespace(state);
|
|
1115
1358
|
|
|
1116
|
-
|
|
1117
|
-
if ((state
|
|
1118
|
-
|
|
1359
|
+
if (config->allow_trailing_comma) {
|
|
1360
|
+
if (peek(state) == '}') {
|
|
1361
|
+
continue;
|
|
1119
1362
|
}
|
|
1120
|
-
|
|
1363
|
+
}
|
|
1121
1364
|
|
|
1122
|
-
|
|
1365
|
+
if (RB_UNLIKELY(peek(state) != '"')) {
|
|
1366
|
+
raise_parse_error("expected object key, got: %s", state);
|
|
1367
|
+
}
|
|
1368
|
+
json_parse_string(state, config, true);
|
|
1123
1369
|
|
|
1124
|
-
|
|
1370
|
+
json_eat_whitespace(state);
|
|
1371
|
+
if (RB_UNLIKELY(peek(state) != ':')) {
|
|
1372
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
|
1125
1373
|
}
|
|
1374
|
+
state->cursor++;
|
|
1375
|
+
|
|
1376
|
+
json_parse_any(state, config);
|
|
1377
|
+
|
|
1378
|
+
continue;
|
|
1126
1379
|
}
|
|
1127
1380
|
|
|
1128
1381
|
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
|
@@ -1130,18 +1383,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
|
1130
1383
|
break;
|
|
1131
1384
|
}
|
|
1132
1385
|
|
|
1386
|
+
case 0:
|
|
1387
|
+
raise_parse_error("unexpected end of input", state);
|
|
1388
|
+
break;
|
|
1389
|
+
|
|
1133
1390
|
default:
|
|
1134
1391
|
raise_parse_error("unexpected character: %s", state);
|
|
1135
1392
|
break;
|
|
1136
1393
|
}
|
|
1137
1394
|
|
|
1138
|
-
raise_parse_error("
|
|
1395
|
+
raise_parse_error("unreachable: %s", state);
|
|
1396
|
+
return Qundef;
|
|
1139
1397
|
}
|
|
1140
1398
|
|
|
1141
1399
|
/*
 * Skip trailing whitespace and raise a parse error if any input is left
 * after it.
 */
static void json_ensure_eof(JSON_ParserState *state)
{
    json_eat_whitespace(state);

    if (eos(state)) {
        return;
    }

    raise_parse_error("unexpected token at end of stream %s", state);
}
|
|
@@ -1178,13 +1436,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1178
1436
|
{
|
|
1179
1437
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1180
1438
|
|
|
1181
|
-
if (key == sym_max_nesting)
|
|
1182
|
-
else if (key == sym_allow_nan)
|
|
1183
|
-
else if (key == sym_allow_trailing_comma)
|
|
1184
|
-
else if (key ==
|
|
1185
|
-
else if (key ==
|
|
1186
|
-
else if (key ==
|
|
1187
|
-
else if (key ==
|
|
1439
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1440
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1441
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1442
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1443
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1444
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1445
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1446
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1447
|
+
else if (key == sym_decimal_class) {
|
|
1188
1448
|
if (RTEST(val)) {
|
|
1189
1449
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1190
1450
|
config->decimal_class = val;
|
|
@@ -1257,6 +1517,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1257
1517
|
*/
|
|
1258
1518
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1259
1519
|
{
|
|
1520
|
+
rb_check_frozen(self);
|
|
1260
1521
|
GET_PARSER_CONFIG;
|
|
1261
1522
|
|
|
1262
1523
|
parser_config_init(config, opts);
|
|
@@ -1352,7 +1613,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1352
1613
|
JSON_ParserConfig_memsize,
|
|
1353
1614
|
},
|
|
1354
1615
|
0, 0,
|
|
1355
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1616
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1356
1617
|
};
|
|
1357
1618
|
|
|
1358
1619
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1396,15 +1657,13 @@ void Init_parser(void)
|
|
|
1396
1657
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1397
1658
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1398
1659
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1660
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1399
1661
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1400
1662
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1401
1663
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1402
1664
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1665
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1403
1666
|
|
|
1404
|
-
i_chr = rb_intern("chr");
|
|
1405
|
-
i_aset = rb_intern("[]=");
|
|
1406
|
-
i_aref = rb_intern("[]");
|
|
1407
|
-
i_leftshift = rb_intern("<<");
|
|
1408
1667
|
i_new = rb_intern("new");
|
|
1409
1668
|
i_try_convert = rb_intern("try_convert");
|
|
1410
1669
|
i_uminus = rb_intern("-@");
|
|
@@ -1413,4 +1672,8 @@ void Init_parser(void)
|
|
|
1413
1672
|
binary_encindex = rb_ascii8bit_encindex();
|
|
1414
1673
|
utf8_encindex = rb_utf8_encindex();
|
|
1415
1674
|
enc_utf8 = rb_utf8_encoding();
|
|
1675
|
+
|
|
1676
|
+
#ifdef HAVE_SIMD
|
|
1677
|
+
simd_impl = find_simd_implementation();
|
|
1678
|
+
#endif
|
|
1416
1679
|
}
|