json 2.16.0 → 2.19.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +43 -1
- data/ext/json/ext/fbuffer/fbuffer.h +29 -25
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +132 -369
- data/ext/json/ext/json.h +13 -0
- data/ext/json/ext/parser/extconf.rb +1 -2
- data/ext/json/ext/parser/parser.c +235 -179
- data/ext/json/ext/simd/simd.h +33 -16
- data/ext/json/ext/vendor/fpconv.c +3 -3
- data/lib/json/common.rb +62 -14
- data/lib/json/ext/generator/state.rb +1 -1
- data/lib/json/truffle_ruby/generator.rb +34 -18
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +33 -0
- metadata +2 -2
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
6
6
|
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
7
7
|
|
|
8
|
-
static ID
|
|
9
|
-
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
10
9
|
|
|
11
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
12
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
13
13
|
|
|
14
14
|
static int binary_encindex;
|
|
15
15
|
static int utf8_encindex;
|
|
@@ -89,7 +89,7 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
|
|
|
89
89
|
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
90
90
|
#if __has_builtin(__builtin_bswap64)
|
|
91
91
|
#undef rstring_cache_memcmp
|
|
92
|
-
|
|
92
|
+
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
|
|
93
93
|
{
|
|
94
94
|
// The libc memcmp has numerous complex optimizations, but in this particular case,
|
|
95
95
|
// we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
|
|
@@ -118,7 +118,7 @@ static ALWAYS_INLINE() int rstring_cache_memcmp(const char *str, const char *rpt
|
|
|
118
118
|
#endif
|
|
119
119
|
#endif
|
|
120
120
|
|
|
121
|
-
|
|
121
|
+
ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
|
122
122
|
{
|
|
123
123
|
const char *rstring_ptr;
|
|
124
124
|
long rstring_length;
|
|
@@ -132,7 +132,7 @@ static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length,
|
|
|
132
132
|
}
|
|
133
133
|
}
|
|
134
134
|
|
|
135
|
-
|
|
135
|
+
ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
|
|
136
136
|
{
|
|
137
137
|
int low = 0;
|
|
138
138
|
int high = cache->length - 1;
|
|
@@ -296,15 +296,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
296
296
|
}
|
|
297
297
|
}
|
|
298
298
|
|
|
299
|
-
|
|
300
|
-
#ifndef HAVE_STRNLEN
|
|
301
|
-
static size_t strnlen(const char *s, size_t maxlen)
|
|
302
|
-
{
|
|
303
|
-
char *p;
|
|
304
|
-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
|
|
305
|
-
}
|
|
306
|
-
#endif
|
|
307
|
-
|
|
308
299
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
309
300
|
{
|
|
310
301
|
int len = 1;
|
|
@@ -345,7 +336,8 @@ typedef struct JSON_ParserStruct {
|
|
|
345
336
|
int max_nesting;
|
|
346
337
|
bool allow_nan;
|
|
347
338
|
bool allow_trailing_comma;
|
|
348
|
-
bool
|
|
339
|
+
bool allow_control_characters;
|
|
340
|
+
bool allow_invalid_escape;
|
|
349
341
|
bool symbolize_names;
|
|
350
342
|
bool freeze;
|
|
351
343
|
} JSON_ParserConfig;
|
|
@@ -410,14 +402,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
410
402
|
|
|
411
403
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
412
404
|
|
|
413
|
-
|
|
414
|
-
RBIMPL_ATTR_NORETURN()
|
|
415
|
-
#endif
|
|
416
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
405
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
|
|
417
406
|
{
|
|
418
407
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
419
|
-
long line, column;
|
|
420
|
-
cursor_position(state, &line, &column);
|
|
421
408
|
|
|
422
409
|
const char *ptr = "EOF";
|
|
423
410
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -452,17 +439,26 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
452
439
|
VALUE msg = rb_sprintf(format, ptr);
|
|
453
440
|
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
|
454
441
|
RB_GC_GUARD(msg);
|
|
442
|
+
return message;
|
|
443
|
+
}
|
|
455
444
|
|
|
445
|
+
/* Build (but do not raise) a JSON::ParserError carrying `message`.
 *
 * The 1-based/0-based convention of `line` and `column` is whatever the
 * caller computed; they are stored verbatim in the exception's @line and
 * @column instance variables.
 *
 * Returns the new exception object.
 */
static VALUE parse_error_new(VALUE message, long line, long column)
{
    VALUE error_class = rb_path2class("JSON::ParserError");
    VALUE exc = rb_exc_new_str(error_class, message);

    rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
    rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));

    return exc;
}
|
|
461
452
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
453
|
+
/* Raise a JSON::ParserError describing the parser's current cursor.
 *
 * `format` is a printf-style template forwarded to
 * build_parse_error_message(); the line and column come from
 * cursor_position(). This function never returns.
 */
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
{
    long line;
    long column;
    cursor_position(state, &line, &column);

    VALUE exc = parse_error_new(build_parse_error_message(format, state, line, column), line, column);
    rb_exc_raise(exc);
}
|
|
460
|
+
|
|
461
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
466
462
|
{
|
|
467
463
|
state->cursor = at;
|
|
468
464
|
raise_parse_error(format, state);
|
|
@@ -487,23 +483,24 @@ static const signed char digit_values[256] = {
|
|
|
487
483
|
-1, -1, -1, -1, -1, -1, -1
|
|
488
484
|
};
|
|
489
485
|
|
|
490
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
491
|
-
{
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
486
|
+
/* Decode the four hex digits of a \uXXXX escape into a 16-bit code unit.
 *
 * sp  - first of the four expected hex digits (just past the "\u").
 * spe - end of the string being unescaped; fewer than four bytes left
 *       between sp and spe is reported as an incomplete escape.
 *
 * Each byte is looked up in digit_values (presumably 0..15 for hex digits
 * and a negative value otherwise - the table body is defined elsewhere).
 * Any failure raises a parse error positioned at sp - 2, i.e. on the
 * backslash that opened the escape.
 */
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
{
    if (RB_UNLIKELY(sp > spe - 4)) {
        raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
    }

    const unsigned char *hex = (const unsigned char *)sp;

    const signed char n3 = digit_values[hex[0]];
    const signed char n2 = digit_values[hex[1]];
    const signed char n1 = digit_values[hex[2]];
    const signed char n0 = digit_values[hex[3]];

    /* OR-ing the lookups keeps the sign bit set if any digit was invalid,
     * so a single comparison validates all four. */
    const signed char combined = (signed char)(n3 | n2 | n1 | n0);
    if (RB_UNLIKELY(combined < 0)) {
        raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
    }

    uint32_t code_unit = (uint32_t)n3 << 12;
    code_unit |= (uint32_t)n2 << 8;
    code_unit |= (uint32_t)n1 << 4;
    code_unit |= (uint32_t)n0;
    return code_unit;
}
|
|
508
505
|
|
|
509
506
|
#define GET_PARSER_CONFIG \
|
|
@@ -551,7 +548,7 @@ json_eat_comments(JSON_ParserState *state)
|
|
|
551
548
|
}
|
|
552
549
|
}
|
|
553
550
|
|
|
554
|
-
|
|
551
|
+
ALWAYS_INLINE(static) void
|
|
555
552
|
json_eat_whitespace(JSON_ParserState *state)
|
|
556
553
|
{
|
|
557
554
|
while (true) {
|
|
@@ -627,8 +624,10 @@ static inline bool json_string_cacheable_p(const char *string, size_t length)
|
|
|
627
624
|
return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
|
|
628
625
|
}
|
|
629
626
|
|
|
630
|
-
static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name
|
|
627
|
+
static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
|
|
631
628
|
{
|
|
629
|
+
bool intern = is_name || config->freeze;
|
|
630
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
632
631
|
size_t bufferSize = stringEnd - string;
|
|
633
632
|
|
|
634
633
|
if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
|
|
@@ -647,96 +646,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
|
|
|
647
646
|
return build_string(string, stringEnd, intern, symbolize);
|
|
648
647
|
}
|
|
649
648
|
|
|
650
|
-
|
|
649
|
+
/* Maximum number of backslash positions remembered while scanning a string. */
#define JSON_MAX_UNESCAPE_POSITIONS 16

/* Backslash bookkeeping handed from the string scanner to the unescaper.
 *
 * size                   - number of entries still unread in `positions`.
 * positions              - cursor into an array of recorded backslash
 *                          addresses; advanced as entries are consumed.
 * additional_backslashes - count of backslashes seen after the array was
 *                          full; these have to be located again by search.
 */
typedef struct _json_unescape_positions {
    long size;
    const char **positions;
    unsigned long additional_backslashes;
} JSON_UnescapePositions;

/* Return the next backslash at or after `pe`, or NULL when none is left.
 *
 * Recorded positions are consumed first; entries that lie before `pe` are
 * discarded. Once the recorded list is empty, each remaining "additional"
 * backslash is found with memchr() over [pe, stringEnd).
 */
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
{
    while (positions->size != 0) {
        const char *candidate = *positions->positions;
        positions->positions++;
        positions->size--;

        if (candidate >= pe) {
            return candidate;
        }
    }

    if (positions->additional_backslashes == 0) {
        return NULL;
    }

    positions->additional_backslashes--;
    return memchr(pe, '\\', stringEnd - pe);
}
|
|
674
|
+
|
|
675
|
+
NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
|
|
676
|
+
{
|
|
677
|
+
bool intern = is_name || config->freeze;
|
|
678
|
+
bool symbolize = is_name && config->symbolize_names;
|
|
652
679
|
size_t bufferSize = stringEnd - string;
|
|
653
|
-
const char *p = string, *pe = string, *
|
|
680
|
+
const char *p = string, *pe = string, *bufferStart;
|
|
654
681
|
char *buffer;
|
|
655
|
-
int unescape_len;
|
|
656
|
-
char buf[4];
|
|
657
682
|
|
|
658
683
|
VALUE result = rb_str_buf_new(bufferSize);
|
|
659
684
|
rb_enc_associate_index(result, utf8_encindex);
|
|
660
685
|
buffer = RSTRING_PTR(result);
|
|
661
686
|
bufferStart = buffer;
|
|
662
687
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
688
|
+
#define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
|
|
689
|
+
|
|
690
|
+
while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
|
|
666
691
|
if (pe > p) {
|
|
667
692
|
MEMCPY(buffer, p, char, pe - p);
|
|
668
693
|
buffer += pe - p;
|
|
669
694
|
}
|
|
670
695
|
switch (*++pe) {
|
|
696
|
+
case '"':
|
|
697
|
+
case '/':
|
|
698
|
+
p = pe; // nothing to unescape just need to skip the backslash
|
|
699
|
+
break;
|
|
700
|
+
case '\\':
|
|
701
|
+
APPEND_CHAR('\\');
|
|
702
|
+
break;
|
|
671
703
|
case 'n':
|
|
672
|
-
|
|
704
|
+
APPEND_CHAR('\n');
|
|
673
705
|
break;
|
|
674
706
|
case 'r':
|
|
675
|
-
|
|
707
|
+
APPEND_CHAR('\r');
|
|
676
708
|
break;
|
|
677
709
|
case 't':
|
|
678
|
-
|
|
679
|
-
break;
|
|
680
|
-
case '"':
|
|
681
|
-
unescape = (char *) "\"";
|
|
682
|
-
break;
|
|
683
|
-
case '\\':
|
|
684
|
-
unescape = (char *) "\\";
|
|
710
|
+
APPEND_CHAR('\t');
|
|
685
711
|
break;
|
|
686
712
|
case 'b':
|
|
687
|
-
|
|
713
|
+
APPEND_CHAR('\b');
|
|
688
714
|
break;
|
|
689
715
|
case 'f':
|
|
690
|
-
|
|
716
|
+
APPEND_CHAR('\f');
|
|
691
717
|
break;
|
|
692
|
-
case 'u':
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
if (
|
|
711
|
-
raise_parse_error_at("
|
|
712
|
-
}
|
|
713
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
714
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
715
|
-
|
|
716
|
-
if ((sur & 0xFC00) != 0xDC00) {
|
|
717
|
-
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
718
|
-
}
|
|
719
|
-
|
|
720
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
721
|
-
| (sur & 0x3FF));
|
|
722
|
-
pe += 5;
|
|
723
|
-
} else {
|
|
724
|
-
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
725
|
-
break;
|
|
718
|
+
case 'u': {
|
|
719
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
720
|
+
pe += 3;
|
|
721
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
722
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
723
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
724
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
725
|
+
* final codepoint.
|
|
726
|
+
*
|
|
727
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
728
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
729
|
+
* Area".
|
|
730
|
+
*/
|
|
731
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
732
|
+
pe++;
|
|
733
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
734
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
735
|
+
|
|
736
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
737
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
726
738
|
}
|
|
739
|
+
|
|
740
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
741
|
+
pe += 5;
|
|
742
|
+
} else {
|
|
743
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
744
|
+
break;
|
|
727
745
|
}
|
|
728
|
-
unescape_len = convert_UTF32_to_UTF8(buf, ch);
|
|
729
|
-
unescape = buf;
|
|
730
746
|
}
|
|
747
|
+
|
|
748
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
749
|
+
buffer += unescape_len;
|
|
750
|
+
p = ++pe;
|
|
731
751
|
break;
|
|
752
|
+
}
|
|
732
753
|
default:
|
|
733
|
-
|
|
734
|
-
|
|
754
|
+
if ((unsigned char)*pe < 0x20) {
|
|
755
|
+
if (!config->allow_control_characters) {
|
|
756
|
+
if (*pe == '\n') {
|
|
757
|
+
raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
758
|
+
}
|
|
759
|
+
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
if (config->allow_invalid_escape) {
|
|
764
|
+
APPEND_CHAR(*pe);
|
|
765
|
+
} else {
|
|
766
|
+
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
767
|
+
}
|
|
768
|
+
break;
|
|
735
769
|
}
|
|
736
|
-
MEMCPY(buffer, unescape, char, unescape_len);
|
|
737
|
-
buffer += unescape_len;
|
|
738
|
-
p = ++pe;
|
|
739
770
|
}
|
|
771
|
+
#undef APPEND_CHAR
|
|
740
772
|
|
|
741
773
|
if (stringEnd > p) {
|
|
742
774
|
MEMCPY(buffer, p, char, stringEnd - p);
|
|
@@ -754,20 +786,39 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
|
754
786
|
}
|
|
755
787
|
|
|
756
788
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
789
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
790
|
+
|
|
791
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
792
|
+
|
|
793
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
794
|
+
{
|
|
795
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
796
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
797
|
+
MEMCPY(buffer, start, char, len);
|
|
798
|
+
buffer[len] = '\0';
|
|
799
|
+
return func(buffer);
|
|
800
|
+
} else {
|
|
801
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
802
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
803
|
+
MEMCPY(buffer, start, char, len);
|
|
804
|
+
buffer[len] = '\0';
|
|
805
|
+
VALUE number = func(buffer);
|
|
806
|
+
RB_GC_GUARD(buffer_v);
|
|
807
|
+
return number;
|
|
808
|
+
}
|
|
809
|
+
}
|
|
757
810
|
|
|
758
|
-
static VALUE
|
|
811
|
+
/* Decoder callback: parse a NUL-terminated base-10 integer literal into a
 * Ruby Integer via rb_cstr2inum(). */
static VALUE json_decode_inum(const char *buffer)
{
    const int base = 10;
    return rb_cstr2inum(buffer, base);
}
|
|
768
815
|
|
|
769
|
-
static
|
|
770
|
-
|
|
816
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
817
|
+
{
|
|
818
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
771
822
|
{
|
|
772
823
|
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
773
824
|
if (negative) {
|
|
@@ -779,22 +830,14 @@ json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const
|
|
|
779
830
|
return json_decode_large_integer(start, end - start);
|
|
780
831
|
}
|
|
781
832
|
|
|
782
|
-
static VALUE
|
|
833
|
+
/* Decoder callback: parse a NUL-terminated float literal with
 * rb_cstr_to_dbl() (second argument 1) and wrap the result in a Ruby Float. */
static VALUE json_decode_dnum(const char *buffer)
{
    const double parsed = rb_cstr_to_dbl(buffer, 1);
    return DBL2NUM(parsed);
}
|
|
790
837
|
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
buffer[len] = '\0';
|
|
795
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
796
|
-
RB_ALLOCV_END(buffer_v);
|
|
797
|
-
return number;
|
|
838
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
839
|
+
{
|
|
840
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
798
841
|
}
|
|
799
842
|
|
|
800
843
|
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
@@ -846,7 +889,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
|
846
889
|
return Qfalse;
|
|
847
890
|
}
|
|
848
891
|
|
|
849
|
-
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
892
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
850
893
|
{
|
|
851
894
|
VALUE message = rb_sprintf(
|
|
852
895
|
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
@@ -857,16 +900,18 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
|
|
|
857
900
|
RB_GC_GUARD(message);
|
|
858
901
|
}
|
|
859
902
|
|
|
860
|
-
|
|
861
|
-
RBIMPL_ATTR_NORETURN()
|
|
862
|
-
#endif
|
|
863
|
-
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
903
|
+
/* Raise a JSON::ParserError reporting that `duplicate_key` occurs more than
 * once in the object being parsed.
 *
 * The message is "duplicate key <inspect>" followed by the location suffix
 * produced by build_parse_error_message(); line and column come from
 * cursor_position(). This function never returns.
 *
 * The inspected key is only ever used as a %PRIsVALUE argument or appended
 * as a string, never as a printf-style format. The former trailing
 * raise_parse_error(RSTRING_PTR(message), state) call did use it as a
 * format, and was unreachable after rb_exc_raise(), so it has been removed
 * together with the equally unreachable RB_GC_GUARD.
 */
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
{
    VALUE message = rb_sprintf(
        "duplicate key %"PRIsVALUE,
        rb_inspect(duplicate_key)
    );

    long line, column;
    cursor_position(state, &line, &column);

    /* An empty format yields just the " at line N column M" suffix. */
    rb_str_concat(message, build_parse_error_message("", state, line, column));
    rb_exc_raise(parse_error_new(message, line, column));
}
|
|
@@ -900,20 +945,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
|
|
|
900
945
|
return object;
|
|
901
946
|
}
|
|
902
947
|
|
|
903
|
-
static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
|
|
904
|
-
{
|
|
905
|
-
VALUE string;
|
|
906
|
-
bool intern = is_name || config->freeze;
|
|
907
|
-
bool symbolize = is_name && config->symbolize_names;
|
|
908
|
-
if (escaped) {
|
|
909
|
-
string = json_string_unescape(state, start, end, is_name, intern, symbolize);
|
|
910
|
-
} else {
|
|
911
|
-
string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
|
|
912
|
-
}
|
|
913
|
-
|
|
914
|
-
return string;
|
|
915
|
-
}
|
|
916
|
-
|
|
917
948
|
static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
|
|
918
949
|
{
|
|
919
950
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
@@ -940,7 +971,7 @@ static const bool string_scan_table[256] = {
|
|
|
940
971
|
static SIMD_Implementation simd_impl = SIMD_NONE;
|
|
941
972
|
#endif /* HAVE_SIMD */
|
|
942
973
|
|
|
943
|
-
|
|
974
|
+
ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
|
|
944
975
|
{
|
|
945
976
|
#ifdef HAVE_SIMD
|
|
946
977
|
#if defined(HAVE_SIMD_NEON)
|
|
@@ -948,7 +979,7 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
|
|
|
948
979
|
uint64_t mask = 0;
|
|
949
980
|
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
|
950
981
|
state->cursor += trailing_zeros64(mask) >> 2;
|
|
951
|
-
return
|
|
982
|
+
return true;
|
|
952
983
|
}
|
|
953
984
|
|
|
954
985
|
#elif defined(HAVE_SIMD_SSE2)
|
|
@@ -956,7 +987,7 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
|
|
|
956
987
|
int mask = 0;
|
|
957
988
|
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
|
958
989
|
state->cursor += trailing_zeros(mask);
|
|
959
|
-
return
|
|
990
|
+
return true;
|
|
960
991
|
}
|
|
961
992
|
}
|
|
962
993
|
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
|
@@ -964,46 +995,70 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
|
|
|
964
995
|
|
|
965
996
|
while (!eos(state)) {
|
|
966
997
|
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
|
967
|
-
return
|
|
998
|
+
return true;
|
|
968
999
|
}
|
|
969
1000
|
state->cursor++;
|
|
970
1001
|
}
|
|
971
|
-
return
|
|
1002
|
+
return false;
|
|
972
1003
|
}
|
|
973
1004
|
|
|
974
|
-
static
|
|
1005
|
+
/* Slow path of string parsing, entered when the scanner stopped on
 * something other than the closing quote.
 *
 * state->cursor is on the byte string_scan() stopped at and `start` is the
 * first byte of the string body. While looking for the closing quote, the
 * address of each backslash is recorded (up to JSON_MAX_UNESCAPE_POSITIONS;
 * later ones are only counted) so json_string_unescape() does not have to
 * search for them again.
 *
 * Any other stop byte is reported as an invalid control character unless
 * config->allow_control_characters is set (presumably string_scan() only
 * stops on quotes, backslashes and control bytes - its table is defined
 * elsewhere).
 *
 * Returns the pushed string value; raises if input ends before the quote.
 */
static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
{
    const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
    JSON_UnescapePositions positions = {
        .size = 0,
        .positions = backslashes,
        .additional_backslashes = 0,
    };

    do {
        const char stop = *state->cursor;

        if (stop == '"') {
            VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
            state->cursor++;
            return json_push_value(state, config, string);
        }

        if (stop == '\\') {
            if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
                backslashes[positions.size] = state->cursor;
                positions.size++;
            } else {
                positions.additional_backslashes++;
            }
            /* Step over the backslash; the shared increment below then
             * steps over the escaped byte. */
            state->cursor++;
        } else if (!config->allow_control_characters) {
            raise_parse_error("invalid ASCII control character in string: %s", state);
        }

        state->cursor++;
    } while (string_scan(state));

    raise_parse_error("unexpected end of input, expected closing \"", state);
    return Qfalse;
}
|
|
1006
1044
|
|
|
1045
|
+
/* Parse a JSON string whose opening quote is at state->cursor.
 *
 * If the first scan stops on the closing quote, the body is handed to
 * json_string_fastpath(). Any other stop byte defers to
 * json_parse_escaped_string(). Raises if the input ends before any stop
 * byte is found.
 *
 * `is_name` is forwarded to the string builders unchanged.
 */
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
    /* Step past the opening quote; the body starts here. */
    const char *start = ++state->cursor;

    if (RB_UNLIKELY(!string_scan(state))) {
        raise_parse_error("unexpected end of input, expected closing \"", state);
    }

    if (RB_UNLIKELY(*state->cursor != '"')) {
        return json_parse_escaped_string(state, config, is_name, start);
    }

    VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
    state->cursor++;
    return json_push_value(state, config, string);
}
|
|
1061
|
+
|
|
1007
1062
|
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
1008
1063
|
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
|
|
1009
1064
|
// Additional References:
|
|
@@ -1397,14 +1452,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1397
1452
|
{
|
|
1398
1453
|
JSON_ParserConfig *config = (JSON_ParserConfig *)data;
|
|
1399
1454
|
|
|
1400
|
-
if (key == sym_max_nesting)
|
|
1401
|
-
else if (key == sym_allow_nan)
|
|
1402
|
-
else if (key == sym_allow_trailing_comma)
|
|
1403
|
-
else if (key ==
|
|
1404
|
-
else if (key ==
|
|
1405
|
-
else if (key ==
|
|
1406
|
-
else if (key ==
|
|
1407
|
-
else if (key ==
|
|
1455
|
+
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1456
|
+
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1457
|
+
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1458
|
+
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1459
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1460
|
+
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1461
|
+
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1462
|
+
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
1463
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1464
|
+
else if (key == sym_decimal_class) {
|
|
1408
1465
|
if (RTEST(val)) {
|
|
1409
1466
|
if (rb_respond_to(val, i_try_convert)) {
|
|
1410
1467
|
config->decimal_class = val;
|
|
@@ -1477,6 +1534,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
|
|
|
1477
1534
|
*/
|
|
1478
1535
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1479
1536
|
{
|
|
1537
|
+
rb_check_frozen(self);
|
|
1480
1538
|
GET_PARSER_CONFIG;
|
|
1481
1539
|
|
|
1482
1540
|
parser_config_init(config, opts);
|
|
@@ -1572,7 +1630,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
|
|
|
1572
1630
|
JSON_ParserConfig_memsize,
|
|
1573
1631
|
},
|
|
1574
1632
|
0, 0,
|
|
1575
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
|
1633
|
+
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1576
1634
|
};
|
|
1577
1635
|
|
|
1578
1636
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1616,16 +1674,14 @@ void Init_parser(void)
|
|
|
1616
1674
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1617
1675
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1618
1676
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1677
|
+
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1678
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1619
1679
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1620
1680
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1621
1681
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
|
1622
1682
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
|
1623
1683
|
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1624
1684
|
|
|
1625
|
-
i_chr = rb_intern("chr");
|
|
1626
|
-
i_aset = rb_intern("[]=");
|
|
1627
|
-
i_aref = rb_intern("[]");
|
|
1628
|
-
i_leftshift = rb_intern("<<");
|
|
1629
1685
|
i_new = rb_intern("new");
|
|
1630
1686
|
i_try_convert = rb_intern("try_convert");
|
|
1631
1687
|
i_uminus = rb_intern("-@");
|