json 2.16.0 → 2.19.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -5,11 +5,11 @@
5
5
  static VALUE mJSON, eNestingError, Encoding_UTF_8;
6
6
  static VALUE CNaN, CInfinity, CMinusInfinity;
7
7
 
8
- static ID i_chr, i_aset, i_aref,
9
- i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
8
+ static ID i_new, i_try_convert, i_uminus, i_encode;
10
9
 
11
- static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
12
- sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
10
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
11
+ sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
12
+ sym_allow_duplicate_key;
13
13
 
14
14
  static int binary_encindex;
15
15
  static int utf8_encindex;
@@ -89,7 +89,7 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
89
89
  #if JSON_CPU_LITTLE_ENDIAN_64BITS
90
90
  #if __has_builtin(__builtin_bswap64)
91
91
  #undef rstring_cache_memcmp
92
- static ALWAYS_INLINE() int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
92
+ ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
93
93
  {
94
94
  // The libc memcmp has numerous complex optimizations, but in this particular case,
95
95
  // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
@@ -118,7 +118,7 @@ static ALWAYS_INLINE() int rstring_cache_memcmp(const char *str, const char *rpt
118
118
  #endif
119
119
  #endif
120
120
 
121
- static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
121
+ ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
122
122
  {
123
123
  const char *rstring_ptr;
124
124
  long rstring_length;
@@ -132,7 +132,7 @@ static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length,
132
132
  }
133
133
  }
134
134
 
135
- static ALWAYS_INLINE() VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
135
+ ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
136
136
  {
137
137
  int low = 0;
138
138
  int high = cache->length - 1;
@@ -296,15 +296,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
296
296
  }
297
297
  }
298
298
 
299
-
300
- #ifndef HAVE_STRNLEN
301
- static size_t strnlen(const char *s, size_t maxlen)
302
- {
303
- char *p;
304
- return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
305
- }
306
- #endif
307
-
308
299
  static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
309
300
  {
310
301
  int len = 1;
@@ -345,7 +336,8 @@ typedef struct JSON_ParserStruct {
345
336
  int max_nesting;
346
337
  bool allow_nan;
347
338
  bool allow_trailing_comma;
348
- bool parsing_name;
339
+ bool allow_control_characters;
340
+ bool allow_invalid_escape;
349
341
  bool symbolize_names;
350
342
  bool freeze;
351
343
  } JSON_ParserConfig;
@@ -410,14 +402,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
410
402
 
411
403
  #define PARSE_ERROR_FRAGMENT_LEN 32
412
404
 
413
- #ifdef RBIMPL_ATTR_NORETURN
414
- RBIMPL_ATTR_NORETURN()
415
- #endif
416
- static void raise_parse_error(const char *format, JSON_ParserState *state)
405
+ static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
417
406
  {
418
407
  unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
419
- long line, column;
420
- cursor_position(state, &line, &column);
421
408
 
422
409
  const char *ptr = "EOF";
423
410
  if (state->cursor && state->cursor < state->end) {
@@ -452,17 +439,26 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
452
439
  VALUE msg = rb_sprintf(format, ptr);
453
440
  VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
454
441
  RB_GC_GUARD(msg);
442
+ return message;
443
+ }
455
444
 
445
+ static VALUE parse_error_new(VALUE message, long line, long column)
446
+ {
456
447
  VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
457
448
  rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
458
449
  rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
459
- rb_exc_raise(exc);
450
+ return exc;
460
451
  }
461
452
 
462
- #ifdef RBIMPL_ATTR_NORETURN
463
- RBIMPL_ATTR_NORETURN()
464
- #endif
465
- static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
453
+ NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
454
+ {
455
+ long line, column;
456
+ cursor_position(state, &line, &column);
457
+ VALUE message = build_parse_error_message(format, state, line, column);
458
+ rb_exc_raise(parse_error_new(message, line, column));
459
+ }
460
+
461
+ NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
466
462
  {
467
463
  state->cursor = at;
468
464
  raise_parse_error(format, state);
@@ -487,23 +483,24 @@ static const signed char digit_values[256] = {
487
483
  -1, -1, -1, -1, -1, -1, -1
488
484
  };
489
485
 
490
- static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
491
- {
492
- signed char b;
493
- uint32_t result = 0;
494
- b = digit_values[p[0]];
495
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
496
- result = (result << 4) | (unsigned char)b;
497
- b = digit_values[p[1]];
498
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
499
- result = (result << 4) | (unsigned char)b;
500
- b = digit_values[p[2]];
501
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
502
- result = (result << 4) | (unsigned char)b;
503
- b = digit_values[p[3]];
504
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
505
- result = (result << 4) | (unsigned char)b;
506
- return result;
486
+ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
487
+ {
488
+ if (RB_UNLIKELY(sp > spe - 4)) {
489
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
490
+ }
491
+
492
+ const unsigned char *p = (const unsigned char *)sp;
493
+
494
+ const signed char b0 = digit_values[p[0]];
495
+ const signed char b1 = digit_values[p[1]];
496
+ const signed char b2 = digit_values[p[2]];
497
+ const signed char b3 = digit_values[p[3]];
498
+
499
+ if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
500
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
501
+ }
502
+
503
+ return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
507
504
  }
508
505
 
509
506
  #define GET_PARSER_CONFIG \
@@ -551,7 +548,7 @@ json_eat_comments(JSON_ParserState *state)
551
548
  }
552
549
  }
553
550
 
554
- static ALWAYS_INLINE() void
551
+ ALWAYS_INLINE(static) void
555
552
  json_eat_whitespace(JSON_ParserState *state)
556
553
  {
557
554
  while (true) {
@@ -627,8 +624,10 @@ static inline bool json_string_cacheable_p(const char *string, size_t length)
627
624
  return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
628
625
  }
629
626
 
630
- static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
627
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
631
628
  {
629
+ bool intern = is_name || config->freeze;
630
+ bool symbolize = is_name && config->symbolize_names;
632
631
  size_t bufferSize = stringEnd - string;
633
632
 
634
633
  if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
@@ -647,96 +646,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
647
646
  return build_string(string, stringEnd, intern, symbolize);
648
647
  }
649
648
 
650
- static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
649
+ #define JSON_MAX_UNESCAPE_POSITIONS 16
650
+ typedef struct _json_unescape_positions {
651
+ long size;
652
+ const char **positions;
653
+ unsigned long additional_backslashes;
654
+ } JSON_UnescapePositions;
655
+
656
+ static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
651
657
  {
658
+ while (positions->size) {
659
+ positions->size--;
660
+ const char *next_position = positions->positions[0];
661
+ positions->positions++;
662
+ if (next_position >= pe) {
663
+ return next_position;
664
+ }
665
+ }
666
+
667
+ if (positions->additional_backslashes) {
668
+ positions->additional_backslashes--;
669
+ return memchr(pe, '\\', stringEnd - pe);
670
+ }
671
+
672
+ return NULL;
673
+ }
674
+
675
+ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
676
+ {
677
+ bool intern = is_name || config->freeze;
678
+ bool symbolize = is_name && config->symbolize_names;
652
679
  size_t bufferSize = stringEnd - string;
653
- const char *p = string, *pe = string, *unescape, *bufferStart;
680
+ const char *p = string, *pe = string, *bufferStart;
654
681
  char *buffer;
655
- int unescape_len;
656
- char buf[4];
657
682
 
658
683
  VALUE result = rb_str_buf_new(bufferSize);
659
684
  rb_enc_associate_index(result, utf8_encindex);
660
685
  buffer = RSTRING_PTR(result);
661
686
  bufferStart = buffer;
662
687
 
663
- while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
664
- unescape = (char *) "?";
665
- unescape_len = 1;
688
+ #define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
689
+
690
+ while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
666
691
  if (pe > p) {
667
692
  MEMCPY(buffer, p, char, pe - p);
668
693
  buffer += pe - p;
669
694
  }
670
695
  switch (*++pe) {
696
+ case '"':
697
+ case '/':
698
+ p = pe; // nothing to unescape just need to skip the backslash
699
+ break;
700
+ case '\\':
701
+ APPEND_CHAR('\\');
702
+ break;
671
703
  case 'n':
672
- unescape = (char *) "\n";
704
+ APPEND_CHAR('\n');
673
705
  break;
674
706
  case 'r':
675
- unescape = (char *) "\r";
707
+ APPEND_CHAR('\r');
676
708
  break;
677
709
  case 't':
678
- unescape = (char *) "\t";
679
- break;
680
- case '"':
681
- unescape = (char *) "\"";
682
- break;
683
- case '\\':
684
- unescape = (char *) "\\";
710
+ APPEND_CHAR('\t');
685
711
  break;
686
712
  case 'b':
687
- unescape = (char *) "\b";
713
+ APPEND_CHAR('\b');
688
714
  break;
689
715
  case 'f':
690
- unescape = (char *) "\f";
716
+ APPEND_CHAR('\f');
691
717
  break;
692
- case 'u':
693
- if (pe > stringEnd - 5) {
694
- raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
695
- } else {
696
- uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
697
- pe += 3;
698
- /* To handle values above U+FFFF, we take a sequence of
699
- * \uXXXX escapes in the U+D800..U+DBFF then
700
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
701
- * to make a 20-bit number, then add 0x10000 to get the
702
- * final codepoint.
703
- *
704
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
705
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
706
- * Area".
707
- */
708
- if ((ch & 0xFC00) == 0xD800) {
709
- pe++;
710
- if (pe > stringEnd - 6) {
711
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
712
- }
713
- if (pe[0] == '\\' && pe[1] == 'u') {
714
- uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
715
-
716
- if ((sur & 0xFC00) != 0xDC00) {
717
- raise_parse_error_at("invalid surrogate pair at %s", state, p);
718
- }
719
-
720
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
721
- | (sur & 0x3FF));
722
- pe += 5;
723
- } else {
724
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
725
- break;
718
+ case 'u': {
719
+ uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
720
+ pe += 3;
721
+ /* To handle values above U+FFFF, we take a sequence of
722
+ * \uXXXX escapes in the U+D800..U+DBFF then
723
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
724
+ * to make a 20-bit number, then add 0x10000 to get the
725
+ * final codepoint.
726
+ *
727
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
728
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
729
+ * Area".
730
+ */
731
+ if ((ch & 0xFC00) == 0xD800) {
732
+ pe++;
733
+ if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
734
+ uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
735
+
736
+ if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
737
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
726
738
  }
739
+
740
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
741
+ pe += 5;
742
+ } else {
743
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
744
+ break;
727
745
  }
728
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
729
- unescape = buf;
730
746
  }
747
+
748
+ int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
749
+ buffer += unescape_len;
750
+ p = ++pe;
731
751
  break;
752
+ }
732
753
  default:
733
- p = pe;
734
- continue;
754
+ if ((unsigned char)*pe < 0x20) {
755
+ if (!config->allow_control_characters) {
756
+ if (*pe == '\n') {
757
+ raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
758
+ }
759
+ raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
760
+ }
761
+ }
762
+
763
+ if (config->allow_invalid_escape) {
764
+ APPEND_CHAR(*pe);
765
+ } else {
766
+ raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
767
+ }
768
+ break;
735
769
  }
736
- MEMCPY(buffer, unescape, char, unescape_len);
737
- buffer += unescape_len;
738
- p = ++pe;
739
770
  }
771
+ #undef APPEND_CHAR
740
772
 
741
773
  if (stringEnd > p) {
742
774
  MEMCPY(buffer, p, char, stringEnd - p);
@@ -754,20 +786,39 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
754
786
  }
755
787
 
756
788
  #define MAX_FAST_INTEGER_SIZE 18
789
+ #define MAX_NUMBER_STACK_BUFFER 128
790
+
791
+ typedef VALUE (*json_number_decode_func_t)(const char *ptr);
792
+
793
+ static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
794
+ {
795
+ if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
796
+ char buffer[MAX_NUMBER_STACK_BUFFER];
797
+ MEMCPY(buffer, start, char, len);
798
+ buffer[len] = '\0';
799
+ return func(buffer);
800
+ } else {
801
+ VALUE buffer_v = rb_str_tmp_new(len);
802
+ char *buffer = RSTRING_PTR(buffer_v);
803
+ MEMCPY(buffer, start, char, len);
804
+ buffer[len] = '\0';
805
+ VALUE number = func(buffer);
806
+ RB_GC_GUARD(buffer_v);
807
+ return number;
808
+ }
809
+ }
757
810
 
758
- static VALUE json_decode_large_integer(const char *start, long len)
811
+ static VALUE json_decode_inum(const char *buffer)
759
812
  {
760
- VALUE buffer_v;
761
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
762
- MEMCPY(buffer, start, char, len);
763
- buffer[len] = '\0';
764
- VALUE number = rb_cstr2inum(buffer, 10);
765
- RB_ALLOCV_END(buffer_v);
766
- return number;
813
+ return rb_cstr2inum(buffer, 10);
767
814
  }
768
815
 
769
- static inline VALUE
770
- json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
816
+ NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
817
+ {
818
+ return json_decode_large_number(start, len, json_decode_inum);
819
+ }
820
+
821
+ static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
771
822
  {
772
823
  if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
773
824
  if (negative) {
@@ -779,22 +830,14 @@ json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const
779
830
  return json_decode_large_integer(start, end - start);
780
831
  }
781
832
 
782
- static VALUE json_decode_large_float(const char *start, long len)
833
+ static VALUE json_decode_dnum(const char *buffer)
783
834
  {
784
- if (RB_LIKELY(len < 64)) {
785
- char buffer[64];
786
- MEMCPY(buffer, start, char, len);
787
- buffer[len] = '\0';
788
- return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
789
- }
835
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
836
+ }
790
837
 
791
- VALUE buffer_v;
792
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
793
- MEMCPY(buffer, start, char, len);
794
- buffer[len] = '\0';
795
- VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
796
- RB_ALLOCV_END(buffer_v);
797
- return number;
838
+ NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
839
+ {
840
+ return json_decode_large_number(start, len, json_decode_dnum);
798
841
  }
799
842
 
800
843
  /* Ruby JSON optimized float decoder using vendored Ryu algorithm
@@ -846,7 +889,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
846
889
  return Qfalse;
847
890
  }
848
891
 
849
- static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
892
+ NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
850
893
  {
851
894
  VALUE message = rb_sprintf(
852
895
  "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
@@ -857,16 +900,18 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
857
900
  RB_GC_GUARD(message);
858
901
  }
859
902
 
860
- #ifdef RBIMPL_ATTR_NORETURN
861
- RBIMPL_ATTR_NORETURN()
862
- #endif
863
- static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
903
+ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
864
904
  {
865
905
  VALUE message = rb_sprintf(
866
906
  "duplicate key %"PRIsVALUE,
867
907
  rb_inspect(duplicate_key)
868
908
  );
869
909
 
910
+ long line, column;
911
+ cursor_position(state, &line, &column);
912
+ rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
913
+ rb_exc_raise(parse_error_new(message, line, column));
914
+
870
915
  raise_parse_error(RSTRING_PTR(message), state);
871
916
  RB_GC_GUARD(message);
872
917
  }
@@ -900,20 +945,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
900
945
  return object;
901
946
  }
902
947
 
903
- static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
904
- {
905
- VALUE string;
906
- bool intern = is_name || config->freeze;
907
- bool symbolize = is_name && config->symbolize_names;
908
- if (escaped) {
909
- string = json_string_unescape(state, start, end, is_name, intern, symbolize);
910
- } else {
911
- string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
912
- }
913
-
914
- return string;
915
- }
916
-
917
948
  static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
918
949
  {
919
950
  if (RB_UNLIKELY(config->on_load_proc)) {
@@ -940,7 +971,7 @@ static const bool string_scan_table[256] = {
940
971
  static SIMD_Implementation simd_impl = SIMD_NONE;
941
972
  #endif /* HAVE_SIMD */
942
973
 
943
- static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
974
+ ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
944
975
  {
945
976
  #ifdef HAVE_SIMD
946
977
  #if defined(HAVE_SIMD_NEON)
@@ -948,7 +979,7 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
948
979
  uint64_t mask = 0;
949
980
  if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
950
981
  state->cursor += trailing_zeros64(mask) >> 2;
951
- return 1;
982
+ return true;
952
983
  }
953
984
 
954
985
  #elif defined(HAVE_SIMD_SSE2)
@@ -956,7 +987,7 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
956
987
  int mask = 0;
957
988
  if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
958
989
  state->cursor += trailing_zeros(mask);
959
- return 1;
990
+ return true;
960
991
  }
961
992
  }
962
993
  #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
@@ -964,46 +995,70 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
964
995
 
965
996
  while (!eos(state)) {
966
997
  if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
967
- return 1;
998
+ return true;
968
999
  }
969
1000
  state->cursor++;
970
1001
  }
971
- return 0;
1002
+ return false;
972
1003
  }
973
1004
 
974
- static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
1005
+ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
975
1006
  {
976
- state->cursor++;
977
- const char *start = state->cursor;
978
- bool escaped = false;
1007
+ const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
1008
+ JSON_UnescapePositions positions = {
1009
+ .size = 0,
1010
+ .positions = backslashes,
1011
+ .additional_backslashes = 0,
1012
+ };
979
1013
 
980
- while (RB_UNLIKELY(string_scan(state))) {
1014
+ do {
981
1015
  switch (*state->cursor) {
982
1016
  case '"': {
983
- VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
1017
+ VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
984
1018
  state->cursor++;
985
1019
  return json_push_value(state, config, string);
986
1020
  }
987
1021
  case '\\': {
988
- state->cursor++;
989
- escaped = true;
990
- if ((unsigned char)*state->cursor < 0x20) {
991
- raise_parse_error("invalid ASCII control character in string: %s", state);
1022
+ if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
1023
+ backslashes[positions.size] = state->cursor;
1024
+ positions.size++;
1025
+ } else {
1026
+ positions.additional_backslashes++;
992
1027
  }
1028
+ state->cursor++;
993
1029
  break;
994
1030
  }
995
1031
  default:
996
- raise_parse_error("invalid ASCII control character in string: %s", state);
1032
+ if (!config->allow_control_characters) {
1033
+ raise_parse_error("invalid ASCII control character in string: %s", state);
1034
+ }
997
1035
  break;
998
1036
  }
999
1037
 
1000
1038
  state->cursor++;
1001
- }
1039
+ } while (string_scan(state));
1002
1040
 
1003
1041
  raise_parse_error("unexpected end of input, expected closing \"", state);
1004
1042
  return Qfalse;
1005
1043
  }
1006
1044
 
1045
+ ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
1046
+ {
1047
+ state->cursor++;
1048
+ const char *start = state->cursor;
1049
+
1050
+ if (RB_UNLIKELY(!string_scan(state))) {
1051
+ raise_parse_error("unexpected end of input, expected closing \"", state);
1052
+ }
1053
+
1054
+ if (RB_LIKELY(*state->cursor == '"')) {
1055
+ VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
1056
+ state->cursor++;
1057
+ return json_push_value(state, config, string);
1058
+ }
1059
+ return json_parse_escaped_string(state, config, is_name, start);
1060
+ }
1061
+
1007
1062
  #if JSON_CPU_LITTLE_ENDIAN_64BITS
1008
1063
  // From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
1009
1064
  // Additional References:
@@ -1397,14 +1452,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1397
1452
  {
1398
1453
  JSON_ParserConfig *config = (JSON_ParserConfig *)data;
1399
1454
 
1400
- if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1401
- else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1402
- else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1403
- else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1404
- else if (key == sym_freeze) { config->freeze = RTEST(val); }
1405
- else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1406
- else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1407
- else if (key == sym_decimal_class) {
1455
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1456
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1457
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1458
+ else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
1459
+ else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
1460
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1461
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
1462
+ else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1463
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1464
+ else if (key == sym_decimal_class) {
1408
1465
  if (RTEST(val)) {
1409
1466
  if (rb_respond_to(val, i_try_convert)) {
1410
1467
  config->decimal_class = val;
@@ -1477,6 +1534,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
1477
1534
  */
1478
1535
  static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
1479
1536
  {
1537
+ rb_check_frozen(self);
1480
1538
  GET_PARSER_CONFIG;
1481
1539
 
1482
1540
  parser_config_init(config, opts);
@@ -1572,7 +1630,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
1572
1630
  JSON_ParserConfig_memsize,
1573
1631
  },
1574
1632
  0, 0,
1575
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
1633
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
1576
1634
  };
1577
1635
 
1578
1636
  static VALUE cJSON_parser_s_allocate(VALUE klass)
@@ -1616,16 +1674,14 @@ void Init_parser(void)
1616
1674
  sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1617
1675
  sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1618
1676
  sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1677
+ sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
1678
+ sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
1619
1679
  sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1620
1680
  sym_freeze = ID2SYM(rb_intern("freeze"));
1621
1681
  sym_on_load = ID2SYM(rb_intern("on_load"));
1622
1682
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1623
1683
  sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1624
1684
 
1625
- i_chr = rb_intern("chr");
1626
- i_aset = rb_intern("[]=");
1627
- i_aref = rb_intern("[]");
1628
- i_leftshift = rb_intern("<<");
1629
1685
  i_new = rb_intern("new");
1630
1686
  i_try_convert = rb_intern("try_convert");
1631
1687
  i_uminus = rb_intern("-@");