json 2.18.0 → 2.19.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -7,8 +7,9 @@ static VALUE CNaN, CInfinity, CMinusInfinity;
7
7
 
8
8
  static ID i_new, i_try_convert, i_uminus, i_encode;
9
9
 
10
- static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze,
11
- sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
10
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
11
+ sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
12
+ sym_allow_duplicate_key;
12
13
 
13
14
  static int binary_encindex;
14
15
  static int utf8_encindex;
@@ -240,17 +241,27 @@ static void rvalue_stack_mark(void *ptr)
240
241
  {
241
242
  rvalue_stack *stack = (rvalue_stack *)ptr;
242
243
  long index;
243
- for (index = 0; index < stack->head; index++) {
244
- rb_gc_mark(stack->ptr[index]);
244
+ if (stack && stack->ptr) {
245
+ for (index = 0; index < stack->head; index++) {
246
+ rb_gc_mark(stack->ptr[index]);
247
+ }
245
248
  }
246
249
  }
247
250
 
251
+ static void rvalue_stack_free_buffer(rvalue_stack *stack)
252
+ {
253
+ ruby_xfree(stack->ptr);
254
+ stack->ptr = NULL;
255
+ }
256
+
248
257
  static void rvalue_stack_free(void *ptr)
249
258
  {
250
259
  rvalue_stack *stack = (rvalue_stack *)ptr;
251
260
  if (stack) {
252
- ruby_xfree(stack->ptr);
261
+ rvalue_stack_free_buffer(stack);
262
+ #ifndef HAVE_RUBY_TYPED_EMBEDDABLE
253
263
  ruby_xfree(stack);
264
+ #endif
254
265
  }
255
266
  }
256
267
 
@@ -261,14 +272,13 @@ static size_t rvalue_stack_memsize(const void *ptr)
261
272
  }
262
273
 
263
274
  static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
264
- "JSON::Ext::Parser/rvalue_stack",
265
- {
275
+ .wrap_struct_name = "JSON::Ext::Parser/rvalue_stack",
276
+ .function = {
266
277
  .dmark = rvalue_stack_mark,
267
278
  .dfree = rvalue_stack_free,
268
279
  .dsize = rvalue_stack_memsize,
269
280
  },
270
- 0, 0,
271
- RUBY_TYPED_FREE_IMMEDIATELY,
281
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
272
282
  };
273
283
 
274
284
  static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
@@ -290,8 +300,12 @@ static void rvalue_stack_eagerly_release(VALUE handle)
290
300
  if (handle) {
291
301
  rvalue_stack *stack;
292
302
  TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
293
- RTYPEDDATA_DATA(handle) = NULL;
303
+ #ifdef HAVE_RUBY_TYPED_EMBEDDABLE
304
+ rvalue_stack_free_buffer(stack);
305
+ #else
294
306
  rvalue_stack_free(stack);
307
+ RTYPEDDATA_DATA(handle) = NULL;
308
+ #endif
295
309
  }
296
310
  }
297
311
 
@@ -336,12 +350,13 @@ typedef struct JSON_ParserStruct {
336
350
  bool allow_nan;
337
351
  bool allow_trailing_comma;
338
352
  bool allow_control_characters;
353
+ bool allow_invalid_escape;
339
354
  bool symbolize_names;
340
355
  bool freeze;
341
356
  } JSON_ParserConfig;
342
357
 
343
358
  typedef struct JSON_ParserStateStruct {
344
- VALUE stack_handle;
359
+ VALUE *stack_handle;
345
360
  const char *start;
346
361
  const char *cursor;
347
362
  const char *end;
@@ -400,14 +415,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
400
415
 
401
416
  #define PARSE_ERROR_FRAGMENT_LEN 32
402
417
 
403
- #ifdef RBIMPL_ATTR_NORETURN
404
- RBIMPL_ATTR_NORETURN()
405
- #endif
406
- static void raise_parse_error(const char *format, JSON_ParserState *state)
418
+ static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
407
419
  {
408
420
  unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
409
- long line, column;
410
- cursor_position(state, &line, &column);
411
421
 
412
422
  const char *ptr = "EOF";
413
423
  if (state->cursor && state->cursor < state->end) {
@@ -439,20 +449,28 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
439
449
  }
440
450
  }
441
451
 
442
- VALUE msg = rb_sprintf(format, ptr);
443
- VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
444
- RB_GC_GUARD(msg);
452
+ VALUE message = rb_enc_sprintf(enc_utf8, format, ptr);
453
+ rb_str_catf(message, " at line %ld column %ld", line, column);
454
+ return message;
455
+ }
445
456
 
457
+ static VALUE parse_error_new(VALUE message, long line, long column)
458
+ {
446
459
  VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
447
460
  rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
448
461
  rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
449
- rb_exc_raise(exc);
462
+ return exc;
450
463
  }
451
464
 
452
- #ifdef RBIMPL_ATTR_NORETURN
453
- RBIMPL_ATTR_NORETURN()
454
- #endif
455
- static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
465
+ NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
466
+ {
467
+ long line, column;
468
+ cursor_position(state, &line, &column);
469
+ VALUE message = build_parse_error_message(format, state, line, column);
470
+ rb_exc_raise(parse_error_new(message, line, column));
471
+ }
472
+
473
+ NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
456
474
  {
457
475
  state->cursor = at;
458
476
  raise_parse_error(format, state);
@@ -477,23 +495,24 @@ static const signed char digit_values[256] = {
477
495
  -1, -1, -1, -1, -1, -1, -1
478
496
  };
479
497
 
480
- static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
481
- {
482
- signed char b;
483
- uint32_t result = 0;
484
- b = digit_values[p[0]];
485
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
486
- result = (result << 4) | (unsigned char)b;
487
- b = digit_values[p[1]];
488
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
489
- result = (result << 4) | (unsigned char)b;
490
- b = digit_values[p[2]];
491
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
492
- result = (result << 4) | (unsigned char)b;
493
- b = digit_values[p[3]];
494
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
495
- result = (result << 4) | (unsigned char)b;
496
- return result;
498
+ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
499
+ {
500
+ if (RB_UNLIKELY(sp > spe - 4)) {
501
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
502
+ }
503
+
504
+ const unsigned char *p = (const unsigned char *)sp;
505
+
506
+ const signed char b0 = digit_values[p[0]];
507
+ const signed char b1 = digit_values[p[1]];
508
+ const signed char b2 = digit_values[p[2]];
509
+ const signed char b3 = digit_values[p[3]];
510
+
511
+ if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
512
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
513
+ }
514
+
515
+ return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
497
516
  }
498
517
 
499
518
  #define GET_PARSER_CONFIG \
@@ -643,7 +662,7 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserCon
643
662
  typedef struct _json_unescape_positions {
644
663
  long size;
645
664
  const char **positions;
646
- bool has_more;
665
+ unsigned long additional_backslashes;
647
666
  } JSON_UnescapePositions;
648
667
 
649
668
  static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
@@ -657,7 +676,8 @@ static inline const char *json_next_backslash(const char *pe, const char *string
657
676
  }
658
677
  }
659
678
 
660
- if (positions->has_more) {
679
+ if (positions->additional_backslashes) {
680
+ positions->additional_backslashes--;
661
681
  return memchr(pe, '\\', stringEnd - pe);
662
682
  }
663
683
 
@@ -707,50 +727,41 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
707
727
  case 'f':
708
728
  APPEND_CHAR('\f');
709
729
  break;
710
- case 'u':
711
- if (pe > stringEnd - 5) {
712
- raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
713
- } else {
714
- uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
715
- pe += 3;
716
- /* To handle values above U+FFFF, we take a sequence of
717
- * \uXXXX escapes in the U+D800..U+DBFF then
718
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
719
- * to make a 20-bit number, then add 0x10000 to get the
720
- * final codepoint.
721
- *
722
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
723
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
724
- * Area".
725
- */
726
- if ((ch & 0xFC00) == 0xD800) {
727
- pe++;
728
- if (pe > stringEnd - 6) {
729
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
730
- }
731
- if (pe[0] == '\\' && pe[1] == 'u') {
732
- uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
733
-
734
- if ((sur & 0xFC00) != 0xDC00) {
735
- raise_parse_error_at("invalid surrogate pair at %s", state, p);
736
- }
737
-
738
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
739
- | (sur & 0x3FF));
740
- pe += 5;
741
- } else {
742
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
743
- break;
730
+ case 'u': {
731
+ uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
732
+ pe += 3;
733
+ /* To handle values above U+FFFF, we take a sequence of
734
+ * \uXXXX escapes in the U+D800..U+DBFF then
735
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
736
+ * to make a 20-bit number, then add 0x10000 to get the
737
+ * final codepoint.
738
+ *
739
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
740
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
741
+ * Area".
742
+ */
743
+ if ((ch & 0xFC00) == 0xD800) {
744
+ pe++;
745
+ if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
746
+ uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
747
+
748
+ if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
749
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
744
750
  }
745
- }
746
751
 
747
- char buf[4];
748
- int unescape_len = convert_UTF32_to_UTF8(buf, ch);
749
- MEMCPY(buffer, buf, char, unescape_len);
750
- buffer += unescape_len;
751
- p = ++pe;
752
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
753
+ pe += 5;
754
+ } else {
755
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
756
+ break;
757
+ }
752
758
  }
759
+
760
+ int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
761
+ buffer += unescape_len;
762
+ p = ++pe;
753
763
  break;
764
+ }
754
765
  default:
755
766
  if ((unsigned char)*pe < 0x20) {
756
767
  if (!config->allow_control_characters) {
@@ -759,6 +770,10 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
759
770
  }
760
771
  raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
761
772
  }
773
+ }
774
+
775
+ if (config->allow_invalid_escape) {
776
+ APPEND_CHAR(*pe);
762
777
  } else {
763
778
  raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
764
779
  }
@@ -783,20 +798,39 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
783
798
  }
784
799
 
785
800
  #define MAX_FAST_INTEGER_SIZE 18
801
+ #define MAX_NUMBER_STACK_BUFFER 128
802
+
803
+ typedef VALUE (*json_number_decode_func_t)(const char *ptr);
786
804
 
787
- static VALUE json_decode_large_integer(const char *start, long len)
805
+ static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
788
806
  {
789
- VALUE buffer_v;
790
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
791
- MEMCPY(buffer, start, char, len);
792
- buffer[len] = '\0';
793
- VALUE number = rb_cstr2inum(buffer, 10);
794
- RB_ALLOCV_END(buffer_v);
795
- return number;
807
+ if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
808
+ char buffer[MAX_NUMBER_STACK_BUFFER];
809
+ MEMCPY(buffer, start, char, len);
810
+ buffer[len] = '\0';
811
+ return func(buffer);
812
+ } else {
813
+ VALUE buffer_v = rb_str_tmp_new(len);
814
+ char *buffer = RSTRING_PTR(buffer_v);
815
+ MEMCPY(buffer, start, char, len);
816
+ buffer[len] = '\0';
817
+ VALUE number = func(buffer);
818
+ RB_GC_GUARD(buffer_v);
819
+ return number;
820
+ }
796
821
  }
797
822
 
798
- static inline VALUE
799
- json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
823
+ static VALUE json_decode_inum(const char *buffer)
824
+ {
825
+ return rb_cstr2inum(buffer, 10);
826
+ }
827
+
828
+ NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
829
+ {
830
+ return json_decode_large_number(start, len, json_decode_inum);
831
+ }
832
+
833
+ static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
800
834
  {
801
835
  if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
802
836
  if (negative) {
@@ -808,28 +842,20 @@ json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const
808
842
  return json_decode_large_integer(start, end - start);
809
843
  }
810
844
 
811
- static VALUE json_decode_large_float(const char *start, long len)
845
+ static VALUE json_decode_dnum(const char *buffer)
812
846
  {
813
- if (RB_LIKELY(len < 64)) {
814
- char buffer[64];
815
- MEMCPY(buffer, start, char, len);
816
- buffer[len] = '\0';
817
- return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
818
- }
847
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
848
+ }
819
849
 
820
- VALUE buffer_v;
821
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
822
- MEMCPY(buffer, start, char, len);
823
- buffer[len] = '\0';
824
- VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
825
- RB_ALLOCV_END(buffer_v);
826
- return number;
850
+ NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
851
+ {
852
+ return json_decode_large_number(start, len, json_decode_dnum);
827
853
  }
828
854
 
829
855
  /* Ruby JSON optimized float decoder using vendored Ryu algorithm
830
856
  * Accepts pre-extracted mantissa and exponent from first-pass validation
831
857
  */
832
- static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
858
+ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative,
833
859
  const char *start, const char *end)
834
860
  {
835
861
  if (RB_UNLIKELY(config->decimal_class)) {
@@ -837,13 +863,21 @@ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantis
837
863
  return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
838
864
  }
839
865
 
866
+ if (RB_UNLIKELY(exponent > INT32_MAX)) {
867
+ return negative ? CMinusInfinity : CInfinity;
868
+ }
869
+
870
+ if (RB_UNLIKELY(exponent < INT32_MIN)) {
871
+ return rb_float_new(negative ? -0.0 : 0.0);
872
+ }
873
+
840
874
  // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
841
875
  // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
842
876
  if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
843
877
  return json_decode_large_float(start, end - start);
844
878
  }
845
879
 
846
- return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
880
+ return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative));
847
881
  }
848
882
 
849
883
  static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
@@ -875,7 +909,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
875
909
  return Qfalse;
876
910
  }
877
911
 
878
- static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
912
+ NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
879
913
  {
880
914
  VALUE message = rb_sprintf(
881
915
  "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
@@ -886,18 +920,17 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
886
920
  RB_GC_GUARD(message);
887
921
  }
888
922
 
889
- #ifdef RBIMPL_ATTR_NORETURN
890
- RBIMPL_ATTR_NORETURN()
891
- #endif
892
- static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
923
+ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
893
924
  {
894
925
  VALUE message = rb_sprintf(
895
926
  "duplicate key %"PRIsVALUE,
896
927
  rb_inspect(duplicate_key)
897
928
  );
898
929
 
899
- raise_parse_error(RSTRING_PTR(message), state);
900
- RB_GC_GUARD(message);
930
+ long line, column;
931
+ cursor_position(state, &line, &column);
932
+ rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
933
+ rb_exc_raise(parse_error_new(message, line, column));
901
934
  }
902
935
 
903
936
  static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
@@ -934,7 +967,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
934
967
  if (RB_UNLIKELY(config->on_load_proc)) {
935
968
  value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
936
969
  }
937
- rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
970
+ rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack);
938
971
  return value;
939
972
  }
940
973
 
@@ -992,7 +1025,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
992
1025
  JSON_UnescapePositions positions = {
993
1026
  .size = 0,
994
1027
  .positions = backslashes,
995
- .has_more = false,
1028
+ .additional_backslashes = 0,
996
1029
  };
997
1030
 
998
1031
  do {
@@ -1007,7 +1040,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
1007
1040
  backslashes[positions.size] = state->cursor;
1008
1041
  positions.size++;
1009
1042
  } else {
1010
- positions.has_more = true;
1043
+ positions.additional_backslashes++;
1011
1044
  }
1012
1045
  state->cursor++;
1013
1046
  break;
@@ -1119,7 +1152,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
1119
1152
  const char first_digit = *state->cursor;
1120
1153
 
1121
1154
  // Variables for Ryu optimization - extract digits during parsing
1122
- int32_t exponent = 0;
1155
+ int64_t exponent = 0;
1123
1156
  int decimal_point_pos = -1;
1124
1157
  uint64_t mantissa = 0;
1125
1158
 
@@ -1163,7 +1196,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
1163
1196
  raise_parse_error_at("invalid number: %s", state, start);
1164
1197
  }
1165
1198
 
1166
- exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
1199
+ exponent = negative_exponent ? -abs_exponent : abs_exponent;
1167
1200
  }
1168
1201
 
1169
1202
  if (integer) {
@@ -1440,6 +1473,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1440
1473
  else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1441
1474
  else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1442
1475
  else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
1476
+ else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
1443
1477
  else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1444
1478
  else if (key == sym_freeze) { config->freeze = RTEST(val); }
1445
1479
  else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
@@ -1543,11 +1577,13 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1543
1577
  const char *start;
1544
1578
  RSTRING_GETMEM(Vsource, start, len);
1545
1579
 
1580
+ VALUE stack_handle = 0;
1546
1581
  JSON_ParserState _state = {
1547
1582
  .start = start,
1548
1583
  .cursor = start,
1549
1584
  .end = start + len,
1550
1585
  .stack = &stack,
1586
+ .stack_handle = &stack_handle,
1551
1587
  };
1552
1588
  JSON_ParserState *state = &_state;
1553
1589
 
@@ -1555,8 +1591,8 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1555
1591
 
1556
1592
  // This may be skipped in case of exception, but
1557
1593
  // it won't cause a leak.
1558
- rvalue_stack_eagerly_release(state->stack_handle);
1559
-
1594
+ rvalue_stack_eagerly_release(stack_handle);
1595
+ RB_GC_GUARD(stack_handle);
1560
1596
  json_ensure_eof(state);
1561
1597
 
1562
1598
  return result;
@@ -1594,26 +1630,19 @@ static void JSON_ParserConfig_mark(void *ptr)
1594
1630
  rb_gc_mark(config->decimal_class);
1595
1631
  }
1596
1632
 
1597
- static void JSON_ParserConfig_free(void *ptr)
1598
- {
1599
- JSON_ParserConfig *config = ptr;
1600
- ruby_xfree(config);
1601
- }
1602
-
1603
1633
  static size_t JSON_ParserConfig_memsize(const void *ptr)
1604
1634
  {
1605
1635
  return sizeof(JSON_ParserConfig);
1606
1636
  }
1607
1637
 
1608
1638
  static const rb_data_type_t JSON_ParserConfig_type = {
1609
- "JSON::Ext::Parser/ParserConfig",
1610
- {
1639
+ .wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
1640
+ .function = {
1611
1641
  JSON_ParserConfig_mark,
1612
- JSON_ParserConfig_free,
1642
+ RUBY_DEFAULT_FREE,
1613
1643
  JSON_ParserConfig_memsize,
1614
1644
  },
1615
- 0, 0,
1616
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
1645
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
1617
1646
  };
1618
1647
 
1619
1648
  static VALUE cJSON_parser_s_allocate(VALUE klass)
@@ -1658,6 +1687,7 @@ void Init_parser(void)
1658
1687
  sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1659
1688
  sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1660
1689
  sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
1690
+ sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
1661
1691
  sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1662
1692
  sym_freeze = ID2SYM(rb_intern("freeze"));
1663
1693
  sym_on_load = ID2SYM(rb_intern("on_load"));
@@ -58,7 +58,34 @@ static inline int trailing_zeros(int input)
58
58
 
59
59
  #ifdef JSON_ENABLE_SIMD
60
60
 
61
- #define SIMD_MINIMUM_THRESHOLD 6
61
+ #define SIMD_MINIMUM_THRESHOLD 4
62
+
63
+ ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len)
64
+ {
65
+ RBIMPL_ASSERT_OR_ASSUME(len < 16);
66
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4
67
+ #if defined(__has_builtin) && __has_builtin(__builtin_memcpy)
68
+ // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes.
69
+ // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
70
+ // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
71
+ // position in both copies.
72
+
73
+ // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
74
+ // generated assembly. On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
75
+ // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
76
+ // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
77
+ // plus two loads and stores generated when using __builtin_memcpy.
78
+ if (len >= 8) {
79
+ __builtin_memcpy(dst, src, 8);
80
+ __builtin_memcpy(dst + len - 8, src + len - 8, 8);
81
+ } else {
82
+ __builtin_memcpy(dst, src, 4);
83
+ __builtin_memcpy(dst + len - 4, src + len - 4, 4);
84
+ }
85
+ #else
86
+ MEMCPY(dst, src, char, len);
87
+ #endif
88
+ }
62
89
 
63
90
  #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
64
91
  #include <arm_neon.h>
@@ -106,16 +133,6 @@ ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, const char *en
106
133
  return 0;
107
134
  }
108
135
 
109
- static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)
110
- {
111
- uint8x16x4_t tab;
112
- tab.val[0] = vld1q_u8(table);
113
- tab.val[1] = vld1q_u8(table+16);
114
- tab.val[2] = vld1q_u8(table+32);
115
- tab.val[3] = vld1q_u8(table+48);
116
- return tab;
117
- }
118
-
119
136
  #endif /* ARM Neon Support.*/
120
137
 
121
138
  #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
@@ -449,7 +449,7 @@ static int filter_special(double fp, char* dest)
449
449
  * }
450
450
  *
451
451
  */
452
- static int fpconv_dtoa(double d, char dest[28])
452
+ static int fpconv_dtoa(double d, char dest[32])
453
453
  {
454
454
  char digits[18];
455
455