json 2.18.0 → 2.19.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +35 -1
- data/ext/json/ext/fbuffer/fbuffer.h +23 -19
- data/ext/json/ext/generator/extconf.rb +2 -0
- data/ext/json/ext/generator/generator.c +101 -325
- data/ext/json/ext/json.h +19 -0
- data/ext/json/ext/parser/extconf.rb +4 -0
- data/ext/json/ext/parser/parser.c +165 -135
- data/ext/json/ext/simd/simd.h +28 -11
- data/ext/json/ext/vendor/fpconv.c +1 -1
- data/lib/json/common.rb +41 -10
- data/lib/json/ext/generator/state.rb +1 -1
- data/lib/json/truffle_ruby/generator.rb +21 -9
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +35 -2
- metadata +2 -2
|
@@ -7,8 +7,9 @@ static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
|
7
7
|
|
|
8
8
|
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
9
9
|
|
|
10
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
-
sym_decimal_class, sym_on_load,
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
|
|
11
|
+
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
|
|
12
|
+
sym_allow_duplicate_key;
|
|
12
13
|
|
|
13
14
|
static int binary_encindex;
|
|
14
15
|
static int utf8_encindex;
|
|
@@ -240,17 +241,27 @@ static void rvalue_stack_mark(void *ptr)
|
|
|
240
241
|
{
|
|
241
242
|
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
242
243
|
long index;
|
|
243
|
-
|
|
244
|
-
|
|
244
|
+
if (stack && stack->ptr) {
|
|
245
|
+
for (index = 0; index < stack->head; index++) {
|
|
246
|
+
rb_gc_mark(stack->ptr[index]);
|
|
247
|
+
}
|
|
245
248
|
}
|
|
246
249
|
}
|
|
247
250
|
|
|
251
|
+
static void rvalue_stack_free_buffer(rvalue_stack *stack)
|
|
252
|
+
{
|
|
253
|
+
ruby_xfree(stack->ptr);
|
|
254
|
+
stack->ptr = NULL;
|
|
255
|
+
}
|
|
256
|
+
|
|
248
257
|
static void rvalue_stack_free(void *ptr)
|
|
249
258
|
{
|
|
250
259
|
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
251
260
|
if (stack) {
|
|
252
|
-
|
|
261
|
+
rvalue_stack_free_buffer(stack);
|
|
262
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
253
263
|
ruby_xfree(stack);
|
|
264
|
+
#endif
|
|
254
265
|
}
|
|
255
266
|
}
|
|
256
267
|
|
|
@@ -261,14 +272,13 @@ static size_t rvalue_stack_memsize(const void *ptr)
|
|
|
261
272
|
}
|
|
262
273
|
|
|
263
274
|
static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
|
|
264
|
-
"JSON::Ext::Parser/rvalue_stack",
|
|
265
|
-
{
|
|
275
|
+
.wrap_struct_name = "JSON::Ext::Parser/rvalue_stack",
|
|
276
|
+
.function = {
|
|
266
277
|
.dmark = rvalue_stack_mark,
|
|
267
278
|
.dfree = rvalue_stack_free,
|
|
268
279
|
.dsize = rvalue_stack_memsize,
|
|
269
280
|
},
|
|
270
|
-
|
|
271
|
-
RUBY_TYPED_FREE_IMMEDIATELY,
|
|
281
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
272
282
|
};
|
|
273
283
|
|
|
274
284
|
static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
|
|
@@ -290,8 +300,12 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
290
300
|
if (handle) {
|
|
291
301
|
rvalue_stack *stack;
|
|
292
302
|
TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
|
|
293
|
-
|
|
303
|
+
#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
304
|
+
rvalue_stack_free_buffer(stack);
|
|
305
|
+
#else
|
|
294
306
|
rvalue_stack_free(stack);
|
|
307
|
+
RTYPEDDATA_DATA(handle) = NULL;
|
|
308
|
+
#endif
|
|
295
309
|
}
|
|
296
310
|
}
|
|
297
311
|
|
|
@@ -336,12 +350,13 @@ typedef struct JSON_ParserStruct {
|
|
|
336
350
|
bool allow_nan;
|
|
337
351
|
bool allow_trailing_comma;
|
|
338
352
|
bool allow_control_characters;
|
|
353
|
+
bool allow_invalid_escape;
|
|
339
354
|
bool symbolize_names;
|
|
340
355
|
bool freeze;
|
|
341
356
|
} JSON_ParserConfig;
|
|
342
357
|
|
|
343
358
|
typedef struct JSON_ParserStateStruct {
|
|
344
|
-
VALUE stack_handle;
|
|
359
|
+
VALUE *stack_handle;
|
|
345
360
|
const char *start;
|
|
346
361
|
const char *cursor;
|
|
347
362
|
const char *end;
|
|
@@ -400,14 +415,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
400
415
|
|
|
401
416
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
402
417
|
|
|
403
|
-
|
|
404
|
-
RBIMPL_ATTR_NORETURN()
|
|
405
|
-
#endif
|
|
406
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
418
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
|
|
407
419
|
{
|
|
408
420
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
409
|
-
long line, column;
|
|
410
|
-
cursor_position(state, &line, &column);
|
|
411
421
|
|
|
412
422
|
const char *ptr = "EOF";
|
|
413
423
|
if (state->cursor && state->cursor < state->end) {
|
|
@@ -439,20 +449,28 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
|
439
449
|
}
|
|
440
450
|
}
|
|
441
451
|
|
|
442
|
-
VALUE
|
|
443
|
-
|
|
444
|
-
|
|
452
|
+
VALUE message = rb_enc_sprintf(enc_utf8, format, ptr);
|
|
453
|
+
rb_str_catf(message, " at line %ld column %ld", line, column);
|
|
454
|
+
return message;
|
|
455
|
+
}
|
|
445
456
|
|
|
457
|
+
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
458
|
+
{
|
|
446
459
|
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
|
447
460
|
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
|
448
461
|
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
|
449
|
-
|
|
462
|
+
return exc;
|
|
450
463
|
}
|
|
451
464
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
465
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
466
|
+
{
|
|
467
|
+
long line, column;
|
|
468
|
+
cursor_position(state, &line, &column);
|
|
469
|
+
VALUE message = build_parse_error_message(format, state, line, column);
|
|
470
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
456
474
|
{
|
|
457
475
|
state->cursor = at;
|
|
458
476
|
raise_parse_error(format, state);
|
|
@@ -477,23 +495,24 @@ static const signed char digit_values[256] = {
|
|
|
477
495
|
-1, -1, -1, -1, -1, -1, -1
|
|
478
496
|
};
|
|
479
497
|
|
|
480
|
-
static uint32_t unescape_unicode(JSON_ParserState *state, const
|
|
481
|
-
{
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
498
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
499
|
+
{
|
|
500
|
+
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
501
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
const unsigned char *p = (const unsigned char *)sp;
|
|
505
|
+
|
|
506
|
+
const signed char b0 = digit_values[p[0]];
|
|
507
|
+
const signed char b1 = digit_values[p[1]];
|
|
508
|
+
const signed char b2 = digit_values[p[2]];
|
|
509
|
+
const signed char b3 = digit_values[p[3]];
|
|
510
|
+
|
|
511
|
+
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
512
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
497
516
|
}
|
|
498
517
|
|
|
499
518
|
#define GET_PARSER_CONFIG \
|
|
@@ -643,7 +662,7 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserCon
|
|
|
643
662
|
typedef struct _json_unescape_positions {
|
|
644
663
|
long size;
|
|
645
664
|
const char **positions;
|
|
646
|
-
|
|
665
|
+
unsigned long additional_backslashes;
|
|
647
666
|
} JSON_UnescapePositions;
|
|
648
667
|
|
|
649
668
|
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
|
|
@@ -657,7 +676,8 @@ static inline const char *json_next_backslash(const char *pe, const char *string
|
|
|
657
676
|
}
|
|
658
677
|
}
|
|
659
678
|
|
|
660
|
-
if (positions->
|
|
679
|
+
if (positions->additional_backslashes) {
|
|
680
|
+
positions->additional_backslashes--;
|
|
661
681
|
return memchr(pe, '\\', stringEnd - pe);
|
|
662
682
|
}
|
|
663
683
|
|
|
@@ -707,50 +727,41 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
|
|
|
707
727
|
case 'f':
|
|
708
728
|
APPEND_CHAR('\f');
|
|
709
729
|
break;
|
|
710
|
-
case 'u':
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
if (
|
|
729
|
-
raise_parse_error_at("
|
|
730
|
-
}
|
|
731
|
-
if (pe[0] == '\\' && pe[1] == 'u') {
|
|
732
|
-
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
|
733
|
-
|
|
734
|
-
if ((sur & 0xFC00) != 0xDC00) {
|
|
735
|
-
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
736
|
-
}
|
|
737
|
-
|
|
738
|
-
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
|
739
|
-
| (sur & 0x3FF));
|
|
740
|
-
pe += 5;
|
|
741
|
-
} else {
|
|
742
|
-
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
743
|
-
break;
|
|
730
|
+
case 'u': {
|
|
731
|
+
uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
|
|
732
|
+
pe += 3;
|
|
733
|
+
/* To handle values above U+FFFF, we take a sequence of
|
|
734
|
+
* \uXXXX escapes in the U+D800..U+DBFF then
|
|
735
|
+
* U+DC00..U+DFFF ranges, take the low 10 bits from each
|
|
736
|
+
* to make a 20-bit number, then add 0x10000 to get the
|
|
737
|
+
* final codepoint.
|
|
738
|
+
*
|
|
739
|
+
* See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
|
|
740
|
+
* Surrogate Pairs in UTF-16", and 23.6 "Surrogates
|
|
741
|
+
* Area".
|
|
742
|
+
*/
|
|
743
|
+
if ((ch & 0xFC00) == 0xD800) {
|
|
744
|
+
pe++;
|
|
745
|
+
if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
|
|
746
|
+
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
747
|
+
|
|
748
|
+
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
749
|
+
raise_parse_error_at("invalid surrogate pair at %s", state, p);
|
|
744
750
|
}
|
|
745
|
-
}
|
|
746
751
|
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
+
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
753
|
+
pe += 5;
|
|
754
|
+
} else {
|
|
755
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
|
756
|
+
break;
|
|
757
|
+
}
|
|
752
758
|
}
|
|
759
|
+
|
|
760
|
+
int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
|
|
761
|
+
buffer += unescape_len;
|
|
762
|
+
p = ++pe;
|
|
753
763
|
break;
|
|
764
|
+
}
|
|
754
765
|
default:
|
|
755
766
|
if ((unsigned char)*pe < 0x20) {
|
|
756
767
|
if (!config->allow_control_characters) {
|
|
@@ -759,6 +770,10 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
|
|
|
759
770
|
}
|
|
760
771
|
raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
761
772
|
}
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
if (config->allow_invalid_escape) {
|
|
776
|
+
APPEND_CHAR(*pe);
|
|
762
777
|
} else {
|
|
763
778
|
raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
764
779
|
}
|
|
@@ -783,20 +798,39 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
|
|
|
783
798
|
}
|
|
784
799
|
|
|
785
800
|
#define MAX_FAST_INTEGER_SIZE 18
|
|
801
|
+
#define MAX_NUMBER_STACK_BUFFER 128
|
|
802
|
+
|
|
803
|
+
typedef VALUE (*json_number_decode_func_t)(const char *ptr);
|
|
786
804
|
|
|
787
|
-
static VALUE
|
|
805
|
+
static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
|
|
788
806
|
{
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
807
|
+
if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
|
|
808
|
+
char buffer[MAX_NUMBER_STACK_BUFFER];
|
|
809
|
+
MEMCPY(buffer, start, char, len);
|
|
810
|
+
buffer[len] = '\0';
|
|
811
|
+
return func(buffer);
|
|
812
|
+
} else {
|
|
813
|
+
VALUE buffer_v = rb_str_tmp_new(len);
|
|
814
|
+
char *buffer = RSTRING_PTR(buffer_v);
|
|
815
|
+
MEMCPY(buffer, start, char, len);
|
|
816
|
+
buffer[len] = '\0';
|
|
817
|
+
VALUE number = func(buffer);
|
|
818
|
+
RB_GC_GUARD(buffer_v);
|
|
819
|
+
return number;
|
|
820
|
+
}
|
|
796
821
|
}
|
|
797
822
|
|
|
798
|
-
static
|
|
799
|
-
|
|
823
|
+
static VALUE json_decode_inum(const char *buffer)
|
|
824
|
+
{
|
|
825
|
+
return rb_cstr2inum(buffer, 10);
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
|
|
829
|
+
{
|
|
830
|
+
return json_decode_large_number(start, len, json_decode_inum);
|
|
831
|
+
}
|
|
832
|
+
|
|
833
|
+
static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
|
|
800
834
|
{
|
|
801
835
|
if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
|
|
802
836
|
if (negative) {
|
|
@@ -808,28 +842,20 @@ json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const
|
|
|
808
842
|
return json_decode_large_integer(start, end - start);
|
|
809
843
|
}
|
|
810
844
|
|
|
811
|
-
static VALUE
|
|
845
|
+
static VALUE json_decode_dnum(const char *buffer)
|
|
812
846
|
{
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
MEMCPY(buffer, start, char, len);
|
|
816
|
-
buffer[len] = '\0';
|
|
817
|
-
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
818
|
-
}
|
|
847
|
+
return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
848
|
+
}
|
|
819
849
|
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
buffer[len] = '\0';
|
|
824
|
-
VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
|
|
825
|
-
RB_ALLOCV_END(buffer_v);
|
|
826
|
-
return number;
|
|
850
|
+
NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
|
|
851
|
+
{
|
|
852
|
+
return json_decode_large_number(start, len, json_decode_dnum);
|
|
827
853
|
}
|
|
828
854
|
|
|
829
855
|
/* Ruby JSON optimized float decoder using vendored Ryu algorithm
|
|
830
856
|
* Accepts pre-extracted mantissa and exponent from first-pass validation
|
|
831
857
|
*/
|
|
832
|
-
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits,
|
|
858
|
+
static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative,
|
|
833
859
|
const char *start, const char *end)
|
|
834
860
|
{
|
|
835
861
|
if (RB_UNLIKELY(config->decimal_class)) {
|
|
@@ -837,13 +863,21 @@ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantis
|
|
|
837
863
|
return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
|
|
838
864
|
}
|
|
839
865
|
|
|
866
|
+
if (RB_UNLIKELY(exponent > INT32_MAX)) {
|
|
867
|
+
return negative ? CMinusInfinity : CInfinity;
|
|
868
|
+
}
|
|
869
|
+
|
|
870
|
+
if (RB_UNLIKELY(exponent < INT32_MIN)) {
|
|
871
|
+
return rb_float_new(negative ? -0.0 : 0.0);
|
|
872
|
+
}
|
|
873
|
+
|
|
840
874
|
// Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
|
|
841
875
|
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
842
876
|
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
843
877
|
return json_decode_large_float(start, end - start);
|
|
844
878
|
}
|
|
845
879
|
|
|
846
|
-
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
|
|
880
|
+
return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative));
|
|
847
881
|
}
|
|
848
882
|
|
|
849
883
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
@@ -875,7 +909,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
|
|
|
875
909
|
return Qfalse;
|
|
876
910
|
}
|
|
877
911
|
|
|
878
|
-
static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
912
|
+
NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
|
|
879
913
|
{
|
|
880
914
|
VALUE message = rb_sprintf(
|
|
881
915
|
"detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
|
|
@@ -886,18 +920,17 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
|
|
|
886
920
|
RB_GC_GUARD(message);
|
|
887
921
|
}
|
|
888
922
|
|
|
889
|
-
|
|
890
|
-
RBIMPL_ATTR_NORETURN()
|
|
891
|
-
#endif
|
|
892
|
-
static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
923
|
+
NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
|
|
893
924
|
{
|
|
894
925
|
VALUE message = rb_sprintf(
|
|
895
926
|
"duplicate key %"PRIsVALUE,
|
|
896
927
|
rb_inspect(duplicate_key)
|
|
897
928
|
);
|
|
898
929
|
|
|
899
|
-
|
|
900
|
-
|
|
930
|
+
long line, column;
|
|
931
|
+
cursor_position(state, &line, &column);
|
|
932
|
+
rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
|
|
933
|
+
rb_exc_raise(parse_error_new(message, line, column));
|
|
901
934
|
}
|
|
902
935
|
|
|
903
936
|
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
|
@@ -934,7 +967,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
|
|
|
934
967
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
935
968
|
value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
|
|
936
969
|
}
|
|
937
|
-
rvalue_stack_push(state->stack, value,
|
|
970
|
+
rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack);
|
|
938
971
|
return value;
|
|
939
972
|
}
|
|
940
973
|
|
|
@@ -992,7 +1025,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
|
|
|
992
1025
|
JSON_UnescapePositions positions = {
|
|
993
1026
|
.size = 0,
|
|
994
1027
|
.positions = backslashes,
|
|
995
|
-
.
|
|
1028
|
+
.additional_backslashes = 0,
|
|
996
1029
|
};
|
|
997
1030
|
|
|
998
1031
|
do {
|
|
@@ -1007,7 +1040,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
|
|
|
1007
1040
|
backslashes[positions.size] = state->cursor;
|
|
1008
1041
|
positions.size++;
|
|
1009
1042
|
} else {
|
|
1010
|
-
positions.
|
|
1043
|
+
positions.additional_backslashes++;
|
|
1011
1044
|
}
|
|
1012
1045
|
state->cursor++;
|
|
1013
1046
|
break;
|
|
@@ -1119,7 +1152,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1119
1152
|
const char first_digit = *state->cursor;
|
|
1120
1153
|
|
|
1121
1154
|
// Variables for Ryu optimization - extract digits during parsing
|
|
1122
|
-
|
|
1155
|
+
int64_t exponent = 0;
|
|
1123
1156
|
int decimal_point_pos = -1;
|
|
1124
1157
|
uint64_t mantissa = 0;
|
|
1125
1158
|
|
|
@@ -1163,7 +1196,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1163
1196
|
raise_parse_error_at("invalid number: %s", state, start);
|
|
1164
1197
|
}
|
|
1165
1198
|
|
|
1166
|
-
exponent = negative_exponent ? -
|
|
1199
|
+
exponent = negative_exponent ? -abs_exponent : abs_exponent;
|
|
1167
1200
|
}
|
|
1168
1201
|
|
|
1169
1202
|
if (integer) {
|
|
@@ -1440,6 +1473,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1440
1473
|
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1441
1474
|
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1442
1475
|
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1476
|
+
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1443
1477
|
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
1444
1478
|
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
|
1445
1479
|
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
|
@@ -1543,11 +1577,13 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
|
|
|
1543
1577
|
const char *start;
|
|
1544
1578
|
RSTRING_GETMEM(Vsource, start, len);
|
|
1545
1579
|
|
|
1580
|
+
VALUE stack_handle = 0;
|
|
1546
1581
|
JSON_ParserState _state = {
|
|
1547
1582
|
.start = start,
|
|
1548
1583
|
.cursor = start,
|
|
1549
1584
|
.end = start + len,
|
|
1550
1585
|
.stack = &stack,
|
|
1586
|
+
.stack_handle = &stack_handle,
|
|
1551
1587
|
};
|
|
1552
1588
|
JSON_ParserState *state = &_state;
|
|
1553
1589
|
|
|
@@ -1555,8 +1591,8 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
|
|
|
1555
1591
|
|
|
1556
1592
|
// This may be skipped in case of exception, but
|
|
1557
1593
|
// it won't cause a leak.
|
|
1558
|
-
rvalue_stack_eagerly_release(
|
|
1559
|
-
|
|
1594
|
+
rvalue_stack_eagerly_release(stack_handle);
|
|
1595
|
+
RB_GC_GUARD(stack_handle);
|
|
1560
1596
|
json_ensure_eof(state);
|
|
1561
1597
|
|
|
1562
1598
|
return result;
|
|
@@ -1594,26 +1630,19 @@ static void JSON_ParserConfig_mark(void *ptr)
|
|
|
1594
1630
|
rb_gc_mark(config->decimal_class);
|
|
1595
1631
|
}
|
|
1596
1632
|
|
|
1597
|
-
static void JSON_ParserConfig_free(void *ptr)
|
|
1598
|
-
{
|
|
1599
|
-
JSON_ParserConfig *config = ptr;
|
|
1600
|
-
ruby_xfree(config);
|
|
1601
|
-
}
|
|
1602
|
-
|
|
1603
1633
|
static size_t JSON_ParserConfig_memsize(const void *ptr)
|
|
1604
1634
|
{
|
|
1605
1635
|
return sizeof(JSON_ParserConfig);
|
|
1606
1636
|
}
|
|
1607
1637
|
|
|
1608
1638
|
static const rb_data_type_t JSON_ParserConfig_type = {
|
|
1609
|
-
"JSON::Ext::Parser/ParserConfig",
|
|
1610
|
-
{
|
|
1639
|
+
.wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
|
|
1640
|
+
.function = {
|
|
1611
1641
|
JSON_ParserConfig_mark,
|
|
1612
|
-
|
|
1642
|
+
RUBY_DEFAULT_FREE,
|
|
1613
1643
|
JSON_ParserConfig_memsize,
|
|
1614
1644
|
},
|
|
1615
|
-
|
|
1616
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
|
|
1645
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
|
|
1617
1646
|
};
|
|
1618
1647
|
|
|
1619
1648
|
static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
@@ -1658,6 +1687,7 @@ void Init_parser(void)
|
|
|
1658
1687
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1659
1688
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
1660
1689
|
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1690
|
+
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1661
1691
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
1662
1692
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
|
1663
1693
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
data/ext/json/ext/simd/simd.h
CHANGED
|
@@ -58,7 +58,34 @@ static inline int trailing_zeros(int input)
|
|
|
58
58
|
|
|
59
59
|
#ifdef JSON_ENABLE_SIMD
|
|
60
60
|
|
|
61
|
-
#define SIMD_MINIMUM_THRESHOLD
|
|
61
|
+
#define SIMD_MINIMUM_THRESHOLD 4
|
|
62
|
+
|
|
63
|
+
ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len)
|
|
64
|
+
{
|
|
65
|
+
RBIMPL_ASSERT_OR_ASSUME(len < 16);
|
|
66
|
+
RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4
|
|
67
|
+
#if defined(__has_builtin) && __has_builtin(__builtin_memcpy)
|
|
68
|
+
// If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes.
|
|
69
|
+
// These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
|
|
70
|
+
// the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
|
|
71
|
+
// position in both copies.
|
|
72
|
+
|
|
73
|
+
// Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
|
|
74
|
+
// generated assembly. On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
|
|
75
|
+
// when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
|
|
76
|
+
// select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
|
|
77
|
+
// plus two loads and stores generated when using __builtin_memcpy.
|
|
78
|
+
if (len >= 8) {
|
|
79
|
+
__builtin_memcpy(dst, src, 8);
|
|
80
|
+
__builtin_memcpy(dst + len - 8, src + len - 8, 8);
|
|
81
|
+
} else {
|
|
82
|
+
__builtin_memcpy(dst, src, 4);
|
|
83
|
+
__builtin_memcpy(dst + len - 4, src + len - 4, 4);
|
|
84
|
+
}
|
|
85
|
+
#else
|
|
86
|
+
MEMCPY(dst, src, char, len);
|
|
87
|
+
#endif
|
|
88
|
+
}
|
|
62
89
|
|
|
63
90
|
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
|
|
64
91
|
#include <arm_neon.h>
|
|
@@ -106,16 +133,6 @@ ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, const char *en
|
|
|
106
133
|
return 0;
|
|
107
134
|
}
|
|
108
135
|
|
|
109
|
-
static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)
|
|
110
|
-
{
|
|
111
|
-
uint8x16x4_t tab;
|
|
112
|
-
tab.val[0] = vld1q_u8(table);
|
|
113
|
-
tab.val[1] = vld1q_u8(table+16);
|
|
114
|
-
tab.val[2] = vld1q_u8(table+32);
|
|
115
|
-
tab.val[3] = vld1q_u8(table+48);
|
|
116
|
-
return tab;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
136
|
#endif /* ARM Neon Support.*/
|
|
120
137
|
|
|
121
138
|
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|