json 2.11.3 → 2.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,8 @@ typedef unsigned char _Bool;
20
20
  #endif
21
21
  #endif
22
22
 
23
+ #include "../simd/simd.h"
24
+
23
25
  #ifndef RB_UNLIKELY
24
26
  #define RB_UNLIKELY(expr) expr
25
27
  #endif
@@ -35,7 +37,7 @@ static ID i_chr, i_aset, i_aref,
35
37
  i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
36
38
 
37
39
  static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
38
- sym_decimal_class, sym_on_load;
40
+ sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
39
41
 
40
42
  static int binary_encindex;
41
43
  static int utf8_encindex;
@@ -337,73 +339,6 @@ static size_t strnlen(const char *s, size_t maxlen)
337
339
  }
338
340
  #endif
339
341
 
340
- #define PARSE_ERROR_FRAGMENT_LEN 32
341
- #ifdef RBIMPL_ATTR_NORETURN
342
- RBIMPL_ATTR_NORETURN()
343
- #endif
344
- static void raise_parse_error(const char *format, const char *start)
345
- {
346
- unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
347
-
348
- size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
349
- const char *ptr = start;
350
-
351
- if (len == PARSE_ERROR_FRAGMENT_LEN) {
352
- MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
353
-
354
- while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
355
- len--;
356
- }
357
-
358
- if (buffer[len - 1] >= 0xC0) { // multibyte character start
359
- len--;
360
- }
361
-
362
- buffer[len] = '\0';
363
- ptr = (const char *)buffer;
364
- }
365
-
366
- rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
367
- }
368
-
369
- /* unicode */
370
-
371
- static const signed char digit_values[256] = {
372
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
373
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
374
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
375
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
376
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
377
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
378
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
379
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
380
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
381
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
382
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
383
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
384
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
385
- -1, -1, -1, -1, -1, -1, -1
386
- };
387
-
388
- static uint32_t unescape_unicode(const unsigned char *p)
389
- {
390
- signed char b;
391
- uint32_t result = 0;
392
- b = digit_values[p[0]];
393
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
394
- result = (result << 4) | (unsigned char)b;
395
- b = digit_values[p[1]];
396
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
397
- result = (result << 4) | (unsigned char)b;
398
- b = digit_values[p[2]];
399
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
400
- result = (result << 4) | (unsigned char)b;
401
- b = digit_values[p[3]];
402
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
403
- result = (result << 4) | (unsigned char)b;
404
- return result;
405
- }
406
-
407
342
  static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
408
343
  {
409
344
  int len = 1;
@@ -430,10 +365,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
430
365
  return len;
431
366
  }
432
367
 
368
/*
 * What json_decode_object() does when an object ends up with fewer entries
 * than key/value pairs were parsed, i.e. when a key was repeated.
 *
 * parser_config_init_i() selects JSON_IGNORE or JSON_RAISE from the
 * `allow_duplicate_key` option. JSON_DEPRECATED is the zero value, so it is
 * presumably what a zero-initialised config uses when the option is absent
 * (NOTE(review): confirm against the config allocation code).
 */
enum duplicate_key_action {
    JSON_DEPRECATED = 0, /* keep parsing, but emit a deprecation warning */
    JSON_IGNORE     = 1, /* accept duplicates silently (allow_duplicate_key: true) */
    JSON_RAISE      = 2, /* raise a "duplicate key" parse error (allow_duplicate_key: false) */
};
373
+
433
374
  typedef struct JSON_ParserStruct {
434
375
  VALUE on_load_proc;
435
376
  VALUE decimal_class;
436
377
  ID decimal_method_id;
378
+ enum duplicate_key_action on_duplicate_key;
437
379
  int max_nesting;
438
380
  bool allow_nan;
439
381
  bool allow_trailing_comma;
@@ -444,6 +386,7 @@ typedef struct JSON_ParserStruct {
444
386
 
445
387
  typedef struct JSON_ParserStateStruct {
446
388
  VALUE stack_handle;
389
+ const char *start;
447
390
  const char *cursor;
448
391
  const char *end;
449
392
  rvalue_stack *stack;
@@ -452,6 +395,133 @@ typedef struct JSON_ParserStateStruct {
452
395
  int current_nesting;
453
396
  } JSON_ParserState;
454
397
 
398
+ static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
399
+ {
400
+ const char *cursor = state->cursor;
401
+ long column = 0;
402
+ long line = 1;
403
+
404
+ while (cursor >= state->start) {
405
+ if (*cursor-- == '\n') {
406
+ break;
407
+ }
408
+ column++;
409
+ }
410
+
411
+ while (cursor >= state->start) {
412
+ if (*cursor-- == '\n') {
413
+ line++;
414
+ }
415
+ }
416
+ *line_out = line;
417
+ *column_out = column;
418
+ }
419
+
420
+ static void emit_parse_warning(const char *message, JSON_ParserState *state)
421
+ {
422
+ long line, column;
423
+ cursor_position(state, &line, &column);
424
+
425
+ rb_warn("%s at line %ld column %ld", message, line, column);
426
+ }
427
+
428
+ #define PARSE_ERROR_FRAGMENT_LEN 32
429
+ #ifdef RBIMPL_ATTR_NORETURN
430
+ RBIMPL_ATTR_NORETURN()
431
+ #endif
432
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
433
+ {
434
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
435
+ long line, column;
436
+ cursor_position(state, &line, &column);
437
+
438
+ const char *ptr = "EOF";
439
+ if (state->cursor && state->cursor < state->end) {
440
+ ptr = state->cursor;
441
+ size_t len = 0;
442
+ while (len < PARSE_ERROR_FRAGMENT_LEN) {
443
+ char ch = ptr[len];
444
+ if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
445
+ break;
446
+ }
447
+ len++;
448
+ }
449
+
450
+ if (len) {
451
+ buffer[0] = '\'';
452
+ MEMCPY(buffer + 1, ptr, char, len);
453
+
454
+ while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
455
+ len--;
456
+ }
457
+
458
+ if (buffer[len] >= 0xC0) { // multibyte character start
459
+ len--;
460
+ }
461
+
462
+ buffer[len + 1] = '\'';
463
+ buffer[len + 2] = '\0';
464
+ ptr = (const char *)buffer;
465
+ }
466
+ }
467
+
468
+ VALUE msg = rb_sprintf(format, ptr);
469
+ VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
470
+ RB_GC_GUARD(msg);
471
+
472
+ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
473
+ rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
474
+ rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
475
+ rb_exc_raise(exc);
476
+ }
477
+
478
+ #ifdef RBIMPL_ATTR_NORETURN
479
+ RBIMPL_ATTR_NORETURN()
480
+ #endif
481
+ static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
482
+ {
483
+ state->cursor = at;
484
+ raise_parse_error(format, state);
485
+ }
486
+
487
+ /* unicode */
488
+
489
/*
 * Hexadecimal value of each byte: 0-15 for '0'-'9', 'A'-'F' and 'a'-'f',
 * -1 for every other byte. One row per high nibble.
 */
static const signed char digit_values[256] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
505
+
506
+ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
507
+ {
508
+ signed char b;
509
+ uint32_t result = 0;
510
+ b = digit_values[p[0]];
511
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
512
+ result = (result << 4) | (unsigned char)b;
513
+ b = digit_values[p[1]];
514
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
515
+ result = (result << 4) | (unsigned char)b;
516
+ b = digit_values[p[2]];
517
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
518
+ result = (result << 4) | (unsigned char)b;
519
+ b = digit_values[p[3]];
520
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
521
+ result = (result << 4) | (unsigned char)b;
522
+ return result;
523
+ }
524
+
455
525
  #define GET_PARSER_CONFIG \
456
526
  JSON_ParserConfig *config; \
457
527
  TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
@@ -470,7 +540,7 @@ static void
470
540
  json_eat_comments(JSON_ParserState *state)
471
541
  {
472
542
  if (state->cursor + 1 < state->end) {
473
- switch(state->cursor[1]) {
543
+ switch (state->cursor[1]) {
474
544
  case '/': {
475
545
  state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
476
546
  if (!state->cursor) {
@@ -485,8 +555,7 @@ json_eat_comments(JSON_ParserState *state)
485
555
  while (true) {
486
556
  state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
487
557
  if (!state->cursor) {
488
- state->cursor = state->end;
489
- raise_parse_error("unexpected end of input, expected closing '*/'", state->cursor);
558
+ raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
490
559
  } else {
491
560
  state->cursor++;
492
561
  if (state->cursor < state->end && *state->cursor == '/') {
@@ -498,11 +567,11 @@ json_eat_comments(JSON_ParserState *state)
498
567
  break;
499
568
  }
500
569
  default:
501
- raise_parse_error("unexpected token at '%s'", state->cursor);
570
+ raise_parse_error("unexpected token %s", state);
502
571
  break;
503
572
  }
504
573
  } else {
505
- raise_parse_error("unexpected token at '%s'", state->cursor);
574
+ raise_parse_error("unexpected token %s", state);
506
575
  }
507
576
  }
508
577
 
@@ -621,9 +690,9 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
621
690
  break;
622
691
  case 'u':
623
692
  if (pe > stringEnd - 5) {
624
- raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
693
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
625
694
  } else {
626
- uint32_t ch = unescape_unicode((unsigned char *) ++pe);
695
+ uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
627
696
  pe += 3;
628
697
  /* To handle values above U+FFFF, we take a sequence of
629
698
  * \uXXXX escapes in the U+D800..U+DBFF then
@@ -638,10 +707,10 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
638
707
  if ((ch & 0xFC00) == 0xD800) {
639
708
  pe++;
640
709
  if (pe > stringEnd - 6) {
641
- raise_parse_error("incomplete surrogate pair at '%s'", p);
710
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
642
711
  }
643
712
  if (pe[0] == '\\' && pe[1] == 'u') {
644
- uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
713
+ uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
645
714
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
646
715
  | (sur & 0x3FF));
647
716
  pe += 5;
@@ -761,11 +830,25 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
761
830
  return array;
762
831
  }
763
832
 
764
- static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
833
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
765
834
  {
766
- VALUE object = rb_hash_new_capa(count);
835
+ size_t entries_count = count / 2;
836
+ VALUE object = rb_hash_new_capa(entries_count);
767
837
  rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
768
838
 
839
+ if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
840
+ switch (config->on_duplicate_key) {
841
+ case JSON_IGNORE:
842
+ break;
843
+ case JSON_DEPRECATED:
844
+ emit_parse_warning("detected duplicate keys in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", state);
845
+ break;
846
+ case JSON_RAISE:
847
+ raise_parse_error("duplicate key", state);
848
+ break;
849
+ }
850
+ }
851
+
769
852
  rvalue_stack_pop(state->stack, count);
770
853
 
771
854
  if (config->freeze) {
@@ -798,7 +881,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
798
881
  return value;
799
882
  }
800
883
 
801
- static const bool string_scan[256] = {
884
+ static const bool string_scan_table[256] = {
802
885
  // ASCII Control Characters
803
886
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
804
887
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -811,38 +894,77 @@ static const bool string_scan[256] = {
811
894
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
812
895
  };
813
896
 
897
+ #if (defined(__GNUC__ ) || defined(__clang__))
898
+ #define FORCE_INLINE __attribute__((always_inline))
899
+ #else
900
+ #define FORCE_INLINE
901
+ #endif
902
+
903
+ #ifdef HAVE_SIMD
904
+ static SIMD_Implementation simd_impl = SIMD_NONE;
905
+ #endif /* HAVE_SIMD */
906
+
907
+ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
908
+ {
909
+ #ifdef HAVE_SIMD
910
+ #if defined(HAVE_SIMD_NEON)
911
+
912
+ uint64_t mask = 0;
913
+ if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
914
+ state->cursor += trailing_zeros64(mask) >> 2;
915
+ return 1;
916
+ }
917
+
918
+ #elif defined(HAVE_SIMD_SSE2)
919
+ if (simd_impl == SIMD_SSE2) {
920
+ int mask = 0;
921
+ if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
922
+ state->cursor += trailing_zeros(mask);
923
+ return 1;
924
+ }
925
+ }
926
+ #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
927
+ #endif /* HAVE_SIMD */
928
+
929
+ while (state->cursor < state->end) {
930
+ if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
931
+ return 1;
932
+ }
933
+ *state->cursor++;
934
+ }
935
+ return 0;
936
+ }
937
+
814
938
/*
 * Parse a JSON string literal and push the decoded value on the value stack.
 *
 * state:   parser state; state->cursor must point at the opening '"'. On
 *          success the cursor is left just past the closing '"'.
 * config:  parser configuration, forwarded to the decode and push helpers.
 * is_name: true when the string is an object key; forwarded to
 *          json_decode_string().
 *
 * Returns the value returned by json_push_value().
 * Raises a parse error on a raw control character, on a backslash followed by
 * a control character or by the end of the input, and on a missing closing
 * quote.
 */
static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
{
    state->cursor++;
    const char *start = state->cursor;
    bool escaped = false;

    while (RB_UNLIKELY(string_scan(state))) {
        switch (*state->cursor) {
            case '"': {
                VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
                state->cursor++;
                return json_push_value(state, config, string);
            }
            case '\\': {
                state->cursor++;
                escaped = true;
                /*
                 * The escaped byte may lie past the end of the input. Check
                 * the bound before reading it, instead of relying on a NUL
                 * terminator being present.
                 */
                if (state->cursor >= state->end || (unsigned char)*state->cursor < 0x20) {
                    raise_parse_error("invalid ASCII control character in string: %s", state);
                }
                break;
            }
            default:
                raise_parse_error("invalid ASCII control character in string: %s", state);
                break;
        }

        /* Step over the escaped byte and resume scanning. */
        state->cursor++;
    }

    raise_parse_error("unexpected end of input, expected closing \"", state);
    return Qfalse;
}
848
970
 
@@ -850,7 +972,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
850
972
  {
851
973
  json_eat_whitespace(state);
852
974
  if (state->cursor >= state->end) {
853
- raise_parse_error("unexpected end of input", state->cursor);
975
+ raise_parse_error("unexpected end of input", state);
854
976
  }
855
977
 
856
978
  switch (*state->cursor) {
@@ -860,7 +982,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
860
982
  return json_push_value(state, config, Qnil);
861
983
  }
862
984
 
863
- raise_parse_error("unexpected token at '%s'", state->cursor);
985
+ raise_parse_error("unexpected token %s", state);
864
986
  break;
865
987
  case 't':
866
988
  if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
@@ -868,7 +990,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
868
990
  return json_push_value(state, config, Qtrue);
869
991
  }
870
992
 
871
- raise_parse_error("unexpected token at '%s'", state->cursor);
993
+ raise_parse_error("unexpected token %s", state);
872
994
  break;
873
995
  case 'f':
874
996
  // Note: memcmp with a small power of two compile to an integer comparison
@@ -877,7 +999,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
877
999
  return json_push_value(state, config, Qfalse);
878
1000
  }
879
1001
 
880
- raise_parse_error("unexpected token at '%s'", state->cursor);
1002
+ raise_parse_error("unexpected token %s", state);
881
1003
  break;
882
1004
  case 'N':
883
1005
  // Note: memcmp with a small power of two compile to an integer comparison
@@ -886,7 +1008,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
886
1008
  return json_push_value(state, config, CNaN);
887
1009
  }
888
1010
 
889
- raise_parse_error("unexpected token at '%s'", state->cursor);
1011
+ raise_parse_error("unexpected token %s", state);
890
1012
  break;
891
1013
  case 'I':
892
1014
  if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
@@ -894,7 +1016,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
894
1016
  return json_push_value(state, config, CInfinity);
895
1017
  }
896
1018
 
897
- raise_parse_error("unexpected token at '%s'", state->cursor);
1019
+ raise_parse_error("unexpected token %s", state);
898
1020
  break;
899
1021
  case '-':
900
1022
  // Note: memcmp with a small power of two compile to an integer comparison
@@ -903,7 +1025,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
903
1025
  state->cursor += 9;
904
1026
  return json_push_value(state, config, CMinusInfinity);
905
1027
  } else {
906
- raise_parse_error("unexpected token at '%s'", state->cursor);
1028
+ raise_parse_error("unexpected token %s", state);
907
1029
  }
908
1030
  }
909
1031
  // Fallthrough
@@ -921,11 +1043,11 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
921
1043
  long integer_length = state->cursor - start;
922
1044
 
923
1045
  if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
924
- raise_parse_error("invalid number: %s", start);
1046
+ raise_parse_error_at("invalid number: %s", state, start);
925
1047
  } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
926
- raise_parse_error("invalid number: %s", start);
1048
+ raise_parse_error_at("invalid number: %s", state, start);
927
1049
  } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
928
- raise_parse_error("invalid number: %s", start);
1050
+ raise_parse_error_at("invalid number: %s", state, start);
929
1051
  }
930
1052
 
931
1053
  if ((state->cursor < state->end) && (*state->cursor == '.')) {
@@ -933,7 +1055,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
933
1055
  state->cursor++;
934
1056
 
935
1057
  if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
936
- raise_parse_error("invalid number: %s", state->cursor);
1058
+ raise_parse_error("invalid number: %s", state);
937
1059
  }
938
1060
 
939
1061
  while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
@@ -949,7 +1071,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
949
1071
  }
950
1072
 
951
1073
  if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
952
- raise_parse_error("invalid number: %s", state->cursor);
1074
+ raise_parse_error("invalid number: %s", state);
953
1075
  }
954
1076
 
955
1077
  while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
@@ -1009,11 +1131,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1009
1131
  }
1010
1132
  }
1011
1133
 
1012
- raise_parse_error("expected ',' or ']' after array value", state->cursor);
1134
+ raise_parse_error("expected ',' or ']' after array value", state);
1013
1135
  }
1014
1136
  break;
1015
1137
  }
1016
1138
  case '{': {
1139
+ const char *object_start_cursor = state->cursor;
1140
+
1017
1141
  state->cursor++;
1018
1142
  json_eat_whitespace(state);
1019
1143
  long stack_head = state->stack->head;
@@ -1028,13 +1152,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1028
1152
  }
1029
1153
 
1030
1154
  if (*state->cursor != '"') {
1031
- raise_parse_error("expected object key, got '%s", state->cursor);
1155
+ raise_parse_error("expected object key, got %s", state);
1032
1156
  }
1033
1157
  json_parse_string(state, config, true);
1034
1158
 
1035
1159
  json_eat_whitespace(state);
1036
1160
  if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1037
- raise_parse_error("expected ':' after object key", state->cursor);
1161
+ raise_parse_error("expected ':' after object key", state);
1038
1162
  }
1039
1163
  state->cursor++;
1040
1164
 
@@ -1048,8 +1172,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1048
1172
  if (*state->cursor == '}') {
1049
1173
  state->cursor++;
1050
1174
  state->current_nesting--;
1051
- long count = state->stack->head - stack_head;
1052
- return json_push_value(state, config, json_decode_object(state, config, count));
1175
+ size_t count = state->stack->head - stack_head;
1176
+
1177
+ // Temporary rewind cursor in case an error is raised
1178
+ const char *final_cursor = state->cursor;
1179
+ state->cursor = object_start_cursor;
1180
+ VALUE object = json_decode_object(state, config, count);
1181
+ state->cursor = final_cursor;
1182
+
1183
+ return json_push_value(state, config, object);
1053
1184
  }
1054
1185
 
1055
1186
  if (*state->cursor == ',') {
@@ -1063,13 +1194,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1063
1194
  }
1064
1195
 
1065
1196
  if (*state->cursor != '"') {
1066
- raise_parse_error("expected object key, got: '%s'", state->cursor);
1197
+ raise_parse_error("expected object key, got: %s", state);
1067
1198
  }
1068
1199
  json_parse_string(state, config, true);
1069
1200
 
1070
1201
  json_eat_whitespace(state);
1071
1202
  if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1072
- raise_parse_error("expected ':' after object key, got: '%s", state->cursor);
1203
+ raise_parse_error("expected ':' after object key, got: %s", state);
1073
1204
  }
1074
1205
  state->cursor++;
1075
1206
 
@@ -1079,24 +1210,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1079
1210
  }
1080
1211
  }
1081
1212
 
1082
- raise_parse_error("expected ',' or '}' after object value, got: '%s'", state->cursor);
1213
+ raise_parse_error("expected ',' or '}' after object value, got: %s", state);
1083
1214
  }
1084
1215
  break;
1085
1216
  }
1086
1217
 
1087
1218
  default:
1088
- raise_parse_error("unexpected character: '%s'", state->cursor);
1219
+ raise_parse_error("unexpected character: %s", state);
1089
1220
  break;
1090
1221
  }
1091
1222
 
1092
- raise_parse_error("unreacheable: '%s'", state->cursor);
1223
+ raise_parse_error("unreacheable: %s", state);
1093
1224
  }
1094
1225
 
1095
1226
  static void json_ensure_eof(JSON_ParserState *state)
1096
1227
  {
1097
1228
  json_eat_whitespace(state);
1098
1229
  if (state->cursor != state->end) {
1099
- raise_parse_error("unexpected token at end of stream '%s'", state->cursor);
1230
+ raise_parse_error("unexpected token at end of stream %s", state);
1100
1231
  }
1101
1232
  }
1102
1233
 
@@ -1138,6 +1269,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1138
1269
  else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1139
1270
  else if (key == sym_freeze) { config->freeze = RTEST(val); }
1140
1271
  else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1272
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1141
1273
  else if (key == sym_decimal_class) {
1142
1274
  if (RTEST(val)) {
1143
1275
  if (rb_respond_to(val, i_try_convert)) {
@@ -1232,9 +1364,14 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1232
1364
  .capa = RVALUE_STACK_INITIAL_CAPA,
1233
1365
  };
1234
1366
 
1367
+ long len;
1368
+ const char *start;
1369
+ RSTRING_GETMEM(Vsource, start, len);
1370
+
1235
1371
  JSON_ParserState _state = {
1236
- .cursor = RSTRING_PTR(Vsource),
1237
- .end = RSTRING_END(Vsource),
1372
+ .start = start,
1373
+ .cursor = start,
1374
+ .end = start + len,
1238
1375
  .stack = &stack,
1239
1376
  };
1240
1377
  JSON_ParserState *state = &_state;
@@ -1349,6 +1486,7 @@ void Init_parser(void)
1349
1486
  sym_freeze = ID2SYM(rb_intern("freeze"));
1350
1487
  sym_on_load = ID2SYM(rb_intern("on_load"));
1351
1488
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1489
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1352
1490
 
1353
1491
  i_chr = rb_intern("chr");
1354
1492
  i_aset = rb_intern("[]=");
@@ -1362,4 +1500,8 @@ void Init_parser(void)
1362
1500
  binary_encindex = rb_ascii8bit_encindex();
1363
1501
  utf8_encindex = rb_utf8_encindex();
1364
1502
  enc_utf8 = rb_utf8_encoding();
1503
+
1504
+ #ifdef HAVE_SIMD
1505
+ simd_impl = find_simd_implementation();
1506
+ #endif
1365
1507
  }
@@ -0,0 +1,20 @@
1
# Decide whether the extension can be built with SIMD support.
# Pick the intrinsics header and a tiny vector initialisation for the host CPU;
# other CPUs leave `header` nil and never get -DJSON_ENABLE_SIMD.
case RbConfig::CONFIG['host_cpu']
when /^(arm|aarch64)/
  # Try to compile a small program using NEON instructions
  header, type, init = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)'
when /^(x86_64|x64)/
  header, type, init = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)'
end

# Enable SIMD only when the header exists AND the probe program compiles.
# The probe includes <stdio.h> itself so that `printf` is declared and the
# result does not depend on implicit function declarations being tolerated.
if header && have_header(header) && try_compile(<<~SRC)
  #include <stdio.h>
  #{cpp_include(header)}
  int main(int argc, char **argv) {
    #{type} test = #{init};
    if (argc > 100000) printf("%p", (void *)&test);
    return 0;
  }
SRC
  $defs.push("-DJSON_ENABLE_SIMD")
end

have_header('cpuid.h')