json 2.12.2 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,32 +1,6 @@
1
- #include "ruby.h"
2
- #include "ruby/encoding.h"
3
-
4
- /* shims */
5
- /* This is the fallback definition from Ruby 3.4 */
6
-
7
- #ifndef RBIMPL_STDBOOL_H
8
- #if defined(__cplusplus)
9
- # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
- # include <cstdbool>
11
- # endif
12
- #elif defined(HAVE_STDBOOL_H)
13
- # include <stdbool.h>
14
- #elif !defined(HAVE__BOOL)
15
- typedef unsigned char _Bool;
16
- # define bool _Bool
17
- # define true ((_Bool)+1)
18
- # define false ((_Bool)+0)
19
- # define __bool_true_false_are_defined
20
- #endif
21
- #endif
22
-
23
- #ifndef RB_UNLIKELY
24
- #define RB_UNLIKELY(expr) expr
25
- #endif
26
-
27
- #ifndef RB_LIKELY
28
- #define RB_LIKELY(expr) expr
29
- #endif
1
+ #include "../json.h"
2
+ #include "../vendor/ryu.h"
3
+ #include "../simd/simd.h"
30
4
 
31
5
  static VALUE mJSON, eNestingError, Encoding_UTF_8;
32
6
  static VALUE CNaN, CInfinity, CMinusInfinity;
@@ -35,14 +9,14 @@ static ID i_chr, i_aset, i_aref,
35
9
  i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
36
10
 
37
11
  static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
38
- sym_decimal_class, sym_on_load;
12
+ sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
39
13
 
40
14
  static int binary_encindex;
41
15
  static int utf8_encindex;
42
16
 
43
17
  #ifndef HAVE_RB_HASH_BULK_INSERT
44
18
  // For TruffleRuby
45
- void
19
+ static void
46
20
  rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
47
21
  {
48
22
  long index = 0;
@@ -59,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
59
33
  #define rb_hash_new_capa(n) rb_hash_new()
60
34
  #endif
61
35
 
36
+ #ifndef HAVE_RB_STR_TO_INTERNED_STR
37
+ static VALUE rb_str_to_interned_str(VALUE str)
38
+ {
39
+ return rb_funcall(rb_str_freeze(str), i_uminus, 0);
40
+ }
41
+ #endif
62
42
 
63
43
  /* name cache */
64
44
 
@@ -104,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
104
84
  cache->entries[index] = rstring;
105
85
  }
106
86
 
107
- static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
87
+ #define rstring_cache_memcmp memcmp
88
+
89
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
90
+ #if __has_builtin(__builtin_bswap64)
91
+ #undef rstring_cache_memcmp
92
+ static ALWAYS_INLINE() int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
108
93
  {
109
- long rstring_length = RSTRING_LEN(rstring);
110
- if (length == rstring_length) {
111
- return memcmp(str, RSTRING_PTR(rstring), length);
112
- } else {
113
- return (int)(length - rstring_length);
94
+ // The libc memcmp has numerous complex optimizations, but in this particular case,
95
+ // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
96
+ // inline a simpler memcmp outperforms calling the libc version.
97
+ long i = 0;
98
+
99
+ for (; i + 8 <= length; i += 8) {
100
+ uint64_t a, b;
101
+ memcpy(&a, str + i, 8);
102
+ memcpy(&b, rptr + i, 8);
103
+ if (a != b) {
104
+ a = __builtin_bswap64(a);
105
+ b = __builtin_bswap64(b);
106
+ return (a < b) ? -1 : 1;
107
+ }
108
+ }
109
+
110
+ for (; i < length; i++) {
111
+ if (str[i] != rptr[i]) {
112
+ return (str[i] < rptr[i]) ? -1 : 1;
113
+ }
114
114
  }
115
+
116
+ return 0;
115
117
  }
118
+ #endif
119
+ #endif
116
120
 
117
- static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
121
+ static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
118
122
  {
119
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
120
- // Common names aren't likely to be very long. So we just don't
121
- // cache names above an arbitrary threshold.
122
- return Qfalse;
123
- }
123
+ const char *rstring_ptr;
124
+ long rstring_length;
125
+
126
+ RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
124
127
 
125
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
126
- // Simple heuristic, if the first character isn't a letter,
127
- // we're much less likely to see this string again.
128
- // We mostly want to cache strings that are likely to be repeated.
129
- return Qfalse;
128
+ if (length == rstring_length) {
129
+ return rstring_cache_memcmp(str, rstring_ptr, length);
130
+ } else {
131
+ return (int)(length - rstring_length);
130
132
  }
133
+ }
131
134
 
135
+ static ALWAYS_INLINE() VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
136
+ {
132
137
  int low = 0;
133
138
  int high = cache->length - 1;
134
- int mid = 0;
135
- int last_cmp = 0;
136
139
 
137
140
  while (low <= high) {
138
- mid = (high + low) >> 1;
141
+ int mid = (high + low) >> 1;
139
142
  VALUE entry = cache->entries[mid];
140
- last_cmp = rstring_cache_cmp(str, length, entry);
143
+ int cmp = rstring_cache_cmp(str, length, entry);
141
144
 
142
- if (last_cmp == 0) {
145
+ if (cmp == 0) {
143
146
  return entry;
144
- } else if (last_cmp > 0) {
147
+ } else if (cmp > 0) {
145
148
  low = mid + 1;
146
149
  } else {
147
150
  high = mid - 1;
148
151
  }
149
152
  }
150
153
 
151
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
152
- // We assume the overwhelming majority of names don't need to be escaped.
153
- // But if they do, we have to fallback to the slow path.
154
- return Qfalse;
155
- }
156
-
157
154
  VALUE rstring = build_interned_string(str, length);
158
155
 
159
156
  if (cache->length < JSON_RVALUE_CACHE_CAPA) {
160
- if (last_cmp > 0) {
161
- mid += 1;
162
- }
163
-
164
- rvalue_cache_insert_at(cache, mid, rstring);
157
+ rvalue_cache_insert_at(cache, low, rstring);
165
158
  }
166
159
  return rstring;
167
160
  }
168
161
 
169
162
  static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
170
163
  {
171
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
172
- // Common names aren't likely to be very long. So we just don't
173
- // cache names above an arbitrary threshold.
174
- return Qfalse;
175
- }
176
-
177
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
178
- // Simple heuristic, if the first character isn't a letter,
179
- // we're much less likely to see this string again.
180
- // We mostly want to cache strings that are likely to be repeated.
181
- return Qfalse;
182
- }
183
-
184
164
  int low = 0;
185
165
  int high = cache->length - 1;
186
- int mid = 0;
187
- int last_cmp = 0;
188
166
 
189
167
  while (low <= high) {
190
- mid = (high + low) >> 1;
168
+ int mid = (high + low) >> 1;
191
169
  VALUE entry = cache->entries[mid];
192
- last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
170
+ int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
193
171
 
194
- if (last_cmp == 0) {
172
+ if (cmp == 0) {
195
173
  return entry;
196
- } else if (last_cmp > 0) {
174
+ } else if (cmp > 0) {
197
175
  low = mid + 1;
198
176
  } else {
199
177
  high = mid - 1;
200
178
  }
201
179
  }
202
180
 
203
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
204
- // We assume the overwhelming majority of names don't need to be escaped.
205
- // But if they do, we have to fallback to the slow path.
206
- return Qfalse;
207
- }
208
-
209
181
  VALUE rsymbol = build_symbol(str, length);
210
182
 
211
183
  if (cache->length < JSON_RVALUE_CACHE_CAPA) {
212
- if (last_cmp > 0) {
213
- mid += 1;
214
- }
215
-
216
- rvalue_cache_insert_at(cache, mid, rsymbol);
184
+ rvalue_cache_insert_at(cache, low, rsymbol);
217
185
  }
218
186
  return rsymbol;
219
187
  }
@@ -363,10 +331,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
363
331
  return len;
364
332
  }
365
333
 
334
+ enum duplicate_key_action {
335
+ JSON_DEPRECATED = 0,
336
+ JSON_IGNORE,
337
+ JSON_RAISE,
338
+ };
339
+
366
340
  typedef struct JSON_ParserStruct {
367
341
  VALUE on_load_proc;
368
342
  VALUE decimal_class;
369
343
  ID decimal_method_id;
344
+ enum duplicate_key_action on_duplicate_key;
370
345
  int max_nesting;
371
346
  bool allow_nan;
372
347
  bool allow_trailing_comma;
@@ -386,15 +361,24 @@ typedef struct JSON_ParserStateStruct {
386
361
  int current_nesting;
387
362
  } JSON_ParserState;
388
363
 
364
+ static inline size_t rest(JSON_ParserState *state) {
365
+ return state->end - state->cursor;
366
+ }
389
367
 
390
- #define PARSE_ERROR_FRAGMENT_LEN 32
391
- #ifdef RBIMPL_ATTR_NORETURN
392
- RBIMPL_ATTR_NORETURN()
393
- #endif
394
- static void raise_parse_error(const char *format, JSON_ParserState *state)
368
+ static inline bool eos(JSON_ParserState *state) {
369
+ return state->cursor >= state->end;
370
+ }
371
+
372
+ static inline char peek(JSON_ParserState *state)
395
373
  {
396
- unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
374
+ if (RB_UNLIKELY(eos(state))) {
375
+ return 0;
376
+ }
377
+ return *state->cursor;
378
+ }
397
379
 
380
+ static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
381
+ {
398
382
  const char *cursor = state->cursor;
399
383
  long column = 0;
400
384
  long line = 1;
@@ -411,6 +395,29 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
411
395
  line++;
412
396
  }
413
397
  }
398
+ *line_out = line;
399
+ *column_out = column;
400
+ }
401
+
402
+ static void emit_parse_warning(const char *message, JSON_ParserState *state)
403
+ {
404
+ long line, column;
405
+ cursor_position(state, &line, &column);
406
+
407
+ VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
408
+ rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
409
+ }
410
+
411
+ #define PARSE_ERROR_FRAGMENT_LEN 32
412
+
413
+ #ifdef RBIMPL_ATTR_NORETURN
414
+ RBIMPL_ATTR_NORETURN()
415
+ #endif
416
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
417
+ {
418
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
419
+ long line, column;
420
+ cursor_position(state, &line, &column);
414
421
 
415
422
  const char *ptr = "EOF";
416
423
  if (state->cursor && state->cursor < state->end) {
@@ -505,61 +512,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
505
512
 
506
513
  static const rb_data_type_t JSON_ParserConfig_type;
507
514
 
508
- static const bool whitespace[256] = {
509
- [' '] = 1,
510
- ['\t'] = 1,
511
- ['\n'] = 1,
512
- ['\r'] = 1,
513
- ['/'] = 1,
514
- };
515
-
516
515
  static void
517
516
  json_eat_comments(JSON_ParserState *state)
518
517
  {
519
- if (state->cursor + 1 < state->end) {
520
- switch(state->cursor[1]) {
521
- case '/': {
522
- state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
523
- if (!state->cursor) {
524
- state->cursor = state->end;
525
- } else {
526
- state->cursor++;
527
- }
528
- break;
518
+ const char *start = state->cursor;
519
+ state->cursor++;
520
+
521
+ switch (peek(state)) {
522
+ case '/': {
523
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
524
+ if (!state->cursor) {
525
+ state->cursor = state->end;
526
+ } else {
527
+ state->cursor++;
529
528
  }
530
- case '*': {
531
- state->cursor += 2;
532
- while (true) {
533
- state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
534
- if (!state->cursor) {
535
- raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
536
- } else {
537
- state->cursor++;
538
- if (state->cursor < state->end && *state->cursor == '/') {
539
- state->cursor++;
540
- break;
541
- }
542
- }
529
+ break;
530
+ }
531
+ case '*': {
532
+ state->cursor++;
533
+
534
+ while (true) {
535
+ const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
536
+ if (!next_match) {
537
+ raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
538
+ }
539
+
540
+ state->cursor = next_match + 1;
541
+ if (peek(state) == '/') {
542
+ state->cursor++;
543
+ break;
543
544
  }
544
- break;
545
545
  }
546
- default:
547
- raise_parse_error("unexpected token %s", state);
548
- break;
546
+ break;
549
547
  }
550
- } else {
551
- raise_parse_error("unexpected token %s", state);
548
+ default:
549
+ raise_parse_error_at("unexpected token %s", state, start);
550
+ break;
552
551
  }
553
552
  }
554
553
 
555
- static inline void
554
+ static ALWAYS_INLINE() void
556
555
  json_eat_whitespace(JSON_ParserState *state)
557
556
  {
558
- while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
559
- if (RB_LIKELY(*state->cursor != '/')) {
560
- state->cursor++;
561
- } else {
562
- json_eat_comments(state);
557
+ while (true) {
558
+ switch (peek(state)) {
559
+ case ' ':
560
+ state->cursor++;
561
+ break;
562
+ case '\n':
563
+ state->cursor++;
564
+
565
+ // Heuristic: if we see a newline, there is likely consecutive spaces after it.
566
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
567
+ while (rest(state) > 8) {
568
+ uint64_t chunk;
569
+ memcpy(&chunk, state->cursor, sizeof(uint64_t));
570
+ if (chunk == 0x2020202020202020) {
571
+ state->cursor += 8;
572
+ continue;
573
+ }
574
+
575
+ uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
576
+ state->cursor += consecutive_spaces;
577
+ break;
578
+ }
579
+ #endif
580
+ break;
581
+ case '\t':
582
+ case '\r':
583
+ state->cursor++;
584
+ break;
585
+ case '/':
586
+ json_eat_comments(state);
587
+ break;
588
+
589
+ default:
590
+ return;
563
591
  }
564
592
  }
565
593
  }
@@ -590,11 +618,20 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
590
618
  return result;
591
619
  }
592
620
 
621
+ static inline bool json_string_cacheable_p(const char *string, size_t length)
622
+ {
623
+ // We mostly want to cache strings that are likely to be repeated.
624
+ // Simple heuristics:
625
+ // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
626
+ // - If the first character isn't a letter, we're much less likely to see this string again.
627
+ return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
628
+ }
629
+
593
630
  static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
594
631
  {
595
632
  size_t bufferSize = stringEnd - string;
596
633
 
597
- if (is_name && state->in_array) {
634
+ if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
598
635
  VALUE cached_key;
599
636
  if (RB_UNLIKELY(symbolize)) {
600
637
  cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
@@ -618,19 +655,6 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
618
655
  int unescape_len;
619
656
  char buf[4];
620
657
 
621
- if (is_name && state->in_array) {
622
- VALUE cached_key;
623
- if (RB_UNLIKELY(symbolize)) {
624
- cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
625
- } else {
626
- cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
627
- }
628
-
629
- if (RB_LIKELY(cached_key)) {
630
- return cached_key;
631
- }
632
- }
633
-
634
658
  VALUE result = rb_str_buf_new(bufferSize);
635
659
  rb_enc_associate_index(result, utf8_encindex);
636
660
  buffer = RSTRING_PTR(result);
@@ -688,11 +712,16 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
688
712
  }
689
713
  if (pe[0] == '\\' && pe[1] == 'u') {
690
714
  uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
715
+
716
+ if ((sur & 0xFC00) != 0xDC00) {
717
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
718
+ }
719
+
691
720
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
692
721
  | (sur & 0x3FF));
693
722
  pe += 5;
694
723
  } else {
695
- unescape = (char *) "?";
724
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
696
725
  break;
697
726
  }
698
727
  }
@@ -718,33 +747,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
718
747
  if (symbolize) {
719
748
  result = rb_str_intern(result);
720
749
  } else if (intern) {
721
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
750
+ result = rb_str_to_interned_str(result);
722
751
  }
723
752
 
724
753
  return result;
725
754
  }
726
755
 
727
756
  #define MAX_FAST_INTEGER_SIZE 18
728
- static inline VALUE fast_decode_integer(const char *p, const char *pe)
729
- {
730
- bool negative = false;
731
- if (*p == '-') {
732
- negative = true;
733
- p++;
734
- }
735
-
736
- long long memo = 0;
737
- while (p < pe) {
738
- memo *= 10;
739
- memo += *p - '0';
740
- p++;
741
- }
742
-
743
- if (negative) {
744
- memo = -memo;
745
- }
746
- return LL2NUM(memo);
747
- }
748
757
 
749
758
  static VALUE json_decode_large_integer(const char *start, long len)
750
759
  {
@@ -758,17 +767,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
758
767
  }
759
768
 
760
769
  static inline VALUE
761
- json_decode_integer(const char *start, const char *end)
770
+ json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
762
771
  {
763
- long len = end - start;
764
- if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
765
- return fast_decode_integer(start, end);
772
+ if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
773
+ if (negative) {
774
+ return INT64T2NUM(-((int64_t)mantissa));
766
775
  }
767
- return json_decode_large_integer(start, len);
776
+ return UINT64T2NUM(mantissa);
777
+ }
778
+
779
+ return json_decode_large_integer(start, end - start);
768
780
  }
769
781
 
770
782
  static VALUE json_decode_large_float(const char *start, long len)
771
783
  {
784
+ if (RB_LIKELY(len < 64)) {
785
+ char buffer[64];
786
+ MEMCPY(buffer, start, char, len);
787
+ buffer[len] = '\0';
788
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
789
+ }
790
+
772
791
  VALUE buffer_v;
773
792
  char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
774
793
  MEMCPY(buffer, start, char, len);
@@ -778,21 +797,24 @@ static VALUE json_decode_large_float(const char *start, long len)
778
797
  return number;
779
798
  }
780
799
 
781
- static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
800
+ /* Ruby JSON optimized float decoder using vendored Ryu algorithm
801
+ * Accepts pre-extracted mantissa and exponent from first-pass validation
802
+ */
803
+ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
804
+ const char *start, const char *end)
782
805
  {
783
- long len = end - start;
784
-
785
806
  if (RB_UNLIKELY(config->decimal_class)) {
786
- VALUE text = rb_str_new(start, len);
807
+ VALUE text = rb_str_new(start, end - start);
787
808
  return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
788
- } else if (RB_LIKELY(len < 64)) {
789
- char buffer[64];
790
- MEMCPY(buffer, start, char, len);
791
- buffer[len] = '\0';
792
- return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
793
- } else {
794
- return json_decode_large_float(start, len);
795
809
  }
810
+
811
+ // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
812
+ // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
813
+ if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
814
+ return json_decode_large_float(start, end - start);
815
+ }
816
+
817
+ return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
796
818
  }
797
819
 
798
820
  static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
@@ -807,10 +829,67 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
807
829
  return array;
808
830
  }
809
831
 
810
- static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
832
+ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
833
+ {
834
+ VALUE set = rb_hash_new_capa(count / 2);
835
+ for (size_t index = 0; index < count; index += 2) {
836
+ size_t before = RHASH_SIZE(set);
837
+ VALUE key = pairs[index];
838
+ rb_hash_aset(set, key, Qtrue);
839
+ if (RHASH_SIZE(set) == before) {
840
+ if (RB_SYMBOL_P(key)) {
841
+ return rb_sym2str(key);
842
+ }
843
+ return key;
844
+ }
845
+ }
846
+ return Qfalse;
847
+ }
848
+
849
+ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
850
+ {
851
+ VALUE message = rb_sprintf(
852
+ "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
853
+ rb_inspect(duplicate_key)
854
+ );
855
+
856
+ emit_parse_warning(RSTRING_PTR(message), state);
857
+ RB_GC_GUARD(message);
858
+ }
859
+
860
+ #ifdef RBIMPL_ATTR_NORETURN
861
+ RBIMPL_ATTR_NORETURN()
862
+ #endif
863
+ static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
811
864
  {
812
- VALUE object = rb_hash_new_capa(count);
813
- rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
865
+ VALUE message = rb_sprintf(
866
+ "duplicate key %"PRIsVALUE,
867
+ rb_inspect(duplicate_key)
868
+ );
869
+
870
+ raise_parse_error(RSTRING_PTR(message), state);
871
+ RB_GC_GUARD(message);
872
+ }
873
+
874
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
875
+ {
876
+ size_t entries_count = count / 2;
877
+ VALUE object = rb_hash_new_capa(entries_count);
878
+ const VALUE *pairs = rvalue_stack_peek(state->stack, count);
879
+ rb_hash_bulk_insert(count, pairs, object);
880
+
881
+ if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
882
+ switch (config->on_duplicate_key) {
883
+ case JSON_IGNORE:
884
+ break;
885
+ case JSON_DEPRECATED:
886
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
887
+ break;
888
+ case JSON_RAISE:
889
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
890
+ break;
891
+ }
892
+ }
814
893
 
815
894
  rvalue_stack_pop(state->stack, count);
816
895
 
@@ -844,7 +923,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
844
923
  return value;
845
924
  }
846
925
 
847
- static const bool string_scan[256] = {
926
+ static const bool string_scan_table[256] = {
848
927
  // ASCII Control Characters
849
928
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
850
929
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -857,32 +936,65 @@ static const bool string_scan[256] = {
857
936
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
858
937
  };
859
938
 
939
+ #ifdef HAVE_SIMD
940
+ static SIMD_Implementation simd_impl = SIMD_NONE;
941
+ #endif /* HAVE_SIMD */
942
+
943
+ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
944
+ {
945
+ #ifdef HAVE_SIMD
946
+ #if defined(HAVE_SIMD_NEON)
947
+
948
+ uint64_t mask = 0;
949
+ if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
950
+ state->cursor += trailing_zeros64(mask) >> 2;
951
+ return 1;
952
+ }
953
+
954
+ #elif defined(HAVE_SIMD_SSE2)
955
+ if (simd_impl == SIMD_SSE2) {
956
+ int mask = 0;
957
+ if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
958
+ state->cursor += trailing_zeros(mask);
959
+ return 1;
960
+ }
961
+ }
962
+ #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
963
+ #endif /* HAVE_SIMD */
964
+
965
+ while (!eos(state)) {
966
+ if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
967
+ return 1;
968
+ }
969
+ state->cursor++;
970
+ }
971
+ return 0;
972
+ }
973
+
860
974
  static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
861
975
  {
862
976
  state->cursor++;
863
977
  const char *start = state->cursor;
864
978
  bool escaped = false;
865
979
 
866
- while (state->cursor < state->end) {
867
- if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
868
- switch (*state->cursor) {
869
- case '"': {
870
- VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
871
- state->cursor++;
872
- return json_push_value(state, config, string);
873
- }
874
- case '\\': {
875
- state->cursor++;
876
- escaped = true;
877
- if ((unsigned char)*state->cursor < 0x20) {
878
- raise_parse_error("invalid ASCII control character in string: %s", state);
879
- }
880
- break;
881
- }
882
- default:
980
+ while (RB_UNLIKELY(string_scan(state))) {
981
+ switch (*state->cursor) {
982
+ case '"': {
983
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
984
+ state->cursor++;
985
+ return json_push_value(state, config, string);
986
+ }
987
+ case '\\': {
988
+ state->cursor++;
989
+ escaped = true;
990
+ if ((unsigned char)*state->cursor < 0x20) {
883
991
  raise_parse_error("invalid ASCII control character in string: %s", state);
884
- break;
992
+ }
993
+ break;
885
994
  }
995
+ default:
996
+ raise_parse_error("invalid ASCII control character in string: %s", state);
997
+ break;
886
998
  }
887
999
 
888
1000
  state->cursor++;
@@ -892,16 +1004,160 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
892
1004
  return Qfalse;
893
1005
  }
894
1006
 
1007
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
1008
+ // From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
1009
+ // Additional References:
1010
+ // https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
1011
+ // http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
1012
+ static inline uint64_t decode_8digits_unrolled(uint64_t val) {
1013
+ const uint64_t mask = 0x000000FF000000FF;
1014
+ const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
1015
+ const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
1016
+ val -= 0x3030303030303030;
1017
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
1018
+ val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
1019
+ return val;
1020
+ }
1021
+
1022
+ static inline uint64_t decode_4digits_unrolled(uint32_t val) {
1023
+ const uint32_t mask = 0x000000FF;
1024
+ const uint32_t mul1 = 100;
1025
+ val -= 0x30303030;
1026
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
1027
+ val = ((val & mask) * mul1) + (((val >> 16) & mask));
1028
+ return val;
1029
+ }
1030
+ #endif
1031
+
1032
+ static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
1033
+ {
1034
+ const char *start = state->cursor;
1035
+
1036
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
1037
+ while (rest(state) >= sizeof(uint64_t)) {
1038
+ uint64_t next_8bytes;
1039
+ memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
1040
+
1041
+ // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
1042
+ // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
1043
+ uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
1044
+
1045
+ if (match == 0x3333333333333333) { // 8 consecutive digits
1046
+ *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
1047
+ state->cursor += 8;
1048
+ continue;
1049
+ }
1050
+
1051
+ uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
1052
+
1053
+ if (consecutive_digits >= 4) {
1054
+ *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
1055
+ state->cursor += 4;
1056
+ consecutive_digits -= 4;
1057
+ }
1058
+
1059
+ while (consecutive_digits) {
1060
+ *accumulator = *accumulator * 10 + (*state->cursor - '0');
1061
+ consecutive_digits--;
1062
+ state->cursor++;
1063
+ }
1064
+
1065
+ return (int)(state->cursor - start);
1066
+ }
1067
+ #endif
1068
+
1069
+ char next_char;
1070
+ while (rb_isdigit(next_char = peek(state))) {
1071
+ *accumulator = *accumulator * 10 + (next_char - '0');
1072
+ state->cursor++;
1073
+ }
1074
+ return (int)(state->cursor - start);
1075
+ }
1076
+
1077
+ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
1078
+ {
1079
+ bool integer = true;
1080
+ const char first_digit = *state->cursor;
1081
+
1082
+ // Variables for Ryu optimization - extract digits during parsing
1083
+ int32_t exponent = 0;
1084
+ int decimal_point_pos = -1;
1085
+ uint64_t mantissa = 0;
1086
+
1087
+ // Parse integer part and extract mantissa digits
1088
+ int mantissa_digits = json_parse_digits(state, &mantissa);
1089
+
1090
+ if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
1091
+ raise_parse_error_at("invalid number: %s", state, start);
1092
+ }
1093
+
1094
+ // Parse fractional part
1095
+ if (peek(state) == '.') {
1096
+ integer = false;
1097
+ decimal_point_pos = mantissa_digits; // Remember position of decimal point
1098
+ state->cursor++;
1099
+
1100
+ int fractional_digits = json_parse_digits(state, &mantissa);
1101
+ mantissa_digits += fractional_digits;
1102
+
1103
+ if (RB_UNLIKELY(!fractional_digits)) {
1104
+ raise_parse_error_at("invalid number: %s", state, start);
1105
+ }
1106
+ }
1107
+
1108
+ // Parse exponent
1109
+ if (rb_tolower(peek(state)) == 'e') {
1110
+ integer = false;
1111
+ state->cursor++;
1112
+
1113
+ bool negative_exponent = false;
1114
+ const char next_char = peek(state);
1115
+ if (next_char == '-' || next_char == '+') {
1116
+ negative_exponent = next_char == '-';
1117
+ state->cursor++;
1118
+ }
1119
+
1120
+ uint64_t abs_exponent = 0;
1121
+ int exponent_digits = json_parse_digits(state, &abs_exponent);
1122
+
1123
+ if (RB_UNLIKELY(!exponent_digits)) {
1124
+ raise_parse_error_at("invalid number: %s", state, start);
1125
+ }
1126
+
1127
+ exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
1128
+ }
1129
+
1130
+ if (integer) {
1131
+ return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
1132
+ }
1133
+
1134
+ // Adjust exponent based on decimal point position
1135
+ if (decimal_point_pos >= 0) {
1136
+ exponent -= (mantissa_digits - decimal_point_pos);
1137
+ }
1138
+
1139
+ return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
1140
+ }
1141
+
1142
+ static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
1143
+ {
1144
+ return json_parse_number(state, config, false, state->cursor);
1145
+ }
1146
+
1147
+ static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
1148
+ {
1149
+ const char *start = state->cursor;
1150
+ state->cursor++;
1151
+ return json_parse_number(state, config, true, start);
1152
+ }
1153
+
895
1154
  static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
896
1155
  {
897
1156
  json_eat_whitespace(state);
898
- if (state->cursor >= state->end) {
899
- raise_parse_error("unexpected end of input", state);
900
- }
901
1157
 
902
- switch (*state->cursor) {
1158
+ switch (peek(state)) {
903
1159
  case 'n':
904
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
1160
+ if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
905
1161
  state->cursor += 4;
906
1162
  return json_push_value(state, config, Qnil);
907
1163
  }
@@ -909,7 +1165,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
909
1165
  raise_parse_error("unexpected token %s", state);
910
1166
  break;
911
1167
  case 't':
912
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
1168
+ if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
913
1169
  state->cursor += 4;
914
1170
  return json_push_value(state, config, Qtrue);
915
1171
  }
@@ -918,7 +1174,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
918
1174
  break;
919
1175
  case 'f':
920
1176
  // Note: memcmp with a small power of two compile to an integer comparison
921
- if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1177
+ if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
922
1178
  state->cursor += 5;
923
1179
  return json_push_value(state, config, Qfalse);
924
1180
  }
@@ -927,7 +1183,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
927
1183
  break;
928
1184
  case 'N':
929
1185
  // Note: memcmp with a small power of two compile to an integer comparison
930
- if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1186
+ if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
931
1187
  state->cursor += 3;
932
1188
  return json_push_value(state, config, CNaN);
933
1189
  }
@@ -935,16 +1191,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
935
1191
  raise_parse_error("unexpected token %s", state);
936
1192
  break;
937
1193
  case 'I':
938
- if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1194
+ if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
939
1195
  state->cursor += 8;
940
1196
  return json_push_value(state, config, CInfinity);
941
1197
  }
942
1198
 
943
1199
  raise_parse_error("unexpected token %s", state);
944
1200
  break;
945
- case '-':
1201
+ case '-': {
946
1202
  // Note: memcmp with a small power of two compile to an integer comparison
947
- if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1203
+ if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
948
1204
  if (config->allow_nan) {
949
1205
  state->cursor += 9;
950
1206
  return json_push_value(state, config, CMinusInfinity);
@@ -952,62 +1208,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
952
1208
  raise_parse_error("unexpected token %s", state);
953
1209
  }
954
1210
  }
955
- // Fallthrough
956
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
957
- bool integer = true;
958
-
959
- // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
960
- const char *start = state->cursor;
961
- state->cursor++;
962
-
963
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
964
- state->cursor++;
965
- }
966
-
967
- long integer_length = state->cursor - start;
968
-
969
- if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
970
- raise_parse_error_at("invalid number: %s", state, start);
971
- } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
972
- raise_parse_error_at("invalid number: %s", state, start);
973
- } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
974
- raise_parse_error_at("invalid number: %s", state, start);
975
- }
976
-
977
- if ((state->cursor < state->end) && (*state->cursor == '.')) {
978
- integer = false;
979
- state->cursor++;
980
-
981
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
982
- raise_parse_error("invalid number: %s", state);
983
- }
984
-
985
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
986
- state->cursor++;
987
- }
988
- }
989
-
990
- if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
991
- integer = false;
992
- state->cursor++;
993
- if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
994
- state->cursor++;
995
- }
996
-
997
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
998
- raise_parse_error("invalid number: %s", state);
999
- }
1000
-
1001
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1002
- state->cursor++;
1003
- }
1004
- }
1005
-
1006
- if (integer) {
1007
- return json_push_value(state, config, json_decode_integer(start, state->cursor));
1008
- }
1009
- return json_push_value(state, config, json_decode_float(config, start, state->cursor));
1211
+ return json_push_value(state, config, json_parse_negative_number(state, config));
1212
+ break;
1010
1213
  }
1214
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1215
+ return json_push_value(state, config, json_parse_positive_number(state, config));
1216
+ break;
1011
1217
  case '"': {
1012
1218
  // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1013
1219
  return json_parse_string(state, config, false);
@@ -1018,7 +1224,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1018
1224
  json_eat_whitespace(state);
1019
1225
  long stack_head = state->stack->head;
1020
1226
 
1021
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
1227
+ if (peek(state) == ']') {
1022
1228
  state->cursor++;
1023
1229
  return json_push_value(state, config, json_decode_array(state, config, 0));
1024
1230
  } else {
@@ -1033,26 +1239,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1033
1239
  while (true) {
1034
1240
  json_eat_whitespace(state);
1035
1241
 
1036
- if (state->cursor < state->end) {
1037
- if (*state->cursor == ']') {
1038
- state->cursor++;
1039
- long count = state->stack->head - stack_head;
1040
- state->current_nesting--;
1041
- state->in_array--;
1042
- return json_push_value(state, config, json_decode_array(state, config, count));
1043
- }
1242
+ const char next_char = peek(state);
1044
1243
 
1045
- if (*state->cursor == ',') {
1046
- state->cursor++;
1047
- if (config->allow_trailing_comma) {
1048
- json_eat_whitespace(state);
1049
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
1050
- continue;
1051
- }
1244
+ if (RB_LIKELY(next_char == ',')) {
1245
+ state->cursor++;
1246
+ if (config->allow_trailing_comma) {
1247
+ json_eat_whitespace(state);
1248
+ if (peek(state) == ']') {
1249
+ continue;
1052
1250
  }
1053
- json_parse_any(state, config);
1054
- continue;
1055
1251
  }
1252
+ json_parse_any(state, config);
1253
+ continue;
1254
+ }
1255
+
1256
+ if (next_char == ']') {
1257
+ state->cursor++;
1258
+ long count = state->stack->head - stack_head;
1259
+ state->current_nesting--;
1260
+ state->in_array--;
1261
+ return json_push_value(state, config, json_decode_array(state, config, count));
1056
1262
  }
1057
1263
 
1058
1264
  raise_parse_error("expected ',' or ']' after array value", state);
@@ -1060,11 +1266,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1060
1266
  break;
1061
1267
  }
1062
1268
  case '{': {
1269
+ const char *object_start_cursor = state->cursor;
1270
+
1063
1271
  state->cursor++;
1064
1272
  json_eat_whitespace(state);
1065
1273
  long stack_head = state->stack->head;
1066
1274
 
1067
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
1275
+ if (peek(state) == '}') {
1068
1276
  state->cursor++;
1069
1277
  return json_push_value(state, config, json_decode_object(state, config, 0));
1070
1278
  } else {
@@ -1073,13 +1281,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1073
1281
  rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1074
1282
  }
1075
1283
 
1076
- if (*state->cursor != '"') {
1284
+ if (peek(state) != '"') {
1077
1285
  raise_parse_error("expected object key, got %s", state);
1078
1286
  }
1079
1287
  json_parse_string(state, config, true);
1080
1288
 
1081
1289
  json_eat_whitespace(state);
1082
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1290
+ if (peek(state) != ':') {
1083
1291
  raise_parse_error("expected ':' after object key", state);
1084
1292
  }
1085
1293
  state->cursor++;
@@ -1090,39 +1298,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1090
1298
  while (true) {
1091
1299
  json_eat_whitespace(state);
1092
1300
 
1093
- if (state->cursor < state->end) {
1094
- if (*state->cursor == '}') {
1095
- state->cursor++;
1096
- state->current_nesting--;
1097
- long count = state->stack->head - stack_head;
1098
- return json_push_value(state, config, json_decode_object(state, config, count));
1099
- }
1301
+ const char next_char = peek(state);
1302
+ if (next_char == '}') {
1303
+ state->cursor++;
1304
+ state->current_nesting--;
1305
+ size_t count = state->stack->head - stack_head;
1100
1306
 
1101
- if (*state->cursor == ',') {
1102
- state->cursor++;
1103
- json_eat_whitespace(state);
1307
+ // Temporary rewind cursor in case an error is raised
1308
+ const char *final_cursor = state->cursor;
1309
+ state->cursor = object_start_cursor;
1310
+ VALUE object = json_decode_object(state, config, count);
1311
+ state->cursor = final_cursor;
1104
1312
 
1105
- if (config->allow_trailing_comma) {
1106
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
1107
- continue;
1108
- }
1109
- }
1313
+ return json_push_value(state, config, object);
1314
+ }
1110
1315
 
1111
- if (*state->cursor != '"') {
1112
- raise_parse_error("expected object key, got: %s", state);
1113
- }
1114
- json_parse_string(state, config, true);
1316
+ if (next_char == ',') {
1317
+ state->cursor++;
1318
+ json_eat_whitespace(state);
1115
1319
 
1116
- json_eat_whitespace(state);
1117
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1118
- raise_parse_error("expected ':' after object key, got: %s", state);
1320
+ if (config->allow_trailing_comma) {
1321
+ if (peek(state) == '}') {
1322
+ continue;
1119
1323
  }
1120
- state->cursor++;
1324
+ }
1121
1325
 
1122
- json_parse_any(state, config);
1326
+ if (RB_UNLIKELY(peek(state) != '"')) {
1327
+ raise_parse_error("expected object key, got: %s", state);
1328
+ }
1329
+ json_parse_string(state, config, true);
1123
1330
 
1124
- continue;
1331
+ json_eat_whitespace(state);
1332
+ if (RB_UNLIKELY(peek(state) != ':')) {
1333
+ raise_parse_error("expected ':' after object key, got: %s", state);
1125
1334
  }
1335
+ state->cursor++;
1336
+
1337
+ json_parse_any(state, config);
1338
+
1339
+ continue;
1126
1340
  }
1127
1341
 
1128
1342
  raise_parse_error("expected ',' or '}' after object value, got: %s", state);
@@ -1130,18 +1344,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1130
1344
  break;
1131
1345
  }
1132
1346
 
1347
+ case 0:
1348
+ raise_parse_error("unexpected end of input", state);
1349
+ break;
1350
+
1133
1351
  default:
1134
1352
  raise_parse_error("unexpected character: %s", state);
1135
1353
  break;
1136
1354
  }
1137
1355
 
1138
- raise_parse_error("unreacheable: %s", state);
1356
+ raise_parse_error("unreachable: %s", state);
1357
+ return Qundef;
1139
1358
  }
1140
1359
 
1141
1360
  static void json_ensure_eof(JSON_ParserState *state)
1142
1361
  {
1143
1362
  json_eat_whitespace(state);
1144
- if (state->cursor != state->end) {
1363
+ if (!eos(state)) {
1145
1364
  raise_parse_error("unexpected token at end of stream %s", state);
1146
1365
  }
1147
1366
  }
@@ -1184,6 +1403,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1184
1403
  else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1185
1404
  else if (key == sym_freeze) { config->freeze = RTEST(val); }
1186
1405
  else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1406
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1187
1407
  else if (key == sym_decimal_class) {
1188
1408
  if (RTEST(val)) {
1189
1409
  if (rb_respond_to(val, i_try_convert)) {
@@ -1400,6 +1620,7 @@ void Init_parser(void)
1400
1620
  sym_freeze = ID2SYM(rb_intern("freeze"));
1401
1621
  sym_on_load = ID2SYM(rb_intern("on_load"));
1402
1622
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1623
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1403
1624
 
1404
1625
  i_chr = rb_intern("chr");
1405
1626
  i_aset = rb_intern("[]=");
@@ -1413,4 +1634,8 @@ void Init_parser(void)
1413
1634
  binary_encindex = rb_ascii8bit_encindex();
1414
1635
  utf8_encindex = rb_utf8_encindex();
1415
1636
  enc_utf8 = rb_utf8_encoding();
1637
+
1638
+ #ifdef HAVE_SIMD
1639
+ simd_impl = find_simd_implementation();
1640
+ #endif
1416
1641
  }