json 2.15.2 → 2.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,11 @@
1
- #include "ruby.h"
2
- #include "ruby/encoding.h"
3
-
4
- /* shims */
5
- /* This is the fallback definition from Ruby 3.4 */
6
-
7
- #ifndef RBIMPL_STDBOOL_H
8
- #if defined(__cplusplus)
9
- # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
- # include <cstdbool>
11
- # endif
12
- #elif defined(HAVE_STDBOOL_H)
13
- # include <stdbool.h>
14
- #elif !defined(HAVE__BOOL)
15
- typedef unsigned char _Bool;
16
- # define bool _Bool
17
- # define true ((_Bool)+1)
18
- # define false ((_Bool)+0)
19
- # define __bool_true_false_are_defined
20
- #endif
21
- #endif
22
-
1
+ #include "../json.h"
2
+ #include "../vendor/ryu.h"
23
3
  #include "../simd/simd.h"
24
4
 
25
- #ifndef RB_UNLIKELY
26
- #define RB_UNLIKELY(expr) expr
27
- #endif
28
-
29
- #ifndef RB_LIKELY
30
- #define RB_LIKELY(expr) expr
31
- #endif
32
-
33
5
  static VALUE mJSON, eNestingError, Encoding_UTF_8;
34
6
  static VALUE CNaN, CInfinity, CMinusInfinity;
35
7
 
36
- static ID i_chr, i_aset, i_aref,
37
- i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
8
+ static ID i_new, i_try_convert, i_uminus, i_encode;
38
9
 
39
10
  static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
40
11
  sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
@@ -44,7 +15,7 @@ static int utf8_encindex;
44
15
 
45
16
  #ifndef HAVE_RB_HASH_BULK_INSERT
46
17
  // For TruffleRuby
47
- void
18
+ static void
48
19
  rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
49
20
  {
50
21
  long index = 0;
@@ -61,6 +32,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
61
32
  #define rb_hash_new_capa(n) rb_hash_new()
62
33
  #endif
63
34
 
35
+ #ifndef HAVE_RB_STR_TO_INTERNED_STR
36
+ static VALUE rb_str_to_interned_str(VALUE str)
37
+ {
38
+ return rb_funcall(rb_str_freeze(str), i_uminus, 0);
39
+ }
40
+ #endif
64
41
 
65
42
  /* name cache */
66
43
 
@@ -106,116 +83,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
106
83
  cache->entries[index] = rstring;
107
84
  }
108
85
 
109
- static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
86
+ #define rstring_cache_memcmp memcmp
87
+
88
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
89
+ #if __has_builtin(__builtin_bswap64)
90
+ #undef rstring_cache_memcmp
91
+ ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
110
92
  {
111
- long rstring_length = RSTRING_LEN(rstring);
112
- if (length == rstring_length) {
113
- return memcmp(str, RSTRING_PTR(rstring), length);
114
- } else {
115
- return (int)(length - rstring_length);
93
+ // The libc memcmp has numerous complex optimizations, but in this particular case,
94
+ // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
95
+ // inline a simpler memcmp outperforms calling the libc version.
96
+ long i = 0;
97
+
98
+ for (; i + 8 <= length; i += 8) {
99
+ uint64_t a, b;
100
+ memcpy(&a, str + i, 8);
101
+ memcpy(&b, rptr + i, 8);
102
+ if (a != b) {
103
+ a = __builtin_bswap64(a);
104
+ b = __builtin_bswap64(b);
105
+ return (a < b) ? -1 : 1;
106
+ }
107
+ }
108
+
109
+ for (; i < length; i++) {
110
+ if (str[i] != rptr[i]) {
111
+ return (str[i] < rptr[i]) ? -1 : 1;
112
+ }
116
113
  }
114
+
115
+ return 0;
117
116
  }
117
+ #endif
118
+ #endif
118
119
 
119
- static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
120
+ ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
120
121
  {
121
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
122
- // Common names aren't likely to be very long. So we just don't
123
- // cache names above an arbitrary threshold.
124
- return Qfalse;
125
- }
122
+ const char *rstring_ptr;
123
+ long rstring_length;
124
+
125
+ RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
126
126
 
127
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
128
- // Simple heuristic, if the first character isn't a letter,
129
- // we're much less likely to see this string again.
130
- // We mostly want to cache strings that are likely to be repeated.
131
- return Qfalse;
127
+ if (length == rstring_length) {
128
+ return rstring_cache_memcmp(str, rstring_ptr, length);
129
+ } else {
130
+ return (int)(length - rstring_length);
132
131
  }
132
+ }
133
133
 
134
+ ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
135
+ {
134
136
  int low = 0;
135
137
  int high = cache->length - 1;
136
- int mid = 0;
137
- int last_cmp = 0;
138
138
 
139
139
  while (low <= high) {
140
- mid = (high + low) >> 1;
140
+ int mid = (high + low) >> 1;
141
141
  VALUE entry = cache->entries[mid];
142
- last_cmp = rstring_cache_cmp(str, length, entry);
142
+ int cmp = rstring_cache_cmp(str, length, entry);
143
143
 
144
- if (last_cmp == 0) {
144
+ if (cmp == 0) {
145
145
  return entry;
146
- } else if (last_cmp > 0) {
146
+ } else if (cmp > 0) {
147
147
  low = mid + 1;
148
148
  } else {
149
149
  high = mid - 1;
150
150
  }
151
151
  }
152
152
 
153
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
154
- // We assume the overwhelming majority of names don't need to be escaped.
155
- // But if they do, we have to fallback to the slow path.
156
- return Qfalse;
157
- }
158
-
159
153
  VALUE rstring = build_interned_string(str, length);
160
154
 
161
155
  if (cache->length < JSON_RVALUE_CACHE_CAPA) {
162
- if (last_cmp > 0) {
163
- mid += 1;
164
- }
165
-
166
- rvalue_cache_insert_at(cache, mid, rstring);
156
+ rvalue_cache_insert_at(cache, low, rstring);
167
157
  }
168
158
  return rstring;
169
159
  }
170
160
 
171
161
  static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
172
162
  {
173
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
174
- // Common names aren't likely to be very long. So we just don't
175
- // cache names above an arbitrary threshold.
176
- return Qfalse;
177
- }
178
-
179
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
180
- // Simple heuristic, if the first character isn't a letter,
181
- // we're much less likely to see this string again.
182
- // We mostly want to cache strings that are likely to be repeated.
183
- return Qfalse;
184
- }
185
-
186
163
  int low = 0;
187
164
  int high = cache->length - 1;
188
- int mid = 0;
189
- int last_cmp = 0;
190
165
 
191
166
  while (low <= high) {
192
- mid = (high + low) >> 1;
167
+ int mid = (high + low) >> 1;
193
168
  VALUE entry = cache->entries[mid];
194
- last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
169
+ int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
195
170
 
196
- if (last_cmp == 0) {
171
+ if (cmp == 0) {
197
172
  return entry;
198
- } else if (last_cmp > 0) {
173
+ } else if (cmp > 0) {
199
174
  low = mid + 1;
200
175
  } else {
201
176
  high = mid - 1;
202
177
  }
203
178
  }
204
179
 
205
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
206
- // We assume the overwhelming majority of names don't need to be escaped.
207
- // But if they do, we have to fallback to the slow path.
208
- return Qfalse;
209
- }
210
-
211
180
  VALUE rsymbol = build_symbol(str, length);
212
181
 
213
182
  if (cache->length < JSON_RVALUE_CACHE_CAPA) {
214
- if (last_cmp > 0) {
215
- mid += 1;
216
- }
217
-
218
- rvalue_cache_insert_at(cache, mid, rsymbol);
183
+ rvalue_cache_insert_at(cache, low, rsymbol);
219
184
  }
220
185
  return rsymbol;
221
186
  }
@@ -330,15 +295,6 @@ static void rvalue_stack_eagerly_release(VALUE handle)
330
295
  }
331
296
  }
332
297
 
333
-
334
- #ifndef HAVE_STRNLEN
335
- static size_t strnlen(const char *s, size_t maxlen)
336
- {
337
- char *p;
338
- return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
339
- }
340
- #endif
341
-
342
298
  static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
343
299
  {
344
300
  int len = 1;
@@ -379,7 +335,6 @@ typedef struct JSON_ParserStruct {
379
335
  int max_nesting;
380
336
  bool allow_nan;
381
337
  bool allow_trailing_comma;
382
- bool parsing_name;
383
338
  bool symbolize_names;
384
339
  bool freeze;
385
340
  } JSON_ParserConfig;
@@ -395,6 +350,22 @@ typedef struct JSON_ParserStateStruct {
395
350
  int current_nesting;
396
351
  } JSON_ParserState;
397
352
 
353
+ static inline size_t rest(JSON_ParserState *state) {
354
+ return state->end - state->cursor;
355
+ }
356
+
357
+ static inline bool eos(JSON_ParserState *state) {
358
+ return state->cursor >= state->end;
359
+ }
360
+
361
+ static inline char peek(JSON_ParserState *state)
362
+ {
363
+ if (RB_UNLIKELY(eos(state))) {
364
+ return 0;
365
+ }
366
+ return *state->cursor;
367
+ }
368
+
398
369
  static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
399
370
  {
400
371
  const char *cursor = state->cursor;
@@ -530,61 +501,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
530
501
 
531
502
  static const rb_data_type_t JSON_ParserConfig_type;
532
503
 
533
- static const bool whitespace[256] = {
534
- [' '] = 1,
535
- ['\t'] = 1,
536
- ['\n'] = 1,
537
- ['\r'] = 1,
538
- ['/'] = 1,
539
- };
540
-
541
504
  static void
542
505
  json_eat_comments(JSON_ParserState *state)
543
506
  {
544
- if (state->cursor + 1 < state->end) {
545
- switch (state->cursor[1]) {
546
- case '/': {
547
- state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
548
- if (!state->cursor) {
549
- state->cursor = state->end;
550
- } else {
551
- state->cursor++;
552
- }
553
- break;
507
+ const char *start = state->cursor;
508
+ state->cursor++;
509
+
510
+ switch (peek(state)) {
511
+ case '/': {
512
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
513
+ if (!state->cursor) {
514
+ state->cursor = state->end;
515
+ } else {
516
+ state->cursor++;
554
517
  }
555
- case '*': {
556
- state->cursor += 2;
557
- while (true) {
558
- state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
559
- if (!state->cursor) {
560
- raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
561
- } else {
562
- state->cursor++;
563
- if (state->cursor < state->end && *state->cursor == '/') {
564
- state->cursor++;
565
- break;
566
- }
567
- }
518
+ break;
519
+ }
520
+ case '*': {
521
+ state->cursor++;
522
+
523
+ while (true) {
524
+ const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
525
+ if (!next_match) {
526
+ raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
527
+ }
528
+
529
+ state->cursor = next_match + 1;
530
+ if (peek(state) == '/') {
531
+ state->cursor++;
532
+ break;
568
533
  }
569
- break;
570
534
  }
571
- default:
572
- raise_parse_error("unexpected token %s", state);
573
- break;
535
+ break;
574
536
  }
575
- } else {
576
- raise_parse_error("unexpected token %s", state);
537
+ default:
538
+ raise_parse_error_at("unexpected token %s", state, start);
539
+ break;
577
540
  }
578
541
  }
579
542
 
580
- static inline void
543
+ ALWAYS_INLINE(static) void
581
544
  json_eat_whitespace(JSON_ParserState *state)
582
545
  {
583
- while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
584
- if (RB_LIKELY(*state->cursor != '/')) {
585
- state->cursor++;
586
- } else {
587
- json_eat_comments(state);
546
+ while (true) {
547
+ switch (peek(state)) {
548
+ case ' ':
549
+ state->cursor++;
550
+ break;
551
+ case '\n':
552
+ state->cursor++;
553
+
554
+ // Heuristic: if we see a newline, there are likely consecutive spaces after it.
555
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
556
+ while (rest(state) > 8) {
557
+ uint64_t chunk;
558
+ memcpy(&chunk, state->cursor, sizeof(uint64_t));
559
+ if (chunk == 0x2020202020202020) {
560
+ state->cursor += 8;
561
+ continue;
562
+ }
563
+
564
+ uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
565
+ state->cursor += consecutive_spaces;
566
+ break;
567
+ }
568
+ #endif
569
+ break;
570
+ case '\t':
571
+ case '\r':
572
+ state->cursor++;
573
+ break;
574
+ case '/':
575
+ json_eat_comments(state);
576
+ break;
577
+
578
+ default:
579
+ return;
588
580
  }
589
581
  }
590
582
  }
@@ -615,11 +607,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
615
607
  return result;
616
608
  }
617
609
 
618
- static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
610
+ static inline bool json_string_cacheable_p(const char *string, size_t length)
619
611
  {
612
+ // We mostly want to cache strings that are likely to be repeated.
613
+ // Simple heuristics:
614
+ // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
615
+ // - If the first character isn't a letter, we're much less likely to see this string again.
616
+ return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
617
+ }
618
+
619
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
620
+ {
621
+ bool intern = is_name || config->freeze;
622
+ bool symbolize = is_name && config->symbolize_names;
620
623
  size_t bufferSize = stringEnd - string;
621
624
 
622
- if (is_name && state->in_array) {
625
+ if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
623
626
  VALUE cached_key;
624
627
  if (RB_UNLIKELY(symbolize)) {
625
628
  cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
@@ -635,60 +638,71 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
635
638
  return build_string(string, stringEnd, intern, symbolize);
636
639
  }
637
640
 
638
- static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
639
- {
640
- size_t bufferSize = stringEnd - string;
641
- const char *p = string, *pe = string, *unescape, *bufferStart;
642
- char *buffer;
643
- int unescape_len;
644
- char buf[4];
641
+ #define JSON_MAX_UNESCAPE_POSITIONS 16
642
+ typedef struct _json_unescape_positions {
643
+ long size;
644
+ const char **positions;
645
+ bool has_more;
646
+ } JSON_UnescapePositions;
645
647
 
646
- if (is_name && state->in_array) {
647
- VALUE cached_key;
648
- if (RB_UNLIKELY(symbolize)) {
649
- cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
650
- } else {
651
- cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
652
- }
648
+ static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
649
+ {
650
+ while (positions->size) {
651
+ positions->size--;
652
+ const char *next_position = positions->positions[0];
653
+ positions->positions++;
654
+ return next_position;
655
+ }
653
656
 
654
- if (RB_LIKELY(cached_key)) {
655
- return cached_key;
656
- }
657
+ if (positions->has_more) {
658
+ return memchr(pe, '\\', stringEnd - pe);
657
659
  }
658
660
 
661
+ return NULL;
662
+ }
663
+
664
+ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
665
+ {
666
+ bool intern = is_name || config->freeze;
667
+ bool symbolize = is_name && config->symbolize_names;
668
+ size_t bufferSize = stringEnd - string;
669
+ const char *p = string, *pe = string, *bufferStart;
670
+ char *buffer;
671
+
659
672
  VALUE result = rb_str_buf_new(bufferSize);
660
673
  rb_enc_associate_index(result, utf8_encindex);
661
674
  buffer = RSTRING_PTR(result);
662
675
  bufferStart = buffer;
663
676
 
664
- while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
665
- unescape = (char *) "?";
666
- unescape_len = 1;
677
+ #define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
678
+
679
+ while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
667
680
  if (pe > p) {
668
681
  MEMCPY(buffer, p, char, pe - p);
669
682
  buffer += pe - p;
670
683
  }
671
684
  switch (*++pe) {
685
+ case '"':
686
+ case '/':
687
+ p = pe; // nothing to unescape just need to skip the backslash
688
+ break;
689
+ case '\\':
690
+ APPEND_CHAR('\\');
691
+ break;
672
692
  case 'n':
673
- unescape = (char *) "\n";
693
+ APPEND_CHAR('\n');
674
694
  break;
675
695
  case 'r':
676
- unescape = (char *) "\r";
696
+ APPEND_CHAR('\r');
677
697
  break;
678
698
  case 't':
679
- unescape = (char *) "\t";
680
- break;
681
- case '"':
682
- unescape = (char *) "\"";
683
- break;
684
- case '\\':
685
- unescape = (char *) "\\";
699
+ APPEND_CHAR('\t');
686
700
  break;
687
701
  case 'b':
688
- unescape = (char *) "\b";
702
+ APPEND_CHAR('\b');
689
703
  break;
690
704
  case 'f':
691
- unescape = (char *) "\f";
705
+ APPEND_CHAR('\f');
692
706
  break;
693
707
  case 'u':
694
708
  if (pe > stringEnd - 5) {
@@ -726,18 +740,23 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
726
740
  break;
727
741
  }
728
742
  }
729
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
730
- unescape = buf;
743
+
744
+ char buf[4];
745
+ int unescape_len = convert_UTF32_to_UTF8(buf, ch);
746
+ MEMCPY(buffer, buf, char, unescape_len);
747
+ buffer += unescape_len;
748
+ p = ++pe;
731
749
  }
732
750
  break;
733
751
  default:
734
- p = pe;
735
- continue;
752
+ if ((unsigned char)*pe < 0x20) {
753
+ raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
754
+ }
755
+ raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
756
+ break;
736
757
  }
737
- MEMCPY(buffer, unescape, char, unescape_len);
738
- buffer += unescape_len;
739
- p = ++pe;
740
758
  }
759
+ #undef APPEND_CHAR
741
760
 
742
761
  if (stringEnd > p) {
743
762
  MEMCPY(buffer, p, char, stringEnd - p);
@@ -748,33 +767,13 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
748
767
  if (symbolize) {
749
768
  result = rb_str_intern(result);
750
769
  } else if (intern) {
751
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
770
+ result = rb_str_to_interned_str(result);
752
771
  }
753
772
 
754
773
  return result;
755
774
  }
756
775
 
757
776
  #define MAX_FAST_INTEGER_SIZE 18
758
- static inline VALUE fast_decode_integer(const char *p, const char *pe)
759
- {
760
- bool negative = false;
761
- if (*p == '-') {
762
- negative = true;
763
- p++;
764
- }
765
-
766
- long long memo = 0;
767
- while (p < pe) {
768
- memo *= 10;
769
- memo += *p - '0';
770
- p++;
771
- }
772
-
773
- if (negative) {
774
- memo = -memo;
775
- }
776
- return LL2NUM(memo);
777
- }
778
777
 
779
778
  static VALUE json_decode_large_integer(const char *start, long len)
780
779
  {
@@ -788,17 +787,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
788
787
  }
789
788
 
790
789
  static inline VALUE
791
- json_decode_integer(const char *start, const char *end)
790
+ json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
792
791
  {
793
- long len = end - start;
794
- if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
795
- return fast_decode_integer(start, end);
792
+ if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
793
+ if (negative) {
794
+ return INT64T2NUM(-((int64_t)mantissa));
796
795
  }
797
- return json_decode_large_integer(start, len);
796
+ return UINT64T2NUM(mantissa);
797
+ }
798
+
799
+ return json_decode_large_integer(start, end - start);
798
800
  }
799
801
 
800
802
  static VALUE json_decode_large_float(const char *start, long len)
801
803
  {
804
+ if (RB_LIKELY(len < 64)) {
805
+ char buffer[64];
806
+ MEMCPY(buffer, start, char, len);
807
+ buffer[len] = '\0';
808
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
809
+ }
810
+
802
811
  VALUE buffer_v;
803
812
  char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
804
813
  MEMCPY(buffer, start, char, len);
@@ -808,21 +817,24 @@ static VALUE json_decode_large_float(const char *start, long len)
808
817
  return number;
809
818
  }
810
819
 
811
- static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
820
+ /* Ruby JSON optimized float decoder using vendored Ryu algorithm
821
+ * Accepts pre-extracted mantissa and exponent from first-pass validation
822
+ */
823
+ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int32_t exponent, bool negative,
824
+ const char *start, const char *end)
812
825
  {
813
- long len = end - start;
814
-
815
826
  if (RB_UNLIKELY(config->decimal_class)) {
816
- VALUE text = rb_str_new(start, len);
827
+ VALUE text = rb_str_new(start, end - start);
817
828
  return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
818
- } else if (RB_LIKELY(len < 64)) {
819
- char buffer[64];
820
- MEMCPY(buffer, start, char, len);
821
- buffer[len] = '\0';
822
- return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
823
- } else {
824
- return json_decode_large_float(start, len);
825
829
  }
830
+
831
+ // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
832
+ // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
833
+ if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
834
+ return json_decode_large_float(start, end - start);
835
+ }
836
+
837
+ return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, exponent, negative));
826
838
  }
827
839
 
828
840
  static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
@@ -908,20 +920,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
908
920
  return object;
909
921
  }
910
922
 
911
- static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
912
- {
913
- VALUE string;
914
- bool intern = is_name || config->freeze;
915
- bool symbolize = is_name && config->symbolize_names;
916
- if (escaped) {
917
- string = json_string_unescape(state, start, end, is_name, intern, symbolize);
918
- } else {
919
- string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
920
- }
921
-
922
- return string;
923
- }
924
-
925
923
  static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
926
924
  {
927
925
  if (RB_UNLIKELY(config->on_load_proc)) {
@@ -944,17 +942,11 @@ static const bool string_scan_table[256] = {
944
942
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
945
943
  };
946
944
 
947
- #if (defined(__GNUC__ ) || defined(__clang__))
948
- #define FORCE_INLINE __attribute__((always_inline))
949
- #else
950
- #define FORCE_INLINE
951
- #endif
952
-
953
945
  #ifdef HAVE_SIMD
954
946
  static SIMD_Implementation simd_impl = SIMD_NONE;
955
947
  #endif /* HAVE_SIMD */
956
948
 
957
- static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
949
+ ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
958
950
  {
959
951
  #ifdef HAVE_SIMD
960
952
  #if defined(HAVE_SIMD_NEON)
@@ -962,7 +954,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
962
954
  uint64_t mask = 0;
963
955
  if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
964
956
  state->cursor += trailing_zeros64(mask) >> 2;
965
- return 1;
957
+ return true;
966
958
  }
967
959
 
968
960
  #elif defined(HAVE_SIMD_SSE2)
@@ -970,40 +962,45 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
970
962
  int mask = 0;
971
963
  if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
972
964
  state->cursor += trailing_zeros(mask);
973
- return 1;
965
+ return true;
974
966
  }
975
967
  }
976
968
  #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
977
969
  #endif /* HAVE_SIMD */
978
970
 
979
- while (state->cursor < state->end) {
971
+ while (!eos(state)) {
980
972
  if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
981
- return 1;
973
+ return true;
982
974
  }
983
975
  state->cursor++;
984
976
  }
985
- return 0;
977
+ return false;
986
978
  }
987
979
 
988
- static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
980
+ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
989
981
  {
990
- state->cursor++;
991
- const char *start = state->cursor;
992
- bool escaped = false;
982
+ const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
983
+ JSON_UnescapePositions positions = {
984
+ .size = 0,
985
+ .positions = backslashes,
986
+ .has_more = false,
987
+ };
993
988
 
994
- while (RB_UNLIKELY(string_scan(state))) {
989
+ do {
995
990
  switch (*state->cursor) {
996
991
  case '"': {
997
- VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
992
+ VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
998
993
  state->cursor++;
999
994
  return json_push_value(state, config, string);
1000
995
  }
1001
996
  case '\\': {
1002
- state->cursor++;
1003
- escaped = true;
1004
- if ((unsigned char)*state->cursor < 0x20) {
1005
- raise_parse_error("invalid ASCII control character in string: %s", state);
997
+ if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
998
+ backslashes[positions.size] = state->cursor;
999
+ positions.size++;
1000
+ } else {
1001
+ positions.has_more = true;
1006
1002
  }
1003
+ state->cursor++;
1007
1004
  break;
1008
1005
  }
1009
1006
  default:
@@ -1012,22 +1009,183 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
1012
1009
  }
1013
1010
 
1014
1011
  state->cursor++;
1015
- }
1012
+ } while (string_scan(state));
1016
1013
 
1017
1014
  raise_parse_error("unexpected end of input, expected closing \"", state);
1018
1015
  return Qfalse;
1019
1016
  }
1020
1017
 
1018
+ ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
1019
+ {
1020
+ state->cursor++;
1021
+ const char *start = state->cursor;
1022
+
1023
+ if (RB_UNLIKELY(!string_scan(state))) {
1024
+ raise_parse_error("unexpected end of input, expected closing \"", state);
1025
+ }
1026
+
1027
+ if (RB_LIKELY(*state->cursor == '"')) {
1028
+ VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
1029
+ state->cursor++;
1030
+ return json_push_value(state, config, string);
1031
+ }
1032
+ return json_parse_escaped_string(state, config, is_name, start);
1033
+ }
1034
+
1035
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
1036
+ // From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
1037
+ // Additional References:
1038
+ // https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
1039
+ // http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
1040
+ static inline uint64_t decode_8digits_unrolled(uint64_t val) {
1041
+ const uint64_t mask = 0x000000FF000000FF;
1042
+ const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
1043
+ const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
1044
+ val -= 0x3030303030303030;
1045
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
1046
+ val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
1047
+ return val;
1048
+ }
1049
+
1050
+ static inline uint64_t decode_4digits_unrolled(uint32_t val) {
1051
+ const uint32_t mask = 0x000000FF;
1052
+ const uint32_t mul1 = 100;
1053
+ val -= 0x30303030;
1054
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
1055
+ val = ((val & mask) * mul1) + (((val >> 16) & mask));
1056
+ return val;
1057
+ }
1058
+ #endif
1059
+
1060
+ static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
1061
+ {
1062
+ const char *start = state->cursor;
1063
+
1064
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
1065
+ while (rest(state) >= sizeof(uint64_t)) {
1066
+ uint64_t next_8bytes;
1067
+ memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
1068
+
1069
+ // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
1070
+ // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
1071
+ uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
1072
+
1073
+ if (match == 0x3333333333333333) { // 8 consecutive digits
1074
+ *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
1075
+ state->cursor += 8;
1076
+ continue;
1077
+ }
1078
+
1079
+ uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
1080
+
1081
+ if (consecutive_digits >= 4) {
1082
+ *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
1083
+ state->cursor += 4;
1084
+ consecutive_digits -= 4;
1085
+ }
1086
+
1087
+ while (consecutive_digits) {
1088
+ *accumulator = *accumulator * 10 + (*state->cursor - '0');
1089
+ consecutive_digits--;
1090
+ state->cursor++;
1091
+ }
1092
+
1093
+ return (int)(state->cursor - start);
1094
+ }
1095
+ #endif
1096
+
1097
+ char next_char;
1098
+ while (rb_isdigit(next_char = peek(state))) {
1099
+ *accumulator = *accumulator * 10 + (next_char - '0');
1100
+ state->cursor++;
1101
+ }
1102
+ return (int)(state->cursor - start);
1103
+ }
1104
+
1105
+ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
1106
+ {
1107
+ bool integer = true;
1108
+ const char first_digit = *state->cursor;
1109
+
1110
+ // Variables for Ryu optimization - extract digits during parsing
1111
+ int32_t exponent = 0;
1112
+ int decimal_point_pos = -1;
1113
+ uint64_t mantissa = 0;
1114
+
1115
+ // Parse integer part and extract mantissa digits
1116
+ int mantissa_digits = json_parse_digits(state, &mantissa);
1117
+
1118
+ if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
1119
+ raise_parse_error_at("invalid number: %s", state, start);
1120
+ }
1121
+
1122
+ // Parse fractional part
1123
+ if (peek(state) == '.') {
1124
+ integer = false;
1125
+ decimal_point_pos = mantissa_digits; // Remember position of decimal point
1126
+ state->cursor++;
1127
+
1128
+ int fractional_digits = json_parse_digits(state, &mantissa);
1129
+ mantissa_digits += fractional_digits;
1130
+
1131
+ if (RB_UNLIKELY(!fractional_digits)) {
1132
+ raise_parse_error_at("invalid number: %s", state, start);
1133
+ }
1134
+ }
1135
+
1136
+ // Parse exponent
1137
+ if (rb_tolower(peek(state)) == 'e') {
1138
+ integer = false;
1139
+ state->cursor++;
1140
+
1141
+ bool negative_exponent = false;
1142
+ const char next_char = peek(state);
1143
+ if (next_char == '-' || next_char == '+') {
1144
+ negative_exponent = next_char == '-';
1145
+ state->cursor++;
1146
+ }
1147
+
1148
+ uint64_t abs_exponent = 0;
1149
+ int exponent_digits = json_parse_digits(state, &abs_exponent);
1150
+
1151
+ if (RB_UNLIKELY(!exponent_digits)) {
1152
+ raise_parse_error_at("invalid number: %s", state, start);
1153
+ }
1154
+
1155
+ exponent = negative_exponent ? -((int32_t)abs_exponent) : ((int32_t)abs_exponent);
1156
+ }
1157
+
1158
+ if (integer) {
1159
+ return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
1160
+ }
1161
+
1162
+ // Adjust exponent based on decimal point position
1163
+ if (decimal_point_pos >= 0) {
1164
+ exponent -= (mantissa_digits - decimal_point_pos);
1165
+ }
1166
+
1167
+ return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
1168
+ }
1169
+
1170
+ static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
1171
+ {
1172
+ return json_parse_number(state, config, false, state->cursor);
1173
+ }
1174
+
1175
+ static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
1176
+ {
1177
+ const char *start = state->cursor;
1178
+ state->cursor++;
1179
+ return json_parse_number(state, config, true, start);
1180
+ }
1181
+
1021
1182
  static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1022
1183
  {
1023
1184
  json_eat_whitespace(state);
1024
- if (state->cursor >= state->end) {
1025
- raise_parse_error("unexpected end of input", state);
1026
- }
1027
1185
 
1028
- switch (*state->cursor) {
1186
+ switch (peek(state)) {
1029
1187
  case 'n':
1030
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
1188
+ if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
1031
1189
  state->cursor += 4;
1032
1190
  return json_push_value(state, config, Qnil);
1033
1191
  }
@@ -1035,7 +1193,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1035
1193
  raise_parse_error("unexpected token %s", state);
1036
1194
  break;
1037
1195
  case 't':
1038
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
1196
+ if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
1039
1197
  state->cursor += 4;
1040
1198
  return json_push_value(state, config, Qtrue);
1041
1199
  }
@@ -1044,7 +1202,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1044
1202
  break;
1045
1203
  case 'f':
1046
1204
  // Note: memcmp with a small power of two compile to an integer comparison
1047
- if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1205
+ if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1048
1206
  state->cursor += 5;
1049
1207
  return json_push_value(state, config, Qfalse);
1050
1208
  }
@@ -1053,7 +1211,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1053
1211
  break;
1054
1212
  case 'N':
1055
1213
  // Note: memcmp with a small power of two compile to an integer comparison
1056
- if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1214
+ if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1057
1215
  state->cursor += 3;
1058
1216
  return json_push_value(state, config, CNaN);
1059
1217
  }
@@ -1061,16 +1219,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1061
1219
  raise_parse_error("unexpected token %s", state);
1062
1220
  break;
1063
1221
  case 'I':
1064
- if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1222
+ if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1065
1223
  state->cursor += 8;
1066
1224
  return json_push_value(state, config, CInfinity);
1067
1225
  }
1068
1226
 
1069
1227
  raise_parse_error("unexpected token %s", state);
1070
1228
  break;
1071
- case '-':
1229
+ case '-': {
1072
1230
  // Note: memcmp with a small power of two compile to an integer comparison
1073
- if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1231
+ if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1074
1232
  if (config->allow_nan) {
1075
1233
  state->cursor += 9;
1076
1234
  return json_push_value(state, config, CMinusInfinity);
@@ -1078,62 +1236,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1078
1236
  raise_parse_error("unexpected token %s", state);
1079
1237
  }
1080
1238
  }
1081
- // Fallthrough
1082
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
1083
- bool integer = true;
1084
-
1085
- // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
1086
- const char *start = state->cursor;
1087
- state->cursor++;
1088
-
1089
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1090
- state->cursor++;
1091
- }
1092
-
1093
- long integer_length = state->cursor - start;
1094
-
1095
- if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
1096
- raise_parse_error_at("invalid number: %s", state, start);
1097
- } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
1098
- raise_parse_error_at("invalid number: %s", state, start);
1099
- } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
1100
- raise_parse_error_at("invalid number: %s", state, start);
1101
- }
1102
-
1103
- if ((state->cursor < state->end) && (*state->cursor == '.')) {
1104
- integer = false;
1105
- state->cursor++;
1106
-
1107
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1108
- raise_parse_error("invalid number: %s", state);
1109
- }
1110
-
1111
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1112
- state->cursor++;
1113
- }
1114
- }
1115
-
1116
- if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
1117
- integer = false;
1118
- state->cursor++;
1119
- if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
1120
- state->cursor++;
1121
- }
1122
-
1123
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1124
- raise_parse_error("invalid number: %s", state);
1125
- }
1126
-
1127
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1128
- state->cursor++;
1129
- }
1130
- }
1131
-
1132
- if (integer) {
1133
- return json_push_value(state, config, json_decode_integer(start, state->cursor));
1134
- }
1135
- return json_push_value(state, config, json_decode_float(config, start, state->cursor));
1239
+ return json_push_value(state, config, json_parse_negative_number(state, config));
1240
+ break;
1136
1241
  }
1242
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1243
+ return json_push_value(state, config, json_parse_positive_number(state, config));
1244
+ break;
1137
1245
  case '"': {
1138
1246
  // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1139
1247
  return json_parse_string(state, config, false);
@@ -1144,7 +1252,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1144
1252
  json_eat_whitespace(state);
1145
1253
  long stack_head = state->stack->head;
1146
1254
 
1147
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
1255
+ if (peek(state) == ']') {
1148
1256
  state->cursor++;
1149
1257
  return json_push_value(state, config, json_decode_array(state, config, 0));
1150
1258
  } else {
@@ -1159,26 +1267,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1159
1267
  while (true) {
1160
1268
  json_eat_whitespace(state);
1161
1269
 
1162
- if (state->cursor < state->end) {
1163
- if (*state->cursor == ']') {
1164
- state->cursor++;
1165
- long count = state->stack->head - stack_head;
1166
- state->current_nesting--;
1167
- state->in_array--;
1168
- return json_push_value(state, config, json_decode_array(state, config, count));
1169
- }
1270
+ const char next_char = peek(state);
1170
1271
 
1171
- if (*state->cursor == ',') {
1172
- state->cursor++;
1173
- if (config->allow_trailing_comma) {
1174
- json_eat_whitespace(state);
1175
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
1176
- continue;
1177
- }
1272
+ if (RB_LIKELY(next_char == ',')) {
1273
+ state->cursor++;
1274
+ if (config->allow_trailing_comma) {
1275
+ json_eat_whitespace(state);
1276
+ if (peek(state) == ']') {
1277
+ continue;
1178
1278
  }
1179
- json_parse_any(state, config);
1180
- continue;
1181
1279
  }
1280
+ json_parse_any(state, config);
1281
+ continue;
1282
+ }
1283
+
1284
+ if (next_char == ']') {
1285
+ state->cursor++;
1286
+ long count = state->stack->head - stack_head;
1287
+ state->current_nesting--;
1288
+ state->in_array--;
1289
+ return json_push_value(state, config, json_decode_array(state, config, count));
1182
1290
  }
1183
1291
 
1184
1292
  raise_parse_error("expected ',' or ']' after array value", state);
@@ -1192,7 +1300,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1192
1300
  json_eat_whitespace(state);
1193
1301
  long stack_head = state->stack->head;
1194
1302
 
1195
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
1303
+ if (peek(state) == '}') {
1196
1304
  state->cursor++;
1197
1305
  return json_push_value(state, config, json_decode_object(state, config, 0));
1198
1306
  } else {
@@ -1201,13 +1309,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1201
1309
  rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1202
1310
  }
1203
1311
 
1204
- if (*state->cursor != '"') {
1312
+ if (peek(state) != '"') {
1205
1313
  raise_parse_error("expected object key, got %s", state);
1206
1314
  }
1207
1315
  json_parse_string(state, config, true);
1208
1316
 
1209
1317
  json_eat_whitespace(state);
1210
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1318
+ if (peek(state) != ':') {
1211
1319
  raise_parse_error("expected ':' after object key", state);
1212
1320
  }
1213
1321
  state->cursor++;
@@ -1218,46 +1326,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1218
1326
  while (true) {
1219
1327
  json_eat_whitespace(state);
1220
1328
 
1221
- if (state->cursor < state->end) {
1222
- if (*state->cursor == '}') {
1223
- state->cursor++;
1224
- state->current_nesting--;
1225
- size_t count = state->stack->head - stack_head;
1329
+ const char next_char = peek(state);
1330
+ if (next_char == '}') {
1331
+ state->cursor++;
1332
+ state->current_nesting--;
1333
+ size_t count = state->stack->head - stack_head;
1226
1334
 
1227
- // Temporary rewind cursor in case an error is raised
1228
- const char *final_cursor = state->cursor;
1229
- state->cursor = object_start_cursor;
1230
- VALUE object = json_decode_object(state, config, count);
1231
- state->cursor = final_cursor;
1335
+ // Temporary rewind cursor in case an error is raised
1336
+ const char *final_cursor = state->cursor;
1337
+ state->cursor = object_start_cursor;
1338
+ VALUE object = json_decode_object(state, config, count);
1339
+ state->cursor = final_cursor;
1232
1340
 
1233
- return json_push_value(state, config, object);
1234
- }
1341
+ return json_push_value(state, config, object);
1342
+ }
1235
1343
 
1236
- if (*state->cursor == ',') {
1237
- state->cursor++;
1238
- json_eat_whitespace(state);
1344
+ if (next_char == ',') {
1345
+ state->cursor++;
1346
+ json_eat_whitespace(state);
1239
1347
 
1240
- if (config->allow_trailing_comma) {
1241
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
1242
- continue;
1243
- }
1348
+ if (config->allow_trailing_comma) {
1349
+ if (peek(state) == '}') {
1350
+ continue;
1244
1351
  }
1352
+ }
1245
1353
 
1246
- if (*state->cursor != '"') {
1247
- raise_parse_error("expected object key, got: %s", state);
1248
- }
1249
- json_parse_string(state, config, true);
1354
+ if (RB_UNLIKELY(peek(state) != '"')) {
1355
+ raise_parse_error("expected object key, got: %s", state);
1356
+ }
1357
+ json_parse_string(state, config, true);
1250
1358
 
1251
- json_eat_whitespace(state);
1252
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1253
- raise_parse_error("expected ':' after object key, got: %s", state);
1254
- }
1255
- state->cursor++;
1359
+ json_eat_whitespace(state);
1360
+ if (RB_UNLIKELY(peek(state) != ':')) {
1361
+ raise_parse_error("expected ':' after object key, got: %s", state);
1362
+ }
1363
+ state->cursor++;
1256
1364
 
1257
- json_parse_any(state, config);
1365
+ json_parse_any(state, config);
1258
1366
 
1259
- continue;
1260
- }
1367
+ continue;
1261
1368
  }
1262
1369
 
1263
1370
  raise_parse_error("expected ',' or '}' after object value, got: %s", state);
@@ -1265,18 +1372,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1265
1372
  break;
1266
1373
  }
1267
1374
 
1375
+ case 0:
1376
+ raise_parse_error("unexpected end of input", state);
1377
+ break;
1378
+
1268
1379
  default:
1269
1380
  raise_parse_error("unexpected character: %s", state);
1270
1381
  break;
1271
1382
  }
1272
1383
 
1273
1384
  raise_parse_error("unreachable: %s", state);
1385
+ return Qundef;
1274
1386
  }
1275
1387
 
1276
1388
  static void json_ensure_eof(JSON_ParserState *state)
1277
1389
  {
1278
1390
  json_eat_whitespace(state);
1279
- if (state->cursor != state->end) {
1391
+ if (!eos(state)) {
1280
1392
  raise_parse_error("unexpected token at end of stream %s", state);
1281
1393
  }
1282
1394
  }
@@ -1393,6 +1505,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
1393
1505
  */
1394
1506
  static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
1395
1507
  {
1508
+ rb_check_frozen(self);
1396
1509
  GET_PARSER_CONFIG;
1397
1510
 
1398
1511
  parser_config_init(config, opts);
@@ -1488,7 +1601,7 @@ static const rb_data_type_t JSON_ParserConfig_type = {
1488
1601
  JSON_ParserConfig_memsize,
1489
1602
  },
1490
1603
  0, 0,
1491
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
1604
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE,
1492
1605
  };
1493
1606
 
1494
1607
  static VALUE cJSON_parser_s_allocate(VALUE klass)
@@ -1538,10 +1651,6 @@ void Init_parser(void)
1538
1651
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1539
1652
  sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1540
1653
 
1541
- i_chr = rb_intern("chr");
1542
- i_aset = rb_intern("[]=");
1543
- i_aref = rb_intern("[]");
1544
- i_leftshift = rb_intern("<<");
1545
1654
  i_new = rb_intern("new");
1546
1655
  i_try_convert = rb_intern("try_convert");
1547
1656
  i_uminus = rb_intern("-@");