json 2.13.2 → 2.19.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,50 +1,22 @@
1
- #include "ruby.h"
2
- #include "ruby/encoding.h"
3
-
4
- /* shims */
5
- /* This is the fallback definition from Ruby 3.4 */
6
-
7
- #ifndef RBIMPL_STDBOOL_H
8
- #if defined(__cplusplus)
9
- # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
- # include <cstdbool>
11
- # endif
12
- #elif defined(HAVE_STDBOOL_H)
13
- # include <stdbool.h>
14
- #elif !defined(HAVE__BOOL)
15
- typedef unsigned char _Bool;
16
- # define bool _Bool
17
- # define true ((_Bool)+1)
18
- # define false ((_Bool)+0)
19
- # define __bool_true_false_are_defined
20
- #endif
21
- #endif
22
-
1
+ #include "../json.h"
2
+ #include "../vendor/ryu.h"
23
3
  #include "../simd/simd.h"
24
4
 
25
- #ifndef RB_UNLIKELY
26
- #define RB_UNLIKELY(expr) expr
27
- #endif
28
-
29
- #ifndef RB_LIKELY
30
- #define RB_LIKELY(expr) expr
31
- #endif
32
-
33
5
  static VALUE mJSON, eNestingError, Encoding_UTF_8;
34
6
  static VALUE CNaN, CInfinity, CMinusInfinity;
35
7
 
36
- static ID i_chr, i_aset, i_aref,
37
- i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
8
+ static ID i_new, i_try_convert, i_uminus, i_encode;
38
9
 
39
- static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
40
- sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
10
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
11
+ sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
12
+ sym_allow_duplicate_key;
41
13
 
42
14
  static int binary_encindex;
43
15
  static int utf8_encindex;
44
16
 
45
17
  #ifndef HAVE_RB_HASH_BULK_INSERT
46
18
  // For TruffleRuby
47
- void
19
+ static void
48
20
  rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
49
21
  {
50
22
  long index = 0;
@@ -61,6 +33,12 @@ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
61
33
  #define rb_hash_new_capa(n) rb_hash_new()
62
34
  #endif
63
35
 
36
+ #ifndef HAVE_RB_STR_TO_INTERNED_STR
37
+ static VALUE rb_str_to_interned_str(VALUE str)
38
+ {
39
+ return rb_funcall(rb_str_freeze(str), i_uminus, 0);
40
+ }
41
+ #endif
64
42
 
65
43
  /* name cache */
66
44
 
@@ -106,116 +84,104 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
106
84
  cache->entries[index] = rstring;
107
85
  }
108
86
 
109
- static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
87
+ #define rstring_cache_memcmp memcmp
88
+
89
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
90
+ #if __has_builtin(__builtin_bswap64)
91
+ #undef rstring_cache_memcmp
92
+ ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
110
93
  {
111
- long rstring_length = RSTRING_LEN(rstring);
112
- if (length == rstring_length) {
113
- return memcmp(str, RSTRING_PTR(rstring), length);
114
- } else {
115
- return (int)(length - rstring_length);
94
+ // The libc memcmp has numerous complex optimizations, but in this particular case,
95
+ // we know the string is small (JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH), so being able to
96
+ // inline a simpler memcmp outperforms calling the libc version.
97
+ long i = 0;
98
+
99
+ for (; i + 8 <= length; i += 8) {
100
+ uint64_t a, b;
101
+ memcpy(&a, str + i, 8);
102
+ memcpy(&b, rptr + i, 8);
103
+ if (a != b) {
104
+ a = __builtin_bswap64(a);
105
+ b = __builtin_bswap64(b);
106
+ return (a < b) ? -1 : 1;
107
+ }
116
108
  }
109
+
110
+ for (; i < length; i++) {
111
+ if (str[i] != rptr[i]) {
112
+ return (str[i] < rptr[i]) ? -1 : 1;
113
+ }
114
+ }
115
+
116
+ return 0;
117
117
  }
118
+ #endif
119
+ #endif
118
120
 
119
- static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
121
+ ALWAYS_INLINE(static) int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
120
122
  {
121
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
122
- // Common names aren't likely to be very long. So we just don't
123
- // cache names above an arbitrary threshold.
124
- return Qfalse;
125
- }
123
+ const char *rstring_ptr;
124
+ long rstring_length;
125
+
126
+ RSTRING_GETMEM(rstring, rstring_ptr, rstring_length);
126
127
 
127
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
128
- // Simple heuristic, if the first character isn't a letter,
129
- // we're much less likely to see this string again.
130
- // We mostly want to cache strings that are likely to be repeated.
131
- return Qfalse;
128
+ if (length == rstring_length) {
129
+ return rstring_cache_memcmp(str, rstring_ptr, length);
130
+ } else {
131
+ return (int)(length - rstring_length);
132
132
  }
133
+ }
133
134
 
135
+ ALWAYS_INLINE(static) VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
136
+ {
134
137
  int low = 0;
135
138
  int high = cache->length - 1;
136
- int mid = 0;
137
- int last_cmp = 0;
138
139
 
139
140
  while (low <= high) {
140
- mid = (high + low) >> 1;
141
+ int mid = (high + low) >> 1;
141
142
  VALUE entry = cache->entries[mid];
142
- last_cmp = rstring_cache_cmp(str, length, entry);
143
+ int cmp = rstring_cache_cmp(str, length, entry);
143
144
 
144
- if (last_cmp == 0) {
145
+ if (cmp == 0) {
145
146
  return entry;
146
- } else if (last_cmp > 0) {
147
+ } else if (cmp > 0) {
147
148
  low = mid + 1;
148
149
  } else {
149
150
  high = mid - 1;
150
151
  }
151
152
  }
152
153
 
153
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
154
- // We assume the overwhelming majority of names don't need to be escaped.
155
- // But if they do, we have to fallback to the slow path.
156
- return Qfalse;
157
- }
158
-
159
154
  VALUE rstring = build_interned_string(str, length);
160
155
 
161
156
  if (cache->length < JSON_RVALUE_CACHE_CAPA) {
162
- if (last_cmp > 0) {
163
- mid += 1;
164
- }
165
-
166
- rvalue_cache_insert_at(cache, mid, rstring);
157
+ rvalue_cache_insert_at(cache, low, rstring);
167
158
  }
168
159
  return rstring;
169
160
  }
170
161
 
171
162
  static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
172
163
  {
173
- if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
174
- // Common names aren't likely to be very long. So we just don't
175
- // cache names above an arbitrary threshold.
176
- return Qfalse;
177
- }
178
-
179
- if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
180
- // Simple heuristic, if the first character isn't a letter,
181
- // we're much less likely to see this string again.
182
- // We mostly want to cache strings that are likely to be repeated.
183
- return Qfalse;
184
- }
185
-
186
164
  int low = 0;
187
165
  int high = cache->length - 1;
188
- int mid = 0;
189
- int last_cmp = 0;
190
166
 
191
167
  while (low <= high) {
192
- mid = (high + low) >> 1;
168
+ int mid = (high + low) >> 1;
193
169
  VALUE entry = cache->entries[mid];
194
- last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
170
+ int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
195
171
 
196
- if (last_cmp == 0) {
172
+ if (cmp == 0) {
197
173
  return entry;
198
- } else if (last_cmp > 0) {
174
+ } else if (cmp > 0) {
199
175
  low = mid + 1;
200
176
  } else {
201
177
  high = mid - 1;
202
178
  }
203
179
  }
204
180
 
205
- if (RB_UNLIKELY(memchr(str, '\\', length))) {
206
- // We assume the overwhelming majority of names don't need to be escaped.
207
- // But if they do, we have to fallback to the slow path.
208
- return Qfalse;
209
- }
210
-
211
181
  VALUE rsymbol = build_symbol(str, length);
212
182
 
213
183
  if (cache->length < JSON_RVALUE_CACHE_CAPA) {
214
- if (last_cmp > 0) {
215
- mid += 1;
216
- }
217
-
218
- rvalue_cache_insert_at(cache, mid, rsymbol);
184
+ rvalue_cache_insert_at(cache, low, rsymbol);
219
185
  }
220
186
  return rsymbol;
221
187
  }
@@ -275,17 +241,27 @@ static void rvalue_stack_mark(void *ptr)
275
241
  {
276
242
  rvalue_stack *stack = (rvalue_stack *)ptr;
277
243
  long index;
278
- for (index = 0; index < stack->head; index++) {
279
- rb_gc_mark(stack->ptr[index]);
244
+ if (stack && stack->ptr) {
245
+ for (index = 0; index < stack->head; index++) {
246
+ rb_gc_mark(stack->ptr[index]);
247
+ }
280
248
  }
281
249
  }
282
250
 
251
+ static void rvalue_stack_free_buffer(rvalue_stack *stack)
252
+ {
253
+ ruby_xfree(stack->ptr);
254
+ stack->ptr = NULL;
255
+ }
256
+
283
257
  static void rvalue_stack_free(void *ptr)
284
258
  {
285
259
  rvalue_stack *stack = (rvalue_stack *)ptr;
286
260
  if (stack) {
287
- ruby_xfree(stack->ptr);
261
+ rvalue_stack_free_buffer(stack);
262
+ #ifndef HAVE_RUBY_TYPED_EMBEDDABLE
288
263
  ruby_xfree(stack);
264
+ #endif
289
265
  }
290
266
  }
291
267
 
@@ -296,14 +272,13 @@ static size_t rvalue_stack_memsize(const void *ptr)
296
272
  }
297
273
 
298
274
  static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
299
- "JSON::Ext::Parser/rvalue_stack",
300
- {
275
+ .wrap_struct_name = "JSON::Ext::Parser/rvalue_stack",
276
+ .function = {
301
277
  .dmark = rvalue_stack_mark,
302
278
  .dfree = rvalue_stack_free,
303
279
  .dsize = rvalue_stack_memsize,
304
280
  },
305
- 0, 0,
306
- RUBY_TYPED_FREE_IMMEDIATELY,
281
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
307
282
  };
308
283
 
309
284
  static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
@@ -325,20 +300,15 @@ static void rvalue_stack_eagerly_release(VALUE handle)
325
300
  if (handle) {
326
301
  rvalue_stack *stack;
327
302
  TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
328
- RTYPEDDATA_DATA(handle) = NULL;
303
+ #ifdef HAVE_RUBY_TYPED_EMBEDDABLE
304
+ rvalue_stack_free_buffer(stack);
305
+ #else
329
306
  rvalue_stack_free(stack);
307
+ RTYPEDDATA_DATA(handle) = NULL;
308
+ #endif
330
309
  }
331
310
  }
332
311
 
333
-
334
- #ifndef HAVE_STRNLEN
335
- static size_t strnlen(const char *s, size_t maxlen)
336
- {
337
- char *p;
338
- return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
339
- }
340
- #endif
341
-
342
312
  static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
343
313
  {
344
314
  int len = 1;
@@ -379,13 +349,14 @@ typedef struct JSON_ParserStruct {
379
349
  int max_nesting;
380
350
  bool allow_nan;
381
351
  bool allow_trailing_comma;
382
- bool parsing_name;
352
+ bool allow_control_characters;
353
+ bool allow_invalid_escape;
383
354
  bool symbolize_names;
384
355
  bool freeze;
385
356
  } JSON_ParserConfig;
386
357
 
387
358
  typedef struct JSON_ParserStateStruct {
388
- VALUE stack_handle;
359
+ VALUE *stack_handle;
389
360
  const char *start;
390
361
  const char *cursor;
391
362
  const char *end;
@@ -393,8 +364,25 @@ typedef struct JSON_ParserStateStruct {
393
364
  rvalue_cache name_cache;
394
365
  int in_array;
395
366
  int current_nesting;
367
+ unsigned int emitted_deprecations;
396
368
  } JSON_ParserState;
397
369
 
370
+ static inline size_t rest(JSON_ParserState *state) {
371
+ return state->end - state->cursor;
372
+ }
373
+
374
+ static inline bool eos(JSON_ParserState *state) {
375
+ return state->cursor >= state->end;
376
+ }
377
+
378
+ static inline char peek(JSON_ParserState *state)
379
+ {
380
+ if (RB_UNLIKELY(eos(state))) {
381
+ return 0;
382
+ }
383
+ return *state->cursor;
384
+ }
385
+
398
386
  static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
399
387
  {
400
388
  const char *cursor = state->cursor;
@@ -428,14 +416,9 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
428
416
 
429
417
  #define PARSE_ERROR_FRAGMENT_LEN 32
430
418
 
431
- #ifdef RBIMPL_ATTR_NORETURN
432
- RBIMPL_ATTR_NORETURN()
433
- #endif
434
- static void raise_parse_error(const char *format, JSON_ParserState *state)
419
+ static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
435
420
  {
436
421
  unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
437
- long line, column;
438
- cursor_position(state, &line, &column);
439
422
 
440
423
  const char *ptr = "EOF";
441
424
  if (state->cursor && state->cursor < state->end) {
@@ -467,20 +450,28 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
467
450
  }
468
451
  }
469
452
 
470
- VALUE msg = rb_sprintf(format, ptr);
471
- VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
472
- RB_GC_GUARD(msg);
453
+ VALUE message = rb_enc_sprintf(enc_utf8, format, ptr);
454
+ rb_str_catf(message, " at line %ld column %ld", line, column);
455
+ return message;
456
+ }
473
457
 
458
+ static VALUE parse_error_new(VALUE message, long line, long column)
459
+ {
474
460
  VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
475
461
  rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
476
462
  rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
477
- rb_exc_raise(exc);
463
+ return exc;
478
464
  }
479
465
 
480
- #ifdef RBIMPL_ATTR_NORETURN
481
- RBIMPL_ATTR_NORETURN()
482
- #endif
483
- static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
466
+ NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
467
+ {
468
+ long line, column;
469
+ cursor_position(state, &line, &column);
470
+ VALUE message = build_parse_error_message(format, state, line, column);
471
+ rb_exc_raise(parse_error_new(message, line, column));
472
+ }
473
+
474
+ NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
484
475
  {
485
476
  state->cursor = at;
486
477
  raise_parse_error(format, state);
@@ -505,23 +496,24 @@ static const signed char digit_values[256] = {
505
496
  -1, -1, -1, -1, -1, -1, -1
506
497
  };
507
498
 
508
- static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
509
- {
510
- signed char b;
511
- uint32_t result = 0;
512
- b = digit_values[p[0]];
513
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
514
- result = (result << 4) | (unsigned char)b;
515
- b = digit_values[p[1]];
516
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
517
- result = (result << 4) | (unsigned char)b;
518
- b = digit_values[p[2]];
519
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
520
- result = (result << 4) | (unsigned char)b;
521
- b = digit_values[p[3]];
522
- if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
523
- result = (result << 4) | (unsigned char)b;
524
- return result;
499
+ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
500
+ {
501
+ if (RB_UNLIKELY(sp > spe - 4)) {
502
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
503
+ }
504
+
505
+ const unsigned char *p = (const unsigned char *)sp;
506
+
507
+ const signed char b0 = digit_values[p[0]];
508
+ const signed char b1 = digit_values[p[1]];
509
+ const signed char b2 = digit_values[p[2]];
510
+ const signed char b3 = digit_values[p[3]];
511
+
512
+ if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
513
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
514
+ }
515
+
516
+ return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
525
517
  }
526
518
 
527
519
  #define GET_PARSER_CONFIG \
@@ -530,61 +522,82 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
530
522
 
531
523
  static const rb_data_type_t JSON_ParserConfig_type;
532
524
 
533
- static const bool whitespace[256] = {
534
- [' '] = 1,
535
- ['\t'] = 1,
536
- ['\n'] = 1,
537
- ['\r'] = 1,
538
- ['/'] = 1,
539
- };
540
-
541
525
  static void
542
526
  json_eat_comments(JSON_ParserState *state)
543
527
  {
544
- if (state->cursor + 1 < state->end) {
545
- switch (state->cursor[1]) {
546
- case '/': {
547
- state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
548
- if (!state->cursor) {
549
- state->cursor = state->end;
550
- } else {
551
- state->cursor++;
552
- }
553
- break;
528
+ const char *start = state->cursor;
529
+ state->cursor++;
530
+
531
+ switch (peek(state)) {
532
+ case '/': {
533
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
534
+ if (!state->cursor) {
535
+ state->cursor = state->end;
536
+ } else {
537
+ state->cursor++;
554
538
  }
555
- case '*': {
556
- state->cursor += 2;
557
- while (true) {
558
- state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
559
- if (!state->cursor) {
560
- raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
561
- } else {
562
- state->cursor++;
563
- if (state->cursor < state->end && *state->cursor == '/') {
564
- state->cursor++;
565
- break;
566
- }
567
- }
539
+ break;
540
+ }
541
+ case '*': {
542
+ state->cursor++;
543
+
544
+ while (true) {
545
+ const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
546
+ if (!next_match) {
547
+ raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
548
+ }
549
+
550
+ state->cursor = next_match + 1;
551
+ if (peek(state) == '/') {
552
+ state->cursor++;
553
+ break;
568
554
  }
569
- break;
570
555
  }
571
- default:
572
- raise_parse_error("unexpected token %s", state);
573
- break;
556
+ break;
574
557
  }
575
- } else {
576
- raise_parse_error("unexpected token %s", state);
558
+ default:
559
+ raise_parse_error_at("unexpected token %s", state, start);
560
+ break;
577
561
  }
578
562
  }
579
563
 
580
- static inline void
564
+ ALWAYS_INLINE(static) void
581
565
  json_eat_whitespace(JSON_ParserState *state)
582
566
  {
583
- while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
584
- if (RB_LIKELY(*state->cursor != '/')) {
585
- state->cursor++;
586
- } else {
587
- json_eat_comments(state);
567
+ while (true) {
568
+ switch (peek(state)) {
569
+ case ' ':
570
+ state->cursor++;
571
+ break;
572
+ case '\n':
573
+ state->cursor++;
574
+
575
+ // Heuristic: if we see a newline, there is likely consecutive spaces after it.
576
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
577
+ while (rest(state) > 8) {
578
+ uint64_t chunk;
579
+ memcpy(&chunk, state->cursor, sizeof(uint64_t));
580
+ if (chunk == 0x2020202020202020) {
581
+ state->cursor += 8;
582
+ continue;
583
+ }
584
+
585
+ uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
586
+ state->cursor += consecutive_spaces;
587
+ break;
588
+ }
589
+ #endif
590
+ break;
591
+ case '\t':
592
+ case '\r':
593
+ state->cursor++;
594
+ break;
595
+ case '/':
596
+ json_eat_comments(state);
597
+ break;
598
+
599
+ default:
600
+ return;
588
601
  }
589
602
  }
590
603
  }
@@ -615,11 +628,22 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
615
628
  return result;
616
629
  }
617
630
 
618
- static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
631
+ static inline bool json_string_cacheable_p(const char *string, size_t length)
619
632
  {
633
+ // We mostly want to cache strings that are likely to be repeated.
634
+ // Simple heuristics:
635
+ // - Common names aren't likely to be very long. So we just don't cache names above an arbitrary threshold.
636
+ // - If the first character isn't a letter, we're much less likely to see this string again.
637
+ return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha(string[0]);
638
+ }
639
+
640
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name)
641
+ {
642
+ bool intern = is_name || config->freeze;
643
+ bool symbolize = is_name && config->symbolize_names;
620
644
  size_t bufferSize = stringEnd - string;
621
645
 
622
- if (is_name && state->in_array) {
646
+ if (is_name && state->in_array && RB_LIKELY(json_string_cacheable_p(string, bufferSize))) {
623
647
  VALUE cached_key;
624
648
  if (RB_UNLIKELY(symbolize)) {
625
649
  cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
@@ -635,104 +659,129 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
635
659
  return build_string(string, stringEnd, intern, symbolize);
636
660
  }
637
661
 
638
- static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
639
- {
640
- size_t bufferSize = stringEnd - string;
641
- const char *p = string, *pe = string, *unescape, *bufferStart;
642
- char *buffer;
643
- int unescape_len;
644
- char buf[4];
662
+ #define JSON_MAX_UNESCAPE_POSITIONS 16
663
+ typedef struct _json_unescape_positions {
664
+ long size;
665
+ const char **positions;
666
+ unsigned long additional_backslashes;
667
+ } JSON_UnescapePositions;
645
668
 
646
- if (is_name && state->in_array) {
647
- VALUE cached_key;
648
- if (RB_UNLIKELY(symbolize)) {
649
- cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
650
- } else {
651
- cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
669
+ static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
670
+ {
671
+ while (positions->size) {
672
+ positions->size--;
673
+ const char *next_position = positions->positions[0];
674
+ positions->positions++;
675
+ if (next_position >= pe) {
676
+ return next_position;
652
677
  }
678
+ }
653
679
 
654
- if (RB_LIKELY(cached_key)) {
655
- return cached_key;
656
- }
680
+ if (positions->additional_backslashes) {
681
+ positions->additional_backslashes--;
682
+ return memchr(pe, '\\', stringEnd - pe);
657
683
  }
658
684
 
685
+ return NULL;
686
+ }
687
+
688
+ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_ParserConfig *config, const char *string, const char *stringEnd, bool is_name, JSON_UnescapePositions *positions)
689
+ {
690
+ bool intern = is_name || config->freeze;
691
+ bool symbolize = is_name && config->symbolize_names;
692
+ size_t bufferSize = stringEnd - string;
693
+ const char *p = string, *pe = string, *bufferStart;
694
+ char *buffer;
695
+
659
696
  VALUE result = rb_str_buf_new(bufferSize);
660
697
  rb_enc_associate_index(result, utf8_encindex);
661
698
  buffer = RSTRING_PTR(result);
662
699
  bufferStart = buffer;
663
700
 
664
- while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
665
- unescape = (char *) "?";
666
- unescape_len = 1;
701
+ #define APPEND_CHAR(chr) *buffer++ = chr; p = ++pe;
702
+
703
+ while (pe < stringEnd && (pe = json_next_backslash(pe, stringEnd, positions))) {
667
704
  if (pe > p) {
668
705
  MEMCPY(buffer, p, char, pe - p);
669
706
  buffer += pe - p;
670
707
  }
671
708
  switch (*++pe) {
709
+ case '"':
710
+ case '/':
711
+ p = pe; // nothing to unescape just need to skip the backslash
712
+ break;
713
+ case '\\':
714
+ APPEND_CHAR('\\');
715
+ break;
672
716
  case 'n':
673
- unescape = (char *) "\n";
717
+ APPEND_CHAR('\n');
674
718
  break;
675
719
  case 'r':
676
- unescape = (char *) "\r";
720
+ APPEND_CHAR('\r');
677
721
  break;
678
722
  case 't':
679
- unescape = (char *) "\t";
680
- break;
681
- case '"':
682
- unescape = (char *) "\"";
683
- break;
684
- case '\\':
685
- unescape = (char *) "\\";
723
+ APPEND_CHAR('\t');
686
724
  break;
687
725
  case 'b':
688
- unescape = (char *) "\b";
726
+ APPEND_CHAR('\b');
689
727
  break;
690
728
  case 'f':
691
- unescape = (char *) "\f";
729
+ APPEND_CHAR('\f');
692
730
  break;
693
- case 'u':
694
- if (pe > stringEnd - 5) {
695
- raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
696
- } else {
697
- uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
698
- pe += 3;
699
- /* To handle values above U+FFFF, we take a sequence of
700
- * \uXXXX escapes in the U+D800..U+DBFF then
701
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
702
- * to make a 20-bit number, then add 0x10000 to get the
703
- * final codepoint.
704
- *
705
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
706
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
707
- * Area".
708
- */
709
- if ((ch & 0xFC00) == 0xD800) {
710
- pe++;
711
- if (pe > stringEnd - 6) {
712
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
713
- }
714
- if (pe[0] == '\\' && pe[1] == 'u') {
715
- uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
716
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
717
- | (sur & 0x3FF));
718
- pe += 5;
719
- } else {
720
- unescape = (char *) "?";
721
- break;
731
+ case 'u': {
732
+ uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
733
+ pe += 3;
734
+ /* To handle values above U+FFFF, we take a sequence of
735
+ * \uXXXX escapes in the U+D800..U+DBFF then
736
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
737
+ * to make a 20-bit number, then add 0x10000 to get the
738
+ * final codepoint.
739
+ *
740
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
741
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
742
+ * Area".
743
+ */
744
+ if ((ch & 0xFC00) == 0xD800) {
745
+ pe++;
746
+ if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
747
+ uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
748
+
749
+ if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
750
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
722
751
  }
752
+
753
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
754
+ pe += 5;
755
+ } else {
756
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
757
+ break;
723
758
  }
724
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
725
- unescape = buf;
726
759
  }
760
+
761
+ int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
762
+ buffer += unescape_len;
763
+ p = ++pe;
727
764
  break;
765
+ }
728
766
  default:
729
- p = pe;
730
- continue;
767
+ if ((unsigned char)*pe < 0x20) {
768
+ if (!config->allow_control_characters) {
769
+ if (*pe == '\n') {
770
+ raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
771
+ }
772
+ raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
773
+ }
774
+ }
775
+
776
+ if (config->allow_invalid_escape) {
777
+ APPEND_CHAR(*pe);
778
+ } else {
779
+ raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
780
+ }
781
+ break;
731
782
  }
732
- MEMCPY(buffer, unescape, char, unescape_len);
733
- buffer += unescape_len;
734
- p = ++pe;
735
783
  }
784
+ #undef APPEND_CHAR
736
785
 
737
786
  if (stringEnd > p) {
738
787
  MEMCPY(buffer, p, char, stringEnd - p);
@@ -743,81 +792,93 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
743
792
  if (symbolize) {
744
793
  result = rb_str_intern(result);
745
794
  } else if (intern) {
746
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
795
+ result = rb_str_to_interned_str(result);
747
796
  }
748
797
 
749
798
  return result;
750
799
  }
751
800
 
752
801
  #define MAX_FAST_INTEGER_SIZE 18
753
- static inline VALUE fast_decode_integer(const char *p, const char *pe)
754
- {
755
- bool negative = false;
756
- if (*p == '-') {
757
- negative = true;
758
- p++;
759
- }
802
+ #define MAX_NUMBER_STACK_BUFFER 128
760
803
 
761
- long long memo = 0;
762
- while (p < pe) {
763
- memo *= 10;
764
- memo += *p - '0';
765
- p++;
766
- }
804
+ typedef VALUE (*json_number_decode_func_t)(const char *ptr);
767
805
 
768
- if (negative) {
769
- memo = -memo;
806
+ static inline VALUE json_decode_large_number(const char *start, long len, json_number_decode_func_t func)
807
+ {
808
+ if (RB_LIKELY(len < MAX_NUMBER_STACK_BUFFER)) {
809
+ char buffer[MAX_NUMBER_STACK_BUFFER];
810
+ MEMCPY(buffer, start, char, len);
811
+ buffer[len] = '\0';
812
+ return func(buffer);
813
+ } else {
814
+ VALUE buffer_v = rb_str_tmp_new(len);
815
+ char *buffer = RSTRING_PTR(buffer_v);
816
+ MEMCPY(buffer, start, char, len);
817
+ buffer[len] = '\0';
818
+ VALUE number = func(buffer);
819
+ RB_GC_GUARD(buffer_v);
820
+ return number;
770
821
  }
771
- return LL2NUM(memo);
772
822
  }
773
823
 
774
- static VALUE json_decode_large_integer(const char *start, long len)
824
+ static VALUE json_decode_inum(const char *buffer)
775
825
  {
776
- VALUE buffer_v;
777
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
778
- MEMCPY(buffer, start, char, len);
779
- buffer[len] = '\0';
780
- VALUE number = rb_cstr2inum(buffer, 10);
781
- RB_ALLOCV_END(buffer_v);
782
- return number;
826
+ return rb_cstr2inum(buffer, 10);
783
827
  }
784
828
 
785
- static inline VALUE
786
- json_decode_integer(const char *start, const char *end)
829
+ NOINLINE(static) VALUE json_decode_large_integer(const char *start, long len)
787
830
  {
788
- long len = end - start;
789
- if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
790
- return fast_decode_integer(start, end);
831
+ return json_decode_large_number(start, len, json_decode_inum);
832
+ }
833
+
834
+ static inline VALUE json_decode_integer(uint64_t mantissa, int mantissa_digits, bool negative, const char *start, const char *end)
835
+ {
836
+ if (RB_LIKELY(mantissa_digits < MAX_FAST_INTEGER_SIZE)) {
837
+ if (negative) {
838
+ return INT64T2NUM(-((int64_t)mantissa));
791
839
  }
792
- return json_decode_large_integer(start, len);
840
+ return UINT64T2NUM(mantissa);
841
+ }
842
+
843
+ return json_decode_large_integer(start, end - start);
793
844
  }
794
845
 
795
- static VALUE json_decode_large_float(const char *start, long len)
846
+ static VALUE json_decode_dnum(const char *buffer)
796
847
  {
797
- VALUE buffer_v;
798
- char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
799
- MEMCPY(buffer, start, char, len);
800
- buffer[len] = '\0';
801
- VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
802
- RB_ALLOCV_END(buffer_v);
803
- return number;
848
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
804
849
  }
805
850
 
806
- static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
851
+ NOINLINE(static) VALUE json_decode_large_float(const char *start, long len)
807
852
  {
808
- long len = end - start;
853
+ return json_decode_large_number(start, len, json_decode_dnum);
854
+ }
809
855
 
856
+ /* Ruby JSON optimized float decoder using vendored Ryu algorithm
857
+ * Accepts pre-extracted mantissa and exponent from first-pass validation
858
+ */
859
+ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantissa, int mantissa_digits, int64_t exponent, bool negative,
860
+ const char *start, const char *end)
861
+ {
810
862
  if (RB_UNLIKELY(config->decimal_class)) {
811
- VALUE text = rb_str_new(start, len);
863
+ VALUE text = rb_str_new(start, end - start);
812
864
  return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
813
- } else if (RB_LIKELY(len < 64)) {
814
- char buffer[64];
815
- MEMCPY(buffer, start, char, len);
816
- buffer[len] = '\0';
817
- return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
818
- } else {
819
- return json_decode_large_float(start, len);
820
865
  }
866
+
867
+ if (RB_UNLIKELY(exponent > INT32_MAX)) {
868
+ return negative ? CMinusInfinity : CInfinity;
869
+ }
870
+
871
+ if (RB_UNLIKELY(exponent < INT32_MIN)) {
872
+ return rb_float_new(negative ? -0.0 : 0.0);
873
+ }
874
+
875
+ // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
876
+ // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
877
+ if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
878
+ return json_decode_large_float(start, end - start);
879
+ }
880
+
881
+ return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative));
821
882
  }
822
883
 
823
884
  static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
@@ -849,7 +910,7 @@ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
849
910
  return Qfalse;
850
911
  }
851
912
 
852
- static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
913
+ NOINLINE(static) void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
853
914
  {
854
915
  VALUE message = rb_sprintf(
855
916
  "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
@@ -860,18 +921,17 @@ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_
860
921
  RB_GC_GUARD(message);
861
922
  }
862
923
 
863
- #ifdef RBIMPL_ATTR_NORETURN
864
- RBIMPL_ATTR_NORETURN()
865
- #endif
866
- static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
924
+ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
867
925
  {
868
926
  VALUE message = rb_sprintf(
869
927
  "duplicate key %"PRIsVALUE,
870
928
  rb_inspect(duplicate_key)
871
929
  );
872
930
 
873
- raise_parse_error(RSTRING_PTR(message), state);
874
- RB_GC_GUARD(message);
931
+ long line, column;
932
+ cursor_position(state, &line, &column);
933
+ rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
934
+ rb_exc_raise(parse_error_new(message, line, column));
875
935
  }
876
936
 
877
937
  static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
@@ -886,7 +946,12 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
886
946
  case JSON_IGNORE:
887
947
  break;
888
948
  case JSON_DEPRECATED:
889
- emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
949
+ // Only emit the first few deprecations to avoid spamming.
950
+ if (state->emitted_deprecations < 5) {
951
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
952
+ state->emitted_deprecations++;
953
+ }
954
+
890
955
  break;
891
956
  case JSON_RAISE:
892
957
  raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
@@ -903,26 +968,12 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
903
968
  return object;
904
969
  }
905
970
 
906
- static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
907
- {
908
- VALUE string;
909
- bool intern = is_name || config->freeze;
910
- bool symbolize = is_name && config->symbolize_names;
911
- if (escaped) {
912
- string = json_string_unescape(state, start, end, is_name, intern, symbolize);
913
- } else {
914
- string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
915
- }
916
-
917
- return string;
918
- }
919
-
920
971
  static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
921
972
  {
922
973
  if (RB_UNLIKELY(config->on_load_proc)) {
923
974
  value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
924
975
  }
925
- rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
976
+ rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack);
926
977
  return value;
927
978
  }
928
979
 
@@ -939,17 +990,11 @@ static const bool string_scan_table[256] = {
939
990
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
940
991
  };
941
992
 
942
- #if (defined(__GNUC__ ) || defined(__clang__))
943
- #define FORCE_INLINE __attribute__((always_inline))
944
- #else
945
- #define FORCE_INLINE
946
- #endif
947
-
948
993
  #ifdef HAVE_SIMD
949
994
  static SIMD_Implementation simd_impl = SIMD_NONE;
950
995
  #endif /* HAVE_SIMD */
951
996
 
952
- static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
997
+ ALWAYS_INLINE(static) bool string_scan(JSON_ParserState *state)
953
998
  {
954
999
  #ifdef HAVE_SIMD
955
1000
  #if defined(HAVE_SIMD_NEON)
@@ -957,7 +1002,7 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
957
1002
  uint64_t mask = 0;
958
1003
  if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
959
1004
  state->cursor += trailing_zeros64(mask) >> 2;
960
- return 1;
1005
+ return true;
961
1006
  }
962
1007
 
963
1008
  #elif defined(HAVE_SIMD_SSE2)
@@ -965,64 +1010,232 @@ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
965
1010
  int mask = 0;
966
1011
  if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
967
1012
  state->cursor += trailing_zeros(mask);
968
- return 1;
1013
+ return true;
969
1014
  }
970
1015
  }
971
1016
  #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
972
1017
  #endif /* HAVE_SIMD */
973
1018
 
974
- while (state->cursor < state->end) {
1019
+ while (!eos(state)) {
975
1020
  if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
976
- return 1;
1021
+ return true;
977
1022
  }
978
- *state->cursor++;
1023
+ state->cursor++;
979
1024
  }
980
- return 0;
1025
+ return false;
981
1026
  }
982
1027
 
983
- static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
1028
+ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name, const char *start)
984
1029
  {
985
- state->cursor++;
986
- const char *start = state->cursor;
987
- bool escaped = false;
1030
+ const char *backslashes[JSON_MAX_UNESCAPE_POSITIONS];
1031
+ JSON_UnescapePositions positions = {
1032
+ .size = 0,
1033
+ .positions = backslashes,
1034
+ .additional_backslashes = 0,
1035
+ };
988
1036
 
989
- while (RB_UNLIKELY(string_scan(state))) {
1037
+ do {
990
1038
  switch (*state->cursor) {
991
1039
  case '"': {
992
- VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
1040
+ VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
993
1041
  state->cursor++;
994
1042
  return json_push_value(state, config, string);
995
1043
  }
996
1044
  case '\\': {
997
- state->cursor++;
998
- escaped = true;
999
- if ((unsigned char)*state->cursor < 0x20) {
1000
- raise_parse_error("invalid ASCII control character in string: %s", state);
1045
+ if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
1046
+ backslashes[positions.size] = state->cursor;
1047
+ positions.size++;
1048
+ } else {
1049
+ positions.additional_backslashes++;
1001
1050
  }
1051
+ state->cursor++;
1002
1052
  break;
1003
1053
  }
1004
1054
  default:
1005
- raise_parse_error("invalid ASCII control character in string: %s", state);
1055
+ if (!config->allow_control_characters) {
1056
+ raise_parse_error("invalid ASCII control character in string: %s", state);
1057
+ }
1006
1058
  break;
1007
1059
  }
1008
1060
 
1009
1061
  state->cursor++;
1010
- }
1062
+ } while (string_scan(state));
1011
1063
 
1012
1064
  raise_parse_error("unexpected end of input, expected closing \"", state);
1013
1065
  return Qfalse;
1014
1066
  }
1015
1067
 
1068
+ ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
1069
+ {
1070
+ state->cursor++;
1071
+ const char *start = state->cursor;
1072
+
1073
+ if (RB_UNLIKELY(!string_scan(state))) {
1074
+ raise_parse_error("unexpected end of input, expected closing \"", state);
1075
+ }
1076
+
1077
+ if (RB_LIKELY(*state->cursor == '"')) {
1078
+ VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
1079
+ state->cursor++;
1080
+ return json_push_value(state, config, string);
1081
+ }
1082
+ return json_parse_escaped_string(state, config, is_name, start);
1083
+ }
1084
+
1085
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
1086
+ // From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
1087
+ // Additional References:
1088
+ // https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
1089
+ // http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
1090
+ static inline uint64_t decode_8digits_unrolled(uint64_t val) {
1091
+ const uint64_t mask = 0x000000FF000000FF;
1092
+ const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
1093
+ const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
1094
+ val -= 0x3030303030303030;
1095
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
1096
+ val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
1097
+ return val;
1098
+ }
1099
+
1100
+ static inline uint64_t decode_4digits_unrolled(uint32_t val) {
1101
+ const uint32_t mask = 0x000000FF;
1102
+ const uint32_t mul1 = 100;
1103
+ val -= 0x30303030;
1104
+ val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
1105
+ val = ((val & mask) * mul1) + (((val >> 16) & mask));
1106
+ return val;
1107
+ }
1108
+ #endif
1109
+
1110
+ static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
1111
+ {
1112
+ const char *start = state->cursor;
1113
+
1114
+ #if JSON_CPU_LITTLE_ENDIAN_64BITS
1115
+ while (rest(state) >= sizeof(uint64_t)) {
1116
+ uint64_t next_8bytes;
1117
+ memcpy(&next_8bytes, state->cursor, sizeof(uint64_t));
1118
+
1119
+ // From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
1120
+ // Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
1121
+ uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
1122
+
1123
+ if (match == 0x3333333333333333) { // 8 consecutive digits
1124
+ *accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
1125
+ state->cursor += 8;
1126
+ continue;
1127
+ }
1128
+
1129
+ uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
1130
+
1131
+ if (consecutive_digits >= 4) {
1132
+ *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
1133
+ state->cursor += 4;
1134
+ consecutive_digits -= 4;
1135
+ }
1136
+
1137
+ while (consecutive_digits) {
1138
+ *accumulator = *accumulator * 10 + (*state->cursor - '0');
1139
+ consecutive_digits--;
1140
+ state->cursor++;
1141
+ }
1142
+
1143
+ return (int)(state->cursor - start);
1144
+ }
1145
+ #endif
1146
+
1147
+ char next_char;
1148
+ while (rb_isdigit(next_char = peek(state))) {
1149
+ *accumulator = *accumulator * 10 + (next_char - '0');
1150
+ state->cursor++;
1151
+ }
1152
+ return (int)(state->cursor - start);
1153
+ }
1154
+
1155
+ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
1156
+ {
1157
+ bool integer = true;
1158
+ const char first_digit = *state->cursor;
1159
+
1160
+ // Variables for Ryu optimization - extract digits during parsing
1161
+ int64_t exponent = 0;
1162
+ int decimal_point_pos = -1;
1163
+ uint64_t mantissa = 0;
1164
+
1165
+ // Parse integer part and extract mantissa digits
1166
+ int mantissa_digits = json_parse_digits(state, &mantissa);
1167
+
1168
+ if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
1169
+ raise_parse_error_at("invalid number: %s", state, start);
1170
+ }
1171
+
1172
+ // Parse fractional part
1173
+ if (peek(state) == '.') {
1174
+ integer = false;
1175
+ decimal_point_pos = mantissa_digits; // Remember position of decimal point
1176
+ state->cursor++;
1177
+
1178
+ int fractional_digits = json_parse_digits(state, &mantissa);
1179
+ mantissa_digits += fractional_digits;
1180
+
1181
+ if (RB_UNLIKELY(!fractional_digits)) {
1182
+ raise_parse_error_at("invalid number: %s", state, start);
1183
+ }
1184
+ }
1185
+
1186
+ // Parse exponent
1187
+ if (rb_tolower(peek(state)) == 'e') {
1188
+ integer = false;
1189
+ state->cursor++;
1190
+
1191
+ bool negative_exponent = false;
1192
+ const char next_char = peek(state);
1193
+ if (next_char == '-' || next_char == '+') {
1194
+ negative_exponent = next_char == '-';
1195
+ state->cursor++;
1196
+ }
1197
+
1198
+ uint64_t abs_exponent = 0;
1199
+ int exponent_digits = json_parse_digits(state, &abs_exponent);
1200
+
1201
+ if (RB_UNLIKELY(!exponent_digits)) {
1202
+ raise_parse_error_at("invalid number: %s", state, start);
1203
+ }
1204
+
1205
+ exponent = negative_exponent ? -abs_exponent : abs_exponent;
1206
+ }
1207
+
1208
+ if (integer) {
1209
+ return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
1210
+ }
1211
+
1212
+ // Adjust exponent based on decimal point position
1213
+ if (decimal_point_pos >= 0) {
1214
+ exponent -= (mantissa_digits - decimal_point_pos);
1215
+ }
1216
+
1217
+ return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
1218
+ }
1219
+
1220
+ static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
1221
+ {
1222
+ return json_parse_number(state, config, false, state->cursor);
1223
+ }
1224
+
1225
+ static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
1226
+ {
1227
+ const char *start = state->cursor;
1228
+ state->cursor++;
1229
+ return json_parse_number(state, config, true, start);
1230
+ }
1231
+
1016
1232
  static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1017
1233
  {
1018
1234
  json_eat_whitespace(state);
1019
- if (state->cursor >= state->end) {
1020
- raise_parse_error("unexpected end of input", state);
1021
- }
1022
1235
 
1023
- switch (*state->cursor) {
1236
+ switch (peek(state)) {
1024
1237
  case 'n':
1025
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
1238
+ if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
1026
1239
  state->cursor += 4;
1027
1240
  return json_push_value(state, config, Qnil);
1028
1241
  }
@@ -1030,7 +1243,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1030
1243
  raise_parse_error("unexpected token %s", state);
1031
1244
  break;
1032
1245
  case 't':
1033
- if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
1246
+ if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
1034
1247
  state->cursor += 4;
1035
1248
  return json_push_value(state, config, Qtrue);
1036
1249
  }
@@ -1039,7 +1252,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1039
1252
  break;
1040
1253
  case 'f':
1041
1254
  // Note: memcmp with a small power of two compile to an integer comparison
1042
- if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1255
+ if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1043
1256
  state->cursor += 5;
1044
1257
  return json_push_value(state, config, Qfalse);
1045
1258
  }
@@ -1048,7 +1261,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1048
1261
  break;
1049
1262
  case 'N':
1050
1263
  // Note: memcmp with a small power of two compile to an integer comparison
1051
- if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1264
+ if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1052
1265
  state->cursor += 3;
1053
1266
  return json_push_value(state, config, CNaN);
1054
1267
  }
@@ -1056,16 +1269,16 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1056
1269
  raise_parse_error("unexpected token %s", state);
1057
1270
  break;
1058
1271
  case 'I':
1059
- if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1272
+ if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1060
1273
  state->cursor += 8;
1061
1274
  return json_push_value(state, config, CInfinity);
1062
1275
  }
1063
1276
 
1064
1277
  raise_parse_error("unexpected token %s", state);
1065
1278
  break;
1066
- case '-':
1279
+ case '-': {
1067
1280
  // Note: memcmp with a small power of two compile to an integer comparison
1068
- if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1281
+ if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1069
1282
  if (config->allow_nan) {
1070
1283
  state->cursor += 9;
1071
1284
  return json_push_value(state, config, CMinusInfinity);
@@ -1073,62 +1286,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1073
1286
  raise_parse_error("unexpected token %s", state);
1074
1287
  }
1075
1288
  }
1076
- // Fallthrough
1077
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
1078
- bool integer = true;
1079
-
1080
- // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
1081
- const char *start = state->cursor;
1082
- state->cursor++;
1083
-
1084
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1085
- state->cursor++;
1086
- }
1087
-
1088
- long integer_length = state->cursor - start;
1089
-
1090
- if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
1091
- raise_parse_error_at("invalid number: %s", state, start);
1092
- } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
1093
- raise_parse_error_at("invalid number: %s", state, start);
1094
- } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
1095
- raise_parse_error_at("invalid number: %s", state, start);
1096
- }
1097
-
1098
- if ((state->cursor < state->end) && (*state->cursor == '.')) {
1099
- integer = false;
1100
- state->cursor++;
1101
-
1102
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1103
- raise_parse_error("invalid number: %s", state);
1104
- }
1105
-
1106
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1107
- state->cursor++;
1108
- }
1109
- }
1110
-
1111
- if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
1112
- integer = false;
1113
- state->cursor++;
1114
- if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
1115
- state->cursor++;
1116
- }
1117
-
1118
- if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1119
- raise_parse_error("invalid number: %s", state);
1120
- }
1121
-
1122
- while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1123
- state->cursor++;
1124
- }
1125
- }
1126
-
1127
- if (integer) {
1128
- return json_push_value(state, config, json_decode_integer(start, state->cursor));
1129
- }
1130
- return json_push_value(state, config, json_decode_float(config, start, state->cursor));
1289
+ return json_push_value(state, config, json_parse_negative_number(state, config));
1290
+ break;
1131
1291
  }
1292
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1293
+ return json_push_value(state, config, json_parse_positive_number(state, config));
1294
+ break;
1132
1295
  case '"': {
1133
1296
  // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1134
1297
  return json_parse_string(state, config, false);
@@ -1139,7 +1302,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1139
1302
  json_eat_whitespace(state);
1140
1303
  long stack_head = state->stack->head;
1141
1304
 
1142
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
1305
+ if (peek(state) == ']') {
1143
1306
  state->cursor++;
1144
1307
  return json_push_value(state, config, json_decode_array(state, config, 0));
1145
1308
  } else {
@@ -1154,26 +1317,26 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1154
1317
  while (true) {
1155
1318
  json_eat_whitespace(state);
1156
1319
 
1157
- if (state->cursor < state->end) {
1158
- if (*state->cursor == ']') {
1159
- state->cursor++;
1160
- long count = state->stack->head - stack_head;
1161
- state->current_nesting--;
1162
- state->in_array--;
1163
- return json_push_value(state, config, json_decode_array(state, config, count));
1164
- }
1320
+ const char next_char = peek(state);
1165
1321
 
1166
- if (*state->cursor == ',') {
1167
- state->cursor++;
1168
- if (config->allow_trailing_comma) {
1169
- json_eat_whitespace(state);
1170
- if ((state->cursor < state->end) && (*state->cursor == ']')) {
1171
- continue;
1172
- }
1322
+ if (RB_LIKELY(next_char == ',')) {
1323
+ state->cursor++;
1324
+ if (config->allow_trailing_comma) {
1325
+ json_eat_whitespace(state);
1326
+ if (peek(state) == ']') {
1327
+ continue;
1173
1328
  }
1174
- json_parse_any(state, config);
1175
- continue;
1176
1329
  }
1330
+ json_parse_any(state, config);
1331
+ continue;
1332
+ }
1333
+
1334
+ if (next_char == ']') {
1335
+ state->cursor++;
1336
+ long count = state->stack->head - stack_head;
1337
+ state->current_nesting--;
1338
+ state->in_array--;
1339
+ return json_push_value(state, config, json_decode_array(state, config, count));
1177
1340
  }
1178
1341
 
1179
1342
  raise_parse_error("expected ',' or ']' after array value", state);
@@ -1187,7 +1350,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1187
1350
  json_eat_whitespace(state);
1188
1351
  long stack_head = state->stack->head;
1189
1352
 
1190
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
1353
+ if (peek(state) == '}') {
1191
1354
  state->cursor++;
1192
1355
  return json_push_value(state, config, json_decode_object(state, config, 0));
1193
1356
  } else {
@@ -1196,13 +1359,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1196
1359
  rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1197
1360
  }
1198
1361
 
1199
- if (*state->cursor != '"') {
1362
+ if (peek(state) != '"') {
1200
1363
  raise_parse_error("expected object key, got %s", state);
1201
1364
  }
1202
1365
  json_parse_string(state, config, true);
1203
1366
 
1204
1367
  json_eat_whitespace(state);
1205
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1368
+ if (peek(state) != ':') {
1206
1369
  raise_parse_error("expected ':' after object key", state);
1207
1370
  }
1208
1371
  state->cursor++;
@@ -1213,46 +1376,45 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1213
1376
  while (true) {
1214
1377
  json_eat_whitespace(state);
1215
1378
 
1216
- if (state->cursor < state->end) {
1217
- if (*state->cursor == '}') {
1218
- state->cursor++;
1219
- state->current_nesting--;
1220
- size_t count = state->stack->head - stack_head;
1379
+ const char next_char = peek(state);
1380
+ if (next_char == '}') {
1381
+ state->cursor++;
1382
+ state->current_nesting--;
1383
+ size_t count = state->stack->head - stack_head;
1221
1384
 
1222
- // Temporary rewind cursor in case an error is raised
1223
- const char *final_cursor = state->cursor;
1224
- state->cursor = object_start_cursor;
1225
- VALUE object = json_decode_object(state, config, count);
1226
- state->cursor = final_cursor;
1385
+ // Temporary rewind cursor in case an error is raised
1386
+ const char *final_cursor = state->cursor;
1387
+ state->cursor = object_start_cursor;
1388
+ VALUE object = json_decode_object(state, config, count);
1389
+ state->cursor = final_cursor;
1227
1390
 
1228
- return json_push_value(state, config, object);
1229
- }
1391
+ return json_push_value(state, config, object);
1392
+ }
1230
1393
 
1231
- if (*state->cursor == ',') {
1232
- state->cursor++;
1233
- json_eat_whitespace(state);
1394
+ if (next_char == ',') {
1395
+ state->cursor++;
1396
+ json_eat_whitespace(state);
1234
1397
 
1235
- if (config->allow_trailing_comma) {
1236
- if ((state->cursor < state->end) && (*state->cursor == '}')) {
1237
- continue;
1238
- }
1398
+ if (config->allow_trailing_comma) {
1399
+ if (peek(state) == '}') {
1400
+ continue;
1239
1401
  }
1402
+ }
1240
1403
 
1241
- if (*state->cursor != '"') {
1242
- raise_parse_error("expected object key, got: %s", state);
1243
- }
1244
- json_parse_string(state, config, true);
1404
+ if (RB_UNLIKELY(peek(state) != '"')) {
1405
+ raise_parse_error("expected object key, got: %s", state);
1406
+ }
1407
+ json_parse_string(state, config, true);
1245
1408
 
1246
- json_eat_whitespace(state);
1247
- if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1248
- raise_parse_error("expected ':' after object key, got: %s", state);
1249
- }
1250
- state->cursor++;
1409
+ json_eat_whitespace(state);
1410
+ if (RB_UNLIKELY(peek(state) != ':')) {
1411
+ raise_parse_error("expected ':' after object key, got: %s", state);
1412
+ }
1413
+ state->cursor++;
1251
1414
 
1252
- json_parse_any(state, config);
1415
+ json_parse_any(state, config);
1253
1416
 
1254
- continue;
1255
- }
1417
+ continue;
1256
1418
  }
1257
1419
 
1258
1420
  raise_parse_error("expected ',' or '}' after object value, got: %s", state);
@@ -1260,18 +1422,23 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1260
1422
  break;
1261
1423
  }
1262
1424
 
1425
+ case 0:
1426
+ raise_parse_error("unexpected end of input", state);
1427
+ break;
1428
+
1263
1429
  default:
1264
1430
  raise_parse_error("unexpected character: %s", state);
1265
1431
  break;
1266
1432
  }
1267
1433
 
1268
- raise_parse_error("unreacheable: %s", state);
1434
+ raise_parse_error("unreachable: %s", state);
1435
+ return Qundef;
1269
1436
  }
1270
1437
 
1271
1438
  static void json_ensure_eof(JSON_ParserState *state)
1272
1439
  {
1273
1440
  json_eat_whitespace(state);
1274
- if (state->cursor != state->end) {
1441
+ if (!eos(state)) {
1275
1442
  raise_parse_error("unexpected token at end of stream %s", state);
1276
1443
  }
1277
1444
  }
@@ -1308,14 +1475,16 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1308
1475
  {
1309
1476
  JSON_ParserConfig *config = (JSON_ParserConfig *)data;
1310
1477
 
1311
- if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1312
- else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1313
- else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1314
- else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1315
- else if (key == sym_freeze) { config->freeze = RTEST(val); }
1316
- else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1317
- else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1318
- else if (key == sym_decimal_class) {
1478
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1479
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1480
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1481
+ else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
1482
+ else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
1483
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1484
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
1485
+ else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1486
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1487
+ else if (key == sym_decimal_class) {
1319
1488
  if (RTEST(val)) {
1320
1489
  if (rb_respond_to(val, i_try_convert)) {
1321
1490
  config->decimal_class = val;
@@ -1388,6 +1557,7 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
1388
1557
  */
1389
1558
  static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
1390
1559
  {
1560
+ rb_check_frozen(self);
1391
1561
  GET_PARSER_CONFIG;
1392
1562
 
1393
1563
  parser_config_init(config, opts);
@@ -1413,11 +1583,13 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1413
1583
  const char *start;
1414
1584
  RSTRING_GETMEM(Vsource, start, len);
1415
1585
 
1586
+ VALUE stack_handle = 0;
1416
1587
  JSON_ParserState _state = {
1417
1588
  .start = start,
1418
1589
  .cursor = start,
1419
1590
  .end = start + len,
1420
1591
  .stack = &stack,
1592
+ .stack_handle = &stack_handle,
1421
1593
  };
1422
1594
  JSON_ParserState *state = &_state;
1423
1595
 
@@ -1425,8 +1597,8 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1425
1597
 
1426
1598
  // This may be skipped in case of exception, but
1427
1599
  // it won't cause a leak.
1428
- rvalue_stack_eagerly_release(state->stack_handle);
1429
-
1600
+ rvalue_stack_eagerly_release(stack_handle);
1601
+ RB_GC_GUARD(stack_handle);
1430
1602
  json_ensure_eof(state);
1431
1603
 
1432
1604
  return result;
@@ -1464,26 +1636,19 @@ static void JSON_ParserConfig_mark(void *ptr)
1464
1636
  rb_gc_mark(config->decimal_class);
1465
1637
  }
1466
1638
 
1467
- static void JSON_ParserConfig_free(void *ptr)
1468
- {
1469
- JSON_ParserConfig *config = ptr;
1470
- ruby_xfree(config);
1471
- }
1472
-
1473
1639
  static size_t JSON_ParserConfig_memsize(const void *ptr)
1474
1640
  {
1475
1641
  return sizeof(JSON_ParserConfig);
1476
1642
  }
1477
1643
 
1478
1644
  static const rb_data_type_t JSON_ParserConfig_type = {
1479
- "JSON::Ext::Parser/ParserConfig",
1480
- {
1645
+ .wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
1646
+ .function = {
1481
1647
  JSON_ParserConfig_mark,
1482
- JSON_ParserConfig_free,
1648
+ RUBY_DEFAULT_FREE,
1483
1649
  JSON_ParserConfig_memsize,
1484
1650
  },
1485
- 0, 0,
1486
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
1651
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
1487
1652
  };
1488
1653
 
1489
1654
  static VALUE cJSON_parser_s_allocate(VALUE klass)
@@ -1527,16 +1692,14 @@ void Init_parser(void)
1527
1692
  sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1528
1693
  sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1529
1694
  sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1695
+ sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
1696
+ sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
1530
1697
  sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1531
1698
  sym_freeze = ID2SYM(rb_intern("freeze"));
1532
1699
  sym_on_load = ID2SYM(rb_intern("on_load"));
1533
1700
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1534
1701
  sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1535
1702
 
1536
- i_chr = rb_intern("chr");
1537
- i_aset = rb_intern("[]=");
1538
- i_aref = rb_intern("[]");
1539
- i_leftshift = rb_intern("<<");
1540
1703
  i_new = rb_intern("new");
1541
1704
  i_try_convert = rb_intern("try_convert");
1542
1705
  i_uminus = rb_intern("-@");