ed-precompiled_json 2.15.1-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +7 -0
  2. data/BSDL +22 -0
  3. data/CHANGES.md +693 -0
  4. data/COPYING +56 -0
  5. data/LEGAL +8 -0
  6. data/README.md +283 -0
  7. data/ext/json/ext/fbuffer/fbuffer.h +296 -0
  8. data/ext/json/ext/generator/extconf.rb +16 -0
  9. data/ext/json/ext/generator/generator.c +2169 -0
  10. data/ext/json/ext/parser/extconf.rb +15 -0
  11. data/ext/json/ext/parser/parser.c +1557 -0
  12. data/ext/json/ext/simd/conf.rb +24 -0
  13. data/ext/json/ext/simd/simd.h +188 -0
  14. data/ext/json/ext/vendor/fpconv.c +480 -0
  15. data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
  16. data/json.gemspec +62 -0
  17. data/lib/json/add/bigdecimal.rb +58 -0
  18. data/lib/json/add/complex.rb +51 -0
  19. data/lib/json/add/core.rb +13 -0
  20. data/lib/json/add/date.rb +54 -0
  21. data/lib/json/add/date_time.rb +67 -0
  22. data/lib/json/add/exception.rb +49 -0
  23. data/lib/json/add/ostruct.rb +54 -0
  24. data/lib/json/add/range.rb +54 -0
  25. data/lib/json/add/rational.rb +49 -0
  26. data/lib/json/add/regexp.rb +48 -0
  27. data/lib/json/add/set.rb +48 -0
  28. data/lib/json/add/string.rb +35 -0
  29. data/lib/json/add/struct.rb +52 -0
  30. data/lib/json/add/symbol.rb +52 -0
  31. data/lib/json/add/time.rb +52 -0
  32. data/lib/json/common.rb +1130 -0
  33. data/lib/json/ext/3.0/generator.so +0 -0
  34. data/lib/json/ext/3.0/parser.so +0 -0
  35. data/lib/json/ext/3.1/generator.so +0 -0
  36. data/lib/json/ext/3.1/parser.so +0 -0
  37. data/lib/json/ext/3.2/generator.so +0 -0
  38. data/lib/json/ext/3.2/parser.so +0 -0
  39. data/lib/json/ext/3.3/generator.so +0 -0
  40. data/lib/json/ext/3.3/parser.so +0 -0
  41. data/lib/json/ext/3.4/generator.so +0 -0
  42. data/lib/json/ext/3.4/parser.so +0 -0
  43. data/lib/json/ext/generator/state.rb +99 -0
  44. data/lib/json/ext/generator.so +0 -0
  45. data/lib/json/ext/parser.so +0 -0
  46. data/lib/json/ext.rb +57 -0
  47. data/lib/json/generic_object.rb +67 -0
  48. data/lib/json/truffle_ruby/generator.rb +708 -0
  49. data/lib/json/version.rb +5 -0
  50. data/lib/json.rb +642 -0
  51. metadata +101 -0
@@ -0,0 +1,1557 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+
4
+ /* shims */
5
+ /* This is the fallback definition from Ruby 3.4 */
6
+
7
+ #ifndef RBIMPL_STDBOOL_H
8
+ #if defined(__cplusplus)
9
+ # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
+ # include <cstdbool>
11
+ # endif
12
+ #elif defined(HAVE_STDBOOL_H)
13
+ # include <stdbool.h>
14
+ #elif !defined(HAVE__BOOL)
15
+ typedef unsigned char _Bool;
16
+ # define bool _Bool
17
+ # define true ((_Bool)+1)
18
+ # define false ((_Bool)+0)
19
+ # define __bool_true_false_are_defined
20
+ #endif
21
+ #endif
22
+
23
+ #include "../simd/simd.h"
24
+
25
+ #ifndef RB_UNLIKELY
26
+ #define RB_UNLIKELY(expr) expr
27
+ #endif
28
+
29
+ #ifndef RB_LIKELY
30
+ #define RB_LIKELY(expr) expr
31
+ #endif
32
+
33
+ static VALUE mJSON, eNestingError, Encoding_UTF_8;
34
+ static VALUE CNaN, CInfinity, CMinusInfinity;
35
+
36
+ static ID i_chr, i_aset, i_aref,
37
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
38
+
39
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
40
+ sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
41
+
42
+ static int binary_encindex;
43
+ static int utf8_encindex;
44
+
45
+ #ifndef HAVE_RB_HASH_BULK_INSERT
46
+ // For TruffleRuby
47
+ void
48
+ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
49
+ {
50
+ long index = 0;
51
+ while (index < count) {
52
+ VALUE name = pairs[index++];
53
+ VALUE value = pairs[index++];
54
+ rb_hash_aset(hash, name, value);
55
+ }
56
+ RB_GC_GUARD(hash);
57
+ }
58
+ #endif
59
+
60
+ #ifndef HAVE_RB_HASH_NEW_CAPA
61
+ #define rb_hash_new_capa(n) rb_hash_new()
62
+ #endif
63
+
64
+
65
+ /* name cache */
66
+
67
+ #include <string.h>
68
+ #include <ctype.h>
69
+
70
+ // Object names are likely to be repeated, and are frozen.
71
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
72
+ // and save much more expensive lookups into the global fstring table.
73
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
74
+ // to be able to fit safely on the stack.
75
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
76
+ // performance.
77
+ #define JSON_RVALUE_CACHE_CAPA 63
78
+ typedef struct rvalue_cache_struct {
79
+ int length;
80
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
81
+ } rvalue_cache;
82
+
83
+ static rb_encoding *enc_utf8;
84
+
85
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
86
+
87
+ static inline VALUE build_interned_string(const char *str, const long length)
88
+ {
89
+ # ifdef HAVE_RB_ENC_INTERNED_STR
90
+ return rb_enc_interned_str(str, length, enc_utf8);
91
+ # else
92
+ VALUE rstring = rb_utf8_str_new(str, length);
93
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
94
+ # endif
95
+ }
96
+
97
+ static inline VALUE build_symbol(const char *str, const long length)
98
+ {
99
+ return rb_str_intern(build_interned_string(str, length));
100
+ }
101
+
102
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
103
+ {
104
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
105
+ cache->length++;
106
+ cache->entries[index] = rstring;
107
+ }
108
+
109
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
110
+ {
111
+ long rstring_length = RSTRING_LEN(rstring);
112
+ if (length == rstring_length) {
113
+ return memcmp(str, RSTRING_PTR(rstring), length);
114
+ } else {
115
+ return (int)(length - rstring_length);
116
+ }
117
+ }
118
+
119
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
120
+ {
121
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
122
+ // Common names aren't likely to be very long. So we just don't
123
+ // cache names above an arbitrary threshold.
124
+ return Qfalse;
125
+ }
126
+
127
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
128
+ // Simple heuristic, if the first character isn't a letter,
129
+ // we're much less likely to see this string again.
130
+ // We mostly want to cache strings that are likely to be repeated.
131
+ return Qfalse;
132
+ }
133
+
134
+ int low = 0;
135
+ int high = cache->length - 1;
136
+ int mid = 0;
137
+ int last_cmp = 0;
138
+
139
+ while (low <= high) {
140
+ mid = (high + low) >> 1;
141
+ VALUE entry = cache->entries[mid];
142
+ last_cmp = rstring_cache_cmp(str, length, entry);
143
+
144
+ if (last_cmp == 0) {
145
+ return entry;
146
+ } else if (last_cmp > 0) {
147
+ low = mid + 1;
148
+ } else {
149
+ high = mid - 1;
150
+ }
151
+ }
152
+
153
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
154
+ // We assume the overwhelming majority of names don't need to be escaped.
155
+ // But if they do, we have to fallback to the slow path.
156
+ return Qfalse;
157
+ }
158
+
159
+ VALUE rstring = build_interned_string(str, length);
160
+
161
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
162
+ if (last_cmp > 0) {
163
+ mid += 1;
164
+ }
165
+
166
+ rvalue_cache_insert_at(cache, mid, rstring);
167
+ }
168
+ return rstring;
169
+ }
170
+
171
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
172
+ {
173
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
174
+ // Common names aren't likely to be very long. So we just don't
175
+ // cache names above an arbitrary threshold.
176
+ return Qfalse;
177
+ }
178
+
179
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
180
+ // Simple heuristic, if the first character isn't a letter,
181
+ // we're much less likely to see this string again.
182
+ // We mostly want to cache strings that are likely to be repeated.
183
+ return Qfalse;
184
+ }
185
+
186
+ int low = 0;
187
+ int high = cache->length - 1;
188
+ int mid = 0;
189
+ int last_cmp = 0;
190
+
191
+ while (low <= high) {
192
+ mid = (high + low) >> 1;
193
+ VALUE entry = cache->entries[mid];
194
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
195
+
196
+ if (last_cmp == 0) {
197
+ return entry;
198
+ } else if (last_cmp > 0) {
199
+ low = mid + 1;
200
+ } else {
201
+ high = mid - 1;
202
+ }
203
+ }
204
+
205
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
206
+ // We assume the overwhelming majority of names don't need to be escaped.
207
+ // But if they do, we have to fallback to the slow path.
208
+ return Qfalse;
209
+ }
210
+
211
+ VALUE rsymbol = build_symbol(str, length);
212
+
213
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
214
+ if (last_cmp > 0) {
215
+ mid += 1;
216
+ }
217
+
218
+ rvalue_cache_insert_at(cache, mid, rsymbol);
219
+ }
220
+ return rsymbol;
221
+ }
222
+
223
+ /* rvalue stack */
224
+
225
+ #define RVALUE_STACK_INITIAL_CAPA 128
226
+
227
/* Tells whether an rvalue_stack has already been moved to the heap. */
enum rvalue_stack_type {
    RVALUE_STACK_HEAP_ALLOCATED = 0,  /* storage obtained from the Ruby allocator */
    RVALUE_STACK_STACK_ALLOCATED = 1, /* must be spilled before it can grow */
};
231
+
232
+ typedef struct rvalue_stack_struct {
233
+ enum rvalue_stack_type type;
234
+ long capa;
235
+ long head;
236
+ VALUE *ptr;
237
+ } rvalue_stack;
238
+
239
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
240
+
241
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
242
+ {
243
+ long required = stack->capa * 2;
244
+
245
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
246
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
247
+ } else {
248
+ REALLOC_N(stack->ptr, VALUE, required);
249
+ stack->capa = required;
250
+ }
251
+ return stack;
252
+ }
253
+
254
+ static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
255
+ {
256
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
257
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
258
+ }
259
+ stack->ptr[stack->head] = value;
260
+ stack->head++;
261
+ return value;
262
+ }
263
+
264
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
265
+ {
266
+ return stack->ptr + (stack->head - count);
267
+ }
268
+
269
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
270
+ {
271
+ stack->head -= count;
272
+ }
273
+
274
+ static void rvalue_stack_mark(void *ptr)
275
+ {
276
+ rvalue_stack *stack = (rvalue_stack *)ptr;
277
+ long index;
278
+ for (index = 0; index < stack->head; index++) {
279
+ rb_gc_mark(stack->ptr[index]);
280
+ }
281
+ }
282
+
283
+ static void rvalue_stack_free(void *ptr)
284
+ {
285
+ rvalue_stack *stack = (rvalue_stack *)ptr;
286
+ if (stack) {
287
+ ruby_xfree(stack->ptr);
288
+ ruby_xfree(stack);
289
+ }
290
+ }
291
+
292
+ static size_t rvalue_stack_memsize(const void *ptr)
293
+ {
294
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
295
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
296
+ }
297
+
298
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
299
+ "JSON::Ext::Parser/rvalue_stack",
300
+ {
301
+ .dmark = rvalue_stack_mark,
302
+ .dfree = rvalue_stack_free,
303
+ .dsize = rvalue_stack_memsize,
304
+ },
305
+ 0, 0,
306
+ RUBY_TYPED_FREE_IMMEDIATELY,
307
+ };
308
+
309
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
310
+ {
311
+ rvalue_stack *stack;
312
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
313
+ *stack_ref = stack;
314
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
315
+
316
+ stack->capa = old_stack->capa << 1;
317
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
318
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
319
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
320
+ return stack;
321
+ }
322
+
323
+ static void rvalue_stack_eagerly_release(VALUE handle)
324
+ {
325
+ if (handle) {
326
+ rvalue_stack *stack;
327
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
328
+ RTYPEDDATA_DATA(handle) = NULL;
329
+ rvalue_stack_free(stack);
330
+ }
331
+ }
332
+
333
+
334
+ #ifndef HAVE_STRNLEN
335
/* Length of `s`, looking at no more than `maxlen` bytes. */
static size_t strnlen(const char *s, size_t maxlen)
{
    const char *terminator = memchr(s, '\0', maxlen);
    if (terminator) {
        return (size_t)(terminator - s);
    }
    return maxlen;
}
340
+ #endif
341
+
342
/*
 * Encodes the code point `ch` as UTF-8 into `buf` (which must have room for
 * 4 bytes) and returns the number of bytes written (1 to 4).
 *
 * Values above 0x1FFFFF cannot be encoded and are written as a single '?'.
 */
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
{
    if (ch <= 0x7F) {
        buf[0] = (char) ch;
        return 1;
    }

    if (ch <= 0x07FF) {
        buf[0] = (char) ((ch >> 6) | 0xC0);
        buf[1] = (char) ((ch & 0x3F) | 0x80);
        return 2;
    }

    if (ch <= 0xFFFF) {
        buf[0] = (char) ((ch >> 12) | 0xE0);
        buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[2] = (char) ((ch & 0x3F) | 0x80);
        return 3;
    }

    if (ch <= 0x1fffff) {
        buf[0] = (char) ((ch >> 18) | 0xF0);
        buf[1] = (char) (((ch >> 12) & 0x3F) | 0x80);
        buf[2] = (char) (((ch >> 6) & 0x3F) | 0x80);
        buf[3] = (char) ((ch & 0x3F) | 0x80);
        return 4;
    }

    buf[0] = '?';
    return 1;
}
367
+
368
/* What json_decode_object() does when an object contains the same key twice. */
enum duplicate_key_action {
    JSON_DEPRECATED = 0, /* keep parsing but emit a deprecation warning */
    JSON_IGNORE = 1,     /* keep parsing silently */
    JSON_RAISE = 2,      /* raise a parse error */
};
373
+
374
+ typedef struct JSON_ParserStruct {
375
+ VALUE on_load_proc;
376
+ VALUE decimal_class;
377
+ ID decimal_method_id;
378
+ enum duplicate_key_action on_duplicate_key;
379
+ int max_nesting;
380
+ bool allow_nan;
381
+ bool allow_trailing_comma;
382
+ bool parsing_name;
383
+ bool symbolize_names;
384
+ bool freeze;
385
+ } JSON_ParserConfig;
386
+
387
+ typedef struct JSON_ParserStateStruct {
388
+ VALUE stack_handle;
389
+ const char *start;
390
+ const char *cursor;
391
+ const char *end;
392
+ rvalue_stack *stack;
393
+ rvalue_cache name_cache;
394
+ int in_array;
395
+ int current_nesting;
396
+ } JSON_ParserState;
397
+
398
+ static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
399
+ {
400
+ const char *cursor = state->cursor;
401
+ long column = 0;
402
+ long line = 1;
403
+
404
+ while (cursor >= state->start) {
405
+ if (*cursor-- == '\n') {
406
+ break;
407
+ }
408
+ column++;
409
+ }
410
+
411
+ while (cursor >= state->start) {
412
+ if (*cursor-- == '\n') {
413
+ line++;
414
+ }
415
+ }
416
+ *line_out = line;
417
+ *column_out = column;
418
+ }
419
+
420
+ static void emit_parse_warning(const char *message, JSON_ParserState *state)
421
+ {
422
+ long line, column;
423
+ cursor_position(state, &line, &column);
424
+
425
+ VALUE warning = rb_sprintf("%s at line %ld column %ld", message, line, column);
426
+ rb_funcall(mJSON, rb_intern("deprecation_warning"), 1, warning);
427
+ }
428
+
429
+ #define PARSE_ERROR_FRAGMENT_LEN 32
430
+
431
+ #ifdef RBIMPL_ATTR_NORETURN
432
+ RBIMPL_ATTR_NORETURN()
433
+ #endif
434
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
435
+ {
436
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
437
+ long line, column;
438
+ cursor_position(state, &line, &column);
439
+
440
+ const char *ptr = "EOF";
441
+ if (state->cursor && state->cursor < state->end) {
442
+ ptr = state->cursor;
443
+ size_t len = 0;
444
+ while (len < PARSE_ERROR_FRAGMENT_LEN) {
445
+ char ch = ptr[len];
446
+ if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
447
+ break;
448
+ }
449
+ len++;
450
+ }
451
+
452
+ if (len) {
453
+ buffer[0] = '\'';
454
+ MEMCPY(buffer + 1, ptr, char, len);
455
+
456
+ while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
457
+ len--;
458
+ }
459
+
460
+ if (buffer[len] >= 0xC0) { // multibyte character start
461
+ len--;
462
+ }
463
+
464
+ buffer[len + 1] = '\'';
465
+ buffer[len + 2] = '\0';
466
+ ptr = (const char *)buffer;
467
+ }
468
+ }
469
+
470
+ VALUE msg = rb_sprintf(format, ptr);
471
+ VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
472
+ RB_GC_GUARD(msg);
473
+
474
+ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
475
+ rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
476
+ rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
477
+ rb_exc_raise(exc);
478
+ }
479
+
480
+ #ifdef RBIMPL_ATTR_NORETURN
481
+ RBIMPL_ATTR_NORETURN()
482
+ #endif
483
+ static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
484
+ {
485
+ state->cursor = at;
486
+ raise_parse_error(format, state);
487
+ }
488
+
489
+ /* unicode */
490
+
491
/* Maps a byte to its hexadecimal digit value, or -1 when it isn't a hex digit. */
static const signed char digit_values[256] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* '0'..'9' */
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    /* 'A'..'F' */
    /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 'a'..'f' */
    /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
507
+
508
+ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
509
+ {
510
+ signed char b;
511
+ uint32_t result = 0;
512
+ b = digit_values[p[0]];
513
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
514
+ result = (result << 4) | (unsigned char)b;
515
+ b = digit_values[p[1]];
516
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
517
+ result = (result << 4) | (unsigned char)b;
518
+ b = digit_values[p[2]];
519
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
520
+ result = (result << 4) | (unsigned char)b;
521
+ b = digit_values[p[3]];
522
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
523
+ result = (result << 4) | (unsigned char)b;
524
+ return result;
525
+ }
526
+
527
+ #define GET_PARSER_CONFIG \
528
+ JSON_ParserConfig *config; \
529
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
530
+
531
+ static const rb_data_type_t JSON_ParserConfig_type;
532
+
533
/*
 * Bytes that json_eat_whitespace() has to look at: the four JSON whitespace
 * characters, plus '/' so that comments are handled from the same loop.
 */
static const bool whitespace[256] = {
    ['\t'] = 1,
    ['\n'] = 1,
    ['\r'] = 1,
    [' ']  = 1,
    ['/']  = 1, /* start of a comment */
};
540
+
541
+ static void
542
+ json_eat_comments(JSON_ParserState *state)
543
+ {
544
+ if (state->cursor + 1 < state->end) {
545
+ switch (state->cursor[1]) {
546
+ case '/': {
547
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
548
+ if (!state->cursor) {
549
+ state->cursor = state->end;
550
+ } else {
551
+ state->cursor++;
552
+ }
553
+ break;
554
+ }
555
+ case '*': {
556
+ state->cursor += 2;
557
+ while (true) {
558
+ state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
559
+ if (!state->cursor) {
560
+ raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
561
+ } else {
562
+ state->cursor++;
563
+ if (state->cursor < state->end && *state->cursor == '/') {
564
+ state->cursor++;
565
+ break;
566
+ }
567
+ }
568
+ }
569
+ break;
570
+ }
571
+ default:
572
+ raise_parse_error("unexpected token %s", state);
573
+ break;
574
+ }
575
+ } else {
576
+ raise_parse_error("unexpected token %s", state);
577
+ }
578
+ }
579
+
580
+ static inline void
581
+ json_eat_whitespace(JSON_ParserState *state)
582
+ {
583
+ while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
584
+ if (RB_LIKELY(*state->cursor != '/')) {
585
+ state->cursor++;
586
+ } else {
587
+ json_eat_comments(state);
588
+ }
589
+ }
590
+ }
591
+
592
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
593
+ {
594
+ if (symbolize) {
595
+ intern = true;
596
+ }
597
+ VALUE result;
598
+ # ifdef HAVE_RB_ENC_INTERNED_STR
599
+ if (intern) {
600
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
601
+ } else {
602
+ result = rb_utf8_str_new(start, (long)(end - start));
603
+ }
604
+ # else
605
+ result = rb_utf8_str_new(start, (long)(end - start));
606
+ if (intern) {
607
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
608
+ }
609
+ # endif
610
+
611
+ if (symbolize) {
612
+ result = rb_str_intern(result);
613
+ }
614
+
615
+ return result;
616
+ }
617
+
618
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
619
+ {
620
+ size_t bufferSize = stringEnd - string;
621
+
622
+ if (is_name && state->in_array) {
623
+ VALUE cached_key;
624
+ if (RB_UNLIKELY(symbolize)) {
625
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
626
+ } else {
627
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
628
+ }
629
+
630
+ if (RB_LIKELY(cached_key)) {
631
+ return cached_key;
632
+ }
633
+ }
634
+
635
+ return build_string(string, stringEnd, intern, symbolize);
636
+ }
637
+
638
+ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
639
+ {
640
+ size_t bufferSize = stringEnd - string;
641
+ const char *p = string, *pe = string, *unescape, *bufferStart;
642
+ char *buffer;
643
+ int unescape_len;
644
+ char buf[4];
645
+
646
+ if (is_name && state->in_array) {
647
+ VALUE cached_key;
648
+ if (RB_UNLIKELY(symbolize)) {
649
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
650
+ } else {
651
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
652
+ }
653
+
654
+ if (RB_LIKELY(cached_key)) {
655
+ return cached_key;
656
+ }
657
+ }
658
+
659
+ VALUE result = rb_str_buf_new(bufferSize);
660
+ rb_enc_associate_index(result, utf8_encindex);
661
+ buffer = RSTRING_PTR(result);
662
+ bufferStart = buffer;
663
+
664
+ while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
665
+ unescape = (char *) "?";
666
+ unescape_len = 1;
667
+ if (pe > p) {
668
+ MEMCPY(buffer, p, char, pe - p);
669
+ buffer += pe - p;
670
+ }
671
+ switch (*++pe) {
672
+ case 'n':
673
+ unescape = (char *) "\n";
674
+ break;
675
+ case 'r':
676
+ unescape = (char *) "\r";
677
+ break;
678
+ case 't':
679
+ unescape = (char *) "\t";
680
+ break;
681
+ case '"':
682
+ unescape = (char *) "\"";
683
+ break;
684
+ case '\\':
685
+ unescape = (char *) "\\";
686
+ break;
687
+ case 'b':
688
+ unescape = (char *) "\b";
689
+ break;
690
+ case 'f':
691
+ unescape = (char *) "\f";
692
+ break;
693
+ case 'u':
694
+ if (pe > stringEnd - 5) {
695
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
696
+ } else {
697
+ uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
698
+ pe += 3;
699
+ /* To handle values above U+FFFF, we take a sequence of
700
+ * \uXXXX escapes in the U+D800..U+DBFF then
701
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
702
+ * to make a 20-bit number, then add 0x10000 to get the
703
+ * final codepoint.
704
+ *
705
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
706
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
707
+ * Area".
708
+ */
709
+ if ((ch & 0xFC00) == 0xD800) {
710
+ pe++;
711
+ if (pe > stringEnd - 6) {
712
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
713
+ }
714
+ if (pe[0] == '\\' && pe[1] == 'u') {
715
+ uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
716
+
717
+ if ((sur & 0xFC00) != 0xDC00) {
718
+ raise_parse_error_at("invalid surrogate pair at %s", state, p);
719
+ }
720
+
721
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
722
+ | (sur & 0x3FF));
723
+ pe += 5;
724
+ } else {
725
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
726
+ break;
727
+ }
728
+ }
729
+ unescape_len = convert_UTF32_to_UTF8(buf, ch);
730
+ unescape = buf;
731
+ }
732
+ break;
733
+ default:
734
+ p = pe;
735
+ continue;
736
+ }
737
+ MEMCPY(buffer, unescape, char, unescape_len);
738
+ buffer += unescape_len;
739
+ p = ++pe;
740
+ }
741
+
742
+ if (stringEnd > p) {
743
+ MEMCPY(buffer, p, char, stringEnd - p);
744
+ buffer += stringEnd - p;
745
+ }
746
+ rb_str_set_len(result, buffer - bufferStart);
747
+
748
+ if (symbolize) {
749
+ result = rb_str_intern(result);
750
+ } else if (intern) {
751
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
752
+ }
753
+
754
+ return result;
755
+ }
756
+
757
+ #define MAX_FAST_INTEGER_SIZE 18
758
+ static inline VALUE fast_decode_integer(const char *p, const char *pe)
759
+ {
760
+ bool negative = false;
761
+ if (*p == '-') {
762
+ negative = true;
763
+ p++;
764
+ }
765
+
766
+ long long memo = 0;
767
+ while (p < pe) {
768
+ memo *= 10;
769
+ memo += *p - '0';
770
+ p++;
771
+ }
772
+
773
+ if (negative) {
774
+ memo = -memo;
775
+ }
776
+ return LL2NUM(memo);
777
+ }
778
+
779
+ static VALUE json_decode_large_integer(const char *start, long len)
780
+ {
781
+ VALUE buffer_v;
782
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
783
+ MEMCPY(buffer, start, char, len);
784
+ buffer[len] = '\0';
785
+ VALUE number = rb_cstr2inum(buffer, 10);
786
+ RB_ALLOCV_END(buffer_v);
787
+ return number;
788
+ }
789
+
790
+ static inline VALUE
791
+ json_decode_integer(const char *start, const char *end)
792
+ {
793
+ long len = end - start;
794
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
795
+ return fast_decode_integer(start, end);
796
+ }
797
+ return json_decode_large_integer(start, len);
798
+ }
799
+
800
+ static VALUE json_decode_large_float(const char *start, long len)
801
+ {
802
+ VALUE buffer_v;
803
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
804
+ MEMCPY(buffer, start, char, len);
805
+ buffer[len] = '\0';
806
+ VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
807
+ RB_ALLOCV_END(buffer_v);
808
+ return number;
809
+ }
810
+
811
+ static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
812
+ {
813
+ long len = end - start;
814
+
815
+ if (RB_UNLIKELY(config->decimal_class)) {
816
+ VALUE text = rb_str_new(start, len);
817
+ return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
818
+ } else if (RB_LIKELY(len < 64)) {
819
+ char buffer[64];
820
+ MEMCPY(buffer, start, char, len);
821
+ buffer[len] = '\0';
822
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
823
+ } else {
824
+ return json_decode_large_float(start, len);
825
+ }
826
+ }
827
+
828
+ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
829
+ {
830
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
831
+ rvalue_stack_pop(state->stack, count);
832
+
833
+ if (config->freeze) {
834
+ RB_OBJ_FREEZE(array);
835
+ }
836
+
837
+ return array;
838
+ }
839
+
840
+ static VALUE json_find_duplicated_key(size_t count, const VALUE *pairs)
841
+ {
842
+ VALUE set = rb_hash_new_capa(count / 2);
843
+ for (size_t index = 0; index < count; index += 2) {
844
+ size_t before = RHASH_SIZE(set);
845
+ VALUE key = pairs[index];
846
+ rb_hash_aset(set, key, Qtrue);
847
+ if (RHASH_SIZE(set) == before) {
848
+ if (RB_SYMBOL_P(key)) {
849
+ return rb_sym2str(key);
850
+ }
851
+ return key;
852
+ }
853
+ }
854
+ return Qfalse;
855
+ }
856
+
857
+ static void emit_duplicate_key_warning(JSON_ParserState *state, VALUE duplicate_key)
858
+ {
859
+ VALUE message = rb_sprintf(
860
+ "detected duplicate key %"PRIsVALUE" in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`",
861
+ rb_inspect(duplicate_key)
862
+ );
863
+
864
+ emit_parse_warning(RSTRING_PTR(message), state);
865
+ RB_GC_GUARD(message);
866
+ }
867
+
868
+ #ifdef RBIMPL_ATTR_NORETURN
869
+ RBIMPL_ATTR_NORETURN()
870
+ #endif
871
+ static void raise_duplicate_key_error(JSON_ParserState *state, VALUE duplicate_key)
872
+ {
873
+ VALUE message = rb_sprintf(
874
+ "duplicate key %"PRIsVALUE,
875
+ rb_inspect(duplicate_key)
876
+ );
877
+
878
+ raise_parse_error(RSTRING_PTR(message), state);
879
+ RB_GC_GUARD(message);
880
+ }
881
+
882
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
883
+ {
884
+ size_t entries_count = count / 2;
885
+ VALUE object = rb_hash_new_capa(entries_count);
886
+ const VALUE *pairs = rvalue_stack_peek(state->stack, count);
887
+ rb_hash_bulk_insert(count, pairs, object);
888
+
889
+ if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
890
+ switch (config->on_duplicate_key) {
891
+ case JSON_IGNORE:
892
+ break;
893
+ case JSON_DEPRECATED:
894
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
895
+ break;
896
+ case JSON_RAISE:
897
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
898
+ break;
899
+ }
900
+ }
901
+
902
+ rvalue_stack_pop(state->stack, count);
903
+
904
+ if (config->freeze) {
905
+ RB_OBJ_FREEZE(object);
906
+ }
907
+
908
+ return object;
909
+ }
910
+
911
+ static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
912
+ {
913
+ VALUE string;
914
+ bool intern = is_name || config->freeze;
915
+ bool symbolize = is_name && config->symbolize_names;
916
+ if (escaped) {
917
+ string = json_string_unescape(state, start, end, is_name, intern, symbolize);
918
+ } else {
919
+ string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
920
+ }
921
+
922
+ return string;
923
+ }
924
+
925
+ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
926
+ {
927
+ if (RB_UNLIKELY(config->on_load_proc)) {
928
+ value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
929
+ }
930
+ rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
931
+ return value;
932
+ }
933
+
934
/*
 * Bytes at which string_scan() stops: the ASCII control characters
 * (0x00..0x1F), the double quote and the backslash.
 */
static const bool string_scan_table[256] = {
    // ASCII Control Characters (0x00..0x1F)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // The only two printable characters of interest
    ['"']  = 1,
    ['\\'] = 1,
};
946
+
947
+ #if (defined(__GNUC__ ) || defined(__clang__))
948
+ #define FORCE_INLINE __attribute__((always_inline))
949
+ #else
950
+ #define FORCE_INLINE
951
+ #endif
952
+
953
+ #ifdef HAVE_SIMD
954
+ static SIMD_Implementation simd_impl = SIMD_NONE;
955
+ #endif /* HAVE_SIMD */
956
+
957
+ static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
958
+ {
959
+ #ifdef HAVE_SIMD
960
+ #if defined(HAVE_SIMD_NEON)
961
+
962
+ uint64_t mask = 0;
963
+ if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
964
+ state->cursor += trailing_zeros64(mask) >> 2;
965
+ return 1;
966
+ }
967
+
968
+ #elif defined(HAVE_SIMD_SSE2)
969
+ if (simd_impl == SIMD_SSE2) {
970
+ int mask = 0;
971
+ if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
972
+ state->cursor += trailing_zeros(mask);
973
+ return 1;
974
+ }
975
+ }
976
+ #endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
977
+ #endif /* HAVE_SIMD */
978
+
979
+ while (state->cursor < state->end) {
980
+ if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
981
+ return 1;
982
+ }
983
+ state->cursor++;
984
+ }
985
+ return 0;
986
+ }
987
+
988
+ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
989
+ {
990
+ state->cursor++;
991
+ const char *start = state->cursor;
992
+ bool escaped = false;
993
+
994
+ while (RB_UNLIKELY(string_scan(state))) {
995
+ switch (*state->cursor) {
996
+ case '"': {
997
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
998
+ state->cursor++;
999
+ return json_push_value(state, config, string);
1000
+ }
1001
+ case '\\': {
1002
+ state->cursor++;
1003
+ escaped = true;
1004
+ if ((unsigned char)*state->cursor < 0x20) {
1005
+ raise_parse_error("invalid ASCII control character in string: %s", state);
1006
+ }
1007
+ break;
1008
+ }
1009
+ default:
1010
+ raise_parse_error("invalid ASCII control character in string: %s", state);
1011
+ break;
1012
+ }
1013
+
1014
+ state->cursor++;
1015
+ }
1016
+
1017
+ raise_parse_error("unexpected end of input, expected closing \"", state);
1018
+ return Qfalse;
1019
+ }
1020
+
1021
+ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1022
+ {
1023
+ json_eat_whitespace(state);
1024
+ if (state->cursor >= state->end) {
1025
+ raise_parse_error("unexpected end of input", state);
1026
+ }
1027
+
1028
+ switch (*state->cursor) {
1029
+ case 'n':
1030
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
1031
+ state->cursor += 4;
1032
+ return json_push_value(state, config, Qnil);
1033
+ }
1034
+
1035
+ raise_parse_error("unexpected token %s", state);
1036
+ break;
1037
+ case 't':
1038
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
1039
+ state->cursor += 4;
1040
+ return json_push_value(state, config, Qtrue);
1041
+ }
1042
+
1043
+ raise_parse_error("unexpected token %s", state);
1044
+ break;
1045
+ case 'f':
1046
+ // Note: memcmp with a small power of two compile to an integer comparison
1047
+ if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1048
+ state->cursor += 5;
1049
+ return json_push_value(state, config, Qfalse);
1050
+ }
1051
+
1052
+ raise_parse_error("unexpected token %s", state);
1053
+ break;
1054
+ case 'N':
1055
+ // Note: memcmp with a small power of two compile to an integer comparison
1056
+ if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1057
+ state->cursor += 3;
1058
+ return json_push_value(state, config, CNaN);
1059
+ }
1060
+
1061
+ raise_parse_error("unexpected token %s", state);
1062
+ break;
1063
+ case 'I':
1064
+ if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1065
+ state->cursor += 8;
1066
+ return json_push_value(state, config, CInfinity);
1067
+ }
1068
+
1069
+ raise_parse_error("unexpected token %s", state);
1070
+ break;
1071
+ case '-':
1072
+ // Note: memcmp with a small power of two compile to an integer comparison
1073
+ if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1074
+ if (config->allow_nan) {
1075
+ state->cursor += 9;
1076
+ return json_push_value(state, config, CMinusInfinity);
1077
+ } else {
1078
+ raise_parse_error("unexpected token %s", state);
1079
+ }
1080
+ }
1081
+ // Fallthrough
1082
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
1083
+ bool integer = true;
1084
+
1085
+ // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
1086
+ const char *start = state->cursor;
1087
+ state->cursor++;
1088
+
1089
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1090
+ state->cursor++;
1091
+ }
1092
+
1093
+ long integer_length = state->cursor - start;
1094
+
1095
+ if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
1096
+ raise_parse_error_at("invalid number: %s", state, start);
1097
+ } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
1098
+ raise_parse_error_at("invalid number: %s", state, start);
1099
+ } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
1100
+ raise_parse_error_at("invalid number: %s", state, start);
1101
+ }
1102
+
1103
+ if ((state->cursor < state->end) && (*state->cursor == '.')) {
1104
+ integer = false;
1105
+ state->cursor++;
1106
+
1107
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1108
+ raise_parse_error("invalid number: %s", state);
1109
+ }
1110
+
1111
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1112
+ state->cursor++;
1113
+ }
1114
+ }
1115
+
1116
+ if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
1117
+ integer = false;
1118
+ state->cursor++;
1119
+ if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
1120
+ state->cursor++;
1121
+ }
1122
+
1123
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1124
+ raise_parse_error("invalid number: %s", state);
1125
+ }
1126
+
1127
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1128
+ state->cursor++;
1129
+ }
1130
+ }
1131
+
1132
+ if (integer) {
1133
+ return json_push_value(state, config, json_decode_integer(start, state->cursor));
1134
+ }
1135
+ return json_push_value(state, config, json_decode_float(config, start, state->cursor));
1136
+ }
1137
+ case '"': {
1138
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1139
+ return json_parse_string(state, config, false);
1140
+ break;
1141
+ }
1142
+ case '[': {
1143
+ state->cursor++;
1144
+ json_eat_whitespace(state);
1145
+ long stack_head = state->stack->head;
1146
+
1147
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1148
+ state->cursor++;
1149
+ return json_push_value(state, config, json_decode_array(state, config, 0));
1150
+ } else {
1151
+ state->current_nesting++;
1152
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1153
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1154
+ }
1155
+ state->in_array++;
1156
+ json_parse_any(state, config);
1157
+ }
1158
+
1159
+ while (true) {
1160
+ json_eat_whitespace(state);
1161
+
1162
+ if (state->cursor < state->end) {
1163
+ if (*state->cursor == ']') {
1164
+ state->cursor++;
1165
+ long count = state->stack->head - stack_head;
1166
+ state->current_nesting--;
1167
+ state->in_array--;
1168
+ return json_push_value(state, config, json_decode_array(state, config, count));
1169
+ }
1170
+
1171
+ if (*state->cursor == ',') {
1172
+ state->cursor++;
1173
+ if (config->allow_trailing_comma) {
1174
+ json_eat_whitespace(state);
1175
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1176
+ continue;
1177
+ }
1178
+ }
1179
+ json_parse_any(state, config);
1180
+ continue;
1181
+ }
1182
+ }
1183
+
1184
+ raise_parse_error("expected ',' or ']' after array value", state);
1185
+ }
1186
+ break;
1187
+ }
1188
+ case '{': {
1189
+ const char *object_start_cursor = state->cursor;
1190
+
1191
+ state->cursor++;
1192
+ json_eat_whitespace(state);
1193
+ long stack_head = state->stack->head;
1194
+
1195
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1196
+ state->cursor++;
1197
+ return json_push_value(state, config, json_decode_object(state, config, 0));
1198
+ } else {
1199
+ state->current_nesting++;
1200
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1201
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1202
+ }
1203
+
1204
+ if (*state->cursor != '"') {
1205
+ raise_parse_error("expected object key, got %s", state);
1206
+ }
1207
+ json_parse_string(state, config, true);
1208
+
1209
+ json_eat_whitespace(state);
1210
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1211
+ raise_parse_error("expected ':' after object key", state);
1212
+ }
1213
+ state->cursor++;
1214
+
1215
+ json_parse_any(state, config);
1216
+ }
1217
+
1218
+ while (true) {
1219
+ json_eat_whitespace(state);
1220
+
1221
+ if (state->cursor < state->end) {
1222
+ if (*state->cursor == '}') {
1223
+ state->cursor++;
1224
+ state->current_nesting--;
1225
+ size_t count = state->stack->head - stack_head;
1226
+
1227
+ // Temporary rewind cursor in case an error is raised
1228
+ const char *final_cursor = state->cursor;
1229
+ state->cursor = object_start_cursor;
1230
+ VALUE object = json_decode_object(state, config, count);
1231
+ state->cursor = final_cursor;
1232
+
1233
+ return json_push_value(state, config, object);
1234
+ }
1235
+
1236
+ if (*state->cursor == ',') {
1237
+ state->cursor++;
1238
+ json_eat_whitespace(state);
1239
+
1240
+ if (config->allow_trailing_comma) {
1241
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1242
+ continue;
1243
+ }
1244
+ }
1245
+
1246
+ if (*state->cursor != '"') {
1247
+ raise_parse_error("expected object key, got: %s", state);
1248
+ }
1249
+ json_parse_string(state, config, true);
1250
+
1251
+ json_eat_whitespace(state);
1252
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1253
+ raise_parse_error("expected ':' after object key, got: %s", state);
1254
+ }
1255
+ state->cursor++;
1256
+
1257
+ json_parse_any(state, config);
1258
+
1259
+ continue;
1260
+ }
1261
+ }
1262
+
1263
+ raise_parse_error("expected ',' or '}' after object value, got: %s", state);
1264
+ }
1265
+ break;
1266
+ }
1267
+
1268
+ default:
1269
+ raise_parse_error("unexpected character: %s", state);
1270
+ break;
1271
+ }
1272
+
1273
+ raise_parse_error("unreachable: %s", state);
1274
+ }
1275
+
1276
+ static void json_ensure_eof(JSON_ParserState *state)
1277
+ {
1278
+ json_eat_whitespace(state);
1279
+ if (state->cursor != state->end) {
1280
+ raise_parse_error("unexpected token at end of stream %s", state);
1281
+ }
1282
+ }
1283
+
1284
+ /*
1285
+ * Document-class: JSON::Ext::Parser
1286
+ *
1287
+ * This is the JSON parser implemented as a C extension. It can be configured
1288
+ * to be used by setting
1289
+ *
1290
+ * JSON.parser = JSON::Ext::Parser
1291
+ *
1292
+ * with the method parser= in JSON.
1293
+ *
1294
+ */
1295
+
1296
+ static VALUE convert_encoding(VALUE source)
1297
+ {
1298
+ int encindex = RB_ENCODING_GET(source);
1299
+
1300
+ if (RB_LIKELY(encindex == utf8_encindex)) {
1301
+ return source;
1302
+ }
1303
+
1304
+ if (encindex == binary_encindex) {
1305
+ // For historical reason, we silently reinterpret binary strings as UTF-8
1306
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1307
+ }
1308
+
1309
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
1310
+ }
1311
+
1312
+ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1313
+ {
1314
+ JSON_ParserConfig *config = (JSON_ParserConfig *)data;
1315
+
1316
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1317
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1318
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1319
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1320
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
1321
+ else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1322
+ else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1323
+ else if (key == sym_decimal_class) {
1324
+ if (RTEST(val)) {
1325
+ if (rb_respond_to(val, i_try_convert)) {
1326
+ config->decimal_class = val;
1327
+ config->decimal_method_id = i_try_convert;
1328
+ } else if (rb_respond_to(val, i_new)) {
1329
+ config->decimal_class = val;
1330
+ config->decimal_method_id = i_new;
1331
+ } else if (RB_TYPE_P(val, T_CLASS)) {
1332
+ VALUE name = rb_class_name(val);
1333
+ const char *name_cstr = RSTRING_PTR(name);
1334
+ const char *last_colon = strrchr(name_cstr, ':');
1335
+ if (last_colon) {
1336
+ const char *mod_path_end = last_colon - 1;
1337
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1338
+ config->decimal_class = rb_path_to_class(mod_path);
1339
+
1340
+ const char *method_name_beg = last_colon + 1;
1341
+ long before_len = method_name_beg - name_cstr;
1342
+ long len = RSTRING_LEN(name) - before_len;
1343
+ VALUE method_name = rb_str_substr(name, before_len, len);
1344
+ config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
1345
+ } else {
1346
+ config->decimal_class = rb_mKernel;
1347
+ config->decimal_method_id = SYM2ID(rb_str_intern(name));
1348
+ }
1349
+ }
1350
+ }
1351
+ }
1352
+
1353
+ return ST_CONTINUE;
1354
+ }
1355
+
1356
+ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
1357
+ {
1358
+ config->max_nesting = 100;
1359
+
1360
+ if (!NIL_P(opts)) {
1361
+ Check_Type(opts, T_HASH);
1362
+ if (RHASH_SIZE(opts) > 0) {
1363
+ // We assume in most cases few keys are set so it's faster to go over
1364
+ // the provided keys than to check all possible keys.
1365
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)config);
1366
+ }
1367
+
1368
+ }
1369
+ }
1370
+
1371
+ /*
1372
+ * call-seq: new(opts => {})
1373
+ *
1374
+ * Creates a new JSON::Ext::ParserConfig instance.
1375
+ *
1376
+ * It will be configured by the _opts_ hash. _opts_ can have the following
1377
+ * keys:
1378
+ *
1379
+ * _opts_ can have the following keys:
1380
+ * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
1381
+ * structures. Disable depth checking with :max_nesting => false|nil|0, it
1382
+ * defaults to 100.
1383
+ * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
1384
+ * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
1385
+ * false.
1386
+ * * *symbolize_names*: If set to true, returns symbols for the names
1387
+ * (keys) in a JSON object. Otherwise strings are returned, which is
1388
+ * also the default. It's not possible to use this option in
1389
+ * conjunction with the *create_additions* option.
1390
+ * * *decimal_class*: Specifies which class to use instead of the default
1391
+ * (Float) when parsing decimal numbers. This class must accept a single
1392
+ * string argument in its constructor.
1393
+ */
1394
+ static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
1395
+ {
1396
+ GET_PARSER_CONFIG;
1397
+
1398
+ parser_config_init(config, opts);
1399
+
1400
+ RB_OBJ_WRITTEN(self, Qundef, config->decimal_class);
1401
+
1402
+ return self;
1403
+ }
1404
+
1405
+ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1406
+ {
1407
+ Vsource = convert_encoding(StringValue(Vsource));
1408
+ StringValue(Vsource);
1409
+
1410
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1411
+ rvalue_stack stack = {
1412
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1413
+ .ptr = rvalue_stack_buffer,
1414
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1415
+ };
1416
+
1417
+ long len;
1418
+ const char *start;
1419
+ RSTRING_GETMEM(Vsource, start, len);
1420
+
1421
+ JSON_ParserState _state = {
1422
+ .start = start,
1423
+ .cursor = start,
1424
+ .end = start + len,
1425
+ .stack = &stack,
1426
+ };
1427
+ JSON_ParserState *state = &_state;
1428
+
1429
+ VALUE result = json_parse_any(state, config);
1430
+
1431
+ // This may be skipped in case of exception, but
1432
+ // it won't cause a leak.
1433
+ rvalue_stack_eagerly_release(state->stack_handle);
1434
+
1435
+ json_ensure_eof(state);
1436
+
1437
+ return result;
1438
+ }
1439
+
1440
+ /*
1441
+ * call-seq: parse(source)
1442
+ *
1443
+ * Parses the current JSON text _source_ and returns the complete data
1444
+ * structure as a result.
1445
+ * It raises JSON::ParserError if fail to parse.
1446
+ */
1447
+ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
1448
+ {
1449
+ GET_PARSER_CONFIG;
1450
+ return cParser_parse(config, Vsource);
1451
+ }
1452
+
1453
+ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
1454
+ {
1455
+ Vsource = convert_encoding(StringValue(Vsource));
1456
+ StringValue(Vsource);
1457
+
1458
+ JSON_ParserConfig _config = {0};
1459
+ JSON_ParserConfig *config = &_config;
1460
+ parser_config_init(config, opts);
1461
+
1462
+ return cParser_parse(config, Vsource);
1463
+ }
1464
+
1465
+ static void JSON_ParserConfig_mark(void *ptr)
1466
+ {
1467
+ JSON_ParserConfig *config = ptr;
1468
+ rb_gc_mark(config->on_load_proc);
1469
+ rb_gc_mark(config->decimal_class);
1470
+ }
1471
+
1472
/* GC free callback: release the JSON_ParserConfig struct itself. */
static void JSON_ParserConfig_free(void *ptr)
{
    ruby_xfree(ptr);
}
1477
+
1478
+ static size_t JSON_ParserConfig_memsize(const void *ptr)
1479
+ {
1480
+ return sizeof(JSON_ParserConfig);
1481
+ }
1482
+
1483
+ static const rb_data_type_t JSON_ParserConfig_type = {
1484
+ "JSON::Ext::Parser/ParserConfig",
1485
+ {
1486
+ JSON_ParserConfig_mark,
1487
+ JSON_ParserConfig_free,
1488
+ JSON_ParserConfig_memsize,
1489
+ },
1490
+ 0, 0,
1491
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
1492
+ };
1493
+
1494
+ static VALUE cJSON_parser_s_allocate(VALUE klass)
1495
+ {
1496
+ JSON_ParserConfig *config;
1497
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
1498
+ }
1499
+
1500
+ void Init_parser(void)
1501
+ {
1502
+ #ifdef HAVE_RB_EXT_RACTOR_SAFE
1503
+ rb_ext_ractor_safe(true);
1504
+ #endif
1505
+
1506
+ #undef rb_intern
1507
+ rb_require("json/common");
1508
+ mJSON = rb_define_module("JSON");
1509
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1510
+ VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
1511
+ eNestingError = rb_path2class("JSON::NestingError");
1512
+ rb_gc_register_mark_object(eNestingError);
1513
+ rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
1514
+ rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
1515
+ rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
1516
+
1517
+ VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
1518
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
1519
+
1520
+ CNaN = rb_const_get(mJSON, rb_intern("NaN"));
1521
+ rb_gc_register_mark_object(CNaN);
1522
+
1523
+ CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
1524
+ rb_gc_register_mark_object(CInfinity);
1525
+
1526
+ CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
1527
+ rb_gc_register_mark_object(CMinusInfinity);
1528
+
1529
+ rb_global_variable(&Encoding_UTF_8);
1530
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1531
+
1532
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1533
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1534
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1535
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1536
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1537
+ sym_on_load = ID2SYM(rb_intern("on_load"));
1538
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1539
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1540
+
1541
+ i_chr = rb_intern("chr");
1542
+ i_aset = rb_intern("[]=");
1543
+ i_aref = rb_intern("[]");
1544
+ i_leftshift = rb_intern("<<");
1545
+ i_new = rb_intern("new");
1546
+ i_try_convert = rb_intern("try_convert");
1547
+ i_uminus = rb_intern("-@");
1548
+ i_encode = rb_intern("encode");
1549
+
1550
+ binary_encindex = rb_ascii8bit_encindex();
1551
+ utf8_encindex = rb_utf8_encindex();
1552
+ enc_utf8 = rb_utf8_encoding();
1553
+
1554
+ #ifdef HAVE_SIMD
1555
+ simd_impl = find_simd_implementation();
1556
+ #endif
1557
+ }