json 2.7.3 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,48 +1,342 @@
1
- /* This file is automatically generated from parser.rl by using ragel */
2
- #line 1 "parser.rl"
3
- #include "../fbuffer/fbuffer.h"
4
- #include "parser.h"
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
5
3
 
6
- /* unicode */
4
+ /* shims */
5
+ /* This is the fallback definition from Ruby 3.4 */
7
6
 
8
- static const signed char digit_values[256] = {
9
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
10
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
12
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
13
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
14
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
15
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
16
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
17
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
18
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
19
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
20
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
21
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
22
- -1, -1, -1, -1, -1, -1, -1
7
+ #ifndef RBIMPL_STDBOOL_H
8
+ #if defined(__cplusplus)
9
+ # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
+ # include <cstdbool>
11
+ # endif
12
+ #elif defined(HAVE_STDBOOL_H)
13
+ # include <stdbool.h>
14
+ #elif !defined(HAVE__BOOL)
15
+ typedef unsigned char _Bool;
16
+ # define bool _Bool
17
+ # define true ((_Bool)+1)
18
+ # define false ((_Bool)+0)
19
+ # define __bool_true_false_are_defined
20
+ #endif
21
+ #endif
22
+
23
+ #ifndef RB_UNLIKELY
24
+ #define RB_UNLIKELY(expr) expr
25
+ #endif
26
+
27
+ #ifndef RB_LIKELY
28
+ #define RB_LIKELY(expr) expr
29
+ #endif
30
+
31
+ static VALUE mJSON, eNestingError, Encoding_UTF_8;
32
+ static VALUE CNaN, CInfinity, CMinusInfinity;
33
+
34
+ static ID i_chr, i_aset, i_aref,
35
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
36
+
37
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
38
+ sym_decimal_class, sym_on_load;
39
+
40
+ static int binary_encindex;
41
+ static int utf8_encindex;
42
+
43
+ #ifndef HAVE_RB_HASH_BULK_INSERT
44
+ // For TruffleRuby
45
+ void
46
+ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
47
+ {
48
+ long index = 0;
49
+ while (index < count) {
50
+ VALUE name = pairs[index++];
51
+ VALUE value = pairs[index++];
52
+ rb_hash_aset(hash, name, value);
53
+ }
54
+ RB_GC_GUARD(hash);
55
+ }
56
+ #endif
57
+
58
+ #ifndef HAVE_RB_HASH_NEW_CAPA
59
+ #define rb_hash_new_capa(n) rb_hash_new()
60
+ #endif
61
+
62
+
63
+ /* name cache */
64
+
65
+ #include <string.h>
66
+ #include <ctype.h>
67
+
68
+ // Object names are likely to be repeated, and are frozen.
69
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
70
+ // and save much more expensive lookups into the global fstring table.
71
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
72
+ // to be able to fit safely on the stack.
73
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
74
+ // performance.
75
+ #define JSON_RVALUE_CACHE_CAPA 63
76
+ typedef struct rvalue_cache_struct {
77
+ int length;
78
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
79
+ } rvalue_cache;
80
+
81
+ static rb_encoding *enc_utf8;
82
+
83
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
84
+
85
+ static inline VALUE build_interned_string(const char *str, const long length)
86
+ {
87
+ # ifdef HAVE_RB_ENC_INTERNED_STR
88
+ return rb_enc_interned_str(str, length, enc_utf8);
89
+ # else
90
+ VALUE rstring = rb_utf8_str_new(str, length);
91
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
92
+ # endif
93
+ }
94
+
95
+ static inline VALUE build_symbol(const char *str, const long length)
96
+ {
97
+ return rb_str_intern(build_interned_string(str, length));
98
+ }
99
+
100
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
101
+ {
102
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
103
+ cache->length++;
104
+ cache->entries[index] = rstring;
105
+ }
106
+
107
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
108
+ {
109
+ long rstring_length = RSTRING_LEN(rstring);
110
+ if (length == rstring_length) {
111
+ return memcmp(str, RSTRING_PTR(rstring), length);
112
+ } else {
113
+ return (int)(length - rstring_length);
114
+ }
115
+ }
116
+
117
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
118
+ {
119
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
120
+ // Common names aren't likely to be very long. So we just don't
121
+ // cache names above an arbitrary threshold.
122
+ return Qfalse;
123
+ }
124
+
125
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
126
+ // Simple heuristic, if the first character isn't a letter,
127
+ // we're much less likely to see this string again.
128
+ // We mostly want to cache strings that are likely to be repeated.
129
+ return Qfalse;
130
+ }
131
+
132
+ int low = 0;
133
+ int high = cache->length - 1;
134
+ int mid = 0;
135
+ int last_cmp = 0;
136
+
137
+ while (low <= high) {
138
+ mid = (high + low) >> 1;
139
+ VALUE entry = cache->entries[mid];
140
+ last_cmp = rstring_cache_cmp(str, length, entry);
141
+
142
+ if (last_cmp == 0) {
143
+ return entry;
144
+ } else if (last_cmp > 0) {
145
+ low = mid + 1;
146
+ } else {
147
+ high = mid - 1;
148
+ }
149
+ }
150
+
151
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
152
+ // We assume the overwhelming majority of names don't need to be escaped.
153
+ // But if they do, we have to fallback to the slow path.
154
+ return Qfalse;
155
+ }
156
+
157
+ VALUE rstring = build_interned_string(str, length);
158
+
159
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
160
+ if (last_cmp > 0) {
161
+ mid += 1;
162
+ }
163
+
164
+ rvalue_cache_insert_at(cache, mid, rstring);
165
+ }
166
+ return rstring;
167
+ }
168
+
169
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
170
+ {
171
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
172
+ // Common names aren't likely to be very long. So we just don't
173
+ // cache names above an arbitrary threshold.
174
+ return Qfalse;
175
+ }
176
+
177
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
178
+ // Simple heuristic, if the first character isn't a letter,
179
+ // we're much less likely to see this string again.
180
+ // We mostly want to cache strings that are likely to be repeated.
181
+ return Qfalse;
182
+ }
183
+
184
+ int low = 0;
185
+ int high = cache->length - 1;
186
+ int mid = 0;
187
+ int last_cmp = 0;
188
+
189
+ while (low <= high) {
190
+ mid = (high + low) >> 1;
191
+ VALUE entry = cache->entries[mid];
192
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
193
+
194
+ if (last_cmp == 0) {
195
+ return entry;
196
+ } else if (last_cmp > 0) {
197
+ low = mid + 1;
198
+ } else {
199
+ high = mid - 1;
200
+ }
201
+ }
202
+
203
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
204
+ // We assume the overwhelming majority of names don't need to be escaped.
205
+ // But if they do, we have to fallback to the slow path.
206
+ return Qfalse;
207
+ }
208
+
209
+ VALUE rsymbol = build_symbol(str, length);
210
+
211
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
212
+ if (last_cmp > 0) {
213
+ mid += 1;
214
+ }
215
+
216
+ rvalue_cache_insert_at(cache, mid, rsymbol);
217
+ }
218
+ return rsymbol;
219
+ }
220
+
221
+ /* rvalue stack */
222
+
223
+ #define RVALUE_STACK_INITIAL_CAPA 128
224
+
225
+ enum rvalue_stack_type {
226
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
227
+ RVALUE_STACK_STACK_ALLOCATED = 1,
23
228
  };
24
229
 
25
- static uint32_t unescape_unicode(const unsigned char *p)
230
+ typedef struct rvalue_stack_struct {
231
+ enum rvalue_stack_type type;
232
+ long capa;
233
+ long head;
234
+ VALUE *ptr;
235
+ } rvalue_stack;
236
+
237
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
238
+
239
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
26
240
  {
27
- const uint32_t replacement_char = 0xFFFD;
241
+ long required = stack->capa * 2;
28
242
 
29
- signed char b;
30
- uint32_t result = 0;
31
- b = digit_values[p[0]];
32
- if (b < 0) return replacement_char;
33
- result = (result << 4) | (unsigned char)b;
34
- b = digit_values[p[1]];
35
- if (b < 0) return replacement_char;
36
- result = (result << 4) | (unsigned char)b;
37
- b = digit_values[p[2]];
38
- if (b < 0) return replacement_char;
39
- result = (result << 4) | (unsigned char)b;
40
- b = digit_values[p[3]];
41
- if (b < 0) return replacement_char;
42
- result = (result << 4) | (unsigned char)b;
43
- return result;
243
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
244
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
245
+ } else {
246
+ REALLOC_N(stack->ptr, VALUE, required);
247
+ stack->capa = required;
248
+ }
249
+ return stack;
250
+ }
251
+
252
+ static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
253
+ {
254
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
255
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
256
+ }
257
+ stack->ptr[stack->head] = value;
258
+ stack->head++;
259
+ return value;
44
260
  }
45
261
 
262
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
263
+ {
264
+ return stack->ptr + (stack->head - count);
265
+ }
266
+
267
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
268
+ {
269
+ stack->head -= count;
270
+ }
271
+
272
+ static void rvalue_stack_mark(void *ptr)
273
+ {
274
+ rvalue_stack *stack = (rvalue_stack *)ptr;
275
+ long index;
276
+ for (index = 0; index < stack->head; index++) {
277
+ rb_gc_mark(stack->ptr[index]);
278
+ }
279
+ }
280
+
281
+ static void rvalue_stack_free(void *ptr)
282
+ {
283
+ rvalue_stack *stack = (rvalue_stack *)ptr;
284
+ if (stack) {
285
+ ruby_xfree(stack->ptr);
286
+ ruby_xfree(stack);
287
+ }
288
+ }
289
+
290
+ static size_t rvalue_stack_memsize(const void *ptr)
291
+ {
292
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
293
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
294
+ }
295
+
296
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
297
+ "JSON::Ext::Parser/rvalue_stack",
298
+ {
299
+ .dmark = rvalue_stack_mark,
300
+ .dfree = rvalue_stack_free,
301
+ .dsize = rvalue_stack_memsize,
302
+ },
303
+ 0, 0,
304
+ RUBY_TYPED_FREE_IMMEDIATELY,
305
+ };
306
+
307
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
308
+ {
309
+ rvalue_stack *stack;
310
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
311
+ *stack_ref = stack;
312
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
313
+
314
+ stack->capa = old_stack->capa << 1;
315
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
316
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
317
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
318
+ return stack;
319
+ }
320
+
321
+ static void rvalue_stack_eagerly_release(VALUE handle)
322
+ {
323
+ if (handle) {
324
+ rvalue_stack *stack;
325
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
326
+ RTYPEDDATA_DATA(handle) = NULL;
327
+ rvalue_stack_free(stack);
328
+ }
329
+ }
330
+
331
+
332
+ #ifndef HAVE_STRNLEN
333
+ static size_t strnlen(const char *s, size_t maxlen)
334
+ {
335
+ char *p;
336
+ return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
337
+ }
338
+ #endif
339
+
46
340
  static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
47
341
  {
48
342
  int len = 1;
@@ -69,1707 +363,774 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
69
363
  return len;
70
364
  }
71
365
 
366
+ typedef struct JSON_ParserStruct {
367
+ VALUE on_load_proc;
368
+ VALUE decimal_class;
369
+ ID decimal_method_id;
370
+ int max_nesting;
371
+ bool allow_nan;
372
+ bool allow_trailing_comma;
373
+ bool parsing_name;
374
+ bool symbolize_names;
375
+ bool freeze;
376
+ } JSON_ParserConfig;
377
+
378
+ typedef struct JSON_ParserStateStruct {
379
+ VALUE stack_handle;
380
+ const char *start;
381
+ const char *cursor;
382
+ const char *end;
383
+ rvalue_stack *stack;
384
+ rvalue_cache name_cache;
385
+ int in_array;
386
+ int current_nesting;
387
+ } JSON_ParserState;
388
+
389
+
72
390
  #define PARSE_ERROR_FRAGMENT_LEN 32
73
391
  #ifdef RBIMPL_ATTR_NORETURN
74
392
  RBIMPL_ATTR_NORETURN()
75
393
  #endif
76
- static void raise_parse_error(const char *format, const char *start)
394
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
77
395
  {
78
- char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
396
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
79
397
 
80
- size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
81
- const char *ptr = start;
398
+ const char *cursor = state->cursor;
399
+ long column = 0;
400
+ long line = 1;
82
401
 
83
- if (len == PARSE_ERROR_FRAGMENT_LEN) {
84
- MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
85
- buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
86
- ptr = buffer;
402
+ while (cursor >= state->start) {
403
+ if (*cursor-- == '\n') {
404
+ break;
405
+ }
406
+ column++;
87
407
  }
88
408
 
89
- rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
90
- }
91
-
92
- static VALUE mJSON, mExt, cParser, eNestingError;
93
- static VALUE CNaN, CInfinity, CMinusInfinity;
409
+ while (cursor >= state->start) {
410
+ if (*cursor-- == '\n') {
411
+ line++;
412
+ }
413
+ }
94
414
 
95
- static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
96
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
97
- i_object_class, i_array_class, i_decimal_class,
98
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
99
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
415
+ const char *ptr = state->cursor;
416
+ size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
100
417
 
101
- static int binary_encindex;
102
- static int utf8_encindex;
418
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
419
+ MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
103
420
 
421
+ while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
422
+ len--;
423
+ }
104
424
 
425
+ if (buffer[len - 1] >= 0xC0) { // multibyte character start
426
+ len--;
427
+ }
105
428
 
106
- #line 129 "parser.rl"
429
+ buffer[len] = '\0';
430
+ ptr = (const char *)buffer;
431
+ }
107
432
 
433
+ VALUE msg = rb_sprintf(format, ptr);
434
+ VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
435
+ RB_GC_GUARD(msg);
108
436
 
437
+ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
438
+ rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
439
+ rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
440
+ rb_exc_raise(exc);
441
+ }
109
442
 
110
- #line 111 "parser.c"
111
- enum {JSON_object_start = 1};
112
- enum {JSON_object_first_final = 27};
113
- enum {JSON_object_error = 0};
443
+ #ifdef RBIMPL_ATTR_NORETURN
444
+ RBIMPL_ATTR_NORETURN()
445
+ #endif
446
+ static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
447
+ {
448
+ state->cursor = at;
449
+ raise_parse_error(format, state);
450
+ }
114
451
 
115
- enum {JSON_object_en_main = 1};
452
+ /* unicode */
116
453
 
454
+ static const signed char digit_values[256] = {
455
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
456
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
457
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
458
+ -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
459
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
460
+ 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
461
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
462
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
463
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
464
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
465
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
466
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
467
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
468
+ -1, -1, -1, -1, -1, -1, -1
469
+ };
117
470
 
118
- #line 171 "parser.rl"
471
+ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
472
+ {
473
+ signed char b;
474
+ uint32_t result = 0;
475
+ b = digit_values[p[0]];
476
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
477
+ result = (result << 4) | (unsigned char)b;
478
+ b = digit_values[p[1]];
479
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
480
+ result = (result << 4) | (unsigned char)b;
481
+ b = digit_values[p[2]];
482
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
483
+ result = (result << 4) | (unsigned char)b;
484
+ b = digit_values[p[3]];
485
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
486
+ result = (result << 4) | (unsigned char)b;
487
+ return result;
488
+ }
119
489
 
490
+ #define GET_PARSER_CONFIG \
491
+ JSON_ParserConfig *config; \
492
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
120
493
 
121
- static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
122
- {
123
- int cs = EVIL;
124
- VALUE last_name = Qnil;
125
- VALUE object_class = json->object_class;
494
+ static const rb_data_type_t JSON_ParserConfig_type;
126
495
 
127
- if (json->max_nesting && current_nesting > json->max_nesting) {
128
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
129
- }
496
+ static const bool whitespace[256] = {
497
+ [' '] = 1,
498
+ ['\t'] = 1,
499
+ ['\n'] = 1,
500
+ ['\r'] = 1,
501
+ ['/'] = 1,
502
+ };
130
503
 
131
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
132
-
133
-
134
- #line 135 "parser.c"
135
- {
136
- cs = JSON_object_start;
137
- }
138
-
139
- #line 186 "parser.rl"
140
-
141
- #line 142 "parser.c"
142
- {
143
- if ( p == pe )
144
- goto _test_eof;
145
- switch ( cs )
146
- {
147
- case 1:
148
- if ( (*p) == 123 )
149
- goto st2;
150
- goto st0;
151
- st0:
152
- cs = 0;
153
- goto _out;
154
- st2:
155
- if ( ++p == pe )
156
- goto _test_eof2;
157
- case 2:
158
- switch( (*p) ) {
159
- case 13: goto st2;
160
- case 32: goto st2;
161
- case 34: goto tr2;
162
- case 47: goto st23;
163
- case 125: goto tr4;
164
- }
165
- if ( 9 <= (*p) && (*p) <= 10 )
166
- goto st2;
167
- goto st0;
168
- tr2:
169
- #line 153 "parser.rl"
170
- {
171
- char *np;
172
- json->parsing_name = 1;
173
- np = JSON_parse_string(json, p, pe, &last_name);
174
- json->parsing_name = 0;
175
- if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;}
176
- }
177
- goto st3;
178
- st3:
179
- if ( ++p == pe )
180
- goto _test_eof3;
181
- case 3:
182
- #line 183 "parser.c"
183
- switch( (*p) ) {
184
- case 13: goto st3;
185
- case 32: goto st3;
186
- case 47: goto st4;
187
- case 58: goto st8;
188
- }
189
- if ( 9 <= (*p) && (*p) <= 10 )
190
- goto st3;
191
- goto st0;
192
- st4:
193
- if ( ++p == pe )
194
- goto _test_eof4;
195
- case 4:
196
- switch( (*p) ) {
197
- case 42: goto st5;
198
- case 47: goto st7;
199
- }
200
- goto st0;
201
- st5:
202
- if ( ++p == pe )
203
- goto _test_eof5;
204
- case 5:
205
- if ( (*p) == 42 )
206
- goto st6;
207
- goto st5;
208
- st6:
209
- if ( ++p == pe )
210
- goto _test_eof6;
211
- case 6:
212
- switch( (*p) ) {
213
- case 42: goto st6;
214
- case 47: goto st3;
215
- }
216
- goto st5;
217
- st7:
218
- if ( ++p == pe )
219
- goto _test_eof7;
220
- case 7:
221
- if ( (*p) == 10 )
222
- goto st3;
223
- goto st7;
224
- st8:
225
- if ( ++p == pe )
226
- goto _test_eof8;
227
- case 8:
228
- switch( (*p) ) {
229
- case 13: goto st8;
230
- case 32: goto st8;
231
- case 34: goto tr11;
232
- case 45: goto tr11;
233
- case 47: goto st19;
234
- case 73: goto tr11;
235
- case 78: goto tr11;
236
- case 91: goto tr11;
237
- case 102: goto tr11;
238
- case 110: goto tr11;
239
- case 116: goto tr11;
240
- case 123: goto tr11;
241
- }
242
- if ( (*p) > 10 ) {
243
- if ( 48 <= (*p) && (*p) <= 57 )
244
- goto tr11;
245
- } else if ( (*p) >= 9 )
246
- goto st8;
247
- goto st0;
248
- tr11:
249
- #line 137 "parser.rl"
250
- {
251
- VALUE v = Qnil;
252
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
253
- if (np == NULL) {
254
- p--; {p++; cs = 9; goto _out;}
255
- } else {
256
- if (NIL_P(json->object_class)) {
257
- OBJ_FREEZE(last_name);
258
- rb_hash_aset(*result, last_name, v);
259
- } else {
260
- rb_funcall(*result, i_aset, 2, last_name, v);
261
- }
262
- {p = (( np))-1;}
263
- }
264
- }
265
- goto st9;
266
- st9:
267
- if ( ++p == pe )
268
- goto _test_eof9;
269
- case 9:
270
- #line 271 "parser.c"
271
- switch( (*p) ) {
272
- case 13: goto st9;
273
- case 32: goto st9;
274
- case 44: goto st10;
275
- case 47: goto st15;
276
- case 125: goto tr4;
277
- }
278
- if ( 9 <= (*p) && (*p) <= 10 )
279
- goto st9;
280
- goto st0;
281
- st10:
282
- if ( ++p == pe )
283
- goto _test_eof10;
284
- case 10:
285
- switch( (*p) ) {
286
- case 13: goto st10;
287
- case 32: goto st10;
288
- case 34: goto tr2;
289
- case 47: goto st11;
290
- }
291
- if ( 9 <= (*p) && (*p) <= 10 )
292
- goto st10;
293
- goto st0;
294
- st11:
295
- if ( ++p == pe )
296
- goto _test_eof11;
297
- case 11:
298
- switch( (*p) ) {
299
- case 42: goto st12;
300
- case 47: goto st14;
301
- }
302
- goto st0;
303
- st12:
304
- if ( ++p == pe )
305
- goto _test_eof12;
306
- case 12:
307
- if ( (*p) == 42 )
308
- goto st13;
309
- goto st12;
310
- st13:
311
- if ( ++p == pe )
312
- goto _test_eof13;
313
- case 13:
314
- switch( (*p) ) {
315
- case 42: goto st13;
316
- case 47: goto st10;
317
- }
318
- goto st12;
319
- st14:
320
- if ( ++p == pe )
321
- goto _test_eof14;
322
- case 14:
323
- if ( (*p) == 10 )
324
- goto st10;
325
- goto st14;
326
- st15:
327
- if ( ++p == pe )
328
- goto _test_eof15;
329
- case 15:
330
- switch( (*p) ) {
331
- case 42: goto st16;
332
- case 47: goto st18;
333
- }
334
- goto st0;
335
- st16:
336
- if ( ++p == pe )
337
- goto _test_eof16;
338
- case 16:
339
- if ( (*p) == 42 )
340
- goto st17;
341
- goto st16;
342
- st17:
343
- if ( ++p == pe )
344
- goto _test_eof17;
345
- case 17:
346
- switch( (*p) ) {
347
- case 42: goto st17;
348
- case 47: goto st9;
349
- }
350
- goto st16;
351
- st18:
352
- if ( ++p == pe )
353
- goto _test_eof18;
354
- case 18:
355
- if ( (*p) == 10 )
356
- goto st9;
357
- goto st18;
358
- tr4:
359
- #line 161 "parser.rl"
360
- { p--; {p++; cs = 27; goto _out;} }
361
- goto st27;
362
- st27:
363
- if ( ++p == pe )
364
- goto _test_eof27;
365
- case 27:
366
- #line 367 "parser.c"
367
- goto st0;
368
- st19:
369
- if ( ++p == pe )
370
- goto _test_eof19;
371
- case 19:
372
- switch( (*p) ) {
373
- case 42: goto st20;
374
- case 47: goto st22;
375
- }
376
- goto st0;
377
- st20:
378
- if ( ++p == pe )
379
- goto _test_eof20;
380
- case 20:
381
- if ( (*p) == 42 )
382
- goto st21;
383
- goto st20;
384
- st21:
385
- if ( ++p == pe )
386
- goto _test_eof21;
387
- case 21:
388
- switch( (*p) ) {
389
- case 42: goto st21;
390
- case 47: goto st8;
391
- }
392
- goto st20;
393
- st22:
394
- if ( ++p == pe )
395
- goto _test_eof22;
396
- case 22:
397
- if ( (*p) == 10 )
398
- goto st8;
399
- goto st22;
400
- st23:
401
- if ( ++p == pe )
402
- goto _test_eof23;
403
- case 23:
404
- switch( (*p) ) {
405
- case 42: goto st24;
406
- case 47: goto st26;
407
- }
408
- goto st0;
409
- st24:
410
- if ( ++p == pe )
411
- goto _test_eof24;
412
- case 24:
413
- if ( (*p) == 42 )
414
- goto st25;
415
- goto st24;
416
- st25:
417
- if ( ++p == pe )
418
- goto _test_eof25;
419
- case 25:
420
- switch( (*p) ) {
421
- case 42: goto st25;
422
- case 47: goto st2;
423
- }
424
- goto st24;
425
- st26:
426
- if ( ++p == pe )
427
- goto _test_eof26;
428
- case 26:
429
- if ( (*p) == 10 )
430
- goto st2;
431
- goto st26;
432
- }
433
- _test_eof2: cs = 2; goto _test_eof;
434
- _test_eof3: cs = 3; goto _test_eof;
435
- _test_eof4: cs = 4; goto _test_eof;
436
- _test_eof5: cs = 5; goto _test_eof;
437
- _test_eof6: cs = 6; goto _test_eof;
438
- _test_eof7: cs = 7; goto _test_eof;
439
- _test_eof8: cs = 8; goto _test_eof;
440
- _test_eof9: cs = 9; goto _test_eof;
441
- _test_eof10: cs = 10; goto _test_eof;
442
- _test_eof11: cs = 11; goto _test_eof;
443
- _test_eof12: cs = 12; goto _test_eof;
444
- _test_eof13: cs = 13; goto _test_eof;
445
- _test_eof14: cs = 14; goto _test_eof;
446
- _test_eof15: cs = 15; goto _test_eof;
447
- _test_eof16: cs = 16; goto _test_eof;
448
- _test_eof17: cs = 17; goto _test_eof;
449
- _test_eof18: cs = 18; goto _test_eof;
450
- _test_eof27: cs = 27; goto _test_eof;
451
- _test_eof19: cs = 19; goto _test_eof;
452
- _test_eof20: cs = 20; goto _test_eof;
453
- _test_eof21: cs = 21; goto _test_eof;
454
- _test_eof22: cs = 22; goto _test_eof;
455
- _test_eof23: cs = 23; goto _test_eof;
456
- _test_eof24: cs = 24; goto _test_eof;
457
- _test_eof25: cs = 25; goto _test_eof;
458
- _test_eof26: cs = 26; goto _test_eof;
459
-
460
- _test_eof: {}
461
- _out: {}
462
- }
463
-
464
- #line 187 "parser.rl"
465
-
466
- if (cs >= JSON_object_first_final) {
467
- if (json->create_additions) {
468
- VALUE klassname;
469
- if (NIL_P(json->object_class)) {
470
- klassname = rb_hash_aref(*result, json->create_id);
471
- } else {
472
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
504
+ static void
505
+ json_eat_comments(JSON_ParserState *state)
506
+ {
507
+ if (state->cursor + 1 < state->end) {
508
+ switch(state->cursor[1]) {
509
+ case '/': {
510
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
511
+ if (!state->cursor) {
512
+ state->cursor = state->end;
513
+ } else {
514
+ state->cursor++;
515
+ }
516
+ break;
473
517
  }
474
- if (!NIL_P(klassname)) {
475
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
476
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
477
- *result = rb_funcall(klass, i_json_create, 1, *result);
518
+ case '*': {
519
+ state->cursor += 2;
520
+ while (true) {
521
+ state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
522
+ if (!state->cursor) {
523
+ raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
524
+ } else {
525
+ state->cursor++;
526
+ if (state->cursor < state->end && *state->cursor == '/') {
527
+ state->cursor++;
528
+ break;
529
+ }
530
+ }
478
531
  }
532
+ break;
479
533
  }
534
+ default:
535
+ raise_parse_error("unexpected token '%s'", state);
536
+ break;
480
537
  }
481
- return p + 1;
482
538
  } else {
483
- return NULL;
539
+ raise_parse_error("unexpected token '%s'", state);
484
540
  }
485
541
  }
486
542
 
543
+ static inline void
544
+ json_eat_whitespace(JSON_ParserState *state)
545
+ {
546
+ while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
547
+ if (RB_LIKELY(*state->cursor != '/')) {
548
+ state->cursor++;
549
+ } else {
550
+ json_eat_comments(state);
551
+ }
552
+ }
553
+ }
487
554
 
555
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
556
+ {
557
+ if (symbolize) {
558
+ intern = true;
559
+ }
560
+ VALUE result;
561
+ # ifdef HAVE_RB_ENC_INTERNED_STR
562
+ if (intern) {
563
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
564
+ } else {
565
+ result = rb_utf8_str_new(start, (long)(end - start));
566
+ }
567
+ # else
568
+ result = rb_utf8_str_new(start, (long)(end - start));
569
+ if (intern) {
570
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
571
+ }
572
+ # endif
488
573
 
489
- #line 490 "parser.c"
490
- enum {JSON_value_start = 1};
491
- enum {JSON_value_first_final = 29};
492
- enum {JSON_value_error = 0};
493
-
494
- enum {JSON_value_en_main = 1};
574
+ if (symbolize) {
575
+ result = rb_str_intern(result);
576
+ }
495
577
 
578
+ return result;
579
+ }
496
580
 
497
- #line 287 "parser.rl"
581
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
582
+ {
583
+ size_t bufferSize = stringEnd - string;
498
584
 
585
+ if (is_name && state->in_array) {
586
+ VALUE cached_key;
587
+ if (RB_UNLIKELY(symbolize)) {
588
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
589
+ } else {
590
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
591
+ }
499
592
 
500
- static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
501
- {
502
- int cs = EVIL;
503
-
504
-
505
- #line 506 "parser.c"
506
- {
507
- cs = JSON_value_start;
508
- }
509
-
510
- #line 294 "parser.rl"
511
-
512
- #line 513 "parser.c"
513
- {
514
- if ( p == pe )
515
- goto _test_eof;
516
- switch ( cs )
517
- {
518
- st1:
519
- if ( ++p == pe )
520
- goto _test_eof1;
521
- case 1:
522
- switch( (*p) ) {
523
- case 13: goto st1;
524
- case 32: goto st1;
525
- case 34: goto tr2;
526
- case 45: goto tr3;
527
- case 47: goto st6;
528
- case 73: goto st10;
529
- case 78: goto st17;
530
- case 91: goto tr7;
531
- case 102: goto st19;
532
- case 110: goto st23;
533
- case 116: goto st26;
534
- case 123: goto tr11;
535
- }
536
- if ( (*p) > 10 ) {
537
- if ( 48 <= (*p) && (*p) <= 57 )
538
- goto tr3;
539
- } else if ( (*p) >= 9 )
540
- goto st1;
541
- goto st0;
542
- st0:
543
- cs = 0;
544
- goto _out;
545
- tr2:
546
- #line 239 "parser.rl"
547
- {
548
- char *np = JSON_parse_string(json, p, pe, result);
549
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
550
- }
551
- goto st29;
552
- tr3:
553
- #line 244 "parser.rl"
554
- {
555
- char *np;
556
- if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
557
- if (json->allow_nan) {
558
- *result = CMinusInfinity;
559
- {p = (( p + 10))-1;}
560
- p--; {p++; cs = 29; goto _out;}
561
- } else {
562
- raise_parse_error("unexpected token at '%s'", p);
563
- }
593
+ if (RB_LIKELY(cached_key)) {
594
+ return cached_key;
564
595
  }
565
- np = JSON_parse_float(json, p, pe, result);
566
- if (np != NULL) {p = (( np))-1;}
567
- np = JSON_parse_integer(json, p, pe, result);
568
- if (np != NULL) {p = (( np))-1;}
569
- p--; {p++; cs = 29; goto _out;}
570
- }
571
- goto st29;
572
- tr7:
573
- #line 262 "parser.rl"
574
- {
575
- char *np;
576
- np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
577
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
578
596
  }
579
- goto st29;
580
- tr11:
581
- #line 268 "parser.rl"
582
- {
583
- char *np;
584
- np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
585
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
586
- }
587
- goto st29;
588
- tr25:
589
- #line 232 "parser.rl"
590
- {
591
- if (json->allow_nan) {
592
- *result = CInfinity;
597
+
598
+ return build_string(string, stringEnd, intern, symbolize);
599
+ }
600
+
601
+ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
602
+ {
603
+ size_t bufferSize = stringEnd - string;
604
+ const char *p = string, *pe = string, *unescape, *bufferStart;
605
+ char *buffer;
606
+ int unescape_len;
607
+ char buf[4];
608
+
609
+ if (is_name && state->in_array) {
610
+ VALUE cached_key;
611
+ if (RB_UNLIKELY(symbolize)) {
612
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
593
613
  } else {
594
- raise_parse_error("unexpected token at '%s'", p - 7);
614
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
615
+ }
616
+
617
+ if (RB_LIKELY(cached_key)) {
618
+ return cached_key;
595
619
  }
596
620
  }
597
- goto st29;
598
- tr27:
599
- #line 225 "parser.rl"
600
- {
601
- if (json->allow_nan) {
602
- *result = CNaN;
603
- } else {
604
- raise_parse_error("unexpected token at '%s'", p - 2);
621
+
622
+ VALUE result = rb_str_buf_new(bufferSize);
623
+ rb_enc_associate_index(result, utf8_encindex);
624
+ buffer = RSTRING_PTR(result);
625
+ bufferStart = buffer;
626
+
627
+ while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
628
+ unescape = (char *) "?";
629
+ unescape_len = 1;
630
+ if (pe > p) {
631
+ MEMCPY(buffer, p, char, pe - p);
632
+ buffer += pe - p;
605
633
  }
634
+ switch (*++pe) {
635
+ case 'n':
636
+ unescape = (char *) "\n";
637
+ break;
638
+ case 'r':
639
+ unescape = (char *) "\r";
640
+ break;
641
+ case 't':
642
+ unescape = (char *) "\t";
643
+ break;
644
+ case '"':
645
+ unescape = (char *) "\"";
646
+ break;
647
+ case '\\':
648
+ unescape = (char *) "\\";
649
+ break;
650
+ case 'b':
651
+ unescape = (char *) "\b";
652
+ break;
653
+ case 'f':
654
+ unescape = (char *) "\f";
655
+ break;
656
+ case 'u':
657
+ if (pe > stringEnd - 5) {
658
+ raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
659
+ } else {
660
+ uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
661
+ pe += 3;
662
+ /* To handle values above U+FFFF, we take a sequence of
663
+ * \uXXXX escapes in the U+D800..U+DBFF then
664
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
665
+ * to make a 20-bit number, then add 0x10000 to get the
666
+ * final codepoint.
667
+ *
668
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
669
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
670
+ * Area".
671
+ */
672
+ if ((ch & 0xFC00) == 0xD800) {
673
+ pe++;
674
+ if (pe > stringEnd - 6) {
675
+ raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
676
+ }
677
+ if (pe[0] == '\\' && pe[1] == 'u') {
678
+ uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
679
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
680
+ | (sur & 0x3FF));
681
+ pe += 5;
682
+ } else {
683
+ unescape = (char *) "?";
684
+ break;
685
+ }
686
+ }
687
+ unescape_len = convert_UTF32_to_UTF8(buf, ch);
688
+ unescape = buf;
689
+ }
690
+ break;
691
+ default:
692
+ p = pe;
693
+ continue;
694
+ }
695
+ MEMCPY(buffer, unescape, char, unescape_len);
696
+ buffer += unescape_len;
697
+ p = ++pe;
698
+ }
699
+
700
+ if (stringEnd > p) {
701
+ MEMCPY(buffer, p, char, stringEnd - p);
702
+ buffer += stringEnd - p;
606
703
  }
607
- goto st29;
608
- tr31:
609
- #line 219 "parser.rl"
610
- {
611
- *result = Qfalse;
704
+ rb_str_set_len(result, buffer - bufferStart);
705
+
706
+ if (symbolize) {
707
+ result = rb_str_intern(result);
708
+ } else if (intern) {
709
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
612
710
  }
613
- goto st29;
614
- tr34:
615
- #line 216 "parser.rl"
616
- {
617
- *result = Qnil;
711
+
712
+ return result;
713
+ }
714
+
715
+ #define MAX_FAST_INTEGER_SIZE 18
716
+ static inline VALUE fast_decode_integer(const char *p, const char *pe)
717
+ {
718
+ bool negative = false;
719
+ if (*p == '-') {
720
+ negative = true;
721
+ p++;
618
722
  }
619
- goto st29;
620
- tr37:
621
- #line 222 "parser.rl"
622
- {
623
- *result = Qtrue;
723
+
724
+ long long memo = 0;
725
+ while (p < pe) {
726
+ memo *= 10;
727
+ memo += *p - '0';
728
+ p++;
624
729
  }
625
- goto st29;
626
- st29:
627
- if ( ++p == pe )
628
- goto _test_eof29;
629
- case 29:
630
- #line 274 "parser.rl"
631
- { p--; {p++; cs = 29; goto _out;} }
632
- #line 633 "parser.c"
633
- switch( (*p) ) {
634
- case 13: goto st29;
635
- case 32: goto st29;
636
- case 47: goto st2;
637
- }
638
- if ( 9 <= (*p) && (*p) <= 10 )
639
- goto st29;
640
- goto st0;
641
- st2:
642
- if ( ++p == pe )
643
- goto _test_eof2;
644
- case 2:
645
- switch( (*p) ) {
646
- case 42: goto st3;
647
- case 47: goto st5;
648
- }
649
- goto st0;
650
- st3:
651
- if ( ++p == pe )
652
- goto _test_eof3;
653
- case 3:
654
- if ( (*p) == 42 )
655
- goto st4;
656
- goto st3;
657
- st4:
658
- if ( ++p == pe )
659
- goto _test_eof4;
660
- case 4:
661
- switch( (*p) ) {
662
- case 42: goto st4;
663
- case 47: goto st29;
664
- }
665
- goto st3;
666
- st5:
667
- if ( ++p == pe )
668
- goto _test_eof5;
669
- case 5:
670
- if ( (*p) == 10 )
671
- goto st29;
672
- goto st5;
673
- st6:
674
- if ( ++p == pe )
675
- goto _test_eof6;
676
- case 6:
677
- switch( (*p) ) {
678
- case 42: goto st7;
679
- case 47: goto st9;
680
- }
681
- goto st0;
682
- st7:
683
- if ( ++p == pe )
684
- goto _test_eof7;
685
- case 7:
686
- if ( (*p) == 42 )
687
- goto st8;
688
- goto st7;
689
- st8:
690
- if ( ++p == pe )
691
- goto _test_eof8;
692
- case 8:
693
- switch( (*p) ) {
694
- case 42: goto st8;
695
- case 47: goto st1;
696
- }
697
- goto st7;
698
- st9:
699
- if ( ++p == pe )
700
- goto _test_eof9;
701
- case 9:
702
- if ( (*p) == 10 )
703
- goto st1;
704
- goto st9;
705
- st10:
706
- if ( ++p == pe )
707
- goto _test_eof10;
708
- case 10:
709
- if ( (*p) == 110 )
710
- goto st11;
711
- goto st0;
712
- st11:
713
- if ( ++p == pe )
714
- goto _test_eof11;
715
- case 11:
716
- if ( (*p) == 102 )
717
- goto st12;
718
- goto st0;
719
- st12:
720
- if ( ++p == pe )
721
- goto _test_eof12;
722
- case 12:
723
- if ( (*p) == 105 )
724
- goto st13;
725
- goto st0;
726
- st13:
727
- if ( ++p == pe )
728
- goto _test_eof13;
729
- case 13:
730
- if ( (*p) == 110 )
731
- goto st14;
732
- goto st0;
733
- st14:
734
- if ( ++p == pe )
735
- goto _test_eof14;
736
- case 14:
737
- if ( (*p) == 105 )
738
- goto st15;
739
- goto st0;
740
- st15:
741
- if ( ++p == pe )
742
- goto _test_eof15;
743
- case 15:
744
- if ( (*p) == 116 )
745
- goto st16;
746
- goto st0;
747
- st16:
748
- if ( ++p == pe )
749
- goto _test_eof16;
750
- case 16:
751
- if ( (*p) == 121 )
752
- goto tr25;
753
- goto st0;
754
- st17:
755
- if ( ++p == pe )
756
- goto _test_eof17;
757
- case 17:
758
- if ( (*p) == 97 )
759
- goto st18;
760
- goto st0;
761
- st18:
762
- if ( ++p == pe )
763
- goto _test_eof18;
764
- case 18:
765
- if ( (*p) == 78 )
766
- goto tr27;
767
- goto st0;
768
- st19:
769
- if ( ++p == pe )
770
- goto _test_eof19;
771
- case 19:
772
- if ( (*p) == 97 )
773
- goto st20;
774
- goto st0;
775
- st20:
776
- if ( ++p == pe )
777
- goto _test_eof20;
778
- case 20:
779
- if ( (*p) == 108 )
780
- goto st21;
781
- goto st0;
782
- st21:
783
- if ( ++p == pe )
784
- goto _test_eof21;
785
- case 21:
786
- if ( (*p) == 115 )
787
- goto st22;
788
- goto st0;
789
- st22:
790
- if ( ++p == pe )
791
- goto _test_eof22;
792
- case 22:
793
- if ( (*p) == 101 )
794
- goto tr31;
795
- goto st0;
796
- st23:
797
- if ( ++p == pe )
798
- goto _test_eof23;
799
- case 23:
800
- if ( (*p) == 117 )
801
- goto st24;
802
- goto st0;
803
- st24:
804
- if ( ++p == pe )
805
- goto _test_eof24;
806
- case 24:
807
- if ( (*p) == 108 )
808
- goto st25;
809
- goto st0;
810
- st25:
811
- if ( ++p == pe )
812
- goto _test_eof25;
813
- case 25:
814
- if ( (*p) == 108 )
815
- goto tr34;
816
- goto st0;
817
- st26:
818
- if ( ++p == pe )
819
- goto _test_eof26;
820
- case 26:
821
- if ( (*p) == 114 )
822
- goto st27;
823
- goto st0;
824
- st27:
825
- if ( ++p == pe )
826
- goto _test_eof27;
827
- case 27:
828
- if ( (*p) == 117 )
829
- goto st28;
830
- goto st0;
831
- st28:
832
- if ( ++p == pe )
833
- goto _test_eof28;
834
- case 28:
835
- if ( (*p) == 101 )
836
- goto tr37;
837
- goto st0;
838
- }
839
- _test_eof1: cs = 1; goto _test_eof;
840
- _test_eof29: cs = 29; goto _test_eof;
841
- _test_eof2: cs = 2; goto _test_eof;
842
- _test_eof3: cs = 3; goto _test_eof;
843
- _test_eof4: cs = 4; goto _test_eof;
844
- _test_eof5: cs = 5; goto _test_eof;
845
- _test_eof6: cs = 6; goto _test_eof;
846
- _test_eof7: cs = 7; goto _test_eof;
847
- _test_eof8: cs = 8; goto _test_eof;
848
- _test_eof9: cs = 9; goto _test_eof;
849
- _test_eof10: cs = 10; goto _test_eof;
850
- _test_eof11: cs = 11; goto _test_eof;
851
- _test_eof12: cs = 12; goto _test_eof;
852
- _test_eof13: cs = 13; goto _test_eof;
853
- _test_eof14: cs = 14; goto _test_eof;
854
- _test_eof15: cs = 15; goto _test_eof;
855
- _test_eof16: cs = 16; goto _test_eof;
856
- _test_eof17: cs = 17; goto _test_eof;
857
- _test_eof18: cs = 18; goto _test_eof;
858
- _test_eof19: cs = 19; goto _test_eof;
859
- _test_eof20: cs = 20; goto _test_eof;
860
- _test_eof21: cs = 21; goto _test_eof;
861
- _test_eof22: cs = 22; goto _test_eof;
862
- _test_eof23: cs = 23; goto _test_eof;
863
- _test_eof24: cs = 24; goto _test_eof;
864
- _test_eof25: cs = 25; goto _test_eof;
865
- _test_eof26: cs = 26; goto _test_eof;
866
- _test_eof27: cs = 27; goto _test_eof;
867
- _test_eof28: cs = 28; goto _test_eof;
868
-
869
- _test_eof: {}
870
- _out: {}
871
- }
872
-
873
- #line 295 "parser.rl"
874
-
875
- if (json->freeze) {
876
- OBJ_FREEZE(*result);
730
+
731
+ if (negative) {
732
+ memo = -memo;
877
733
  }
734
+ return LL2NUM(memo);
735
+ }
736
+
737
+ static VALUE json_decode_large_integer(const char *start, long len)
738
+ {
739
+ VALUE buffer_v;
740
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
741
+ MEMCPY(buffer, start, char, len);
742
+ buffer[len] = '\0';
743
+ VALUE number = rb_cstr2inum(buffer, 10);
744
+ RB_ALLOCV_END(buffer_v);
745
+ return number;
746
+ }
747
+
748
+ static inline VALUE
749
+ json_decode_integer(const char *start, const char *end)
750
+ {
751
+ long len = end - start;
752
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
753
+ return fast_decode_integer(start, end);
754
+ }
755
+ return json_decode_large_integer(start, len);
756
+ }
757
+
758
+ static VALUE json_decode_large_float(const char *start, long len)
759
+ {
760
+ VALUE buffer_v;
761
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
762
+ MEMCPY(buffer, start, char, len);
763
+ buffer[len] = '\0';
764
+ VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
765
+ RB_ALLOCV_END(buffer_v);
766
+ return number;
767
+ }
878
768
 
879
- if (cs >= JSON_value_first_final) {
880
- return p;
769
+ static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
770
+ {
771
+ long len = end - start;
772
+
773
+ if (RB_UNLIKELY(config->decimal_class)) {
774
+ VALUE text = rb_str_new(start, len);
775
+ return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
776
+ } else if (RB_LIKELY(len < 64)) {
777
+ char buffer[64];
778
+ MEMCPY(buffer, start, char, len);
779
+ buffer[len] = '\0';
780
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
881
781
  } else {
882
- return NULL;
782
+ return json_decode_large_float(start, len);
883
783
  }
884
784
  }
885
785
 
786
+ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
787
+ {
788
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
789
+ rvalue_stack_pop(state->stack, count);
790
+
791
+ if (config->freeze) {
792
+ RB_OBJ_FREEZE(array);
793
+ }
886
794
 
887
- #line 888 "parser.c"
888
- enum {JSON_integer_start = 1};
889
- enum {JSON_integer_first_final = 3};
890
- enum {JSON_integer_error = 0};
795
+ return array;
796
+ }
891
797
 
892
- enum {JSON_integer_en_main = 1};
798
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
799
+ {
800
+ VALUE object = rb_hash_new_capa(count);
801
+ rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
893
802
 
803
+ rvalue_stack_pop(state->stack, count);
894
804
 
895
- #line 315 "parser.rl"
805
+ if (config->freeze) {
806
+ RB_OBJ_FREEZE(object);
807
+ }
896
808
 
809
+ return object;
810
+ }
897
811
 
898
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
812
+ static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
899
813
  {
900
- int cs = EVIL;
901
-
902
-
903
- #line 904 "parser.c"
904
- {
905
- cs = JSON_integer_start;
906
- }
907
-
908
- #line 322 "parser.rl"
909
- json->memo = p;
910
-
911
- #line 912 "parser.c"
912
- {
913
- if ( p == pe )
914
- goto _test_eof;
915
- switch ( cs )
916
- {
917
- case 1:
918
- switch( (*p) ) {
919
- case 45: goto st2;
920
- case 48: goto st3;
921
- }
922
- if ( 49 <= (*p) && (*p) <= 57 )
923
- goto st5;
924
- goto st0;
925
- st0:
926
- cs = 0;
927
- goto _out;
928
- st2:
929
- if ( ++p == pe )
930
- goto _test_eof2;
931
- case 2:
932
- if ( (*p) == 48 )
933
- goto st3;
934
- if ( 49 <= (*p) && (*p) <= 57 )
935
- goto st5;
936
- goto st0;
937
- st3:
938
- if ( ++p == pe )
939
- goto _test_eof3;
940
- case 3:
941
- if ( 48 <= (*p) && (*p) <= 57 )
942
- goto st0;
943
- goto tr4;
944
- tr4:
945
- #line 312 "parser.rl"
946
- { p--; {p++; cs = 4; goto _out;} }
947
- goto st4;
948
- st4:
949
- if ( ++p == pe )
950
- goto _test_eof4;
951
- case 4:
952
- #line 953 "parser.c"
953
- goto st0;
954
- st5:
955
- if ( ++p == pe )
956
- goto _test_eof5;
957
- case 5:
958
- if ( 48 <= (*p) && (*p) <= 57 )
959
- goto st5;
960
- goto tr4;
961
- }
962
- _test_eof2: cs = 2; goto _test_eof;
963
- _test_eof3: cs = 3; goto _test_eof;
964
- _test_eof4: cs = 4; goto _test_eof;
965
- _test_eof5: cs = 5; goto _test_eof;
966
-
967
- _test_eof: {}
968
- _out: {}
969
- }
970
-
971
- #line 324 "parser.rl"
972
-
973
- if (cs >= JSON_integer_first_final) {
974
- long len = p - json->memo;
975
- fbuffer_clear(json->fbuffer);
976
- fbuffer_append(json->fbuffer, json->memo, len);
977
- fbuffer_append_char(json->fbuffer, '\0');
978
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
979
- return p + 1;
814
+ VALUE string;
815
+ bool intern = is_name || config->freeze;
816
+ bool symbolize = is_name && config->symbolize_names;
817
+ if (escaped) {
818
+ string = json_string_unescape(state, start, end, is_name, intern, symbolize);
980
819
  } else {
981
- return NULL;
820
+ string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
982
821
  }
983
- }
984
822
 
823
+ return string;
824
+ }
985
825
 
986
- #line 987 "parser.c"
987
- enum {JSON_float_start = 1};
988
- enum {JSON_float_first_final = 8};
989
- enum {JSON_float_error = 0};
826
+ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
827
+ {
828
+ if (RB_UNLIKELY(config->on_load_proc)) {
829
+ value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
830
+ }
831
+ rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
832
+ return value;
833
+ }
990
834
 
991
- enum {JSON_float_en_main = 1};
835
+ static const bool string_scan[256] = {
836
+ // ASCII Control Characters
837
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
838
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
839
+ // ASCII Characters
840
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
841
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
842
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
843
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\'
844
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
845
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
846
+ };
992
847
 
848
+ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
849
+ {
850
+ state->cursor++;
851
+ const char *start = state->cursor;
852
+ bool escaped = false;
853
+
854
+ while (state->cursor < state->end) {
855
+ if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
856
+ switch (*state->cursor) {
857
+ case '"': {
858
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
859
+ state->cursor++;
860
+ return json_push_value(state, config, string);
861
+ }
862
+ case '\\': {
863
+ state->cursor++;
864
+ escaped = true;
865
+ if ((unsigned char)*state->cursor < 0x20) {
866
+ raise_parse_error("invalid ASCII control character in string: %s", state);
867
+ }
868
+ break;
869
+ }
870
+ default:
871
+ raise_parse_error("invalid ASCII control character in string: %s", state);
872
+ break;
873
+ }
874
+ }
993
875
 
994
- #line 349 "parser.rl"
876
+ state->cursor++;
877
+ }
995
878
 
879
+ raise_parse_error("unexpected end of input, expected closing \"", state);
880
+ return Qfalse;
881
+ }
996
882
 
997
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
883
+ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
998
884
  {
999
- int cs = EVIL;
1000
-
1001
-
1002
- #line 1003 "parser.c"
1003
- {
1004
- cs = JSON_float_start;
1005
- }
1006
-
1007
- #line 356 "parser.rl"
1008
- json->memo = p;
1009
-
1010
- #line 1011 "parser.c"
1011
- {
1012
- if ( p == pe )
1013
- goto _test_eof;
1014
- switch ( cs )
1015
- {
1016
- case 1:
1017
- switch( (*p) ) {
1018
- case 45: goto st2;
1019
- case 48: goto st3;
1020
- }
1021
- if ( 49 <= (*p) && (*p) <= 57 )
1022
- goto st7;
1023
- goto st0;
1024
- st0:
1025
- cs = 0;
1026
- goto _out;
1027
- st2:
1028
- if ( ++p == pe )
1029
- goto _test_eof2;
1030
- case 2:
1031
- if ( (*p) == 48 )
1032
- goto st3;
1033
- if ( 49 <= (*p) && (*p) <= 57 )
1034
- goto st7;
1035
- goto st0;
1036
- st3:
1037
- if ( ++p == pe )
1038
- goto _test_eof3;
1039
- case 3:
1040
- switch( (*p) ) {
1041
- case 46: goto st4;
1042
- case 69: goto st5;
1043
- case 101: goto st5;
1044
- }
1045
- goto st0;
1046
- st4:
1047
- if ( ++p == pe )
1048
- goto _test_eof4;
1049
- case 4:
1050
- if ( 48 <= (*p) && (*p) <= 57 )
1051
- goto st8;
1052
- goto st0;
1053
- st8:
1054
- if ( ++p == pe )
1055
- goto _test_eof8;
1056
- case 8:
1057
- switch( (*p) ) {
1058
- case 69: goto st5;
1059
- case 101: goto st5;
1060
- }
1061
- if ( (*p) > 46 ) {
1062
- if ( 48 <= (*p) && (*p) <= 57 )
1063
- goto st8;
1064
- } else if ( (*p) >= 45 )
1065
- goto st0;
1066
- goto tr9;
1067
- tr9:
1068
- #line 343 "parser.rl"
1069
- { p--; {p++; cs = 9; goto _out;} }
1070
- goto st9;
1071
- st9:
1072
- if ( ++p == pe )
1073
- goto _test_eof9;
1074
- case 9:
1075
- #line 1076 "parser.c"
1076
- goto st0;
1077
- st5:
1078
- if ( ++p == pe )
1079
- goto _test_eof5;
1080
- case 5:
1081
- switch( (*p) ) {
1082
- case 43: goto st6;
1083
- case 45: goto st6;
1084
- }
1085
- if ( 48 <= (*p) && (*p) <= 57 )
1086
- goto st10;
1087
- goto st0;
1088
- st6:
1089
- if ( ++p == pe )
1090
- goto _test_eof6;
1091
- case 6:
1092
- if ( 48 <= (*p) && (*p) <= 57 )
1093
- goto st10;
1094
- goto st0;
1095
- st10:
1096
- if ( ++p == pe )
1097
- goto _test_eof10;
1098
- case 10:
1099
- switch( (*p) ) {
1100
- case 69: goto st0;
1101
- case 101: goto st0;
1102
- }
1103
- if ( (*p) > 46 ) {
1104
- if ( 48 <= (*p) && (*p) <= 57 )
1105
- goto st10;
1106
- } else if ( (*p) >= 45 )
1107
- goto st0;
1108
- goto tr9;
1109
- st7:
1110
- if ( ++p == pe )
1111
- goto _test_eof7;
1112
- case 7:
1113
- switch( (*p) ) {
1114
- case 46: goto st4;
1115
- case 69: goto st5;
1116
- case 101: goto st5;
1117
- }
1118
- if ( 48 <= (*p) && (*p) <= 57 )
1119
- goto st7;
1120
- goto st0;
1121
- }
1122
- _test_eof2: cs = 2; goto _test_eof;
1123
- _test_eof3: cs = 3; goto _test_eof;
1124
- _test_eof4: cs = 4; goto _test_eof;
1125
- _test_eof8: cs = 8; goto _test_eof;
1126
- _test_eof9: cs = 9; goto _test_eof;
1127
- _test_eof5: cs = 5; goto _test_eof;
1128
- _test_eof6: cs = 6; goto _test_eof;
1129
- _test_eof10: cs = 10; goto _test_eof;
1130
- _test_eof7: cs = 7; goto _test_eof;
1131
-
1132
- _test_eof: {}
1133
- _out: {}
1134
- }
1135
-
1136
- #line 358 "parser.rl"
1137
-
1138
- if (cs >= JSON_float_first_final) {
1139
- VALUE mod = Qnil;
1140
- ID method_id = 0;
1141
- if (!NIL_P(json->decimal_class)) {
1142
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
1143
- mod = json->decimal_class;
1144
- method_id = i_try_convert;
1145
- } else if (rb_respond_to(json->decimal_class, i_new)) {
1146
- mod = json->decimal_class;
1147
- method_id = i_new;
1148
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1149
- VALUE name = rb_class_name(json->decimal_class);
1150
- const char *name_cstr = RSTRING_PTR(name);
1151
- const char *last_colon = strrchr(name_cstr, ':');
1152
- if (last_colon) {
1153
- const char *mod_path_end = last_colon - 1;
1154
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1155
- mod = rb_path_to_class(mod_path);
885
+ json_eat_whitespace(state);
886
+ if (state->cursor >= state->end) {
887
+ raise_parse_error("unexpected end of input", state);
888
+ }
1156
889
 
1157
- const char *method_name_beg = last_colon + 1;
1158
- long before_len = method_name_beg - name_cstr;
1159
- long len = RSTRING_LEN(name) - before_len;
1160
- VALUE method_name = rb_str_substr(name, before_len, len);
1161
- method_id = SYM2ID(rb_str_intern(method_name));
890
+ switch (*state->cursor) {
891
+ case 'n':
892
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
893
+ state->cursor += 4;
894
+ return json_push_value(state, config, Qnil);
895
+ }
896
+
897
+ raise_parse_error("unexpected token '%s'", state);
898
+ break;
899
+ case 't':
900
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
901
+ state->cursor += 4;
902
+ return json_push_value(state, config, Qtrue);
903
+ }
904
+
905
+ raise_parse_error("unexpected token '%s'", state);
906
+ break;
907
+ case 'f':
908
+ // Note: memcmp with a small power of two compile to an integer comparison
909
+ if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
910
+ state->cursor += 5;
911
+ return json_push_value(state, config, Qfalse);
912
+ }
913
+
914
+ raise_parse_error("unexpected token '%s'", state);
915
+ break;
916
+ case 'N':
917
+ // Note: memcmp with a small power of two compile to an integer comparison
918
+ if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
919
+ state->cursor += 3;
920
+ return json_push_value(state, config, CNaN);
921
+ }
922
+
923
+ raise_parse_error("unexpected token '%s'", state);
924
+ break;
925
+ case 'I':
926
+ if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
927
+ state->cursor += 8;
928
+ return json_push_value(state, config, CInfinity);
929
+ }
930
+
931
+ raise_parse_error("unexpected token '%s'", state);
932
+ break;
933
+ case '-':
934
+ // Note: memcmp with a small power of two compile to an integer comparison
935
+ if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
936
+ if (config->allow_nan) {
937
+ state->cursor += 9;
938
+ return json_push_value(state, config, CMinusInfinity);
1162
939
  } else {
1163
- mod = rb_mKernel;
1164
- method_id = SYM2ID(rb_str_intern(name));
940
+ raise_parse_error("unexpected token '%s'", state);
1165
941
  }
1166
942
  }
1167
- }
1168
-
1169
- long len = p - json->memo;
1170
- fbuffer_clear(json->fbuffer);
1171
- fbuffer_append(json->fbuffer, json->memo, len);
1172
- fbuffer_append_char(json->fbuffer, '\0');
943
+ // Fallthrough
944
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
945
+ bool integer = true;
1173
946
 
1174
- if (method_id) {
1175
- VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
1176
- *result = rb_funcallv(mod, method_id, 1, &text);
1177
- } else {
1178
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
1179
- }
947
+ // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
948
+ const char *start = state->cursor;
949
+ state->cursor++;
1180
950
 
1181
- return p + 1;
1182
- } else {
1183
- return NULL;
1184
- }
1185
- }
951
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
952
+ state->cursor++;
953
+ }
1186
954
 
955
+ long integer_length = state->cursor - start;
1187
956
 
957
+ if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
958
+ raise_parse_error_at("invalid number: %s", state, start);
959
+ } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
960
+ raise_parse_error_at("invalid number: %s", state, start);
961
+ } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
962
+ raise_parse_error_at("invalid number: %s", state, start);
963
+ }
1188
964
 
1189
- #line 1190 "parser.c"
1190
- enum {JSON_array_start = 1};
1191
- enum {JSON_array_first_final = 17};
1192
- enum {JSON_array_error = 0};
965
+ if ((state->cursor < state->end) && (*state->cursor == '.')) {
966
+ integer = false;
967
+ state->cursor++;
1193
968
 
1194
- enum {JSON_array_en_main = 1};
969
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
970
+ raise_parse_error("invalid number: %s", state);
971
+ }
1195
972
 
973
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
974
+ state->cursor++;
975
+ }
976
+ }
1196
977
 
1197
- #line 438 "parser.rl"
978
+ if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
979
+ integer = false;
980
+ state->cursor++;
981
+ if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
982
+ state->cursor++;
983
+ }
1198
984
 
985
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
986
+ raise_parse_error("invalid number: %s", state);
987
+ }
1199
988
 
1200
- static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
1201
- {
1202
- int cs = EVIL;
1203
- VALUE array_class = json->array_class;
989
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
990
+ state->cursor++;
991
+ }
992
+ }
1204
993
 
1205
- if (json->max_nesting && current_nesting > json->max_nesting) {
1206
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
1207
- }
1208
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
1209
-
1210
-
1211
- #line 1212 "parser.c"
1212
- {
1213
- cs = JSON_array_start;
1214
- }
1215
-
1216
- #line 451 "parser.rl"
1217
-
1218
- #line 1219 "parser.c"
1219
- {
1220
- if ( p == pe )
1221
- goto _test_eof;
1222
- switch ( cs )
1223
- {
1224
- case 1:
1225
- if ( (*p) == 91 )
1226
- goto st2;
1227
- goto st0;
1228
- st0:
1229
- cs = 0;
1230
- goto _out;
1231
- st2:
1232
- if ( ++p == pe )
1233
- goto _test_eof2;
1234
- case 2:
1235
- switch( (*p) ) {
1236
- case 13: goto st2;
1237
- case 32: goto st2;
1238
- case 34: goto tr2;
1239
- case 45: goto tr2;
1240
- case 47: goto st13;
1241
- case 73: goto tr2;
1242
- case 78: goto tr2;
1243
- case 91: goto tr2;
1244
- case 93: goto tr4;
1245
- case 102: goto tr2;
1246
- case 110: goto tr2;
1247
- case 116: goto tr2;
1248
- case 123: goto tr2;
1249
- }
1250
- if ( (*p) > 10 ) {
1251
- if ( 48 <= (*p) && (*p) <= 57 )
1252
- goto tr2;
1253
- } else if ( (*p) >= 9 )
1254
- goto st2;
1255
- goto st0;
1256
- tr2:
1257
- #line 415 "parser.rl"
1258
- {
1259
- VALUE v = Qnil;
1260
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
1261
- if (np == NULL) {
1262
- p--; {p++; cs = 3; goto _out;}
1263
- } else {
1264
- if (NIL_P(json->array_class)) {
1265
- rb_ary_push(*result, v);
1266
- } else {
1267
- rb_funcall(*result, i_leftshift, 1, v);
994
+ if (integer) {
995
+ return json_push_value(state, config, json_decode_integer(start, state->cursor));
1268
996
  }
1269
- {p = (( np))-1;}
997
+ return json_push_value(state, config, json_decode_float(config, start, state->cursor));
1270
998
  }
1271
- }
1272
- goto st3;
1273
- st3:
1274
- if ( ++p == pe )
1275
- goto _test_eof3;
1276
- case 3:
1277
- #line 1278 "parser.c"
1278
- switch( (*p) ) {
1279
- case 13: goto st3;
1280
- case 32: goto st3;
1281
- case 44: goto st4;
1282
- case 47: goto st9;
1283
- case 93: goto tr4;
1284
- }
1285
- if ( 9 <= (*p) && (*p) <= 10 )
1286
- goto st3;
1287
- goto st0;
1288
- st4:
1289
- if ( ++p == pe )
1290
- goto _test_eof4;
1291
- case 4:
1292
- switch( (*p) ) {
1293
- case 13: goto st4;
1294
- case 32: goto st4;
1295
- case 34: goto tr2;
1296
- case 45: goto tr2;
1297
- case 47: goto st5;
1298
- case 73: goto tr2;
1299
- case 78: goto tr2;
1300
- case 91: goto tr2;
1301
- case 102: goto tr2;
1302
- case 110: goto tr2;
1303
- case 116: goto tr2;
1304
- case 123: goto tr2;
1305
- }
1306
- if ( (*p) > 10 ) {
1307
- if ( 48 <= (*p) && (*p) <= 57 )
1308
- goto tr2;
1309
- } else if ( (*p) >= 9 )
1310
- goto st4;
1311
- goto st0;
1312
- st5:
1313
- if ( ++p == pe )
1314
- goto _test_eof5;
1315
- case 5:
1316
- switch( (*p) ) {
1317
- case 42: goto st6;
1318
- case 47: goto st8;
1319
- }
1320
- goto st0;
1321
- st6:
1322
- if ( ++p == pe )
1323
- goto _test_eof6;
1324
- case 6:
1325
- if ( (*p) == 42 )
1326
- goto st7;
1327
- goto st6;
1328
- st7:
1329
- if ( ++p == pe )
1330
- goto _test_eof7;
1331
- case 7:
1332
- switch( (*p) ) {
1333
- case 42: goto st7;
1334
- case 47: goto st4;
1335
- }
1336
- goto st6;
1337
- st8:
1338
- if ( ++p == pe )
1339
- goto _test_eof8;
1340
- case 8:
1341
- if ( (*p) == 10 )
1342
- goto st4;
1343
- goto st8;
1344
- st9:
1345
- if ( ++p == pe )
1346
- goto _test_eof9;
1347
- case 9:
1348
- switch( (*p) ) {
1349
- case 42: goto st10;
1350
- case 47: goto st12;
1351
- }
1352
- goto st0;
1353
- st10:
1354
- if ( ++p == pe )
1355
- goto _test_eof10;
1356
- case 10:
1357
- if ( (*p) == 42 )
1358
- goto st11;
1359
- goto st10;
1360
- st11:
1361
- if ( ++p == pe )
1362
- goto _test_eof11;
1363
- case 11:
1364
- switch( (*p) ) {
1365
- case 42: goto st11;
1366
- case 47: goto st3;
1367
- }
1368
- goto st10;
1369
- st12:
1370
- if ( ++p == pe )
1371
- goto _test_eof12;
1372
- case 12:
1373
- if ( (*p) == 10 )
1374
- goto st3;
1375
- goto st12;
1376
- tr4:
1377
- #line 430 "parser.rl"
1378
- { p--; {p++; cs = 17; goto _out;} }
1379
- goto st17;
1380
- st17:
1381
- if ( ++p == pe )
1382
- goto _test_eof17;
1383
- case 17:
1384
- #line 1385 "parser.c"
1385
- goto st0;
1386
- st13:
1387
- if ( ++p == pe )
1388
- goto _test_eof13;
1389
- case 13:
1390
- switch( (*p) ) {
1391
- case 42: goto st14;
1392
- case 47: goto st16;
1393
- }
1394
- goto st0;
1395
- st14:
1396
- if ( ++p == pe )
1397
- goto _test_eof14;
1398
- case 14:
1399
- if ( (*p) == 42 )
1400
- goto st15;
1401
- goto st14;
1402
- st15:
1403
- if ( ++p == pe )
1404
- goto _test_eof15;
1405
- case 15:
1406
- switch( (*p) ) {
1407
- case 42: goto st15;
1408
- case 47: goto st2;
1409
- }
1410
- goto st14;
1411
- st16:
1412
- if ( ++p == pe )
1413
- goto _test_eof16;
1414
- case 16:
1415
- if ( (*p) == 10 )
1416
- goto st2;
1417
- goto st16;
1418
- }
1419
- _test_eof2: cs = 2; goto _test_eof;
1420
- _test_eof3: cs = 3; goto _test_eof;
1421
- _test_eof4: cs = 4; goto _test_eof;
1422
- _test_eof5: cs = 5; goto _test_eof;
1423
- _test_eof6: cs = 6; goto _test_eof;
1424
- _test_eof7: cs = 7; goto _test_eof;
1425
- _test_eof8: cs = 8; goto _test_eof;
1426
- _test_eof9: cs = 9; goto _test_eof;
1427
- _test_eof10: cs = 10; goto _test_eof;
1428
- _test_eof11: cs = 11; goto _test_eof;
1429
- _test_eof12: cs = 12; goto _test_eof;
1430
- _test_eof17: cs = 17; goto _test_eof;
1431
- _test_eof13: cs = 13; goto _test_eof;
1432
- _test_eof14: cs = 14; goto _test_eof;
1433
- _test_eof15: cs = 15; goto _test_eof;
1434
- _test_eof16: cs = 16; goto _test_eof;
1435
-
1436
- _test_eof: {}
1437
- _out: {}
1438
- }
1439
-
1440
- #line 452 "parser.rl"
1441
-
1442
- if(cs >= JSON_array_first_final) {
1443
- return p + 1;
1444
- } else {
1445
- raise_parse_error("unexpected token at '%s'", p);
1446
- return NULL;
1447
- }
1448
- }
999
+ case '"': {
1000
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1001
+ return json_parse_string(state, config, false);
1002
+ break;
1003
+ }
1004
+ case '[': {
1005
+ state->cursor++;
1006
+ json_eat_whitespace(state);
1007
+ long stack_head = state->stack->head;
1008
+
1009
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1010
+ state->cursor++;
1011
+ return json_push_value(state, config, json_decode_array(state, config, 0));
1012
+ } else {
1013
+ state->current_nesting++;
1014
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1015
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1016
+ }
1017
+ state->in_array++;
1018
+ json_parse_any(state, config);
1019
+ }
1449
1020
 
1450
- static const size_t MAX_STACK_BUFFER_SIZE = 128;
1451
- static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
1452
- {
1453
- VALUE result = Qnil;
1454
- size_t bufferSize = stringEnd - string;
1455
- char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
1456
- int unescape_len;
1457
- char buf[4];
1021
+ while (true) {
1022
+ json_eat_whitespace(state);
1458
1023
 
1459
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1460
- # ifdef HAVE_RB_ENC_INTERNED_STR
1461
- bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
1462
- # else
1463
- bufferStart = buffer = ALLOC_N(char, bufferSize);
1464
- # endif
1465
- } else {
1466
- # ifdef HAVE_RB_ENC_INTERNED_STR
1467
- bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
1468
- # else
1469
- bufferStart = buffer = ALLOCA_N(char, bufferSize);
1470
- # endif
1471
- }
1024
+ if (state->cursor < state->end) {
1025
+ if (*state->cursor == ']') {
1026
+ state->cursor++;
1027
+ long count = state->stack->head - stack_head;
1028
+ state->current_nesting--;
1029
+ state->in_array--;
1030
+ return json_push_value(state, config, json_decode_array(state, config, count));
1031
+ }
1472
1032
 
1473
- while (pe < stringEnd) {
1474
- if (*pe == '\\') {
1475
- unescape = (char *) "?";
1476
- unescape_len = 1;
1477
- if (pe > p) {
1478
- MEMCPY(buffer, p, char, pe - p);
1479
- buffer += pe - p;
1480
- }
1481
- switch (*++pe) {
1482
- case 'n':
1483
- unescape = (char *) "\n";
1484
- break;
1485
- case 'r':
1486
- unescape = (char *) "\r";
1487
- break;
1488
- case 't':
1489
- unescape = (char *) "\t";
1490
- break;
1491
- case '"':
1492
- unescape = (char *) "\"";
1493
- break;
1494
- case '\\':
1495
- unescape = (char *) "\\";
1496
- break;
1497
- case 'b':
1498
- unescape = (char *) "\b";
1499
- break;
1500
- case 'f':
1501
- unescape = (char *) "\f";
1502
- break;
1503
- case 'u':
1504
- if (pe > stringEnd - 4) {
1505
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1506
- ruby_xfree(bufferStart);
1507
- }
1508
- raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
1509
- } else {
1510
- uint32_t ch = unescape_unicode((unsigned char *) ++pe);
1511
- pe += 3;
1512
- /* To handle values above U+FFFF, we take a sequence of
1513
- * \uXXXX escapes in the U+D800..U+DBFF then
1514
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
1515
- * to make a 20-bit number, then add 0x10000 to get the
1516
- * final codepoint.
1517
- *
1518
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
1519
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
1520
- * Area".
1521
- */
1522
- if ((ch & 0xFC00) == 0xD800) {
1523
- pe++;
1524
- if (pe > stringEnd - 6) {
1525
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1526
- ruby_xfree(bufferStart);
1527
- }
1528
- raise_parse_error("incomplete surrogate pair at '%s'", p);
1529
- }
1530
- if (pe[0] == '\\' && pe[1] == 'u') {
1531
- uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
1532
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
1533
- | (sur & 0x3FF));
1534
- pe += 5;
1535
- } else {
1536
- unescape = (char *) "?";
1537
- break;
1033
+ if (*state->cursor == ',') {
1034
+ state->cursor++;
1035
+ if (config->allow_trailing_comma) {
1036
+ json_eat_whitespace(state);
1037
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1038
+ continue;
1538
1039
  }
1539
1040
  }
1540
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
1541
- unescape = buf;
1041
+ json_parse_any(state, config);
1042
+ continue;
1542
1043
  }
1543
- break;
1544
- default:
1545
- p = pe;
1546
- continue;
1044
+ }
1045
+
1046
+ raise_parse_error("expected ',' or ']' after array value", state);
1547
1047
  }
1548
- MEMCPY(buffer, unescape, char, unescape_len);
1549
- buffer += unescape_len;
1550
- p = ++pe;
1551
- } else {
1552
- pe++;
1048
+ break;
1553
1049
  }
1554
- }
1050
+ case '{': {
1051
+ state->cursor++;
1052
+ json_eat_whitespace(state);
1053
+ long stack_head = state->stack->head;
1054
+
1055
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1056
+ state->cursor++;
1057
+ return json_push_value(state, config, json_decode_object(state, config, 0));
1058
+ } else {
1059
+ state->current_nesting++;
1060
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1061
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1062
+ }
1555
1063
 
1556
- if (pe > p) {
1557
- MEMCPY(buffer, p, char, pe - p);
1558
- buffer += pe - p;
1559
- }
1064
+ if (*state->cursor != '"') {
1065
+ raise_parse_error("expected object key, got '%s", state);
1066
+ }
1067
+ json_parse_string(state, config, true);
1560
1068
 
1561
- # ifdef HAVE_RB_ENC_INTERNED_STR
1562
- if (intern) {
1563
- result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
1564
- } else {
1565
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
1566
- }
1567
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1568
- ruby_xfree(bufferStart);
1569
- }
1570
- # else
1571
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
1069
+ json_eat_whitespace(state);
1070
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1071
+ raise_parse_error("expected ':' after object key", state);
1072
+ }
1073
+ state->cursor++;
1572
1074
 
1573
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1574
- ruby_xfree(bufferStart);
1575
- }
1075
+ json_parse_any(state, config);
1076
+ }
1576
1077
 
1577
- if (intern) {
1578
- # if STR_UMINUS_DEDUPE_FROZEN
1579
- // Starting from MRI 2.8 it is preferable to freeze the string
1580
- // before deduplication so that it can be interned directly
1581
- // otherwise it would be duplicated first which is wasteful.
1582
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
1583
- # elif STR_UMINUS_DEDUPE
1584
- // MRI 2.5 and older do not deduplicate strings that are already
1585
- // frozen.
1586
- result = rb_funcall(result, i_uminus, 0);
1587
- # else
1588
- result = rb_str_freeze(result);
1589
- # endif
1590
- }
1591
- # endif
1078
+ while (true) {
1079
+ json_eat_whitespace(state);
1592
1080
 
1593
- if (symbolize) {
1594
- result = rb_str_intern(result);
1595
- }
1081
+ if (state->cursor < state->end) {
1082
+ if (*state->cursor == '}') {
1083
+ state->cursor++;
1084
+ state->current_nesting--;
1085
+ long count = state->stack->head - stack_head;
1086
+ return json_push_value(state, config, json_decode_object(state, config, count));
1087
+ }
1596
1088
 
1597
- return result;
1598
- }
1089
+ if (*state->cursor == ',') {
1090
+ state->cursor++;
1091
+ json_eat_whitespace(state);
1599
1092
 
1093
+ if (config->allow_trailing_comma) {
1094
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1095
+ continue;
1096
+ }
1097
+ }
1600
1098
 
1601
- #line 1602 "parser.c"
1602
- enum {JSON_string_start = 1};
1603
- enum {JSON_string_first_final = 8};
1604
- enum {JSON_string_error = 0};
1099
+ if (*state->cursor != '"') {
1100
+ raise_parse_error("expected object key, got: '%s'", state);
1101
+ }
1102
+ json_parse_string(state, config, true);
1605
1103
 
1606
- enum {JSON_string_en_main = 1};
1104
+ json_eat_whitespace(state);
1105
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1106
+ raise_parse_error("expected ':' after object key, got: '%s", state);
1107
+ }
1108
+ state->cursor++;
1607
1109
 
1110
+ json_parse_any(state, config);
1608
1111
 
1609
- #line 630 "parser.rl"
1112
+ continue;
1113
+ }
1114
+ }
1610
1115
 
1116
+ raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
1117
+ }
1118
+ break;
1119
+ }
1611
1120
 
1612
- static int
1613
- match_i(VALUE regexp, VALUE klass, VALUE memo)
1614
- {
1615
- if (regexp == Qundef) return ST_STOP;
1616
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
1617
- RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
1618
- rb_ary_push(memo, klass);
1619
- return ST_STOP;
1121
+ default:
1122
+ raise_parse_error("unexpected character: '%s'", state);
1123
+ break;
1620
1124
  }
1621
- return ST_CONTINUE;
1125
+
1126
+ raise_parse_error("unreacheable: '%s'", state);
1622
1127
  }
1623
1128
 
1624
- static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
1129
+ static void json_ensure_eof(JSON_ParserState *state)
1625
1130
  {
1626
- int cs = EVIL;
1627
- VALUE match_string;
1628
-
1629
-
1630
- #line 1631 "parser.c"
1631
- {
1632
- cs = JSON_string_start;
1633
- }
1634
-
1635
- #line 650 "parser.rl"
1636
- json->memo = p;
1637
-
1638
- #line 1639 "parser.c"
1639
- {
1640
- if ( p == pe )
1641
- goto _test_eof;
1642
- switch ( cs )
1643
- {
1644
- case 1:
1645
- if ( (*p) == 34 )
1646
- goto st2;
1647
- goto st0;
1648
- st0:
1649
- cs = 0;
1650
- goto _out;
1651
- st2:
1652
- if ( ++p == pe )
1653
- goto _test_eof2;
1654
- case 2:
1655
- switch( (*p) ) {
1656
- case 34: goto tr2;
1657
- case 92: goto st3;
1658
- }
1659
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
1660
- goto st0;
1661
- goto st2;
1662
- tr2:
1663
- #line 617 "parser.rl"
1664
- {
1665
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1666
- if (NIL_P(*result)) {
1667
- p--;
1668
- {p++; cs = 8; goto _out;}
1669
- } else {
1670
- {p = (( p + 1))-1;}
1671
- }
1672
- }
1673
- #line 627 "parser.rl"
1674
- { p--; {p++; cs = 8; goto _out;} }
1675
- goto st8;
1676
- st8:
1677
- if ( ++p == pe )
1678
- goto _test_eof8;
1679
- case 8:
1680
- #line 1681 "parser.c"
1681
- goto st0;
1682
- st3:
1683
- if ( ++p == pe )
1684
- goto _test_eof3;
1685
- case 3:
1686
- if ( (*p) == 117 )
1687
- goto st4;
1688
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
1689
- goto st0;
1690
- goto st2;
1691
- st4:
1692
- if ( ++p == pe )
1693
- goto _test_eof4;
1694
- case 4:
1695
- if ( (*p) < 65 ) {
1696
- if ( 48 <= (*p) && (*p) <= 57 )
1697
- goto st5;
1698
- } else if ( (*p) > 70 ) {
1699
- if ( 97 <= (*p) && (*p) <= 102 )
1700
- goto st5;
1701
- } else
1702
- goto st5;
1703
- goto st0;
1704
- st5:
1705
- if ( ++p == pe )
1706
- goto _test_eof5;
1707
- case 5:
1708
- if ( (*p) < 65 ) {
1709
- if ( 48 <= (*p) && (*p) <= 57 )
1710
- goto st6;
1711
- } else if ( (*p) > 70 ) {
1712
- if ( 97 <= (*p) && (*p) <= 102 )
1713
- goto st6;
1714
- } else
1715
- goto st6;
1716
- goto st0;
1717
- st6:
1718
- if ( ++p == pe )
1719
- goto _test_eof6;
1720
- case 6:
1721
- if ( (*p) < 65 ) {
1722
- if ( 48 <= (*p) && (*p) <= 57 )
1723
- goto st7;
1724
- } else if ( (*p) > 70 ) {
1725
- if ( 97 <= (*p) && (*p) <= 102 )
1726
- goto st7;
1727
- } else
1728
- goto st7;
1729
- goto st0;
1730
- st7:
1731
- if ( ++p == pe )
1732
- goto _test_eof7;
1733
- case 7:
1734
- if ( (*p) < 65 ) {
1735
- if ( 48 <= (*p) && (*p) <= 57 )
1736
- goto st2;
1737
- } else if ( (*p) > 70 ) {
1738
- if ( 97 <= (*p) && (*p) <= 102 )
1739
- goto st2;
1740
- } else
1741
- goto st2;
1742
- goto st0;
1743
- }
1744
- _test_eof2: cs = 2; goto _test_eof;
1745
- _test_eof8: cs = 8; goto _test_eof;
1746
- _test_eof3: cs = 3; goto _test_eof;
1747
- _test_eof4: cs = 4; goto _test_eof;
1748
- _test_eof5: cs = 5; goto _test_eof;
1749
- _test_eof6: cs = 6; goto _test_eof;
1750
- _test_eof7: cs = 7; goto _test_eof;
1751
-
1752
- _test_eof: {}
1753
- _out: {}
1754
- }
1755
-
1756
- #line 652 "parser.rl"
1757
-
1758
- if (json->create_additions && RTEST(match_string = json->match_string)) {
1759
- VALUE klass;
1760
- VALUE memo = rb_ary_new2(2);
1761
- rb_ary_push(memo, *result);
1762
- rb_hash_foreach(match_string, match_i, memo);
1763
- klass = rb_ary_entry(memo, 1);
1764
- if (RTEST(klass)) {
1765
- *result = rb_funcall(klass, i_json_create, 1, *result);
1766
- }
1767
- }
1768
-
1769
- if (cs >= JSON_string_first_final) {
1770
- return p + 1;
1771
- } else {
1772
- return NULL;
1131
+ json_eat_whitespace(state);
1132
+ if (state->cursor != state->end) {
1133
+ raise_parse_error("unexpected token at end of stream '%s'", state);
1773
1134
  }
1774
1135
  }
1775
1136
 
@@ -1789,24 +1150,80 @@ static VALUE convert_encoding(VALUE source)
1789
1150
  {
1790
1151
  int encindex = RB_ENCODING_GET(source);
1791
1152
 
1792
- if (encindex == utf8_encindex) {
1153
+ if (RB_LIKELY(encindex == utf8_encindex)) {
1793
1154
  return source;
1794
1155
  }
1795
1156
 
1796
1157
  if (encindex == binary_encindex) {
1797
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1798
- // TODO: Deprecate in 2.8.0
1799
- // TODO: Remove in 3.0.0
1158
+ // For historical reason, we silently reinterpret binary strings as UTF-8
1800
1159
  return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1801
1160
  }
1802
1161
 
1803
- return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
1162
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
1163
+ }
1164
+
1165
+ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1166
+ {
1167
+ JSON_ParserConfig *config = (JSON_ParserConfig *)data;
1168
+
1169
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1170
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1171
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1172
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1173
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
1174
+ else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1175
+ else if (key == sym_decimal_class) {
1176
+ if (RTEST(val)) {
1177
+ if (rb_respond_to(val, i_try_convert)) {
1178
+ config->decimal_class = val;
1179
+ config->decimal_method_id = i_try_convert;
1180
+ } else if (rb_respond_to(val, i_new)) {
1181
+ config->decimal_class = val;
1182
+ config->decimal_method_id = i_new;
1183
+ } else if (RB_TYPE_P(val, T_CLASS)) {
1184
+ VALUE name = rb_class_name(val);
1185
+ const char *name_cstr = RSTRING_PTR(name);
1186
+ const char *last_colon = strrchr(name_cstr, ':');
1187
+ if (last_colon) {
1188
+ const char *mod_path_end = last_colon - 1;
1189
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1190
+ config->decimal_class = rb_path_to_class(mod_path);
1191
+
1192
+ const char *method_name_beg = last_colon + 1;
1193
+ long before_len = method_name_beg - name_cstr;
1194
+ long len = RSTRING_LEN(name) - before_len;
1195
+ VALUE method_name = rb_str_substr(name, before_len, len);
1196
+ config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
1197
+ } else {
1198
+ config->decimal_class = rb_mKernel;
1199
+ config->decimal_method_id = SYM2ID(rb_str_intern(name));
1200
+ }
1201
+ }
1202
+ }
1203
+ }
1204
+
1205
+ return ST_CONTINUE;
1206
+ }
1207
+
1208
+ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
1209
+ {
1210
+ config->max_nesting = 100;
1211
+
1212
+ if (!NIL_P(opts)) {
1213
+ Check_Type(opts, T_HASH);
1214
+ if (RHASH_SIZE(opts) > 0) {
1215
+ // We assume in most cases few keys are set so it's faster to go over
1216
+ // the provided keys than to check all possible keys.
1217
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)config);
1218
+ }
1219
+
1220
+ }
1804
1221
  }
1805
1222
 
1806
1223
  /*
1807
- * call-seq: new(source, opts => {})
1224
+ * call-seq: new(opts => {})
1808
1225
  *
1809
- * Creates a new JSON::Ext::Parser instance for the string _source_.
1226
+ * Creates a new JSON::Ext::ParserConfig instance.
1810
1227
  *
1811
1228
  * It will be configured by the _opts_ hash. _opts_ can have the following
1812
1229
  * keys:
@@ -1822,357 +1239,114 @@ static VALUE convert_encoding(VALUE source)
1822
1239
  * (keys) in a JSON object. Otherwise strings are returned, which is
1823
1240
  * also the default. It's not possible to use this option in
1824
1241
  * conjunction with the *create_additions* option.
1825
- * * *create_additions*: If set to false, the Parser doesn't create
1826
- * additions even if a matching class and create_id was found. This option
1827
- * defaults to false.
1828
- * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1829
- * instead of Hash to represent JSON objects. The type must respond to
1830
- * +new+ without arguments, and return an object that respond to +[]=+.
1831
- * * *array_class*: Defaults to Array If another type is provided, it will be used
1832
- * instead of Hash to represent JSON arrays. The type must respond to
1833
- * +new+ without arguments, and return an object that respond to +<<+.
1834
1242
  * * *decimal_class*: Specifies which class to use instead of the default
1835
1243
  * (Float) when parsing decimal numbers. This class must accept a single
1836
1244
  * string argument in its constructor.
1837
1245
  */
1838
- static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1246
+ static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
1839
1247
  {
1840
- VALUE source, opts;
1841
- GET_PARSER_INIT;
1248
+ GET_PARSER_CONFIG;
1842
1249
 
1843
- if (json->Vsource) {
1844
- rb_raise(rb_eTypeError, "already initialized instance");
1845
- }
1250
+ parser_config_init(config, opts);
1846
1251
 
1847
- rb_check_arity(argc, 1, 2);
1848
- source = argv[0];
1849
- opts = Qnil;
1850
- if (argc == 2) {
1851
- opts = argv[1];
1852
- Check_Type(argv[1], T_HASH);
1853
- if (RHASH_SIZE(argv[1]) > 0) {
1854
- opts = argv[1];
1855
- }
1856
- }
1252
+ RB_OBJ_WRITTEN(self, Qundef, config->decimal_class);
1857
1253
 
1858
- if (!NIL_P(opts)) {
1859
- VALUE tmp = ID2SYM(i_max_nesting);
1860
- if (option_given_p(opts, tmp)) {
1861
- VALUE max_nesting = rb_hash_aref(opts, tmp);
1862
- if (RTEST(max_nesting)) {
1863
- Check_Type(max_nesting, T_FIXNUM);
1864
- json->max_nesting = FIX2INT(max_nesting);
1865
- } else {
1866
- json->max_nesting = 0;
1867
- }
1868
- } else {
1869
- json->max_nesting = 100;
1870
- }
1871
- tmp = ID2SYM(i_allow_nan);
1872
- if (option_given_p(opts, tmp)) {
1873
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1874
- } else {
1875
- json->allow_nan = 0;
1876
- }
1877
- tmp = ID2SYM(i_symbolize_names);
1878
- if (option_given_p(opts, tmp)) {
1879
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1880
- } else {
1881
- json->symbolize_names = 0;
1882
- }
1883
- tmp = ID2SYM(i_freeze);
1884
- if (option_given_p(opts, tmp)) {
1885
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1886
- } else {
1887
- json->freeze = 0;
1888
- }
1889
- tmp = ID2SYM(i_create_additions);
1890
- if (option_given_p(opts, tmp)) {
1891
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
1892
- } else {
1893
- json->create_additions = 0;
1894
- }
1895
- if (json->symbolize_names && json->create_additions) {
1896
- rb_raise(rb_eArgError,
1897
- "options :symbolize_names and :create_additions cannot be "
1898
- " used in conjunction");
1899
- }
1900
- tmp = ID2SYM(i_create_id);
1901
- if (option_given_p(opts, tmp)) {
1902
- json->create_id = rb_hash_aref(opts, tmp);
1903
- } else {
1904
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
1905
- }
1906
- tmp = ID2SYM(i_object_class);
1907
- if (option_given_p(opts, tmp)) {
1908
- json->object_class = rb_hash_aref(opts, tmp);
1909
- } else {
1910
- json->object_class = Qnil;
1911
- }
1912
- tmp = ID2SYM(i_array_class);
1913
- if (option_given_p(opts, tmp)) {
1914
- json->array_class = rb_hash_aref(opts, tmp);
1915
- } else {
1916
- json->array_class = Qnil;
1917
- }
1918
- tmp = ID2SYM(i_decimal_class);
1919
- if (option_given_p(opts, tmp)) {
1920
- json->decimal_class = rb_hash_aref(opts, tmp);
1921
- } else {
1922
- json->decimal_class = Qnil;
1923
- }
1924
- tmp = ID2SYM(i_match_string);
1925
- if (option_given_p(opts, tmp)) {
1926
- VALUE match_string = rb_hash_aref(opts, tmp);
1927
- json->match_string = RTEST(match_string) ? match_string : Qnil;
1928
- } else {
1929
- json->match_string = Qnil;
1930
- }
1931
- } else {
1932
- json->max_nesting = 100;
1933
- json->allow_nan = 0;
1934
- json->create_additions = 0;
1935
- json->create_id = Qnil;
1936
- json->object_class = Qnil;
1937
- json->array_class = Qnil;
1938
- json->decimal_class = Qnil;
1939
- }
1940
- source = convert_encoding(StringValue(source));
1941
- StringValue(source);
1942
- json->len = RSTRING_LEN(source);
1943
- json->source = RSTRING_PTR(source);;
1944
- json->Vsource = source;
1945
1254
  return self;
1946
1255
  }
1947
1256
 
1257
+ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1258
+ {
1259
+ Vsource = convert_encoding(StringValue(Vsource));
1260
+ StringValue(Vsource);
1261
+
1262
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1263
+ rvalue_stack stack = {
1264
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1265
+ .ptr = rvalue_stack_buffer,
1266
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1267
+ };
1268
+
1269
+ long len;
1270
+ const char *start;
1271
+ RSTRING_GETMEM(Vsource, start, len);
1948
1272
 
1949
- #line 1950 "parser.c"
1950
- enum {JSON_start = 1};
1951
- enum {JSON_first_final = 10};
1952
- enum {JSON_error = 0};
1273
+ JSON_ParserState _state = {
1274
+ .start = start,
1275
+ .cursor = start,
1276
+ .end = start + len,
1277
+ .stack = &stack,
1278
+ };
1279
+ JSON_ParserState *state = &_state;
1953
1280
 
1954
- enum {JSON_en_main = 1};
1281
+ VALUE result = json_parse_any(state, config);
1955
1282
 
1283
+ // This may be skipped in case of exception, but
1284
+ // it won't cause a leak.
1285
+ rvalue_stack_eagerly_release(state->stack_handle);
1956
1286
 
1957
- #line 858 "parser.rl"
1287
+ json_ensure_eof(state);
1958
1288
 
1289
+ return result;
1290
+ }
1959
1291
 
1960
1292
  /*
1961
- * call-seq: parse()
1293
+ * call-seq: parse(source)
1962
1294
  *
1963
1295
  * Parses the current JSON text _source_ and returns the complete data
1964
1296
  * structure as a result.
1965
1297
  * It raises JSON::ParserError if fail to parse.
1966
1298
  */
1967
- static VALUE cParser_parse(VALUE self)
1299
+ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
1968
1300
  {
1969
- char *p, *pe;
1970
- int cs = EVIL;
1971
- VALUE result = Qnil;
1972
- GET_PARSER;
1973
-
1974
-
1975
- #line 1976 "parser.c"
1976
- {
1977
- cs = JSON_start;
1978
- }
1979
-
1980
- #line 875 "parser.rl"
1981
- p = json->source;
1982
- pe = p + json->len;
1983
-
1984
- #line 1985 "parser.c"
1985
- {
1986
- if ( p == pe )
1987
- goto _test_eof;
1988
- switch ( cs )
1989
- {
1990
- st1:
1991
- if ( ++p == pe )
1992
- goto _test_eof1;
1993
- case 1:
1994
- switch( (*p) ) {
1995
- case 13: goto st1;
1996
- case 32: goto st1;
1997
- case 34: goto tr2;
1998
- case 45: goto tr2;
1999
- case 47: goto st6;
2000
- case 73: goto tr2;
2001
- case 78: goto tr2;
2002
- case 91: goto tr2;
2003
- case 102: goto tr2;
2004
- case 110: goto tr2;
2005
- case 116: goto tr2;
2006
- case 123: goto tr2;
2007
- }
2008
- if ( (*p) > 10 ) {
2009
- if ( 48 <= (*p) && (*p) <= 57 )
2010
- goto tr2;
2011
- } else if ( (*p) >= 9 )
2012
- goto st1;
2013
- goto st0;
2014
- st0:
2015
- cs = 0;
2016
- goto _out;
2017
- tr2:
2018
- #line 850 "parser.rl"
2019
- {
2020
- char *np = JSON_parse_value(json, p, pe, &result, 0);
2021
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
2022
- }
2023
- goto st10;
2024
- st10:
2025
- if ( ++p == pe )
2026
- goto _test_eof10;
2027
- case 10:
2028
- #line 2029 "parser.c"
2029
- switch( (*p) ) {
2030
- case 13: goto st10;
2031
- case 32: goto st10;
2032
- case 47: goto st2;
2033
- }
2034
- if ( 9 <= (*p) && (*p) <= 10 )
2035
- goto st10;
2036
- goto st0;
2037
- st2:
2038
- if ( ++p == pe )
2039
- goto _test_eof2;
2040
- case 2:
2041
- switch( (*p) ) {
2042
- case 42: goto st3;
2043
- case 47: goto st5;
2044
- }
2045
- goto st0;
2046
- st3:
2047
- if ( ++p == pe )
2048
- goto _test_eof3;
2049
- case 3:
2050
- if ( (*p) == 42 )
2051
- goto st4;
2052
- goto st3;
2053
- st4:
2054
- if ( ++p == pe )
2055
- goto _test_eof4;
2056
- case 4:
2057
- switch( (*p) ) {
2058
- case 42: goto st4;
2059
- case 47: goto st10;
2060
- }
2061
- goto st3;
2062
- st5:
2063
- if ( ++p == pe )
2064
- goto _test_eof5;
2065
- case 5:
2066
- if ( (*p) == 10 )
2067
- goto st10;
2068
- goto st5;
2069
- st6:
2070
- if ( ++p == pe )
2071
- goto _test_eof6;
2072
- case 6:
2073
- switch( (*p) ) {
2074
- case 42: goto st7;
2075
- case 47: goto st9;
2076
- }
2077
- goto st0;
2078
- st7:
2079
- if ( ++p == pe )
2080
- goto _test_eof7;
2081
- case 7:
2082
- if ( (*p) == 42 )
2083
- goto st8;
2084
- goto st7;
2085
- st8:
2086
- if ( ++p == pe )
2087
- goto _test_eof8;
2088
- case 8:
2089
- switch( (*p) ) {
2090
- case 42: goto st8;
2091
- case 47: goto st1;
2092
- }
2093
- goto st7;
2094
- st9:
2095
- if ( ++p == pe )
2096
- goto _test_eof9;
2097
- case 9:
2098
- if ( (*p) == 10 )
2099
- goto st1;
2100
- goto st9;
2101
- }
2102
- _test_eof1: cs = 1; goto _test_eof;
2103
- _test_eof10: cs = 10; goto _test_eof;
2104
- _test_eof2: cs = 2; goto _test_eof;
2105
- _test_eof3: cs = 3; goto _test_eof;
2106
- _test_eof4: cs = 4; goto _test_eof;
2107
- _test_eof5: cs = 5; goto _test_eof;
2108
- _test_eof6: cs = 6; goto _test_eof;
2109
- _test_eof7: cs = 7; goto _test_eof;
2110
- _test_eof8: cs = 8; goto _test_eof;
2111
- _test_eof9: cs = 9; goto _test_eof;
2112
-
2113
- _test_eof: {}
2114
- _out: {}
2115
- }
2116
-
2117
- #line 878 "parser.rl"
2118
-
2119
- if (cs >= JSON_first_final && p == pe) {
2120
- return result;
2121
- } else {
2122
- raise_parse_error("unexpected token at '%s'", p);
2123
- return Qnil;
2124
- }
1301
+ GET_PARSER_CONFIG;
1302
+ return cParser_parse(config, Vsource);
2125
1303
  }
2126
1304
 
2127
- static void JSON_mark(void *ptr)
1305
+ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
2128
1306
  {
2129
- JSON_Parser *json = ptr;
2130
- rb_gc_mark(json->Vsource);
2131
- rb_gc_mark(json->create_id);
2132
- rb_gc_mark(json->object_class);
2133
- rb_gc_mark(json->array_class);
2134
- rb_gc_mark(json->decimal_class);
2135
- rb_gc_mark(json->match_string);
1307
+ Vsource = convert_encoding(StringValue(Vsource));
1308
+ StringValue(Vsource);
1309
+
1310
+ JSON_ParserConfig _config = {0};
1311
+ JSON_ParserConfig *config = &_config;
1312
+ parser_config_init(config, opts);
1313
+
1314
+ return cParser_parse(config, Vsource);
2136
1315
  }
2137
1316
 
2138
- static void JSON_free(void *ptr)
1317
+ static void JSON_ParserConfig_mark(void *ptr)
2139
1318
  {
2140
- JSON_Parser *json = ptr;
2141
- fbuffer_free(json->fbuffer);
2142
- ruby_xfree(json);
1319
+ JSON_ParserConfig *config = ptr;
1320
+ rb_gc_mark(config->on_load_proc);
1321
+ rb_gc_mark(config->decimal_class);
2143
1322
  }
2144
1323
 
2145
- static size_t JSON_memsize(const void *ptr)
1324
+ static void JSON_ParserConfig_free(void *ptr)
2146
1325
  {
2147
- const JSON_Parser *json = ptr;
2148
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
1326
+ JSON_ParserConfig *config = ptr;
1327
+ ruby_xfree(config);
2149
1328
  }
2150
1329
 
2151
- static const rb_data_type_t JSON_Parser_type = {
2152
- "JSON/Parser",
2153
- {JSON_mark, JSON_free, JSON_memsize,},
1330
+ static size_t JSON_ParserConfig_memsize(const void *ptr)
1331
+ {
1332
+ return sizeof(JSON_ParserConfig);
1333
+ }
1334
+
1335
+ static const rb_data_type_t JSON_ParserConfig_type = {
1336
+ "JSON::Ext::Parser/ParserConfig",
1337
+ {
1338
+ JSON_ParserConfig_mark,
1339
+ JSON_ParserConfig_free,
1340
+ JSON_ParserConfig_memsize,
1341
+ },
2154
1342
  0, 0,
2155
- RUBY_TYPED_FREE_IMMEDIATELY,
1343
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
2156
1344
  };
2157
1345
 
2158
1346
  static VALUE cJSON_parser_s_allocate(VALUE klass)
2159
1347
  {
2160
- JSON_Parser *json;
2161
- VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
2162
- json->fbuffer = fbuffer_alloc(0);
2163
- return obj;
2164
- }
2165
-
2166
- /*
2167
- * call-seq: source()
2168
- *
2169
- * Returns a copy of the current _source_ string, that was used to construct
2170
- * this Parser.
2171
- */
2172
- static VALUE cParser_source(VALUE self)
2173
- {
2174
- GET_PARSER;
2175
- return rb_str_dup(json->Vsource);
1348
+ JSON_ParserConfig *config;
1349
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
2176
1350
  }
2177
1351
 
2178
1352
  void Init_parser(void)
@@ -2184,14 +1358,16 @@ void Init_parser(void)
2184
1358
  #undef rb_intern
2185
1359
  rb_require("json/common");
2186
1360
  mJSON = rb_define_module("JSON");
2187
- mExt = rb_define_module_under(mJSON, "Ext");
2188
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
1361
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1362
+ VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
2189
1363
  eNestingError = rb_path2class("JSON::NestingError");
2190
1364
  rb_gc_register_mark_object(eNestingError);
2191
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
2192
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
2193
- rb_define_method(cParser, "parse", cParser_parse, 0);
2194
- rb_define_method(cParser, "source", cParser_source, 0);
1365
+ rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
1366
+ rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
1367
+ rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
1368
+
1369
+ VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
1370
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
2195
1371
 
2196
1372
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
2197
1373
  rb_gc_register_mark_object(CNaN);
@@ -2202,36 +1378,27 @@ void Init_parser(void)
2202
1378
  CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
2203
1379
  rb_gc_register_mark_object(CMinusInfinity);
2204
1380
 
2205
- i_json_creatable_p = rb_intern("json_creatable?");
2206
- i_json_create = rb_intern("json_create");
2207
- i_create_id = rb_intern("create_id");
2208
- i_create_additions = rb_intern("create_additions");
1381
+ rb_global_variable(&Encoding_UTF_8);
1382
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1383
+
1384
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1385
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1386
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1387
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1388
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1389
+ sym_on_load = ID2SYM(rb_intern("on_load"));
1390
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1391
+
2209
1392
  i_chr = rb_intern("chr");
2210
- i_max_nesting = rb_intern("max_nesting");
2211
- i_allow_nan = rb_intern("allow_nan");
2212
- i_symbolize_names = rb_intern("symbolize_names");
2213
- i_object_class = rb_intern("object_class");
2214
- i_array_class = rb_intern("array_class");
2215
- i_decimal_class = rb_intern("decimal_class");
2216
- i_match = rb_intern("match");
2217
- i_match_string = rb_intern("match_string");
2218
- i_deep_const_get = rb_intern("deep_const_get");
2219
1393
  i_aset = rb_intern("[]=");
2220
1394
  i_aref = rb_intern("[]");
2221
1395
  i_leftshift = rb_intern("<<");
2222
1396
  i_new = rb_intern("new");
2223
1397
  i_try_convert = rb_intern("try_convert");
2224
- i_freeze = rb_intern("freeze");
2225
1398
  i_uminus = rb_intern("-@");
1399
+ i_encode = rb_intern("encode");
2226
1400
 
2227
1401
  binary_encindex = rb_ascii8bit_encindex();
2228
1402
  utf8_encindex = rb_utf8_encindex();
1403
+ enc_utf8 = rb_utf8_encoding();
2229
1404
  }
2230
-
2231
- /*
2232
- * Local variables:
2233
- * mode: c
2234
- * c-file-style: ruby
2235
- * indent-tabs-mode: nil
2236
- * End:
2237
- */