json 2.7.2 → 2.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/BSDL +22 -0
  3. data/CHANGES.md +160 -17
  4. data/LEGAL +8 -0
  5. data/README.md +76 -211
  6. data/ext/json/ext/fbuffer/fbuffer.h +178 -95
  7. data/ext/json/ext/generator/extconf.rb +38 -2
  8. data/ext/json/ext/generator/generator.c +1311 -826
  9. data/ext/json/ext/generator/simd.h +112 -0
  10. data/ext/json/ext/parser/extconf.rb +6 -27
  11. data/ext/json/ext/parser/parser.c +1176 -1971
  12. data/ext/json/ext/vendor/fpconv.c +479 -0
  13. data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
  14. data/json.gemspec +44 -49
  15. data/lib/json/add/bigdecimal.rb +2 -2
  16. data/lib/json/add/complex.rb +1 -1
  17. data/lib/json/add/core.rb +1 -1
  18. data/lib/json/add/date.rb +1 -1
  19. data/lib/json/add/date_time.rb +1 -1
  20. data/lib/json/add/exception.rb +1 -1
  21. data/lib/json/add/ostruct.rb +1 -1
  22. data/lib/json/add/range.rb +1 -1
  23. data/lib/json/add/rational.rb +1 -1
  24. data/lib/json/add/regexp.rb +1 -1
  25. data/lib/json/add/struct.rb +1 -1
  26. data/lib/json/add/symbol.rb +8 -4
  27. data/lib/json/add/time.rb +3 -10
  28. data/lib/json/common.rb +647 -241
  29. data/lib/json/ext/generator/state.rb +106 -0
  30. data/lib/json/ext.rb +35 -5
  31. data/lib/json/generic_object.rb +1 -1
  32. data/lib/json/{pure → truffle_ruby}/generator.rb +322 -145
  33. data/lib/json/version.rb +3 -7
  34. data/lib/json.rb +16 -21
  35. metadata +18 -22
  36. data/ext/json/ext/generator/depend +0 -1
  37. data/ext/json/ext/generator/generator.h +0 -177
  38. data/ext/json/ext/parser/depend +0 -1
  39. data/ext/json/ext/parser/parser.h +0 -96
  40. data/ext/json/ext/parser/parser.rl +0 -971
  41. data/ext/json/extconf.rb +0 -3
  42. data/lib/json/pure/parser.rb +0 -337
  43. data/lib/json/pure.rb +0 -15
  44. /data/{LICENSE → COPYING} +0 -0
@@ -1,69 +1,343 @@
1
- /* This file is automatically generated from parser.rl by using ragel */
2
- #line 1 "parser.rl"
3
- #include "../fbuffer/fbuffer.h"
4
- #include "parser.h"
5
-
6
- #if defined HAVE_RUBY_ENCODING_H
7
- # define EXC_ENCODING rb_utf8_encoding(),
8
- # ifndef HAVE_RB_ENC_RAISE
9
- static void
10
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
11
- {
12
- va_list args;
13
- VALUE mesg;
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
14
3
 
15
- va_start(args, fmt);
16
- mesg = rb_enc_vsprintf(enc, fmt, args);
17
- va_end(args);
4
+ /* shims */
5
+ /* This is the fallback definition from Ruby 3.4 */
18
6
 
19
- rb_exc_raise(rb_exc_new3(exc, mesg));
20
- }
21
- # define rb_enc_raise enc_raise
7
+ #ifndef RBIMPL_STDBOOL_H
8
+ #if defined(__cplusplus)
9
+ # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
+ # include <cstdbool>
22
11
  # endif
23
- #else
24
- # define EXC_ENCODING /* nothing */
25
- # define rb_enc_raise rb_raise
12
+ #elif defined(HAVE_STDBOOL_H)
13
+ # include <stdbool.h>
14
+ #elif !defined(HAVE__BOOL)
15
+ typedef unsigned char _Bool;
16
+ # define bool _Bool
17
+ # define true ((_Bool)+1)
18
+ # define false ((_Bool)+0)
19
+ # define __bool_true_false_are_defined
20
+ #endif
26
21
  #endif
27
22
 
28
- /* unicode */
23
+ #ifndef RB_UNLIKELY
24
+ #define RB_UNLIKELY(expr) expr
25
+ #endif
29
26
 
30
- static const signed char digit_values[256] = {
31
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
34
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
35
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
36
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
37
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
38
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
39
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
40
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
42
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
43
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
44
- -1, -1, -1, -1, -1, -1, -1
27
+ #ifndef RB_LIKELY
28
+ #define RB_LIKELY(expr) expr
29
+ #endif
30
+
31
+ static VALUE mJSON, eNestingError, Encoding_UTF_8;
32
+ static VALUE CNaN, CInfinity, CMinusInfinity;
33
+
34
+ static ID i_chr, i_aset, i_aref,
35
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
36
+
37
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
38
+ sym_decimal_class, sym_on_load;
39
+
40
+ static int binary_encindex;
41
+ static int utf8_encindex;
42
+
43
+ #ifndef HAVE_RB_HASH_BULK_INSERT
44
+ // For TruffleRuby
45
+ void
46
+ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
47
+ {
48
+ long index = 0;
49
+ while (index < count) {
50
+ VALUE name = pairs[index++];
51
+ VALUE value = pairs[index++];
52
+ rb_hash_aset(hash, name, value);
53
+ }
54
+ RB_GC_GUARD(hash);
55
+ }
56
+ #endif
57
+
58
+ #ifndef HAVE_RB_HASH_NEW_CAPA
59
+ #define rb_hash_new_capa(n) rb_hash_new()
60
+ #endif
61
+
62
+
63
+ /* name cache */
64
+
65
+ #include <string.h>
66
+ #include <ctype.h>
67
+
68
+ // Object names are likely to be repeated, and are frozen.
69
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
70
+ // and save much more expensive lookups into the global fstring table.
71
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
72
+ // to be able to fit safely on the stack.
73
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
74
+ // performance.
75
+ #define JSON_RVALUE_CACHE_CAPA 63
76
+ typedef struct rvalue_cache_struct {
77
+ int length;
78
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
79
+ } rvalue_cache;
80
+
81
+ static rb_encoding *enc_utf8;
82
+
83
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
84
+
85
+ static inline VALUE build_interned_string(const char *str, const long length)
86
+ {
87
+ # ifdef HAVE_RB_ENC_INTERNED_STR
88
+ return rb_enc_interned_str(str, length, enc_utf8);
89
+ # else
90
+ VALUE rstring = rb_utf8_str_new(str, length);
91
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
92
+ # endif
93
+ }
94
+
95
+ static inline VALUE build_symbol(const char *str, const long length)
96
+ {
97
+ return rb_str_intern(build_interned_string(str, length));
98
+ }
99
+
100
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
101
+ {
102
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
103
+ cache->length++;
104
+ cache->entries[index] = rstring;
105
+ }
106
+
107
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
108
+ {
109
+ long rstring_length = RSTRING_LEN(rstring);
110
+ if (length == rstring_length) {
111
+ return memcmp(str, RSTRING_PTR(rstring), length);
112
+ } else {
113
+ return (int)(length - rstring_length);
114
+ }
115
+ }
116
+
117
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
118
+ {
119
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
120
+ // Common names aren't likely to be very long. So we just don't
121
+ // cache names above an arbitrary threshold.
122
+ return Qfalse;
123
+ }
124
+
125
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
126
+ // Simple heuristic, if the first character isn't a letter,
127
+ // we're much less likely to see this string again.
128
+ // We mostly want to cache strings that are likely to be repeated.
129
+ return Qfalse;
130
+ }
131
+
132
+ int low = 0;
133
+ int high = cache->length - 1;
134
+ int mid = 0;
135
+ int last_cmp = 0;
136
+
137
+ while (low <= high) {
138
+ mid = (high + low) >> 1;
139
+ VALUE entry = cache->entries[mid];
140
+ last_cmp = rstring_cache_cmp(str, length, entry);
141
+
142
+ if (last_cmp == 0) {
143
+ return entry;
144
+ } else if (last_cmp > 0) {
145
+ low = mid + 1;
146
+ } else {
147
+ high = mid - 1;
148
+ }
149
+ }
150
+
151
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
152
+ // We assume the overwhelming majority of names don't need to be escaped.
153
+ // But if they do, we have to fallback to the slow path.
154
+ return Qfalse;
155
+ }
156
+
157
+ VALUE rstring = build_interned_string(str, length);
158
+
159
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
160
+ if (last_cmp > 0) {
161
+ mid += 1;
162
+ }
163
+
164
+ rvalue_cache_insert_at(cache, mid, rstring);
165
+ }
166
+ return rstring;
167
+ }
168
+
169
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
170
+ {
171
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
172
+ // Common names aren't likely to be very long. So we just don't
173
+ // cache names above an arbitrary threshold.
174
+ return Qfalse;
175
+ }
176
+
177
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
178
+ // Simple heuristic, if the first character isn't a letter,
179
+ // we're much less likely to see this string again.
180
+ // We mostly want to cache strings that are likely to be repeated.
181
+ return Qfalse;
182
+ }
183
+
184
+ int low = 0;
185
+ int high = cache->length - 1;
186
+ int mid = 0;
187
+ int last_cmp = 0;
188
+
189
+ while (low <= high) {
190
+ mid = (high + low) >> 1;
191
+ VALUE entry = cache->entries[mid];
192
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
193
+
194
+ if (last_cmp == 0) {
195
+ return entry;
196
+ } else if (last_cmp > 0) {
197
+ low = mid + 1;
198
+ } else {
199
+ high = mid - 1;
200
+ }
201
+ }
202
+
203
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
204
+ // We assume the overwhelming majority of names don't need to be escaped.
205
+ // But if they do, we have to fallback to the slow path.
206
+ return Qfalse;
207
+ }
208
+
209
+ VALUE rsymbol = build_symbol(str, length);
210
+
211
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
212
+ if (last_cmp > 0) {
213
+ mid += 1;
214
+ }
215
+
216
+ rvalue_cache_insert_at(cache, mid, rsymbol);
217
+ }
218
+ return rsymbol;
219
+ }
220
+
221
+ /* rvalue stack */
222
+
223
+ #define RVALUE_STACK_INITIAL_CAPA 128
224
+
225
+ enum rvalue_stack_type {
226
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
227
+ RVALUE_STACK_STACK_ALLOCATED = 1,
45
228
  };
46
229
 
47
- static UTF32 unescape_unicode(const unsigned char *p)
230
+ typedef struct rvalue_stack_struct {
231
+ enum rvalue_stack_type type;
232
+ long capa;
233
+ long head;
234
+ VALUE *ptr;
235
+ } rvalue_stack;
236
+
237
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
238
+
239
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
48
240
  {
49
- signed char b;
50
- UTF32 result = 0;
51
- b = digit_values[p[0]];
52
- if (b < 0) return UNI_REPLACEMENT_CHAR;
53
- result = (result << 4) | (unsigned char)b;
54
- b = digit_values[p[1]];
55
- if (b < 0) return UNI_REPLACEMENT_CHAR;
56
- result = (result << 4) | (unsigned char)b;
57
- b = digit_values[p[2]];
58
- if (b < 0) return UNI_REPLACEMENT_CHAR;
59
- result = (result << 4) | (unsigned char)b;
60
- b = digit_values[p[3]];
61
- if (b < 0) return UNI_REPLACEMENT_CHAR;
62
- result = (result << 4) | (unsigned char)b;
63
- return result;
241
+ long required = stack->capa * 2;
242
+
243
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
244
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
245
+ } else {
246
+ REALLOC_N(stack->ptr, VALUE, required);
247
+ stack->capa = required;
248
+ }
249
+ return stack;
64
250
  }
65
251
 
66
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
252
+ static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
253
+ {
254
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
255
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
256
+ }
257
+ stack->ptr[stack->head] = value;
258
+ stack->head++;
259
+ return value;
260
+ }
261
+
262
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
263
+ {
264
+ return stack->ptr + (stack->head - count);
265
+ }
266
+
267
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
268
+ {
269
+ stack->head -= count;
270
+ }
271
+
272
+ static void rvalue_stack_mark(void *ptr)
273
+ {
274
+ rvalue_stack *stack = (rvalue_stack *)ptr;
275
+ long index;
276
+ for (index = 0; index < stack->head; index++) {
277
+ rb_gc_mark(stack->ptr[index]);
278
+ }
279
+ }
280
+
281
+ static void rvalue_stack_free(void *ptr)
282
+ {
283
+ rvalue_stack *stack = (rvalue_stack *)ptr;
284
+ if (stack) {
285
+ ruby_xfree(stack->ptr);
286
+ ruby_xfree(stack);
287
+ }
288
+ }
289
+
290
+ static size_t rvalue_stack_memsize(const void *ptr)
291
+ {
292
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
293
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
294
+ }
295
+
296
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
297
+ "JSON::Ext::Parser/rvalue_stack",
298
+ {
299
+ .dmark = rvalue_stack_mark,
300
+ .dfree = rvalue_stack_free,
301
+ .dsize = rvalue_stack_memsize,
302
+ },
303
+ 0, 0,
304
+ RUBY_TYPED_FREE_IMMEDIATELY,
305
+ };
306
+
307
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
308
+ {
309
+ rvalue_stack *stack;
310
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
311
+ *stack_ref = stack;
312
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
313
+
314
+ stack->capa = old_stack->capa << 1;
315
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
316
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
317
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
318
+ return stack;
319
+ }
320
+
321
+ static void rvalue_stack_eagerly_release(VALUE handle)
322
+ {
323
+ if (handle) {
324
+ rvalue_stack *stack;
325
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
326
+ RTYPEDDATA_DATA(handle) = NULL;
327
+ rvalue_stack_free(stack);
328
+ }
329
+ }
330
+
331
+
332
+ #ifndef HAVE_STRNLEN
333
+ static size_t strnlen(const char *s, size_t maxlen)
334
+ {
335
+ char *p;
336
+ return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
337
+ }
338
+ #endif
339
+
340
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
67
341
  {
68
342
  int len = 1;
69
343
  if (ch <= 0x7F) {
@@ -89,1677 +363,786 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
89
363
  return len;
90
364
  }
91
365
 
92
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
93
- static VALUE CNaN, CInfinity, CMinusInfinity;
366
+ typedef struct JSON_ParserStruct {
367
+ VALUE on_load_proc;
368
+ VALUE decimal_class;
369
+ ID decimal_method_id;
370
+ int max_nesting;
371
+ bool allow_nan;
372
+ bool allow_trailing_comma;
373
+ bool parsing_name;
374
+ bool symbolize_names;
375
+ bool freeze;
376
+ } JSON_ParserConfig;
377
+
378
+ typedef struct JSON_ParserStateStruct {
379
+ VALUE stack_handle;
380
+ const char *start;
381
+ const char *cursor;
382
+ const char *end;
383
+ rvalue_stack *stack;
384
+ rvalue_cache name_cache;
385
+ int in_array;
386
+ int current_nesting;
387
+ } JSON_ParserState;
388
+
389
+
390
+ #define PARSE_ERROR_FRAGMENT_LEN 32
391
+ #ifdef RBIMPL_ATTR_NORETURN
392
+ RBIMPL_ATTR_NORETURN()
393
+ #endif
394
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
395
+ {
396
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
94
397
 
95
- static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
96
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
97
- i_object_class, i_array_class, i_decimal_class, i_key_p,
98
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
99
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
398
+ const char *cursor = state->cursor;
399
+ long column = 0;
400
+ long line = 1;
100
401
 
402
+ while (cursor >= state->start) {
403
+ if (*cursor-- == '\n') {
404
+ break;
405
+ }
406
+ column++;
407
+ }
101
408
 
102
- #line 125 "parser.rl"
409
+ while (cursor >= state->start) {
410
+ if (*cursor-- == '\n') {
411
+ line++;
412
+ }
413
+ }
103
414
 
415
+ const char *ptr = "EOF";
416
+ if (state->cursor && state->cursor < state->end) {
417
+ ptr = state->cursor;
418
+ size_t len = 0;
419
+ while (len < PARSE_ERROR_FRAGMENT_LEN) {
420
+ char ch = ptr[len];
421
+ if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
422
+ break;
423
+ }
424
+ len++;
425
+ }
104
426
 
427
+ if (len) {
428
+ buffer[0] = '\'';
429
+ MEMCPY(buffer + 1, ptr, char, len);
105
430
 
106
- #line 107 "parser.c"
107
- enum {JSON_object_start = 1};
108
- enum {JSON_object_first_final = 27};
109
- enum {JSON_object_error = 0};
431
+ while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
432
+ len--;
433
+ }
110
434
 
111
- enum {JSON_object_en_main = 1};
435
+ if (buffer[len] >= 0xC0) { // multibyte character start
436
+ len--;
437
+ }
112
438
 
439
+ buffer[len + 1] = '\'';
440
+ buffer[len + 2] = '\0';
441
+ ptr = (const char *)buffer;
442
+ }
443
+ }
113
444
 
114
- #line 167 "parser.rl"
445
+ VALUE msg = rb_sprintf(format, ptr);
446
+ VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
447
+ RB_GC_GUARD(msg);
115
448
 
449
+ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
450
+ rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
451
+ rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
452
+ rb_exc_raise(exc);
453
+ }
116
454
 
117
- static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
455
+ #ifdef RBIMPL_ATTR_NORETURN
456
+ RBIMPL_ATTR_NORETURN()
457
+ #endif
458
+ static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
118
459
  {
119
- int cs = EVIL;
120
- VALUE last_name = Qnil;
121
- VALUE object_class = json->object_class;
460
+ state->cursor = at;
461
+ raise_parse_error(format, state);
462
+ }
122
463
 
123
- if (json->max_nesting && current_nesting > json->max_nesting) {
124
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
125
- }
464
+ /* unicode */
126
465
 
127
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
128
-
129
-
130
- #line 131 "parser.c"
131
- {
132
- cs = JSON_object_start;
133
- }
134
-
135
- #line 182 "parser.rl"
136
-
137
- #line 138 "parser.c"
138
- {
139
- if ( p == pe )
140
- goto _test_eof;
141
- switch ( cs )
142
- {
143
- case 1:
144
- if ( (*p) == 123 )
145
- goto st2;
146
- goto st0;
147
- st0:
148
- cs = 0;
149
- goto _out;
150
- st2:
151
- if ( ++p == pe )
152
- goto _test_eof2;
153
- case 2:
154
- switch( (*p) ) {
155
- case 13: goto st2;
156
- case 32: goto st2;
157
- case 34: goto tr2;
158
- case 47: goto st23;
159
- case 125: goto tr4;
160
- }
161
- if ( 9 <= (*p) && (*p) <= 10 )
162
- goto st2;
163
- goto st0;
164
- tr2:
165
- #line 149 "parser.rl"
166
- {
167
- char *np;
168
- json->parsing_name = 1;
169
- np = JSON_parse_string(json, p, pe, &last_name);
170
- json->parsing_name = 0;
171
- if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;}
172
- }
173
- goto st3;
174
- st3:
175
- if ( ++p == pe )
176
- goto _test_eof3;
177
- case 3:
178
- #line 179 "parser.c"
179
- switch( (*p) ) {
180
- case 13: goto st3;
181
- case 32: goto st3;
182
- case 47: goto st4;
183
- case 58: goto st8;
184
- }
185
- if ( 9 <= (*p) && (*p) <= 10 )
186
- goto st3;
187
- goto st0;
188
- st4:
189
- if ( ++p == pe )
190
- goto _test_eof4;
191
- case 4:
192
- switch( (*p) ) {
193
- case 42: goto st5;
194
- case 47: goto st7;
195
- }
196
- goto st0;
197
- st5:
198
- if ( ++p == pe )
199
- goto _test_eof5;
200
- case 5:
201
- if ( (*p) == 42 )
202
- goto st6;
203
- goto st5;
204
- st6:
205
- if ( ++p == pe )
206
- goto _test_eof6;
207
- case 6:
208
- switch( (*p) ) {
209
- case 42: goto st6;
210
- case 47: goto st3;
211
- }
212
- goto st5;
213
- st7:
214
- if ( ++p == pe )
215
- goto _test_eof7;
216
- case 7:
217
- if ( (*p) == 10 )
218
- goto st3;
219
- goto st7;
220
- st8:
221
- if ( ++p == pe )
222
- goto _test_eof8;
223
- case 8:
224
- switch( (*p) ) {
225
- case 13: goto st8;
226
- case 32: goto st8;
227
- case 34: goto tr11;
228
- case 45: goto tr11;
229
- case 47: goto st19;
230
- case 73: goto tr11;
231
- case 78: goto tr11;
232
- case 91: goto tr11;
233
- case 102: goto tr11;
234
- case 110: goto tr11;
235
- case 116: goto tr11;
236
- case 123: goto tr11;
237
- }
238
- if ( (*p) > 10 ) {
239
- if ( 48 <= (*p) && (*p) <= 57 )
240
- goto tr11;
241
- } else if ( (*p) >= 9 )
242
- goto st8;
243
- goto st0;
244
- tr11:
245
- #line 133 "parser.rl"
246
- {
247
- VALUE v = Qnil;
248
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
249
- if (np == NULL) {
250
- p--; {p++; cs = 9; goto _out;}
251
- } else {
252
- if (NIL_P(json->object_class)) {
253
- OBJ_FREEZE(last_name);
254
- rb_hash_aset(*result, last_name, v);
255
- } else {
256
- rb_funcall(*result, i_aset, 2, last_name, v);
257
- }
258
- {p = (( np))-1;}
259
- }
260
- }
261
- goto st9;
262
- st9:
263
- if ( ++p == pe )
264
- goto _test_eof9;
265
- case 9:
266
- #line 267 "parser.c"
267
- switch( (*p) ) {
268
- case 13: goto st9;
269
- case 32: goto st9;
270
- case 44: goto st10;
271
- case 47: goto st15;
272
- case 125: goto tr4;
273
- }
274
- if ( 9 <= (*p) && (*p) <= 10 )
275
- goto st9;
276
- goto st0;
277
- st10:
278
- if ( ++p == pe )
279
- goto _test_eof10;
280
- case 10:
281
- switch( (*p) ) {
282
- case 13: goto st10;
283
- case 32: goto st10;
284
- case 34: goto tr2;
285
- case 47: goto st11;
286
- }
287
- if ( 9 <= (*p) && (*p) <= 10 )
288
- goto st10;
289
- goto st0;
290
- st11:
291
- if ( ++p == pe )
292
- goto _test_eof11;
293
- case 11:
294
- switch( (*p) ) {
295
- case 42: goto st12;
296
- case 47: goto st14;
297
- }
298
- goto st0;
299
- st12:
300
- if ( ++p == pe )
301
- goto _test_eof12;
302
- case 12:
303
- if ( (*p) == 42 )
304
- goto st13;
305
- goto st12;
306
- st13:
307
- if ( ++p == pe )
308
- goto _test_eof13;
309
- case 13:
310
- switch( (*p) ) {
311
- case 42: goto st13;
312
- case 47: goto st10;
313
- }
314
- goto st12;
315
- st14:
316
- if ( ++p == pe )
317
- goto _test_eof14;
318
- case 14:
319
- if ( (*p) == 10 )
320
- goto st10;
321
- goto st14;
322
- st15:
323
- if ( ++p == pe )
324
- goto _test_eof15;
325
- case 15:
326
- switch( (*p) ) {
327
- case 42: goto st16;
328
- case 47: goto st18;
329
- }
330
- goto st0;
331
- st16:
332
- if ( ++p == pe )
333
- goto _test_eof16;
334
- case 16:
335
- if ( (*p) == 42 )
336
- goto st17;
337
- goto st16;
338
- st17:
339
- if ( ++p == pe )
340
- goto _test_eof17;
341
- case 17:
342
- switch( (*p) ) {
343
- case 42: goto st17;
344
- case 47: goto st9;
345
- }
346
- goto st16;
347
- st18:
348
- if ( ++p == pe )
349
- goto _test_eof18;
350
- case 18:
351
- if ( (*p) == 10 )
352
- goto st9;
353
- goto st18;
354
- tr4:
355
- #line 157 "parser.rl"
356
- { p--; {p++; cs = 27; goto _out;} }
357
- goto st27;
358
- st27:
359
- if ( ++p == pe )
360
- goto _test_eof27;
361
- case 27:
362
- #line 363 "parser.c"
363
- goto st0;
364
- st19:
365
- if ( ++p == pe )
366
- goto _test_eof19;
367
- case 19:
368
- switch( (*p) ) {
369
- case 42: goto st20;
370
- case 47: goto st22;
371
- }
372
- goto st0;
373
- st20:
374
- if ( ++p == pe )
375
- goto _test_eof20;
376
- case 20:
377
- if ( (*p) == 42 )
378
- goto st21;
379
- goto st20;
380
- st21:
381
- if ( ++p == pe )
382
- goto _test_eof21;
383
- case 21:
384
- switch( (*p) ) {
385
- case 42: goto st21;
386
- case 47: goto st8;
387
- }
388
- goto st20;
389
- st22:
390
- if ( ++p == pe )
391
- goto _test_eof22;
392
- case 22:
393
- if ( (*p) == 10 )
394
- goto st8;
395
- goto st22;
396
- st23:
397
- if ( ++p == pe )
398
- goto _test_eof23;
399
- case 23:
400
- switch( (*p) ) {
401
- case 42: goto st24;
402
- case 47: goto st26;
403
- }
404
- goto st0;
405
- st24:
406
- if ( ++p == pe )
407
- goto _test_eof24;
408
- case 24:
409
- if ( (*p) == 42 )
410
- goto st25;
411
- goto st24;
412
- st25:
413
- if ( ++p == pe )
414
- goto _test_eof25;
415
- case 25:
416
- switch( (*p) ) {
417
- case 42: goto st25;
418
- case 47: goto st2;
419
- }
420
- goto st24;
421
- st26:
422
- if ( ++p == pe )
423
- goto _test_eof26;
424
- case 26:
425
- if ( (*p) == 10 )
426
- goto st2;
427
- goto st26;
428
- }
429
- _test_eof2: cs = 2; goto _test_eof;
430
- _test_eof3: cs = 3; goto _test_eof;
431
- _test_eof4: cs = 4; goto _test_eof;
432
- _test_eof5: cs = 5; goto _test_eof;
433
- _test_eof6: cs = 6; goto _test_eof;
434
- _test_eof7: cs = 7; goto _test_eof;
435
- _test_eof8: cs = 8; goto _test_eof;
436
- _test_eof9: cs = 9; goto _test_eof;
437
- _test_eof10: cs = 10; goto _test_eof;
438
- _test_eof11: cs = 11; goto _test_eof;
439
- _test_eof12: cs = 12; goto _test_eof;
440
- _test_eof13: cs = 13; goto _test_eof;
441
- _test_eof14: cs = 14; goto _test_eof;
442
- _test_eof15: cs = 15; goto _test_eof;
443
- _test_eof16: cs = 16; goto _test_eof;
444
- _test_eof17: cs = 17; goto _test_eof;
445
- _test_eof18: cs = 18; goto _test_eof;
446
- _test_eof27: cs = 27; goto _test_eof;
447
- _test_eof19: cs = 19; goto _test_eof;
448
- _test_eof20: cs = 20; goto _test_eof;
449
- _test_eof21: cs = 21; goto _test_eof;
450
- _test_eof22: cs = 22; goto _test_eof;
451
- _test_eof23: cs = 23; goto _test_eof;
452
- _test_eof24: cs = 24; goto _test_eof;
453
- _test_eof25: cs = 25; goto _test_eof;
454
- _test_eof26: cs = 26; goto _test_eof;
455
-
456
- _test_eof: {}
457
- _out: {}
458
- }
459
-
460
- #line 183 "parser.rl"
461
-
462
- if (cs >= JSON_object_first_final) {
463
- if (json->create_additions) {
464
- VALUE klassname;
465
- if (NIL_P(json->object_class)) {
466
- klassname = rb_hash_aref(*result, json->create_id);
467
- } else {
468
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
466
+ static const signed char digit_values[256] = {
467
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
468
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
469
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
470
+ -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
471
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
472
+ 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
473
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
474
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
475
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
476
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
477
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
478
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
479
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
480
+ -1, -1, -1, -1, -1, -1, -1
481
+ };
482
+
483
+ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
484
+ {
485
+ signed char b;
486
+ uint32_t result = 0;
487
+ b = digit_values[p[0]];
488
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
489
+ result = (result << 4) | (unsigned char)b;
490
+ b = digit_values[p[1]];
491
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
492
+ result = (result << 4) | (unsigned char)b;
493
+ b = digit_values[p[2]];
494
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
495
+ result = (result << 4) | (unsigned char)b;
496
+ b = digit_values[p[3]];
497
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
498
+ result = (result << 4) | (unsigned char)b;
499
+ return result;
500
+ }
501
+
502
+ #define GET_PARSER_CONFIG \
503
+ JSON_ParserConfig *config; \
504
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
505
+
506
+ static const rb_data_type_t JSON_ParserConfig_type;
507
+
508
+ static const bool whitespace[256] = {
509
+ [' '] = 1,
510
+ ['\t'] = 1,
511
+ ['\n'] = 1,
512
+ ['\r'] = 1,
513
+ ['/'] = 1,
514
+ };
515
+
516
+ static void
517
+ json_eat_comments(JSON_ParserState *state)
518
+ {
519
+ if (state->cursor + 1 < state->end) {
520
+ switch(state->cursor[1]) {
521
+ case '/': {
522
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
523
+ if (!state->cursor) {
524
+ state->cursor = state->end;
525
+ } else {
526
+ state->cursor++;
527
+ }
528
+ break;
469
529
  }
470
- if (!NIL_P(klassname)) {
471
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
472
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
473
- *result = rb_funcall(klass, i_json_create, 1, *result);
530
+ case '*': {
531
+ state->cursor += 2;
532
+ while (true) {
533
+ state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
534
+ if (!state->cursor) {
535
+ raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
536
+ } else {
537
+ state->cursor++;
538
+ if (state->cursor < state->end && *state->cursor == '/') {
539
+ state->cursor++;
540
+ break;
541
+ }
542
+ }
474
543
  }
544
+ break;
475
545
  }
546
+ default:
547
+ raise_parse_error("unexpected token %s", state);
548
+ break;
476
549
  }
477
- return p + 1;
478
550
  } else {
479
- return NULL;
551
+ raise_parse_error("unexpected token %s", state);
480
552
  }
481
553
  }
482
554
 
555
+ static inline void
556
+ json_eat_whitespace(JSON_ParserState *state)
557
+ {
558
+ while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
559
+ if (RB_LIKELY(*state->cursor != '/')) {
560
+ state->cursor++;
561
+ } else {
562
+ json_eat_comments(state);
563
+ }
564
+ }
565
+ }
483
566
 
567
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
568
+ {
569
+ if (symbolize) {
570
+ intern = true;
571
+ }
572
+ VALUE result;
573
+ # ifdef HAVE_RB_ENC_INTERNED_STR
574
+ if (intern) {
575
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
576
+ } else {
577
+ result = rb_utf8_str_new(start, (long)(end - start));
578
+ }
579
+ # else
580
+ result = rb_utf8_str_new(start, (long)(end - start));
581
+ if (intern) {
582
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
583
+ }
584
+ # endif
484
585
 
485
- #line 486 "parser.c"
486
- enum {JSON_value_start = 1};
487
- enum {JSON_value_first_final = 29};
488
- enum {JSON_value_error = 0};
489
-
490
- enum {JSON_value_en_main = 1};
586
+ if (symbolize) {
587
+ result = rb_str_intern(result);
588
+ }
491
589
 
590
+ return result;
591
+ }
492
592
 
493
- #line 283 "parser.rl"
593
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
594
+ {
595
+ size_t bufferSize = stringEnd - string;
494
596
 
597
+ if (is_name && state->in_array) {
598
+ VALUE cached_key;
599
+ if (RB_UNLIKELY(symbolize)) {
600
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
601
+ } else {
602
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
603
+ }
495
604
 
496
- static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
497
- {
498
- int cs = EVIL;
499
-
500
-
501
- #line 502 "parser.c"
502
- {
503
- cs = JSON_value_start;
504
- }
505
-
506
- #line 290 "parser.rl"
507
-
508
- #line 509 "parser.c"
509
- {
510
- if ( p == pe )
511
- goto _test_eof;
512
- switch ( cs )
513
- {
514
- st1:
515
- if ( ++p == pe )
516
- goto _test_eof1;
517
- case 1:
518
- switch( (*p) ) {
519
- case 13: goto st1;
520
- case 32: goto st1;
521
- case 34: goto tr2;
522
- case 45: goto tr3;
523
- case 47: goto st6;
524
- case 73: goto st10;
525
- case 78: goto st17;
526
- case 91: goto tr7;
527
- case 102: goto st19;
528
- case 110: goto st23;
529
- case 116: goto st26;
530
- case 123: goto tr11;
531
- }
532
- if ( (*p) > 10 ) {
533
- if ( 48 <= (*p) && (*p) <= 57 )
534
- goto tr3;
535
- } else if ( (*p) >= 9 )
536
- goto st1;
537
- goto st0;
538
- st0:
539
- cs = 0;
540
- goto _out;
541
- tr2:
542
- #line 235 "parser.rl"
543
- {
544
- char *np = JSON_parse_string(json, p, pe, result);
545
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
546
- }
547
- goto st29;
548
- tr3:
549
- #line 240 "parser.rl"
550
- {
551
- char *np;
552
- if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
553
- if (json->allow_nan) {
554
- *result = CMinusInfinity;
555
- {p = (( p + 10))-1;}
556
- p--; {p++; cs = 29; goto _out;}
557
- } else {
558
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
559
- }
605
+ if (RB_LIKELY(cached_key)) {
606
+ return cached_key;
560
607
  }
561
- np = JSON_parse_float(json, p, pe, result);
562
- if (np != NULL) {p = (( np))-1;}
563
- np = JSON_parse_integer(json, p, pe, result);
564
- if (np != NULL) {p = (( np))-1;}
565
- p--; {p++; cs = 29; goto _out;}
566
- }
567
- goto st29;
568
- tr7:
569
- #line 258 "parser.rl"
570
- {
571
- char *np;
572
- np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
573
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
574
608
  }
575
- goto st29;
576
- tr11:
577
- #line 264 "parser.rl"
578
- {
579
- char *np;
580
- np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
581
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
582
- }
583
- goto st29;
584
- tr25:
585
- #line 228 "parser.rl"
586
- {
587
- if (json->allow_nan) {
588
- *result = CInfinity;
609
+
610
+ return build_string(string, stringEnd, intern, symbolize);
611
+ }
612
+
613
+ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
614
+ {
615
+ size_t bufferSize = stringEnd - string;
616
+ const char *p = string, *pe = string, *unescape, *bufferStart;
617
+ char *buffer;
618
+ int unescape_len;
619
+ char buf[4];
620
+
621
+ if (is_name && state->in_array) {
622
+ VALUE cached_key;
623
+ if (RB_UNLIKELY(symbolize)) {
624
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
589
625
  } else {
590
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 7);
626
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
627
+ }
628
+
629
+ if (RB_LIKELY(cached_key)) {
630
+ return cached_key;
591
631
  }
592
632
  }
593
- goto st29;
594
- tr27:
595
- #line 221 "parser.rl"
596
- {
597
- if (json->allow_nan) {
598
- *result = CNaN;
599
- } else {
600
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 2);
633
+
634
+ VALUE result = rb_str_buf_new(bufferSize);
635
+ rb_enc_associate_index(result, utf8_encindex);
636
+ buffer = RSTRING_PTR(result);
637
+ bufferStart = buffer;
638
+
639
+ while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
640
+ unescape = (char *) "?";
641
+ unescape_len = 1;
642
+ if (pe > p) {
643
+ MEMCPY(buffer, p, char, pe - p);
644
+ buffer += pe - p;
601
645
  }
646
+ switch (*++pe) {
647
+ case 'n':
648
+ unescape = (char *) "\n";
649
+ break;
650
+ case 'r':
651
+ unescape = (char *) "\r";
652
+ break;
653
+ case 't':
654
+ unescape = (char *) "\t";
655
+ break;
656
+ case '"':
657
+ unescape = (char *) "\"";
658
+ break;
659
+ case '\\':
660
+ unescape = (char *) "\\";
661
+ break;
662
+ case 'b':
663
+ unescape = (char *) "\b";
664
+ break;
665
+ case 'f':
666
+ unescape = (char *) "\f";
667
+ break;
668
+ case 'u':
669
+ if (pe > stringEnd - 5) {
670
+ raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
671
+ } else {
672
+ uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
673
+ pe += 3;
674
+ /* To handle values above U+FFFF, we take a sequence of
675
+ * \uXXXX escapes in the U+D800..U+DBFF then
676
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
677
+ * to make a 20-bit number, then add 0x10000 to get the
678
+ * final codepoint.
679
+ *
680
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
681
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
682
+ * Area".
683
+ */
684
+ if ((ch & 0xFC00) == 0xD800) {
685
+ pe++;
686
+ if (pe > stringEnd - 6) {
687
+ raise_parse_error_at("incomplete surrogate pair at %s", state, p);
688
+ }
689
+ if (pe[0] == '\\' && pe[1] == 'u') {
690
+ uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
691
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
692
+ | (sur & 0x3FF));
693
+ pe += 5;
694
+ } else {
695
+ unescape = (char *) "?";
696
+ break;
697
+ }
698
+ }
699
+ unescape_len = convert_UTF32_to_UTF8(buf, ch);
700
+ unescape = buf;
701
+ }
702
+ break;
703
+ default:
704
+ p = pe;
705
+ continue;
706
+ }
707
+ MEMCPY(buffer, unescape, char, unescape_len);
708
+ buffer += unescape_len;
709
+ p = ++pe;
602
710
  }
603
- goto st29;
604
- tr31:
605
- #line 215 "parser.rl"
606
- {
607
- *result = Qfalse;
711
+
712
+ if (stringEnd > p) {
713
+ MEMCPY(buffer, p, char, stringEnd - p);
714
+ buffer += stringEnd - p;
608
715
  }
609
- goto st29;
610
- tr34:
611
- #line 212 "parser.rl"
612
- {
613
- *result = Qnil;
716
+ rb_str_set_len(result, buffer - bufferStart);
717
+
718
+ if (symbolize) {
719
+ result = rb_str_intern(result);
720
+ } else if (intern) {
721
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
614
722
  }
615
- goto st29;
616
- tr37:
617
- #line 218 "parser.rl"
618
- {
619
- *result = Qtrue;
723
+
724
+ return result;
725
+ }
726
+
727
+ #define MAX_FAST_INTEGER_SIZE 18
728
+ static inline VALUE fast_decode_integer(const char *p, const char *pe)
729
+ {
730
+ bool negative = false;
731
+ if (*p == '-') {
732
+ negative = true;
733
+ p++;
620
734
  }
621
- goto st29;
622
- st29:
623
- if ( ++p == pe )
624
- goto _test_eof29;
625
- case 29:
626
- #line 270 "parser.rl"
627
- { p--; {p++; cs = 29; goto _out;} }
628
- #line 629 "parser.c"
629
- switch( (*p) ) {
630
- case 13: goto st29;
631
- case 32: goto st29;
632
- case 47: goto st2;
633
- }
634
- if ( 9 <= (*p) && (*p) <= 10 )
635
- goto st29;
636
- goto st0;
637
- st2:
638
- if ( ++p == pe )
639
- goto _test_eof2;
640
- case 2:
641
- switch( (*p) ) {
642
- case 42: goto st3;
643
- case 47: goto st5;
644
- }
645
- goto st0;
646
- st3:
647
- if ( ++p == pe )
648
- goto _test_eof3;
649
- case 3:
650
- if ( (*p) == 42 )
651
- goto st4;
652
- goto st3;
653
- st4:
654
- if ( ++p == pe )
655
- goto _test_eof4;
656
- case 4:
657
- switch( (*p) ) {
658
- case 42: goto st4;
659
- case 47: goto st29;
660
- }
661
- goto st3;
662
- st5:
663
- if ( ++p == pe )
664
- goto _test_eof5;
665
- case 5:
666
- if ( (*p) == 10 )
667
- goto st29;
668
- goto st5;
669
- st6:
670
- if ( ++p == pe )
671
- goto _test_eof6;
672
- case 6:
673
- switch( (*p) ) {
674
- case 42: goto st7;
675
- case 47: goto st9;
676
- }
677
- goto st0;
678
- st7:
679
- if ( ++p == pe )
680
- goto _test_eof7;
681
- case 7:
682
- if ( (*p) == 42 )
683
- goto st8;
684
- goto st7;
685
- st8:
686
- if ( ++p == pe )
687
- goto _test_eof8;
688
- case 8:
689
- switch( (*p) ) {
690
- case 42: goto st8;
691
- case 47: goto st1;
692
- }
693
- goto st7;
694
- st9:
695
- if ( ++p == pe )
696
- goto _test_eof9;
697
- case 9:
698
- if ( (*p) == 10 )
699
- goto st1;
700
- goto st9;
701
- st10:
702
- if ( ++p == pe )
703
- goto _test_eof10;
704
- case 10:
705
- if ( (*p) == 110 )
706
- goto st11;
707
- goto st0;
708
- st11:
709
- if ( ++p == pe )
710
- goto _test_eof11;
711
- case 11:
712
- if ( (*p) == 102 )
713
- goto st12;
714
- goto st0;
715
- st12:
716
- if ( ++p == pe )
717
- goto _test_eof12;
718
- case 12:
719
- if ( (*p) == 105 )
720
- goto st13;
721
- goto st0;
722
- st13:
723
- if ( ++p == pe )
724
- goto _test_eof13;
725
- case 13:
726
- if ( (*p) == 110 )
727
- goto st14;
728
- goto st0;
729
- st14:
730
- if ( ++p == pe )
731
- goto _test_eof14;
732
- case 14:
733
- if ( (*p) == 105 )
734
- goto st15;
735
- goto st0;
736
- st15:
737
- if ( ++p == pe )
738
- goto _test_eof15;
739
- case 15:
740
- if ( (*p) == 116 )
741
- goto st16;
742
- goto st0;
743
- st16:
744
- if ( ++p == pe )
745
- goto _test_eof16;
746
- case 16:
747
- if ( (*p) == 121 )
748
- goto tr25;
749
- goto st0;
750
- st17:
751
- if ( ++p == pe )
752
- goto _test_eof17;
753
- case 17:
754
- if ( (*p) == 97 )
755
- goto st18;
756
- goto st0;
757
- st18:
758
- if ( ++p == pe )
759
- goto _test_eof18;
760
- case 18:
761
- if ( (*p) == 78 )
762
- goto tr27;
763
- goto st0;
764
- st19:
765
- if ( ++p == pe )
766
- goto _test_eof19;
767
- case 19:
768
- if ( (*p) == 97 )
769
- goto st20;
770
- goto st0;
771
- st20:
772
- if ( ++p == pe )
773
- goto _test_eof20;
774
- case 20:
775
- if ( (*p) == 108 )
776
- goto st21;
777
- goto st0;
778
- st21:
779
- if ( ++p == pe )
780
- goto _test_eof21;
781
- case 21:
782
- if ( (*p) == 115 )
783
- goto st22;
784
- goto st0;
785
- st22:
786
- if ( ++p == pe )
787
- goto _test_eof22;
788
- case 22:
789
- if ( (*p) == 101 )
790
- goto tr31;
791
- goto st0;
792
- st23:
793
- if ( ++p == pe )
794
- goto _test_eof23;
795
- case 23:
796
- if ( (*p) == 117 )
797
- goto st24;
798
- goto st0;
799
- st24:
800
- if ( ++p == pe )
801
- goto _test_eof24;
802
- case 24:
803
- if ( (*p) == 108 )
804
- goto st25;
805
- goto st0;
806
- st25:
807
- if ( ++p == pe )
808
- goto _test_eof25;
809
- case 25:
810
- if ( (*p) == 108 )
811
- goto tr34;
812
- goto st0;
813
- st26:
814
- if ( ++p == pe )
815
- goto _test_eof26;
816
- case 26:
817
- if ( (*p) == 114 )
818
- goto st27;
819
- goto st0;
820
- st27:
821
- if ( ++p == pe )
822
- goto _test_eof27;
823
- case 27:
824
- if ( (*p) == 117 )
825
- goto st28;
826
- goto st0;
827
- st28:
828
- if ( ++p == pe )
829
- goto _test_eof28;
830
- case 28:
831
- if ( (*p) == 101 )
832
- goto tr37;
833
- goto st0;
834
- }
835
- _test_eof1: cs = 1; goto _test_eof;
836
- _test_eof29: cs = 29; goto _test_eof;
837
- _test_eof2: cs = 2; goto _test_eof;
838
- _test_eof3: cs = 3; goto _test_eof;
839
- _test_eof4: cs = 4; goto _test_eof;
840
- _test_eof5: cs = 5; goto _test_eof;
841
- _test_eof6: cs = 6; goto _test_eof;
842
- _test_eof7: cs = 7; goto _test_eof;
843
- _test_eof8: cs = 8; goto _test_eof;
844
- _test_eof9: cs = 9; goto _test_eof;
845
- _test_eof10: cs = 10; goto _test_eof;
846
- _test_eof11: cs = 11; goto _test_eof;
847
- _test_eof12: cs = 12; goto _test_eof;
848
- _test_eof13: cs = 13; goto _test_eof;
849
- _test_eof14: cs = 14; goto _test_eof;
850
- _test_eof15: cs = 15; goto _test_eof;
851
- _test_eof16: cs = 16; goto _test_eof;
852
- _test_eof17: cs = 17; goto _test_eof;
853
- _test_eof18: cs = 18; goto _test_eof;
854
- _test_eof19: cs = 19; goto _test_eof;
855
- _test_eof20: cs = 20; goto _test_eof;
856
- _test_eof21: cs = 21; goto _test_eof;
857
- _test_eof22: cs = 22; goto _test_eof;
858
- _test_eof23: cs = 23; goto _test_eof;
859
- _test_eof24: cs = 24; goto _test_eof;
860
- _test_eof25: cs = 25; goto _test_eof;
861
- _test_eof26: cs = 26; goto _test_eof;
862
- _test_eof27: cs = 27; goto _test_eof;
863
- _test_eof28: cs = 28; goto _test_eof;
864
-
865
- _test_eof: {}
866
- _out: {}
867
- }
868
-
869
- #line 291 "parser.rl"
870
-
871
- if (json->freeze) {
872
- OBJ_FREEZE(*result);
735
+
736
+ long long memo = 0;
737
+ while (p < pe) {
738
+ memo *= 10;
739
+ memo += *p - '0';
740
+ p++;
873
741
  }
874
742
 
875
- if (cs >= JSON_value_first_final) {
876
- return p;
743
+ if (negative) {
744
+ memo = -memo;
745
+ }
746
+ return LL2NUM(memo);
747
+ }
748
+
749
+ static VALUE json_decode_large_integer(const char *start, long len)
750
+ {
751
+ VALUE buffer_v;
752
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
753
+ MEMCPY(buffer, start, char, len);
754
+ buffer[len] = '\0';
755
+ VALUE number = rb_cstr2inum(buffer, 10);
756
+ RB_ALLOCV_END(buffer_v);
757
+ return number;
758
+ }
759
+
760
+ static inline VALUE
761
+ json_decode_integer(const char *start, const char *end)
762
+ {
763
+ long len = end - start;
764
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
765
+ return fast_decode_integer(start, end);
766
+ }
767
+ return json_decode_large_integer(start, len);
768
+ }
769
+
770
+ static VALUE json_decode_large_float(const char *start, long len)
771
+ {
772
+ VALUE buffer_v;
773
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
774
+ MEMCPY(buffer, start, char, len);
775
+ buffer[len] = '\0';
776
+ VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
777
+ RB_ALLOCV_END(buffer_v);
778
+ return number;
779
+ }
780
+
781
+ static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
782
+ {
783
+ long len = end - start;
784
+
785
+ if (RB_UNLIKELY(config->decimal_class)) {
786
+ VALUE text = rb_str_new(start, len);
787
+ return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
788
+ } else if (RB_LIKELY(len < 64)) {
789
+ char buffer[64];
790
+ MEMCPY(buffer, start, char, len);
791
+ buffer[len] = '\0';
792
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
877
793
  } else {
878
- return NULL;
794
+ return json_decode_large_float(start, len);
879
795
  }
880
796
  }
881
797
 
798
+ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
799
+ {
800
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
801
+ rvalue_stack_pop(state->stack, count);
802
+
803
+ if (config->freeze) {
804
+ RB_OBJ_FREEZE(array);
805
+ }
882
806
 
883
- #line 884 "parser.c"
884
- enum {JSON_integer_start = 1};
885
- enum {JSON_integer_first_final = 3};
886
- enum {JSON_integer_error = 0};
807
+ return array;
808
+ }
887
809
 
888
- enum {JSON_integer_en_main = 1};
810
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
811
+ {
812
+ VALUE object = rb_hash_new_capa(count);
813
+ rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
889
814
 
815
+ rvalue_stack_pop(state->stack, count);
890
816
 
891
- #line 311 "parser.rl"
817
+ if (config->freeze) {
818
+ RB_OBJ_FREEZE(object);
819
+ }
892
820
 
821
+ return object;
822
+ }
893
823
 
894
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
824
+ static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
895
825
  {
896
- int cs = EVIL;
897
-
898
-
899
- #line 900 "parser.c"
900
- {
901
- cs = JSON_integer_start;
902
- }
903
-
904
- #line 318 "parser.rl"
905
- json->memo = p;
906
-
907
- #line 908 "parser.c"
908
- {
909
- if ( p == pe )
910
- goto _test_eof;
911
- switch ( cs )
912
- {
913
- case 1:
914
- switch( (*p) ) {
915
- case 45: goto st2;
916
- case 48: goto st3;
917
- }
918
- if ( 49 <= (*p) && (*p) <= 57 )
919
- goto st5;
920
- goto st0;
921
- st0:
922
- cs = 0;
923
- goto _out;
924
- st2:
925
- if ( ++p == pe )
926
- goto _test_eof2;
927
- case 2:
928
- if ( (*p) == 48 )
929
- goto st3;
930
- if ( 49 <= (*p) && (*p) <= 57 )
931
- goto st5;
932
- goto st0;
933
- st3:
934
- if ( ++p == pe )
935
- goto _test_eof3;
936
- case 3:
937
- if ( 48 <= (*p) && (*p) <= 57 )
938
- goto st0;
939
- goto tr4;
940
- tr4:
941
- #line 308 "parser.rl"
942
- { p--; {p++; cs = 4; goto _out;} }
943
- goto st4;
944
- st4:
945
- if ( ++p == pe )
946
- goto _test_eof4;
947
- case 4:
948
- #line 949 "parser.c"
949
- goto st0;
950
- st5:
951
- if ( ++p == pe )
952
- goto _test_eof5;
953
- case 5:
954
- if ( 48 <= (*p) && (*p) <= 57 )
955
- goto st5;
956
- goto tr4;
957
- }
958
- _test_eof2: cs = 2; goto _test_eof;
959
- _test_eof3: cs = 3; goto _test_eof;
960
- _test_eof4: cs = 4; goto _test_eof;
961
- _test_eof5: cs = 5; goto _test_eof;
962
-
963
- _test_eof: {}
964
- _out: {}
965
- }
966
-
967
- #line 320 "parser.rl"
968
-
969
- if (cs >= JSON_integer_first_final) {
970
- long len = p - json->memo;
971
- fbuffer_clear(json->fbuffer);
972
- fbuffer_append(json->fbuffer, json->memo, len);
973
- fbuffer_append_char(json->fbuffer, '\0');
974
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
975
- return p + 1;
826
+ VALUE string;
827
+ bool intern = is_name || config->freeze;
828
+ bool symbolize = is_name && config->symbolize_names;
829
+ if (escaped) {
830
+ string = json_string_unescape(state, start, end, is_name, intern, symbolize);
976
831
  } else {
977
- return NULL;
832
+ string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
978
833
  }
979
- }
980
834
 
835
+ return string;
836
+ }
981
837
 
982
- #line 983 "parser.c"
983
- enum {JSON_float_start = 1};
984
- enum {JSON_float_first_final = 8};
985
- enum {JSON_float_error = 0};
838
+ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
839
+ {
840
+ if (RB_UNLIKELY(config->on_load_proc)) {
841
+ value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
842
+ }
843
+ rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
844
+ return value;
845
+ }
986
846
 
987
- enum {JSON_float_en_main = 1};
847
+ static const bool string_scan[256] = {
848
+ // ASCII Control Characters
849
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
850
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
851
+ // ASCII Characters
852
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
853
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
854
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
855
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\'
856
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
857
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
858
+ };
988
859
 
860
+ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
861
+ {
862
+ state->cursor++;
863
+ const char *start = state->cursor;
864
+ bool escaped = false;
865
+
866
+ while (state->cursor < state->end) {
867
+ if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
868
+ switch (*state->cursor) {
869
+ case '"': {
870
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
871
+ state->cursor++;
872
+ return json_push_value(state, config, string);
873
+ }
874
+ case '\\': {
875
+ state->cursor++;
876
+ escaped = true;
877
+ if ((unsigned char)*state->cursor < 0x20) {
878
+ raise_parse_error("invalid ASCII control character in string: %s", state);
879
+ }
880
+ break;
881
+ }
882
+ default:
883
+ raise_parse_error("invalid ASCII control character in string: %s", state);
884
+ break;
885
+ }
886
+ }
989
887
 
990
- #line 345 "parser.rl"
888
+ state->cursor++;
889
+ }
991
890
 
891
+ raise_parse_error("unexpected end of input, expected closing \"", state);
892
+ return Qfalse;
893
+ }
992
894
 
993
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
895
+ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
994
896
  {
995
- int cs = EVIL;
996
-
997
-
998
- #line 999 "parser.c"
999
- {
1000
- cs = JSON_float_start;
1001
- }
1002
-
1003
- #line 352 "parser.rl"
1004
- json->memo = p;
1005
-
1006
- #line 1007 "parser.c"
1007
- {
1008
- if ( p == pe )
1009
- goto _test_eof;
1010
- switch ( cs )
1011
- {
1012
- case 1:
1013
- switch( (*p) ) {
1014
- case 45: goto st2;
1015
- case 48: goto st3;
1016
- }
1017
- if ( 49 <= (*p) && (*p) <= 57 )
1018
- goto st7;
1019
- goto st0;
1020
- st0:
1021
- cs = 0;
1022
- goto _out;
1023
- st2:
1024
- if ( ++p == pe )
1025
- goto _test_eof2;
1026
- case 2:
1027
- if ( (*p) == 48 )
1028
- goto st3;
1029
- if ( 49 <= (*p) && (*p) <= 57 )
1030
- goto st7;
1031
- goto st0;
1032
- st3:
1033
- if ( ++p == pe )
1034
- goto _test_eof3;
1035
- case 3:
1036
- switch( (*p) ) {
1037
- case 46: goto st4;
1038
- case 69: goto st5;
1039
- case 101: goto st5;
1040
- }
1041
- goto st0;
1042
- st4:
1043
- if ( ++p == pe )
1044
- goto _test_eof4;
1045
- case 4:
1046
- if ( 48 <= (*p) && (*p) <= 57 )
1047
- goto st8;
1048
- goto st0;
1049
- st8:
1050
- if ( ++p == pe )
1051
- goto _test_eof8;
1052
- case 8:
1053
- switch( (*p) ) {
1054
- case 69: goto st5;
1055
- case 101: goto st5;
1056
- }
1057
- if ( (*p) > 46 ) {
1058
- if ( 48 <= (*p) && (*p) <= 57 )
1059
- goto st8;
1060
- } else if ( (*p) >= 45 )
1061
- goto st0;
1062
- goto tr9;
1063
- tr9:
1064
- #line 339 "parser.rl"
1065
- { p--; {p++; cs = 9; goto _out;} }
1066
- goto st9;
1067
- st9:
1068
- if ( ++p == pe )
1069
- goto _test_eof9;
1070
- case 9:
1071
- #line 1072 "parser.c"
1072
- goto st0;
1073
- st5:
1074
- if ( ++p == pe )
1075
- goto _test_eof5;
1076
- case 5:
1077
- switch( (*p) ) {
1078
- case 43: goto st6;
1079
- case 45: goto st6;
1080
- }
1081
- if ( 48 <= (*p) && (*p) <= 57 )
1082
- goto st10;
1083
- goto st0;
1084
- st6:
1085
- if ( ++p == pe )
1086
- goto _test_eof6;
1087
- case 6:
1088
- if ( 48 <= (*p) && (*p) <= 57 )
1089
- goto st10;
1090
- goto st0;
1091
- st10:
1092
- if ( ++p == pe )
1093
- goto _test_eof10;
1094
- case 10:
1095
- switch( (*p) ) {
1096
- case 69: goto st0;
1097
- case 101: goto st0;
1098
- }
1099
- if ( (*p) > 46 ) {
1100
- if ( 48 <= (*p) && (*p) <= 57 )
1101
- goto st10;
1102
- } else if ( (*p) >= 45 )
1103
- goto st0;
1104
- goto tr9;
1105
- st7:
1106
- if ( ++p == pe )
1107
- goto _test_eof7;
1108
- case 7:
1109
- switch( (*p) ) {
1110
- case 46: goto st4;
1111
- case 69: goto st5;
1112
- case 101: goto st5;
1113
- }
1114
- if ( 48 <= (*p) && (*p) <= 57 )
1115
- goto st7;
1116
- goto st0;
1117
- }
1118
- _test_eof2: cs = 2; goto _test_eof;
1119
- _test_eof3: cs = 3; goto _test_eof;
1120
- _test_eof4: cs = 4; goto _test_eof;
1121
- _test_eof8: cs = 8; goto _test_eof;
1122
- _test_eof9: cs = 9; goto _test_eof;
1123
- _test_eof5: cs = 5; goto _test_eof;
1124
- _test_eof6: cs = 6; goto _test_eof;
1125
- _test_eof10: cs = 10; goto _test_eof;
1126
- _test_eof7: cs = 7; goto _test_eof;
1127
-
1128
- _test_eof: {}
1129
- _out: {}
1130
- }
1131
-
1132
- #line 354 "parser.rl"
1133
-
1134
- if (cs >= JSON_float_first_final) {
1135
- VALUE mod = Qnil;
1136
- ID method_id = 0;
1137
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
1138
- mod = json->decimal_class;
1139
- method_id = i_try_convert;
1140
- } else if (rb_respond_to(json->decimal_class, i_new)) {
1141
- mod = json->decimal_class;
1142
- method_id = i_new;
1143
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1144
- VALUE name = rb_class_name(json->decimal_class);
1145
- const char *name_cstr = RSTRING_PTR(name);
1146
- const char *last_colon = strrchr(name_cstr, ':');
1147
- if (last_colon) {
1148
- const char *mod_path_end = last_colon - 1;
1149
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1150
- mod = rb_path_to_class(mod_path);
1151
-
1152
- const char *method_name_beg = last_colon + 1;
1153
- long before_len = method_name_beg - name_cstr;
1154
- long len = RSTRING_LEN(name) - before_len;
1155
- VALUE method_name = rb_str_substr(name, before_len, len);
1156
- method_id = SYM2ID(rb_str_intern(method_name));
1157
- } else {
1158
- mod = rb_mKernel;
1159
- method_id = SYM2ID(rb_str_intern(name));
897
+ json_eat_whitespace(state);
898
+ if (state->cursor >= state->end) {
899
+ raise_parse_error("unexpected end of input", state);
900
+ }
901
+
902
+ switch (*state->cursor) {
903
+ case 'n':
904
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
905
+ state->cursor += 4;
906
+ return json_push_value(state, config, Qnil);
1160
907
  }
1161
- }
1162
908
 
1163
- long len = p - json->memo;
1164
- fbuffer_clear(json->fbuffer);
1165
- fbuffer_append(json->fbuffer, json->memo, len);
1166
- fbuffer_append_char(json->fbuffer, '\0');
909
+ raise_parse_error("unexpected token %s", state);
910
+ break;
911
+ case 't':
912
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
913
+ state->cursor += 4;
914
+ return json_push_value(state, config, Qtrue);
915
+ }
1167
916
 
1168
- if (method_id) {
1169
- VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
1170
- *result = rb_funcallv(mod, method_id, 1, &text);
1171
- } else {
1172
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
1173
- }
917
+ raise_parse_error("unexpected token %s", state);
918
+ break;
919
+ case 'f':
920
+ // Note: memcmp with a small power of two compile to an integer comparison
921
+ if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
922
+ state->cursor += 5;
923
+ return json_push_value(state, config, Qfalse);
924
+ }
1174
925
 
1175
- return p + 1;
1176
- } else {
1177
- return NULL;
1178
- }
1179
- }
926
+ raise_parse_error("unexpected token %s", state);
927
+ break;
928
+ case 'N':
929
+ // Note: memcmp with a small power of two compile to an integer comparison
930
+ if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
931
+ state->cursor += 3;
932
+ return json_push_value(state, config, CNaN);
933
+ }
1180
934
 
935
+ raise_parse_error("unexpected token %s", state);
936
+ break;
937
+ case 'I':
938
+ if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
939
+ state->cursor += 8;
940
+ return json_push_value(state, config, CInfinity);
941
+ }
1181
942
 
943
+ raise_parse_error("unexpected token %s", state);
944
+ break;
945
+ case '-':
946
+ // Note: memcmp with a small power of two compile to an integer comparison
947
+ if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
948
+ if (config->allow_nan) {
949
+ state->cursor += 9;
950
+ return json_push_value(state, config, CMinusInfinity);
951
+ } else {
952
+ raise_parse_error("unexpected token %s", state);
953
+ }
954
+ }
955
+ // Fallthrough
956
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
957
+ bool integer = true;
1182
958
 
1183
- #line 1184 "parser.c"
1184
- enum {JSON_array_start = 1};
1185
- enum {JSON_array_first_final = 17};
1186
- enum {JSON_array_error = 0};
959
+ // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
960
+ const char *start = state->cursor;
961
+ state->cursor++;
1187
962
 
1188
- enum {JSON_array_en_main = 1};
963
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
964
+ state->cursor++;
965
+ }
1189
966
 
967
+ long integer_length = state->cursor - start;
1190
968
 
1191
- #line 432 "parser.rl"
969
+ if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
970
+ raise_parse_error_at("invalid number: %s", state, start);
971
+ } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
972
+ raise_parse_error_at("invalid number: %s", state, start);
973
+ } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
974
+ raise_parse_error_at("invalid number: %s", state, start);
975
+ }
1192
976
 
977
+ if ((state->cursor < state->end) && (*state->cursor == '.')) {
978
+ integer = false;
979
+ state->cursor++;
1193
980
 
1194
- static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
1195
- {
1196
- int cs = EVIL;
1197
- VALUE array_class = json->array_class;
981
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
982
+ raise_parse_error("invalid number: %s", state);
983
+ }
1198
984
 
1199
- if (json->max_nesting && current_nesting > json->max_nesting) {
1200
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
1201
- }
1202
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
1203
-
1204
-
1205
- #line 1206 "parser.c"
1206
- {
1207
- cs = JSON_array_start;
1208
- }
1209
-
1210
- #line 445 "parser.rl"
1211
-
1212
- #line 1213 "parser.c"
1213
- {
1214
- if ( p == pe )
1215
- goto _test_eof;
1216
- switch ( cs )
1217
- {
1218
- case 1:
1219
- if ( (*p) == 91 )
1220
- goto st2;
1221
- goto st0;
1222
- st0:
1223
- cs = 0;
1224
- goto _out;
1225
- st2:
1226
- if ( ++p == pe )
1227
- goto _test_eof2;
1228
- case 2:
1229
- switch( (*p) ) {
1230
- case 13: goto st2;
1231
- case 32: goto st2;
1232
- case 34: goto tr2;
1233
- case 45: goto tr2;
1234
- case 47: goto st13;
1235
- case 73: goto tr2;
1236
- case 78: goto tr2;
1237
- case 91: goto tr2;
1238
- case 93: goto tr4;
1239
- case 102: goto tr2;
1240
- case 110: goto tr2;
1241
- case 116: goto tr2;
1242
- case 123: goto tr2;
1243
- }
1244
- if ( (*p) > 10 ) {
1245
- if ( 48 <= (*p) && (*p) <= 57 )
1246
- goto tr2;
1247
- } else if ( (*p) >= 9 )
1248
- goto st2;
1249
- goto st0;
1250
- tr2:
1251
- #line 409 "parser.rl"
1252
- {
1253
- VALUE v = Qnil;
1254
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
1255
- if (np == NULL) {
1256
- p--; {p++; cs = 3; goto _out;}
1257
- } else {
1258
- if (NIL_P(json->array_class)) {
1259
- rb_ary_push(*result, v);
1260
- } else {
1261
- rb_funcall(*result, i_leftshift, 1, v);
985
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
986
+ state->cursor++;
987
+ }
1262
988
  }
1263
- {p = (( np))-1;}
1264
- }
1265
- }
1266
- goto st3;
1267
- st3:
1268
- if ( ++p == pe )
1269
- goto _test_eof3;
1270
- case 3:
1271
- #line 1272 "parser.c"
1272
- switch( (*p) ) {
1273
- case 13: goto st3;
1274
- case 32: goto st3;
1275
- case 44: goto st4;
1276
- case 47: goto st9;
1277
- case 93: goto tr4;
1278
- }
1279
- if ( 9 <= (*p) && (*p) <= 10 )
1280
- goto st3;
1281
- goto st0;
1282
- st4:
1283
- if ( ++p == pe )
1284
- goto _test_eof4;
1285
- case 4:
1286
- switch( (*p) ) {
1287
- case 13: goto st4;
1288
- case 32: goto st4;
1289
- case 34: goto tr2;
1290
- case 45: goto tr2;
1291
- case 47: goto st5;
1292
- case 73: goto tr2;
1293
- case 78: goto tr2;
1294
- case 91: goto tr2;
1295
- case 102: goto tr2;
1296
- case 110: goto tr2;
1297
- case 116: goto tr2;
1298
- case 123: goto tr2;
1299
- }
1300
- if ( (*p) > 10 ) {
1301
- if ( 48 <= (*p) && (*p) <= 57 )
1302
- goto tr2;
1303
- } else if ( (*p) >= 9 )
1304
- goto st4;
1305
- goto st0;
1306
- st5:
1307
- if ( ++p == pe )
1308
- goto _test_eof5;
1309
- case 5:
1310
- switch( (*p) ) {
1311
- case 42: goto st6;
1312
- case 47: goto st8;
1313
- }
1314
- goto st0;
1315
- st6:
1316
- if ( ++p == pe )
1317
- goto _test_eof6;
1318
- case 6:
1319
- if ( (*p) == 42 )
1320
- goto st7;
1321
- goto st6;
1322
- st7:
1323
- if ( ++p == pe )
1324
- goto _test_eof7;
1325
- case 7:
1326
- switch( (*p) ) {
1327
- case 42: goto st7;
1328
- case 47: goto st4;
1329
- }
1330
- goto st6;
1331
- st8:
1332
- if ( ++p == pe )
1333
- goto _test_eof8;
1334
- case 8:
1335
- if ( (*p) == 10 )
1336
- goto st4;
1337
- goto st8;
1338
- st9:
1339
- if ( ++p == pe )
1340
- goto _test_eof9;
1341
- case 9:
1342
- switch( (*p) ) {
1343
- case 42: goto st10;
1344
- case 47: goto st12;
1345
- }
1346
- goto st0;
1347
- st10:
1348
- if ( ++p == pe )
1349
- goto _test_eof10;
1350
- case 10:
1351
- if ( (*p) == 42 )
1352
- goto st11;
1353
- goto st10;
1354
- st11:
1355
- if ( ++p == pe )
1356
- goto _test_eof11;
1357
- case 11:
1358
- switch( (*p) ) {
1359
- case 42: goto st11;
1360
- case 47: goto st3;
1361
- }
1362
- goto st10;
1363
- st12:
1364
- if ( ++p == pe )
1365
- goto _test_eof12;
1366
- case 12:
1367
- if ( (*p) == 10 )
1368
- goto st3;
1369
- goto st12;
1370
- tr4:
1371
- #line 424 "parser.rl"
1372
- { p--; {p++; cs = 17; goto _out;} }
1373
- goto st17;
1374
- st17:
1375
- if ( ++p == pe )
1376
- goto _test_eof17;
1377
- case 17:
1378
- #line 1379 "parser.c"
1379
- goto st0;
1380
- st13:
1381
- if ( ++p == pe )
1382
- goto _test_eof13;
1383
- case 13:
1384
- switch( (*p) ) {
1385
- case 42: goto st14;
1386
- case 47: goto st16;
1387
- }
1388
- goto st0;
1389
- st14:
1390
- if ( ++p == pe )
1391
- goto _test_eof14;
1392
- case 14:
1393
- if ( (*p) == 42 )
1394
- goto st15;
1395
- goto st14;
1396
- st15:
1397
- if ( ++p == pe )
1398
- goto _test_eof15;
1399
- case 15:
1400
- switch( (*p) ) {
1401
- case 42: goto st15;
1402
- case 47: goto st2;
1403
- }
1404
- goto st14;
1405
- st16:
1406
- if ( ++p == pe )
1407
- goto _test_eof16;
1408
- case 16:
1409
- if ( (*p) == 10 )
1410
- goto st2;
1411
- goto st16;
1412
- }
1413
- _test_eof2: cs = 2; goto _test_eof;
1414
- _test_eof3: cs = 3; goto _test_eof;
1415
- _test_eof4: cs = 4; goto _test_eof;
1416
- _test_eof5: cs = 5; goto _test_eof;
1417
- _test_eof6: cs = 6; goto _test_eof;
1418
- _test_eof7: cs = 7; goto _test_eof;
1419
- _test_eof8: cs = 8; goto _test_eof;
1420
- _test_eof9: cs = 9; goto _test_eof;
1421
- _test_eof10: cs = 10; goto _test_eof;
1422
- _test_eof11: cs = 11; goto _test_eof;
1423
- _test_eof12: cs = 12; goto _test_eof;
1424
- _test_eof17: cs = 17; goto _test_eof;
1425
- _test_eof13: cs = 13; goto _test_eof;
1426
- _test_eof14: cs = 14; goto _test_eof;
1427
- _test_eof15: cs = 15; goto _test_eof;
1428
- _test_eof16: cs = 16; goto _test_eof;
1429
-
1430
- _test_eof: {}
1431
- _out: {}
1432
- }
1433
-
1434
- #line 446 "parser.rl"
1435
-
1436
- if(cs >= JSON_array_first_final) {
1437
- return p + 1;
1438
- } else {
1439
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
1440
- return NULL;
1441
- }
1442
- }
1443
989
 
1444
- static const size_t MAX_STACK_BUFFER_SIZE = 128;
1445
- static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
1446
- {
1447
- VALUE result = Qnil;
1448
- size_t bufferSize = stringEnd - string;
1449
- char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
1450
- int unescape_len;
1451
- char buf[4];
990
+ if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
991
+ integer = false;
992
+ state->cursor++;
993
+ if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
994
+ state->cursor++;
995
+ }
1452
996
 
1453
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1454
- # ifdef HAVE_RB_ENC_INTERNED_STR
1455
- bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
1456
- # else
1457
- bufferStart = buffer = ALLOC_N(char, bufferSize);
1458
- # endif
1459
- } else {
1460
- # ifdef HAVE_RB_ENC_INTERNED_STR
1461
- bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
1462
- # else
1463
- bufferStart = buffer = ALLOCA_N(char, bufferSize);
1464
- # endif
1465
- }
997
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
998
+ raise_parse_error("invalid number: %s", state);
999
+ }
1466
1000
 
1467
- while (pe < stringEnd) {
1468
- if (*pe == '\\') {
1469
- unescape = (char *) "?";
1470
- unescape_len = 1;
1471
- if (pe > p) {
1472
- MEMCPY(buffer, p, char, pe - p);
1473
- buffer += pe - p;
1001
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1002
+ state->cursor++;
1003
+ }
1474
1004
  }
1475
- switch (*++pe) {
1476
- case 'n':
1477
- unescape = (char *) "\n";
1478
- break;
1479
- case 'r':
1480
- unescape = (char *) "\r";
1481
- break;
1482
- case 't':
1483
- unescape = (char *) "\t";
1484
- break;
1485
- case '"':
1486
- unescape = (char *) "\"";
1487
- break;
1488
- case '\\':
1489
- unescape = (char *) "\\";
1490
- break;
1491
- case 'b':
1492
- unescape = (char *) "\b";
1493
- break;
1494
- case 'f':
1495
- unescape = (char *) "\f";
1496
- break;
1497
- case 'u':
1498
- if (pe > stringEnd - 4) {
1499
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1500
- ruby_xfree(bufferStart);
1501
- }
1502
- rb_enc_raise(
1503
- EXC_ENCODING eParserError,
1504
- "incomplete unicode character escape sequence at '%s'", p
1505
- );
1506
- } else {
1507
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
1508
- pe += 3;
1509
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
1510
- pe++;
1511
- if (pe > stringEnd - 6) {
1512
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1513
- ruby_xfree(bufferStart);
1514
- }
1515
- rb_enc_raise(
1516
- EXC_ENCODING eParserError,
1517
- "incomplete surrogate pair at '%s'", p
1518
- );
1519
- }
1520
- if (pe[0] == '\\' && pe[1] == 'u') {
1521
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
1522
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
1523
- | (sur & 0x3FF));
1524
- pe += 5;
1525
- } else {
1526
- unescape = (char *) "?";
1527
- break;
1005
+
1006
+ if (integer) {
1007
+ return json_push_value(state, config, json_decode_integer(start, state->cursor));
1008
+ }
1009
+ return json_push_value(state, config, json_decode_float(config, start, state->cursor));
1010
+ }
1011
+ case '"': {
1012
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1013
+ return json_parse_string(state, config, false);
1014
+ break;
1015
+ }
1016
+ case '[': {
1017
+ state->cursor++;
1018
+ json_eat_whitespace(state);
1019
+ long stack_head = state->stack->head;
1020
+
1021
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1022
+ state->cursor++;
1023
+ return json_push_value(state, config, json_decode_array(state, config, 0));
1024
+ } else {
1025
+ state->current_nesting++;
1026
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1027
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1028
+ }
1029
+ state->in_array++;
1030
+ json_parse_any(state, config);
1031
+ }
1032
+
1033
+ while (true) {
1034
+ json_eat_whitespace(state);
1035
+
1036
+ if (state->cursor < state->end) {
1037
+ if (*state->cursor == ']') {
1038
+ state->cursor++;
1039
+ long count = state->stack->head - stack_head;
1040
+ state->current_nesting--;
1041
+ state->in_array--;
1042
+ return json_push_value(state, config, json_decode_array(state, config, count));
1043
+ }
1044
+
1045
+ if (*state->cursor == ',') {
1046
+ state->cursor++;
1047
+ if (config->allow_trailing_comma) {
1048
+ json_eat_whitespace(state);
1049
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1050
+ continue;
1528
1051
  }
1529
1052
  }
1530
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
1531
- unescape = buf;
1053
+ json_parse_any(state, config);
1054
+ continue;
1532
1055
  }
1533
- break;
1534
- default:
1535
- p = pe;
1536
- continue;
1056
+ }
1057
+
1058
+ raise_parse_error("expected ',' or ']' after array value", state);
1537
1059
  }
1538
- MEMCPY(buffer, unescape, char, unescape_len);
1539
- buffer += unescape_len;
1540
- p = ++pe;
1541
- } else {
1542
- pe++;
1060
+ break;
1543
1061
  }
1544
- }
1062
+ case '{': {
1063
+ state->cursor++;
1064
+ json_eat_whitespace(state);
1065
+ long stack_head = state->stack->head;
1066
+
1067
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1068
+ state->cursor++;
1069
+ return json_push_value(state, config, json_decode_object(state, config, 0));
1070
+ } else {
1071
+ state->current_nesting++;
1072
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1073
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1074
+ }
1545
1075
 
1546
- if (pe > p) {
1547
- MEMCPY(buffer, p, char, pe - p);
1548
- buffer += pe - p;
1549
- }
1076
+ if (*state->cursor != '"') {
1077
+ raise_parse_error("expected object key, got %s", state);
1078
+ }
1079
+ json_parse_string(state, config, true);
1550
1080
 
1551
- # ifdef HAVE_RB_ENC_INTERNED_STR
1552
- if (intern) {
1553
- result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
1554
- } else {
1555
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
1556
- }
1557
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1558
- ruby_xfree(bufferStart);
1559
- }
1560
- # else
1561
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
1081
+ json_eat_whitespace(state);
1082
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1083
+ raise_parse_error("expected ':' after object key", state);
1084
+ }
1085
+ state->cursor++;
1562
1086
 
1563
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1564
- ruby_xfree(bufferStart);
1565
- }
1087
+ json_parse_any(state, config);
1088
+ }
1566
1089
 
1567
- if (intern) {
1568
- # if STR_UMINUS_DEDUPE_FROZEN
1569
- // Starting from MRI 2.8 it is preferable to freeze the string
1570
- // before deduplication so that it can be interned directly
1571
- // otherwise it would be duplicated first which is wasteful.
1572
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
1573
- # elif STR_UMINUS_DEDUPE
1574
- // MRI 2.5 and older do not deduplicate strings that are already
1575
- // frozen.
1576
- result = rb_funcall(result, i_uminus, 0);
1577
- # else
1578
- result = rb_str_freeze(result);
1579
- # endif
1580
- }
1581
- # endif
1090
+ while (true) {
1091
+ json_eat_whitespace(state);
1582
1092
 
1583
- if (symbolize) {
1584
- result = rb_str_intern(result);
1585
- }
1093
+ if (state->cursor < state->end) {
1094
+ if (*state->cursor == '}') {
1095
+ state->cursor++;
1096
+ state->current_nesting--;
1097
+ long count = state->stack->head - stack_head;
1098
+ return json_push_value(state, config, json_decode_object(state, config, count));
1099
+ }
1586
1100
 
1587
- return result;
1588
- }
1101
+ if (*state->cursor == ',') {
1102
+ state->cursor++;
1103
+ json_eat_whitespace(state);
1589
1104
 
1105
+ if (config->allow_trailing_comma) {
1106
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1107
+ continue;
1108
+ }
1109
+ }
1590
1110
 
1591
- #line 1592 "parser.c"
1592
- enum {JSON_string_start = 1};
1593
- enum {JSON_string_first_final = 8};
1594
- enum {JSON_string_error = 0};
1111
+ if (*state->cursor != '"') {
1112
+ raise_parse_error("expected object key, got: %s", state);
1113
+ }
1114
+ json_parse_string(state, config, true);
1595
1115
 
1596
- enum {JSON_string_en_main = 1};
1116
+ json_eat_whitespace(state);
1117
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1118
+ raise_parse_error("expected ':' after object key, got: %s", state);
1119
+ }
1120
+ state->cursor++;
1597
1121
 
1122
+ json_parse_any(state, config);
1598
1123
 
1599
- #line 620 "parser.rl"
1124
+ continue;
1125
+ }
1126
+ }
1600
1127
 
1128
+ raise_parse_error("expected ',' or '}' after object value, got: %s", state);
1129
+ }
1130
+ break;
1131
+ }
1601
1132
 
1602
- static int
1603
- match_i(VALUE regexp, VALUE klass, VALUE memo)
1604
- {
1605
- if (regexp == Qundef) return ST_STOP;
1606
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
1607
- RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
1608
- rb_ary_push(memo, klass);
1609
- return ST_STOP;
1133
+ default:
1134
+ raise_parse_error("unexpected character: %s", state);
1135
+ break;
1610
1136
  }
1611
- return ST_CONTINUE;
1137
+
1138
+ raise_parse_error("unreacheable: %s", state);
1612
1139
  }
1613
1140
 
1614
- static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
1141
+ static void json_ensure_eof(JSON_ParserState *state)
1615
1142
  {
1616
- int cs = EVIL;
1617
- VALUE match_string;
1618
-
1619
-
1620
- #line 1621 "parser.c"
1621
- {
1622
- cs = JSON_string_start;
1623
- }
1624
-
1625
- #line 640 "parser.rl"
1626
- json->memo = p;
1627
-
1628
- #line 1629 "parser.c"
1629
- {
1630
- if ( p == pe )
1631
- goto _test_eof;
1632
- switch ( cs )
1633
- {
1634
- case 1:
1635
- if ( (*p) == 34 )
1636
- goto st2;
1637
- goto st0;
1638
- st0:
1639
- cs = 0;
1640
- goto _out;
1641
- st2:
1642
- if ( ++p == pe )
1643
- goto _test_eof2;
1644
- case 2:
1645
- switch( (*p) ) {
1646
- case 34: goto tr2;
1647
- case 92: goto st3;
1648
- }
1649
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
1650
- goto st0;
1651
- goto st2;
1652
- tr2:
1653
- #line 607 "parser.rl"
1654
- {
1655
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1656
- if (NIL_P(*result)) {
1657
- p--;
1658
- {p++; cs = 8; goto _out;}
1659
- } else {
1660
- {p = (( p + 1))-1;}
1661
- }
1662
- }
1663
- #line 617 "parser.rl"
1664
- { p--; {p++; cs = 8; goto _out;} }
1665
- goto st8;
1666
- st8:
1667
- if ( ++p == pe )
1668
- goto _test_eof8;
1669
- case 8:
1670
- #line 1671 "parser.c"
1671
- goto st0;
1672
- st3:
1673
- if ( ++p == pe )
1674
- goto _test_eof3;
1675
- case 3:
1676
- if ( (*p) == 117 )
1677
- goto st4;
1678
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
1679
- goto st0;
1680
- goto st2;
1681
- st4:
1682
- if ( ++p == pe )
1683
- goto _test_eof4;
1684
- case 4:
1685
- if ( (*p) < 65 ) {
1686
- if ( 48 <= (*p) && (*p) <= 57 )
1687
- goto st5;
1688
- } else if ( (*p) > 70 ) {
1689
- if ( 97 <= (*p) && (*p) <= 102 )
1690
- goto st5;
1691
- } else
1692
- goto st5;
1693
- goto st0;
1694
- st5:
1695
- if ( ++p == pe )
1696
- goto _test_eof5;
1697
- case 5:
1698
- if ( (*p) < 65 ) {
1699
- if ( 48 <= (*p) && (*p) <= 57 )
1700
- goto st6;
1701
- } else if ( (*p) > 70 ) {
1702
- if ( 97 <= (*p) && (*p) <= 102 )
1703
- goto st6;
1704
- } else
1705
- goto st6;
1706
- goto st0;
1707
- st6:
1708
- if ( ++p == pe )
1709
- goto _test_eof6;
1710
- case 6:
1711
- if ( (*p) < 65 ) {
1712
- if ( 48 <= (*p) && (*p) <= 57 )
1713
- goto st7;
1714
- } else if ( (*p) > 70 ) {
1715
- if ( 97 <= (*p) && (*p) <= 102 )
1716
- goto st7;
1717
- } else
1718
- goto st7;
1719
- goto st0;
1720
- st7:
1721
- if ( ++p == pe )
1722
- goto _test_eof7;
1723
- case 7:
1724
- if ( (*p) < 65 ) {
1725
- if ( 48 <= (*p) && (*p) <= 57 )
1726
- goto st2;
1727
- } else if ( (*p) > 70 ) {
1728
- if ( 97 <= (*p) && (*p) <= 102 )
1729
- goto st2;
1730
- } else
1731
- goto st2;
1732
- goto st0;
1733
- }
1734
- _test_eof2: cs = 2; goto _test_eof;
1735
- _test_eof8: cs = 8; goto _test_eof;
1736
- _test_eof3: cs = 3; goto _test_eof;
1737
- _test_eof4: cs = 4; goto _test_eof;
1738
- _test_eof5: cs = 5; goto _test_eof;
1739
- _test_eof6: cs = 6; goto _test_eof;
1740
- _test_eof7: cs = 7; goto _test_eof;
1741
-
1742
- _test_eof: {}
1743
- _out: {}
1744
- }
1745
-
1746
- #line 642 "parser.rl"
1747
-
1748
- if (json->create_additions && RTEST(match_string = json->match_string)) {
1749
- VALUE klass;
1750
- VALUE memo = rb_ary_new2(2);
1751
- rb_ary_push(memo, *result);
1752
- rb_hash_foreach(match_string, match_i, memo);
1753
- klass = rb_ary_entry(memo, 1);
1754
- if (RTEST(klass)) {
1755
- *result = rb_funcall(klass, i_json_create, 1, *result);
1756
- }
1757
- }
1758
-
1759
- if (cs >= JSON_string_first_final) {
1760
- return p + 1;
1761
- } else {
1762
- return NULL;
1143
+ json_eat_whitespace(state);
1144
+ if (state->cursor != state->end) {
1145
+ raise_parse_error("unexpected token at end of stream %s", state);
1763
1146
  }
1764
1147
  }
1765
1148
 
@@ -1777,24 +1160,82 @@ case 7:
1777
1160
 
1778
1161
  static VALUE convert_encoding(VALUE source)
1779
1162
  {
1780
- #ifdef HAVE_RUBY_ENCODING_H
1781
- rb_encoding *enc = rb_enc_get(source);
1782
- if (enc == rb_ascii8bit_encoding()) {
1783
- if (OBJ_FROZEN(source)) {
1784
- source = rb_str_dup(source);
1785
- }
1786
- FORCE_UTF8(source);
1787
- } else {
1788
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
1789
- }
1790
- #endif
1163
+ int encindex = RB_ENCODING_GET(source);
1164
+
1165
+ if (RB_LIKELY(encindex == utf8_encindex)) {
1791
1166
  return source;
1167
+ }
1168
+
1169
+ if (encindex == binary_encindex) {
1170
+ // For historical reason, we silently reinterpret binary strings as UTF-8
1171
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1172
+ }
1173
+
1174
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
1175
+ }
1176
+
1177
+ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1178
+ {
1179
+ JSON_ParserConfig *config = (JSON_ParserConfig *)data;
1180
+
1181
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1182
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1183
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1184
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1185
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
1186
+ else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1187
+ else if (key == sym_decimal_class) {
1188
+ if (RTEST(val)) {
1189
+ if (rb_respond_to(val, i_try_convert)) {
1190
+ config->decimal_class = val;
1191
+ config->decimal_method_id = i_try_convert;
1192
+ } else if (rb_respond_to(val, i_new)) {
1193
+ config->decimal_class = val;
1194
+ config->decimal_method_id = i_new;
1195
+ } else if (RB_TYPE_P(val, T_CLASS)) {
1196
+ VALUE name = rb_class_name(val);
1197
+ const char *name_cstr = RSTRING_PTR(name);
1198
+ const char *last_colon = strrchr(name_cstr, ':');
1199
+ if (last_colon) {
1200
+ const char *mod_path_end = last_colon - 1;
1201
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1202
+ config->decimal_class = rb_path_to_class(mod_path);
1203
+
1204
+ const char *method_name_beg = last_colon + 1;
1205
+ long before_len = method_name_beg - name_cstr;
1206
+ long len = RSTRING_LEN(name) - before_len;
1207
+ VALUE method_name = rb_str_substr(name, before_len, len);
1208
+ config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
1209
+ } else {
1210
+ config->decimal_class = rb_mKernel;
1211
+ config->decimal_method_id = SYM2ID(rb_str_intern(name));
1212
+ }
1213
+ }
1214
+ }
1215
+ }
1216
+
1217
+ return ST_CONTINUE;
1218
+ }
1219
+
1220
+ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
1221
+ {
1222
+ config->max_nesting = 100;
1223
+
1224
+ if (!NIL_P(opts)) {
1225
+ Check_Type(opts, T_HASH);
1226
+ if (RHASH_SIZE(opts) > 0) {
1227
+ // We assume in most cases few keys are set so it's faster to go over
1228
+ // the provided keys than to check all possible keys.
1229
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)config);
1230
+ }
1231
+
1232
+ }
1792
1233
  }
1793
1234
 
1794
1235
  /*
1795
- * call-seq: new(source, opts => {})
1236
+ * call-seq: new(opts => {})
1796
1237
  *
1797
- * Creates a new JSON::Ext::Parser instance for the string _source_.
1238
+ * Creates a new JSON::Ext::ParserConfig instance.
1798
1239
  *
1799
1240
  * It will be configured by the _opts_ hash. _opts_ can have the following
1800
1241
  * keys:
@@ -1810,343 +1251,114 @@ static VALUE convert_encoding(VALUE source)
1810
1251
  * (keys) in a JSON object. Otherwise strings are returned, which is
1811
1252
  * also the default. It's not possible to use this option in
1812
1253
  * conjunction with the *create_additions* option.
1813
- * * *create_additions*: If set to false, the Parser doesn't create
1814
- * additions even if a matching class and create_id was found. This option
1815
- * defaults to false.
1816
- * * *object_class*: Defaults to Hash
1817
- * * *array_class*: Defaults to Array
1254
+ * * *decimal_class*: Specifies which class to use instead of the default
1255
+ * (Float) when parsing decimal numbers. This class must accept a single
1256
+ * string argument in its constructor.
1818
1257
  */
1819
- static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1258
+ static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
1820
1259
  {
1821
- VALUE source, opts;
1822
- GET_PARSER_INIT;
1260
+ GET_PARSER_CONFIG;
1261
+
1262
+ parser_config_init(config, opts);
1263
+
1264
+ RB_OBJ_WRITTEN(self, Qundef, config->decimal_class);
1823
1265
 
1824
- if (json->Vsource) {
1825
- rb_raise(rb_eTypeError, "already initialized instance");
1826
- }
1827
- rb_scan_args(argc, argv, "1:", &source, &opts);
1828
- if (!NIL_P(opts)) {
1829
- VALUE tmp = ID2SYM(i_max_nesting);
1830
- if (option_given_p(opts, tmp)) {
1831
- VALUE max_nesting = rb_hash_aref(opts, tmp);
1832
- if (RTEST(max_nesting)) {
1833
- Check_Type(max_nesting, T_FIXNUM);
1834
- json->max_nesting = FIX2INT(max_nesting);
1835
- } else {
1836
- json->max_nesting = 0;
1837
- }
1838
- } else {
1839
- json->max_nesting = 100;
1840
- }
1841
- tmp = ID2SYM(i_allow_nan);
1842
- if (option_given_p(opts, tmp)) {
1843
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1844
- } else {
1845
- json->allow_nan = 0;
1846
- }
1847
- tmp = ID2SYM(i_symbolize_names);
1848
- if (option_given_p(opts, tmp)) {
1849
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1850
- } else {
1851
- json->symbolize_names = 0;
1852
- }
1853
- tmp = ID2SYM(i_freeze);
1854
- if (option_given_p(opts, tmp)) {
1855
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1856
- } else {
1857
- json->freeze = 0;
1858
- }
1859
- tmp = ID2SYM(i_create_additions);
1860
- if (option_given_p(opts, tmp)) {
1861
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
1862
- } else {
1863
- json->create_additions = 0;
1864
- }
1865
- if (json->symbolize_names && json->create_additions) {
1866
- rb_raise(rb_eArgError,
1867
- "options :symbolize_names and :create_additions cannot be "
1868
- " used in conjunction");
1869
- }
1870
- tmp = ID2SYM(i_create_id);
1871
- if (option_given_p(opts, tmp)) {
1872
- json->create_id = rb_hash_aref(opts, tmp);
1873
- } else {
1874
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
1875
- }
1876
- tmp = ID2SYM(i_object_class);
1877
- if (option_given_p(opts, tmp)) {
1878
- json->object_class = rb_hash_aref(opts, tmp);
1879
- } else {
1880
- json->object_class = Qnil;
1881
- }
1882
- tmp = ID2SYM(i_array_class);
1883
- if (option_given_p(opts, tmp)) {
1884
- json->array_class = rb_hash_aref(opts, tmp);
1885
- } else {
1886
- json->array_class = Qnil;
1887
- }
1888
- tmp = ID2SYM(i_decimal_class);
1889
- if (option_given_p(opts, tmp)) {
1890
- json->decimal_class = rb_hash_aref(opts, tmp);
1891
- } else {
1892
- json->decimal_class = Qnil;
1893
- }
1894
- tmp = ID2SYM(i_match_string);
1895
- if (option_given_p(opts, tmp)) {
1896
- VALUE match_string = rb_hash_aref(opts, tmp);
1897
- json->match_string = RTEST(match_string) ? match_string : Qnil;
1898
- } else {
1899
- json->match_string = Qnil;
1900
- }
1901
- } else {
1902
- json->max_nesting = 100;
1903
- json->allow_nan = 0;
1904
- json->create_additions = 0;
1905
- json->create_id = Qnil;
1906
- json->object_class = Qnil;
1907
- json->array_class = Qnil;
1908
- json->decimal_class = Qnil;
1909
- }
1910
- source = convert_encoding(StringValue(source));
1911
- StringValue(source);
1912
- json->len = RSTRING_LEN(source);
1913
- json->source = RSTRING_PTR(source);;
1914
- json->Vsource = source;
1915
1266
  return self;
1916
1267
  }
1917
1268
 
1269
+ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1270
+ {
1271
+ Vsource = convert_encoding(StringValue(Vsource));
1272
+ StringValue(Vsource);
1273
+
1274
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1275
+ rvalue_stack stack = {
1276
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1277
+ .ptr = rvalue_stack_buffer,
1278
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1279
+ };
1918
1280
 
1919
- #line 1920 "parser.c"
1920
- enum {JSON_start = 1};
1921
- enum {JSON_first_final = 10};
1922
- enum {JSON_error = 0};
1281
+ long len;
1282
+ const char *start;
1283
+ RSTRING_GETMEM(Vsource, start, len);
1923
1284
 
1924
- enum {JSON_en_main = 1};
1285
+ JSON_ParserState _state = {
1286
+ .start = start,
1287
+ .cursor = start,
1288
+ .end = start + len,
1289
+ .stack = &stack,
1290
+ };
1291
+ JSON_ParserState *state = &_state;
1925
1292
 
1293
+ VALUE result = json_parse_any(state, config);
1926
1294
 
1927
- #line 828 "parser.rl"
1295
+ // This may be skipped in case of exception, but
1296
+ // it won't cause a leak.
1297
+ rvalue_stack_eagerly_release(state->stack_handle);
1928
1298
 
1299
+ json_ensure_eof(state);
1300
+
1301
+ return result;
1302
+ }
1929
1303
 
1930
1304
  /*
1931
- * call-seq: parse()
1305
+ * call-seq: parse(source)
1932
1306
  *
1933
1307
  * Parses the current JSON text _source_ and returns the complete data
1934
1308
  * structure as a result.
1935
1309
  * It raises JSON::ParserError if fail to parse.
1936
1310
  */
1937
- static VALUE cParser_parse(VALUE self)
1311
+ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
1938
1312
  {
1939
- char *p, *pe;
1940
- int cs = EVIL;
1941
- VALUE result = Qnil;
1942
- GET_PARSER;
1943
-
1944
-
1945
- #line 1946 "parser.c"
1946
- {
1947
- cs = JSON_start;
1948
- }
1949
-
1950
- #line 845 "parser.rl"
1951
- p = json->source;
1952
- pe = p + json->len;
1953
-
1954
- #line 1955 "parser.c"
1955
- {
1956
- if ( p == pe )
1957
- goto _test_eof;
1958
- switch ( cs )
1959
- {
1960
- st1:
1961
- if ( ++p == pe )
1962
- goto _test_eof1;
1963
- case 1:
1964
- switch( (*p) ) {
1965
- case 13: goto st1;
1966
- case 32: goto st1;
1967
- case 34: goto tr2;
1968
- case 45: goto tr2;
1969
- case 47: goto st6;
1970
- case 73: goto tr2;
1971
- case 78: goto tr2;
1972
- case 91: goto tr2;
1973
- case 102: goto tr2;
1974
- case 110: goto tr2;
1975
- case 116: goto tr2;
1976
- case 123: goto tr2;
1977
- }
1978
- if ( (*p) > 10 ) {
1979
- if ( 48 <= (*p) && (*p) <= 57 )
1980
- goto tr2;
1981
- } else if ( (*p) >= 9 )
1982
- goto st1;
1983
- goto st0;
1984
- st0:
1985
- cs = 0;
1986
- goto _out;
1987
- tr2:
1988
- #line 820 "parser.rl"
1989
- {
1990
- char *np = JSON_parse_value(json, p, pe, &result, 0);
1991
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
1992
- }
1993
- goto st10;
1994
- st10:
1995
- if ( ++p == pe )
1996
- goto _test_eof10;
1997
- case 10:
1998
- #line 1999 "parser.c"
1999
- switch( (*p) ) {
2000
- case 13: goto st10;
2001
- case 32: goto st10;
2002
- case 47: goto st2;
2003
- }
2004
- if ( 9 <= (*p) && (*p) <= 10 )
2005
- goto st10;
2006
- goto st0;
2007
- st2:
2008
- if ( ++p == pe )
2009
- goto _test_eof2;
2010
- case 2:
2011
- switch( (*p) ) {
2012
- case 42: goto st3;
2013
- case 47: goto st5;
2014
- }
2015
- goto st0;
2016
- st3:
2017
- if ( ++p == pe )
2018
- goto _test_eof3;
2019
- case 3:
2020
- if ( (*p) == 42 )
2021
- goto st4;
2022
- goto st3;
2023
- st4:
2024
- if ( ++p == pe )
2025
- goto _test_eof4;
2026
- case 4:
2027
- switch( (*p) ) {
2028
- case 42: goto st4;
2029
- case 47: goto st10;
2030
- }
2031
- goto st3;
2032
- st5:
2033
- if ( ++p == pe )
2034
- goto _test_eof5;
2035
- case 5:
2036
- if ( (*p) == 10 )
2037
- goto st10;
2038
- goto st5;
2039
- st6:
2040
- if ( ++p == pe )
2041
- goto _test_eof6;
2042
- case 6:
2043
- switch( (*p) ) {
2044
- case 42: goto st7;
2045
- case 47: goto st9;
2046
- }
2047
- goto st0;
2048
- st7:
2049
- if ( ++p == pe )
2050
- goto _test_eof7;
2051
- case 7:
2052
- if ( (*p) == 42 )
2053
- goto st8;
2054
- goto st7;
2055
- st8:
2056
- if ( ++p == pe )
2057
- goto _test_eof8;
2058
- case 8:
2059
- switch( (*p) ) {
2060
- case 42: goto st8;
2061
- case 47: goto st1;
2062
- }
2063
- goto st7;
2064
- st9:
2065
- if ( ++p == pe )
2066
- goto _test_eof9;
2067
- case 9:
2068
- if ( (*p) == 10 )
2069
- goto st1;
2070
- goto st9;
2071
- }
2072
- _test_eof1: cs = 1; goto _test_eof;
2073
- _test_eof10: cs = 10; goto _test_eof;
2074
- _test_eof2: cs = 2; goto _test_eof;
2075
- _test_eof3: cs = 3; goto _test_eof;
2076
- _test_eof4: cs = 4; goto _test_eof;
2077
- _test_eof5: cs = 5; goto _test_eof;
2078
- _test_eof6: cs = 6; goto _test_eof;
2079
- _test_eof7: cs = 7; goto _test_eof;
2080
- _test_eof8: cs = 8; goto _test_eof;
2081
- _test_eof9: cs = 9; goto _test_eof;
2082
-
2083
- _test_eof: {}
2084
- _out: {}
2085
- }
2086
-
2087
- #line 848 "parser.rl"
2088
-
2089
- if (cs >= JSON_first_final && p == pe) {
2090
- return result;
2091
- } else {
2092
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
2093
- return Qnil;
2094
- }
1313
+ GET_PARSER_CONFIG;
1314
+ return cParser_parse(config, Vsource);
2095
1315
  }
2096
1316
 
2097
- static void JSON_mark(void *ptr)
1317
+ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
2098
1318
  {
2099
- JSON_Parser *json = ptr;
2100
- rb_gc_mark_maybe(json->Vsource);
2101
- rb_gc_mark_maybe(json->create_id);
2102
- rb_gc_mark_maybe(json->object_class);
2103
- rb_gc_mark_maybe(json->array_class);
2104
- rb_gc_mark_maybe(json->decimal_class);
2105
- rb_gc_mark_maybe(json->match_string);
1319
+ Vsource = convert_encoding(StringValue(Vsource));
1320
+ StringValue(Vsource);
1321
+
1322
+ JSON_ParserConfig _config = {0};
1323
+ JSON_ParserConfig *config = &_config;
1324
+ parser_config_init(config, opts);
1325
+
1326
+ return cParser_parse(config, Vsource);
2106
1327
  }
2107
1328
 
2108
- static void JSON_free(void *ptr)
1329
+ static void JSON_ParserConfig_mark(void *ptr)
2109
1330
  {
2110
- JSON_Parser *json = ptr;
2111
- fbuffer_free(json->fbuffer);
2112
- ruby_xfree(json);
1331
+ JSON_ParserConfig *config = ptr;
1332
+ rb_gc_mark(config->on_load_proc);
1333
+ rb_gc_mark(config->decimal_class);
2113
1334
  }
2114
1335
 
2115
- static size_t JSON_memsize(const void *ptr)
1336
+ static void JSON_ParserConfig_free(void *ptr)
2116
1337
  {
2117
- const JSON_Parser *json = ptr;
2118
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
1338
+ JSON_ParserConfig *config = ptr;
1339
+ ruby_xfree(config);
2119
1340
  }
2120
1341
 
2121
- #ifdef NEW_TYPEDDATA_WRAPPER
2122
- static const rb_data_type_t JSON_Parser_type = {
2123
- "JSON/Parser",
2124
- {JSON_mark, JSON_free, JSON_memsize,},
2125
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
1342
+ static size_t JSON_ParserConfig_memsize(const void *ptr)
1343
+ {
1344
+ return sizeof(JSON_ParserConfig);
1345
+ }
1346
+
1347
+ static const rb_data_type_t JSON_ParserConfig_type = {
1348
+ "JSON::Ext::Parser/ParserConfig",
1349
+ {
1350
+ JSON_ParserConfig_mark,
1351
+ JSON_ParserConfig_free,
1352
+ JSON_ParserConfig_memsize,
1353
+ },
2126
1354
  0, 0,
2127
- RUBY_TYPED_FREE_IMMEDIATELY,
2128
- #endif
1355
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
2129
1356
  };
2130
- #endif
2131
1357
 
2132
1358
  static VALUE cJSON_parser_s_allocate(VALUE klass)
2133
1359
  {
2134
- JSON_Parser *json;
2135
- VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
2136
- json->fbuffer = fbuffer_alloc(0);
2137
- return obj;
2138
- }
2139
-
2140
- /*
2141
- * call-seq: source()
2142
- *
2143
- * Returns a copy of the current _source_ string, that was used to construct
2144
- * this Parser.
2145
- */
2146
- static VALUE cParser_source(VALUE self)
2147
- {
2148
- GET_PARSER;
2149
- return rb_str_dup(json->Vsource);
1360
+ JSON_ParserConfig *config;
1361
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
2150
1362
  }
2151
1363
 
2152
1364
  void Init_parser(void)
@@ -2158,16 +1370,16 @@ void Init_parser(void)
2158
1370
  #undef rb_intern
2159
1371
  rb_require("json/common");
2160
1372
  mJSON = rb_define_module("JSON");
2161
- mExt = rb_define_module_under(mJSON, "Ext");
2162
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
2163
- eParserError = rb_path2class("JSON::ParserError");
1373
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1374
+ VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
2164
1375
  eNestingError = rb_path2class("JSON::NestingError");
2165
- rb_gc_register_mark_object(eParserError);
2166
1376
  rb_gc_register_mark_object(eNestingError);
2167
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
2168
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
2169
- rb_define_method(cParser, "parse", cParser_parse, 0);
2170
- rb_define_method(cParser, "source", cParser_source, 0);
1377
+ rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
1378
+ rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
1379
+ rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
1380
+
1381
+ VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
1382
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
2171
1383
 
2172
1384
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
2173
1385
  rb_gc_register_mark_object(CNaN);
@@ -2178,34 +1390,27 @@ void Init_parser(void)
2178
1390
  CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
2179
1391
  rb_gc_register_mark_object(CMinusInfinity);
2180
1392
 
2181
- i_json_creatable_p = rb_intern("json_creatable?");
2182
- i_json_create = rb_intern("json_create");
2183
- i_create_id = rb_intern("create_id");
2184
- i_create_additions = rb_intern("create_additions");
1393
+ rb_global_variable(&Encoding_UTF_8);
1394
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1395
+
1396
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1397
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1398
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1399
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1400
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1401
+ sym_on_load = ID2SYM(rb_intern("on_load"));
1402
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1403
+
2185
1404
  i_chr = rb_intern("chr");
2186
- i_max_nesting = rb_intern("max_nesting");
2187
- i_allow_nan = rb_intern("allow_nan");
2188
- i_symbolize_names = rb_intern("symbolize_names");
2189
- i_object_class = rb_intern("object_class");
2190
- i_array_class = rb_intern("array_class");
2191
- i_decimal_class = rb_intern("decimal_class");
2192
- i_match = rb_intern("match");
2193
- i_match_string = rb_intern("match_string");
2194
- i_key_p = rb_intern("key?");
2195
- i_deep_const_get = rb_intern("deep_const_get");
2196
1405
  i_aset = rb_intern("[]=");
2197
1406
  i_aref = rb_intern("[]");
2198
1407
  i_leftshift = rb_intern("<<");
2199
1408
  i_new = rb_intern("new");
2200
1409
  i_try_convert = rb_intern("try_convert");
2201
- i_freeze = rb_intern("freeze");
2202
1410
  i_uminus = rb_intern("-@");
2203
- }
1411
+ i_encode = rb_intern("encode");
2204
1412
 
2205
- /*
2206
- * Local variables:
2207
- * mode: c
2208
- * c-file-style: ruby
2209
- * indent-tabs-mode: nil
2210
- * End:
2211
- */
1413
+ binary_encindex = rb_ascii8bit_encindex();
1414
+ utf8_encindex = rb_utf8_encindex();
1415
+ enc_utf8 = rb_utf8_encoding();
1416
+ }