json 2.6.2 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/BSDL +22 -0
  3. data/CHANGES.md +144 -17
  4. data/LEGAL +8 -0
  5. data/README.md +67 -224
  6. data/ext/json/ext/fbuffer/fbuffer.h +110 -92
  7. data/ext/json/ext/generator/extconf.rb +8 -2
  8. data/ext/json/ext/generator/generator.c +1020 -806
  9. data/ext/json/ext/parser/extconf.rb +7 -27
  10. data/ext/json/ext/parser/parser.c +1343 -3212
  11. data/json.gemspec +48 -52
  12. data/lib/json/add/bigdecimal.rb +39 -10
  13. data/lib/json/add/complex.rb +29 -6
  14. data/lib/json/add/core.rb +1 -1
  15. data/lib/json/add/date.rb +27 -7
  16. data/lib/json/add/date_time.rb +26 -9
  17. data/lib/json/add/exception.rb +25 -7
  18. data/lib/json/add/ostruct.rb +32 -9
  19. data/lib/json/add/range.rb +33 -8
  20. data/lib/json/add/rational.rb +28 -6
  21. data/lib/json/add/regexp.rb +26 -8
  22. data/lib/json/add/set.rb +25 -6
  23. data/lib/json/add/struct.rb +29 -7
  24. data/lib/json/add/symbol.rb +34 -7
  25. data/lib/json/add/time.rb +29 -15
  26. data/lib/json/common.rb +418 -128
  27. data/lib/json/ext/generator/state.rb +106 -0
  28. data/lib/json/ext.rb +34 -4
  29. data/lib/json/generic_object.rb +7 -3
  30. data/lib/json/truffle_ruby/generator.rb +690 -0
  31. data/lib/json/version.rb +3 -7
  32. data/lib/json.rb +25 -21
  33. metadata +15 -26
  34. data/VERSION +0 -1
  35. data/ext/json/ext/generator/depend +0 -1
  36. data/ext/json/ext/generator/generator.h +0 -174
  37. data/ext/json/ext/parser/depend +0 -1
  38. data/ext/json/ext/parser/parser.h +0 -96
  39. data/ext/json/ext/parser/parser.rl +0 -986
  40. data/ext/json/extconf.rb +0 -3
  41. data/lib/json/pure/generator.rb +0 -479
  42. data/lib/json/pure/parser.rb +0 -337
  43. data/lib/json/pure.rb +0 -15
  44. /data/{LICENSE → COPYING} +0 -0
@@ -1,3347 +1,1478 @@
1
- /* This file is automatically generated from parser.rl by using ragel */
2
- #line 1 "parser.rl"
3
- #include "../fbuffer/fbuffer.h"
4
- #include "parser.h"
5
-
6
- #if defined HAVE_RUBY_ENCODING_H
7
- # define EXC_ENCODING rb_utf8_encoding(),
8
- # ifndef HAVE_RB_ENC_RAISE
9
- static void
10
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
11
- {
12
- va_list args;
13
- VALUE mesg;
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
14
3
 
15
- va_start(args, fmt);
16
- mesg = rb_enc_vsprintf(enc, fmt, args);
17
- va_end(args);
4
+ /* shims */
5
+ /* This is the fallback definition from Ruby 3.4 */
18
6
 
19
- rb_exc_raise(rb_exc_new3(exc, mesg));
20
- }
21
- # define rb_enc_raise enc_raise
7
+ #ifndef RBIMPL_STDBOOL_H
8
+ #if defined(__cplusplus)
9
+ # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
+ # include <cstdbool>
22
11
  # endif
23
- #else
24
- # define EXC_ENCODING /* nothing */
25
- # define rb_enc_raise rb_raise
12
+ #elif defined(HAVE_STDBOOL_H)
13
+ # include <stdbool.h>
14
+ #elif !defined(HAVE__BOOL)
15
+ typedef unsigned char _Bool;
16
+ # define bool _Bool
17
+ # define true ((_Bool)+1)
18
+ # define false ((_Bool)+0)
19
+ # define __bool_true_false_are_defined
20
+ #endif
26
21
  #endif
27
22
 
28
- /* unicode */
29
-
30
- static const signed char digit_values[256] = {
31
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
32
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
33
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
34
- -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
35
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
36
- 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
37
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
38
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
39
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
40
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
41
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
42
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
43
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
44
- -1, -1, -1, -1, -1, -1, -1
45
- };
46
-
47
- static UTF32 unescape_unicode(const unsigned char *p)
48
- {
49
- signed char b;
50
- UTF32 result = 0;
51
- b = digit_values[p[0]];
52
- if (b < 0) return UNI_REPLACEMENT_CHAR;
53
- result = (result << 4) | (unsigned char)b;
54
- b = digit_values[p[1]];
55
- if (b < 0) return UNI_REPLACEMENT_CHAR;
56
- result = (result << 4) | (unsigned char)b;
57
- b = digit_values[p[2]];
58
- if (b < 0) return UNI_REPLACEMENT_CHAR;
59
- result = (result << 4) | (unsigned char)b;
60
- b = digit_values[p[3]];
61
- if (b < 0) return UNI_REPLACEMENT_CHAR;
62
- result = (result << 4) | (unsigned char)b;
63
- return result;
64
- }
23
+ #ifndef RB_UNLIKELY
24
+ #define RB_UNLIKELY(expr) expr
25
+ #endif
65
26
 
66
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
67
- {
68
- int len = 1;
69
- if (ch <= 0x7F) {
70
- buf[0] = (char) ch;
71
- } else if (ch <= 0x07FF) {
72
- buf[0] = (char) ((ch >> 6) | 0xC0);
73
- buf[1] = (char) ((ch & 0x3F) | 0x80);
74
- len++;
75
- } else if (ch <= 0xFFFF) {
76
- buf[0] = (char) ((ch >> 12) | 0xE0);
77
- buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
78
- buf[2] = (char) ((ch & 0x3F) | 0x80);
79
- len += 2;
80
- } else if (ch <= 0x1fffff) {
81
- buf[0] =(char) ((ch >> 18) | 0xF0);
82
- buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
83
- buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
84
- buf[3] =(char) ((ch & 0x3F) | 0x80);
85
- len += 3;
86
- } else {
87
- buf[0] = '?';
88
- }
89
- return len;
90
- }
27
+ #ifndef RB_LIKELY
28
+ #define RB_LIKELY(expr) expr
29
+ #endif
91
30
 
92
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
31
+ static VALUE mJSON, eNestingError, Encoding_UTF_8;
93
32
  static VALUE CNaN, CInfinity, CMinusInfinity;
94
33
 
95
- static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
96
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
97
- i_object_class, i_array_class, i_decimal_class, i_key_p,
98
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
99
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
100
-
34
+ static ID i_json_creatable_p, i_json_create, i_create_id,
35
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
36
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
101
37
 
102
- #line 125 "parser.rl"
38
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
39
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
40
+ sym_decimal_class, sym_match_string;
103
41
 
42
+ static int binary_encindex;
43
+ static int utf8_encindex;
104
44
 
45
+ #ifdef HAVE_RB_CATEGORY_WARN
46
+ # define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
47
+ #else
48
+ # define json_deprecated(message) rb_warn(message)
49
+ #endif
105
50
 
106
- enum {JSON_object_start = 1};
107
- enum {JSON_object_first_final = 27};
108
- enum {JSON_object_error = 0};
51
+ static const char deprecated_create_additions_warning[] =
52
+ "JSON.load implicit support for `create_additions: true` is deprecated "
53
+ "and will be removed in 3.0, use JSON.unsafe_load or explicitly "
54
+ "pass `create_additions: true`";
109
55
 
110
- enum {JSON_object_en_main = 1};
56
+ #ifndef HAVE_RB_HASH_BULK_INSERT
57
+ // For TruffleRuby
58
+ void
59
+ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
60
+ {
61
+ long index = 0;
62
+ while (index < count) {
63
+ VALUE name = pairs[index++];
64
+ VALUE value = pairs[index++];
65
+ rb_hash_aset(hash, name, value);
66
+ }
67
+ RB_GC_GUARD(hash);
68
+ }
69
+ #endif
111
70
 
112
- static const char MAYBE_UNUSED(_JSON_object_nfa_targs)[] = {
113
- 0, 0
114
- };
71
+ #ifndef HAVE_RB_HASH_NEW_CAPA
72
+ #define rb_hash_new_capa(n) rb_hash_new()
73
+ #endif
115
74
 
116
- static const char MAYBE_UNUSED(_JSON_object_nfa_offsets)[] = {
117
- 0, 0, 0, 0, 0, 0, 0, 0,
118
- 0, 0, 0, 0, 0, 0, 0, 0,
119
- 0, 0, 0, 0, 0, 0, 0, 0,
120
- 0, 0, 0, 0, 0
121
- };
122
75
 
123
- static const char MAYBE_UNUSED(_JSON_object_nfa_push_actions)[] = {
124
- 0, 0
125
- };
76
+ /* name cache */
126
77
 
127
- static const char MAYBE_UNUSED(_JSON_object_nfa_pop_trans)[] = {
128
- 0, 0
129
- };
78
+ #include <string.h>
79
+ #include <ctype.h>
130
80
 
81
+ // Object names are likely to be repeated, and are frozen.
82
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
83
+ // and save much more expensive lookups into the global fstring table.
84
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
85
+ // to be able to fit safely on the stack.
86
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
87
+ // performance.
88
+ #define JSON_RVALUE_CACHE_CAPA 63
89
+ typedef struct rvalue_cache_struct {
90
+ int length;
91
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
92
+ } rvalue_cache;
131
93
 
132
- #line 167 "parser.rl"
94
+ static rb_encoding *enc_utf8;
133
95
 
96
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
134
97
 
135
- static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
98
+ static inline VALUE build_interned_string(const char *str, const long length)
136
99
  {
137
- int cs = EVIL;
138
- VALUE last_name = Qnil;
139
- VALUE object_class = json->object_class;
140
-
141
- if (json->max_nesting && current_nesting > json->max_nesting) {
142
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
143
- }
144
-
145
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
146
-
147
-
148
- {
149
- cs = (int)JSON_object_start;
150
- }
151
-
152
- #line 182 "parser.rl"
153
-
154
-
155
- {
156
- if ( p == pe )
157
- goto _test_eof;
158
- switch ( cs )
159
- {
160
- case 1:
161
- goto st_case_1;
162
- case 0:
163
- goto st_case_0;
164
- case 2:
165
- goto st_case_2;
166
- case 3:
167
- goto st_case_3;
168
- case 4:
169
- goto st_case_4;
170
- case 5:
171
- goto st_case_5;
172
- case 6:
173
- goto st_case_6;
174
- case 7:
175
- goto st_case_7;
176
- case 8:
177
- goto st_case_8;
178
- case 9:
179
- goto st_case_9;
180
- case 10:
181
- goto st_case_10;
182
- case 11:
183
- goto st_case_11;
184
- case 12:
185
- goto st_case_12;
186
- case 13:
187
- goto st_case_13;
188
- case 14:
189
- goto st_case_14;
190
- case 15:
191
- goto st_case_15;
192
- case 16:
193
- goto st_case_16;
194
- case 17:
195
- goto st_case_17;
196
- case 18:
197
- goto st_case_18;
198
- case 27:
199
- goto st_case_27;
200
- case 19:
201
- goto st_case_19;
202
- case 20:
203
- goto st_case_20;
204
- case 21:
205
- goto st_case_21;
206
- case 22:
207
- goto st_case_22;
208
- case 23:
209
- goto st_case_23;
210
- case 24:
211
- goto st_case_24;
212
- case 25:
213
- goto st_case_25;
214
- case 26:
215
- goto st_case_26;
216
- }
217
- goto st_out;
218
- st_case_1:
219
- if ( ( (*( p))) == 123 ) {
220
- goto st2;
221
- }
222
- {
223
- goto st0;
224
- }
225
- st_case_0:
226
- st0:
227
- cs = 0;
228
- goto _out;
229
- st2:
230
- p+= 1;
231
- if ( p == pe )
232
- goto _test_eof2;
233
- st_case_2:
234
- switch( ( (*( p))) ) {
235
- case 13: {
236
- goto st2;
237
- }
238
- case 32: {
239
- goto st2;
240
- }
241
- case 34: {
242
- goto ctr2;
243
- }
244
- case 47: {
245
- goto st23;
246
- }
247
- case 125: {
248
- goto ctr4;
249
- }
250
- }
251
- if ( 9 <= ( (*( p))) && ( (*( p))) <= 10 ) {
252
- goto st2;
253
- }
254
- {
255
- goto st0;
256
- }
257
- ctr2:
258
- {
259
- #line 149 "parser.rl"
260
-
261
- char *np;
262
- json->parsing_name = 1;
263
- np = JSON_parse_string(json, p, pe, &last_name);
264
- json->parsing_name = 0;
265
- if (np == NULL) { {p = p - 1; } {p+= 1; cs = 3; goto _out;} } else {p = (( np))-1;}
266
-
267
- }
268
-
269
- goto st3;
270
- st3:
271
- p+= 1;
272
- if ( p == pe )
273
- goto _test_eof3;
274
- st_case_3:
275
- switch( ( (*( p))) ) {
276
- case 13: {
277
- goto st3;
278
- }
279
- case 32: {
280
- goto st3;
281
- }
282
- case 47: {
283
- goto st4;
284
- }
285
- case 58: {
286
- goto st8;
287
- }
288
- }
289
- if ( 9 <= ( (*( p))) && ( (*( p))) <= 10 ) {
290
- goto st3;
291
- }
292
- {
293
- goto st0;
294
- }
295
- st4:
296
- p+= 1;
297
- if ( p == pe )
298
- goto _test_eof4;
299
- st_case_4:
300
- switch( ( (*( p))) ) {
301
- case 42: {
302
- goto st5;
303
- }
304
- case 47: {
305
- goto st7;
306
- }
307
- }
308
- {
309
- goto st0;
310
- }
311
- st5:
312
- p+= 1;
313
- if ( p == pe )
314
- goto _test_eof5;
315
- st_case_5:
316
- if ( ( (*( p))) == 42 ) {
317
- goto st6;
318
- }
319
- {
320
- goto st5;
321
- }
322
- st6:
323
- p+= 1;
324
- if ( p == pe )
325
- goto _test_eof6;
326
- st_case_6:
327
- switch( ( (*( p))) ) {
328
- case 42: {
329
- goto st6;
330
- }
331
- case 47: {
332
- goto st3;
333
- }
334
- }
335
- {
336
- goto st5;
337
- }
338
- st7:
339
- p+= 1;
340
- if ( p == pe )
341
- goto _test_eof7;
342
- st_case_7:
343
- if ( ( (*( p))) == 10 ) {
344
- goto st3;
345
- }
346
- {
347
- goto st7;
348
- }
349
- st8:
350
- p+= 1;
351
- if ( p == pe )
352
- goto _test_eof8;
353
- st_case_8:
354
- switch( ( (*( p))) ) {
355
- case 13: {
356
- goto st8;
357
- }
358
- case 32: {
359
- goto st8;
360
- }
361
- case 34: {
362
- goto ctr11;
363
- }
364
- case 45: {
365
- goto ctr11;
366
- }
367
- case 47: {
368
- goto st19;
369
- }
370
- case 73: {
371
- goto ctr11;
372
- }
373
- case 78: {
374
- goto ctr11;
375
- }
376
- case 91: {
377
- goto ctr11;
378
- }
379
- case 102: {
380
- goto ctr11;
381
- }
382
- case 110: {
383
- goto ctr11;
384
- }
385
- case 116: {
386
- goto ctr11;
387
- }
388
- case 123: {
389
- goto ctr11;
390
- }
391
- }
392
- if ( ( (*( p))) > 10 ) {
393
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
394
- goto ctr11;
395
- }
396
- } else if ( ( (*( p))) >= 9 ) {
397
- goto st8;
398
- }
399
- {
400
- goto st0;
401
- }
402
- ctr11:
403
- {
404
- #line 133 "parser.rl"
405
-
406
- VALUE v = Qnil;
407
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
408
- if (np == NULL) {
409
- {p = p - 1; } {p+= 1; cs = 9; goto _out;}
410
- } else {
411
- if (NIL_P(json->object_class)) {
412
- OBJ_FREEZE(last_name);
413
- rb_hash_aset(*result, last_name, v);
414
- } else {
415
- rb_funcall(*result, i_aset, 2, last_name, v);
416
- }
417
- {p = (( np))-1;}
418
-
419
- }
420
- }
421
-
422
- goto st9;
423
- st9:
424
- p+= 1;
425
- if ( p == pe )
426
- goto _test_eof9;
427
- st_case_9:
428
- switch( ( (*( p))) ) {
429
- case 13: {
430
- goto st9;
431
- }
432
- case 32: {
433
- goto st9;
434
- }
435
- case 44: {
436
- goto st10;
437
- }
438
- case 47: {
439
- goto st15;
440
- }
441
- case 125: {
442
- goto ctr4;
443
- }
444
- }
445
- if ( 9 <= ( (*( p))) && ( (*( p))) <= 10 ) {
446
- goto st9;
447
- }
448
- {
449
- goto st0;
450
- }
451
- st10:
452
- p+= 1;
453
- if ( p == pe )
454
- goto _test_eof10;
455
- st_case_10:
456
- switch( ( (*( p))) ) {
457
- case 13: {
458
- goto st10;
459
- }
460
- case 32: {
461
- goto st10;
462
- }
463
- case 34: {
464
- goto ctr2;
465
- }
466
- case 47: {
467
- goto st11;
468
- }
469
- }
470
- if ( 9 <= ( (*( p))) && ( (*( p))) <= 10 ) {
471
- goto st10;
472
- }
473
- {
474
- goto st0;
475
- }
476
- st11:
477
- p+= 1;
478
- if ( p == pe )
479
- goto _test_eof11;
480
- st_case_11:
481
- switch( ( (*( p))) ) {
482
- case 42: {
483
- goto st12;
484
- }
485
- case 47: {
486
- goto st14;
487
- }
488
- }
489
- {
490
- goto st0;
491
- }
492
- st12:
493
- p+= 1;
494
- if ( p == pe )
495
- goto _test_eof12;
496
- st_case_12:
497
- if ( ( (*( p))) == 42 ) {
498
- goto st13;
499
- }
500
- {
501
- goto st12;
502
- }
503
- st13:
504
- p+= 1;
505
- if ( p == pe )
506
- goto _test_eof13;
507
- st_case_13:
508
- switch( ( (*( p))) ) {
509
- case 42: {
510
- goto st13;
511
- }
512
- case 47: {
513
- goto st10;
514
- }
515
- }
516
- {
517
- goto st12;
518
- }
519
- st14:
520
- p+= 1;
521
- if ( p == pe )
522
- goto _test_eof14;
523
- st_case_14:
524
- if ( ( (*( p))) == 10 ) {
525
- goto st10;
526
- }
527
- {
528
- goto st14;
529
- }
530
- st15:
531
- p+= 1;
532
- if ( p == pe )
533
- goto _test_eof15;
534
- st_case_15:
535
- switch( ( (*( p))) ) {
536
- case 42: {
537
- goto st16;
538
- }
539
- case 47: {
540
- goto st18;
541
- }
542
- }
543
- {
544
- goto st0;
545
- }
546
- st16:
547
- p+= 1;
548
- if ( p == pe )
549
- goto _test_eof16;
550
- st_case_16:
551
- if ( ( (*( p))) == 42 ) {
552
- goto st17;
553
- }
554
- {
555
- goto st16;
556
- }
557
- st17:
558
- p+= 1;
559
- if ( p == pe )
560
- goto _test_eof17;
561
- st_case_17:
562
- switch( ( (*( p))) ) {
563
- case 42: {
564
- goto st17;
565
- }
566
- case 47: {
567
- goto st9;
568
- }
569
- }
570
- {
571
- goto st16;
572
- }
573
- st18:
574
- p+= 1;
575
- if ( p == pe )
576
- goto _test_eof18;
577
- st_case_18:
578
- if ( ( (*( p))) == 10 ) {
579
- goto st9;
580
- }
581
- {
582
- goto st18;
583
- }
584
- ctr4:
585
- {
586
- #line 157 "parser.rl"
587
- {p = p - 1; } {p+= 1; cs = 27; goto _out;} }
588
-
589
- goto st27;
590
- st27:
591
- p+= 1;
592
- if ( p == pe )
593
- goto _test_eof27;
594
- st_case_27:
595
- {
596
- goto st0;
597
- }
598
- st19:
599
- p+= 1;
600
- if ( p == pe )
601
- goto _test_eof19;
602
- st_case_19:
603
- switch( ( (*( p))) ) {
604
- case 42: {
605
- goto st20;
606
- }
607
- case 47: {
608
- goto st22;
609
- }
610
- }
611
- {
612
- goto st0;
613
- }
614
- st20:
615
- p+= 1;
616
- if ( p == pe )
617
- goto _test_eof20;
618
- st_case_20:
619
- if ( ( (*( p))) == 42 ) {
620
- goto st21;
621
- }
622
- {
623
- goto st20;
624
- }
625
- st21:
626
- p+= 1;
627
- if ( p == pe )
628
- goto _test_eof21;
629
- st_case_21:
630
- switch( ( (*( p))) ) {
631
- case 42: {
632
- goto st21;
633
- }
634
- case 47: {
635
- goto st8;
636
- }
637
- }
638
- {
639
- goto st20;
640
- }
641
- st22:
642
- p+= 1;
643
- if ( p == pe )
644
- goto _test_eof22;
645
- st_case_22:
646
- if ( ( (*( p))) == 10 ) {
647
- goto st8;
648
- }
649
- {
650
- goto st22;
651
- }
652
- st23:
653
- p+= 1;
654
- if ( p == pe )
655
- goto _test_eof23;
656
- st_case_23:
657
- switch( ( (*( p))) ) {
658
- case 42: {
659
- goto st24;
660
- }
661
- case 47: {
662
- goto st26;
663
- }
664
- }
665
- {
666
- goto st0;
667
- }
668
- st24:
669
- p+= 1;
670
- if ( p == pe )
671
- goto _test_eof24;
672
- st_case_24:
673
- if ( ( (*( p))) == 42 ) {
674
- goto st25;
675
- }
676
- {
677
- goto st24;
678
- }
679
- st25:
680
- p+= 1;
681
- if ( p == pe )
682
- goto _test_eof25;
683
- st_case_25:
684
- switch( ( (*( p))) ) {
685
- case 42: {
686
- goto st25;
687
- }
688
- case 47: {
689
- goto st2;
690
- }
691
- }
692
- {
693
- goto st24;
694
- }
695
- st26:
696
- p+= 1;
697
- if ( p == pe )
698
- goto _test_eof26;
699
- st_case_26:
700
- if ( ( (*( p))) == 10 ) {
701
- goto st2;
702
- }
703
- {
704
- goto st26;
705
- }
706
- st_out:
707
- _test_eof2: cs = 2; goto _test_eof;
708
- _test_eof3: cs = 3; goto _test_eof;
709
- _test_eof4: cs = 4; goto _test_eof;
710
- _test_eof5: cs = 5; goto _test_eof;
711
- _test_eof6: cs = 6; goto _test_eof;
712
- _test_eof7: cs = 7; goto _test_eof;
713
- _test_eof8: cs = 8; goto _test_eof;
714
- _test_eof9: cs = 9; goto _test_eof;
715
- _test_eof10: cs = 10; goto _test_eof;
716
- _test_eof11: cs = 11; goto _test_eof;
717
- _test_eof12: cs = 12; goto _test_eof;
718
- _test_eof13: cs = 13; goto _test_eof;
719
- _test_eof14: cs = 14; goto _test_eof;
720
- _test_eof15: cs = 15; goto _test_eof;
721
- _test_eof16: cs = 16; goto _test_eof;
722
- _test_eof17: cs = 17; goto _test_eof;
723
- _test_eof18: cs = 18; goto _test_eof;
724
- _test_eof27: cs = 27; goto _test_eof;
725
- _test_eof19: cs = 19; goto _test_eof;
726
- _test_eof20: cs = 20; goto _test_eof;
727
- _test_eof21: cs = 21; goto _test_eof;
728
- _test_eof22: cs = 22; goto _test_eof;
729
- _test_eof23: cs = 23; goto _test_eof;
730
- _test_eof24: cs = 24; goto _test_eof;
731
- _test_eof25: cs = 25; goto _test_eof;
732
- _test_eof26: cs = 26; goto _test_eof;
733
-
734
- _test_eof: {}
735
- _out: {}
736
- }
737
-
738
- #line 183 "parser.rl"
739
-
740
-
741
- if (cs >= JSON_object_first_final) {
742
- if (json->create_additions) {
743
- VALUE klassname;
744
- if (NIL_P(json->object_class)) {
745
- klassname = rb_hash_aref(*result, json->create_id);
746
- } else {
747
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
748
- }
749
- if (!NIL_P(klassname)) {
750
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
751
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
752
- *result = rb_funcall(klass, i_json_create, 1, *result);
753
- }
754
- }
755
- }
756
- return p + 1;
757
- } else {
758
- return NULL;
759
- }
100
+ # ifdef HAVE_RB_ENC_INTERNED_STR
101
+ return rb_enc_interned_str(str, length, enc_utf8);
102
+ # else
103
+ VALUE rstring = rb_utf8_str_new(str, length);
104
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
105
+ # endif
760
106
  }
761
107
 
108
+ static inline VALUE build_symbol(const char *str, const long length)
109
+ {
110
+ return rb_str_intern(build_interned_string(str, length));
111
+ }
762
112
 
113
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
114
+ {
115
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
116
+ cache->length++;
117
+ cache->entries[index] = rstring;
118
+ }
763
119
 
764
- enum {JSON_value_start = 1};
765
- enum {JSON_value_first_final = 29};
766
- enum {JSON_value_error = 0};
120
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
121
+ {
122
+ long rstring_length = RSTRING_LEN(rstring);
123
+ if (length == rstring_length) {
124
+ return memcmp(str, RSTRING_PTR(rstring), length);
125
+ } else {
126
+ return (int)(length - rstring_length);
127
+ }
128
+ }
767
129
 
768
- enum {JSON_value_en_main = 1};
130
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
131
+ {
132
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
133
+ // Common names aren't likely to be very long. So we just don't
134
+ // cache names above an arbitrary threshold.
135
+ return Qfalse;
136
+ }
137
+
138
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
139
+ // Simple heuristic, if the first character isn't a letter,
140
+ // we're much less likely to see this string again.
141
+ // We mostly want to cache strings that are likely to be repeated.
142
+ return Qfalse;
143
+ }
144
+
145
+ int low = 0;
146
+ int high = cache->length - 1;
147
+ int mid = 0;
148
+ int last_cmp = 0;
149
+
150
+ while (low <= high) {
151
+ mid = (high + low) >> 1;
152
+ VALUE entry = cache->entries[mid];
153
+ last_cmp = rstring_cache_cmp(str, length, entry);
154
+
155
+ if (last_cmp == 0) {
156
+ return entry;
157
+ } else if (last_cmp > 0) {
158
+ low = mid + 1;
159
+ } else {
160
+ high = mid - 1;
161
+ }
162
+ }
163
+
164
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
165
+ // We assume the overwhelming majority of names don't need to be escaped.
166
+ // But if they do, we have to fallback to the slow path.
167
+ return Qfalse;
168
+ }
169
+
170
+ VALUE rstring = build_interned_string(str, length);
171
+
172
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
173
+ if (last_cmp > 0) {
174
+ mid += 1;
175
+ }
176
+
177
+ rvalue_cache_insert_at(cache, mid, rstring);
178
+ }
179
+ return rstring;
180
+ }
769
181
 
770
- static const char MAYBE_UNUSED(_JSON_value_nfa_targs)[] = {
771
- 0, 0
772
- };
182
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
183
+ {
184
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
185
+ // Common names aren't likely to be very long. So we just don't
186
+ // cache names above an arbitrary threshold.
187
+ return Qfalse;
188
+ }
189
+
190
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
191
+ // Simple heuristic, if the first character isn't a letter,
192
+ // we're much less likely to see this string again.
193
+ // We mostly want to cache strings that are likely to be repeated.
194
+ return Qfalse;
195
+ }
196
+
197
+ int low = 0;
198
+ int high = cache->length - 1;
199
+ int mid = 0;
200
+ int last_cmp = 0;
201
+
202
+ while (low <= high) {
203
+ mid = (high + low) >> 1;
204
+ VALUE entry = cache->entries[mid];
205
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
206
+
207
+ if (last_cmp == 0) {
208
+ return entry;
209
+ } else if (last_cmp > 0) {
210
+ low = mid + 1;
211
+ } else {
212
+ high = mid - 1;
213
+ }
214
+ }
215
+
216
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
217
+ // We assume the overwhelming majority of names don't need to be escaped.
218
+ // But if they do, we have to fallback to the slow path.
219
+ return Qfalse;
220
+ }
221
+
222
+ VALUE rsymbol = build_symbol(str, length);
223
+
224
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
225
+ if (last_cmp > 0) {
226
+ mid += 1;
227
+ }
228
+
229
+ rvalue_cache_insert_at(cache, mid, rsymbol);
230
+ }
231
+ return rsymbol;
232
+ }
773
233
 
774
- static const char MAYBE_UNUSED(_JSON_value_nfa_offsets)[] = {
775
- 0, 0, 0, 0, 0, 0, 0, 0,
776
- 0, 0, 0, 0, 0, 0, 0, 0,
777
- 0, 0, 0, 0, 0, 0, 0, 0,
778
- 0, 0, 0, 0, 0, 0, 0
779
- };
234
+ /* rvalue stack */
780
235
 
781
- static const char MAYBE_UNUSED(_JSON_value_nfa_push_actions)[] = {
782
- 0, 0
783
- };
236
+ #define RVALUE_STACK_INITIAL_CAPA 128
784
237
 
785
- static const char MAYBE_UNUSED(_JSON_value_nfa_pop_trans)[] = {
786
- 0, 0
238
+ enum rvalue_stack_type {
239
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
240
+ RVALUE_STACK_STACK_ALLOCATED = 1,
787
241
  };
788
242
 
243
+ typedef struct rvalue_stack_struct {
244
+ enum rvalue_stack_type type;
245
+ long capa;
246
+ long head;
247
+ VALUE *ptr;
248
+ } rvalue_stack;
789
249
 
790
- #line 283 "parser.rl"
250
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
791
251
 
252
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
253
+ {
254
+ long required = stack->capa * 2;
255
+
256
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
257
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
258
+ } else {
259
+ REALLOC_N(stack->ptr, VALUE, required);
260
+ stack->capa = required;
261
+ }
262
+ return stack;
263
+ }
792
264
 
793
- static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
265
+ static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
794
266
  {
795
- int cs = EVIL;
796
-
797
-
798
- {
799
- cs = (int)JSON_value_start;
800
- }
801
-
802
- #line 290 "parser.rl"
803
-
804
-
805
- {
806
- if ( p == pe )
807
- goto _test_eof;
808
- switch ( cs )
809
- {
810
- case 1:
811
- goto st_case_1;
812
- case 0:
813
- goto st_case_0;
814
- case 29:
815
- goto st_case_29;
816
- case 2:
817
- goto st_case_2;
818
- case 3:
819
- goto st_case_3;
820
- case 4:
821
- goto st_case_4;
822
- case 5:
823
- goto st_case_5;
824
- case 6:
825
- goto st_case_6;
826
- case 7:
827
- goto st_case_7;
828
- case 8:
829
- goto st_case_8;
830
- case 9:
831
- goto st_case_9;
832
- case 10:
833
- goto st_case_10;
834
- case 11:
835
- goto st_case_11;
836
- case 12:
837
- goto st_case_12;
838
- case 13:
839
- goto st_case_13;
840
- case 14:
841
- goto st_case_14;
842
- case 15:
843
- goto st_case_15;
844
- case 16:
845
- goto st_case_16;
846
- case 17:
847
- goto st_case_17;
848
- case 18:
849
- goto st_case_18;
850
- case 19:
851
- goto st_case_19;
852
- case 20:
853
- goto st_case_20;
854
- case 21:
855
- goto st_case_21;
856
- case 22:
857
- goto st_case_22;
858
- case 23:
859
- goto st_case_23;
860
- case 24:
861
- goto st_case_24;
862
- case 25:
863
- goto st_case_25;
864
- case 26:
865
- goto st_case_26;
866
- case 27:
867
- goto st_case_27;
868
- case 28:
869
- goto st_case_28;
870
- }
871
- goto st_out;
872
- st1:
873
- p+= 1;
874
- if ( p == pe )
875
- goto _test_eof1;
876
- st_case_1:
877
- switch( ( (*( p))) ) {
878
- case 13: {
879
- goto st1;
880
- }
881
- case 32: {
882
- goto st1;
883
- }
884
- case 34: {
885
- goto ctr2;
886
- }
887
- case 45: {
888
- goto ctr3;
889
- }
890
- case 47: {
891
- goto st6;
892
- }
893
- case 73: {
894
- goto st10;
895
- }
896
- case 78: {
897
- goto st17;
898
- }
899
- case 91: {
900
- goto ctr7;
901
- }
902
- case 102: {
903
- goto st19;
904
- }
905
- case 110: {
906
- goto st23;
907
- }
908
- case 116: {
909
- goto st26;
910
- }
911
- case 123: {
912
- goto ctr11;
913
- }
914
- }
915
- if ( ( (*( p))) > 10 ) {
916
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
917
- goto ctr3;
918
- }
919
- } else if ( ( (*( p))) >= 9 ) {
920
- goto st1;
921
- }
922
- {
923
- goto st0;
924
- }
925
- st_case_0:
926
- st0:
927
- cs = 0;
928
- goto _out;
929
- ctr2:
930
- {
931
- #line 235 "parser.rl"
932
-
933
- char *np = JSON_parse_string(json, p, pe, result);
934
- if (np == NULL) { {p = p - 1; } {p+= 1; cs = 29; goto _out;} } else {p = (( np))-1;}
935
-
936
- }
937
-
938
- goto st29;
939
- ctr3:
940
- {
941
- #line 240 "parser.rl"
942
-
943
- char *np;
944
- if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
945
- if (json->allow_nan) {
946
- *result = CMinusInfinity;
947
- {p = (( p + 10))-1;}
948
-
949
- {p = p - 1; } {p+= 1; cs = 29; goto _out;}
950
- } else {
951
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
952
- }
953
- }
954
- np = JSON_parse_float(json, p, pe, result);
955
- if (np != NULL) {p = (( np))-1;}
956
-
957
- np = JSON_parse_integer(json, p, pe, result);
958
- if (np != NULL) {p = (( np))-1;}
959
-
960
- {p = p - 1; } {p+= 1; cs = 29; goto _out;}
961
- }
962
-
963
- goto st29;
964
- ctr7:
965
- {
966
- #line 258 "parser.rl"
967
-
968
- char *np;
969
- np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
970
- if (np == NULL) { {p = p - 1; } {p+= 1; cs = 29; goto _out;} } else {p = (( np))-1;}
971
-
972
- }
973
-
974
- goto st29;
975
- ctr11:
976
- {
977
- #line 264 "parser.rl"
978
-
979
- char *np;
980
- np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
981
- if (np == NULL) { {p = p - 1; } {p+= 1; cs = 29; goto _out;} } else {p = (( np))-1;}
982
-
983
- }
984
-
985
- goto st29;
986
- ctr25:
987
- {
988
- #line 228 "parser.rl"
989
-
990
- if (json->allow_nan) {
991
- *result = CInfinity;
992
- } else {
993
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 8);
994
- }
995
- }
996
-
997
- goto st29;
998
- ctr27:
999
- {
1000
- #line 221 "parser.rl"
1001
-
1002
- if (json->allow_nan) {
1003
- *result = CNaN;
1004
- } else {
1005
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p - 2);
1006
- }
1007
- }
1008
-
1009
- goto st29;
1010
- ctr31:
1011
- {
1012
- #line 215 "parser.rl"
1013
-
1014
- *result = Qfalse;
1015
- }
1016
-
1017
- goto st29;
1018
- ctr34:
1019
- {
1020
- #line 212 "parser.rl"
1021
-
1022
- *result = Qnil;
1023
- }
1024
-
1025
- goto st29;
1026
- ctr37:
1027
- {
1028
- #line 218 "parser.rl"
1029
-
1030
- *result = Qtrue;
1031
- }
1032
-
1033
- goto st29;
1034
- st29:
1035
- p+= 1;
1036
- if ( p == pe )
1037
- goto _test_eof29;
1038
- st_case_29:
1039
- {
1040
- #line 270 "parser.rl"
1041
- {p = p - 1; } {p+= 1; cs = 29; goto _out;} }
1042
- switch( ( (*( p))) ) {
1043
- case 13: {
1044
- goto st29;
1045
- }
1046
- case 32: {
1047
- goto st29;
1048
- }
1049
- case 47: {
1050
- goto st2;
1051
- }
1052
- }
1053
- if ( 9 <= ( (*( p))) && ( (*( p))) <= 10 ) {
1054
- goto st29;
1055
- }
1056
- {
1057
- goto st0;
1058
- }
1059
- st2:
1060
- p+= 1;
1061
- if ( p == pe )
1062
- goto _test_eof2;
1063
- st_case_2:
1064
- switch( ( (*( p))) ) {
1065
- case 42: {
1066
- goto st3;
1067
- }
1068
- case 47: {
1069
- goto st5;
1070
- }
1071
- }
1072
- {
1073
- goto st0;
1074
- }
1075
- st3:
1076
- p+= 1;
1077
- if ( p == pe )
1078
- goto _test_eof3;
1079
- st_case_3:
1080
- if ( ( (*( p))) == 42 ) {
1081
- goto st4;
1082
- }
1083
- {
1084
- goto st3;
1085
- }
1086
- st4:
1087
- p+= 1;
1088
- if ( p == pe )
1089
- goto _test_eof4;
1090
- st_case_4:
1091
- switch( ( (*( p))) ) {
1092
- case 42: {
1093
- goto st4;
1094
- }
1095
- case 47: {
1096
- goto st29;
1097
- }
1098
- }
1099
- {
1100
- goto st3;
1101
- }
1102
- st5:
1103
- p+= 1;
1104
- if ( p == pe )
1105
- goto _test_eof5;
1106
- st_case_5:
1107
- if ( ( (*( p))) == 10 ) {
1108
- goto st29;
1109
- }
1110
- {
1111
- goto st5;
1112
- }
1113
- st6:
1114
- p+= 1;
1115
- if ( p == pe )
1116
- goto _test_eof6;
1117
- st_case_6:
1118
- switch( ( (*( p))) ) {
1119
- case 42: {
1120
- goto st7;
1121
- }
1122
- case 47: {
1123
- goto st9;
1124
- }
1125
- }
1126
- {
1127
- goto st0;
1128
- }
1129
- st7:
1130
- p+= 1;
1131
- if ( p == pe )
1132
- goto _test_eof7;
1133
- st_case_7:
1134
- if ( ( (*( p))) == 42 ) {
1135
- goto st8;
1136
- }
1137
- {
1138
- goto st7;
1139
- }
1140
- st8:
1141
- p+= 1;
1142
- if ( p == pe )
1143
- goto _test_eof8;
1144
- st_case_8:
1145
- switch( ( (*( p))) ) {
1146
- case 42: {
1147
- goto st8;
1148
- }
1149
- case 47: {
1150
- goto st1;
1151
- }
1152
- }
1153
- {
1154
- goto st7;
1155
- }
1156
- st9:
1157
- p+= 1;
1158
- if ( p == pe )
1159
- goto _test_eof9;
1160
- st_case_9:
1161
- if ( ( (*( p))) == 10 ) {
1162
- goto st1;
1163
- }
1164
- {
1165
- goto st9;
1166
- }
1167
- st10:
1168
- p+= 1;
1169
- if ( p == pe )
1170
- goto _test_eof10;
1171
- st_case_10:
1172
- if ( ( (*( p))) == 110 ) {
1173
- goto st11;
1174
- }
1175
- {
1176
- goto st0;
1177
- }
1178
- st11:
1179
- p+= 1;
1180
- if ( p == pe )
1181
- goto _test_eof11;
1182
- st_case_11:
1183
- if ( ( (*( p))) == 102 ) {
1184
- goto st12;
1185
- }
1186
- {
1187
- goto st0;
1188
- }
1189
- st12:
1190
- p+= 1;
1191
- if ( p == pe )
1192
- goto _test_eof12;
1193
- st_case_12:
1194
- if ( ( (*( p))) == 105 ) {
1195
- goto st13;
1196
- }
1197
- {
1198
- goto st0;
1199
- }
1200
- st13:
1201
- p+= 1;
1202
- if ( p == pe )
1203
- goto _test_eof13;
1204
- st_case_13:
1205
- if ( ( (*( p))) == 110 ) {
1206
- goto st14;
1207
- }
1208
- {
1209
- goto st0;
1210
- }
1211
- st14:
1212
- p+= 1;
1213
- if ( p == pe )
1214
- goto _test_eof14;
1215
- st_case_14:
1216
- if ( ( (*( p))) == 105 ) {
1217
- goto st15;
1218
- }
1219
- {
1220
- goto st0;
1221
- }
1222
- st15:
1223
- p+= 1;
1224
- if ( p == pe )
1225
- goto _test_eof15;
1226
- st_case_15:
1227
- if ( ( (*( p))) == 116 ) {
1228
- goto st16;
1229
- }
1230
- {
1231
- goto st0;
1232
- }
1233
- st16:
1234
- p+= 1;
1235
- if ( p == pe )
1236
- goto _test_eof16;
1237
- st_case_16:
1238
- if ( ( (*( p))) == 121 ) {
1239
- goto ctr25;
1240
- }
1241
- {
1242
- goto st0;
1243
- }
1244
- st17:
1245
- p+= 1;
1246
- if ( p == pe )
1247
- goto _test_eof17;
1248
- st_case_17:
1249
- if ( ( (*( p))) == 97 ) {
1250
- goto st18;
1251
- }
1252
- {
1253
- goto st0;
1254
- }
1255
- st18:
1256
- p+= 1;
1257
- if ( p == pe )
1258
- goto _test_eof18;
1259
- st_case_18:
1260
- if ( ( (*( p))) == 78 ) {
1261
- goto ctr27;
1262
- }
1263
- {
1264
- goto st0;
1265
- }
1266
- st19:
1267
- p+= 1;
1268
- if ( p == pe )
1269
- goto _test_eof19;
1270
- st_case_19:
1271
- if ( ( (*( p))) == 97 ) {
1272
- goto st20;
1273
- }
1274
- {
1275
- goto st0;
1276
- }
1277
- st20:
1278
- p+= 1;
1279
- if ( p == pe )
1280
- goto _test_eof20;
1281
- st_case_20:
1282
- if ( ( (*( p))) == 108 ) {
1283
- goto st21;
1284
- }
1285
- {
1286
- goto st0;
1287
- }
1288
- st21:
1289
- p+= 1;
1290
- if ( p == pe )
1291
- goto _test_eof21;
1292
- st_case_21:
1293
- if ( ( (*( p))) == 115 ) {
1294
- goto st22;
1295
- }
1296
- {
1297
- goto st0;
1298
- }
1299
- st22:
1300
- p+= 1;
1301
- if ( p == pe )
1302
- goto _test_eof22;
1303
- st_case_22:
1304
- if ( ( (*( p))) == 101 ) {
1305
- goto ctr31;
1306
- }
1307
- {
1308
- goto st0;
1309
- }
1310
- st23:
1311
- p+= 1;
1312
- if ( p == pe )
1313
- goto _test_eof23;
1314
- st_case_23:
1315
- if ( ( (*( p))) == 117 ) {
1316
- goto st24;
1317
- }
1318
- {
1319
- goto st0;
1320
- }
1321
- st24:
1322
- p+= 1;
1323
- if ( p == pe )
1324
- goto _test_eof24;
1325
- st_case_24:
1326
- if ( ( (*( p))) == 108 ) {
1327
- goto st25;
1328
- }
1329
- {
1330
- goto st0;
1331
- }
1332
- st25:
1333
- p+= 1;
1334
- if ( p == pe )
1335
- goto _test_eof25;
1336
- st_case_25:
1337
- if ( ( (*( p))) == 108 ) {
1338
- goto ctr34;
1339
- }
1340
- {
1341
- goto st0;
1342
- }
1343
- st26:
1344
- p+= 1;
1345
- if ( p == pe )
1346
- goto _test_eof26;
1347
- st_case_26:
1348
- if ( ( (*( p))) == 114 ) {
1349
- goto st27;
1350
- }
1351
- {
1352
- goto st0;
1353
- }
1354
- st27:
1355
- p+= 1;
1356
- if ( p == pe )
1357
- goto _test_eof27;
1358
- st_case_27:
1359
- if ( ( (*( p))) == 117 ) {
1360
- goto st28;
1361
- }
1362
- {
1363
- goto st0;
1364
- }
1365
- st28:
1366
- p+= 1;
1367
- if ( p == pe )
1368
- goto _test_eof28;
1369
- st_case_28:
1370
- if ( ( (*( p))) == 101 ) {
1371
- goto ctr37;
1372
- }
1373
- {
1374
- goto st0;
1375
- }
1376
- st_out:
1377
- _test_eof1: cs = 1; goto _test_eof;
1378
- _test_eof29: cs = 29; goto _test_eof;
1379
- _test_eof2: cs = 2; goto _test_eof;
1380
- _test_eof3: cs = 3; goto _test_eof;
1381
- _test_eof4: cs = 4; goto _test_eof;
1382
- _test_eof5: cs = 5; goto _test_eof;
1383
- _test_eof6: cs = 6; goto _test_eof;
1384
- _test_eof7: cs = 7; goto _test_eof;
1385
- _test_eof8: cs = 8; goto _test_eof;
1386
- _test_eof9: cs = 9; goto _test_eof;
1387
- _test_eof10: cs = 10; goto _test_eof;
1388
- _test_eof11: cs = 11; goto _test_eof;
1389
- _test_eof12: cs = 12; goto _test_eof;
1390
- _test_eof13: cs = 13; goto _test_eof;
1391
- _test_eof14: cs = 14; goto _test_eof;
1392
- _test_eof15: cs = 15; goto _test_eof;
1393
- _test_eof16: cs = 16; goto _test_eof;
1394
- _test_eof17: cs = 17; goto _test_eof;
1395
- _test_eof18: cs = 18; goto _test_eof;
1396
- _test_eof19: cs = 19; goto _test_eof;
1397
- _test_eof20: cs = 20; goto _test_eof;
1398
- _test_eof21: cs = 21; goto _test_eof;
1399
- _test_eof22: cs = 22; goto _test_eof;
1400
- _test_eof23: cs = 23; goto _test_eof;
1401
- _test_eof24: cs = 24; goto _test_eof;
1402
- _test_eof25: cs = 25; goto _test_eof;
1403
- _test_eof26: cs = 26; goto _test_eof;
1404
- _test_eof27: cs = 27; goto _test_eof;
1405
- _test_eof28: cs = 28; goto _test_eof;
1406
-
1407
- _test_eof: {}
1408
- _out: {}
1409
- }
1410
-
1411
- #line 291 "parser.rl"
1412
-
1413
-
1414
- if (json->freeze) {
1415
- OBJ_FREEZE(*result);
1416
- }
1417
-
1418
- if (cs >= JSON_value_first_final) {
1419
- return p;
1420
- } else {
1421
- return NULL;
1422
- }
267
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
268
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
269
+ }
270
+ stack->ptr[stack->head] = value;
271
+ stack->head++;
272
+ return value;
1423
273
  }
1424
274
 
275
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
276
+ {
277
+ return stack->ptr + (stack->head - count);
278
+ }
1425
279
 
1426
- enum {JSON_integer_start = 1};
1427
- enum {JSON_integer_first_final = 3};
1428
- enum {JSON_integer_error = 0};
280
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
281
+ {
282
+ stack->head -= count;
283
+ }
1429
284
 
1430
- enum {JSON_integer_en_main = 1};
285
+ static void rvalue_stack_mark(void *ptr)
286
+ {
287
+ rvalue_stack *stack = (rvalue_stack *)ptr;
288
+ long index;
289
+ for (index = 0; index < stack->head; index++) {
290
+ rb_gc_mark(stack->ptr[index]);
291
+ }
292
+ }
1431
293
 
1432
- static const char MAYBE_UNUSED(_JSON_integer_nfa_targs)[] = {
1433
- 0, 0
1434
- };
294
+ static void rvalue_stack_free(void *ptr)
295
+ {
296
+ rvalue_stack *stack = (rvalue_stack *)ptr;
297
+ if (stack) {
298
+ ruby_xfree(stack->ptr);
299
+ ruby_xfree(stack);
300
+ }
301
+ }
1435
302
 
1436
- static const char MAYBE_UNUSED(_JSON_integer_nfa_offsets)[] = {
1437
- 0, 0, 0, 0, 0, 0, 0
1438
- };
303
+ static size_t rvalue_stack_memsize(const void *ptr)
304
+ {
305
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
306
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
307
+ }
1439
308
 
1440
- static const char MAYBE_UNUSED(_JSON_integer_nfa_push_actions)[] = {
1441
- 0, 0
309
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
310
+ "JSON::Ext::Parser/rvalue_stack",
311
+ {
312
+ .dmark = rvalue_stack_mark,
313
+ .dfree = rvalue_stack_free,
314
+ .dsize = rvalue_stack_memsize,
315
+ },
316
+ 0, 0,
317
+ RUBY_TYPED_FREE_IMMEDIATELY,
1442
318
  };
1443
319
 
1444
- static const char MAYBE_UNUSED(_JSON_integer_nfa_pop_trans)[] = {
1445
- 0, 0
1446
- };
320
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
321
+ {
322
+ rvalue_stack *stack;
323
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
324
+ *stack_ref = stack;
325
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
326
+
327
+ stack->capa = old_stack->capa << 1;
328
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
329
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
330
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
331
+ return stack;
332
+ }
1447
333
 
334
+ static void rvalue_stack_eagerly_release(VALUE handle)
335
+ {
336
+ if (handle) {
337
+ rvalue_stack *stack;
338
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
339
+ RTYPEDDATA_DATA(handle) = NULL;
340
+ rvalue_stack_free(stack);
341
+ }
342
+ }
1448
343
 
1449
- #line 311 "parser.rl"
344
+ /* unicode */
1450
345
 
346
+ static const signed char digit_values[256] = {
347
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
348
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
349
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
350
+ -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
351
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
352
+ 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
353
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
354
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
355
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
356
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
357
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
358
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
359
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
360
+ -1, -1, -1, -1, -1, -1, -1
361
+ };
1451
362
 
1452
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
363
+ static uint32_t unescape_unicode(const unsigned char *p)
1453
364
  {
1454
- int cs = EVIL;
1455
-
1456
-
1457
- {
1458
- cs = (int)JSON_integer_start;
1459
- }
1460
-
1461
- #line 318 "parser.rl"
1462
-
1463
- json->memo = p;
1464
-
1465
- {
1466
- if ( p == pe )
1467
- goto _test_eof;
1468
- switch ( cs )
1469
- {
1470
- case 1:
1471
- goto st_case_1;
1472
- case 0:
1473
- goto st_case_0;
1474
- case 2:
1475
- goto st_case_2;
1476
- case 3:
1477
- goto st_case_3;
1478
- case 4:
1479
- goto st_case_4;
1480
- case 5:
1481
- goto st_case_5;
1482
- }
1483
- goto st_out;
1484
- st_case_1:
1485
- switch( ( (*( p))) ) {
1486
- case 45: {
1487
- goto st2;
1488
- }
1489
- case 48: {
1490
- goto st3;
1491
- }
1492
- }
1493
- if ( 49 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1494
- goto st5;
1495
- }
1496
- {
1497
- goto st0;
1498
- }
1499
- st_case_0:
1500
- st0:
1501
- cs = 0;
1502
- goto _out;
1503
- st2:
1504
- p+= 1;
1505
- if ( p == pe )
1506
- goto _test_eof2;
1507
- st_case_2:
1508
- if ( ( (*( p))) == 48 ) {
1509
- goto st3;
1510
- }
1511
- if ( 49 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1512
- goto st5;
1513
- }
1514
- {
1515
- goto st0;
1516
- }
1517
- st3:
1518
- p+= 1;
1519
- if ( p == pe )
1520
- goto _test_eof3;
1521
- st_case_3:
1522
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1523
- goto st0;
1524
- }
1525
- {
1526
- goto ctr4;
1527
- }
1528
- ctr4:
1529
- {
1530
- #line 308 "parser.rl"
1531
- {p = p - 1; } {p+= 1; cs = 4; goto _out;} }
1532
-
1533
- goto st4;
1534
- st4:
1535
- p+= 1;
1536
- if ( p == pe )
1537
- goto _test_eof4;
1538
- st_case_4:
1539
- {
1540
- goto st0;
1541
- }
1542
- st5:
1543
- p+= 1;
1544
- if ( p == pe )
1545
- goto _test_eof5;
1546
- st_case_5:
1547
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1548
- goto st5;
1549
- }
1550
- {
1551
- goto ctr4;
1552
- }
1553
- st_out:
1554
- _test_eof2: cs = 2; goto _test_eof;
1555
- _test_eof3: cs = 3; goto _test_eof;
1556
- _test_eof4: cs = 4; goto _test_eof;
1557
- _test_eof5: cs = 5; goto _test_eof;
1558
-
1559
- _test_eof: {}
1560
- _out: {}
1561
- }
1562
-
1563
- #line 320 "parser.rl"
1564
-
1565
-
1566
- if (cs >= JSON_integer_first_final) {
1567
- long len = p - json->memo;
1568
- fbuffer_clear(json->fbuffer);
1569
- fbuffer_append(json->fbuffer, json->memo, len);
1570
- fbuffer_append_char(json->fbuffer, '\0');
1571
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
1572
- return p + 1;
1573
- } else {
1574
- return NULL;
1575
- }
365
+ const uint32_t replacement_char = 0xFFFD;
366
+
367
+ signed char b;
368
+ uint32_t result = 0;
369
+ b = digit_values[p[0]];
370
+ if (b < 0) return replacement_char;
371
+ result = (result << 4) | (unsigned char)b;
372
+ b = digit_values[p[1]];
373
+ if (b < 0) return replacement_char;
374
+ result = (result << 4) | (unsigned char)b;
375
+ b = digit_values[p[2]];
376
+ if (b < 0) return replacement_char;
377
+ result = (result << 4) | (unsigned char)b;
378
+ b = digit_values[p[3]];
379
+ if (b < 0) return replacement_char;
380
+ result = (result << 4) | (unsigned char)b;
381
+ return result;
1576
382
  }
1577
383
 
384
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
385
+ {
386
+ int len = 1;
387
+ if (ch <= 0x7F) {
388
+ buf[0] = (char) ch;
389
+ } else if (ch <= 0x07FF) {
390
+ buf[0] = (char) ((ch >> 6) | 0xC0);
391
+ buf[1] = (char) ((ch & 0x3F) | 0x80);
392
+ len++;
393
+ } else if (ch <= 0xFFFF) {
394
+ buf[0] = (char) ((ch >> 12) | 0xE0);
395
+ buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
396
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
397
+ len += 2;
398
+ } else if (ch <= 0x1fffff) {
399
+ buf[0] =(char) ((ch >> 18) | 0xF0);
400
+ buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
401
+ buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
402
+ buf[3] =(char) ((ch & 0x3F) | 0x80);
403
+ len += 3;
404
+ } else {
405
+ buf[0] = '?';
406
+ }
407
+ return len;
408
+ }
1578
409
 
1579
- enum {JSON_float_start = 1};
1580
- enum {JSON_float_first_final = 8};
1581
- enum {JSON_float_error = 0};
410
+ typedef struct JSON_ParserStruct {
411
+ VALUE create_id;
412
+ VALUE object_class;
413
+ VALUE array_class;
414
+ VALUE decimal_class;
415
+ ID decimal_method_id;
416
+ VALUE match_string;
417
+ int max_nesting;
418
+ bool allow_nan;
419
+ bool allow_trailing_comma;
420
+ bool parsing_name;
421
+ bool symbolize_names;
422
+ bool freeze;
423
+ bool create_additions;
424
+ bool deprecated_create_additions;
425
+ } JSON_ParserConfig;
426
+
427
+ typedef struct JSON_ParserStateStruct {
428
+ VALUE stack_handle;
429
+ const char *cursor;
430
+ const char *end;
431
+ rvalue_stack *stack;
432
+ rvalue_cache name_cache;
433
+ int in_array;
434
+ int current_nesting;
435
+ } JSON_ParserState;
436
+
437
+ #define GET_PARSER_CONFIG \
438
+ JSON_ParserConfig *config; \
439
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
440
+
441
+ static const rb_data_type_t JSON_ParserConfig_type;
442
+
443
+ #ifndef HAVE_STRNLEN
444
+ static size_t strnlen(const char *s, size_t maxlen)
445
+ {
446
+ char *p;
447
+ return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
448
+ }
449
+ #endif
1582
450
 
1583
- enum {JSON_float_en_main = 1};
451
+ #define PARSE_ERROR_FRAGMENT_LEN 32
452
+ #ifdef RBIMPL_ATTR_NORETURN
453
+ RBIMPL_ATTR_NORETURN()
454
+ #endif
455
+ static void raise_parse_error(const char *format, const char *start)
456
+ {
457
+ char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
1584
458
 
1585
- static const char MAYBE_UNUSED(_JSON_float_nfa_targs)[] = {
1586
- 0, 0
1587
- };
459
+ size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
460
+ const char *ptr = start;
1588
461
 
1589
- static const char MAYBE_UNUSED(_JSON_float_nfa_offsets)[] = {
1590
- 0, 0, 0, 0, 0, 0, 0, 0,
1591
- 0, 0, 0, 0
1592
- };
462
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
463
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
464
+ buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
465
+ ptr = buffer;
466
+ }
1593
467
 
1594
- static const char MAYBE_UNUSED(_JSON_float_nfa_push_actions)[] = {
1595
- 0, 0
1596
- };
468
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
469
+ }
1597
470
 
1598
- static const char MAYBE_UNUSED(_JSON_float_nfa_pop_trans)[] = {
1599
- 0, 0
471
+ static const bool whitespace[256] = {
472
+ [' '] = 1,
473
+ ['\t'] = 1,
474
+ ['\n'] = 1,
475
+ ['\r'] = 1,
476
+ ['/'] = 1,
1600
477
  };
1601
478
 
1602
-
1603
- #line 345 "parser.rl"
1604
-
1605
-
1606
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
479
+ static void
480
+ json_eat_comments(JSON_ParserState *state)
1607
481
  {
1608
- int cs = EVIL;
1609
-
1610
-
1611
- {
1612
- cs = (int)JSON_float_start;
1613
- }
1614
-
1615
- #line 352 "parser.rl"
1616
-
1617
- json->memo = p;
1618
-
1619
- {
1620
- if ( p == pe )
1621
- goto _test_eof;
1622
- switch ( cs )
1623
- {
1624
- case 1:
1625
- goto st_case_1;
1626
- case 0:
1627
- goto st_case_0;
1628
- case 2:
1629
- goto st_case_2;
1630
- case 3:
1631
- goto st_case_3;
1632
- case 4:
1633
- goto st_case_4;
1634
- case 8:
1635
- goto st_case_8;
1636
- case 9:
1637
- goto st_case_9;
1638
- case 5:
1639
- goto st_case_5;
1640
- case 6:
1641
- goto st_case_6;
1642
- case 10:
1643
- goto st_case_10;
1644
- case 7:
1645
- goto st_case_7;
1646
- }
1647
- goto st_out;
1648
- st_case_1:
1649
- switch( ( (*( p))) ) {
1650
- case 45: {
1651
- goto st2;
1652
- }
1653
- case 48: {
1654
- goto st3;
1655
- }
1656
- }
1657
- if ( 49 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1658
- goto st7;
1659
- }
1660
- {
1661
- goto st0;
1662
- }
1663
- st_case_0:
1664
- st0:
1665
- cs = 0;
1666
- goto _out;
1667
- st2:
1668
- p+= 1;
1669
- if ( p == pe )
1670
- goto _test_eof2;
1671
- st_case_2:
1672
- if ( ( (*( p))) == 48 ) {
1673
- goto st3;
1674
- }
1675
- if ( 49 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1676
- goto st7;
1677
- }
1678
- {
1679
- goto st0;
1680
- }
1681
- st3:
1682
- p+= 1;
1683
- if ( p == pe )
1684
- goto _test_eof3;
1685
- st_case_3:
1686
- switch( ( (*( p))) ) {
1687
- case 46: {
1688
- goto st4;
1689
- }
1690
- case 69: {
1691
- goto st5;
1692
- }
1693
- case 101: {
1694
- goto st5;
1695
- }
1696
- }
1697
- {
1698
- goto st0;
1699
- }
1700
- st4:
1701
- p+= 1;
1702
- if ( p == pe )
1703
- goto _test_eof4;
1704
- st_case_4:
1705
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1706
- goto st8;
1707
- }
1708
- {
1709
- goto st0;
1710
- }
1711
- st8:
1712
- p+= 1;
1713
- if ( p == pe )
1714
- goto _test_eof8;
1715
- st_case_8:
1716
- switch( ( (*( p))) ) {
1717
- case 69: {
1718
- goto st5;
1719
- }
1720
- case 101: {
1721
- goto st5;
1722
- }
1723
- }
1724
- if ( ( (*( p))) > 46 ) {
1725
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1726
- goto st8;
1727
- }
1728
- } else if ( ( (*( p))) >= 45 ) {
1729
- goto st0;
1730
- }
1731
- {
1732
- goto ctr9;
1733
- }
1734
- ctr9:
1735
- {
1736
- #line 339 "parser.rl"
1737
- {p = p - 1; } {p+= 1; cs = 9; goto _out;} }
1738
-
1739
- goto st9;
1740
- st9:
1741
- p+= 1;
1742
- if ( p == pe )
1743
- goto _test_eof9;
1744
- st_case_9:
1745
- {
1746
- goto st0;
1747
- }
1748
- st5:
1749
- p+= 1;
1750
- if ( p == pe )
1751
- goto _test_eof5;
1752
- st_case_5:
1753
- switch( ( (*( p))) ) {
1754
- case 43: {
1755
- goto st6;
1756
- }
1757
- case 45: {
1758
- goto st6;
1759
- }
1760
- }
1761
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1762
- goto st10;
1763
- }
1764
- {
1765
- goto st0;
1766
- }
1767
- st6:
1768
- p+= 1;
1769
- if ( p == pe )
1770
- goto _test_eof6;
1771
- st_case_6:
1772
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1773
- goto st10;
1774
- }
1775
- {
1776
- goto st0;
1777
- }
1778
- st10:
1779
- p+= 1;
1780
- if ( p == pe )
1781
- goto _test_eof10;
1782
- st_case_10:
1783
- switch( ( (*( p))) ) {
1784
- case 69: {
1785
- goto st0;
1786
- }
1787
- case 101: {
1788
- goto st0;
1789
- }
1790
- }
1791
- if ( ( (*( p))) > 46 ) {
1792
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1793
- goto st10;
1794
- }
1795
- } else if ( ( (*( p))) >= 45 ) {
1796
- goto st0;
1797
- }
1798
- {
1799
- goto ctr9;
1800
- }
1801
- st7:
1802
- p+= 1;
1803
- if ( p == pe )
1804
- goto _test_eof7;
1805
- st_case_7:
1806
- switch( ( (*( p))) ) {
1807
- case 46: {
1808
- goto st4;
1809
- }
1810
- case 69: {
1811
- goto st5;
1812
- }
1813
- case 101: {
1814
- goto st5;
1815
- }
1816
- }
1817
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
1818
- goto st7;
1819
- }
1820
- {
1821
- goto st0;
1822
- }
1823
- st_out:
1824
- _test_eof2: cs = 2; goto _test_eof;
1825
- _test_eof3: cs = 3; goto _test_eof;
1826
- _test_eof4: cs = 4; goto _test_eof;
1827
- _test_eof8: cs = 8; goto _test_eof;
1828
- _test_eof9: cs = 9; goto _test_eof;
1829
- _test_eof5: cs = 5; goto _test_eof;
1830
- _test_eof6: cs = 6; goto _test_eof;
1831
- _test_eof10: cs = 10; goto _test_eof;
1832
- _test_eof7: cs = 7; goto _test_eof;
1833
-
1834
- _test_eof: {}
1835
- _out: {}
1836
- }
1837
-
1838
- #line 354 "parser.rl"
1839
-
1840
-
1841
- if (cs >= JSON_float_first_final) {
1842
- VALUE mod = Qnil;
1843
- ID method_id = 0;
1844
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
1845
- mod = json->decimal_class;
1846
- method_id = i_try_convert;
1847
- } else if (rb_respond_to(json->decimal_class, i_new)) {
1848
- mod = json->decimal_class;
1849
- method_id = i_new;
1850
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1851
- VALUE name = rb_class_name(json->decimal_class);
1852
- const char *name_cstr = RSTRING_PTR(name);
1853
- const char *last_colon = strrchr(name_cstr, ':');
1854
- if (last_colon) {
1855
- const char *mod_path_end = last_colon - 1;
1856
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1857
- mod = rb_path_to_class(mod_path);
1858
-
1859
- const char *method_name_beg = last_colon + 1;
1860
- long before_len = method_name_beg - name_cstr;
1861
- long len = RSTRING_LEN(name) - before_len;
1862
- VALUE method_name = rb_str_substr(name, before_len, len);
1863
- method_id = SYM2ID(rb_str_intern(method_name));
1864
- } else {
1865
- mod = rb_mKernel;
1866
- method_id = SYM2ID(rb_str_intern(name));
1867
- }
1868
- }
1869
-
1870
- long len = p - json->memo;
1871
- fbuffer_clear(json->fbuffer);
1872
- fbuffer_append(json->fbuffer, json->memo, len);
1873
- fbuffer_append_char(json->fbuffer, '\0');
1874
-
1875
- if (method_id) {
1876
- VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
1877
- *result = rb_funcallv(mod, method_id, 1, &text);
1878
- } else {
1879
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
1880
- }
1881
-
1882
- return p + 1;
1883
- } else {
1884
- return NULL;
1885
- }
482
+ if (state->cursor + 1 < state->end) {
483
+ switch(state->cursor[1]) {
484
+ case '/': {
485
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
486
+ if (!state->cursor) {
487
+ state->cursor = state->end;
488
+ } else {
489
+ state->cursor++;
490
+ }
491
+ break;
492
+ }
493
+ case '*': {
494
+ state->cursor += 2;
495
+ while (true) {
496
+ state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
497
+ if (!state->cursor) {
498
+ state->cursor = state->end;
499
+ raise_parse_error("unexpected end of input, expected closing '*/'", state->cursor);
500
+ } else {
501
+ state->cursor++;
502
+ if (state->cursor < state->end && *state->cursor == '/') {
503
+ state->cursor++;
504
+ break;
505
+ }
506
+ }
507
+ }
508
+ break;
509
+ }
510
+ default:
511
+ raise_parse_error("unexpected token at '%s'", state->cursor);
512
+ break;
513
+ }
514
+ } else {
515
+ raise_parse_error("unexpected token at '%s'", state->cursor);
516
+ }
1886
517
  }
1887
518
 
519
+ static inline void
520
+ json_eat_whitespace(JSON_ParserState *state)
521
+ {
522
+ while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
523
+ if (RB_LIKELY(*state->cursor != '/')) {
524
+ state->cursor++;
525
+ } else {
526
+ json_eat_comments(state);
527
+ }
528
+ }
529
+ }
1888
530
 
531
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
532
+ {
533
+ if (symbolize) {
534
+ intern = true;
535
+ }
536
+ VALUE result;
537
+ # ifdef HAVE_RB_ENC_INTERNED_STR
538
+ if (intern) {
539
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
540
+ } else {
541
+ result = rb_utf8_str_new(start, (long)(end - start));
542
+ }
543
+ # else
544
+ result = rb_utf8_str_new(start, (long)(end - start));
545
+ if (intern) {
546
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
547
+ }
548
+ # endif
1889
549
 
1890
- enum {JSON_array_start = 1};
1891
- enum {JSON_array_first_final = 17};
1892
- enum {JSON_array_error = 0};
1893
-
1894
- enum {JSON_array_en_main = 1};
1895
-
1896
- static const char MAYBE_UNUSED(_JSON_array_nfa_targs)[] = {
1897
- 0, 0
1898
- };
550
+ if (symbolize) {
551
+ result = rb_str_intern(result);
552
+ }
1899
553
 
1900
- static const char MAYBE_UNUSED(_JSON_array_nfa_offsets)[] = {
1901
- 0, 0, 0, 0, 0, 0, 0, 0,
1902
- 0, 0, 0, 0, 0, 0, 0, 0,
1903
- 0, 0, 0
1904
- };
554
+ return result;
555
+ }
1905
556
 
1906
- static const char MAYBE_UNUSED(_JSON_array_nfa_push_actions)[] = {
1907
- 0, 0
1908
- };
557
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
558
+ {
559
+ size_t bufferSize = stringEnd - string;
560
+
561
+ if (is_name && state->in_array) {
562
+ VALUE cached_key;
563
+ if (RB_UNLIKELY(symbolize)) {
564
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
565
+ } else {
566
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
567
+ }
568
+
569
+ if (RB_LIKELY(cached_key)) {
570
+ return cached_key;
571
+ }
572
+ }
573
+
574
+ return build_string(string, stringEnd, intern, symbolize);
575
+ }
1909
576
 
1910
- static const char MAYBE_UNUSED(_JSON_array_nfa_pop_trans)[] = {
1911
- 0, 0
1912
- };
577
+ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
578
+ {
579
+ size_t bufferSize = stringEnd - string;
580
+ const char *p = string, *pe = string, *unescape, *bufferStart;
581
+ char *buffer;
582
+ int unescape_len;
583
+ char buf[4];
584
+
585
+ if (is_name && state->in_array) {
586
+ VALUE cached_key;
587
+ if (RB_UNLIKELY(symbolize)) {
588
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
589
+ } else {
590
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
591
+ }
592
+
593
+ if (RB_LIKELY(cached_key)) {
594
+ return cached_key;
595
+ }
596
+ }
597
+
598
+ VALUE result = rb_str_buf_new(bufferSize);
599
+ rb_enc_associate_index(result, utf8_encindex);
600
+ buffer = RSTRING_PTR(result);
601
+ bufferStart = buffer;
602
+
603
+ while ((pe = memchr(pe, '\\', stringEnd - pe))) {
604
+ unescape = (char *) "?";
605
+ unescape_len = 1;
606
+ if (pe > p) {
607
+ MEMCPY(buffer, p, char, pe - p);
608
+ buffer += pe - p;
609
+ }
610
+ switch (*++pe) {
611
+ case 'n':
612
+ unescape = (char *) "\n";
613
+ break;
614
+ case 'r':
615
+ unescape = (char *) "\r";
616
+ break;
617
+ case 't':
618
+ unescape = (char *) "\t";
619
+ break;
620
+ case '"':
621
+ unescape = (char *) "\"";
622
+ break;
623
+ case '\\':
624
+ unescape = (char *) "\\";
625
+ break;
626
+ case 'b':
627
+ unescape = (char *) "\b";
628
+ break;
629
+ case 'f':
630
+ unescape = (char *) "\f";
631
+ break;
632
+ case 'u':
633
+ if (pe > stringEnd - 5) {
634
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
635
+ } else {
636
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
637
+ pe += 3;
638
+ /* To handle values above U+FFFF, we take a sequence of
639
+ * \uXXXX escapes in the U+D800..U+DBFF then
640
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
641
+ * to make a 20-bit number, then add 0x10000 to get the
642
+ * final codepoint.
643
+ *
644
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
645
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
646
+ * Area".
647
+ */
648
+ if ((ch & 0xFC00) == 0xD800) {
649
+ pe++;
650
+ if (pe > stringEnd - 6) {
651
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
652
+ }
653
+ if (pe[0] == '\\' && pe[1] == 'u') {
654
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
655
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
656
+ | (sur & 0x3FF));
657
+ pe += 5;
658
+ } else {
659
+ unescape = (char *) "?";
660
+ break;
661
+ }
662
+ }
663
+ unescape_len = convert_UTF32_to_UTF8(buf, ch);
664
+ unescape = buf;
665
+ }
666
+ break;
667
+ default:
668
+ p = pe;
669
+ continue;
670
+ }
671
+ MEMCPY(buffer, unescape, char, unescape_len);
672
+ buffer += unescape_len;
673
+ p = ++pe;
674
+ }
675
+
676
+ if (stringEnd > p) {
677
+ MEMCPY(buffer, p, char, stringEnd - p);
678
+ buffer += stringEnd - p;
679
+ }
680
+ rb_str_set_len(result, buffer - bufferStart);
681
+
682
+ if (symbolize) {
683
+ result = rb_str_intern(result);
684
+ } else if (intern) {
685
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
686
+ }
687
+
688
+ return result;
689
+ }
1913
690
 
691
+ #define MAX_FAST_INTEGER_SIZE 18
692
+ static inline VALUE fast_decode_integer(const char *p, const char *pe)
693
+ {
694
+ bool negative = false;
695
+ if (*p == '-') {
696
+ negative = true;
697
+ p++;
698
+ }
699
+
700
+ long long memo = 0;
701
+ while (p < pe) {
702
+ memo *= 10;
703
+ memo += *p - '0';
704
+ p++;
705
+ }
706
+
707
+ if (negative) {
708
+ memo = -memo;
709
+ }
710
+ return LL2NUM(memo);
711
+ }
1914
712
 
1915
- #line 432 "parser.rl"
713
+ static VALUE json_decode_large_integer(const char *start, long len)
714
+ {
715
+ VALUE buffer_v;
716
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
717
+ MEMCPY(buffer, start, char, len);
718
+ buffer[len] = '\0';
719
+ VALUE number = rb_cstr2inum(buffer, 10);
720
+ RB_ALLOCV_END(buffer_v);
721
+ return number;
722
+ }
1916
723
 
724
+ static inline VALUE
725
+ json_decode_integer(const char *start, const char *end)
726
+ {
727
+ long len = end - start;
728
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
729
+ return fast_decode_integer(start, end);
730
+ }
731
+ return json_decode_large_integer(start, len);
732
+ }
1917
733
 
1918
- static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
734
+ static VALUE json_decode_large_float(const char *start, long len)
1919
735
  {
1920
- int cs = EVIL;
1921
- VALUE array_class = json->array_class;
1922
-
1923
- if (json->max_nesting && current_nesting > json->max_nesting) {
1924
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
1925
- }
1926
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
1927
-
1928
-
1929
- {
1930
- cs = (int)JSON_array_start;
1931
- }
1932
-
1933
- #line 445 "parser.rl"
1934
-
1935
-
1936
- {
1937
- if ( p == pe )
1938
- goto _test_eof;
1939
- switch ( cs )
1940
- {
1941
- case 1:
1942
- goto st_case_1;
1943
- case 0:
1944
- goto st_case_0;
1945
- case 2:
1946
- goto st_case_2;
1947
- case 3:
1948
- goto st_case_3;
1949
- case 4:
1950
- goto st_case_4;
1951
- case 5:
1952
- goto st_case_5;
1953
- case 6:
1954
- goto st_case_6;
1955
- case 7:
1956
- goto st_case_7;
1957
- case 8:
1958
- goto st_case_8;
1959
- case 9:
1960
- goto st_case_9;
1961
- case 10:
1962
- goto st_case_10;
1963
- case 11:
1964
- goto st_case_11;
1965
- case 12:
1966
- goto st_case_12;
1967
- case 17:
1968
- goto st_case_17;
1969
- case 13:
1970
- goto st_case_13;
1971
- case 14:
1972
- goto st_case_14;
1973
- case 15:
1974
- goto st_case_15;
1975
- case 16:
1976
- goto st_case_16;
1977
- }
1978
- goto st_out;
1979
- st_case_1:
1980
- if ( ( (*( p))) == 91 ) {
1981
- goto st2;
1982
- }
1983
- {
1984
- goto st0;
1985
- }
1986
- st_case_0:
1987
- st0:
1988
- cs = 0;
1989
- goto _out;
1990
- st2:
1991
- p+= 1;
1992
- if ( p == pe )
1993
- goto _test_eof2;
1994
- st_case_2:
1995
- switch( ( (*( p))) ) {
1996
- case 13: {
1997
- goto st2;
1998
- }
1999
- case 32: {
2000
- goto st2;
2001
- }
2002
- case 34: {
2003
- goto ctr2;
2004
- }
2005
- case 45: {
2006
- goto ctr2;
2007
- }
2008
- case 47: {
2009
- goto st13;
2010
- }
2011
- case 73: {
2012
- goto ctr2;
2013
- }
2014
- case 78: {
2015
- goto ctr2;
2016
- }
2017
- case 91: {
2018
- goto ctr2;
2019
- }
2020
- case 93: {
2021
- goto ctr4;
2022
- }
2023
- case 102: {
2024
- goto ctr2;
2025
- }
2026
- case 110: {
2027
- goto ctr2;
2028
- }
2029
- case 116: {
2030
- goto ctr2;
2031
- }
2032
- case 123: {
2033
- goto ctr2;
2034
- }
2035
- }
2036
- if ( ( (*( p))) > 10 ) {
2037
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
2038
- goto ctr2;
2039
- }
2040
- } else if ( ( (*( p))) >= 9 ) {
2041
- goto st2;
2042
- }
2043
- {
2044
- goto st0;
2045
- }
2046
- ctr2:
2047
- {
2048
- #line 409 "parser.rl"
2049
-
2050
- VALUE v = Qnil;
2051
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
2052
- if (np == NULL) {
2053
- {p = p - 1; } {p+= 1; cs = 3; goto _out;}
2054
- } else {
2055
- if (NIL_P(json->array_class)) {
2056
- rb_ary_push(*result, v);
2057
- } else {
2058
- rb_funcall(*result, i_leftshift, 1, v);
2059
- }
2060
- {p = (( np))-1;}
2061
-
2062
- }
2063
- }
2064
-
2065
- goto st3;
2066
- st3:
2067
- p+= 1;
2068
- if ( p == pe )
2069
- goto _test_eof3;
2070
- st_case_3:
2071
- switch( ( (*( p))) ) {
2072
- case 13: {
2073
- goto st3;
2074
- }
2075
- case 32: {
2076
- goto st3;
2077
- }
2078
- case 44: {
2079
- goto st4;
2080
- }
2081
- case 47: {
2082
- goto st9;
2083
- }
2084
- case 93: {
2085
- goto ctr4;
2086
- }
2087
- }
2088
- if ( 9 <= ( (*( p))) && ( (*( p))) <= 10 ) {
2089
- goto st3;
2090
- }
2091
- {
2092
- goto st0;
2093
- }
2094
- st4:
2095
- p+= 1;
2096
- if ( p == pe )
2097
- goto _test_eof4;
2098
- st_case_4:
2099
- switch( ( (*( p))) ) {
2100
- case 13: {
2101
- goto st4;
2102
- }
2103
- case 32: {
2104
- goto st4;
2105
- }
2106
- case 34: {
2107
- goto ctr2;
2108
- }
2109
- case 45: {
2110
- goto ctr2;
2111
- }
2112
- case 47: {
2113
- goto st5;
2114
- }
2115
- case 73: {
2116
- goto ctr2;
2117
- }
2118
- case 78: {
2119
- goto ctr2;
2120
- }
2121
- case 91: {
2122
- goto ctr2;
2123
- }
2124
- case 102: {
2125
- goto ctr2;
2126
- }
2127
- case 110: {
2128
- goto ctr2;
2129
- }
2130
- case 116: {
2131
- goto ctr2;
2132
- }
2133
- case 123: {
2134
- goto ctr2;
2135
- }
2136
- }
2137
- if ( ( (*( p))) > 10 ) {
2138
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
2139
- goto ctr2;
2140
- }
2141
- } else if ( ( (*( p))) >= 9 ) {
2142
- goto st4;
2143
- }
2144
- {
2145
- goto st0;
2146
- }
2147
- st5:
2148
- p+= 1;
2149
- if ( p == pe )
2150
- goto _test_eof5;
2151
- st_case_5:
2152
- switch( ( (*( p))) ) {
2153
- case 42: {
2154
- goto st6;
2155
- }
2156
- case 47: {
2157
- goto st8;
2158
- }
2159
- }
2160
- {
2161
- goto st0;
2162
- }
2163
- st6:
2164
- p+= 1;
2165
- if ( p == pe )
2166
- goto _test_eof6;
2167
- st_case_6:
2168
- if ( ( (*( p))) == 42 ) {
2169
- goto st7;
2170
- }
2171
- {
2172
- goto st6;
2173
- }
2174
- st7:
2175
- p+= 1;
2176
- if ( p == pe )
2177
- goto _test_eof7;
2178
- st_case_7:
2179
- switch( ( (*( p))) ) {
2180
- case 42: {
2181
- goto st7;
2182
- }
2183
- case 47: {
2184
- goto st4;
2185
- }
2186
- }
2187
- {
2188
- goto st6;
2189
- }
2190
- st8:
2191
- p+= 1;
2192
- if ( p == pe )
2193
- goto _test_eof8;
2194
- st_case_8:
2195
- if ( ( (*( p))) == 10 ) {
2196
- goto st4;
2197
- }
2198
- {
2199
- goto st8;
2200
- }
2201
- st9:
2202
- p+= 1;
2203
- if ( p == pe )
2204
- goto _test_eof9;
2205
- st_case_9:
2206
- switch( ( (*( p))) ) {
2207
- case 42: {
2208
- goto st10;
2209
- }
2210
- case 47: {
2211
- goto st12;
2212
- }
2213
- }
2214
- {
2215
- goto st0;
2216
- }
2217
- st10:
2218
- p+= 1;
2219
- if ( p == pe )
2220
- goto _test_eof10;
2221
- st_case_10:
2222
- if ( ( (*( p))) == 42 ) {
2223
- goto st11;
2224
- }
2225
- {
2226
- goto st10;
2227
- }
2228
- st11:
2229
- p+= 1;
2230
- if ( p == pe )
2231
- goto _test_eof11;
2232
- st_case_11:
2233
- switch( ( (*( p))) ) {
2234
- case 42: {
2235
- goto st11;
2236
- }
2237
- case 47: {
2238
- goto st3;
2239
- }
2240
- }
2241
- {
2242
- goto st10;
2243
- }
2244
- st12:
2245
- p+= 1;
2246
- if ( p == pe )
2247
- goto _test_eof12;
2248
- st_case_12:
2249
- if ( ( (*( p))) == 10 ) {
2250
- goto st3;
2251
- }
2252
- {
2253
- goto st12;
2254
- }
2255
- ctr4:
2256
- {
2257
- #line 424 "parser.rl"
2258
- {p = p - 1; } {p+= 1; cs = 17; goto _out;} }
2259
-
2260
- goto st17;
2261
- st17:
2262
- p+= 1;
2263
- if ( p == pe )
2264
- goto _test_eof17;
2265
- st_case_17:
2266
- {
2267
- goto st0;
2268
- }
2269
- st13:
2270
- p+= 1;
2271
- if ( p == pe )
2272
- goto _test_eof13;
2273
- st_case_13:
2274
- switch( ( (*( p))) ) {
2275
- case 42: {
2276
- goto st14;
2277
- }
2278
- case 47: {
2279
- goto st16;
2280
- }
2281
- }
2282
- {
2283
- goto st0;
2284
- }
2285
- st14:
2286
- p+= 1;
2287
- if ( p == pe )
2288
- goto _test_eof14;
2289
- st_case_14:
2290
- if ( ( (*( p))) == 42 ) {
2291
- goto st15;
2292
- }
2293
- {
2294
- goto st14;
2295
- }
2296
- st15:
2297
- p+= 1;
2298
- if ( p == pe )
2299
- goto _test_eof15;
2300
- st_case_15:
2301
- switch( ( (*( p))) ) {
2302
- case 42: {
2303
- goto st15;
2304
- }
2305
- case 47: {
2306
- goto st2;
2307
- }
2308
- }
2309
- {
2310
- goto st14;
2311
- }
2312
- st16:
2313
- p+= 1;
2314
- if ( p == pe )
2315
- goto _test_eof16;
2316
- st_case_16:
2317
- if ( ( (*( p))) == 10 ) {
2318
- goto st2;
2319
- }
2320
- {
2321
- goto st16;
2322
- }
2323
- st_out:
2324
- _test_eof2: cs = 2; goto _test_eof;
2325
- _test_eof3: cs = 3; goto _test_eof;
2326
- _test_eof4: cs = 4; goto _test_eof;
2327
- _test_eof5: cs = 5; goto _test_eof;
2328
- _test_eof6: cs = 6; goto _test_eof;
2329
- _test_eof7: cs = 7; goto _test_eof;
2330
- _test_eof8: cs = 8; goto _test_eof;
2331
- _test_eof9: cs = 9; goto _test_eof;
2332
- _test_eof10: cs = 10; goto _test_eof;
2333
- _test_eof11: cs = 11; goto _test_eof;
2334
- _test_eof12: cs = 12; goto _test_eof;
2335
- _test_eof17: cs = 17; goto _test_eof;
2336
- _test_eof13: cs = 13; goto _test_eof;
2337
- _test_eof14: cs = 14; goto _test_eof;
2338
- _test_eof15: cs = 15; goto _test_eof;
2339
- _test_eof16: cs = 16; goto _test_eof;
2340
-
2341
- _test_eof: {}
2342
- _out: {}
2343
- }
2344
-
2345
- #line 446 "parser.rl"
2346
-
2347
-
2348
- if(cs >= JSON_array_first_final) {
2349
- return p + 1;
2350
- } else {
2351
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
2352
- return NULL;
2353
- }
736
+ VALUE buffer_v;
737
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
738
+ MEMCPY(buffer, start, char, len);
739
+ buffer[len] = '\0';
740
+ VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
741
+ RB_ALLOCV_END(buffer_v);
742
+ return number;
2354
743
  }
2355
744
 
2356
- static const size_t MAX_STACK_BUFFER_SIZE = 128;
2357
- static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
745
+ static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
2358
746
  {
2359
- VALUE result = Qnil;
2360
- size_t bufferSize = stringEnd - string;
2361
- char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
2362
- int unescape_len;
2363
- char buf[4];
747
+ long len = end - start;
748
+
749
+ if (RB_UNLIKELY(config->decimal_class)) {
750
+ VALUE text = rb_str_new(start, len);
751
+ return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
752
+ } else if (RB_LIKELY(len < 64)) {
753
+ char buffer[64];
754
+ MEMCPY(buffer, start, char, len);
755
+ buffer[len] = '\0';
756
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
757
+ } else {
758
+ return json_decode_large_float(start, len);
759
+ }
760
+ }
2364
761
 
2365
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
2366
- # ifdef HAVE_RB_ENC_INTERNED_STR
2367
- bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
2368
- # else
2369
- bufferStart = buffer = ALLOC_N(char, bufferSize);
2370
- # endif
2371
- } else {
2372
- # ifdef HAVE_RB_ENC_INTERNED_STR
2373
- bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
2374
- # else
2375
- bufferStart = buffer = ALLOCA_N(char, bufferSize);
2376
- # endif
2377
- }
2378
-
2379
- while (pe < stringEnd) {
2380
- if (*pe == '\\') {
2381
- unescape = (char *) "?";
2382
- unescape_len = 1;
2383
- if (pe > p) {
2384
- MEMCPY(buffer, p, char, pe - p);
2385
- buffer += pe - p;
2386
- }
2387
- switch (*++pe) {
2388
- case 'n':
2389
- unescape = (char *) "\n";
2390
- break;
2391
- case 'r':
2392
- unescape = (char *) "\r";
2393
- break;
2394
- case 't':
2395
- unescape = (char *) "\t";
2396
- break;
2397
- case '"':
2398
- unescape = (char *) "\"";
2399
- break;
2400
- case '\\':
2401
- unescape = (char *) "\\";
2402
- break;
2403
- case 'b':
2404
- unescape = (char *) "\b";
2405
- break;
2406
- case 'f':
2407
- unescape = (char *) "\f";
2408
- break;
2409
- case 'u':
2410
- if (pe > stringEnd - 4) {
2411
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
2412
- free(bufferStart);
2413
- }
2414
- rb_enc_raise(
2415
- EXC_ENCODING eParserError,
2416
- "%u: incomplete unicode character escape sequence at '%s'", __LINE__, p
2417
- );
2418
- } else {
2419
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
2420
- pe += 3;
2421
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
2422
- pe++;
2423
- if (pe > stringEnd - 6) {
2424
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
2425
- free(bufferStart);
2426
- }
2427
- rb_enc_raise(
2428
- EXC_ENCODING eParserError,
2429
- "%u: incomplete surrogate pair at '%s'", __LINE__, p
2430
- );
2431
- }
2432
- if (pe[0] == '\\' && pe[1] == 'u') {
2433
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
2434
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
2435
- | (sur & 0x3FF));
2436
- pe += 5;
2437
- } else {
2438
- unescape = (char *) "?";
2439
- break;
2440
- }
2441
- }
2442
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
2443
- unescape = buf;
2444
- }
2445
- break;
2446
- default:
2447
- p = pe;
2448
- continue;
2449
- }
2450
- MEMCPY(buffer, unescape, char, unescape_len);
2451
- buffer += unescape_len;
2452
- p = ++pe;
2453
- } else {
2454
- pe++;
2455
- }
2456
- }
2457
-
2458
- if (pe > p) {
2459
- MEMCPY(buffer, p, char, pe - p);
2460
- buffer += pe - p;
2461
- }
2462
-
2463
- # ifdef HAVE_RB_ENC_INTERNED_STR
2464
- if (intern) {
2465
- result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
2466
- } else {
2467
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
2468
- }
2469
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
2470
- free(bufferStart);
2471
- }
2472
- # else
2473
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
2474
-
2475
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
2476
- free(bufferStart);
2477
- }
2478
-
2479
- if (intern) {
2480
- # if STR_UMINUS_DEDUPE_FROZEN
2481
- // Starting from MRI 2.8 it is preferable to freeze the string
2482
- // before deduplication so that it can be interned directly
2483
- // otherwise it would be duplicated first which is wasteful.
2484
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
2485
- # elif STR_UMINUS_DEDUPE
2486
- // MRI 2.5 and older do not deduplicate strings that are already
2487
- // frozen.
2488
- result = rb_funcall(result, i_uminus, 0);
2489
- # else
2490
- result = rb_str_freeze(result);
2491
- # endif
2492
- }
2493
- # endif
2494
-
2495
- if (symbolize) {
2496
- result = rb_str_intern(result);
2497
- }
2498
-
2499
- return result;
762
+ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
763
+ {
764
+ VALUE array;
765
+ if (RB_UNLIKELY(config->array_class)) {
766
+ array = rb_class_new_instance(0, 0, config->array_class);
767
+ VALUE *items = rvalue_stack_peek(state->stack, count);
768
+ long index;
769
+ for (index = 0; index < count; index++) {
770
+ rb_funcall(array, i_leftshift, 1, items[index]);
771
+ }
772
+ } else {
773
+ array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
774
+ }
775
+
776
+ rvalue_stack_pop(state->stack, count);
777
+
778
+ if (config->freeze) {
779
+ RB_OBJ_FREEZE(array);
780
+ }
781
+
782
+ return array;
2500
783
  }
2501
784
 
785
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
786
+ {
787
+ VALUE object;
788
+ if (RB_UNLIKELY(config->object_class)) {
789
+ object = rb_class_new_instance(0, 0, config->object_class);
790
+ long index = 0;
791
+ VALUE *items = rvalue_stack_peek(state->stack, count);
792
+ while (index < count) {
793
+ VALUE name = items[index++];
794
+ VALUE value = items[index++];
795
+ rb_funcall(object, i_aset, 2, name, value);
796
+ }
797
+ } else {
798
+ object = rb_hash_new_capa(count);
799
+ rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
800
+ }
801
+
802
+ rvalue_stack_pop(state->stack, count);
803
+
804
+ if (RB_UNLIKELY(config->create_additions)) {
805
+ VALUE klassname;
806
+ if (config->object_class) {
807
+ klassname = rb_funcall(object, i_aref, 1, config->create_id);
808
+ } else {
809
+ klassname = rb_hash_aref(object, config->create_id);
810
+ }
811
+ if (!NIL_P(klassname)) {
812
+ VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
813
+ if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
814
+ if (config->deprecated_create_additions) {
815
+ json_deprecated(deprecated_create_additions_warning);
816
+ }
817
+ object = rb_funcall(klass, i_json_create, 1, object);
818
+ }
819
+ }
820
+ }
821
+
822
+ if (config->freeze) {
823
+ RB_OBJ_FREEZE(object);
824
+ }
825
+
826
+ return object;
827
+ }
2502
828
 
2503
- enum {JSON_string_start = 1};
2504
- enum {JSON_string_first_final = 8};
2505
- enum {JSON_string_error = 0};
829
+ static int match_i(VALUE regexp, VALUE klass, VALUE memo)
830
+ {
831
+ if (regexp == Qundef) return ST_STOP;
832
+ if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
833
+ RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
834
+ rb_ary_push(memo, klass);
835
+ return ST_STOP;
836
+ }
837
+ return ST_CONTINUE;
838
+ }
2506
839
 
2507
- enum {JSON_string_en_main = 1};
840
+ static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
841
+ {
842
+ VALUE string;
843
+ bool intern = is_name || config->freeze;
844
+ bool symbolize = is_name && config->symbolize_names;
845
+ if (escaped) {
846
+ string = json_string_unescape(state, start, end, is_name, intern, symbolize);
847
+ } else {
848
+ string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
849
+ }
850
+
851
+ if (RB_UNLIKELY(config->create_additions && RTEST(config->match_string))) {
852
+ VALUE klass;
853
+ VALUE memo = rb_ary_new2(2);
854
+ rb_ary_push(memo, string);
855
+ rb_hash_foreach(config->match_string, match_i, memo);
856
+ klass = rb_ary_entry(memo, 1);
857
+ if (RTEST(klass)) {
858
+ string = rb_funcall(klass, i_json_create, 1, string);
859
+ }
860
+ }
861
+
862
+ return string;
863
+ }
2508
864
 
2509
- static const char MAYBE_UNUSED(_JSON_string_nfa_targs)[] = {
2510
- 0, 0
865
+ #define PUSH(result) rvalue_stack_push(state->stack, result, &state->stack_handle, &state->stack)
866
+
867
/*
 * Byte classification table used while scanning string contents: an entry
 * is 1 for bytes that need special handling (ASCII control characters, the
 * closing quote and the backslash) and 0 for bytes that can be skipped.
 * Entries 0x80-0xFF are not listed and are therefore zero-initialized.
 */
static const bool string_scan[256] = {
    /* 0x00-0x0F: ASCII control characters */
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10-0x1F: ASCII control characters */
    1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1,
    /* Printable ASCII: only the string terminator and the escape introducer */
    ['"']  = 1,
    ['\\'] = 1,
};
2512
879
 
2513
- static const char MAYBE_UNUSED(_JSON_string_nfa_offsets)[] = {
2514
- 0, 0, 0, 0, 0, 0, 0, 0,
2515
- 0, 0
2516
- };
880
+ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
881
+ {
882
+ state->cursor++;
883
+ const char *start = state->cursor;
884
+ bool escaped = false;
885
+
886
+ while (state->cursor < state->end) {
887
+ if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
888
+ switch (*state->cursor) {
889
+ case '"': {
890
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
891
+ state->cursor++;
892
+ return PUSH(string);
893
+ }
894
+ case '\\': {
895
+ state->cursor++;
896
+ escaped = true;
897
+ if ((unsigned char)*state->cursor < 0x20) {
898
+ raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
899
+ }
900
+ break;
901
+ }
902
+ default:
903
+ raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
904
+ break;
905
+ }
906
+ }
907
+
908
+ state->cursor++;
909
+ }
910
+
911
+ raise_parse_error("unexpected end of input, expected closing \"", state->cursor);
912
+ return Qfalse;
913
+ }
2517
914
 
2518
- static const char MAYBE_UNUSED(_JSON_string_nfa_push_actions)[] = {
2519
- 0, 0
2520
- };
915
+ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
916
+ {
917
+ json_eat_whitespace(state);
918
+ if (state->cursor >= state->end) {
919
+ raise_parse_error("unexpected end of input", state->cursor);
920
+ }
921
+
922
+ switch (*state->cursor) {
923
+ case 'n':
924
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
925
+ state->cursor += 4;
926
+ return PUSH(Qnil);
927
+ }
928
+
929
+ raise_parse_error("unexpected token at '%s'", state->cursor);
930
+ break;
931
+ case 't':
932
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
933
+ state->cursor += 4;
934
+ return PUSH(Qtrue);
935
+ }
936
+
937
+ raise_parse_error("unexpected token at '%s'", state->cursor);
938
+ break;
939
+ case 'f':
940
+ // Note: memcmp with a small power of two compile to an integer comparison
941
+ if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
942
+ state->cursor += 5;
943
+ return PUSH(Qfalse);
944
+ }
945
+
946
+ raise_parse_error("unexpected token at '%s'", state->cursor);
947
+ break;
948
+ case 'N':
949
+ // Note: memcmp with a small power of two compile to an integer comparison
950
+ if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
951
+ state->cursor += 3;
952
+ return PUSH(CNaN);
953
+ }
954
+
955
+ raise_parse_error("unexpected token at '%s'", state->cursor);
956
+ break;
957
+ case 'I':
958
+ if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
959
+ state->cursor += 8;
960
+ return PUSH(CInfinity);
961
+ }
962
+
963
+ raise_parse_error("unexpected token at '%s'", state->cursor);
964
+ break;
965
+ case '-':
966
+ // Note: memcmp with a small power of two compile to an integer comparison
967
+ if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
968
+ if (config->allow_nan) {
969
+ state->cursor += 9;
970
+ return PUSH(CMinusInfinity);
971
+ } else {
972
+ raise_parse_error("unexpected token at '%s'", state->cursor);
973
+ }
974
+ }
975
+ // Fallthrough
976
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
977
+ bool integer = true;
978
+
979
+ // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
980
+ const char *start = state->cursor;
981
+ state->cursor++;
982
+
983
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
984
+ state->cursor++;
985
+ }
986
+
987
+ long integer_length = state->cursor - start;
988
+
989
+ if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
990
+ raise_parse_error("invalid number: %s", start);
991
+ } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
992
+ raise_parse_error("invalid number: %s", start);
993
+ } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
994
+ raise_parse_error("invalid number: %s", start);
995
+ }
996
+
997
+ if ((state->cursor < state->end) && (*state->cursor == '.')) {
998
+ integer = false;
999
+ state->cursor++;
1000
+
1001
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1002
+ raise_parse_error("invalid number: %s", state->cursor);
1003
+ }
1004
+
1005
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1006
+ state->cursor++;
1007
+ }
1008
+ }
1009
+
1010
+ if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
1011
+ integer = false;
1012
+ state->cursor++;
1013
+ if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
1014
+ state->cursor++;
1015
+ }
1016
+
1017
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
1018
+ raise_parse_error("invalid number: %s", state->cursor);
1019
+ }
1020
+
1021
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
1022
+ state->cursor++;
1023
+ }
1024
+ }
1025
+
1026
+ if (integer) {
1027
+ return PUSH(json_decode_integer(start, state->cursor));
1028
+ }
1029
+ return PUSH(json_decode_float(config, start, state->cursor));
1030
+ }
1031
+ case '"': {
1032
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1033
+ return json_parse_string(state, config, false);
1034
+ break;
1035
+ }
1036
+ case '[': {
1037
+ state->cursor++;
1038
+ json_eat_whitespace(state);
1039
+ long stack_head = state->stack->head;
1040
+
1041
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1042
+ state->cursor++;
1043
+ return PUSH(json_decode_array(state, config, 0));
1044
+ } else {
1045
+ state->current_nesting++;
1046
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1047
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1048
+ }
1049
+ state->in_array++;
1050
+ json_parse_any(state, config);
1051
+ }
1052
+
1053
+ while (true) {
1054
+ json_eat_whitespace(state);
1055
+
1056
+ if (state->cursor < state->end) {
1057
+ if (*state->cursor == ']') {
1058
+ state->cursor++;
1059
+ long count = state->stack->head - stack_head;
1060
+ state->current_nesting--;
1061
+ state->in_array--;
1062
+ return PUSH(json_decode_array(state, config, count));
1063
+ }
1064
+
1065
+ if (*state->cursor == ',') {
1066
+ state->cursor++;
1067
+ if (config->allow_trailing_comma) {
1068
+ json_eat_whitespace(state);
1069
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1070
+ continue;
1071
+ }
1072
+ }
1073
+ json_parse_any(state, config);
1074
+ continue;
1075
+ }
1076
+ }
1077
+
1078
+ raise_parse_error("expected ',' or ']' after array value", state->cursor);
1079
+ }
1080
+ break;
1081
+ }
1082
+ case '{': {
1083
+ state->cursor++;
1084
+ json_eat_whitespace(state);
1085
+ long stack_head = state->stack->head;
1086
+
1087
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1088
+ state->cursor++;
1089
+ return PUSH(json_decode_object(state, config, 0));
1090
+ } else {
1091
+ state->current_nesting++;
1092
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1093
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1094
+ }
1095
+
1096
+ if (*state->cursor != '"') {
1097
+ raise_parse_error("expected object key, got '%s", state->cursor);
1098
+ }
1099
+ json_parse_string(state, config, true);
1100
+
1101
+ json_eat_whitespace(state);
1102
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1103
+ raise_parse_error("expected ':' after object key", state->cursor);
1104
+ }
1105
+ state->cursor++;
1106
+
1107
+ json_parse_any(state, config);
1108
+ }
1109
+
1110
+ while (true) {
1111
+ json_eat_whitespace(state);
1112
+
1113
+ if (state->cursor < state->end) {
1114
+ if (*state->cursor == '}') {
1115
+ state->cursor++;
1116
+ state->current_nesting--;
1117
+ long count = state->stack->head - stack_head;
1118
+ return PUSH(json_decode_object(state, config, count));
1119
+ }
1120
+
1121
+ if (*state->cursor == ',') {
1122
+ state->cursor++;
1123
+ json_eat_whitespace(state);
1124
+
1125
+ if (config->allow_trailing_comma) {
1126
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1127
+ continue;
1128
+ }
1129
+ }
1130
+
1131
+ if (*state->cursor != '"') {
1132
+ raise_parse_error("expected object key, got: '%s'", state->cursor);
1133
+ }
1134
+ json_parse_string(state, config, true);
1135
+
1136
+ json_eat_whitespace(state);
1137
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1138
+ raise_parse_error("expected ':' after object key, got: '%s", state->cursor);
1139
+ }
1140
+ state->cursor++;
1141
+
1142
+ json_parse_any(state, config);
1143
+
1144
+ continue;
1145
+ }
1146
+ }
1147
+
1148
+ raise_parse_error("expected ',' or '}' after object value, got: '%s'", state->cursor);
1149
+ }
1150
+ break;
1151
+ }
1152
+
1153
+ default:
1154
+ raise_parse_error("unexpected character: '%s'", state->cursor);
1155
+ break;
1156
+ }
1157
+
1158
+ raise_parse_error("unreacheable: '%s'", state->cursor);
1159
+ }
2521
1160
 
2522
- static const char MAYBE_UNUSED(_JSON_string_nfa_pop_trans)[] = {
2523
- 0, 0
2524
- };
1161
+ static void json_ensure_eof(JSON_ParserState *state)
1162
+ {
1163
+ json_eat_whitespace(state);
1164
+ if (state->cursor != state->end) {
1165
+ raise_parse_error("unexpected token at end of stream '%s'", state->cursor);
1166
+ }
1167
+ }
1168
+
1169
+ /*
1170
+ * Document-class: JSON::Ext::Parser
1171
+ *
1172
+ * This is the JSON parser implemented as a C extension. It can be configured
1173
+ * to be used by setting
1174
+ *
1175
+ * JSON.parser = JSON::Ext::Parser
1176
+ *
1177
+ * with the method parser= in JSON.
1178
+ *
1179
+ */
2525
1180
 
1181
+ static VALUE convert_encoding(VALUE source)
1182
+ {
1183
+ int encindex = RB_ENCODING_GET(source);
2526
1184
 
2527
- #line 612 "parser.rl"
1185
+ if (RB_LIKELY(encindex == utf8_encindex)) {
1186
+ return source;
1187
+ }
2528
1188
 
1189
+ if (encindex == binary_encindex) {
1190
+ // For historical reason, we silently reinterpret binary strings as UTF-8
1191
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1192
+ }
2529
1193
 
2530
- static int
2531
- match_i(VALUE regexp, VALUE klass, VALUE memo)
2532
- {
2533
- if (regexp == Qundef) return ST_STOP;
2534
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
2535
- RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
2536
- rb_ary_push(memo, klass);
2537
- return ST_STOP;
2538
- }
2539
- return ST_CONTINUE;
1194
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
2540
1195
  }
2541
1196
 
2542
- static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
1197
+ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
2543
1198
  {
2544
- int cs = EVIL;
2545
- VALUE match_string;
2546
-
2547
-
2548
- {
2549
- cs = (int)JSON_string_start;
2550
- }
2551
-
2552
- #line 632 "parser.rl"
2553
-
2554
- json->memo = p;
2555
-
2556
- {
2557
- if ( p == pe )
2558
- goto _test_eof;
2559
- switch ( cs )
2560
- {
2561
- case 1:
2562
- goto st_case_1;
2563
- case 0:
2564
- goto st_case_0;
2565
- case 2:
2566
- goto st_case_2;
2567
- case 8:
2568
- goto st_case_8;
2569
- case 3:
2570
- goto st_case_3;
2571
- case 4:
2572
- goto st_case_4;
2573
- case 5:
2574
- goto st_case_5;
2575
- case 6:
2576
- goto st_case_6;
2577
- case 7:
2578
- goto st_case_7;
2579
- }
2580
- goto st_out;
2581
- st_case_1:
2582
- if ( ( (*( p))) == 34 ) {
2583
- goto st2;
2584
- }
2585
- {
2586
- goto st0;
2587
- }
2588
- st_case_0:
2589
- st0:
2590
- cs = 0;
2591
- goto _out;
2592
- st2:
2593
- p+= 1;
2594
- if ( p == pe )
2595
- goto _test_eof2;
2596
- st_case_2:
2597
- switch( ( (*( p))) ) {
2598
- case 34: {
2599
- goto ctr2;
2600
- }
2601
- case 92: {
2602
- goto st3;
2603
- }
2604
- }
2605
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) {
2606
- goto st0;
2607
- }
2608
- {
2609
- goto st2;
2610
- }
2611
- ctr2:
2612
- {
2613
- #line 599 "parser.rl"
2614
-
2615
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
2616
- if (NIL_P(*result)) {
2617
- {p = p - 1; }
2618
- {p+= 1; cs = 8; goto _out;}
2619
- } else {
2620
- {p = (( p + 1))-1;}
2621
-
2622
- }
2623
- }
2624
- {
2625
- #line 609 "parser.rl"
2626
- {p = p - 1; } {p+= 1; cs = 8; goto _out;} }
2627
-
2628
- goto st8;
2629
- st8:
2630
- p+= 1;
2631
- if ( p == pe )
2632
- goto _test_eof8;
2633
- st_case_8:
2634
- {
2635
- goto st0;
2636
- }
2637
- st3:
2638
- p+= 1;
2639
- if ( p == pe )
2640
- goto _test_eof3;
2641
- st_case_3:
2642
- if ( ( (*( p))) == 117 ) {
2643
- goto st4;
2644
- }
2645
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) {
2646
- goto st0;
2647
- }
2648
- {
2649
- goto st2;
2650
- }
2651
- st4:
2652
- p+= 1;
2653
- if ( p == pe )
2654
- goto _test_eof4;
2655
- st_case_4:
2656
- if ( ( (*( p))) < 65 ) {
2657
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
2658
- goto st5;
2659
- }
2660
- } else if ( ( (*( p))) > 70 ) {
2661
- if ( 97 <= ( (*( p))) && ( (*( p))) <= 102 ) {
2662
- goto st5;
2663
- }
2664
- } else {
2665
- goto st5;
2666
- }
2667
- {
2668
- goto st0;
2669
- }
2670
- st5:
2671
- p+= 1;
2672
- if ( p == pe )
2673
- goto _test_eof5;
2674
- st_case_5:
2675
- if ( ( (*( p))) < 65 ) {
2676
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
2677
- goto st6;
2678
- }
2679
- } else if ( ( (*( p))) > 70 ) {
2680
- if ( 97 <= ( (*( p))) && ( (*( p))) <= 102 ) {
2681
- goto st6;
2682
- }
2683
- } else {
2684
- goto st6;
2685
- }
2686
- {
2687
- goto st0;
2688
- }
2689
- st6:
2690
- p+= 1;
2691
- if ( p == pe )
2692
- goto _test_eof6;
2693
- st_case_6:
2694
- if ( ( (*( p))) < 65 ) {
2695
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
2696
- goto st7;
2697
- }
2698
- } else if ( ( (*( p))) > 70 ) {
2699
- if ( 97 <= ( (*( p))) && ( (*( p))) <= 102 ) {
2700
- goto st7;
2701
- }
2702
- } else {
2703
- goto st7;
2704
- }
2705
- {
2706
- goto st0;
2707
- }
2708
- st7:
2709
- p+= 1;
2710
- if ( p == pe )
2711
- goto _test_eof7;
2712
- st_case_7:
2713
- if ( ( (*( p))) < 65 ) {
2714
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
2715
- goto st2;
2716
- }
2717
- } else if ( ( (*( p))) > 70 ) {
2718
- if ( 97 <= ( (*( p))) && ( (*( p))) <= 102 ) {
2719
- goto st2;
2720
- }
2721
- } else {
2722
- goto st2;
2723
- }
2724
- {
2725
- goto st0;
2726
- }
2727
- st_out:
2728
- _test_eof2: cs = 2; goto _test_eof;
2729
- _test_eof8: cs = 8; goto _test_eof;
2730
- _test_eof3: cs = 3; goto _test_eof;
2731
- _test_eof4: cs = 4; goto _test_eof;
2732
- _test_eof5: cs = 5; goto _test_eof;
2733
- _test_eof6: cs = 6; goto _test_eof;
2734
- _test_eof7: cs = 7; goto _test_eof;
2735
-
2736
- _test_eof: {}
2737
- _out: {}
2738
- }
2739
-
2740
- #line 634 "parser.rl"
2741
-
2742
-
2743
- if (json->create_additions && RTEST(match_string = json->match_string)) {
2744
- VALUE klass;
2745
- VALUE memo = rb_ary_new2(2);
2746
- rb_ary_push(memo, *result);
2747
- rb_hash_foreach(match_string, match_i, memo);
2748
- klass = rb_ary_entry(memo, 1);
2749
- if (RTEST(klass)) {
2750
- *result = rb_funcall(klass, i_json_create, 1, *result);
2751
- }
2752
- }
2753
-
2754
- if (cs >= JSON_string_first_final) {
2755
- return p + 1;
2756
- } else {
2757
- return NULL;
2758
- }
1199
+ JSON_ParserConfig *config = (JSON_ParserConfig *)data;
1200
+
1201
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1202
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1203
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1204
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1205
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
1206
+ else if (key == sym_create_id) { config->create_id = RTEST(val) ? val : Qfalse; }
1207
+ else if (key == sym_object_class) { config->object_class = RTEST(val) ? val : Qfalse; }
1208
+ else if (key == sym_array_class) { config->array_class = RTEST(val) ? val : Qfalse; }
1209
+ else if (key == sym_match_string) { config->match_string = RTEST(val) ? val : Qfalse; }
1210
+ else if (key == sym_decimal_class) {
1211
+ if (RTEST(val)) {
1212
+ if (rb_respond_to(val, i_try_convert)) {
1213
+ config->decimal_class = val;
1214
+ config->decimal_method_id = i_try_convert;
1215
+ } else if (rb_respond_to(val, i_new)) {
1216
+ config->decimal_class = val;
1217
+ config->decimal_method_id = i_new;
1218
+ } else if (RB_TYPE_P(val, T_CLASS)) {
1219
+ VALUE name = rb_class_name(val);
1220
+ const char *name_cstr = RSTRING_PTR(name);
1221
+ const char *last_colon = strrchr(name_cstr, ':');
1222
+ if (last_colon) {
1223
+ const char *mod_path_end = last_colon - 1;
1224
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1225
+ config->decimal_class = rb_path_to_class(mod_path);
1226
+
1227
+ const char *method_name_beg = last_colon + 1;
1228
+ long before_len = method_name_beg - name_cstr;
1229
+ long len = RSTRING_LEN(name) - before_len;
1230
+ VALUE method_name = rb_str_substr(name, before_len, len);
1231
+ config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
1232
+ } else {
1233
+ config->decimal_class = rb_mKernel;
1234
+ config->decimal_method_id = SYM2ID(rb_str_intern(name));
1235
+ }
1236
+ }
1237
+ }
1238
+ }
1239
+ else if (key == sym_create_additions) {
1240
+ if (NIL_P(val)) {
1241
+ config->create_additions = true;
1242
+ config->deprecated_create_additions = true;
1243
+ } else {
1244
+ config->create_additions = RTEST(val);
1245
+ config->deprecated_create_additions = false;
1246
+ }
1247
+ }
1248
+
1249
+ return ST_CONTINUE;
2759
1250
  }
2760
1251
 
2761
- /*
2762
- * Document-class: JSON::Ext::Parser
2763
- *
2764
- * This is the JSON parser implemented as a C extension. It can be configured
2765
- * to be used by setting
2766
- *
2767
- * JSON.parser = JSON::Ext::Parser
2768
- *
2769
- * with the method parser= in JSON.
2770
- *
2771
- */
2772
-
2773
- static VALUE convert_encoding(VALUE source)
1252
+ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
2774
1253
  {
2775
- #ifdef HAVE_RUBY_ENCODING_H
2776
- rb_encoding *enc = rb_enc_get(source);
2777
- if (enc == rb_ascii8bit_encoding()) {
2778
- if (OBJ_FROZEN(source)) {
2779
- source = rb_str_dup(source);
2780
- }
2781
- FORCE_UTF8(source);
2782
- } else {
2783
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
2784
- }
2785
- #endif
2786
- return source;
1254
+ config->max_nesting = 100;
1255
+
1256
+ if (!NIL_P(opts)) {
1257
+ Check_Type(opts, T_HASH);
1258
+ if (RHASH_SIZE(opts) > 0) {
1259
+ // We assume in most cases few keys are set so it's faster to go over
1260
+ // the provided keys than to check all possible keys.
1261
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)config);
1262
+
1263
+ if (config->symbolize_names && config->create_additions) {
1264
+ rb_raise(rb_eArgError,
1265
+ "options :symbolize_names and :create_additions cannot be "
1266
+ " used in conjunction");
1267
+ }
1268
+
1269
+ if (config->create_additions && !config->create_id) {
1270
+ config->create_id = rb_funcall(mJSON, i_create_id, 0);
1271
+ }
1272
+ }
1273
+
1274
+ }
2787
1275
  }
2788
1276
 
2789
1277
  /*
2790
- * call-seq: new(source, opts => {})
2791
- *
2792
- * Creates a new JSON::Ext::Parser instance for the string _source_.
2793
- *
2794
- * Creates a new JSON::Ext::Parser instance for the string _source_.
2795
- *
2796
- * It will be configured by the _opts_ hash. _opts_ can have the following
2797
- * keys:
2798
- *
2799
- * _opts_ can have the following keys:
2800
- * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
2801
- * structures. Disable depth checking with :max_nesting => false|nil|0, it
2802
- * defaults to 100.
2803
- * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
2804
- * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
2805
- * false.
2806
- * * *symbolize_names*: If set to true, returns symbols for the names
2807
- * (keys) in a JSON object. Otherwise strings are returned, which is
2808
- * also the default. It's not possible to use this option in
2809
- * conjunction with the *create_additions* option.
2810
- * * *create_additions*: If set to false, the Parser doesn't create
2811
- * additions even if a matching class and create_id was found. This option
2812
- * defaults to false.
2813
- * * *object_class*: Defaults to Hash
2814
- * * *array_class*: Defaults to Array
2815
- */
2816
- static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1278
+ * call-seq: new(opts => {})
1279
+ *
1280
+ * Creates a new JSON::Ext::ParserConfig instance.
1281
+ *
1282
+ * It will be configured by the _opts_ hash. _opts_ can have the following
1283
+ * keys:
1284
+ *
1285
+ * _opts_ can have the following keys:
1286
+ * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
1287
+ * structures. Disable depth checking with :max_nesting => false|nil|0, it
1288
+ * defaults to 100.
1289
+ * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
1290
+ * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
1291
+ * false.
1292
+ * * *symbolize_names*: If set to true, returns symbols for the names
1293
+ * (keys) in a JSON object. Otherwise strings are returned, which is
1294
+ * also the default. It's not possible to use this option in
1295
+ * conjunction with the *create_additions* option.
1296
+ * * *create_additions*: If set to false, the Parser doesn't create
1297
+ * additions even if a matching class and create_id were found. This option
1298
+ * defaults to false.
1299
+ * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1300
+ * instead of Hash to represent JSON objects. The type must respond to
1301
+ * +new+ without arguments, and return an object that responds to +[]=+.
1302
+ * * *array_class*: Defaults to Array. If another type is provided, it will be used
1303
+ * instead of Array to represent JSON arrays. The type must respond to
1304
+ * +new+ without arguments, and return an object that responds to +<<+.
1305
+ * * *decimal_class*: Specifies which class to use instead of the default
1306
+ * (Float) when parsing decimal numbers. This class must accept a single
1307
+ * string argument in its constructor.
1308
+ */
1309
+ static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
2817
1310
  {
2818
- VALUE source, opts;
2819
- GET_PARSER_INIT;
2820
-
2821
- if (json->Vsource) {
2822
- rb_raise(rb_eTypeError, "already initialized instance");
2823
- }
2824
- #ifdef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
2825
- rb_scan_args(argc, argv, "1:", &source, &opts);
2826
- #else
2827
- rb_scan_args(argc, argv, "11", &source, &opts);
2828
- #endif
2829
- if (!NIL_P(opts)) {
2830
- #ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
2831
- opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
2832
- if (NIL_P(opts)) {
2833
- rb_raise(rb_eArgError, "opts needs to be like a hash");
2834
- } else {
2835
- #endif
2836
- VALUE tmp = ID2SYM(i_max_nesting);
2837
- if (option_given_p(opts, tmp)) {
2838
- VALUE max_nesting = rb_hash_aref(opts, tmp);
2839
- if (RTEST(max_nesting)) {
2840
- Check_Type(max_nesting, T_FIXNUM);
2841
- json->max_nesting = FIX2INT(max_nesting);
2842
- } else {
2843
- json->max_nesting = 0;
2844
- }
2845
- } else {
2846
- json->max_nesting = 100;
2847
- }
2848
- tmp = ID2SYM(i_allow_nan);
2849
- if (option_given_p(opts, tmp)) {
2850
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
2851
- } else {
2852
- json->allow_nan = 0;
2853
- }
2854
- tmp = ID2SYM(i_symbolize_names);
2855
- if (option_given_p(opts, tmp)) {
2856
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
2857
- } else {
2858
- json->symbolize_names = 0;
2859
- }
2860
- tmp = ID2SYM(i_freeze);
2861
- if (option_given_p(opts, tmp)) {
2862
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
2863
- } else {
2864
- json->freeze = 0;
2865
- }
2866
- tmp = ID2SYM(i_create_additions);
2867
- if (option_given_p(opts, tmp)) {
2868
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
2869
- } else {
2870
- json->create_additions = 0;
2871
- }
2872
- if (json->symbolize_names && json->create_additions) {
2873
- rb_raise(rb_eArgError,
2874
- "options :symbolize_names and :create_additions cannot be "
2875
- " used in conjunction");
2876
- }
2877
- tmp = ID2SYM(i_create_id);
2878
- if (option_given_p(opts, tmp)) {
2879
- json->create_id = rb_hash_aref(opts, tmp);
2880
- } else {
2881
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
2882
- }
2883
- tmp = ID2SYM(i_object_class);
2884
- if (option_given_p(opts, tmp)) {
2885
- json->object_class = rb_hash_aref(opts, tmp);
2886
- } else {
2887
- json->object_class = Qnil;
2888
- }
2889
- tmp = ID2SYM(i_array_class);
2890
- if (option_given_p(opts, tmp)) {
2891
- json->array_class = rb_hash_aref(opts, tmp);
2892
- } else {
2893
- json->array_class = Qnil;
2894
- }
2895
- tmp = ID2SYM(i_decimal_class);
2896
- if (option_given_p(opts, tmp)) {
2897
- json->decimal_class = rb_hash_aref(opts, tmp);
2898
- } else {
2899
- json->decimal_class = Qnil;
2900
- }
2901
- tmp = ID2SYM(i_match_string);
2902
- if (option_given_p(opts, tmp)) {
2903
- VALUE match_string = rb_hash_aref(opts, tmp);
2904
- json->match_string = RTEST(match_string) ? match_string : Qnil;
2905
- } else {
2906
- json->match_string = Qnil;
2907
- }
2908
- #ifndef HAVE_RB_SCAN_ARGS_OPTIONAL_HASH
2909
- }
2910
- #endif
2911
- } else {
2912
- json->max_nesting = 100;
2913
- json->allow_nan = 0;
2914
- json->create_additions = 0;
2915
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
2916
- json->object_class = Qnil;
2917
- json->array_class = Qnil;
2918
- json->decimal_class = Qnil;
2919
- }
2920
- source = convert_encoding(StringValue(source));
2921
- StringValue(source);
2922
- json->len = RSTRING_LEN(source);
2923
- json->source = RSTRING_PTR(source);;
2924
- json->Vsource = source;
2925
- return self;
2926
- }
1311
+ GET_PARSER_CONFIG;
2927
1312
 
1313
+ parser_config_init(config, opts);
2928
1314
 
2929
- enum {JSON_start = 1};
2930
- enum {JSON_first_final = 10};
2931
- enum {JSON_error = 0};
1315
+ RB_OBJ_WRITTEN(self, Qundef, config->create_id);
1316
+ RB_OBJ_WRITTEN(self, Qundef, config->object_class);
1317
+ RB_OBJ_WRITTEN(self, Qundef, config->array_class);
1318
+ RB_OBJ_WRITTEN(self, Qundef, config->decimal_class);
1319
+ RB_OBJ_WRITTEN(self, Qundef, config->match_string);
2932
1320
 
2933
- enum {JSON_en_main = 1};
1321
+ return self;
1322
+ }
2934
1323
 
2935
- static const char MAYBE_UNUSED(_JSON_nfa_targs)[] = {
2936
- 0, 0
2937
- };
1324
+ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1325
+ {
1326
+ Vsource = convert_encoding(StringValue(Vsource));
1327
+ StringValue(Vsource);
2938
1328
 
2939
- static const char MAYBE_UNUSED(_JSON_nfa_offsets)[] = {
2940
- 0, 0, 0, 0, 0, 0, 0, 0,
2941
- 0, 0, 0, 0
2942
- };
1329
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1330
+ rvalue_stack stack = {
1331
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1332
+ .ptr = rvalue_stack_buffer,
1333
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1334
+ };
2943
1335
 
2944
- static const char MAYBE_UNUSED(_JSON_nfa_push_actions)[] = {
2945
- 0, 0
2946
- };
1336
+ JSON_ParserState _state = {
1337
+ .cursor = RSTRING_PTR(Vsource),
1338
+ .end = RSTRING_END(Vsource),
1339
+ .stack = &stack,
1340
+ };
1341
+ JSON_ParserState *state = &_state;
2947
1342
 
2948
- static const char MAYBE_UNUSED(_JSON_nfa_pop_trans)[] = {
2949
- 0, 0
2950
- };
1343
+ VALUE result = json_parse_any(state, config);
2951
1344
 
1345
+ // This may be skipped in case of exception, but
1346
+ // it won't cause a leak.
1347
+ rvalue_stack_eagerly_release(state->stack_handle);
2952
1348
 
2953
- #line 835 "parser.rl"
1349
+ json_ensure_eof(state);
2954
1350
 
1351
+ return result;
1352
+ }
2955
1353
 
2956
1354
  /*
2957
- * call-seq: parse()
2958
- *
2959
- * Parses the current JSON text _source_ and returns the complete data
2960
- * structure as a result.
2961
- * It raises JSON::ParseError if fail to parse.
2962
- */
2963
- static VALUE cParser_parse(VALUE self)
1355
+ * call-seq: parse(source)
1356
+ *
1357
+ * Parses the current JSON text _source_ and returns the complete data
1358
+ * structure as a result.
1359
+ * It raises JSON::ParserError if it fails to parse.
1360
+ */
1361
+ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
2964
1362
  {
2965
- char *p, *pe;
2966
- int cs = EVIL;
2967
- VALUE result = Qnil;
2968
- GET_PARSER;
2969
-
2970
-
2971
- {
2972
- cs = (int)JSON_start;
2973
- }
2974
-
2975
- #line 851 "parser.rl"
2976
-
2977
- p = json->source;
2978
- pe = p + json->len;
2979
-
2980
- {
2981
- if ( p == pe )
2982
- goto _test_eof;
2983
- switch ( cs )
2984
- {
2985
- case 1:
2986
- goto st_case_1;
2987
- case 0:
2988
- goto st_case_0;
2989
- case 10:
2990
- goto st_case_10;
2991
- case 2:
2992
- goto st_case_2;
2993
- case 3:
2994
- goto st_case_3;
2995
- case 4:
2996
- goto st_case_4;
2997
- case 5:
2998
- goto st_case_5;
2999
- case 6:
3000
- goto st_case_6;
3001
- case 7:
3002
- goto st_case_7;
3003
- case 8:
3004
- goto st_case_8;
3005
- case 9:
3006
- goto st_case_9;
3007
- }
3008
- goto st_out;
3009
- st1:
3010
- p+= 1;
3011
- if ( p == pe )
3012
- goto _test_eof1;
3013
- st_case_1:
3014
- switch( ( (*( p))) ) {
3015
- case 13: {
3016
- goto st1;
3017
- }
3018
- case 32: {
3019
- goto st1;
3020
- }
3021
- case 34: {
3022
- goto ctr2;
3023
- }
3024
- case 45: {
3025
- goto ctr2;
3026
- }
3027
- case 47: {
3028
- goto st6;
3029
- }
3030
- case 73: {
3031
- goto ctr2;
3032
- }
3033
- case 78: {
3034
- goto ctr2;
3035
- }
3036
- case 91: {
3037
- goto ctr2;
3038
- }
3039
- case 102: {
3040
- goto ctr2;
3041
- }
3042
- case 110: {
3043
- goto ctr2;
3044
- }
3045
- case 116: {
3046
- goto ctr2;
3047
- }
3048
- case 123: {
3049
- goto ctr2;
3050
- }
3051
- }
3052
- if ( ( (*( p))) > 10 ) {
3053
- if ( 48 <= ( (*( p))) && ( (*( p))) <= 57 ) {
3054
- goto ctr2;
3055
- }
3056
- } else if ( ( (*( p))) >= 9 ) {
3057
- goto st1;
3058
- }
3059
- {
3060
- goto st0;
3061
- }
3062
- st_case_0:
3063
- st0:
3064
- cs = 0;
3065
- goto _out;
3066
- ctr2:
3067
- {
3068
- #line 827 "parser.rl"
3069
-
3070
- char *np = JSON_parse_value(json, p, pe, &result, 0);
3071
- if (np == NULL) { {p = p - 1; } {p+= 1; cs = 10; goto _out;} } else {p = (( np))-1;}
3072
-
3073
- }
3074
-
3075
- goto st10;
3076
- st10:
3077
- p+= 1;
3078
- if ( p == pe )
3079
- goto _test_eof10;
3080
- st_case_10:
3081
- switch( ( (*( p))) ) {
3082
- case 13: {
3083
- goto st10;
3084
- }
3085
- case 32: {
3086
- goto st10;
3087
- }
3088
- case 47: {
3089
- goto st2;
3090
- }
3091
- }
3092
- if ( 9 <= ( (*( p))) && ( (*( p))) <= 10 ) {
3093
- goto st10;
3094
- }
3095
- {
3096
- goto st0;
3097
- }
3098
- st2:
3099
- p+= 1;
3100
- if ( p == pe )
3101
- goto _test_eof2;
3102
- st_case_2:
3103
- switch( ( (*( p))) ) {
3104
- case 42: {
3105
- goto st3;
3106
- }
3107
- case 47: {
3108
- goto st5;
3109
- }
3110
- }
3111
- {
3112
- goto st0;
3113
- }
3114
- st3:
3115
- p+= 1;
3116
- if ( p == pe )
3117
- goto _test_eof3;
3118
- st_case_3:
3119
- if ( ( (*( p))) == 42 ) {
3120
- goto st4;
3121
- }
3122
- {
3123
- goto st3;
3124
- }
3125
- st4:
3126
- p+= 1;
3127
- if ( p == pe )
3128
- goto _test_eof4;
3129
- st_case_4:
3130
- switch( ( (*( p))) ) {
3131
- case 42: {
3132
- goto st4;
3133
- }
3134
- case 47: {
3135
- goto st10;
3136
- }
3137
- }
3138
- {
3139
- goto st3;
3140
- }
3141
- st5:
3142
- p+= 1;
3143
- if ( p == pe )
3144
- goto _test_eof5;
3145
- st_case_5:
3146
- if ( ( (*( p))) == 10 ) {
3147
- goto st10;
3148
- }
3149
- {
3150
- goto st5;
3151
- }
3152
- st6:
3153
- p+= 1;
3154
- if ( p == pe )
3155
- goto _test_eof6;
3156
- st_case_6:
3157
- switch( ( (*( p))) ) {
3158
- case 42: {
3159
- goto st7;
3160
- }
3161
- case 47: {
3162
- goto st9;
3163
- }
3164
- }
3165
- {
3166
- goto st0;
3167
- }
3168
- st7:
3169
- p+= 1;
3170
- if ( p == pe )
3171
- goto _test_eof7;
3172
- st_case_7:
3173
- if ( ( (*( p))) == 42 ) {
3174
- goto st8;
3175
- }
3176
- {
3177
- goto st7;
3178
- }
3179
- st8:
3180
- p+= 1;
3181
- if ( p == pe )
3182
- goto _test_eof8;
3183
- st_case_8:
3184
- switch( ( (*( p))) ) {
3185
- case 42: {
3186
- goto st8;
3187
- }
3188
- case 47: {
3189
- goto st1;
3190
- }
3191
- }
3192
- {
3193
- goto st7;
3194
- }
3195
- st9:
3196
- p+= 1;
3197
- if ( p == pe )
3198
- goto _test_eof9;
3199
- st_case_9:
3200
- if ( ( (*( p))) == 10 ) {
3201
- goto st1;
3202
- }
3203
- {
3204
- goto st9;
3205
- }
3206
- st_out:
3207
- _test_eof1: cs = 1; goto _test_eof;
3208
- _test_eof10: cs = 10; goto _test_eof;
3209
- _test_eof2: cs = 2; goto _test_eof;
3210
- _test_eof3: cs = 3; goto _test_eof;
3211
- _test_eof4: cs = 4; goto _test_eof;
3212
- _test_eof5: cs = 5; goto _test_eof;
3213
- _test_eof6: cs = 6; goto _test_eof;
3214
- _test_eof7: cs = 7; goto _test_eof;
3215
- _test_eof8: cs = 8; goto _test_eof;
3216
- _test_eof9: cs = 9; goto _test_eof;
3217
-
3218
- _test_eof: {}
3219
- _out: {}
3220
- }
3221
-
3222
- #line 854 "parser.rl"
3223
-
3224
-
3225
- if (cs >= JSON_first_final && p == pe) {
3226
- return result;
3227
- } else {
3228
- rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p);
3229
- return Qnil;
3230
- }
1363
+ GET_PARSER_CONFIG;
1364
+ return cParser_parse(config, Vsource);
3231
1365
  }
3232
1366
 
3233
- static void JSON_mark(void *ptr)
1367
+ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
3234
1368
  {
3235
- JSON_Parser *json = ptr;
3236
- rb_gc_mark_maybe(json->Vsource);
3237
- rb_gc_mark_maybe(json->create_id);
3238
- rb_gc_mark_maybe(json->object_class);
3239
- rb_gc_mark_maybe(json->array_class);
3240
- rb_gc_mark_maybe(json->decimal_class);
3241
- rb_gc_mark_maybe(json->match_string);
1369
+ Vsource = convert_encoding(StringValue(Vsource));
1370
+ StringValue(Vsource);
1371
+
1372
+ JSON_ParserConfig _config = {0};
1373
+ JSON_ParserConfig *config = &_config;
1374
+ parser_config_init(config, opts);
1375
+
1376
+ return cParser_parse(config, Vsource);
3242
1377
  }
3243
1378
 
3244
- static void JSON_free(void *ptr)
1379
+ static void JSON_ParserConfig_mark(void *ptr)
3245
1380
  {
3246
- JSON_Parser *json = ptr;
3247
- fbuffer_free(json->fbuffer);
3248
- ruby_xfree(json);
1381
+ JSON_ParserConfig *config = ptr;
1382
+ rb_gc_mark(config->create_id);
1383
+ rb_gc_mark(config->object_class);
1384
+ rb_gc_mark(config->array_class);
1385
+ rb_gc_mark(config->decimal_class);
1386
+ rb_gc_mark(config->match_string);
3249
1387
  }
3250
1388
 
3251
- static size_t JSON_memsize(const void *ptr)
1389
+ static void JSON_ParserConfig_free(void *ptr)
3252
1390
  {
3253
- const JSON_Parser *json = ptr;
3254
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
1391
+ JSON_ParserConfig *config = ptr;
1392
+ ruby_xfree(config);
3255
1393
  }
3256
1394
 
3257
- #ifdef NEW_TYPEDDATA_WRAPPER
3258
- static const rb_data_type_t JSON_Parser_type = {
3259
- "JSON/Parser",
3260
- {JSON_mark, JSON_free, JSON_memsize,},
3261
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
3262
- 0, 0,
3263
- RUBY_TYPED_FREE_IMMEDIATELY,
3264
- #endif
3265
- };
3266
- #endif
3267
-
3268
- static VALUE cJSON_parser_s_allocate(VALUE klass)
1395
+ static size_t JSON_ParserConfig_memsize(const void *ptr)
3269
1396
  {
3270
- JSON_Parser *json;
3271
- VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
3272
- json->fbuffer = fbuffer_alloc(0);
3273
- return obj;
1397
+ return sizeof(JSON_ParserConfig);
3274
1398
  }
3275
1399
 
3276
- /*
3277
- * call-seq: source()
3278
- *
3279
- * Returns a copy of the current _source_ string, that was used to construct
3280
- * this Parser.
3281
- */
3282
- static VALUE cParser_source(VALUE self)
1400
+ static const rb_data_type_t JSON_ParserConfig_type = {
1401
+ "JSON::Ext::Parser/ParserConfig",
1402
+ {
1403
+ JSON_ParserConfig_mark,
1404
+ JSON_ParserConfig_free,
1405
+ JSON_ParserConfig_memsize,
1406
+ },
1407
+ 0, 0,
1408
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
1409
+ };
1410
+
1411
+ static VALUE cJSON_parser_s_allocate(VALUE klass)
3283
1412
  {
3284
- GET_PARSER;
3285
- return rb_str_dup(json->Vsource);
1413
+ JSON_ParserConfig *config;
1414
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
3286
1415
  }
3287
1416
 
3288
1417
  void Init_parser(void)
3289
1418
  {
3290
- #ifdef HAVE_RB_EXT_RACTOR_SAFE
3291
- rb_ext_ractor_safe(true);
3292
- #endif
3293
-
3294
- #undef rb_intern
3295
- rb_require("json/common");
3296
- mJSON = rb_define_module("JSON");
3297
- mExt = rb_define_module_under(mJSON, "Ext");
3298
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
3299
- eParserError = rb_path2class("JSON::ParserError");
3300
- eNestingError = rb_path2class("JSON::NestingError");
3301
- rb_gc_register_mark_object(eParserError);
3302
- rb_gc_register_mark_object(eNestingError);
3303
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
3304
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
3305
- rb_define_method(cParser, "parse", cParser_parse, 0);
3306
- rb_define_method(cParser, "source", cParser_source, 0);
3307
-
3308
- CNaN = rb_const_get(mJSON, rb_intern("NaN"));
3309
- rb_gc_register_mark_object(CNaN);
3310
-
3311
- CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
3312
- rb_gc_register_mark_object(CInfinity);
3313
-
3314
- CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
3315
- rb_gc_register_mark_object(CMinusInfinity);
3316
-
3317
- i_json_creatable_p = rb_intern("json_creatable?");
3318
- i_json_create = rb_intern("json_create");
3319
- i_create_id = rb_intern("create_id");
3320
- i_create_additions = rb_intern("create_additions");
3321
- i_chr = rb_intern("chr");
3322
- i_max_nesting = rb_intern("max_nesting");
3323
- i_allow_nan = rb_intern("allow_nan");
3324
- i_symbolize_names = rb_intern("symbolize_names");
3325
- i_object_class = rb_intern("object_class");
3326
- i_array_class = rb_intern("array_class");
3327
- i_decimal_class = rb_intern("decimal_class");
3328
- i_match = rb_intern("match");
3329
- i_match_string = rb_intern("match_string");
3330
- i_key_p = rb_intern("key?");
3331
- i_deep_const_get = rb_intern("deep_const_get");
3332
- i_aset = rb_intern("[]=");
3333
- i_aref = rb_intern("[]");
3334
- i_leftshift = rb_intern("<<");
3335
- i_new = rb_intern("new");
3336
- i_try_convert = rb_intern("try_convert");
3337
- i_freeze = rb_intern("freeze");
3338
- i_uminus = rb_intern("-@");
3339
- }
1419
+ #ifdef HAVE_RB_EXT_RACTOR_SAFE
1420
+ rb_ext_ractor_safe(true);
1421
+ #endif
3340
1422
 
3341
- /*
3342
- * Local variables:
3343
- * mode: c
3344
- * c-file-style: ruby
3345
- * indent-tabs-mode: nil
3346
- * End:
3347
- */
1423
+ #undef rb_intern
1424
+ rb_require("json/common");
1425
+ mJSON = rb_define_module("JSON");
1426
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1427
+ VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
1428
+ eNestingError = rb_path2class("JSON::NestingError");
1429
+ rb_gc_register_mark_object(eNestingError);
1430
+ rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
1431
+ rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
1432
+ rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
1433
+
1434
+ VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
1435
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
1436
+
1437
+ CNaN = rb_const_get(mJSON, rb_intern("NaN"));
1438
+ rb_gc_register_mark_object(CNaN);
1439
+
1440
+ CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
1441
+ rb_gc_register_mark_object(CInfinity);
1442
+
1443
+ CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
1444
+ rb_gc_register_mark_object(CMinusInfinity);
1445
+
1446
+ rb_global_variable(&Encoding_UTF_8);
1447
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1448
+
1449
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1450
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1451
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1452
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1453
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1454
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
1455
+ sym_create_id = ID2SYM(rb_intern("create_id"));
1456
+ sym_object_class = ID2SYM(rb_intern("object_class"));
1457
+ sym_array_class = ID2SYM(rb_intern("array_class"));
1458
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1459
+ sym_match_string = ID2SYM(rb_intern("match_string"));
1460
+
1461
+ i_create_id = rb_intern("create_id");
1462
+ i_json_creatable_p = rb_intern("json_creatable?");
1463
+ i_json_create = rb_intern("json_create");
1464
+ i_chr = rb_intern("chr");
1465
+ i_match = rb_intern("match");
1466
+ i_deep_const_get = rb_intern("deep_const_get");
1467
+ i_aset = rb_intern("[]=");
1468
+ i_aref = rb_intern("[]");
1469
+ i_leftshift = rb_intern("<<");
1470
+ i_new = rb_intern("new");
1471
+ i_try_convert = rb_intern("try_convert");
1472
+ i_uminus = rb_intern("-@");
1473
+ i_encode = rb_intern("encode");
1474
+
1475
+ binary_encindex = rb_ascii8bit_encindex();
1476
+ utf8_encindex = rb_utf8_encindex();
1477
+ enc_utf8 = rb_utf8_encoding();
1478
+ }