json 2.9.0 → 2.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,36 +1,49 @@
1
- /* This file is automatically generated from parser.rl by using ragel */
2
- #line 1 "parser.rl"
3
1
  #include "ruby.h"
4
- #include "../fbuffer/fbuffer.h"
2
+ #include "ruby/encoding.h"
5
3
 
6
- static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
4
+ /* shims */
5
+ /* This is the fallback definition from Ruby 3.4 */
6
+
7
+ #ifndef RBIMPL_STDBOOL_H
8
+ #if defined(__cplusplus)
9
+ # if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10
+ # include <cstdbool>
11
+ # endif
12
+ #elif defined(HAVE_STDBOOL_H)
13
+ # include <stdbool.h>
14
+ #elif !defined(HAVE__BOOL)
15
+ typedef unsigned char _Bool;
16
+ # define bool _Bool
17
+ # define true ((_Bool)+1)
18
+ # define false ((_Bool)+0)
19
+ # define __bool_true_false_are_defined
20
+ #endif
21
+ #endif
22
+
23
+ #ifndef RB_UNLIKELY
24
+ #define RB_UNLIKELY(expr) expr
25
+ #endif
26
+
27
+ #ifndef RB_LIKELY
28
+ #define RB_LIKELY(expr) expr
29
+ #endif
30
+
31
+ static VALUE mJSON, eNestingError, Encoding_UTF_8;
7
32
  static VALUE CNaN, CInfinity, CMinusInfinity;
8
33
 
9
- static ID i_json_creatable_p, i_json_create, i_create_id,
10
- i_chr, i_deep_const_get, i_match, i_aset, i_aref,
34
+ static ID i_chr, i_aset, i_aref,
11
35
  i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
12
36
 
13
37
  static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
14
- sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
15
- sym_decimal_class, sym_match_string;
38
+ sym_decimal_class, sym_on_load;
16
39
 
17
40
  static int binary_encindex;
18
41
  static int utf8_encindex;
19
42
 
20
- #ifdef HAVE_RB_CATEGORY_WARN
21
- # define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
22
- #else
23
- # define json_deprecated(message) rb_warn(message)
24
- #endif
25
-
26
- static const char deprecated_create_additions_warning[] =
27
- "JSON.load implicit support for `create_additions: true` is deprecated "
28
- "and will be removed in 3.0, use JSON.unsafe_load or explicitly "
29
- "pass `create_additions: true`";
30
-
31
43
  #ifndef HAVE_RB_HASH_BULK_INSERT
32
44
  // For TruffleRuby
33
- void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
45
+ void
46
+ rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
34
47
  {
35
48
  long index = 0;
36
49
  while (index < count) {
@@ -42,6 +55,11 @@ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
42
55
  }
43
56
  #endif
44
57
 
58
+ #ifndef HAVE_RB_HASH_NEW_CAPA
59
+ #define rb_hash_new_capa(n) rb_hash_new()
60
+ #endif
61
+
62
+
45
63
  /* name cache */
46
64
 
47
65
  #include <string.h>
@@ -104,7 +122,7 @@ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const lon
104
122
  return Qfalse;
105
123
  }
106
124
 
107
- if (RB_UNLIKELY(!isalpha(str[0]))) {
125
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
108
126
  // Simple heuristic, if the first character isn't a letter,
109
127
  // we're much less likely to see this string again.
110
128
  // We mostly want to cache strings that are likely to be repeated.
@@ -156,7 +174,7 @@ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const lon
156
174
  return Qfalse;
157
175
  }
158
176
 
159
- if (RB_UNLIKELY(!isalpha(str[0]))) {
177
+ if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
160
178
  // Simple heuristic, if the first character isn't a letter,
161
179
  // we're much less likely to see this string again.
162
180
  // We mostly want to cache strings that are likely to be repeated.
@@ -231,13 +249,14 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu
231
249
  return stack;
232
250
  }
233
251
 
234
- static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
252
+ static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
235
253
  {
236
254
  if (RB_UNLIKELY(stack->head >= stack->capa)) {
237
255
  stack = rvalue_stack_grow(stack, handle, stack_ref);
238
256
  }
239
257
  stack->ptr[stack->head] = value;
240
258
  stack->head++;
259
+ return value;
241
260
  }
242
261
 
243
262
  static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
@@ -301,10 +320,50 @@ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle,
301
320
 
302
321
  static void rvalue_stack_eagerly_release(VALUE handle)
303
322
  {
304
- rvalue_stack *stack;
305
- TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
306
- RTYPEDDATA_DATA(handle) = NULL;
307
- rvalue_stack_free(stack);
323
+ if (handle) {
324
+ rvalue_stack *stack;
325
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
326
+ RTYPEDDATA_DATA(handle) = NULL;
327
+ rvalue_stack_free(stack);
328
+ }
329
+ }
330
+
331
+
332
+ #ifndef HAVE_STRNLEN
333
+ static size_t strnlen(const char *s, size_t maxlen)
334
+ {
335
+ char *p;
336
+ return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
337
+ }
338
+ #endif
339
+
340
+ #define PARSE_ERROR_FRAGMENT_LEN 32
341
+ #ifdef RBIMPL_ATTR_NORETURN
342
+ RBIMPL_ATTR_NORETURN()
343
+ #endif
344
+ static void raise_parse_error(const char *format, const char *start)
345
+ {
346
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
347
+
348
+ size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
349
+ const char *ptr = start;
350
+
351
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
352
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
353
+
354
+ while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
355
+ len--;
356
+ }
357
+
358
+ if (buffer[len - 1] >= 0xC0) { // multibyte character start
359
+ len--;
360
+ }
361
+
362
+ buffer[len] = '\0';
363
+ ptr = (const char *)buffer;
364
+ }
365
+
366
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
308
367
  }
309
368
 
310
369
  /* unicode */
@@ -328,21 +387,19 @@ static const signed char digit_values[256] = {
328
387
 
329
388
  static uint32_t unescape_unicode(const unsigned char *p)
330
389
  {
331
- const uint32_t replacement_char = 0xFFFD;
332
-
333
390
  signed char b;
334
391
  uint32_t result = 0;
335
392
  b = digit_values[p[0]];
336
- if (b < 0) return replacement_char;
393
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
337
394
  result = (result << 4) | (unsigned char)b;
338
395
  b = digit_values[p[1]];
339
- if (b < 0) return replacement_char;
396
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
340
397
  result = (result << 4) | (unsigned char)b;
341
398
  b = digit_values[p[2]];
342
- if (b < 0) return replacement_char;
399
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
343
400
  result = (result << 4) | (unsigned char)b;
344
401
  b = digit_values[p[3]];
345
- if (b < 0) return replacement_char;
402
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
346
403
  result = (result << 4) | (unsigned char)b;
347
404
  return result;
348
405
  }
@@ -374,1110 +431,255 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
374
431
  }
375
432
 
376
433
  typedef struct JSON_ParserStruct {
377
- VALUE Vsource;
378
- char *source;
379
- long len;
380
- char *memo;
381
- VALUE create_id;
382
- VALUE object_class;
383
- VALUE array_class;
434
+ VALUE on_load_proc;
384
435
  VALUE decimal_class;
385
- VALUE match_string;
386
- FBuffer fbuffer;
387
- int in_array;
436
+ ID decimal_method_id;
388
437
  int max_nesting;
389
438
  bool allow_nan;
390
439
  bool allow_trailing_comma;
391
440
  bool parsing_name;
392
441
  bool symbolize_names;
393
442
  bool freeze;
394
- bool create_additions;
395
- bool deprecated_create_additions;
396
- rvalue_cache name_cache;
397
- rvalue_stack *stack;
398
- VALUE stack_handle;
399
- } JSON_Parser;
400
-
401
- #define GET_PARSER \
402
- GET_PARSER_INIT; \
403
- if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
443
+ } JSON_ParserConfig;
404
444
 
405
- #define GET_PARSER_INIT \
406
- JSON_Parser *json; \
407
- TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
445
+ typedef struct JSON_ParserStateStruct {
446
+ VALUE stack_handle;
447
+ const char *cursor;
448
+ const char *end;
449
+ rvalue_stack *stack;
450
+ rvalue_cache name_cache;
451
+ int in_array;
452
+ int current_nesting;
453
+ } JSON_ParserState;
408
454
 
409
- #define MinusInfinity "-Infinity"
410
- #define EVIL 0x666
455
+ #define GET_PARSER_CONFIG \
456
+ JSON_ParserConfig *config; \
457
+ TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
411
458
 
412
- static const rb_data_type_t JSON_Parser_type;
413
- static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
414
- static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
415
- static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
416
- static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result);
417
- static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
459
+ static const rb_data_type_t JSON_ParserConfig_type;
418
460
 
461
+ static const bool whitespace[256] = {
462
+ [' '] = 1,
463
+ ['\t'] = 1,
464
+ ['\n'] = 1,
465
+ ['\r'] = 1,
466
+ ['/'] = 1,
467
+ };
419
468
 
420
- #define PARSE_ERROR_FRAGMENT_LEN 32
421
- #ifdef RBIMPL_ATTR_NORETURN
422
- RBIMPL_ATTR_NORETURN()
423
- #endif
424
- static void raise_parse_error(const char *format, const char *start)
469
+ static void
470
+ json_eat_comments(JSON_ParserState *state)
425
471
  {
426
- char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
427
-
428
- size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
429
- const char *ptr = start;
430
-
431
- if (len == PARSE_ERROR_FRAGMENT_LEN) {
432
- MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
433
- buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
434
- ptr = buffer;
472
+ if (state->cursor + 1 < state->end) {
473
+ switch(state->cursor[1]) {
474
+ case '/': {
475
+ state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
476
+ if (!state->cursor) {
477
+ state->cursor = state->end;
478
+ } else {
479
+ state->cursor++;
480
+ }
481
+ break;
482
+ }
483
+ case '*': {
484
+ state->cursor += 2;
485
+ while (true) {
486
+ state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
487
+ if (!state->cursor) {
488
+ state->cursor = state->end;
489
+ raise_parse_error("unexpected end of input, expected closing '*/'", state->cursor);
490
+ } else {
491
+ state->cursor++;
492
+ if (state->cursor < state->end && *state->cursor == '/') {
493
+ state->cursor++;
494
+ break;
495
+ }
496
+ }
497
+ }
498
+ break;
499
+ }
500
+ default:
501
+ raise_parse_error("unexpected token at '%s'", state->cursor);
502
+ break;
503
+ }
504
+ } else {
505
+ raise_parse_error("unexpected token at '%s'", state->cursor);
435
506
  }
436
-
437
- rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
438
507
  }
439
508
 
440
-
441
-
442
- #line 465 "parser.rl"
443
-
444
-
445
-
446
- #line 447 "parser.c"
447
- enum {JSON_object_start = 1};
448
- enum {JSON_object_first_final = 32};
449
- enum {JSON_object_error = 0};
450
-
451
- enum {JSON_object_en_main = 1};
452
-
453
-
454
- #line 505 "parser.rl"
455
-
456
-
457
- #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
458
-
459
- static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
509
+ static inline void
510
+ json_eat_whitespace(JSON_ParserState *state)
460
511
  {
461
- int cs = EVIL;
462
-
463
- if (json->max_nesting && current_nesting > json->max_nesting) {
464
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
465
- }
466
-
467
- long stack_head = json->stack->head;
468
-
469
-
470
- #line 471 "parser.c"
471
- {
472
- cs = JSON_object_start;
473
- }
474
-
475
- #line 520 "parser.rl"
476
-
477
- #line 478 "parser.c"
478
- {
479
- short _widec;
480
- if ( p == pe )
481
- goto _test_eof;
482
- switch ( cs )
483
- {
484
- case 1:
485
- if ( (*p) == 123 )
486
- goto st2;
487
- goto st0;
488
- st0:
489
- cs = 0;
490
- goto _out;
491
- st2:
492
- if ( ++p == pe )
493
- goto _test_eof2;
494
- case 2:
495
- switch( (*p) ) {
496
- case 13: goto st2;
497
- case 32: goto st2;
498
- case 34: goto tr2;
499
- case 47: goto st28;
500
- case 125: goto tr4;
501
- }
502
- if ( 9 <= (*p) && (*p) <= 10 )
503
- goto st2;
504
- goto st0;
505
- tr2:
506
- #line 484 "parser.rl"
507
- {
508
- char *np;
509
- json->parsing_name = true;
510
- np = JSON_parse_string(json, p, pe, result);
511
- json->parsing_name = false;
512
- if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {
513
- PUSH(*result);
514
- {p = (( np))-1;}
515
- }
516
- }
517
- goto st3;
518
- st3:
519
- if ( ++p == pe )
520
- goto _test_eof3;
521
- case 3:
522
- #line 523 "parser.c"
523
- switch( (*p) ) {
524
- case 13: goto st3;
525
- case 32: goto st3;
526
- case 47: goto st4;
527
- case 58: goto st8;
528
- }
529
- if ( 9 <= (*p) && (*p) <= 10 )
530
- goto st3;
531
- goto st0;
532
- st4:
533
- if ( ++p == pe )
534
- goto _test_eof4;
535
- case 4:
536
- switch( (*p) ) {
537
- case 42: goto st5;
538
- case 47: goto st7;
539
- }
540
- goto st0;
541
- st5:
542
- if ( ++p == pe )
543
- goto _test_eof5;
544
- case 5:
545
- if ( (*p) == 42 )
546
- goto st6;
547
- goto st5;
548
- st6:
549
- if ( ++p == pe )
550
- goto _test_eof6;
551
- case 6:
552
- switch( (*p) ) {
553
- case 42: goto st6;
554
- case 47: goto st3;
555
- }
556
- goto st5;
557
- st7:
558
- if ( ++p == pe )
559
- goto _test_eof7;
560
- case 7:
561
- if ( (*p) == 10 )
562
- goto st3;
563
- goto st7;
564
- st8:
565
- if ( ++p == pe )
566
- goto _test_eof8;
567
- case 8:
568
- switch( (*p) ) {
569
- case 13: goto st8;
570
- case 32: goto st8;
571
- case 34: goto tr11;
572
- case 45: goto tr11;
573
- case 47: goto st24;
574
- case 73: goto tr11;
575
- case 78: goto tr11;
576
- case 91: goto tr11;
577
- case 102: goto tr11;
578
- case 110: goto tr11;
579
- case 116: goto tr11;
580
- case 123: goto tr11;
581
- }
582
- if ( (*p) > 10 ) {
583
- if ( 48 <= (*p) && (*p) <= 57 )
584
- goto tr11;
585
- } else if ( (*p) >= 9 )
586
- goto st8;
587
- goto st0;
588
- tr11:
589
- #line 473 "parser.rl"
590
- {
591
- char *np = JSON_parse_value(json, p, pe, result, current_nesting);
592
- if (np == NULL) {
593
- p--; {p++; cs = 9; goto _out;}
512
+ while (state->cursor < state->end && RB_UNLIKELY(whitespace[(unsigned char)*state->cursor])) {
513
+ if (RB_LIKELY(*state->cursor != '/')) {
514
+ state->cursor++;
594
515
  } else {
595
- {p = (( np))-1;}
516
+ json_eat_comments(state);
596
517
  }
597
518
  }
598
- goto st9;
599
- st9:
600
- if ( ++p == pe )
601
- goto _test_eof9;
602
- case 9:
603
- #line 604 "parser.c"
604
- _widec = (*p);
605
- if ( (*p) < 13 ) {
606
- if ( (*p) > 9 ) {
607
- if ( 10 <= (*p) && (*p) <= 10 ) {
608
- _widec = (short)(128 + ((*p) - -128));
609
- if (
610
- #line 482 "parser.rl"
611
- json->allow_trailing_comma ) _widec += 256;
612
- }
613
- } else if ( (*p) >= 9 ) {
614
- _widec = (short)(128 + ((*p) - -128));
615
- if (
616
- #line 482 "parser.rl"
617
- json->allow_trailing_comma ) _widec += 256;
618
- }
619
- } else if ( (*p) > 13 ) {
620
- if ( (*p) < 44 ) {
621
- if ( 32 <= (*p) && (*p) <= 32 ) {
622
- _widec = (short)(128 + ((*p) - -128));
623
- if (
624
- #line 482 "parser.rl"
625
- json->allow_trailing_comma ) _widec += 256;
626
- }
627
- } else if ( (*p) > 44 ) {
628
- if ( 47 <= (*p) && (*p) <= 47 ) {
629
- _widec = (short)(128 + ((*p) - -128));
630
- if (
631
- #line 482 "parser.rl"
632
- json->allow_trailing_comma ) _widec += 256;
633
- }
634
- } else {
635
- _widec = (short)(128 + ((*p) - -128));
636
- if (
637
- #line 482 "parser.rl"
638
- json->allow_trailing_comma ) _widec += 256;
639
- }
640
- } else {
641
- _widec = (short)(128 + ((*p) - -128));
642
- if (
643
- #line 482 "parser.rl"
644
- json->allow_trailing_comma ) _widec += 256;
645
- }
646
- switch( _widec ) {
647
- case 125: goto tr4;
648
- case 269: goto st10;
649
- case 288: goto st10;
650
- case 300: goto st11;
651
- case 303: goto st16;
652
- case 525: goto st9;
653
- case 544: goto st9;
654
- case 556: goto st2;
655
- case 559: goto st20;
656
- }
657
- if ( _widec > 266 ) {
658
- if ( 521 <= _widec && _widec <= 522 )
659
- goto st9;
660
- } else if ( _widec >= 265 )
661
- goto st10;
662
- goto st0;
663
- tr4:
664
- #line 495 "parser.rl"
665
- { p--; {p++; cs = 32; goto _out;} }
666
- goto st32;
667
- st32:
668
- if ( ++p == pe )
669
- goto _test_eof32;
670
- case 32:
671
- #line 672 "parser.c"
672
- goto st0;
673
- st10:
674
- if ( ++p == pe )
675
- goto _test_eof10;
676
- case 10:
677
- switch( (*p) ) {
678
- case 13: goto st10;
679
- case 32: goto st10;
680
- case 44: goto st11;
681
- case 47: goto st16;
682
- case 125: goto tr4;
683
- }
684
- if ( 9 <= (*p) && (*p) <= 10 )
685
- goto st10;
686
- goto st0;
687
- st11:
688
- if ( ++p == pe )
689
- goto _test_eof11;
690
- case 11:
691
- switch( (*p) ) {
692
- case 13: goto st11;
693
- case 32: goto st11;
694
- case 34: goto tr2;
695
- case 47: goto st12;
696
- }
697
- if ( 9 <= (*p) && (*p) <= 10 )
698
- goto st11;
699
- goto st0;
700
- st12:
701
- if ( ++p == pe )
702
- goto _test_eof12;
703
- case 12:
704
- switch( (*p) ) {
705
- case 42: goto st13;
706
- case 47: goto st15;
707
- }
708
- goto st0;
709
- st13:
710
- if ( ++p == pe )
711
- goto _test_eof13;
712
- case 13:
713
- if ( (*p) == 42 )
714
- goto st14;
715
- goto st13;
716
- st14:
717
- if ( ++p == pe )
718
- goto _test_eof14;
719
- case 14:
720
- switch( (*p) ) {
721
- case 42: goto st14;
722
- case 47: goto st11;
723
- }
724
- goto st13;
725
- st15:
726
- if ( ++p == pe )
727
- goto _test_eof15;
728
- case 15:
729
- if ( (*p) == 10 )
730
- goto st11;
731
- goto st15;
732
- st16:
733
- if ( ++p == pe )
734
- goto _test_eof16;
735
- case 16:
736
- switch( (*p) ) {
737
- case 42: goto st17;
738
- case 47: goto st19;
739
- }
740
- goto st0;
741
- st17:
742
- if ( ++p == pe )
743
- goto _test_eof17;
744
- case 17:
745
- if ( (*p) == 42 )
746
- goto st18;
747
- goto st17;
748
- st18:
749
- if ( ++p == pe )
750
- goto _test_eof18;
751
- case 18:
752
- switch( (*p) ) {
753
- case 42: goto st18;
754
- case 47: goto st10;
755
- }
756
- goto st17;
757
- st19:
758
- if ( ++p == pe )
759
- goto _test_eof19;
760
- case 19:
761
- if ( (*p) == 10 )
762
- goto st10;
763
- goto st19;
764
- st20:
765
- if ( ++p == pe )
766
- goto _test_eof20;
767
- case 20:
768
- _widec = (*p);
769
- if ( (*p) > 42 ) {
770
- if ( 47 <= (*p) && (*p) <= 47 ) {
771
- _widec = (short)(128 + ((*p) - -128));
772
- if (
773
- #line 482 "parser.rl"
774
- json->allow_trailing_comma ) _widec += 256;
775
- }
776
- } else if ( (*p) >= 42 ) {
777
- _widec = (short)(128 + ((*p) - -128));
778
- if (
779
- #line 482 "parser.rl"
780
- json->allow_trailing_comma ) _widec += 256;
781
- }
782
- switch( _widec ) {
783
- case 298: goto st17;
784
- case 303: goto st19;
785
- case 554: goto st21;
786
- case 559: goto st23;
787
- }
788
- goto st0;
789
- st21:
790
- if ( ++p == pe )
791
- goto _test_eof21;
792
- case 21:
793
- _widec = (*p);
794
- if ( (*p) < 42 ) {
795
- if ( (*p) <= 41 ) {
796
- _widec = (short)(128 + ((*p) - -128));
797
- if (
798
- #line 482 "parser.rl"
799
- json->allow_trailing_comma ) _widec += 256;
800
- }
801
- } else if ( (*p) > 42 ) {
802
- if ( 43 <= (*p) )
803
- { _widec = (short)(128 + ((*p) - -128));
804
- if (
805
- #line 482 "parser.rl"
806
- json->allow_trailing_comma ) _widec += 256;
807
- }
808
- } else {
809
- _widec = (short)(128 + ((*p) - -128));
810
- if (
811
- #line 482 "parser.rl"
812
- json->allow_trailing_comma ) _widec += 256;
813
- }
814
- switch( _widec ) {
815
- case 298: goto st18;
816
- case 554: goto st22;
817
- }
818
- if ( _widec > 383 ) {
819
- if ( 384 <= _widec && _widec <= 639 )
820
- goto st21;
821
- } else if ( _widec >= 128 )
822
- goto st17;
823
- goto st0;
824
- st22:
825
- if ( ++p == pe )
826
- goto _test_eof22;
827
- case 22:
828
- _widec = (*p);
829
- if ( (*p) < 43 ) {
830
- if ( (*p) > 41 ) {
831
- if ( 42 <= (*p) && (*p) <= 42 ) {
832
- _widec = (short)(128 + ((*p) - -128));
833
- if (
834
- #line 482 "parser.rl"
835
- json->allow_trailing_comma ) _widec += 256;
836
- }
837
- } else {
838
- _widec = (short)(128 + ((*p) - -128));
839
- if (
840
- #line 482 "parser.rl"
841
- json->allow_trailing_comma ) _widec += 256;
842
- }
843
- } else if ( (*p) > 46 ) {
844
- if ( (*p) > 47 ) {
845
- if ( 48 <= (*p) )
846
- { _widec = (short)(128 + ((*p) - -128));
847
- if (
848
- #line 482 "parser.rl"
849
- json->allow_trailing_comma ) _widec += 256;
850
- }
851
- } else if ( (*p) >= 47 ) {
852
- _widec = (short)(128 + ((*p) - -128));
853
- if (
854
- #line 482 "parser.rl"
855
- json->allow_trailing_comma ) _widec += 256;
856
- }
857
- } else {
858
- _widec = (short)(128 + ((*p) - -128));
859
- if (
860
- #line 482 "parser.rl"
861
- json->allow_trailing_comma ) _widec += 256;
862
- }
863
- switch( _widec ) {
864
- case 298: goto st18;
865
- case 303: goto st10;
866
- case 554: goto st22;
867
- case 559: goto st9;
868
- }
869
- if ( _widec > 383 ) {
870
- if ( 384 <= _widec && _widec <= 639 )
871
- goto st21;
872
- } else if ( _widec >= 128 )
873
- goto st17;
874
- goto st0;
875
- st23:
876
- if ( ++p == pe )
877
- goto _test_eof23;
878
- case 23:
879
- _widec = (*p);
880
- if ( (*p) < 10 ) {
881
- if ( (*p) <= 9 ) {
882
- _widec = (short)(128 + ((*p) - -128));
883
- if (
884
- #line 482 "parser.rl"
885
- json->allow_trailing_comma ) _widec += 256;
886
- }
887
- } else if ( (*p) > 10 ) {
888
- if ( 11 <= (*p) )
889
- { _widec = (short)(128 + ((*p) - -128));
890
- if (
891
- #line 482 "parser.rl"
892
- json->allow_trailing_comma ) _widec += 256;
893
- }
894
- } else {
895
- _widec = (short)(128 + ((*p) - -128));
896
- if (
897
- #line 482 "parser.rl"
898
- json->allow_trailing_comma ) _widec += 256;
899
- }
900
- switch( _widec ) {
901
- case 266: goto st10;
902
- case 522: goto st9;
903
- }
904
- if ( _widec > 383 ) {
905
- if ( 384 <= _widec && _widec <= 639 )
906
- goto st23;
907
- } else if ( _widec >= 128 )
908
- goto st19;
909
- goto st0;
910
- st24:
911
- if ( ++p == pe )
912
- goto _test_eof24;
913
- case 24:
914
- switch( (*p) ) {
915
- case 42: goto st25;
916
- case 47: goto st27;
917
- }
918
- goto st0;
919
- st25:
920
- if ( ++p == pe )
921
- goto _test_eof25;
922
- case 25:
923
- if ( (*p) == 42 )
924
- goto st26;
925
- goto st25;
926
- st26:
927
- if ( ++p == pe )
928
- goto _test_eof26;
929
- case 26:
930
- switch( (*p) ) {
931
- case 42: goto st26;
932
- case 47: goto st8;
933
- }
934
- goto st25;
935
- st27:
936
- if ( ++p == pe )
937
- goto _test_eof27;
938
- case 27:
939
- if ( (*p) == 10 )
940
- goto st8;
941
- goto st27;
942
- st28:
943
- if ( ++p == pe )
944
- goto _test_eof28;
945
- case 28:
946
- switch( (*p) ) {
947
- case 42: goto st29;
948
- case 47: goto st31;
949
- }
950
- goto st0;
951
- st29:
952
- if ( ++p == pe )
953
- goto _test_eof29;
954
- case 29:
955
- if ( (*p) == 42 )
956
- goto st30;
957
- goto st29;
958
- st30:
959
- if ( ++p == pe )
960
- goto _test_eof30;
961
- case 30:
962
- switch( (*p) ) {
963
- case 42: goto st30;
964
- case 47: goto st2;
965
- }
966
- goto st29;
967
- st31:
968
- if ( ++p == pe )
969
- goto _test_eof31;
970
- case 31:
971
- if ( (*p) == 10 )
972
- goto st2;
973
- goto st31;
974
- }
975
- _test_eof2: cs = 2; goto _test_eof;
976
- _test_eof3: cs = 3; goto _test_eof;
977
- _test_eof4: cs = 4; goto _test_eof;
978
- _test_eof5: cs = 5; goto _test_eof;
979
- _test_eof6: cs = 6; goto _test_eof;
980
- _test_eof7: cs = 7; goto _test_eof;
981
- _test_eof8: cs = 8; goto _test_eof;
982
- _test_eof9: cs = 9; goto _test_eof;
983
- _test_eof32: cs = 32; goto _test_eof;
984
- _test_eof10: cs = 10; goto _test_eof;
985
- _test_eof11: cs = 11; goto _test_eof;
986
- _test_eof12: cs = 12; goto _test_eof;
987
- _test_eof13: cs = 13; goto _test_eof;
988
- _test_eof14: cs = 14; goto _test_eof;
989
- _test_eof15: cs = 15; goto _test_eof;
990
- _test_eof16: cs = 16; goto _test_eof;
991
- _test_eof17: cs = 17; goto _test_eof;
992
- _test_eof18: cs = 18; goto _test_eof;
993
- _test_eof19: cs = 19; goto _test_eof;
994
- _test_eof20: cs = 20; goto _test_eof;
995
- _test_eof21: cs = 21; goto _test_eof;
996
- _test_eof22: cs = 22; goto _test_eof;
997
- _test_eof23: cs = 23; goto _test_eof;
998
- _test_eof24: cs = 24; goto _test_eof;
999
- _test_eof25: cs = 25; goto _test_eof;
1000
- _test_eof26: cs = 26; goto _test_eof;
1001
- _test_eof27: cs = 27; goto _test_eof;
1002
- _test_eof28: cs = 28; goto _test_eof;
1003
- _test_eof29: cs = 29; goto _test_eof;
1004
- _test_eof30: cs = 30; goto _test_eof;
1005
- _test_eof31: cs = 31; goto _test_eof;
1006
-
1007
- _test_eof: {}
1008
- _out: {}
1009
- }
1010
-
1011
- #line 521 "parser.rl"
1012
-
1013
- if (cs >= JSON_object_first_final) {
1014
- long count = json->stack->head - stack_head;
1015
-
1016
- if (RB_UNLIKELY(json->object_class)) {
1017
- VALUE object = rb_class_new_instance(0, 0, json->object_class);
1018
- long index = 0;
1019
- VALUE *items = rvalue_stack_peek(json->stack, count);
1020
- while (index < count) {
1021
- VALUE name = items[index++];
1022
- VALUE value = items[index++];
1023
- rb_funcall(object, i_aset, 2, name, value);
1024
- }
1025
- *result = object;
1026
- } else {
1027
- VALUE hash;
1028
- #ifdef HAVE_RB_HASH_NEW_CAPA
1029
- hash = rb_hash_new_capa(count >> 1);
1030
- #else
1031
- hash = rb_hash_new();
1032
- #endif
1033
- rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
1034
- *result = hash;
1035
- }
1036
- rvalue_stack_pop(json->stack, count);
519
+ }
1037
520
 
1038
- if (RB_UNLIKELY(json->create_additions)) {
1039
- VALUE klassname;
1040
- if (json->object_class) {
1041
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
1042
- } else {
1043
- klassname = rb_hash_aref(*result, json->create_id);
1044
- }
1045
- if (!NIL_P(klassname)) {
1046
- VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
1047
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
1048
- if (json->deprecated_create_additions) {
1049
- json_deprecated(deprecated_create_additions_warning);
1050
- }
1051
- *result = rb_funcall(klass, i_json_create, 1, *result);
1052
- }
1053
- }
1054
- }
1055
- return p + 1;
521
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
522
+ {
523
+ if (symbolize) {
524
+ intern = true;
525
+ }
526
+ VALUE result;
527
+ # ifdef HAVE_RB_ENC_INTERNED_STR
528
+ if (intern) {
529
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
1056
530
  } else {
1057
- return NULL;
531
+ result = rb_utf8_str_new(start, (long)(end - start));
1058
532
  }
1059
- }
533
+ # else
534
+ result = rb_utf8_str_new(start, (long)(end - start));
535
+ if (intern) {
536
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
537
+ }
538
+ # endif
1060
539
 
540
+ if (symbolize) {
541
+ result = rb_str_intern(result);
542
+ }
1061
543
 
1062
- #line 1063 "parser.c"
1063
- enum {JSON_value_start = 1};
1064
- enum {JSON_value_first_final = 29};
1065
- enum {JSON_value_error = 0};
544
+ return result;
545
+ }
1066
546
 
1067
- enum {JSON_value_en_main = 1};
547
+ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
548
+ {
549
+ size_t bufferSize = stringEnd - string;
1068
550
 
551
+ if (is_name && state->in_array) {
552
+ VALUE cached_key;
553
+ if (RB_UNLIKELY(symbolize)) {
554
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
555
+ } else {
556
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
557
+ }
1069
558
 
1070
- #line 654 "parser.rl"
559
+ if (RB_LIKELY(cached_key)) {
560
+ return cached_key;
561
+ }
562
+ }
1071
563
 
564
+ return build_string(string, stringEnd, intern, symbolize);
565
+ }
1072
566
 
1073
- static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
567
+ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, const char *stringEnd, bool is_name, bool intern, bool symbolize)
1074
568
  {
1075
- int cs = EVIL;
1076
-
1077
-
1078
- #line 1079 "parser.c"
1079
- {
1080
- cs = JSON_value_start;
1081
- }
1082
-
1083
- #line 661 "parser.rl"
1084
-
1085
- #line 1086 "parser.c"
1086
- {
1087
- if ( p == pe )
1088
- goto _test_eof;
1089
- switch ( cs )
1090
- {
1091
- st1:
1092
- if ( ++p == pe )
1093
- goto _test_eof1;
1094
- case 1:
1095
- switch( (*p) ) {
1096
- case 13: goto st1;
1097
- case 32: goto st1;
1098
- case 34: goto tr2;
1099
- case 45: goto tr3;
1100
- case 47: goto st6;
1101
- case 73: goto st10;
1102
- case 78: goto st17;
1103
- case 91: goto tr7;
1104
- case 102: goto st19;
1105
- case 110: goto st23;
1106
- case 116: goto st26;
1107
- case 123: goto tr11;
1108
- }
1109
- if ( (*p) > 10 ) {
1110
- if ( 48 <= (*p) && (*p) <= 57 )
1111
- goto tr3;
1112
- } else if ( (*p) >= 9 )
1113
- goto st1;
1114
- goto st0;
1115
- st0:
1116
- cs = 0;
1117
- goto _out;
1118
- tr2:
1119
- #line 599 "parser.rl"
1120
- {
1121
- char *np = JSON_parse_string(json, p, pe, result);
1122
- if (np == NULL) {
1123
- p--;
1124
- {p++; cs = 29; goto _out;}
569
+ size_t bufferSize = stringEnd - string;
570
+ const char *p = string, *pe = string, *unescape, *bufferStart;
571
+ char *buffer;
572
+ int unescape_len;
573
+ char buf[4];
574
+
575
+ if (is_name && state->in_array) {
576
+ VALUE cached_key;
577
+ if (RB_UNLIKELY(symbolize)) {
578
+ cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
1125
579
  } else {
1126
- {p = (( np))-1;}
1127
- }
1128
- }
1129
- goto st29;
1130
- tr3:
1131
- #line 609 "parser.rl"
1132
- {
1133
- char *np;
1134
- if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
1135
- if (json->allow_nan) {
1136
- *result = CMinusInfinity;
1137
- {p = (( p + 10))-1;}
1138
- p--; {p++; cs = 29; goto _out;}
1139
- } else {
1140
- raise_parse_error("unexpected token at '%s'", p);
1141
- }
580
+ cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
1142
581
  }
1143
- np = JSON_parse_number(json, p, pe, result);
1144
- if (np != NULL) {
1145
- {p = (( np))-1;}
582
+
583
+ if (RB_LIKELY(cached_key)) {
584
+ return cached_key;
1146
585
  }
1147
- p--; {p++; cs = 29; goto _out;}
1148
586
  }
1149
- goto st29;
1150
- tr7:
1151
- #line 627 "parser.rl"
1152
- {
1153
- char *np;
1154
- json->in_array++;
1155
- np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
1156
- json->in_array--;
1157
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
1158
- }
1159
- goto st29;
1160
- tr11:
1161
- #line 635 "parser.rl"
1162
- {
1163
- char *np;
1164
- np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
1165
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
1166
- }
1167
- goto st29;
1168
- tr25:
1169
- #line 592 "parser.rl"
1170
- {
1171
- if (json->allow_nan) {
1172
- *result = CInfinity;
1173
- } else {
1174
- raise_parse_error("unexpected token at '%s'", p - 7);
587
+
588
+ VALUE result = rb_str_buf_new(bufferSize);
589
+ rb_enc_associate_index(result, utf8_encindex);
590
+ buffer = RSTRING_PTR(result);
591
+ bufferStart = buffer;
592
+
593
+ while (pe < stringEnd && (pe = memchr(pe, '\\', stringEnd - pe))) {
594
+ unescape = (char *) "?";
595
+ unescape_len = 1;
596
+ if (pe > p) {
597
+ MEMCPY(buffer, p, char, pe - p);
598
+ buffer += pe - p;
1175
599
  }
1176
- }
1177
- goto st29;
1178
- tr27:
1179
- #line 585 "parser.rl"
1180
- {
1181
- if (json->allow_nan) {
1182
- *result = CNaN;
1183
- } else {
1184
- raise_parse_error("unexpected token at '%s'", p - 2);
600
+ switch (*++pe) {
601
+ case 'n':
602
+ unescape = (char *) "\n";
603
+ break;
604
+ case 'r':
605
+ unescape = (char *) "\r";
606
+ break;
607
+ case 't':
608
+ unescape = (char *) "\t";
609
+ break;
610
+ case '"':
611
+ unescape = (char *) "\"";
612
+ break;
613
+ case '\\':
614
+ unescape = (char *) "\\";
615
+ break;
616
+ case 'b':
617
+ unescape = (char *) "\b";
618
+ break;
619
+ case 'f':
620
+ unescape = (char *) "\f";
621
+ break;
622
+ case 'u':
623
+ if (pe > stringEnd - 5) {
624
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
625
+ } else {
626
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
627
+ pe += 3;
628
+ /* To handle values above U+FFFF, we take a sequence of
629
+ * \uXXXX escapes in the U+D800..U+DBFF then
630
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
631
+ * to make a 20-bit number, then add 0x10000 to get the
632
+ * final codepoint.
633
+ *
634
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
635
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
636
+ * Area".
637
+ */
638
+ if ((ch & 0xFC00) == 0xD800) {
639
+ pe++;
640
+ if (pe > stringEnd - 6) {
641
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
642
+ }
643
+ if (pe[0] == '\\' && pe[1] == 'u') {
644
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
645
+ ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
646
+ | (sur & 0x3FF));
647
+ pe += 5;
648
+ } else {
649
+ unescape = (char *) "?";
650
+ break;
651
+ }
652
+ }
653
+ unescape_len = convert_UTF32_to_UTF8(buf, ch);
654
+ unescape = buf;
655
+ }
656
+ break;
657
+ default:
658
+ p = pe;
659
+ continue;
1185
660
  }
1186
- }
1187
- goto st29;
1188
- tr31:
1189
- #line 579 "parser.rl"
1190
- {
1191
- *result = Qfalse;
1192
- }
1193
- goto st29;
1194
- tr34:
1195
- #line 576 "parser.rl"
1196
- {
1197
- *result = Qnil;
1198
- }
1199
- goto st29;
1200
- tr37:
1201
- #line 582 "parser.rl"
1202
- {
1203
- *result = Qtrue;
1204
- }
1205
- goto st29;
1206
- st29:
1207
- if ( ++p == pe )
1208
- goto _test_eof29;
1209
- case 29:
1210
- #line 641 "parser.rl"
1211
- { p--; {p++; cs = 29; goto _out;} }
1212
- #line 1213 "parser.c"
1213
- switch( (*p) ) {
1214
- case 13: goto st29;
1215
- case 32: goto st29;
1216
- case 47: goto st2;
1217
- }
1218
- if ( 9 <= (*p) && (*p) <= 10 )
1219
- goto st29;
1220
- goto st0;
1221
- st2:
1222
- if ( ++p == pe )
1223
- goto _test_eof2;
1224
- case 2:
1225
- switch( (*p) ) {
1226
- case 42: goto st3;
1227
- case 47: goto st5;
1228
- }
1229
- goto st0;
1230
- st3:
1231
- if ( ++p == pe )
1232
- goto _test_eof3;
1233
- case 3:
1234
- if ( (*p) == 42 )
1235
- goto st4;
1236
- goto st3;
1237
- st4:
1238
- if ( ++p == pe )
1239
- goto _test_eof4;
1240
- case 4:
1241
- switch( (*p) ) {
1242
- case 42: goto st4;
1243
- case 47: goto st29;
1244
- }
1245
- goto st3;
1246
- st5:
1247
- if ( ++p == pe )
1248
- goto _test_eof5;
1249
- case 5:
1250
- if ( (*p) == 10 )
1251
- goto st29;
1252
- goto st5;
1253
- st6:
1254
- if ( ++p == pe )
1255
- goto _test_eof6;
1256
- case 6:
1257
- switch( (*p) ) {
1258
- case 42: goto st7;
1259
- case 47: goto st9;
1260
- }
1261
- goto st0;
1262
- st7:
1263
- if ( ++p == pe )
1264
- goto _test_eof7;
1265
- case 7:
1266
- if ( (*p) == 42 )
1267
- goto st8;
1268
- goto st7;
1269
- st8:
1270
- if ( ++p == pe )
1271
- goto _test_eof8;
1272
- case 8:
1273
- switch( (*p) ) {
1274
- case 42: goto st8;
1275
- case 47: goto st1;
1276
- }
1277
- goto st7;
1278
- st9:
1279
- if ( ++p == pe )
1280
- goto _test_eof9;
1281
- case 9:
1282
- if ( (*p) == 10 )
1283
- goto st1;
1284
- goto st9;
1285
- st10:
1286
- if ( ++p == pe )
1287
- goto _test_eof10;
1288
- case 10:
1289
- if ( (*p) == 110 )
1290
- goto st11;
1291
- goto st0;
1292
- st11:
1293
- if ( ++p == pe )
1294
- goto _test_eof11;
1295
- case 11:
1296
- if ( (*p) == 102 )
1297
- goto st12;
1298
- goto st0;
1299
- st12:
1300
- if ( ++p == pe )
1301
- goto _test_eof12;
1302
- case 12:
1303
- if ( (*p) == 105 )
1304
- goto st13;
1305
- goto st0;
1306
- st13:
1307
- if ( ++p == pe )
1308
- goto _test_eof13;
1309
- case 13:
1310
- if ( (*p) == 110 )
1311
- goto st14;
1312
- goto st0;
1313
- st14:
1314
- if ( ++p == pe )
1315
- goto _test_eof14;
1316
- case 14:
1317
- if ( (*p) == 105 )
1318
- goto st15;
1319
- goto st0;
1320
- st15:
1321
- if ( ++p == pe )
1322
- goto _test_eof15;
1323
- case 15:
1324
- if ( (*p) == 116 )
1325
- goto st16;
1326
- goto st0;
1327
- st16:
1328
- if ( ++p == pe )
1329
- goto _test_eof16;
1330
- case 16:
1331
- if ( (*p) == 121 )
1332
- goto tr25;
1333
- goto st0;
1334
- st17:
1335
- if ( ++p == pe )
1336
- goto _test_eof17;
1337
- case 17:
1338
- if ( (*p) == 97 )
1339
- goto st18;
1340
- goto st0;
1341
- st18:
1342
- if ( ++p == pe )
1343
- goto _test_eof18;
1344
- case 18:
1345
- if ( (*p) == 78 )
1346
- goto tr27;
1347
- goto st0;
1348
- st19:
1349
- if ( ++p == pe )
1350
- goto _test_eof19;
1351
- case 19:
1352
- if ( (*p) == 97 )
1353
- goto st20;
1354
- goto st0;
1355
- st20:
1356
- if ( ++p == pe )
1357
- goto _test_eof20;
1358
- case 20:
1359
- if ( (*p) == 108 )
1360
- goto st21;
1361
- goto st0;
1362
- st21:
1363
- if ( ++p == pe )
1364
- goto _test_eof21;
1365
- case 21:
1366
- if ( (*p) == 115 )
1367
- goto st22;
1368
- goto st0;
1369
- st22:
1370
- if ( ++p == pe )
1371
- goto _test_eof22;
1372
- case 22:
1373
- if ( (*p) == 101 )
1374
- goto tr31;
1375
- goto st0;
1376
- st23:
1377
- if ( ++p == pe )
1378
- goto _test_eof23;
1379
- case 23:
1380
- if ( (*p) == 117 )
1381
- goto st24;
1382
- goto st0;
1383
- st24:
1384
- if ( ++p == pe )
1385
- goto _test_eof24;
1386
- case 24:
1387
- if ( (*p) == 108 )
1388
- goto st25;
1389
- goto st0;
1390
- st25:
1391
- if ( ++p == pe )
1392
- goto _test_eof25;
1393
- case 25:
1394
- if ( (*p) == 108 )
1395
- goto tr34;
1396
- goto st0;
1397
- st26:
1398
- if ( ++p == pe )
1399
- goto _test_eof26;
1400
- case 26:
1401
- if ( (*p) == 114 )
1402
- goto st27;
1403
- goto st0;
1404
- st27:
1405
- if ( ++p == pe )
1406
- goto _test_eof27;
1407
- case 27:
1408
- if ( (*p) == 117 )
1409
- goto st28;
1410
- goto st0;
1411
- st28:
1412
- if ( ++p == pe )
1413
- goto _test_eof28;
1414
- case 28:
1415
- if ( (*p) == 101 )
1416
- goto tr37;
1417
- goto st0;
1418
- }
1419
- _test_eof1: cs = 1; goto _test_eof;
1420
- _test_eof29: cs = 29; goto _test_eof;
1421
- _test_eof2: cs = 2; goto _test_eof;
1422
- _test_eof3: cs = 3; goto _test_eof;
1423
- _test_eof4: cs = 4; goto _test_eof;
1424
- _test_eof5: cs = 5; goto _test_eof;
1425
- _test_eof6: cs = 6; goto _test_eof;
1426
- _test_eof7: cs = 7; goto _test_eof;
1427
- _test_eof8: cs = 8; goto _test_eof;
1428
- _test_eof9: cs = 9; goto _test_eof;
1429
- _test_eof10: cs = 10; goto _test_eof;
1430
- _test_eof11: cs = 11; goto _test_eof;
1431
- _test_eof12: cs = 12; goto _test_eof;
1432
- _test_eof13: cs = 13; goto _test_eof;
1433
- _test_eof14: cs = 14; goto _test_eof;
1434
- _test_eof15: cs = 15; goto _test_eof;
1435
- _test_eof16: cs = 16; goto _test_eof;
1436
- _test_eof17: cs = 17; goto _test_eof;
1437
- _test_eof18: cs = 18; goto _test_eof;
1438
- _test_eof19: cs = 19; goto _test_eof;
1439
- _test_eof20: cs = 20; goto _test_eof;
1440
- _test_eof21: cs = 21; goto _test_eof;
1441
- _test_eof22: cs = 22; goto _test_eof;
1442
- _test_eof23: cs = 23; goto _test_eof;
1443
- _test_eof24: cs = 24; goto _test_eof;
1444
- _test_eof25: cs = 25; goto _test_eof;
1445
- _test_eof26: cs = 26; goto _test_eof;
1446
- _test_eof27: cs = 27; goto _test_eof;
1447
- _test_eof28: cs = 28; goto _test_eof;
1448
-
1449
- _test_eof: {}
1450
- _out: {}
1451
- }
1452
-
1453
- #line 662 "parser.rl"
1454
-
1455
- if (json->freeze) {
1456
- OBJ_FREEZE(*result);
661
+ MEMCPY(buffer, unescape, char, unescape_len);
662
+ buffer += unescape_len;
663
+ p = ++pe;
1457
664
  }
1458
665
 
1459
- if (cs >= JSON_value_first_final) {
1460
- PUSH(*result);
1461
- return p;
1462
- } else {
1463
- return NULL;
666
+ if (stringEnd > p) {
667
+ MEMCPY(buffer, p, char, stringEnd - p);
668
+ buffer += stringEnd - p;
1464
669
  }
1465
- }
1466
-
1467
-
1468
- #line 1469 "parser.c"
1469
- enum {JSON_integer_start = 1};
1470
- enum {JSON_integer_first_final = 3};
1471
- enum {JSON_integer_error = 0};
1472
-
1473
- enum {JSON_integer_en_main = 1};
1474
-
670
+ rb_str_set_len(result, buffer - bufferStart);
1475
671
 
1476
- #line 683 "parser.rl"
672
+ if (symbolize) {
673
+ result = rb_str_intern(result);
674
+ } else if (intern) {
675
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
676
+ }
1477
677
 
678
+ return result;
679
+ }
1478
680
 
1479
681
  #define MAX_FAST_INTEGER_SIZE 18
1480
- static inline VALUE fast_parse_integer(char *p, char *pe)
682
+ static inline VALUE fast_decode_integer(const char *p, const char *pe)
1481
683
  {
1482
684
  bool negative = false;
1483
685
  if (*p == '-') {
@@ -1498,1102 +700,403 @@ static inline VALUE fast_parse_integer(char *p, char *pe)
1498
700
  return LL2NUM(memo);
1499
701
  }
1500
702
 
1501
- static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result)
703
+ static VALUE json_decode_large_integer(const char *start, long len)
704
+ {
705
+ VALUE buffer_v;
706
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
707
+ MEMCPY(buffer, start, char, len);
708
+ buffer[len] = '\0';
709
+ VALUE number = rb_cstr2inum(buffer, 10);
710
+ RB_ALLOCV_END(buffer_v);
711
+ return number;
712
+ }
713
+
714
+ static inline VALUE
715
+ json_decode_integer(const char *start, const char *end)
1502
716
  {
1503
- long len = p - json->memo;
717
+ long len = end - start;
1504
718
  if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
1505
- *result = fast_parse_integer(json->memo, p);
1506
- } else {
1507
- fbuffer_clear(&json->fbuffer);
1508
- fbuffer_append(&json->fbuffer, json->memo, len);
1509
- fbuffer_append_char(&json->fbuffer, '\0');
1510
- *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
719
+ return fast_decode_integer(start, end);
1511
720
  }
1512
- return p + 1;
721
+ return json_decode_large_integer(start, len);
1513
722
  }
1514
723
 
724
+ static VALUE json_decode_large_float(const char *start, long len)
725
+ {
726
+ VALUE buffer_v;
727
+ char *buffer = RB_ALLOCV_N(char, buffer_v, len + 1);
728
+ MEMCPY(buffer, start, char, len);
729
+ buffer[len] = '\0';
730
+ VALUE number = DBL2NUM(rb_cstr_to_dbl(buffer, 1));
731
+ RB_ALLOCV_END(buffer_v);
732
+ return number;
733
+ }
1515
734
 
1516
- #line 1517 "parser.c"
1517
- enum {JSON_float_start = 1};
1518
- enum {JSON_float_first_final = 6};
1519
- enum {JSON_float_error = 0};
1520
-
1521
- enum {JSON_float_en_main = 1};
735
+ static VALUE json_decode_float(JSON_ParserConfig *config, const char *start, const char *end)
736
+ {
737
+ long len = end - start;
738
+
739
+ if (RB_UNLIKELY(config->decimal_class)) {
740
+ VALUE text = rb_str_new(start, len);
741
+ return rb_funcallv(config->decimal_class, config->decimal_method_id, 1, &text);
742
+ } else if (RB_LIKELY(len < 64)) {
743
+ char buffer[64];
744
+ MEMCPY(buffer, start, char, len);
745
+ buffer[len] = '\0';
746
+ return DBL2NUM(rb_cstr_to_dbl(buffer, 1));
747
+ } else {
748
+ return json_decode_large_float(start, len);
749
+ }
750
+ }
1522
751
 
752
+ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
753
+ {
754
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
755
+ rvalue_stack_pop(state->stack, count);
1523
756
 
1524
- #line 735 "parser.rl"
757
+ if (config->freeze) {
758
+ RB_OBJ_FREEZE(array);
759
+ }
1525
760
 
761
+ return array;
762
+ }
1526
763
 
1527
- static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result)
764
+ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
1528
765
  {
1529
- int cs = EVIL;
1530
- bool is_float = false;
1531
-
1532
-
1533
- #line 1534 "parser.c"
1534
- {
1535
- cs = JSON_float_start;
1536
- }
1537
-
1538
- #line 743 "parser.rl"
1539
- json->memo = p;
1540
-
1541
- #line 1542 "parser.c"
1542
- {
1543
- if ( p == pe )
1544
- goto _test_eof;
1545
- switch ( cs )
1546
- {
1547
- case 1:
1548
- switch( (*p) ) {
1549
- case 45: goto st2;
1550
- case 48: goto st6;
1551
- }
1552
- if ( 49 <= (*p) && (*p) <= 57 )
1553
- goto st10;
1554
- goto st0;
1555
- st0:
1556
- cs = 0;
1557
- goto _out;
1558
- st2:
1559
- if ( ++p == pe )
1560
- goto _test_eof2;
1561
- case 2:
1562
- if ( (*p) == 48 )
1563
- goto st6;
1564
- if ( 49 <= (*p) && (*p) <= 57 )
1565
- goto st10;
1566
- goto st0;
1567
- st6:
1568
- if ( ++p == pe )
1569
- goto _test_eof6;
1570
- case 6:
1571
- switch( (*p) ) {
1572
- case 45: goto st0;
1573
- case 46: goto tr8;
1574
- case 69: goto tr9;
1575
- case 101: goto tr9;
1576
- }
1577
- if ( 48 <= (*p) && (*p) <= 57 )
1578
- goto st0;
1579
- goto tr7;
1580
- tr7:
1581
- #line 727 "parser.rl"
1582
- { p--; {p++; cs = 7; goto _out;} }
1583
- goto st7;
1584
- st7:
1585
- if ( ++p == pe )
1586
- goto _test_eof7;
1587
- case 7:
1588
- #line 1589 "parser.c"
1589
- goto st0;
1590
- tr8:
1591
- #line 728 "parser.rl"
1592
- { is_float = true; }
1593
- goto st3;
1594
- st3:
1595
- if ( ++p == pe )
1596
- goto _test_eof3;
1597
- case 3:
1598
- #line 1599 "parser.c"
1599
- if ( 48 <= (*p) && (*p) <= 57 )
1600
- goto st8;
1601
- goto st0;
1602
- st8:
1603
- if ( ++p == pe )
1604
- goto _test_eof8;
1605
- case 8:
1606
- switch( (*p) ) {
1607
- case 69: goto st4;
1608
- case 101: goto st4;
1609
- }
1610
- if ( (*p) > 46 ) {
1611
- if ( 48 <= (*p) && (*p) <= 57 )
1612
- goto st8;
1613
- } else if ( (*p) >= 45 )
1614
- goto st0;
1615
- goto tr7;
1616
- tr9:
1617
- #line 728 "parser.rl"
1618
- { is_float = true; }
1619
- goto st4;
1620
- st4:
1621
- if ( ++p == pe )
1622
- goto _test_eof4;
1623
- case 4:
1624
- #line 1625 "parser.c"
1625
- switch( (*p) ) {
1626
- case 43: goto st5;
1627
- case 45: goto st5;
1628
- }
1629
- if ( 48 <= (*p) && (*p) <= 57 )
1630
- goto st9;
1631
- goto st0;
1632
- st5:
1633
- if ( ++p == pe )
1634
- goto _test_eof5;
1635
- case 5:
1636
- if ( 48 <= (*p) && (*p) <= 57 )
1637
- goto st9;
1638
- goto st0;
1639
- st9:
1640
- if ( ++p == pe )
1641
- goto _test_eof9;
1642
- case 9:
1643
- switch( (*p) ) {
1644
- case 69: goto st0;
1645
- case 101: goto st0;
1646
- }
1647
- if ( (*p) > 46 ) {
1648
- if ( 48 <= (*p) && (*p) <= 57 )
1649
- goto st9;
1650
- } else if ( (*p) >= 45 )
1651
- goto st0;
1652
- goto tr7;
1653
- st10:
1654
- if ( ++p == pe )
1655
- goto _test_eof10;
1656
- case 10:
1657
- switch( (*p) ) {
1658
- case 45: goto st0;
1659
- case 46: goto tr8;
1660
- case 69: goto tr9;
1661
- case 101: goto tr9;
1662
- }
1663
- if ( 48 <= (*p) && (*p) <= 57 )
1664
- goto st10;
1665
- goto tr7;
1666
- }
1667
- _test_eof2: cs = 2; goto _test_eof;
1668
- _test_eof6: cs = 6; goto _test_eof;
1669
- _test_eof7: cs = 7; goto _test_eof;
1670
- _test_eof3: cs = 3; goto _test_eof;
1671
- _test_eof8: cs = 8; goto _test_eof;
1672
- _test_eof4: cs = 4; goto _test_eof;
1673
- _test_eof5: cs = 5; goto _test_eof;
1674
- _test_eof9: cs = 9; goto _test_eof;
1675
- _test_eof10: cs = 10; goto _test_eof;
1676
-
1677
- _test_eof: {}
1678
- _out: {}
1679
- }
1680
-
1681
- #line 745 "parser.rl"
1682
-
1683
- if (cs >= JSON_float_first_final) {
1684
- if (!is_float) {
1685
- return JSON_decode_integer(json, p, result);
1686
- }
1687
- VALUE mod = Qnil;
1688
- ID method_id = 0;
1689
- if (json->decimal_class) {
1690
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
1691
- mod = json->decimal_class;
1692
- method_id = i_try_convert;
1693
- } else if (rb_respond_to(json->decimal_class, i_new)) {
1694
- mod = json->decimal_class;
1695
- method_id = i_new;
1696
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1697
- VALUE name = rb_class_name(json->decimal_class);
1698
- const char *name_cstr = RSTRING_PTR(name);
1699
- const char *last_colon = strrchr(name_cstr, ':');
1700
- if (last_colon) {
1701
- const char *mod_path_end = last_colon - 1;
1702
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1703
- mod = rb_path_to_class(mod_path);
766
+ VALUE object = rb_hash_new_capa(count);
767
+ rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
1704
768
 
1705
- const char *method_name_beg = last_colon + 1;
1706
- long before_len = method_name_beg - name_cstr;
1707
- long len = RSTRING_LEN(name) - before_len;
1708
- VALUE method_name = rb_str_substr(name, before_len, len);
1709
- method_id = SYM2ID(rb_str_intern(method_name));
1710
- } else {
1711
- mod = rb_mKernel;
1712
- method_id = SYM2ID(rb_str_intern(name));
1713
- }
1714
- }
1715
- }
769
+ rvalue_stack_pop(state->stack, count);
1716
770
 
1717
- long len = p - json->memo;
1718
- fbuffer_clear(&json->fbuffer);
1719
- fbuffer_append(&json->fbuffer, json->memo, len);
1720
- fbuffer_append_char(&json->fbuffer, '\0');
771
+ if (config->freeze) {
772
+ RB_OBJ_FREEZE(object);
773
+ }
1721
774
 
1722
- if (method_id) {
1723
- VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer));
1724
- *result = rb_funcallv(mod, method_id, 1, &text);
1725
- } else {
1726
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1));
1727
- }
775
+ return object;
776
+ }
1728
777
 
1729
- return p + 1;
778
+ static inline VALUE json_decode_string(JSON_ParserState *state, JSON_ParserConfig *config, const char *start, const char *end, bool escaped, bool is_name)
779
+ {
780
+ VALUE string;
781
+ bool intern = is_name || config->freeze;
782
+ bool symbolize = is_name && config->symbolize_names;
783
+ if (escaped) {
784
+ string = json_string_unescape(state, start, end, is_name, intern, symbolize);
1730
785
  } else {
1731
- return NULL;
786
+ string = json_string_fastpath(state, start, end, is_name, intern, symbolize);
1732
787
  }
1733
- }
1734
-
1735
-
1736
-
1737
- #line 1738 "parser.c"
1738
- enum {JSON_array_start = 1};
1739
- enum {JSON_array_first_final = 22};
1740
- enum {JSON_array_error = 0};
1741
-
1742
- enum {JSON_array_en_main = 1};
1743
788
 
789
+ return string;
790
+ }
1744
791
 
1745
- #line 825 "parser.rl"
792
+ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *config, VALUE value)
793
+ {
794
+ if (RB_UNLIKELY(config->on_load_proc)) {
795
+ value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
796
+ }
797
+ rvalue_stack_push(state->stack, value, &state->stack_handle, &state->stack);
798
+ return value;
799
+ }
1746
800
 
801
+ static const bool string_scan[256] = {
802
+ // ASCII Control Characters
803
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
804
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
805
+ // ASCII Characters
806
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
807
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
808
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
809
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '\\'
810
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
811
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
812
+ };
1747
813
 
1748
- static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
814
+ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
1749
815
  {
1750
- int cs = EVIL;
1751
-
1752
- if (json->max_nesting && current_nesting > json->max_nesting) {
1753
- rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
1754
- }
1755
- long stack_head = json->stack->head;
1756
-
1757
-
1758
- #line 1759 "parser.c"
1759
- {
1760
- cs = JSON_array_start;
1761
- }
1762
-
1763
- #line 837 "parser.rl"
1764
-
1765
- #line 1766 "parser.c"
1766
- {
1767
- short _widec;
1768
- if ( p == pe )
1769
- goto _test_eof;
1770
- switch ( cs )
1771
- {
1772
- case 1:
1773
- if ( (*p) == 91 )
1774
- goto st2;
1775
- goto st0;
1776
- st0:
1777
- cs = 0;
1778
- goto _out;
1779
- st2:
1780
- if ( ++p == pe )
1781
- goto _test_eof2;
1782
- case 2:
1783
- switch( (*p) ) {
1784
- case 13: goto st2;
1785
- case 32: goto st2;
1786
- case 34: goto tr2;
1787
- case 45: goto tr2;
1788
- case 47: goto st18;
1789
- case 73: goto tr2;
1790
- case 78: goto tr2;
1791
- case 91: goto tr2;
1792
- case 93: goto tr4;
1793
- case 102: goto tr2;
1794
- case 110: goto tr2;
1795
- case 116: goto tr2;
1796
- case 123: goto tr2;
1797
- }
1798
- if ( (*p) > 10 ) {
1799
- if ( 48 <= (*p) && (*p) <= 57 )
1800
- goto tr2;
1801
- } else if ( (*p) >= 9 )
1802
- goto st2;
1803
- goto st0;
1804
- tr2:
1805
- #line 805 "parser.rl"
1806
- {
1807
- VALUE v = Qnil;
1808
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
1809
- if (np == NULL) {
1810
- p--; {p++; cs = 3; goto _out;}
1811
- } else {
1812
- {p = (( np))-1;}
1813
- }
1814
- }
1815
- goto st3;
1816
- st3:
1817
- if ( ++p == pe )
1818
- goto _test_eof3;
1819
- case 3:
1820
- #line 1821 "parser.c"
1821
- _widec = (*p);
1822
- if ( 44 <= (*p) && (*p) <= 44 ) {
1823
- _widec = (short)(128 + ((*p) - -128));
1824
- if (
1825
- #line 815 "parser.rl"
1826
- json->allow_trailing_comma ) _widec += 256;
1827
- }
1828
- switch( _widec ) {
1829
- case 13: goto st3;
1830
- case 32: goto st3;
1831
- case 47: goto st4;
1832
- case 93: goto tr4;
1833
- case 300: goto st8;
1834
- case 556: goto st13;
1835
- }
1836
- if ( 9 <= _widec && _widec <= 10 )
1837
- goto st3;
1838
- goto st0;
1839
- st4:
1840
- if ( ++p == pe )
1841
- goto _test_eof4;
1842
- case 4:
1843
- switch( (*p) ) {
1844
- case 42: goto st5;
1845
- case 47: goto st7;
1846
- }
1847
- goto st0;
1848
- st5:
1849
- if ( ++p == pe )
1850
- goto _test_eof5;
1851
- case 5:
1852
- if ( (*p) == 42 )
1853
- goto st6;
1854
- goto st5;
1855
- st6:
1856
- if ( ++p == pe )
1857
- goto _test_eof6;
1858
- case 6:
1859
- switch( (*p) ) {
1860
- case 42: goto st6;
1861
- case 47: goto st3;
1862
- }
1863
- goto st5;
1864
- st7:
1865
- if ( ++p == pe )
1866
- goto _test_eof7;
1867
- case 7:
1868
- if ( (*p) == 10 )
1869
- goto st3;
1870
- goto st7;
1871
- tr4:
1872
- #line 817 "parser.rl"
1873
- { p--; {p++; cs = 22; goto _out;} }
1874
- goto st22;
1875
- st22:
1876
- if ( ++p == pe )
1877
- goto _test_eof22;
1878
- case 22:
1879
- #line 1880 "parser.c"
1880
- goto st0;
1881
- st8:
1882
- if ( ++p == pe )
1883
- goto _test_eof8;
1884
- case 8:
1885
- switch( (*p) ) {
1886
- case 13: goto st8;
1887
- case 32: goto st8;
1888
- case 34: goto tr2;
1889
- case 45: goto tr2;
1890
- case 47: goto st9;
1891
- case 73: goto tr2;
1892
- case 78: goto tr2;
1893
- case 91: goto tr2;
1894
- case 102: goto tr2;
1895
- case 110: goto tr2;
1896
- case 116: goto tr2;
1897
- case 123: goto tr2;
1898
- }
1899
- if ( (*p) > 10 ) {
1900
- if ( 48 <= (*p) && (*p) <= 57 )
1901
- goto tr2;
1902
- } else if ( (*p) >= 9 )
1903
- goto st8;
1904
- goto st0;
1905
- st9:
1906
- if ( ++p == pe )
1907
- goto _test_eof9;
1908
- case 9:
1909
- switch( (*p) ) {
1910
- case 42: goto st10;
1911
- case 47: goto st12;
1912
- }
1913
- goto st0;
1914
- st10:
1915
- if ( ++p == pe )
1916
- goto _test_eof10;
1917
- case 10:
1918
- if ( (*p) == 42 )
1919
- goto st11;
1920
- goto st10;
1921
- st11:
1922
- if ( ++p == pe )
1923
- goto _test_eof11;
1924
- case 11:
1925
- switch( (*p) ) {
1926
- case 42: goto st11;
1927
- case 47: goto st8;
1928
- }
1929
- goto st10;
1930
- st12:
1931
- if ( ++p == pe )
1932
- goto _test_eof12;
1933
- case 12:
1934
- if ( (*p) == 10 )
1935
- goto st8;
1936
- goto st12;
1937
- st13:
1938
- if ( ++p == pe )
1939
- goto _test_eof13;
1940
- case 13:
1941
- _widec = (*p);
1942
- if ( (*p) < 13 ) {
1943
- if ( (*p) > 9 ) {
1944
- if ( 10 <= (*p) && (*p) <= 10 ) {
1945
- _widec = (short)(128 + ((*p) - -128));
1946
- if (
1947
- #line 815 "parser.rl"
1948
- json->allow_trailing_comma ) _widec += 256;
1949
- }
1950
- } else if ( (*p) >= 9 ) {
1951
- _widec = (short)(128 + ((*p) - -128));
1952
- if (
1953
- #line 815 "parser.rl"
1954
- json->allow_trailing_comma ) _widec += 256;
1955
- }
1956
- } else if ( (*p) > 13 ) {
1957
- if ( (*p) > 32 ) {
1958
- if ( 47 <= (*p) && (*p) <= 47 ) {
1959
- _widec = (short)(128 + ((*p) - -128));
1960
- if (
1961
- #line 815 "parser.rl"
1962
- json->allow_trailing_comma ) _widec += 256;
1963
- }
1964
- } else if ( (*p) >= 32 ) {
1965
- _widec = (short)(128 + ((*p) - -128));
1966
- if (
1967
- #line 815 "parser.rl"
1968
- json->allow_trailing_comma ) _widec += 256;
1969
- }
1970
- } else {
1971
- _widec = (short)(128 + ((*p) - -128));
1972
- if (
1973
- #line 815 "parser.rl"
1974
- json->allow_trailing_comma ) _widec += 256;
1975
- }
1976
- switch( _widec ) {
1977
- case 34: goto tr2;
1978
- case 45: goto tr2;
1979
- case 73: goto tr2;
1980
- case 78: goto tr2;
1981
- case 91: goto tr2;
1982
- case 93: goto tr4;
1983
- case 102: goto tr2;
1984
- case 110: goto tr2;
1985
- case 116: goto tr2;
1986
- case 123: goto tr2;
1987
- case 269: goto st8;
1988
- case 288: goto st8;
1989
- case 303: goto st9;
1990
- case 525: goto st13;
1991
- case 544: goto st13;
1992
- case 559: goto st14;
1993
- }
1994
- if ( _widec < 265 ) {
1995
- if ( 48 <= _widec && _widec <= 57 )
1996
- goto tr2;
1997
- } else if ( _widec > 266 ) {
1998
- if ( 521 <= _widec && _widec <= 522 )
1999
- goto st13;
2000
- } else
2001
- goto st8;
2002
- goto st0;
2003
- st14:
2004
- if ( ++p == pe )
2005
- goto _test_eof14;
2006
- case 14:
2007
- _widec = (*p);
2008
- if ( (*p) > 42 ) {
2009
- if ( 47 <= (*p) && (*p) <= 47 ) {
2010
- _widec = (short)(128 + ((*p) - -128));
2011
- if (
2012
- #line 815 "parser.rl"
2013
- json->allow_trailing_comma ) _widec += 256;
2014
- }
2015
- } else if ( (*p) >= 42 ) {
2016
- _widec = (short)(128 + ((*p) - -128));
2017
- if (
2018
- #line 815 "parser.rl"
2019
- json->allow_trailing_comma ) _widec += 256;
2020
- }
2021
- switch( _widec ) {
2022
- case 298: goto st10;
2023
- case 303: goto st12;
2024
- case 554: goto st15;
2025
- case 559: goto st17;
2026
- }
2027
- goto st0;
2028
- st15:
2029
- if ( ++p == pe )
2030
- goto _test_eof15;
2031
- case 15:
2032
- _widec = (*p);
2033
- if ( (*p) < 42 ) {
2034
- if ( (*p) <= 41 ) {
2035
- _widec = (short)(128 + ((*p) - -128));
2036
- if (
2037
- #line 815 "parser.rl"
2038
- json->allow_trailing_comma ) _widec += 256;
2039
- }
2040
- } else if ( (*p) > 42 ) {
2041
- if ( 43 <= (*p) )
2042
- { _widec = (short)(128 + ((*p) - -128));
2043
- if (
2044
- #line 815 "parser.rl"
2045
- json->allow_trailing_comma ) _widec += 256;
2046
- }
2047
- } else {
2048
- _widec = (short)(128 + ((*p) - -128));
2049
- if (
2050
- #line 815 "parser.rl"
2051
- json->allow_trailing_comma ) _widec += 256;
2052
- }
2053
- switch( _widec ) {
2054
- case 298: goto st11;
2055
- case 554: goto st16;
2056
- }
2057
- if ( _widec > 383 ) {
2058
- if ( 384 <= _widec && _widec <= 639 )
2059
- goto st15;
2060
- } else if ( _widec >= 128 )
2061
- goto st10;
2062
- goto st0;
2063
- st16:
2064
- if ( ++p == pe )
2065
- goto _test_eof16;
2066
- case 16:
2067
- _widec = (*p);
2068
- if ( (*p) < 43 ) {
2069
- if ( (*p) > 41 ) {
2070
- if ( 42 <= (*p) && (*p) <= 42 ) {
2071
- _widec = (short)(128 + ((*p) - -128));
2072
- if (
2073
- #line 815 "parser.rl"
2074
- json->allow_trailing_comma ) _widec += 256;
2075
- }
2076
- } else {
2077
- _widec = (short)(128 + ((*p) - -128));
2078
- if (
2079
- #line 815 "parser.rl"
2080
- json->allow_trailing_comma ) _widec += 256;
2081
- }
2082
- } else if ( (*p) > 46 ) {
2083
- if ( (*p) > 47 ) {
2084
- if ( 48 <= (*p) )
2085
- { _widec = (short)(128 + ((*p) - -128));
2086
- if (
2087
- #line 815 "parser.rl"
2088
- json->allow_trailing_comma ) _widec += 256;
2089
- }
2090
- } else if ( (*p) >= 47 ) {
2091
- _widec = (short)(128 + ((*p) - -128));
2092
- if (
2093
- #line 815 "parser.rl"
2094
- json->allow_trailing_comma ) _widec += 256;
2095
- }
2096
- } else {
2097
- _widec = (short)(128 + ((*p) - -128));
2098
- if (
2099
- #line 815 "parser.rl"
2100
- json->allow_trailing_comma ) _widec += 256;
2101
- }
2102
- switch( _widec ) {
2103
- case 298: goto st11;
2104
- case 303: goto st8;
2105
- case 554: goto st16;
2106
- case 559: goto st13;
2107
- }
2108
- if ( _widec > 383 ) {
2109
- if ( 384 <= _widec && _widec <= 639 )
2110
- goto st15;
2111
- } else if ( _widec >= 128 )
2112
- goto st10;
2113
- goto st0;
2114
- st17:
2115
- if ( ++p == pe )
2116
- goto _test_eof17;
2117
- case 17:
2118
- _widec = (*p);
2119
- if ( (*p) < 10 ) {
2120
- if ( (*p) <= 9 ) {
2121
- _widec = (short)(128 + ((*p) - -128));
2122
- if (
2123
- #line 815 "parser.rl"
2124
- json->allow_trailing_comma ) _widec += 256;
2125
- }
2126
- } else if ( (*p) > 10 ) {
2127
- if ( 11 <= (*p) )
2128
- { _widec = (short)(128 + ((*p) - -128));
2129
- if (
2130
- #line 815 "parser.rl"
2131
- json->allow_trailing_comma ) _widec += 256;
2132
- }
2133
- } else {
2134
- _widec = (short)(128 + ((*p) - -128));
2135
- if (
2136
- #line 815 "parser.rl"
2137
- json->allow_trailing_comma ) _widec += 256;
2138
- }
2139
- switch( _widec ) {
2140
- case 266: goto st8;
2141
- case 522: goto st13;
2142
- }
2143
- if ( _widec > 383 ) {
2144
- if ( 384 <= _widec && _widec <= 639 )
2145
- goto st17;
2146
- } else if ( _widec >= 128 )
2147
- goto st12;
2148
- goto st0;
2149
- st18:
2150
- if ( ++p == pe )
2151
- goto _test_eof18;
2152
- case 18:
2153
- switch( (*p) ) {
2154
- case 42: goto st19;
2155
- case 47: goto st21;
2156
- }
2157
- goto st0;
2158
- st19:
2159
- if ( ++p == pe )
2160
- goto _test_eof19;
2161
- case 19:
2162
- if ( (*p) == 42 )
2163
- goto st20;
2164
- goto st19;
2165
- st20:
2166
- if ( ++p == pe )
2167
- goto _test_eof20;
2168
- case 20:
2169
- switch( (*p) ) {
2170
- case 42: goto st20;
2171
- case 47: goto st2;
2172
- }
2173
- goto st19;
2174
- st21:
2175
- if ( ++p == pe )
2176
- goto _test_eof21;
2177
- case 21:
2178
- if ( (*p) == 10 )
2179
- goto st2;
2180
- goto st21;
2181
- }
2182
- _test_eof2: cs = 2; goto _test_eof;
2183
- _test_eof3: cs = 3; goto _test_eof;
2184
- _test_eof4: cs = 4; goto _test_eof;
2185
- _test_eof5: cs = 5; goto _test_eof;
2186
- _test_eof6: cs = 6; goto _test_eof;
2187
- _test_eof7: cs = 7; goto _test_eof;
2188
- _test_eof22: cs = 22; goto _test_eof;
2189
- _test_eof8: cs = 8; goto _test_eof;
2190
- _test_eof9: cs = 9; goto _test_eof;
2191
- _test_eof10: cs = 10; goto _test_eof;
2192
- _test_eof11: cs = 11; goto _test_eof;
2193
- _test_eof12: cs = 12; goto _test_eof;
2194
- _test_eof13: cs = 13; goto _test_eof;
2195
- _test_eof14: cs = 14; goto _test_eof;
2196
- _test_eof15: cs = 15; goto _test_eof;
2197
- _test_eof16: cs = 16; goto _test_eof;
2198
- _test_eof17: cs = 17; goto _test_eof;
2199
- _test_eof18: cs = 18; goto _test_eof;
2200
- _test_eof19: cs = 19; goto _test_eof;
2201
- _test_eof20: cs = 20; goto _test_eof;
2202
- _test_eof21: cs = 21; goto _test_eof;
2203
-
2204
- _test_eof: {}
2205
- _out: {}
2206
- }
2207
-
2208
- #line 838 "parser.rl"
2209
-
2210
- if(cs >= JSON_array_first_final) {
2211
- long count = json->stack->head - stack_head;
2212
-
2213
- if (RB_UNLIKELY(json->array_class)) {
2214
- VALUE array = rb_class_new_instance(0, 0, json->array_class);
2215
- VALUE *items = rvalue_stack_peek(json->stack, count);
2216
- long index;
2217
- for (index = 0; index < count; index++) {
2218
- rb_funcall(array, i_leftshift, 1, items[index]);
816
+ state->cursor++;
817
+ const char *start = state->cursor;
818
+ bool escaped = false;
819
+
820
+ while (state->cursor < state->end) {
821
+ if (RB_UNLIKELY(string_scan[(unsigned char)*state->cursor])) {
822
+ switch (*state->cursor) {
823
+ case '"': {
824
+ VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
825
+ state->cursor++;
826
+ return json_push_value(state, config, string);
827
+ }
828
+ case '\\': {
829
+ state->cursor++;
830
+ escaped = true;
831
+ if ((unsigned char)*state->cursor < 0x20) {
832
+ raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
833
+ }
834
+ break;
835
+ }
836
+ default:
837
+ raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
838
+ break;
2219
839
  }
2220
- *result = array;
2221
- } else {
2222
- VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
2223
- *result = array;
2224
840
  }
2225
- rvalue_stack_pop(json->stack, count);
2226
841
 
2227
- return p + 1;
2228
- } else {
2229
- raise_parse_error("unexpected token at '%s'", p);
2230
- return NULL;
842
+ state->cursor++;
2231
843
  }
844
+
845
+ raise_parse_error("unexpected end of input, expected closing \"", state->cursor);
846
+ return Qfalse;
2232
847
  }
2233
848
 
2234
- static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
849
+ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
2235
850
  {
2236
- if (symbolize) {
2237
- intern = true;
2238
- }
2239
- VALUE result;
2240
- # ifdef HAVE_RB_ENC_INTERNED_STR
2241
- if (intern) {
2242
- result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
2243
- } else {
2244
- result = rb_utf8_str_new(start, (long)(end - start));
851
+ json_eat_whitespace(state);
852
+ if (state->cursor >= state->end) {
853
+ raise_parse_error("unexpected end of input", state->cursor);
2245
854
  }
2246
- # else
2247
- result = rb_utf8_str_new(start, (long)(end - start));
2248
- if (intern) {
2249
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
2250
- }
2251
- # endif
2252
855
 
2253
- if (symbolize) {
2254
- result = rb_str_intern(result);
2255
- }
856
+ switch (*state->cursor) {
857
+ case 'n':
858
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "null", 4) == 0)) {
859
+ state->cursor += 4;
860
+ return json_push_value(state, config, Qnil);
861
+ }
2256
862
 
2257
- return result;
2258
- }
863
+ raise_parse_error("unexpected token at '%s'", state->cursor);
864
+ break;
865
+ case 't':
866
+ if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
867
+ state->cursor += 4;
868
+ return json_push_value(state, config, Qtrue);
869
+ }
2259
870
 
2260
- static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
2261
- {
2262
- size_t bufferSize = stringEnd - string;
871
+ raise_parse_error("unexpected token at '%s'", state->cursor);
872
+ break;
873
+ case 'f':
874
+ // Note: memcmp with a small power of two compile to an integer comparison
875
+ if ((state->end - state->cursor >= 5) && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
876
+ state->cursor += 5;
877
+ return json_push_value(state, config, Qfalse);
878
+ }
2263
879
 
2264
- if (is_name && json->in_array) {
2265
- VALUE cached_key;
2266
- if (RB_UNLIKELY(symbolize)) {
2267
- cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
2268
- } else {
2269
- cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
2270
- }
880
+ raise_parse_error("unexpected token at '%s'", state->cursor);
881
+ break;
882
+ case 'N':
883
+ // Note: memcmp with a small power of two compile to an integer comparison
884
+ if (config->allow_nan && (state->end - state->cursor >= 3) && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
885
+ state->cursor += 3;
886
+ return json_push_value(state, config, CNaN);
887
+ }
2271
888
 
2272
- if (RB_LIKELY(cached_key)) {
2273
- return cached_key;
2274
- }
2275
- }
889
+ raise_parse_error("unexpected token at '%s'", state->cursor);
890
+ break;
891
+ case 'I':
892
+ if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
893
+ state->cursor += 8;
894
+ return json_push_value(state, config, CInfinity);
895
+ }
2276
896
 
2277
- return build_string(string, stringEnd, intern, symbolize);
2278
- }
897
+ raise_parse_error("unexpected token at '%s'", state->cursor);
898
+ break;
899
+ case '-':
900
+ // Note: memcmp with a small power of two compile to an integer comparison
901
+ if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
902
+ if (config->allow_nan) {
903
+ state->cursor += 9;
904
+ return json_push_value(state, config, CMinusInfinity);
905
+ } else {
906
+ raise_parse_error("unexpected token at '%s'", state->cursor);
907
+ }
908
+ }
909
+ // Fallthrough
910
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
911
+ bool integer = true;
2279
912
 
2280
- static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
2281
- {
2282
- size_t bufferSize = stringEnd - string;
2283
- char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
2284
- int unescape_len;
2285
- char buf[4];
913
+ // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
914
+ const char *start = state->cursor;
915
+ state->cursor++;
2286
916
 
2287
- if (is_name && json->in_array) {
2288
- VALUE cached_key;
2289
- if (RB_UNLIKELY(symbolize)) {
2290
- cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
2291
- } else {
2292
- cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
2293
- }
917
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
918
+ state->cursor++;
919
+ }
2294
920
 
2295
- if (RB_LIKELY(cached_key)) {
2296
- return cached_key;
2297
- }
2298
- }
921
+ long integer_length = state->cursor - start;
2299
922
 
2300
- pe = memchr(p, '\\', bufferSize);
2301
- if (RB_UNLIKELY(pe == NULL)) {
2302
- return build_string(string, stringEnd, intern, symbolize);
2303
- }
923
+ if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
924
+ raise_parse_error("invalid number: %s", start);
925
+ } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
926
+ raise_parse_error("invalid number: %s", start);
927
+ } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
928
+ raise_parse_error("invalid number: %s", start);
929
+ }
2304
930
 
2305
- VALUE result = rb_str_buf_new(bufferSize);
2306
- rb_enc_associate_index(result, utf8_encindex);
2307
- buffer = bufferStart = RSTRING_PTR(result);
2308
-
2309
- while (pe < stringEnd) {
2310
- if (*pe == '\\') {
2311
- unescape = (char *) "?";
2312
- unescape_len = 1;
2313
- if (pe > p) {
2314
- MEMCPY(buffer, p, char, pe - p);
2315
- buffer += pe - p;
931
+ if ((state->cursor < state->end) && (*state->cursor == '.')) {
932
+ integer = false;
933
+ state->cursor++;
934
+
935
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
936
+ raise_parse_error("invalid number: %s", state->cursor);
937
+ }
938
+
939
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
940
+ state->cursor++;
941
+ }
2316
942
  }
2317
- switch (*++pe) {
2318
- case 'n':
2319
- unescape = (char *) "\n";
2320
- break;
2321
- case 'r':
2322
- unescape = (char *) "\r";
2323
- break;
2324
- case 't':
2325
- unescape = (char *) "\t";
2326
- break;
2327
- case '"':
2328
- unescape = (char *) "\"";
2329
- break;
2330
- case '\\':
2331
- unescape = (char *) "\\";
2332
- break;
2333
- case 'b':
2334
- unescape = (char *) "\b";
2335
- break;
2336
- case 'f':
2337
- unescape = (char *) "\f";
2338
- break;
2339
- case 'u':
2340
- if (pe > stringEnd - 4) {
2341
- raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
2342
- } else {
2343
- uint32_t ch = unescape_unicode((unsigned char *) ++pe);
2344
- pe += 3;
2345
- /* To handle values above U+FFFF, we take a sequence of
2346
- * \uXXXX escapes in the U+D800..U+DBFF then
2347
- * U+DC00..U+DFFF ranges, take the low 10 bits from each
2348
- * to make a 20-bit number, then add 0x10000 to get the
2349
- * final codepoint.
2350
- *
2351
- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
2352
- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
2353
- * Area".
2354
- */
2355
- if ((ch & 0xFC00) == 0xD800) {
2356
- pe++;
2357
- if (pe > stringEnd - 6) {
2358
- raise_parse_error("incomplete surrogate pair at '%s'", p);
2359
- }
2360
- if (pe[0] == '\\' && pe[1] == 'u') {
2361
- uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
2362
- ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
2363
- | (sur & 0x3FF));
2364
- pe += 5;
2365
- } else {
2366
- unescape = (char *) "?";
2367
- break;
943
+
944
+ if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
945
+ integer = false;
946
+ state->cursor++;
947
+ if ((state->cursor < state->end) && ((*state->cursor == '+') || (*state->cursor == '-'))) {
948
+ state->cursor++;
949
+ }
950
+
951
+ if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
952
+ raise_parse_error("invalid number: %s", state->cursor);
953
+ }
954
+
955
+ while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
956
+ state->cursor++;
957
+ }
958
+ }
959
+
960
+ if (integer) {
961
+ return json_push_value(state, config, json_decode_integer(start, state->cursor));
962
+ }
963
+ return json_push_value(state, config, json_decode_float(config, start, state->cursor));
964
+ }
965
+ case '"': {
966
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
967
+ return json_parse_string(state, config, false);
968
+ break;
969
+ }
970
+ case '[': {
971
+ state->cursor++;
972
+ json_eat_whitespace(state);
973
+ long stack_head = state->stack->head;
974
+
975
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
976
+ state->cursor++;
977
+ return json_push_value(state, config, json_decode_array(state, config, 0));
978
+ } else {
979
+ state->current_nesting++;
980
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
981
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
982
+ }
983
+ state->in_array++;
984
+ json_parse_any(state, config);
985
+ }
986
+
987
+ while (true) {
988
+ json_eat_whitespace(state);
989
+
990
+ if (state->cursor < state->end) {
991
+ if (*state->cursor == ']') {
992
+ state->cursor++;
993
+ long count = state->stack->head - stack_head;
994
+ state->current_nesting--;
995
+ state->in_array--;
996
+ return json_push_value(state, config, json_decode_array(state, config, count));
997
+ }
998
+
999
+ if (*state->cursor == ',') {
1000
+ state->cursor++;
1001
+ if (config->allow_trailing_comma) {
1002
+ json_eat_whitespace(state);
1003
+ if ((state->cursor < state->end) && (*state->cursor == ']')) {
1004
+ continue;
2368
1005
  }
2369
1006
  }
2370
- unescape_len = convert_UTF32_to_UTF8(buf, ch);
2371
- unescape = buf;
1007
+ json_parse_any(state, config);
1008
+ continue;
2372
1009
  }
2373
- break;
2374
- default:
2375
- p = pe;
2376
- continue;
1010
+ }
1011
+
1012
+ raise_parse_error("expected ',' or ']' after array value", state->cursor);
2377
1013
  }
2378
- MEMCPY(buffer, unescape, char, unescape_len);
2379
- buffer += unescape_len;
2380
- p = ++pe;
2381
- } else {
2382
- pe++;
1014
+ break;
2383
1015
  }
2384
- }
1016
+ case '{': {
1017
+ state->cursor++;
1018
+ json_eat_whitespace(state);
1019
+ long stack_head = state->stack->head;
1020
+
1021
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1022
+ state->cursor++;
1023
+ return json_push_value(state, config, json_decode_object(state, config, 0));
1024
+ } else {
1025
+ state->current_nesting++;
1026
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1027
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1028
+ }
2385
1029
 
2386
- if (pe > p) {
2387
- MEMCPY(buffer, p, char, pe - p);
2388
- buffer += pe - p;
2389
- }
2390
- rb_str_set_len(result, buffer - bufferStart);
1030
+ if (*state->cursor != '"') {
1031
+ raise_parse_error("expected object key, got '%s", state->cursor);
1032
+ }
1033
+ json_parse_string(state, config, true);
2391
1034
 
2392
- if (symbolize) {
2393
- result = rb_str_intern(result);
2394
- } else if (intern) {
2395
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
2396
- }
1035
+ json_eat_whitespace(state);
1036
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1037
+ raise_parse_error("expected ':' after object key", state->cursor);
1038
+ }
1039
+ state->cursor++;
2397
1040
 
2398
- return result;
2399
- }
1041
+ json_parse_any(state, config);
1042
+ }
2400
1043
 
1044
+ while (true) {
1045
+ json_eat_whitespace(state);
1046
+
1047
+ if (state->cursor < state->end) {
1048
+ if (*state->cursor == '}') {
1049
+ state->cursor++;
1050
+ state->current_nesting--;
1051
+ long count = state->stack->head - stack_head;
1052
+ return json_push_value(state, config, json_decode_object(state, config, count));
1053
+ }
2401
1054
 
2402
- #line 2403 "parser.c"
2403
- enum {JSON_string_start = 1};
2404
- enum {JSON_string_first_final = 9};
2405
- enum {JSON_string_error = 0};
1055
+ if (*state->cursor == ',') {
1056
+ state->cursor++;
1057
+ json_eat_whitespace(state);
1058
+
1059
+ if (config->allow_trailing_comma) {
1060
+ if ((state->cursor < state->end) && (*state->cursor == '}')) {
1061
+ continue;
1062
+ }
1063
+ }
2406
1064
 
2407
- enum {JSON_string_en_main = 1};
1065
+ if (*state->cursor != '"') {
1066
+ raise_parse_error("expected object key, got: '%s'", state->cursor);
1067
+ }
1068
+ json_parse_string(state, config, true);
2408
1069
 
1070
+ json_eat_whitespace(state);
1071
+ if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1072
+ raise_parse_error("expected ':' after object key, got: '%s", state->cursor);
1073
+ }
1074
+ state->cursor++;
2409
1075
 
2410
- #line 1061 "parser.rl"
1076
+ json_parse_any(state, config);
2411
1077
 
1078
+ continue;
1079
+ }
1080
+ }
2412
1081
 
2413
- static int
2414
- match_i(VALUE regexp, VALUE klass, VALUE memo)
2415
- {
2416
- if (regexp == Qundef) return ST_STOP;
2417
- if (RTEST(rb_funcall(klass, i_json_creatable_p, 0)) &&
2418
- RTEST(rb_funcall(regexp, i_match, 1, rb_ary_entry(memo, 0)))) {
2419
- rb_ary_push(memo, klass);
2420
- return ST_STOP;
1082
+ raise_parse_error("expected ',' or '}' after object value, got: '%s'", state->cursor);
1083
+ }
1084
+ break;
1085
+ }
1086
+
1087
+ default:
1088
+ raise_parse_error("unexpected character: '%s'", state->cursor);
1089
+ break;
2421
1090
  }
2422
- return ST_CONTINUE;
1091
+
1092
+ raise_parse_error("unreacheable: '%s'", state->cursor);
2423
1093
  }
2424
1094
 
2425
- static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result)
1095
+ static void json_ensure_eof(JSON_ParserState *state)
2426
1096
  {
2427
- int cs = EVIL;
2428
- VALUE match_string;
2429
-
2430
-
2431
- #line 2432 "parser.c"
2432
- {
2433
- cs = JSON_string_start;
2434
- }
2435
-
2436
- #line 1081 "parser.rl"
2437
- json->memo = p;
2438
-
2439
- #line 2440 "parser.c"
2440
- {
2441
- if ( p == pe )
2442
- goto _test_eof;
2443
- switch ( cs )
2444
- {
2445
- case 1:
2446
- if ( (*p) == 34 )
2447
- goto st2;
2448
- goto st0;
2449
- st0:
2450
- cs = 0;
2451
- goto _out;
2452
- st2:
2453
- if ( ++p == pe )
2454
- goto _test_eof2;
2455
- case 2:
2456
- switch( (*p) ) {
2457
- case 34: goto tr2;
2458
- case 92: goto st3;
2459
- }
2460
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
2461
- goto st0;
2462
- goto st2;
2463
- tr2:
2464
- #line 1043 "parser.rl"
2465
- {
2466
- *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
2467
- {p = (( p + 1))-1;}
2468
- p--;
2469
- {p++; cs = 9; goto _out;}
2470
- }
2471
- #line 1036 "parser.rl"
2472
- {
2473
- *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
2474
- {p = (( p + 1))-1;}
2475
- p--;
2476
- {p++; cs = 9; goto _out;}
2477
- }
2478
- goto st9;
2479
- tr6:
2480
- #line 1036 "parser.rl"
2481
- {
2482
- *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
2483
- {p = (( p + 1))-1;}
2484
- p--;
2485
- {p++; cs = 9; goto _out;}
2486
- }
2487
- goto st9;
2488
- st9:
2489
- if ( ++p == pe )
2490
- goto _test_eof9;
2491
- case 9:
2492
- #line 2493 "parser.c"
2493
- goto st0;
2494
- st3:
2495
- if ( ++p == pe )
2496
- goto _test_eof3;
2497
- case 3:
2498
- if ( (*p) == 117 )
2499
- goto st5;
2500
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
2501
- goto st0;
2502
- goto st4;
2503
- st4:
2504
- if ( ++p == pe )
2505
- goto _test_eof4;
2506
- case 4:
2507
- switch( (*p) ) {
2508
- case 34: goto tr6;
2509
- case 92: goto st3;
2510
- }
2511
- if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
2512
- goto st0;
2513
- goto st4;
2514
- st5:
2515
- if ( ++p == pe )
2516
- goto _test_eof5;
2517
- case 5:
2518
- if ( (*p) < 65 ) {
2519
- if ( 48 <= (*p) && (*p) <= 57 )
2520
- goto st6;
2521
- } else if ( (*p) > 70 ) {
2522
- if ( 97 <= (*p) && (*p) <= 102 )
2523
- goto st6;
2524
- } else
2525
- goto st6;
2526
- goto st0;
2527
- st6:
2528
- if ( ++p == pe )
2529
- goto _test_eof6;
2530
- case 6:
2531
- if ( (*p) < 65 ) {
2532
- if ( 48 <= (*p) && (*p) <= 57 )
2533
- goto st7;
2534
- } else if ( (*p) > 70 ) {
2535
- if ( 97 <= (*p) && (*p) <= 102 )
2536
- goto st7;
2537
- } else
2538
- goto st7;
2539
- goto st0;
2540
- st7:
2541
- if ( ++p == pe )
2542
- goto _test_eof7;
2543
- case 7:
2544
- if ( (*p) < 65 ) {
2545
- if ( 48 <= (*p) && (*p) <= 57 )
2546
- goto st8;
2547
- } else if ( (*p) > 70 ) {
2548
- if ( 97 <= (*p) && (*p) <= 102 )
2549
- goto st8;
2550
- } else
2551
- goto st8;
2552
- goto st0;
2553
- st8:
2554
- if ( ++p == pe )
2555
- goto _test_eof8;
2556
- case 8:
2557
- if ( (*p) < 65 ) {
2558
- if ( 48 <= (*p) && (*p) <= 57 )
2559
- goto st4;
2560
- } else if ( (*p) > 70 ) {
2561
- if ( 97 <= (*p) && (*p) <= 102 )
2562
- goto st4;
2563
- } else
2564
- goto st4;
2565
- goto st0;
2566
- }
2567
- _test_eof2: cs = 2; goto _test_eof;
2568
- _test_eof9: cs = 9; goto _test_eof;
2569
- _test_eof3: cs = 3; goto _test_eof;
2570
- _test_eof4: cs = 4; goto _test_eof;
2571
- _test_eof5: cs = 5; goto _test_eof;
2572
- _test_eof6: cs = 6; goto _test_eof;
2573
- _test_eof7: cs = 7; goto _test_eof;
2574
- _test_eof8: cs = 8; goto _test_eof;
2575
-
2576
- _test_eof: {}
2577
- _out: {}
2578
- }
2579
-
2580
- #line 1083 "parser.rl"
2581
-
2582
- if (json->create_additions && RTEST(match_string = json->match_string)) {
2583
- VALUE klass;
2584
- VALUE memo = rb_ary_new2(2);
2585
- rb_ary_push(memo, *result);
2586
- rb_hash_foreach(match_string, match_i, memo);
2587
- klass = rb_ary_entry(memo, 1);
2588
- if (RTEST(klass)) {
2589
- *result = rb_funcall(klass, i_json_create, 1, *result);
2590
- }
2591
- }
2592
-
2593
- if (cs >= JSON_string_first_final) {
2594
- return p + 1;
2595
- } else {
2596
- return NULL;
1097
+ json_eat_whitespace(state);
1098
+ if (state->cursor != state->end) {
1099
+ raise_parse_error("unexpected token at end of stream '%s'", state->cursor);
2597
1100
  }
2598
1101
  }
2599
1102
 
@@ -2625,72 +1128,68 @@ static VALUE convert_encoding(VALUE source)
2625
1128
  return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
2626
1129
  }
2627
1130
 
2628
- static int configure_parser_i(VALUE key, VALUE val, VALUE data)
1131
+ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
2629
1132
  {
2630
- JSON_Parser *json = (JSON_Parser *)data;
2631
-
2632
- if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
2633
- else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
2634
- else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
2635
- else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
2636
- else if (key == sym_freeze) { json->freeze = RTEST(val); }
2637
- else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
2638
- else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
2639
- else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
2640
- else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
2641
- else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
2642
- else if (key == sym_create_additions) {
2643
- if (NIL_P(val)) {
2644
- json->create_additions = true;
2645
- json->deprecated_create_additions = true;
2646
- } else {
2647
- json->create_additions = RTEST(val);
2648
- json->deprecated_create_additions = false;
1133
+ JSON_ParserConfig *config = (JSON_ParserConfig *)data;
1134
+
1135
+ if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1136
+ else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1137
+ else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1138
+ else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
1139
+ else if (key == sym_freeze) { config->freeze = RTEST(val); }
1140
+ else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1141
+ else if (key == sym_decimal_class) {
1142
+ if (RTEST(val)) {
1143
+ if (rb_respond_to(val, i_try_convert)) {
1144
+ config->decimal_class = val;
1145
+ config->decimal_method_id = i_try_convert;
1146
+ } else if (rb_respond_to(val, i_new)) {
1147
+ config->decimal_class = val;
1148
+ config->decimal_method_id = i_new;
1149
+ } else if (RB_TYPE_P(val, T_CLASS)) {
1150
+ VALUE name = rb_class_name(val);
1151
+ const char *name_cstr = RSTRING_PTR(name);
1152
+ const char *last_colon = strrchr(name_cstr, ':');
1153
+ if (last_colon) {
1154
+ const char *mod_path_end = last_colon - 1;
1155
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1156
+ config->decimal_class = rb_path_to_class(mod_path);
1157
+
1158
+ const char *method_name_beg = last_colon + 1;
1159
+ long before_len = method_name_beg - name_cstr;
1160
+ long len = RSTRING_LEN(name) - before_len;
1161
+ VALUE method_name = rb_str_substr(name, before_len, len);
1162
+ config->decimal_method_id = SYM2ID(rb_str_intern(method_name));
1163
+ } else {
1164
+ config->decimal_class = rb_mKernel;
1165
+ config->decimal_method_id = SYM2ID(rb_str_intern(name));
1166
+ }
1167
+ }
2649
1168
  }
2650
1169
  }
2651
1170
 
2652
1171
  return ST_CONTINUE;
2653
1172
  }
2654
1173
 
2655
- static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
1174
+ static void parser_config_init(JSON_ParserConfig *config, VALUE opts)
2656
1175
  {
2657
- if (json->Vsource) {
2658
- rb_raise(rb_eTypeError, "already initialized instance");
2659
- }
2660
-
2661
- json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
2662
- json->max_nesting = 100;
1176
+ config->max_nesting = 100;
2663
1177
 
2664
1178
  if (!NIL_P(opts)) {
2665
1179
  Check_Type(opts, T_HASH);
2666
1180
  if (RHASH_SIZE(opts) > 0) {
2667
1181
  // We assume in most cases few keys are set so it's faster to go over
2668
1182
  // the provided keys than to check all possible keys.
2669
- rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
2670
-
2671
- if (json->symbolize_names && json->create_additions) {
2672
- rb_raise(rb_eArgError,
2673
- "options :symbolize_names and :create_additions cannot be "
2674
- " used in conjunction");
2675
- }
2676
-
2677
- if (json->create_additions && !json->create_id) {
2678
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
2679
- }
1183
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)config);
2680
1184
  }
2681
1185
 
2682
1186
  }
2683
- source = convert_encoding(StringValue(source));
2684
- StringValue(source);
2685
- json->len = RSTRING_LEN(source);
2686
- json->source = RSTRING_PTR(source);
2687
- json->Vsource = source;
2688
1187
  }
2689
1188
 
2690
1189
  /*
2691
- * call-seq: new(source, opts => {})
1190
+ * call-seq: new(opts => {})
2692
1191
  *
2693
- * Creates a new JSON::Ext::Parser instance for the string _source_.
1192
+ * Creates a new JSON::Ext::ParserConfig instance.
2694
1193
  *
2695
1194
  * It will be configured by the _opts_ hash. _opts_ can have the following
2696
1195
  * keys:
@@ -2706,456 +1205,109 @@ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
2706
1205
  * (keys) in a JSON object. Otherwise strings are returned, which is
2707
1206
  * also the default. It's not possible to use this option in
2708
1207
  * conjunction with the *create_additions* option.
2709
- * * *create_additions*: If set to false, the Parser doesn't create
2710
- * additions even if a matching class and create_id was found. This option
2711
- * defaults to false.
2712
- * * *object_class*: Defaults to Hash. If another type is provided, it will be used
2713
- * instead of Hash to represent JSON objects. The type must respond to
2714
- * +new+ without arguments, and return an object that respond to +[]=+.
2715
- * * *array_class*: Defaults to Array If another type is provided, it will be used
2716
- * instead of Hash to represent JSON arrays. The type must respond to
2717
- * +new+ without arguments, and return an object that respond to +<<+.
2718
1208
  * * *decimal_class*: Specifies which class to use instead of the default
2719
1209
  * (Float) when parsing decimal numbers. This class must accept a single
2720
1210
  * string argument in its constructor.
2721
1211
  */
2722
- static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1212
+ static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
2723
1213
  {
2724
- GET_PARSER_INIT;
1214
+ GET_PARSER_CONFIG;
2725
1215
 
2726
- rb_check_arity(argc, 1, 2);
1216
+ parser_config_init(config, opts);
1217
+
1218
+ RB_OBJ_WRITTEN(self, Qundef, config->decimal_class);
2727
1219
 
2728
- parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
2729
1220
  return self;
2730
1221
  }
2731
1222
 
1223
+ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1224
+ {
1225
+ Vsource = convert_encoding(StringValue(Vsource));
1226
+ StringValue(Vsource);
1227
+
1228
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1229
+ rvalue_stack stack = {
1230
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1231
+ .ptr = rvalue_stack_buffer,
1232
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1233
+ };
2732
1234
 
2733
- #line 2734 "parser.c"
2734
- enum {JSON_start = 1};
2735
- enum {JSON_first_final = 10};
2736
- enum {JSON_error = 0};
1235
+ JSON_ParserState _state = {
1236
+ .cursor = RSTRING_PTR(Vsource),
1237
+ .end = RSTRING_END(Vsource),
1238
+ .stack = &stack,
1239
+ };
1240
+ JSON_ParserState *state = &_state;
2737
1241
 
2738
- enum {JSON_en_main = 1};
1242
+ VALUE result = json_parse_any(state, config);
2739
1243
 
1244
+ // This may be skipped in case of exception, but
1245
+ // it won't cause a leak.
1246
+ rvalue_stack_eagerly_release(state->stack_handle);
2740
1247
 
2741
- #line 1249 "parser.rl"
1248
+ json_ensure_eof(state);
2742
1249
 
1250
+ return result;
1251
+ }
2743
1252
 
2744
1253
  /*
2745
- * call-seq: parse()
1254
+ * call-seq: parse(source)
2746
1255
  *
2747
1256
  * Parses the current JSON text _source_ and returns the complete data
2748
1257
  * structure as a result.
2749
1258
  * It raises JSON::ParserError if fail to parse.
2750
1259
  */
2751
- static VALUE cParser_parse(VALUE self)
1260
+ static VALUE cParserConfig_parse(VALUE self, VALUE Vsource)
2752
1261
  {
2753
- char *p, *pe;
2754
- int cs = EVIL;
2755
- VALUE result = Qnil;
2756
- GET_PARSER;
2757
-
2758
- char stack_buffer[FBUFFER_STACK_SIZE];
2759
- fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
2760
-
2761
- VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
2762
- rvalue_stack stack = {
2763
- .type = RVALUE_STACK_STACK_ALLOCATED,
2764
- .ptr = rvalue_stack_buffer,
2765
- .capa = RVALUE_STACK_INITIAL_CAPA,
2766
- };
2767
- json->stack = &stack;
2768
-
2769
-
2770
- #line 2771 "parser.c"
2771
- {
2772
- cs = JSON_start;
2773
- }
2774
-
2775
- #line 1277 "parser.rl"
2776
- p = json->source;
2777
- pe = p + json->len;
2778
-
2779
- #line 2780 "parser.c"
2780
- {
2781
- if ( p == pe )
2782
- goto _test_eof;
2783
- switch ( cs )
2784
- {
2785
- st1:
2786
- if ( ++p == pe )
2787
- goto _test_eof1;
2788
- case 1:
2789
- switch( (*p) ) {
2790
- case 13: goto st1;
2791
- case 32: goto st1;
2792
- case 34: goto tr2;
2793
- case 45: goto tr2;
2794
- case 47: goto st6;
2795
- case 73: goto tr2;
2796
- case 78: goto tr2;
2797
- case 91: goto tr2;
2798
- case 102: goto tr2;
2799
- case 110: goto tr2;
2800
- case 116: goto tr2;
2801
- case 123: goto tr2;
2802
- }
2803
- if ( (*p) > 10 ) {
2804
- if ( 48 <= (*p) && (*p) <= 57 )
2805
- goto tr2;
2806
- } else if ( (*p) >= 9 )
2807
- goto st1;
2808
- goto st0;
2809
- st0:
2810
- cs = 0;
2811
- goto _out;
2812
- tr2:
2813
- #line 1241 "parser.rl"
2814
- {
2815
- char *np = JSON_parse_value(json, p, pe, &result, 0);
2816
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
2817
- }
2818
- goto st10;
2819
- st10:
2820
- if ( ++p == pe )
2821
- goto _test_eof10;
2822
- case 10:
2823
- #line 2824 "parser.c"
2824
- switch( (*p) ) {
2825
- case 13: goto st10;
2826
- case 32: goto st10;
2827
- case 47: goto st2;
2828
- }
2829
- if ( 9 <= (*p) && (*p) <= 10 )
2830
- goto st10;
2831
- goto st0;
2832
- st2:
2833
- if ( ++p == pe )
2834
- goto _test_eof2;
2835
- case 2:
2836
- switch( (*p) ) {
2837
- case 42: goto st3;
2838
- case 47: goto st5;
2839
- }
2840
- goto st0;
2841
- st3:
2842
- if ( ++p == pe )
2843
- goto _test_eof3;
2844
- case 3:
2845
- if ( (*p) == 42 )
2846
- goto st4;
2847
- goto st3;
2848
- st4:
2849
- if ( ++p == pe )
2850
- goto _test_eof4;
2851
- case 4:
2852
- switch( (*p) ) {
2853
- case 42: goto st4;
2854
- case 47: goto st10;
2855
- }
2856
- goto st3;
2857
- st5:
2858
- if ( ++p == pe )
2859
- goto _test_eof5;
2860
- case 5:
2861
- if ( (*p) == 10 )
2862
- goto st10;
2863
- goto st5;
2864
- st6:
2865
- if ( ++p == pe )
2866
- goto _test_eof6;
2867
- case 6:
2868
- switch( (*p) ) {
2869
- case 42: goto st7;
2870
- case 47: goto st9;
2871
- }
2872
- goto st0;
2873
- st7:
2874
- if ( ++p == pe )
2875
- goto _test_eof7;
2876
- case 7:
2877
- if ( (*p) == 42 )
2878
- goto st8;
2879
- goto st7;
2880
- st8:
2881
- if ( ++p == pe )
2882
- goto _test_eof8;
2883
- case 8:
2884
- switch( (*p) ) {
2885
- case 42: goto st8;
2886
- case 47: goto st1;
2887
- }
2888
- goto st7;
2889
- st9:
2890
- if ( ++p == pe )
2891
- goto _test_eof9;
2892
- case 9:
2893
- if ( (*p) == 10 )
2894
- goto st1;
2895
- goto st9;
2896
- }
2897
- _test_eof1: cs = 1; goto _test_eof;
2898
- _test_eof10: cs = 10; goto _test_eof;
2899
- _test_eof2: cs = 2; goto _test_eof;
2900
- _test_eof3: cs = 3; goto _test_eof;
2901
- _test_eof4: cs = 4; goto _test_eof;
2902
- _test_eof5: cs = 5; goto _test_eof;
2903
- _test_eof6: cs = 6; goto _test_eof;
2904
- _test_eof7: cs = 7; goto _test_eof;
2905
- _test_eof8: cs = 8; goto _test_eof;
2906
- _test_eof9: cs = 9; goto _test_eof;
2907
-
2908
- _test_eof: {}
2909
- _out: {}
2910
- }
2911
-
2912
- #line 1280 "parser.rl"
2913
-
2914
- if (json->stack_handle) {
2915
- rvalue_stack_eagerly_release(json->stack_handle);
2916
- }
2917
-
2918
- if (cs >= JSON_first_final && p == pe) {
2919
- return result;
2920
- } else {
2921
- raise_parse_error("unexpected token at '%s'", p);
2922
- return Qnil;
2923
- }
1262
+ GET_PARSER_CONFIG;
1263
+ return cParser_parse(config, Vsource);
2924
1264
  }
2925
1265
 
2926
- static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
1266
+ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
2927
1267
  {
2928
- char *p, *pe;
2929
- int cs = EVIL;
2930
- VALUE result = Qnil;
1268
+ Vsource = convert_encoding(StringValue(Vsource));
1269
+ StringValue(Vsource);
2931
1270
 
2932
- JSON_Parser _parser = {0};
2933
- JSON_Parser *json = &_parser;
2934
- parser_init(json, source, opts);
1271
+ JSON_ParserConfig _config = {0};
1272
+ JSON_ParserConfig *config = &_config;
1273
+ parser_config_init(config, opts);
2935
1274
 
2936
- char stack_buffer[FBUFFER_STACK_SIZE];
2937
- fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
2938
-
2939
- VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
2940
- rvalue_stack stack = {
2941
- .type = RVALUE_STACK_STACK_ALLOCATED,
2942
- .ptr = rvalue_stack_buffer,
2943
- .capa = RVALUE_STACK_INITIAL_CAPA,
2944
- };
2945
- json->stack = &stack;
2946
-
2947
-
2948
- #line 2949 "parser.c"
2949
- {
2950
- cs = JSON_start;
2951
- }
2952
-
2953
- #line 1315 "parser.rl"
2954
- p = json->source;
2955
- pe = p + json->len;
2956
-
2957
- #line 2958 "parser.c"
2958
- {
2959
- if ( p == pe )
2960
- goto _test_eof;
2961
- switch ( cs )
2962
- {
2963
- st1:
2964
- if ( ++p == pe )
2965
- goto _test_eof1;
2966
- case 1:
2967
- switch( (*p) ) {
2968
- case 13: goto st1;
2969
- case 32: goto st1;
2970
- case 34: goto tr2;
2971
- case 45: goto tr2;
2972
- case 47: goto st6;
2973
- case 73: goto tr2;
2974
- case 78: goto tr2;
2975
- case 91: goto tr2;
2976
- case 102: goto tr2;
2977
- case 110: goto tr2;
2978
- case 116: goto tr2;
2979
- case 123: goto tr2;
2980
- }
2981
- if ( (*p) > 10 ) {
2982
- if ( 48 <= (*p) && (*p) <= 57 )
2983
- goto tr2;
2984
- } else if ( (*p) >= 9 )
2985
- goto st1;
2986
- goto st0;
2987
- st0:
2988
- cs = 0;
2989
- goto _out;
2990
- tr2:
2991
- #line 1241 "parser.rl"
2992
- {
2993
- char *np = JSON_parse_value(json, p, pe, &result, 0);
2994
- if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
2995
- }
2996
- goto st10;
2997
- st10:
2998
- if ( ++p == pe )
2999
- goto _test_eof10;
3000
- case 10:
3001
- #line 3002 "parser.c"
3002
- switch( (*p) ) {
3003
- case 13: goto st10;
3004
- case 32: goto st10;
3005
- case 47: goto st2;
3006
- }
3007
- if ( 9 <= (*p) && (*p) <= 10 )
3008
- goto st10;
3009
- goto st0;
3010
- st2:
3011
- if ( ++p == pe )
3012
- goto _test_eof2;
3013
- case 2:
3014
- switch( (*p) ) {
3015
- case 42: goto st3;
3016
- case 47: goto st5;
3017
- }
3018
- goto st0;
3019
- st3:
3020
- if ( ++p == pe )
3021
- goto _test_eof3;
3022
- case 3:
3023
- if ( (*p) == 42 )
3024
- goto st4;
3025
- goto st3;
3026
- st4:
3027
- if ( ++p == pe )
3028
- goto _test_eof4;
3029
- case 4:
3030
- switch( (*p) ) {
3031
- case 42: goto st4;
3032
- case 47: goto st10;
3033
- }
3034
- goto st3;
3035
- st5:
3036
- if ( ++p == pe )
3037
- goto _test_eof5;
3038
- case 5:
3039
- if ( (*p) == 10 )
3040
- goto st10;
3041
- goto st5;
3042
- st6:
3043
- if ( ++p == pe )
3044
- goto _test_eof6;
3045
- case 6:
3046
- switch( (*p) ) {
3047
- case 42: goto st7;
3048
- case 47: goto st9;
3049
- }
3050
- goto st0;
3051
- st7:
3052
- if ( ++p == pe )
3053
- goto _test_eof7;
3054
- case 7:
3055
- if ( (*p) == 42 )
3056
- goto st8;
3057
- goto st7;
3058
- st8:
3059
- if ( ++p == pe )
3060
- goto _test_eof8;
3061
- case 8:
3062
- switch( (*p) ) {
3063
- case 42: goto st8;
3064
- case 47: goto st1;
3065
- }
3066
- goto st7;
3067
- st9:
3068
- if ( ++p == pe )
3069
- goto _test_eof9;
3070
- case 9:
3071
- if ( (*p) == 10 )
3072
- goto st1;
3073
- goto st9;
3074
- }
3075
- _test_eof1: cs = 1; goto _test_eof;
3076
- _test_eof10: cs = 10; goto _test_eof;
3077
- _test_eof2: cs = 2; goto _test_eof;
3078
- _test_eof3: cs = 3; goto _test_eof;
3079
- _test_eof4: cs = 4; goto _test_eof;
3080
- _test_eof5: cs = 5; goto _test_eof;
3081
- _test_eof6: cs = 6; goto _test_eof;
3082
- _test_eof7: cs = 7; goto _test_eof;
3083
- _test_eof8: cs = 8; goto _test_eof;
3084
- _test_eof9: cs = 9; goto _test_eof;
3085
-
3086
- _test_eof: {}
3087
- _out: {}
3088
- }
3089
-
3090
- #line 1318 "parser.rl"
3091
-
3092
- if (json->stack_handle) {
3093
- rvalue_stack_eagerly_release(json->stack_handle);
3094
- }
3095
-
3096
- if (cs >= JSON_first_final && p == pe) {
3097
- return result;
3098
- } else {
3099
- raise_parse_error("unexpected token at '%s'", p);
3100
- return Qnil;
3101
- }
1275
+ return cParser_parse(config, Vsource);
3102
1276
  }
3103
1277
 
3104
- static void JSON_mark(void *ptr)
1278
+ static void JSON_ParserConfig_mark(void *ptr)
3105
1279
  {
3106
- JSON_Parser *json = ptr;
3107
- rb_gc_mark(json->Vsource);
3108
- rb_gc_mark(json->create_id);
3109
- rb_gc_mark(json->object_class);
3110
- rb_gc_mark(json->array_class);
3111
- rb_gc_mark(json->decimal_class);
3112
- rb_gc_mark(json->match_string);
3113
- rb_gc_mark(json->stack_handle);
3114
-
3115
- long index;
3116
- for (index = 0; index < json->name_cache.length; index++) {
3117
- rb_gc_mark(json->name_cache.entries[index]);
3118
- }
1280
+ JSON_ParserConfig *config = ptr;
1281
+ rb_gc_mark(config->on_load_proc);
1282
+ rb_gc_mark(config->decimal_class);
3119
1283
  }
3120
1284
 
3121
- static void JSON_free(void *ptr)
1285
+ static void JSON_ParserConfig_free(void *ptr)
3122
1286
  {
3123
- JSON_Parser *json = ptr;
3124
- fbuffer_free(&json->fbuffer);
3125
- ruby_xfree(json);
1287
+ JSON_ParserConfig *config = ptr;
1288
+ ruby_xfree(config);
3126
1289
  }
3127
1290
 
3128
- static size_t JSON_memsize(const void *ptr)
1291
+ static size_t JSON_ParserConfig_memsize(const void *ptr)
3129
1292
  {
3130
- const JSON_Parser *json = ptr;
3131
- return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer);
1293
+ return sizeof(JSON_ParserConfig);
3132
1294
  }
3133
1295
 
3134
- static const rb_data_type_t JSON_Parser_type = {
3135
- "JSON/Parser",
3136
- {JSON_mark, JSON_free, JSON_memsize,},
1296
+ static const rb_data_type_t JSON_ParserConfig_type = {
1297
+ "JSON::Ext::Parser/ParserConfig",
1298
+ {
1299
+ JSON_ParserConfig_mark,
1300
+ JSON_ParserConfig_free,
1301
+ JSON_ParserConfig_memsize,
1302
+ },
3137
1303
  0, 0,
3138
- RUBY_TYPED_FREE_IMMEDIATELY,
1304
+ RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
3139
1305
  };
3140
1306
 
3141
1307
  static VALUE cJSON_parser_s_allocate(VALUE klass)
3142
1308
  {
3143
- JSON_Parser *json;
3144
- VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
3145
- fbuffer_stack_init(&json->fbuffer, 0, NULL, 0);
3146
- return obj;
3147
- }
3148
-
3149
- /*
3150
- * call-seq: source()
3151
- *
3152
- * Returns a copy of the current _source_ string, that was used to construct
3153
- * this Parser.
3154
- */
3155
- static VALUE cParser_source(VALUE self)
3156
- {
3157
- GET_PARSER;
3158
- return rb_str_dup(json->Vsource);
1309
+ JSON_ParserConfig *config;
1310
+ return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
3159
1311
  }
3160
1312
 
3161
1313
  void Init_parser(void)
@@ -3167,15 +1319,15 @@ void Init_parser(void)
3167
1319
  #undef rb_intern
3168
1320
  rb_require("json/common");
3169
1321
  mJSON = rb_define_module("JSON");
3170
- mExt = rb_define_module_under(mJSON, "Ext");
3171
- cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
1322
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
1323
+ VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
3172
1324
  eNestingError = rb_path2class("JSON::NestingError");
3173
1325
  rb_gc_register_mark_object(eNestingError);
3174
- rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
3175
- rb_define_method(cParser, "initialize", cParser_initialize, -1);
3176
- rb_define_method(cParser, "parse", cParser_parse, 0);
3177
- rb_define_method(cParser, "source", cParser_source, 0);
1326
+ rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
1327
+ rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
1328
+ rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
3178
1329
 
1330
+ VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
3179
1331
  rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
3180
1332
 
3181
1333
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
@@ -3195,19 +1347,10 @@ void Init_parser(void)
3195
1347
  sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
3196
1348
  sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
3197
1349
  sym_freeze = ID2SYM(rb_intern("freeze"));
3198
- sym_create_additions = ID2SYM(rb_intern("create_additions"));
3199
- sym_create_id = ID2SYM(rb_intern("create_id"));
3200
- sym_object_class = ID2SYM(rb_intern("object_class"));
3201
- sym_array_class = ID2SYM(rb_intern("array_class"));
1350
+ sym_on_load = ID2SYM(rb_intern("on_load"));
3202
1351
  sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
3203
- sym_match_string = ID2SYM(rb_intern("match_string"));
3204
1352
 
3205
- i_create_id = rb_intern("create_id");
3206
- i_json_creatable_p = rb_intern("json_creatable?");
3207
- i_json_create = rb_intern("json_create");
3208
1353
  i_chr = rb_intern("chr");
3209
- i_match = rb_intern("match");
3210
- i_deep_const_get = rb_intern("deep_const_get");
3211
1354
  i_aset = rb_intern("[]=");
3212
1355
  i_aref = rb_intern("[]");
3213
1356
  i_leftshift = rb_intern("<<");
@@ -3220,11 +1363,3 @@ void Init_parser(void)
3220
1363
  utf8_encindex = rb_utf8_encindex();
3221
1364
  enc_utf8 = rb_utf8_encoding();
3222
1365
  }
3223
-
3224
- /*
3225
- * Local variables:
3226
- * mode: c
3227
- * c-file-style: ruby
3228
- * indent-tabs-mode: nil
3229
- * End:
3230
- */