json 2.7.2 → 2.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,28 +3,6 @@
3
3
  #include "../fbuffer/fbuffer.h"
4
4
  #include "parser.h"
5
5
 
6
- #if defined HAVE_RUBY_ENCODING_H
7
- # define EXC_ENCODING rb_utf8_encoding(),
8
- # ifndef HAVE_RB_ENC_RAISE
9
- static void
10
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
11
- {
12
- va_list args;
13
- VALUE mesg;
14
-
15
- va_start(args, fmt);
16
- mesg = rb_enc_vsprintf(enc, fmt, args);
17
- va_end(args);
18
-
19
- rb_exc_raise(rb_exc_new3(exc, mesg));
20
- }
21
- # define rb_enc_raise enc_raise
22
- # endif
23
- #else
24
- # define EXC_ENCODING /* nothing */
25
- # define rb_enc_raise rb_raise
26
- #endif
27
-
28
6
  /* unicode */
29
7
 
30
8
  static const signed char digit_values[256] = {
@@ -44,26 +22,28 @@ static const signed char digit_values[256] = {
44
22
  -1, -1, -1, -1, -1, -1, -1
45
23
  };
46
24
 
47
- static UTF32 unescape_unicode(const unsigned char *p)
25
+ static uint32_t unescape_unicode(const unsigned char *p)
48
26
  {
27
+ const uint32_t replacement_char = 0xFFFD;
28
+
49
29
  signed char b;
50
- UTF32 result = 0;
30
+ uint32_t result = 0;
51
31
  b = digit_values[p[0]];
52
- if (b < 0) return UNI_REPLACEMENT_CHAR;
32
+ if (b < 0) return replacement_char;
53
33
  result = (result << 4) | (unsigned char)b;
54
34
  b = digit_values[p[1]];
55
- if (b < 0) return UNI_REPLACEMENT_CHAR;
35
+ if (b < 0) return replacement_char;
56
36
  result = (result << 4) | (unsigned char)b;
57
37
  b = digit_values[p[2]];
58
- if (b < 0) return UNI_REPLACEMENT_CHAR;
38
+ if (b < 0) return replacement_char;
59
39
  result = (result << 4) | (unsigned char)b;
60
40
  b = digit_values[p[3]];
61
- if (b < 0) return UNI_REPLACEMENT_CHAR;
41
+ if (b < 0) return replacement_char;
62
42
  result = (result << 4) | (unsigned char)b;
63
43
  return result;
64
44
  }
65
45
 
66
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
46
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
67
47
  {
68
48
  int len = 1;
69
49
  if (ch <= 0x7F) {
@@ -89,21 +69,45 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
89
69
  return len;
90
70
  }
91
71
 
92
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
72
+ #define PARSE_ERROR_FRAGMENT_LEN 32
73
+ #ifdef RBIMPL_ATTR_NORETURN
74
+ RBIMPL_ATTR_NORETURN()
75
+ #endif
76
+ static void raise_parse_error(const char *format, const char *start)
77
+ {
78
+ char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
79
+
80
+ size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
81
+ const char *ptr = start;
82
+
83
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
84
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
85
+ buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
86
+ ptr = buffer;
87
+ }
88
+
89
+ rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
90
+ }
91
+
92
+ static VALUE mJSON, mExt, cParser, eNestingError;
93
93
  static VALUE CNaN, CInfinity, CMinusInfinity;
94
94
 
95
95
  static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
96
96
  i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
97
- i_object_class, i_array_class, i_decimal_class, i_key_p,
97
+ i_object_class, i_array_class, i_decimal_class,
98
98
  i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
99
99
  i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
100
100
 
101
+ static int binary_encindex;
102
+ static int utf8_encindex;
103
+
101
104
 
102
- #line 125 "parser.rl"
103
105
 
106
+ #line 129 "parser.rl"
104
107
 
105
108
 
106
- #line 107 "parser.c"
109
+
110
+ #line 111 "parser.c"
107
111
  enum {JSON_object_start = 1};
108
112
  enum {JSON_object_first_final = 27};
109
113
  enum {JSON_object_error = 0};
@@ -111,7 +115,7 @@ enum {JSON_object_error = 0};
111
115
  enum {JSON_object_en_main = 1};
112
116
 
113
117
 
114
- #line 167 "parser.rl"
118
+ #line 171 "parser.rl"
115
119
 
116
120
 
117
121
  static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -127,14 +131,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
127
131
  *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
128
132
 
129
133
 
130
- #line 131 "parser.c"
134
+ #line 135 "parser.c"
131
135
  {
132
136
  cs = JSON_object_start;
133
137
  }
134
138
 
135
- #line 182 "parser.rl"
139
+ #line 186 "parser.rl"
136
140
 
137
- #line 138 "parser.c"
141
+ #line 142 "parser.c"
138
142
  {
139
143
  if ( p == pe )
140
144
  goto _test_eof;
@@ -162,7 +166,7 @@ case 2:
162
166
  goto st2;
163
167
  goto st0;
164
168
  tr2:
165
- #line 149 "parser.rl"
169
+ #line 153 "parser.rl"
166
170
  {
167
171
  char *np;
168
172
  json->parsing_name = 1;
@@ -175,7 +179,7 @@ st3:
175
179
  if ( ++p == pe )
176
180
  goto _test_eof3;
177
181
  case 3:
178
- #line 179 "parser.c"
182
+ #line 183 "parser.c"
179
183
  switch( (*p) ) {
180
184
  case 13: goto st3;
181
185
  case 32: goto st3;
@@ -242,7 +246,7 @@ case 8:
242
246
  goto st8;
243
247
  goto st0;
244
248
  tr11:
245
- #line 133 "parser.rl"
249
+ #line 137 "parser.rl"
246
250
  {
247
251
  VALUE v = Qnil;
248
252
  char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@@ -263,7 +267,7 @@ st9:
263
267
  if ( ++p == pe )
264
268
  goto _test_eof9;
265
269
  case 9:
266
- #line 267 "parser.c"
270
+ #line 271 "parser.c"
267
271
  switch( (*p) ) {
268
272
  case 13: goto st9;
269
273
  case 32: goto st9;
@@ -352,14 +356,14 @@ case 18:
352
356
  goto st9;
353
357
  goto st18;
354
358
  tr4:
355
- #line 157 "parser.rl"
359
+ #line 161 "parser.rl"
356
360
  { p--; {p++; cs = 27; goto _out;} }
357
361
  goto st27;
358
362
  st27:
359
363
  if ( ++p == pe )
360
364
  goto _test_eof27;
361
365
  case 27:
362
- #line 363 "parser.c"
366
+ #line 367 "parser.c"
363
367
  goto st0;
364
368
  st19:
365
369
  if ( ++p == pe )
@@ -457,7 +461,7 @@ case 26:
457
461
  _out: {}
458
462
  }
459
463
 
460
- #line 183 "parser.rl"
464
+ #line 187 "parser.rl"
461
465
 
462
466
  if (cs >= JSON_object_first_final) {
463
467
  if (json->create_additions) {
@@ -482,7 +486,7 @@ case 26:
482
486
 
483
487
 
484
488
 
485
- #line 486 "parser.c"
489
+ #line 490 "parser.c"
486
490
  enum {JSON_value_start = 1};
487
491
  enum {JSON_value_first_final = 29};
488
492
  enum {JSON_value_error = 0};
@@ -490,7 +494,7 @@ enum {JSON_value_error = 0};
490
494
  enum {JSON_value_en_main = 1};
491
495
 
492
496
 
493
- #line 283 "parser.rl"
497
+ #line 287 "parser.rl"
494
498
 
495
499
 
496
500
  static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -498,14 +502,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
498
502
  int cs = EVIL;
499
503
 
500
504
 
501
- #line 502 "parser.c"
505
+ #line 506 "parser.c"
502
506
  {
503
507
  cs = JSON_value_start;
504
508
  }
505
509
 
506
- #line 290 "parser.rl"
510
+ #line 294 "parser.rl"
507
511
 
508
- #line 509 "parser.c"
512
+ #line 513 "parser.c"
509
513
  {
510
514
  if ( p == pe )
511
515
  goto _test_eof;
@@ -539,14 +543,14 @@ st0:
539
543
  cs = 0;
540
544
  goto _out;
541
545
  tr2:
542
- #line 235 "parser.rl"
546
+ #line 239 "parser.rl"
543
547
  {
544
548
  char *np = JSON_parse_string(json, p, pe, result);
545
549
  if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
546
550
  }
547
551
  goto st29;
548
552
  tr3:
549
- #line 240 "parser.rl"
553
+ #line 244 "parser.rl"
550
554
  {
551
555
  char *np;
552
556
  if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
@@ -555,7 +559,7 @@ tr3:
555
559
  {p = (( p + 10))-1;}
556
560
  p--; {p++; cs = 29; goto _out;}
557
561
  } else {
558
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
562
+ raise_parse_error("unexpected token at '%s'", p);
559
563
  }
560
564
  }
561
565
  np = JSON_parse_float(json, p, pe, result);
@@ -566,7 +570,7 @@ tr3:
566
570
  }
567
571
  goto st29;
568
572
  tr7:
569
- #line 258 "parser.rl"
573
+ #line 262 "parser.rl"
570
574
  {
571
575
  char *np;
572
576
  np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
@@ -574,7 +578,7 @@ tr7:
574
578
  }
575
579
  goto st29;
576
580
  tr11:
577
- #line 264 "parser.rl"
581
+ #line 268 "parser.rl"
578
582
  {
579
583
  char *np;
580
584
  np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
@@ -582,39 +586,39 @@ tr11:
582
586
  }
583
587
  goto st29;
584
588
  tr25:
585
- #line 228 "parser.rl"
589
+ #line 232 "parser.rl"
586
590
  {
587
591
  if (json->allow_nan) {
588
592
  *result = CInfinity;
589
593
  } else {
590
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 7);
594
+ raise_parse_error("unexpected token at '%s'", p - 7);
591
595
  }
592
596
  }
593
597
  goto st29;
594
598
  tr27:
595
- #line 221 "parser.rl"
599
+ #line 225 "parser.rl"
596
600
  {
597
601
  if (json->allow_nan) {
598
602
  *result = CNaN;
599
603
  } else {
600
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 2);
604
+ raise_parse_error("unexpected token at '%s'", p - 2);
601
605
  }
602
606
  }
603
607
  goto st29;
604
608
  tr31:
605
- #line 215 "parser.rl"
609
+ #line 219 "parser.rl"
606
610
  {
607
611
  *result = Qfalse;
608
612
  }
609
613
  goto st29;
610
614
  tr34:
611
- #line 212 "parser.rl"
615
+ #line 216 "parser.rl"
612
616
  {
613
617
  *result = Qnil;
614
618
  }
615
619
  goto st29;
616
620
  tr37:
617
- #line 218 "parser.rl"
621
+ #line 222 "parser.rl"
618
622
  {
619
623
  *result = Qtrue;
620
624
  }
@@ -623,9 +627,9 @@ st29:
623
627
  if ( ++p == pe )
624
628
  goto _test_eof29;
625
629
  case 29:
626
- #line 270 "parser.rl"
630
+ #line 274 "parser.rl"
627
631
  { p--; {p++; cs = 29; goto _out;} }
628
- #line 629 "parser.c"
632
+ #line 633 "parser.c"
629
633
  switch( (*p) ) {
630
634
  case 13: goto st29;
631
635
  case 32: goto st29;
@@ -866,7 +870,7 @@ case 28:
866
870
  _out: {}
867
871
  }
868
872
 
869
- #line 291 "parser.rl"
873
+ #line 295 "parser.rl"
870
874
 
871
875
  if (json->freeze) {
872
876
  OBJ_FREEZE(*result);
@@ -880,7 +884,7 @@ case 28:
880
884
  }
881
885
 
882
886
 
883
- #line 884 "parser.c"
887
+ #line 888 "parser.c"
884
888
  enum {JSON_integer_start = 1};
885
889
  enum {JSON_integer_first_final = 3};
886
890
  enum {JSON_integer_error = 0};
@@ -888,7 +892,7 @@ enum {JSON_integer_error = 0};
888
892
  enum {JSON_integer_en_main = 1};
889
893
 
890
894
 
891
- #line 311 "parser.rl"
895
+ #line 315 "parser.rl"
892
896
 
893
897
 
894
898
  static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -896,15 +900,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
896
900
  int cs = EVIL;
897
901
 
898
902
 
899
- #line 900 "parser.c"
903
+ #line 904 "parser.c"
900
904
  {
901
905
  cs = JSON_integer_start;
902
906
  }
903
907
 
904
- #line 318 "parser.rl"
908
+ #line 322 "parser.rl"
905
909
  json->memo = p;
906
910
 
907
- #line 908 "parser.c"
911
+ #line 912 "parser.c"
908
912
  {
909
913
  if ( p == pe )
910
914
  goto _test_eof;
@@ -938,14 +942,14 @@ case 3:
938
942
  goto st0;
939
943
  goto tr4;
940
944
  tr4:
941
- #line 308 "parser.rl"
945
+ #line 312 "parser.rl"
942
946
  { p--; {p++; cs = 4; goto _out;} }
943
947
  goto st4;
944
948
  st4:
945
949
  if ( ++p == pe )
946
950
  goto _test_eof4;
947
951
  case 4:
948
- #line 949 "parser.c"
952
+ #line 953 "parser.c"
949
953
  goto st0;
950
954
  st5:
951
955
  if ( ++p == pe )
@@ -964,7 +968,7 @@ case 5:
964
968
  _out: {}
965
969
  }
966
970
 
967
- #line 320 "parser.rl"
971
+ #line 324 "parser.rl"
968
972
 
969
973
  if (cs >= JSON_integer_first_final) {
970
974
  long len = p - json->memo;
@@ -979,7 +983,7 @@ case 5:
979
983
  }
980
984
 
981
985
 
982
- #line 983 "parser.c"
986
+ #line 987 "parser.c"
983
987
  enum {JSON_float_start = 1};
984
988
  enum {JSON_float_first_final = 8};
985
989
  enum {JSON_float_error = 0};
@@ -987,7 +991,7 @@ enum {JSON_float_error = 0};
987
991
  enum {JSON_float_en_main = 1};
988
992
 
989
993
 
990
- #line 345 "parser.rl"
994
+ #line 349 "parser.rl"
991
995
 
992
996
 
993
997
  static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -995,15 +999,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
995
999
  int cs = EVIL;
996
1000
 
997
1001
 
998
- #line 999 "parser.c"
1002
+ #line 1003 "parser.c"
999
1003
  {
1000
1004
  cs = JSON_float_start;
1001
1005
  }
1002
1006
 
1003
- #line 352 "parser.rl"
1007
+ #line 356 "parser.rl"
1004
1008
  json->memo = p;
1005
1009
 
1006
- #line 1007 "parser.c"
1010
+ #line 1011 "parser.c"
1007
1011
  {
1008
1012
  if ( p == pe )
1009
1013
  goto _test_eof;
@@ -1061,14 +1065,14 @@ case 8:
1061
1065
  goto st0;
1062
1066
  goto tr9;
1063
1067
  tr9:
1064
- #line 339 "parser.rl"
1068
+ #line 343 "parser.rl"
1065
1069
  { p--; {p++; cs = 9; goto _out;} }
1066
1070
  goto st9;
1067
1071
  st9:
1068
1072
  if ( ++p == pe )
1069
1073
  goto _test_eof9;
1070
1074
  case 9:
1071
- #line 1072 "parser.c"
1075
+ #line 1076 "parser.c"
1072
1076
  goto st0;
1073
1077
  st5:
1074
1078
  if ( ++p == pe )
@@ -1129,34 +1133,36 @@ case 7:
1129
1133
  _out: {}
1130
1134
  }
1131
1135
 
1132
- #line 354 "parser.rl"
1136
+ #line 358 "parser.rl"
1133
1137
 
1134
1138
  if (cs >= JSON_float_first_final) {
1135
1139
  VALUE mod = Qnil;
1136
1140
  ID method_id = 0;
1137
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
1138
- mod = json->decimal_class;
1139
- method_id = i_try_convert;
1140
- } else if (rb_respond_to(json->decimal_class, i_new)) {
1141
- mod = json->decimal_class;
1142
- method_id = i_new;
1143
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1144
- VALUE name = rb_class_name(json->decimal_class);
1145
- const char *name_cstr = RSTRING_PTR(name);
1146
- const char *last_colon = strrchr(name_cstr, ':');
1147
- if (last_colon) {
1148
- const char *mod_path_end = last_colon - 1;
1149
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1150
- mod = rb_path_to_class(mod_path);
1151
-
1152
- const char *method_name_beg = last_colon + 1;
1153
- long before_len = method_name_beg - name_cstr;
1154
- long len = RSTRING_LEN(name) - before_len;
1155
- VALUE method_name = rb_str_substr(name, before_len, len);
1156
- method_id = SYM2ID(rb_str_intern(method_name));
1157
- } else {
1158
- mod = rb_mKernel;
1159
- method_id = SYM2ID(rb_str_intern(name));
1141
+ if (!NIL_P(json->decimal_class)) {
1142
+ if (rb_respond_to(json->decimal_class, i_try_convert)) {
1143
+ mod = json->decimal_class;
1144
+ method_id = i_try_convert;
1145
+ } else if (rb_respond_to(json->decimal_class, i_new)) {
1146
+ mod = json->decimal_class;
1147
+ method_id = i_new;
1148
+ } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1149
+ VALUE name = rb_class_name(json->decimal_class);
1150
+ const char *name_cstr = RSTRING_PTR(name);
1151
+ const char *last_colon = strrchr(name_cstr, ':');
1152
+ if (last_colon) {
1153
+ const char *mod_path_end = last_colon - 1;
1154
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1155
+ mod = rb_path_to_class(mod_path);
1156
+
1157
+ const char *method_name_beg = last_colon + 1;
1158
+ long before_len = method_name_beg - name_cstr;
1159
+ long len = RSTRING_LEN(name) - before_len;
1160
+ VALUE method_name = rb_str_substr(name, before_len, len);
1161
+ method_id = SYM2ID(rb_str_intern(method_name));
1162
+ } else {
1163
+ mod = rb_mKernel;
1164
+ method_id = SYM2ID(rb_str_intern(name));
1165
+ }
1160
1166
  }
1161
1167
  }
1162
1168
 
@@ -1180,7 +1186,7 @@ case 7:
1180
1186
 
1181
1187
 
1182
1188
 
1183
- #line 1184 "parser.c"
1189
+ #line 1190 "parser.c"
1184
1190
  enum {JSON_array_start = 1};
1185
1191
  enum {JSON_array_first_final = 17};
1186
1192
  enum {JSON_array_error = 0};
@@ -1188,7 +1194,7 @@ enum {JSON_array_error = 0};
1188
1194
  enum {JSON_array_en_main = 1};
1189
1195
 
1190
1196
 
1191
- #line 432 "parser.rl"
1197
+ #line 438 "parser.rl"
1192
1198
 
1193
1199
 
1194
1200
  static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -1202,14 +1208,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
1202
1208
  *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
1203
1209
 
1204
1210
 
1205
- #line 1206 "parser.c"
1211
+ #line 1212 "parser.c"
1206
1212
  {
1207
1213
  cs = JSON_array_start;
1208
1214
  }
1209
1215
 
1210
- #line 445 "parser.rl"
1216
+ #line 451 "parser.rl"
1211
1217
 
1212
- #line 1213 "parser.c"
1218
+ #line 1219 "parser.c"
1213
1219
  {
1214
1220
  if ( p == pe )
1215
1221
  goto _test_eof;
@@ -1248,7 +1254,7 @@ case 2:
1248
1254
  goto st2;
1249
1255
  goto st0;
1250
1256
  tr2:
1251
- #line 409 "parser.rl"
1257
+ #line 415 "parser.rl"
1252
1258
  {
1253
1259
  VALUE v = Qnil;
1254
1260
  char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@@ -1268,7 +1274,7 @@ st3:
1268
1274
  if ( ++p == pe )
1269
1275
  goto _test_eof3;
1270
1276
  case 3:
1271
- #line 1272 "parser.c"
1277
+ #line 1278 "parser.c"
1272
1278
  switch( (*p) ) {
1273
1279
  case 13: goto st3;
1274
1280
  case 32: goto st3;
@@ -1368,14 +1374,14 @@ case 12:
1368
1374
  goto st3;
1369
1375
  goto st12;
1370
1376
  tr4:
1371
- #line 424 "parser.rl"
1377
+ #line 430 "parser.rl"
1372
1378
  { p--; {p++; cs = 17; goto _out;} }
1373
1379
  goto st17;
1374
1380
  st17:
1375
1381
  if ( ++p == pe )
1376
1382
  goto _test_eof17;
1377
1383
  case 17:
1378
- #line 1379 "parser.c"
1384
+ #line 1385 "parser.c"
1379
1385
  goto st0;
1380
1386
  st13:
1381
1387
  if ( ++p == pe )
@@ -1431,12 +1437,12 @@ case 16:
1431
1437
  _out: {}
1432
1438
  }
1433
1439
 
1434
- #line 446 "parser.rl"
1440
+ #line 452 "parser.rl"
1435
1441
 
1436
1442
  if(cs >= JSON_array_first_final) {
1437
1443
  return p + 1;
1438
1444
  } else {
1439
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
1445
+ raise_parse_error("unexpected token at '%s'", p);
1440
1446
  return NULL;
1441
1447
  }
1442
1448
  }
@@ -1499,26 +1505,30 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
1499
1505
  if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1500
1506
  ruby_xfree(bufferStart);
1501
1507
  }
1502
- rb_enc_raise(
1503
- EXC_ENCODING eParserError,
1504
- "incomplete unicode character escape sequence at '%s'", p
1505
- );
1508
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
1506
1509
  } else {
1507
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
1510
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
1508
1511
  pe += 3;
1509
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
1512
+ /* To handle values above U+FFFF, we take a sequence of
1513
+ * \uXXXX escapes in the U+D800..U+DBFF then
1514
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
1515
+ * to make a 20-bit number, then add 0x10000 to get the
1516
+ * final codepoint.
1517
+ *
1518
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
1519
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
1520
+ * Area".
1521
+ */
1522
+ if ((ch & 0xFC00) == 0xD800) {
1510
1523
  pe++;
1511
1524
  if (pe > stringEnd - 6) {
1512
1525
  if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1513
1526
  ruby_xfree(bufferStart);
1514
1527
  }
1515
- rb_enc_raise(
1516
- EXC_ENCODING eParserError,
1517
- "incomplete surrogate pair at '%s'", p
1518
- );
1528
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
1519
1529
  }
1520
1530
  if (pe[0] == '\\' && pe[1] == 'u') {
1521
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
1531
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
1522
1532
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
1523
1533
  | (sur & 0x3FF));
1524
1534
  pe += 5;
@@ -1588,7 +1598,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
1588
1598
  }
1589
1599
 
1590
1600
 
1591
- #line 1592 "parser.c"
1601
+ #line 1602 "parser.c"
1592
1602
  enum {JSON_string_start = 1};
1593
1603
  enum {JSON_string_first_final = 8};
1594
1604
  enum {JSON_string_error = 0};
@@ -1596,7 +1606,7 @@ enum {JSON_string_error = 0};
1596
1606
  enum {JSON_string_en_main = 1};
1597
1607
 
1598
1608
 
1599
- #line 620 "parser.rl"
1609
+ #line 630 "parser.rl"
1600
1610
 
1601
1611
 
1602
1612
  static int
@@ -1617,15 +1627,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
1617
1627
  VALUE match_string;
1618
1628
 
1619
1629
 
1620
- #line 1621 "parser.c"
1630
+ #line 1631 "parser.c"
1621
1631
  {
1622
1632
  cs = JSON_string_start;
1623
1633
  }
1624
1634
 
1625
- #line 640 "parser.rl"
1635
+ #line 650 "parser.rl"
1626
1636
  json->memo = p;
1627
1637
 
1628
- #line 1629 "parser.c"
1638
+ #line 1639 "parser.c"
1629
1639
  {
1630
1640
  if ( p == pe )
1631
1641
  goto _test_eof;
@@ -1650,7 +1660,7 @@ case 2:
1650
1660
  goto st0;
1651
1661
  goto st2;
1652
1662
  tr2:
1653
- #line 607 "parser.rl"
1663
+ #line 617 "parser.rl"
1654
1664
  {
1655
1665
  *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1656
1666
  if (NIL_P(*result)) {
@@ -1660,14 +1670,14 @@ tr2:
1660
1670
  {p = (( p + 1))-1;}
1661
1671
  }
1662
1672
  }
1663
- #line 617 "parser.rl"
1673
+ #line 627 "parser.rl"
1664
1674
  { p--; {p++; cs = 8; goto _out;} }
1665
1675
  goto st8;
1666
1676
  st8:
1667
1677
  if ( ++p == pe )
1668
1678
  goto _test_eof8;
1669
1679
  case 8:
1670
- #line 1671 "parser.c"
1680
+ #line 1681 "parser.c"
1671
1681
  goto st0;
1672
1682
  st3:
1673
1683
  if ( ++p == pe )
@@ -1743,7 +1753,7 @@ case 7:
1743
1753
  _out: {}
1744
1754
  }
1745
1755
 
1746
- #line 642 "parser.rl"
1756
+ #line 652 "parser.rl"
1747
1757
 
1748
1758
  if (json->create_additions && RTEST(match_string = json->match_string)) {
1749
1759
  VALUE klass;
@@ -1777,18 +1787,20 @@ case 7:
1777
1787
 
1778
1788
  static VALUE convert_encoding(VALUE source)
1779
1789
  {
1780
- #ifdef HAVE_RUBY_ENCODING_H
1781
- rb_encoding *enc = rb_enc_get(source);
1782
- if (enc == rb_ascii8bit_encoding()) {
1783
- if (OBJ_FROZEN(source)) {
1784
- source = rb_str_dup(source);
1785
- }
1786
- FORCE_UTF8(source);
1787
- } else {
1788
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
1789
- }
1790
- #endif
1790
+ int encindex = RB_ENCODING_GET(source);
1791
+
1792
+ if (encindex == utf8_encindex) {
1791
1793
  return source;
1794
+ }
1795
+
1796
+ if (encindex == binary_encindex) {
1797
+ // For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
1798
+ // TODO: Deprecate in 2.8.0
1799
+ // TODO: Remove in 3.0.0
1800
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1801
+ }
1802
+
1803
+ return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
1792
1804
  }
1793
1805
 
1794
1806
  /*
@@ -1813,8 +1825,15 @@ static VALUE convert_encoding(VALUE source)
1813
1825
  * * *create_additions*: If set to false, the Parser doesn't create
1814
1826
  * additions even if a matching class and create_id was found. This option
1815
1827
  * defaults to false.
1816
- * * *object_class*: Defaults to Hash
1817
- * * *array_class*: Defaults to Array
1828
+ * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1829
+ * instead of Hash to represent JSON objects. The type must respond to
1830
+ * +new+ without arguments, and return an object that responds to +[]=+.
1831
+ * * *array_class*: Defaults to Array. If another type is provided, it will be used
1832
+ * instead of Array to represent JSON arrays. The type must respond to
1833
+ * +new+ without arguments, and return an object that responds to +<<+.
1834
+ * * *decimal_class*: Specifies which class to use instead of the default
1835
+ * (Float) when parsing decimal numbers. This class must accept a single
1836
+ * string argument in its constructor.
1818
1837
  */
1819
1838
  static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1820
1839
  {
@@ -1824,80 +1843,91 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1824
1843
  if (json->Vsource) {
1825
1844
  rb_raise(rb_eTypeError, "already initialized instance");
1826
1845
  }
1827
- rb_scan_args(argc, argv, "1:", &source, &opts);
1846
+
1847
+ rb_check_arity(argc, 1, 2);
1848
+ source = argv[0];
1849
+ opts = Qnil;
1850
+ if (argc == 2) {
1851
+ opts = argv[1];
1852
+ Check_Type(argv[1], T_HASH);
1853
+ if (RHASH_SIZE(argv[1]) > 0) {
1854
+ opts = argv[1];
1855
+ }
1856
+ }
1857
+
1828
1858
  if (!NIL_P(opts)) {
1829
- VALUE tmp = ID2SYM(i_max_nesting);
1830
- if (option_given_p(opts, tmp)) {
1831
- VALUE max_nesting = rb_hash_aref(opts, tmp);
1832
- if (RTEST(max_nesting)) {
1833
- Check_Type(max_nesting, T_FIXNUM);
1834
- json->max_nesting = FIX2INT(max_nesting);
1835
- } else {
1836
- json->max_nesting = 0;
1837
- }
1838
- } else {
1839
- json->max_nesting = 100;
1840
- }
1841
- tmp = ID2SYM(i_allow_nan);
1842
- if (option_given_p(opts, tmp)) {
1843
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1844
- } else {
1845
- json->allow_nan = 0;
1846
- }
1847
- tmp = ID2SYM(i_symbolize_names);
1848
- if (option_given_p(opts, tmp)) {
1849
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1850
- } else {
1851
- json->symbolize_names = 0;
1852
- }
1853
- tmp = ID2SYM(i_freeze);
1854
- if (option_given_p(opts, tmp)) {
1855
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1856
- } else {
1857
- json->freeze = 0;
1858
- }
1859
- tmp = ID2SYM(i_create_additions);
1860
- if (option_given_p(opts, tmp)) {
1861
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
1862
- } else {
1863
- json->create_additions = 0;
1864
- }
1865
- if (json->symbolize_names && json->create_additions) {
1866
- rb_raise(rb_eArgError,
1867
- "options :symbolize_names and :create_additions cannot be "
1868
- " used in conjunction");
1869
- }
1870
- tmp = ID2SYM(i_create_id);
1871
- if (option_given_p(opts, tmp)) {
1872
- json->create_id = rb_hash_aref(opts, tmp);
1873
- } else {
1874
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
1875
- }
1876
- tmp = ID2SYM(i_object_class);
1877
- if (option_given_p(opts, tmp)) {
1878
- json->object_class = rb_hash_aref(opts, tmp);
1879
- } else {
1880
- json->object_class = Qnil;
1881
- }
1882
- tmp = ID2SYM(i_array_class);
1883
- if (option_given_p(opts, tmp)) {
1884
- json->array_class = rb_hash_aref(opts, tmp);
1885
- } else {
1886
- json->array_class = Qnil;
1887
- }
1888
- tmp = ID2SYM(i_decimal_class);
1889
- if (option_given_p(opts, tmp)) {
1890
- json->decimal_class = rb_hash_aref(opts, tmp);
1891
- } else {
1892
- json->decimal_class = Qnil;
1893
- }
1894
- tmp = ID2SYM(i_match_string);
1895
- if (option_given_p(opts, tmp)) {
1896
- VALUE match_string = rb_hash_aref(opts, tmp);
1897
- json->match_string = RTEST(match_string) ? match_string : Qnil;
1898
- } else {
1899
- json->match_string = Qnil;
1900
- }
1859
+ VALUE tmp = ID2SYM(i_max_nesting);
1860
+ if (option_given_p(opts, tmp)) {
1861
+ VALUE max_nesting = rb_hash_aref(opts, tmp);
1862
+ if (RTEST(max_nesting)) {
1863
+ Check_Type(max_nesting, T_FIXNUM);
1864
+ json->max_nesting = FIX2INT(max_nesting);
1865
+ } else {
1866
+ json->max_nesting = 0;
1867
+ }
1868
+ } else {
1869
+ json->max_nesting = 100;
1870
+ }
1871
+ tmp = ID2SYM(i_allow_nan);
1872
+ if (option_given_p(opts, tmp)) {
1873
+ json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1874
+ } else {
1875
+ json->allow_nan = 0;
1876
+ }
1877
+ tmp = ID2SYM(i_symbolize_names);
1878
+ if (option_given_p(opts, tmp)) {
1879
+ json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1880
+ } else {
1881
+ json->symbolize_names = 0;
1882
+ }
1883
+ tmp = ID2SYM(i_freeze);
1884
+ if (option_given_p(opts, tmp)) {
1885
+ json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1886
+ } else {
1887
+ json->freeze = 0;
1888
+ }
1889
+ tmp = ID2SYM(i_create_additions);
1890
+ if (option_given_p(opts, tmp)) {
1891
+ json->create_additions = RTEST(rb_hash_aref(opts, tmp));
1892
+ } else {
1893
+ json->create_additions = 0;
1894
+ }
1895
+ if (json->symbolize_names && json->create_additions) {
1896
+ rb_raise(rb_eArgError,
1897
+ "options :symbolize_names and :create_additions cannot be "
1898
+ " used in conjunction");
1899
+ }
1900
+ tmp = ID2SYM(i_create_id);
1901
+ if (option_given_p(opts, tmp)) {
1902
+ json->create_id = rb_hash_aref(opts, tmp);
1903
+ } else {
1904
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
1905
+ }
1906
+ tmp = ID2SYM(i_object_class);
1907
+ if (option_given_p(opts, tmp)) {
1908
+ json->object_class = rb_hash_aref(opts, tmp);
1909
+ } else {
1910
+ json->object_class = Qnil;
1911
+ }
1912
+ tmp = ID2SYM(i_array_class);
1913
+ if (option_given_p(opts, tmp)) {
1914
+ json->array_class = rb_hash_aref(opts, tmp);
1915
+ } else {
1916
+ json->array_class = Qnil;
1917
+ }
1918
+ tmp = ID2SYM(i_decimal_class);
1919
+ if (option_given_p(opts, tmp)) {
1920
+ json->decimal_class = rb_hash_aref(opts, tmp);
1921
+ } else {
1922
+ json->decimal_class = Qnil;
1923
+ }
1924
+ tmp = ID2SYM(i_match_string);
1925
+ if (option_given_p(opts, tmp)) {
1926
+ VALUE match_string = rb_hash_aref(opts, tmp);
1927
+ json->match_string = RTEST(match_string) ? match_string : Qnil;
1928
+ } else {
1929
+ json->match_string = Qnil;
1930
+ }
1901
1931
  } else {
1902
1932
  json->max_nesting = 100;
1903
1933
  json->allow_nan = 0;
@@ -1916,7 +1946,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1916
1946
  }
1917
1947
 
1918
1948
 
1919
- #line 1920 "parser.c"
1949
+ #line 1950 "parser.c"
1920
1950
  enum {JSON_start = 1};
1921
1951
  enum {JSON_first_final = 10};
1922
1952
  enum {JSON_error = 0};
@@ -1924,7 +1954,7 @@ enum {JSON_error = 0};
1924
1954
  enum {JSON_en_main = 1};
1925
1955
 
1926
1956
 
1927
- #line 828 "parser.rl"
1957
+ #line 858 "parser.rl"
1928
1958
 
1929
1959
 
1930
1960
  /*
@@ -1936,22 +1966,22 @@ enum {JSON_en_main = 1};
1936
1966
  */
1937
1967
  static VALUE cParser_parse(VALUE self)
1938
1968
  {
1939
- char *p, *pe;
1940
- int cs = EVIL;
1941
- VALUE result = Qnil;
1942
- GET_PARSER;
1969
+ char *p, *pe;
1970
+ int cs = EVIL;
1971
+ VALUE result = Qnil;
1972
+ GET_PARSER;
1943
1973
 
1944
1974
 
1945
- #line 1946 "parser.c"
1975
+ #line 1976 "parser.c"
1946
1976
  {
1947
1977
  cs = JSON_start;
1948
1978
  }
1949
1979
 
1950
- #line 845 "parser.rl"
1951
- p = json->source;
1952
- pe = p + json->len;
1980
+ #line 875 "parser.rl"
1981
+ p = json->source;
1982
+ pe = p + json->len;
1953
1983
 
1954
- #line 1955 "parser.c"
1984
+ #line 1985 "parser.c"
1955
1985
  {
1956
1986
  if ( p == pe )
1957
1987
  goto _test_eof;
@@ -1985,7 +2015,7 @@ st0:
1985
2015
  cs = 0;
1986
2016
  goto _out;
1987
2017
  tr2:
1988
- #line 820 "parser.rl"
2018
+ #line 850 "parser.rl"
1989
2019
  {
1990
2020
  char *np = JSON_parse_value(json, p, pe, &result, 0);
1991
2021
  if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -1995,7 +2025,7 @@ st10:
1995
2025
  if ( ++p == pe )
1996
2026
  goto _test_eof10;
1997
2027
  case 10:
1998
- #line 1999 "parser.c"
2028
+ #line 2029 "parser.c"
1999
2029
  switch( (*p) ) {
2000
2030
  case 13: goto st10;
2001
2031
  case 32: goto st10;
@@ -2084,25 +2114,25 @@ case 9:
2084
2114
  _out: {}
2085
2115
  }
2086
2116
 
2087
- #line 848 "parser.rl"
2117
+ #line 878 "parser.rl"
2088
2118
 
2089
- if (cs >= JSON_first_final && p == pe) {
2090
- return result;
2091
- } else {
2092
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
2093
- return Qnil;
2094
- }
2119
+ if (cs >= JSON_first_final && p == pe) {
2120
+ return result;
2121
+ } else {
2122
+ raise_parse_error("unexpected token at '%s'", p);
2123
+ return Qnil;
2124
+ }
2095
2125
  }
2096
2126
 
2097
2127
  static void JSON_mark(void *ptr)
2098
2128
  {
2099
2129
  JSON_Parser *json = ptr;
2100
- rb_gc_mark_maybe(json->Vsource);
2101
- rb_gc_mark_maybe(json->create_id);
2102
- rb_gc_mark_maybe(json->object_class);
2103
- rb_gc_mark_maybe(json->array_class);
2104
- rb_gc_mark_maybe(json->decimal_class);
2105
- rb_gc_mark_maybe(json->match_string);
2130
+ rb_gc_mark(json->Vsource);
2131
+ rb_gc_mark(json->create_id);
2132
+ rb_gc_mark(json->object_class);
2133
+ rb_gc_mark(json->array_class);
2134
+ rb_gc_mark(json->decimal_class);
2135
+ rb_gc_mark(json->match_string);
2106
2136
  }
2107
2137
 
2108
2138
  static void JSON_free(void *ptr)
@@ -2118,16 +2148,12 @@ static size_t JSON_memsize(const void *ptr)
2118
2148
  return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
2119
2149
  }
2120
2150
 
2121
- #ifdef NEW_TYPEDDATA_WRAPPER
2122
2151
  static const rb_data_type_t JSON_Parser_type = {
2123
2152
  "JSON/Parser",
2124
2153
  {JSON_mark, JSON_free, JSON_memsize,},
2125
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
2126
2154
  0, 0,
2127
2155
  RUBY_TYPED_FREE_IMMEDIATELY,
2128
- #endif
2129
2156
  };
2130
- #endif
2131
2157
 
2132
2158
  static VALUE cJSON_parser_s_allocate(VALUE klass)
2133
2159
  {
@@ -2160,9 +2186,7 @@ void Init_parser(void)
2160
2186
  mJSON = rb_define_module("JSON");
2161
2187
  mExt = rb_define_module_under(mJSON, "Ext");
2162
2188
  cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
2163
- eParserError = rb_path2class("JSON::ParserError");
2164
2189
  eNestingError = rb_path2class("JSON::NestingError");
2165
- rb_gc_register_mark_object(eParserError);
2166
2190
  rb_gc_register_mark_object(eNestingError);
2167
2191
  rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
2168
2192
  rb_define_method(cParser, "initialize", cParser_initialize, -1);
@@ -2191,7 +2215,6 @@ void Init_parser(void)
2191
2215
  i_decimal_class = rb_intern("decimal_class");
2192
2216
  i_match = rb_intern("match");
2193
2217
  i_match_string = rb_intern("match_string");
2194
- i_key_p = rb_intern("key?");
2195
2218
  i_deep_const_get = rb_intern("deep_const_get");
2196
2219
  i_aset = rb_intern("[]=");
2197
2220
  i_aref = rb_intern("[]");
@@ -2200,6 +2223,9 @@ void Init_parser(void)
2200
2223
  i_try_convert = rb_intern("try_convert");
2201
2224
  i_freeze = rb_intern("freeze");
2202
2225
  i_uminus = rb_intern("-@");
2226
+
2227
+ binary_encindex = rb_ascii8bit_encindex();
2228
+ utf8_encindex = rb_utf8_encindex();
2203
2229
  }
2204
2230
 
2205
2231
  /*