json 2.7.2 → 2.7.3.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,28 +3,6 @@
3
3
  #include "../fbuffer/fbuffer.h"
4
4
  #include "parser.h"
5
5
 
6
- #if defined HAVE_RUBY_ENCODING_H
7
- # define EXC_ENCODING rb_utf8_encoding(),
8
- # ifndef HAVE_RB_ENC_RAISE
9
- static void
10
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
11
- {
12
- va_list args;
13
- VALUE mesg;
14
-
15
- va_start(args, fmt);
16
- mesg = rb_enc_vsprintf(enc, fmt, args);
17
- va_end(args);
18
-
19
- rb_exc_raise(rb_exc_new3(exc, mesg));
20
- }
21
- # define rb_enc_raise enc_raise
22
- # endif
23
- #else
24
- # define EXC_ENCODING /* nothing */
25
- # define rb_enc_raise rb_raise
26
- #endif
27
-
28
6
  /* unicode */
29
7
 
30
8
  static const signed char digit_values[256] = {
@@ -44,26 +22,28 @@ static const signed char digit_values[256] = {
44
22
  -1, -1, -1, -1, -1, -1, -1
45
23
  };
46
24
 
47
- static UTF32 unescape_unicode(const unsigned char *p)
25
+ static uint32_t unescape_unicode(const unsigned char *p)
48
26
  {
27
+ const uint32_t replacement_char = 0xFFFD;
28
+
49
29
  signed char b;
50
- UTF32 result = 0;
30
+ uint32_t result = 0;
51
31
  b = digit_values[p[0]];
52
- if (b < 0) return UNI_REPLACEMENT_CHAR;
32
+ if (b < 0) return replacement_char;
53
33
  result = (result << 4) | (unsigned char)b;
54
34
  b = digit_values[p[1]];
55
- if (b < 0) return UNI_REPLACEMENT_CHAR;
35
+ if (b < 0) return replacement_char;
56
36
  result = (result << 4) | (unsigned char)b;
57
37
  b = digit_values[p[2]];
58
- if (b < 0) return UNI_REPLACEMENT_CHAR;
38
+ if (b < 0) return replacement_char;
59
39
  result = (result << 4) | (unsigned char)b;
60
40
  b = digit_values[p[3]];
61
- if (b < 0) return UNI_REPLACEMENT_CHAR;
41
+ if (b < 0) return replacement_char;
62
42
  result = (result << 4) | (unsigned char)b;
63
43
  return result;
64
44
  }
65
45
 
66
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
46
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
67
47
  {
68
48
  int len = 1;
69
49
  if (ch <= 0x7F) {
@@ -89,21 +69,45 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
89
69
  return len;
90
70
  }
91
71
 
92
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
72
+ #define PARSE_ERROR_FRAGMENT_LEN 32
73
+ #ifdef RBIMPL_ATTR_NORETURN
74
+ RBIMPL_ATTR_NORETURN()
75
+ #endif
76
+ static void raise_parse_error(const char *format, const char *start)
77
+ {
78
+ char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
79
+
80
+ size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
81
+ const char *ptr = start;
82
+
83
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
84
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
85
+ buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
86
+ ptr = buffer;
87
+ }
88
+
89
+ rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
90
+ }
91
+
92
+ static VALUE mJSON, mExt, cParser, eNestingError;
93
93
  static VALUE CNaN, CInfinity, CMinusInfinity;
94
94
 
95
95
  static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
96
96
  i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
97
- i_object_class, i_array_class, i_decimal_class, i_key_p,
97
+ i_object_class, i_array_class, i_decimal_class,
98
98
  i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
99
99
  i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
100
100
 
101
+ static int binary_encindex;
102
+ static int utf8_encindex;
103
+
101
104
 
102
- #line 125 "parser.rl"
103
105
 
106
+ #line 129 "parser.rl"
104
107
 
105
108
 
106
- #line 107 "parser.c"
109
+
110
+ #line 111 "parser.c"
107
111
  enum {JSON_object_start = 1};
108
112
  enum {JSON_object_first_final = 27};
109
113
  enum {JSON_object_error = 0};
@@ -111,7 +115,7 @@ enum {JSON_object_error = 0};
111
115
  enum {JSON_object_en_main = 1};
112
116
 
113
117
 
114
- #line 167 "parser.rl"
118
+ #line 171 "parser.rl"
115
119
 
116
120
 
117
121
  static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -127,14 +131,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
127
131
  *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
128
132
 
129
133
 
130
- #line 131 "parser.c"
134
+ #line 135 "parser.c"
131
135
  {
132
136
  cs = JSON_object_start;
133
137
  }
134
138
 
135
- #line 182 "parser.rl"
139
+ #line 186 "parser.rl"
136
140
 
137
- #line 138 "parser.c"
141
+ #line 142 "parser.c"
138
142
  {
139
143
  if ( p == pe )
140
144
  goto _test_eof;
@@ -162,7 +166,7 @@ case 2:
162
166
  goto st2;
163
167
  goto st0;
164
168
  tr2:
165
- #line 149 "parser.rl"
169
+ #line 153 "parser.rl"
166
170
  {
167
171
  char *np;
168
172
  json->parsing_name = 1;
@@ -175,7 +179,7 @@ st3:
175
179
  if ( ++p == pe )
176
180
  goto _test_eof3;
177
181
  case 3:
178
- #line 179 "parser.c"
182
+ #line 183 "parser.c"
179
183
  switch( (*p) ) {
180
184
  case 13: goto st3;
181
185
  case 32: goto st3;
@@ -242,7 +246,7 @@ case 8:
242
246
  goto st8;
243
247
  goto st0;
244
248
  tr11:
245
- #line 133 "parser.rl"
249
+ #line 137 "parser.rl"
246
250
  {
247
251
  VALUE v = Qnil;
248
252
  char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@@ -263,7 +267,7 @@ st9:
263
267
  if ( ++p == pe )
264
268
  goto _test_eof9;
265
269
  case 9:
266
- #line 267 "parser.c"
270
+ #line 271 "parser.c"
267
271
  switch( (*p) ) {
268
272
  case 13: goto st9;
269
273
  case 32: goto st9;
@@ -352,14 +356,14 @@ case 18:
352
356
  goto st9;
353
357
  goto st18;
354
358
  tr4:
355
- #line 157 "parser.rl"
359
+ #line 161 "parser.rl"
356
360
  { p--; {p++; cs = 27; goto _out;} }
357
361
  goto st27;
358
362
  st27:
359
363
  if ( ++p == pe )
360
364
  goto _test_eof27;
361
365
  case 27:
362
- #line 363 "parser.c"
366
+ #line 367 "parser.c"
363
367
  goto st0;
364
368
  st19:
365
369
  if ( ++p == pe )
@@ -457,7 +461,7 @@ case 26:
457
461
  _out: {}
458
462
  }
459
463
 
460
- #line 183 "parser.rl"
464
+ #line 187 "parser.rl"
461
465
 
462
466
  if (cs >= JSON_object_first_final) {
463
467
  if (json->create_additions) {
@@ -482,7 +486,7 @@ case 26:
482
486
 
483
487
 
484
488
 
485
- #line 486 "parser.c"
489
+ #line 490 "parser.c"
486
490
  enum {JSON_value_start = 1};
487
491
  enum {JSON_value_first_final = 29};
488
492
  enum {JSON_value_error = 0};
@@ -490,7 +494,7 @@ enum {JSON_value_error = 0};
490
494
  enum {JSON_value_en_main = 1};
491
495
 
492
496
 
493
- #line 283 "parser.rl"
497
+ #line 287 "parser.rl"
494
498
 
495
499
 
496
500
  static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -498,14 +502,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
498
502
  int cs = EVIL;
499
503
 
500
504
 
501
- #line 502 "parser.c"
505
+ #line 506 "parser.c"
502
506
  {
503
507
  cs = JSON_value_start;
504
508
  }
505
509
 
506
- #line 290 "parser.rl"
510
+ #line 294 "parser.rl"
507
511
 
508
- #line 509 "parser.c"
512
+ #line 513 "parser.c"
509
513
  {
510
514
  if ( p == pe )
511
515
  goto _test_eof;
@@ -539,14 +543,14 @@ st0:
539
543
  cs = 0;
540
544
  goto _out;
541
545
  tr2:
542
- #line 235 "parser.rl"
546
+ #line 239 "parser.rl"
543
547
  {
544
548
  char *np = JSON_parse_string(json, p, pe, result);
545
549
  if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
546
550
  }
547
551
  goto st29;
548
552
  tr3:
549
- #line 240 "parser.rl"
553
+ #line 244 "parser.rl"
550
554
  {
551
555
  char *np;
552
556
  if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
@@ -555,7 +559,7 @@ tr3:
555
559
  {p = (( p + 10))-1;}
556
560
  p--; {p++; cs = 29; goto _out;}
557
561
  } else {
558
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
562
+ raise_parse_error("unexpected token at '%s'", p);
559
563
  }
560
564
  }
561
565
  np = JSON_parse_float(json, p, pe, result);
@@ -566,7 +570,7 @@ tr3:
566
570
  }
567
571
  goto st29;
568
572
  tr7:
569
- #line 258 "parser.rl"
573
+ #line 262 "parser.rl"
570
574
  {
571
575
  char *np;
572
576
  np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
@@ -574,7 +578,7 @@ tr7:
574
578
  }
575
579
  goto st29;
576
580
  tr11:
577
- #line 264 "parser.rl"
581
+ #line 268 "parser.rl"
578
582
  {
579
583
  char *np;
580
584
  np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
@@ -582,39 +586,39 @@ tr11:
582
586
  }
583
587
  goto st29;
584
588
  tr25:
585
- #line 228 "parser.rl"
589
+ #line 232 "parser.rl"
586
590
  {
587
591
  if (json->allow_nan) {
588
592
  *result = CInfinity;
589
593
  } else {
590
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 7);
594
+ raise_parse_error("unexpected token at '%s'", p - 7);
591
595
  }
592
596
  }
593
597
  goto st29;
594
598
  tr27:
595
- #line 221 "parser.rl"
599
+ #line 225 "parser.rl"
596
600
  {
597
601
  if (json->allow_nan) {
598
602
  *result = CNaN;
599
603
  } else {
600
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 2);
604
+ raise_parse_error("unexpected token at '%s'", p - 2);
601
605
  }
602
606
  }
603
607
  goto st29;
604
608
  tr31:
605
- #line 215 "parser.rl"
609
+ #line 219 "parser.rl"
606
610
  {
607
611
  *result = Qfalse;
608
612
  }
609
613
  goto st29;
610
614
  tr34:
611
- #line 212 "parser.rl"
615
+ #line 216 "parser.rl"
612
616
  {
613
617
  *result = Qnil;
614
618
  }
615
619
  goto st29;
616
620
  tr37:
617
- #line 218 "parser.rl"
621
+ #line 222 "parser.rl"
618
622
  {
619
623
  *result = Qtrue;
620
624
  }
@@ -623,9 +627,9 @@ st29:
623
627
  if ( ++p == pe )
624
628
  goto _test_eof29;
625
629
  case 29:
626
- #line 270 "parser.rl"
630
+ #line 274 "parser.rl"
627
631
  { p--; {p++; cs = 29; goto _out;} }
628
- #line 629 "parser.c"
632
+ #line 633 "parser.c"
629
633
  switch( (*p) ) {
630
634
  case 13: goto st29;
631
635
  case 32: goto st29;
@@ -866,7 +870,7 @@ case 28:
866
870
  _out: {}
867
871
  }
868
872
 
869
- #line 291 "parser.rl"
873
+ #line 295 "parser.rl"
870
874
 
871
875
  if (json->freeze) {
872
876
  OBJ_FREEZE(*result);
@@ -880,7 +884,7 @@ case 28:
880
884
  }
881
885
 
882
886
 
883
- #line 884 "parser.c"
887
+ #line 888 "parser.c"
884
888
  enum {JSON_integer_start = 1};
885
889
  enum {JSON_integer_first_final = 3};
886
890
  enum {JSON_integer_error = 0};
@@ -888,7 +892,7 @@ enum {JSON_integer_error = 0};
888
892
  enum {JSON_integer_en_main = 1};
889
893
 
890
894
 
891
- #line 311 "parser.rl"
895
+ #line 315 "parser.rl"
892
896
 
893
897
 
894
898
  static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -896,15 +900,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
896
900
  int cs = EVIL;
897
901
 
898
902
 
899
- #line 900 "parser.c"
903
+ #line 904 "parser.c"
900
904
  {
901
905
  cs = JSON_integer_start;
902
906
  }
903
907
 
904
- #line 318 "parser.rl"
908
+ #line 322 "parser.rl"
905
909
  json->memo = p;
906
910
 
907
- #line 908 "parser.c"
911
+ #line 912 "parser.c"
908
912
  {
909
913
  if ( p == pe )
910
914
  goto _test_eof;
@@ -938,14 +942,14 @@ case 3:
938
942
  goto st0;
939
943
  goto tr4;
940
944
  tr4:
941
- #line 308 "parser.rl"
945
+ #line 312 "parser.rl"
942
946
  { p--; {p++; cs = 4; goto _out;} }
943
947
  goto st4;
944
948
  st4:
945
949
  if ( ++p == pe )
946
950
  goto _test_eof4;
947
951
  case 4:
948
- #line 949 "parser.c"
952
+ #line 953 "parser.c"
949
953
  goto st0;
950
954
  st5:
951
955
  if ( ++p == pe )
@@ -964,7 +968,7 @@ case 5:
964
968
  _out: {}
965
969
  }
966
970
 
967
- #line 320 "parser.rl"
971
+ #line 324 "parser.rl"
968
972
 
969
973
  if (cs >= JSON_integer_first_final) {
970
974
  long len = p - json->memo;
@@ -979,7 +983,7 @@ case 5:
979
983
  }
980
984
 
981
985
 
982
- #line 983 "parser.c"
986
+ #line 987 "parser.c"
983
987
  enum {JSON_float_start = 1};
984
988
  enum {JSON_float_first_final = 8};
985
989
  enum {JSON_float_error = 0};
@@ -987,7 +991,7 @@ enum {JSON_float_error = 0};
987
991
  enum {JSON_float_en_main = 1};
988
992
 
989
993
 
990
- #line 345 "parser.rl"
994
+ #line 349 "parser.rl"
991
995
 
992
996
 
993
997
  static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -995,15 +999,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
995
999
  int cs = EVIL;
996
1000
 
997
1001
 
998
- #line 999 "parser.c"
1002
+ #line 1003 "parser.c"
999
1003
  {
1000
1004
  cs = JSON_float_start;
1001
1005
  }
1002
1006
 
1003
- #line 352 "parser.rl"
1007
+ #line 356 "parser.rl"
1004
1008
  json->memo = p;
1005
1009
 
1006
- #line 1007 "parser.c"
1010
+ #line 1011 "parser.c"
1007
1011
  {
1008
1012
  if ( p == pe )
1009
1013
  goto _test_eof;
@@ -1061,14 +1065,14 @@ case 8:
1061
1065
  goto st0;
1062
1066
  goto tr9;
1063
1067
  tr9:
1064
- #line 339 "parser.rl"
1068
+ #line 343 "parser.rl"
1065
1069
  { p--; {p++; cs = 9; goto _out;} }
1066
1070
  goto st9;
1067
1071
  st9:
1068
1072
  if ( ++p == pe )
1069
1073
  goto _test_eof9;
1070
1074
  case 9:
1071
- #line 1072 "parser.c"
1075
+ #line 1076 "parser.c"
1072
1076
  goto st0;
1073
1077
  st5:
1074
1078
  if ( ++p == pe )
@@ -1129,34 +1133,36 @@ case 7:
1129
1133
  _out: {}
1130
1134
  }
1131
1135
 
1132
- #line 354 "parser.rl"
1136
+ #line 358 "parser.rl"
1133
1137
 
1134
1138
  if (cs >= JSON_float_first_final) {
1135
1139
  VALUE mod = Qnil;
1136
1140
  ID method_id = 0;
1137
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
1138
- mod = json->decimal_class;
1139
- method_id = i_try_convert;
1140
- } else if (rb_respond_to(json->decimal_class, i_new)) {
1141
- mod = json->decimal_class;
1142
- method_id = i_new;
1143
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1144
- VALUE name = rb_class_name(json->decimal_class);
1145
- const char *name_cstr = RSTRING_PTR(name);
1146
- const char *last_colon = strrchr(name_cstr, ':');
1147
- if (last_colon) {
1148
- const char *mod_path_end = last_colon - 1;
1149
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1150
- mod = rb_path_to_class(mod_path);
1151
-
1152
- const char *method_name_beg = last_colon + 1;
1153
- long before_len = method_name_beg - name_cstr;
1154
- long len = RSTRING_LEN(name) - before_len;
1155
- VALUE method_name = rb_str_substr(name, before_len, len);
1156
- method_id = SYM2ID(rb_str_intern(method_name));
1157
- } else {
1158
- mod = rb_mKernel;
1159
- method_id = SYM2ID(rb_str_intern(name));
1141
+ if (!NIL_P(json->decimal_class)) {
1142
+ if (rb_respond_to(json->decimal_class, i_try_convert)) {
1143
+ mod = json->decimal_class;
1144
+ method_id = i_try_convert;
1145
+ } else if (rb_respond_to(json->decimal_class, i_new)) {
1146
+ mod = json->decimal_class;
1147
+ method_id = i_new;
1148
+ } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
1149
+ VALUE name = rb_class_name(json->decimal_class);
1150
+ const char *name_cstr = RSTRING_PTR(name);
1151
+ const char *last_colon = strrchr(name_cstr, ':');
1152
+ if (last_colon) {
1153
+ const char *mod_path_end = last_colon - 1;
1154
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
1155
+ mod = rb_path_to_class(mod_path);
1156
+
1157
+ const char *method_name_beg = last_colon + 1;
1158
+ long before_len = method_name_beg - name_cstr;
1159
+ long len = RSTRING_LEN(name) - before_len;
1160
+ VALUE method_name = rb_str_substr(name, before_len, len);
1161
+ method_id = SYM2ID(rb_str_intern(method_name));
1162
+ } else {
1163
+ mod = rb_mKernel;
1164
+ method_id = SYM2ID(rb_str_intern(name));
1165
+ }
1160
1166
  }
1161
1167
  }
1162
1168
 
@@ -1180,7 +1186,7 @@ case 7:
1180
1186
 
1181
1187
 
1182
1188
 
1183
- #line 1184 "parser.c"
1189
+ #line 1190 "parser.c"
1184
1190
  enum {JSON_array_start = 1};
1185
1191
  enum {JSON_array_first_final = 17};
1186
1192
  enum {JSON_array_error = 0};
@@ -1188,7 +1194,7 @@ enum {JSON_array_error = 0};
1188
1194
  enum {JSON_array_en_main = 1};
1189
1195
 
1190
1196
 
1191
- #line 432 "parser.rl"
1197
+ #line 438 "parser.rl"
1192
1198
 
1193
1199
 
1194
1200
  static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -1202,14 +1208,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
1202
1208
  *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
1203
1209
 
1204
1210
 
1205
- #line 1206 "parser.c"
1211
+ #line 1212 "parser.c"
1206
1212
  {
1207
1213
  cs = JSON_array_start;
1208
1214
  }
1209
1215
 
1210
- #line 445 "parser.rl"
1216
+ #line 451 "parser.rl"
1211
1217
 
1212
- #line 1213 "parser.c"
1218
+ #line 1219 "parser.c"
1213
1219
  {
1214
1220
  if ( p == pe )
1215
1221
  goto _test_eof;
@@ -1248,7 +1254,7 @@ case 2:
1248
1254
  goto st2;
1249
1255
  goto st0;
1250
1256
  tr2:
1251
- #line 409 "parser.rl"
1257
+ #line 415 "parser.rl"
1252
1258
  {
1253
1259
  VALUE v = Qnil;
1254
1260
  char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
@@ -1268,7 +1274,7 @@ st3:
1268
1274
  if ( ++p == pe )
1269
1275
  goto _test_eof3;
1270
1276
  case 3:
1271
- #line 1272 "parser.c"
1277
+ #line 1278 "parser.c"
1272
1278
  switch( (*p) ) {
1273
1279
  case 13: goto st3;
1274
1280
  case 32: goto st3;
@@ -1368,14 +1374,14 @@ case 12:
1368
1374
  goto st3;
1369
1375
  goto st12;
1370
1376
  tr4:
1371
- #line 424 "parser.rl"
1377
+ #line 430 "parser.rl"
1372
1378
  { p--; {p++; cs = 17; goto _out;} }
1373
1379
  goto st17;
1374
1380
  st17:
1375
1381
  if ( ++p == pe )
1376
1382
  goto _test_eof17;
1377
1383
  case 17:
1378
- #line 1379 "parser.c"
1384
+ #line 1385 "parser.c"
1379
1385
  goto st0;
1380
1386
  st13:
1381
1387
  if ( ++p == pe )
@@ -1431,12 +1437,12 @@ case 16:
1431
1437
  _out: {}
1432
1438
  }
1433
1439
 
1434
- #line 446 "parser.rl"
1440
+ #line 452 "parser.rl"
1435
1441
 
1436
1442
  if(cs >= JSON_array_first_final) {
1437
1443
  return p + 1;
1438
1444
  } else {
1439
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
1445
+ raise_parse_error("unexpected token at '%s'", p);
1440
1446
  return NULL;
1441
1447
  }
1442
1448
  }
@@ -1499,26 +1505,30 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
1499
1505
  if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1500
1506
  ruby_xfree(bufferStart);
1501
1507
  }
1502
- rb_enc_raise(
1503
- EXC_ENCODING eParserError,
1504
- "incomplete unicode character escape sequence at '%s'", p
1505
- );
1508
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
1506
1509
  } else {
1507
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
1510
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
1508
1511
  pe += 3;
1509
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
1512
+ /* To handle values above U+FFFF, we take a sequence of
1513
+ * \uXXXX escapes in the U+D800..U+DBFF then
1514
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
1515
+ * to make a 20-bit number, then add 0x10000 to get the
1516
+ * final codepoint.
1517
+ *
1518
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
1519
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
1520
+ * Area".
1521
+ */
1522
+ if ((ch & 0xFC00) == 0xD800) {
1510
1523
  pe++;
1511
1524
  if (pe > stringEnd - 6) {
1512
1525
  if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1513
1526
  ruby_xfree(bufferStart);
1514
1527
  }
1515
- rb_enc_raise(
1516
- EXC_ENCODING eParserError,
1517
- "incomplete surrogate pair at '%s'", p
1518
- );
1528
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
1519
1529
  }
1520
1530
  if (pe[0] == '\\' && pe[1] == 'u') {
1521
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
1531
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
1522
1532
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
1523
1533
  | (sur & 0x3FF));
1524
1534
  pe += 5;
@@ -1588,7 +1598,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
1588
1598
  }
1589
1599
 
1590
1600
 
1591
- #line 1592 "parser.c"
1601
+ #line 1602 "parser.c"
1592
1602
  enum {JSON_string_start = 1};
1593
1603
  enum {JSON_string_first_final = 8};
1594
1604
  enum {JSON_string_error = 0};
@@ -1596,7 +1606,7 @@ enum {JSON_string_error = 0};
1596
1606
  enum {JSON_string_en_main = 1};
1597
1607
 
1598
1608
 
1599
- #line 620 "parser.rl"
1609
+ #line 630 "parser.rl"
1600
1610
 
1601
1611
 
1602
1612
  static int
@@ -1617,15 +1627,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
1617
1627
  VALUE match_string;
1618
1628
 
1619
1629
 
1620
- #line 1621 "parser.c"
1630
+ #line 1631 "parser.c"
1621
1631
  {
1622
1632
  cs = JSON_string_start;
1623
1633
  }
1624
1634
 
1625
- #line 640 "parser.rl"
1635
+ #line 650 "parser.rl"
1626
1636
  json->memo = p;
1627
1637
 
1628
- #line 1629 "parser.c"
1638
+ #line 1639 "parser.c"
1629
1639
  {
1630
1640
  if ( p == pe )
1631
1641
  goto _test_eof;
@@ -1650,7 +1660,7 @@ case 2:
1650
1660
  goto st0;
1651
1661
  goto st2;
1652
1662
  tr2:
1653
- #line 607 "parser.rl"
1663
+ #line 617 "parser.rl"
1654
1664
  {
1655
1665
  *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1656
1666
  if (NIL_P(*result)) {
@@ -1660,14 +1670,14 @@ tr2:
1660
1670
  {p = (( p + 1))-1;}
1661
1671
  }
1662
1672
  }
1663
- #line 617 "parser.rl"
1673
+ #line 627 "parser.rl"
1664
1674
  { p--; {p++; cs = 8; goto _out;} }
1665
1675
  goto st8;
1666
1676
  st8:
1667
1677
  if ( ++p == pe )
1668
1678
  goto _test_eof8;
1669
1679
  case 8:
1670
- #line 1671 "parser.c"
1680
+ #line 1681 "parser.c"
1671
1681
  goto st0;
1672
1682
  st3:
1673
1683
  if ( ++p == pe )
@@ -1743,7 +1753,7 @@ case 7:
1743
1753
  _out: {}
1744
1754
  }
1745
1755
 
1746
- #line 642 "parser.rl"
1756
+ #line 652 "parser.rl"
1747
1757
 
1748
1758
  if (json->create_additions && RTEST(match_string = json->match_string)) {
1749
1759
  VALUE klass;
@@ -1777,18 +1787,20 @@ case 7:
1777
1787
 
1778
1788
  static VALUE convert_encoding(VALUE source)
1779
1789
  {
1780
- #ifdef HAVE_RUBY_ENCODING_H
1781
- rb_encoding *enc = rb_enc_get(source);
1782
- if (enc == rb_ascii8bit_encoding()) {
1783
- if (OBJ_FROZEN(source)) {
1784
- source = rb_str_dup(source);
1785
- }
1786
- FORCE_UTF8(source);
1787
- } else {
1788
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
1789
- }
1790
- #endif
1790
+ int encindex = RB_ENCODING_GET(source);
1791
+
1792
+ if (encindex == utf8_encindex) {
1791
1793
  return source;
1794
+ }
1795
+
1796
+ if (encindex == binary_encindex) {
1797
+ // For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
1798
+ // TODO: Deprecate in 2.8.0
1799
+ // TODO: Remove in 3.0.0
1800
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1801
+ }
1802
+
1803
+ return rb_str_conv_enc(source, rb_enc_from_index(encindex), rb_utf8_encoding());
1792
1804
  }
1793
1805
 
1794
1806
  /*
@@ -1813,8 +1825,15 @@ static VALUE convert_encoding(VALUE source)
1813
1825
  * * *create_additions*: If set to false, the Parser doesn't create
1814
1826
  * additions even if a matching class and create_id was found. This option
1815
1827
  * defaults to false.
1816
- * * *object_class*: Defaults to Hash
1817
- * * *array_class*: Defaults to Array
1828
+ * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1829
+ * instead of Hash to represent JSON objects. The type must respond to
1830
+ * +new+ without arguments, and return an object that responds to +[]=+.
1831
+ * * *array_class*: Defaults to Array. If another type is provided, it will be used
1832
+ * instead of Array to represent JSON arrays. The type must respond to
1833
+ * +new+ without arguments, and return an object that responds to +<<+.
1834
+ * * *decimal_class*: Specifies which class to use instead of the default
1835
+ * (Float) when parsing decimal numbers. This class must accept a single
1836
+ * string argument in its constructor.
1818
1837
  */
1819
1838
  static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1820
1839
  {
@@ -1824,80 +1843,91 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1824
1843
  if (json->Vsource) {
1825
1844
  rb_raise(rb_eTypeError, "already initialized instance");
1826
1845
  }
1827
- rb_scan_args(argc, argv, "1:", &source, &opts);
1846
+
1847
+ rb_check_arity(argc, 1, 2);
1848
+ source = argv[0];
1849
+ opts = Qnil;
1850
+ if (argc == 2) {
1851
+ opts = argv[1];
1852
+ Check_Type(argv[1], T_HASH);
1853
+ if (RHASH_SIZE(argv[1]) > 0) {
1854
+ opts = argv[1];
1855
+ }
1856
+ }
1857
+
1828
1858
  if (!NIL_P(opts)) {
1829
- VALUE tmp = ID2SYM(i_max_nesting);
1830
- if (option_given_p(opts, tmp)) {
1831
- VALUE max_nesting = rb_hash_aref(opts, tmp);
1832
- if (RTEST(max_nesting)) {
1833
- Check_Type(max_nesting, T_FIXNUM);
1834
- json->max_nesting = FIX2INT(max_nesting);
1835
- } else {
1836
- json->max_nesting = 0;
1837
- }
1838
- } else {
1839
- json->max_nesting = 100;
1840
- }
1841
- tmp = ID2SYM(i_allow_nan);
1842
- if (option_given_p(opts, tmp)) {
1843
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1844
- } else {
1845
- json->allow_nan = 0;
1846
- }
1847
- tmp = ID2SYM(i_symbolize_names);
1848
- if (option_given_p(opts, tmp)) {
1849
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1850
- } else {
1851
- json->symbolize_names = 0;
1852
- }
1853
- tmp = ID2SYM(i_freeze);
1854
- if (option_given_p(opts, tmp)) {
1855
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1856
- } else {
1857
- json->freeze = 0;
1858
- }
1859
- tmp = ID2SYM(i_create_additions);
1860
- if (option_given_p(opts, tmp)) {
1861
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
1862
- } else {
1863
- json->create_additions = 0;
1864
- }
1865
- if (json->symbolize_names && json->create_additions) {
1866
- rb_raise(rb_eArgError,
1867
- "options :symbolize_names and :create_additions cannot be "
1868
- " used in conjunction");
1869
- }
1870
- tmp = ID2SYM(i_create_id);
1871
- if (option_given_p(opts, tmp)) {
1872
- json->create_id = rb_hash_aref(opts, tmp);
1873
- } else {
1874
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
1875
- }
1876
- tmp = ID2SYM(i_object_class);
1877
- if (option_given_p(opts, tmp)) {
1878
- json->object_class = rb_hash_aref(opts, tmp);
1879
- } else {
1880
- json->object_class = Qnil;
1881
- }
1882
- tmp = ID2SYM(i_array_class);
1883
- if (option_given_p(opts, tmp)) {
1884
- json->array_class = rb_hash_aref(opts, tmp);
1885
- } else {
1886
- json->array_class = Qnil;
1887
- }
1888
- tmp = ID2SYM(i_decimal_class);
1889
- if (option_given_p(opts, tmp)) {
1890
- json->decimal_class = rb_hash_aref(opts, tmp);
1891
- } else {
1892
- json->decimal_class = Qnil;
1893
- }
1894
- tmp = ID2SYM(i_match_string);
1895
- if (option_given_p(opts, tmp)) {
1896
- VALUE match_string = rb_hash_aref(opts, tmp);
1897
- json->match_string = RTEST(match_string) ? match_string : Qnil;
1898
- } else {
1899
- json->match_string = Qnil;
1900
- }
1859
+ VALUE tmp = ID2SYM(i_max_nesting);
1860
+ if (option_given_p(opts, tmp)) {
1861
+ VALUE max_nesting = rb_hash_aref(opts, tmp);
1862
+ if (RTEST(max_nesting)) {
1863
+ Check_Type(max_nesting, T_FIXNUM);
1864
+ json->max_nesting = FIX2INT(max_nesting);
1865
+ } else {
1866
+ json->max_nesting = 0;
1867
+ }
1868
+ } else {
1869
+ json->max_nesting = 100;
1870
+ }
1871
+ tmp = ID2SYM(i_allow_nan);
1872
+ if (option_given_p(opts, tmp)) {
1873
+ json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1874
+ } else {
1875
+ json->allow_nan = 0;
1876
+ }
1877
+ tmp = ID2SYM(i_symbolize_names);
1878
+ if (option_given_p(opts, tmp)) {
1879
+ json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1880
+ } else {
1881
+ json->symbolize_names = 0;
1882
+ }
1883
+ tmp = ID2SYM(i_freeze);
1884
+ if (option_given_p(opts, tmp)) {
1885
+ json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
1886
+ } else {
1887
+ json->freeze = 0;
1888
+ }
1889
+ tmp = ID2SYM(i_create_additions);
1890
+ if (option_given_p(opts, tmp)) {
1891
+ json->create_additions = RTEST(rb_hash_aref(opts, tmp));
1892
+ } else {
1893
+ json->create_additions = 0;
1894
+ }
1895
+ if (json->symbolize_names && json->create_additions) {
1896
+ rb_raise(rb_eArgError,
1897
+ "options :symbolize_names and :create_additions cannot be "
1898
+ " used in conjunction");
1899
+ }
1900
+ tmp = ID2SYM(i_create_id);
1901
+ if (option_given_p(opts, tmp)) {
1902
+ json->create_id = rb_hash_aref(opts, tmp);
1903
+ } else {
1904
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
1905
+ }
1906
+ tmp = ID2SYM(i_object_class);
1907
+ if (option_given_p(opts, tmp)) {
1908
+ json->object_class = rb_hash_aref(opts, tmp);
1909
+ } else {
1910
+ json->object_class = Qnil;
1911
+ }
1912
+ tmp = ID2SYM(i_array_class);
1913
+ if (option_given_p(opts, tmp)) {
1914
+ json->array_class = rb_hash_aref(opts, tmp);
1915
+ } else {
1916
+ json->array_class = Qnil;
1917
+ }
1918
+ tmp = ID2SYM(i_decimal_class);
1919
+ if (option_given_p(opts, tmp)) {
1920
+ json->decimal_class = rb_hash_aref(opts, tmp);
1921
+ } else {
1922
+ json->decimal_class = Qnil;
1923
+ }
1924
+ tmp = ID2SYM(i_match_string);
1925
+ if (option_given_p(opts, tmp)) {
1926
+ VALUE match_string = rb_hash_aref(opts, tmp);
1927
+ json->match_string = RTEST(match_string) ? match_string : Qnil;
1928
+ } else {
1929
+ json->match_string = Qnil;
1930
+ }
1901
1931
  } else {
1902
1932
  json->max_nesting = 100;
1903
1933
  json->allow_nan = 0;
@@ -1916,7 +1946,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
1916
1946
  }
1917
1947
 
1918
1948
 
1919
- #line 1920 "parser.c"
1949
+ #line 1950 "parser.c"
1920
1950
  enum {JSON_start = 1};
1921
1951
  enum {JSON_first_final = 10};
1922
1952
  enum {JSON_error = 0};
@@ -1924,7 +1954,7 @@ enum {JSON_error = 0};
1924
1954
  enum {JSON_en_main = 1};
1925
1955
 
1926
1956
 
1927
- #line 828 "parser.rl"
1957
+ #line 858 "parser.rl"
1928
1958
 
1929
1959
 
1930
1960
  /*
@@ -1936,22 +1966,22 @@ enum {JSON_en_main = 1};
1936
1966
  */
1937
1967
  static VALUE cParser_parse(VALUE self)
1938
1968
  {
1939
- char *p, *pe;
1940
- int cs = EVIL;
1941
- VALUE result = Qnil;
1942
- GET_PARSER;
1969
+ char *p, *pe;
1970
+ int cs = EVIL;
1971
+ VALUE result = Qnil;
1972
+ GET_PARSER;
1943
1973
 
1944
1974
 
1945
- #line 1946 "parser.c"
1975
+ #line 1976 "parser.c"
1946
1976
  {
1947
1977
  cs = JSON_start;
1948
1978
  }
1949
1979
 
1950
- #line 845 "parser.rl"
1951
- p = json->source;
1952
- pe = p + json->len;
1980
+ #line 875 "parser.rl"
1981
+ p = json->source;
1982
+ pe = p + json->len;
1953
1983
 
1954
- #line 1955 "parser.c"
1984
+ #line 1985 "parser.c"
1955
1985
  {
1956
1986
  if ( p == pe )
1957
1987
  goto _test_eof;
@@ -1985,7 +2015,7 @@ st0:
1985
2015
  cs = 0;
1986
2016
  goto _out;
1987
2017
  tr2:
1988
- #line 820 "parser.rl"
2018
+ #line 850 "parser.rl"
1989
2019
  {
1990
2020
  char *np = JSON_parse_value(json, p, pe, &result, 0);
1991
2021
  if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -1995,7 +2025,7 @@ st10:
1995
2025
  if ( ++p == pe )
1996
2026
  goto _test_eof10;
1997
2027
  case 10:
1998
- #line 1999 "parser.c"
2028
+ #line 2029 "parser.c"
1999
2029
  switch( (*p) ) {
2000
2030
  case 13: goto st10;
2001
2031
  case 32: goto st10;
@@ -2084,25 +2114,25 @@ case 9:
2084
2114
  _out: {}
2085
2115
  }
2086
2116
 
2087
- #line 848 "parser.rl"
2117
+ #line 878 "parser.rl"
2088
2118
 
2089
- if (cs >= JSON_first_final && p == pe) {
2090
- return result;
2091
- } else {
2092
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
2093
- return Qnil;
2094
- }
2119
+ if (cs >= JSON_first_final && p == pe) {
2120
+ return result;
2121
+ } else {
2122
+ raise_parse_error("unexpected token at '%s'", p);
2123
+ return Qnil;
2124
+ }
2095
2125
  }
2096
2126
 
2097
2127
  static void JSON_mark(void *ptr)
2098
2128
  {
2099
2129
  JSON_Parser *json = ptr;
2100
- rb_gc_mark_maybe(json->Vsource);
2101
- rb_gc_mark_maybe(json->create_id);
2102
- rb_gc_mark_maybe(json->object_class);
2103
- rb_gc_mark_maybe(json->array_class);
2104
- rb_gc_mark_maybe(json->decimal_class);
2105
- rb_gc_mark_maybe(json->match_string);
2130
+ rb_gc_mark(json->Vsource);
2131
+ rb_gc_mark(json->create_id);
2132
+ rb_gc_mark(json->object_class);
2133
+ rb_gc_mark(json->array_class);
2134
+ rb_gc_mark(json->decimal_class);
2135
+ rb_gc_mark(json->match_string);
2106
2136
  }
2107
2137
 
2108
2138
  static void JSON_free(void *ptr)
@@ -2118,16 +2148,12 @@ static size_t JSON_memsize(const void *ptr)
2118
2148
  return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
2119
2149
  }
2120
2150
 
2121
- #ifdef NEW_TYPEDDATA_WRAPPER
2122
2151
  static const rb_data_type_t JSON_Parser_type = {
2123
2152
  "JSON/Parser",
2124
2153
  {JSON_mark, JSON_free, JSON_memsize,},
2125
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
2126
2154
  0, 0,
2127
2155
  RUBY_TYPED_FREE_IMMEDIATELY,
2128
- #endif
2129
2156
  };
2130
- #endif
2131
2157
 
2132
2158
  static VALUE cJSON_parser_s_allocate(VALUE klass)
2133
2159
  {
@@ -2160,9 +2186,7 @@ void Init_parser(void)
2160
2186
  mJSON = rb_define_module("JSON");
2161
2187
  mExt = rb_define_module_under(mJSON, "Ext");
2162
2188
  cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
2163
- eParserError = rb_path2class("JSON::ParserError");
2164
2189
  eNestingError = rb_path2class("JSON::NestingError");
2165
- rb_gc_register_mark_object(eParserError);
2166
2190
  rb_gc_register_mark_object(eNestingError);
2167
2191
  rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
2168
2192
  rb_define_method(cParser, "initialize", cParser_initialize, -1);
@@ -2191,7 +2215,6 @@ void Init_parser(void)
2191
2215
  i_decimal_class = rb_intern("decimal_class");
2192
2216
  i_match = rb_intern("match");
2193
2217
  i_match_string = rb_intern("match_string");
2194
- i_key_p = rb_intern("key?");
2195
2218
  i_deep_const_get = rb_intern("deep_const_get");
2196
2219
  i_aset = rb_intern("[]=");
2197
2220
  i_aref = rb_intern("[]");
@@ -2200,6 +2223,9 @@ void Init_parser(void)
2200
2223
  i_try_convert = rb_intern("try_convert");
2201
2224
  i_freeze = rb_intern("freeze");
2202
2225
  i_uminus = rb_intern("-@");
2226
+
2227
+ binary_encindex = rb_ascii8bit_encindex();
2228
+ utf8_encindex = rb_utf8_encindex();
2203
2229
  }
2204
2230
 
2205
2231
  /*