json 2.7.2 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,310 @@
1
+ #include "ruby.h"
1
2
  #include "../fbuffer/fbuffer.h"
2
- #include "parser.h"
3
3
 
4
- #if defined HAVE_RUBY_ENCODING_H
5
- # define EXC_ENCODING rb_utf8_encoding(),
6
- # ifndef HAVE_RB_ENC_RAISE
7
- static void
8
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
9
- {
10
- va_list args;
11
- VALUE mesg;
4
+ static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
5
+ static VALUE CNaN, CInfinity, CMinusInfinity;
12
6
 
13
- va_start(args, fmt);
14
- mesg = rb_enc_vsprintf(enc, fmt, args);
15
- va_end(args);
7
+ static ID i_json_creatable_p, i_json_create, i_create_id,
8
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
9
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
16
10
 
17
- rb_exc_raise(rb_exc_new3(exc, mesg));
18
- }
19
- # define rb_enc_raise enc_raise
20
- # endif
11
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
12
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
13
+ sym_decimal_class, sym_match_string;
14
+
15
+ static int binary_encindex;
16
+ static int utf8_encindex;
17
+
18
+ #ifdef HAVE_RB_CATEGORY_WARN
19
+ # define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
21
20
  #else
22
- # define EXC_ENCODING /* nothing */
23
- # define rb_enc_raise rb_raise
21
+ # define json_deprecated(message) rb_warn(message)
24
22
  #endif
25
23
 
24
+ static const char deprecated_create_additions_warning[] =
25
+ "JSON.load implicit support for `create_additions: true` is deprecated "
26
+ "and will be removed in 3.0, use JSON.unsafe_load or explicitly "
27
+ "pass `create_additions: true`";
28
+
29
+ #ifndef HAVE_RB_HASH_BULK_INSERT
30
+ // For TruffleRuby
31
+ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
32
+ {
33
+ long index = 0;
34
+ while (index < count) {
35
+ VALUE name = pairs[index++];
36
+ VALUE value = pairs[index++];
37
+ rb_hash_aset(hash, name, value);
38
+ }
39
+ RB_GC_GUARD(hash);
40
+ }
41
+ #endif
42
+
43
+ /* name cache */
44
+
45
+ #include <string.h>
46
+ #include <ctype.h>
47
+
48
+ // Object names are likely to be repeated, and are frozen.
49
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
50
+ // and save much more expensive lookups into the global fstring table.
51
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
52
+ // to be able to fit safely on the stack.
53
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
54
+ // performance.
55
+ #define JSON_RVALUE_CACHE_CAPA 63
56
+ typedef struct rvalue_cache_struct {
57
+ int length;
58
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
59
+ } rvalue_cache;
60
+
61
+ static rb_encoding *enc_utf8;
62
+
63
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
64
+
65
+ static inline VALUE build_interned_string(const char *str, const long length)
66
+ {
67
+ # ifdef HAVE_RB_ENC_INTERNED_STR
68
+ return rb_enc_interned_str(str, length, enc_utf8);
69
+ # else
70
+ VALUE rstring = rb_utf8_str_new(str, length);
71
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
72
+ # endif
73
+ }
74
+
75
+ static inline VALUE build_symbol(const char *str, const long length)
76
+ {
77
+ return rb_str_intern(build_interned_string(str, length));
78
+ }
79
+
80
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
81
+ {
82
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
83
+ cache->length++;
84
+ cache->entries[index] = rstring;
85
+ }
86
+
87
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
88
+ {
89
+ long rstring_length = RSTRING_LEN(rstring);
90
+ if (length == rstring_length) {
91
+ return memcmp(str, RSTRING_PTR(rstring), length);
92
+ } else {
93
+ return (int)(length - rstring_length);
94
+ }
95
+ }
96
+
97
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
98
+ {
99
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
100
+ // Common names aren't likely to be very long. So we just don't
101
+ // cache names above an arbitrary threshold.
102
+ return Qfalse;
103
+ }
104
+
105
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
106
+ // Simple heuristic, if the first character isn't a letter,
107
+ // we're much less likely to see this string again.
108
+ // We mostly want to cache strings that are likely to be repeated.
109
+ return Qfalse;
110
+ }
111
+
112
+ int low = 0;
113
+ int high = cache->length - 1;
114
+ int mid = 0;
115
+ int last_cmp = 0;
116
+
117
+ while (low <= high) {
118
+ mid = (high + low) >> 1;
119
+ VALUE entry = cache->entries[mid];
120
+ last_cmp = rstring_cache_cmp(str, length, entry);
121
+
122
+ if (last_cmp == 0) {
123
+ return entry;
124
+ } else if (last_cmp > 0) {
125
+ low = mid + 1;
126
+ } else {
127
+ high = mid - 1;
128
+ }
129
+ }
130
+
131
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
132
+ // We assume the overwhelming majority of names don't need to be escaped.
133
+ // But if they do, we have to fallback to the slow path.
134
+ return Qfalse;
135
+ }
136
+
137
+ VALUE rstring = build_interned_string(str, length);
138
+
139
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
140
+ if (last_cmp > 0) {
141
+ mid += 1;
142
+ }
143
+
144
+ rvalue_cache_insert_at(cache, mid, rstring);
145
+ }
146
+ return rstring;
147
+ }
148
+
149
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
150
+ {
151
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
152
+ // Common names aren't likely to be very long. So we just don't
153
+ // cache names above an arbitrary threshold.
154
+ return Qfalse;
155
+ }
156
+
157
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
158
+ // Simple heuristic, if the first character isn't a letter,
159
+ // we're much less likely to see this string again.
160
+ // We mostly want to cache strings that are likely to be repeated.
161
+ return Qfalse;
162
+ }
163
+
164
+ int low = 0;
165
+ int high = cache->length - 1;
166
+ int mid = 0;
167
+ int last_cmp = 0;
168
+
169
+ while (low <= high) {
170
+ mid = (high + low) >> 1;
171
+ VALUE entry = cache->entries[mid];
172
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
173
+
174
+ if (last_cmp == 0) {
175
+ return entry;
176
+ } else if (last_cmp > 0) {
177
+ low = mid + 1;
178
+ } else {
179
+ high = mid - 1;
180
+ }
181
+ }
182
+
183
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
184
+ // We assume the overwhelming majority of names don't need to be escaped.
185
+ // But if they do, we have to fallback to the slow path.
186
+ return Qfalse;
187
+ }
188
+
189
+ VALUE rsymbol = build_symbol(str, length);
190
+
191
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
192
+ if (last_cmp > 0) {
193
+ mid += 1;
194
+ }
195
+
196
+ rvalue_cache_insert_at(cache, mid, rsymbol);
197
+ }
198
+ return rsymbol;
199
+ }
200
+
201
+ /* rvalue stack */
202
+
203
+ #define RVALUE_STACK_INITIAL_CAPA 128
204
+
205
+ enum rvalue_stack_type {
206
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
207
+ RVALUE_STACK_STACK_ALLOCATED = 1,
208
+ };
209
+
210
+ typedef struct rvalue_stack_struct {
211
+ enum rvalue_stack_type type;
212
+ long capa;
213
+ long head;
214
+ VALUE *ptr;
215
+ } rvalue_stack;
216
+
217
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
218
+
219
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
220
+ {
221
+ long required = stack->capa * 2;
222
+
223
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
224
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
225
+ } else {
226
+ REALLOC_N(stack->ptr, VALUE, required);
227
+ stack->capa = required;
228
+ }
229
+ return stack;
230
+ }
231
+
232
+ static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
233
+ {
234
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
235
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
236
+ }
237
+ stack->ptr[stack->head] = value;
238
+ stack->head++;
239
+ }
240
+
241
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
242
+ {
243
+ return stack->ptr + (stack->head - count);
244
+ }
245
+
246
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
247
+ {
248
+ stack->head -= count;
249
+ }
250
+
251
+ static void rvalue_stack_mark(void *ptr)
252
+ {
253
+ rvalue_stack *stack = (rvalue_stack *)ptr;
254
+ long index;
255
+ for (index = 0; index < stack->head; index++) {
256
+ rb_gc_mark(stack->ptr[index]);
257
+ }
258
+ }
259
+
260
+ static void rvalue_stack_free(void *ptr)
261
+ {
262
+ rvalue_stack *stack = (rvalue_stack *)ptr;
263
+ if (stack) {
264
+ ruby_xfree(stack->ptr);
265
+ ruby_xfree(stack);
266
+ }
267
+ }
268
+
269
+ static size_t rvalue_stack_memsize(const void *ptr)
270
+ {
271
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
272
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
273
+ }
274
+
275
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
276
+ "JSON::Ext::Parser/rvalue_stack",
277
+ {
278
+ .dmark = rvalue_stack_mark,
279
+ .dfree = rvalue_stack_free,
280
+ .dsize = rvalue_stack_memsize,
281
+ },
282
+ 0, 0,
283
+ RUBY_TYPED_FREE_IMMEDIATELY,
284
+ };
285
+
286
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
287
+ {
288
+ rvalue_stack *stack;
289
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
290
+ *stack_ref = stack;
291
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
292
+
293
+ stack->capa = old_stack->capa << 1;
294
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
295
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
296
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
297
+ return stack;
298
+ }
299
+
300
+ static void rvalue_stack_eagerly_release(VALUE handle)
301
+ {
302
+ rvalue_stack *stack;
303
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
304
+ RTYPEDDATA_DATA(handle) = NULL;
305
+ rvalue_stack_free(stack);
306
+ }
307
+
26
308
  /* unicode */
27
309
 
28
310
  static const signed char digit_values[256] = {
@@ -42,26 +324,28 @@ static const signed char digit_values[256] = {
42
324
  -1, -1, -1, -1, -1, -1, -1
43
325
  };
44
326
 
45
- static UTF32 unescape_unicode(const unsigned char *p)
327
+ static uint32_t unescape_unicode(const unsigned char *p)
46
328
  {
329
+ const uint32_t replacement_char = 0xFFFD;
330
+
47
331
  signed char b;
48
- UTF32 result = 0;
332
+ uint32_t result = 0;
49
333
  b = digit_values[p[0]];
50
- if (b < 0) return UNI_REPLACEMENT_CHAR;
334
+ if (b < 0) return replacement_char;
51
335
  result = (result << 4) | (unsigned char)b;
52
336
  b = digit_values[p[1]];
53
- if (b < 0) return UNI_REPLACEMENT_CHAR;
337
+ if (b < 0) return replacement_char;
54
338
  result = (result << 4) | (unsigned char)b;
55
339
  b = digit_values[p[2]];
56
- if (b < 0) return UNI_REPLACEMENT_CHAR;
340
+ if (b < 0) return replacement_char;
57
341
  result = (result << 4) | (unsigned char)b;
58
342
  b = digit_values[p[3]];
59
- if (b < 0) return UNI_REPLACEMENT_CHAR;
343
+ if (b < 0) return replacement_char;
60
344
  result = (result << 4) | (unsigned char)b;
61
345
  return result;
62
346
  }
63
347
 
64
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
348
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
65
349
  {
66
350
  int len = 1;
67
351
  if (ch <= 0x7F) {
@@ -87,14 +371,78 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
87
371
  return len;
88
372
  }
89
373
 
90
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
91
- static VALUE CNaN, CInfinity, CMinusInfinity;
374
+ typedef struct JSON_ParserStruct {
375
+ VALUE Vsource;
376
+ char *source;
377
+ long len;
378
+ char *memo;
379
+ VALUE create_id;
380
+ VALUE object_class;
381
+ VALUE array_class;
382
+ VALUE decimal_class;
383
+ VALUE match_string;
384
+ FBuffer fbuffer;
385
+ int in_array;
386
+ int max_nesting;
387
+ bool allow_nan;
388
+ bool allow_trailing_comma;
389
+ bool parsing_name;
390
+ bool symbolize_names;
391
+ bool freeze;
392
+ bool create_additions;
393
+ bool deprecated_create_additions;
394
+ rvalue_cache name_cache;
395
+ rvalue_stack *stack;
396
+ VALUE stack_handle;
397
+ } JSON_Parser;
398
+
399
+ #define GET_PARSER \
400
+ GET_PARSER_INIT; \
401
+ if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
402
+
403
+ #define GET_PARSER_INIT \
404
+ JSON_Parser *json; \
405
+ TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
406
+
407
+ #define MinusInfinity "-Infinity"
408
+ #define EVIL 0x666
409
+
410
+ static const rb_data_type_t JSON_Parser_type;
411
+ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
412
+ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
413
+ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
414
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result);
415
+ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
416
+
417
+
418
+ #ifndef HAVE_STRNLEN
419
/*
 * Fallback strnlen: length of the string at `s`, looking at no more than
 * `maxlen` bytes. Returns `maxlen` when no NUL byte is found in that range.
 */
static size_t strnlen(const char *s, size_t maxlen)
{
    const char *terminator = memchr(s, '\0', maxlen);

    if (terminator == NULL) {
        return maxlen;
    }
    return (size_t)(terminator - s);
}
424
+ #endif
425
+
426
+ #define PARSE_ERROR_FRAGMENT_LEN 32
427
+ #ifdef RBIMPL_ATTR_NORETURN
428
+ RBIMPL_ATTR_NORETURN()
429
+ #endif
430
+ static void raise_parse_error(const char *format, const char *start)
431
+ {
432
+ char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
433
+
434
+ size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
435
+ const char *ptr = start;
436
+
437
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
438
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
439
+ buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
440
+ ptr = buffer;
441
+ }
442
+
443
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
444
+ }
92
445
 
93
- static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
94
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
95
- i_object_class, i_array_class, i_decimal_class, i_key_p,
96
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
97
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
98
446
 
99
447
  %%{
100
448
  machine JSON_common;
@@ -131,27 +479,25 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
131
479
  write data;
132
480
 
133
481
  action parse_value {
134
- VALUE v = Qnil;
135
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
482
+ char *np = JSON_parse_value(json, fpc, pe, result, current_nesting);
136
483
  if (np == NULL) {
137
484
  fhold; fbreak;
138
485
  } else {
139
- if (NIL_P(json->object_class)) {
140
- OBJ_FREEZE(last_name);
141
- rb_hash_aset(*result, last_name, v);
142
- } else {
143
- rb_funcall(*result, i_aset, 2, last_name, v);
144
- }
145
486
  fexec np;
146
487
  }
147
488
  }
148
489
 
490
+ action allow_trailing_comma { json->allow_trailing_comma }
491
+
149
492
  action parse_name {
150
493
  char *np;
151
- json->parsing_name = 1;
152
- np = JSON_parse_string(json, fpc, pe, &last_name);
153
- json->parsing_name = 0;
154
- if (np == NULL) { fhold; fbreak; } else fexec np;
494
+ json->parsing_name = true;
495
+ np = JSON_parse_string(json, fpc, pe, result);
496
+ json->parsing_name = false;
497
+ if (np == NULL) { fhold; fbreak; } else {
498
+ PUSH(*result);
499
+ fexec np;
500
+ }
155
501
  }
156
502
 
157
503
  action exit { fhold; fbreak; }
@@ -161,37 +507,64 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
161
507
 
162
508
  main := (
163
509
  begin_object
164
- (pair (next_pair)*)? ignore*
510
+ (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
165
511
  end_object
166
512
  ) @exit;
167
513
  }%%
168
514
 
515
+ #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
516
+
169
517
  static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
170
518
  {
171
519
  int cs = EVIL;
172
- VALUE last_name = Qnil;
173
- VALUE object_class = json->object_class;
174
520
 
175
521
  if (json->max_nesting && current_nesting > json->max_nesting) {
176
522
  rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
177
523
  }
178
524
 
179
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
525
+ long stack_head = json->stack->head;
180
526
 
181
527
  %% write init;
182
528
  %% write exec;
183
529
 
184
530
  if (cs >= JSON_object_first_final) {
185
- if (json->create_additions) {
531
+ long count = json->stack->head - stack_head;
532
+
533
+ if (RB_UNLIKELY(json->object_class)) {
534
+ VALUE object = rb_class_new_instance(0, 0, json->object_class);
535
+ long index = 0;
536
+ VALUE *items = rvalue_stack_peek(json->stack, count);
537
+ while (index < count) {
538
+ VALUE name = items[index++];
539
+ VALUE value = items[index++];
540
+ rb_funcall(object, i_aset, 2, name, value);
541
+ }
542
+ *result = object;
543
+ } else {
544
+ VALUE hash;
545
+ #ifdef HAVE_RB_HASH_NEW_CAPA
546
+ hash = rb_hash_new_capa(count >> 1);
547
+ #else
548
+ hash = rb_hash_new();
549
+ #endif
550
+ rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
551
+ *result = hash;
552
+ }
553
+ rvalue_stack_pop(json->stack, count);
554
+
555
+ if (RB_UNLIKELY(json->create_additions)) {
186
556
  VALUE klassname;
187
- if (NIL_P(json->object_class)) {
188
- klassname = rb_hash_aref(*result, json->create_id);
557
+ if (json->object_class) {
558
+ klassname = rb_funcall(*result, i_aref, 1, json->create_id);
189
559
  } else {
190
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
560
+ klassname = rb_hash_aref(*result, json->create_id);
191
561
  }
192
562
  if (!NIL_P(klassname)) {
193
563
  VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
194
564
  if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
565
+ if (json->deprecated_create_additions) {
566
+ json_deprecated(deprecated_create_additions_warning);
567
+ }
195
568
  *result = rb_funcall(klass, i_json_create, 1, *result);
196
569
  }
197
570
  }
@@ -202,7 +575,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
202
575
  }
203
576
  }
204
577
 
205
-
206
578
  %%{
207
579
  machine JSON_value;
208
580
  include JSON_common;
@@ -222,19 +594,24 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
222
594
  if (json->allow_nan) {
223
595
  *result = CNaN;
224
596
  } else {
225
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 2);
597
+ raise_parse_error("unexpected token at '%s'", p - 2);
226
598
  }
227
599
  }
228
600
  action parse_infinity {
229
601
  if (json->allow_nan) {
230
602
  *result = CInfinity;
231
603
  } else {
232
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 7);
604
+ raise_parse_error("unexpected token at '%s'", p - 7);
233
605
  }
234
606
  }
235
607
  action parse_string {
236
608
  char *np = JSON_parse_string(json, fpc, pe, result);
237
- if (np == NULL) { fhold; fbreak; } else fexec np;
609
+ if (np == NULL) {
610
+ fhold;
611
+ fbreak;
612
+ } else {
613
+ fexec np;
614
+ }
238
615
  }
239
616
 
240
617
  action parse_number {
@@ -245,19 +622,21 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
245
622
  fexec p + 10;
246
623
  fhold; fbreak;
247
624
  } else {
248
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
625
+ raise_parse_error("unexpected token at '%s'", p);
249
626
  }
250
627
  }
251
- np = JSON_parse_float(json, fpc, pe, result);
252
- if (np != NULL) fexec np;
253
- np = JSON_parse_integer(json, fpc, pe, result);
254
- if (np != NULL) fexec np;
628
+ np = JSON_parse_number(json, fpc, pe, result);
629
+ if (np != NULL) {
630
+ fexec np;
631
+ }
255
632
  fhold; fbreak;
256
633
  }
257
634
 
258
635
  action parse_array {
259
636
  char *np;
637
+ json->in_array++;
260
638
  np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1);
639
+ json->in_array--;
261
640
  if (np == NULL) { fhold; fbreak; } else fexec np;
262
641
  }
263
642
 
@@ -275,10 +654,10 @@ main := ignore* (
275
654
  Vtrue @parse_true |
276
655
  VNaN @parse_nan |
277
656
  VInfinity @parse_infinity |
278
- begin_number >parse_number |
279
- begin_string >parse_string |
280
- begin_array >parse_array |
281
- begin_object >parse_object
657
+ begin_number @parse_number |
658
+ begin_string @parse_string |
659
+ begin_array @parse_array |
660
+ begin_object @parse_object
282
661
  ) ignore* %*exit;
283
662
  }%%
284
663
 
@@ -294,6 +673,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
294
673
  }
295
674
 
296
675
  if (cs >= JSON_value_first_final) {
676
+ PUSH(*result);
297
677
  return p;
298
678
  } else {
299
679
  return NULL;
@@ -310,24 +690,40 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
310
690
  main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
311
691
  }%%
312
692
 
313
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
693
+ #define MAX_FAST_INTEGER_SIZE 18
694
+ static inline VALUE fast_parse_integer(char *p, char *pe)
314
695
  {
315
- int cs = EVIL;
696
+ bool negative = false;
697
+ if (*p == '-') {
698
+ negative = true;
699
+ p++;
700
+ }
316
701
 
317
- %% write init;
318
- json->memo = p;
319
- %% write exec;
702
+ long long memo = 0;
703
+ while (p < pe) {
704
+ memo *= 10;
705
+ memo += *p - '0';
706
+ p++;
707
+ }
708
+
709
+ if (negative) {
710
+ memo = -memo;
711
+ }
712
+ return LL2NUM(memo);
713
+ }
320
714
 
321
- if (cs >= JSON_integer_first_final) {
715
+ static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result)
716
+ {
322
717
  long len = p - json->memo;
323
- fbuffer_clear(json->fbuffer);
324
- fbuffer_append(json->fbuffer, json->memo, len);
325
- fbuffer_append_char(json->fbuffer, '\0');
326
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
718
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
719
+ *result = fast_parse_integer(json->memo, p);
720
+ } else {
721
+ fbuffer_clear(&json->fbuffer);
722
+ fbuffer_append(&json->fbuffer, json->memo, len);
723
+ fbuffer_append_char(&json->fbuffer, '\0');
724
+ *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
725
+ }
327
726
  return p + 1;
328
- } else {
329
- return NULL;
330
- }
331
727
  }
332
728
 
333
729
  %%{
@@ -337,60 +733,68 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
337
733
  write data;
338
734
 
339
735
  action exit { fhold; fbreak; }
736
+ action isFloat { is_float = true; }
340
737
 
341
738
  main := '-'? (
342
- (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
343
- | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
344
- ) (^[0-9Ee.\-]? @exit );
739
+ (('0' | [1-9][0-9]*)
740
+ ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) |
741
+ ([Ee] [+\-]?[0-9]+)) > isFloat)?
742
+ ) (^[0-9Ee.\-]? @exit ));
345
743
  }%%
346
744
 
347
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
745
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result)
348
746
  {
349
747
  int cs = EVIL;
748
+ bool is_float = false;
350
749
 
351
750
  %% write init;
352
751
  json->memo = p;
353
752
  %% write exec;
354
753
 
355
754
  if (cs >= JSON_float_first_final) {
755
+ if (!is_float) {
756
+ return JSON_decode_integer(json, p, result);
757
+ }
356
758
  VALUE mod = Qnil;
357
759
  ID method_id = 0;
358
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
359
- mod = json->decimal_class;
360
- method_id = i_try_convert;
361
- } else if (rb_respond_to(json->decimal_class, i_new)) {
362
- mod = json->decimal_class;
363
- method_id = i_new;
364
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
365
- VALUE name = rb_class_name(json->decimal_class);
366
- const char *name_cstr = RSTRING_PTR(name);
367
- const char *last_colon = strrchr(name_cstr, ':');
368
- if (last_colon) {
369
- const char *mod_path_end = last_colon - 1;
370
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
371
- mod = rb_path_to_class(mod_path);
372
-
373
- const char *method_name_beg = last_colon + 1;
374
- long before_len = method_name_beg - name_cstr;
375
- long len = RSTRING_LEN(name) - before_len;
376
- VALUE method_name = rb_str_substr(name, before_len, len);
377
- method_id = SYM2ID(rb_str_intern(method_name));
378
- } else {
379
- mod = rb_mKernel;
380
- method_id = SYM2ID(rb_str_intern(name));
760
+ if (json->decimal_class) {
761
+ if (rb_respond_to(json->decimal_class, i_try_convert)) {
762
+ mod = json->decimal_class;
763
+ method_id = i_try_convert;
764
+ } else if (rb_respond_to(json->decimal_class, i_new)) {
765
+ mod = json->decimal_class;
766
+ method_id = i_new;
767
+ } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
768
+ VALUE name = rb_class_name(json->decimal_class);
769
+ const char *name_cstr = RSTRING_PTR(name);
770
+ const char *last_colon = strrchr(name_cstr, ':');
771
+ if (last_colon) {
772
+ const char *mod_path_end = last_colon - 1;
773
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
774
+ mod = rb_path_to_class(mod_path);
775
+
776
+ const char *method_name_beg = last_colon + 1;
777
+ long before_len = method_name_beg - name_cstr;
778
+ long len = RSTRING_LEN(name) - before_len;
779
+ VALUE method_name = rb_str_substr(name, before_len, len);
780
+ method_id = SYM2ID(rb_str_intern(method_name));
781
+ } else {
782
+ mod = rb_mKernel;
783
+ method_id = SYM2ID(rb_str_intern(name));
784
+ }
381
785
  }
382
786
  }
383
787
 
384
788
  long len = p - json->memo;
385
- fbuffer_clear(json->fbuffer);
386
- fbuffer_append(json->fbuffer, json->memo, len);
387
- fbuffer_append_char(json->fbuffer, '\0');
789
+ fbuffer_clear(&json->fbuffer);
790
+ fbuffer_append(&json->fbuffer, json->memo, len);
791
+ fbuffer_append_char(&json->fbuffer, '\0');
388
792
 
389
793
  if (method_id) {
390
- VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
794
+ VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer));
391
795
  *result = rb_funcallv(mod, method_id, 1, &text);
392
796
  } else {
393
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
797
+ *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1));
394
798
  }
395
799
 
396
800
  return p + 1;
@@ -412,69 +816,133 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
412
816
  if (np == NULL) {
413
817
  fhold; fbreak;
414
818
  } else {
415
- if (NIL_P(json->array_class)) {
416
- rb_ary_push(*result, v);
417
- } else {
418
- rb_funcall(*result, i_leftshift, 1, v);
419
- }
420
819
  fexec np;
421
820
  }
422
821
  }
423
822
 
823
+ action allow_trailing_comma { json->allow_trailing_comma }
824
+
424
825
  action exit { fhold; fbreak; }
425
826
 
426
827
  next_element = value_separator ignore* begin_value >parse_value;
427
828
 
428
829
  main := begin_array ignore*
429
830
  ((begin_value >parse_value ignore*)
430
- (ignore* next_element ignore*)*)?
831
+ (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
431
832
  end_array @exit;
432
833
  }%%
433
834
 
434
835
  static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
435
836
  {
436
837
  int cs = EVIL;
437
- VALUE array_class = json->array_class;
438
838
 
439
839
  if (json->max_nesting && current_nesting > json->max_nesting) {
440
840
  rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
441
841
  }
442
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
842
+ long stack_head = json->stack->head;
443
843
 
444
844
  %% write init;
445
845
  %% write exec;
446
846
 
447
847
  if(cs >= JSON_array_first_final) {
848
+ long count = json->stack->head - stack_head;
849
+
850
+ if (RB_UNLIKELY(json->array_class)) {
851
+ VALUE array = rb_class_new_instance(0, 0, json->array_class);
852
+ VALUE *items = rvalue_stack_peek(json->stack, count);
853
+ long index;
854
+ for (index = 0; index < count; index++) {
855
+ rb_funcall(array, i_leftshift, 1, items[index]);
856
+ }
857
+ *result = array;
858
+ } else {
859
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
860
+ *result = array;
861
+ }
862
+ rvalue_stack_pop(json->stack, count);
863
+
448
864
  return p + 1;
449
865
  } else {
450
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
866
+ raise_parse_error("unexpected token at '%s'", p);
451
867
  return NULL;
452
868
  }
453
869
  }
454
870
 
455
- static const size_t MAX_STACK_BUFFER_SIZE = 128;
456
- static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
871
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
872
+ {
873
+ if (symbolize) {
874
+ intern = true;
875
+ }
876
+ VALUE result;
877
+ # ifdef HAVE_RB_ENC_INTERNED_STR
878
+ if (intern) {
879
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
880
+ } else {
881
+ result = rb_utf8_str_new(start, (long)(end - start));
882
+ }
883
+ # else
884
+ result = rb_utf8_str_new(start, (long)(end - start));
885
+ if (intern) {
886
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
887
+ }
888
+ # endif
889
+
890
+ if (symbolize) {
891
+ result = rb_str_intern(result);
892
+ }
893
+
894
+ return result;
895
+ }
896
+
897
+ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
898
+ {
899
+ size_t bufferSize = stringEnd - string;
900
+
901
+ if (is_name && json->in_array) {
902
+ VALUE cached_key;
903
+ if (RB_UNLIKELY(symbolize)) {
904
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
905
+ } else {
906
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
907
+ }
908
+
909
+ if (RB_LIKELY(cached_key)) {
910
+ return cached_key;
911
+ }
912
+ }
913
+
914
+ return build_string(string, stringEnd, intern, symbolize);
915
+ }
916
+
917
+ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
457
918
  {
458
- VALUE result = Qnil;
459
919
  size_t bufferSize = stringEnd - string;
460
920
  char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
461
921
  int unescape_len;
462
922
  char buf[4];
463
923
 
464
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
465
- # ifdef HAVE_RB_ENC_INTERNED_STR
466
- bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
467
- # else
468
- bufferStart = buffer = ALLOC_N(char, bufferSize);
469
- # endif
470
- } else {
471
- # ifdef HAVE_RB_ENC_INTERNED_STR
472
- bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
473
- # else
474
- bufferStart = buffer = ALLOCA_N(char, bufferSize);
475
- # endif
924
+ if (is_name && json->in_array) {
925
+ VALUE cached_key;
926
+ if (RB_UNLIKELY(symbolize)) {
927
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
928
+ } else {
929
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
930
+ }
931
+
932
+ if (RB_LIKELY(cached_key)) {
933
+ return cached_key;
934
+ }
935
+ }
936
+
937
+ pe = memchr(p, '\\', bufferSize);
938
+ if (RB_UNLIKELY(pe == NULL)) {
939
+ return build_string(string, stringEnd, intern, symbolize);
476
940
  }
477
941
 
942
+ VALUE result = rb_str_buf_new(bufferSize);
943
+ rb_enc_associate_index(result, utf8_encindex);
944
+ buffer = bufferStart = RSTRING_PTR(result);
945
+
478
946
  while (pe < stringEnd) {
479
947
  if (*pe == '\\') {
480
948
  unescape = (char *) "?";
@@ -507,29 +975,27 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
507
975
  break;
508
976
  case 'u':
509
977
  if (pe > stringEnd - 4) {
510
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
511
- ruby_xfree(bufferStart);
512
- }
513
- rb_enc_raise(
514
- EXC_ENCODING eParserError,
515
- "incomplete unicode character escape sequence at '%s'", p
516
- );
978
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
517
979
  } else {
518
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
980
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
519
981
  pe += 3;
520
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
982
+ /* To handle values above U+FFFF, we take a sequence of
983
+ * \uXXXX escapes in the U+D800..U+DBFF then
984
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
985
+ * to make a 20-bit number, then add 0x10000 to get the
986
+ * final codepoint.
987
+ *
988
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
989
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
990
+ * Area".
991
+ */
992
+ if ((ch & 0xFC00) == 0xD800) {
521
993
  pe++;
522
994
  if (pe > stringEnd - 6) {
523
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
524
- ruby_xfree(bufferStart);
525
- }
526
- rb_enc_raise(
527
- EXC_ENCODING eParserError,
528
- "incomplete surrogate pair at '%s'", p
529
- );
995
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
530
996
  }
531
997
  if (pe[0] == '\\' && pe[1] == 'u') {
532
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
998
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
533
999
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
534
1000
  | (sur & 0x3FF));
535
1001
  pe += 5;
@@ -558,41 +1024,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
558
1024
  MEMCPY(buffer, p, char, pe - p);
559
1025
  buffer += pe - p;
560
1026
  }
561
-
562
- # ifdef HAVE_RB_ENC_INTERNED_STR
563
- if (intern) {
564
- result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
565
- } else {
566
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
567
- }
568
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
569
- ruby_xfree(bufferStart);
570
- }
571
- # else
572
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
573
-
574
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
575
- ruby_xfree(bufferStart);
576
- }
577
-
578
- if (intern) {
579
- # if STR_UMINUS_DEDUPE_FROZEN
580
- // Starting from MRI 2.8 it is preferable to freeze the string
581
- // before deduplication so that it can be interned directly
582
- // otherwise it would be duplicated first which is wasteful.
583
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
584
- # elif STR_UMINUS_DEDUPE
585
- // MRI 2.5 and older do not deduplicate strings that are already
586
- // frozen.
587
- result = rb_funcall(result, i_uminus, 0);
588
- # else
589
- result = rb_str_freeze(result);
590
- # endif
591
- }
592
- # endif
1027
+ rb_str_set_len(result, buffer - bufferStart);
593
1028
 
594
1029
  if (symbolize) {
595
- result = rb_str_intern(result);
1030
+ result = rb_str_intern(result);
1031
+ } else if (intern) {
1032
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
596
1033
  }
597
1034
 
598
1035
  return result;
@@ -604,19 +1041,31 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
604
1041
 
605
1042
  write data;
606
1043
 
607
- action parse_string {
608
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
609
- if (NIL_P(*result)) {
610
- fhold;
611
- fbreak;
612
- } else {
613
- fexec p + 1;
614
- }
1044
+ action parse_complex_string {
1045
+ *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1046
+ fexec p + 1;
1047
+ fhold;
1048
+ fbreak;
615
1049
  }
616
1050
 
617
- action exit { fhold; fbreak; }
1051
+ action parse_simple_string {
1052
+ *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1053
+ fexec p + 1;
1054
+ fhold;
1055
+ fbreak;
1056
+ }
618
1057
 
619
- main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
1058
+ double_quote = '"';
1059
+ escape = '\\';
1060
+ control = 0..0x1f;
1061
+ simple = any - escape - double_quote - control;
1062
+
1063
+ main := double_quote (
1064
+ (simple*)(
1065
+ (double_quote) @parse_simple_string |
1066
+ ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string
1067
+ )
1068
+ );
620
1069
  }%%
621
1070
 
622
1071
  static int
@@ -672,18 +1121,80 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
672
1121
 
673
1122
  static VALUE convert_encoding(VALUE source)
674
1123
  {
675
- #ifdef HAVE_RUBY_ENCODING_H
676
- rb_encoding *enc = rb_enc_get(source);
677
- if (enc == rb_ascii8bit_encoding()) {
678
- if (OBJ_FROZEN(source)) {
679
- source = rb_str_dup(source);
680
- }
681
- FORCE_UTF8(source);
682
- } else {
683
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
684
- }
685
- #endif
1124
+ int encindex = RB_ENCODING_GET(source);
1125
+
1126
+ if (RB_LIKELY(encindex == utf8_encindex)) {
686
1127
  return source;
1128
+ }
1129
+
1130
+ if (encindex == binary_encindex) {
1131
+ // For historical reason, we silently reinterpret binary strings as UTF-8
1132
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1133
+ }
1134
+
1135
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
1136
+ }
1137
+
1138
+ static int configure_parser_i(VALUE key, VALUE val, VALUE data)
1139
+ {
1140
+ JSON_Parser *json = (JSON_Parser *)data;
1141
+
1142
+ if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1143
+ else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
1144
+ else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
1145
+ else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
1146
+ else if (key == sym_freeze) { json->freeze = RTEST(val); }
1147
+ else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
1148
+ else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
1149
+ else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
1150
+ else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
1151
+ else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
1152
+ else if (key == sym_create_additions) {
1153
+ if (NIL_P(val)) {
1154
+ json->create_additions = true;
1155
+ json->deprecated_create_additions = true;
1156
+ } else {
1157
+ json->create_additions = RTEST(val);
1158
+ json->deprecated_create_additions = false;
1159
+ }
1160
+ }
1161
+
1162
+ return ST_CONTINUE;
1163
+ }
1164
+
1165
+ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
1166
+ {
1167
+ if (json->Vsource) {
1168
+ rb_raise(rb_eTypeError, "already initialized instance");
1169
+ }
1170
+
1171
+ json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1172
+ json->max_nesting = 100;
1173
+
1174
+ if (!NIL_P(opts)) {
1175
+ Check_Type(opts, T_HASH);
1176
+ if (RHASH_SIZE(opts) > 0) {
1177
+ // We assume in most cases few keys are set so it's faster to go over
1178
+ // the provided keys than to check all possible keys.
1179
+ rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
1180
+
1181
+ if (json->symbolize_names && json->create_additions) {
1182
+ rb_raise(rb_eArgError,
1183
+ "options :symbolize_names and :create_additions cannot be "
1184
+ " used in conjunction");
1185
+ }
1186
+
1187
+ if (json->create_additions && !json->create_id) {
1188
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
1189
+ }
1190
+ }
1191
+
1192
+ }
1193
+ source = convert_encoding(StringValue(source));
1194
+ StringValue(source);
1195
+ json->len = RSTRING_LEN(source);
1196
+ json->source = RSTRING_PTR(source);
1197
+ json->Vsource = source;
687
1198
  }
688
1199
 
689
1200
  /*
@@ -708,105 +1219,23 @@ static VALUE convert_encoding(VALUE source)
708
1219
  * * *create_additions*: If set to false, the Parser doesn't create
709
1220
  * additions even if a matching class and create_id was found. This option
710
1221
  * defaults to false.
711
- * * *object_class*: Defaults to Hash
712
- * * *array_class*: Defaults to Array
1222
+ * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1223
+ * instead of Hash to represent JSON objects. The type must respond to
1224
+ * +new+ without arguments, and return an object that responds to +[]=+.
1225
+ * * *array_class*: Defaults to Array. If another type is provided, it will be used
1226
+ * instead of Array to represent JSON arrays. The type must respond to
1227
+ * +new+ without arguments, and return an object that responds to +<<+.
1228
+ * * *decimal_class*: Specifies which class to use instead of the default
1229
+ * (Float) when parsing decimal numbers. This class must accept a single
1230
+ * string argument in its constructor.
713
1231
  */
714
1232
  static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
715
1233
  {
716
- VALUE source, opts;
717
1234
  GET_PARSER_INIT;
718
1235
 
719
- if (json->Vsource) {
720
- rb_raise(rb_eTypeError, "already initialized instance");
721
- }
722
- rb_scan_args(argc, argv, "1:", &source, &opts);
723
- if (!NIL_P(opts)) {
724
- VALUE tmp = ID2SYM(i_max_nesting);
725
- if (option_given_p(opts, tmp)) {
726
- VALUE max_nesting = rb_hash_aref(opts, tmp);
727
- if (RTEST(max_nesting)) {
728
- Check_Type(max_nesting, T_FIXNUM);
729
- json->max_nesting = FIX2INT(max_nesting);
730
- } else {
731
- json->max_nesting = 0;
732
- }
733
- } else {
734
- json->max_nesting = 100;
735
- }
736
- tmp = ID2SYM(i_allow_nan);
737
- if (option_given_p(opts, tmp)) {
738
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
739
- } else {
740
- json->allow_nan = 0;
741
- }
742
- tmp = ID2SYM(i_symbolize_names);
743
- if (option_given_p(opts, tmp)) {
744
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
745
- } else {
746
- json->symbolize_names = 0;
747
- }
748
- tmp = ID2SYM(i_freeze);
749
- if (option_given_p(opts, tmp)) {
750
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
751
- } else {
752
- json->freeze = 0;
753
- }
754
- tmp = ID2SYM(i_create_additions);
755
- if (option_given_p(opts, tmp)) {
756
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
757
- } else {
758
- json->create_additions = 0;
759
- }
760
- if (json->symbolize_names && json->create_additions) {
761
- rb_raise(rb_eArgError,
762
- "options :symbolize_names and :create_additions cannot be "
763
- " used in conjunction");
764
- }
765
- tmp = ID2SYM(i_create_id);
766
- if (option_given_p(opts, tmp)) {
767
- json->create_id = rb_hash_aref(opts, tmp);
768
- } else {
769
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
770
- }
771
- tmp = ID2SYM(i_object_class);
772
- if (option_given_p(opts, tmp)) {
773
- json->object_class = rb_hash_aref(opts, tmp);
774
- } else {
775
- json->object_class = Qnil;
776
- }
777
- tmp = ID2SYM(i_array_class);
778
- if (option_given_p(opts, tmp)) {
779
- json->array_class = rb_hash_aref(opts, tmp);
780
- } else {
781
- json->array_class = Qnil;
782
- }
783
- tmp = ID2SYM(i_decimal_class);
784
- if (option_given_p(opts, tmp)) {
785
- json->decimal_class = rb_hash_aref(opts, tmp);
786
- } else {
787
- json->decimal_class = Qnil;
788
- }
789
- tmp = ID2SYM(i_match_string);
790
- if (option_given_p(opts, tmp)) {
791
- VALUE match_string = rb_hash_aref(opts, tmp);
792
- json->match_string = RTEST(match_string) ? match_string : Qnil;
793
- } else {
794
- json->match_string = Qnil;
795
- }
796
- } else {
797
- json->max_nesting = 100;
798
- json->allow_nan = 0;
799
- json->create_additions = 0;
800
- json->create_id = Qnil;
801
- json->object_class = Qnil;
802
- json->array_class = Qnil;
803
- json->decimal_class = Qnil;
804
- }
805
- source = convert_encoding(StringValue(source));
806
- StringValue(source);
807
- json->len = RSTRING_LEN(source);
808
- json->source = RSTRING_PTR(source);;
809
- json->Vsource = source;
1236
+ rb_check_arity(argc, 1, 2);
1237
+
1238
+ parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
810
1239
  return self;
811
1240
  }
812
1241
 
@@ -836,64 +1265,119 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
836
1265
  */
837
1266
  static VALUE cParser_parse(VALUE self)
838
1267
  {
839
- char *p, *pe;
840
- int cs = EVIL;
841
- VALUE result = Qnil;
842
- GET_PARSER;
1268
+ char *p, *pe;
1269
+ int cs = EVIL;
1270
+ VALUE result = Qnil;
1271
+ GET_PARSER;
843
1272
 
844
- %% write init;
845
- p = json->source;
846
- pe = p + json->len;
847
- %% write exec;
1273
+ char stack_buffer[FBUFFER_STACK_SIZE];
1274
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
848
1275
 
849
- if (cs >= JSON_first_final && p == pe) {
850
- return result;
851
- } else {
852
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
853
- return Qnil;
854
- }
1276
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1277
+ rvalue_stack stack = {
1278
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1279
+ .ptr = rvalue_stack_buffer,
1280
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1281
+ };
1282
+ json->stack = &stack;
1283
+
1284
+ %% write init;
1285
+ p = json->source;
1286
+ pe = p + json->len;
1287
+ %% write exec;
1288
+
1289
+ if (json->stack_handle) {
1290
+ rvalue_stack_eagerly_release(json->stack_handle);
1291
+ }
1292
+
1293
+ if (cs >= JSON_first_final && p == pe) {
1294
+ return result;
1295
+ } else {
1296
+ raise_parse_error("unexpected token at '%s'", p);
1297
+ return Qnil;
1298
+ }
1299
+ }
1300
+
1301
+ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
1302
+ {
1303
+ char *p, *pe;
1304
+ int cs = EVIL;
1305
+ VALUE result = Qnil;
1306
+
1307
+ JSON_Parser _parser = {0};
1308
+ JSON_Parser *json = &_parser;
1309
+ parser_init(json, source, opts);
1310
+
1311
+ char stack_buffer[FBUFFER_STACK_SIZE];
1312
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
1313
+
1314
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1315
+ rvalue_stack stack = {
1316
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1317
+ .ptr = rvalue_stack_buffer,
1318
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1319
+ };
1320
+ json->stack = &stack;
1321
+
1322
+ %% write init;
1323
+ p = json->source;
1324
+ pe = p + json->len;
1325
+ %% write exec;
1326
+
1327
+ if (json->stack_handle) {
1328
+ rvalue_stack_eagerly_release(json->stack_handle);
1329
+ }
1330
+
1331
+ if (cs >= JSON_first_final && p == pe) {
1332
+ return result;
1333
+ } else {
1334
+ raise_parse_error("unexpected token at '%s'", p);
1335
+ return Qnil;
1336
+ }
855
1337
  }
856
1338
 
857
1339
  static void JSON_mark(void *ptr)
858
1340
  {
859
1341
  JSON_Parser *json = ptr;
860
- rb_gc_mark_maybe(json->Vsource);
861
- rb_gc_mark_maybe(json->create_id);
862
- rb_gc_mark_maybe(json->object_class);
863
- rb_gc_mark_maybe(json->array_class);
864
- rb_gc_mark_maybe(json->decimal_class);
865
- rb_gc_mark_maybe(json->match_string);
1342
+ rb_gc_mark(json->Vsource);
1343
+ rb_gc_mark(json->create_id);
1344
+ rb_gc_mark(json->object_class);
1345
+ rb_gc_mark(json->array_class);
1346
+ rb_gc_mark(json->decimal_class);
1347
+ rb_gc_mark(json->match_string);
1348
+ rb_gc_mark(json->stack_handle);
1349
+
1350
+ long index;
1351
+ for (index = 0; index < json->name_cache.length; index++) {
1352
+ rb_gc_mark(json->name_cache.entries[index]);
1353
+ }
866
1354
  }
867
1355
 
868
1356
  static void JSON_free(void *ptr)
869
1357
  {
870
1358
  JSON_Parser *json = ptr;
871
- fbuffer_free(json->fbuffer);
1359
+ fbuffer_free(&json->fbuffer);
872
1360
  ruby_xfree(json);
873
1361
  }
874
1362
 
875
1363
  static size_t JSON_memsize(const void *ptr)
876
1364
  {
877
1365
  const JSON_Parser *json = ptr;
878
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
1366
+ return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer);
879
1367
  }
880
1368
 
881
- #ifdef NEW_TYPEDDATA_WRAPPER
882
1369
  static const rb_data_type_t JSON_Parser_type = {
883
1370
  "JSON/Parser",
884
1371
  {JSON_mark, JSON_free, JSON_memsize,},
885
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
886
1372
  0, 0,
887
1373
  RUBY_TYPED_FREE_IMMEDIATELY,
888
- #endif
889
1374
  };
890
- #endif
891
1375
 
892
1376
  static VALUE cJSON_parser_s_allocate(VALUE klass)
893
1377
  {
894
1378
  JSON_Parser *json;
895
1379
  VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
896
- json->fbuffer = fbuffer_alloc(0);
1380
+ fbuffer_stack_init(&json->fbuffer, 0, NULL, 0);
897
1381
  return obj;
898
1382
  }
899
1383
 
@@ -920,15 +1404,15 @@ void Init_parser(void)
920
1404
  mJSON = rb_define_module("JSON");
921
1405
  mExt = rb_define_module_under(mJSON, "Ext");
922
1406
  cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
923
- eParserError = rb_path2class("JSON::ParserError");
924
1407
  eNestingError = rb_path2class("JSON::NestingError");
925
- rb_gc_register_mark_object(eParserError);
926
1408
  rb_gc_register_mark_object(eNestingError);
927
1409
  rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
928
1410
  rb_define_method(cParser, "initialize", cParser_initialize, -1);
929
1411
  rb_define_method(cParser, "parse", cParser_parse, 0);
930
1412
  rb_define_method(cParser, "source", cParser_source, 0);
931
1413
 
1414
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
1415
+
932
1416
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
933
1417
  rb_gc_register_mark_object(CNaN);
934
1418
 
@@ -938,28 +1422,38 @@ void Init_parser(void)
938
1422
  CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
939
1423
  rb_gc_register_mark_object(CMinusInfinity);
940
1424
 
1425
+ rb_global_variable(&Encoding_UTF_8);
1426
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1427
+
1428
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1429
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1430
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1431
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1432
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1433
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
1434
+ sym_create_id = ID2SYM(rb_intern("create_id"));
1435
+ sym_object_class = ID2SYM(rb_intern("object_class"));
1436
+ sym_array_class = ID2SYM(rb_intern("array_class"));
1437
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1438
+ sym_match_string = ID2SYM(rb_intern("match_string"));
1439
+
1440
+ i_create_id = rb_intern("create_id");
941
1441
  i_json_creatable_p = rb_intern("json_creatable?");
942
1442
  i_json_create = rb_intern("json_create");
943
- i_create_id = rb_intern("create_id");
944
- i_create_additions = rb_intern("create_additions");
945
1443
  i_chr = rb_intern("chr");
946
- i_max_nesting = rb_intern("max_nesting");
947
- i_allow_nan = rb_intern("allow_nan");
948
- i_symbolize_names = rb_intern("symbolize_names");
949
- i_object_class = rb_intern("object_class");
950
- i_array_class = rb_intern("array_class");
951
- i_decimal_class = rb_intern("decimal_class");
952
1444
  i_match = rb_intern("match");
953
- i_match_string = rb_intern("match_string");
954
- i_key_p = rb_intern("key?");
955
1445
  i_deep_const_get = rb_intern("deep_const_get");
956
1446
  i_aset = rb_intern("[]=");
957
1447
  i_aref = rb_intern("[]");
958
1448
  i_leftshift = rb_intern("<<");
959
1449
  i_new = rb_intern("new");
960
1450
  i_try_convert = rb_intern("try_convert");
961
- i_freeze = rb_intern("freeze");
962
1451
  i_uminus = rb_intern("-@");
1452
+ i_encode = rb_intern("encode");
1453
+
1454
+ binary_encindex = rb_ascii8bit_encindex();
1455
+ utf8_encindex = rb_utf8_encindex();
1456
+ enc_utf8 = rb_utf8_encoding();
963
1457
  }
964
1458
 
965
1459
  /*