json 2.7.2 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,310 @@
1
+ #include "ruby.h"
1
2
  #include "../fbuffer/fbuffer.h"
2
- #include "parser.h"
3
3
 
4
- #if defined HAVE_RUBY_ENCODING_H
5
- # define EXC_ENCODING rb_utf8_encoding(),
6
- # ifndef HAVE_RB_ENC_RAISE
7
- static void
8
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
9
- {
10
- va_list args;
11
- VALUE mesg;
4
+ static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
5
+ static VALUE CNaN, CInfinity, CMinusInfinity;
12
6
 
13
- va_start(args, fmt);
14
- mesg = rb_enc_vsprintf(enc, fmt, args);
15
- va_end(args);
7
+ static ID i_json_creatable_p, i_json_create, i_create_id,
8
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
9
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
16
10
 
17
- rb_exc_raise(rb_exc_new3(exc, mesg));
18
- }
19
- # define rb_enc_raise enc_raise
20
- # endif
11
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
12
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
13
+ sym_decimal_class, sym_match_string;
14
+
15
+ static int binary_encindex;
16
+ static int utf8_encindex;
17
+
18
+ #ifdef HAVE_RB_CATEGORY_WARN
19
+ # define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
21
20
  #else
22
- # define EXC_ENCODING /* nothing */
23
- # define rb_enc_raise rb_raise
21
+ # define json_deprecated(message) rb_warn(message)
22
+ #endif
23
+
24
+ static const char deprecated_create_additions_warning[] =
25
+ "JSON.load implicit support for `create_additions: true` is deprecated "
26
+ "and will be removed in 3.0, use JSON.unsafe_load or explicitly "
27
+ "pass `create_additions: true`";
28
+
29
+ #ifndef HAVE_RB_HASH_BULK_INSERT
30
+ // For TruffleRuby
31
+ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
32
+ {
33
+ long index = 0;
34
+ while (index < count) {
35
+ VALUE name = pairs[index++];
36
+ VALUE value = pairs[index++];
37
+ rb_hash_aset(hash, name, value);
38
+ }
39
+ RB_GC_GUARD(hash);
40
+ }
24
41
  #endif
25
42
 
43
+ /* name cache */
44
+
45
+ #include <string.h>
46
+ #include <ctype.h>
47
+
48
+ // Object names are likely to be repeated, and are frozen.
49
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
50
+ // and save much more expensive lookups into the global fstring table.
51
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
52
+ // to be able to fit safely on the stack.
53
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
54
+ // performance.
55
+ #define JSON_RVALUE_CACHE_CAPA 63
56
+ typedef struct rvalue_cache_struct {
57
+ int length;
58
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
59
+ } rvalue_cache;
60
+
61
+ static rb_encoding *enc_utf8;
62
+
63
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
64
+
65
+ static inline VALUE build_interned_string(const char *str, const long length)
66
+ {
67
+ # ifdef HAVE_RB_ENC_INTERNED_STR
68
+ return rb_enc_interned_str(str, length, enc_utf8);
69
+ # else
70
+ VALUE rstring = rb_utf8_str_new(str, length);
71
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
72
+ # endif
73
+ }
74
+
75
+ static inline VALUE build_symbol(const char *str, const long length)
76
+ {
77
+ return rb_str_intern(build_interned_string(str, length));
78
+ }
79
+
80
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
81
+ {
82
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
83
+ cache->length++;
84
+ cache->entries[index] = rstring;
85
+ }
86
+
87
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
88
+ {
89
+ long rstring_length = RSTRING_LEN(rstring);
90
+ if (length == rstring_length) {
91
+ return memcmp(str, RSTRING_PTR(rstring), length);
92
+ } else {
93
+ return (int)(length - rstring_length);
94
+ }
95
+ }
96
+
97
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
98
+ {
99
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
100
+ // Common names aren't likely to be very long. So we just don't
101
+ // cache names above an arbitrary threshold.
102
+ return Qfalse;
103
+ }
104
+
105
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
106
+ // Simple heuristic, if the first character isn't a letter,
107
+ // we're much less likely to see this string again.
108
+ // We mostly want to cache strings that are likely to be repeated.
109
+ return Qfalse;
110
+ }
111
+
112
+ int low = 0;
113
+ int high = cache->length - 1;
114
+ int mid = 0;
115
+ int last_cmp = 0;
116
+
117
+ while (low <= high) {
118
+ mid = (high + low) >> 1;
119
+ VALUE entry = cache->entries[mid];
120
+ last_cmp = rstring_cache_cmp(str, length, entry);
121
+
122
+ if (last_cmp == 0) {
123
+ return entry;
124
+ } else if (last_cmp > 0) {
125
+ low = mid + 1;
126
+ } else {
127
+ high = mid - 1;
128
+ }
129
+ }
130
+
131
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
132
+ // We assume the overwhelming majority of names don't need to be escaped.
133
+ // But if they do, we have to fallback to the slow path.
134
+ return Qfalse;
135
+ }
136
+
137
+ VALUE rstring = build_interned_string(str, length);
138
+
139
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
140
+ if (last_cmp > 0) {
141
+ mid += 1;
142
+ }
143
+
144
+ rvalue_cache_insert_at(cache, mid, rstring);
145
+ }
146
+ return rstring;
147
+ }
148
+
149
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
150
+ {
151
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
152
+ // Common names aren't likely to be very long. So we just don't
153
+ // cache names above an arbitrary threshold.
154
+ return Qfalse;
155
+ }
156
+
157
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
158
+ // Simple heuristic, if the first character isn't a letter,
159
+ // we're much less likely to see this string again.
160
+ // We mostly want to cache strings that are likely to be repeated.
161
+ return Qfalse;
162
+ }
163
+
164
+ int low = 0;
165
+ int high = cache->length - 1;
166
+ int mid = 0;
167
+ int last_cmp = 0;
168
+
169
+ while (low <= high) {
170
+ mid = (high + low) >> 1;
171
+ VALUE entry = cache->entries[mid];
172
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
173
+
174
+ if (last_cmp == 0) {
175
+ return entry;
176
+ } else if (last_cmp > 0) {
177
+ low = mid + 1;
178
+ } else {
179
+ high = mid - 1;
180
+ }
181
+ }
182
+
183
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
184
+ // We assume the overwhelming majority of names don't need to be escaped.
185
+ // But if they do, we have to fallback to the slow path.
186
+ return Qfalse;
187
+ }
188
+
189
+ VALUE rsymbol = build_symbol(str, length);
190
+
191
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
192
+ if (last_cmp > 0) {
193
+ mid += 1;
194
+ }
195
+
196
+ rvalue_cache_insert_at(cache, mid, rsymbol);
197
+ }
198
+ return rsymbol;
199
+ }
200
+
201
+ /* rvalue stack */
202
+
203
+ #define RVALUE_STACK_INITIAL_CAPA 128
204
+
205
+ enum rvalue_stack_type {
206
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
207
+ RVALUE_STACK_STACK_ALLOCATED = 1,
208
+ };
209
+
210
+ typedef struct rvalue_stack_struct {
211
+ enum rvalue_stack_type type;
212
+ long capa;
213
+ long head;
214
+ VALUE *ptr;
215
+ } rvalue_stack;
216
+
217
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
218
+
219
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
220
+ {
221
+ long required = stack->capa * 2;
222
+
223
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
224
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
225
+ } else {
226
+ REALLOC_N(stack->ptr, VALUE, required);
227
+ stack->capa = required;
228
+ }
229
+ return stack;
230
+ }
231
+
232
+ static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
233
+ {
234
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
235
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
236
+ }
237
+ stack->ptr[stack->head] = value;
238
+ stack->head++;
239
+ }
240
+
241
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
242
+ {
243
+ return stack->ptr + (stack->head - count);
244
+ }
245
+
246
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
247
+ {
248
+ stack->head -= count;
249
+ }
250
+
251
+ static void rvalue_stack_mark(void *ptr)
252
+ {
253
+ rvalue_stack *stack = (rvalue_stack *)ptr;
254
+ long index;
255
+ for (index = 0; index < stack->head; index++) {
256
+ rb_gc_mark(stack->ptr[index]);
257
+ }
258
+ }
259
+
260
+ static void rvalue_stack_free(void *ptr)
261
+ {
262
+ rvalue_stack *stack = (rvalue_stack *)ptr;
263
+ if (stack) {
264
+ ruby_xfree(stack->ptr);
265
+ ruby_xfree(stack);
266
+ }
267
+ }
268
+
269
+ static size_t rvalue_stack_memsize(const void *ptr)
270
+ {
271
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
272
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
273
+ }
274
+
275
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
276
+ "JSON::Ext::Parser/rvalue_stack",
277
+ {
278
+ .dmark = rvalue_stack_mark,
279
+ .dfree = rvalue_stack_free,
280
+ .dsize = rvalue_stack_memsize,
281
+ },
282
+ 0, 0,
283
+ RUBY_TYPED_FREE_IMMEDIATELY,
284
+ };
285
+
286
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
287
+ {
288
+ rvalue_stack *stack;
289
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
290
+ *stack_ref = stack;
291
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
292
+
293
+ stack->capa = old_stack->capa << 1;
294
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
295
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
296
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
297
+ return stack;
298
+ }
299
+
300
+ static void rvalue_stack_eagerly_release(VALUE handle)
301
+ {
302
+ rvalue_stack *stack;
303
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
304
+ RTYPEDDATA_DATA(handle) = NULL;
305
+ rvalue_stack_free(stack);
306
+ }
307
+
26
308
  /* unicode */
27
309
 
28
310
  static const signed char digit_values[256] = {
@@ -42,26 +324,28 @@ static const signed char digit_values[256] = {
42
324
  -1, -1, -1, -1, -1, -1, -1
43
325
  };
44
326
 
45
- static UTF32 unescape_unicode(const unsigned char *p)
327
+ static uint32_t unescape_unicode(const unsigned char *p)
46
328
  {
329
+ const uint32_t replacement_char = 0xFFFD;
330
+
47
331
  signed char b;
48
- UTF32 result = 0;
332
+ uint32_t result = 0;
49
333
  b = digit_values[p[0]];
50
- if (b < 0) return UNI_REPLACEMENT_CHAR;
334
+ if (b < 0) return replacement_char;
51
335
  result = (result << 4) | (unsigned char)b;
52
336
  b = digit_values[p[1]];
53
- if (b < 0) return UNI_REPLACEMENT_CHAR;
337
+ if (b < 0) return replacement_char;
54
338
  result = (result << 4) | (unsigned char)b;
55
339
  b = digit_values[p[2]];
56
- if (b < 0) return UNI_REPLACEMENT_CHAR;
340
+ if (b < 0) return replacement_char;
57
341
  result = (result << 4) | (unsigned char)b;
58
342
  b = digit_values[p[3]];
59
- if (b < 0) return UNI_REPLACEMENT_CHAR;
343
+ if (b < 0) return replacement_char;
60
344
  result = (result << 4) | (unsigned char)b;
61
345
  return result;
62
346
  }
63
347
 
64
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
348
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
65
349
  {
66
350
  int len = 1;
67
351
  if (ch <= 0x7F) {
@@ -87,14 +371,70 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
87
371
  return len;
88
372
  }
89
373
 
90
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
91
- static VALUE CNaN, CInfinity, CMinusInfinity;
374
+ typedef struct JSON_ParserStruct {
375
+ VALUE Vsource;
376
+ char *source;
377
+ long len;
378
+ char *memo;
379
+ VALUE create_id;
380
+ VALUE object_class;
381
+ VALUE array_class;
382
+ VALUE decimal_class;
383
+ VALUE match_string;
384
+ FBuffer fbuffer;
385
+ int in_array;
386
+ int max_nesting;
387
+ bool allow_nan;
388
+ bool allow_trailing_comma;
389
+ bool parsing_name;
390
+ bool symbolize_names;
391
+ bool freeze;
392
+ bool create_additions;
393
+ bool deprecated_create_additions;
394
+ rvalue_cache name_cache;
395
+ rvalue_stack *stack;
396
+ VALUE stack_handle;
397
+ } JSON_Parser;
398
+
399
+ #define GET_PARSER \
400
+ GET_PARSER_INIT; \
401
+ if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
402
+
403
+ #define GET_PARSER_INIT \
404
+ JSON_Parser *json; \
405
+ TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
406
+
407
+ #define MinusInfinity "-Infinity"
408
+ #define EVIL 0x666
409
+
410
+ static const rb_data_type_t JSON_Parser_type;
411
+ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
412
+ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
413
+ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
414
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result);
415
+ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
416
+
417
+
418
+ #define PARSE_ERROR_FRAGMENT_LEN 32
419
+ #ifdef RBIMPL_ATTR_NORETURN
420
+ RBIMPL_ATTR_NORETURN()
421
+ #endif
422
+ static void raise_parse_error(const char *format, const char *start)
423
+ {
424
+ char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
425
+
426
+ size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
427
+ const char *ptr = start;
428
+
429
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
430
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
431
+ buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
432
+ ptr = buffer;
433
+ }
434
+
435
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
436
+ }
92
437
 
93
- static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
94
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
95
- i_object_class, i_array_class, i_decimal_class, i_key_p,
96
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
97
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
98
438
 
99
439
  %%{
100
440
  machine JSON_common;
@@ -131,27 +471,25 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
131
471
  write data;
132
472
 
133
473
  action parse_value {
134
- VALUE v = Qnil;
135
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
474
+ char *np = JSON_parse_value(json, fpc, pe, result, current_nesting);
136
475
  if (np == NULL) {
137
476
  fhold; fbreak;
138
477
  } else {
139
- if (NIL_P(json->object_class)) {
140
- OBJ_FREEZE(last_name);
141
- rb_hash_aset(*result, last_name, v);
142
- } else {
143
- rb_funcall(*result, i_aset, 2, last_name, v);
144
- }
145
478
  fexec np;
146
479
  }
147
480
  }
148
481
 
482
+ action allow_trailing_comma { json->allow_trailing_comma }
483
+
149
484
  action parse_name {
150
485
  char *np;
151
- json->parsing_name = 1;
152
- np = JSON_parse_string(json, fpc, pe, &last_name);
153
- json->parsing_name = 0;
154
- if (np == NULL) { fhold; fbreak; } else fexec np;
486
+ json->parsing_name = true;
487
+ np = JSON_parse_string(json, fpc, pe, result);
488
+ json->parsing_name = false;
489
+ if (np == NULL) { fhold; fbreak; } else {
490
+ PUSH(*result);
491
+ fexec np;
492
+ }
155
493
  }
156
494
 
157
495
  action exit { fhold; fbreak; }
@@ -161,37 +499,64 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
161
499
 
162
500
  main := (
163
501
  begin_object
164
- (pair (next_pair)*)? ignore*
502
+ (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
165
503
  end_object
166
504
  ) @exit;
167
505
  }%%
168
506
 
507
+ #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
508
+
169
509
  static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
170
510
  {
171
511
  int cs = EVIL;
172
- VALUE last_name = Qnil;
173
- VALUE object_class = json->object_class;
174
512
 
175
513
  if (json->max_nesting && current_nesting > json->max_nesting) {
176
514
  rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
177
515
  }
178
516
 
179
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
517
+ long stack_head = json->stack->head;
180
518
 
181
519
  %% write init;
182
520
  %% write exec;
183
521
 
184
522
  if (cs >= JSON_object_first_final) {
185
- if (json->create_additions) {
523
+ long count = json->stack->head - stack_head;
524
+
525
+ if (RB_UNLIKELY(json->object_class)) {
526
+ VALUE object = rb_class_new_instance(0, 0, json->object_class);
527
+ long index = 0;
528
+ VALUE *items = rvalue_stack_peek(json->stack, count);
529
+ while (index < count) {
530
+ VALUE name = items[index++];
531
+ VALUE value = items[index++];
532
+ rb_funcall(object, i_aset, 2, name, value);
533
+ }
534
+ *result = object;
535
+ } else {
536
+ VALUE hash;
537
+ #ifdef HAVE_RB_HASH_NEW_CAPA
538
+ hash = rb_hash_new_capa(count >> 1);
539
+ #else
540
+ hash = rb_hash_new();
541
+ #endif
542
+ rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
543
+ *result = hash;
544
+ }
545
+ rvalue_stack_pop(json->stack, count);
546
+
547
+ if (RB_UNLIKELY(json->create_additions)) {
186
548
  VALUE klassname;
187
- if (NIL_P(json->object_class)) {
188
- klassname = rb_hash_aref(*result, json->create_id);
549
+ if (json->object_class) {
550
+ klassname = rb_funcall(*result, i_aref, 1, json->create_id);
189
551
  } else {
190
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
552
+ klassname = rb_hash_aref(*result, json->create_id);
191
553
  }
192
554
  if (!NIL_P(klassname)) {
193
555
  VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
194
556
  if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
557
+ if (json->deprecated_create_additions) {
558
+ json_deprecated(deprecated_create_additions_warning);
559
+ }
195
560
  *result = rb_funcall(klass, i_json_create, 1, *result);
196
561
  }
197
562
  }
@@ -202,7 +567,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
202
567
  }
203
568
  }
204
569
 
205
-
206
570
  %%{
207
571
  machine JSON_value;
208
572
  include JSON_common;
@@ -222,19 +586,24 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
222
586
  if (json->allow_nan) {
223
587
  *result = CNaN;
224
588
  } else {
225
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 2);
589
+ raise_parse_error("unexpected token at '%s'", p - 2);
226
590
  }
227
591
  }
228
592
  action parse_infinity {
229
593
  if (json->allow_nan) {
230
594
  *result = CInfinity;
231
595
  } else {
232
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 7);
596
+ raise_parse_error("unexpected token at '%s'", p - 7);
233
597
  }
234
598
  }
235
599
  action parse_string {
236
600
  char *np = JSON_parse_string(json, fpc, pe, result);
237
- if (np == NULL) { fhold; fbreak; } else fexec np;
601
+ if (np == NULL) {
602
+ fhold;
603
+ fbreak;
604
+ } else {
605
+ fexec np;
606
+ }
238
607
  }
239
608
 
240
609
  action parse_number {
@@ -245,19 +614,21 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
245
614
  fexec p + 10;
246
615
  fhold; fbreak;
247
616
  } else {
248
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
617
+ raise_parse_error("unexpected token at '%s'", p);
249
618
  }
250
619
  }
251
- np = JSON_parse_float(json, fpc, pe, result);
252
- if (np != NULL) fexec np;
253
- np = JSON_parse_integer(json, fpc, pe, result);
254
- if (np != NULL) fexec np;
620
+ np = JSON_parse_number(json, fpc, pe, result);
621
+ if (np != NULL) {
622
+ fexec np;
623
+ }
255
624
  fhold; fbreak;
256
625
  }
257
626
 
258
627
  action parse_array {
259
628
  char *np;
629
+ json->in_array++;
260
630
  np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1);
631
+ json->in_array--;
261
632
  if (np == NULL) { fhold; fbreak; } else fexec np;
262
633
  }
263
634
 
@@ -275,10 +646,10 @@ main := ignore* (
275
646
  Vtrue @parse_true |
276
647
  VNaN @parse_nan |
277
648
  VInfinity @parse_infinity |
278
- begin_number >parse_number |
279
- begin_string >parse_string |
280
- begin_array >parse_array |
281
- begin_object >parse_object
649
+ begin_number @parse_number |
650
+ begin_string @parse_string |
651
+ begin_array @parse_array |
652
+ begin_object @parse_object
282
653
  ) ignore* %*exit;
283
654
  }%%
284
655
 
@@ -294,6 +665,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
294
665
  }
295
666
 
296
667
  if (cs >= JSON_value_first_final) {
668
+ PUSH(*result);
297
669
  return p;
298
670
  } else {
299
671
  return NULL;
@@ -310,24 +682,40 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
310
682
  main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
311
683
  }%%
312
684
 
313
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
685
+ #define MAX_FAST_INTEGER_SIZE 18
686
+ static inline VALUE fast_parse_integer(char *p, char *pe)
314
687
  {
315
- int cs = EVIL;
688
+ bool negative = false;
689
+ if (*p == '-') {
690
+ negative = true;
691
+ p++;
692
+ }
316
693
 
317
- %% write init;
318
- json->memo = p;
319
- %% write exec;
694
+ long long memo = 0;
695
+ while (p < pe) {
696
+ memo *= 10;
697
+ memo += *p - '0';
698
+ p++;
699
+ }
700
+
701
+ if (negative) {
702
+ memo = -memo;
703
+ }
704
+ return LL2NUM(memo);
705
+ }
320
706
 
321
- if (cs >= JSON_integer_first_final) {
707
+ static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result)
708
+ {
322
709
  long len = p - json->memo;
323
- fbuffer_clear(json->fbuffer);
324
- fbuffer_append(json->fbuffer, json->memo, len);
325
- fbuffer_append_char(json->fbuffer, '\0');
326
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
710
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
711
+ *result = fast_parse_integer(json->memo, p);
712
+ } else {
713
+ fbuffer_clear(&json->fbuffer);
714
+ fbuffer_append(&json->fbuffer, json->memo, len);
715
+ fbuffer_append_char(&json->fbuffer, '\0');
716
+ *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
717
+ }
327
718
  return p + 1;
328
- } else {
329
- return NULL;
330
- }
331
719
  }
332
720
 
333
721
  %%{
@@ -337,60 +725,68 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
337
725
  write data;
338
726
 
339
727
  action exit { fhold; fbreak; }
728
+ action isFloat { is_float = true; }
340
729
 
341
730
  main := '-'? (
342
- (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
343
- | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
344
- ) (^[0-9Ee.\-]? @exit );
731
+ (('0' | [1-9][0-9]*)
732
+ ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) |
733
+ ([Ee] [+\-]?[0-9]+)) > isFloat)?
734
+ ) (^[0-9Ee.\-]? @exit ));
345
735
  }%%
346
736
 
347
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
737
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result)
348
738
  {
349
739
  int cs = EVIL;
740
+ bool is_float = false;
350
741
 
351
742
  %% write init;
352
743
  json->memo = p;
353
744
  %% write exec;
354
745
 
355
746
  if (cs >= JSON_float_first_final) {
747
+ if (!is_float) {
748
+ return JSON_decode_integer(json, p, result);
749
+ }
356
750
  VALUE mod = Qnil;
357
751
  ID method_id = 0;
358
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
359
- mod = json->decimal_class;
360
- method_id = i_try_convert;
361
- } else if (rb_respond_to(json->decimal_class, i_new)) {
362
- mod = json->decimal_class;
363
- method_id = i_new;
364
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
365
- VALUE name = rb_class_name(json->decimal_class);
366
- const char *name_cstr = RSTRING_PTR(name);
367
- const char *last_colon = strrchr(name_cstr, ':');
368
- if (last_colon) {
369
- const char *mod_path_end = last_colon - 1;
370
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
371
- mod = rb_path_to_class(mod_path);
372
-
373
- const char *method_name_beg = last_colon + 1;
374
- long before_len = method_name_beg - name_cstr;
375
- long len = RSTRING_LEN(name) - before_len;
376
- VALUE method_name = rb_str_substr(name, before_len, len);
377
- method_id = SYM2ID(rb_str_intern(method_name));
378
- } else {
379
- mod = rb_mKernel;
380
- method_id = SYM2ID(rb_str_intern(name));
752
+ if (json->decimal_class) {
753
+ if (rb_respond_to(json->decimal_class, i_try_convert)) {
754
+ mod = json->decimal_class;
755
+ method_id = i_try_convert;
756
+ } else if (rb_respond_to(json->decimal_class, i_new)) {
757
+ mod = json->decimal_class;
758
+ method_id = i_new;
759
+ } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
760
+ VALUE name = rb_class_name(json->decimal_class);
761
+ const char *name_cstr = RSTRING_PTR(name);
762
+ const char *last_colon = strrchr(name_cstr, ':');
763
+ if (last_colon) {
764
+ const char *mod_path_end = last_colon - 1;
765
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
766
+ mod = rb_path_to_class(mod_path);
767
+
768
+ const char *method_name_beg = last_colon + 1;
769
+ long before_len = method_name_beg - name_cstr;
770
+ long len = RSTRING_LEN(name) - before_len;
771
+ VALUE method_name = rb_str_substr(name, before_len, len);
772
+ method_id = SYM2ID(rb_str_intern(method_name));
773
+ } else {
774
+ mod = rb_mKernel;
775
+ method_id = SYM2ID(rb_str_intern(name));
776
+ }
381
777
  }
382
778
  }
383
779
 
384
780
  long len = p - json->memo;
385
- fbuffer_clear(json->fbuffer);
386
- fbuffer_append(json->fbuffer, json->memo, len);
387
- fbuffer_append_char(json->fbuffer, '\0');
781
+ fbuffer_clear(&json->fbuffer);
782
+ fbuffer_append(&json->fbuffer, json->memo, len);
783
+ fbuffer_append_char(&json->fbuffer, '\0');
388
784
 
389
785
  if (method_id) {
390
- VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
786
+ VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer));
391
787
  *result = rb_funcallv(mod, method_id, 1, &text);
392
788
  } else {
393
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
789
+ *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1));
394
790
  }
395
791
 
396
792
  return p + 1;
@@ -412,69 +808,133 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
412
808
  if (np == NULL) {
413
809
  fhold; fbreak;
414
810
  } else {
415
- if (NIL_P(json->array_class)) {
416
- rb_ary_push(*result, v);
417
- } else {
418
- rb_funcall(*result, i_leftshift, 1, v);
419
- }
420
811
  fexec np;
421
812
  }
422
813
  }
423
814
 
815
+ action allow_trailing_comma { json->allow_trailing_comma }
816
+
424
817
  action exit { fhold; fbreak; }
425
818
 
426
819
  next_element = value_separator ignore* begin_value >parse_value;
427
820
 
428
821
  main := begin_array ignore*
429
822
  ((begin_value >parse_value ignore*)
430
- (ignore* next_element ignore*)*)?
823
+ (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
431
824
  end_array @exit;
432
825
  }%%
433
826
 
434
827
  static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
435
828
  {
436
829
  int cs = EVIL;
437
- VALUE array_class = json->array_class;
438
830
 
439
831
  if (json->max_nesting && current_nesting > json->max_nesting) {
440
832
  rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
441
833
  }
442
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
834
+ long stack_head = json->stack->head;
443
835
 
444
836
  %% write init;
445
837
  %% write exec;
446
838
 
447
839
  if(cs >= JSON_array_first_final) {
840
+ long count = json->stack->head - stack_head;
841
+
842
+ if (RB_UNLIKELY(json->array_class)) {
843
+ VALUE array = rb_class_new_instance(0, 0, json->array_class);
844
+ VALUE *items = rvalue_stack_peek(json->stack, count);
845
+ long index;
846
+ for (index = 0; index < count; index++) {
847
+ rb_funcall(array, i_leftshift, 1, items[index]);
848
+ }
849
+ *result = array;
850
+ } else {
851
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
852
+ *result = array;
853
+ }
854
+ rvalue_stack_pop(json->stack, count);
855
+
448
856
  return p + 1;
449
857
  } else {
450
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
858
+ raise_parse_error("unexpected token at '%s'", p);
451
859
  return NULL;
452
860
  }
453
861
  }
454
862
 
455
- static const size_t MAX_STACK_BUFFER_SIZE = 128;
456
- static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
863
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
864
+ {
865
+ if (symbolize) {
866
+ intern = true;
867
+ }
868
+ VALUE result;
869
+ # ifdef HAVE_RB_ENC_INTERNED_STR
870
+ if (intern) {
871
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
872
+ } else {
873
+ result = rb_utf8_str_new(start, (long)(end - start));
874
+ }
875
+ # else
876
+ result = rb_utf8_str_new(start, (long)(end - start));
877
+ if (intern) {
878
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
879
+ }
880
+ # endif
881
+
882
+ if (symbolize) {
883
+ result = rb_str_intern(result);
884
+ }
885
+
886
+ return result;
887
+ }
888
+
889
+ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
890
+ {
891
+ size_t bufferSize = stringEnd - string;
892
+
893
+ if (is_name && json->in_array) {
894
+ VALUE cached_key;
895
+ if (RB_UNLIKELY(symbolize)) {
896
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
897
+ } else {
898
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
899
+ }
900
+
901
+ if (RB_LIKELY(cached_key)) {
902
+ return cached_key;
903
+ }
904
+ }
905
+
906
+ return build_string(string, stringEnd, intern, symbolize);
907
+ }
908
+
909
+ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
457
910
  {
458
- VALUE result = Qnil;
459
911
  size_t bufferSize = stringEnd - string;
460
912
  char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
461
913
  int unescape_len;
462
914
  char buf[4];
463
915
 
464
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
465
- # ifdef HAVE_RB_ENC_INTERNED_STR
466
- bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
467
- # else
468
- bufferStart = buffer = ALLOC_N(char, bufferSize);
469
- # endif
470
- } else {
471
- # ifdef HAVE_RB_ENC_INTERNED_STR
472
- bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
473
- # else
474
- bufferStart = buffer = ALLOCA_N(char, bufferSize);
475
- # endif
916
+ if (is_name && json->in_array) {
917
+ VALUE cached_key;
918
+ if (RB_UNLIKELY(symbolize)) {
919
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
920
+ } else {
921
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
922
+ }
923
+
924
+ if (RB_LIKELY(cached_key)) {
925
+ return cached_key;
926
+ }
476
927
  }
477
928
 
929
+ pe = memchr(p, '\\', bufferSize);
930
+ if (RB_UNLIKELY(pe == NULL)) {
931
+ return build_string(string, stringEnd, intern, symbolize);
932
+ }
933
+
934
+ VALUE result = rb_str_buf_new(bufferSize);
935
+ rb_enc_associate_index(result, utf8_encindex);
936
+ buffer = bufferStart = RSTRING_PTR(result);
937
+
478
938
  while (pe < stringEnd) {
479
939
  if (*pe == '\\') {
480
940
  unescape = (char *) "?";
@@ -507,29 +967,27 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
507
967
  break;
508
968
  case 'u':
509
969
  if (pe > stringEnd - 4) {
510
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
511
- ruby_xfree(bufferStart);
512
- }
513
- rb_enc_raise(
514
- EXC_ENCODING eParserError,
515
- "incomplete unicode character escape sequence at '%s'", p
516
- );
970
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
517
971
  } else {
518
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
972
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
519
973
  pe += 3;
520
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
974
+ /* To handle values above U+FFFF, we take a sequence of
975
+ * \uXXXX escapes in the U+D800..U+DBFF then
976
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
977
+ * to make a 20-bit number, then add 0x10000 to get the
978
+ * final codepoint.
979
+ *
980
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
981
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
982
+ * Area".
983
+ */
984
+ if ((ch & 0xFC00) == 0xD800) {
521
985
  pe++;
522
986
  if (pe > stringEnd - 6) {
523
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
524
- ruby_xfree(bufferStart);
525
- }
526
- rb_enc_raise(
527
- EXC_ENCODING eParserError,
528
- "incomplete surrogate pair at '%s'", p
529
- );
987
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
530
988
  }
531
989
  if (pe[0] == '\\' && pe[1] == 'u') {
532
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
990
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
533
991
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
534
992
  | (sur & 0x3FF));
535
993
  pe += 5;
@@ -558,41 +1016,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
558
1016
  MEMCPY(buffer, p, char, pe - p);
559
1017
  buffer += pe - p;
560
1018
  }
561
-
562
- # ifdef HAVE_RB_ENC_INTERNED_STR
563
- if (intern) {
564
- result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
565
- } else {
566
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
567
- }
568
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
569
- ruby_xfree(bufferStart);
570
- }
571
- # else
572
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
573
-
574
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
575
- ruby_xfree(bufferStart);
576
- }
577
-
578
- if (intern) {
579
- # if STR_UMINUS_DEDUPE_FROZEN
580
- // Starting from MRI 2.8 it is preferable to freeze the string
581
- // before deduplication so that it can be interned directly
582
- // otherwise it would be duplicated first which is wasteful.
583
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
584
- # elif STR_UMINUS_DEDUPE
585
- // MRI 2.5 and older do not deduplicate strings that are already
586
- // frozen.
587
- result = rb_funcall(result, i_uminus, 0);
588
- # else
589
- result = rb_str_freeze(result);
590
- # endif
591
- }
592
- # endif
1019
+ rb_str_set_len(result, buffer - bufferStart);
593
1020
 
594
1021
  if (symbolize) {
595
- result = rb_str_intern(result);
1022
+ result = rb_str_intern(result);
1023
+ } else if (intern) {
1024
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
596
1025
  }
597
1026
 
598
1027
  return result;
@@ -604,19 +1033,31 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
604
1033
 
605
1034
  write data;
606
1035
 
607
- action parse_string {
608
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
609
- if (NIL_P(*result)) {
610
- fhold;
611
- fbreak;
612
- } else {
613
- fexec p + 1;
614
- }
1036
+ action parse_complex_string {
1037
+ *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1038
+ fexec p + 1;
1039
+ fhold;
1040
+ fbreak;
615
1041
  }
616
1042
 
617
- action exit { fhold; fbreak; }
1043
+ action parse_simple_string {
1044
+ *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1045
+ fexec p + 1;
1046
+ fhold;
1047
+ fbreak;
1048
+ }
618
1049
 
619
- main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
1050
+ double_quote = '"';
1051
+ escape = '\\';
1052
+ control = 0..0x1f;
1053
+ simple = any - escape - double_quote - control;
1054
+
1055
+ main := double_quote (
1056
+ (simple*)(
1057
+ (double_quote) @parse_simple_string |
1058
+ ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string
1059
+ )
1060
+ );
620
1061
  }%%
621
1062
 
622
1063
  static int
@@ -672,18 +1113,80 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
672
1113
 
673
1114
  static VALUE convert_encoding(VALUE source)
674
1115
  {
675
- #ifdef HAVE_RUBY_ENCODING_H
676
- rb_encoding *enc = rb_enc_get(source);
677
- if (enc == rb_ascii8bit_encoding()) {
678
- if (OBJ_FROZEN(source)) {
679
- source = rb_str_dup(source);
680
- }
681
- FORCE_UTF8(source);
682
- } else {
683
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
684
- }
685
- #endif
1116
+ int encindex = RB_ENCODING_GET(source);
1117
+
1118
+ if (RB_LIKELY(encindex == utf8_encindex)) {
686
1119
  return source;
1120
+ }
1121
+
1122
+ if (encindex == binary_encindex) {
1123
+ // For historical reasons, we silently reinterpret binary strings as UTF-8
1124
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1125
+ }
1126
+
1127
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
1128
+ }
1129
+
1130
+ static int configure_parser_i(VALUE key, VALUE val, VALUE data)
1131
+ {
1132
+ JSON_Parser *json = (JSON_Parser *)data;
1133
+
1134
+ if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1135
+ else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
1136
+ else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
1137
+ else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
1138
+ else if (key == sym_freeze) { json->freeze = RTEST(val); }
1139
+ else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
1140
+ else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
1141
+ else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
1142
+ else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
1143
+ else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
1144
+ else if (key == sym_create_additions) {
1145
+ if (NIL_P(val)) {
1146
+ json->create_additions = true;
1147
+ json->deprecated_create_additions = true;
1148
+ } else {
1149
+ json->create_additions = RTEST(val);
1150
+ json->deprecated_create_additions = false;
1151
+ }
1152
+ }
1153
+
1154
+ return ST_CONTINUE;
1155
+ }
1156
+
1157
+ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
1158
+ {
1159
+ if (json->Vsource) {
1160
+ rb_raise(rb_eTypeError, "already initialized instance");
1161
+ }
1162
+
1163
+ json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1164
+ json->max_nesting = 100;
1165
+
1166
+ if (!NIL_P(opts)) {
1167
+ Check_Type(opts, T_HASH);
1168
+ if (RHASH_SIZE(opts) > 0) {
1169
+ // We assume in most cases few keys are set so it's faster to go over
1170
+ // the provided keys than to check all possible keys.
1171
+ rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
1172
+
1173
+ if (json->symbolize_names && json->create_additions) {
1174
+ rb_raise(rb_eArgError,
1175
+ "options :symbolize_names and :create_additions cannot be "
1176
+ " used in conjunction");
1177
+ }
1178
+
1179
+ if (json->create_additions && !json->create_id) {
1180
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
1181
+ }
1182
+ }
1183
+
1184
+ }
1185
+ source = convert_encoding(StringValue(source));
1186
+ StringValue(source);
1187
+ json->len = RSTRING_LEN(source);
1188
+ json->source = RSTRING_PTR(source);
1189
+ json->Vsource = source;
687
1190
  }
688
1191
 
689
1192
  /*
@@ -708,105 +1211,23 @@ static VALUE convert_encoding(VALUE source)
708
1211
  * * *create_additions*: If set to false, the Parser doesn't create
709
1212
  * additions even if a matching class and create_id was found. This option
710
1213
  * defaults to false.
711
- * * *object_class*: Defaults to Hash
712
- * * *array_class*: Defaults to Array
1214
+ * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1215
+ * instead of Hash to represent JSON objects. The type must respond to
1216
+ * +new+ without arguments, and return an object that responds to +[]=+.
1217
+ * * *array_class*: Defaults to Array. If another type is provided, it will be used
1218
+ * instead of Array to represent JSON arrays. The type must respond to
1219
+ * +new+ without arguments, and return an object that responds to +<<+.
1220
+ * * *decimal_class*: Specifies which class to use instead of the default
1221
+ * (Float) when parsing decimal numbers. This class must accept a single
1222
+ * string argument in its constructor.
713
1223
  */
714
1224
  static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
715
1225
  {
716
- VALUE source, opts;
717
1226
  GET_PARSER_INIT;
718
1227
 
719
- if (json->Vsource) {
720
- rb_raise(rb_eTypeError, "already initialized instance");
721
- }
722
- rb_scan_args(argc, argv, "1:", &source, &opts);
723
- if (!NIL_P(opts)) {
724
- VALUE tmp = ID2SYM(i_max_nesting);
725
- if (option_given_p(opts, tmp)) {
726
- VALUE max_nesting = rb_hash_aref(opts, tmp);
727
- if (RTEST(max_nesting)) {
728
- Check_Type(max_nesting, T_FIXNUM);
729
- json->max_nesting = FIX2INT(max_nesting);
730
- } else {
731
- json->max_nesting = 0;
732
- }
733
- } else {
734
- json->max_nesting = 100;
735
- }
736
- tmp = ID2SYM(i_allow_nan);
737
- if (option_given_p(opts, tmp)) {
738
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
739
- } else {
740
- json->allow_nan = 0;
741
- }
742
- tmp = ID2SYM(i_symbolize_names);
743
- if (option_given_p(opts, tmp)) {
744
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
745
- } else {
746
- json->symbolize_names = 0;
747
- }
748
- tmp = ID2SYM(i_freeze);
749
- if (option_given_p(opts, tmp)) {
750
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
751
- } else {
752
- json->freeze = 0;
753
- }
754
- tmp = ID2SYM(i_create_additions);
755
- if (option_given_p(opts, tmp)) {
756
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
757
- } else {
758
- json->create_additions = 0;
759
- }
760
- if (json->symbolize_names && json->create_additions) {
761
- rb_raise(rb_eArgError,
762
- "options :symbolize_names and :create_additions cannot be "
763
- " used in conjunction");
764
- }
765
- tmp = ID2SYM(i_create_id);
766
- if (option_given_p(opts, tmp)) {
767
- json->create_id = rb_hash_aref(opts, tmp);
768
- } else {
769
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
770
- }
771
- tmp = ID2SYM(i_object_class);
772
- if (option_given_p(opts, tmp)) {
773
- json->object_class = rb_hash_aref(opts, tmp);
774
- } else {
775
- json->object_class = Qnil;
776
- }
777
- tmp = ID2SYM(i_array_class);
778
- if (option_given_p(opts, tmp)) {
779
- json->array_class = rb_hash_aref(opts, tmp);
780
- } else {
781
- json->array_class = Qnil;
782
- }
783
- tmp = ID2SYM(i_decimal_class);
784
- if (option_given_p(opts, tmp)) {
785
- json->decimal_class = rb_hash_aref(opts, tmp);
786
- } else {
787
- json->decimal_class = Qnil;
788
- }
789
- tmp = ID2SYM(i_match_string);
790
- if (option_given_p(opts, tmp)) {
791
- VALUE match_string = rb_hash_aref(opts, tmp);
792
- json->match_string = RTEST(match_string) ? match_string : Qnil;
793
- } else {
794
- json->match_string = Qnil;
795
- }
796
- } else {
797
- json->max_nesting = 100;
798
- json->allow_nan = 0;
799
- json->create_additions = 0;
800
- json->create_id = Qnil;
801
- json->object_class = Qnil;
802
- json->array_class = Qnil;
803
- json->decimal_class = Qnil;
804
- }
805
- source = convert_encoding(StringValue(source));
806
- StringValue(source);
807
- json->len = RSTRING_LEN(source);
808
- json->source = RSTRING_PTR(source);;
809
- json->Vsource = source;
1228
+ rb_check_arity(argc, 1, 2);
1229
+
1230
+ parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
810
1231
  return self;
811
1232
  }
812
1233
 
@@ -836,64 +1257,119 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
836
1257
  */
837
1258
  static VALUE cParser_parse(VALUE self)
838
1259
  {
839
- char *p, *pe;
840
- int cs = EVIL;
841
- VALUE result = Qnil;
842
- GET_PARSER;
1260
+ char *p, *pe;
1261
+ int cs = EVIL;
1262
+ VALUE result = Qnil;
1263
+ GET_PARSER;
843
1264
 
844
- %% write init;
845
- p = json->source;
846
- pe = p + json->len;
847
- %% write exec;
1265
+ char stack_buffer[FBUFFER_STACK_SIZE];
1266
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
848
1267
 
849
- if (cs >= JSON_first_final && p == pe) {
850
- return result;
851
- } else {
852
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
853
- return Qnil;
854
- }
1268
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1269
+ rvalue_stack stack = {
1270
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1271
+ .ptr = rvalue_stack_buffer,
1272
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1273
+ };
1274
+ json->stack = &stack;
1275
+
1276
+ %% write init;
1277
+ p = json->source;
1278
+ pe = p + json->len;
1279
+ %% write exec;
1280
+
1281
+ if (json->stack_handle) {
1282
+ rvalue_stack_eagerly_release(json->stack_handle);
1283
+ }
1284
+
1285
+ if (cs >= JSON_first_final && p == pe) {
1286
+ return result;
1287
+ } else {
1288
+ raise_parse_error("unexpected token at '%s'", p);
1289
+ return Qnil;
1290
+ }
1291
+ }
1292
+
1293
+ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
1294
+ {
1295
+ char *p, *pe;
1296
+ int cs = EVIL;
1297
+ VALUE result = Qnil;
1298
+
1299
+ JSON_Parser _parser = {0};
1300
+ JSON_Parser *json = &_parser;
1301
+ parser_init(json, source, opts);
1302
+
1303
+ char stack_buffer[FBUFFER_STACK_SIZE];
1304
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
1305
+
1306
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1307
+ rvalue_stack stack = {
1308
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1309
+ .ptr = rvalue_stack_buffer,
1310
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1311
+ };
1312
+ json->stack = &stack;
1313
+
1314
+ %% write init;
1315
+ p = json->source;
1316
+ pe = p + json->len;
1317
+ %% write exec;
1318
+
1319
+ if (json->stack_handle) {
1320
+ rvalue_stack_eagerly_release(json->stack_handle);
1321
+ }
1322
+
1323
+ if (cs >= JSON_first_final && p == pe) {
1324
+ return result;
1325
+ } else {
1326
+ raise_parse_error("unexpected token at '%s'", p);
1327
+ return Qnil;
1328
+ }
855
1329
  }
856
1330
 
857
1331
  static void JSON_mark(void *ptr)
858
1332
  {
859
1333
  JSON_Parser *json = ptr;
860
- rb_gc_mark_maybe(json->Vsource);
861
- rb_gc_mark_maybe(json->create_id);
862
- rb_gc_mark_maybe(json->object_class);
863
- rb_gc_mark_maybe(json->array_class);
864
- rb_gc_mark_maybe(json->decimal_class);
865
- rb_gc_mark_maybe(json->match_string);
1334
+ rb_gc_mark(json->Vsource);
1335
+ rb_gc_mark(json->create_id);
1336
+ rb_gc_mark(json->object_class);
1337
+ rb_gc_mark(json->array_class);
1338
+ rb_gc_mark(json->decimal_class);
1339
+ rb_gc_mark(json->match_string);
1340
+ rb_gc_mark(json->stack_handle);
1341
+
1342
+ long index;
1343
+ for (index = 0; index < json->name_cache.length; index++) {
1344
+ rb_gc_mark(json->name_cache.entries[index]);
1345
+ }
866
1346
  }
867
1347
 
868
1348
  static void JSON_free(void *ptr)
869
1349
  {
870
1350
  JSON_Parser *json = ptr;
871
- fbuffer_free(json->fbuffer);
1351
+ fbuffer_free(&json->fbuffer);
872
1352
  ruby_xfree(json);
873
1353
  }
874
1354
 
875
1355
  static size_t JSON_memsize(const void *ptr)
876
1356
  {
877
1357
  const JSON_Parser *json = ptr;
878
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
1358
+ return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer);
879
1359
  }
880
1360
 
881
- #ifdef NEW_TYPEDDATA_WRAPPER
882
1361
  static const rb_data_type_t JSON_Parser_type = {
883
1362
  "JSON/Parser",
884
1363
  {JSON_mark, JSON_free, JSON_memsize,},
885
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
886
1364
  0, 0,
887
1365
  RUBY_TYPED_FREE_IMMEDIATELY,
888
- #endif
889
1366
  };
890
- #endif
891
1367
 
892
1368
  static VALUE cJSON_parser_s_allocate(VALUE klass)
893
1369
  {
894
1370
  JSON_Parser *json;
895
1371
  VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
896
- json->fbuffer = fbuffer_alloc(0);
1372
+ fbuffer_stack_init(&json->fbuffer, 0, NULL, 0);
897
1373
  return obj;
898
1374
  }
899
1375
 
@@ -920,15 +1396,15 @@ void Init_parser(void)
920
1396
  mJSON = rb_define_module("JSON");
921
1397
  mExt = rb_define_module_under(mJSON, "Ext");
922
1398
  cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
923
- eParserError = rb_path2class("JSON::ParserError");
924
1399
  eNestingError = rb_path2class("JSON::NestingError");
925
- rb_gc_register_mark_object(eParserError);
926
1400
  rb_gc_register_mark_object(eNestingError);
927
1401
  rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
928
1402
  rb_define_method(cParser, "initialize", cParser_initialize, -1);
929
1403
  rb_define_method(cParser, "parse", cParser_parse, 0);
930
1404
  rb_define_method(cParser, "source", cParser_source, 0);
931
1405
 
1406
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
1407
+
932
1408
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
933
1409
  rb_gc_register_mark_object(CNaN);
934
1410
 
@@ -938,28 +1414,38 @@ void Init_parser(void)
938
1414
  CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
939
1415
  rb_gc_register_mark_object(CMinusInfinity);
940
1416
 
1417
+ rb_global_variable(&Encoding_UTF_8);
1418
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1419
+
1420
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1421
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1422
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1423
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1424
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1425
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
1426
+ sym_create_id = ID2SYM(rb_intern("create_id"));
1427
+ sym_object_class = ID2SYM(rb_intern("object_class"));
1428
+ sym_array_class = ID2SYM(rb_intern("array_class"));
1429
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1430
+ sym_match_string = ID2SYM(rb_intern("match_string"));
1431
+
1432
+ i_create_id = rb_intern("create_id");
941
1433
  i_json_creatable_p = rb_intern("json_creatable?");
942
1434
  i_json_create = rb_intern("json_create");
943
- i_create_id = rb_intern("create_id");
944
- i_create_additions = rb_intern("create_additions");
945
1435
  i_chr = rb_intern("chr");
946
- i_max_nesting = rb_intern("max_nesting");
947
- i_allow_nan = rb_intern("allow_nan");
948
- i_symbolize_names = rb_intern("symbolize_names");
949
- i_object_class = rb_intern("object_class");
950
- i_array_class = rb_intern("array_class");
951
- i_decimal_class = rb_intern("decimal_class");
952
1436
  i_match = rb_intern("match");
953
- i_match_string = rb_intern("match_string");
954
- i_key_p = rb_intern("key?");
955
1437
  i_deep_const_get = rb_intern("deep_const_get");
956
1438
  i_aset = rb_intern("[]=");
957
1439
  i_aref = rb_intern("[]");
958
1440
  i_leftshift = rb_intern("<<");
959
1441
  i_new = rb_intern("new");
960
1442
  i_try_convert = rb_intern("try_convert");
961
- i_freeze = rb_intern("freeze");
962
1443
  i_uminus = rb_intern("-@");
1444
+ i_encode = rb_intern("encode");
1445
+
1446
+ binary_encindex = rb_ascii8bit_encindex();
1447
+ utf8_encindex = rb_utf8_encindex();
1448
+ enc_utf8 = rb_utf8_encoding();
963
1449
  }
964
1450
 
965
1451
  /*