json 2.7.2 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,28 +1,310 @@
1
+ #include "ruby.h"
1
2
  #include "../fbuffer/fbuffer.h"
2
- #include "parser.h"
3
3
 
4
- #if defined HAVE_RUBY_ENCODING_H
5
- # define EXC_ENCODING rb_utf8_encoding(),
6
- # ifndef HAVE_RB_ENC_RAISE
7
- static void
8
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
9
- {
10
- va_list args;
11
- VALUE mesg;
4
+ static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
5
+ static VALUE CNaN, CInfinity, CMinusInfinity;
12
6
 
13
- va_start(args, fmt);
14
- mesg = rb_enc_vsprintf(enc, fmt, args);
15
- va_end(args);
7
+ static ID i_json_creatable_p, i_json_create, i_create_id,
8
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
9
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
16
10
 
17
- rb_exc_raise(rb_exc_new3(exc, mesg));
18
- }
19
- # define rb_enc_raise enc_raise
20
- # endif
11
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
12
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
13
+ sym_decimal_class, sym_match_string;
14
+
15
+ static int binary_encindex;
16
+ static int utf8_encindex;
17
+
18
+ #ifdef HAVE_RB_CATEGORY_WARN
19
+ # define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
21
20
  #else
22
- # define EXC_ENCODING /* nothing */
23
- # define rb_enc_raise rb_raise
21
+ # define json_deprecated(message) rb_warn(message)
22
+ #endif
23
+
24
+ static const char deprecated_create_additions_warning[] =
25
+ "JSON.load implicit support for `create_additions: true` is deprecated "
26
+ "and will be removed in 3.0, use JSON.unsafe_load or explicitly "
27
+ "pass `create_additions: true`";
28
+
29
+ #ifndef HAVE_RB_HASH_BULK_INSERT
30
+ // For TruffleRuby
31
+ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
32
+ {
33
+ long index = 0;
34
+ while (index < count) {
35
+ VALUE name = pairs[index++];
36
+ VALUE value = pairs[index++];
37
+ rb_hash_aset(hash, name, value);
38
+ }
39
+ RB_GC_GUARD(hash);
40
+ }
24
41
  #endif
25
42
 
43
+ /* name cache */
44
+
45
+ #include <string.h>
46
+ #include <ctype.h>
47
+
48
+ // Object names are likely to be repeated, and are frozen.
49
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
50
+ // and save much more expensive lookups into the global fstring table.
51
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
52
+ // to be able to fit safely on the stack.
53
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
54
+ // performance.
55
+ #define JSON_RVALUE_CACHE_CAPA 63
56
// Fixed-capacity cache of object names (strings or symbols).
// The fetch helpers binary-search `entries`, so it is kept sorted on insert.
+ typedef struct rvalue_cache_struct {
57
// Number of slots of `entries` currently in use.
+ int length;
58
// Cached values; only the first `length` slots are meaningful.
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
59
+ } rvalue_cache;
60
+
61
+ static rb_encoding *enc_utf8;
62
+
63
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
64
+
65
+ static inline VALUE build_interned_string(const char *str, const long length)
66
+ {
67
+ # ifdef HAVE_RB_ENC_INTERNED_STR
68
+ return rb_enc_interned_str(str, length, enc_utf8);
69
+ # else
70
+ VALUE rstring = rb_utf8_str_new(str, length);
71
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
72
+ # endif
73
+ }
74
+
75
+ static inline VALUE build_symbol(const char *str, const long length)
76
+ {
77
+ return rb_str_intern(build_interned_string(str, length));
78
+ }
79
+
80
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
81
+ {
82
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
83
+ cache->length++;
84
+ cache->entries[index] = rstring;
85
+ }
86
+
87
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
88
+ {
89
+ long rstring_length = RSTRING_LEN(rstring);
90
+ if (length == rstring_length) {
91
+ return memcmp(str, RSTRING_PTR(rstring), length);
92
+ } else {
93
+ return (int)(length - rstring_length);
94
+ }
95
+ }
96
+
97
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
98
+ {
99
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
100
+ // Common names aren't likely to be very long. So we just don't
101
+ // cache names above an arbitrary threshold.
102
+ return Qfalse;
103
+ }
104
+
105
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
106
+ // Simple heuristic, if the first character isn't a letter,
107
+ // we're much less likely to see this string again.
108
+ // We mostly want to cache strings that are likely to be repeated.
109
+ return Qfalse;
110
+ }
111
+
112
+ int low = 0;
113
+ int high = cache->length - 1;
114
+ int mid = 0;
115
+ int last_cmp = 0;
116
+
117
+ while (low <= high) {
118
+ mid = (high + low) >> 1;
119
+ VALUE entry = cache->entries[mid];
120
+ last_cmp = rstring_cache_cmp(str, length, entry);
121
+
122
+ if (last_cmp == 0) {
123
+ return entry;
124
+ } else if (last_cmp > 0) {
125
+ low = mid + 1;
126
+ } else {
127
+ high = mid - 1;
128
+ }
129
+ }
130
+
131
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
132
+ // We assume the overwhelming majority of names don't need to be escaped.
133
+ // But if they do, we have to fallback to the slow path.
134
+ return Qfalse;
135
+ }
136
+
137
+ VALUE rstring = build_interned_string(str, length);
138
+
139
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
140
+ if (last_cmp > 0) {
141
+ mid += 1;
142
+ }
143
+
144
+ rvalue_cache_insert_at(cache, mid, rstring);
145
+ }
146
+ return rstring;
147
+ }
148
+
149
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
150
+ {
151
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
152
+ // Common names aren't likely to be very long. So we just don't
153
+ // cache names above an arbitrary threshold.
154
+ return Qfalse;
155
+ }
156
+
157
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
158
+ // Simple heuristic, if the first character isn't a letter,
159
+ // we're much less likely to see this string again.
160
+ // We mostly want to cache strings that are likely to be repeated.
161
+ return Qfalse;
162
+ }
163
+
164
+ int low = 0;
165
+ int high = cache->length - 1;
166
+ int mid = 0;
167
+ int last_cmp = 0;
168
+
169
+ while (low <= high) {
170
+ mid = (high + low) >> 1;
171
+ VALUE entry = cache->entries[mid];
172
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
173
+
174
+ if (last_cmp == 0) {
175
+ return entry;
176
+ } else if (last_cmp > 0) {
177
+ low = mid + 1;
178
+ } else {
179
+ high = mid - 1;
180
+ }
181
+ }
182
+
183
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
184
+ // We assume the overwhelming majority of names don't need to be escaped.
185
+ // But if they do, we have to fallback to the slow path.
186
+ return Qfalse;
187
+ }
188
+
189
+ VALUE rsymbol = build_symbol(str, length);
190
+
191
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
192
+ if (last_cmp > 0) {
193
+ mid += 1;
194
+ }
195
+
196
+ rvalue_cache_insert_at(cache, mid, rsymbol);
197
+ }
198
+ return rsymbol;
199
+ }
200
+
201
+ /* rvalue stack */
202
+
203
+ #define RVALUE_STACK_INITIAL_CAPA 128
204
+
205
// Tags how an rvalue_stack is stored; rvalue_stack_grow uses it to pick a growth strategy.
+ enum rvalue_stack_type {
206
// Grown in place with REALLOC_N; rvalue_stack_spill sets this on the copy it creates.
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
207
// Cannot be grown in place: rvalue_stack_grow hands it to rvalue_stack_spill instead.
+ RVALUE_STACK_STACK_ALLOCATED = 1,
208
+ };
209
+
210
// Growable stack of VALUEs that accumulates parsed values until they are
// turned into an array or hash.
+ typedef struct rvalue_stack_struct {
211
// Storage kind; decides whether growth spills or reallocates.
+ enum rvalue_stack_type type;
212
// Number of VALUE slots available in `ptr`.
+ long capa;
213
// Number of slots in use; the next push writes ptr[head].
+ long head;
214
// Backing array of `capa` VALUEs.
+ VALUE *ptr;
215
+ } rvalue_stack;
216
+
217
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
218
+
219
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
220
+ {
221
+ long required = stack->capa * 2;
222
+
223
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
224
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
225
+ } else {
226
+ REALLOC_N(stack->ptr, VALUE, required);
227
+ stack->capa = required;
228
+ }
229
+ return stack;
230
+ }
231
+
232
+ static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
233
+ {
234
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
235
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
236
+ }
237
+ stack->ptr[stack->head] = value;
238
+ stack->head++;
239
+ }
240
+
241
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
242
+ {
243
+ return stack->ptr + (stack->head - count);
244
+ }
245
+
246
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
247
+ {
248
+ stack->head -= count;
249
+ }
250
+
251
+ static void rvalue_stack_mark(void *ptr)
252
+ {
253
+ rvalue_stack *stack = (rvalue_stack *)ptr;
254
+ long index;
255
+ for (index = 0; index < stack->head; index++) {
256
+ rb_gc_mark(stack->ptr[index]);
257
+ }
258
+ }
259
+
260
+ static void rvalue_stack_free(void *ptr)
261
+ {
262
+ rvalue_stack *stack = (rvalue_stack *)ptr;
263
+ if (stack) {
264
+ ruby_xfree(stack->ptr);
265
+ ruby_xfree(stack);
266
+ }
267
+ }
268
+
269
+ static size_t rvalue_stack_memsize(const void *ptr)
270
+ {
271
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
272
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
273
+ }
274
+
275
// Typed-data descriptor for the object that rvalue_stack_spill wraps around a
// heap-allocated rvalue_stack.
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
276
+ "JSON::Ext::Parser/rvalue_stack",
277
+ {
278
// Marks every VALUE held on the stack.
+ .dmark = rvalue_stack_mark,
279
// Releases the value buffer and the struct.
+ .dfree = rvalue_stack_free,
280
// Reports the struct size plus the buffer size.
+ .dsize = rvalue_stack_memsize,
281
+ },
282
+ 0, 0,
283
+ RUBY_TYPED_FREE_IMMEDIATELY,
284
+ };
285
+
286
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
287
+ {
288
+ rvalue_stack *stack;
289
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
290
+ *stack_ref = stack;
291
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
292
+
293
+ stack->capa = old_stack->capa << 1;
294
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
295
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
296
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
297
+ return stack;
298
+ }
299
+
300
+ static void rvalue_stack_eagerly_release(VALUE handle)
301
+ {
302
+ rvalue_stack *stack;
303
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
304
+ RTYPEDDATA_DATA(handle) = NULL;
305
+ rvalue_stack_free(stack);
306
+ }
307
+
26
308
  /* unicode */
27
309
 
28
310
  static const signed char digit_values[256] = {
@@ -42,26 +324,28 @@ static const signed char digit_values[256] = {
42
324
  -1, -1, -1, -1, -1, -1, -1
43
325
  };
44
326
 
45
- static UTF32 unescape_unicode(const unsigned char *p)
327
+ static uint32_t unescape_unicode(const unsigned char *p)
46
328
  {
329
+ const uint32_t replacement_char = 0xFFFD;
330
+
47
331
  signed char b;
48
- UTF32 result = 0;
332
+ uint32_t result = 0;
49
333
  b = digit_values[p[0]];
50
- if (b < 0) return UNI_REPLACEMENT_CHAR;
334
+ if (b < 0) return replacement_char;
51
335
  result = (result << 4) | (unsigned char)b;
52
336
  b = digit_values[p[1]];
53
- if (b < 0) return UNI_REPLACEMENT_CHAR;
337
+ if (b < 0) return replacement_char;
54
338
  result = (result << 4) | (unsigned char)b;
55
339
  b = digit_values[p[2]];
56
- if (b < 0) return UNI_REPLACEMENT_CHAR;
340
+ if (b < 0) return replacement_char;
57
341
  result = (result << 4) | (unsigned char)b;
58
342
  b = digit_values[p[3]];
59
- if (b < 0) return UNI_REPLACEMENT_CHAR;
343
+ if (b < 0) return replacement_char;
60
344
  result = (result << 4) | (unsigned char)b;
61
345
  return result;
62
346
  }
63
347
 
64
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
348
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
65
349
  {
66
350
  int len = 1;
67
351
  if (ch <= 0x7F) {
@@ -87,14 +371,70 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
87
371
  return len;
88
372
  }
89
373
 
90
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
91
- static VALUE CNaN, CInfinity, CMinusInfinity;
374
// State of one JSON parser instance.
+ typedef struct JSON_ParserStruct {
375
// Tested by GET_PARSER: a false value means the instance is uninitialized.
+ VALUE Vsource;
376
+ char *source;
377
+ long len;
378
// Start of the number token currently being parsed (set by JSON_parse_number).
+ char *memo;
379
// Key looked up in a parsed object when create_additions is enabled.
+ VALUE create_id;
380
// Optional custom classes; when set, objects/arrays are built through []= / <<.
+ VALUE object_class;
381
+ VALUE array_class;
382
// Optional class used to build non-integer numbers from their text.
+ VALUE decimal_class;
383
+ VALUE match_string;
384
// Scratch buffer holding a NUL-terminated copy of number text.
+ FBuffer fbuffer;
385
// Array nesting depth; incremented around JSON_parse_array, gates the name cache.
+ int in_array;
386
// Maximum nesting depth; 0 disables the check.
+ int max_nesting;
387
+ bool allow_nan;
388
+ bool allow_trailing_comma;
389
// True while an object name (as opposed to a value) is being parsed.
+ bool parsing_name;
390
+ bool symbolize_names;
391
+ bool freeze;
392
+ bool create_additions;
393
// When true, a deprecation warning is emitted before json_create is called.
+ bool deprecated_create_additions;
394
// Cache of object names seen so far.
+ rvalue_cache name_cache;
395
// Value stack used by PUSH, and the object owning it once it lives on the heap.
+ rvalue_stack *stack;
396
+ VALUE stack_handle;
397
+ } JSON_Parser;
398
+
399
+ #define GET_PARSER \
400
+ GET_PARSER_INIT; \
401
+ if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
402
+
403
+ #define GET_PARSER_INIT \
404
+ JSON_Parser *json; \
405
+ TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
406
+
407
+ #define MinusInfinity "-Infinity"
408
+ #define EVIL 0x666
409
+
410
+ static const rb_data_type_t JSON_Parser_type;
411
+ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
412
+ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
413
+ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
414
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result);
415
+ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
416
+
417
+
418
+ #define PARSE_ERROR_FRAGMENT_LEN 32
419
+ #ifdef RBIMPL_ATTR_NORETURN
420
+ RBIMPL_ATTR_NORETURN()
421
+ #endif
422
+ static void raise_parse_error(const char *format, const char *start)
423
+ {
424
+ char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
425
+
426
+ size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
427
+ const char *ptr = start;
428
+
429
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
430
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
431
+ buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
432
+ ptr = buffer;
433
+ }
434
+
435
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
436
+ }
92
437
 
93
- static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
94
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
95
- i_object_class, i_array_class, i_decimal_class, i_key_p,
96
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
97
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
98
438
 
99
439
  %%{
100
440
  machine JSON_common;
@@ -131,27 +471,25 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
131
471
  write data;
132
472
 
133
473
  action parse_value {
134
- VALUE v = Qnil;
135
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
474
+ char *np = JSON_parse_value(json, fpc, pe, result, current_nesting);
136
475
  if (np == NULL) {
137
476
  fhold; fbreak;
138
477
  } else {
139
- if (NIL_P(json->object_class)) {
140
- OBJ_FREEZE(last_name);
141
- rb_hash_aset(*result, last_name, v);
142
- } else {
143
- rb_funcall(*result, i_aset, 2, last_name, v);
144
- }
145
478
  fexec np;
146
479
  }
147
480
  }
148
481
 
482
+ action allow_trailing_comma { json->allow_trailing_comma }
483
+
149
484
  action parse_name {
150
485
  char *np;
151
- json->parsing_name = 1;
152
- np = JSON_parse_string(json, fpc, pe, &last_name);
153
- json->parsing_name = 0;
154
- if (np == NULL) { fhold; fbreak; } else fexec np;
486
+ json->parsing_name = true;
487
+ np = JSON_parse_string(json, fpc, pe, result);
488
+ json->parsing_name = false;
489
+ if (np == NULL) { fhold; fbreak; } else {
490
+ PUSH(*result);
491
+ fexec np;
492
+ }
155
493
  }
156
494
 
157
495
  action exit { fhold; fbreak; }
@@ -161,37 +499,64 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
161
499
 
162
500
  main := (
163
501
  begin_object
164
- (pair (next_pair)*)? ignore*
502
+ (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
165
503
  end_object
166
504
  ) @exit;
167
505
  }%%
168
506
 
507
+ #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
508
+
169
509
  static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
170
510
  {
171
511
  int cs = EVIL;
172
- VALUE last_name = Qnil;
173
- VALUE object_class = json->object_class;
174
512
 
175
513
  if (json->max_nesting && current_nesting > json->max_nesting) {
176
514
  rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
177
515
  }
178
516
 
179
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
517
+ long stack_head = json->stack->head;
180
518
 
181
519
  %% write init;
182
520
  %% write exec;
183
521
 
184
522
  if (cs >= JSON_object_first_final) {
185
- if (json->create_additions) {
523
+ long count = json->stack->head - stack_head;
524
+
525
+ if (RB_UNLIKELY(json->object_class)) {
526
+ VALUE object = rb_class_new_instance(0, 0, json->object_class);
527
+ long index = 0;
528
+ VALUE *items = rvalue_stack_peek(json->stack, count);
529
+ while (index < count) {
530
+ VALUE name = items[index++];
531
+ VALUE value = items[index++];
532
+ rb_funcall(object, i_aset, 2, name, value);
533
+ }
534
+ *result = object;
535
+ } else {
536
+ VALUE hash;
537
+ #ifdef HAVE_RB_HASH_NEW_CAPA
538
+ hash = rb_hash_new_capa(count >> 1);
539
+ #else
540
+ hash = rb_hash_new();
541
+ #endif
542
+ rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
543
+ *result = hash;
544
+ }
545
+ rvalue_stack_pop(json->stack, count);
546
+
547
+ if (RB_UNLIKELY(json->create_additions)) {
186
548
  VALUE klassname;
187
- if (NIL_P(json->object_class)) {
188
- klassname = rb_hash_aref(*result, json->create_id);
549
+ if (json->object_class) {
550
+ klassname = rb_funcall(*result, i_aref, 1, json->create_id);
189
551
  } else {
190
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
552
+ klassname = rb_hash_aref(*result, json->create_id);
191
553
  }
192
554
  if (!NIL_P(klassname)) {
193
555
  VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
194
556
  if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
557
+ if (json->deprecated_create_additions) {
558
+ json_deprecated(deprecated_create_additions_warning);
559
+ }
195
560
  *result = rb_funcall(klass, i_json_create, 1, *result);
196
561
  }
197
562
  }
@@ -202,7 +567,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
202
567
  }
203
568
  }
204
569
 
205
-
206
570
  %%{
207
571
  machine JSON_value;
208
572
  include JSON_common;
@@ -222,19 +586,24 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
222
586
  if (json->allow_nan) {
223
587
  *result = CNaN;
224
588
  } else {
225
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 2);
589
+ raise_parse_error("unexpected token at '%s'", p - 2);
226
590
  }
227
591
  }
228
592
  action parse_infinity {
229
593
  if (json->allow_nan) {
230
594
  *result = CInfinity;
231
595
  } else {
232
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 7);
596
+ raise_parse_error("unexpected token at '%s'", p - 7);
233
597
  }
234
598
  }
235
599
  action parse_string {
236
600
  char *np = JSON_parse_string(json, fpc, pe, result);
237
- if (np == NULL) { fhold; fbreak; } else fexec np;
601
+ if (np == NULL) {
602
+ fhold;
603
+ fbreak;
604
+ } else {
605
+ fexec np;
606
+ }
238
607
  }
239
608
 
240
609
  action parse_number {
@@ -245,19 +614,21 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
245
614
  fexec p + 10;
246
615
  fhold; fbreak;
247
616
  } else {
248
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
617
+ raise_parse_error("unexpected token at '%s'", p);
249
618
  }
250
619
  }
251
- np = JSON_parse_float(json, fpc, pe, result);
252
- if (np != NULL) fexec np;
253
- np = JSON_parse_integer(json, fpc, pe, result);
254
- if (np != NULL) fexec np;
620
+ np = JSON_parse_number(json, fpc, pe, result);
621
+ if (np != NULL) {
622
+ fexec np;
623
+ }
255
624
  fhold; fbreak;
256
625
  }
257
626
 
258
627
  action parse_array {
259
628
  char *np;
629
+ json->in_array++;
260
630
  np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1);
631
+ json->in_array--;
261
632
  if (np == NULL) { fhold; fbreak; } else fexec np;
262
633
  }
263
634
 
@@ -275,10 +646,10 @@ main := ignore* (
275
646
  Vtrue @parse_true |
276
647
  VNaN @parse_nan |
277
648
  VInfinity @parse_infinity |
278
- begin_number >parse_number |
279
- begin_string >parse_string |
280
- begin_array >parse_array |
281
- begin_object >parse_object
649
+ begin_number @parse_number |
650
+ begin_string @parse_string |
651
+ begin_array @parse_array |
652
+ begin_object @parse_object
282
653
  ) ignore* %*exit;
283
654
  }%%
284
655
 
@@ -294,6 +665,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
294
665
  }
295
666
 
296
667
  if (cs >= JSON_value_first_final) {
668
+ PUSH(*result);
297
669
  return p;
298
670
  } else {
299
671
  return NULL;
@@ -310,24 +682,40 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
310
682
  main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
311
683
  }%%
312
684
 
313
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
685
+ #define MAX_FAST_INTEGER_SIZE 18
686
+ static inline VALUE fast_parse_integer(char *p, char *pe)
314
687
  {
315
- int cs = EVIL;
688
+ bool negative = false;
689
+ if (*p == '-') {
690
+ negative = true;
691
+ p++;
692
+ }
316
693
 
317
- %% write init;
318
- json->memo = p;
319
- %% write exec;
694
+ long long memo = 0;
695
+ while (p < pe) {
696
+ memo *= 10;
697
+ memo += *p - '0';
698
+ p++;
699
+ }
700
+
701
+ if (negative) {
702
+ memo = -memo;
703
+ }
704
+ return LL2NUM(memo);
705
+ }
320
706
 
321
- if (cs >= JSON_integer_first_final) {
707
+ static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result)
708
+ {
322
709
  long len = p - json->memo;
323
- fbuffer_clear(json->fbuffer);
324
- fbuffer_append(json->fbuffer, json->memo, len);
325
- fbuffer_append_char(json->fbuffer, '\0');
326
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
710
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
711
+ *result = fast_parse_integer(json->memo, p);
712
+ } else {
713
+ fbuffer_clear(&json->fbuffer);
714
+ fbuffer_append(&json->fbuffer, json->memo, len);
715
+ fbuffer_append_char(&json->fbuffer, '\0');
716
+ *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
717
+ }
327
718
  return p + 1;
328
- } else {
329
- return NULL;
330
- }
331
719
  }
332
720
 
333
721
  %%{
@@ -337,60 +725,68 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
337
725
  write data;
338
726
 
339
727
  action exit { fhold; fbreak; }
728
+ action isFloat { is_float = true; }
340
729
 
341
730
  main := '-'? (
342
- (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
343
- | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
344
- ) (^[0-9Ee.\-]? @exit );
731
+ (('0' | [1-9][0-9]*)
732
+ ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) |
733
+ ([Ee] [+\-]?[0-9]+)) > isFloat)?
734
+ ) (^[0-9Ee.\-]? @exit ));
345
735
  }%%
346
736
 
347
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
737
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result)
348
738
  {
349
739
  int cs = EVIL;
740
+ bool is_float = false;
350
741
 
351
742
  %% write init;
352
743
  json->memo = p;
353
744
  %% write exec;
354
745
 
355
746
  if (cs >= JSON_float_first_final) {
747
+ if (!is_float) {
748
+ return JSON_decode_integer(json, p, result);
749
+ }
356
750
  VALUE mod = Qnil;
357
751
  ID method_id = 0;
358
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
359
- mod = json->decimal_class;
360
- method_id = i_try_convert;
361
- } else if (rb_respond_to(json->decimal_class, i_new)) {
362
- mod = json->decimal_class;
363
- method_id = i_new;
364
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
365
- VALUE name = rb_class_name(json->decimal_class);
366
- const char *name_cstr = RSTRING_PTR(name);
367
- const char *last_colon = strrchr(name_cstr, ':');
368
- if (last_colon) {
369
- const char *mod_path_end = last_colon - 1;
370
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
371
- mod = rb_path_to_class(mod_path);
372
-
373
- const char *method_name_beg = last_colon + 1;
374
- long before_len = method_name_beg - name_cstr;
375
- long len = RSTRING_LEN(name) - before_len;
376
- VALUE method_name = rb_str_substr(name, before_len, len);
377
- method_id = SYM2ID(rb_str_intern(method_name));
378
- } else {
379
- mod = rb_mKernel;
380
- method_id = SYM2ID(rb_str_intern(name));
752
+ if (json->decimal_class) {
753
+ if (rb_respond_to(json->decimal_class, i_try_convert)) {
754
+ mod = json->decimal_class;
755
+ method_id = i_try_convert;
756
+ } else if (rb_respond_to(json->decimal_class, i_new)) {
757
+ mod = json->decimal_class;
758
+ method_id = i_new;
759
+ } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
760
+ VALUE name = rb_class_name(json->decimal_class);
761
+ const char *name_cstr = RSTRING_PTR(name);
762
+ const char *last_colon = strrchr(name_cstr, ':');
763
+ if (last_colon) {
764
+ const char *mod_path_end = last_colon - 1;
765
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
766
+ mod = rb_path_to_class(mod_path);
767
+
768
+ const char *method_name_beg = last_colon + 1;
769
+ long before_len = method_name_beg - name_cstr;
770
+ long len = RSTRING_LEN(name) - before_len;
771
+ VALUE method_name = rb_str_substr(name, before_len, len);
772
+ method_id = SYM2ID(rb_str_intern(method_name));
773
+ } else {
774
+ mod = rb_mKernel;
775
+ method_id = SYM2ID(rb_str_intern(name));
776
+ }
381
777
  }
382
778
  }
383
779
 
384
780
  long len = p - json->memo;
385
- fbuffer_clear(json->fbuffer);
386
- fbuffer_append(json->fbuffer, json->memo, len);
387
- fbuffer_append_char(json->fbuffer, '\0');
781
+ fbuffer_clear(&json->fbuffer);
782
+ fbuffer_append(&json->fbuffer, json->memo, len);
783
+ fbuffer_append_char(&json->fbuffer, '\0');
388
784
 
389
785
  if (method_id) {
390
- VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
786
+ VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer));
391
787
  *result = rb_funcallv(mod, method_id, 1, &text);
392
788
  } else {
393
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
789
+ *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1));
394
790
  }
395
791
 
396
792
  return p + 1;
@@ -412,69 +808,133 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
412
808
  if (np == NULL) {
413
809
  fhold; fbreak;
414
810
  } else {
415
- if (NIL_P(json->array_class)) {
416
- rb_ary_push(*result, v);
417
- } else {
418
- rb_funcall(*result, i_leftshift, 1, v);
419
- }
420
811
  fexec np;
421
812
  }
422
813
  }
423
814
 
815
+ action allow_trailing_comma { json->allow_trailing_comma }
816
+
424
817
  action exit { fhold; fbreak; }
425
818
 
426
819
  next_element = value_separator ignore* begin_value >parse_value;
427
820
 
428
821
  main := begin_array ignore*
429
822
  ((begin_value >parse_value ignore*)
430
- (ignore* next_element ignore*)*)?
823
+ (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
431
824
  end_array @exit;
432
825
  }%%
433
826
 
434
827
/*
 * Parse a JSON array starting at `p`.
 *
 * The values of the array end up on json->stack above the head recorded
 * before the state machine runs. On success they are gathered either into
 * a plain Ruby Array or, when json->array_class is set, into a new instance
 * of that class through its << method; they are then popped off the stack
 * and the array is stored in *result.
 *
 * Raises eNestingError when max_nesting is set and exceeded, and a parse
 * error when the machine does not reach a final state.
 * Returns p + 1 on success.
 */
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
{
    int cs = EVIL;

    if (json->max_nesting && current_nesting > json->max_nesting) {
        rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
    }
    long stack_head = json->stack->head;

    %% write init;
    %% write exec;

    if (cs < JSON_array_first_final) {
        raise_parse_error("unexpected token at '%s'", p);
        return NULL;
    }

    // Everything pushed since stack_head belongs to this array.
    const long count = json->stack->head - stack_head;

    if (RB_UNLIKELY(json->array_class)) {
        VALUE custom = rb_class_new_instance(0, 0, json->array_class);
        VALUE *elements = rvalue_stack_peek(json->stack, count);
        long position = 0;
        while (position < count) {
            rb_funcall(custom, i_leftshift, 1, elements[position]);
            position++;
        }
        *result = custom;
    } else {
        *result = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
    }
    rvalue_stack_pop(json->stack, count);

    return p + 1;
}
454
862
 
455
- static const size_t MAX_STACK_BUFFER_SIZE = 128;
456
- static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
863
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
864
+ {
865
+ if (symbolize) {
866
+ intern = true;
867
+ }
868
+ VALUE result;
869
+ # ifdef HAVE_RB_ENC_INTERNED_STR
870
+ if (intern) {
871
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
872
+ } else {
873
+ result = rb_utf8_str_new(start, (long)(end - start));
874
+ }
875
+ # else
876
+ result = rb_utf8_str_new(start, (long)(end - start));
877
+ if (intern) {
878
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
879
+ }
880
+ # endif
881
+
882
+ if (symbolize) {
883
+ result = rb_str_intern(result);
884
+ }
885
+
886
+ return result;
887
+ }
888
+
889
+ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
890
+ {
891
+ size_t bufferSize = stringEnd - string;
892
+
893
+ if (is_name && json->in_array) {
894
+ VALUE cached_key;
895
+ if (RB_UNLIKELY(symbolize)) {
896
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
897
+ } else {
898
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
899
+ }
900
+
901
+ if (RB_LIKELY(cached_key)) {
902
+ return cached_key;
903
+ }
904
+ }
905
+
906
+ return build_string(string, stringEnd, intern, symbolize);
907
+ }
908
+
909
+ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
457
910
  {
458
- VALUE result = Qnil;
459
911
  size_t bufferSize = stringEnd - string;
460
912
  char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
461
913
  int unescape_len;
462
914
  char buf[4];
463
915
 
464
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
465
- # ifdef HAVE_RB_ENC_INTERNED_STR
466
- bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
467
- # else
468
- bufferStart = buffer = ALLOC_N(char, bufferSize);
469
- # endif
470
- } else {
471
- # ifdef HAVE_RB_ENC_INTERNED_STR
472
- bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
473
- # else
474
- bufferStart = buffer = ALLOCA_N(char, bufferSize);
475
- # endif
916
+ if (is_name && json->in_array) {
917
+ VALUE cached_key;
918
+ if (RB_UNLIKELY(symbolize)) {
919
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
920
+ } else {
921
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
922
+ }
923
+
924
+ if (RB_LIKELY(cached_key)) {
925
+ return cached_key;
926
+ }
476
927
  }
477
928
 
929
+ pe = memchr(p, '\\', bufferSize);
930
+ if (RB_UNLIKELY(pe == NULL)) {
931
+ return build_string(string, stringEnd, intern, symbolize);
932
+ }
933
+
934
+ VALUE result = rb_str_buf_new(bufferSize);
935
+ rb_enc_associate_index(result, utf8_encindex);
936
+ buffer = bufferStart = RSTRING_PTR(result);
937
+
478
938
  while (pe < stringEnd) {
479
939
  if (*pe == '\\') {
480
940
  unescape = (char *) "?";
@@ -507,29 +967,27 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
507
967
  break;
508
968
  case 'u':
509
969
  if (pe > stringEnd - 4) {
510
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
511
- ruby_xfree(bufferStart);
512
- }
513
- rb_enc_raise(
514
- EXC_ENCODING eParserError,
515
- "incomplete unicode character escape sequence at '%s'", p
516
- );
970
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
517
971
  } else {
518
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
972
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
519
973
  pe += 3;
520
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
974
+ /* To handle values above U+FFFF, we take a sequence of
975
+ * \uXXXX escapes in the U+D800..U+DBFF then
976
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
977
+ * to make a 20-bit number, then add 0x10000 to get the
978
+ * final codepoint.
979
+ *
980
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
981
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
982
+ * Area".
983
+ */
984
+ if ((ch & 0xFC00) == 0xD800) {
521
985
  pe++;
522
986
  if (pe > stringEnd - 6) {
523
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
524
- ruby_xfree(bufferStart);
525
- }
526
- rb_enc_raise(
527
- EXC_ENCODING eParserError,
528
- "incomplete surrogate pair at '%s'", p
529
- );
987
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
530
988
  }
531
989
  if (pe[0] == '\\' && pe[1] == 'u') {
532
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
990
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
533
991
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
534
992
  | (sur & 0x3FF));
535
993
  pe += 5;
@@ -558,41 +1016,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
558
1016
  MEMCPY(buffer, p, char, pe - p);
559
1017
  buffer += pe - p;
560
1018
  }
561
-
562
- # ifdef HAVE_RB_ENC_INTERNED_STR
563
- if (intern) {
564
- result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
565
- } else {
566
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
567
- }
568
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
569
- ruby_xfree(bufferStart);
570
- }
571
- # else
572
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
573
-
574
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
575
- ruby_xfree(bufferStart);
576
- }
577
-
578
- if (intern) {
579
- # if STR_UMINUS_DEDUPE_FROZEN
580
- // Starting from MRI 2.8 it is preferable to freeze the string
581
- // before deduplication so that it can be interned directly
582
- // otherwise it would be duplicated first which is wasteful.
583
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
584
- # elif STR_UMINUS_DEDUPE
585
- // MRI 2.5 and older do not deduplicate strings that are already
586
- // frozen.
587
- result = rb_funcall(result, i_uminus, 0);
588
- # else
589
- result = rb_str_freeze(result);
590
- # endif
591
- }
592
- # endif
1019
+ rb_str_set_len(result, buffer - bufferStart);
593
1020
 
594
1021
  if (symbolize) {
595
- result = rb_str_intern(result);
1022
+ result = rb_str_intern(result);
1023
+ } else if (intern) {
1024
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
596
1025
  }
597
1026
 
598
1027
  return result;
@@ -604,19 +1033,31 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
604
1033
 
605
1034
  write data;
606
1035
 
607
- action parse_string {
608
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
609
- if (NIL_P(*result)) {
610
- fhold;
611
- fbreak;
612
- } else {
613
- fexec p + 1;
614
- }
1036
+ action parse_complex_string {
1037
+ *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1038
+ fexec p + 1;
1039
+ fhold;
1040
+ fbreak;
615
1041
  }
616
1042
 
617
- action exit { fhold; fbreak; }
1043
+ action parse_simple_string {
1044
+ *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1045
+ fexec p + 1;
1046
+ fhold;
1047
+ fbreak;
1048
+ }
618
1049
 
619
- main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
1050
+ double_quote = '"';
1051
+ escape = '\\';
1052
+ control = 0..0x1f;
1053
+ simple = any - escape - double_quote - control;
1054
+
1055
+ main := double_quote (
1056
+ (simple*)(
1057
+ (double_quote) @parse_simple_string |
1058
+ ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string
1059
+ )
1060
+ );
620
1061
  }%%
621
1062
 
622
1063
  static int
@@ -672,18 +1113,80 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
672
1113
 
673
1114
  static VALUE convert_encoding(VALUE source)
674
1115
  {
675
- #ifdef HAVE_RUBY_ENCODING_H
676
- rb_encoding *enc = rb_enc_get(source);
677
- if (enc == rb_ascii8bit_encoding()) {
678
- if (OBJ_FROZEN(source)) {
679
- source = rb_str_dup(source);
680
- }
681
- FORCE_UTF8(source);
682
- } else {
683
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
684
- }
685
- #endif
1116
+ int encindex = RB_ENCODING_GET(source);
1117
+
1118
+ if (RB_LIKELY(encindex == utf8_encindex)) {
686
1119
  return source;
1120
+ }
1121
+
1122
+ if (encindex == binary_encindex) {
1123
+ // For historical reasons, we silently reinterpret binary strings as UTF-8
1124
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1125
+ }
1126
+
1127
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
1128
+ }
1129
+
1130
+ static int configure_parser_i(VALUE key, VALUE val, VALUE data)
1131
+ {
1132
+ JSON_Parser *json = (JSON_Parser *)data;
1133
+
1134
+ if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1135
+ else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
1136
+ else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
1137
+ else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
1138
+ else if (key == sym_freeze) { json->freeze = RTEST(val); }
1139
+ else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
1140
+ else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
1141
+ else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
1142
+ else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
1143
+ else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
1144
+ else if (key == sym_create_additions) {
1145
+ if (NIL_P(val)) {
1146
+ json->create_additions = true;
1147
+ json->deprecated_create_additions = true;
1148
+ } else {
1149
+ json->create_additions = RTEST(val);
1150
+ json->deprecated_create_additions = false;
1151
+ }
1152
+ }
1153
+
1154
+ return ST_CONTINUE;
1155
+ }
1156
+
1157
+ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
1158
+ {
1159
+ if (json->Vsource) {
1160
+ rb_raise(rb_eTypeError, "already initialized instance");
1161
+ }
1162
+
1163
+ json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1164
+ json->max_nesting = 100;
1165
+
1166
+ if (!NIL_P(opts)) {
1167
+ Check_Type(opts, T_HASH);
1168
+ if (RHASH_SIZE(opts) > 0) {
1169
+ // We assume in most cases few keys are set so it's faster to go over
1170
+ // the provided keys than to check all possible keys.
1171
+ rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
1172
+
1173
+ if (json->symbolize_names && json->create_additions) {
1174
+ rb_raise(rb_eArgError,
1175
+ "options :symbolize_names and :create_additions cannot be "
1176
+ " used in conjunction");
1177
+ }
1178
+
1179
+ if (json->create_additions && !json->create_id) {
1180
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
1181
+ }
1182
+ }
1183
+
1184
+ }
1185
+ source = convert_encoding(StringValue(source));
1186
+ StringValue(source);
1187
+ json->len = RSTRING_LEN(source);
1188
+ json->source = RSTRING_PTR(source);
1189
+ json->Vsource = source;
687
1190
  }
688
1191
 
689
1192
  /*
@@ -708,105 +1211,23 @@ static VALUE convert_encoding(VALUE source)
708
1211
  * * *create_additions*: If set to false, the Parser doesn't create
709
1212
  * additions even if a matching class and create_id was found. This option
710
1213
  * defaults to false.
711
- * * *object_class*: Defaults to Hash
712
- * * *array_class*: Defaults to Array
1214
+ * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1215
+ * instead of Hash to represent JSON objects. The type must respond to
1216
+ * +new+ without arguments, and return an object that responds to +[]=+.
1217
+ * * *array_class*: Defaults to Array. If another type is provided, it will be used
1218
+ * instead of Array to represent JSON arrays. The type must respond to
1219
+ * +new+ without arguments, and return an object that responds to +<<+.
1220
+ * * *decimal_class*: Specifies which class to use instead of the default
1221
+ * (Float) when parsing decimal numbers. This class must accept a single
1222
+ * string argument in its constructor.
713
1223
  */
714
1224
  static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
715
1225
  {
716
- VALUE source, opts;
717
1226
  GET_PARSER_INIT;
718
1227
 
719
- if (json->Vsource) {
720
- rb_raise(rb_eTypeError, "already initialized instance");
721
- }
722
- rb_scan_args(argc, argv, "1:", &source, &opts);
723
- if (!NIL_P(opts)) {
724
- VALUE tmp = ID2SYM(i_max_nesting);
725
- if (option_given_p(opts, tmp)) {
726
- VALUE max_nesting = rb_hash_aref(opts, tmp);
727
- if (RTEST(max_nesting)) {
728
- Check_Type(max_nesting, T_FIXNUM);
729
- json->max_nesting = FIX2INT(max_nesting);
730
- } else {
731
- json->max_nesting = 0;
732
- }
733
- } else {
734
- json->max_nesting = 100;
735
- }
736
- tmp = ID2SYM(i_allow_nan);
737
- if (option_given_p(opts, tmp)) {
738
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
739
- } else {
740
- json->allow_nan = 0;
741
- }
742
- tmp = ID2SYM(i_symbolize_names);
743
- if (option_given_p(opts, tmp)) {
744
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
745
- } else {
746
- json->symbolize_names = 0;
747
- }
748
- tmp = ID2SYM(i_freeze);
749
- if (option_given_p(opts, tmp)) {
750
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
751
- } else {
752
- json->freeze = 0;
753
- }
754
- tmp = ID2SYM(i_create_additions);
755
- if (option_given_p(opts, tmp)) {
756
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
757
- } else {
758
- json->create_additions = 0;
759
- }
760
- if (json->symbolize_names && json->create_additions) {
761
- rb_raise(rb_eArgError,
762
- "options :symbolize_names and :create_additions cannot be "
763
- " used in conjunction");
764
- }
765
- tmp = ID2SYM(i_create_id);
766
- if (option_given_p(opts, tmp)) {
767
- json->create_id = rb_hash_aref(opts, tmp);
768
- } else {
769
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
770
- }
771
- tmp = ID2SYM(i_object_class);
772
- if (option_given_p(opts, tmp)) {
773
- json->object_class = rb_hash_aref(opts, tmp);
774
- } else {
775
- json->object_class = Qnil;
776
- }
777
- tmp = ID2SYM(i_array_class);
778
- if (option_given_p(opts, tmp)) {
779
- json->array_class = rb_hash_aref(opts, tmp);
780
- } else {
781
- json->array_class = Qnil;
782
- }
783
- tmp = ID2SYM(i_decimal_class);
784
- if (option_given_p(opts, tmp)) {
785
- json->decimal_class = rb_hash_aref(opts, tmp);
786
- } else {
787
- json->decimal_class = Qnil;
788
- }
789
- tmp = ID2SYM(i_match_string);
790
- if (option_given_p(opts, tmp)) {
791
- VALUE match_string = rb_hash_aref(opts, tmp);
792
- json->match_string = RTEST(match_string) ? match_string : Qnil;
793
- } else {
794
- json->match_string = Qnil;
795
- }
796
- } else {
797
- json->max_nesting = 100;
798
- json->allow_nan = 0;
799
- json->create_additions = 0;
800
- json->create_id = Qnil;
801
- json->object_class = Qnil;
802
- json->array_class = Qnil;
803
- json->decimal_class = Qnil;
804
- }
805
- source = convert_encoding(StringValue(source));
806
- StringValue(source);
807
- json->len = RSTRING_LEN(source);
808
- json->source = RSTRING_PTR(source);;
809
- json->Vsource = source;
1228
+ rb_check_arity(argc, 1, 2);
1229
+
1230
+ parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
810
1231
  return self;
811
1232
  }
812
1233
 
@@ -836,64 +1257,119 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
836
1257
  */
837
1258
  static VALUE cParser_parse(VALUE self)
838
1259
  {
839
- char *p, *pe;
840
- int cs = EVIL;
841
- VALUE result = Qnil;
842
- GET_PARSER;
1260
+ char *p, *pe;
1261
+ int cs = EVIL;
1262
+ VALUE result = Qnil;
1263
+ GET_PARSER;
843
1264
 
844
- %% write init;
845
- p = json->source;
846
- pe = p + json->len;
847
- %% write exec;
1265
+ char stack_buffer[FBUFFER_STACK_SIZE];
1266
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
848
1267
 
849
- if (cs >= JSON_first_final && p == pe) {
850
- return result;
851
- } else {
852
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
853
- return Qnil;
854
- }
1268
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1269
+ rvalue_stack stack = {
1270
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1271
+ .ptr = rvalue_stack_buffer,
1272
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1273
+ };
1274
+ json->stack = &stack;
1275
+
1276
+ %% write init;
1277
+ p = json->source;
1278
+ pe = p + json->len;
1279
+ %% write exec;
1280
+
1281
+ if (json->stack_handle) {
1282
+ rvalue_stack_eagerly_release(json->stack_handle);
1283
+ }
1284
+
1285
+ if (cs >= JSON_first_final && p == pe) {
1286
+ return result;
1287
+ } else {
1288
+ raise_parse_error("unexpected token at '%s'", p);
1289
+ return Qnil;
1290
+ }
1291
+ }
1292
+
1293
+ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
1294
+ {
1295
+ char *p, *pe;
1296
+ int cs = EVIL;
1297
+ VALUE result = Qnil;
1298
+
1299
+ JSON_Parser _parser = {0};
1300
+ JSON_Parser *json = &_parser;
1301
+ parser_init(json, source, opts);
1302
+
1303
+ char stack_buffer[FBUFFER_STACK_SIZE];
1304
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
1305
+
1306
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1307
+ rvalue_stack stack = {
1308
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1309
+ .ptr = rvalue_stack_buffer,
1310
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1311
+ };
1312
+ json->stack = &stack;
1313
+
1314
+ %% write init;
1315
+ p = json->source;
1316
+ pe = p + json->len;
1317
+ %% write exec;
1318
+
1319
+ if (json->stack_handle) {
1320
+ rvalue_stack_eagerly_release(json->stack_handle);
1321
+ }
1322
+
1323
+ if (cs >= JSON_first_final && p == pe) {
1324
+ return result;
1325
+ } else {
1326
+ raise_parse_error("unexpected token at '%s'", p);
1327
+ return Qnil;
1328
+ }
855
1329
  }
856
1330
 
857
1331
  static void JSON_mark(void *ptr)
858
1332
  {
859
1333
  JSON_Parser *json = ptr;
860
- rb_gc_mark_maybe(json->Vsource);
861
- rb_gc_mark_maybe(json->create_id);
862
- rb_gc_mark_maybe(json->object_class);
863
- rb_gc_mark_maybe(json->array_class);
864
- rb_gc_mark_maybe(json->decimal_class);
865
- rb_gc_mark_maybe(json->match_string);
1334
+ rb_gc_mark(json->Vsource);
1335
+ rb_gc_mark(json->create_id);
1336
+ rb_gc_mark(json->object_class);
1337
+ rb_gc_mark(json->array_class);
1338
+ rb_gc_mark(json->decimal_class);
1339
+ rb_gc_mark(json->match_string);
1340
+ rb_gc_mark(json->stack_handle);
1341
+
1342
+ long index;
1343
+ for (index = 0; index < json->name_cache.length; index++) {
1344
+ rb_gc_mark(json->name_cache.entries[index]);
1345
+ }
866
1346
  }
867
1347
 
868
1348
  static void JSON_free(void *ptr)
869
1349
  {
870
1350
  JSON_Parser *json = ptr;
871
- fbuffer_free(json->fbuffer);
1351
+ fbuffer_free(&json->fbuffer);
872
1352
  ruby_xfree(json);
873
1353
  }
874
1354
 
875
1355
  static size_t JSON_memsize(const void *ptr)
876
1356
  {
877
1357
  const JSON_Parser *json = ptr;
878
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
1358
+ return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer);
879
1359
  }
880
1360
 
881
- #ifdef NEW_TYPEDDATA_WRAPPER
882
1361
  static const rb_data_type_t JSON_Parser_type = {
883
1362
  "JSON/Parser",
884
1363
  {JSON_mark, JSON_free, JSON_memsize,},
885
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
886
1364
  0, 0,
887
1365
  RUBY_TYPED_FREE_IMMEDIATELY,
888
- #endif
889
1366
  };
890
- #endif
891
1367
 
892
1368
  static VALUE cJSON_parser_s_allocate(VALUE klass)
893
1369
  {
894
1370
  JSON_Parser *json;
895
1371
  VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
896
- json->fbuffer = fbuffer_alloc(0);
1372
+ fbuffer_stack_init(&json->fbuffer, 0, NULL, 0);
897
1373
  return obj;
898
1374
  }
899
1375
 
@@ -920,15 +1396,15 @@ void Init_parser(void)
920
1396
  mJSON = rb_define_module("JSON");
921
1397
  mExt = rb_define_module_under(mJSON, "Ext");
922
1398
  cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
923
- eParserError = rb_path2class("JSON::ParserError");
924
1399
  eNestingError = rb_path2class("JSON::NestingError");
925
- rb_gc_register_mark_object(eParserError);
926
1400
  rb_gc_register_mark_object(eNestingError);
927
1401
  rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
928
1402
  rb_define_method(cParser, "initialize", cParser_initialize, -1);
929
1403
  rb_define_method(cParser, "parse", cParser_parse, 0);
930
1404
  rb_define_method(cParser, "source", cParser_source, 0);
931
1405
 
1406
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
1407
+
932
1408
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
933
1409
  rb_gc_register_mark_object(CNaN);
934
1410
 
@@ -938,28 +1414,38 @@ void Init_parser(void)
938
1414
  CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
939
1415
  rb_gc_register_mark_object(CMinusInfinity);
940
1416
 
1417
+ rb_global_variable(&Encoding_UTF_8);
1418
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1419
+
1420
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1421
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1422
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1423
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1424
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1425
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
1426
+ sym_create_id = ID2SYM(rb_intern("create_id"));
1427
+ sym_object_class = ID2SYM(rb_intern("object_class"));
1428
+ sym_array_class = ID2SYM(rb_intern("array_class"));
1429
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1430
+ sym_match_string = ID2SYM(rb_intern("match_string"));
1431
+
1432
+ i_create_id = rb_intern("create_id");
941
1433
  i_json_creatable_p = rb_intern("json_creatable?");
942
1434
  i_json_create = rb_intern("json_create");
943
- i_create_id = rb_intern("create_id");
944
- i_create_additions = rb_intern("create_additions");
945
1435
  i_chr = rb_intern("chr");
946
- i_max_nesting = rb_intern("max_nesting");
947
- i_allow_nan = rb_intern("allow_nan");
948
- i_symbolize_names = rb_intern("symbolize_names");
949
- i_object_class = rb_intern("object_class");
950
- i_array_class = rb_intern("array_class");
951
- i_decimal_class = rb_intern("decimal_class");
952
1436
  i_match = rb_intern("match");
953
- i_match_string = rb_intern("match_string");
954
- i_key_p = rb_intern("key?");
955
1437
  i_deep_const_get = rb_intern("deep_const_get");
956
1438
  i_aset = rb_intern("[]=");
957
1439
  i_aref = rb_intern("[]");
958
1440
  i_leftshift = rb_intern("<<");
959
1441
  i_new = rb_intern("new");
960
1442
  i_try_convert = rb_intern("try_convert");
961
- i_freeze = rb_intern("freeze");
962
1443
  i_uminus = rb_intern("-@");
1444
+ i_encode = rb_intern("encode");
1445
+
1446
+ binary_encindex = rb_ascii8bit_encindex();
1447
+ utf8_encindex = rb_utf8_encindex();
1448
+ enc_utf8 = rb_utf8_encoding();
963
1449
  }
964
1450
 
965
1451
  /*