json 2.7.2 → 2.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,320 @@
1
+ #include "ruby.h"
1
2
  #include "../fbuffer/fbuffer.h"
2
- #include "parser.h"
3
3
 
4
- #if defined HAVE_RUBY_ENCODING_H
5
- # define EXC_ENCODING rb_utf8_encoding(),
6
- # ifndef HAVE_RB_ENC_RAISE
7
- static void
8
- enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...)
4
+ static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
5
+ static VALUE CNaN, CInfinity, CMinusInfinity;
6
+
7
+ static ID i_json_creatable_p, i_json_create, i_create_id,
8
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
9
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
10
+
11
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
12
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
13
+ sym_decimal_class, sym_match_string;
14
+
15
+ static int binary_encindex;
16
+ static int utf8_encindex;
17
+
18
+ #ifdef HAVE_RB_CATEGORY_WARN
19
+ # define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
20
+ #else
21
+ # define json_deprecated(message) rb_warn(message)
22
+ #endif
23
+
24
+ static const char deprecated_create_additions_warning[] =
25
+ "JSON.load implicit support for `create_additions: true` is deprecated "
26
+ "and will be removed in 3.0, use JSON.unsafe_load or explicitly "
27
+ "pass `create_additions: true`";
28
+
29
+ #ifndef HAVE_RB_GC_MARK_LOCATIONS
30
+ // For TruffleRuby
31
+ void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
9
32
  {
10
- va_list args;
11
- VALUE mesg;
33
+ VALUE *value = start;
12
34
 
13
- va_start(args, fmt);
14
- mesg = rb_enc_vsprintf(enc, fmt, args);
15
- va_end(args);
35
+ while (value < end) {
36
+ rb_gc_mark(*value);
37
+ value++;
38
+ }
39
+ }
40
+ #endif
16
41
 
17
- rb_exc_raise(rb_exc_new3(exc, mesg));
42
+ #ifndef HAVE_RB_HASH_BULK_INSERT
43
+ // For TruffleRuby
44
+ void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
45
+ {
46
+ long index = 0;
47
+ while (index < count) {
48
+ VALUE name = pairs[index++];
49
+ VALUE value = pairs[index++];
50
+ rb_hash_aset(hash, name, value);
51
+ }
52
+ RB_GC_GUARD(hash);
18
53
  }
19
- # define rb_enc_raise enc_raise
20
- # endif
21
- #else
22
- # define EXC_ENCODING /* nothing */
23
- # define rb_enc_raise rb_raise
24
54
  #endif
25
55
 
56
+ /* name cache */
57
+
58
+ #include <string.h>
59
+ #include <ctype.h>
60
+
61
+ // Object names are likely to be repeated, and are frozen.
62
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
63
+ // and save much more expensive lookups into the global fstring table.
64
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
65
+ // to be able to fit safely on the stack.
66
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
67
+ // performance.
68
+ #define JSON_RVALUE_CACHE_CAPA 63
69
+ typedef struct rvalue_cache_struct {
70
+ int length;
71
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
72
+ } rvalue_cache;
73
+
74
+ static rb_encoding *enc_utf8;
75
+
76
+ #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
77
+
78
+ static inline VALUE build_interned_string(const char *str, const long length)
79
+ {
80
+ # ifdef HAVE_RB_ENC_INTERNED_STR
81
+ return rb_enc_interned_str(str, length, enc_utf8);
82
+ # else
83
+ VALUE rstring = rb_utf8_str_new(str, length);
84
+ return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
85
+ # endif
86
+ }
87
+
88
+ static inline VALUE build_symbol(const char *str, const long length)
89
+ {
90
+ return rb_str_intern(build_interned_string(str, length));
91
+ }
92
+
93
+ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
94
+ {
95
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
96
+ cache->length++;
97
+ cache->entries[index] = rstring;
98
+ }
99
+
100
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
101
+ {
102
+ long rstring_length = RSTRING_LEN(rstring);
103
+ if (length == rstring_length) {
104
+ return memcmp(str, RSTRING_PTR(rstring), length);
105
+ } else {
106
+ return (int)(length - rstring_length);
107
+ }
108
+ }
109
+
110
+ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
111
+ {
112
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
113
+ // Common names aren't likely to be very long. So we just don't
114
+ // cache names above an arbitrary threshold.
115
+ return Qfalse;
116
+ }
117
+
118
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
119
+ // Simple heuristic, if the first character isn't a letter,
120
+ // we're much less likely to see this string again.
121
+ // We mostly want to cache strings that are likely to be repeated.
122
+ return Qfalse;
123
+ }
124
+
125
+ int low = 0;
126
+ int high = cache->length - 1;
127
+ int mid = 0;
128
+ int last_cmp = 0;
129
+
130
+ while (low <= high) {
131
+ mid = (high + low) >> 1;
132
+ VALUE entry = cache->entries[mid];
133
+ last_cmp = rstring_cache_cmp(str, length, entry);
134
+
135
+ if (last_cmp == 0) {
136
+ return entry;
137
+ } else if (last_cmp > 0) {
138
+ low = mid + 1;
139
+ } else {
140
+ high = mid - 1;
141
+ }
142
+ }
143
+
144
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
145
+ // We assume the overwhelming majority of names don't need to be escaped.
146
+ // But if they do, we have to fallback to the slow path.
147
+ return Qfalse;
148
+ }
149
+
150
+ VALUE rstring = build_interned_string(str, length);
151
+
152
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
153
+ if (last_cmp > 0) {
154
+ mid += 1;
155
+ }
156
+
157
+ rvalue_cache_insert_at(cache, mid, rstring);
158
+ }
159
+ return rstring;
160
+ }
161
+
162
+ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
163
+ {
164
+ if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
165
+ // Common names aren't likely to be very long. So we just don't
166
+ // cache names above an arbitrary threshold.
167
+ return Qfalse;
168
+ }
169
+
170
+ if (RB_UNLIKELY(!isalpha(str[0]))) {
171
+ // Simple heuristic, if the first character isn't a letter,
172
+ // we're much less likely to see this string again.
173
+ // We mostly want to cache strings that are likely to be repeated.
174
+ return Qfalse;
175
+ }
176
+
177
+ int low = 0;
178
+ int high = cache->length - 1;
179
+ int mid = 0;
180
+ int last_cmp = 0;
181
+
182
+ while (low <= high) {
183
+ mid = (high + low) >> 1;
184
+ VALUE entry = cache->entries[mid];
185
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
186
+
187
+ if (last_cmp == 0) {
188
+ return entry;
189
+ } else if (last_cmp > 0) {
190
+ low = mid + 1;
191
+ } else {
192
+ high = mid - 1;
193
+ }
194
+ }
195
+
196
+ if (RB_UNLIKELY(memchr(str, '\\', length))) {
197
+ // We assume the overwhelming majority of names don't need to be escaped.
198
+ // But if they do, we have to fallback to the slow path.
199
+ return Qfalse;
200
+ }
201
+
202
+ VALUE rsymbol = build_symbol(str, length);
203
+
204
+ if (cache->length < JSON_RVALUE_CACHE_CAPA) {
205
+ if (last_cmp > 0) {
206
+ mid += 1;
207
+ }
208
+
209
+ rvalue_cache_insert_at(cache, mid, rsymbol);
210
+ }
211
+ return rsymbol;
212
+ }
213
+
214
+ /* rvalue stack */
215
+
216
+ #define RVALUE_STACK_INITIAL_CAPA 128
217
+
218
/* Tags whether an rvalue_stack is heap-allocated or stack-allocated. */
enum rvalue_stack_type {
    RVALUE_STACK_HEAP_ALLOCATED = 0,
    RVALUE_STACK_STACK_ALLOCATED = 1,
};
222
+
223
+ typedef struct rvalue_stack_struct {
224
+ enum rvalue_stack_type type;
225
+ long capa;
226
+ long head;
227
+ VALUE *ptr;
228
+ } rvalue_stack;
229
+
230
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
231
+
232
+ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
233
+ {
234
+ long required = stack->capa * 2;
235
+
236
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
237
+ stack = rvalue_stack_spill(stack, handle, stack_ref);
238
+ } else {
239
+ REALLOC_N(stack->ptr, VALUE, required);
240
+ stack->capa = required;
241
+ }
242
+ return stack;
243
+ }
244
+
245
+ static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
246
+ {
247
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
248
+ stack = rvalue_stack_grow(stack, handle, stack_ref);
249
+ }
250
+ stack->ptr[stack->head] = value;
251
+ stack->head++;
252
+ }
253
+
254
+ static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
255
+ {
256
+ return stack->ptr + (stack->head - count);
257
+ }
258
+
259
+ static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
260
+ {
261
+ stack->head -= count;
262
+ }
263
+
264
+ static void rvalue_stack_mark(void *ptr)
265
+ {
266
+ rvalue_stack *stack = (rvalue_stack *)ptr;
267
+ rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head);
268
+ }
269
+
270
+ static void rvalue_stack_free(void *ptr)
271
+ {
272
+ rvalue_stack *stack = (rvalue_stack *)ptr;
273
+ if (stack) {
274
+ ruby_xfree(stack->ptr);
275
+ ruby_xfree(stack);
276
+ }
277
+ }
278
+
279
+ static size_t rvalue_stack_memsize(const void *ptr)
280
+ {
281
+ const rvalue_stack *stack = (const rvalue_stack *)ptr;
282
+ return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
283
+ }
284
+
285
+ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
286
+ "JSON::Ext::Parser/rvalue_stack",
287
+ {
288
+ .dmark = rvalue_stack_mark,
289
+ .dfree = rvalue_stack_free,
290
+ .dsize = rvalue_stack_memsize,
291
+ },
292
+ 0, 0,
293
+ RUBY_TYPED_FREE_IMMEDIATELY,
294
+ };
295
+
296
+ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
297
+ {
298
+ rvalue_stack *stack;
299
+ *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
300
+ *stack_ref = stack;
301
+ MEMCPY(stack, old_stack, rvalue_stack, 1);
302
+
303
+ stack->capa = old_stack->capa << 1;
304
+ stack->ptr = ALLOC_N(VALUE, stack->capa);
305
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
306
+ MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
307
+ return stack;
308
+ }
309
+
310
+ static void rvalue_stack_eagerly_release(VALUE handle)
311
+ {
312
+ rvalue_stack *stack;
313
+ TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
314
+ RTYPEDDATA_DATA(handle) = NULL;
315
+ rvalue_stack_free(stack);
316
+ }
317
+
26
318
  /* unicode */
27
319
 
28
320
  static const signed char digit_values[256] = {
@@ -42,26 +334,28 @@ static const signed char digit_values[256] = {
42
334
  -1, -1, -1, -1, -1, -1, -1
43
335
  };
44
336
 
45
- static UTF32 unescape_unicode(const unsigned char *p)
337
+ static uint32_t unescape_unicode(const unsigned char *p)
46
338
  {
339
+ const uint32_t replacement_char = 0xFFFD;
340
+
47
341
  signed char b;
48
- UTF32 result = 0;
342
+ uint32_t result = 0;
49
343
  b = digit_values[p[0]];
50
- if (b < 0) return UNI_REPLACEMENT_CHAR;
344
+ if (b < 0) return replacement_char;
51
345
  result = (result << 4) | (unsigned char)b;
52
346
  b = digit_values[p[1]];
53
- if (b < 0) return UNI_REPLACEMENT_CHAR;
347
+ if (b < 0) return replacement_char;
54
348
  result = (result << 4) | (unsigned char)b;
55
349
  b = digit_values[p[2]];
56
- if (b < 0) return UNI_REPLACEMENT_CHAR;
350
+ if (b < 0) return replacement_char;
57
351
  result = (result << 4) | (unsigned char)b;
58
352
  b = digit_values[p[3]];
59
- if (b < 0) return UNI_REPLACEMENT_CHAR;
353
+ if (b < 0) return replacement_char;
60
354
  result = (result << 4) | (unsigned char)b;
61
355
  return result;
62
356
  }
63
357
 
64
- static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
358
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
65
359
  {
66
360
  int len = 1;
67
361
  if (ch <= 0x7F) {
@@ -87,14 +381,70 @@ static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
87
381
  return len;
88
382
  }
89
383
 
90
- static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
91
- static VALUE CNaN, CInfinity, CMinusInfinity;
384
+ typedef struct JSON_ParserStruct {
385
+ VALUE Vsource;
386
+ char *source;
387
+ long len;
388
+ char *memo;
389
+ VALUE create_id;
390
+ VALUE object_class;
391
+ VALUE array_class;
392
+ VALUE decimal_class;
393
+ VALUE match_string;
394
+ FBuffer fbuffer;
395
+ int in_array;
396
+ int max_nesting;
397
+ bool allow_nan;
398
+ bool allow_trailing_comma;
399
+ bool parsing_name;
400
+ bool symbolize_names;
401
+ bool freeze;
402
+ bool create_additions;
403
+ bool deprecated_create_additions;
404
+ rvalue_cache name_cache;
405
+ rvalue_stack *stack;
406
+ VALUE stack_handle;
407
+ } JSON_Parser;
408
+
409
+ #define GET_PARSER \
410
+ GET_PARSER_INIT; \
411
+ if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
412
+
413
+ #define GET_PARSER_INIT \
414
+ JSON_Parser *json; \
415
+ TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
416
+
417
+ #define MinusInfinity "-Infinity"
418
+ #define EVIL 0x666
419
+
420
+ static const rb_data_type_t JSON_Parser_type;
421
+ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
422
+ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
423
+ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
424
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result);
425
+ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
426
+
427
+
428
+ #define PARSE_ERROR_FRAGMENT_LEN 32
429
+ #ifdef RBIMPL_ATTR_NORETURN
430
+ RBIMPL_ATTR_NORETURN()
431
+ #endif
432
+ static void raise_parse_error(const char *format, const char *start)
433
+ {
434
+ char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
435
+
436
+ size_t len = strnlen(start, PARSE_ERROR_FRAGMENT_LEN);
437
+ const char *ptr = start;
438
+
439
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
440
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
441
+ buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
442
+ ptr = buffer;
443
+ }
444
+
445
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
446
+ }
92
447
 
93
- static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
94
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
95
- i_object_class, i_array_class, i_decimal_class, i_key_p,
96
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
97
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
98
448
 
99
449
  %%{
100
450
  machine JSON_common;
@@ -131,27 +481,25 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
131
481
  write data;
132
482
 
133
483
  action parse_value {
134
- VALUE v = Qnil;
135
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
484
+ char *np = JSON_parse_value(json, fpc, pe, result, current_nesting);
136
485
  if (np == NULL) {
137
486
  fhold; fbreak;
138
487
  } else {
139
- if (NIL_P(json->object_class)) {
140
- OBJ_FREEZE(last_name);
141
- rb_hash_aset(*result, last_name, v);
142
- } else {
143
- rb_funcall(*result, i_aset, 2, last_name, v);
144
- }
145
488
  fexec np;
146
489
  }
147
490
  }
148
491
 
492
+ action allow_trailing_comma { json->allow_trailing_comma }
493
+
149
494
  action parse_name {
150
495
  char *np;
151
- json->parsing_name = 1;
152
- np = JSON_parse_string(json, fpc, pe, &last_name);
153
- json->parsing_name = 0;
154
- if (np == NULL) { fhold; fbreak; } else fexec np;
496
+ json->parsing_name = true;
497
+ np = JSON_parse_string(json, fpc, pe, result);
498
+ json->parsing_name = false;
499
+ if (np == NULL) { fhold; fbreak; } else {
500
+ PUSH(*result);
501
+ fexec np;
502
+ }
155
503
  }
156
504
 
157
505
  action exit { fhold; fbreak; }
@@ -161,37 +509,64 @@ static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
161
509
 
162
510
  main := (
163
511
  begin_object
164
- (pair (next_pair)*)? ignore*
512
+ (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
165
513
  end_object
166
514
  ) @exit;
167
515
  }%%
168
516
 
517
+ #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
518
+
169
519
  static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
170
520
  {
171
521
  int cs = EVIL;
172
- VALUE last_name = Qnil;
173
- VALUE object_class = json->object_class;
174
522
 
175
523
  if (json->max_nesting && current_nesting > json->max_nesting) {
176
524
  rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
177
525
  }
178
526
 
179
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
527
+ long stack_head = json->stack->head;
180
528
 
181
529
  %% write init;
182
530
  %% write exec;
183
531
 
184
532
  if (cs >= JSON_object_first_final) {
185
- if (json->create_additions) {
533
+ long count = json->stack->head - stack_head;
534
+
535
+ if (RB_UNLIKELY(json->object_class)) {
536
+ VALUE object = rb_class_new_instance(0, 0, json->object_class);
537
+ long index = 0;
538
+ VALUE *items = rvalue_stack_peek(json->stack, count);
539
+ while (index < count) {
540
+ VALUE name = items[index++];
541
+ VALUE value = items[index++];
542
+ rb_funcall(object, i_aset, 2, name, value);
543
+ }
544
+ *result = object;
545
+ } else {
546
+ VALUE hash;
547
+ #ifdef HAVE_RB_HASH_NEW_CAPA
548
+ hash = rb_hash_new_capa(count >> 1);
549
+ #else
550
+ hash = rb_hash_new();
551
+ #endif
552
+ rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
553
+ *result = hash;
554
+ }
555
+ rvalue_stack_pop(json->stack, count);
556
+
557
+ if (RB_UNLIKELY(json->create_additions)) {
186
558
  VALUE klassname;
187
- if (NIL_P(json->object_class)) {
188
- klassname = rb_hash_aref(*result, json->create_id);
559
+ if (json->object_class) {
560
+ klassname = rb_funcall(*result, i_aref, 1, json->create_id);
189
561
  } else {
190
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
562
+ klassname = rb_hash_aref(*result, json->create_id);
191
563
  }
192
564
  if (!NIL_P(klassname)) {
193
565
  VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
194
566
  if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
567
+ if (json->deprecated_create_additions) {
568
+ json_deprecated(deprecated_create_additions_warning);
569
+ }
195
570
  *result = rb_funcall(klass, i_json_create, 1, *result);
196
571
  }
197
572
  }
@@ -202,7 +577,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
202
577
  }
203
578
  }
204
579
 
205
-
206
580
  %%{
207
581
  machine JSON_value;
208
582
  include JSON_common;
@@ -222,19 +596,24 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
222
596
  if (json->allow_nan) {
223
597
  *result = CNaN;
224
598
  } else {
225
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 2);
599
+ raise_parse_error("unexpected token at '%s'", p - 2);
226
600
  }
227
601
  }
228
602
  action parse_infinity {
229
603
  if (json->allow_nan) {
230
604
  *result = CInfinity;
231
605
  } else {
232
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p - 7);
606
+ raise_parse_error("unexpected token at '%s'", p - 7);
233
607
  }
234
608
  }
235
609
  action parse_string {
236
610
  char *np = JSON_parse_string(json, fpc, pe, result);
237
- if (np == NULL) { fhold; fbreak; } else fexec np;
611
+ if (np == NULL) {
612
+ fhold;
613
+ fbreak;
614
+ } else {
615
+ fexec np;
616
+ }
238
617
  }
239
618
 
240
619
  action parse_number {
@@ -245,19 +624,21 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
245
624
  fexec p + 10;
246
625
  fhold; fbreak;
247
626
  } else {
248
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
627
+ raise_parse_error("unexpected token at '%s'", p);
249
628
  }
250
629
  }
251
- np = JSON_parse_float(json, fpc, pe, result);
252
- if (np != NULL) fexec np;
253
- np = JSON_parse_integer(json, fpc, pe, result);
254
- if (np != NULL) fexec np;
630
+ np = JSON_parse_number(json, fpc, pe, result);
631
+ if (np != NULL) {
632
+ fexec np;
633
+ }
255
634
  fhold; fbreak;
256
635
  }
257
636
 
258
637
  action parse_array {
259
638
  char *np;
639
+ json->in_array++;
260
640
  np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1);
641
+ json->in_array--;
261
642
  if (np == NULL) { fhold; fbreak; } else fexec np;
262
643
  }
263
644
 
@@ -275,10 +656,10 @@ main := ignore* (
275
656
  Vtrue @parse_true |
276
657
  VNaN @parse_nan |
277
658
  VInfinity @parse_infinity |
278
- begin_number >parse_number |
279
- begin_string >parse_string |
280
- begin_array >parse_array |
281
- begin_object >parse_object
659
+ begin_number @parse_number |
660
+ begin_string @parse_string |
661
+ begin_array @parse_array |
662
+ begin_object @parse_object
282
663
  ) ignore* %*exit;
283
664
  }%%
284
665
 
@@ -294,6 +675,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
294
675
  }
295
676
 
296
677
  if (cs >= JSON_value_first_final) {
678
+ PUSH(*result);
297
679
  return p;
298
680
  } else {
299
681
  return NULL;
@@ -310,24 +692,40 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
310
692
  main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
311
693
  }%%
312
694
 
313
- static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
695
+ #define MAX_FAST_INTEGER_SIZE 18
696
+ static inline VALUE fast_parse_integer(char *p, char *pe)
314
697
  {
315
- int cs = EVIL;
698
+ bool negative = false;
699
+ if (*p == '-') {
700
+ negative = true;
701
+ p++;
702
+ }
316
703
 
317
- %% write init;
318
- json->memo = p;
319
- %% write exec;
704
+ long long memo = 0;
705
+ while (p < pe) {
706
+ memo *= 10;
707
+ memo += *p - '0';
708
+ p++;
709
+ }
710
+
711
+ if (negative) {
712
+ memo = -memo;
713
+ }
714
+ return LL2NUM(memo);
715
+ }
320
716
 
321
- if (cs >= JSON_integer_first_final) {
717
+ static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result)
718
+ {
322
719
  long len = p - json->memo;
323
- fbuffer_clear(json->fbuffer);
324
- fbuffer_append(json->fbuffer, json->memo, len);
325
- fbuffer_append_char(json->fbuffer, '\0');
326
- *result = rb_cstr2inum(FBUFFER_PTR(json->fbuffer), 10);
720
+ if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
721
+ *result = fast_parse_integer(json->memo, p);
722
+ } else {
723
+ fbuffer_clear(&json->fbuffer);
724
+ fbuffer_append(&json->fbuffer, json->memo, len);
725
+ fbuffer_append_char(&json->fbuffer, '\0');
726
+ *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
727
+ }
327
728
  return p + 1;
328
- } else {
329
- return NULL;
330
- }
331
729
  }
332
730
 
333
731
  %%{
@@ -337,60 +735,68 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
337
735
  write data;
338
736
 
339
737
  action exit { fhold; fbreak; }
738
+ action isFloat { is_float = true; }
340
739
 
341
740
  main := '-'? (
342
- (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
343
- | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
344
- ) (^[0-9Ee.\-]? @exit );
741
+ (('0' | [1-9][0-9]*)
742
+ ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) |
743
+ ([Ee] [+\-]?[0-9]+)) > isFloat)?
744
+ ) (^[0-9Ee.\-]? @exit ));
345
745
  }%%
346
746
 
347
- static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
747
+ static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result)
348
748
  {
349
749
  int cs = EVIL;
750
+ bool is_float = false;
350
751
 
351
752
  %% write init;
352
753
  json->memo = p;
353
754
  %% write exec;
354
755
 
355
756
  if (cs >= JSON_float_first_final) {
757
+ if (!is_float) {
758
+ return JSON_decode_integer(json, p, result);
759
+ }
356
760
  VALUE mod = Qnil;
357
761
  ID method_id = 0;
358
- if (rb_respond_to(json->decimal_class, i_try_convert)) {
359
- mod = json->decimal_class;
360
- method_id = i_try_convert;
361
- } else if (rb_respond_to(json->decimal_class, i_new)) {
362
- mod = json->decimal_class;
363
- method_id = i_new;
364
- } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
365
- VALUE name = rb_class_name(json->decimal_class);
366
- const char *name_cstr = RSTRING_PTR(name);
367
- const char *last_colon = strrchr(name_cstr, ':');
368
- if (last_colon) {
369
- const char *mod_path_end = last_colon - 1;
370
- VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
371
- mod = rb_path_to_class(mod_path);
372
-
373
- const char *method_name_beg = last_colon + 1;
374
- long before_len = method_name_beg - name_cstr;
375
- long len = RSTRING_LEN(name) - before_len;
376
- VALUE method_name = rb_str_substr(name, before_len, len);
377
- method_id = SYM2ID(rb_str_intern(method_name));
378
- } else {
379
- mod = rb_mKernel;
380
- method_id = SYM2ID(rb_str_intern(name));
762
+ if (json->decimal_class) {
763
+ if (rb_respond_to(json->decimal_class, i_try_convert)) {
764
+ mod = json->decimal_class;
765
+ method_id = i_try_convert;
766
+ } else if (rb_respond_to(json->decimal_class, i_new)) {
767
+ mod = json->decimal_class;
768
+ method_id = i_new;
769
+ } else if (RB_TYPE_P(json->decimal_class, T_CLASS)) {
770
+ VALUE name = rb_class_name(json->decimal_class);
771
+ const char *name_cstr = RSTRING_PTR(name);
772
+ const char *last_colon = strrchr(name_cstr, ':');
773
+ if (last_colon) {
774
+ const char *mod_path_end = last_colon - 1;
775
+ VALUE mod_path = rb_str_substr(name, 0, mod_path_end - name_cstr);
776
+ mod = rb_path_to_class(mod_path);
777
+
778
+ const char *method_name_beg = last_colon + 1;
779
+ long before_len = method_name_beg - name_cstr;
780
+ long len = RSTRING_LEN(name) - before_len;
781
+ VALUE method_name = rb_str_substr(name, before_len, len);
782
+ method_id = SYM2ID(rb_str_intern(method_name));
783
+ } else {
784
+ mod = rb_mKernel;
785
+ method_id = SYM2ID(rb_str_intern(name));
786
+ }
381
787
  }
382
788
  }
383
789
 
384
790
  long len = p - json->memo;
385
- fbuffer_clear(json->fbuffer);
386
- fbuffer_append(json->fbuffer, json->memo, len);
387
- fbuffer_append_char(json->fbuffer, '\0');
791
+ fbuffer_clear(&json->fbuffer);
792
+ fbuffer_append(&json->fbuffer, json->memo, len);
793
+ fbuffer_append_char(&json->fbuffer, '\0');
388
794
 
389
795
  if (method_id) {
390
- VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
796
+ VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer));
391
797
  *result = rb_funcallv(mod, method_id, 1, &text);
392
798
  } else {
393
- *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
799
+ *result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1));
394
800
  }
395
801
 
396
802
  return p + 1;
@@ -412,69 +818,133 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
412
818
  if (np == NULL) {
413
819
  fhold; fbreak;
414
820
  } else {
415
- if (NIL_P(json->array_class)) {
416
- rb_ary_push(*result, v);
417
- } else {
418
- rb_funcall(*result, i_leftshift, 1, v);
419
- }
420
821
  fexec np;
421
822
  }
422
823
  }
423
824
 
825
+ action allow_trailing_comma { json->allow_trailing_comma }
826
+
424
827
  action exit { fhold; fbreak; }
425
828
 
426
829
  next_element = value_separator ignore* begin_value >parse_value;
427
830
 
428
831
  main := begin_array ignore*
429
832
  ((begin_value >parse_value ignore*)
430
- (ignore* next_element ignore*)*)?
833
+ (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
431
834
  end_array @exit;
432
835
  }%%
433
836
 
434
837
  static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
435
838
  {
436
839
  int cs = EVIL;
437
- VALUE array_class = json->array_class;
438
840
 
439
841
  if (json->max_nesting && current_nesting > json->max_nesting) {
440
842
  rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
441
843
  }
442
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
844
+ long stack_head = json->stack->head;
443
845
 
444
846
  %% write init;
445
847
  %% write exec;
446
848
 
447
849
  if(cs >= JSON_array_first_final) {
850
+ long count = json->stack->head - stack_head;
851
+
852
+ if (RB_UNLIKELY(json->array_class)) {
853
+ VALUE array = rb_class_new_instance(0, 0, json->array_class);
854
+ VALUE *items = rvalue_stack_peek(json->stack, count);
855
+ long index;
856
+ for (index = 0; index < count; index++) {
857
+ rb_funcall(array, i_leftshift, 1, items[index]);
858
+ }
859
+ *result = array;
860
+ } else {
861
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
862
+ *result = array;
863
+ }
864
+ rvalue_stack_pop(json->stack, count);
865
+
448
866
  return p + 1;
449
867
  } else {
450
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
868
+ raise_parse_error("unexpected token at '%s'", p);
451
869
  return NULL;
452
870
  }
453
871
  }
454
872
 
455
- static const size_t MAX_STACK_BUFFER_SIZE = 128;
456
- static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
873
+ static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
874
+ {
875
+ if (symbolize) {
876
+ intern = true;
877
+ }
878
+ VALUE result;
879
+ # ifdef HAVE_RB_ENC_INTERNED_STR
880
+ if (intern) {
881
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
882
+ } else {
883
+ result = rb_utf8_str_new(start, (long)(end - start));
884
+ }
885
+ # else
886
+ result = rb_utf8_str_new(start, (long)(end - start));
887
+ if (intern) {
888
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
889
+ }
890
+ # endif
891
+
892
+ if (symbolize) {
893
+ result = rb_str_intern(result);
894
+ }
895
+
896
+ return result;
897
+ }
898
+
899
+ static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
900
+ {
901
+ size_t bufferSize = stringEnd - string;
902
+
903
+ if (is_name && json->in_array) {
904
+ VALUE cached_key;
905
+ if (RB_UNLIKELY(symbolize)) {
906
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
907
+ } else {
908
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
909
+ }
910
+
911
+ if (RB_LIKELY(cached_key)) {
912
+ return cached_key;
913
+ }
914
+ }
915
+
916
+ return build_string(string, stringEnd, intern, symbolize);
917
+ }
918
+
919
+ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
457
920
  {
458
- VALUE result = Qnil;
459
921
  size_t bufferSize = stringEnd - string;
460
922
  char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
461
923
  int unescape_len;
462
924
  char buf[4];
463
925
 
464
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
465
- # ifdef HAVE_RB_ENC_INTERNED_STR
466
- bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
467
- # else
468
- bufferStart = buffer = ALLOC_N(char, bufferSize);
469
- # endif
470
- } else {
471
- # ifdef HAVE_RB_ENC_INTERNED_STR
472
- bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
473
- # else
474
- bufferStart = buffer = ALLOCA_N(char, bufferSize);
475
- # endif
926
+ if (is_name && json->in_array) {
927
+ VALUE cached_key;
928
+ if (RB_UNLIKELY(symbolize)) {
929
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
930
+ } else {
931
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
932
+ }
933
+
934
+ if (RB_LIKELY(cached_key)) {
935
+ return cached_key;
936
+ }
937
+ }
938
+
939
+ pe = memchr(p, '\\', bufferSize);
940
+ if (RB_UNLIKELY(pe == NULL)) {
941
+ return build_string(string, stringEnd, intern, symbolize);
476
942
  }
477
943
 
944
+ VALUE result = rb_str_buf_new(bufferSize);
945
+ rb_enc_associate_index(result, utf8_encindex);
946
+ buffer = bufferStart = RSTRING_PTR(result);
947
+
478
948
  while (pe < stringEnd) {
479
949
  if (*pe == '\\') {
480
950
  unescape = (char *) "?";
@@ -507,29 +977,27 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
507
977
  break;
508
978
  case 'u':
509
979
  if (pe > stringEnd - 4) {
510
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
511
- ruby_xfree(bufferStart);
512
- }
513
- rb_enc_raise(
514
- EXC_ENCODING eParserError,
515
- "incomplete unicode character escape sequence at '%s'", p
516
- );
980
+ raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
517
981
  } else {
518
- UTF32 ch = unescape_unicode((unsigned char *) ++pe);
982
+ uint32_t ch = unescape_unicode((unsigned char *) ++pe);
519
983
  pe += 3;
520
- if (UNI_SUR_HIGH_START == (ch & 0xFC00)) {
984
+ /* To handle values above U+FFFF, we take a sequence of
985
+ * \uXXXX escapes in the U+D800..U+DBFF then
986
+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
987
+ * to make a 20-bit number, then add 0x10000 to get the
988
+ * final codepoint.
989
+ *
990
+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
991
+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
992
+ * Area".
993
+ */
994
+ if ((ch & 0xFC00) == 0xD800) {
521
995
  pe++;
522
996
  if (pe > stringEnd - 6) {
523
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
524
- ruby_xfree(bufferStart);
525
- }
526
- rb_enc_raise(
527
- EXC_ENCODING eParserError,
528
- "incomplete surrogate pair at '%s'", p
529
- );
997
+ raise_parse_error("incomplete surrogate pair at '%s'", p);
530
998
  }
531
999
  if (pe[0] == '\\' && pe[1] == 'u') {
532
- UTF32 sur = unescape_unicode((unsigned char *) pe + 2);
1000
+ uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
533
1001
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
534
1002
  | (sur & 0x3FF));
535
1003
  pe += 5;
@@ -558,41 +1026,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
558
1026
  MEMCPY(buffer, p, char, pe - p);
559
1027
  buffer += pe - p;
560
1028
  }
561
-
562
- # ifdef HAVE_RB_ENC_INTERNED_STR
563
- if (intern) {
564
- result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
565
- } else {
566
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
567
- }
568
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
569
- ruby_xfree(bufferStart);
570
- }
571
- # else
572
- result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
573
-
574
- if (bufferSize > MAX_STACK_BUFFER_SIZE) {
575
- ruby_xfree(bufferStart);
576
- }
577
-
578
- if (intern) {
579
- # if STR_UMINUS_DEDUPE_FROZEN
580
- // Starting from MRI 2.8 it is preferable to freeze the string
581
- // before deduplication so that it can be interned directly
582
- // otherwise it would be duplicated first which is wasteful.
583
- result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
584
- # elif STR_UMINUS_DEDUPE
585
- // MRI 2.5 and older do not deduplicate strings that are already
586
- // frozen.
587
- result = rb_funcall(result, i_uminus, 0);
588
- # else
589
- result = rb_str_freeze(result);
590
- # endif
591
- }
592
- # endif
1029
+ rb_str_set_len(result, buffer - bufferStart);
593
1030
 
594
1031
  if (symbolize) {
595
- result = rb_str_intern(result);
1032
+ result = rb_str_intern(result);
1033
+ } else if (intern) {
1034
+ result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
596
1035
  }
597
1036
 
598
1037
  return result;
@@ -604,19 +1043,31 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
604
1043
 
605
1044
  write data;
606
1045
 
607
- action parse_string {
608
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
609
- if (NIL_P(*result)) {
610
- fhold;
611
- fbreak;
612
- } else {
613
- fexec p + 1;
614
- }
1046
+ action parse_complex_string {
1047
+ *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1048
+ fexec p + 1;
1049
+ fhold;
1050
+ fbreak;
615
1051
  }
616
1052
 
617
- action exit { fhold; fbreak; }
1053
+ action parse_simple_string {
1054
+ *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
1055
+ fexec p + 1;
1056
+ fhold;
1057
+ fbreak;
1058
+ }
618
1059
 
619
- main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
1060
+ double_quote = '"';
1061
+ escape = '\\';
1062
+ control = 0..0x1f;
1063
+ simple = any - escape - double_quote - control;
1064
+
1065
+ main := double_quote (
1066
+ (simple*)(
1067
+ (double_quote) @parse_simple_string |
1068
+ ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string
1069
+ )
1070
+ );
620
1071
  }%%
621
1072
 
622
1073
  static int
@@ -672,18 +1123,80 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
672
1123
 
673
1124
  static VALUE convert_encoding(VALUE source)
674
1125
  {
675
- #ifdef HAVE_RUBY_ENCODING_H
676
- rb_encoding *enc = rb_enc_get(source);
677
- if (enc == rb_ascii8bit_encoding()) {
678
- if (OBJ_FROZEN(source)) {
679
- source = rb_str_dup(source);
680
- }
681
- FORCE_UTF8(source);
682
- } else {
683
- source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding());
684
- }
685
- #endif
1126
+ int encindex = RB_ENCODING_GET(source);
1127
+
1128
+ if (RB_LIKELY(encindex == utf8_encindex)) {
686
1129
  return source;
1130
+ }
1131
+
1132
+ if (encindex == binary_encindex) {
1133
+ // For historical reasons, we silently reinterpret binary strings as UTF-8
1134
+ return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
1135
+ }
1136
+
1137
+ return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
1138
+ }
1139
+
1140
+ static int configure_parser_i(VALUE key, VALUE val, VALUE data)
1141
+ {
1142
+ JSON_Parser *json = (JSON_Parser *)data;
1143
+
1144
+ if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1145
+ else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
1146
+ else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
1147
+ else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
1148
+ else if (key == sym_freeze) { json->freeze = RTEST(val); }
1149
+ else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
1150
+ else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
1151
+ else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
1152
+ else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
1153
+ else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
1154
+ else if (key == sym_create_additions) {
1155
+ if (NIL_P(val)) {
1156
+ json->create_additions = true;
1157
+ json->deprecated_create_additions = true;
1158
+ } else {
1159
+ json->create_additions = RTEST(val);
1160
+ json->deprecated_create_additions = false;
1161
+ }
1162
+ }
1163
+
1164
+ return ST_CONTINUE;
1165
+ }
1166
+
1167
+ static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
1168
+ {
1169
+ if (json->Vsource) {
1170
+ rb_raise(rb_eTypeError, "already initialized instance");
1171
+ }
1172
+
1173
+ json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1174
+ json->max_nesting = 100;
1175
+
1176
+ if (!NIL_P(opts)) {
1177
+ Check_Type(opts, T_HASH);
1178
+ if (RHASH_SIZE(opts) > 0) {
1179
+ // We assume in most cases few keys are set so it's faster to go over
1180
+ // the provided keys than to check all possible keys.
1181
+ rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
1182
+
1183
+ if (json->symbolize_names && json->create_additions) {
1184
+ rb_raise(rb_eArgError,
1185
+ "options :symbolize_names and :create_additions cannot be "
1186
+ " used in conjunction");
1187
+ }
1188
+
1189
+ if (json->create_additions && !json->create_id) {
1190
+ json->create_id = rb_funcall(mJSON, i_create_id, 0);
1191
+ }
1192
+ }
1193
+
1194
+ }
1195
+ source = convert_encoding(StringValue(source));
1196
+ StringValue(source);
1197
+ json->len = RSTRING_LEN(source);
1198
+ json->source = RSTRING_PTR(source);
1199
+ json->Vsource = source;
687
1200
  }
688
1201
 
689
1202
  /*
@@ -708,105 +1221,23 @@ static VALUE convert_encoding(VALUE source)
708
1221
  * * *create_additions*: If set to false, the Parser doesn't create
709
1222
  * additions even if a matching class and create_id was found. This option
710
1223
  * defaults to false.
711
- * * *object_class*: Defaults to Hash
712
- * * *array_class*: Defaults to Array
1224
+ * * *object_class*: Defaults to Hash. If another type is provided, it will be used
1225
+ * instead of Hash to represent JSON objects. The type must respond to
1226
+ * +new+ without arguments, and return an object that responds to +[]=+.
1227
+ * * *array_class*: Defaults to Array. If another type is provided, it will be used
1228
+ * instead of Array to represent JSON arrays. The type must respond to
1229
+ * +new+ without arguments, and return an object that responds to +<<+.
1230
+ * * *decimal_class*: Specifies which class to use instead of the default
1231
+ * (Float) when parsing decimal numbers. This class must accept a single
1232
+ * string argument in its constructor.
713
1233
  */
714
1234
  static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
715
1235
  {
716
- VALUE source, opts;
717
1236
  GET_PARSER_INIT;
718
1237
 
719
- if (json->Vsource) {
720
- rb_raise(rb_eTypeError, "already initialized instance");
721
- }
722
- rb_scan_args(argc, argv, "1:", &source, &opts);
723
- if (!NIL_P(opts)) {
724
- VALUE tmp = ID2SYM(i_max_nesting);
725
- if (option_given_p(opts, tmp)) {
726
- VALUE max_nesting = rb_hash_aref(opts, tmp);
727
- if (RTEST(max_nesting)) {
728
- Check_Type(max_nesting, T_FIXNUM);
729
- json->max_nesting = FIX2INT(max_nesting);
730
- } else {
731
- json->max_nesting = 0;
732
- }
733
- } else {
734
- json->max_nesting = 100;
735
- }
736
- tmp = ID2SYM(i_allow_nan);
737
- if (option_given_p(opts, tmp)) {
738
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
739
- } else {
740
- json->allow_nan = 0;
741
- }
742
- tmp = ID2SYM(i_symbolize_names);
743
- if (option_given_p(opts, tmp)) {
744
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
745
- } else {
746
- json->symbolize_names = 0;
747
- }
748
- tmp = ID2SYM(i_freeze);
749
- if (option_given_p(opts, tmp)) {
750
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
751
- } else {
752
- json->freeze = 0;
753
- }
754
- tmp = ID2SYM(i_create_additions);
755
- if (option_given_p(opts, tmp)) {
756
- json->create_additions = RTEST(rb_hash_aref(opts, tmp));
757
- } else {
758
- json->create_additions = 0;
759
- }
760
- if (json->symbolize_names && json->create_additions) {
761
- rb_raise(rb_eArgError,
762
- "options :symbolize_names and :create_additions cannot be "
763
- " used in conjunction");
764
- }
765
- tmp = ID2SYM(i_create_id);
766
- if (option_given_p(opts, tmp)) {
767
- json->create_id = rb_hash_aref(opts, tmp);
768
- } else {
769
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
770
- }
771
- tmp = ID2SYM(i_object_class);
772
- if (option_given_p(opts, tmp)) {
773
- json->object_class = rb_hash_aref(opts, tmp);
774
- } else {
775
- json->object_class = Qnil;
776
- }
777
- tmp = ID2SYM(i_array_class);
778
- if (option_given_p(opts, tmp)) {
779
- json->array_class = rb_hash_aref(opts, tmp);
780
- } else {
781
- json->array_class = Qnil;
782
- }
783
- tmp = ID2SYM(i_decimal_class);
784
- if (option_given_p(opts, tmp)) {
785
- json->decimal_class = rb_hash_aref(opts, tmp);
786
- } else {
787
- json->decimal_class = Qnil;
788
- }
789
- tmp = ID2SYM(i_match_string);
790
- if (option_given_p(opts, tmp)) {
791
- VALUE match_string = rb_hash_aref(opts, tmp);
792
- json->match_string = RTEST(match_string) ? match_string : Qnil;
793
- } else {
794
- json->match_string = Qnil;
795
- }
796
- } else {
797
- json->max_nesting = 100;
798
- json->allow_nan = 0;
799
- json->create_additions = 0;
800
- json->create_id = Qnil;
801
- json->object_class = Qnil;
802
- json->array_class = Qnil;
803
- json->decimal_class = Qnil;
804
- }
805
- source = convert_encoding(StringValue(source));
806
- StringValue(source);
807
- json->len = RSTRING_LEN(source);
808
- json->source = RSTRING_PTR(source);;
809
- json->Vsource = source;
1238
+ rb_check_arity(argc, 1, 2);
1239
+
1240
+ parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
810
1241
  return self;
811
1242
  }
812
1243
 
@@ -836,64 +1267,117 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
836
1267
  */
837
1268
  static VALUE cParser_parse(VALUE self)
838
1269
  {
839
- char *p, *pe;
840
- int cs = EVIL;
841
- VALUE result = Qnil;
842
- GET_PARSER;
1270
+ char *p, *pe;
1271
+ int cs = EVIL;
1272
+ VALUE result = Qnil;
1273
+ GET_PARSER;
843
1274
 
844
- %% write init;
845
- p = json->source;
846
- pe = p + json->len;
847
- %% write exec;
1275
+ char stack_buffer[FBUFFER_STACK_SIZE];
1276
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
848
1277
 
849
- if (cs >= JSON_first_final && p == pe) {
850
- return result;
851
- } else {
852
- rb_enc_raise(EXC_ENCODING eParserError, "unexpected token at '%s'", p);
853
- return Qnil;
854
- }
1278
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1279
+ rvalue_stack stack = {
1280
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1281
+ .ptr = rvalue_stack_buffer,
1282
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1283
+ };
1284
+ json->stack = &stack;
1285
+
1286
+ %% write init;
1287
+ p = json->source;
1288
+ pe = p + json->len;
1289
+ %% write exec;
1290
+
1291
+ if (json->stack_handle) {
1292
+ rvalue_stack_eagerly_release(json->stack_handle);
1293
+ }
1294
+
1295
+ if (cs >= JSON_first_final && p == pe) {
1296
+ return result;
1297
+ } else {
1298
+ raise_parse_error("unexpected token at '%s'", p);
1299
+ return Qnil;
1300
+ }
1301
+ }
1302
+
1303
+ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
1304
+ {
1305
+ char *p, *pe;
1306
+ int cs = EVIL;
1307
+ VALUE result = Qnil;
1308
+
1309
+ JSON_Parser _parser = {0};
1310
+ JSON_Parser *json = &_parser;
1311
+ parser_init(json, source, opts);
1312
+
1313
+ char stack_buffer[FBUFFER_STACK_SIZE];
1314
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
1315
+
1316
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1317
+ rvalue_stack stack = {
1318
+ .type = RVALUE_STACK_STACK_ALLOCATED,
1319
+ .ptr = rvalue_stack_buffer,
1320
+ .capa = RVALUE_STACK_INITIAL_CAPA,
1321
+ };
1322
+ json->stack = &stack;
1323
+
1324
+ %% write init;
1325
+ p = json->source;
1326
+ pe = p + json->len;
1327
+ %% write exec;
1328
+
1329
+ if (json->stack_handle) {
1330
+ rvalue_stack_eagerly_release(json->stack_handle);
1331
+ }
1332
+
1333
+ if (cs >= JSON_first_final && p == pe) {
1334
+ return result;
1335
+ } else {
1336
+ raise_parse_error("unexpected token at '%s'", p);
1337
+ return Qnil;
1338
+ }
855
1339
  }
856
1340
 
857
1341
  static void JSON_mark(void *ptr)
858
1342
  {
859
1343
  JSON_Parser *json = ptr;
860
- rb_gc_mark_maybe(json->Vsource);
861
- rb_gc_mark_maybe(json->create_id);
862
- rb_gc_mark_maybe(json->object_class);
863
- rb_gc_mark_maybe(json->array_class);
864
- rb_gc_mark_maybe(json->decimal_class);
865
- rb_gc_mark_maybe(json->match_string);
1344
+ rb_gc_mark(json->Vsource);
1345
+ rb_gc_mark(json->create_id);
1346
+ rb_gc_mark(json->object_class);
1347
+ rb_gc_mark(json->array_class);
1348
+ rb_gc_mark(json->decimal_class);
1349
+ rb_gc_mark(json->match_string);
1350
+ rb_gc_mark(json->stack_handle);
1351
+
1352
+ const VALUE *name_cache_entries = &json->name_cache.entries[0];
1353
+ rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length);
866
1354
  }
867
1355
 
868
1356
  static void JSON_free(void *ptr)
869
1357
  {
870
1358
  JSON_Parser *json = ptr;
871
- fbuffer_free(json->fbuffer);
1359
+ fbuffer_free(&json->fbuffer);
872
1360
  ruby_xfree(json);
873
1361
  }
874
1362
 
875
1363
  static size_t JSON_memsize(const void *ptr)
876
1364
  {
877
1365
  const JSON_Parser *json = ptr;
878
- return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
1366
+ return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer);
879
1367
  }
880
1368
 
881
- #ifdef NEW_TYPEDDATA_WRAPPER
882
1369
  static const rb_data_type_t JSON_Parser_type = {
883
1370
  "JSON/Parser",
884
1371
  {JSON_mark, JSON_free, JSON_memsize,},
885
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
886
1372
  0, 0,
887
1373
  RUBY_TYPED_FREE_IMMEDIATELY,
888
- #endif
889
1374
  };
890
- #endif
891
1375
 
892
1376
  static VALUE cJSON_parser_s_allocate(VALUE klass)
893
1377
  {
894
1378
  JSON_Parser *json;
895
1379
  VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
896
- json->fbuffer = fbuffer_alloc(0);
1380
+ fbuffer_stack_init(&json->fbuffer, 0, NULL, 0);
897
1381
  return obj;
898
1382
  }
899
1383
 
@@ -920,15 +1404,15 @@ void Init_parser(void)
920
1404
  mJSON = rb_define_module("JSON");
921
1405
  mExt = rb_define_module_under(mJSON, "Ext");
922
1406
  cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
923
- eParserError = rb_path2class("JSON::ParserError");
924
1407
  eNestingError = rb_path2class("JSON::NestingError");
925
- rb_gc_register_mark_object(eParserError);
926
1408
  rb_gc_register_mark_object(eNestingError);
927
1409
  rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
928
1410
  rb_define_method(cParser, "initialize", cParser_initialize, -1);
929
1411
  rb_define_method(cParser, "parse", cParser_parse, 0);
930
1412
  rb_define_method(cParser, "source", cParser_source, 0);
931
1413
 
1414
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
1415
+
932
1416
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
933
1417
  rb_gc_register_mark_object(CNaN);
934
1418
 
@@ -938,28 +1422,38 @@ void Init_parser(void)
938
1422
  CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
939
1423
  rb_gc_register_mark_object(CMinusInfinity);
940
1424
 
1425
+ rb_global_variable(&Encoding_UTF_8);
1426
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1427
+
1428
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1429
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1430
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
1431
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
1432
+ sym_freeze = ID2SYM(rb_intern("freeze"));
1433
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
1434
+ sym_create_id = ID2SYM(rb_intern("create_id"));
1435
+ sym_object_class = ID2SYM(rb_intern("object_class"));
1436
+ sym_array_class = ID2SYM(rb_intern("array_class"));
1437
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1438
+ sym_match_string = ID2SYM(rb_intern("match_string"));
1439
+
1440
+ i_create_id = rb_intern("create_id");
941
1441
  i_json_creatable_p = rb_intern("json_creatable?");
942
1442
  i_json_create = rb_intern("json_create");
943
- i_create_id = rb_intern("create_id");
944
- i_create_additions = rb_intern("create_additions");
945
1443
  i_chr = rb_intern("chr");
946
- i_max_nesting = rb_intern("max_nesting");
947
- i_allow_nan = rb_intern("allow_nan");
948
- i_symbolize_names = rb_intern("symbolize_names");
949
- i_object_class = rb_intern("object_class");
950
- i_array_class = rb_intern("array_class");
951
- i_decimal_class = rb_intern("decimal_class");
952
1444
  i_match = rb_intern("match");
953
- i_match_string = rb_intern("match_string");
954
- i_key_p = rb_intern("key?");
955
1445
  i_deep_const_get = rb_intern("deep_const_get");
956
1446
  i_aset = rb_intern("[]=");
957
1447
  i_aref = rb_intern("[]");
958
1448
  i_leftshift = rb_intern("<<");
959
1449
  i_new = rb_intern("new");
960
1450
  i_try_convert = rb_intern("try_convert");
961
- i_freeze = rb_intern("freeze");
962
1451
  i_uminus = rb_intern("-@");
1452
+ i_encode = rb_intern("encode");
1453
+
1454
+ binary_encindex = rb_ascii8bit_encindex();
1455
+ utf8_encindex = rb_utf8_encindex();
1456
+ enc_utf8 = rb_utf8_encoding();
963
1457
  }
964
1458
 
965
1459
  /*