json 2.7.6 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +15 -0
- data/README.md +8 -77
- data/ext/json/ext/fbuffer/fbuffer.h +85 -51
- data/ext/json/ext/generator/generator.c +327 -204
- data/ext/json/ext/parser/extconf.rb +5 -27
- data/ext/json/ext/parser/parser.c +1536 -474
- data/ext/json/ext/parser/parser.rl +717 -243
- data/json.gemspec +4 -1
- data/lib/json/add/bigdecimal.rb +1 -1
- data/lib/json/common.rb +200 -59
- data/lib/json/ext/generator/state.rb +1 -31
- data/lib/json/ext.rb +2 -4
- data/lib/json/{pure → truffle_ruby}/generator.rb +150 -128
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +15 -20
- metadata +4 -8
- data/ext/json/ext/generator/generator.h +0 -118
- data/ext/json/ext/parser/parser.h +0 -60
- data/lib/json/pure/parser.rb +0 -331
- data/lib/json/pure.rb +0 -16
@@ -1,5 +1,319 @@
|
|
1
|
+
#include "ruby.h"
|
1
2
|
#include "../fbuffer/fbuffer.h"
|
2
|
-
|
3
|
+
|
4
|
+
static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
|
5
|
+
static VALUE CNaN, CInfinity, CMinusInfinity;
|
6
|
+
|
7
|
+
static ID i_json_creatable_p, i_json_create, i_create_id,
|
8
|
+
i_chr, i_deep_const_get, i_match, i_aset, i_aref,
|
9
|
+
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
10
|
+
|
11
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
12
|
+
sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
|
13
|
+
sym_decimal_class, sym_match_string;
|
14
|
+
|
15
|
+
static int binary_encindex;
|
16
|
+
static int utf8_encindex;
|
17
|
+
|
18
|
+
#ifdef HAVE_RB_CATEGORY_WARN
|
19
|
+
# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message)
|
20
|
+
#else
|
21
|
+
# define json_deprecated(message) rb_warn(message)
|
22
|
+
#endif
|
23
|
+
|
24
|
+
static const char deprecated_create_additions_warning[] =
|
25
|
+
"JSON.load implicit support for `create_additions: true` is deprecated "
|
26
|
+
"and will be removed in 3.0, use JSON.unsafe_load or explicitly "
|
27
|
+
"pass `create_additions: true`";
|
28
|
+
|
29
|
+
#ifndef HAVE_RB_GC_MARK_LOCATIONS
|
30
|
+
// For TruffleRuby
|
31
|
+
// Fallback used when the Ruby implementation does not export
// rb_gc_mark_locations: marks every VALUE in the half-open range [start, end).
void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
{
    // Keep the cursor const-qualified: the range is only read, and assigning
    // `start` to a plain `VALUE *` would silently discard its qualifier.
    const VALUE *value = start;

    while (value < end) {
        rb_gc_mark(*value);
        value++;
    }
}
|
40
|
+
#endif
|
41
|
+
|
42
|
+
#ifndef HAVE_RB_HASH_BULK_INSERT
|
43
|
+
// For TruffleRuby
|
44
|
+
// Fallback used when rb_hash_bulk_insert is not exported: `pairs` holds
// `count` VALUEs laid out as key, value, key, value, ... which are stored
// into `hash` one pair at a time.
void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
{
    for (long cursor = 0; cursor < count; cursor += 2) {
        VALUE name = pairs[cursor];
        VALUE value = pairs[cursor + 1];
        rb_hash_aset(hash, name, value);
    }
    RB_GC_GUARD(hash);
}
|
54
|
+
#endif
|
55
|
+
|
56
|
+
/* name cache */
|
57
|
+
|
58
|
+
#include <string.h>
|
59
|
+
#include <ctype.h>
|
60
|
+
|
61
|
+
// Object names are likely to be repeated, and are frozen.
|
62
|
+
// As such we can re-use them if we keep a cache of the ones we've seen so far,
|
63
|
+
// and save much more expensive lookups into the global fstring table.
|
64
|
+
// This cache implementation is deliberately simple, as we're optimizing for compactness,
|
65
|
+
// to be able to fit safely on the stack.
|
66
|
+
// As such, binary search into a sorted array gives a good tradeoff between compactness and
|
67
|
+
// performance.
|
68
|
+
#define JSON_RVALUE_CACHE_CAPA 63
|
69
|
+
typedef struct rvalue_cache_struct {
|
70
|
+
int length;
|
71
|
+
VALUE entries[JSON_RVALUE_CACHE_CAPA];
|
72
|
+
} rvalue_cache;
|
73
|
+
|
74
|
+
static rb_encoding *enc_utf8;
|
75
|
+
|
76
|
+
#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
|
77
|
+
|
78
|
+
static inline VALUE build_interned_string(const char *str, const long length)
|
79
|
+
{
|
80
|
+
# ifdef HAVE_RB_ENC_INTERNED_STR
|
81
|
+
return rb_enc_interned_str(str, length, enc_utf8);
|
82
|
+
# else
|
83
|
+
VALUE rstring = rb_utf8_str_new(str, length);
|
84
|
+
return rb_funcall(rb_str_freeze(rstring), i_uminus, 0);
|
85
|
+
# endif
|
86
|
+
}
|
87
|
+
|
88
|
+
// Returns the Symbol for the `length` bytes at `str`, going through the
// interned string so no throwaway String has to be kept around.
static inline VALUE build_symbol(const char *str, const long length)
{
    VALUE interned = build_interned_string(str, length);
    return rb_str_intern(interned);
}
|
92
|
+
|
93
|
+
// Inserts `rstring` at position `index`, shifting the entries from `index`
// onwards one slot to the right. The caller must have checked that the cache
// still has a free slot.
static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
{
    VALUE *slot = &cache->entries[index];
    const int tail_count = cache->length - index;

    MEMMOVE(slot + 1, slot, VALUE, tail_count);
    *slot = rstring;
    cache->length += 1;
}
|
99
|
+
|
100
|
+
// Orders a raw name against a cached Ruby string: names of different lengths
// compare by length, names of equal length compare bytewise.
// Returns <0, 0 or >0 like memcmp.
static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
{
    const long cached_length = RSTRING_LEN(rstring);

    if (length != cached_length) {
        return (int)(length - cached_length);
    }
    return memcmp(str, RSTRING_PTR(rstring), length);
}
|
109
|
+
|
110
|
+
// Looks up the object name `str` (`length` bytes) in the sorted name cache.
//
// Returns the cached interned string on a hit. On a miss, a new interned
// string is built, inserted at its sorted position if the cache has room,
// and returned. Returns Qfalse when the name is not cacheable (too long,
// does not start with a letter, or contains a backslash escape), in which
// case the caller has to build the string itself.
static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
{
    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
        // Common names aren't likely to be very long. So we just don't
        // cache names above an arbitrary threshold.
        return Qfalse;
    }

    // <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF). Plain char may be signed, so a UTF-8 lead byte
    // would be passed as a negative int without the cast.
    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
        // Simple heuristic, if the first character isn't a letter,
        // we're much less likely to see this string again.
        // We mostly want to cache strings that are likely to be repeated.
        return Qfalse;
    }

    int low = 0;
    int high = cache->length - 1;
    int mid = 0;
    int last_cmp = 0;

    // Binary search; `mid` and `last_cmp` are kept after the loop to derive
    // the insertion point on a miss.
    while (low <= high) {
        mid = (high + low) >> 1;
        VALUE entry = cache->entries[mid];
        last_cmp = rstring_cache_cmp(str, length, entry);

        if (last_cmp == 0) {
            return entry;
        } else if (last_cmp > 0) {
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }

    if (RB_UNLIKELY(memchr(str, '\\', length))) {
        // We assume the overwhelming majority of names don't need to be escaped.
        // But if they do, we have to fallback to the slow path.
        return Qfalse;
    }

    VALUE rstring = build_interned_string(str, length);

    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
        // The new name sorts after the last probed entry: insert right behind it.
        if (last_cmp > 0) {
            mid += 1;
        }

        rvalue_cache_insert_at(cache, mid, rstring);
    }
    return rstring;
}
|
161
|
+
|
162
|
+
// Symbol flavour of the name cache lookup: entries are Symbols and are
// compared through their string form (rb_sym2str).
//
// Returns the cached Symbol on a hit. On a miss, a new Symbol is built,
// inserted at its sorted position if the cache has room, and returned.
// Returns Qfalse when the name is not cacheable (too long, does not start
// with a letter, or contains a backslash escape), in which case the caller
// has to build the symbol itself.
static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
{
    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
        // Common names aren't likely to be very long. So we just don't
        // cache names above an arbitrary threshold.
        return Qfalse;
    }

    // <ctype.h> functions are only defined for values representable as
    // unsigned char (or EOF). Plain char may be signed, so a UTF-8 lead byte
    // would be passed as a negative int without the cast.
    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
        // Simple heuristic, if the first character isn't a letter,
        // we're much less likely to see this string again.
        // We mostly want to cache strings that are likely to be repeated.
        return Qfalse;
    }

    int low = 0;
    int high = cache->length - 1;
    int mid = 0;
    int last_cmp = 0;

    // Binary search; `mid` and `last_cmp` are kept after the loop to derive
    // the insertion point on a miss.
    while (low <= high) {
        mid = (high + low) >> 1;
        VALUE entry = cache->entries[mid];
        last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));

        if (last_cmp == 0) {
            return entry;
        } else if (last_cmp > 0) {
            low = mid + 1;
        } else {
            high = mid - 1;
        }
    }

    if (RB_UNLIKELY(memchr(str, '\\', length))) {
        // We assume the overwhelming majority of names don't need to be escaped.
        // But if they do, we have to fallback to the slow path.
        return Qfalse;
    }

    VALUE rsymbol = build_symbol(str, length);

    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
        // The new name sorts after the last probed entry: insert right behind it.
        if (last_cmp > 0) {
            mid += 1;
        }

        rvalue_cache_insert_at(cache, mid, rsymbol);
    }
    return rsymbol;
}
|
213
|
+
|
214
|
+
/* rvalue stack */
|
215
|
+
|
216
|
+
#define RVALUE_STACK_INITIAL_CAPA 128
|
217
|
+
|
218
|
+
// Where an rvalue_stack lives; rvalue_stack_grow uses this to decide between
// growing in place and spilling to a heap copy.
enum rvalue_stack_type {
    // Stack struct and buffer were allocated by rvalue_stack_spill.
    RVALUE_STACK_HEAP_ALLOCATED = 0,
    // Not yet spilled; the first grow moves it to the heap.
    RVALUE_STACK_STACK_ALLOCATED = 1,
};
|
222
|
+
|
223
|
+
typedef struct rvalue_stack_struct {
|
224
|
+
enum rvalue_stack_type type;
|
225
|
+
long capa;
|
226
|
+
long head;
|
227
|
+
VALUE *ptr;
|
228
|
+
} rvalue_stack;
|
229
|
+
|
230
|
+
static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
|
231
|
+
|
232
|
+
// Doubles the capacity of `stack` and returns the stack to use from now on.
// A stack-allocated stack is first spilled to a heap copy (which updates
// *handle and *stack_ref); a heap stack is reallocated in place.
static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
{
    if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
        return rvalue_stack_spill(stack, handle, stack_ref);
    }

    const long doubled = stack->capa * 2;
    REALLOC_N(stack->ptr, VALUE, doubled);
    stack->capa = doubled;
    return stack;
}
|
244
|
+
|
245
|
+
// Pushes `value`, growing the stack first when it is full. Growing may
// replace the stack, in which case *handle and *stack_ref are updated.
static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
{
    if (RB_UNLIKELY(stack->head >= stack->capa)) {
        stack = rvalue_stack_grow(stack, handle, stack_ref);
    }

    const long slot = stack->head;
    stack->ptr[slot] = value;
    stack->head = slot + 1;
}
|
253
|
+
|
254
|
+
// Returns a pointer to the first of the `count` most recently pushed values
// without removing them.
static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
{
    const long first = stack->head - count;
    return &stack->ptr[first];
}
|
258
|
+
|
259
|
+
// Discards the `count` most recently pushed values.
static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
{
    stack->head = stack->head - count;
}
|
263
|
+
|
264
|
+
static void rvalue_stack_mark(void *ptr)
|
265
|
+
{
|
266
|
+
rvalue_stack *stack = (rvalue_stack *)ptr;
|
267
|
+
rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head);
|
268
|
+
}
|
269
|
+
|
270
|
+
static void rvalue_stack_free(void *ptr)
|
271
|
+
{
|
272
|
+
rvalue_stack *stack = (rvalue_stack *)ptr;
|
273
|
+
if (stack) {
|
274
|
+
ruby_xfree(stack->ptr);
|
275
|
+
ruby_xfree(stack);
|
276
|
+
}
|
277
|
+
}
|
278
|
+
|
279
|
+
static size_t rvalue_stack_memsize(const void *ptr)
|
280
|
+
{
|
281
|
+
const rvalue_stack *stack = (const rvalue_stack *)ptr;
|
282
|
+
return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
|
283
|
+
}
|
284
|
+
|
285
|
+
static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
|
286
|
+
"JSON::Ext::Parser/rvalue_stack",
|
287
|
+
{
|
288
|
+
.dmark = rvalue_stack_mark,
|
289
|
+
.dfree = rvalue_stack_free,
|
290
|
+
.dsize = rvalue_stack_memsize,
|
291
|
+
},
|
292
|
+
0, 0,
|
293
|
+
RUBY_TYPED_FREE_IMMEDIATELY,
|
294
|
+
};
|
295
|
+
|
296
|
+
// Moves a stack to the heap with twice its capacity.
// A TypedData object owning the new stack is stored in *handle, and
// *stack_ref is pointed at the new stack, which is also returned.
// The old stack's memory is not released here.
static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
{
    rvalue_stack *heap_stack;
    *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, heap_stack);
    *stack_ref = heap_stack;

    // Start from a copy of the old bookkeeping, then swap in the larger buffer.
    *heap_stack = *old_stack;
    heap_stack->capa = old_stack->capa << 1;
    heap_stack->ptr = ALLOC_N(VALUE, heap_stack->capa);
    heap_stack->type = RVALUE_STACK_HEAP_ALLOCATED;

    MEMCPY(heap_stack->ptr, old_stack->ptr, VALUE, old_stack->head);
    return heap_stack;
}
|
309
|
+
|
310
|
+
// Frees the stack owned by `handle` right away instead of waiting for GC.
// The handle's data pointer is cleared first so the later dfree callback
// sees NULL and does nothing.
static void rvalue_stack_eagerly_release(VALUE handle)
{
    rvalue_stack *stack = rb_check_typeddata(handle, &JSON_Parser_rvalue_stack_type);

    RTYPEDDATA_DATA(handle) = NULL;
    rvalue_stack_free(stack);
}
|
3
317
|
|
4
318
|
/* unicode */
|
5
319
|
|
@@ -67,6 +381,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
67
381
|
return len;
|
68
382
|
}
|
69
383
|
|
384
|
+
typedef struct JSON_ParserStruct {
|
385
|
+
VALUE Vsource;
|
386
|
+
char *source;
|
387
|
+
long len;
|
388
|
+
char *memo;
|
389
|
+
VALUE create_id;
|
390
|
+
VALUE object_class;
|
391
|
+
VALUE array_class;
|
392
|
+
VALUE decimal_class;
|
393
|
+
VALUE match_string;
|
394
|
+
FBuffer fbuffer;
|
395
|
+
int max_nesting;
|
396
|
+
bool allow_nan;
|
397
|
+
bool allow_trailing_comma;
|
398
|
+
bool parsing_name;
|
399
|
+
bool symbolize_names;
|
400
|
+
bool freeze;
|
401
|
+
bool create_additions;
|
402
|
+
bool deprecated_create_additions;
|
403
|
+
rvalue_cache name_cache;
|
404
|
+
rvalue_stack *stack;
|
405
|
+
VALUE stack_handle;
|
406
|
+
} JSON_Parser;
|
407
|
+
|
408
|
+
#define GET_PARSER \
|
409
|
+
GET_PARSER_INIT; \
|
410
|
+
if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
|
411
|
+
|
412
|
+
#define GET_PARSER_INIT \
|
413
|
+
JSON_Parser *json; \
|
414
|
+
TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
|
415
|
+
|
416
|
+
#define MinusInfinity "-Infinity"
|
417
|
+
#define EVIL 0x666
|
418
|
+
|
419
|
+
static const rb_data_type_t JSON_Parser_type;
|
420
|
+
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
|
421
|
+
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
|
422
|
+
static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
|
423
|
+
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
|
424
|
+
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
|
425
|
+
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
|
426
|
+
|
427
|
+
|
70
428
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
71
429
|
#ifdef RBIMPL_ATTR_NORETURN
|
72
430
|
RBIMPL_ATTR_NORETURN()
|
@@ -84,21 +442,9 @@ static void raise_parse_error(const char *format, const char *start)
|
|
84
442
|
ptr = buffer;
|
85
443
|
}
|
86
444
|
|
87
|
-
rb_enc_raise(
|
445
|
+
rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
|
88
446
|
}
|
89
447
|
|
90
|
-
static VALUE mJSON, mExt, cParser, eNestingError;
|
91
|
-
static VALUE CNaN, CInfinity, CMinusInfinity;
|
92
|
-
|
93
|
-
static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
|
94
|
-
i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
|
95
|
-
i_object_class, i_array_class, i_decimal_class,
|
96
|
-
i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
|
97
|
-
i_leftshift, i_new, i_try_convert, i_freeze, i_uminus;
|
98
|
-
|
99
|
-
static int binary_encindex;
|
100
|
-
static int utf8_encindex;
|
101
|
-
|
102
448
|
|
103
449
|
%%{
|
104
450
|
machine JSON_common;
|
@@ -135,27 +481,25 @@ static int utf8_encindex;
|
|
135
481
|
write data;
|
136
482
|
|
137
483
|
action parse_value {
|
138
|
-
|
139
|
-
char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
|
484
|
+
char *np = JSON_parse_value(json, fpc, pe, result, current_nesting);
|
140
485
|
if (np == NULL) {
|
141
486
|
fhold; fbreak;
|
142
487
|
} else {
|
143
|
-
if (NIL_P(json->object_class)) {
|
144
|
-
OBJ_FREEZE(last_name);
|
145
|
-
rb_hash_aset(*result, last_name, v);
|
146
|
-
} else {
|
147
|
-
rb_funcall(*result, i_aset, 2, last_name, v);
|
148
|
-
}
|
149
488
|
fexec np;
|
150
489
|
}
|
151
490
|
}
|
152
491
|
|
492
|
+
action allow_trailing_comma { json->allow_trailing_comma }
|
493
|
+
|
153
494
|
action parse_name {
|
154
495
|
char *np;
|
155
|
-
json->parsing_name =
|
156
|
-
np = JSON_parse_string(json, fpc, pe,
|
157
|
-
json->parsing_name =
|
158
|
-
if (np == NULL) { fhold; fbreak; } else
|
496
|
+
json->parsing_name = true;
|
497
|
+
np = JSON_parse_string(json, fpc, pe, result);
|
498
|
+
json->parsing_name = false;
|
499
|
+
if (np == NULL) { fhold; fbreak; } else {
|
500
|
+
PUSH(*result);
|
501
|
+
fexec np;
|
502
|
+
}
|
159
503
|
}
|
160
504
|
|
161
505
|
action exit { fhold; fbreak; }
|
@@ -165,37 +509,64 @@ static int utf8_encindex;
|
|
165
509
|
|
166
510
|
main := (
|
167
511
|
begin_object
|
168
|
-
(pair (next_pair)*)? ignore*
|
512
|
+
(pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
|
169
513
|
end_object
|
170
514
|
) @exit;
|
171
515
|
}%%
|
172
516
|
|
517
|
+
#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
|
518
|
+
|
173
519
|
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
|
174
520
|
{
|
175
521
|
int cs = EVIL;
|
176
|
-
VALUE last_name = Qnil;
|
177
|
-
VALUE object_class = json->object_class;
|
178
522
|
|
179
523
|
if (json->max_nesting && current_nesting > json->max_nesting) {
|
180
524
|
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
|
181
525
|
}
|
182
526
|
|
183
|
-
|
527
|
+
long stack_head = json->stack->head;
|
184
528
|
|
185
529
|
%% write init;
|
186
530
|
%% write exec;
|
187
531
|
|
188
532
|
if (cs >= JSON_object_first_final) {
|
189
|
-
|
533
|
+
long count = json->stack->head - stack_head;
|
534
|
+
|
535
|
+
if (RB_UNLIKELY(json->object_class)) {
|
536
|
+
VALUE object = rb_class_new_instance(0, 0, json->object_class);
|
537
|
+
long index = 0;
|
538
|
+
VALUE *items = rvalue_stack_peek(json->stack, count);
|
539
|
+
while (index < count) {
|
540
|
+
VALUE name = items[index++];
|
541
|
+
VALUE value = items[index++];
|
542
|
+
rb_funcall(object, i_aset, 2, name, value);
|
543
|
+
}
|
544
|
+
*result = object;
|
545
|
+
} else {
|
546
|
+
VALUE hash;
|
547
|
+
#ifdef HAVE_RB_HASH_NEW_CAPA
|
548
|
+
hash = rb_hash_new_capa(count >> 1);
|
549
|
+
#else
|
550
|
+
hash = rb_hash_new();
|
551
|
+
#endif
|
552
|
+
rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
|
553
|
+
*result = hash;
|
554
|
+
}
|
555
|
+
rvalue_stack_pop(json->stack, count);
|
556
|
+
|
557
|
+
if (RB_UNLIKELY(json->create_additions)) {
|
190
558
|
VALUE klassname;
|
191
|
-
if (
|
192
|
-
|
559
|
+
if (json->object_class) {
|
560
|
+
klassname = rb_funcall(*result, i_aref, 1, json->create_id);
|
193
561
|
} else {
|
194
|
-
|
562
|
+
klassname = rb_hash_aref(*result, json->create_id);
|
195
563
|
}
|
196
564
|
if (!NIL_P(klassname)) {
|
197
565
|
VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
|
198
566
|
if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) {
|
567
|
+
if (json->deprecated_create_additions) {
|
568
|
+
json_deprecated(deprecated_create_additions_warning);
|
569
|
+
}
|
199
570
|
*result = rb_funcall(klass, i_json_create, 1, *result);
|
200
571
|
}
|
201
572
|
}
|
@@ -206,7 +577,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
|
|
206
577
|
}
|
207
578
|
}
|
208
579
|
|
209
|
-
|
210
580
|
%%{
|
211
581
|
machine JSON_value;
|
212
582
|
include JSON_common;
|
@@ -238,7 +608,12 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
|
|
238
608
|
}
|
239
609
|
action parse_string {
|
240
610
|
char *np = JSON_parse_string(json, fpc, pe, result);
|
241
|
-
if (np == NULL) {
|
611
|
+
if (np == NULL) {
|
612
|
+
fhold;
|
613
|
+
fbreak;
|
614
|
+
} else {
|
615
|
+
fexec np;
|
616
|
+
}
|
242
617
|
}
|
243
618
|
|
244
619
|
action parse_number {
|
@@ -253,9 +628,13 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
|
|
253
628
|
}
|
254
629
|
}
|
255
630
|
np = JSON_parse_float(json, fpc, pe, result);
|
256
|
-
if (np != NULL)
|
631
|
+
if (np != NULL) {
|
632
|
+
fexec np;
|
633
|
+
}
|
257
634
|
np = JSON_parse_integer(json, fpc, pe, result);
|
258
|
-
if (np != NULL)
|
635
|
+
if (np != NULL) {
|
636
|
+
fexec np;
|
637
|
+
}
|
259
638
|
fhold; fbreak;
|
260
639
|
}
|
261
640
|
|
@@ -279,10 +658,10 @@ main := ignore* (
|
|
279
658
|
Vtrue @parse_true |
|
280
659
|
VNaN @parse_nan |
|
281
660
|
VInfinity @parse_infinity |
|
282
|
-
begin_number
|
283
|
-
begin_string
|
284
|
-
begin_array
|
285
|
-
begin_object
|
661
|
+
begin_number @parse_number |
|
662
|
+
begin_string @parse_string |
|
663
|
+
begin_array @parse_array |
|
664
|
+
begin_object @parse_object
|
286
665
|
) ignore* %*exit;
|
287
666
|
}%%
|
288
667
|
|
@@ -298,6 +677,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
|
|
298
677
|
}
|
299
678
|
|
300
679
|
if (cs >= JSON_value_first_final) {
|
680
|
+
PUSH(*result);
|
301
681
|
return p;
|
302
682
|
} else {
|
303
683
|
return NULL;
|
@@ -314,6 +694,28 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
|
|
314
694
|
main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
|
315
695
|
}%%
|
316
696
|
|
697
|
+
#define MAX_FAST_INTEGER_SIZE 18
|
698
|
+
// Converts the decimal text in [p, pe), with an optional leading '-', into
// a Ruby Integer. No validation is done here; the only caller visible in
// this file passes fewer than MAX_FAST_INTEGER_SIZE characters, which keeps
// the accumulator within long long.
static inline VALUE fast_parse_integer(char *p, char *pe)
{
    const bool negative = (*p == '-');
    if (negative) {
        p++;
    }

    long long memo = 0;
    for (; p < pe; p++) {
        memo = memo * 10 + (*p - '0');
    }

    return LL2NUM(negative ? -memo : memo);
}
|
718
|
+
|
317
719
|
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
|
318
720
|
{
|
319
721
|
int cs = EVIL;
|
@@ -324,10 +726,14 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
|
|
324
726
|
|
325
727
|
if (cs >= JSON_integer_first_final) {
|
326
728
|
long len = p - json->memo;
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
729
|
+
if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) {
|
730
|
+
*result = fast_parse_integer(json->memo, p);
|
731
|
+
} else {
|
732
|
+
fbuffer_clear(&json->fbuffer);
|
733
|
+
fbuffer_append(&json->fbuffer, json->memo, len);
|
734
|
+
fbuffer_append_char(&json->fbuffer, '\0');
|
735
|
+
*result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10);
|
736
|
+
}
|
331
737
|
return p + 1;
|
332
738
|
} else {
|
333
739
|
return NULL;
|
@@ -359,7 +765,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
|
|
359
765
|
if (cs >= JSON_float_first_final) {
|
360
766
|
VALUE mod = Qnil;
|
361
767
|
ID method_id = 0;
|
362
|
-
if (
|
768
|
+
if (json->decimal_class) {
|
363
769
|
if (rb_respond_to(json->decimal_class, i_try_convert)) {
|
364
770
|
mod = json->decimal_class;
|
365
771
|
method_id = i_try_convert;
|
@@ -388,15 +794,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
|
|
388
794
|
}
|
389
795
|
|
390
796
|
long len = p - json->memo;
|
391
|
-
fbuffer_clear(json->fbuffer);
|
392
|
-
fbuffer_append(json->fbuffer, json->memo, len);
|
393
|
-
fbuffer_append_char(json->fbuffer, '\0');
|
797
|
+
fbuffer_clear(&json->fbuffer);
|
798
|
+
fbuffer_append(&json->fbuffer, json->memo, len);
|
799
|
+
fbuffer_append_char(&json->fbuffer, '\0');
|
394
800
|
|
395
801
|
if (method_id) {
|
396
|
-
VALUE text = rb_str_new2(FBUFFER_PTR(json->fbuffer));
|
802
|
+
VALUE text = rb_str_new2(FBUFFER_PTR(&json->fbuffer));
|
397
803
|
*result = rb_funcallv(mod, method_id, 1, &text);
|
398
804
|
} else {
|
399
|
-
*result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(json->fbuffer), 1));
|
805
|
+
*result = DBL2NUM(rb_cstr_to_dbl(FBUFFER_PTR(&json->fbuffer), 1));
|
400
806
|
}
|
401
807
|
|
402
808
|
return p + 1;
|
@@ -418,39 +824,51 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
|
|
418
824
|
if (np == NULL) {
|
419
825
|
fhold; fbreak;
|
420
826
|
} else {
|
421
|
-
if (NIL_P(json->array_class)) {
|
422
|
-
rb_ary_push(*result, v);
|
423
|
-
} else {
|
424
|
-
rb_funcall(*result, i_leftshift, 1, v);
|
425
|
-
}
|
426
827
|
fexec np;
|
427
828
|
}
|
428
829
|
}
|
429
830
|
|
831
|
+
action allow_trailing_comma { json->allow_trailing_comma }
|
832
|
+
|
430
833
|
action exit { fhold; fbreak; }
|
431
834
|
|
432
835
|
next_element = value_separator ignore* begin_value >parse_value;
|
433
836
|
|
434
837
|
main := begin_array ignore*
|
435
838
|
((begin_value >parse_value ignore*)
|
436
|
-
|
839
|
+
(ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
|
437
840
|
end_array @exit;
|
438
841
|
}%%
|
439
842
|
|
440
843
|
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
|
441
844
|
{
|
442
845
|
int cs = EVIL;
|
443
|
-
VALUE array_class = json->array_class;
|
444
846
|
|
445
847
|
if (json->max_nesting && current_nesting > json->max_nesting) {
|
446
848
|
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
|
447
849
|
}
|
448
|
-
|
850
|
+
long stack_head = json->stack->head;
|
449
851
|
|
450
852
|
%% write init;
|
451
853
|
%% write exec;
|
452
854
|
|
453
855
|
if(cs >= JSON_array_first_final) {
|
856
|
+
long count = json->stack->head - stack_head;
|
857
|
+
|
858
|
+
if (RB_UNLIKELY(json->array_class)) {
|
859
|
+
VALUE array = rb_class_new_instance(0, 0, json->array_class);
|
860
|
+
VALUE *items = rvalue_stack_peek(json->stack, count);
|
861
|
+
long index;
|
862
|
+
for (index = 0; index < count; index++) {
|
863
|
+
rb_funcall(array, i_leftshift, 1, items[index]);
|
864
|
+
}
|
865
|
+
*result = array;
|
866
|
+
} else {
|
867
|
+
VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
|
868
|
+
*result = array;
|
869
|
+
}
|
870
|
+
rvalue_stack_pop(json->stack, count);
|
871
|
+
|
454
872
|
return p + 1;
|
455
873
|
} else {
|
456
874
|
raise_parse_error("unexpected token at '%s'", p);
|
@@ -458,29 +876,81 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
|
|
458
876
|
}
|
459
877
|
}
|
460
878
|
|
461
|
-
static const
|
462
|
-
|
879
|
+
static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
|
880
|
+
{
|
881
|
+
if (symbolize) {
|
882
|
+
intern = true;
|
883
|
+
}
|
884
|
+
VALUE result;
|
885
|
+
# ifdef HAVE_RB_ENC_INTERNED_STR
|
886
|
+
if (intern) {
|
887
|
+
result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
|
888
|
+
} else {
|
889
|
+
result = rb_utf8_str_new(start, (long)(end - start));
|
890
|
+
}
|
891
|
+
# else
|
892
|
+
result = rb_utf8_str_new(start, (long)(end - start));
|
893
|
+
if (intern) {
|
894
|
+
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
|
895
|
+
}
|
896
|
+
# endif
|
897
|
+
|
898
|
+
if (symbolize) {
|
899
|
+
result = rb_str_intern(result);
|
900
|
+
}
|
901
|
+
|
902
|
+
return result;
|
903
|
+
}
|
904
|
+
|
905
|
+
// Builds the Ruby value for a string with no escape sequences.
// Object keys (is_name) are first looked up in the name cache; anything the
// cache declines (Qfalse) is built directly from the bytes.
static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
    if (is_name) {
        const size_t name_length = stringEnd - string;
        VALUE cached_key = RB_UNLIKELY(symbolize)
            ? rsymbol_cache_fetch(&json->name_cache, string, name_length)
            : rstring_cache_fetch(&json->name_cache, string, name_length);

        if (RB_LIKELY(cached_key)) {
            return cached_key;
        }
    }

    return build_string(string, stringEnd, intern, symbolize);
}
|
924
|
+
|
925
|
+
static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
|
463
926
|
{
|
464
|
-
VALUE result = Qnil;
|
465
927
|
size_t bufferSize = stringEnd - string;
|
466
928
|
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
|
467
929
|
int unescape_len;
|
468
930
|
char buf[4];
|
469
931
|
|
470
|
-
if (
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
932
|
+
if (is_name) {
|
933
|
+
VALUE cached_key;
|
934
|
+
if (RB_UNLIKELY(symbolize)) {
|
935
|
+
cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
|
936
|
+
} else {
|
937
|
+
cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
|
938
|
+
}
|
939
|
+
|
940
|
+
if (RB_LIKELY(cached_key)) {
|
941
|
+
return cached_key;
|
942
|
+
}
|
943
|
+
}
|
944
|
+
|
945
|
+
pe = memchr(p, '\\', bufferSize);
|
946
|
+
if (RB_UNLIKELY(pe == NULL)) {
|
947
|
+
return build_string(string, stringEnd, intern, symbolize);
|
482
948
|
}
|
483
949
|
|
950
|
+
VALUE result = rb_str_buf_new(bufferSize);
|
951
|
+
rb_enc_associate_index(result, utf8_encindex);
|
952
|
+
buffer = bufferStart = RSTRING_PTR(result);
|
953
|
+
|
484
954
|
while (pe < stringEnd) {
|
485
955
|
if (*pe == '\\') {
|
486
956
|
unescape = (char *) "?";
|
@@ -513,9 +983,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
|
|
513
983
|
break;
|
514
984
|
case 'u':
|
515
985
|
if (pe > stringEnd - 4) {
|
516
|
-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
|
517
|
-
ruby_xfree(bufferStart);
|
518
|
-
}
|
519
986
|
raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
|
520
987
|
} else {
|
521
988
|
uint32_t ch = unescape_unicode((unsigned char *) ++pe);
|
@@ -533,9 +1000,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
|
|
533
1000
|
if ((ch & 0xFC00) == 0xD800) {
|
534
1001
|
pe++;
|
535
1002
|
if (pe > stringEnd - 6) {
|
536
|
-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
|
537
|
-
ruby_xfree(bufferStart);
|
538
|
-
}
|
539
1003
|
raise_parse_error("incomplete surrogate pair at '%s'", p);
|
540
1004
|
}
|
541
1005
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
@@ -568,41 +1032,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
|
|
568
1032
|
MEMCPY(buffer, p, char, pe - p);
|
569
1033
|
buffer += pe - p;
|
570
1034
|
}
|
571
|
-
|
572
|
-
# ifdef HAVE_RB_ENC_INTERNED_STR
|
573
|
-
if (intern) {
|
574
|
-
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
|
575
|
-
} else {
|
576
|
-
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
|
577
|
-
}
|
578
|
-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
|
579
|
-
ruby_xfree(bufferStart);
|
580
|
-
}
|
581
|
-
# else
|
582
|
-
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
|
583
|
-
|
584
|
-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
|
585
|
-
ruby_xfree(bufferStart);
|
586
|
-
}
|
587
|
-
|
588
|
-
if (intern) {
|
589
|
-
# if STR_UMINUS_DEDUPE_FROZEN
|
590
|
-
// Starting from MRI 2.8 it is preferable to freeze the string
|
591
|
-
// before deduplication so that it can be interned directly
|
592
|
-
// otherwise it would be duplicated first which is wasteful.
|
593
|
-
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
|
594
|
-
# elif STR_UMINUS_DEDUPE
|
595
|
-
// MRI 2.5 and older do not deduplicate strings that are already
|
596
|
-
// frozen.
|
597
|
-
result = rb_funcall(result, i_uminus, 0);
|
598
|
-
# else
|
599
|
-
result = rb_str_freeze(result);
|
600
|
-
# endif
|
601
|
-
}
|
602
|
-
# endif
|
1035
|
+
rb_str_set_len(result, buffer - bufferStart);
|
603
1036
|
|
604
1037
|
if (symbolize) {
|
605
|
-
|
1038
|
+
result = rb_str_intern(result);
|
1039
|
+
} else if (intern) {
|
1040
|
+
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
|
606
1041
|
}
|
607
1042
|
|
608
1043
|
return result;
|
@@ -614,19 +1049,31 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
|
|
614
1049
|
|
615
1050
|
write data;
|
616
1051
|
|
617
|
-
action
|
618
|
-
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
} else {
|
623
|
-
fexec p + 1;
|
624
|
-
}
|
1052
|
+
action parse_complex_string {
|
1053
|
+
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
1054
|
+
fexec p + 1;
|
1055
|
+
fhold;
|
1056
|
+
fbreak;
|
625
1057
|
}
|
626
1058
|
|
627
|
-
action
|
1059
|
+
action parse_simple_string {
|
1060
|
+
*result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
1061
|
+
fexec p + 1;
|
1062
|
+
fhold;
|
1063
|
+
fbreak;
|
1064
|
+
}
|
628
1065
|
|
629
|
-
|
1066
|
+
double_quote = '"';
|
1067
|
+
escape = '\\';
|
1068
|
+
control = 0..0x1f;
|
1069
|
+
simple = any - escape - double_quote - control;
|
1070
|
+
|
1071
|
+
main := double_quote (
|
1072
|
+
(simple*)(
|
1073
|
+
(double_quote) @parse_simple_string |
|
1074
|
+
((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string
|
1075
|
+
)
|
1076
|
+
);
|
630
1077
|
}%%
|
631
1078
|
|
632
1079
|
static int
|
@@ -684,18 +1131,78 @@ static VALUE convert_encoding(VALUE source)
|
|
684
1131
|
{
|
685
1132
|
int encindex = RB_ENCODING_GET(source);
|
686
1133
|
|
687
|
-
if (encindex == utf8_encindex) {
|
1134
|
+
if (RB_LIKELY(encindex == utf8_encindex)) {
|
688
1135
|
return source;
|
689
1136
|
}
|
690
1137
|
|
691
1138
|
if (encindex == binary_encindex) {
|
692
|
-
// For historical reason, we silently reinterpret binary strings as UTF-8
|
693
|
-
// TODO: Deprecate in 2.8.0
|
694
|
-
// TODO: Remove in 3.0.0
|
1139
|
+
// For historical reason, we silently reinterpret binary strings as UTF-8
|
695
1140
|
return rb_enc_associate_index(rb_str_dup(source), utf8_encindex);
|
696
1141
|
}
|
697
1142
|
|
698
|
-
return
|
1143
|
+
return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
|
1144
|
+
}
|
1145
|
+
|
1146
|
+
static int configure_parser_i(VALUE key, VALUE val, VALUE data)
|
1147
|
+
{
|
1148
|
+
JSON_Parser *json = (JSON_Parser *)data;
|
1149
|
+
|
1150
|
+
if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
1151
|
+
else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); }
|
1152
|
+
else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
|
1153
|
+
else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); }
|
1154
|
+
else if (key == sym_freeze) { json->freeze = RTEST(val); }
|
1155
|
+
else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; }
|
1156
|
+
else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; }
|
1157
|
+
else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; }
|
1158
|
+
else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; }
|
1159
|
+
else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; }
|
1160
|
+
else if (key == sym_create_additions) {
|
1161
|
+
if (NIL_P(val)) {
|
1162
|
+
json->create_additions = true;
|
1163
|
+
json->deprecated_create_additions = true;
|
1164
|
+
} else {
|
1165
|
+
json->create_additions = RTEST(val);
|
1166
|
+
json->deprecated_create_additions = false;
|
1167
|
+
}
|
1168
|
+
}
|
1169
|
+
|
1170
|
+
return ST_CONTINUE;
|
1171
|
+
}
|
1172
|
+
|
1173
|
+
static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
|
1174
|
+
{
|
1175
|
+
if (json->Vsource) {
|
1176
|
+
rb_raise(rb_eTypeError, "already initialized instance");
|
1177
|
+
}
|
1178
|
+
|
1179
|
+
json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
|
1180
|
+
json->max_nesting = 100;
|
1181
|
+
|
1182
|
+
if (!NIL_P(opts)) {
|
1183
|
+
Check_Type(opts, T_HASH);
|
1184
|
+
if (RHASH_SIZE(opts) > 0) {
|
1185
|
+
// We assume in most cases few keys are set so it's faster to go over
|
1186
|
+
// the provided keys than to check all possible keys.
|
1187
|
+
rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
|
1188
|
+
|
1189
|
+
if (json->symbolize_names && json->create_additions) {
|
1190
|
+
rb_raise(rb_eArgError,
|
1191
|
+
"options :symbolize_names and :create_additions cannot be "
|
1192
|
+
" used in conjunction");
|
1193
|
+
}
|
1194
|
+
|
1195
|
+
if (json->create_additions && !json->create_id) {
|
1196
|
+
json->create_id = rb_funcall(mJSON, i_create_id, 0);
|
1197
|
+
}
|
1198
|
+
}
|
1199
|
+
|
1200
|
+
}
|
1201
|
+
source = convert_encoding(StringValue(source));
|
1202
|
+
StringValue(source);
|
1203
|
+
json->len = RSTRING_LEN(source);
|
1204
|
+
json->source = RSTRING_PTR(source);
|
1205
|
+
json->Vsource = source;
|
699
1206
|
}
|
700
1207
|
|
701
1208
|
/*
|
@@ -732,111 +1239,11 @@ static VALUE convert_encoding(VALUE source)
|
|
732
1239
|
*/
|
733
1240
|
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
|
734
1241
|
{
|
735
|
-
VALUE source, opts;
|
736
1242
|
GET_PARSER_INIT;
|
737
1243
|
|
738
|
-
if (json->Vsource) {
|
739
|
-
rb_raise(rb_eTypeError, "already initialized instance");
|
740
|
-
}
|
741
|
-
|
742
1244
|
rb_check_arity(argc, 1, 2);
|
743
|
-
source = argv[0];
|
744
|
-
opts = Qnil;
|
745
|
-
if (argc == 2) {
|
746
|
-
opts = argv[1];
|
747
|
-
Check_Type(argv[1], T_HASH);
|
748
|
-
if (RHASH_SIZE(argv[1]) > 0) {
|
749
|
-
opts = argv[1];
|
750
|
-
}
|
751
|
-
}
|
752
1245
|
|
753
|
-
|
754
|
-
VALUE tmp = ID2SYM(i_max_nesting);
|
755
|
-
if (option_given_p(opts, tmp)) {
|
756
|
-
VALUE max_nesting = rb_hash_aref(opts, tmp);
|
757
|
-
if (RTEST(max_nesting)) {
|
758
|
-
Check_Type(max_nesting, T_FIXNUM);
|
759
|
-
json->max_nesting = FIX2INT(max_nesting);
|
760
|
-
} else {
|
761
|
-
json->max_nesting = 0;
|
762
|
-
}
|
763
|
-
} else {
|
764
|
-
json->max_nesting = 100;
|
765
|
-
}
|
766
|
-
tmp = ID2SYM(i_allow_nan);
|
767
|
-
if (option_given_p(opts, tmp)) {
|
768
|
-
json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
|
769
|
-
} else {
|
770
|
-
json->allow_nan = 0;
|
771
|
-
}
|
772
|
-
tmp = ID2SYM(i_symbolize_names);
|
773
|
-
if (option_given_p(opts, tmp)) {
|
774
|
-
json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
|
775
|
-
} else {
|
776
|
-
json->symbolize_names = 0;
|
777
|
-
}
|
778
|
-
tmp = ID2SYM(i_freeze);
|
779
|
-
if (option_given_p(opts, tmp)) {
|
780
|
-
json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
|
781
|
-
} else {
|
782
|
-
json->freeze = 0;
|
783
|
-
}
|
784
|
-
tmp = ID2SYM(i_create_additions);
|
785
|
-
if (option_given_p(opts, tmp)) {
|
786
|
-
json->create_additions = RTEST(rb_hash_aref(opts, tmp));
|
787
|
-
} else {
|
788
|
-
json->create_additions = 0;
|
789
|
-
}
|
790
|
-
if (json->symbolize_names && json->create_additions) {
|
791
|
-
rb_raise(rb_eArgError,
|
792
|
-
"options :symbolize_names and :create_additions cannot be "
|
793
|
-
" used in conjunction");
|
794
|
-
}
|
795
|
-
tmp = ID2SYM(i_create_id);
|
796
|
-
if (option_given_p(opts, tmp)) {
|
797
|
-
json->create_id = rb_hash_aref(opts, tmp);
|
798
|
-
} else {
|
799
|
-
json->create_id = rb_funcall(mJSON, i_create_id, 0);
|
800
|
-
}
|
801
|
-
tmp = ID2SYM(i_object_class);
|
802
|
-
if (option_given_p(opts, tmp)) {
|
803
|
-
json->object_class = rb_hash_aref(opts, tmp);
|
804
|
-
} else {
|
805
|
-
json->object_class = Qnil;
|
806
|
-
}
|
807
|
-
tmp = ID2SYM(i_array_class);
|
808
|
-
if (option_given_p(opts, tmp)) {
|
809
|
-
json->array_class = rb_hash_aref(opts, tmp);
|
810
|
-
} else {
|
811
|
-
json->array_class = Qnil;
|
812
|
-
}
|
813
|
-
tmp = ID2SYM(i_decimal_class);
|
814
|
-
if (option_given_p(opts, tmp)) {
|
815
|
-
json->decimal_class = rb_hash_aref(opts, tmp);
|
816
|
-
} else {
|
817
|
-
json->decimal_class = Qnil;
|
818
|
-
}
|
819
|
-
tmp = ID2SYM(i_match_string);
|
820
|
-
if (option_given_p(opts, tmp)) {
|
821
|
-
VALUE match_string = rb_hash_aref(opts, tmp);
|
822
|
-
json->match_string = RTEST(match_string) ? match_string : Qnil;
|
823
|
-
} else {
|
824
|
-
json->match_string = Qnil;
|
825
|
-
}
|
826
|
-
} else {
|
827
|
-
json->max_nesting = 100;
|
828
|
-
json->allow_nan = 0;
|
829
|
-
json->create_additions = 0;
|
830
|
-
json->create_id = Qnil;
|
831
|
-
json->object_class = Qnil;
|
832
|
-
json->array_class = Qnil;
|
833
|
-
json->decimal_class = Qnil;
|
834
|
-
}
|
835
|
-
source = convert_encoding(StringValue(source));
|
836
|
-
StringValue(source);
|
837
|
-
json->len = RSTRING_LEN(source);
|
838
|
-
json->source = RSTRING_PTR(source);;
|
839
|
-
json->Vsource = source;
|
1246
|
+
parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
|
840
1247
|
return self;
|
841
1248
|
}
|
842
1249
|
|
@@ -871,11 +1278,64 @@ static VALUE cParser_parse(VALUE self)
|
|
871
1278
|
VALUE result = Qnil;
|
872
1279
|
GET_PARSER;
|
873
1280
|
|
1281
|
+
char stack_buffer[FBUFFER_STACK_SIZE];
|
1282
|
+
fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
|
1283
|
+
|
1284
|
+
VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
|
1285
|
+
rvalue_stack stack = {
|
1286
|
+
.type = RVALUE_STACK_STACK_ALLOCATED,
|
1287
|
+
.ptr = rvalue_stack_buffer,
|
1288
|
+
.capa = RVALUE_STACK_INITIAL_CAPA,
|
1289
|
+
};
|
1290
|
+
json->stack = &stack;
|
1291
|
+
|
874
1292
|
%% write init;
|
875
1293
|
p = json->source;
|
876
1294
|
pe = p + json->len;
|
877
1295
|
%% write exec;
|
878
1296
|
|
1297
|
+
if (json->stack_handle) {
|
1298
|
+
rvalue_stack_eagerly_release(json->stack_handle);
|
1299
|
+
}
|
1300
|
+
|
1301
|
+
if (cs >= JSON_first_final && p == pe) {
|
1302
|
+
return result;
|
1303
|
+
} else {
|
1304
|
+
raise_parse_error("unexpected token at '%s'", p);
|
1305
|
+
return Qnil;
|
1306
|
+
}
|
1307
|
+
}
|
1308
|
+
|
1309
|
+
static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
|
1310
|
+
{
|
1311
|
+
char *p, *pe;
|
1312
|
+
int cs = EVIL;
|
1313
|
+
VALUE result = Qnil;
|
1314
|
+
|
1315
|
+
JSON_Parser _parser = {0};
|
1316
|
+
JSON_Parser *json = &_parser;
|
1317
|
+
parser_init(json, source, opts);
|
1318
|
+
|
1319
|
+
char stack_buffer[FBUFFER_STACK_SIZE];
|
1320
|
+
fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
|
1321
|
+
|
1322
|
+
VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
|
1323
|
+
rvalue_stack stack = {
|
1324
|
+
.type = RVALUE_STACK_STACK_ALLOCATED,
|
1325
|
+
.ptr = rvalue_stack_buffer,
|
1326
|
+
.capa = RVALUE_STACK_INITIAL_CAPA,
|
1327
|
+
};
|
1328
|
+
json->stack = &stack;
|
1329
|
+
|
1330
|
+
%% write init;
|
1331
|
+
p = json->source;
|
1332
|
+
pe = p + json->len;
|
1333
|
+
%% write exec;
|
1334
|
+
|
1335
|
+
if (json->stack_handle) {
|
1336
|
+
rvalue_stack_eagerly_release(json->stack_handle);
|
1337
|
+
}
|
1338
|
+
|
879
1339
|
if (cs >= JSON_first_final && p == pe) {
|
880
1340
|
return result;
|
881
1341
|
} else {
|
@@ -893,19 +1353,23 @@ static void JSON_mark(void *ptr)
|
|
893
1353
|
rb_gc_mark(json->array_class);
|
894
1354
|
rb_gc_mark(json->decimal_class);
|
895
1355
|
rb_gc_mark(json->match_string);
|
1356
|
+
rb_gc_mark(json->stack_handle);
|
1357
|
+
|
1358
|
+
const VALUE *name_cache_entries = &json->name_cache.entries[0];
|
1359
|
+
rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length);
|
896
1360
|
}
|
897
1361
|
|
898
1362
|
static void JSON_free(void *ptr)
|
899
1363
|
{
|
900
1364
|
JSON_Parser *json = ptr;
|
901
|
-
fbuffer_free(json->fbuffer);
|
1365
|
+
fbuffer_free(&json->fbuffer);
|
902
1366
|
ruby_xfree(json);
|
903
1367
|
}
|
904
1368
|
|
905
1369
|
static size_t JSON_memsize(const void *ptr)
|
906
1370
|
{
|
907
1371
|
const JSON_Parser *json = ptr;
|
908
|
-
return sizeof(*json) + FBUFFER_CAPA(json->fbuffer);
|
1372
|
+
return sizeof(*json) + FBUFFER_CAPA(&json->fbuffer);
|
909
1373
|
}
|
910
1374
|
|
911
1375
|
static const rb_data_type_t JSON_Parser_type = {
|
@@ -919,7 +1383,7 @@ static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
919
1383
|
{
|
920
1384
|
JSON_Parser *json;
|
921
1385
|
VALUE obj = TypedData_Make_Struct(klass, JSON_Parser, &JSON_Parser_type, json);
|
922
|
-
json->fbuffer
|
1386
|
+
fbuffer_stack_init(&json->fbuffer, 0, NULL, 0);
|
923
1387
|
return obj;
|
924
1388
|
}
|
925
1389
|
|
@@ -953,6 +1417,8 @@ void Init_parser(void)
|
|
953
1417
|
rb_define_method(cParser, "parse", cParser_parse, 0);
|
954
1418
|
rb_define_method(cParser, "source", cParser_source, 0);
|
955
1419
|
|
1420
|
+
rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
|
1421
|
+
|
956
1422
|
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
|
957
1423
|
rb_gc_register_mark_object(CNaN);
|
958
1424
|
|
@@ -962,30 +1428,38 @@ void Init_parser(void)
|
|
962
1428
|
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
|
963
1429
|
rb_gc_register_mark_object(CMinusInfinity);
|
964
1430
|
|
1431
|
+
rb_global_variable(&Encoding_UTF_8);
|
1432
|
+
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
|
1433
|
+
|
1434
|
+
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
1435
|
+
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
1436
|
+
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
1437
|
+
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
1438
|
+
sym_freeze = ID2SYM(rb_intern("freeze"));
|
1439
|
+
sym_create_additions = ID2SYM(rb_intern("create_additions"));
|
1440
|
+
sym_create_id = ID2SYM(rb_intern("create_id"));
|
1441
|
+
sym_object_class = ID2SYM(rb_intern("object_class"));
|
1442
|
+
sym_array_class = ID2SYM(rb_intern("array_class"));
|
1443
|
+
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
1444
|
+
sym_match_string = ID2SYM(rb_intern("match_string"));
|
1445
|
+
|
1446
|
+
i_create_id = rb_intern("create_id");
|
965
1447
|
i_json_creatable_p = rb_intern("json_creatable?");
|
966
1448
|
i_json_create = rb_intern("json_create");
|
967
|
-
i_create_id = rb_intern("create_id");
|
968
|
-
i_create_additions = rb_intern("create_additions");
|
969
1449
|
i_chr = rb_intern("chr");
|
970
|
-
i_max_nesting = rb_intern("max_nesting");
|
971
|
-
i_allow_nan = rb_intern("allow_nan");
|
972
|
-
i_symbolize_names = rb_intern("symbolize_names");
|
973
|
-
i_object_class = rb_intern("object_class");
|
974
|
-
i_array_class = rb_intern("array_class");
|
975
|
-
i_decimal_class = rb_intern("decimal_class");
|
976
1450
|
i_match = rb_intern("match");
|
977
|
-
i_match_string = rb_intern("match_string");
|
978
1451
|
i_deep_const_get = rb_intern("deep_const_get");
|
979
1452
|
i_aset = rb_intern("[]=");
|
980
1453
|
i_aref = rb_intern("[]");
|
981
1454
|
i_leftshift = rb_intern("<<");
|
982
1455
|
i_new = rb_intern("new");
|
983
1456
|
i_try_convert = rb_intern("try_convert");
|
984
|
-
i_freeze = rb_intern("freeze");
|
985
1457
|
i_uminus = rb_intern("-@");
|
1458
|
+
i_encode = rb_intern("encode");
|
986
1459
|
|
987
1460
|
binary_encindex = rb_ascii8bit_encindex();
|
988
1461
|
utf8_encindex = rb_utf8_encindex();
|
1462
|
+
enc_utf8 = rb_utf8_encoding();
|
989
1463
|
}
|
990
1464
|
|
991
1465
|
/*
|