json 2.19.9 → 2.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +11 -1
- data/LEGAL +3 -3
- data/README.md +0 -2
- data/ext/json/ext/fbuffer/fbuffer.h +2 -2
- data/ext/json/ext/generator/extconf.rb +1 -0
- data/ext/json/ext/generator/generator.c +4 -0
- data/ext/json/ext/json.h +63 -0
- data/ext/json/ext/parser/extconf.rb +33 -0
- data/ext/json/ext/parser/parser.c +1393 -323
- data/ext/json/ext/vendor/fast_float_parser.h +814 -0
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +16 -2
- metadata +63 -65
- data/ext/json/ext/vendor/ryu.h +0 -819
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
#include "../json.h"
|
|
2
|
-
#include "../vendor/
|
|
2
|
+
#include "../vendor/fast_float_parser.h"
|
|
3
3
|
#include "../simd/simd.h"
|
|
4
4
|
|
|
5
|
-
static VALUE mJSON, eNestingError, Encoding_UTF_8;
|
|
6
|
-
static VALUE CNaN, CInfinity, CMinusInfinity;
|
|
5
|
+
static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8;
|
|
6
|
+
static VALUE CNaN, CInfinity, CMinusInfinity, JSON_empty_string;
|
|
7
7
|
|
|
8
|
-
static ID i_new, i_try_convert, i_uminus, i_encode;
|
|
8
|
+
static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column;
|
|
9
9
|
|
|
10
|
-
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma,
|
|
11
|
-
sym_allow_invalid_escape, sym_symbolize_names,
|
|
12
|
-
sym_allow_duplicate_key;
|
|
10
|
+
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments,
|
|
11
|
+
sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names,
|
|
12
|
+
sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
|
13
13
|
|
|
14
14
|
static int binary_encindex;
|
|
15
15
|
static int utf8_encindex;
|
|
@@ -58,6 +58,20 @@ typedef struct rvalue_cache_struct {
|
|
|
58
58
|
VALUE entries[JSON_RVALUE_CACHE_CAPA];
|
|
59
59
|
} rvalue_cache;
|
|
60
60
|
|
|
61
|
+
static void rvalue_cache_mark(rvalue_cache *cache)
|
|
62
|
+
{
|
|
63
|
+
for (int index = 0; index < cache->length; index++) {
|
|
64
|
+
rb_gc_mark_movable(cache->entries[index]);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
static void rvalue_cache_compact(rvalue_cache *cache)
|
|
69
|
+
{
|
|
70
|
+
for (int index = 0; index < cache->length; index++) {
|
|
71
|
+
cache->entries[index] = rb_gc_location(cache->entries[index]);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
61
75
|
static rb_encoding *enc_utf8;
|
|
62
76
|
|
|
63
77
|
#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
|
|
@@ -206,12 +220,12 @@ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle,
|
|
|
206
220
|
|
|
207
221
|
static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
|
|
208
222
|
{
|
|
209
|
-
long required = stack->capa * 2;
|
|
223
|
+
long required = stack->capa ? stack->capa * 2 : RVALUE_STACK_INITIAL_CAPA;
|
|
210
224
|
|
|
211
225
|
if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
|
|
212
226
|
stack = rvalue_stack_spill(stack, handle, stack_ref);
|
|
213
227
|
} else {
|
|
214
|
-
|
|
228
|
+
JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa);
|
|
215
229
|
stack->capa = required;
|
|
216
230
|
}
|
|
217
231
|
return stack;
|
|
@@ -219,11 +233,15 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu
|
|
|
219
233
|
|
|
220
234
|
static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
|
|
221
235
|
{
|
|
236
|
+
JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || handle);
|
|
237
|
+
|
|
222
238
|
if (RB_UNLIKELY(stack->head >= stack->capa)) {
|
|
223
239
|
stack = rvalue_stack_grow(stack, handle, stack_ref);
|
|
224
240
|
}
|
|
241
|
+
|
|
225
242
|
stack->ptr[stack->head] = value;
|
|
226
243
|
stack->head++;
|
|
244
|
+
|
|
227
245
|
return value;
|
|
228
246
|
}
|
|
229
247
|
|
|
@@ -243,14 +261,14 @@ static void rvalue_stack_mark(void *ptr)
|
|
|
243
261
|
long index;
|
|
244
262
|
if (stack && stack->ptr) {
|
|
245
263
|
for (index = 0; index < stack->head; index++) {
|
|
246
|
-
|
|
264
|
+
rb_gc_mark_movable(stack->ptr[index]);
|
|
247
265
|
}
|
|
248
266
|
}
|
|
249
267
|
}
|
|
250
268
|
|
|
251
269
|
static void rvalue_stack_free_buffer(rvalue_stack *stack)
|
|
252
270
|
{
|
|
253
|
-
|
|
271
|
+
JSON_SIZED_FREE_N(stack->ptr, stack->capa);
|
|
254
272
|
stack->ptr = NULL;
|
|
255
273
|
}
|
|
256
274
|
|
|
@@ -260,7 +278,7 @@ static void rvalue_stack_free(void *ptr)
|
|
|
260
278
|
if (stack) {
|
|
261
279
|
rvalue_stack_free_buffer(stack);
|
|
262
280
|
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
263
|
-
|
|
281
|
+
JSON_SIZED_FREE(stack);
|
|
264
282
|
#endif
|
|
265
283
|
}
|
|
266
284
|
}
|
|
@@ -268,7 +286,22 @@ static void rvalue_stack_free(void *ptr)
|
|
|
268
286
|
static size_t rvalue_stack_memsize(const void *ptr)
|
|
269
287
|
{
|
|
270
288
|
const rvalue_stack *stack = (const rvalue_stack *)ptr;
|
|
271
|
-
|
|
289
|
+
size_t memsize = sizeof(VALUE) * stack->capa;
|
|
290
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
291
|
+
memsize += sizeof(rvalue_stack);
|
|
292
|
+
#endif
|
|
293
|
+
return memsize;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
static void rvalue_stack_compact(void *ptr)
|
|
297
|
+
{
|
|
298
|
+
rvalue_stack *stack = (rvalue_stack *)ptr;
|
|
299
|
+
long index;
|
|
300
|
+
if (stack && stack->ptr) {
|
|
301
|
+
for (index = 0; index < stack->head; index++) {
|
|
302
|
+
stack->ptr[index] = rb_gc_location(stack->ptr[index]);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
272
305
|
}
|
|
273
306
|
|
|
274
307
|
static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
|
|
@@ -277,7 +310,10 @@ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
|
|
|
277
310
|
.dmark = rvalue_stack_mark,
|
|
278
311
|
.dfree = rvalue_stack_free,
|
|
279
312
|
.dsize = rvalue_stack_memsize,
|
|
313
|
+
.dcompact = rvalue_stack_compact,
|
|
280
314
|
},
|
|
315
|
+
// We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED
|
|
316
|
+
// because it churns a lot of values, so triggering write barriers every time is very costly.
|
|
281
317
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
282
318
|
};
|
|
283
319
|
|
|
@@ -309,33 +345,62 @@ static void rvalue_stack_eagerly_release(VALUE handle)
|
|
|
309
345
|
}
|
|
310
346
|
}
|
|
311
347
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
}
|
|
348
|
+
/* frame stack */
|
|
349
|
+
|
|
350
|
+
// Iterative (non-recursive) parsing keeps an explicit stack of the containers
|
|
351
|
+
// currently being built, instead of relying on the C call stack. Each frame
|
|
352
|
+
// only needs enough bookkeeping to close its container: which kind it is, the
|
|
353
|
+
// rvalue_stack position where its children start (so we know how many to pop),
|
|
354
|
+
// and the cursor at its opening brace (used to rewind for duplicate key
|
|
355
|
+
// errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses
|
|
356
|
+
// the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that
|
|
357
|
+
// it's freed even if parsing raises.
|
|
358
|
+
//
|
|
359
|
+
// The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release
|
|
360
|
+
// and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts
|
|
361
|
+
// -- the element type and the absence of a mark function are the only real
|
|
362
|
+
// differences. Keep the two in sync: a fix to the spill/release or
|
|
363
|
+
// HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the
|
|
364
|
+
// other.
|
|
365
|
+
#define JSON_FRAME_STACK_INITIAL_CAPA 32
|
|
366
|
+
|
|
367
|
+
enum json_frame_type {
|
|
368
|
+
JSON_FRAME_ROOT, // == JSON_PHASE_DONE
|
|
369
|
+
JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA
|
|
370
|
+
JSON_FRAME_OBJECT, // == JSON_PHASE_OBJECT_COMMA
|
|
371
|
+
};
|
|
337
372
|
|
|
338
|
-
|
|
373
|
+
// Where a frame is within its container's grammar. This is the entirety of the
|
|
374
|
+
// parser's "what to do next" state: json_parse_any dispatches on the top
|
|
375
|
+
// frame's phase and holds no resume state in C locals, so a parse can stop at
|
|
376
|
+
// any value boundary and be resumed purely from the (persistable) frame stack.
|
|
377
|
+
//
|
|
378
|
+
// The first three phases are deliberately equal to the corresponding json_frame_type
|
|
379
|
+
// to simplify the transition of phase in json_value_completed.
|
|
380
|
+
enum json_frame_phase {
|
|
381
|
+
JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed
|
|
382
|
+
JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']'
|
|
383
|
+
JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}'
|
|
384
|
+
JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':')
|
|
385
|
+
JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',')
|
|
386
|
+
JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':'
|
|
387
|
+
};
|
|
388
|
+
|
|
389
|
+
typedef struct json_frame_struct {
|
|
390
|
+
enum json_frame_type type;
|
|
391
|
+
enum json_frame_phase phase;
|
|
392
|
+
long value_stack_head; // rvalue_stack->head when this container opened
|
|
393
|
+
size_t start_offset; // object frames only (offset of the '{'); 0 otherwise
|
|
394
|
+
} json_frame;
|
|
395
|
+
|
|
396
|
+
typedef struct json_frame_stack_struct {
|
|
397
|
+
enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated
|
|
398
|
+
long capa;
|
|
399
|
+
long head;
|
|
400
|
+
json_frame *ptr;
|
|
401
|
+
} json_frame_stack;
|
|
402
|
+
|
|
403
|
+
enum deprecatable_action {
|
|
339
404
|
JSON_DEPRECATED = 0,
|
|
340
405
|
JSON_IGNORE,
|
|
341
406
|
JSON_RAISE,
|
|
@@ -345,7 +410,8 @@ typedef struct JSON_ParserStruct {
|
|
|
345
410
|
VALUE on_load_proc;
|
|
346
411
|
VALUE decimal_class;
|
|
347
412
|
ID decimal_method_id;
|
|
348
|
-
enum
|
|
413
|
+
enum deprecatable_action on_duplicate_key;
|
|
414
|
+
enum deprecatable_action on_comment;
|
|
349
415
|
int max_nesting;
|
|
350
416
|
bool allow_nan;
|
|
351
417
|
bool allow_trailing_comma;
|
|
@@ -356,17 +422,152 @@ typedef struct JSON_ParserStruct {
|
|
|
356
422
|
} JSON_ParserConfig;
|
|
357
423
|
|
|
358
424
|
typedef struct JSON_ParserStateStruct {
|
|
359
|
-
VALUE *
|
|
425
|
+
VALUE *value_stack_handle;
|
|
426
|
+
VALUE *frame_stack_handle;
|
|
360
427
|
const char *start;
|
|
361
428
|
const char *cursor;
|
|
362
429
|
const char *end;
|
|
363
|
-
rvalue_stack *
|
|
430
|
+
rvalue_stack *value_stack;
|
|
431
|
+
json_frame_stack *frames;
|
|
364
432
|
rvalue_cache name_cache;
|
|
365
433
|
int in_array;
|
|
366
434
|
int current_nesting;
|
|
367
435
|
unsigned int emitted_deprecations;
|
|
436
|
+
VALUE parser;
|
|
368
437
|
} JSON_ParserState;
|
|
369
438
|
|
|
439
|
+
static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref);
|
|
440
|
+
|
|
441
|
+
static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref)
|
|
442
|
+
{
|
|
443
|
+
long required = stack->capa ? stack->capa * 2 : JSON_FRAME_STACK_INITIAL_CAPA;
|
|
444
|
+
|
|
445
|
+
if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
|
|
446
|
+
stack = json_frame_stack_spill(stack, handle, stack_ref);
|
|
447
|
+
} else {
|
|
448
|
+
JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa);
|
|
449
|
+
stack->capa = required;
|
|
450
|
+
}
|
|
451
|
+
return stack;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame)
|
|
455
|
+
{
|
|
456
|
+
json_frame_stack *stack = state->frames;
|
|
457
|
+
|
|
458
|
+
JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || state->frame_stack_handle);
|
|
459
|
+
|
|
460
|
+
if (RB_UNLIKELY(stack->head >= stack->capa)) {
|
|
461
|
+
stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames);
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
json_frame *frame_ptr = &stack->ptr[stack->head++];
|
|
465
|
+
*frame_ptr = frame;
|
|
466
|
+
return frame_ptr;
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
static inline json_frame *json_frame_stack_peek(json_frame_stack *stack)
|
|
470
|
+
{
|
|
471
|
+
return &stack->ptr[stack->head - 1];
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
static inline void json_frame_stack_pop(json_frame_stack *stack)
|
|
475
|
+
{
|
|
476
|
+
stack->head--;
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
static void json_frame_stack_free_buffer(json_frame_stack *stack)
|
|
480
|
+
{
|
|
481
|
+
JSON_SIZED_FREE_N(stack->ptr, stack->capa);
|
|
482
|
+
stack->ptr = NULL;
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
static void json_frame_stack_free(void *ptr)
|
|
486
|
+
{
|
|
487
|
+
json_frame_stack *stack = (json_frame_stack *)ptr;
|
|
488
|
+
if (stack) {
|
|
489
|
+
json_frame_stack_free_buffer(stack);
|
|
490
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
491
|
+
JSON_SIZED_FREE(stack);
|
|
492
|
+
#endif
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
static size_t json_frame_stack_memsize(const void *ptr)
|
|
497
|
+
{
|
|
498
|
+
const json_frame_stack *stack = (const json_frame_stack *)ptr;
|
|
499
|
+
|
|
500
|
+
size_t memsize = sizeof(json_frame) * stack->capa;
|
|
501
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
502
|
+
memsize += sizeof(json_frame_stack);
|
|
503
|
+
#endif
|
|
504
|
+
return memsize;
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
static const rb_data_type_t JSON_Parser_frame_stack_type = {
|
|
508
|
+
.wrap_struct_name = "JSON::Ext::Parser/frame_stack",
|
|
509
|
+
.function = {
|
|
510
|
+
.dmark = NULL,
|
|
511
|
+
.dfree = json_frame_stack_free,
|
|
512
|
+
.dsize = json_frame_stack_memsize,
|
|
513
|
+
},
|
|
514
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE,
|
|
515
|
+
};
|
|
516
|
+
|
|
517
|
+
static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref)
|
|
518
|
+
{
|
|
519
|
+
json_frame_stack *stack;
|
|
520
|
+
*handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
|
|
521
|
+
*stack_ref = stack;
|
|
522
|
+
MEMCPY(stack, old_stack, json_frame_stack, 1);
|
|
523
|
+
|
|
524
|
+
stack->capa = old_stack->capa << 1;
|
|
525
|
+
stack->ptr = ALLOC_N(json_frame, stack->capa);
|
|
526
|
+
stack->type = RVALUE_STACK_HEAP_ALLOCATED;
|
|
527
|
+
MEMCPY(stack->ptr, old_stack->ptr, json_frame, old_stack->head);
|
|
528
|
+
return stack;
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
static void json_frame_stack_eagerly_release(VALUE handle)
|
|
532
|
+
{
|
|
533
|
+
if (handle) {
|
|
534
|
+
json_frame_stack *stack;
|
|
535
|
+
TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
|
|
536
|
+
#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
537
|
+
json_frame_stack_free_buffer(stack);
|
|
538
|
+
#else
|
|
539
|
+
json_frame_stack_free(stack);
|
|
540
|
+
RTYPEDDATA_DATA(handle) = NULL;
|
|
541
|
+
#endif
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
546
|
+
{
|
|
547
|
+
int len = 1;
|
|
548
|
+
if (ch <= 0x7F) {
|
|
549
|
+
buf[0] = (char) ch;
|
|
550
|
+
} else if (ch <= 0x07FF) {
|
|
551
|
+
buf[0] = (char) ((ch >> 6) | 0xC0);
|
|
552
|
+
buf[1] = (char) ((ch & 0x3F) | 0x80);
|
|
553
|
+
len++;
|
|
554
|
+
} else if (ch <= 0xFFFF) {
|
|
555
|
+
buf[0] = (char) ((ch >> 12) | 0xE0);
|
|
556
|
+
buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
|
|
557
|
+
buf[2] = (char) ((ch & 0x3F) | 0x80);
|
|
558
|
+
len += 2;
|
|
559
|
+
} else if (ch <= 0x1fffff) {
|
|
560
|
+
buf[0] =(char) ((ch >> 18) | 0xF0);
|
|
561
|
+
buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
|
|
562
|
+
buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
|
|
563
|
+
buf[3] =(char) ((ch & 0x3F) | 0x80);
|
|
564
|
+
len += 3;
|
|
565
|
+
} else {
|
|
566
|
+
buf[0] = '?';
|
|
567
|
+
}
|
|
568
|
+
return len;
|
|
569
|
+
}
|
|
570
|
+
|
|
370
571
|
static inline size_t rest(JSON_ParserState *state) {
|
|
371
572
|
return state->end - state->cursor;
|
|
372
573
|
}
|
|
@@ -398,6 +599,7 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum
|
|
|
398
599
|
|
|
399
600
|
while (cursor >= state->start) {
|
|
400
601
|
if (*cursor-- == '\n') {
|
|
602
|
+
line++;
|
|
401
603
|
break;
|
|
402
604
|
}
|
|
403
605
|
column++;
|
|
@@ -412,6 +614,8 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum
|
|
|
412
614
|
*column_out = column;
|
|
413
615
|
}
|
|
414
616
|
|
|
617
|
+
static const unsigned int MAX_DEPRECATIONS = 5;
|
|
618
|
+
|
|
415
619
|
static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
416
620
|
{
|
|
417
621
|
long line, column;
|
|
@@ -423,7 +627,7 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
|
|
423
627
|
|
|
424
628
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
|
425
629
|
|
|
426
|
-
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state
|
|
630
|
+
static VALUE build_parse_error_message(const char *format, JSON_ParserState *state)
|
|
427
631
|
{
|
|
428
632
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
|
429
633
|
|
|
@@ -457,31 +661,61 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta
|
|
|
457
661
|
}
|
|
458
662
|
}
|
|
459
663
|
|
|
460
|
-
|
|
461
|
-
rb_str_catf(message, " at line %ld column %ld", line, column);
|
|
462
|
-
return message;
|
|
664
|
+
return rb_enc_sprintf(enc_utf8, format, ptr);
|
|
463
665
|
}
|
|
464
666
|
|
|
465
|
-
static VALUE parse_error_new(VALUE message, long line, long column)
|
|
667
|
+
static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, long column, bool eos)
|
|
466
668
|
{
|
|
467
|
-
VALUE exc = rb_exc_new_str(
|
|
468
|
-
rb_ivar_set(exc,
|
|
469
|
-
rb_ivar_set(exc,
|
|
669
|
+
VALUE exc = rb_exc_new_str(eParserError, message);
|
|
670
|
+
rb_ivar_set(exc, i_at_line, LONG2NUM(line));
|
|
671
|
+
rb_ivar_set(exc, i_at_column, LONG2NUM(column));
|
|
470
672
|
return exc;
|
|
471
673
|
}
|
|
472
674
|
|
|
473
|
-
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
675
|
+
NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state, bool eos)
|
|
474
676
|
{
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
677
|
+
if (state->parser) {
|
|
678
|
+
if (eos) {
|
|
679
|
+
// the error will be swallowed by ResumableParser#parse, so no
|
|
680
|
+
// point building a message or backtrace.
|
|
681
|
+
rb_throw_obj(state->parser, state->parser);
|
|
682
|
+
} else {
|
|
683
|
+
// line and column can't be accurate in a resumable parse
|
|
684
|
+
rb_exc_raise(parse_error_new(state, build_parse_error_message(format, state), 0, 0, eos));
|
|
685
|
+
}
|
|
686
|
+
} else {
|
|
687
|
+
VALUE message = build_parse_error_message(format, state);
|
|
688
|
+
long line, column;
|
|
689
|
+
cursor_position(state, &line, &column);
|
|
690
|
+
rb_str_catf(message, " at line %ld column %ld", line, column);
|
|
691
|
+
rb_exc_raise(parse_error_new(state, message, line, column, eos));
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
|
|
695
|
+
NORETURN(static) void raise_eos_error(const char *format, JSON_ParserState *state)
|
|
696
|
+
{
|
|
697
|
+
raise_parse_error(format, state, true);
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
NORETURN(static) void raise_syntax_error(const char *format, JSON_ParserState *state)
|
|
701
|
+
{
|
|
702
|
+
raise_parse_error(format, state, false);
|
|
479
703
|
}
|
|
480
704
|
|
|
481
|
-
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
705
|
+
NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at, bool eos)
|
|
482
706
|
{
|
|
483
707
|
state->cursor = at;
|
|
484
|
-
raise_parse_error(format, state);
|
|
708
|
+
raise_parse_error(format, state, eos);
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
NORETURN(static) void raise_eos_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
712
|
+
{
|
|
713
|
+
raise_parse_error_at(format, state, at, true);
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
NORETURN(static) void raise_syntax_error_at(const char *format, JSON_ParserState *state, const char *at)
|
|
717
|
+
{
|
|
718
|
+
raise_parse_error_at(format, state, at, false);
|
|
485
719
|
}
|
|
486
720
|
|
|
487
721
|
/* unicode */
|
|
@@ -506,7 +740,7 @@ static const signed char digit_values[256] = {
|
|
|
506
740
|
static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
|
|
507
741
|
{
|
|
508
742
|
if (RB_UNLIKELY(sp > spe - 4)) {
|
|
509
|
-
|
|
743
|
+
raise_eos_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
510
744
|
}
|
|
511
745
|
|
|
512
746
|
const unsigned char *p = (const unsigned char *)sp;
|
|
@@ -517,7 +751,7 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const
|
|
|
517
751
|
const signed char b3 = digit_values[p[3]];
|
|
518
752
|
|
|
519
753
|
if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
|
|
520
|
-
|
|
754
|
+
raise_syntax_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
|
|
521
755
|
}
|
|
522
756
|
|
|
523
757
|
return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
|
|
@@ -529,9 +763,14 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const
|
|
|
529
763
|
|
|
530
764
|
static const rb_data_type_t JSON_ParserConfig_type;
|
|
531
765
|
|
|
532
|
-
|
|
533
|
-
|
|
766
|
+
const char *COMMENT_DEPRECATION_MESSAGE = "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`";
|
|
767
|
+
NOINLINE(static) void
|
|
768
|
+
json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
534
769
|
{
|
|
770
|
+
if (config->on_comment == JSON_RAISE) {
|
|
771
|
+
raise_syntax_error("unexpected token %s", state);
|
|
772
|
+
}
|
|
773
|
+
|
|
535
774
|
const char *start = state->cursor;
|
|
536
775
|
state->cursor++;
|
|
537
776
|
|
|
@@ -551,7 +790,7 @@ json_eat_comments(JSON_ParserState *state)
|
|
|
551
790
|
while (true) {
|
|
552
791
|
const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
|
|
553
792
|
if (!next_match) {
|
|
554
|
-
|
|
793
|
+
raise_eos_error_at("unterminated comment, expected closing '*/'", state, start);
|
|
555
794
|
}
|
|
556
795
|
|
|
557
796
|
state->cursor = next_match + 1;
|
|
@@ -563,13 +802,18 @@ json_eat_comments(JSON_ParserState *state)
|
|
|
563
802
|
break;
|
|
564
803
|
}
|
|
565
804
|
default:
|
|
566
|
-
raise_parse_error_at("unexpected token %s", state, start);
|
|
805
|
+
raise_parse_error_at("unexpected token %s", state, start, eos(state));
|
|
567
806
|
break;
|
|
568
807
|
}
|
|
808
|
+
|
|
809
|
+
if (config->on_comment == JSON_DEPRECATED && state->emitted_deprecations < MAX_DEPRECATIONS) {
|
|
810
|
+
state->emitted_deprecations++;
|
|
811
|
+
emit_parse_warning(COMMENT_DEPRECATION_MESSAGE, state);
|
|
812
|
+
}
|
|
569
813
|
}
|
|
570
814
|
|
|
571
815
|
ALWAYS_INLINE(static) void
|
|
572
|
-
json_eat_whitespace(JSON_ParserState *state)
|
|
816
|
+
json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config, bool include_comments)
|
|
573
817
|
{
|
|
574
818
|
while (true) {
|
|
575
819
|
switch (peek(state)) {
|
|
@@ -600,7 +844,11 @@ json_eat_whitespace(JSON_ParserState *state)
|
|
|
600
844
|
state->cursor++;
|
|
601
845
|
break;
|
|
602
846
|
case '/':
|
|
603
|
-
|
|
847
|
+
if (!include_comments) {
|
|
848
|
+
return;
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
json_eat_comments(state, config);
|
|
604
852
|
break;
|
|
605
853
|
|
|
606
854
|
default:
|
|
@@ -754,13 +1002,13 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
|
|
|
754
1002
|
uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
|
|
755
1003
|
|
|
756
1004
|
if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
|
|
757
|
-
|
|
1005
|
+
raise_syntax_error_at("invalid surrogate pair at %s", state, p);
|
|
758
1006
|
}
|
|
759
1007
|
|
|
760
1008
|
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
|
|
761
1009
|
pe += 5;
|
|
762
1010
|
} else {
|
|
763
|
-
|
|
1011
|
+
raise_syntax_error_at("incomplete surrogate pair at %s", state, p);
|
|
764
1012
|
break;
|
|
765
1013
|
}
|
|
766
1014
|
}
|
|
@@ -770,20 +1018,22 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
|
|
|
770
1018
|
p = ++pe;
|
|
771
1019
|
break;
|
|
772
1020
|
}
|
|
1021
|
+
case 0:
|
|
1022
|
+
return Qundef;
|
|
773
1023
|
default:
|
|
774
1024
|
if ((unsigned char)*pe < 0x20) {
|
|
775
1025
|
if (!config->allow_control_characters) {
|
|
776
1026
|
if (*pe == '\n') {
|
|
777
|
-
|
|
1027
|
+
raise_syntax_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
|
|
778
1028
|
}
|
|
779
|
-
|
|
1029
|
+
raise_syntax_error_at("invalid ASCII control character in string: %s", state, pe - 1);
|
|
780
1030
|
}
|
|
781
1031
|
}
|
|
782
1032
|
|
|
783
1033
|
if (config->allow_invalid_escape) {
|
|
784
1034
|
APPEND_CHAR(*pe);
|
|
785
1035
|
} else {
|
|
786
|
-
|
|
1036
|
+
raise_syntax_error_at("invalid escape character in string: %s", state, pe - 1);
|
|
787
1037
|
}
|
|
788
1038
|
break;
|
|
789
1039
|
}
|
|
@@ -879,19 +1129,17 @@ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantis
|
|
|
879
1129
|
return rb_float_new(negative ? -0.0 : 0.0);
|
|
880
1130
|
}
|
|
881
1131
|
|
|
882
|
-
|
|
883
|
-
// Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
|
|
884
|
-
if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
|
|
1132
|
+
if (RB_UNLIKELY(mantissa_digits > 18 || mantissa_digits + exponent < -307)) {
|
|
885
1133
|
return json_decode_large_float(start, end - start);
|
|
886
1134
|
}
|
|
887
1135
|
|
|
888
|
-
return DBL2NUM(
|
|
1136
|
+
return DBL2NUM(ffp_s2d(exponent, mantissa, negative));
|
|
889
1137
|
}
|
|
890
1138
|
|
|
891
1139
|
static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
|
|
892
1140
|
{
|
|
893
|
-
VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->
|
|
894
|
-
rvalue_stack_pop(state->
|
|
1141
|
+
VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count));
|
|
1142
|
+
rvalue_stack_pop(state->value_stack, count);
|
|
895
1143
|
|
|
896
1144
|
if (config->freeze) {
|
|
897
1145
|
RB_OBJ_FREEZE(array);
|
|
@@ -935,38 +1183,50 @@ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE d
|
|
|
935
1183
|
rb_inspect(duplicate_key)
|
|
936
1184
|
);
|
|
937
1185
|
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
1186
|
+
rb_str_concat(message, build_parse_error_message("", state));
|
|
1187
|
+
if (state->parser) { // line and column can't be accurate in a resumable parse
|
|
1188
|
+
rb_exc_raise(parse_error_new(state, message, 0, 0, false));
|
|
1189
|
+
} else {
|
|
1190
|
+
long line, column;
|
|
1191
|
+
cursor_position(state, &line, &column);
|
|
1192
|
+
rb_str_catf(message, " at line %ld column %ld", line, column);
|
|
1193
|
+
rb_exc_raise(parse_error_new(state, message, line, column, false));
|
|
1194
|
+
}
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs)
|
|
1198
|
+
{
|
|
1199
|
+
switch (config->on_duplicate_key) {
|
|
1200
|
+
case JSON_IGNORE:
|
|
1201
|
+
return;
|
|
1202
|
+
|
|
1203
|
+
case JSON_DEPRECATED:
|
|
1204
|
+
// Only emit the first few deprecations to avoid spamming.
|
|
1205
|
+
if (state->emitted_deprecations < MAX_DEPRECATIONS) {
|
|
1206
|
+
state->emitted_deprecations++;
|
|
1207
|
+
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
|
|
1208
|
+
}
|
|
1209
|
+
return;
|
|
1210
|
+
|
|
1211
|
+
case JSON_RAISE:
|
|
1212
|
+
raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
|
|
1213
|
+
return;
|
|
1214
|
+
}
|
|
1215
|
+
UNREACHABLE;
|
|
942
1216
|
}
|
|
943
1217
|
|
|
944
1218
|
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
|
945
1219
|
{
|
|
946
1220
|
size_t entries_count = count / 2;
|
|
947
1221
|
VALUE object = rb_hash_new_capa(entries_count);
|
|
948
|
-
const VALUE *pairs = rvalue_stack_peek(state->
|
|
1222
|
+
const VALUE *pairs = rvalue_stack_peek(state->value_stack, count);
|
|
949
1223
|
rb_hash_bulk_insert(count, pairs, object);
|
|
950
1224
|
|
|
951
1225
|
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
|
|
952
|
-
|
|
953
|
-
case JSON_IGNORE:
|
|
954
|
-
break;
|
|
955
|
-
case JSON_DEPRECATED:
|
|
956
|
-
// Only emit the first few deprecations to avoid spamming.
|
|
957
|
-
if (state->emitted_deprecations < 5) {
|
|
958
|
-
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
|
|
959
|
-
state->emitted_deprecations++;
|
|
960
|
-
}
|
|
961
|
-
|
|
962
|
-
break;
|
|
963
|
-
case JSON_RAISE:
|
|
964
|
-
raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
|
|
965
|
-
break;
|
|
966
|
-
}
|
|
1226
|
+
json_on_duplicate_key(state, config, count, pairs);
|
|
967
1227
|
}
|
|
968
1228
|
|
|
969
|
-
rvalue_stack_pop(state->
|
|
1229
|
+
rvalue_stack_pop(state->value_stack, count);
|
|
970
1230
|
|
|
971
1231
|
if (config->freeze) {
|
|
972
1232
|
RB_OBJ_FREEZE(object);
|
|
@@ -980,7 +1240,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
|
|
|
980
1240
|
if (RB_UNLIKELY(config->on_load_proc)) {
|
|
981
1241
|
value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
|
|
982
1242
|
}
|
|
983
|
-
rvalue_stack_push(state->
|
|
1243
|
+
rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack);
|
|
984
1244
|
return value;
|
|
985
1245
|
}
|
|
986
1246
|
|
|
@@ -1053,7 +1313,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
|
|
|
1053
1313
|
case '"': {
|
|
1054
1314
|
VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
|
|
1055
1315
|
state->cursor++;
|
|
1056
|
-
return
|
|
1316
|
+
return string;
|
|
1057
1317
|
}
|
|
1058
1318
|
case '\\': {
|
|
1059
1319
|
if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
|
|
@@ -1067,7 +1327,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
|
|
|
1067
1327
|
}
|
|
1068
1328
|
default:
|
|
1069
1329
|
if (!config->allow_control_characters) {
|
|
1070
|
-
|
|
1330
|
+
raise_syntax_error("invalid ASCII control character in string: %s", state);
|
|
1071
1331
|
}
|
|
1072
1332
|
break;
|
|
1073
1333
|
}
|
|
@@ -1075,8 +1335,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
|
|
|
1075
1335
|
state->cursor++;
|
|
1076
1336
|
} while (string_scan(state));
|
|
1077
1337
|
|
|
1078
|
-
|
|
1079
|
-
return Qfalse;
|
|
1338
|
+
return Qundef;
|
|
1080
1339
|
}
|
|
1081
1340
|
|
|
1082
1341
|
ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
|
@@ -1085,15 +1344,19 @@ ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_Pars
|
|
|
1085
1344
|
const char *start = state->cursor;
|
|
1086
1345
|
|
|
1087
1346
|
if (RB_UNLIKELY(!string_scan(state))) {
|
|
1088
|
-
|
|
1347
|
+
return Qundef;
|
|
1089
1348
|
}
|
|
1090
1349
|
|
|
1350
|
+
VALUE string;
|
|
1091
1351
|
if (RB_LIKELY(*state->cursor == '"')) {
|
|
1092
|
-
|
|
1352
|
+
string = json_string_fastpath(state, config, start, state->cursor, is_name);
|
|
1093
1353
|
state->cursor++;
|
|
1094
|
-
return json_push_value(state, config, string);
|
|
1095
1354
|
}
|
|
1096
|
-
|
|
1355
|
+
else {
|
|
1356
|
+
string = json_parse_escaped_string(state, config, is_name, start);
|
|
1357
|
+
}
|
|
1358
|
+
|
|
1359
|
+
return string;
|
|
1097
1360
|
}
|
|
1098
1361
|
|
|
1099
1362
|
#if JSON_CPU_LITTLE_ENDIAN_64BITS
|
|
@@ -1180,7 +1443,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1180
1443
|
int mantissa_digits = json_parse_digits(state, &mantissa);
|
|
1181
1444
|
|
|
1182
1445
|
if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
|
|
1183
|
-
|
|
1446
|
+
return Qundef;
|
|
1184
1447
|
}
|
|
1185
1448
|
|
|
1186
1449
|
// Parse fractional part
|
|
@@ -1193,7 +1456,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1193
1456
|
mantissa_digits += fractional_digits;
|
|
1194
1457
|
|
|
1195
1458
|
if (RB_UNLIKELY(!fractional_digits)) {
|
|
1196
|
-
|
|
1459
|
+
return Qundef;
|
|
1197
1460
|
}
|
|
1198
1461
|
}
|
|
1199
1462
|
|
|
@@ -1213,7 +1476,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1213
1476
|
int exponent_digits = json_parse_digits(state, &abs_exponent);
|
|
1214
1477
|
|
|
1215
1478
|
if (RB_UNLIKELY(!exponent_digits)) {
|
|
1216
|
-
|
|
1479
|
+
return Qundef;
|
|
1217
1480
|
}
|
|
1218
1481
|
|
|
1219
1482
|
if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) {
|
|
@@ -1235,229 +1498,411 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
|
|
|
1235
1498
|
return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
|
|
1236
1499
|
}
|
|
1237
1500
|
|
|
1238
|
-
|
|
1501
|
+
// How many values (array elements, or interleaved object keys+values) have been
|
|
1502
|
+
// pushed onto the rvalue stack since this container opened. Used to size the
|
|
1503
|
+
// bulk decode on close, and to tell the first key/colon from later ones.
|
|
1504
|
+
static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack)
|
|
1239
1505
|
{
|
|
1240
|
-
return
|
|
1506
|
+
return value_stack->head - frame->value_stack_head;
|
|
1241
1507
|
}
|
|
1242
1508
|
|
|
1243
|
-
|
|
1509
|
+
// A complete value now sits on top of the rvalue stack. Advance the frame that
|
|
1510
|
+
// was waiting for it: the root document is done, or the enclosing container
|
|
1511
|
+
// moves on to expecting a ',' or its closing bracket. The caller passes the
|
|
1512
|
+
// frame it already has in hand -- the one that was expecting the value -- which
|
|
1513
|
+
// after a container close is the freshly re-exposed parent.
|
|
1514
|
+
static inline enum json_frame_phase json_value_completed(json_frame *frame)
|
|
1244
1515
|
{
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1516
|
+
JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT);
|
|
1517
|
+
JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY);
|
|
1518
|
+
JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT);
|
|
1519
|
+
|
|
1520
|
+
return frame->phase = (enum json_frame_phase) frame->type;
|
|
1248
1521
|
}
|
|
1249
1522
|
|
|
1250
|
-
static
|
|
1523
|
+
ALWAYS_INLINE(static) void json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
|
|
1251
1524
|
{
|
|
1252
|
-
|
|
1525
|
+
// It is assumed that since `keyword` is always a literal, the compiler is able to constantize this
|
|
1526
|
+
// `strlen` and several other computations in that routine.
|
|
1253
1527
|
|
|
1254
|
-
|
|
1255
|
-
case 'n':
|
|
1256
|
-
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
|
|
1257
|
-
state->cursor += 4;
|
|
1258
|
-
return json_push_value(state, config, Qnil);
|
|
1259
|
-
}
|
|
1528
|
+
size_t len = strlen(keyword);
|
|
1260
1529
|
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
}
|
|
1530
|
+
// Note: memcmp with a small power of two and a literal string compile to an integer comparison /
|
|
1531
|
+
// That's why we sometime compare starting from the first byte and sometimes from the second.
|
|
1532
|
+
if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) {
|
|
1533
|
+
state->cursor += len;
|
|
1534
|
+
return;
|
|
1535
|
+
}
|
|
1268
1536
|
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1273
|
-
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
|
|
1274
|
-
state->cursor += 5;
|
|
1275
|
-
return json_push_value(state, config, Qfalse);
|
|
1276
|
-
}
|
|
1537
|
+
bool eos = rest(state) < len && memcmp(state->cursor, keyword, rest(state)) == 0;
|
|
1538
|
+
raise_parse_error("unexpected token %s", state, eos);
|
|
1539
|
+
}
|
|
1277
1540
|
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1541
|
+
// Parse an arbitrary JSON value iteratively. This is a state machine driven
|
|
1542
|
+
// entirely by the top frame's phase so it can stop at any value boundary and
|
|
1543
|
+
// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
|
|
1544
|
+
// bottom of the stack, so the stack is never empty mid-parse and the document
|
|
1545
|
+
// itself is just another frame whose value, once parsed, leaves its phase DONE.
|
|
1546
|
+
// When invoked in resumable mode, it returns true after parsing a complete document.
|
|
1547
|
+
// If reaching EOS without having parsed a complete document, either returns false
|
|
1548
|
+
// of raise a JSON::ParserError tagged with `@eos=true`.
|
|
1549
|
+
ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config, bool resumable)
|
|
1550
|
+
{
|
|
1551
|
+
json_frame *frame = json_frame_stack_peek(state->frames);
|
|
1286
1552
|
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
case
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1553
|
+
switch (frame->phase) {
|
|
1554
|
+
case JSON_PHASE_DONE: JSON_UNREACHABLE_RETURN(false);
|
|
1555
|
+
case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
|
|
1556
|
+
case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
|
|
1557
|
+
case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
|
|
1558
|
+
case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY;
|
|
1559
|
+
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
|
|
1560
|
+
}
|
|
1561
|
+
JSON_UNREACHABLE_RETURN(false);
|
|
1294
1562
|
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
case '-': {
|
|
1298
|
-
// Note: memcmp with a small power of two compile to an integer comparison
|
|
1299
|
-
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
|
|
1300
|
-
if (config->allow_nan) {
|
|
1301
|
-
state->cursor += 9;
|
|
1302
|
-
return json_push_value(state, config, CMinusInfinity);
|
|
1303
|
-
} else {
|
|
1304
|
-
raise_parse_error("unexpected token %s", state);
|
|
1305
|
-
}
|
|
1306
|
-
}
|
|
1307
|
-
return json_push_value(state, config, json_parse_negative_number(state, config));
|
|
1308
|
-
break;
|
|
1309
|
-
}
|
|
1310
|
-
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
|
|
1311
|
-
return json_push_value(state, config, json_parse_positive_number(state, config));
|
|
1312
|
-
break;
|
|
1313
|
-
case '"': {
|
|
1314
|
-
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1315
|
-
return json_parse_string(state, config, false);
|
|
1316
|
-
break;
|
|
1317
|
-
}
|
|
1318
|
-
case '[': {
|
|
1319
|
-
state->cursor++;
|
|
1320
|
-
json_eat_whitespace(state);
|
|
1321
|
-
long stack_head = state->stack->head;
|
|
1563
|
+
JSON_PHASE_VALUE: {
|
|
1564
|
+
json_eat_whitespace(state, config, true);
|
|
1322
1565
|
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
return json_push_value(state, config, json_decode_array(state, config, 0));
|
|
1326
|
-
} else {
|
|
1327
|
-
state->current_nesting++;
|
|
1328
|
-
if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
|
|
1329
|
-
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1330
|
-
}
|
|
1331
|
-
state->in_array++;
|
|
1332
|
-
json_parse_any(state, config);
|
|
1333
|
-
}
|
|
1566
|
+
VALUE value;
|
|
1567
|
+
const char *value_start = state->cursor;
|
|
1334
1568
|
|
|
1335
|
-
|
|
1336
|
-
|
|
1569
|
+
switch (peek(state)) {
|
|
1570
|
+
case 'n':
|
|
1571
|
+
json_match_keyword(state, "null", 0);
|
|
1572
|
+
value = Qnil;
|
|
1573
|
+
break;
|
|
1337
1574
|
|
|
1338
|
-
|
|
1575
|
+
case 't':
|
|
1576
|
+
json_match_keyword(state, "true", 0);
|
|
1577
|
+
value = Qtrue;
|
|
1578
|
+
break;
|
|
1339
1579
|
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
if (peek(state) == ']') {
|
|
1345
|
-
continue;
|
|
1346
|
-
}
|
|
1347
|
-
}
|
|
1348
|
-
json_parse_any(state, config);
|
|
1349
|
-
continue;
|
|
1350
|
-
}
|
|
1580
|
+
case 'f':
|
|
1581
|
+
json_match_keyword(state, "false", 1);
|
|
1582
|
+
value = Qfalse;
|
|
1583
|
+
break;
|
|
1351
1584
|
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
state->current_nesting--;
|
|
1356
|
-
state->in_array--;
|
|
1357
|
-
return json_push_value(state, config, json_decode_array(state, config, count));
|
|
1585
|
+
case 'N':
|
|
1586
|
+
if (!config->allow_nan) {
|
|
1587
|
+
raise_syntax_error("unexpected token %s", state);
|
|
1358
1588
|
}
|
|
1359
1589
|
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
}
|
|
1364
|
-
case '{': {
|
|
1365
|
-
const char *object_start_cursor = state->cursor;
|
|
1590
|
+
json_match_keyword(state, "NaN", 1);
|
|
1591
|
+
value = CNaN;
|
|
1592
|
+
break;
|
|
1366
1593
|
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1594
|
+
case 'I':
|
|
1595
|
+
if (!config->allow_nan) {
|
|
1596
|
+
raise_syntax_error("unexpected token %s", state);
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
json_match_keyword(state, "Infinity", 0);
|
|
1600
|
+
value = CInfinity;
|
|
1601
|
+
break;
|
|
1370
1602
|
|
|
1371
|
-
|
|
1603
|
+
case '-': {
|
|
1372
1604
|
state->cursor++;
|
|
1373
|
-
return json_push_value(state, config, json_decode_object(state, config, 0));
|
|
1374
|
-
} else {
|
|
1375
|
-
state->current_nesting++;
|
|
1376
|
-
if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
|
|
1377
|
-
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1378
|
-
}
|
|
1379
1605
|
|
|
1380
|
-
|
|
1381
|
-
|
|
1606
|
+
value = json_parse_number(state, config, true, value_start);
|
|
1607
|
+
|
|
1608
|
+
if (RB_UNLIKELY(UNDEF_P(value) && config->allow_nan && peek(state) == 'I')) {
|
|
1609
|
+
state->cursor = value_start;
|
|
1610
|
+
json_match_keyword(state, "-Infinity", 1);
|
|
1611
|
+
value = CMinusInfinity;
|
|
1612
|
+
break;
|
|
1382
1613
|
}
|
|
1383
|
-
json_parse_string(state, config, true);
|
|
1384
1614
|
|
|
1385
|
-
|
|
1386
|
-
if
|
|
1387
|
-
|
|
1615
|
+
// Top level numbers are ambiguous when parsing streams, we can't
|
|
1616
|
+
// know if we parsed all the digits if we hit EOS.
|
|
1617
|
+
if (RB_UNLIKELY(resumable && eos(state))) {
|
|
1618
|
+
state->cursor = value_start;
|
|
1619
|
+
return false;
|
|
1388
1620
|
}
|
|
1389
|
-
state->cursor++;
|
|
1390
1621
|
|
|
1391
|
-
|
|
1392
|
-
|
|
1622
|
+
if (RB_UNLIKELY(UNDEF_P(value))) {
|
|
1623
|
+
raise_syntax_error_at("invalid number: %s", state, value_start);
|
|
1624
|
+
}
|
|
1625
|
+
break;
|
|
1626
|
+
}
|
|
1393
1627
|
|
|
1394
|
-
|
|
1395
|
-
|
|
1628
|
+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
|
|
1629
|
+
value = json_parse_number(state, config, false, value_start);
|
|
1396
1630
|
|
|
1397
|
-
|
|
1398
|
-
if
|
|
1399
|
-
|
|
1400
|
-
state->
|
|
1401
|
-
|
|
1631
|
+
// Top level numbers are ambiguous when parsing streams, we can't
|
|
1632
|
+
// know if we parsed all the digits if we hit EOS.
|
|
1633
|
+
if (RB_UNLIKELY(resumable && eos(state))) {
|
|
1634
|
+
state->cursor = value_start;
|
|
1635
|
+
return false;
|
|
1636
|
+
}
|
|
1402
1637
|
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1638
|
+
if (RB_UNLIKELY(UNDEF_P(value))) {
|
|
1639
|
+
raise_syntax_error_at("invalid number: %s", state, value_start);
|
|
1640
|
+
}
|
|
1641
|
+
break;
|
|
1642
|
+
}
|
|
1408
1643
|
|
|
1409
|
-
|
|
1644
|
+
case '"': {
|
|
1645
|
+
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
|
|
1646
|
+
value = json_parse_string(state, config, false);
|
|
1647
|
+
|
|
1648
|
+
if (RB_UNLIKELY(UNDEF_P(value))) {
|
|
1649
|
+
bool is_eos = eos(state);
|
|
1650
|
+
if (resumable && is_eos) {
|
|
1651
|
+
state->cursor = value_start;
|
|
1652
|
+
return false;
|
|
1653
|
+
}
|
|
1654
|
+
raise_parse_error("unexpected end of input, expected closing \"", state, is_eos);
|
|
1410
1655
|
}
|
|
1656
|
+
break;
|
|
1657
|
+
}
|
|
1411
1658
|
|
|
1412
|
-
|
|
1659
|
+
case '[': {
|
|
1660
|
+
state->cursor++;
|
|
1661
|
+
json_eat_whitespace(state, config, true);
|
|
1662
|
+
|
|
1663
|
+
const char next = peek(state);
|
|
1664
|
+
if (next == ']') {
|
|
1413
1665
|
state->cursor++;
|
|
1414
|
-
|
|
1666
|
+
value = json_decode_array(state, config, 0);
|
|
1667
|
+
break;
|
|
1668
|
+
} else if (resumable && eos(state)) {
|
|
1669
|
+
state->cursor = value_start;
|
|
1670
|
+
return false;
|
|
1671
|
+
}
|
|
1415
1672
|
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1673
|
+
state->current_nesting++;
|
|
1674
|
+
if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
|
|
1675
|
+
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1676
|
+
}
|
|
1677
|
+
state->in_array++;
|
|
1421
1678
|
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1679
|
+
// Phase stays VALUE: the next iteration reads the first element.
|
|
1680
|
+
frame = json_frame_stack_push(state, (json_frame){
|
|
1681
|
+
.type = JSON_FRAME_ARRAY,
|
|
1682
|
+
.phase = JSON_PHASE_VALUE,
|
|
1683
|
+
.value_stack_head = state->value_stack->head,
|
|
1684
|
+
});
|
|
1685
|
+
goto JSON_PHASE_VALUE;
|
|
1686
|
+
}
|
|
1426
1687
|
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1688
|
+
case '{': {
|
|
1689
|
+
state->cursor++;
|
|
1690
|
+
json_eat_whitespace(state, config, true);
|
|
1691
|
+
|
|
1692
|
+
if (peek(state) == '}') {
|
|
1431
1693
|
state->cursor++;
|
|
1694
|
+
value = json_decode_object(state, config, 0);
|
|
1695
|
+
break;
|
|
1696
|
+
} else if (resumable && eos(state)) {
|
|
1697
|
+
state->cursor = value_start;
|
|
1698
|
+
return false;
|
|
1699
|
+
}
|
|
1432
1700
|
|
|
1433
|
-
|
|
1701
|
+
state->current_nesting++;
|
|
1702
|
+
if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
|
|
1703
|
+
rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
// Phase KEY: the next iteration reads the first key.
|
|
1707
|
+
frame = json_frame_stack_push(state, (json_frame){
|
|
1708
|
+
.type = JSON_FRAME_OBJECT,
|
|
1709
|
+
.phase = JSON_PHASE_OBJECT_KEY,
|
|
1710
|
+
.value_stack_head = state->value_stack->head,
|
|
1711
|
+
.start_offset = value_start - state->start,
|
|
1712
|
+
});
|
|
1713
|
+
goto JSON_PHASE_OBJECT_KEY;
|
|
1714
|
+
}
|
|
1434
1715
|
|
|
1435
|
-
|
|
1716
|
+
case 0:
|
|
1717
|
+
// peek() returns 0 both at end-of-stream and for a literal NUL byte in the
|
|
1718
|
+
// buffer. Only a genuine EOS means "feed me more"; a NUL byte that is not at
|
|
1719
|
+
// EOS is just an invalid character.
|
|
1720
|
+
if (eos(state)) {
|
|
1721
|
+
return false;
|
|
1722
|
+
} else {
|
|
1723
|
+
raise_syntax_error("unexpected NULL byte: %s", state);
|
|
1436
1724
|
}
|
|
1725
|
+
default:
|
|
1726
|
+
raise_syntax_error("unexpected character: %s", state);
|
|
1727
|
+
}
|
|
1437
1728
|
|
|
1438
|
-
|
|
1729
|
+
json_push_value(state, config, value);
|
|
1730
|
+
json_value_completed(frame);
|
|
1731
|
+
|
|
1732
|
+
switch (frame->phase) {
|
|
1733
|
+
case JSON_PHASE_DONE: return true;
|
|
1734
|
+
case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
|
|
1735
|
+
case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
|
|
1736
|
+
case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
|
|
1737
|
+
case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
|
|
1738
|
+
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
|
|
1739
|
+
}
|
|
1740
|
+
JSON_UNREACHABLE_RETURN(false);
|
|
1741
|
+
}
|
|
1742
|
+
|
|
1743
|
+
JSON_PHASE_OBJECT_KEY: {
|
|
1744
|
+
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
|
|
1745
|
+
|
|
1746
|
+
json_eat_whitespace(state, config, true);
|
|
1747
|
+
|
|
1748
|
+
const char *start = state->cursor;
|
|
1749
|
+
|
|
1750
|
+
if (RB_LIKELY(peek(state) == '"')) {
|
|
1751
|
+
VALUE string = json_parse_string(state, config, true);
|
|
1752
|
+
if (UNDEF_P(string)) {
|
|
1753
|
+
if (resumable) {
|
|
1754
|
+
state->cursor = start;
|
|
1755
|
+
return false;
|
|
1756
|
+
} else {
|
|
1757
|
+
raise_syntax_error("unexpected end of input, expected closing \"", state);
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
json_push_value(state, config, string);
|
|
1761
|
+
frame->phase = JSON_PHASE_OBJECT_COLON;
|
|
1762
|
+
goto JSON_PHASE_OBJECT_COLON;
|
|
1763
|
+
} else if (resumable && eos(state)) {
|
|
1764
|
+
return false;
|
|
1765
|
+
} else {
|
|
1766
|
+
// The message differs for the first key vs. a key after a
|
|
1767
|
+
// ',': the first is the only one reached with nothing pushed
|
|
1768
|
+
// for this object yet.
|
|
1769
|
+
if (json_frame_entry_count(frame, state->value_stack) == 0) {
|
|
1770
|
+
raise_syntax_error("expected object key, got %s", state);
|
|
1771
|
+
} else {
|
|
1772
|
+
raise_syntax_error("expected object key, got: %s", state);
|
|
1439
1773
|
}
|
|
1440
|
-
break;
|
|
1441
1774
|
}
|
|
1775
|
+
JSON_UNREACHABLE_RETURN(false);
|
|
1776
|
+
}
|
|
1442
1777
|
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
break;
|
|
1778
|
+
JSON_PHASE_OBJECT_COLON: {
|
|
1779
|
+
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
|
|
1446
1780
|
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1781
|
+
json_eat_whitespace(state, config, true);
|
|
1782
|
+
|
|
1783
|
+
if (RB_LIKELY(peek(state) == ':')) {
|
|
1784
|
+
state->cursor++;
|
|
1785
|
+
frame->phase = JSON_PHASE_VALUE;
|
|
1786
|
+
goto JSON_PHASE_VALUE;
|
|
1787
|
+
} else if (resumable && eos(state)) {
|
|
1788
|
+
return false;
|
|
1789
|
+
} else {
|
|
1790
|
+
// First colon (only the first pair's key is pushed, nothing
|
|
1791
|
+
// else) vs. a later one.
|
|
1792
|
+
if (json_frame_entry_count(frame, state->value_stack) == 1) {
|
|
1793
|
+
raise_syntax_error("expected ':' after object key", state);
|
|
1794
|
+
} else {
|
|
1795
|
+
raise_syntax_error("expected ':' after object key, got: %s", state);
|
|
1796
|
+
}
|
|
1797
|
+
}
|
|
1798
|
+
JSON_UNREACHABLE_RETURN(false);
|
|
1450
1799
|
}
|
|
1451
1800
|
|
|
1452
|
-
|
|
1453
|
-
|
|
1801
|
+
JSON_PHASE_ARRAY_COMMA: {
|
|
1802
|
+
JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
|
|
1803
|
+
|
|
1804
|
+
json_eat_whitespace(state, config, true);
|
|
1805
|
+
|
|
1806
|
+
const char next_char = peek(state);
|
|
1807
|
+
|
|
1808
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1809
|
+
state->cursor++;
|
|
1810
|
+
if (config->allow_trailing_comma) {
|
|
1811
|
+
json_eat_whitespace(state, config, true);
|
|
1812
|
+
if (peek(state) == ']') {
|
|
1813
|
+
// Trailing comma: stay in COMMA to close on the next iteration.
|
|
1814
|
+
goto JSON_PHASE_ARRAY_COMMA;
|
|
1815
|
+
}
|
|
1816
|
+
}
|
|
1817
|
+
frame->phase = JSON_PHASE_VALUE;
|
|
1818
|
+
goto JSON_PHASE_VALUE;
|
|
1819
|
+
} else if (next_char == ']') {
|
|
1820
|
+
state->cursor++;
|
|
1821
|
+
long count = json_frame_entry_count(frame, state->value_stack);
|
|
1822
|
+
state->current_nesting--;
|
|
1823
|
+
state->in_array--;
|
|
1824
|
+
|
|
1825
|
+
json_push_value(state, config, json_decode_array(state, config, count));
|
|
1826
|
+
json_frame_stack_pop(state->frames);
|
|
1827
|
+
frame = json_frame_stack_peek(state->frames);
|
|
1828
|
+
|
|
1829
|
+
json_value_completed(frame);
|
|
1830
|
+
|
|
1831
|
+
switch (frame->phase) {
|
|
1832
|
+
case JSON_PHASE_DONE: return true;
|
|
1833
|
+
case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
|
|
1834
|
+
case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
|
|
1835
|
+
case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
|
|
1836
|
+
case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
|
|
1837
|
+
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
|
|
1838
|
+
}
|
|
1839
|
+
} else if (resumable && eos(state)) {
|
|
1840
|
+
return false;
|
|
1841
|
+
} else {
|
|
1842
|
+
raise_syntax_error("expected ',' or ']' after array value", state);
|
|
1843
|
+
}
|
|
1844
|
+
JSON_UNREACHABLE_RETURN(false);
|
|
1845
|
+
}
|
|
1846
|
+
|
|
1847
|
+
JSON_PHASE_OBJECT_COMMA: {
|
|
1848
|
+
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
|
|
1849
|
+
|
|
1850
|
+
json_eat_whitespace(state, config, true);
|
|
1851
|
+
const char next_char = peek(state);
|
|
1852
|
+
|
|
1853
|
+
if (RB_LIKELY(next_char == ',')) {
|
|
1854
|
+
state->cursor++;
|
|
1855
|
+
json_eat_whitespace(state, config, true);
|
|
1856
|
+
|
|
1857
|
+
if (config->allow_trailing_comma) {
|
|
1858
|
+
if (peek(state) == '}') {
|
|
1859
|
+
// Trailing comma: stay in COMMA to close on the next iteration.
|
|
1860
|
+
goto JSON_PHASE_OBJECT_COMMA;
|
|
1861
|
+
}
|
|
1862
|
+
}
|
|
1863
|
+
|
|
1864
|
+
frame->phase = JSON_PHASE_OBJECT_KEY;
|
|
1865
|
+
goto JSON_PHASE_OBJECT_KEY;
|
|
1866
|
+
} else if (next_char == '}') {
|
|
1867
|
+
state->cursor++;
|
|
1868
|
+
state->current_nesting--;
|
|
1869
|
+
size_t count = json_frame_entry_count(frame, state->value_stack);
|
|
1870
|
+
|
|
1871
|
+
// Temporary rewind cursor in case an error is raised
|
|
1872
|
+
const char *final_cursor = state->cursor;
|
|
1873
|
+
state->cursor = state->start + frame->start_offset;
|
|
1874
|
+
VALUE object = json_decode_object(state, config, count);
|
|
1875
|
+
state->cursor = final_cursor;
|
|
1876
|
+
|
|
1877
|
+
json_push_value(state, config, object);
|
|
1878
|
+
json_frame_stack_pop(state->frames);
|
|
1879
|
+
frame = json_frame_stack_peek(state->frames);
|
|
1880
|
+
json_value_completed(frame);
|
|
1881
|
+
|
|
1882
|
+
switch (frame->phase) {
|
|
1883
|
+
case JSON_PHASE_DONE: return true;
|
|
1884
|
+
case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
|
|
1885
|
+
case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
|
|
1886
|
+
case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
|
|
1887
|
+
case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
|
|
1888
|
+
case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
|
|
1889
|
+
}
|
|
1890
|
+
} else if (resumable && eos(state)) {
|
|
1891
|
+
return false;
|
|
1892
|
+
} else {
|
|
1893
|
+
raise_syntax_error("expected ',' or '}' after object value, got: %s", state);
|
|
1894
|
+
}
|
|
1895
|
+
JSON_UNREACHABLE_RETURN(false);
|
|
1896
|
+
}
|
|
1897
|
+
|
|
1898
|
+
JSON_UNREACHABLE_RETURN(false);
|
|
1454
1899
|
}
|
|
1455
1900
|
|
|
1456
|
-
static void json_ensure_eof(JSON_ParserState *state)
|
|
1901
|
+
static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1457
1902
|
{
|
|
1458
|
-
json_eat_whitespace(state);
|
|
1903
|
+
json_eat_whitespace(state, config, true);
|
|
1459
1904
|
if (!eos(state)) {
|
|
1460
|
-
|
|
1905
|
+
raise_syntax_error("unexpected token at end of stream %s", state);
|
|
1461
1906
|
}
|
|
1462
1907
|
}
|
|
1463
1908
|
|
|
@@ -1495,6 +1940,8 @@ static VALUE convert_encoding(VALUE source)
|
|
|
1495
1940
|
struct parser_config_init_args {
|
|
1496
1941
|
JSON_ParserConfig *config;
|
|
1497
1942
|
VALUE self;
|
|
1943
|
+
VALUE unknown_keywords;
|
|
1944
|
+
bool strict;
|
|
1498
1945
|
};
|
|
1499
1946
|
|
|
1500
1947
|
static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val)
|
|
@@ -1512,6 +1959,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1512
1959
|
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
|
|
1513
1960
|
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
|
|
1514
1961
|
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
|
|
1962
|
+
else if (key == sym_allow_comments) { config->on_comment = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1515
1963
|
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
|
|
1516
1964
|
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
|
|
1517
1965
|
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
|
@@ -1547,27 +1995,42 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
|
1547
1995
|
}
|
|
1548
1996
|
}
|
|
1549
1997
|
}
|
|
1998
|
+
else if (args->strict) {
|
|
1999
|
+
if (!args->unknown_keywords) {
|
|
2000
|
+
args->unknown_keywords = rb_obj_hide(rb_ary_new());
|
|
2001
|
+
}
|
|
2002
|
+
rb_ary_push(args->unknown_keywords, key);
|
|
2003
|
+
}
|
|
1550
2004
|
|
|
1551
2005
|
return ST_CONTINUE;
|
|
1552
2006
|
}
|
|
1553
2007
|
|
|
1554
|
-
static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self)
|
|
2008
|
+
static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self, bool strict)
|
|
1555
2009
|
{
|
|
1556
2010
|
config->max_nesting = 100;
|
|
1557
2011
|
|
|
1558
2012
|
struct parser_config_init_args args = {
|
|
1559
2013
|
.config = config,
|
|
1560
2014
|
.self = self,
|
|
2015
|
+
.strict = strict,
|
|
1561
2016
|
};
|
|
1562
2017
|
|
|
1563
|
-
if (
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
// We assume in most cases few keys are set so it's faster to go over
|
|
1567
|
-
// the provided keys than to check all possible keys.
|
|
1568
|
-
rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args);
|
|
1569
|
-
}
|
|
2018
|
+
if (NIL_P(opts)) return;
|
|
2019
|
+
Check_Type(opts, T_HASH);
|
|
2020
|
+
if (RHASH_SIZE(opts) == 0) return;
|
|
1570
2021
|
|
|
2022
|
+
// We assume in most cases few keys are set so it's faster to go over
|
|
2023
|
+
// the provided keys than to check all possible keys.
|
|
2024
|
+
rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args);
|
|
2025
|
+
|
|
2026
|
+
if (RB_UNLIKELY(args.unknown_keywords)) {
|
|
2027
|
+
if (RARRAY_LEN(args.unknown_keywords) == 1) {
|
|
2028
|
+
rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, RARRAY_AREF(args.unknown_keywords, 0));
|
|
2029
|
+
}
|
|
2030
|
+
else {
|
|
2031
|
+
VALUE keywords = rb_ary_join(args.unknown_keywords, rb_utf8_str_new_cstr(", "));
|
|
2032
|
+
rb_raise(rb_eArgError, "unknown keywords: %" PRIsVALUE, keywords);
|
|
2033
|
+
}
|
|
1571
2034
|
}
|
|
1572
2035
|
}
|
|
1573
2036
|
|
|
@@ -1576,30 +2039,16 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self
|
|
|
1576
2039
|
*
|
|
1577
2040
|
* Creates a new JSON::Ext::ParserConfig instance.
|
|
1578
2041
|
*
|
|
1579
|
-
*
|
|
1580
|
-
*
|
|
2042
|
+
* Argument +opts+, if given, contains a \Hash of options for the parsing.
|
|
2043
|
+
* See {Parsing Options}[#module-JSON-label-Parsing+Options].
|
|
1581
2044
|
*
|
|
1582
|
-
* _opts_ can have the following keys:
|
|
1583
|
-
* * *max_nesting*: The maximum depth of nesting allowed in the parsed data
|
|
1584
|
-
* structures. Disable depth checking with :max_nesting => false|nil|0, it
|
|
1585
|
-
* defaults to 100.
|
|
1586
|
-
* * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
|
|
1587
|
-
* defiance of RFC 4627 to be parsed by the Parser. This option defaults to
|
|
1588
|
-
* false.
|
|
1589
|
-
* * *symbolize_names*: If set to true, returns symbols for the names
|
|
1590
|
-
* (keys) in a JSON object. Otherwise strings are returned, which is
|
|
1591
|
-
* also the default. It's not possible to use this option in
|
|
1592
|
-
* conjunction with the *create_additions* option.
|
|
1593
|
-
* * *decimal_class*: Specifies which class to use instead of the default
|
|
1594
|
-
* (Float) when parsing decimal numbers. This class must accept a single
|
|
1595
|
-
* string argument in its constructor.
|
|
1596
2045
|
*/
|
|
1597
2046
|
static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
|
|
1598
2047
|
{
|
|
1599
2048
|
rb_check_frozen(self);
|
|
1600
2049
|
GET_PARSER_CONFIG;
|
|
1601
2050
|
|
|
1602
|
-
parser_config_init(config, opts, self);
|
|
2051
|
+
parser_config_init(config, opts, self, false);
|
|
1603
2052
|
|
|
1604
2053
|
return self;
|
|
1605
2054
|
}
|
|
@@ -1616,35 +2065,64 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
|
|
|
1616
2065
|
}
|
|
1617
2066
|
|
|
1618
2067
|
VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
|
|
1619
|
-
rvalue_stack
|
|
2068
|
+
rvalue_stack value_stack = {
|
|
1620
2069
|
.type = RVALUE_STACK_STACK_ALLOCATED,
|
|
1621
2070
|
.ptr = rvalue_stack_buffer,
|
|
1622
2071
|
.capa = RVALUE_STACK_INITIAL_CAPA,
|
|
1623
2072
|
};
|
|
1624
2073
|
|
|
2074
|
+
// Seed the frame stack with the root frame, establishing the invariant that
|
|
2075
|
+
// json_parse_any always has a top frame to dispatch on (so the stack is never
|
|
2076
|
+
// empty mid-parse).
|
|
2077
|
+
json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA];
|
|
2078
|
+
frame_stack_buffer[0] = (json_frame){
|
|
2079
|
+
.type = JSON_FRAME_ROOT,
|
|
2080
|
+
.phase = JSON_PHASE_VALUE,
|
|
2081
|
+
};
|
|
2082
|
+
json_frame_stack frames = {
|
|
2083
|
+
.type = RVALUE_STACK_STACK_ALLOCATED,
|
|
2084
|
+
.ptr = frame_stack_buffer,
|
|
2085
|
+
.capa = JSON_FRAME_STACK_INITIAL_CAPA,
|
|
2086
|
+
.head = 1,
|
|
2087
|
+
};
|
|
2088
|
+
|
|
1625
2089
|
long len;
|
|
1626
2090
|
const char *start;
|
|
1627
2091
|
|
|
1628
2092
|
RSTRING_GETMEM(Vsource, start, len);
|
|
1629
2093
|
|
|
1630
|
-
VALUE
|
|
2094
|
+
VALUE value_stack_handle = 0;
|
|
2095
|
+
VALUE frame_stack_handle = 0;
|
|
1631
2096
|
JSON_ParserState _state = {
|
|
1632
2097
|
.start = start,
|
|
1633
2098
|
.cursor = start,
|
|
1634
2099
|
.end = start + len,
|
|
1635
|
-
.
|
|
1636
|
-
.
|
|
2100
|
+
.value_stack = &value_stack,
|
|
2101
|
+
.value_stack_handle = &value_stack_handle,
|
|
2102
|
+
.frames = &frames,
|
|
2103
|
+
.frame_stack_handle = &frame_stack_handle,
|
|
1637
2104
|
};
|
|
1638
2105
|
JSON_ParserState *state = &_state;
|
|
1639
2106
|
|
|
1640
|
-
|
|
2107
|
+
bool complete = json_parse_any(state, config, false);
|
|
2108
|
+
|
|
2109
|
+
// The root document value is parsed; it is the lone survivor on
|
|
2110
|
+
// the rvalue stack.
|
|
2111
|
+
VALUE result = complete ? *rvalue_stack_peek(state->value_stack, 1) : Qundef;
|
|
1641
2112
|
|
|
1642
2113
|
// This may be skipped in case of exception, but
|
|
1643
2114
|
// it won't cause a leak.
|
|
1644
|
-
rvalue_stack_eagerly_release(
|
|
1645
|
-
|
|
2115
|
+
rvalue_stack_eagerly_release(value_stack_handle);
|
|
2116
|
+
json_frame_stack_eagerly_release(frame_stack_handle);
|
|
2117
|
+
RB_GC_GUARD(value_stack_handle);
|
|
2118
|
+
RB_GC_GUARD(frame_stack_handle);
|
|
1646
2119
|
RB_GC_GUARD(Vsource);
|
|
1647
|
-
|
|
2120
|
+
|
|
2121
|
+
if (complete) {
|
|
2122
|
+
json_ensure_eof(state, config);
|
|
2123
|
+
} else {
|
|
2124
|
+
raise_eos_error("unexpected end of input", state);
|
|
2125
|
+
}
|
|
1648
2126
|
|
|
1649
2127
|
return result;
|
|
1650
2128
|
}
|
|
@@ -1666,7 +2144,7 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
|
|
|
1666
2144
|
{
|
|
1667
2145
|
JSON_ParserConfig _config = {0};
|
|
1668
2146
|
JSON_ParserConfig *config = &_config;
|
|
1669
|
-
parser_config_init(config, opts, false);
|
|
2147
|
+
parser_config_init(config, opts, Qfalse, false);
|
|
1670
2148
|
|
|
1671
2149
|
return cParser_parse(config, Vsource);
|
|
1672
2150
|
}
|
|
@@ -1674,21 +2152,33 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
|
|
|
1674
2152
|
static void JSON_ParserConfig_mark(void *ptr)
|
|
1675
2153
|
{
|
|
1676
2154
|
JSON_ParserConfig *config = ptr;
|
|
1677
|
-
|
|
1678
|
-
|
|
2155
|
+
rb_gc_mark_movable(config->on_load_proc);
|
|
2156
|
+
rb_gc_mark_movable(config->decimal_class);
|
|
1679
2157
|
}
|
|
1680
2158
|
|
|
1681
2159
|
static size_t JSON_ParserConfig_memsize(const void *ptr)
|
|
1682
2160
|
{
|
|
2161
|
+
#ifdef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
2162
|
+
return 0;
|
|
2163
|
+
#else
|
|
1683
2164
|
return sizeof(JSON_ParserConfig);
|
|
2165
|
+
#endif
|
|
2166
|
+
}
|
|
2167
|
+
|
|
2168
|
+
static void JSON_ParserConfig_compact(void *ptr)
|
|
2169
|
+
{
|
|
2170
|
+
JSON_ParserConfig *config = ptr;
|
|
2171
|
+
config->on_load_proc = rb_gc_location(config->on_load_proc);
|
|
2172
|
+
config->decimal_class = rb_gc_location(config->decimal_class);
|
|
1684
2173
|
}
|
|
1685
2174
|
|
|
1686
2175
|
static const rb_data_type_t JSON_ParserConfig_type = {
|
|
1687
2176
|
.wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
|
|
1688
2177
|
.function = {
|
|
1689
|
-
JSON_ParserConfig_mark,
|
|
1690
|
-
RUBY_DEFAULT_FREE,
|
|
1691
|
-
JSON_ParserConfig_memsize,
|
|
2178
|
+
.dmark = JSON_ParserConfig_mark,
|
|
2179
|
+
.dfree = RUBY_DEFAULT_FREE,
|
|
2180
|
+
.dsize = JSON_ParserConfig_memsize,
|
|
2181
|
+
.dcompact = JSON_ParserConfig_compact,
|
|
1692
2182
|
},
|
|
1693
2183
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
|
|
1694
2184
|
};
|
|
@@ -1699,6 +2189,562 @@ static VALUE cJSON_parser_s_allocate(VALUE klass)
|
|
|
1699
2189
|
return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
|
|
1700
2190
|
}
|
|
1701
2191
|
|
|
2192
|
+
static void json_str_clear(VALUE str)
|
|
2193
|
+
{
|
|
2194
|
+
if (RB_OBJ_FROZEN_RAW(str)) {
|
|
2195
|
+
return;
|
|
2196
|
+
}
|
|
2197
|
+
rb_str_replace(str, JSON_empty_string);
|
|
2198
|
+
}
|
|
2199
|
+
|
|
2200
|
+
typedef struct JSON_ResumableParserStruct {
|
|
2201
|
+
JSON_ParserConfig config;
|
|
2202
|
+
JSON_ParserState state;
|
|
2203
|
+
rvalue_stack value_stack;
|
|
2204
|
+
json_frame_stack frames;
|
|
2205
|
+
VALUE buffer;
|
|
2206
|
+
size_t parsed_bytes;
|
|
2207
|
+
size_t incomplete_bytes;
|
|
2208
|
+
bool complete;
|
|
2209
|
+
bool in_use;
|
|
2210
|
+
} JSON_ResumableParser;
|
|
2211
|
+
|
|
2212
|
+
static void JSON_ResumableParser_mark(void *ptr)
|
|
2213
|
+
{
|
|
2214
|
+
JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
|
|
2215
|
+
JSON_ParserConfig_mark(&parser->config);
|
|
2216
|
+
rvalue_stack_mark(&parser->value_stack);
|
|
2217
|
+
rvalue_cache_mark(&parser->state.name_cache);
|
|
2218
|
+
rb_gc_mark(parser->buffer); // pin the buffer
|
|
2219
|
+
rb_gc_mark_movable(parser->state.parser);
|
|
2220
|
+
}
|
|
2221
|
+
|
|
2222
|
+
static void JSON_ResumableParser_free(void *ptr)
|
|
2223
|
+
{
|
|
2224
|
+
JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
|
|
2225
|
+
rvalue_stack_free_buffer(&parser->value_stack);
|
|
2226
|
+
json_frame_stack_free_buffer(&parser->frames);
|
|
2227
|
+
}
|
|
2228
|
+
|
|
2229
|
+
static size_t JSON_ResumableParser_memsize(const void *ptr)
|
|
2230
|
+
{
|
|
2231
|
+
const JSON_ResumableParser *parser = (const JSON_ResumableParser *)ptr;
|
|
2232
|
+
size_t memsize = JSON_ParserConfig_memsize(&parser->config);
|
|
2233
|
+
memsize += rvalue_stack_memsize(&parser->value_stack);
|
|
2234
|
+
memsize += json_frame_stack_memsize(&parser->frames);
|
|
2235
|
+
#ifndef HAVE_RUBY_TYPED_EMBEDDABLE
|
|
2236
|
+
memsize += (
|
|
2237
|
+
sizeof(JSON_ResumableParser)
|
|
2238
|
+
- sizeof(JSON_ParserState)
|
|
2239
|
+
- sizeof(JSON_ParserConfig)
|
|
2240
|
+
- sizeof(rvalue_stack)
|
|
2241
|
+
- sizeof(json_frame_stack)
|
|
2242
|
+
);
|
|
2243
|
+
#endif
|
|
2244
|
+
return memsize;
|
|
2245
|
+
}
|
|
2246
|
+
|
|
2247
|
+
static void JSON_ResumableParser_compact(void *ptr)
|
|
2248
|
+
{
|
|
2249
|
+
JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
|
|
2250
|
+
JSON_ParserConfig_compact(&parser->config);
|
|
2251
|
+
rvalue_stack_compact(&parser->value_stack);
|
|
2252
|
+
rvalue_cache_compact(&parser->state.name_cache);
|
|
2253
|
+
parser->buffer = rb_gc_location(parser->buffer);
|
|
2254
|
+
parser->state.parser = rb_gc_location(parser->state.parser);
|
|
2255
|
+
}
|
|
2256
|
+
|
|
2257
|
+
static const rb_data_type_t JSON_ResumableParser_type = {
|
|
2258
|
+
.wrap_struct_name = "JSON::Ext::ResumableParser",
|
|
2259
|
+
.function = {
|
|
2260
|
+
JSON_ResumableParser_mark,
|
|
2261
|
+
JSON_ResumableParser_free,
|
|
2262
|
+
JSON_ResumableParser_memsize,
|
|
2263
|
+
JSON_ResumableParser_compact,
|
|
2264
|
+
},
|
|
2265
|
+
// RUBY_TYPED_WB_PROTECTED is deliberately not declared because
|
|
2266
|
+
// this is a superset of JSON_Parser_rvalue_stack_type, so we'd need
|
|
2267
|
+
// to trigger a lot of write barriers.
|
|
2268
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
2269
|
+
};
|
|
2270
|
+
|
|
2271
|
+
static VALUE cResumableParser_allocate(VALUE klass)
|
|
2272
|
+
{
|
|
2273
|
+
JSON_ResumableParser *parser;
|
|
2274
|
+
VALUE obj = TypedData_Make_Struct(klass, JSON_ResumableParser, &JSON_ResumableParser_type, parser);
|
|
2275
|
+
parser->state.in_array++;
|
|
2276
|
+
parser->state.parser = obj;
|
|
2277
|
+
return obj;
|
|
2278
|
+
}
|
|
2279
|
+
|
|
2280
|
+
static inline JSON_ResumableParser *cResumableParser_get(VALUE self)
|
|
2281
|
+
{
|
|
2282
|
+
JSON_ResumableParser *parser;
|
|
2283
|
+
TypedData_Get_Struct(self, JSON_ResumableParser, &JSON_ResumableParser_type, parser);
|
|
2284
|
+
return parser;
|
|
2285
|
+
}
|
|
2286
|
+
|
|
2287
|
+
/*
|
|
2288
|
+
* call-seq: new(opts => {})
|
|
2289
|
+
*
|
|
2290
|
+
* Creates a new JSON::ResumableParser instance.
|
|
2291
|
+
*
|
|
2292
|
+
* Argument +opts+, if given, contains a \Hash of options for the parsing.
|
|
2293
|
+
* See {Parsing Options}[#module-JSON-label-Parsing+Options].
|
|
2294
|
+
*
|
|
2295
|
+
* A ResumableParser is able to parse partial documents and resume parsing later
|
|
2296
|
+
* when more of the document is provided:
|
|
2297
|
+
*
|
|
2298
|
+
* parser = JSON::ResumableParser.new
|
|
2299
|
+
* parser << '{"user": "george", "role": "ad'
|
|
2300
|
+
* parser.parse # => false
|
|
2301
|
+
* parser.eos? # => true
|
|
2302
|
+
* parser.partial_value # => { "user" => "george", "role" => nil }
|
|
2303
|
+
* parser.rest # => '"ad'
|
|
2304
|
+
*
|
|
2305
|
+
* parser << 'min" }[1, 2, 3]'
|
|
2306
|
+
* parser.parse # => true
|
|
2307
|
+
* parser.value # => { "user" => "george", "role" => "admin" }
|
|
2308
|
+
*
|
|
2309
|
+
* parser.parse # => true
|
|
2310
|
+
* parser.value # => [1, 2, 3]
|
|
2311
|
+
*
|
|
2312
|
+
* === Limitations
|
|
2313
|
+
*
|
|
2314
|
+
* While ResumableParser is able to parse streams of documents without any
|
|
2315
|
+
* explicit separators between them, it is highly recommended to separate documents
|
|
2316
|
+
* by either spaces or newlines, as otherwise the \JSON syntax for numbers may be ambiguous.
|
|
2317
|
+
* When parsing a number, ResumableParser will not consider the number complete until something follows:
|
|
2318
|
+
*
|
|
2319
|
+
* parser << '123'
|
|
2320
|
+
* parser.parse # => false
|
|
2321
|
+
* parser << ' '
|
|
2322
|
+
* parser.parse # => true
|
|
2323
|
+
* parser.value # => 123
|
|
2324
|
+
*
|
|
2325
|
+
* === Security
|
|
2326
|
+
*
|
|
2327
|
+
* An incomplete document is buffered in full and there is no size limit, so when reading
|
|
2328
|
+
* from an untrusted source the caller is responsible for bounding how much data is fed.
|
|
2329
|
+
* For example:
|
|
2330
|
+
*
|
|
2331
|
+
* loop do
|
|
2332
|
+
* if parser.parsed_bytes > DOCUMENT_MAX_SIZE
|
|
2333
|
+
* raise "document too large"
|
|
2334
|
+
* end
|
|
2335
|
+
*
|
|
2336
|
+
* parser << read_chunk
|
|
2337
|
+
* while parser.parse
|
|
2338
|
+
* process(parser.value)
|
|
2339
|
+
* end
|
|
2340
|
+
* end
|
|
2341
|
+
*/
|
|
2342
|
+
static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
|
|
2343
|
+
{
|
|
2344
|
+
rb_check_frozen(self);
|
|
2345
|
+
|
|
2346
|
+
VALUE opts = Qfalse;
|
|
2347
|
+
rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &opts);
|
|
2348
|
+
JSON_ResumableParser *parser = cResumableParser_get(self);
|
|
2349
|
+
|
|
2350
|
+
opts = argc > 0 ? argv[0] : Qnil;
|
|
2351
|
+
parser_config_init(&parser->config, opts, self, true);
|
|
2352
|
+
|
|
2353
|
+
return self;
|
|
2354
|
+
}
|
|
2355
|
+
|
|
2356
|
+
static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock);
|
|
2357
|
+
|
|
2358
|
+
/*
|
|
2359
|
+
* call-seq: self << string -> self
|
|
2360
|
+
*
|
|
2361
|
+
* Appends the given string to the parser's buffer.
|
|
2362
|
+
*/
|
|
2363
|
+
static VALUE cResumableParser_feed(VALUE self, VALUE str)
|
|
2364
|
+
{
|
|
2365
|
+
rb_check_frozen(self);
|
|
2366
|
+
|
|
2367
|
+
JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
|
|
2368
|
+
|
|
2369
|
+
str = convert_encoding(str);
|
|
2370
|
+
if (!RSTRING_LEN(str)) {
|
|
2371
|
+
return self;
|
|
2372
|
+
}
|
|
2373
|
+
|
|
2374
|
+
size_t offset = parser->state.cursor - parser->state.start;
|
|
2375
|
+
const size_t remaining = parser->state.end - parser->state.cursor;
|
|
2376
|
+
|
|
2377
|
+
if (!remaining) {
|
|
2378
|
+
if (parser->buffer) {
|
|
2379
|
+
json_str_clear(parser->buffer);
|
|
2380
|
+
}
|
|
2381
|
+
parser->buffer = RB_OBJ_FROZEN_RAW(str) ? str : rb_obj_hide(rb_str_new_shared(str));
|
|
2382
|
+
offset = 0;
|
|
2383
|
+
} else {
|
|
2384
|
+
JSON_ASSERT(parser->buffer);
|
|
2385
|
+
|
|
2386
|
+
const size_t size = parser->state.end - parser->state.start;
|
|
2387
|
+
const size_t consumed = size - remaining;
|
|
2388
|
+
|
|
2389
|
+
if (RB_OBJ_FROZEN_RAW(parser->buffer)) {
|
|
2390
|
+
VALUE new_buffer = rb_obj_hide(rb_str_buf_new(remaining + RSTRING_LEN(str)));
|
|
2391
|
+
rb_enc_associate_index(new_buffer, utf8_encindex);
|
|
2392
|
+
|
|
2393
|
+
char *old_ptr = RSTRING_PTR(parser->buffer);
|
|
2394
|
+
memcpy(RSTRING_PTR(new_buffer), old_ptr + consumed, remaining);
|
|
2395
|
+
rb_str_set_len(new_buffer, remaining);
|
|
2396
|
+
offset = 0;
|
|
2397
|
+
parser->buffer = new_buffer;
|
|
2398
|
+
} else if (consumed > (size / 2) && size >= 512) {
|
|
2399
|
+
rb_str_modify(parser->buffer);
|
|
2400
|
+
char *old_ptr = RSTRING_PTR(parser->buffer);
|
|
2401
|
+
memmove(old_ptr, old_ptr + consumed, remaining);
|
|
2402
|
+
rb_str_set_len(parser->buffer, remaining);
|
|
2403
|
+
offset = 0;
|
|
2404
|
+
}
|
|
2405
|
+
rb_str_append(parser->buffer, str);
|
|
2406
|
+
}
|
|
2407
|
+
|
|
2408
|
+
long len;
|
|
2409
|
+
const char *start;
|
|
2410
|
+
RSTRING_GETMEM(parser->buffer, start, len);
|
|
2411
|
+
parser->state.start = start;
|
|
2412
|
+
parser->state.end = start + len;
|
|
2413
|
+
parser->state.cursor = parser->state.start + offset;
|
|
2414
|
+
|
|
2415
|
+
return self;
|
|
2416
|
+
}
|
|
2417
|
+
|
|
2418
|
+
struct json_parse_any_args {
|
|
2419
|
+
JSON_ParserState *state;
|
|
2420
|
+
JSON_ParserConfig *config;
|
|
2421
|
+
VALUE parser;
|
|
2422
|
+
};
|
|
2423
|
+
|
|
2424
|
+
static VALUE json_parse_any_resumable_safe0(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, _args))
|
|
2425
|
+
{
|
|
2426
|
+
struct json_parse_any_args *args = (struct json_parse_any_args *)_args;
|
|
2427
|
+
return (VALUE)json_parse_any(args->state, args->config, true);
|
|
2428
|
+
}
|
|
2429
|
+
|
|
2430
|
+
static VALUE json_parse_any_resumable_safe(VALUE _args)
|
|
2431
|
+
{
|
|
2432
|
+
struct json_parse_any_args *args = (struct json_parse_any_args *)_args;
|
|
2433
|
+
VALUE result = rb_catch_obj(args->parser, json_parse_any_resumable_safe0, _args);
|
|
2434
|
+
return result == args->parser ? Qfalse : result;
|
|
2435
|
+
}
|
|
2436
|
+
|
|
2437
|
+
static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock)
|
|
2438
|
+
{
|
|
2439
|
+
JSON_ResumableParser *parser = cResumableParser_get(self);
|
|
2440
|
+
|
|
2441
|
+
if (parser->in_use) {
|
|
2442
|
+
rb_raise(rb_eArgError, "ResumableParser can't be used recursively");
|
|
2443
|
+
}
|
|
2444
|
+
|
|
2445
|
+
if (lock) {
|
|
2446
|
+
parser->in_use = true;
|
|
2447
|
+
}
|
|
2448
|
+
|
|
2449
|
+
// self may have moved, so we need to update all pointers
|
|
2450
|
+
// Investigate: We might be better off keeping JSON_ParserState on the stack
|
|
2451
|
+
// and only persist what we need.
|
|
2452
|
+
parser->state.value_stack = &parser->value_stack;
|
|
2453
|
+
parser->state.frames = &parser->frames;
|
|
2454
|
+
|
|
2455
|
+
return parser;
|
|
2456
|
+
}
|
|
2457
|
+
|
|
2458
|
+
/*
|
|
2459
|
+
* call-seq: parse -> true or false
|
|
2460
|
+
*
|
|
2461
|
+
* Attempts to parse a JSON document from the internal buffer.
|
|
2462
|
+
* Returns whether a complete document could be parsed.
|
|
2463
|
+
*
|
|
2464
|
+
* It does raise +JSON::ParserError+ when encountering invalid \JSON syntax.
|
|
2465
|
+
*
|
|
2466
|
+
* The parsed object can be retrieved by calling #value
|
|
2467
|
+
*/
|
|
2468
|
+
static VALUE cResumableParser_parse(VALUE self)
|
|
2469
|
+
{
|
|
2470
|
+
JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
|
|
2471
|
+
|
|
2472
|
+
if (parser->complete) {
|
|
2473
|
+
parser->parsed_bytes = 0;
|
|
2474
|
+
parser->incomplete_bytes = 0;
|
|
2475
|
+
parser->complete = false;
|
|
2476
|
+
}
|
|
2477
|
+
|
|
2478
|
+
if (!parser->buffer) {
|
|
2479
|
+
parser->in_use = false;
|
|
2480
|
+
return Qfalse;
|
|
2481
|
+
}
|
|
2482
|
+
|
|
2483
|
+
if (parser->frames.head == 0) {
|
|
2484
|
+
json_frame_stack_push(&parser->state, (json_frame){
|
|
2485
|
+
.type = JSON_FRAME_ROOT,
|
|
2486
|
+
.phase = JSON_PHASE_VALUE,
|
|
2487
|
+
});
|
|
2488
|
+
}
|
|
2489
|
+
|
|
2490
|
+
VALUE Vsource = parser->buffer; // Prevent compaction
|
|
2491
|
+
|
|
2492
|
+
json_frame *frame = json_frame_stack_peek(&parser->frames);
|
|
2493
|
+
|
|
2494
|
+
if (frame->phase == JSON_PHASE_DONE) {
|
|
2495
|
+
JSON_ASSERT(parser->value_stack.head == 1);
|
|
2496
|
+
JSON_ASSERT(parser->frames.head == 1);
|
|
2497
|
+
|
|
2498
|
+
frame->phase = JSON_PHASE_VALUE;
|
|
2499
|
+
rvalue_stack_pop(parser->state.value_stack, 1);
|
|
2500
|
+
}
|
|
2501
|
+
|
|
2502
|
+
struct json_parse_any_args args = {
|
|
2503
|
+
.state = &parser->state,
|
|
2504
|
+
.config = &parser->config,
|
|
2505
|
+
.parser = self,
|
|
2506
|
+
};
|
|
2507
|
+
int status;
|
|
2508
|
+
const char *initial_cursor = parser->state.cursor;
|
|
2509
|
+
parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status);
|
|
2510
|
+
|
|
2511
|
+
if (status) {
|
|
2512
|
+
parser->complete = true; // a parse error is considered complete
|
|
2513
|
+
}
|
|
2514
|
+
|
|
2515
|
+
parser->parsed_bytes += parser->state.cursor - initial_cursor;
|
|
2516
|
+
parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor;
|
|
2517
|
+
|
|
2518
|
+
json_eat_whitespace(&parser->state, &parser->config, false);
|
|
2519
|
+
if (eos(&parser->state)) {
|
|
2520
|
+
json_str_clear(parser->buffer);
|
|
2521
|
+
parser->buffer = Qfalse;
|
|
2522
|
+
}
|
|
2523
|
+
parser->in_use = false;
|
|
2524
|
+
|
|
2525
|
+
if (status) {
|
|
2526
|
+
rb_jump_tag(status); // reraise
|
|
2527
|
+
}
|
|
2528
|
+
RB_GC_GUARD(Vsource);
|
|
2529
|
+
return parser->complete ? Qtrue : Qfalse;
|
|
2530
|
+
}
|
|
2531
|
+
|
|
2532
|
+
/*
|
|
2533
|
+
* call-seq: value? -> true or false
|
|
2534
|
+
*
|
|
2535
|
+
* Returns whether a parsed value is available.
|
|
2536
|
+
*/
|
|
2537
|
+
static VALUE cResumableParser_value_p(VALUE self)
|
|
2538
|
+
{
|
|
2539
|
+
JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
|
|
2540
|
+
|
|
2541
|
+
if (parser->value_stack.head > 0) {
|
|
2542
|
+
json_frame *frame = json_frame_stack_peek(&parser->frames);
|
|
2543
|
+
if (frame->phase == JSON_PHASE_DONE) {
|
|
2544
|
+
return Qtrue;
|
|
2545
|
+
}
|
|
2546
|
+
}
|
|
2547
|
+
return Qfalse;
|
|
2548
|
+
}
|
|
2549
|
+
|
|
2550
|
+
/*
|
|
2551
|
+
* call-seq: value -> object
|
|
2552
|
+
*
|
|
2553
|
+
* Returns and consumes the last parsed value.
|
|
2554
|
+
* Raises ArgumentError if there is no parsed value or if it was already retrieved:
|
|
2555
|
+
* parser << '[1][2]'
|
|
2556
|
+
* parser.value # ArgumentError no ready value
|
|
2557
|
+
* parser.parse # => true
|
|
2558
|
+
* parser.value # => [1]
|
|
2559
|
+
* parser.value # ArgumentError no ready value
|
|
2560
|
+
*/
|
|
2561
|
+
static VALUE cResumableParser_value(VALUE self)
|
|
2562
|
+
{
|
|
2563
|
+
JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
|
|
2564
|
+
|
|
2565
|
+
if (parser->frames.head > 0) {
|
|
2566
|
+
json_frame *frame = json_frame_stack_peek(&parser->frames);
|
|
2567
|
+
|
|
2568
|
+
if (frame->phase == JSON_PHASE_DONE) {
|
|
2569
|
+
VALUE result = *rvalue_stack_peek(parser->state.value_stack, 1);
|
|
2570
|
+
rvalue_stack_pop(parser->state.value_stack, 1);
|
|
2571
|
+
json_frame_stack_pop(parser->state.frames);
|
|
2572
|
+
return result;
|
|
2573
|
+
}
|
|
2574
|
+
}
|
|
2575
|
+
rb_raise(rb_eArgError, "no ready value");
|
|
2576
|
+
}
|
|
2577
|
+
|
|
2578
|
+
/*
|
|
2579
|
+
* call-seq: clear -> self
|
|
2580
|
+
*
|
|
2581
|
+
* Entirely resets the parser state and buffer.
|
|
2582
|
+
*/
|
|
2583
|
+
static VALUE cResumableParser_clear(VALUE self)
|
|
2584
|
+
{
|
|
2585
|
+
JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
|
|
2586
|
+
parser->buffer = 0;
|
|
2587
|
+
parser->complete = true;
|
|
2588
|
+
parser->parsed_bytes = 0;
|
|
2589
|
+
parser->incomplete_bytes = 0;
|
|
2590
|
+
parser->frames.head = 0;
|
|
2591
|
+
parser->value_stack.head = 0;
|
|
2592
|
+
parser->state.name_cache.length = 0;
|
|
2593
|
+
parser->state.current_nesting = 0;
|
|
2594
|
+
parser->state.in_array = 1;
|
|
2595
|
+
parser->state.emitted_deprecations = 0;
|
|
2596
|
+
parser->state.start = parser->state.cursor = parser->state.end = NULL;
|
|
2597
|
+
return self;
|
|
2598
|
+
}
|
|
2599
|
+
|
|
2600
|
+
static VALUE cResumableParser_partial_value_body(VALUE self)
|
|
2601
|
+
{
|
|
2602
|
+
JSON_ResumableParser *original_parser = cResumableParser_get(self);
|
|
2603
|
+
JSON_ResumableParser parser = *original_parser;
|
|
2604
|
+
|
|
2605
|
+
parser.state.frames = &parser.frames;
|
|
2606
|
+
parser.state.value_stack = &parser.value_stack;
|
|
2607
|
+
|
|
2608
|
+
if (parser.value_stack.head == 0) {
|
|
2609
|
+
return Qnil;
|
|
2610
|
+
}
|
|
2611
|
+
|
|
2612
|
+
json_frame *frame = json_frame_stack_peek(parser.state.frames);
|
|
2613
|
+
long missing_object_value = 0;
|
|
2614
|
+
if (frame->type == JSON_FRAME_OBJECT && (frame->phase == JSON_PHASE_VALUE || frame->phase == JSON_PHASE_OBJECT_COLON)) {
|
|
2615
|
+
missing_object_value = 1;
|
|
2616
|
+
}
|
|
2617
|
+
|
|
2618
|
+
// Copy the value stack as we need to mutate it.
|
|
2619
|
+
long capa = parser.value_stack.head;
|
|
2620
|
+
parser.value_stack.capa = (capa + missing_object_value);
|
|
2621
|
+
VALUE tmpbuf, *value_stack_buffer = ALLOCV_N(VALUE, tmpbuf, capa + missing_object_value);
|
|
2622
|
+
MEMCPY(value_stack_buffer, parser.value_stack.ptr, VALUE, parser.value_stack.capa);
|
|
2623
|
+
parser.value_stack.ptr = value_stack_buffer;
|
|
2624
|
+
|
|
2625
|
+
JSON_ParserState *state = &parser.state;
|
|
2626
|
+
JSON_ParserConfig *config = &parser.config;
|
|
2627
|
+
|
|
2628
|
+
if (missing_object_value) {
|
|
2629
|
+
rvalue_stack_push(state->value_stack, Qnil, NULL, &state->value_stack);
|
|
2630
|
+
}
|
|
2631
|
+
|
|
2632
|
+
VALUE partial_result = Qundef;
|
|
2633
|
+
|
|
2634
|
+
while (UNDEF_P(partial_result)) {
|
|
2635
|
+
frame = json_frame_stack_peek(state->frames);
|
|
2636
|
+
|
|
2637
|
+
switch (frame->type) {
|
|
2638
|
+
case JSON_FRAME_ROOT: {
|
|
2639
|
+
partial_result = *rvalue_stack_peek(state->value_stack, 1);
|
|
2640
|
+
break;
|
|
2641
|
+
}
|
|
2642
|
+
|
|
2643
|
+
case JSON_FRAME_ARRAY: {
|
|
2644
|
+
long count = json_frame_entry_count(frame, state->value_stack);
|
|
2645
|
+
json_push_value(state, config, json_decode_array(state, config, count));
|
|
2646
|
+
json_frame_stack_pop(state->frames);
|
|
2647
|
+
|
|
2648
|
+
break;
|
|
2649
|
+
}
|
|
2650
|
+
|
|
2651
|
+
case JSON_FRAME_OBJECT: {
|
|
2652
|
+
long count = json_frame_entry_count(frame, state->value_stack);
|
|
2653
|
+
json_push_value(state, config, json_decode_object(state, config, count));
|
|
2654
|
+
json_frame_stack_pop(state->frames);
|
|
2655
|
+
break;
|
|
2656
|
+
}
|
|
2657
|
+
|
|
2658
|
+
default: {
|
|
2659
|
+
JSON_UNREACHABLE_RETURN(Qundef);
|
|
2660
|
+
break;
|
|
2661
|
+
}
|
|
2662
|
+
}
|
|
2663
|
+
}
|
|
2664
|
+
|
|
2665
|
+
ALLOCV_END(tmpbuf);
|
|
2666
|
+
return partial_result;
|
|
2667
|
+
}
|
|
2668
|
+
|
|
2669
|
+
/*
|
|
2670
|
+
* call-seq: partial_value -> object
|
|
2671
|
+
*
|
|
2672
|
+
* Returns the Ruby objects parsed up to this point:
|
|
2673
|
+
* parser << '[1, [2, 3,'
|
|
2674
|
+
* parser.parse # => false
|
|
2675
|
+
* parser.value # ArgumentError no ready value
|
|
2676
|
+
* parser.partial_value # => [1, [2, 3]]
|
|
2677
|
+
*/
|
|
2678
|
+
static VALUE cResumableParser_partial_value(VALUE self)
|
|
2679
|
+
{
|
|
2680
|
+
JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
|
|
2681
|
+
|
|
2682
|
+
int status;
|
|
2683
|
+
VALUE result = rb_protect(cResumableParser_partial_value_body, self, &status);
|
|
2684
|
+
parser->in_use = false;
|
|
2685
|
+
if (status) {
|
|
2686
|
+
rb_jump_tag(status);
|
|
2687
|
+
}
|
|
2688
|
+
return result;
|
|
2689
|
+
}
|
|
2690
|
+
|
|
2691
|
+
/*
|
|
2692
|
+
* call-seq: rest -> string
|
|
2693
|
+
*
|
|
2694
|
+
* Returns a string containing what remains to be parsed in the buffer
|
|
2695
|
+
* parser << '{ "message": "unterminated message'
|
|
2696
|
+
* parser.parse # => false
|
|
2697
|
+
* parser.rest # => '"unterminated message'
|
|
2698
|
+
*/
|
|
2699
|
+
static VALUE cResumableParser_rest(VALUE self)
|
|
2700
|
+
{
|
|
2701
|
+
JSON_ResumableParser *parser = cResumableParser_get(self);
|
|
2702
|
+
|
|
2703
|
+
if (!parser->buffer) {
|
|
2704
|
+
return rb_utf8_str_new("", 0);
|
|
2705
|
+
}
|
|
2706
|
+
|
|
2707
|
+
size_t offset = parser->state.cursor - parser->state.start;
|
|
2708
|
+
const char *ptr;
|
|
2709
|
+
long len;
|
|
2710
|
+
RSTRING_GETMEM(parser->buffer, ptr, len);
|
|
2711
|
+
return rb_utf8_str_new(ptr + offset, len - offset);
|
|
2712
|
+
}
|
|
2713
|
+
|
|
2714
|
+
/*
|
|
2715
|
+
* call-seq: eos? -> true or false
|
|
2716
|
+
*
|
|
2717
|
+
* Returns whether the internal buffer has been entirely consumed.
|
|
2718
|
+
*/
|
|
2719
|
+
static VALUE cResumableParser_eos_p(VALUE self)
|
|
2720
|
+
{
|
|
2721
|
+
JSON_ResumableParser *parser = cResumableParser_get(self);
|
|
2722
|
+
return eos(&parser->state) ? Qtrue : Qfalse;
|
|
2723
|
+
}
|
|
2724
|
+
|
|
2725
|
+
/*
|
|
2726
|
+
* call-seq: parsed_bytes -> integer
|
|
2727
|
+
*
|
|
2728
|
+
* Returns the number of bytes parsed since the start of the current partial value.
|
|
2729
|
+
* This is intended to be used for securing against untrusted input:
|
|
2730
|
+
*
|
|
2731
|
+
* loop do
|
|
2732
|
+
* if parser.parsed_bytes > DOCUMENT_MAX_SIZE
|
|
2733
|
+
* raise "document too large"
|
|
2734
|
+
* end
|
|
2735
|
+
*
|
|
2736
|
+
* parser << read_chunk
|
|
2737
|
+
* while parser.parse
|
|
2738
|
+
* process(parser.value)
|
|
2739
|
+
* end
|
|
2740
|
+
* end
|
|
2741
|
+
*/
|
|
2742
|
+
static VALUE cResumableParser_parsed_bytes(VALUE self)
|
|
2743
|
+
{
|
|
2744
|
+
JSON_ResumableParser *parser = cResumableParser_get(self);
|
|
2745
|
+
return ULL2NUM(parser->parsed_bytes + parser->incomplete_bytes);
|
|
2746
|
+
}
|
|
2747
|
+
|
|
1702
2748
|
void Init_parser(void)
|
|
1703
2749
|
{
|
|
1704
2750
|
#ifdef HAVE_RB_EXT_RACTOR_SAFE
|
|
@@ -1710,30 +2756,52 @@ void Init_parser(void)
|
|
|
1710
2756
|
mJSON = rb_define_module("JSON");
|
|
1711
2757
|
VALUE mExt = rb_define_module_under(mJSON, "Ext");
|
|
1712
2758
|
VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
|
|
2759
|
+
|
|
2760
|
+
rb_global_variable(&eParserError);
|
|
2761
|
+
eParserError = rb_path2class("JSON::ParserError");
|
|
2762
|
+
|
|
2763
|
+
rb_global_variable(&eNestingError);
|
|
1713
2764
|
eNestingError = rb_path2class("JSON::NestingError");
|
|
1714
|
-
|
|
2765
|
+
|
|
1715
2766
|
rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
|
|
1716
|
-
|
|
2767
|
+
rb_define_private_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
|
|
1717
2768
|
rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
|
|
1718
2769
|
|
|
1719
2770
|
VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
|
|
1720
2771
|
rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
|
|
1721
2772
|
|
|
2773
|
+
VALUE cResumableParser = rb_define_class_under(mJSON, "ResumableParser", rb_cObject);
|
|
2774
|
+
rb_define_alloc_func(cResumableParser, cResumableParser_allocate);
|
|
2775
|
+
rb_define_private_method(cResumableParser, "initialize", cResumableParser_initialize, -1);
|
|
2776
|
+
rb_define_method(cResumableParser, "<<", cResumableParser_feed, 1);
|
|
2777
|
+
rb_define_method(cResumableParser, "parse", cResumableParser_parse, 0);
|
|
2778
|
+
rb_define_method(cResumableParser, "value", cResumableParser_value, 0);
|
|
2779
|
+
rb_define_method(cResumableParser, "value?", cResumableParser_value_p, 0);
|
|
2780
|
+
rb_define_method(cResumableParser, "partial_value", cResumableParser_partial_value, 0);
|
|
2781
|
+
rb_define_method(cResumableParser, "clear", cResumableParser_clear, 0);
|
|
2782
|
+
rb_define_method(cResumableParser, "rest", cResumableParser_rest, 0);
|
|
2783
|
+
rb_define_method(cResumableParser, "eos?", cResumableParser_eos_p, 0);
|
|
2784
|
+
rb_define_method(cResumableParser, "parsed_bytes", cResumableParser_parsed_bytes, 0);
|
|
2785
|
+
|
|
2786
|
+
rb_global_variable(&CNaN);
|
|
1722
2787
|
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
|
|
1723
|
-
rb_gc_register_mark_object(CNaN);
|
|
1724
2788
|
|
|
2789
|
+
rb_global_variable(&CInfinity);
|
|
1725
2790
|
CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
|
|
1726
|
-
rb_gc_register_mark_object(CInfinity);
|
|
1727
2791
|
|
|
2792
|
+
rb_global_variable(&CMinusInfinity);
|
|
1728
2793
|
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
|
|
1729
|
-
rb_gc_register_mark_object(CMinusInfinity);
|
|
1730
2794
|
|
|
1731
2795
|
rb_global_variable(&Encoding_UTF_8);
|
|
1732
2796
|
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
|
|
1733
2797
|
|
|
2798
|
+
rb_global_variable(&JSON_empty_string);
|
|
2799
|
+
JSON_empty_string = rb_obj_hide(rb_utf8_str_new("", 0));
|
|
2800
|
+
|
|
1734
2801
|
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
|
1735
2802
|
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
|
1736
2803
|
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
|
|
2804
|
+
sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
|
|
1737
2805
|
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
|
|
1738
2806
|
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
|
|
1739
2807
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|
|
@@ -1746,6 +2814,8 @@ void Init_parser(void)
|
|
|
1746
2814
|
i_try_convert = rb_intern("try_convert");
|
|
1747
2815
|
i_uminus = rb_intern("-@");
|
|
1748
2816
|
i_encode = rb_intern("encode");
|
|
2817
|
+
i_at_line = rb_intern("@line");
|
|
2818
|
+
i_at_column = rb_intern("@column");
|
|
1749
2819
|
|
|
1750
2820
|
binary_encindex = rb_ascii8bit_encindex();
|
|
1751
2821
|
utf8_encindex = rb_utf8_encindex();
|