json 2.19.9 → 2.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,15 @@
1
1
  #include "../json.h"
2
- #include "../vendor/ryu.h"
2
+ #include "../vendor/fast_float_parser.h"
3
3
  #include "../simd/simd.h"
4
4
 
5
- static VALUE mJSON, eNestingError, Encoding_UTF_8;
6
- static VALUE CNaN, CInfinity, CMinusInfinity;
5
+ static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8;
6
+ static VALUE CNaN, CInfinity, CMinusInfinity, JSON_empty_string;
7
7
 
8
- static ID i_new, i_try_convert, i_uminus, i_encode;
8
+ static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column;
9
9
 
10
- static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
11
- sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
12
- sym_allow_duplicate_key;
10
+ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments,
11
+ sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names,
12
+ sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
13
13
 
14
14
  static int binary_encindex;
15
15
  static int utf8_encindex;
@@ -58,6 +58,20 @@ typedef struct rvalue_cache_struct {
58
58
  VALUE entries[JSON_RVALUE_CACHE_CAPA];
59
59
  } rvalue_cache;
60
60
 
61
+ static void rvalue_cache_mark(rvalue_cache *cache)
62
+ {
63
+ for (int index = 0; index < cache->length; index++) {
64
+ rb_gc_mark_movable(cache->entries[index]);
65
+ }
66
+ }
67
+
68
+ static void rvalue_cache_compact(rvalue_cache *cache)
69
+ {
70
+ for (int index = 0; index < cache->length; index++) {
71
+ cache->entries[index] = rb_gc_location(cache->entries[index]);
72
+ }
73
+ }
74
+
61
75
  static rb_encoding *enc_utf8;
62
76
 
63
77
  #define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
@@ -206,12 +220,12 @@ static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle,
206
220
 
207
221
  static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
208
222
  {
209
- long required = stack->capa * 2;
223
+ long required = stack->capa ? stack->capa * 2 : RVALUE_STACK_INITIAL_CAPA;
210
224
 
211
225
  if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
212
226
  stack = rvalue_stack_spill(stack, handle, stack_ref);
213
227
  } else {
214
- REALLOC_N(stack->ptr, VALUE, required);
228
+ JSON_SIZED_REALLOC_N(stack->ptr, VALUE, required, stack->capa);
215
229
  stack->capa = required;
216
230
  }
217
231
  return stack;
@@ -219,11 +233,15 @@ static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalu
219
233
 
220
234
  static VALUE rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
221
235
  {
236
+ JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || handle);
237
+
222
238
  if (RB_UNLIKELY(stack->head >= stack->capa)) {
223
239
  stack = rvalue_stack_grow(stack, handle, stack_ref);
224
240
  }
241
+
225
242
  stack->ptr[stack->head] = value;
226
243
  stack->head++;
244
+
227
245
  return value;
228
246
  }
229
247
 
@@ -243,14 +261,14 @@ static void rvalue_stack_mark(void *ptr)
243
261
  long index;
244
262
  if (stack && stack->ptr) {
245
263
  for (index = 0; index < stack->head; index++) {
246
- rb_gc_mark(stack->ptr[index]);
264
+ rb_gc_mark_movable(stack->ptr[index]);
247
265
  }
248
266
  }
249
267
  }
250
268
 
251
269
  static void rvalue_stack_free_buffer(rvalue_stack *stack)
252
270
  {
253
- ruby_xfree(stack->ptr);
271
+ JSON_SIZED_FREE_N(stack->ptr, stack->capa);
254
272
  stack->ptr = NULL;
255
273
  }
256
274
 
@@ -260,7 +278,7 @@ static void rvalue_stack_free(void *ptr)
260
278
  if (stack) {
261
279
  rvalue_stack_free_buffer(stack);
262
280
  #ifndef HAVE_RUBY_TYPED_EMBEDDABLE
263
- ruby_xfree(stack);
281
+ JSON_SIZED_FREE(stack);
264
282
  #endif
265
283
  }
266
284
  }
@@ -268,7 +286,22 @@ static void rvalue_stack_free(void *ptr)
268
286
  static size_t rvalue_stack_memsize(const void *ptr)
269
287
  {
270
288
  const rvalue_stack *stack = (const rvalue_stack *)ptr;
271
- return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa;
289
+ size_t memsize = sizeof(VALUE) * stack->capa;
290
+ #ifndef HAVE_RUBY_TYPED_EMBEDDABLE
291
+ memsize += sizeof(rvalue_stack);
292
+ #endif
293
+ return memsize;
294
+ }
295
+
296
+ static void rvalue_stack_compact(void *ptr)
297
+ {
298
+ rvalue_stack *stack = (rvalue_stack *)ptr;
299
+ long index;
300
+ if (stack && stack->ptr) {
301
+ for (index = 0; index < stack->head; index++) {
302
+ stack->ptr[index] = rb_gc_location(stack->ptr[index]);
303
+ }
304
+ }
272
305
  }
273
306
 
274
307
  static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
@@ -277,7 +310,10 @@ static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
277
310
  .dmark = rvalue_stack_mark,
278
311
  .dfree = rvalue_stack_free,
279
312
  .dsize = rvalue_stack_memsize,
313
+ .dcompact = rvalue_stack_compact,
280
314
  },
315
+ // We deliberately don't declare rvalue_stack as RUBY_TYPED_WB_PROTECTED
316
+ // because it churns a lot of values so triggering write barriers every time is very costly.
281
317
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
282
318
  };
283
319
 
@@ -309,33 +345,62 @@ static void rvalue_stack_eagerly_release(VALUE handle)
309
345
  }
310
346
  }
311
347
 
312
- static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
313
- {
314
- int len = 1;
315
- if (ch <= 0x7F) {
316
- buf[0] = (char) ch;
317
- } else if (ch <= 0x07FF) {
318
- buf[0] = (char) ((ch >> 6) | 0xC0);
319
- buf[1] = (char) ((ch & 0x3F) | 0x80);
320
- len++;
321
- } else if (ch <= 0xFFFF) {
322
- buf[0] = (char) ((ch >> 12) | 0xE0);
323
- buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
324
- buf[2] = (char) ((ch & 0x3F) | 0x80);
325
- len += 2;
326
- } else if (ch <= 0x1fffff) {
327
- buf[0] =(char) ((ch >> 18) | 0xF0);
328
- buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
329
- buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
330
- buf[3] =(char) ((ch & 0x3F) | 0x80);
331
- len += 3;
332
- } else {
333
- buf[0] = '?';
334
- }
335
- return len;
336
- }
348
+ /* frame stack */
349
+
350
+ // Iterative (non-recursive) parsing keeps an explicit stack of the containers
351
+ // currently being built, instead of relying on the C call stack. Each frame
352
+ // only needs enough bookkeeping to close its container: which kind it is, the
353
+ // rvalue_stack position where its children start (so we know how many to pop),
354
+ // and the cursor at its opening brace (used to rewind for duplicate key
355
+ // errors). Frames hold no VALUEs, so this stack needs no GC marking; it reuses
356
+ // the same stack-allocated-with-heap-spill strategy as the rvalue_stack so that
357
+ // it's freed even if parsing raises.
358
+ //
359
+ // The lifecycle helpers below (grow/push/peek/pop/spill/free/eagerly_release
360
+ // and the rb_data_type_t) deliberately mirror their rvalue_stack counterparts
361
+ // -- the element type and the absence of a mark function are the only real
362
+ // differences. Keep the two in sync: a fix to the spill/release or
363
+ // HAVE_RUBY_TYPED_EMBEDDABLE handling in one almost certainly belongs in the
364
+ // other.
365
+ #define JSON_FRAME_STACK_INITIAL_CAPA 32
366
+
367
+ enum json_frame_type {
368
+ JSON_FRAME_ROOT, // == JSON_PHASE_DONE
369
+ JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA
370
+ JSON_FRAME_OBJECT, // == JSON_PHASE_OBJECT_COMMA
371
+ };
337
372
 
338
- enum duplicate_key_action {
373
+ // Where a frame is within its container's grammar. This is the entirety of the
374
+ // parser's "what to do next" state: json_parse_any dispatches on the top
375
+ // frame's phase and holds no resume state in C locals, so a parse can stop at
376
+ // any value boundary and be resumed purely from the (persistable) frame stack.
377
+ //
378
+ // The first three phases are deliberately equal to the corresponding json_frame_type
379
+ // to simplify the transition of phase in json_value_completed.
380
+ enum json_frame_phase {
381
+ JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed
382
+ JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']'
383
+ JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}'
384
+ JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':')
385
+ JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',')
386
+ JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':'
387
+ };
388
+
389
+ typedef struct json_frame_struct {
390
+ enum json_frame_type type;
391
+ enum json_frame_phase phase;
392
+ long value_stack_head; // rvalue_stack->head when this container opened
393
+ size_t start_offset; // object frames only (the '{'); 0 otherwise
394
+ } json_frame;
395
+
396
+ typedef struct json_frame_stack_struct {
397
+ enum rvalue_stack_type type; // shared with rvalue_stack: is ptr stack- or heap-allocated
398
+ long capa;
399
+ long head;
400
+ json_frame *ptr;
401
+ } json_frame_stack;
402
+
403
+ enum deprecatable_action {
339
404
  JSON_DEPRECATED = 0,
340
405
  JSON_IGNORE,
341
406
  JSON_RAISE,
@@ -345,7 +410,8 @@ typedef struct JSON_ParserStruct {
345
410
  VALUE on_load_proc;
346
411
  VALUE decimal_class;
347
412
  ID decimal_method_id;
348
- enum duplicate_key_action on_duplicate_key;
413
+ enum deprecatable_action on_duplicate_key;
414
+ enum deprecatable_action on_comment;
349
415
  int max_nesting;
350
416
  bool allow_nan;
351
417
  bool allow_trailing_comma;
@@ -356,17 +422,152 @@ typedef struct JSON_ParserStruct {
356
422
  } JSON_ParserConfig;
357
423
 
358
424
  typedef struct JSON_ParserStateStruct {
359
- VALUE *stack_handle;
425
+ VALUE *value_stack_handle;
426
+ VALUE *frame_stack_handle;
360
427
  const char *start;
361
428
  const char *cursor;
362
429
  const char *end;
363
- rvalue_stack *stack;
430
+ rvalue_stack *value_stack;
431
+ json_frame_stack *frames;
364
432
  rvalue_cache name_cache;
365
433
  int in_array;
366
434
  int current_nesting;
367
435
  unsigned int emitted_deprecations;
436
+ VALUE parser;
368
437
  } JSON_ParserState;
369
438
 
439
+ static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref);
440
+
441
+ static json_frame_stack *json_frame_stack_grow(json_frame_stack *stack, VALUE *handle, json_frame_stack **stack_ref)
442
+ {
443
+ long required = stack->capa ? stack->capa * 2 : JSON_FRAME_STACK_INITIAL_CAPA;
444
+
445
+ if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
446
+ stack = json_frame_stack_spill(stack, handle, stack_ref);
447
+ } else {
448
+ JSON_SIZED_REALLOC_N(stack->ptr, json_frame, required, stack->capa);
449
+ stack->capa = required;
450
+ }
451
+ return stack;
452
+ }
453
+
454
+ static json_frame *json_frame_stack_push(JSON_ParserState *state, json_frame frame)
455
+ {
456
+ json_frame_stack *stack = state->frames;
457
+
458
+ JSON_ASSERT(stack->type != RVALUE_STACK_STACK_ALLOCATED || state->frame_stack_handle);
459
+
460
+ if (RB_UNLIKELY(stack->head >= stack->capa)) {
461
+ stack = json_frame_stack_grow(stack, state->frame_stack_handle, &state->frames);
462
+ }
463
+
464
+ json_frame *frame_ptr = &stack->ptr[stack->head++];
465
+ *frame_ptr = frame;
466
+ return frame_ptr;
467
+ }
468
+
469
+ static inline json_frame *json_frame_stack_peek(json_frame_stack *stack)
470
+ {
471
+ return &stack->ptr[stack->head - 1];
472
+ }
473
+
474
+ static inline void json_frame_stack_pop(json_frame_stack *stack)
475
+ {
476
+ stack->head--;
477
+ }
478
+
479
+ static void json_frame_stack_free_buffer(json_frame_stack *stack)
480
+ {
481
+ JSON_SIZED_FREE_N(stack->ptr, stack->capa);
482
+ stack->ptr = NULL;
483
+ }
484
+
485
+ static void json_frame_stack_free(void *ptr)
486
+ {
487
+ json_frame_stack *stack = (json_frame_stack *)ptr;
488
+ if (stack) {
489
+ json_frame_stack_free_buffer(stack);
490
+ #ifndef HAVE_RUBY_TYPED_EMBEDDABLE
491
+ JSON_SIZED_FREE(stack);
492
+ #endif
493
+ }
494
+ }
495
+
496
+ static size_t json_frame_stack_memsize(const void *ptr)
497
+ {
498
+ const json_frame_stack *stack = (const json_frame_stack *)ptr;
499
+
500
+ size_t memsize = sizeof(json_frame) * stack->capa;
501
+ #ifndef HAVE_RUBY_TYPED_EMBEDDABLE
502
+ memsize += sizeof(json_frame_stack);
503
+ #endif
504
+ return memsize;
505
+ }
506
+
507
+ static const rb_data_type_t JSON_Parser_frame_stack_type = {
508
+ .wrap_struct_name = "JSON::Ext::Parser/frame_stack",
509
+ .function = {
510
+ .dmark = NULL,
511
+ .dfree = json_frame_stack_free,
512
+ .dsize = json_frame_stack_memsize,
513
+ },
514
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_EMBEDDABLE,
515
+ };
516
+
517
+ static json_frame_stack *json_frame_stack_spill(json_frame_stack *old_stack, VALUE *handle, json_frame_stack **stack_ref)
518
+ {
519
+ json_frame_stack *stack;
520
+ *handle = TypedData_Make_Struct(0, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
521
+ *stack_ref = stack;
522
+ MEMCPY(stack, old_stack, json_frame_stack, 1);
523
+
524
+ stack->capa = old_stack->capa << 1;
525
+ stack->ptr = ALLOC_N(json_frame, stack->capa);
526
+ stack->type = RVALUE_STACK_HEAP_ALLOCATED;
527
+ MEMCPY(stack->ptr, old_stack->ptr, json_frame, old_stack->head);
528
+ return stack;
529
+ }
530
+
531
+ static void json_frame_stack_eagerly_release(VALUE handle)
532
+ {
533
+ if (handle) {
534
+ json_frame_stack *stack;
535
+ TypedData_Get_Struct(handle, json_frame_stack, &JSON_Parser_frame_stack_type, stack);
536
+ #ifdef HAVE_RUBY_TYPED_EMBEDDABLE
537
+ json_frame_stack_free_buffer(stack);
538
+ #else
539
+ json_frame_stack_free(stack);
540
+ RTYPEDDATA_DATA(handle) = NULL;
541
+ #endif
542
+ }
543
+ }
544
+
545
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
546
+ {
547
+ int len = 1;
548
+ if (ch <= 0x7F) {
549
+ buf[0] = (char) ch;
550
+ } else if (ch <= 0x07FF) {
551
+ buf[0] = (char) ((ch >> 6) | 0xC0);
552
+ buf[1] = (char) ((ch & 0x3F) | 0x80);
553
+ len++;
554
+ } else if (ch <= 0xFFFF) {
555
+ buf[0] = (char) ((ch >> 12) | 0xE0);
556
+ buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
557
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
558
+ len += 2;
559
+ } else if (ch <= 0x1fffff) {
560
+ buf[0] =(char) ((ch >> 18) | 0xF0);
561
+ buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
562
+ buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
563
+ buf[3] =(char) ((ch & 0x3F) | 0x80);
564
+ len += 3;
565
+ } else {
566
+ buf[0] = '?';
567
+ }
568
+ return len;
569
+ }
570
+
370
571
  static inline size_t rest(JSON_ParserState *state) {
371
572
  return state->end - state->cursor;
372
573
  }
@@ -398,6 +599,7 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum
398
599
 
399
600
  while (cursor >= state->start) {
400
601
  if (*cursor-- == '\n') {
602
+ line++;
401
603
  break;
402
604
  }
403
605
  column++;
@@ -412,6 +614,8 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum
412
614
  *column_out = column;
413
615
  }
414
616
 
617
+ static const unsigned int MAX_DEPRECATIONS = 5;
618
+
415
619
  static void emit_parse_warning(const char *message, JSON_ParserState *state)
416
620
  {
417
621
  long line, column;
@@ -423,7 +627,7 @@ static void emit_parse_warning(const char *message, JSON_ParserState *state)
423
627
 
424
628
  #define PARSE_ERROR_FRAGMENT_LEN 32
425
629
 
426
- static VALUE build_parse_error_message(const char *format, JSON_ParserState *state, long line, long column)
630
+ static VALUE build_parse_error_message(const char *format, JSON_ParserState *state)
427
631
  {
428
632
  unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
429
633
 
@@ -457,31 +661,61 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta
457
661
  }
458
662
  }
459
663
 
460
- VALUE message = rb_enc_sprintf(enc_utf8, format, ptr);
461
- rb_str_catf(message, " at line %ld column %ld", line, column);
462
- return message;
664
+ return rb_enc_sprintf(enc_utf8, format, ptr);
463
665
  }
464
666
 
465
- static VALUE parse_error_new(VALUE message, long line, long column)
667
+ static VALUE parse_error_new(JSON_ParserState *state, VALUE message, long line, long column, bool eos)
466
668
  {
467
- VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
468
- rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
469
- rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
669
+ VALUE exc = rb_exc_new_str(eParserError, message);
670
+ rb_ivar_set(exc, i_at_line, LONG2NUM(line));
671
+ rb_ivar_set(exc, i_at_column, LONG2NUM(column));
470
672
  return exc;
471
673
  }
472
674
 
473
- NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state)
675
+ NORETURN(static) void raise_parse_error(const char *format, JSON_ParserState *state, bool eos)
474
676
  {
475
- long line, column;
476
- cursor_position(state, &line, &column);
477
- VALUE message = build_parse_error_message(format, state, line, column);
478
- rb_exc_raise(parse_error_new(message, line, column));
677
+ if (state->parser) {
678
+ if (eos) {
679
+ // the error will be swallowed by ResumableParser#parse, so no
680
+ // point building a message or backtrace.
681
+ rb_throw_obj(state->parser, state->parser);
682
+ } else {
683
+ // line and column can't be accurate in resumable mode
684
+ rb_exc_raise(parse_error_new(state, build_parse_error_message(format, state), 0, 0, eos));
685
+ }
686
+ } else {
687
+ VALUE message = build_parse_error_message(format, state);
688
+ long line, column;
689
+ cursor_position(state, &line, &column);
690
+ rb_str_catf(message, " at line %ld column %ld", line, column);
691
+ rb_exc_raise(parse_error_new(state, message, line, column, eos));
692
+ }
693
+ }
694
+
695
+ NORETURN(static) void raise_eos_error(const char *format, JSON_ParserState *state)
696
+ {
697
+ raise_parse_error(format, state, true);
698
+ }
699
+
700
+ NORETURN(static) void raise_syntax_error(const char *format, JSON_ParserState *state)
701
+ {
702
+ raise_parse_error(format, state, false);
479
703
  }
480
704
 
481
- NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
705
+ NORETURN(static) void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at, bool eos)
482
706
  {
483
707
  state->cursor = at;
484
- raise_parse_error(format, state);
708
+ raise_parse_error(format, state, eos);
709
+ }
710
+
711
+ NORETURN(static) void raise_eos_error_at(const char *format, JSON_ParserState *state, const char *at)
712
+ {
713
+ raise_parse_error_at(format, state, at, true);
714
+ }
715
+
716
+ NORETURN(static) void raise_syntax_error_at(const char *format, JSON_ParserState *state, const char *at)
717
+ {
718
+ raise_parse_error_at(format, state, at, false);
485
719
  }
486
720
 
487
721
  /* unicode */
@@ -506,7 +740,7 @@ static const signed char digit_values[256] = {
506
740
  static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
507
741
  {
508
742
  if (RB_UNLIKELY(sp > spe - 4)) {
509
- raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
743
+ raise_eos_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
510
744
  }
511
745
 
512
746
  const unsigned char *p = (const unsigned char *)sp;
@@ -517,7 +751,7 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const
517
751
  const signed char b3 = digit_values[p[3]];
518
752
 
519
753
  if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
520
- raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
754
+ raise_syntax_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
521
755
  }
522
756
 
523
757
  return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
@@ -529,9 +763,14 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const
529
763
 
530
764
  static const rb_data_type_t JSON_ParserConfig_type;
531
765
 
532
- static void
533
- json_eat_comments(JSON_ParserState *state)
766
+ const char *COMMENT_DEPRECATION_MESSAGE = "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`";
767
+ NOINLINE(static) void
768
+ json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)
534
769
  {
770
+ if (config->on_comment == JSON_RAISE) {
771
+ raise_syntax_error("unexpected token %s", state);
772
+ }
773
+
535
774
  const char *start = state->cursor;
536
775
  state->cursor++;
537
776
 
@@ -551,7 +790,7 @@ json_eat_comments(JSON_ParserState *state)
551
790
  while (true) {
552
791
  const char *next_match = memchr(state->cursor, '*', state->end - state->cursor);
553
792
  if (!next_match) {
554
- raise_parse_error_at("unterminated comment, expected closing '*/'", state, start);
793
+ raise_eos_error_at("unterminated comment, expected closing '*/'", state, start);
555
794
  }
556
795
 
557
796
  state->cursor = next_match + 1;
@@ -563,13 +802,18 @@ json_eat_comments(JSON_ParserState *state)
563
802
  break;
564
803
  }
565
804
  default:
566
- raise_parse_error_at("unexpected token %s", state, start);
805
+ raise_parse_error_at("unexpected token %s", state, start, eos(state));
567
806
  break;
568
807
  }
808
+
809
+ if (config->on_comment == JSON_DEPRECATED && state->emitted_deprecations < MAX_DEPRECATIONS) {
810
+ state->emitted_deprecations++;
811
+ emit_parse_warning(COMMENT_DEPRECATION_MESSAGE, state);
812
+ }
569
813
  }
570
814
 
571
815
  ALWAYS_INLINE(static) void
572
- json_eat_whitespace(JSON_ParserState *state)
816
+ json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config, bool include_comments)
573
817
  {
574
818
  while (true) {
575
819
  switch (peek(state)) {
@@ -600,7 +844,11 @@ json_eat_whitespace(JSON_ParserState *state)
600
844
  state->cursor++;
601
845
  break;
602
846
  case '/':
603
- json_eat_comments(state);
847
+ if (!include_comments) {
848
+ return;
849
+ }
850
+
851
+ json_eat_comments(state, config);
604
852
  break;
605
853
 
606
854
  default:
@@ -754,13 +1002,13 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
754
1002
  uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
755
1003
 
756
1004
  if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
757
- raise_parse_error_at("invalid surrogate pair at %s", state, p);
1005
+ raise_syntax_error_at("invalid surrogate pair at %s", state, p);
758
1006
  }
759
1007
 
760
1008
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
761
1009
  pe += 5;
762
1010
  } else {
763
- raise_parse_error_at("incomplete surrogate pair at %s", state, p);
1011
+ raise_syntax_error_at("incomplete surrogate pair at %s", state, p);
764
1012
  break;
765
1013
  }
766
1014
  }
@@ -770,20 +1018,22 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
770
1018
  p = ++pe;
771
1019
  break;
772
1020
  }
1021
+ case 0:
1022
+ return Qundef;
773
1023
  default:
774
1024
  if ((unsigned char)*pe < 0x20) {
775
1025
  if (!config->allow_control_characters) {
776
1026
  if (*pe == '\n') {
777
- raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
1027
+ raise_syntax_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1);
778
1028
  }
779
- raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1);
1029
+ raise_syntax_error_at("invalid ASCII control character in string: %s", state, pe - 1);
780
1030
  }
781
1031
  }
782
1032
 
783
1033
  if (config->allow_invalid_escape) {
784
1034
  APPEND_CHAR(*pe);
785
1035
  } else {
786
- raise_parse_error_at("invalid escape character in string: %s", state, pe - 1);
1036
+ raise_syntax_error_at("invalid escape character in string: %s", state, pe - 1);
787
1037
  }
788
1038
  break;
789
1039
  }
@@ -879,19 +1129,17 @@ static inline VALUE json_decode_float(JSON_ParserConfig *config, uint64_t mantis
879
1129
  return rb_float_new(negative ? -0.0 : 0.0);
880
1130
  }
881
1131
 
882
- // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
883
- // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
884
- if (RB_UNLIKELY(mantissa_digits > 17 || mantissa_digits + exponent < -307)) {
1132
+ if (RB_UNLIKELY(mantissa_digits > 18 || mantissa_digits + exponent < -307)) {
885
1133
  return json_decode_large_float(start, end - start);
886
1134
  }
887
1135
 
888
- return DBL2NUM(ryu_s2d_from_parts(mantissa, mantissa_digits, (int32_t)exponent, negative));
1136
+ return DBL2NUM(ffp_s2d(exponent, mantissa, negative));
889
1137
  }
890
1138
 
891
1139
  static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig *config, long count)
892
1140
  {
893
- VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->stack, count));
894
- rvalue_stack_pop(state->stack, count);
1141
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(state->value_stack, count));
1142
+ rvalue_stack_pop(state->value_stack, count);
895
1143
 
896
1144
  if (config->freeze) {
897
1145
  RB_OBJ_FREEZE(array);
@@ -935,38 +1183,50 @@ NORETURN(static) void raise_duplicate_key_error(JSON_ParserState *state, VALUE d
935
1183
  rb_inspect(duplicate_key)
936
1184
  );
937
1185
 
938
- long line, column;
939
- cursor_position(state, &line, &column);
940
- rb_str_concat(message, build_parse_error_message("", state, line, column)) ;
941
- rb_exc_raise(parse_error_new(message, line, column));
1186
+ rb_str_concat(message, build_parse_error_message("", state));
1187
+ if (state->parser) { // line and column can't be accurate in resumable mode
1188
+ rb_exc_raise(parse_error_new(state, message, 0, 0, false));
1189
+ } else {
1190
+ long line, column;
1191
+ cursor_position(state, &line, &column);
1192
+ rb_str_catf(message, " at line %ld column %ld", line, column);
1193
+ rb_exc_raise(parse_error_new(state, message, line, column, false));
1194
+ }
1195
+ }
1196
+
1197
+ NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_ParserConfig *config, size_t count, const VALUE *pairs)
1198
+ {
1199
+ switch (config->on_duplicate_key) {
1200
+ case JSON_IGNORE:
1201
+ return;
1202
+
1203
+ case JSON_DEPRECATED:
1204
+ // Only emit the first few deprecations to avoid spamming.
1205
+ if (state->emitted_deprecations < MAX_DEPRECATIONS) {
1206
+ state->emitted_deprecations++;
1207
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
1208
+ }
1209
+ return;
1210
+
1211
+ case JSON_RAISE:
1212
+ raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
1213
+ return;
1214
+ }
1215
+ UNREACHABLE;
942
1216
  }
943
1217
 
944
1218
  static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
945
1219
  {
946
1220
  size_t entries_count = count / 2;
947
1221
  VALUE object = rb_hash_new_capa(entries_count);
948
- const VALUE *pairs = rvalue_stack_peek(state->stack, count);
1222
+ const VALUE *pairs = rvalue_stack_peek(state->value_stack, count);
949
1223
  rb_hash_bulk_insert(count, pairs, object);
950
1224
 
951
1225
  if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
952
- switch (config->on_duplicate_key) {
953
- case JSON_IGNORE:
954
- break;
955
- case JSON_DEPRECATED:
956
- // Only emit the first few deprecations to avoid spamming.
957
- if (state->emitted_deprecations < 5) {
958
- emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
959
- state->emitted_deprecations++;
960
- }
961
-
962
- break;
963
- case JSON_RAISE:
964
- raise_duplicate_key_error(state, json_find_duplicated_key(count, pairs));
965
- break;
966
- }
1226
+ json_on_duplicate_key(state, config, count, pairs);
967
1227
  }
968
1228
 
969
- rvalue_stack_pop(state->stack, count);
1229
+ rvalue_stack_pop(state->value_stack, count);
970
1230
 
971
1231
  if (config->freeze) {
972
1232
  RB_OBJ_FREEZE(object);
@@ -980,7 +1240,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
980
1240
  if (RB_UNLIKELY(config->on_load_proc)) {
981
1241
  value = rb_proc_call_with_block(config->on_load_proc, 1, &value, Qnil);
982
1242
  }
983
- rvalue_stack_push(state->stack, value, state->stack_handle, &state->stack);
1243
+ rvalue_stack_push(state->value_stack, value, state->value_stack_handle, &state->value_stack);
984
1244
  return value;
985
1245
  }
986
1246
 
@@ -1053,7 +1313,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
1053
1313
  case '"': {
1054
1314
  VALUE string = json_string_unescape(state, config, start, state->cursor, is_name, &positions);
1055
1315
  state->cursor++;
1056
- return json_push_value(state, config, string);
1316
+ return string;
1057
1317
  }
1058
1318
  case '\\': {
1059
1319
  if (RB_LIKELY(positions.size < JSON_MAX_UNESCAPE_POSITIONS)) {
@@ -1067,7 +1327,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
1067
1327
  }
1068
1328
  default:
1069
1329
  if (!config->allow_control_characters) {
1070
- raise_parse_error("invalid ASCII control character in string: %s", state);
1330
+ raise_syntax_error("invalid ASCII control character in string: %s", state);
1071
1331
  }
1072
1332
  break;
1073
1333
  }
@@ -1075,8 +1335,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
1075
1335
  state->cursor++;
1076
1336
  } while (string_scan(state));
1077
1337
 
1078
- raise_parse_error("unexpected end of input, expected closing \"", state);
1079
- return Qfalse;
1338
+ return Qundef;
1080
1339
  }
1081
1340
 
1082
1341
  ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
@@ -1085,15 +1344,19 @@ ALWAYS_INLINE(static) VALUE json_parse_string(JSON_ParserState *state, JSON_Pars
1085
1344
  const char *start = state->cursor;
1086
1345
 
1087
1346
  if (RB_UNLIKELY(!string_scan(state))) {
1088
- raise_parse_error("unexpected end of input, expected closing \"", state);
1347
+ return Qundef;
1089
1348
  }
1090
1349
 
1350
+ VALUE string;
1091
1351
  if (RB_LIKELY(*state->cursor == '"')) {
1092
- VALUE string = json_string_fastpath(state, config, start, state->cursor, is_name);
1352
+ string = json_string_fastpath(state, config, start, state->cursor, is_name);
1093
1353
  state->cursor++;
1094
- return json_push_value(state, config, string);
1095
1354
  }
1096
- return json_parse_escaped_string(state, config, is_name, start);
1355
+ else {
1356
+ string = json_parse_escaped_string(state, config, is_name, start);
1357
+ }
1358
+
1359
+ return string;
1097
1360
  }
1098
1361
 
1099
1362
  #if JSON_CPU_LITTLE_ENDIAN_64BITS
@@ -1180,7 +1443,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
1180
1443
  int mantissa_digits = json_parse_digits(state, &mantissa);
1181
1444
 
1182
1445
  if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
1183
- raise_parse_error_at("invalid number: %s", state, start);
1446
+ return Qundef;
1184
1447
  }
1185
1448
 
1186
1449
  // Parse fractional part
@@ -1193,7 +1456,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
1193
1456
  mantissa_digits += fractional_digits;
1194
1457
 
1195
1458
  if (RB_UNLIKELY(!fractional_digits)) {
1196
- raise_parse_error_at("invalid number: %s", state, start);
1459
+ return Qundef;
1197
1460
  }
1198
1461
  }
1199
1462
 
@@ -1213,7 +1476,7 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
1213
1476
  int exponent_digits = json_parse_digits(state, &abs_exponent);
1214
1477
 
1215
1478
  if (RB_UNLIKELY(!exponent_digits)) {
1216
- raise_parse_error_at("invalid number: %s", state, start);
1479
+ return Qundef;
1217
1480
  }
1218
1481
 
1219
1482
  if (RB_UNLIKELY(exponent_digits >= 20 || abs_exponent > (uint64_t)INT64_MAX)) {
@@ -1235,229 +1498,411 @@ static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig
1235
1498
  return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
1236
1499
  }
1237
1500
 
1238
- static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
1501
+ // How many values (array elements, or interleaved object keys+values) have been
1502
+ // pushed onto the rvalue stack since this container opened. Used to size the
1503
+ // bulk decode on close, and to tell the first key/colon from later ones.
1504
+ static inline long json_frame_entry_count(const json_frame *frame, const rvalue_stack *value_stack)
1239
1505
  {
1240
- return json_parse_number(state, config, false, state->cursor);
1506
+ return value_stack->head - frame->value_stack_head;
1241
1507
  }
1242
1508
 
1243
- static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
1509
+ // A complete value now sits on top of the rvalue stack. Advance the frame that
1510
+ // was waiting for it: the root document is done, or the enclosing container
1511
+ // moves on to expecting a ',' or its closing bracket. The caller passes the
1512
+ // frame it already has in hand -- the one that was expecting the value -- which
1513
+ // after a container close is the freshly re-exposed parent.
1514
+ static inline enum json_frame_phase json_value_completed(json_frame *frame)
1244
1515
  {
1245
- const char *start = state->cursor;
1246
- state->cursor++;
1247
- return json_parse_number(state, config, true, start);
1516
+ JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT);
1517
+ JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY);
1518
+ JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT);
1519
+
1520
+ return frame->phase = (enum json_frame_phase) frame->type;
1248
1521
  }
1249
1522
 
1250
- static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1523
+ ALWAYS_INLINE(static) void json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset)
1251
1524
  {
1252
- json_eat_whitespace(state);
1525
+ // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this
1526
+ // `strlen` and several other computations in that routine.
1253
1527
 
1254
- switch (peek(state)) {
1255
- case 'n':
1256
- if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
1257
- state->cursor += 4;
1258
- return json_push_value(state, config, Qnil);
1259
- }
1528
+ size_t len = strlen(keyword);
1260
1529
 
1261
- raise_parse_error("unexpected token %s", state);
1262
- break;
1263
- case 't':
1264
- if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
1265
- state->cursor += 4;
1266
- return json_push_value(state, config, Qtrue);
1267
- }
1530
+ // Note: memcmp with a small power-of-two length and a literal string compiles to an integer comparison.
1531
+ // That's why we sometimes compare starting from the first byte and sometimes from the second.
1532
+ if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) {
1533
+ state->cursor += len;
1534
+ return;
1535
+ }
1268
1536
 
1269
- raise_parse_error("unexpected token %s", state);
1270
- break;
1271
- case 'f':
1272
- // Note: memcmp with a small power of two compile to an integer comparison
1273
- if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1274
- state->cursor += 5;
1275
- return json_push_value(state, config, Qfalse);
1276
- }
1537
+ bool eos = rest(state) < len && memcmp(state->cursor, keyword, rest(state)) == 0;
1538
+ raise_parse_error("unexpected token %s", state, eos);
1539
+ }
1277
1540
 
1278
- raise_parse_error("unexpected token %s", state);
1279
- break;
1280
- case 'N':
1281
- // Note: memcmp with a small power of two compile to an integer comparison
1282
- if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1283
- state->cursor += 3;
1284
- return json_push_value(state, config, CNaN);
1285
- }
1541
+ // Parse an arbitrary JSON value iteratively. This is a state machine driven
1542
+ // entirely by the top frame's phase so it can stop at any value boundary and
1543
+ // resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
1544
+ // bottom of the stack, so the stack is never empty mid-parse and the document
1545
+ // itself is just another frame whose value, once parsed, leaves its phase DONE.
1546
+ // When invoked in resumable mode, it returns true after parsing a complete document.
1547
+ // If reaching EOS without having parsed a complete document, either returns false
1548
+ // or raises a JSON::ParserError tagged with `@eos=true`.
1549
+ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config, bool resumable)
1550
+ {
1551
+ json_frame *frame = json_frame_stack_peek(state->frames);
1286
1552
 
1287
- raise_parse_error("unexpected token %s", state);
1288
- break;
1289
- case 'I':
1290
- if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1291
- state->cursor += 8;
1292
- return json_push_value(state, config, CInfinity);
1293
- }
1553
+ switch (frame->phase) {
1554
+ case JSON_PHASE_DONE: JSON_UNREACHABLE_RETURN(false);
1555
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
1556
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
1557
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
1558
+ case JSON_PHASE_OBJECT_KEY: goto JSON_PHASE_OBJECT_KEY;
1559
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
1560
+ }
1561
+ JSON_UNREACHABLE_RETURN(false);
1294
1562
 
1295
- raise_parse_error("unexpected token %s", state);
1296
- break;
1297
- case '-': {
1298
- // Note: memcmp with a small power of two compile to an integer comparison
1299
- if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1300
- if (config->allow_nan) {
1301
- state->cursor += 9;
1302
- return json_push_value(state, config, CMinusInfinity);
1303
- } else {
1304
- raise_parse_error("unexpected token %s", state);
1305
- }
1306
- }
1307
- return json_push_value(state, config, json_parse_negative_number(state, config));
1308
- break;
1309
- }
1310
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1311
- return json_push_value(state, config, json_parse_positive_number(state, config));
1312
- break;
1313
- case '"': {
1314
- // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1315
- return json_parse_string(state, config, false);
1316
- break;
1317
- }
1318
- case '[': {
1319
- state->cursor++;
1320
- json_eat_whitespace(state);
1321
- long stack_head = state->stack->head;
1563
+ JSON_PHASE_VALUE: {
1564
+ json_eat_whitespace(state, config, true);
1322
1565
 
1323
- if (peek(state) == ']') {
1324
- state->cursor++;
1325
- return json_push_value(state, config, json_decode_array(state, config, 0));
1326
- } else {
1327
- state->current_nesting++;
1328
- if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1329
- rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1330
- }
1331
- state->in_array++;
1332
- json_parse_any(state, config);
1333
- }
1566
+ VALUE value;
1567
+ const char *value_start = state->cursor;
1334
1568
 
1335
- while (true) {
1336
- json_eat_whitespace(state);
1569
+ switch (peek(state)) {
1570
+ case 'n':
1571
+ json_match_keyword(state, "null", 0);
1572
+ value = Qnil;
1573
+ break;
1337
1574
 
1338
- const char next_char = peek(state);
1575
+ case 't':
1576
+ json_match_keyword(state, "true", 0);
1577
+ value = Qtrue;
1578
+ break;
1339
1579
 
1340
- if (RB_LIKELY(next_char == ',')) {
1341
- state->cursor++;
1342
- if (config->allow_trailing_comma) {
1343
- json_eat_whitespace(state);
1344
- if (peek(state) == ']') {
1345
- continue;
1346
- }
1347
- }
1348
- json_parse_any(state, config);
1349
- continue;
1350
- }
1580
+ case 'f':
1581
+ json_match_keyword(state, "false", 1);
1582
+ value = Qfalse;
1583
+ break;
1351
1584
 
1352
- if (next_char == ']') {
1353
- state->cursor++;
1354
- long count = state->stack->head - stack_head;
1355
- state->current_nesting--;
1356
- state->in_array--;
1357
- return json_push_value(state, config, json_decode_array(state, config, count));
1585
+ case 'N':
1586
+ if (!config->allow_nan) {
1587
+ raise_syntax_error("unexpected token %s", state);
1358
1588
  }
1359
1589
 
1360
- raise_parse_error("expected ',' or ']' after array value", state);
1361
- }
1362
- break;
1363
- }
1364
- case '{': {
1365
- const char *object_start_cursor = state->cursor;
1590
+ json_match_keyword(state, "NaN", 1);
1591
+ value = CNaN;
1592
+ break;
1366
1593
 
1367
- state->cursor++;
1368
- json_eat_whitespace(state);
1369
- long stack_head = state->stack->head;
1594
+ case 'I':
1595
+ if (!config->allow_nan) {
1596
+ raise_syntax_error("unexpected token %s", state);
1597
+ }
1598
+
1599
+ json_match_keyword(state, "Infinity", 0);
1600
+ value = CInfinity;
1601
+ break;
1370
1602
 
1371
- if (peek(state) == '}') {
1603
+ case '-': {
1372
1604
  state->cursor++;
1373
- return json_push_value(state, config, json_decode_object(state, config, 0));
1374
- } else {
1375
- state->current_nesting++;
1376
- if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1377
- rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1378
- }
1379
1605
 
1380
- if (peek(state) != '"') {
1381
- raise_parse_error("expected object key, got %s", state);
1606
+ value = json_parse_number(state, config, true, value_start);
1607
+
1608
+ if (RB_UNLIKELY(UNDEF_P(value) && config->allow_nan && peek(state) == 'I')) {
1609
+ state->cursor = value_start;
1610
+ json_match_keyword(state, "-Infinity", 1);
1611
+ value = CMinusInfinity;
1612
+ break;
1382
1613
  }
1383
- json_parse_string(state, config, true);
1384
1614
 
1385
- json_eat_whitespace(state);
1386
- if (peek(state) != ':') {
1387
- raise_parse_error("expected ':' after object key", state);
1615
+ // Top level numbers are ambiguous when parsing streams, we can't
1616
+ // know if we parsed all the digits if we hit EOS.
1617
+ if (RB_UNLIKELY(resumable && eos(state))) {
1618
+ state->cursor = value_start;
1619
+ return false;
1388
1620
  }
1389
- state->cursor++;
1390
1621
 
1391
- json_parse_any(state, config);
1392
- }
1622
+ if (RB_UNLIKELY(UNDEF_P(value))) {
1623
+ raise_syntax_error_at("invalid number: %s", state, value_start);
1624
+ }
1625
+ break;
1626
+ }
1393
1627
 
1394
- while (true) {
1395
- json_eat_whitespace(state);
1628
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
1629
+ value = json_parse_number(state, config, false, value_start);
1396
1630
 
1397
- const char next_char = peek(state);
1398
- if (next_char == '}') {
1399
- state->cursor++;
1400
- state->current_nesting--;
1401
- size_t count = state->stack->head - stack_head;
1631
+ // Top level numbers are ambiguous when parsing streams, we can't
1632
+ // know if we parsed all the digits if we hit EOS.
1633
+ if (RB_UNLIKELY(resumable && eos(state))) {
1634
+ state->cursor = value_start;
1635
+ return false;
1636
+ }
1402
1637
 
1403
- // Temporary rewind cursor in case an error is raised
1404
- const char *final_cursor = state->cursor;
1405
- state->cursor = object_start_cursor;
1406
- VALUE object = json_decode_object(state, config, count);
1407
- state->cursor = final_cursor;
1638
+ if (RB_UNLIKELY(UNDEF_P(value))) {
1639
+ raise_syntax_error_at("invalid number: %s", state, value_start);
1640
+ }
1641
+ break;
1642
+ }
1408
1643
 
1409
- return json_push_value(state, config, object);
1644
+ case '"': {
1645
+ // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
1646
+ value = json_parse_string(state, config, false);
1647
+
1648
+ if (RB_UNLIKELY(UNDEF_P(value))) {
1649
+ bool is_eos = eos(state);
1650
+ if (resumable && is_eos) {
1651
+ state->cursor = value_start;
1652
+ return false;
1653
+ }
1654
+ raise_parse_error("unexpected end of input, expected closing \"", state, is_eos);
1410
1655
  }
1656
+ break;
1657
+ }
1411
1658
 
1412
- if (next_char == ',') {
1659
+ case '[': {
1660
+ state->cursor++;
1661
+ json_eat_whitespace(state, config, true);
1662
+
1663
+ const char next = peek(state);
1664
+ if (next == ']') {
1413
1665
  state->cursor++;
1414
- json_eat_whitespace(state);
1666
+ value = json_decode_array(state, config, 0);
1667
+ break;
1668
+ } else if (resumable && eos(state)) {
1669
+ state->cursor = value_start;
1670
+ return false;
1671
+ }
1415
1672
 
1416
- if (config->allow_trailing_comma) {
1417
- if (peek(state) == '}') {
1418
- continue;
1419
- }
1420
- }
1673
+ state->current_nesting++;
1674
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1675
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1676
+ }
1677
+ state->in_array++;
1421
1678
 
1422
- if (RB_UNLIKELY(peek(state) != '"')) {
1423
- raise_parse_error("expected object key, got: %s", state);
1424
- }
1425
- json_parse_string(state, config, true);
1679
+ // Phase stays VALUE: the next iteration reads the first element.
1680
+ frame = json_frame_stack_push(state, (json_frame){
1681
+ .type = JSON_FRAME_ARRAY,
1682
+ .phase = JSON_PHASE_VALUE,
1683
+ .value_stack_head = state->value_stack->head,
1684
+ });
1685
+ goto JSON_PHASE_VALUE;
1686
+ }
1426
1687
 
1427
- json_eat_whitespace(state);
1428
- if (RB_UNLIKELY(peek(state) != ':')) {
1429
- raise_parse_error("expected ':' after object key, got: %s", state);
1430
- }
1688
+ case '{': {
1689
+ state->cursor++;
1690
+ json_eat_whitespace(state, config, true);
1691
+
1692
+ if (peek(state) == '}') {
1431
1693
  state->cursor++;
1694
+ value = json_decode_object(state, config, 0);
1695
+ break;
1696
+ } else if (resumable && eos(state)) {
1697
+ state->cursor = value_start;
1698
+ return false;
1699
+ }
1432
1700
 
1433
- json_parse_any(state, config);
1701
+ state->current_nesting++;
1702
+ if (RB_UNLIKELY(config->max_nesting && (config->max_nesting < state->current_nesting))) {
1703
+ rb_raise(eNestingError, "nesting of %d is too deep", state->current_nesting);
1704
+ }
1705
+
1706
+ // Phase KEY: the next iteration reads the first key.
1707
+ frame = json_frame_stack_push(state, (json_frame){
1708
+ .type = JSON_FRAME_OBJECT,
1709
+ .phase = JSON_PHASE_OBJECT_KEY,
1710
+ .value_stack_head = state->value_stack->head,
1711
+ .start_offset = value_start - state->start,
1712
+ });
1713
+ goto JSON_PHASE_OBJECT_KEY;
1714
+ }
1434
1715
 
1435
- continue;
1716
+ case 0:
1717
+ // peek() returns 0 both at end-of-stream and for a literal NUL byte in the
1718
+ // buffer. Only a genuine EOS means "feed me more"; a NUL byte that is not at
1719
+ // EOS is just an invalid character.
1720
+ if (eos(state)) {
1721
+ return false;
1722
+ } else {
1723
+ raise_syntax_error("unexpected NULL byte: %s", state);
1436
1724
  }
1725
+ default:
1726
+ raise_syntax_error("unexpected character: %s", state);
1727
+ }
1437
1728
 
1438
- raise_parse_error("expected ',' or '}' after object value, got: %s", state);
1729
+ json_push_value(state, config, value);
1730
+ json_value_completed(frame);
1731
+
1732
+ switch (frame->phase) {
1733
+ case JSON_PHASE_DONE: return true;
1734
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
1735
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
1736
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
1737
+ case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
1738
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
1739
+ }
1740
+ JSON_UNREACHABLE_RETURN(false);
1741
+ }
1742
+
1743
+ JSON_PHASE_OBJECT_KEY: {
1744
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
1745
+
1746
+ json_eat_whitespace(state, config, true);
1747
+
1748
+ const char *start = state->cursor;
1749
+
1750
+ if (RB_LIKELY(peek(state) == '"')) {
1751
+ VALUE string = json_parse_string(state, config, true);
1752
+ if (UNDEF_P(string)) {
1753
+ if (resumable) {
1754
+ state->cursor = start;
1755
+ return false;
1756
+ } else {
1757
+ raise_syntax_error("unexpected end of input, expected closing \"", state);
1758
+ }
1759
+ }
1760
+ json_push_value(state, config, string);
1761
+ frame->phase = JSON_PHASE_OBJECT_COLON;
1762
+ goto JSON_PHASE_OBJECT_COLON;
1763
+ } else if (resumable && eos(state)) {
1764
+ return false;
1765
+ } else {
1766
+ // The message differs for the first key vs. a key after a
1767
+ // ',': the first is the only one reached with nothing pushed
1768
+ // for this object yet.
1769
+ if (json_frame_entry_count(frame, state->value_stack) == 0) {
1770
+ raise_syntax_error("expected object key, got %s", state);
1771
+ } else {
1772
+ raise_syntax_error("expected object key, got: %s", state);
1439
1773
  }
1440
- break;
1441
1774
  }
1775
+ JSON_UNREACHABLE_RETURN(false);
1776
+ }
1442
1777
 
1443
- case 0:
1444
- raise_parse_error("unexpected end of input", state);
1445
- break;
1778
+ JSON_PHASE_OBJECT_COLON: {
1779
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
1446
1780
 
1447
- default:
1448
- raise_parse_error("unexpected character: %s", state);
1449
- break;
1781
+ json_eat_whitespace(state, config, true);
1782
+
1783
+ if (RB_LIKELY(peek(state) == ':')) {
1784
+ state->cursor++;
1785
+ frame->phase = JSON_PHASE_VALUE;
1786
+ goto JSON_PHASE_VALUE;
1787
+ } else if (resumable && eos(state)) {
1788
+ return false;
1789
+ } else {
1790
+ // First colon (only the first pair's key is pushed, nothing
1791
+ // else) vs. a later one.
1792
+ if (json_frame_entry_count(frame, state->value_stack) == 1) {
1793
+ raise_syntax_error("expected ':' after object key", state);
1794
+ } else {
1795
+ raise_syntax_error("expected ':' after object key, got: %s", state);
1796
+ }
1797
+ }
1798
+ JSON_UNREACHABLE_RETURN(false);
1450
1799
  }
1451
1800
 
1452
- raise_parse_error("unreachable: %s", state);
1453
- return Qundef;
1801
+ JSON_PHASE_ARRAY_COMMA: {
1802
+ JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
1803
+
1804
+ json_eat_whitespace(state, config, true);
1805
+
1806
+ const char next_char = peek(state);
1807
+
1808
+ if (RB_LIKELY(next_char == ',')) {
1809
+ state->cursor++;
1810
+ if (config->allow_trailing_comma) {
1811
+ json_eat_whitespace(state, config, true);
1812
+ if (peek(state) == ']') {
1813
+ // Trailing comma: stay in COMMA to close on the next iteration.
1814
+ goto JSON_PHASE_ARRAY_COMMA;
1815
+ }
1816
+ }
1817
+ frame->phase = JSON_PHASE_VALUE;
1818
+ goto JSON_PHASE_VALUE;
1819
+ } else if (next_char == ']') {
1820
+ state->cursor++;
1821
+ long count = json_frame_entry_count(frame, state->value_stack);
1822
+ state->current_nesting--;
1823
+ state->in_array--;
1824
+
1825
+ json_push_value(state, config, json_decode_array(state, config, count));
1826
+ json_frame_stack_pop(state->frames);
1827
+ frame = json_frame_stack_peek(state->frames);
1828
+
1829
+ json_value_completed(frame);
1830
+
1831
+ switch (frame->phase) {
1832
+ case JSON_PHASE_DONE: return true;
1833
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
1834
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
1835
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
1836
+ case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
1837
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
1838
+ }
1839
+ } else if (resumable && eos(state)) {
1840
+ return false;
1841
+ } else {
1842
+ raise_syntax_error("expected ',' or ']' after array value", state);
1843
+ }
1844
+ JSON_UNREACHABLE_RETURN(false);
1845
+ }
1846
+
1847
+ JSON_PHASE_OBJECT_COMMA: {
1848
+ JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
1849
+
1850
+ json_eat_whitespace(state, config, true);
1851
+ const char next_char = peek(state);
1852
+
1853
+ if (RB_LIKELY(next_char == ',')) {
1854
+ state->cursor++;
1855
+ json_eat_whitespace(state, config, true);
1856
+
1857
+ if (config->allow_trailing_comma) {
1858
+ if (peek(state) == '}') {
1859
+ // Trailing comma: stay in COMMA to close on the next iteration.
1860
+ goto JSON_PHASE_OBJECT_COMMA;
1861
+ }
1862
+ }
1863
+
1864
+ frame->phase = JSON_PHASE_OBJECT_KEY;
1865
+ goto JSON_PHASE_OBJECT_KEY;
1866
+ } else if (next_char == '}') {
1867
+ state->cursor++;
1868
+ state->current_nesting--;
1869
+ size_t count = json_frame_entry_count(frame, state->value_stack);
1870
+
1871
+ // Temporary rewind cursor in case an error is raised
1872
+ const char *final_cursor = state->cursor;
1873
+ state->cursor = state->start + frame->start_offset;
1874
+ VALUE object = json_decode_object(state, config, count);
1875
+ state->cursor = final_cursor;
1876
+
1877
+ json_push_value(state, config, object);
1878
+ json_frame_stack_pop(state->frames);
1879
+ frame = json_frame_stack_peek(state->frames);
1880
+ json_value_completed(frame);
1881
+
1882
+ switch (frame->phase) {
1883
+ case JSON_PHASE_DONE: return true;
1884
+ case JSON_PHASE_ARRAY_COMMA: goto JSON_PHASE_ARRAY_COMMA;
1885
+ case JSON_PHASE_OBJECT_COMMA: goto JSON_PHASE_OBJECT_COMMA;
1886
+ case JSON_PHASE_VALUE: goto JSON_PHASE_VALUE;
1887
+ case JSON_PHASE_OBJECT_KEY: JSON_UNREACHABLE_RETURN(false);
1888
+ case JSON_PHASE_OBJECT_COLON: goto JSON_PHASE_OBJECT_COLON;
1889
+ }
1890
+ } else if (resumable && eos(state)) {
1891
+ return false;
1892
+ } else {
1893
+ raise_syntax_error("expected ',' or '}' after object value, got: %s", state);
1894
+ }
1895
+ JSON_UNREACHABLE_RETURN(false);
1896
+ }
1897
+
1898
+ JSON_UNREACHABLE_RETURN(false);
1454
1899
  }
1455
1900
 
1456
- static void json_ensure_eof(JSON_ParserState *state)
1901
+ static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config)
1457
1902
  {
1458
- json_eat_whitespace(state);
1903
+ json_eat_whitespace(state, config, true);
1459
1904
  if (!eos(state)) {
1460
- raise_parse_error("unexpected token at end of stream %s", state);
1905
+ raise_syntax_error("unexpected token at end of stream %s", state);
1461
1906
  }
1462
1907
  }
1463
1908
 
@@ -1495,6 +1940,8 @@ static VALUE convert_encoding(VALUE source)
1495
1940
  struct parser_config_init_args {
1496
1941
  JSON_ParserConfig *config;
1497
1942
  VALUE self;
1943
+ VALUE unknown_keywords;
1944
+ bool strict;
1498
1945
  };
1499
1946
 
1500
1947
  static void parser_config_wb_write(VALUE self, VALUE *dest, VALUE val)
@@ -1512,6 +1959,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1512
1959
  if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
1513
1960
  else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
1514
1961
  else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
1962
+ else if (key == sym_allow_comments) { config->on_comment = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1515
1963
  else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
1516
1964
  else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
1517
1965
  else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
@@ -1547,27 +1995,42 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
1547
1995
  }
1548
1996
  }
1549
1997
  }
1998
+ else if (args->strict) {
1999
+ if (!args->unknown_keywords) {
2000
+ args->unknown_keywords = rb_obj_hide(rb_ary_new());
2001
+ }
2002
+ rb_ary_push(args->unknown_keywords, key);
2003
+ }
1550
2004
 
1551
2005
  return ST_CONTINUE;
1552
2006
  }
1553
2007
 
1554
- static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self)
2008
+ static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self, bool strict)
1555
2009
  {
1556
2010
  config->max_nesting = 100;
1557
2011
 
1558
2012
  struct parser_config_init_args args = {
1559
2013
  .config = config,
1560
2014
  .self = self,
2015
+ .strict = strict,
1561
2016
  };
1562
2017
 
1563
- if (!NIL_P(opts)) {
1564
- Check_Type(opts, T_HASH);
1565
- if (RHASH_SIZE(opts) > 0) {
1566
- // We assume in most cases few keys are set so it's faster to go over
1567
- // the provided keys than to check all possible keys.
1568
- rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args);
1569
- }
2018
+ if (NIL_P(opts)) return;
2019
+ Check_Type(opts, T_HASH);
2020
+ if (RHASH_SIZE(opts) == 0) return;
1570
2021
 
2022
+ // We assume in most cases few keys are set so it's faster to go over
2023
+ // the provided keys than to check all possible keys.
2024
+ rb_hash_foreach(opts, parser_config_init_i, (VALUE)&args);
2025
+
2026
+ if (RB_UNLIKELY(args.unknown_keywords)) {
2027
+ if (RARRAY_LEN(args.unknown_keywords) == 1) {
2028
+ rb_raise(rb_eArgError, "unknown keyword: %" PRIsVALUE, RARRAY_AREF(args.unknown_keywords, 0));
2029
+ }
2030
+ else {
2031
+ VALUE keywords = rb_ary_join(args.unknown_keywords, rb_utf8_str_new_cstr(", "));
2032
+ rb_raise(rb_eArgError, "unknown keywords: %" PRIsVALUE, keywords);
2033
+ }
1571
2034
  }
1572
2035
  }
1573
2036
 
@@ -1576,30 +2039,16 @@ static void parser_config_init(JSON_ParserConfig *config, VALUE opts, VALUE self
1576
2039
  *
1577
2040
  * Creates a new JSON::Ext::ParserConfig instance.
1578
2041
  *
1579
- * It will be configured by the _opts_ hash. _opts_ can have the following
1580
- * keys:
2042
+ * Argument +opts+, if given, contains a \Hash of options for the parsing.
2043
+ * See {Parsing Options}[#module-JSON-label-Parsing+Options].
1581
2044
  *
1582
- * _opts_ can have the following keys:
1583
- * * *max_nesting*: The maximum depth of nesting allowed in the parsed data
1584
- * structures. Disable depth checking with :max_nesting => false|nil|0, it
1585
- * defaults to 100.
1586
- * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in
1587
- * defiance of RFC 4627 to be parsed by the Parser. This option defaults to
1588
- * false.
1589
- * * *symbolize_names*: If set to true, returns symbols for the names
1590
- * (keys) in a JSON object. Otherwise strings are returned, which is
1591
- * also the default. It's not possible to use this option in
1592
- * conjunction with the *create_additions* option.
1593
- * * *decimal_class*: Specifies which class to use instead of the default
1594
- * (Float) when parsing decimal numbers. This class must accept a single
1595
- * string argument in its constructor.
1596
2045
  */
1597
2046
  static VALUE cParserConfig_initialize(VALUE self, VALUE opts)
1598
2047
  {
1599
2048
  rb_check_frozen(self);
1600
2049
  GET_PARSER_CONFIG;
1601
2050
 
1602
- parser_config_init(config, opts, self);
2051
+ parser_config_init(config, opts, self, false);
1603
2052
 
1604
2053
  return self;
1605
2054
  }
@@ -1616,35 +2065,64 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
1616
2065
  }
1617
2066
 
1618
2067
  VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
1619
- rvalue_stack stack = {
2068
+ rvalue_stack value_stack = {
1620
2069
  .type = RVALUE_STACK_STACK_ALLOCATED,
1621
2070
  .ptr = rvalue_stack_buffer,
1622
2071
  .capa = RVALUE_STACK_INITIAL_CAPA,
1623
2072
  };
1624
2073
 
2074
+ // Seed the frame stack with the root frame, establishing the invariant that
2075
+ // json_parse_any always has a top frame to dispatch on (so the stack is never
2076
+ // empty mid-parse).
2077
+ json_frame frame_stack_buffer[JSON_FRAME_STACK_INITIAL_CAPA];
2078
+ frame_stack_buffer[0] = (json_frame){
2079
+ .type = JSON_FRAME_ROOT,
2080
+ .phase = JSON_PHASE_VALUE,
2081
+ };
2082
+ json_frame_stack frames = {
2083
+ .type = RVALUE_STACK_STACK_ALLOCATED,
2084
+ .ptr = frame_stack_buffer,
2085
+ .capa = JSON_FRAME_STACK_INITIAL_CAPA,
2086
+ .head = 1,
2087
+ };
2088
+
1625
2089
  long len;
1626
2090
  const char *start;
1627
2091
 
1628
2092
  RSTRING_GETMEM(Vsource, start, len);
1629
2093
 
1630
- VALUE stack_handle = 0;
2094
+ VALUE value_stack_handle = 0;
2095
+ VALUE frame_stack_handle = 0;
1631
2096
  JSON_ParserState _state = {
1632
2097
  .start = start,
1633
2098
  .cursor = start,
1634
2099
  .end = start + len,
1635
- .stack = &stack,
1636
- .stack_handle = &stack_handle,
2100
+ .value_stack = &value_stack,
2101
+ .value_stack_handle = &value_stack_handle,
2102
+ .frames = &frames,
2103
+ .frame_stack_handle = &frame_stack_handle,
1637
2104
  };
1638
2105
  JSON_ParserState *state = &_state;
1639
2106
 
1640
- VALUE result = json_parse_any(state, config);
2107
+ bool complete = json_parse_any(state, config, false);
2108
+
2109
+ // The root document value is parsed; it is the lone survivor on
2110
+ // the rvalue stack.
2111
+ VALUE result = complete ? *rvalue_stack_peek(state->value_stack, 1) : Qundef;
1641
2112
 
1642
2113
  // This may be skipped in case of exception, but
1643
2114
  // it won't cause a leak.
1644
- rvalue_stack_eagerly_release(stack_handle);
1645
- RB_GC_GUARD(stack_handle);
2115
+ rvalue_stack_eagerly_release(value_stack_handle);
2116
+ json_frame_stack_eagerly_release(frame_stack_handle);
2117
+ RB_GC_GUARD(value_stack_handle);
2118
+ RB_GC_GUARD(frame_stack_handle);
1646
2119
  RB_GC_GUARD(Vsource);
1647
- json_ensure_eof(state);
2120
+
2121
+ if (complete) {
2122
+ json_ensure_eof(state, config);
2123
+ } else {
2124
+ raise_eos_error("unexpected end of input", state);
2125
+ }
1648
2126
 
1649
2127
  return result;
1650
2128
  }
@@ -1666,7 +2144,7 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
1666
2144
  {
1667
2145
  JSON_ParserConfig _config = {0};
1668
2146
  JSON_ParserConfig *config = &_config;
1669
- parser_config_init(config, opts, false);
2147
+ parser_config_init(config, opts, Qfalse, false);
1670
2148
 
1671
2149
  return cParser_parse(config, Vsource);
1672
2150
  }
@@ -1674,21 +2152,33 @@ static VALUE cParser_m_parse(VALUE klass, VALUE Vsource, VALUE opts)
1674
2152
  static void JSON_ParserConfig_mark(void *ptr)
1675
2153
  {
1676
2154
  JSON_ParserConfig *config = ptr;
1677
- rb_gc_mark(config->on_load_proc);
1678
- rb_gc_mark(config->decimal_class);
2155
+ rb_gc_mark_movable(config->on_load_proc);
2156
+ rb_gc_mark_movable(config->decimal_class);
1679
2157
  }
1680
2158
 
1681
2159
  static size_t JSON_ParserConfig_memsize(const void *ptr)
1682
2160
  {
2161
+ #ifdef HAVE_RUBY_TYPED_EMBEDDABLE
2162
+ return 0;
2163
+ #else
1683
2164
  return sizeof(JSON_ParserConfig);
2165
+ #endif
2166
+ }
2167
+
2168
+ static void JSON_ParserConfig_compact(void *ptr)
2169
+ {
2170
+ JSON_ParserConfig *config = ptr;
2171
+ config->on_load_proc = rb_gc_location(config->on_load_proc);
2172
+ config->decimal_class = rb_gc_location(config->decimal_class);
1684
2173
  }
1685
2174
 
1686
2175
  static const rb_data_type_t JSON_ParserConfig_type = {
1687
2176
  .wrap_struct_name = "JSON::Ext::Parser/ParserConfig",
1688
2177
  .function = {
1689
- JSON_ParserConfig_mark,
1690
- RUBY_DEFAULT_FREE,
1691
- JSON_ParserConfig_memsize,
2178
+ .dmark = JSON_ParserConfig_mark,
2179
+ .dfree = RUBY_DEFAULT_FREE,
2180
+ .dsize = JSON_ParserConfig_memsize,
2181
+ .dcompact = JSON_ParserConfig_compact,
1692
2182
  },
1693
2183
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
1694
2184
  };
@@ -1699,6 +2189,562 @@ static VALUE cJSON_parser_s_allocate(VALUE klass)
1699
2189
  return TypedData_Make_Struct(klass, JSON_ParserConfig, &JSON_ParserConfig_type, config);
1700
2190
  }
1701
2191
 
2192
+ static void json_str_clear(VALUE str)
2193
+ {
2194
+ if (RB_OBJ_FROZEN_RAW(str)) {
2195
+ return;
2196
+ }
2197
+ rb_str_replace(str, JSON_empty_string);
2198
+ }
2199
+
2200
+ typedef struct JSON_ResumableParserStruct {
2201
+ JSON_ParserConfig config;
2202
+ JSON_ParserState state;
2203
+ rvalue_stack value_stack;
2204
+ json_frame_stack frames;
2205
+ VALUE buffer;
2206
+ size_t parsed_bytes;
2207
+ size_t incomplete_bytes;
2208
+ bool complete;
2209
+ bool in_use;
2210
+ } JSON_ResumableParser;
2211
+
2212
+ static void JSON_ResumableParser_mark(void *ptr)
2213
+ {
2214
+ JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
2215
+ JSON_ParserConfig_mark(&parser->config);
2216
+ rvalue_stack_mark(&parser->value_stack);
2217
+ rvalue_cache_mark(&parser->state.name_cache);
2218
+ rb_gc_mark(parser->buffer); // pin the buffer
2219
+ rb_gc_mark_movable(parser->state.parser);
2220
+ }
2221
+
2222
+ static void JSON_ResumableParser_free(void *ptr)
2223
+ {
2224
+ JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
2225
+ rvalue_stack_free_buffer(&parser->value_stack);
2226
+ json_frame_stack_free_buffer(&parser->frames);
2227
+ }
2228
+
2229
+ static size_t JSON_ResumableParser_memsize(const void *ptr)
2230
+ {
2231
+ const JSON_ResumableParser *parser = (const JSON_ResumableParser *)ptr;
2232
+ size_t memsize = JSON_ParserConfig_memsize(&parser->config);
2233
+ memsize += rvalue_stack_memsize(&parser->value_stack);
2234
+ memsize += json_frame_stack_memsize(&parser->frames);
2235
+ #ifndef HAVE_RUBY_TYPED_EMBEDDABLE
2236
+ memsize += (
2237
+ sizeof(JSON_ResumableParser)
2238
+ - sizeof(JSON_ParserState)
2239
+ - sizeof(JSON_ParserConfig)
2240
+ - sizeof(rvalue_stack)
2241
+ - sizeof(json_frame_stack)
2242
+ );
2243
+ #endif
2244
+ return memsize;
2245
+ }
2246
+
2247
+ static void JSON_ResumableParser_compact(void *ptr)
2248
+ {
2249
+ JSON_ResumableParser *parser = (JSON_ResumableParser *)ptr;
2250
+ JSON_ParserConfig_compact(&parser->config);
2251
+ rvalue_stack_compact(&parser->value_stack);
2252
+ rvalue_cache_compact(&parser->state.name_cache);
2253
+ parser->buffer = rb_gc_location(parser->buffer);
2254
+ parser->state.parser = rb_gc_location(parser->state.parser);
2255
+ }
2256
+
2257
+ static const rb_data_type_t JSON_ResumableParser_type = {
2258
+ .wrap_struct_name = "JSON::Ext::ResumableParser",
2259
+ .function = {
2260
+ JSON_ResumableParser_mark,
2261
+ JSON_ResumableParser_free,
2262
+ JSON_ResumableParser_memsize,
2263
+ JSON_ResumableParser_compact,
2264
+ },
2265
+ // RUBY_TYPED_WB_PROTECTED is deliberately not declared because
2266
+ // this is a superset of JSON_Parser_rvalue_stack_type, so we'd need
2267
+ // to trigger a lot of write barriers.
2268
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
2269
+ };
2270
+
2271
+ static VALUE cResumableParser_allocate(VALUE klass)
2272
+ {
2273
+ JSON_ResumableParser *parser;
2274
+ VALUE obj = TypedData_Make_Struct(klass, JSON_ResumableParser, &JSON_ResumableParser_type, parser);
2275
+ parser->state.in_array++;
2276
+ parser->state.parser = obj;
2277
+ return obj;
2278
+ }
2279
+
2280
+ static inline JSON_ResumableParser *cResumableParser_get(VALUE self)
2281
+ {
2282
+ JSON_ResumableParser *parser;
2283
+ TypedData_Get_Struct(self, JSON_ResumableParser, &JSON_ResumableParser_type, parser);
2284
+ return parser;
2285
+ }
2286
+
2287
+ /*
2288
+ * call-seq: new(opts => {})
2289
+ *
2290
+ * Creates a new JSON::ResumableParser instance.
2291
+ *
2292
+ * Argument +opts+, if given, contains a \Hash of options for the parsing.
2293
+ * See {Parsing Options}[#module-JSON-label-Parsing+Options].
2294
+ *
2295
+ * A ResumableParser is able to parse partial documents and resume parsing later
2296
+ * when more of the document is provided:
2297
+ *
2298
+ * parser = JSON::ResumableParser.new
2299
+ * parser << '{"user": "george", "role": "ad'
2300
+ * parser.parse # => false
2301
+ * parser.eos? # => true
2302
+ * parser.partial_value # => { "user" => "george", "role" => nil }
2303
+ * parser.rest # => '"ad'
2304
+ *
2305
+ * parser << 'min" }[1, 2, 3]'
2306
+ * parser.parse # => true
2307
+ * parser.value # => { "user" => "george", "role" => "admin" }
2308
+ *
2309
+ * parser.parse # => true
2310
+ * parser.value # => [1, 2, 3]
2311
+ *
2312
+ * === Limitations
2313
+ *
2314
+ * While ResumableParser is able to parse streams of documents without any
2315
+ * explicit separators between them, it is highly recommended to separate documents
2316
+ * by either spaces or newlines, as otherwise the \JSON syntax for numbers may be ambiguous.
2317
+ * When parsing a number, ResumableParser will not consider the number complete until something follows:
2318
+ *
2319
+ * parser << '123'
2320
+ * parser.parse # => false
2321
+ * parser << ' '
2322
+ * parser.parse # => true
2323
+ * parser.value # => 123
2324
+ *
2325
+ * === Security
2326
+ *
2327
+ * An incomplete document is buffered in full and there is no size limit, so when reading
2328
+ * from an untrusted source the caller is responsible for bounding how much data is fed.
2329
+ * For example:
2330
+ *
2331
+ * loop do
2332
+ * if parser.parsed_bytes > DOCUMENT_MAX_SIZE
2333
+ * raise "document too large"
2334
+ * end
2335
+ *
2336
+ * parser << read_chunk
2337
+ * while parser.parse
2338
+ * process(parser.value)
2339
+ * end
2340
+ * end
2341
+ */
2342
+ static VALUE cResumableParser_initialize(int argc, VALUE *argv, VALUE self)
2343
+ {
2344
+ rb_check_frozen(self);
2345
+
2346
+ VALUE opts = Qfalse;
2347
+ rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &opts);
2348
+ JSON_ResumableParser *parser = cResumableParser_get(self);
2349
+
2350
+ opts = argc > 0 ? argv[0] : Qnil;
2351
+ parser_config_init(&parser->config, opts, self, true);
2352
+
2353
+ return self;
2354
+ }
2355
+
2356
+ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock);
2357
+
2358
+ /*
2359
+ * call-seq: self << string -> self
2360
+ *
2361
+ * Appends the given string to the parser's buffer.
2362
+ */
2363
+ static VALUE cResumableParser_feed(VALUE self, VALUE str)
2364
+ {
2365
+ rb_check_frozen(self);
2366
+
2367
+ JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
2368
+
2369
+ str = convert_encoding(str);
2370
+ if (!RSTRING_LEN(str)) {
2371
+ return self;
2372
+ }
2373
+
2374
+ size_t offset = parser->state.cursor - parser->state.start;
2375
+ const size_t remaining = parser->state.end - parser->state.cursor;
2376
+
2377
+ if (!remaining) {
2378
+ if (parser->buffer) {
2379
+ json_str_clear(parser->buffer);
2380
+ }
2381
+ parser->buffer = RB_OBJ_FROZEN_RAW(str) ? str : rb_obj_hide(rb_str_new_shared(str));
2382
+ offset = 0;
2383
+ } else {
2384
+ JSON_ASSERT(parser->buffer);
2385
+
2386
+ const size_t size = parser->state.end - parser->state.start;
2387
+ const size_t consumed = size - remaining;
2388
+
2389
+ if (RB_OBJ_FROZEN_RAW(parser->buffer)) {
2390
+ VALUE new_buffer = rb_obj_hide(rb_str_buf_new(remaining + RSTRING_LEN(str)));
2391
+ rb_enc_associate_index(new_buffer, utf8_encindex);
2392
+
2393
+ char *old_ptr = RSTRING_PTR(parser->buffer);
2394
+ memcpy(RSTRING_PTR(new_buffer), old_ptr + consumed, remaining);
2395
+ rb_str_set_len(new_buffer, remaining);
2396
+ offset = 0;
2397
+ parser->buffer = new_buffer;
2398
+ } else if (consumed > (size / 2) && size >= 512) {
2399
+ rb_str_modify(parser->buffer);
2400
+ char *old_ptr = RSTRING_PTR(parser->buffer);
2401
+ memmove(old_ptr, old_ptr + consumed, remaining);
2402
+ rb_str_set_len(parser->buffer, remaining);
2403
+ offset = 0;
2404
+ }
2405
+ rb_str_append(parser->buffer, str);
2406
+ }
2407
+
2408
+ long len;
2409
+ const char *start;
2410
+ RSTRING_GETMEM(parser->buffer, start, len);
2411
+ parser->state.start = start;
2412
+ parser->state.end = start + len;
2413
+ parser->state.cursor = parser->state.start + offset;
2414
+
2415
+ return self;
2416
+ }
2417
+
2418
+ struct json_parse_any_args {
2419
+ JSON_ParserState *state;
2420
+ JSON_ParserConfig *config;
2421
+ VALUE parser;
2422
+ };
2423
+
2424
+ static VALUE json_parse_any_resumable_safe0(RB_BLOCK_CALL_FUNC_ARGLIST(yielded_arg, _args))
2425
+ {
2426
+ struct json_parse_any_args *args = (struct json_parse_any_args *)_args;
2427
+ return (VALUE)json_parse_any(args->state, args->config, true);
2428
+ }
2429
+
2430
+ static VALUE json_parse_any_resumable_safe(VALUE _args)
2431
+ {
2432
+ struct json_parse_any_args *args = (struct json_parse_any_args *)_args;
2433
+ VALUE result = rb_catch_obj(args->parser, json_parse_any_resumable_safe0, _args);
2434
+ return result == args->parser ? Qfalse : result;
2435
+ }
2436
+
2437
+ static JSON_ResumableParser *ResumableParser_acquire(VALUE self, bool lock)
2438
+ {
2439
+ JSON_ResumableParser *parser = cResumableParser_get(self);
2440
+
2441
+ if (parser->in_use) {
2442
+ rb_raise(rb_eArgError, "ResumableParser can't be used recursively");
2443
+ }
2444
+
2445
+ if (lock) {
2446
+ parser->in_use = true;
2447
+ }
2448
+
2449
+ // self may have moved, so we need to update all pointers
2450
+ // Investigate: We might be better off keeping JSON_ParserState on the stack
2451
+ // and only persist what we need.
2452
+ parser->state.value_stack = &parser->value_stack;
2453
+ parser->state.frames = &parser->frames;
2454
+
2455
+ return parser;
2456
+ }
2457
+
2458
+ /*
2459
+ * call-seq: parse -> true or false
2460
+ *
2461
+ * Attempts to parse a JSON document from the internal buffer.
2462
+ * Returns whether a complete document could be parsed.
2463
+ *
2464
+ * It does raise +JSON::ParserError+ when encountering invalid \JSON syntax.
2465
+ *
2466
+ * The parsed object can be retrieved by calling #value
2467
+ */
2468
+ static VALUE cResumableParser_parse(VALUE self)
2469
+ {
2470
+ JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
2471
+
2472
+ if (parser->complete) {
2473
+ parser->parsed_bytes = 0;
2474
+ parser->incomplete_bytes = 0;
2475
+ parser->complete = false;
2476
+ }
2477
+
2478
+ if (!parser->buffer) {
2479
+ parser->in_use = false;
2480
+ return Qfalse;
2481
+ }
2482
+
2483
+ if (parser->frames.head == 0) {
2484
+ json_frame_stack_push(&parser->state, (json_frame){
2485
+ .type = JSON_FRAME_ROOT,
2486
+ .phase = JSON_PHASE_VALUE,
2487
+ });
2488
+ }
2489
+
2490
+ VALUE Vsource = parser->buffer; // Prevent compaction
2491
+
2492
+ json_frame *frame = json_frame_stack_peek(&parser->frames);
2493
+
2494
+ if (frame->phase == JSON_PHASE_DONE) {
2495
+ JSON_ASSERT(parser->value_stack.head == 1);
2496
+ JSON_ASSERT(parser->frames.head == 1);
2497
+
2498
+ frame->phase = JSON_PHASE_VALUE;
2499
+ rvalue_stack_pop(parser->state.value_stack, 1);
2500
+ }
2501
+
2502
+ struct json_parse_any_args args = {
2503
+ .state = &parser->state,
2504
+ .config = &parser->config,
2505
+ .parser = self,
2506
+ };
2507
+ int status;
2508
+ const char *initial_cursor = parser->state.cursor;
2509
+ parser->complete = rb_protect(json_parse_any_resumable_safe, (VALUE)&args, &status);
2510
+
2511
+ if (status) {
2512
+ parser->complete = true; // a parse error is considered complete
2513
+ }
2514
+
2515
+ parser->parsed_bytes += parser->state.cursor - initial_cursor;
2516
+ parser->incomplete_bytes = parser->complete ? 0 : parser->state.end - parser->state.cursor;
2517
+
2518
+ json_eat_whitespace(&parser->state, &parser->config, false);
2519
+ if (eos(&parser->state)) {
2520
+ json_str_clear(parser->buffer);
2521
+ parser->buffer = Qfalse;
2522
+ }
2523
+ parser->in_use = false;
2524
+
2525
+ if (status) {
2526
+ rb_jump_tag(status); // reraise
2527
+ }
2528
+ RB_GC_GUARD(Vsource);
2529
+ return parser->complete ? Qtrue : Qfalse;
2530
+ }
2531
+
2532
+ /*
2533
+ * call-seq: value? -> true or false
2534
+ *
2535
+ * Returns whether a parsed value is available.
2536
+ */
2537
+ static VALUE cResumableParser_value_p(VALUE self)
2538
+ {
2539
+ JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
2540
+
2541
+ if (parser->value_stack.head > 0) {
2542
+ json_frame *frame = json_frame_stack_peek(&parser->frames);
2543
+ if (frame->phase == JSON_PHASE_DONE) {
2544
+ return Qtrue;
2545
+ }
2546
+ }
2547
+ return Qfalse;
2548
+ }
2549
+
2550
+ /*
2551
+ * call-seq: value -> object
2552
+ *
2553
+ * Returns and consumes the last parsed value.
2554
+ * Raises ArgumentError if there is no parsed value or if it was already retrieved:
2555
+ * parser << '[1][2]'
2556
+ * parser.value # ArgumentError no ready value
2557
+ * parser.parse # => true
2558
+ * parser.value # => [1]
2559
+ * parser.value # ArgumentError no ready value
2560
+ */
2561
+ static VALUE cResumableParser_value(VALUE self)
2562
+ {
2563
+ JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
2564
+
2565
+ if (parser->frames.head > 0) {
2566
+ json_frame *frame = json_frame_stack_peek(&parser->frames);
2567
+
2568
+ if (frame->phase == JSON_PHASE_DONE) {
2569
+ VALUE result = *rvalue_stack_peek(parser->state.value_stack, 1);
2570
+ rvalue_stack_pop(parser->state.value_stack, 1);
2571
+ json_frame_stack_pop(parser->state.frames);
2572
+ return result;
2573
+ }
2574
+ }
2575
+ rb_raise(rb_eArgError, "no ready value");
2576
+ }
2577
+
2578
+ /*
2579
+ * call-seq: clear -> self
2580
+ *
2581
+ * Entirely reset the parser state and buffer.
2582
+ */
2583
+ static VALUE cResumableParser_clear(VALUE self)
2584
+ {
2585
+ JSON_ResumableParser *parser = ResumableParser_acquire(self, false);
2586
+ parser->buffer = 0;
2587
+ parser->complete = true;
2588
+ parser->parsed_bytes = 0;
2589
+ parser->incomplete_bytes = 0;
2590
+ parser->frames.head = 0;
2591
+ parser->value_stack.head = 0;
2592
+ parser->state.name_cache.length = 0;
2593
+ parser->state.current_nesting = 0;
2594
+ parser->state.in_array = 1;
2595
+ parser->state.emitted_deprecations = 0;
2596
+ parser->state.start = parser->state.cursor = parser->state.end = NULL;
2597
+ return self;
2598
+ }
2599
+
2600
+ static VALUE cResumableParser_partial_value_body(VALUE self)
2601
+ {
2602
+ JSON_ResumableParser *original_parser = cResumableParser_get(self);
2603
+ JSON_ResumableParser parser = *original_parser;
2604
+
2605
+ parser.state.frames = &parser.frames;
2606
+ parser.state.value_stack = &parser.value_stack;
2607
+
2608
+ if (parser.value_stack.head == 0) {
2609
+ return Qnil;
2610
+ }
2611
+
2612
+ json_frame *frame = json_frame_stack_peek(parser.state.frames);
2613
+ long missing_object_value = 0;
2614
+ if (frame->type == JSON_FRAME_OBJECT && (frame->phase == JSON_PHASE_VALUE || frame->phase == JSON_PHASE_OBJECT_COLON)) {
2615
+ missing_object_value = 1;
2616
+ }
2617
+
2618
+ // Copy the value stack as we need to mutate it.
2619
+ long capa = parser.value_stack.head;
2620
+ parser.value_stack.capa = (capa + missing_object_value);
2621
+ VALUE tmpbuf, *value_stack_buffer = ALLOCV_N(VALUE, tmpbuf, capa + missing_object_value);
2622
+ MEMCPY(value_stack_buffer, parser.value_stack.ptr, VALUE, parser.value_stack.capa);
2623
+ parser.value_stack.ptr = value_stack_buffer;
2624
+
2625
+ JSON_ParserState *state = &parser.state;
2626
+ JSON_ParserConfig *config = &parser.config;
2627
+
2628
+ if (missing_object_value) {
2629
+ rvalue_stack_push(state->value_stack, Qnil, NULL, &state->value_stack);
2630
+ }
2631
+
2632
+ VALUE partial_result = Qundef;
2633
+
2634
+ while (UNDEF_P(partial_result)) {
2635
+ frame = json_frame_stack_peek(state->frames);
2636
+
2637
+ switch (frame->type) {
2638
+ case JSON_FRAME_ROOT: {
2639
+ partial_result = *rvalue_stack_peek(state->value_stack, 1);
2640
+ break;
2641
+ }
2642
+
2643
+ case JSON_FRAME_ARRAY: {
2644
+ long count = json_frame_entry_count(frame, state->value_stack);
2645
+ json_push_value(state, config, json_decode_array(state, config, count));
2646
+ json_frame_stack_pop(state->frames);
2647
+
2648
+ break;
2649
+ }
2650
+
2651
+ case JSON_FRAME_OBJECT: {
2652
+ long count = json_frame_entry_count(frame, state->value_stack);
2653
+ json_push_value(state, config, json_decode_object(state, config, count));
2654
+ json_frame_stack_pop(state->frames);
2655
+ break;
2656
+ }
2657
+
2658
+ default: {
2659
+ JSON_UNREACHABLE_RETURN(Qundef);
2660
+ break;
2661
+ }
2662
+ }
2663
+ }
2664
+
2665
+ ALLOCV_END(tmpbuf);
2666
+ return partial_result;
2667
+ }
2668
+
2669
+ /*
2670
+ * call-seq: partial_value -> object
2671
+ *
2672
+ * Returns the Ruby objects parsed up to this point:
2673
+ * parser << '[1, [2, 3,'
2674
+ * parser.parse # => false
2675
+ * parser.value # ArgumentError no ready value
2676
+ * parser.partial_value # => [1, [2, 3]]
2677
+ */
2678
+ static VALUE cResumableParser_partial_value(VALUE self)
2679
+ {
2680
+ JSON_ResumableParser *parser = ResumableParser_acquire(self, true);
2681
+
2682
+ int status;
2683
+ VALUE result = rb_protect(cResumableParser_partial_value_body, self, &status);
2684
+ parser->in_use = false;
2685
+ if (status) {
2686
+ rb_jump_tag(status);
2687
+ }
2688
+ return result;
2689
+ }
2690
+
2691
+ /*
2692
+ * call-seq: rest -> string
2693
+ *
2694
+ * Returns a string containing what remains to be parsed in the buffer:
2695
+ * parser << '{ "message": "unterminated message'
2696
+ * parser.parse # => false
2697
+ * parser.rest # => '"unterminated message'
2698
+ */
2699
+ static VALUE cResumableParser_rest(VALUE self)
2700
+ {
2701
+ JSON_ResumableParser *parser = cResumableParser_get(self);
2702
+
2703
+ if (!parser->buffer) {
2704
+ return rb_utf8_str_new("", 0);
2705
+ }
2706
+
2707
+ size_t offset = parser->state.cursor - parser->state.start;
2708
+ const char *ptr;
2709
+ long len;
2710
+ RSTRING_GETMEM(parser->buffer, ptr, len);
2711
+ return rb_utf8_str_new(ptr + offset, len - offset);
2712
+ }
2713
+
2714
+ /*
2715
+ * call-seq: eos? -> true or false
2716
+ *
2717
+ * Returns whether the internal buffer has been entirely consumed.
2718
+ */
2719
+ static VALUE cResumableParser_eos_p(VALUE self)
2720
+ {
2721
+ JSON_ResumableParser *parser = cResumableParser_get(self);
2722
+ return eos(&parser->state) ? Qtrue : Qfalse;
2723
+ }
2724
+
2725
+ /*
2726
+ * call-seq: parsed_bytes -> integer
2727
+ *
2728
+ * Returns the number of bytes parsed since the start of the current partial value.
2729
+ * This is intended to be used for securing against untrusted input:
2730
+ *
2731
+ * loop do
2732
+ * if parser.parsed_bytes > DOCUMENT_MAX_SIZE
2733
+ * raise "document too large"
2734
+ * end
2735
+ *
2736
+ * parser << read_chunk
2737
+ * while parser.parse
2738
+ * process(parser.value)
2739
+ * end
2740
+ * end
2741
+ */
2742
+ static VALUE cResumableParser_parsed_bytes(VALUE self)
2743
+ {
2744
+ JSON_ResumableParser *parser = cResumableParser_get(self);
2745
+ return ULL2NUM(parser->parsed_bytes + parser->incomplete_bytes);
2746
+ }
2747
+
1702
2748
  void Init_parser(void)
1703
2749
  {
1704
2750
  #ifdef HAVE_RB_EXT_RACTOR_SAFE
@@ -1710,30 +2756,52 @@ void Init_parser(void)
1710
2756
  mJSON = rb_define_module("JSON");
1711
2757
  VALUE mExt = rb_define_module_under(mJSON, "Ext");
1712
2758
  VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
2759
+
2760
+ rb_global_variable(&eParserError);
2761
+ eParserError = rb_path2class("JSON::ParserError");
2762
+
2763
+ rb_global_variable(&eNestingError);
1713
2764
  eNestingError = rb_path2class("JSON::NestingError");
1714
- rb_gc_register_mark_object(eNestingError);
2765
+
1715
2766
  rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
1716
- rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
2767
+ rb_define_private_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
1717
2768
  rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
1718
2769
 
1719
2770
  VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
1720
2771
  rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
1721
2772
 
2773
+ VALUE cResumableParser = rb_define_class_under(mJSON, "ResumableParser", rb_cObject);
2774
+ rb_define_alloc_func(cResumableParser, cResumableParser_allocate);
2775
+ rb_define_private_method(cResumableParser, "initialize", cResumableParser_initialize, -1);
2776
+ rb_define_method(cResumableParser, "<<", cResumableParser_feed, 1);
2777
+ rb_define_method(cResumableParser, "parse", cResumableParser_parse, 0);
2778
+ rb_define_method(cResumableParser, "value", cResumableParser_value, 0);
2779
+ rb_define_method(cResumableParser, "value?", cResumableParser_value_p, 0);
2780
+ rb_define_method(cResumableParser, "partial_value", cResumableParser_partial_value, 0);
2781
+ rb_define_method(cResumableParser, "clear", cResumableParser_clear, 0);
2782
+ rb_define_method(cResumableParser, "rest", cResumableParser_rest, 0);
2783
+ rb_define_method(cResumableParser, "eos?", cResumableParser_eos_p, 0);
2784
+ rb_define_method(cResumableParser, "parsed_bytes", cResumableParser_parsed_bytes, 0);
2785
+
2786
+ rb_global_variable(&CNaN);
1722
2787
  CNaN = rb_const_get(mJSON, rb_intern("NaN"));
1723
- rb_gc_register_mark_object(CNaN);
1724
2788
 
2789
+ rb_global_variable(&CInfinity);
1725
2790
  CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
1726
- rb_gc_register_mark_object(CInfinity);
1727
2791
 
2792
+ rb_global_variable(&CMinusInfinity);
1728
2793
  CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
1729
- rb_gc_register_mark_object(CMinusInfinity);
1730
2794
 
1731
2795
  rb_global_variable(&Encoding_UTF_8);
1732
2796
  Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
1733
2797
 
2798
+ rb_global_variable(&JSON_empty_string);
2799
+ JSON_empty_string = rb_obj_hide(rb_utf8_str_new("", 0));
2800
+
1734
2801
  sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
1735
2802
  sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
1736
2803
  sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
2804
+ sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
1737
2805
  sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
1738
2806
  sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
1739
2807
  sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
@@ -1746,6 +2814,8 @@ void Init_parser(void)
1746
2814
  i_try_convert = rb_intern("try_convert");
1747
2815
  i_uminus = rb_intern("-@");
1748
2816
  i_encode = rb_intern("encode");
2817
+ i_at_line = rb_intern("@line");
2818
+ i_at_column = rb_intern("@column");
1749
2819
 
1750
2820
  binary_encindex = rb_ascii8bit_encindex();
1751
2821
  utf8_encindex = rb_utf8_encindex();