json 2.11.3 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +24 -0
- data/README.md +13 -0
- data/ext/json/ext/fbuffer/fbuffer.h +38 -4
- data/ext/json/ext/generator/extconf.rb +6 -0
- data/ext/json/ext/generator/generator.c +317 -19
- data/ext/json/ext/parser/extconf.rb +5 -1
- data/ext/json/ext/parser/parser.c +266 -124
- data/ext/json/ext/simd/conf.rb +20 -0
- data/ext/json/ext/simd/simd.h +187 -0
- data/ext/json/ext/vendor/fpconv.c +10 -10
- data/json.gemspec +2 -3
- data/lib/json/common.rb +8 -6
- data/lib/json/ext.rb +2 -2
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +33 -0
- metadata +4 -2
@@ -20,6 +20,8 @@ typedef unsigned char _Bool;
|
|
20
20
|
#endif
|
21
21
|
#endif
|
22
22
|
|
23
|
+
#include "../simd/simd.h"
|
24
|
+
|
23
25
|
#ifndef RB_UNLIKELY
|
24
26
|
#define RB_UNLIKELY(expr) expr
|
25
27
|
#endif
|
@@ -35,7 +37,7 @@ static ID i_chr, i_aset, i_aref,
|
|
35
37
|
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
36
38
|
|
37
39
|
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
38
|
-
sym_decimal_class, sym_on_load;
|
40
|
+
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
39
41
|
|
40
42
|
static int binary_encindex;
|
41
43
|
static int utf8_encindex;
|
@@ -337,73 +339,6 @@ static size_t strnlen(const char *s, size_t maxlen)
|
|
337
339
|
}
|
338
340
|
#endif
|
339
341
|
|
340
|
-
#define PARSE_ERROR_FRAGMENT_LEN 32
|
341
|
-
#ifdef RBIMPL_ATTR_NORETURN
|
342
|
-
RBIMPL_ATTR_NORETURN()
|
343
|
-
#endif
|
344
|
-
static void raise_parse_error(const char *format, const char *start)
|
345
|
-
{
|
346
|
-
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
|
347
|
-
|
348
|
-
size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
|
349
|
-
const char *ptr = start;
|
350
|
-
|
351
|
-
if (len == PARSE_ERROR_FRAGMENT_LEN) {
|
352
|
-
MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
|
353
|
-
|
354
|
-
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
|
355
|
-
len--;
|
356
|
-
}
|
357
|
-
|
358
|
-
if (buffer[len - 1] >= 0xC0) { // multibyte character start
|
359
|
-
len--;
|
360
|
-
}
|
361
|
-
|
362
|
-
buffer[len] = '\0';
|
363
|
-
ptr = (const char *)buffer;
|
364
|
-
}
|
365
|
-
|
366
|
-
rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
|
367
|
-
}
|
368
|
-
|
369
|
-
/* unicode */
|
370
|
-
|
371
|
-
static const signed char digit_values[256] = {
|
372
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
373
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
374
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
|
375
|
-
-1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
|
376
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
377
|
-
10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
378
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
379
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
380
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
381
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
382
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
383
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
384
|
-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
385
|
-
-1, -1, -1, -1, -1, -1, -1
|
386
|
-
};
|
387
|
-
|
388
|
-
static uint32_t unescape_unicode(const unsigned char *p)
|
389
|
-
{
|
390
|
-
signed char b;
|
391
|
-
uint32_t result = 0;
|
392
|
-
b = digit_values[p[0]];
|
393
|
-
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
|
394
|
-
result = (result << 4) | (unsigned char)b;
|
395
|
-
b = digit_values[p[1]];
|
396
|
-
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
|
397
|
-
result = (result << 4) | (unsigned char)b;
|
398
|
-
b = digit_values[p[2]];
|
399
|
-
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
|
400
|
-
result = (result << 4) | (unsigned char)b;
|
401
|
-
b = digit_values[p[3]];
|
402
|
-
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
|
403
|
-
result = (result << 4) | (unsigned char)b;
|
404
|
-
return result;
|
405
|
-
}
|
406
|
-
|
407
342
|
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
408
343
|
{
|
409
344
|
int len = 1;
|
@@ -430,10 +365,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
430
365
|
return len;
|
431
366
|
}
|
432
367
|
|
368
|
+
enum duplicate_key_action {
|
369
|
+
JSON_DEPRECATED = 0,
|
370
|
+
JSON_IGNORE,
|
371
|
+
JSON_RAISE,
|
372
|
+
};
|
373
|
+
|
433
374
|
typedef struct JSON_ParserStruct {
|
434
375
|
VALUE on_load_proc;
|
435
376
|
VALUE decimal_class;
|
436
377
|
ID decimal_method_id;
|
378
|
+
enum duplicate_key_action on_duplicate_key;
|
437
379
|
int max_nesting;
|
438
380
|
bool allow_nan;
|
439
381
|
bool allow_trailing_comma;
|
@@ -444,6 +386,7 @@ typedef struct JSON_ParserStruct {
|
|
444
386
|
|
445
387
|
typedef struct JSON_ParserStateStruct {
|
446
388
|
VALUE stack_handle;
|
389
|
+
const char *start;
|
447
390
|
const char *cursor;
|
448
391
|
const char *end;
|
449
392
|
rvalue_stack *stack;
|
@@ -452,6 +395,133 @@ typedef struct JSON_ParserStateStruct {
|
|
452
395
|
int current_nesting;
|
453
396
|
} JSON_ParserState;
|
454
397
|
|
398
|
+
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
399
|
+
{
|
400
|
+
const char *cursor = state->cursor;
|
401
|
+
long column = 0;
|
402
|
+
long line = 1;
|
403
|
+
|
404
|
+
while (cursor >= state->start) {
|
405
|
+
if (*cursor-- == '\n') {
|
406
|
+
break;
|
407
|
+
}
|
408
|
+
column++;
|
409
|
+
}
|
410
|
+
|
411
|
+
while (cursor >= state->start) {
|
412
|
+
if (*cursor-- == '\n') {
|
413
|
+
line++;
|
414
|
+
}
|
415
|
+
}
|
416
|
+
*line_out = line;
|
417
|
+
*column_out = column;
|
418
|
+
}
|
419
|
+
|
420
|
+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
421
|
+
{
|
422
|
+
long line, column;
|
423
|
+
cursor_position(state, &line, &column);
|
424
|
+
|
425
|
+
rb_warn("%s at line %ld column %ld", message, line, column);
|
426
|
+
}
|
427
|
+
|
428
|
+
#define PARSE_ERROR_FRAGMENT_LEN 32
|
429
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
430
|
+
RBIMPL_ATTR_NORETURN()
|
431
|
+
#endif
|
432
|
+
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
433
|
+
{
|
434
|
+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
435
|
+
long line, column;
|
436
|
+
cursor_position(state, &line, &column);
|
437
|
+
|
438
|
+
const char *ptr = "EOF";
|
439
|
+
if (state->cursor && state->cursor < state->end) {
|
440
|
+
ptr = state->cursor;
|
441
|
+
size_t len = 0;
|
442
|
+
while (len < PARSE_ERROR_FRAGMENT_LEN) {
|
443
|
+
char ch = ptr[len];
|
444
|
+
if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
|
445
|
+
break;
|
446
|
+
}
|
447
|
+
len++;
|
448
|
+
}
|
449
|
+
|
450
|
+
if (len) {
|
451
|
+
buffer[0] = '\'';
|
452
|
+
MEMCPY(buffer + 1, ptr, char, len);
|
453
|
+
|
454
|
+
while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
|
455
|
+
len--;
|
456
|
+
}
|
457
|
+
|
458
|
+
if (buffer[len] >= 0xC0) { // multibyte character start
|
459
|
+
len--;
|
460
|
+
}
|
461
|
+
|
462
|
+
buffer[len + 1] = '\'';
|
463
|
+
buffer[len + 2] = '\0';
|
464
|
+
ptr = (const char *)buffer;
|
465
|
+
}
|
466
|
+
}
|
467
|
+
|
468
|
+
VALUE msg = rb_sprintf(format, ptr);
|
469
|
+
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
470
|
+
RB_GC_GUARD(msg);
|
471
|
+
|
472
|
+
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
473
|
+
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
474
|
+
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
475
|
+
rb_exc_raise(exc);
|
476
|
+
}
|
477
|
+
|
478
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
479
|
+
RBIMPL_ATTR_NORETURN()
|
480
|
+
#endif
|
481
|
+
static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
482
|
+
{
|
483
|
+
state->cursor = at;
|
484
|
+
raise_parse_error(format, state);
|
485
|
+
}
|
486
|
+
|
487
|
+
/* unicode */
|
488
|
+
|
489
|
+
static const signed char digit_values[256] = {
|
490
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
491
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
492
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
|
493
|
+
-1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
|
494
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
495
|
+
10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
496
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
497
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
498
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
499
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
500
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
501
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
502
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
503
|
+
-1, -1, -1, -1, -1, -1, -1
|
504
|
+
};
|
505
|
+
|
506
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
|
507
|
+
{
|
508
|
+
signed char b;
|
509
|
+
uint32_t result = 0;
|
510
|
+
b = digit_values[p[0]];
|
511
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
512
|
+
result = (result << 4) | (unsigned char)b;
|
513
|
+
b = digit_values[p[1]];
|
514
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
515
|
+
result = (result << 4) | (unsigned char)b;
|
516
|
+
b = digit_values[p[2]];
|
517
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
518
|
+
result = (result << 4) | (unsigned char)b;
|
519
|
+
b = digit_values[p[3]];
|
520
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
|
521
|
+
result = (result << 4) | (unsigned char)b;
|
522
|
+
return result;
|
523
|
+
}
|
524
|
+
|
455
525
|
#define GET_PARSER_CONFIG \
|
456
526
|
JSON_ParserConfig *config; \
|
457
527
|
TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
|
@@ -470,7 +540,7 @@ static void
|
|
470
540
|
json_eat_comments(JSON_ParserState *state)
|
471
541
|
{
|
472
542
|
if (state->cursor + 1 < state->end) {
|
473
|
-
switch(state->cursor[1]) {
|
543
|
+
switch (state->cursor[1]) {
|
474
544
|
case '/': {
|
475
545
|
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
476
546
|
if (!state->cursor) {
|
@@ -485,8 +555,7 @@ json_eat_comments(JSON_ParserState *state)
|
|
485
555
|
while (true) {
|
486
556
|
state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
|
487
557
|
if (!state->cursor) {
|
488
|
-
|
489
|
-
raise_parse_error("unexpected end of input, expected closing '*/'", state->cursor);
|
558
|
+
raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
|
490
559
|
} else {
|
491
560
|
state->cursor++;
|
492
561
|
if (state->cursor < state->end && *state->cursor == '/') {
|
@@ -498,11 +567,11 @@ json_eat_comments(JSON_ParserState *state)
|
|
498
567
|
break;
|
499
568
|
}
|
500
569
|
default:
|
501
|
-
raise_parse_error("unexpected token
|
570
|
+
raise_parse_error("unexpected token %s", state);
|
502
571
|
break;
|
503
572
|
}
|
504
573
|
} else {
|
505
|
-
raise_parse_error("unexpected token
|
574
|
+
raise_parse_error("unexpected token %s", state);
|
506
575
|
}
|
507
576
|
}
|
508
577
|
|
@@ -621,9 +690,9 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
621
690
|
break;
|
622
691
|
case 'u':
|
623
692
|
if (pe > stringEnd - 5) {
|
624
|
-
|
693
|
+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
|
625
694
|
} else {
|
626
|
-
uint32_t ch = unescape_unicode((unsigned char *) ++pe);
|
695
|
+
uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
|
627
696
|
pe += 3;
|
628
697
|
/* To handle values above U+FFFF, we take a sequence of
|
629
698
|
* \uXXXX escapes in the U+D800..U+DBFF then
|
@@ -638,10 +707,10 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
638
707
|
if ((ch & 0xFC00) == 0xD800) {
|
639
708
|
pe++;
|
640
709
|
if (pe > stringEnd - 6) {
|
641
|
-
|
710
|
+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
|
642
711
|
}
|
643
712
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
644
|
-
uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
|
713
|
+
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
645
714
|
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
646
715
|
| (sur & 0x3FF));
|
647
716
|
pe += 5;
|
@@ -761,11 +830,25 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
|
|
761
830
|
return array;
|
762
831
|
}
|
763
832
|
|
764
|
-
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config,
|
833
|
+
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
765
834
|
{
|
766
|
-
|
835
|
+
size_t entries_count = count / 2;
|
836
|
+
VALUE object = rb_hash_new_capa(entries_count);
|
767
837
|
rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
|
768
838
|
|
839
|
+
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
|
840
|
+
switch (config->on_duplicate_key) {
|
841
|
+
case JSON_IGNORE:
|
842
|
+
break;
|
843
|
+
case JSON_DEPRECATED:
|
844
|
+
emit_parse_warning("detected duplicate keys in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", state);
|
845
|
+
break;
|
846
|
+
case JSON_RAISE:
|
847
|
+
raise_parse_error("duplicate key", state);
|
848
|
+
break;
|
849
|
+
}
|
850
|
+
}
|
851
|
+
|
769
852
|
rvalue_stack_pop(state->stack, count);
|
770
853
|
|
771
854
|
if (config->freeze) {
|
@@ -798,7 +881,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
|
|
798
881
|
return value;
|
799
882
|
}
|
800
883
|
|
801
|
-
static const bool
|
884
|
+
static const bool string_scan_table[256] = {
|
802
885
|
// ASCII Control Characters
|
803
886
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
804
887
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
@@ -811,38 +894,77 @@ static const bool string_scan[256] = {
|
|
811
894
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
812
895
|
};
|
813
896
|
|
897
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
898
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
899
|
+
#else
|
900
|
+
#define FORCE_INLINE
|
901
|
+
#endif
|
902
|
+
|
903
|
+
#ifdef HAVE_SIMD
|
904
|
+
static SIMD_Implementation simd_impl = SIMD_NONE;
|
905
|
+
#endif /* HAVE_SIMD */
|
906
|
+
|
907
|
+
static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
908
|
+
{
|
909
|
+
#ifdef HAVE_SIMD
|
910
|
+
#if defined(HAVE_SIMD_NEON)
|
911
|
+
|
912
|
+
uint64_t mask = 0;
|
913
|
+
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
914
|
+
state->cursor += trailing_zeros64(mask) >> 2;
|
915
|
+
return 1;
|
916
|
+
}
|
917
|
+
|
918
|
+
#elif defined(HAVE_SIMD_SSE2)
|
919
|
+
if (simd_impl == SIMD_SSE2) {
|
920
|
+
int mask = 0;
|
921
|
+
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
922
|
+
state->cursor += trailing_zeros(mask);
|
923
|
+
return 1;
|
924
|
+
}
|
925
|
+
}
|
926
|
+
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
927
|
+
#endif /* HAVE_SIMD */
|
928
|
+
|
929
|
+
while (state->cursor < state->end) {
|
930
|
+
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
931
|
+
return 1;
|
932
|
+
}
|
933
|
+
*state->cursor++;
|
934
|
+
}
|
935
|
+
return 0;
|
936
|
+
}
|
937
|
+
|
814
938
|
static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
815
939
|
{
|
816
940
|
state->cursor++;
|
817
941
|
const char *start = state->cursor;
|
818
942
|
bool escaped = false;
|
819
943
|
|
820
|
-
while (state
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
|
833
|
-
}
|
834
|
-
break;
|
944
|
+
while (RB_UNLIKELY(string_scan(state))) {
|
945
|
+
switch (*state->cursor) {
|
946
|
+
case '"': {
|
947
|
+
VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
|
948
|
+
state->cursor++;
|
949
|
+
return json_push_value(state, config, string);
|
950
|
+
}
|
951
|
+
case '\\': {
|
952
|
+
state->cursor++;
|
953
|
+
escaped = true;
|
954
|
+
if ((unsigned char)*state->cursor < 0x20) {
|
955
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
835
956
|
}
|
836
|
-
|
837
|
-
raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
|
838
|
-
break;
|
957
|
+
break;
|
839
958
|
}
|
959
|
+
default:
|
960
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
961
|
+
break;
|
840
962
|
}
|
841
963
|
|
842
964
|
state->cursor++;
|
843
965
|
}
|
844
966
|
|
845
|
-
raise_parse_error("unexpected end of input, expected closing \"", state
|
967
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
846
968
|
return Qfalse;
|
847
969
|
}
|
848
970
|
|
@@ -850,7 +972,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
850
972
|
{
|
851
973
|
json_eat_whitespace(state);
|
852
974
|
if (state->cursor >= state->end) {
|
853
|
-
raise_parse_error("unexpected end of input", state
|
975
|
+
raise_parse_error("unexpected end of input", state);
|
854
976
|
}
|
855
977
|
|
856
978
|
switch (*state->cursor) {
|
@@ -860,7 +982,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
860
982
|
return json_push_value(state, config, Qnil);
|
861
983
|
}
|
862
984
|
|
863
|
-
raise_parse_error("unexpected token
|
985
|
+
raise_parse_error("unexpected token %s", state);
|
864
986
|
break;
|
865
987
|
case 't':
|
866
988
|
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
|
@@ -868,7 +990,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
868
990
|
return json_push_value(state, config, Qtrue);
|
869
991
|
}
|
870
992
|
|
871
|
-
raise_parse_error("unexpected token
|
993
|
+
raise_parse_error("unexpected token %s", state);
|
872
994
|
break;
|
873
995
|
case 'f':
|
874
996
|
// Note: memcmp with a small power of two compile to an integer comparison
|
@@ -877,7 +999,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
877
999
|
return json_push_value(state, config, Qfalse);
|
878
1000
|
}
|
879
1001
|
|
880
|
-
raise_parse_error("unexpected token
|
1002
|
+
raise_parse_error("unexpected token %s", state);
|
881
1003
|
break;
|
882
1004
|
case 'N':
|
883
1005
|
// Note: memcmp with a small power of two compile to an integer comparison
|
@@ -886,7 +1008,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
886
1008
|
return json_push_value(state, config, CNaN);
|
887
1009
|
}
|
888
1010
|
|
889
|
-
raise_parse_error("unexpected token
|
1011
|
+
raise_parse_error("unexpected token %s", state);
|
890
1012
|
break;
|
891
1013
|
case 'I':
|
892
1014
|
if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
@@ -894,7 +1016,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
894
1016
|
return json_push_value(state, config, CInfinity);
|
895
1017
|
}
|
896
1018
|
|
897
|
-
raise_parse_error("unexpected token
|
1019
|
+
raise_parse_error("unexpected token %s", state);
|
898
1020
|
break;
|
899
1021
|
case '-':
|
900
1022
|
// Note: memcmp with a small power of two compile to an integer comparison
|
@@ -903,7 +1025,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
903
1025
|
state->cursor += 9;
|
904
1026
|
return json_push_value(state, config, CMinusInfinity);
|
905
1027
|
} else {
|
906
|
-
raise_parse_error("unexpected token
|
1028
|
+
raise_parse_error("unexpected token %s", state);
|
907
1029
|
}
|
908
1030
|
}
|
909
1031
|
// Fallthrough
|
@@ -921,11 +1043,11 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
921
1043
|
long integer_length = state->cursor - start;
|
922
1044
|
|
923
1045
|
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
924
|
-
|
1046
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
925
1047
|
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
926
|
-
|
1048
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
927
1049
|
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
928
|
-
|
1050
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
929
1051
|
}
|
930
1052
|
|
931
1053
|
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
@@ -933,7 +1055,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
933
1055
|
state->cursor++;
|
934
1056
|
|
935
1057
|
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
936
|
-
raise_parse_error("invalid number: %s", state
|
1058
|
+
raise_parse_error("invalid number: %s", state);
|
937
1059
|
}
|
938
1060
|
|
939
1061
|
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
@@ -949,7 +1071,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
949
1071
|
}
|
950
1072
|
|
951
1073
|
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
952
|
-
raise_parse_error("invalid number: %s", state
|
1074
|
+
raise_parse_error("invalid number: %s", state);
|
953
1075
|
}
|
954
1076
|
|
955
1077
|
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
@@ -1009,11 +1131,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1009
1131
|
}
|
1010
1132
|
}
|
1011
1133
|
|
1012
|
-
raise_parse_error("expected ',' or ']' after array value", state
|
1134
|
+
raise_parse_error("expected ',' or ']' after array value", state);
|
1013
1135
|
}
|
1014
1136
|
break;
|
1015
1137
|
}
|
1016
1138
|
case '{': {
|
1139
|
+
const char *object_start_cursor = state->cursor;
|
1140
|
+
|
1017
1141
|
state->cursor++;
|
1018
1142
|
json_eat_whitespace(state);
|
1019
1143
|
long stack_head = state->stack->head;
|
@@ -1028,13 +1152,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1028
1152
|
}
|
1029
1153
|
|
1030
1154
|
if (*state->cursor != '"') {
|
1031
|
-
raise_parse_error("expected object key, got
|
1155
|
+
raise_parse_error("expected object key, got %s", state);
|
1032
1156
|
}
|
1033
1157
|
json_parse_string(state, config, true);
|
1034
1158
|
|
1035
1159
|
json_eat_whitespace(state);
|
1036
1160
|
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
|
1037
|
-
raise_parse_error("expected ':' after object key", state
|
1161
|
+
raise_parse_error("expected ':' after object key", state);
|
1038
1162
|
}
|
1039
1163
|
state->cursor++;
|
1040
1164
|
|
@@ -1048,8 +1172,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1048
1172
|
if (*state->cursor == '}') {
|
1049
1173
|
state->cursor++;
|
1050
1174
|
state->current_nesting--;
|
1051
|
-
|
1052
|
-
|
1175
|
+
size_t count = state->stack->head - stack_head;
|
1176
|
+
|
1177
|
+
// Temporary rewind cursor in case an error is raised
|
1178
|
+
const char *final_cursor = state->cursor;
|
1179
|
+
state->cursor = object_start_cursor;
|
1180
|
+
VALUE object = json_decode_object(state, config, count);
|
1181
|
+
state->cursor = final_cursor;
|
1182
|
+
|
1183
|
+
return json_push_value(state, config, object);
|
1053
1184
|
}
|
1054
1185
|
|
1055
1186
|
if (*state->cursor == ',') {
|
@@ -1063,13 +1194,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1063
1194
|
}
|
1064
1195
|
|
1065
1196
|
if (*state->cursor != '"') {
|
1066
|
-
raise_parse_error("expected object key, got:
|
1197
|
+
raise_parse_error("expected object key, got: %s", state);
|
1067
1198
|
}
|
1068
1199
|
json_parse_string(state, config, true);
|
1069
1200
|
|
1070
1201
|
json_eat_whitespace(state);
|
1071
1202
|
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
|
1072
|
-
raise_parse_error("expected ':' after object key, got:
|
1203
|
+
raise_parse_error("expected ':' after object key, got: %s", state);
|
1073
1204
|
}
|
1074
1205
|
state->cursor++;
|
1075
1206
|
|
@@ -1079,24 +1210,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1079
1210
|
}
|
1080
1211
|
}
|
1081
1212
|
|
1082
|
-
raise_parse_error("expected ',' or '}' after object value, got:
|
1213
|
+
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
|
1083
1214
|
}
|
1084
1215
|
break;
|
1085
1216
|
}
|
1086
1217
|
|
1087
1218
|
default:
|
1088
|
-
raise_parse_error("unexpected character:
|
1219
|
+
raise_parse_error("unexpected character: %s", state);
|
1089
1220
|
break;
|
1090
1221
|
}
|
1091
1222
|
|
1092
|
-
raise_parse_error("unreacheable:
|
1223
|
+
raise_parse_error("unreacheable: %s", state);
|
1093
1224
|
}
|
1094
1225
|
|
1095
1226
|
static void json_ensure_eof(JSON_ParserState *state)
|
1096
1227
|
{
|
1097
1228
|
json_eat_whitespace(state);
|
1098
1229
|
if (state->cursor != state->end) {
|
1099
|
-
raise_parse_error("unexpected token at end of stream
|
1230
|
+
raise_parse_error("unexpected token at end of stream %s", state);
|
1100
1231
|
}
|
1101
1232
|
}
|
1102
1233
|
|
@@ -1138,6 +1269,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
1138
1269
|
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
1139
1270
|
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
1140
1271
|
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
1272
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
1141
1273
|
else if (key == sym_decimal_class) {
|
1142
1274
|
if (RTEST(val)) {
|
1143
1275
|
if (rb_respond_to(val, i_try_convert)) {
|
@@ -1232,9 +1364,14 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
|
|
1232
1364
|
.capa = RVALUE_STACK_INITIAL_CAPA,
|
1233
1365
|
};
|
1234
1366
|
|
1367
|
+
long len;
|
1368
|
+
const char *start;
|
1369
|
+
RSTRING_GETMEM(Vsource, start, len);
|
1370
|
+
|
1235
1371
|
JSON_ParserState _state = {
|
1236
|
-
.
|
1237
|
-
.
|
1372
|
+
.start = start,
|
1373
|
+
.cursor = start,
|
1374
|
+
.end = start + len,
|
1238
1375
|
.stack = &stack,
|
1239
1376
|
};
|
1240
1377
|
JSON_ParserState *state = &_state;
|
@@ -1349,6 +1486,7 @@ void Init_parser(void)
|
|
1349
1486
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
1350
1487
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
1351
1488
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
1489
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
1352
1490
|
|
1353
1491
|
i_chr = rb_intern("chr");
|
1354
1492
|
i_aset = rb_intern("[]=");
|
@@ -1362,4 +1500,8 @@ void Init_parser(void)
|
|
1362
1500
|
binary_encindex = rb_ascii8bit_encindex();
|
1363
1501
|
utf8_encindex = rb_utf8_encindex();
|
1364
1502
|
enc_utf8 = rb_utf8_encoding();
|
1503
|
+
|
1504
|
+
#ifdef HAVE_SIMD
|
1505
|
+
simd_impl = find_simd_implementation();
|
1506
|
+
#endif
|
1365
1507
|
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
case RbConfig::CONFIG['host_cpu']
|
2
|
+
when /^(arm|aarch64)/
|
3
|
+
# Try to compile a small program using NEON instructions
|
4
|
+
header, type, init = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)'
|
5
|
+
when /^(x86_64|x64)/
|
6
|
+
header, type, init = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)'
|
7
|
+
end
|
8
|
+
if header
|
9
|
+
have_header(header) && try_compile(<<~SRC)
|
10
|
+
#{cpp_include(header)}
|
11
|
+
int main(int argc, char **argv) {
|
12
|
+
#{type} test = #{init};
|
13
|
+
if (argc > 100000) printf("%p", &test);
|
14
|
+
return 0;
|
15
|
+
}
|
16
|
+
SRC
|
17
|
+
$defs.push("-DJSON_ENABLE_SIMD")
|
18
|
+
end
|
19
|
+
|
20
|
+
have_header('cpuid.h')
|