json 2.12.2 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +6 -0
- data/ext/json/ext/generator/extconf.rb +1 -25
- data/ext/json/ext/generator/generator.c +41 -90
- data/ext/json/ext/parser/extconf.rb +5 -1
- data/ext/json/ext/parser/parser.c +124 -33
- data/ext/json/ext/simd/conf.rb +20 -0
- data/ext/json/ext/simd/simd.h +187 -0
- data/json.gemspec +2 -3
- data/lib/json/common.rb +1 -1
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +33 -0
- metadata +4 -3
- data/ext/json/ext/generator/simd.h +0 -112
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 41dcbe399cb5dd00e62d93c87f31b356674c1feb9430306902009ebf8f56bd9a
|
4
|
+
data.tar.gz: f398e819143dc90c162474b5c97241d7a8c8c8209d5a5512c1b081d72a29192a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ff828416dfb1f4a6ffcb51e02827d948b6c566ce008799cc9a02afb538b8d9a32ff1d6eee5c4e0e609757d1e9cfe1332ea359049a3fd820b199bdc7931a2573e
|
7
|
+
data.tar.gz: e5ea4c5bd447d2ed5c37b144219cd0cbed1474a5c7976f63b938996687cfc7f422e4e147582e6fee0a2d3f4ba09937496c23f08c42e2a525744a9246bcfcb5df
|
data/CHANGES.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
### Unreleased
|
4
4
|
|
5
|
+
### 2025-05-23 (2.13.0)
|
6
|
+
|
7
|
+
* Add a new `allow_duplicate_key` parsing option. By default a warning is now emitted when a duplicated key is encountered.
|
8
|
+
In `json 3.0` an error will be raised.
|
9
|
+
* Optimize parsing further using SIMD to scan strings.
|
10
|
+
|
5
11
|
### 2025-05-23 (2.12.2)
|
6
12
|
|
7
13
|
* Fix compiler optimization level.
|
@@ -9,31 +9,7 @@ else
|
|
9
9
|
$defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"]
|
10
10
|
|
11
11
|
if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
|
12
|
-
|
13
|
-
# Try to compile a small program using NEON instructions
|
14
|
-
if have_header('arm_neon.h')
|
15
|
-
have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
|
16
|
-
#include <arm_neon.h>
|
17
|
-
int main() {
|
18
|
-
uint8x16_t test = vdupq_n_u8(32);
|
19
|
-
return 0;
|
20
|
-
}
|
21
|
-
SRC
|
22
|
-
$defs.push("-DJSON_ENABLE_SIMD")
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
|
27
|
-
#include <x86intrin.h>
|
28
|
-
int main() {
|
29
|
-
__m128i test = _mm_set1_epi8(32);
|
30
|
-
return 0;
|
31
|
-
}
|
32
|
-
SRC
|
33
|
-
$defs.push("-DJSON_ENABLE_SIMD")
|
34
|
-
end
|
35
|
-
|
36
|
-
have_header('cpuid.h')
|
12
|
+
load __dir__ + "/../simd/conf.rb"
|
37
13
|
end
|
38
14
|
|
39
15
|
create_makefile 'json/ext/generator'
|
@@ -5,7 +5,7 @@
|
|
5
5
|
#include <math.h>
|
6
6
|
#include <ctype.h>
|
7
7
|
|
8
|
-
#include "simd.h"
|
8
|
+
#include "../simd/simd.h"
|
9
9
|
|
10
10
|
/* ruby api and some helpers */
|
11
11
|
|
@@ -304,28 +304,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
|
304
304
|
return 1;
|
305
305
|
}
|
306
306
|
|
307
|
-
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
308
|
-
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
309
|
-
{
|
310
|
-
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
311
|
-
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
312
|
-
return mask & 0x8888888888888888ull;
|
313
|
-
}
|
314
|
-
|
315
|
-
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
316
|
-
{
|
317
|
-
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
318
|
-
|
319
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
320
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
321
|
-
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
322
|
-
|
323
|
-
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
324
|
-
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
325
|
-
|
326
|
-
return neon_match_mask(needs_escape);
|
327
|
-
}
|
328
|
-
|
329
307
|
static inline unsigned char search_escape_basic_neon(search_state *search)
|
330
308
|
{
|
331
309
|
if (RB_UNLIKELY(search->has_matches)) {
|
@@ -333,7 +311,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
333
311
|
if (search->matches_mask > 0) {
|
334
312
|
return neon_next_match(search);
|
335
313
|
} else {
|
336
|
-
// neon_next_match will only advance search->ptr up to the last matching character.
|
314
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
337
315
|
// Skip over any characters in the last chunk that occur after the last match.
|
338
316
|
search->has_matches = false;
|
339
317
|
search->ptr = search->chunk_end;
|
@@ -342,67 +320,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
342
320
|
|
343
321
|
/*
|
344
322
|
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
345
|
-
* need to be escaped.
|
346
|
-
*
|
323
|
+
* need to be escaped.
|
324
|
+
*
|
347
325
|
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
348
|
-
*
|
326
|
+
*
|
349
327
|
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
350
328
|
* the vector instructions may work on larger vectors.
|
351
|
-
*
|
329
|
+
*
|
352
330
|
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
353
|
-
*
|
354
|
-
* lower_bound: [20 20 20 20 20 20 20 20]
|
355
|
-
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
356
|
-
* dblquote: [22 22 22 22 22 22 22 22]
|
357
|
-
*
|
358
|
-
* Next we load the first chunk of the ptr:
|
331
|
+
*
|
332
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
333
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
334
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
335
|
+
*
|
336
|
+
* Next we load the first chunk of the ptr:
|
359
337
|
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
360
|
-
*
|
338
|
+
*
|
361
339
|
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
362
340
|
* as no bytes are less than 32 (0x20):
|
363
341
|
* [0 0 0 0 0 0 0 0]
|
364
|
-
*
|
342
|
+
*
|
365
343
|
* Next, we check if any byte in chunk is equal to a backslash:
|
366
344
|
* [0 0 0 FF 0 0 0 0]
|
367
|
-
*
|
345
|
+
*
|
368
346
|
* Finally we check if any byte in chunk is equal to a double quote:
|
369
|
-
* [FF 0 0 0 0 0 0 0]
|
370
|
-
*
|
347
|
+
* [FF 0 0 0 0 0 0 0]
|
348
|
+
*
|
371
349
|
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
372
350
|
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
373
351
|
* This is the needs_escape vector and it is equal to:
|
374
|
-
* [FF 0 0 FF 0 0 0 0]
|
375
|
-
*
|
352
|
+
* [FF 0 0 FF 0 0 0 0]
|
353
|
+
*
|
376
354
|
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
377
355
|
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
378
|
-
*
|
356
|
+
*
|
379
357
|
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
380
358
|
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
381
359
|
* have at least one byte that needs to be escaped.
|
382
360
|
*/
|
383
|
-
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
384
|
-
uint64_t mask = neon_rules_update(search->ptr);
|
385
361
|
|
386
|
-
|
387
|
-
search->ptr += sizeof(uint8x16_t);
|
388
|
-
continue;
|
389
|
-
}
|
390
|
-
search->matches_mask = mask;
|
362
|
+
if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
|
391
363
|
search->has_matches = true;
|
392
364
|
search->chunk_base = search->ptr;
|
393
365
|
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
394
366
|
return neon_next_match(search);
|
395
367
|
}
|
396
368
|
|
397
|
-
// There are fewer than 16 bytes left.
|
369
|
+
// There are fewer than 16 bytes left.
|
398
370
|
unsigned long remaining = (search->end - search->ptr);
|
399
371
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
400
372
|
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
401
373
|
|
402
|
-
uint64_t mask =
|
374
|
+
uint64_t mask = compute_chunk_mask_neon(s);
|
403
375
|
|
404
376
|
if (!mask) {
|
405
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
377
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
406
378
|
// search->cursor to search->ptr.
|
407
379
|
fbuffer_consumed(search->buffer, remaining);
|
408
380
|
search->ptr = search->end;
|
@@ -428,11 +400,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
428
400
|
|
429
401
|
#ifdef HAVE_SIMD_SSE2
|
430
402
|
|
431
|
-
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
432
|
-
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
433
|
-
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
434
|
-
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
435
|
-
|
436
403
|
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
437
404
|
{
|
438
405
|
int mask = search->matches_mask;
|
@@ -457,18 +424,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
|
457
424
|
#define TARGET_SSE2
|
458
425
|
#endif
|
459
426
|
|
460
|
-
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
|
461
|
-
{
|
462
|
-
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
463
|
-
|
464
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
465
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
466
|
-
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
467
|
-
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
468
|
-
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
469
|
-
return _mm_movemask_epi8(needs_escape);
|
470
|
-
}
|
471
|
-
|
472
427
|
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
473
428
|
{
|
474
429
|
if (RB_UNLIKELY(search->has_matches)) {
|
@@ -476,7 +431,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
|
|
476
431
|
if (search->matches_mask > 0) {
|
477
432
|
return sse2_next_match(search);
|
478
433
|
} else {
|
479
|
-
// sse2_next_match will only advance search->ptr up to the last matching character.
|
434
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
480
435
|
// Skip over any characters in the last chunk that occur after the last match.
|
481
436
|
search->has_matches = false;
|
482
437
|
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
@@ -487,29 +442,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
|
|
487
442
|
}
|
488
443
|
}
|
489
444
|
|
490
|
-
|
491
|
-
int needs_escape_mask = sse2_update(search->ptr);
|
492
|
-
|
493
|
-
if (needs_escape_mask == 0) {
|
494
|
-
search->ptr += sizeof(__m128i);
|
495
|
-
continue;
|
496
|
-
}
|
497
|
-
|
445
|
+
if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
|
498
446
|
search->has_matches = true;
|
499
|
-
search->matches_mask = needs_escape_mask;
|
500
447
|
search->chunk_base = search->ptr;
|
448
|
+
search->chunk_end = search->ptr + sizeof(__m128i);
|
501
449
|
return sse2_next_match(search);
|
502
450
|
}
|
503
451
|
|
504
|
-
// There are fewer than 16 bytes left.
|
452
|
+
// There are fewer than 16 bytes left.
|
505
453
|
unsigned long remaining = (search->end - search->ptr);
|
506
454
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
507
455
|
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
508
456
|
|
509
|
-
int needs_escape_mask =
|
457
|
+
int needs_escape_mask = compute_chunk_mask_sse2(s);
|
510
458
|
|
511
459
|
if (needs_escape_mask == 0) {
|
512
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
460
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
513
461
|
// search->cursor to search->ptr.
|
514
462
|
fbuffer_consumed(search->buffer, remaining);
|
515
463
|
search->ptr = search->end;
|
@@ -638,7 +586,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
|
|
638
586
|
return 0;
|
639
587
|
}
|
640
588
|
|
641
|
-
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
589
|
+
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
590
|
+
{
|
642
591
|
const unsigned char ch = (unsigned char)*search->ptr;
|
643
592
|
switch (ch_len) {
|
644
593
|
case 1: {
|
@@ -668,7 +617,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
|
|
668
617
|
|
669
618
|
uint32_t wchar = 0;
|
670
619
|
|
671
|
-
switch(ch_len) {
|
620
|
+
switch (ch_len) {
|
672
621
|
case 2:
|
673
622
|
wchar = ch & 0x1F;
|
674
623
|
break;
|
@@ -828,7 +777,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
|
828
777
|
* _state_ is a JSON::State object, that can also be used to configure the
|
829
778
|
* produced JSON string output further.
|
830
779
|
*/
|
831
|
-
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
780
|
+
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
781
|
+
{
|
832
782
|
rb_check_arity(argc, 0, 1);
|
833
783
|
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
834
784
|
return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
|
@@ -890,7 +840,8 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
|
|
890
840
|
*
|
891
841
|
* Extends _modul_ with the String::Extend module.
|
892
842
|
*/
|
893
|
-
static VALUE mString_included_s(VALUE self, VALUE modul)
|
843
|
+
static VALUE mString_included_s(VALUE self, VALUE modul)
|
844
|
+
{
|
894
845
|
VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
|
895
846
|
rb_call_super(1, &modul);
|
896
847
|
return result;
|
@@ -1135,7 +1086,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
1135
1086
|
}
|
1136
1087
|
|
1137
1088
|
VALUE key_to_s;
|
1138
|
-
switch(rb_type(key)) {
|
1089
|
+
switch (rb_type(key)) {
|
1139
1090
|
case T_STRING:
|
1140
1091
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
1141
1092
|
key_to_s = key;
|
@@ -1219,7 +1170,7 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
|
|
1219
1170
|
|
1220
1171
|
fbuffer_append_char(buffer, '[');
|
1221
1172
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
1222
|
-
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
1173
|
+
for (i = 0; i < RARRAY_LEN(obj); i++) {
|
1223
1174
|
if (i > 0) {
|
1224
1175
|
fbuffer_append_char(buffer, ',');
|
1225
1176
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
@@ -1304,7 +1255,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
1304
1255
|
search.chunk_base = NULL;
|
1305
1256
|
#endif /* HAVE_SIMD */
|
1306
1257
|
|
1307
|
-
switch(rb_enc_str_coderange(obj)) {
|
1258
|
+
switch (rb_enc_str_coderange(obj)) {
|
1308
1259
|
case ENC_CODERANGE_7BIT:
|
1309
1260
|
case ENC_CODERANGE_VALID:
|
1310
1261
|
if (RB_UNLIKELY(data->state->ascii_only)) {
|
@@ -2168,7 +2119,7 @@ void Init_generator(void)
|
|
2168
2119
|
rb_require("json/ext/generator/state");
|
2169
2120
|
|
2170
2121
|
|
2171
|
-
switch(find_simd_implementation()) {
|
2122
|
+
switch (find_simd_implementation()) {
|
2172
2123
|
#ifdef HAVE_SIMD
|
2173
2124
|
#ifdef HAVE_SIMD_NEON
|
2174
2125
|
case SIMD_NEON:
|
@@ -1,11 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require 'mkmf'
|
3
3
|
|
4
|
-
have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
|
4
|
+
have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0
|
5
5
|
have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
|
6
6
|
have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby
|
7
7
|
have_func("strnlen", "string.h") # Missing on Solaris 10
|
8
8
|
|
9
9
|
append_cflags("-std=c99")
|
10
10
|
|
11
|
+
if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
|
12
|
+
load __dir__ + "/../simd/conf.rb"
|
13
|
+
end
|
14
|
+
|
11
15
|
create_makefile 'json/ext/parser'
|
@@ -20,6 +20,8 @@ typedef unsigned char _Bool;
|
|
20
20
|
#endif
|
21
21
|
#endif
|
22
22
|
|
23
|
+
#include "../simd/simd.h"
|
24
|
+
|
23
25
|
#ifndef RB_UNLIKELY
|
24
26
|
#define RB_UNLIKELY(expr) expr
|
25
27
|
#endif
|
@@ -35,7 +37,7 @@ static ID i_chr, i_aset, i_aref,
|
|
35
37
|
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
|
36
38
|
|
37
39
|
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
|
38
|
-
sym_decimal_class, sym_on_load;
|
40
|
+
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
|
39
41
|
|
40
42
|
static int binary_encindex;
|
41
43
|
static int utf8_encindex;
|
@@ -363,10 +365,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
|
363
365
|
return len;
|
364
366
|
}
|
365
367
|
|
368
|
+
enum duplicate_key_action {
|
369
|
+
JSON_DEPRECATED = 0,
|
370
|
+
JSON_IGNORE,
|
371
|
+
JSON_RAISE,
|
372
|
+
};
|
373
|
+
|
366
374
|
typedef struct JSON_ParserStruct {
|
367
375
|
VALUE on_load_proc;
|
368
376
|
VALUE decimal_class;
|
369
377
|
ID decimal_method_id;
|
378
|
+
enum duplicate_key_action on_duplicate_key;
|
370
379
|
int max_nesting;
|
371
380
|
bool allow_nan;
|
372
381
|
bool allow_trailing_comma;
|
@@ -386,15 +395,8 @@ typedef struct JSON_ParserStateStruct {
|
|
386
395
|
int current_nesting;
|
387
396
|
} JSON_ParserState;
|
388
397
|
|
389
|
-
|
390
|
-
#define PARSE_ERROR_FRAGMENT_LEN 32
|
391
|
-
#ifdef RBIMPL_ATTR_NORETURN
|
392
|
-
RBIMPL_ATTR_NORETURN()
|
393
|
-
#endif
|
394
|
-
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
398
|
+
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
|
395
399
|
{
|
396
|
-
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
397
|
-
|
398
400
|
const char *cursor = state->cursor;
|
399
401
|
long column = 0;
|
400
402
|
long line = 1;
|
@@ -411,6 +413,27 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
|
|
411
413
|
line++;
|
412
414
|
}
|
413
415
|
}
|
416
|
+
*line_out = line;
|
417
|
+
*column_out = column;
|
418
|
+
}
|
419
|
+
|
420
|
+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
|
421
|
+
{
|
422
|
+
long line, column;
|
423
|
+
cursor_position(state, &line, &column);
|
424
|
+
|
425
|
+
rb_warn("%s at line %ld column %ld", message, line, column);
|
426
|
+
}
|
427
|
+
|
428
|
+
#define PARSE_ERROR_FRAGMENT_LEN 32
|
429
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
430
|
+
RBIMPL_ATTR_NORETURN()
|
431
|
+
#endif
|
432
|
+
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
433
|
+
{
|
434
|
+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
|
435
|
+
long line, column;
|
436
|
+
cursor_position(state, &line, &column);
|
414
437
|
|
415
438
|
const char *ptr = "EOF";
|
416
439
|
if (state->cursor && state->cursor < state->end) {
|
@@ -517,7 +540,7 @@ static void
|
|
517
540
|
json_eat_comments(JSON_ParserState *state)
|
518
541
|
{
|
519
542
|
if (state->cursor + 1 < state->end) {
|
520
|
-
switch(state->cursor[1]) {
|
543
|
+
switch (state->cursor[1]) {
|
521
544
|
case '/': {
|
522
545
|
state->cursor = memchr(state->cursor, '\n', state->end - state->cursor);
|
523
546
|
if (!state->cursor) {
|
@@ -807,11 +830,25 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
|
|
807
830
|
return array;
|
808
831
|
}
|
809
832
|
|
810
|
-
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config,
|
833
|
+
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
|
811
834
|
{
|
812
|
-
|
835
|
+
size_t entries_count = count / 2;
|
836
|
+
VALUE object = rb_hash_new_capa(entries_count);
|
813
837
|
rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
|
814
838
|
|
839
|
+
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
|
840
|
+
switch (config->on_duplicate_key) {
|
841
|
+
case JSON_IGNORE:
|
842
|
+
break;
|
843
|
+
case JSON_DEPRECATED:
|
844
|
+
emit_parse_warning("detected duplicate keys in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", state);
|
845
|
+
break;
|
846
|
+
case JSON_RAISE:
|
847
|
+
raise_parse_error("duplicate key", state);
|
848
|
+
break;
|
849
|
+
}
|
850
|
+
}
|
851
|
+
|
815
852
|
rvalue_stack_pop(state->stack, count);
|
816
853
|
|
817
854
|
if (config->freeze) {
|
@@ -844,7 +881,7 @@ static inline VALUE json_push_value(JSON_ParserState *state, JSON_ParserConfig *
|
|
844
881
|
return value;
|
845
882
|
}
|
846
883
|
|
847
|
-
static const bool
|
884
|
+
static const bool string_scan_table[256] = {
|
848
885
|
// ASCII Control Characters
|
849
886
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
850
887
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
@@ -857,32 +894,71 @@ static const bool string_scan[256] = {
|
|
857
894
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
858
895
|
};
|
859
896
|
|
897
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
898
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
899
|
+
#else
|
900
|
+
#define FORCE_INLINE
|
901
|
+
#endif
|
902
|
+
|
903
|
+
#ifdef HAVE_SIMD
|
904
|
+
static SIMD_Implementation simd_impl = SIMD_NONE;
|
905
|
+
#endif /* HAVE_SIMD */
|
906
|
+
|
907
|
+
static inline bool FORCE_INLINE string_scan(JSON_ParserState *state)
|
908
|
+
{
|
909
|
+
#ifdef HAVE_SIMD
|
910
|
+
#if defined(HAVE_SIMD_NEON)
|
911
|
+
|
912
|
+
uint64_t mask = 0;
|
913
|
+
if (string_scan_simd_neon(&state->cursor, state->end, &mask)) {
|
914
|
+
state->cursor += trailing_zeros64(mask) >> 2;
|
915
|
+
return 1;
|
916
|
+
}
|
917
|
+
|
918
|
+
#elif defined(HAVE_SIMD_SSE2)
|
919
|
+
if (simd_impl == SIMD_SSE2) {
|
920
|
+
int mask = 0;
|
921
|
+
if (string_scan_simd_sse2(&state->cursor, state->end, &mask)) {
|
922
|
+
state->cursor += trailing_zeros(mask);
|
923
|
+
return 1;
|
924
|
+
}
|
925
|
+
}
|
926
|
+
#endif /* HAVE_SIMD_NEON or HAVE_SIMD_SSE2 */
|
927
|
+
#endif /* HAVE_SIMD */
|
928
|
+
|
929
|
+
while (state->cursor < state->end) {
|
930
|
+
if (RB_UNLIKELY(string_scan_table[(unsigned char)*state->cursor])) {
|
931
|
+
return 1;
|
932
|
+
}
|
933
|
+
*state->cursor++;
|
934
|
+
}
|
935
|
+
return 0;
|
936
|
+
}
|
937
|
+
|
860
938
|
static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig *config, bool is_name)
|
861
939
|
{
|
862
940
|
state->cursor++;
|
863
941
|
const char *start = state->cursor;
|
864
942
|
bool escaped = false;
|
865
943
|
|
866
|
-
while (state
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
if ((unsigned char)*state->cursor < 0x20) {
|
878
|
-
raise_parse_error("invalid ASCII control character in string: %s", state);
|
879
|
-
}
|
880
|
-
break;
|
881
|
-
}
|
882
|
-
default:
|
944
|
+
while (RB_UNLIKELY(string_scan(state))) {
|
945
|
+
switch (*state->cursor) {
|
946
|
+
case '"': {
|
947
|
+
VALUE string = json_decode_string(state, config, start, state->cursor, escaped, is_name);
|
948
|
+
state->cursor++;
|
949
|
+
return json_push_value(state, config, string);
|
950
|
+
}
|
951
|
+
case '\\': {
|
952
|
+
state->cursor++;
|
953
|
+
escaped = true;
|
954
|
+
if ((unsigned char)*state->cursor < 0x20) {
|
883
955
|
raise_parse_error("invalid ASCII control character in string: %s", state);
|
884
|
-
|
956
|
+
}
|
957
|
+
break;
|
885
958
|
}
|
959
|
+
default:
|
960
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
961
|
+
break;
|
886
962
|
}
|
887
963
|
|
888
964
|
state->cursor++;
|
@@ -1060,6 +1136,8 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1060
1136
|
break;
|
1061
1137
|
}
|
1062
1138
|
case '{': {
|
1139
|
+
const char *object_start_cursor = state->cursor;
|
1140
|
+
|
1063
1141
|
state->cursor++;
|
1064
1142
|
json_eat_whitespace(state);
|
1065
1143
|
long stack_head = state->stack->head;
|
@@ -1094,8 +1172,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1094
1172
|
if (*state->cursor == '}') {
|
1095
1173
|
state->cursor++;
|
1096
1174
|
state->current_nesting--;
|
1097
|
-
|
1098
|
-
|
1175
|
+
size_t count = state->stack->head - stack_head;
|
1176
|
+
|
1177
|
+
// Temporarily rewind cursor in case an error is raised
|
1178
|
+
const char *final_cursor = state->cursor;
|
1179
|
+
state->cursor = object_start_cursor;
|
1180
|
+
VALUE object = json_decode_object(state, config, count);
|
1181
|
+
state->cursor = final_cursor;
|
1182
|
+
|
1183
|
+
return json_push_value(state, config, object);
|
1099
1184
|
}
|
1100
1185
|
|
1101
1186
|
if (*state->cursor == ',') {
|
@@ -1184,6 +1269,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
|
|
1184
1269
|
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
|
1185
1270
|
else if (key == sym_freeze) { config->freeze = RTEST(val); }
|
1186
1271
|
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
|
1272
|
+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
1187
1273
|
else if (key == sym_decimal_class) {
|
1188
1274
|
if (RTEST(val)) {
|
1189
1275
|
if (rb_respond_to(val, i_try_convert)) {
|
@@ -1400,6 +1486,7 @@ void Init_parser(void)
|
|
1400
1486
|
sym_freeze = ID2SYM(rb_intern("freeze"));
|
1401
1487
|
sym_on_load = ID2SYM(rb_intern("on_load"));
|
1402
1488
|
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
|
1489
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
1403
1490
|
|
1404
1491
|
i_chr = rb_intern("chr");
|
1405
1492
|
i_aset = rb_intern("[]=");
|
@@ -1413,4 +1500,8 @@ void Init_parser(void)
|
|
1413
1500
|
binary_encindex = rb_ascii8bit_encindex();
|
1414
1501
|
utf8_encindex = rb_utf8_encindex();
|
1415
1502
|
enc_utf8 = rb_utf8_encoding();
|
1503
|
+
|
1504
|
+
#ifdef HAVE_SIMD
|
1505
|
+
simd_impl = find_simd_implementation();
|
1506
|
+
#endif
|
1416
1507
|
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
case RbConfig::CONFIG['host_cpu']
|
2
|
+
when /^(arm|aarch64)/
|
3
|
+
# Try to compile a small program using NEON instructions
|
4
|
+
header, type, init = 'arm_neon.h', 'uint8x16_t', 'vdupq_n_u8(32)'
|
5
|
+
when /^(x86_64|x64)/
|
6
|
+
header, type, init = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)'
|
7
|
+
end
|
8
|
+
if header
|
9
|
+
have_header(header) && try_compile(<<~SRC)
|
10
|
+
#{cpp_include(header)}
|
11
|
+
int main(int argc, char **argv) {
|
12
|
+
#{type} test = #{init};
|
13
|
+
if (argc > 100000) printf("%p", &test);
|
14
|
+
return 0;
|
15
|
+
}
|
16
|
+
SRC
|
17
|
+
$defs.push("-DJSON_ENABLE_SIMD")
|
18
|
+
end
|
19
|
+
|
20
|
+
have_header('cpuid.h')
|
@@ -0,0 +1,187 @@
|
|
1
|
+
typedef enum {
|
2
|
+
SIMD_NONE,
|
3
|
+
SIMD_NEON,
|
4
|
+
SIMD_SSE2
|
5
|
+
} SIMD_Implementation;
|
6
|
+
|
7
|
+
#ifdef JSON_ENABLE_SIMD
|
8
|
+
|
9
|
+
#ifdef __clang__
|
10
|
+
#if __has_builtin(__builtin_ctzll)
|
11
|
+
#define HAVE_BUILTIN_CTZLL 1
|
12
|
+
#else
|
13
|
+
#define HAVE_BUILTIN_CTZLL 0
|
14
|
+
#endif
|
15
|
+
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
|
16
|
+
#define HAVE_BUILTIN_CTZLL 1
|
17
|
+
#else
|
18
|
+
#define HAVE_BUILTIN_CTZLL 0
|
19
|
+
#endif
|
20
|
+
|
21
|
+
static inline uint32_t trailing_zeros64(uint64_t input)
|
22
|
+
{
|
23
|
+
#if HAVE_BUILTIN_CTZLL
|
24
|
+
return __builtin_ctzll(input);
|
25
|
+
#else
|
26
|
+
uint32_t trailing_zeros = 0;
|
27
|
+
uint64_t temp = input;
|
28
|
+
while ((temp & 1) == 0 && temp > 0) {
|
29
|
+
trailing_zeros++;
|
30
|
+
temp >>= 1;
|
31
|
+
}
|
32
|
+
return trailing_zeros;
|
33
|
+
#endif
|
34
|
+
}
|
35
|
+
|
36
|
+
static inline int trailing_zeros(int input)
|
37
|
+
{
|
38
|
+
#if HAVE_BUILTIN_CTZLL
|
39
|
+
return __builtin_ctz(input);
|
40
|
+
#else
|
41
|
+
int trailing_zeros = 0;
|
42
|
+
int temp = input;
|
43
|
+
while ((temp & 1) == 0 && temp > 0) {
|
44
|
+
trailing_zeros++;
|
45
|
+
temp >>= 1;
|
46
|
+
}
|
47
|
+
return trailing_zeros;
|
48
|
+
#endif
|
49
|
+
}
|
50
|
+
|
51
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
52
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
53
|
+
#else
|
54
|
+
#define FORCE_INLINE
|
55
|
+
#endif
|
56
|
+
|
57
|
+
|
58
|
+
#define SIMD_MINIMUM_THRESHOLD 6
|
59
|
+
|
60
|
+
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
|
61
|
+
#include <arm_neon.h>
|
62
|
+
|
63
|
+
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
64
|
+
static inline SIMD_Implementation find_simd_implementation(void)
|
65
|
+
{
|
66
|
+
return SIMD_NEON;
|
67
|
+
}
|
68
|
+
|
69
|
+
#define HAVE_SIMD 1
|
70
|
+
#define HAVE_SIMD_NEON 1
|
71
|
+
|
72
|
+
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
73
|
+
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
74
|
+
{
|
75
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
76
|
+
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
77
|
+
return mask & 0x8888888888888888ull;
|
78
|
+
}
|
79
|
+
|
80
|
+
static inline FORCE_INLINE uint64_t compute_chunk_mask_neon(const char *ptr)
|
81
|
+
{
|
82
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
83
|
+
|
84
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
85
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
86
|
+
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
87
|
+
|
88
|
+
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
89
|
+
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
90
|
+
return neon_match_mask(needs_escape);
|
91
|
+
}
|
92
|
+
|
93
|
+
static inline FORCE_INLINE int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask)
|
94
|
+
{
|
95
|
+
while (*ptr + sizeof(uint8x16_t) <= end) {
|
96
|
+
uint64_t chunk_mask = compute_chunk_mask_neon(*ptr);
|
97
|
+
if (chunk_mask) {
|
98
|
+
*mask = chunk_mask;
|
99
|
+
return 1;
|
100
|
+
}
|
101
|
+
*ptr += sizeof(uint8x16_t);
|
102
|
+
}
|
103
|
+
return 0;
|
104
|
+
}
|
105
|
+
|
106
|
+
uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
|
107
|
+
uint8x16x4_t tab;
|
108
|
+
tab.val[0] = vld1q_u8(table);
|
109
|
+
tab.val[1] = vld1q_u8(table+16);
|
110
|
+
tab.val[2] = vld1q_u8(table+32);
|
111
|
+
tab.val[3] = vld1q_u8(table+48);
|
112
|
+
return tab;
|
113
|
+
}
|
114
|
+
|
115
|
+
#endif /* ARM Neon Support.*/
|
116
|
+
|
117
|
+
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|
118
|
+
|
119
|
+
#ifdef HAVE_X86INTRIN_H
|
120
|
+
#include <x86intrin.h>
|
121
|
+
|
122
|
+
#define HAVE_SIMD 1
|
123
|
+
#define HAVE_SIMD_SSE2 1
|
124
|
+
|
125
|
+
#ifdef HAVE_CPUID_H
|
126
|
+
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
127
|
+
|
128
|
+
#if defined(__clang__) || defined(__GNUC__)
|
129
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
130
|
+
#else
|
131
|
+
#define TARGET_SSE2
|
132
|
+
#endif
|
133
|
+
|
134
|
+
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
135
|
+
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
136
|
+
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
137
|
+
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
138
|
+
|
139
|
+
static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *ptr)
|
140
|
+
{
|
141
|
+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
142
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
143
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
144
|
+
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
145
|
+
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
146
|
+
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
147
|
+
return _mm_movemask_epi8(needs_escape);
|
148
|
+
}
|
149
|
+
|
150
|
+
static inline TARGET_SSE2 FORCE_INLINE int string_scan_simd_sse2(const char **ptr, const char *end, int *mask)
|
151
|
+
{
|
152
|
+
while (*ptr + sizeof(__m128i) <= end) {
|
153
|
+
int chunk_mask = compute_chunk_mask_sse2(*ptr);
|
154
|
+
if (chunk_mask) {
|
155
|
+
*mask = chunk_mask;
|
156
|
+
return 1;
|
157
|
+
}
|
158
|
+
*ptr += sizeof(__m128i);
|
159
|
+
}
|
160
|
+
|
161
|
+
return 0;
|
162
|
+
}
|
163
|
+
|
164
|
+
#include <cpuid.h>
|
165
|
+
#endif /* HAVE_CPUID_H */
|
166
|
+
|
167
|
+
static inline SIMD_Implementation find_simd_implementation(void)
|
168
|
+
{
|
169
|
+
// TODO Revisit. I think the SSE version now only uses SSE2 instructions.
|
170
|
+
if (__builtin_cpu_supports("sse2")) {
|
171
|
+
return SIMD_SSE2;
|
172
|
+
}
|
173
|
+
|
174
|
+
return SIMD_NONE;
|
175
|
+
}
|
176
|
+
|
177
|
+
#endif /* HAVE_X86INTRIN_H */
|
178
|
+
#endif /* X86_64 Support */
|
179
|
+
|
180
|
+
#endif /* JSON_ENABLE_SIMD */
|
181
|
+
|
182
|
+
#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
|
183
|
+
static inline SIMD_Implementation find_simd_implementation(void)
|
184
|
+
{
|
185
|
+
return SIMD_NONE;
|
186
|
+
}
|
187
|
+
#endif
|
data/json.gemspec
CHANGED
@@ -44,15 +44,14 @@ spec = Gem::Specification.new do |s|
|
|
44
44
|
"LEGAL",
|
45
45
|
"README.md",
|
46
46
|
"json.gemspec",
|
47
|
-
|
48
|
-
]
|
47
|
+
] + Dir.glob("lib/**/*.rb", base: File.expand_path("..", __FILE__))
|
49
48
|
|
50
49
|
if java_ext
|
51
50
|
s.platform = 'java'
|
52
51
|
s.files += Dir["lib/json/ext/**/*.jar"]
|
53
52
|
else
|
54
53
|
s.extensions = Dir["ext/json/**/extconf.rb"]
|
55
|
-
s.files += Dir["ext/json/**/*.{c,h}"]
|
54
|
+
s.files += Dir["ext/json/**/*.{c,h,rb}"]
|
56
55
|
end
|
57
56
|
end
|
58
57
|
|
data/lib/json/common.rb
CHANGED
@@ -268,7 +268,7 @@ module JSON
|
|
268
268
|
# to string interpolation.
|
269
269
|
#
|
270
270
|
# Note: no validation is performed on the provided string. It is the
|
271
|
-
#
|
271
|
+
# responsibility of the caller to ensure the string contains valid JSON.
|
272
272
|
Fragment = Struct.new(:json) do
|
273
273
|
def initialize(json)
|
274
274
|
unless string = String.try_convert(json)
|
data/lib/json/version.rb
CHANGED
data/lib/json.rb
CHANGED
@@ -127,6 +127,24 @@ require 'json/common'
|
|
127
127
|
#
|
128
128
|
# ---
|
129
129
|
#
|
130
|
+
# Option +allow_duplicate_key+ specifies whether duplicate keys in objects
|
131
|
+
# should be ignored or cause an error to be raised:
|
132
|
+
#
|
133
|
+
# When not specified:
|
134
|
+
# # The last value is used and a deprecation warning emitted.
|
135
|
+
# JSON.parse('{"a": 1, "a":2}') => {"a" => 2}
|
136
|
+
# # warning: detected duplicate keys in JSON object.
|
137
|
+
# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
|
138
|
+
#
|
139
|
+
# When set to +true+:
|
140
|
+
# # The last value is used.
|
141
|
+
# JSON.parse('{"a": 1, "a":2}') => {"a" => 2}
|
142
|
+
#
|
143
|
+
# When set to +false+, the future default:
|
144
|
+
# JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError)
|
145
|
+
#
|
146
|
+
# ---
|
147
|
+
#
|
130
148
|
# Option +allow_nan+ (boolean) specifies whether to allow
|
131
149
|
# NaN, Infinity, and MinusInfinity in +source+;
|
132
150
|
# defaults to +false+.
|
@@ -143,8 +161,23 @@ require 'json/common'
|
|
143
161
|
# ruby = JSON.parse(source, {allow_nan: true})
|
144
162
|
# ruby # => [NaN, Infinity, -Infinity]
|
145
163
|
#
|
164
|
+
# ---
|
165
|
+
#
|
166
|
+
# Option +allow_trailing_comma+ (boolean) specifies whether to allow
|
167
|
+
# trailing commas in objects and arrays;
|
168
|
+
# defaults to +false+.
|
169
|
+
#
|
170
|
+
# With the default, +false+:
|
171
|
+
# JSON.parse('[1,]') # unexpected character: ']' at line 1 column 4 (JSON::ParserError)
|
172
|
+
#
|
173
|
+
# When enabled:
|
174
|
+
# JSON.parse('[1,]', allow_trailing_comma: true) # => [1]
|
175
|
+
#
|
146
176
|
# ====== Output Options
|
147
177
|
#
|
178
|
+
# Option +freeze+ (boolean) specifies whether the returned objects will be frozen;
|
179
|
+
# defaults to +false+.
|
180
|
+
#
|
148
181
|
# Option +symbolize_names+ (boolean) specifies whether returned \Hash keys
|
149
182
|
# should be Symbols;
|
150
183
|
# defaults to +false+ (use Strings).
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.12.2
|
4
|
+
version: 2.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Frank
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-07-17 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: This is a JSON implementation as a Ruby extension in C.
|
13
13
|
email: flori@ping.de
|
@@ -26,9 +26,10 @@ files:
|
|
26
26
|
- ext/json/ext/fbuffer/fbuffer.h
|
27
27
|
- ext/json/ext/generator/extconf.rb
|
28
28
|
- ext/json/ext/generator/generator.c
|
29
|
-
- ext/json/ext/generator/simd.h
|
30
29
|
- ext/json/ext/parser/extconf.rb
|
31
30
|
- ext/json/ext/parser/parser.c
|
31
|
+
- ext/json/ext/simd/conf.rb
|
32
|
+
- ext/json/ext/simd/simd.h
|
32
33
|
- ext/json/ext/vendor/fpconv.c
|
33
34
|
- ext/json/ext/vendor/jeaiii-ltoa.h
|
34
35
|
- json.gemspec
|
@@ -1,112 +0,0 @@
|
|
1
|
-
typedef enum {
|
2
|
-
SIMD_NONE,
|
3
|
-
SIMD_NEON,
|
4
|
-
SIMD_SSE2
|
5
|
-
} SIMD_Implementation;
|
6
|
-
|
7
|
-
#ifdef JSON_ENABLE_SIMD
|
8
|
-
|
9
|
-
#ifdef __clang__
|
10
|
-
#if __has_builtin(__builtin_ctzll)
|
11
|
-
#define HAVE_BUILTIN_CTZLL 1
|
12
|
-
#else
|
13
|
-
#define HAVE_BUILTIN_CTZLL 0
|
14
|
-
#endif
|
15
|
-
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
|
16
|
-
#define HAVE_BUILTIN_CTZLL 1
|
17
|
-
#else
|
18
|
-
#define HAVE_BUILTIN_CTZLL 0
|
19
|
-
#endif
|
20
|
-
|
21
|
-
static inline uint32_t trailing_zeros64(uint64_t input) {
|
22
|
-
#if HAVE_BUILTIN_CTZLL
|
23
|
-
return __builtin_ctzll(input);
|
24
|
-
#else
|
25
|
-
uint32_t trailing_zeros = 0;
|
26
|
-
uint64_t temp = input;
|
27
|
-
while ((temp & 1) == 0 && temp > 0) {
|
28
|
-
trailing_zeros++;
|
29
|
-
temp >>= 1;
|
30
|
-
}
|
31
|
-
return trailing_zeros;
|
32
|
-
#endif
|
33
|
-
}
|
34
|
-
|
35
|
-
static inline int trailing_zeros(int input) {
|
36
|
-
#if HAVE_BUILTIN_CTZLL
|
37
|
-
return __builtin_ctz(input);
|
38
|
-
#else
|
39
|
-
int trailing_zeros = 0;
|
40
|
-
int temp = input;
|
41
|
-
while ((temp & 1) == 0 && temp > 0) {
|
42
|
-
trailing_zeros++;
|
43
|
-
temp >>= 1;
|
44
|
-
}
|
45
|
-
return trailing_zeros;
|
46
|
-
#endif
|
47
|
-
}
|
48
|
-
|
49
|
-
#define SIMD_MINIMUM_THRESHOLD 6
|
50
|
-
|
51
|
-
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
|
52
|
-
#include <arm_neon.h>
|
53
|
-
|
54
|
-
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
55
|
-
static SIMD_Implementation find_simd_implementation(void) {
|
56
|
-
return SIMD_NEON;
|
57
|
-
}
|
58
|
-
|
59
|
-
#define HAVE_SIMD 1
|
60
|
-
#define HAVE_SIMD_NEON 1
|
61
|
-
|
62
|
-
uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
|
63
|
-
uint8x16x4_t tab;
|
64
|
-
tab.val[0] = vld1q_u8(table);
|
65
|
-
tab.val[1] = vld1q_u8(table+16);
|
66
|
-
tab.val[2] = vld1q_u8(table+32);
|
67
|
-
tab.val[3] = vld1q_u8(table+48);
|
68
|
-
return tab;
|
69
|
-
}
|
70
|
-
|
71
|
-
#endif /* ARM Neon Support.*/
|
72
|
-
|
73
|
-
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|
74
|
-
|
75
|
-
#ifdef HAVE_X86INTRIN_H
|
76
|
-
#include <x86intrin.h>
|
77
|
-
|
78
|
-
#define HAVE_SIMD 1
|
79
|
-
#define HAVE_SIMD_SSE2 1
|
80
|
-
|
81
|
-
#ifdef HAVE_CPUID_H
|
82
|
-
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
83
|
-
|
84
|
-
#include <cpuid.h>
|
85
|
-
#endif /* HAVE_CPUID_H */
|
86
|
-
|
87
|
-
static SIMD_Implementation find_simd_implementation(void) {
|
88
|
-
|
89
|
-
#if defined(__GNUC__ ) || defined(__clang__)
|
90
|
-
#ifdef __GNUC__
|
91
|
-
__builtin_cpu_init();
|
92
|
-
#endif /* __GNUC__ */
|
93
|
-
|
94
|
-
// TODO Revisit. I think the SSE version now only uses SSE2 instructions.
|
95
|
-
if (__builtin_cpu_supports("sse2")) {
|
96
|
-
return SIMD_SSE2;
|
97
|
-
}
|
98
|
-
#endif /* __GNUC__ || __clang__*/
|
99
|
-
|
100
|
-
return SIMD_NONE;
|
101
|
-
}
|
102
|
-
|
103
|
-
#endif /* HAVE_X86INTRIN_H */
|
104
|
-
#endif /* X86_64 Support */
|
105
|
-
|
106
|
-
#endif /* JSON_ENABLE_SIMD */
|
107
|
-
|
108
|
-
#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
|
109
|
-
static SIMD_Implementation find_simd_implementation(void) {
|
110
|
-
return SIMD_NONE;
|
111
|
-
}
|
112
|
-
#endif
|