json 2.13.1 → 2.19.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +98 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +47 -66
- data/ext/json/ext/generator/extconf.rb +1 -1
- data/ext/json/ext/generator/generator.c +375 -552
- data/ext/json/ext/json.h +105 -0
- data/ext/json/ext/parser/extconf.rb +2 -1
- data/ext/json/ext/parser/parser.c +661 -473
- data/ext/json/ext/simd/simd.h +81 -60
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +118 -49
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +126 -64
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +56 -1
- metadata +6 -3
data/ext/json/ext/simd/simd.h
CHANGED
|
@@ -1,61 +1,91 @@
|
|
|
1
|
+
#include "../json.h"
|
|
2
|
+
|
|
1
3
|
typedef enum {
|
|
2
4
|
SIMD_NONE,
|
|
3
5
|
SIMD_NEON,
|
|
4
6
|
SIMD_SSE2
|
|
5
7
|
} SIMD_Implementation;
|
|
6
8
|
|
|
7
|
-
#
|
|
9
|
+
#ifndef __has_builtin // Optional of course.
|
|
10
|
+
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
|
|
11
|
+
#endif
|
|
8
12
|
|
|
9
13
|
#ifdef __clang__
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
# if __has_builtin(__builtin_ctzll)
|
|
15
|
+
# define HAVE_BUILTIN_CTZLL 1
|
|
16
|
+
# else
|
|
17
|
+
# define HAVE_BUILTIN_CTZLL 0
|
|
18
|
+
# endif
|
|
15
19
|
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
|
|
16
|
-
|
|
20
|
+
# define HAVE_BUILTIN_CTZLL 1
|
|
17
21
|
#else
|
|
18
|
-
|
|
22
|
+
# define HAVE_BUILTIN_CTZLL 0
|
|
19
23
|
#endif
|
|
20
24
|
|
|
21
25
|
static inline uint32_t trailing_zeros64(uint64_t input)
|
|
22
26
|
{
|
|
27
|
+
JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior
|
|
28
|
+
|
|
23
29
|
#if HAVE_BUILTIN_CTZLL
|
|
24
|
-
|
|
30
|
+
return __builtin_ctzll(input);
|
|
25
31
|
#else
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
uint32_t trailing_zeros = 0;
|
|
33
|
+
uint64_t temp = input;
|
|
34
|
+
while ((temp & 1) == 0 && temp > 0) {
|
|
35
|
+
trailing_zeros++;
|
|
36
|
+
temp >>= 1;
|
|
37
|
+
}
|
|
38
|
+
return trailing_zeros;
|
|
33
39
|
#endif
|
|
34
40
|
}
|
|
35
41
|
|
|
36
42
|
static inline int trailing_zeros(int input)
|
|
37
43
|
{
|
|
38
|
-
|
|
44
|
+
JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior
|
|
45
|
+
|
|
46
|
+
#if HAVE_BUILTIN_CTZLL
|
|
39
47
|
return __builtin_ctz(input);
|
|
40
|
-
|
|
48
|
+
#else
|
|
41
49
|
int trailing_zeros = 0;
|
|
42
50
|
int temp = input;
|
|
43
51
|
while ((temp & 1) == 0 && temp > 0) {
|
|
44
|
-
|
|
45
|
-
|
|
52
|
+
trailing_zeros++;
|
|
53
|
+
temp >>= 1;
|
|
46
54
|
}
|
|
47
55
|
return trailing_zeros;
|
|
48
|
-
|
|
56
|
+
#endif
|
|
49
57
|
}
|
|
50
58
|
|
|
51
|
-
#
|
|
52
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
53
|
-
#else
|
|
54
|
-
#define FORCE_INLINE
|
|
55
|
-
#endif
|
|
59
|
+
#ifdef JSON_ENABLE_SIMD
|
|
56
60
|
|
|
61
|
+
#define SIMD_MINIMUM_THRESHOLD 4
|
|
57
62
|
|
|
58
|
-
|
|
63
|
+
ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len)
|
|
64
|
+
{
|
|
65
|
+
RBIMPL_ASSERT_OR_ASSUME(len < 16);
|
|
66
|
+
RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4
|
|
67
|
+
#if defined(__has_builtin) && __has_builtin(__builtin_memcpy)
|
|
68
|
+
// If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes.
|
|
69
|
+
// These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
|
|
70
|
+
// the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
|
|
71
|
+
// position in both copies.
|
|
72
|
+
|
|
73
|
+
// Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
|
|
74
|
+
// generated assembly. On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
|
|
75
|
+
// when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
|
|
76
|
+
// select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
|
|
77
|
+
// plus two loads and stores generated when using __builtin_memcpy.
|
|
78
|
+
if (len >= 8) {
|
|
79
|
+
__builtin_memcpy(dst, src, 8);
|
|
80
|
+
__builtin_memcpy(dst + len - 8, src + len - 8, 8);
|
|
81
|
+
} else {
|
|
82
|
+
__builtin_memcpy(dst, src, 4);
|
|
83
|
+
__builtin_memcpy(dst + len - 4, src + len - 4, 4);
|
|
84
|
+
}
|
|
85
|
+
#else
|
|
86
|
+
MEMCPY(dst, src, char, len);
|
|
87
|
+
#endif
|
|
88
|
+
}
|
|
59
89
|
|
|
60
90
|
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
|
|
61
91
|
#include <arm_neon.h>
|
|
@@ -70,48 +100,39 @@ static inline SIMD_Implementation find_simd_implementation(void)
|
|
|
70
100
|
#define HAVE_SIMD_NEON 1
|
|
71
101
|
|
|
72
102
|
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
|
73
|
-
static
|
|
103
|
+
ALWAYS_INLINE(static) uint64_t neon_match_mask(uint8x16_t matches)
|
|
74
104
|
{
|
|
75
105
|
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
|
76
106
|
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
|
77
107
|
return mask & 0x8888888888888888ull;
|
|
78
108
|
}
|
|
79
109
|
|
|
80
|
-
static
|
|
110
|
+
ALWAYS_INLINE(static) uint64_t compute_chunk_mask_neon(const char *ptr)
|
|
81
111
|
{
|
|
82
|
-
|
|
112
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
|
83
113
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
114
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
|
115
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
|
116
|
+
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
|
87
117
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
118
|
+
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
|
119
|
+
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
|
120
|
+
return neon_match_mask(needs_escape);
|
|
91
121
|
}
|
|
92
122
|
|
|
93
|
-
static
|
|
123
|
+
ALWAYS_INLINE(static) int string_scan_simd_neon(const char **ptr, const char *end, uint64_t *mask)
|
|
94
124
|
{
|
|
95
125
|
while (*ptr + sizeof(uint8x16_t) <= end) {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
126
|
+
uint64_t chunk_mask = compute_chunk_mask_neon(*ptr);
|
|
127
|
+
if (chunk_mask) {
|
|
128
|
+
*mask = chunk_mask;
|
|
129
|
+
return 1;
|
|
130
|
+
}
|
|
131
|
+
*ptr += sizeof(uint8x16_t);
|
|
102
132
|
}
|
|
103
133
|
return 0;
|
|
104
134
|
}
|
|
105
135
|
|
|
106
|
-
uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
|
|
107
|
-
uint8x16x4_t tab;
|
|
108
|
-
tab.val[0] = vld1q_u8(table);
|
|
109
|
-
tab.val[1] = vld1q_u8(table+16);
|
|
110
|
-
tab.val[2] = vld1q_u8(table+32);
|
|
111
|
-
tab.val[3] = vld1q_u8(table+48);
|
|
112
|
-
return tab;
|
|
113
|
-
}
|
|
114
|
-
|
|
115
136
|
#endif /* ARM Neon Support.*/
|
|
116
137
|
|
|
117
138
|
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|
|
@@ -136,7 +157,7 @@ uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
|
|
|
136
157
|
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
|
137
158
|
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
|
138
159
|
|
|
139
|
-
static
|
|
160
|
+
ALWAYS_INLINE(static) TARGET_SSE2 int compute_chunk_mask_sse2(const char *ptr)
|
|
140
161
|
{
|
|
141
162
|
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
|
142
163
|
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
|
@@ -147,15 +168,15 @@ static inline TARGET_SSE2 FORCE_INLINE int compute_chunk_mask_sse2(const char *p
|
|
|
147
168
|
return _mm_movemask_epi8(needs_escape);
|
|
148
169
|
}
|
|
149
170
|
|
|
150
|
-
static
|
|
171
|
+
ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, const char *end, int *mask)
|
|
151
172
|
{
|
|
152
173
|
while (*ptr + sizeof(__m128i) <= end) {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
174
|
+
int chunk_mask = compute_chunk_mask_sse2(*ptr);
|
|
175
|
+
if (chunk_mask) {
|
|
176
|
+
*mask = chunk_mask;
|
|
177
|
+
return 1;
|
|
178
|
+
}
|
|
179
|
+
*ptr += sizeof(__m128i);
|
|
159
180
|
}
|
|
160
181
|
|
|
161
182
|
return 0;
|
|
@@ -29,6 +29,10 @@
|
|
|
29
29
|
#include <string.h>
|
|
30
30
|
#include <stdint.h>
|
|
31
31
|
|
|
32
|
+
#if JSON_DEBUG
|
|
33
|
+
#include <assert.h>
|
|
34
|
+
#endif
|
|
35
|
+
|
|
32
36
|
#define npowers 87
|
|
33
37
|
#define steppowers 8
|
|
34
38
|
#define firstpower -348 /* 10 ^ -348 */
|
|
@@ -320,15 +324,7 @@ static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
|
|
|
320
324
|
{
|
|
321
325
|
int exp = absv(K + ndigits - 1);
|
|
322
326
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
if(neg) {
|
|
326
|
-
max_trailing_zeros -= 1;
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
/* write plain integer */
|
|
330
|
-
if(K >= 0 && (exp < (ndigits + max_trailing_zeros))) {
|
|
331
|
-
|
|
327
|
+
if(K >= 0 && exp < 15) {
|
|
332
328
|
memcpy(dest, digits, ndigits);
|
|
333
329
|
memset(dest + ndigits, '0', K);
|
|
334
330
|
|
|
@@ -432,10 +428,12 @@ static int filter_special(double fp, char* dest)
|
|
|
432
428
|
*
|
|
433
429
|
* Input:
|
|
434
430
|
* fp -> the double to convert, dest -> destination buffer.
|
|
435
|
-
* The generated string will never be longer than
|
|
436
|
-
* Make sure to pass a pointer to at least
|
|
431
|
+
* The generated string will never be longer than 32 characters.
|
|
432
|
+
* Make sure to pass a pointer to at least 32 bytes of memory.
|
|
437
433
|
* The emitted string will not be null terminated.
|
|
438
434
|
*
|
|
435
|
+
*
|
|
436
|
+
*
|
|
439
437
|
* Output:
|
|
440
438
|
* The number of written characters.
|
|
441
439
|
*
|
|
@@ -451,7 +449,7 @@ static int filter_special(double fp, char* dest)
|
|
|
451
449
|
* }
|
|
452
450
|
*
|
|
453
451
|
*/
|
|
454
|
-
static int fpconv_dtoa(double d, char dest[
|
|
452
|
+
static int fpconv_dtoa(double d, char dest[32])
|
|
455
453
|
{
|
|
456
454
|
char digits[18];
|
|
457
455
|
|
|
@@ -474,6 +472,9 @@ static int fpconv_dtoa(double d, char dest[28])
|
|
|
474
472
|
int ndigits = grisu2(d, digits, &K);
|
|
475
473
|
|
|
476
474
|
str_len += emit_digits(digits, ndigits, dest + str_len, K, neg);
|
|
475
|
+
#if JSON_DEBUG
|
|
476
|
+
assert(str_len <= 32);
|
|
477
|
+
#endif
|
|
477
478
|
|
|
478
479
|
return str_len;
|
|
479
480
|
}
|