json 2.11.3 → 2.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +14 -0
- data/README.md +13 -0
- data/ext/json/ext/fbuffer/fbuffer.h +38 -4
- data/ext/json/ext/generator/extconf.rb +31 -1
- data/ext/json/ext/generator/generator.c +359 -12
- data/ext/json/ext/generator/simd.h +112 -0
- data/ext/json/ext/parser/parser.c +151 -100
- data/ext/json/ext/vendor/fpconv.c +10 -10
- data/lib/json/common.rb +7 -5
- data/lib/json/ext.rb +2 -2
- data/lib/json/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a3ab7a1c1427b28a1f9cd6b82a23afeefa4f1b5c10cda70ac8f63c7f54849b1
|
4
|
+
data.tar.gz: ae21fc70a6e8e82c22b75570602d65fdd91b0d10f5eaf38cc8fbed26f5cba009
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc18c29f53460965d3cdadb2b2f864e3a13999d58316cfa7046d142fdb2b8cce2e6d0d45a9f09ab3beb346f1fac78cd9ba9901f66d76bd74c6a1c4d30a4f08e2
|
7
|
+
data.tar.gz: a9d76ce65491f9e1d76f70a686d2edc606f9b2bc3797dc0daa8f23b2caeecbf48dd7ed042e9e813bcc37d4157863782afd543a8f667dd5d6890795b4cd44b012
|
data/CHANGES.md
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
# Changes
|
2
2
|
|
3
|
+
### Unreleased
|
4
|
+
|
5
|
+
### 2025-05-23 (2.12.1)
|
6
|
+
|
7
|
+
* Fix a potential crash in large negative floating point number generation.
|
8
|
+
* Fix for JSON.pretty_generate to use passed state object's generate instead of state class as the required parameters aren't available.
|
9
|
+
|
10
|
+
### 2025-05-12 (2.12.0)
|
11
|
+
|
12
|
+
* Improve floating point generation to not use scientific notation as much.
|
13
|
+
* Include line and column in parser errors. Both in the message and as exception attributes.
|
14
|
+
* Handle non-string hash keys with broken `to_s` implementations.
|
15
|
+
* `JSON.generate` now uses SSE2 (x86) or NEON (arm64) instructions when available to escape strings.
|
16
|
+
|
3
17
|
### 2025-04-25 (2.11.3)
|
4
18
|
|
5
19
|
* Fix a regression in `JSON.pretty_generate` that could cause indentation to be off once some `#to_json` has been called.
|
data/README.md
CHANGED
@@ -233,6 +233,19 @@ the `pp` library's `pp` methods.
|
|
233
233
|
|
234
234
|
## Development
|
235
235
|
|
236
|
+
### Prerequisites
|
237
|
+
|
238
|
+
1. Clone the repository
|
239
|
+
2. Install dependencies with `bundle install`
|
240
|
+
|
241
|
+
### Testing
|
242
|
+
|
243
|
+
The full test suite can be run with:
|
244
|
+
|
245
|
+
```bash
|
246
|
+
bundle exec rake test
|
247
|
+
```
|
248
|
+
|
236
249
|
### Release
|
237
250
|
|
238
251
|
Update the `lib/json/version.rb` file.
|
@@ -36,6 +36,12 @@ typedef unsigned char _Bool;
|
|
36
36
|
# define MAYBE_UNUSED(x) x
|
37
37
|
#endif
|
38
38
|
|
39
|
+
#ifdef RUBY_DEBUG
|
40
|
+
#ifndef JSON_DEBUG
|
41
|
+
#define JSON_DEBUG RUBY_DEBUG
|
42
|
+
#endif
|
43
|
+
#endif
|
44
|
+
|
39
45
|
enum fbuffer_type {
|
40
46
|
FBUFFER_HEAP_ALLOCATED = 0,
|
41
47
|
FBUFFER_STACK_ALLOCATED = 1,
|
@@ -46,6 +52,9 @@ typedef struct FBufferStruct {
|
|
46
52
|
unsigned long initial_length;
|
47
53
|
unsigned long len;
|
48
54
|
unsigned long capa;
|
55
|
+
#ifdef JSON_DEBUG
|
56
|
+
unsigned long requested;
|
57
|
+
#endif
|
49
58
|
char *ptr;
|
50
59
|
VALUE io;
|
51
60
|
} FBuffer;
|
@@ -74,6 +83,20 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *
|
|
74
83
|
fb->ptr = stack_buffer;
|
75
84
|
fb->capa = stack_buffer_size;
|
76
85
|
}
|
86
|
+
#ifdef JSON_DEBUG
|
87
|
+
fb->requested = 0;
|
88
|
+
#endif
|
89
|
+
}
|
90
|
+
|
91
|
+
static inline void fbuffer_consumed(FBuffer *fb, unsigned long consumed)
|
92
|
+
{
|
93
|
+
#ifdef JSON_DEBUG
|
94
|
+
if (consumed > fb->requested) {
|
95
|
+
rb_bug("fbuffer: Out of bound write");
|
96
|
+
}
|
97
|
+
fb->requested = 0;
|
98
|
+
#endif
|
99
|
+
fb->len += consumed;
|
77
100
|
}
|
78
101
|
|
79
102
|
static void fbuffer_free(FBuffer *fb)
|
@@ -137,6 +160,10 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
|
|
137
160
|
|
138
161
|
static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
|
139
162
|
{
|
163
|
+
#ifdef JSON_DEBUG
|
164
|
+
fb->requested = requested;
|
165
|
+
#endif
|
166
|
+
|
140
167
|
if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
|
141
168
|
fbuffer_do_inc_capa(fb, requested);
|
142
169
|
}
|
@@ -147,15 +174,22 @@ static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len)
|
|
147
174
|
if (len > 0) {
|
148
175
|
fbuffer_inc_capa(fb, len);
|
149
176
|
MEMCPY(fb->ptr + fb->len, newstr, char, len);
|
150
|
-
fb
|
177
|
+
fbuffer_consumed(fb, len);
|
151
178
|
}
|
152
179
|
}
|
153
180
|
|
154
181
|
/* Appends a character into a buffer. The buffer needs to have sufficient capacity, via fbuffer_inc_capa(...). */
|
155
182
|
static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr)
|
156
183
|
{
|
184
|
+
#ifdef JSON_DEBUG
|
185
|
+
if (fb->requested < 1) {
|
186
|
+
rb_bug("fbuffer: unreserved write");
|
187
|
+
}
|
188
|
+
fb->requested--;
|
189
|
+
#endif
|
190
|
+
|
157
191
|
fb->ptr[fb->len] = chr;
|
158
|
-
fb->len
|
192
|
+
fb->len++;
|
159
193
|
}
|
160
194
|
|
161
195
|
static void fbuffer_append_str(FBuffer *fb, VALUE str)
|
@@ -172,7 +206,7 @@ static inline void fbuffer_append_char(FBuffer *fb, char newchr)
|
|
172
206
|
{
|
173
207
|
fbuffer_inc_capa(fb, 1);
|
174
208
|
*(fb->ptr + fb->len) = newchr;
|
175
|
-
fb
|
209
|
+
fbuffer_consumed(fb, 1);
|
176
210
|
}
|
177
211
|
|
178
212
|
static inline char *fbuffer_cursor(FBuffer *fb)
|
@@ -182,7 +216,7 @@ static inline char *fbuffer_cursor(FBuffer *fb)
|
|
182
216
|
|
183
217
|
static inline void fbuffer_advance_to(FBuffer *fb, char *end)
|
184
218
|
{
|
185
|
-
fb
|
219
|
+
fbuffer_consumed(fb, (end - fb->ptr) - fb->len);
|
186
220
|
}
|
187
221
|
|
188
222
|
/*
|
@@ -4,7 +4,37 @@ if RUBY_ENGINE == 'truffleruby'
|
|
4
4
|
# The pure-Ruby generator is faster on TruffleRuby, so skip compiling the generator extension
|
5
5
|
File.write('Makefile', dummy_makefile("").join)
|
6
6
|
else
|
7
|
-
append_cflags("-std=c99")
|
7
|
+
append_cflags("-std=c99") # pin only the C standard; the optimization level comes from Ruby's configured CFLAGS
|
8
8
|
$defs << "-DJSON_GENERATOR"
|
9
|
+
$defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"]
|
10
|
+
|
11
|
+
if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
|
12
|
+
if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
|
13
|
+
# Try to compile a small program using NEON instructions
|
14
|
+
if have_header('arm_neon.h')
|
15
|
+
have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
|
16
|
+
#include <arm_neon.h>
|
17
|
+
int main() {
|
18
|
+
uint8x16_t test = vdupq_n_u8(32);
|
19
|
+
return 0;
|
20
|
+
}
|
21
|
+
SRC
|
22
|
+
$defs.push("-DJSON_ENABLE_SIMD")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
|
27
|
+
#include <x86intrin.h>
|
28
|
+
int main() {
|
29
|
+
__m128i test = _mm_set1_epi8(32);
|
30
|
+
return 0;
|
31
|
+
}
|
32
|
+
SRC
|
33
|
+
$defs.push("-DJSON_ENABLE_SIMD")
|
34
|
+
end
|
35
|
+
|
36
|
+
have_header('cpuid.h')
|
37
|
+
end
|
38
|
+
|
9
39
|
create_makefile 'json/ext/generator'
|
10
40
|
end
|
@@ -5,6 +5,8 @@
|
|
5
5
|
#include <math.h>
|
6
6
|
#include <ctype.h>
|
7
7
|
|
8
|
+
#include "simd.h"
|
9
|
+
|
8
10
|
/* ruby api and some helpers */
|
9
11
|
|
10
12
|
typedef struct JSON_Generator_StateStruct {
|
@@ -109,12 +111,40 @@ typedef struct _search_state {
|
|
109
111
|
const char *end;
|
110
112
|
const char *cursor;
|
111
113
|
FBuffer *buffer;
|
114
|
+
|
115
|
+
#ifdef HAVE_SIMD
|
116
|
+
const char *chunk_base;
|
117
|
+
const char *chunk_end;
|
118
|
+
bool has_matches;
|
119
|
+
|
120
|
+
#if defined(HAVE_SIMD_NEON)
|
121
|
+
uint64_t matches_mask;
|
122
|
+
#elif defined(HAVE_SIMD_SSE2)
|
123
|
+
int matches_mask;
|
124
|
+
#else
|
125
|
+
#error "Unknown SIMD Implementation."
|
126
|
+
#endif /* HAVE_SIMD_NEON */
|
127
|
+
#endif /* HAVE_SIMD */
|
112
128
|
} search_state;
|
113
129
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
130
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
131
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
132
|
+
#else
|
133
|
+
#define FORCE_INLINE
|
134
|
+
#endif
|
135
|
+
|
136
|
+
static inline FORCE_INLINE void search_flush(search_state *search)
|
137
|
+
{
|
138
|
+
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
139
|
+
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
140
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
141
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
142
|
+
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
143
|
+
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
144
|
+
if (search->ptr > search->cursor) {
|
145
|
+
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
|
146
|
+
search->cursor = search->ptr;
|
147
|
+
}
|
118
148
|
}
|
119
149
|
|
120
150
|
static const unsigned char escape_table_basic[256] = {
|
@@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
|
|
130
160
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
131
161
|
};
|
132
162
|
|
163
|
+
static unsigned char (*search_escape_basic_impl)(search_state *);
|
164
|
+
|
133
165
|
static inline unsigned char search_escape_basic(search_state *search)
|
134
166
|
{
|
135
167
|
while (search->ptr < search->end) {
|
@@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
|
|
144
176
|
return 0;
|
145
177
|
}
|
146
178
|
|
147
|
-
static inline void escape_UTF8_char_basic(search_state *search)
|
179
|
+
static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
|
180
|
+
{
|
148
181
|
const unsigned char ch = (unsigned char)*search->ptr;
|
149
182
|
switch (ch) {
|
150
183
|
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
@@ -186,12 +219,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
|
|
186
219
|
*/
|
187
220
|
static inline void convert_UTF8_to_JSON(search_state *search)
|
188
221
|
{
|
189
|
-
while (
|
222
|
+
while (search_escape_basic_impl(search)) {
|
190
223
|
escape_UTF8_char_basic(search);
|
191
224
|
}
|
192
225
|
}
|
193
226
|
|
194
|
-
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
227
|
+
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
228
|
+
{
|
195
229
|
const unsigned char ch = (unsigned char)*search->ptr;
|
196
230
|
switch (ch_len) {
|
197
231
|
case 1: {
|
@@ -227,6 +261,280 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
227
261
|
search->cursor = (search->ptr += ch_len);
|
228
262
|
}
|
229
263
|
|
264
|
+
#ifdef HAVE_SIMD
|
265
|
+
|
266
|
+
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
267
|
+
{
|
268
|
+
// Flush the buffer so everything up until the last 'len' characters are unflushed.
|
269
|
+
search_flush(search);
|
270
|
+
|
271
|
+
FBuffer *buf = search->buffer;
|
272
|
+
fbuffer_inc_capa(buf, vec_len);
|
273
|
+
|
274
|
+
char *s = (buf->ptr + buf->len);
|
275
|
+
|
276
|
+
// Pad the buffer with dummy characters that won't need escaping.
|
277
|
+
// This seems wasteful at first sight, but memset of vector length is very fast.
|
278
|
+
memset(s, 'X', vec_len);
|
279
|
+
|
280
|
+
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
|
281
|
+
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
|
282
|
+
MEMCPY(s, search->ptr, char, len);
|
283
|
+
|
284
|
+
return s;
|
285
|
+
}
|
286
|
+
|
287
|
+
#ifdef HAVE_SIMD_NEON
|
288
|
+
|
289
|
+
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
290
|
+
{
|
291
|
+
uint64_t mask = search->matches_mask;
|
292
|
+
uint32_t index = trailing_zeros64(mask) >> 2;
|
293
|
+
|
294
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
295
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
296
|
+
// search->chunk_base + index >= search->ptr
|
297
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
298
|
+
// is one byte after the previous match then:
|
299
|
+
// search->chunk_base + index == search->ptr
|
300
|
+
search->ptr = search->chunk_base + index;
|
301
|
+
mask &= mask - 1;
|
302
|
+
search->matches_mask = mask;
|
303
|
+
search_flush(search);
|
304
|
+
return 1;
|
305
|
+
}
|
306
|
+
|
307
|
+
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
308
|
+
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
309
|
+
{
|
310
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
311
|
+
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
312
|
+
return mask & 0x8888888888888888ull;
|
313
|
+
}
|
314
|
+
|
315
|
+
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
316
|
+
{
|
317
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
318
|
+
|
319
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
320
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
321
|
+
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
322
|
+
|
323
|
+
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
324
|
+
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
325
|
+
|
326
|
+
return neon_match_mask(needs_escape);
|
327
|
+
}
|
328
|
+
|
329
|
+
static inline unsigned char search_escape_basic_neon(search_state *search)
|
330
|
+
{
|
331
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
332
|
+
// There are more matches if search->matches_mask > 0.
|
333
|
+
if (search->matches_mask > 0) {
|
334
|
+
return neon_next_match(search);
|
335
|
+
} else {
|
336
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
337
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
338
|
+
search->has_matches = false;
|
339
|
+
search->ptr = search->chunk_end;
|
340
|
+
}
|
341
|
+
}
|
342
|
+
|
343
|
+
/*
|
344
|
+
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
345
|
+
* need to be escaped.
|
346
|
+
*
|
347
|
+
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
348
|
+
*
|
349
|
+
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
350
|
+
* the vector instructions may work on larger vectors.
|
351
|
+
*
|
352
|
+
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
353
|
+
*
|
354
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
355
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
356
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
357
|
+
*
|
358
|
+
* Next we load the first chunk of the ptr:
|
359
|
+
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
360
|
+
*
|
361
|
+
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
362
|
+
* as no bytes are less than 32 (0x20):
|
363
|
+
* [0 0 0 0 0 0 0 0]
|
364
|
+
*
|
365
|
+
* Next, we check if any byte in chunk is equal to a backslash:
|
366
|
+
* [0 0 0 FF 0 0 0 0]
|
367
|
+
*
|
368
|
+
* Finally we check if any byte in chunk is equal to a double quote:
|
369
|
+
* [FF 0 0 0 0 0 0 0]
|
370
|
+
*
|
371
|
+
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
372
|
+
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
373
|
+
* This is the needs_escape vector and it is equal to:
|
374
|
+
* [FF 0 0 FF 0 0 0 0]
|
375
|
+
*
|
376
|
+
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
377
|
+
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
378
|
+
*
|
379
|
+
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
380
|
+
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
381
|
+
* have at least one byte that needs to be escaped.
|
382
|
+
*/
|
383
|
+
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
384
|
+
uint64_t mask = neon_rules_update(search->ptr);
|
385
|
+
|
386
|
+
if (!mask) {
|
387
|
+
search->ptr += sizeof(uint8x16_t);
|
388
|
+
continue;
|
389
|
+
}
|
390
|
+
search->matches_mask = mask;
|
391
|
+
search->has_matches = true;
|
392
|
+
search->chunk_base = search->ptr;
|
393
|
+
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
394
|
+
return neon_next_match(search);
|
395
|
+
}
|
396
|
+
|
397
|
+
// There are fewer than 16 bytes left.
|
398
|
+
unsigned long remaining = (search->end - search->ptr);
|
399
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
400
|
+
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
401
|
+
|
402
|
+
uint64_t mask = neon_rules_update(s);
|
403
|
+
|
404
|
+
if (!mask) {
|
405
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
406
|
+
// search->cursor to search->ptr.
|
407
|
+
fbuffer_consumed(search->buffer, remaining);
|
408
|
+
search->ptr = search->end;
|
409
|
+
search->cursor = search->end;
|
410
|
+
return 0;
|
411
|
+
}
|
412
|
+
|
413
|
+
search->matches_mask = mask;
|
414
|
+
search->has_matches = true;
|
415
|
+
search->chunk_end = search->end;
|
416
|
+
search->chunk_base = search->ptr;
|
417
|
+
return neon_next_match(search);
|
418
|
+
}
|
419
|
+
|
420
|
+
if (search->ptr < search->end) {
|
421
|
+
return search_escape_basic(search);
|
422
|
+
}
|
423
|
+
|
424
|
+
search_flush(search);
|
425
|
+
return 0;
|
426
|
+
}
|
427
|
+
#endif /* HAVE_SIMD_NEON */
|
428
|
+
|
429
|
+
#ifdef HAVE_SIMD_SSE2
|
430
|
+
|
431
|
+
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
432
|
+
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
433
|
+
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
434
|
+
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
435
|
+
|
436
|
+
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
437
|
+
{
|
438
|
+
int mask = search->matches_mask;
|
439
|
+
int index = trailing_zeros(mask);
|
440
|
+
|
441
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
442
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
443
|
+
// search->chunk_base + index >= search->ptr
|
444
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
445
|
+
// is one byte after the previous match then:
|
446
|
+
// search->chunk_base + index == search->ptr
|
447
|
+
search->ptr = search->chunk_base + index;
|
448
|
+
mask &= mask - 1;
|
449
|
+
search->matches_mask = mask;
|
450
|
+
search_flush(search);
|
451
|
+
return 1;
|
452
|
+
}
|
453
|
+
|
454
|
+
#if defined(__clang__) || defined(__GNUC__)
|
455
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
456
|
+
#else
|
457
|
+
#define TARGET_SSE2
|
458
|
+
#endif
|
459
|
+
|
460
|
+
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
|
461
|
+
{
|
462
|
+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
463
|
+
|
464
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
465
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
466
|
+
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
467
|
+
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
468
|
+
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
469
|
+
return _mm_movemask_epi8(needs_escape);
|
470
|
+
}
|
471
|
+
|
472
|
+
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
473
|
+
{
|
474
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
475
|
+
// There are more matches if search->matches_mask > 0.
|
476
|
+
if (search->matches_mask > 0) {
|
477
|
+
return sse2_next_match(search);
|
478
|
+
} else {
|
479
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
480
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
481
|
+
search->has_matches = false;
|
482
|
+
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
483
|
+
search->ptr = search->end;
|
484
|
+
} else {
|
485
|
+
search->ptr = search->chunk_base + sizeof(__m128i);
|
486
|
+
}
|
487
|
+
}
|
488
|
+
}
|
489
|
+
|
490
|
+
while (search->ptr + sizeof(__m128i) <= search->end) {
|
491
|
+
int needs_escape_mask = sse2_update(search->ptr);
|
492
|
+
|
493
|
+
if (needs_escape_mask == 0) {
|
494
|
+
search->ptr += sizeof(__m128i);
|
495
|
+
continue;
|
496
|
+
}
|
497
|
+
|
498
|
+
search->has_matches = true;
|
499
|
+
search->matches_mask = needs_escape_mask;
|
500
|
+
search->chunk_base = search->ptr;
|
501
|
+
return sse2_next_match(search);
|
502
|
+
}
|
503
|
+
|
504
|
+
// There are fewer than 16 bytes left.
|
505
|
+
unsigned long remaining = (search->end - search->ptr);
|
506
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
507
|
+
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
508
|
+
|
509
|
+
int needs_escape_mask = sse2_update(s);
|
510
|
+
|
511
|
+
if (needs_escape_mask == 0) {
|
512
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
513
|
+
// search->cursor to search->ptr.
|
514
|
+
fbuffer_consumed(search->buffer, remaining);
|
515
|
+
search->ptr = search->end;
|
516
|
+
search->cursor = search->end;
|
517
|
+
return 0;
|
518
|
+
}
|
519
|
+
|
520
|
+
search->has_matches = true;
|
521
|
+
search->matches_mask = needs_escape_mask;
|
522
|
+
search->chunk_base = search->ptr;
|
523
|
+
return sse2_next_match(search);
|
524
|
+
}
|
525
|
+
|
526
|
+
if (search->ptr < search->end) {
|
527
|
+
return search_escape_basic(search);
|
528
|
+
}
|
529
|
+
|
530
|
+
search_flush(search);
|
531
|
+
return 0;
|
532
|
+
}
|
533
|
+
|
534
|
+
#endif /* HAVE_SIMD_SSE2 */
|
535
|
+
|
536
|
+
#endif /* HAVE_SIMD */
|
537
|
+
|
230
538
|
static const unsigned char script_safe_escape_table[256] = {
|
231
539
|
// ASCII Control Characters
|
232
540
|
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
@@ -789,6 +1097,21 @@ struct hash_foreach_arg {
|
|
789
1097
|
int iter;
|
790
1098
|
};
|
791
1099
|
|
1100
|
+
static VALUE
|
1101
|
+
convert_string_subclass(VALUE key)
|
1102
|
+
{
|
1103
|
+
VALUE key_to_s = rb_funcall(key, i_to_s, 0);
|
1104
|
+
|
1105
|
+
if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
|
1106
|
+
VALUE cname = rb_obj_class(key);
|
1107
|
+
rb_raise(rb_eTypeError,
|
1108
|
+
"can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
|
1109
|
+
cname, "String", cname, "to_s", rb_obj_class(key_to_s));
|
1110
|
+
}
|
1111
|
+
|
1112
|
+
return key_to_s;
|
1113
|
+
}
|
1114
|
+
|
792
1115
|
static int
|
793
1116
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
794
1117
|
{
|
@@ -817,7 +1140,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
817
1140
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
818
1141
|
key_to_s = key;
|
819
1142
|
} else {
|
820
|
-
key_to_s =
|
1143
|
+
key_to_s = convert_string_subclass(key);
|
821
1144
|
}
|
822
1145
|
break;
|
823
1146
|
case T_SYMBOL:
|
@@ -975,6 +1298,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
975
1298
|
search.cursor = search.ptr;
|
976
1299
|
search.end = search.ptr + len;
|
977
1300
|
|
1301
|
+
#ifdef HAVE_SIMD
|
1302
|
+
search.matches_mask = 0;
|
1303
|
+
search.has_matches = false;
|
1304
|
+
search.chunk_base = NULL;
|
1305
|
+
#endif /* HAVE_SIMD */
|
1306
|
+
|
978
1307
|
switch(rb_enc_str_coderange(obj)) {
|
979
1308
|
case ENC_CODERANGE_7BIT:
|
980
1309
|
case ENC_CODERANGE_VALID:
|
@@ -1077,17 +1406,16 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
1077
1406
|
}
|
1078
1407
|
|
1079
1408
|
/* This implementation writes directly into the buffer. We reserve
|
1080
|
-
* the
|
1081
|
-
* 2 more characters for the potential ".0" suffix.
|
1409
|
+
* the 28 characters that fpconv_dtoa states as its maximum.
|
1082
1410
|
*/
|
1083
|
-
fbuffer_inc_capa(buffer,
|
1411
|
+
fbuffer_inc_capa(buffer, 28);
|
1084
1412
|
char* d = buffer->ptr + buffer->len;
|
1085
1413
|
int len = fpconv_dtoa(value, d);
|
1086
1414
|
|
1087
1415
|
/* fpconv_dtoa converts a float to its shortest string representation,
|
1088
1416
|
* but it adds a ".0" if this is a plain integer.
|
1089
1417
|
*/
|
1090
|
-
buffer
|
1418
|
+
fbuffer_consumed(buffer, len);
|
1091
1419
|
}
|
1092
1420
|
|
1093
1421
|
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
@@ -1838,4 +2166,23 @@ void Init_generator(void)
|
|
1838
2166
|
binary_encindex = rb_ascii8bit_encindex();
|
1839
2167
|
|
1840
2168
|
rb_require("json/ext/generator/state");
|
2169
|
+
|
2170
|
+
|
2171
|
+
switch(find_simd_implementation()) {
|
2172
|
+
#ifdef HAVE_SIMD
|
2173
|
+
#ifdef HAVE_SIMD_NEON
|
2174
|
+
case SIMD_NEON:
|
2175
|
+
search_escape_basic_impl = search_escape_basic_neon;
|
2176
|
+
break;
|
2177
|
+
#endif /* HAVE_SIMD_NEON */
|
2178
|
+
#ifdef HAVE_SIMD_SSE2
|
2179
|
+
case SIMD_SSE2:
|
2180
|
+
search_escape_basic_impl = search_escape_basic_sse2;
|
2181
|
+
break;
|
2182
|
+
#endif /* HAVE_SIMD_SSE2 */
|
2183
|
+
#endif /* HAVE_SIMD */
|
2184
|
+
default:
|
2185
|
+
search_escape_basic_impl = search_escape_basic;
|
2186
|
+
break;
|
2187
|
+
}
|
1841
2188
|
}
|