json 2.11.2 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +13 -0
- data/ext/json/ext/generator/extconf.rb +29 -0
- data/ext/json/ext/generator/generator.c +446 -97
- data/ext/json/ext/generator/simd.h +112 -0
- data/ext/json/ext/parser/parser.c +131 -92
- data/ext/json/ext/vendor/fpconv.c +5 -5
- data/lib/json/common.rb +3 -1
- data/lib/json/version.rb +1 -1
- metadata +3 -2
@@ -5,6 +5,8 @@
|
|
5
5
|
#include <math.h>
|
6
6
|
#include <ctype.h>
|
7
7
|
|
8
|
+
#include "simd.h"
|
9
|
+
|
8
10
|
/* ruby api and some helpers */
|
9
11
|
|
10
12
|
typedef struct JSON_Generator_StateStruct {
|
@@ -45,7 +47,7 @@ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_n
|
|
45
47
|
|
46
48
|
struct generate_json_data;
|
47
49
|
|
48
|
-
typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data,
|
50
|
+
typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
49
51
|
|
50
52
|
struct generate_json_data {
|
51
53
|
FBuffer *buffer;
|
@@ -57,20 +59,20 @@ struct generate_json_data {
|
|
57
59
|
|
58
60
|
static VALUE cState_from_state_s(VALUE self, VALUE opts);
|
59
61
|
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
|
60
|
-
static void generate_json(FBuffer *buffer, struct generate_json_data *data,
|
61
|
-
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data,
|
62
|
-
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data,
|
63
|
-
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data,
|
64
|
-
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data,
|
65
|
-
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data,
|
66
|
-
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data,
|
62
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
63
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
64
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
65
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
66
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
67
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
68
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
67
69
|
#ifdef RUBY_INTEGER_UNIFICATION
|
68
|
-
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data,
|
70
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
69
71
|
#endif
|
70
|
-
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data,
|
71
|
-
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data,
|
72
|
-
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data,
|
73
|
-
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data,
|
72
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
73
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
74
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
75
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
74
76
|
|
75
77
|
static int usascii_encindex, utf8_encindex, binary_encindex;
|
76
78
|
|
@@ -109,12 +111,40 @@ typedef struct _search_state {
|
|
109
111
|
const char *end;
|
110
112
|
const char *cursor;
|
111
113
|
FBuffer *buffer;
|
114
|
+
|
115
|
+
#ifdef HAVE_SIMD
|
116
|
+
const char *chunk_base;
|
117
|
+
const char *chunk_end;
|
118
|
+
bool has_matches;
|
119
|
+
|
120
|
+
#if defined(HAVE_SIMD_NEON)
|
121
|
+
uint64_t matches_mask;
|
122
|
+
#elif defined(HAVE_SIMD_SSE2)
|
123
|
+
int matches_mask;
|
124
|
+
#else
|
125
|
+
#error "Unknown SIMD Implementation."
|
126
|
+
#endif /* HAVE_SIMD_NEON */
|
127
|
+
#endif /* HAVE_SIMD */
|
112
128
|
} search_state;
|
113
129
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
130
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
131
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
132
|
+
#else
|
133
|
+
#define FORCE_INLINE
|
134
|
+
#endif
|
135
|
+
|
136
|
+
static inline FORCE_INLINE void search_flush(search_state *search)
|
137
|
+
{
|
138
|
+
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
139
|
+
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
140
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
141
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
142
|
+
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
143
|
+
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
144
|
+
if (search->ptr > search->cursor) {
|
145
|
+
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
|
146
|
+
search->cursor = search->ptr;
|
147
|
+
}
|
118
148
|
}
|
119
149
|
|
120
150
|
static const unsigned char escape_table_basic[256] = {
|
@@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
|
|
130
160
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
131
161
|
};
|
132
162
|
|
163
|
+
static unsigned char (*search_escape_basic_impl)(search_state *);
|
164
|
+
|
133
165
|
static inline unsigned char search_escape_basic(search_state *search)
|
134
166
|
{
|
135
167
|
while (search->ptr < search->end) {
|
@@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
|
|
144
176
|
return 0;
|
145
177
|
}
|
146
178
|
|
147
|
-
static inline void escape_UTF8_char_basic(search_state *search)
|
179
|
+
static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
|
180
|
+
{
|
148
181
|
const unsigned char ch = (unsigned char)*search->ptr;
|
149
182
|
switch (ch) {
|
150
183
|
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
@@ -186,12 +219,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
|
|
186
219
|
*/
|
187
220
|
static inline void convert_UTF8_to_JSON(search_state *search)
|
188
221
|
{
|
189
|
-
while (
|
222
|
+
while (search_escape_basic_impl(search)) {
|
190
223
|
escape_UTF8_char_basic(search);
|
191
224
|
}
|
192
225
|
}
|
193
226
|
|
194
|
-
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
227
|
+
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
228
|
+
{
|
195
229
|
const unsigned char ch = (unsigned char)*search->ptr;
|
196
230
|
switch (ch_len) {
|
197
231
|
case 1: {
|
@@ -227,6 +261,280 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
227
261
|
search->cursor = (search->ptr += ch_len);
|
228
262
|
}
|
229
263
|
|
264
|
+
#ifdef HAVE_SIMD
|
265
|
+
|
266
|
+
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
267
|
+
{
|
268
|
+
// Flush the buffer so that only the last 'len' characters remain unflushed.
|
269
|
+
search_flush(search);
|
270
|
+
|
271
|
+
FBuffer *buf = search->buffer;
|
272
|
+
fbuffer_inc_capa(buf, vec_len);
|
273
|
+
|
274
|
+
char *s = (buf->ptr + buf->len);
|
275
|
+
|
276
|
+
// Pad the buffer with dummy characters that won't need escaping.
|
277
|
+
// This seems wasteful at first sight, but memset of vector length is very fast.
|
278
|
+
memset(s, 'X', vec_len);
|
279
|
+
|
280
|
+
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
|
281
|
+
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
|
282
|
+
MEMCPY(s, search->ptr, char, len);
|
283
|
+
|
284
|
+
return s;
|
285
|
+
}
|
286
|
+
|
287
|
+
#ifdef HAVE_SIMD_NEON
|
288
|
+
|
289
|
+
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
290
|
+
{
|
291
|
+
uint64_t mask = search->matches_mask;
|
292
|
+
uint32_t index = trailing_zeros64(mask) >> 2;
|
293
|
+
|
294
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
295
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
296
|
+
// search->chunk_base + index >= search->ptr
|
297
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
298
|
+
// is one byte after the previous match then:
|
299
|
+
// search->chunk_base + index == search->ptr
|
300
|
+
search->ptr = search->chunk_base + index;
|
301
|
+
mask &= mask - 1;
|
302
|
+
search->matches_mask = mask;
|
303
|
+
search_flush(search);
|
304
|
+
return 1;
|
305
|
+
}
|
306
|
+
|
307
|
+
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
308
|
+
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
309
|
+
{
|
310
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
311
|
+
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
312
|
+
return mask & 0x8888888888888888ull;
|
313
|
+
}
|
314
|
+
|
315
|
+
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
316
|
+
{
|
317
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
318
|
+
|
319
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
320
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
321
|
+
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
322
|
+
|
323
|
+
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
324
|
+
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
325
|
+
|
326
|
+
return neon_match_mask(needs_escape);
|
327
|
+
}
|
328
|
+
|
329
|
+
static inline unsigned char search_escape_basic_neon(search_state *search)
|
330
|
+
{
|
331
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
332
|
+
// There are more matches if search->matches_mask > 0.
|
333
|
+
if (search->matches_mask > 0) {
|
334
|
+
return neon_next_match(search);
|
335
|
+
} else {
|
336
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
337
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
338
|
+
search->has_matches = false;
|
339
|
+
search->ptr = search->chunk_end;
|
340
|
+
}
|
341
|
+
}
|
342
|
+
|
343
|
+
/*
|
344
|
+
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
345
|
+
* need to be escaped.
|
346
|
+
*
|
347
|
+
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
348
|
+
*
|
349
|
+
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
350
|
+
* the vector instructions may work on larger vectors.
|
351
|
+
*
|
352
|
+
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
353
|
+
*
|
354
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
355
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
356
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
357
|
+
*
|
358
|
+
* Next we load the first chunk of the ptr:
|
359
|
+
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
360
|
+
*
|
361
|
+
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
362
|
+
* as no bytes are less than 32 (0x20):
|
363
|
+
* [0 0 0 0 0 0 0 0]
|
364
|
+
*
|
365
|
+
* Next, we check if any byte in chunk is equal to a backslash:
|
366
|
+
* [0 0 0 FF 0 0 0 0]
|
367
|
+
*
|
368
|
+
* Finally we check if any byte in chunk is equal to a double quote:
|
369
|
+
* [FF 0 0 0 0 0 0 0]
|
370
|
+
*
|
371
|
+
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
372
|
+
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
373
|
+
* This is the needs_escape vector and it is equal to:
|
374
|
+
* [FF 0 0 FF 0 0 0 0]
|
375
|
+
*
|
376
|
+
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
377
|
+
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
378
|
+
*
|
379
|
+
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
380
|
+
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
381
|
+
* have at least one byte that needs to be escaped.
|
382
|
+
*/
|
383
|
+
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
384
|
+
uint64_t mask = neon_rules_update(search->ptr);
|
385
|
+
|
386
|
+
if (!mask) {
|
387
|
+
search->ptr += sizeof(uint8x16_t);
|
388
|
+
continue;
|
389
|
+
}
|
390
|
+
search->matches_mask = mask;
|
391
|
+
search->has_matches = true;
|
392
|
+
search->chunk_base = search->ptr;
|
393
|
+
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
394
|
+
return neon_next_match(search);
|
395
|
+
}
|
396
|
+
|
397
|
+
// There are fewer than 16 bytes left.
|
398
|
+
unsigned long remaining = (search->end - search->ptr);
|
399
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
400
|
+
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
401
|
+
|
402
|
+
uint64_t mask = neon_rules_update(s);
|
403
|
+
|
404
|
+
if (!mask) {
|
405
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
406
|
+
// search->cursor to search->ptr.
|
407
|
+
search->buffer->len += remaining;
|
408
|
+
search->ptr = search->end;
|
409
|
+
search->cursor = search->end;
|
410
|
+
return 0;
|
411
|
+
}
|
412
|
+
|
413
|
+
search->matches_mask = mask;
|
414
|
+
search->has_matches = true;
|
415
|
+
search->chunk_end = search->end;
|
416
|
+
search->chunk_base = search->ptr;
|
417
|
+
return neon_next_match(search);
|
418
|
+
}
|
419
|
+
|
420
|
+
if (search->ptr < search->end) {
|
421
|
+
return search_escape_basic(search);
|
422
|
+
}
|
423
|
+
|
424
|
+
search_flush(search);
|
425
|
+
return 0;
|
426
|
+
}
|
427
|
+
#endif /* HAVE_SIMD_NEON */
|
428
|
+
|
429
|
+
#ifdef HAVE_SIMD_SSE2
|
430
|
+
|
431
|
+
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
432
|
+
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
433
|
+
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
434
|
+
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
435
|
+
|
436
|
+
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
437
|
+
{
|
438
|
+
int mask = search->matches_mask;
|
439
|
+
int index = trailing_zeros(mask);
|
440
|
+
|
441
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
442
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
443
|
+
// search->chunk_base + index >= search->ptr
|
444
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
445
|
+
// is one byte after the previous match then:
|
446
|
+
// search->chunk_base + index == search->ptr
|
447
|
+
search->ptr = search->chunk_base + index;
|
448
|
+
mask &= mask - 1;
|
449
|
+
search->matches_mask = mask;
|
450
|
+
search_flush(search);
|
451
|
+
return 1;
|
452
|
+
}
|
453
|
+
|
454
|
+
#if defined(__clang__) || defined(__GNUC__)
|
455
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
456
|
+
#else
|
457
|
+
#define TARGET_SSE2
|
458
|
+
#endif
|
459
|
+
|
460
|
+
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
|
461
|
+
{
|
462
|
+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
463
|
+
|
464
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
465
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
466
|
+
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
467
|
+
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
468
|
+
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
469
|
+
return _mm_movemask_epi8(needs_escape);
|
470
|
+
}
|
471
|
+
|
472
|
+
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
473
|
+
{
|
474
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
475
|
+
// There are more matches if search->matches_mask > 0.
|
476
|
+
if (search->matches_mask > 0) {
|
477
|
+
return sse2_next_match(search);
|
478
|
+
} else {
|
479
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
480
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
481
|
+
search->has_matches = false;
|
482
|
+
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
483
|
+
search->ptr = search->end;
|
484
|
+
} else {
|
485
|
+
search->ptr = search->chunk_base + sizeof(__m128i);
|
486
|
+
}
|
487
|
+
}
|
488
|
+
}
|
489
|
+
|
490
|
+
while (search->ptr + sizeof(__m128i) <= search->end) {
|
491
|
+
int needs_escape_mask = sse2_update(search->ptr);
|
492
|
+
|
493
|
+
if (needs_escape_mask == 0) {
|
494
|
+
search->ptr += sizeof(__m128i);
|
495
|
+
continue;
|
496
|
+
}
|
497
|
+
|
498
|
+
search->has_matches = true;
|
499
|
+
search->matches_mask = needs_escape_mask;
|
500
|
+
search->chunk_base = search->ptr;
|
501
|
+
return sse2_next_match(search);
|
502
|
+
}
|
503
|
+
|
504
|
+
// There are fewer than 16 bytes left.
|
505
|
+
unsigned long remaining = (search->end - search->ptr);
|
506
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
507
|
+
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
508
|
+
|
509
|
+
int needs_escape_mask = sse2_update(s);
|
510
|
+
|
511
|
+
if (needs_escape_mask == 0) {
|
512
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
513
|
+
// search->cursor to search->ptr.
|
514
|
+
search->buffer->len += remaining;
|
515
|
+
search->ptr = search->end;
|
516
|
+
search->cursor = search->end;
|
517
|
+
return 0;
|
518
|
+
}
|
519
|
+
|
520
|
+
search->has_matches = true;
|
521
|
+
search->matches_mask = needs_escape_mask;
|
522
|
+
search->chunk_base = search->ptr;
|
523
|
+
return sse2_next_match(search);
|
524
|
+
}
|
525
|
+
|
526
|
+
if (search->ptr < search->end) {
|
527
|
+
return search_escape_basic(search);
|
528
|
+
}
|
529
|
+
|
530
|
+
search_flush(search);
|
531
|
+
return 0;
|
532
|
+
}
|
533
|
+
|
534
|
+
#endif /* HAVE_SIMD_SSE2 */
|
535
|
+
|
536
|
+
#endif /* HAVE_SIMD */
|
537
|
+
|
230
538
|
static const unsigned char script_safe_escape_table[256] = {
|
231
539
|
// ASCII Control Characters
|
232
540
|
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
@@ -789,6 +1097,21 @@ struct hash_foreach_arg {
|
|
789
1097
|
int iter;
|
790
1098
|
};
|
791
1099
|
|
1100
|
+
static VALUE
|
1101
|
+
convert_string_subclass(VALUE key)
|
1102
|
+
{
|
1103
|
+
VALUE key_to_s = rb_funcall(key, i_to_s, 0);
|
1104
|
+
|
1105
|
+
if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
|
1106
|
+
VALUE cname = rb_obj_class(key);
|
1107
|
+
rb_raise(rb_eTypeError,
|
1108
|
+
"can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
|
1109
|
+
cname, "String", cname, "to_s", rb_obj_class(key_to_s));
|
1110
|
+
}
|
1111
|
+
|
1112
|
+
return key_to_s;
|
1113
|
+
}
|
1114
|
+
|
792
1115
|
static int
|
793
1116
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
794
1117
|
{
|
@@ -802,12 +1125,12 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
802
1125
|
int j;
|
803
1126
|
|
804
1127
|
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
|
805
|
-
if (RB_UNLIKELY(state->object_nl)) {
|
806
|
-
fbuffer_append_str(buffer, state->object_nl);
|
1128
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
1129
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
807
1130
|
}
|
808
|
-
if (RB_UNLIKELY(state->indent)) {
|
1131
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
809
1132
|
for (j = 0; j < depth; j++) {
|
810
|
-
fbuffer_append_str(buffer, state->indent);
|
1133
|
+
fbuffer_append_str(buffer, data->state->indent);
|
811
1134
|
}
|
812
1135
|
}
|
813
1136
|
|
@@ -817,7 +1140,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
817
1140
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
818
1141
|
key_to_s = key;
|
819
1142
|
} else {
|
820
|
-
key_to_s =
|
1143
|
+
key_to_s = convert_string_subclass(key);
|
821
1144
|
}
|
822
1145
|
break;
|
823
1146
|
case T_SYMBOL:
|
@@ -829,21 +1152,22 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
829
1152
|
}
|
830
1153
|
|
831
1154
|
if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
|
832
|
-
generate_json_string(buffer, data,
|
1155
|
+
generate_json_string(buffer, data, key_to_s);
|
833
1156
|
} else {
|
834
|
-
generate_json(buffer, data,
|
1157
|
+
generate_json(buffer, data, key_to_s);
|
835
1158
|
}
|
836
|
-
if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before);
|
1159
|
+
if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
|
837
1160
|
fbuffer_append_char(buffer, ':');
|
838
|
-
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space);
|
839
|
-
generate_json(buffer, data,
|
1161
|
+
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
|
1162
|
+
generate_json(buffer, data, val);
|
840
1163
|
|
841
1164
|
arg->iter++;
|
842
1165
|
return ST_CONTINUE;
|
843
1166
|
}
|
844
1167
|
|
845
|
-
static inline long increase_depth(
|
1168
|
+
static inline long increase_depth(struct generate_json_data *data)
|
846
1169
|
{
|
1170
|
+
JSON_Generator_State *state = data->state;
|
847
1171
|
long depth = ++state->depth;
|
848
1172
|
if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
|
849
1173
|
rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
|
@@ -851,14 +1175,14 @@ static inline long increase_depth(JSON_Generator_State *state)
|
|
851
1175
|
return depth;
|
852
1176
|
}
|
853
1177
|
|
854
|
-
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data,
|
1178
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
855
1179
|
{
|
856
1180
|
int j;
|
857
|
-
long depth = increase_depth(
|
1181
|
+
long depth = increase_depth(data);
|
858
1182
|
|
859
1183
|
if (RHASH_SIZE(obj) == 0) {
|
860
1184
|
fbuffer_append(buffer, "{}", 2);
|
861
|
-
--state->depth;
|
1185
|
+
--data->state->depth;
|
862
1186
|
return;
|
863
1187
|
}
|
864
1188
|
|
@@ -870,49 +1194,49 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
870
1194
|
};
|
871
1195
|
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
|
872
1196
|
|
873
|
-
depth = --state->depth;
|
874
|
-
if (RB_UNLIKELY(state->object_nl)) {
|
875
|
-
fbuffer_append_str(buffer, state->object_nl);
|
876
|
-
if (RB_UNLIKELY(state->indent)) {
|
1197
|
+
depth = --data->state->depth;
|
1198
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
1199
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
1200
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
877
1201
|
for (j = 0; j < depth; j++) {
|
878
|
-
fbuffer_append_str(buffer, state->indent);
|
1202
|
+
fbuffer_append_str(buffer, data->state->indent);
|
879
1203
|
}
|
880
1204
|
}
|
881
1205
|
}
|
882
1206
|
fbuffer_append_char(buffer, '}');
|
883
1207
|
}
|
884
1208
|
|
885
|
-
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data,
|
1209
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
886
1210
|
{
|
887
1211
|
int i, j;
|
888
|
-
long depth = increase_depth(
|
1212
|
+
long depth = increase_depth(data);
|
889
1213
|
|
890
1214
|
if (RARRAY_LEN(obj) == 0) {
|
891
1215
|
fbuffer_append(buffer, "[]", 2);
|
892
|
-
--state->depth;
|
1216
|
+
--data->state->depth;
|
893
1217
|
return;
|
894
1218
|
}
|
895
1219
|
|
896
1220
|
fbuffer_append_char(buffer, '[');
|
897
|
-
if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
|
1221
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
898
1222
|
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
899
1223
|
if (i > 0) {
|
900
1224
|
fbuffer_append_char(buffer, ',');
|
901
|
-
if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
|
1225
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
902
1226
|
}
|
903
|
-
if (RB_UNLIKELY(state->indent)) {
|
1227
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
904
1228
|
for (j = 0; j < depth; j++) {
|
905
|
-
fbuffer_append_str(buffer, state->indent);
|
1229
|
+
fbuffer_append_str(buffer, data->state->indent);
|
906
1230
|
}
|
907
1231
|
}
|
908
|
-
generate_json(buffer, data,
|
1232
|
+
generate_json(buffer, data, RARRAY_AREF(obj, i));
|
909
1233
|
}
|
910
|
-
state->depth = --depth;
|
911
|
-
if (RB_UNLIKELY(state->array_nl)) {
|
912
|
-
fbuffer_append_str(buffer, state->array_nl);
|
913
|
-
if (RB_UNLIKELY(state->indent)) {
|
1234
|
+
data->state->depth = --depth;
|
1235
|
+
if (RB_UNLIKELY(data->state->array_nl)) {
|
1236
|
+
fbuffer_append_str(buffer, data->state->array_nl);
|
1237
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
914
1238
|
for (j = 0; j < depth; j++) {
|
915
|
-
fbuffer_append_str(buffer, state->indent);
|
1239
|
+
fbuffer_append_str(buffer, data->state->indent);
|
916
1240
|
}
|
917
1241
|
}
|
918
1242
|
}
|
@@ -961,7 +1285,7 @@ static inline VALUE ensure_valid_encoding(VALUE str)
|
|
961
1285
|
return str;
|
962
1286
|
}
|
963
1287
|
|
964
|
-
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data,
|
1288
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
965
1289
|
{
|
966
1290
|
obj = ensure_valid_encoding(obj);
|
967
1291
|
|
@@ -974,12 +1298,18 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
974
1298
|
search.cursor = search.ptr;
|
975
1299
|
search.end = search.ptr + len;
|
976
1300
|
|
1301
|
+
#ifdef HAVE_SIMD
|
1302
|
+
search.matches_mask = 0;
|
1303
|
+
search.has_matches = false;
|
1304
|
+
search.chunk_base = NULL;
|
1305
|
+
#endif /* HAVE_SIMD */
|
1306
|
+
|
977
1307
|
switch(rb_enc_str_coderange(obj)) {
|
978
1308
|
case ENC_CODERANGE_7BIT:
|
979
1309
|
case ENC_CODERANGE_VALID:
|
980
|
-
if (RB_UNLIKELY(state->ascii_only)) {
|
981
|
-
convert_UTF8_to_ASCII_only_JSON(&search, state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
982
|
-
} else if (RB_UNLIKELY(state->script_safe)) {
|
1310
|
+
if (RB_UNLIKELY(data->state->ascii_only)) {
|
1311
|
+
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
1312
|
+
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
983
1313
|
convert_UTF8_to_script_safe_JSON(&search);
|
984
1314
|
} else {
|
985
1315
|
convert_UTF8_to_JSON(&search);
|
@@ -992,7 +1322,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
992
1322
|
fbuffer_append_char(buffer, '"');
|
993
1323
|
}
|
994
1324
|
|
995
|
-
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data,
|
1325
|
+
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
996
1326
|
{
|
997
1327
|
VALUE tmp;
|
998
1328
|
if (rb_respond_to(obj, i_to_json)) {
|
@@ -1002,68 +1332,68 @@ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *d
|
|
1002
1332
|
} else {
|
1003
1333
|
tmp = rb_funcall(obj, i_to_s, 0);
|
1004
1334
|
Check_Type(tmp, T_STRING);
|
1005
|
-
generate_json_string(buffer, data,
|
1335
|
+
generate_json_string(buffer, data, tmp);
|
1006
1336
|
}
|
1007
1337
|
}
|
1008
1338
|
|
1009
|
-
static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data,
|
1339
|
+
static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1010
1340
|
{
|
1011
|
-
if (state->strict) {
|
1012
|
-
generate_json_string(buffer, data,
|
1341
|
+
if (data->state->strict) {
|
1342
|
+
generate_json_string(buffer, data, rb_sym2str(obj));
|
1013
1343
|
} else {
|
1014
|
-
generate_json_fallback(buffer, data,
|
1344
|
+
generate_json_fallback(buffer, data, obj);
|
1015
1345
|
}
|
1016
1346
|
}
|
1017
1347
|
|
1018
|
-
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data,
|
1348
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1019
1349
|
{
|
1020
1350
|
fbuffer_append(buffer, "null", 4);
|
1021
1351
|
}
|
1022
1352
|
|
1023
|
-
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data,
|
1353
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1024
1354
|
{
|
1025
1355
|
fbuffer_append(buffer, "false", 5);
|
1026
1356
|
}
|
1027
1357
|
|
1028
|
-
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data,
|
1358
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1029
1359
|
{
|
1030
1360
|
fbuffer_append(buffer, "true", 4);
|
1031
1361
|
}
|
1032
1362
|
|
1033
|
-
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data,
|
1363
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1034
1364
|
{
|
1035
1365
|
fbuffer_append_long(buffer, FIX2LONG(obj));
|
1036
1366
|
}
|
1037
1367
|
|
1038
|
-
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data,
|
1368
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1039
1369
|
{
|
1040
1370
|
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
1041
1371
|
fbuffer_append_str(buffer, tmp);
|
1042
1372
|
}
|
1043
1373
|
|
1044
1374
|
#ifdef RUBY_INTEGER_UNIFICATION
|
1045
|
-
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data,
|
1375
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1046
1376
|
{
|
1047
1377
|
if (FIXNUM_P(obj))
|
1048
|
-
generate_json_fixnum(buffer, data,
|
1378
|
+
generate_json_fixnum(buffer, data, obj);
|
1049
1379
|
else
|
1050
|
-
generate_json_bignum(buffer, data,
|
1380
|
+
generate_json_bignum(buffer, data, obj);
|
1051
1381
|
}
|
1052
1382
|
#endif
|
1053
1383
|
|
1054
|
-
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data,
|
1384
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1055
1385
|
{
|
1056
1386
|
double value = RFLOAT_VALUE(obj);
|
1057
|
-
char allow_nan = state->allow_nan;
|
1387
|
+
char allow_nan = data->state->allow_nan;
|
1058
1388
|
if (isinf(value) || isnan(value)) {
|
1059
1389
|
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
|
1060
1390
|
if (!allow_nan) {
|
1061
|
-
if (state->strict && state->as_json) {
|
1062
|
-
VALUE casted_obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
|
1391
|
+
if (data->state->strict && data->state->as_json) {
|
1392
|
+
VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
|
1063
1393
|
if (casted_obj != obj) {
|
1064
|
-
increase_depth(
|
1065
|
-
generate_json(buffer, data,
|
1066
|
-
state->depth--;
|
1394
|
+
increase_depth(data);
|
1395
|
+
generate_json(buffer, data, casted_obj);
|
1396
|
+
data->state->depth--;
|
1067
1397
|
return;
|
1068
1398
|
}
|
1069
1399
|
}
|
@@ -1089,30 +1419,30 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
1089
1419
|
buffer->len += len;
|
1090
1420
|
}
|
1091
1421
|
|
1092
|
-
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data,
|
1422
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1093
1423
|
{
|
1094
1424
|
VALUE fragment = RSTRUCT_GET(obj, 0);
|
1095
1425
|
Check_Type(fragment, T_STRING);
|
1096
1426
|
fbuffer_append_str(buffer, fragment);
|
1097
1427
|
}
|
1098
1428
|
|
1099
|
-
static void generate_json(FBuffer *buffer, struct generate_json_data *data,
|
1429
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1100
1430
|
{
|
1101
1431
|
bool as_json_called = false;
|
1102
1432
|
start:
|
1103
1433
|
if (obj == Qnil) {
|
1104
|
-
generate_json_null(buffer, data,
|
1434
|
+
generate_json_null(buffer, data, obj);
|
1105
1435
|
} else if (obj == Qfalse) {
|
1106
|
-
generate_json_false(buffer, data,
|
1436
|
+
generate_json_false(buffer, data, obj);
|
1107
1437
|
} else if (obj == Qtrue) {
|
1108
|
-
generate_json_true(buffer, data,
|
1438
|
+
generate_json_true(buffer, data, obj);
|
1109
1439
|
} else if (RB_SPECIAL_CONST_P(obj)) {
|
1110
1440
|
if (RB_FIXNUM_P(obj)) {
|
1111
|
-
generate_json_fixnum(buffer, data,
|
1441
|
+
generate_json_fixnum(buffer, data, obj);
|
1112
1442
|
} else if (RB_FLONUM_P(obj)) {
|
1113
|
-
generate_json_float(buffer, data,
|
1443
|
+
generate_json_float(buffer, data, obj);
|
1114
1444
|
} else if (RB_STATIC_SYM_P(obj)) {
|
1115
|
-
generate_json_symbol(buffer, data,
|
1445
|
+
generate_json_symbol(buffer, data, obj);
|
1116
1446
|
} else {
|
1117
1447
|
goto general;
|
1118
1448
|
}
|
@@ -1120,43 +1450,43 @@ start:
|
|
1120
1450
|
VALUE klass = RBASIC_CLASS(obj);
|
1121
1451
|
switch (RB_BUILTIN_TYPE(obj)) {
|
1122
1452
|
case T_BIGNUM:
|
1123
|
-
generate_json_bignum(buffer, data,
|
1453
|
+
generate_json_bignum(buffer, data, obj);
|
1124
1454
|
break;
|
1125
1455
|
case T_HASH:
|
1126
1456
|
if (klass != rb_cHash) goto general;
|
1127
|
-
generate_json_object(buffer, data,
|
1457
|
+
generate_json_object(buffer, data, obj);
|
1128
1458
|
break;
|
1129
1459
|
case T_ARRAY:
|
1130
1460
|
if (klass != rb_cArray) goto general;
|
1131
|
-
generate_json_array(buffer, data,
|
1461
|
+
generate_json_array(buffer, data, obj);
|
1132
1462
|
break;
|
1133
1463
|
case T_STRING:
|
1134
1464
|
if (klass != rb_cString) goto general;
|
1135
|
-
generate_json_string(buffer, data,
|
1465
|
+
generate_json_string(buffer, data, obj);
|
1136
1466
|
break;
|
1137
1467
|
case T_SYMBOL:
|
1138
|
-
generate_json_symbol(buffer, data,
|
1468
|
+
generate_json_symbol(buffer, data, obj);
|
1139
1469
|
break;
|
1140
1470
|
case T_FLOAT:
|
1141
1471
|
if (klass != rb_cFloat) goto general;
|
1142
|
-
generate_json_float(buffer, data,
|
1472
|
+
generate_json_float(buffer, data, obj);
|
1143
1473
|
break;
|
1144
1474
|
case T_STRUCT:
|
1145
1475
|
if (klass != cFragment) goto general;
|
1146
|
-
generate_json_fragment(buffer, data,
|
1476
|
+
generate_json_fragment(buffer, data, obj);
|
1147
1477
|
break;
|
1148
1478
|
default:
|
1149
1479
|
general:
|
1150
|
-
if (state->strict) {
|
1151
|
-
if (RTEST(state->as_json) && !as_json_called) {
|
1152
|
-
obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
|
1480
|
+
if (data->state->strict) {
|
1481
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
1482
|
+
obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
|
1153
1483
|
as_json_called = true;
|
1154
1484
|
goto start;
|
1155
1485
|
} else {
|
1156
1486
|
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
|
1157
1487
|
}
|
1158
1488
|
} else {
|
1159
|
-
generate_json_fallback(buffer, data,
|
1489
|
+
generate_json_fallback(buffer, data, obj);
|
1160
1490
|
}
|
1161
1491
|
}
|
1162
1492
|
}
|
@@ -1166,7 +1496,7 @@ static VALUE generate_json_try(VALUE d)
|
|
1166
1496
|
{
|
1167
1497
|
struct generate_json_data *data = (struct generate_json_data *)d;
|
1168
1498
|
|
1169
|
-
data->func(data->buffer, data, data->
|
1499
|
+
data->func(data->buffer, data, data->obj);
|
1170
1500
|
|
1171
1501
|
return Qnil;
|
1172
1502
|
}
|
@@ -1837,4 +2167,23 @@ void Init_generator(void)
|
|
1837
2167
|
binary_encindex = rb_ascii8bit_encindex();
|
1838
2168
|
|
1839
2169
|
rb_require("json/ext/generator/state");
|
2170
|
+
|
2171
|
+
|
2172
|
+
switch(find_simd_implementation()) {
|
2173
|
+
#ifdef HAVE_SIMD
|
2174
|
+
#ifdef HAVE_SIMD_NEON
|
2175
|
+
case SIMD_NEON:
|
2176
|
+
search_escape_basic_impl = search_escape_basic_neon;
|
2177
|
+
break;
|
2178
|
+
#endif /* HAVE_SIMD_NEON */
|
2179
|
+
#ifdef HAVE_SIMD_SSE2
|
2180
|
+
case SIMD_SSE2:
|
2181
|
+
search_escape_basic_impl = search_escape_basic_sse2;
|
2182
|
+
break;
|
2183
|
+
#endif /* HAVE_SIMD_SSE2 */
|
2184
|
+
#endif /* HAVE_SIMD */
|
2185
|
+
default:
|
2186
|
+
search_escape_basic_impl = search_escape_basic;
|
2187
|
+
break;
|
2188
|
+
}
|
1840
2189
|
}
|