json 2.12.2 → 2.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +95 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +35 -56
- data/ext/json/ext/generator/extconf.rb +2 -26
- data/ext/json/ext/generator/generator.c +395 -359
- data/ext/json/ext/json.h +101 -0
- data/ext/json/ext/parser/extconf.rb +7 -2
- data/ext/json/ext/parser/parser.c +701 -445
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +218 -0
- data/ext/json/ext/vendor/fpconv.c +13 -12
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/json.gemspec +2 -3
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +78 -40
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +113 -63
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +76 -0
- metadata +8 -4
- data/ext/json/ext/generator/simd.h +0 -112
|
@@ -1,14 +1,20 @@
|
|
|
1
|
-
#include "
|
|
1
|
+
#include "../json.h"
|
|
2
2
|
#include "../fbuffer/fbuffer.h"
|
|
3
3
|
#include "../vendor/fpconv.c"
|
|
4
4
|
|
|
5
5
|
#include <math.h>
|
|
6
6
|
#include <ctype.h>
|
|
7
7
|
|
|
8
|
-
#include "simd.h"
|
|
8
|
+
#include "../simd/simd.h"
|
|
9
9
|
|
|
10
10
|
/* ruby api and some helpers */
|
|
11
11
|
|
|
12
|
+
enum duplicate_key_action {
|
|
13
|
+
JSON_DEPRECATED = 0,
|
|
14
|
+
JSON_IGNORE,
|
|
15
|
+
JSON_RAISE,
|
|
16
|
+
};
|
|
17
|
+
|
|
12
18
|
typedef struct JSON_Generator_StateStruct {
|
|
13
19
|
VALUE indent;
|
|
14
20
|
VALUE space;
|
|
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
|
|
|
21
27
|
long depth;
|
|
22
28
|
long buffer_initial_length;
|
|
23
29
|
|
|
30
|
+
enum duplicate_key_action on_duplicate_key;
|
|
31
|
+
|
|
32
|
+
bool as_json_single_arg;
|
|
24
33
|
bool allow_nan;
|
|
25
34
|
bool ascii_only;
|
|
26
35
|
bool script_safe;
|
|
27
36
|
bool strict;
|
|
28
37
|
} JSON_Generator_State;
|
|
29
38
|
|
|
30
|
-
|
|
31
|
-
#define RB_UNLIKELY(cond) (cond)
|
|
32
|
-
#endif
|
|
33
|
-
|
|
34
|
-
static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
|
|
39
|
+
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
|
|
35
40
|
|
|
36
|
-
static ID i_to_s, i_to_json, i_new,
|
|
37
|
-
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
|
|
41
|
+
static ID i_to_s, i_to_json, i_new, i_encode;
|
|
42
|
+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
|
|
38
43
|
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
|
|
39
44
|
|
|
40
45
|
|
|
@@ -55,8 +60,11 @@ struct generate_json_data {
|
|
|
55
60
|
JSON_Generator_State *state;
|
|
56
61
|
VALUE obj;
|
|
57
62
|
generator_func func;
|
|
63
|
+
long depth;
|
|
58
64
|
};
|
|
59
65
|
|
|
66
|
+
static SIMD_Implementation simd_impl;
|
|
67
|
+
|
|
60
68
|
static VALUE cState_from_state_s(VALUE self, VALUE opts);
|
|
61
69
|
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
|
|
62
70
|
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
@@ -76,23 +84,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
|
|
|
76
84
|
|
|
77
85
|
static int usascii_encindex, utf8_encindex, binary_encindex;
|
|
78
86
|
|
|
79
|
-
|
|
80
|
-
RBIMPL_ATTR_NORETURN()
|
|
81
|
-
#endif
|
|
82
|
-
static void raise_generator_error_str(VALUE invalid_object, VALUE str)
|
|
87
|
+
NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
|
|
83
88
|
{
|
|
89
|
+
rb_enc_associate_index(str, utf8_encindex);
|
|
84
90
|
VALUE exc = rb_exc_new_str(eGeneratorError, str);
|
|
85
91
|
rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
|
|
86
92
|
rb_exc_raise(exc);
|
|
87
93
|
}
|
|
88
94
|
|
|
89
|
-
#ifdef RBIMPL_ATTR_NORETURN
|
|
90
|
-
RBIMPL_ATTR_NORETURN()
|
|
91
|
-
#endif
|
|
92
95
|
#ifdef RBIMPL_ATTR_FORMAT
|
|
93
96
|
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
|
|
94
97
|
#endif
|
|
95
|
-
static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
|
|
98
|
+
NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
|
|
96
99
|
{
|
|
97
100
|
va_list args;
|
|
98
101
|
va_start(args, fmt);
|
|
@@ -127,18 +130,12 @@ typedef struct _search_state {
|
|
|
127
130
|
#endif /* HAVE_SIMD */
|
|
128
131
|
} search_state;
|
|
129
132
|
|
|
130
|
-
|
|
131
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
132
|
-
#else
|
|
133
|
-
#define FORCE_INLINE
|
|
134
|
-
#endif
|
|
135
|
-
|
|
136
|
-
static inline FORCE_INLINE void search_flush(search_state *search)
|
|
133
|
+
ALWAYS_INLINE(static) void search_flush(search_state *search)
|
|
137
134
|
{
|
|
138
135
|
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
|
139
136
|
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
|
140
|
-
// For back-to-back characters that need to be escaped,
|
|
141
|
-
// will be called just before calling escape_UTF8_char_basic. There will be no
|
|
137
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
|
138
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
|
142
139
|
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
|
143
140
|
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
|
144
141
|
if (search->ptr > search->cursor) {
|
|
@@ -160,8 +157,6 @@ static const unsigned char escape_table_basic[256] = {
|
|
|
160
157
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
161
158
|
};
|
|
162
159
|
|
|
163
|
-
static unsigned char (*search_escape_basic_impl)(search_state *);
|
|
164
|
-
|
|
165
160
|
static inline unsigned char search_escape_basic(search_state *search)
|
|
166
161
|
{
|
|
167
162
|
while (search->ptr < search->end) {
|
|
@@ -176,7 +171,7 @@ static inline unsigned char search_escape_basic(search_state *search)
|
|
|
176
171
|
return 0;
|
|
177
172
|
}
|
|
178
173
|
|
|
179
|
-
static
|
|
174
|
+
ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
|
|
180
175
|
{
|
|
181
176
|
const unsigned char ch = (unsigned char)*search->ptr;
|
|
182
177
|
switch (ch) {
|
|
@@ -217,11 +212,39 @@ static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
|
|
|
217
212
|
* Everything else (should be UTF-8) is just passed through and
|
|
218
213
|
* appended to the result.
|
|
219
214
|
*/
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
#if defined(HAVE_SIMD_NEON)
|
|
218
|
+
static inline unsigned char search_escape_basic_neon(search_state *search);
|
|
219
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
220
|
+
static inline unsigned char search_escape_basic_sse2(search_state *search);
|
|
221
|
+
#endif
|
|
222
|
+
|
|
223
|
+
static inline unsigned char search_escape_basic(search_state *search);
|
|
224
|
+
|
|
220
225
|
static inline void convert_UTF8_to_JSON(search_state *search)
|
|
221
226
|
{
|
|
222
|
-
|
|
227
|
+
#ifdef HAVE_SIMD
|
|
228
|
+
#if defined(HAVE_SIMD_NEON)
|
|
229
|
+
while (search_escape_basic_neon(search)) {
|
|
230
|
+
escape_UTF8_char_basic(search);
|
|
231
|
+
}
|
|
232
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
233
|
+
if (simd_impl == SIMD_SSE2) {
|
|
234
|
+
while (search_escape_basic_sse2(search)) {
|
|
235
|
+
escape_UTF8_char_basic(search);
|
|
236
|
+
}
|
|
237
|
+
return;
|
|
238
|
+
}
|
|
239
|
+
while (search_escape_basic(search)) {
|
|
223
240
|
escape_UTF8_char_basic(search);
|
|
224
241
|
}
|
|
242
|
+
#endif
|
|
243
|
+
#else
|
|
244
|
+
while (search_escape_basic(search)) {
|
|
245
|
+
escape_UTF8_char_basic(search);
|
|
246
|
+
}
|
|
247
|
+
#endif /* HAVE_SIMD */
|
|
225
248
|
}
|
|
226
249
|
|
|
227
250
|
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
@@ -263,8 +286,10 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
|
263
286
|
|
|
264
287
|
#ifdef HAVE_SIMD
|
|
265
288
|
|
|
266
|
-
static
|
|
289
|
+
ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
|
267
290
|
{
|
|
291
|
+
RBIMPL_ASSERT_OR_ASSUME(len < vec_len);
|
|
292
|
+
|
|
268
293
|
// Flush the buffer so everything up until the last 'len' characters are unflushed.
|
|
269
294
|
search_flush(search);
|
|
270
295
|
|
|
@@ -274,19 +299,25 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
|
|
|
274
299
|
char *s = (buf->ptr + buf->len);
|
|
275
300
|
|
|
276
301
|
// Pad the buffer with dummy characters that won't need escaping.
|
|
277
|
-
// This seem
|
|
278
|
-
|
|
302
|
+
// This seems wasteful at first sight, but memset of vector length is very fast.
|
|
303
|
+
// This is a space as it can be directly represented as an immediate on AArch64.
|
|
304
|
+
memset(s, ' ', vec_len);
|
|
279
305
|
|
|
280
306
|
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
|
|
281
307
|
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
|
|
282
|
-
|
|
308
|
+
if (vec_len == 16) {
|
|
309
|
+
RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
|
|
310
|
+
json_fast_memcpy16(s, search->ptr, len);
|
|
311
|
+
} else {
|
|
312
|
+
MEMCPY(s, search->ptr, char, len);
|
|
313
|
+
}
|
|
283
314
|
|
|
284
315
|
return s;
|
|
285
316
|
}
|
|
286
317
|
|
|
287
318
|
#ifdef HAVE_SIMD_NEON
|
|
288
319
|
|
|
289
|
-
static
|
|
320
|
+
ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search)
|
|
290
321
|
{
|
|
291
322
|
uint64_t mask = search->matches_mask;
|
|
292
323
|
uint32_t index = trailing_zeros64(mask) >> 2;
|
|
@@ -304,28 +335,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
|
|
304
335
|
return 1;
|
|
305
336
|
}
|
|
306
337
|
|
|
307
|
-
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
|
308
|
-
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
|
309
|
-
{
|
|
310
|
-
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
|
311
|
-
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
|
312
|
-
return mask & 0x8888888888888888ull;
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
|
316
|
-
{
|
|
317
|
-
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
|
318
|
-
|
|
319
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
|
320
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
|
321
|
-
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
|
322
|
-
|
|
323
|
-
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
|
324
|
-
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
|
325
|
-
|
|
326
|
-
return neon_match_mask(needs_escape);
|
|
327
|
-
}
|
|
328
|
-
|
|
329
338
|
static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
330
339
|
{
|
|
331
340
|
if (RB_UNLIKELY(search->has_matches)) {
|
|
@@ -333,7 +342,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
|
333
342
|
if (search->matches_mask > 0) {
|
|
334
343
|
return neon_next_match(search);
|
|
335
344
|
} else {
|
|
336
|
-
// neon_next_match will only advance search->ptr up to the last matching character.
|
|
345
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
|
337
346
|
// Skip over any characters in the last chunk that occur after the last match.
|
|
338
347
|
search->has_matches = false;
|
|
339
348
|
search->ptr = search->chunk_end;
|
|
@@ -342,67 +351,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
|
342
351
|
|
|
343
352
|
/*
|
|
344
353
|
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
|
345
|
-
* need to be escaped.
|
|
346
|
-
*
|
|
354
|
+
* need to be escaped.
|
|
355
|
+
*
|
|
347
356
|
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
|
348
|
-
*
|
|
357
|
+
*
|
|
349
358
|
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
|
350
359
|
* the vector instructions may work on larger vectors.
|
|
351
|
-
*
|
|
360
|
+
*
|
|
352
361
|
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
|
353
|
-
*
|
|
354
|
-
* lower_bound: [20 20 20 20 20 20 20 20]
|
|
355
|
-
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
|
356
|
-
* dblquote: [22 22 22 22 22 22 22 22]
|
|
357
|
-
*
|
|
358
|
-
* Next we load the first chunk of the ptr:
|
|
362
|
+
*
|
|
363
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
|
364
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
|
365
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
|
366
|
+
*
|
|
367
|
+
* Next we load the first chunk of the ptr:
|
|
359
368
|
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
|
360
|
-
*
|
|
369
|
+
*
|
|
361
370
|
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
|
362
371
|
* as no bytes are less than 32 (0x20):
|
|
363
372
|
* [0 0 0 0 0 0 0 0]
|
|
364
|
-
*
|
|
373
|
+
*
|
|
365
374
|
* Next, we check if any byte in chunk is equal to a backslash:
|
|
366
375
|
* [0 0 0 FF 0 0 0 0]
|
|
367
|
-
*
|
|
376
|
+
*
|
|
368
377
|
* Finally we check if any byte in chunk is equal to a double quote:
|
|
369
|
-
* [FF 0 0 0 0 0 0 0]
|
|
370
|
-
*
|
|
378
|
+
* [FF 0 0 0 0 0 0 0]
|
|
379
|
+
*
|
|
371
380
|
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
|
372
381
|
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
|
373
382
|
* This is the needs_escape vector and it is equal to:
|
|
374
|
-
* [FF 0 0 FF 0 0 0 0]
|
|
375
|
-
*
|
|
383
|
+
* [FF 0 0 FF 0 0 0 0]
|
|
384
|
+
*
|
|
376
385
|
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
|
377
386
|
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
|
378
|
-
*
|
|
387
|
+
*
|
|
379
388
|
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
|
380
389
|
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
|
381
390
|
* have at least one byte that needs to be escaped.
|
|
382
391
|
*/
|
|
383
|
-
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
|
384
|
-
uint64_t mask = neon_rules_update(search->ptr);
|
|
385
392
|
|
|
386
|
-
|
|
387
|
-
search->ptr += sizeof(uint8x16_t);
|
|
388
|
-
continue;
|
|
389
|
-
}
|
|
390
|
-
search->matches_mask = mask;
|
|
393
|
+
if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
|
|
391
394
|
search->has_matches = true;
|
|
392
395
|
search->chunk_base = search->ptr;
|
|
393
396
|
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
|
394
397
|
return neon_next_match(search);
|
|
395
398
|
}
|
|
396
399
|
|
|
397
|
-
// There are fewer than 16 bytes left.
|
|
400
|
+
// There are fewer than 16 bytes left.
|
|
398
401
|
unsigned long remaining = (search->end - search->ptr);
|
|
399
402
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
|
400
403
|
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
|
401
404
|
|
|
402
|
-
uint64_t mask =
|
|
405
|
+
uint64_t mask = compute_chunk_mask_neon(s);
|
|
403
406
|
|
|
404
407
|
if (!mask) {
|
|
405
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
408
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
406
409
|
// search->cursor to search->ptr.
|
|
407
410
|
fbuffer_consumed(search->buffer, remaining);
|
|
408
411
|
search->ptr = search->end;
|
|
@@ -428,12 +431,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
|
428
431
|
|
|
429
432
|
#ifdef HAVE_SIMD_SSE2
|
|
430
433
|
|
|
431
|
-
|
|
432
|
-
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
|
433
|
-
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
|
434
|
-
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
|
435
|
-
|
|
436
|
-
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
|
434
|
+
ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search)
|
|
437
435
|
{
|
|
438
436
|
int mask = search->matches_mask;
|
|
439
437
|
int index = trailing_zeros(mask);
|
|
@@ -457,26 +455,14 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
|
|
457
455
|
#define TARGET_SSE2
|
|
458
456
|
#endif
|
|
459
457
|
|
|
460
|
-
static
|
|
461
|
-
{
|
|
462
|
-
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
|
463
|
-
|
|
464
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
|
465
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
|
466
|
-
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
|
467
|
-
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
|
468
|
-
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
|
469
|
-
return _mm_movemask_epi8(needs_escape);
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
|
458
|
+
ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search)
|
|
473
459
|
{
|
|
474
460
|
if (RB_UNLIKELY(search->has_matches)) {
|
|
475
461
|
// There are more matches if search->matches_mask > 0.
|
|
476
462
|
if (search->matches_mask > 0) {
|
|
477
463
|
return sse2_next_match(search);
|
|
478
464
|
} else {
|
|
479
|
-
// sse2_next_match will only advance search->ptr up to the last matching character.
|
|
465
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
|
480
466
|
// Skip over any characters in the last chunk that occur after the last match.
|
|
481
467
|
search->has_matches = false;
|
|
482
468
|
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
|
@@ -487,29 +473,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
|
|
|
487
473
|
}
|
|
488
474
|
}
|
|
489
475
|
|
|
490
|
-
|
|
491
|
-
int needs_escape_mask = sse2_update(search->ptr);
|
|
492
|
-
|
|
493
|
-
if (needs_escape_mask == 0) {
|
|
494
|
-
search->ptr += sizeof(__m128i);
|
|
495
|
-
continue;
|
|
496
|
-
}
|
|
497
|
-
|
|
476
|
+
if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
|
|
498
477
|
search->has_matches = true;
|
|
499
|
-
search->matches_mask = needs_escape_mask;
|
|
500
478
|
search->chunk_base = search->ptr;
|
|
479
|
+
search->chunk_end = search->ptr + sizeof(__m128i);
|
|
501
480
|
return sse2_next_match(search);
|
|
502
481
|
}
|
|
503
482
|
|
|
504
|
-
// There are fewer than 16 bytes left.
|
|
483
|
+
// There are fewer than 16 bytes left.
|
|
505
484
|
unsigned long remaining = (search->end - search->ptr);
|
|
506
485
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
|
507
486
|
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
|
508
487
|
|
|
509
|
-
int needs_escape_mask =
|
|
488
|
+
int needs_escape_mask = compute_chunk_mask_sse2(s);
|
|
510
489
|
|
|
511
490
|
if (needs_escape_mask == 0) {
|
|
512
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
491
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
513
492
|
// search->cursor to search->ptr.
|
|
514
493
|
fbuffer_consumed(search->buffer, remaining);
|
|
515
494
|
search->ptr = search->end;
|
|
@@ -638,7 +617,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
|
|
|
638
617
|
return 0;
|
|
639
618
|
}
|
|
640
619
|
|
|
641
|
-
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
620
|
+
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
621
|
+
{
|
|
642
622
|
const unsigned char ch = (unsigned char)*search->ptr;
|
|
643
623
|
switch (ch_len) {
|
|
644
624
|
case 1: {
|
|
@@ -668,7 +648,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
|
|
|
668
648
|
|
|
669
649
|
uint32_t wchar = 0;
|
|
670
650
|
|
|
671
|
-
switch(ch_len) {
|
|
651
|
+
switch (ch_len) {
|
|
672
652
|
case 2:
|
|
673
653
|
wchar = ch & 0x1F;
|
|
674
654
|
break;
|
|
@@ -828,7 +808,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
828
808
|
* _state_ is a JSON::State object, that can also be used to configure the
|
|
829
809
|
* produced JSON string output further.
|
|
830
810
|
*/
|
|
831
|
-
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
|
811
|
+
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
|
812
|
+
{
|
|
832
813
|
rb_check_arity(argc, 0, 1);
|
|
833
814
|
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
|
834
815
|
return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
|
|
@@ -885,17 +866,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
885
866
|
return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
|
|
886
867
|
}
|
|
887
868
|
|
|
888
|
-
/*
|
|
889
|
-
* call-seq: String.included(modul)
|
|
890
|
-
*
|
|
891
|
-
* Extends _modul_ with the String::Extend module.
|
|
892
|
-
*/
|
|
893
|
-
static VALUE mString_included_s(VALUE self, VALUE modul) {
|
|
894
|
-
VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
|
|
895
|
-
rb_call_super(1, &modul);
|
|
896
|
-
return result;
|
|
897
|
-
}
|
|
898
|
-
|
|
899
869
|
/*
|
|
900
870
|
* call-seq: to_json(*)
|
|
901
871
|
*
|
|
@@ -910,51 +880,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
910
880
|
return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
|
|
911
881
|
}
|
|
912
882
|
|
|
913
|
-
/*
|
|
914
|
-
* call-seq: to_json_raw_object()
|
|
915
|
-
*
|
|
916
|
-
* This method creates a raw object hash, that can be nested into
|
|
917
|
-
* other data structures and will be generated as a raw string. This
|
|
918
|
-
* method should be used, if you want to convert raw strings to JSON
|
|
919
|
-
* instead of UTF-8 strings, e. g. binary data.
|
|
920
|
-
*/
|
|
921
|
-
static VALUE mString_to_json_raw_object(VALUE self)
|
|
922
|
-
{
|
|
923
|
-
VALUE ary;
|
|
924
|
-
VALUE result = rb_hash_new();
|
|
925
|
-
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
|
|
926
|
-
ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
|
|
927
|
-
rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
|
|
928
|
-
return result;
|
|
929
|
-
}
|
|
930
|
-
|
|
931
|
-
/*
|
|
932
|
-
* call-seq: to_json_raw(*args)
|
|
933
|
-
*
|
|
934
|
-
* This method creates a JSON text from the result of a call to
|
|
935
|
-
* to_json_raw_object of this String.
|
|
936
|
-
*/
|
|
937
|
-
static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
|
|
938
|
-
{
|
|
939
|
-
VALUE obj = mString_to_json_raw_object(self);
|
|
940
|
-
Check_Type(obj, T_HASH);
|
|
941
|
-
return mHash_to_json(argc, argv, obj);
|
|
942
|
-
}
|
|
943
|
-
|
|
944
|
-
/*
|
|
945
|
-
* call-seq: json_create(o)
|
|
946
|
-
*
|
|
947
|
-
* Raw Strings are JSON Objects (the raw bytes are stored in an array for the
|
|
948
|
-
* key "raw"). The Ruby String can be created by this module method.
|
|
949
|
-
*/
|
|
950
|
-
static VALUE mString_Extend_json_create(VALUE self, VALUE o)
|
|
951
|
-
{
|
|
952
|
-
VALUE ary;
|
|
953
|
-
Check_Type(o, T_HASH);
|
|
954
|
-
ary = rb_hash_aref(o, rb_str_new2("raw"));
|
|
955
|
-
return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
|
|
956
|
-
}
|
|
957
|
-
|
|
958
883
|
/*
|
|
959
884
|
* call-seq: to_json(*)
|
|
960
885
|
*
|
|
@@ -1038,11 +963,6 @@ static size_t State_memsize(const void *ptr)
|
|
|
1038
963
|
return sizeof(JSON_Generator_State);
|
|
1039
964
|
}
|
|
1040
965
|
|
|
1041
|
-
#ifndef HAVE_RB_EXT_RACTOR_SAFE
|
|
1042
|
-
# undef RUBY_TYPED_FROZEN_SHAREABLE
|
|
1043
|
-
# define RUBY_TYPED_FROZEN_SHAREABLE 0
|
|
1044
|
-
#endif
|
|
1045
|
-
|
|
1046
966
|
static const rb_data_type_t JSON_Generator_State_type = {
|
|
1047
967
|
"JSON/Generator/State",
|
|
1048
968
|
{
|
|
@@ -1084,18 +1004,24 @@ static void vstate_spill(struct generate_json_data *data)
|
|
|
1084
1004
|
RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
|
|
1085
1005
|
}
|
|
1086
1006
|
|
|
1087
|
-
static inline VALUE
|
|
1007
|
+
static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj)
|
|
1088
1008
|
{
|
|
1089
1009
|
if (RB_UNLIKELY(!data->vstate)) {
|
|
1090
1010
|
vstate_spill(data);
|
|
1091
1011
|
}
|
|
1092
|
-
|
|
1012
|
+
GET_STATE(data->vstate);
|
|
1013
|
+
state->depth = data->depth;
|
|
1014
|
+
VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate);
|
|
1015
|
+
// no need to restore state->depth, vstate is just a temporary State
|
|
1016
|
+
return tmp;
|
|
1093
1017
|
}
|
|
1094
1018
|
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
};
|
|
1019
|
+
static VALUE
|
|
1020
|
+
json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
|
|
1021
|
+
{
|
|
1022
|
+
VALUE proc_args[2] = {object, is_key};
|
|
1023
|
+
return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
|
|
1024
|
+
}
|
|
1099
1025
|
|
|
1100
1026
|
static VALUE
|
|
1101
1027
|
convert_string_subclass(VALUE key)
|
|
@@ -1112,6 +1038,145 @@ convert_string_subclass(VALUE key)
|
|
|
1112
1038
|
return key_to_s;
|
|
1113
1039
|
}
|
|
1114
1040
|
|
|
1041
|
+
static bool enc_utf8_compatible_p(int enc_idx)
|
|
1042
|
+
{
|
|
1043
|
+
if (enc_idx == usascii_encindex) return true;
|
|
1044
|
+
if (enc_idx == utf8_encindex) return true;
|
|
1045
|
+
return false;
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
static VALUE encode_json_string_try(VALUE str)
|
|
1049
|
+
{
|
|
1050
|
+
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
|
|
1051
|
+
}
|
|
1052
|
+
|
|
1053
|
+
static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
|
|
1054
|
+
{
|
|
1055
|
+
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
|
|
1056
|
+
return Qundef;
|
|
1057
|
+
}
|
|
1058
|
+
|
|
1059
|
+
static inline bool valid_json_string_p(VALUE str)
|
|
1060
|
+
{
|
|
1061
|
+
int coderange = rb_enc_str_coderange(str);
|
|
1062
|
+
|
|
1063
|
+
if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
|
|
1064
|
+
return true;
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
|
|
1068
|
+
return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
|
|
1069
|
+
}
|
|
1070
|
+
|
|
1071
|
+
return false;
|
|
1072
|
+
}
|
|
1073
|
+
|
|
1074
|
+
static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
|
|
1075
|
+
{
|
|
1076
|
+
if (RB_LIKELY(valid_json_string_p(str))) {
|
|
1077
|
+
return str;
|
|
1078
|
+
}
|
|
1079
|
+
|
|
1080
|
+
if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
|
|
1081
|
+
VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
|
|
1082
|
+
if (coerced_str != str) {
|
|
1083
|
+
if (RB_TYPE_P(coerced_str, T_STRING)) {
|
|
1084
|
+
if (!valid_json_string_p(coerced_str)) {
|
|
1085
|
+
raise_generator_error(str, "source sequence is illegal/malformed utf-8");
|
|
1086
|
+
}
|
|
1087
|
+
} else {
|
|
1088
|
+
// as_json could return another type than T_STRING
|
|
1089
|
+
if (is_key) {
|
|
1090
|
+
raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
return coerced_str;
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
|
|
1099
|
+
VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
|
|
1100
|
+
switch (rb_enc_str_coderange(utf8_string)) {
|
|
1101
|
+
case ENC_CODERANGE_7BIT:
|
|
1102
|
+
return utf8_string;
|
|
1103
|
+
case ENC_CODERANGE_VALID:
|
|
1104
|
+
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
|
|
1105
|
+
// TODO: Raise in 3.0.0
|
|
1106
|
+
rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
|
|
1107
|
+
return utf8_string;
|
|
1108
|
+
break;
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
|
|
1112
|
+
return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
|
|
1113
|
+
}
|
|
1114
|
+
|
|
1115
|
+
static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1116
|
+
{
|
|
1117
|
+
fbuffer_append_char(buffer, '"');
|
|
1118
|
+
|
|
1119
|
+
long len;
|
|
1120
|
+
search_state search;
|
|
1121
|
+
search.buffer = buffer;
|
|
1122
|
+
RSTRING_GETMEM(obj, search.ptr, len);
|
|
1123
|
+
search.cursor = search.ptr;
|
|
1124
|
+
search.end = search.ptr + len;
|
|
1125
|
+
|
|
1126
|
+
#ifdef HAVE_SIMD
|
|
1127
|
+
search.matches_mask = 0;
|
|
1128
|
+
search.has_matches = false;
|
|
1129
|
+
search.chunk_base = NULL;
|
|
1130
|
+
search.chunk_end = NULL;
|
|
1131
|
+
#endif /* HAVE_SIMD */
|
|
1132
|
+
|
|
1133
|
+
switch (rb_enc_str_coderange(obj)) {
|
|
1134
|
+
case ENC_CODERANGE_7BIT:
|
|
1135
|
+
case ENC_CODERANGE_VALID:
|
|
1136
|
+
if (RB_UNLIKELY(data->state->ascii_only)) {
|
|
1137
|
+
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
|
1138
|
+
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
|
1139
|
+
convert_UTF8_to_script_safe_JSON(&search);
|
|
1140
|
+
} else {
|
|
1141
|
+
convert_UTF8_to_JSON(&search);
|
|
1142
|
+
}
|
|
1143
|
+
break;
|
|
1144
|
+
default:
|
|
1145
|
+
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
|
1146
|
+
break;
|
|
1147
|
+
}
|
|
1148
|
+
fbuffer_append_char(buffer, '"');
|
|
1149
|
+
}
|
|
1150
|
+
|
|
1151
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1152
|
+
{
|
|
1153
|
+
obj = ensure_valid_encoding(data, obj, false, false);
|
|
1154
|
+
raw_generate_json_string(buffer, data, obj);
|
|
1155
|
+
}
|
|
1156
|
+
|
|
1157
|
+
struct hash_foreach_arg {
|
|
1158
|
+
VALUE hash;
|
|
1159
|
+
struct generate_json_data *data;
|
|
1160
|
+
int first_key_type;
|
|
1161
|
+
bool first;
|
|
1162
|
+
bool mixed_keys_encountered;
|
|
1163
|
+
};
|
|
1164
|
+
|
|
1165
|
+
NOINLINE(static) void
|
|
1166
|
+
json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
|
|
1167
|
+
{
|
|
1168
|
+
if (arg->mixed_keys_encountered) {
|
|
1169
|
+
return;
|
|
1170
|
+
}
|
|
1171
|
+
arg->mixed_keys_encountered = true;
|
|
1172
|
+
|
|
1173
|
+
JSON_Generator_State *state = arg->data->state;
|
|
1174
|
+
if (state->on_duplicate_key != JSON_IGNORE) {
|
|
1175
|
+
VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
|
|
1176
|
+
rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
|
|
1177
|
+
}
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1115
1180
|
static int
|
|
1116
1181
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
1117
1182
|
{
|
|
@@ -1121,22 +1186,34 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1121
1186
|
FBuffer *buffer = data->buffer;
|
|
1122
1187
|
JSON_Generator_State *state = data->state;
|
|
1123
1188
|
|
|
1124
|
-
long depth =
|
|
1125
|
-
int
|
|
1189
|
+
long depth = data->depth;
|
|
1190
|
+
int key_type = rb_type(key);
|
|
1191
|
+
|
|
1192
|
+
if (arg->first) {
|
|
1193
|
+
arg->first = false;
|
|
1194
|
+
arg->first_key_type = key_type;
|
|
1195
|
+
}
|
|
1196
|
+
else {
|
|
1197
|
+
fbuffer_append_char(buffer, ',');
|
|
1198
|
+
}
|
|
1126
1199
|
|
|
1127
|
-
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
|
|
1128
1200
|
if (RB_UNLIKELY(data->state->object_nl)) {
|
|
1129
1201
|
fbuffer_append_str(buffer, data->state->object_nl);
|
|
1130
1202
|
}
|
|
1131
1203
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1132
|
-
|
|
1133
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1134
|
-
}
|
|
1204
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1135
1205
|
}
|
|
1136
1206
|
|
|
1137
1207
|
VALUE key_to_s;
|
|
1138
|
-
|
|
1208
|
+
bool as_json_called = false;
|
|
1209
|
+
|
|
1210
|
+
start:
|
|
1211
|
+
switch (key_type) {
|
|
1139
1212
|
case T_STRING:
|
|
1213
|
+
if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
|
|
1214
|
+
json_inspect_hash_with_mixed_keys(arg);
|
|
1215
|
+
}
|
|
1216
|
+
|
|
1140
1217
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
|
1141
1218
|
key_to_s = key;
|
|
1142
1219
|
} else {
|
|
@@ -1144,15 +1221,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1144
1221
|
}
|
|
1145
1222
|
break;
|
|
1146
1223
|
case T_SYMBOL:
|
|
1224
|
+
if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
|
|
1225
|
+
json_inspect_hash_with_mixed_keys(arg);
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1147
1228
|
key_to_s = rb_sym2str(key);
|
|
1148
1229
|
break;
|
|
1149
1230
|
default:
|
|
1231
|
+
if (data->state->strict) {
|
|
1232
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
|
1233
|
+
key = json_call_as_json(data->state, key, Qtrue);
|
|
1234
|
+
key_type = rb_type(key);
|
|
1235
|
+
as_json_called = true;
|
|
1236
|
+
goto start;
|
|
1237
|
+
} else {
|
|
1238
|
+
raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
|
|
1239
|
+
}
|
|
1240
|
+
}
|
|
1150
1241
|
key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
|
|
1151
1242
|
break;
|
|
1152
1243
|
}
|
|
1153
1244
|
|
|
1245
|
+
key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
|
|
1246
|
+
|
|
1154
1247
|
if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
|
|
1155
|
-
|
|
1248
|
+
raw_generate_json_string(buffer, data, key_to_s);
|
|
1156
1249
|
} else {
|
|
1157
1250
|
generate_json(buffer, data, key_to_s);
|
|
1158
1251
|
}
|
|
@@ -1161,46 +1254,43 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1161
1254
|
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
|
|
1162
1255
|
generate_json(buffer, data, val);
|
|
1163
1256
|
|
|
1164
|
-
arg->iter++;
|
|
1165
1257
|
return ST_CONTINUE;
|
|
1166
1258
|
}
|
|
1167
1259
|
|
|
1168
1260
|
static inline long increase_depth(struct generate_json_data *data)
|
|
1169
1261
|
{
|
|
1170
1262
|
JSON_Generator_State *state = data->state;
|
|
1171
|
-
long depth = ++
|
|
1263
|
+
long depth = ++data->depth;
|
|
1172
1264
|
if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
|
|
1173
|
-
rb_raise(eNestingError, "nesting of %ld is too deep", --
|
|
1265
|
+
rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth);
|
|
1174
1266
|
}
|
|
1175
1267
|
return depth;
|
|
1176
1268
|
}
|
|
1177
1269
|
|
|
1178
1270
|
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1179
1271
|
{
|
|
1180
|
-
int j;
|
|
1181
1272
|
long depth = increase_depth(data);
|
|
1182
1273
|
|
|
1183
1274
|
if (RHASH_SIZE(obj) == 0) {
|
|
1184
1275
|
fbuffer_append(buffer, "{}", 2);
|
|
1185
|
-
--data->
|
|
1276
|
+
--data->depth;
|
|
1186
1277
|
return;
|
|
1187
1278
|
}
|
|
1188
1279
|
|
|
1189
1280
|
fbuffer_append_char(buffer, '{');
|
|
1190
1281
|
|
|
1191
1282
|
struct hash_foreach_arg arg = {
|
|
1283
|
+
.hash = obj,
|
|
1192
1284
|
.data = data,
|
|
1193
|
-
.
|
|
1285
|
+
.first = true,
|
|
1194
1286
|
};
|
|
1195
1287
|
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
|
|
1196
1288
|
|
|
1197
|
-
depth = --data->
|
|
1289
|
+
depth = --data->depth;
|
|
1198
1290
|
if (RB_UNLIKELY(data->state->object_nl)) {
|
|
1199
1291
|
fbuffer_append_str(buffer, data->state->object_nl);
|
|
1200
1292
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1201
|
-
|
|
1202
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1203
|
-
}
|
|
1293
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1204
1294
|
}
|
|
1205
1295
|
}
|
|
1206
1296
|
fbuffer_append_char(buffer, '}');
|
|
@@ -1208,125 +1298,41 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
|
1208
1298
|
|
|
1209
1299
|
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1210
1300
|
{
|
|
1211
|
-
int i, j;
|
|
1212
1301
|
long depth = increase_depth(data);
|
|
1213
1302
|
|
|
1214
1303
|
if (RARRAY_LEN(obj) == 0) {
|
|
1215
1304
|
fbuffer_append(buffer, "[]", 2);
|
|
1216
|
-
--data->
|
|
1305
|
+
--data->depth;
|
|
1217
1306
|
return;
|
|
1218
1307
|
}
|
|
1219
1308
|
|
|
1220
1309
|
fbuffer_append_char(buffer, '[');
|
|
1221
1310
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
|
1222
|
-
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
|
1311
|
+
for (int i = 0; i < RARRAY_LEN(obj); i++) {
|
|
1223
1312
|
if (i > 0) {
|
|
1224
1313
|
fbuffer_append_char(buffer, ',');
|
|
1225
1314
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
|
1226
1315
|
}
|
|
1227
1316
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1228
|
-
|
|
1229
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1230
|
-
}
|
|
1317
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1231
1318
|
}
|
|
1232
1319
|
generate_json(buffer, data, RARRAY_AREF(obj, i));
|
|
1233
1320
|
}
|
|
1234
|
-
data->
|
|
1321
|
+
data->depth = --depth;
|
|
1235
1322
|
if (RB_UNLIKELY(data->state->array_nl)) {
|
|
1236
1323
|
fbuffer_append_str(buffer, data->state->array_nl);
|
|
1237
1324
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1238
|
-
|
|
1239
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1240
|
-
}
|
|
1325
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1241
1326
|
}
|
|
1242
1327
|
}
|
|
1243
1328
|
fbuffer_append_char(buffer, ']');
|
|
1244
1329
|
}
|
|
1245
1330
|
|
|
1246
|
-
static inline int enc_utf8_compatible_p(int enc_idx)
|
|
1247
|
-
{
|
|
1248
|
-
if (enc_idx == usascii_encindex) return 1;
|
|
1249
|
-
if (enc_idx == utf8_encindex) return 1;
|
|
1250
|
-
return 0;
|
|
1251
|
-
}
|
|
1252
|
-
|
|
1253
|
-
static VALUE encode_json_string_try(VALUE str)
|
|
1254
|
-
{
|
|
1255
|
-
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
|
|
1256
|
-
}
|
|
1257
|
-
|
|
1258
|
-
static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
|
|
1259
|
-
{
|
|
1260
|
-
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
|
|
1261
|
-
return Qundef;
|
|
1262
|
-
}
|
|
1263
|
-
|
|
1264
|
-
static inline VALUE ensure_valid_encoding(VALUE str)
|
|
1265
|
-
{
|
|
1266
|
-
int encindex = RB_ENCODING_GET(str);
|
|
1267
|
-
VALUE utf8_string;
|
|
1268
|
-
if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
|
|
1269
|
-
if (encindex == binary_encindex) {
|
|
1270
|
-
utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
|
|
1271
|
-
switch (rb_enc_str_coderange(utf8_string)) {
|
|
1272
|
-
case ENC_CODERANGE_7BIT:
|
|
1273
|
-
return utf8_string;
|
|
1274
|
-
case ENC_CODERANGE_VALID:
|
|
1275
|
-
// For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
|
|
1276
|
-
// TODO: Raise in 3.0.0
|
|
1277
|
-
rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
|
|
1278
|
-
return utf8_string;
|
|
1279
|
-
break;
|
|
1280
|
-
}
|
|
1281
|
-
}
|
|
1282
|
-
|
|
1283
|
-
str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
|
|
1284
|
-
}
|
|
1285
|
-
return str;
|
|
1286
|
-
}
|
|
1287
|
-
|
|
1288
|
-
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1289
|
-
{
|
|
1290
|
-
obj = ensure_valid_encoding(obj);
|
|
1291
|
-
|
|
1292
|
-
fbuffer_append_char(buffer, '"');
|
|
1293
|
-
|
|
1294
|
-
long len;
|
|
1295
|
-
search_state search;
|
|
1296
|
-
search.buffer = buffer;
|
|
1297
|
-
RSTRING_GETMEM(obj, search.ptr, len);
|
|
1298
|
-
search.cursor = search.ptr;
|
|
1299
|
-
search.end = search.ptr + len;
|
|
1300
|
-
|
|
1301
|
-
#ifdef HAVE_SIMD
|
|
1302
|
-
search.matches_mask = 0;
|
|
1303
|
-
search.has_matches = false;
|
|
1304
|
-
search.chunk_base = NULL;
|
|
1305
|
-
#endif /* HAVE_SIMD */
|
|
1306
|
-
|
|
1307
|
-
switch(rb_enc_str_coderange(obj)) {
|
|
1308
|
-
case ENC_CODERANGE_7BIT:
|
|
1309
|
-
case ENC_CODERANGE_VALID:
|
|
1310
|
-
if (RB_UNLIKELY(data->state->ascii_only)) {
|
|
1311
|
-
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
|
1312
|
-
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
|
1313
|
-
convert_UTF8_to_script_safe_JSON(&search);
|
|
1314
|
-
} else {
|
|
1315
|
-
convert_UTF8_to_JSON(&search);
|
|
1316
|
-
}
|
|
1317
|
-
break;
|
|
1318
|
-
default:
|
|
1319
|
-
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
|
1320
|
-
break;
|
|
1321
|
-
}
|
|
1322
|
-
fbuffer_append_char(buffer, '"');
|
|
1323
|
-
}
|
|
1324
|
-
|
|
1325
1331
|
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1326
1332
|
{
|
|
1327
1333
|
VALUE tmp;
|
|
1328
1334
|
if (rb_respond_to(obj, i_to_json)) {
|
|
1329
|
-
tmp =
|
|
1335
|
+
tmp = json_call_to_json(data, obj);
|
|
1330
1336
|
Check_Type(tmp, T_STRING);
|
|
1331
1337
|
fbuffer_append_str(buffer, tmp);
|
|
1332
1338
|
} else {
|
|
@@ -1368,7 +1374,7 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat
|
|
|
1368
1374
|
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1369
1375
|
{
|
|
1370
1376
|
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
|
1371
|
-
fbuffer_append_str(buffer, tmp);
|
|
1377
|
+
fbuffer_append_str(buffer, StringValue(tmp));
|
|
1372
1378
|
}
|
|
1373
1379
|
|
|
1374
1380
|
#ifdef RUBY_INTEGER_UNIFICATION
|
|
@@ -1389,11 +1395,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
|
1389
1395
|
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
|
|
1390
1396
|
if (!allow_nan) {
|
|
1391
1397
|
if (data->state->strict && data->state->as_json) {
|
|
1392
|
-
VALUE casted_obj =
|
|
1398
|
+
VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
|
|
1393
1399
|
if (casted_obj != obj) {
|
|
1394
1400
|
increase_depth(data);
|
|
1395
1401
|
generate_json(buffer, data, casted_obj);
|
|
1396
|
-
data->
|
|
1402
|
+
data->depth--;
|
|
1397
1403
|
return;
|
|
1398
1404
|
}
|
|
1399
1405
|
}
|
|
@@ -1406,12 +1412,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
|
1406
1412
|
}
|
|
1407
1413
|
|
|
1408
1414
|
/* This implementation writes directly into the buffer. We reserve
|
|
1409
|
-
* the
|
|
1415
|
+
* the 32 characters that fpconv_dtoa states as its maximum.
|
|
1410
1416
|
*/
|
|
1411
|
-
fbuffer_inc_capa(buffer,
|
|
1417
|
+
fbuffer_inc_capa(buffer, 32);
|
|
1412
1418
|
char* d = buffer->ptr + buffer->len;
|
|
1413
1419
|
int len = fpconv_dtoa(value, d);
|
|
1414
|
-
|
|
1415
1420
|
/* fpconv_dtoa converts a float to its shortest string representation,
|
|
1416
1421
|
* but it adds a ".0" if this is a plain integer.
|
|
1417
1422
|
*/
|
|
@@ -1461,7 +1466,16 @@ start:
|
|
|
1461
1466
|
break;
|
|
1462
1467
|
case T_STRING:
|
|
1463
1468
|
if (klass != rb_cString) goto general;
|
|
1464
|
-
|
|
1469
|
+
|
|
1470
|
+
if (RB_LIKELY(valid_json_string_p(obj))) {
|
|
1471
|
+
raw_generate_json_string(buffer, data, obj);
|
|
1472
|
+
} else if (as_json_called) {
|
|
1473
|
+
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
|
1474
|
+
} else {
|
|
1475
|
+
obj = ensure_valid_encoding(data, obj, false, false);
|
|
1476
|
+
as_json_called = true;
|
|
1477
|
+
goto start;
|
|
1478
|
+
}
|
|
1465
1479
|
break;
|
|
1466
1480
|
case T_SYMBOL:
|
|
1467
1481
|
generate_json_symbol(buffer, data, obj);
|
|
@@ -1478,7 +1492,7 @@ start:
|
|
|
1478
1492
|
general:
|
|
1479
1493
|
if (data->state->strict) {
|
|
1480
1494
|
if (RTEST(data->state->as_json) && !as_json_called) {
|
|
1481
|
-
obj =
|
|
1495
|
+
obj = json_call_as_json(data->state, obj, Qfalse);
|
|
1482
1496
|
as_json_called = true;
|
|
1483
1497
|
goto start;
|
|
1484
1498
|
} else {
|
|
@@ -1497,16 +1511,14 @@ static VALUE generate_json_try(VALUE d)
|
|
|
1497
1511
|
|
|
1498
1512
|
data->func(data->buffer, data, data->obj);
|
|
1499
1513
|
|
|
1500
|
-
return
|
|
1514
|
+
return fbuffer_finalize(data->buffer);
|
|
1501
1515
|
}
|
|
1502
1516
|
|
|
1503
|
-
static VALUE
|
|
1517
|
+
static VALUE generate_json_ensure(VALUE d)
|
|
1504
1518
|
{
|
|
1505
1519
|
struct generate_json_data *data = (struct generate_json_data *)d;
|
|
1506
1520
|
fbuffer_free(data->buffer);
|
|
1507
1521
|
|
|
1508
|
-
rb_exc_raise(exc);
|
|
1509
|
-
|
|
1510
1522
|
return Qundef;
|
|
1511
1523
|
}
|
|
1512
1524
|
|
|
@@ -1522,14 +1534,15 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
|
|
|
1522
1534
|
|
|
1523
1535
|
struct generate_json_data data = {
|
|
1524
1536
|
.buffer = &buffer,
|
|
1525
|
-
.vstate = self
|
|
1537
|
+
.vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json
|
|
1526
1538
|
.state = state,
|
|
1539
|
+
.depth = state->depth,
|
|
1527
1540
|
.obj = obj,
|
|
1528
1541
|
.func = func
|
|
1529
1542
|
};
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
return
|
|
1543
|
+
VALUE result = rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
|
|
1544
|
+
RB_GC_GUARD(self);
|
|
1545
|
+
return result;
|
|
1533
1546
|
}
|
|
1534
1547
|
|
|
1535
1548
|
/* call-seq:
|
|
@@ -1545,10 +1558,7 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
|
|
|
1545
1558
|
rb_check_arity(argc, 1, 2);
|
|
1546
1559
|
VALUE obj = argv[0];
|
|
1547
1560
|
VALUE io = argc > 1 ? argv[1] : Qnil;
|
|
1548
|
-
|
|
1549
|
-
GET_STATE(self);
|
|
1550
|
-
(void)state;
|
|
1551
|
-
return result;
|
|
1561
|
+
return cState_partial_generate(self, obj, generate_json, io);
|
|
1552
1562
|
}
|
|
1553
1563
|
|
|
1554
1564
|
static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
|
|
@@ -1629,6 +1639,7 @@ static VALUE string_config(VALUE config)
|
|
|
1629
1639
|
*/
|
|
1630
1640
|
static VALUE cState_indent_set(VALUE self, VALUE indent)
|
|
1631
1641
|
{
|
|
1642
|
+
rb_check_frozen(self);
|
|
1632
1643
|
GET_STATE(self);
|
|
1633
1644
|
RB_OBJ_WRITE(self, &state->indent, string_config(indent));
|
|
1634
1645
|
return Qnil;
|
|
@@ -1654,6 +1665,7 @@ static VALUE cState_space(VALUE self)
|
|
|
1654
1665
|
*/
|
|
1655
1666
|
static VALUE cState_space_set(VALUE self, VALUE space)
|
|
1656
1667
|
{
|
|
1668
|
+
rb_check_frozen(self);
|
|
1657
1669
|
GET_STATE(self);
|
|
1658
1670
|
RB_OBJ_WRITE(self, &state->space, string_config(space));
|
|
1659
1671
|
return Qnil;
|
|
@@ -1677,6 +1689,7 @@ static VALUE cState_space_before(VALUE self)
|
|
|
1677
1689
|
*/
|
|
1678
1690
|
static VALUE cState_space_before_set(VALUE self, VALUE space_before)
|
|
1679
1691
|
{
|
|
1692
|
+
rb_check_frozen(self);
|
|
1680
1693
|
GET_STATE(self);
|
|
1681
1694
|
RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
|
|
1682
1695
|
return Qnil;
|
|
@@ -1702,6 +1715,7 @@ static VALUE cState_object_nl(VALUE self)
|
|
|
1702
1715
|
*/
|
|
1703
1716
|
static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
|
|
1704
1717
|
{
|
|
1718
|
+
rb_check_frozen(self);
|
|
1705
1719
|
GET_STATE(self);
|
|
1706
1720
|
RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
|
|
1707
1721
|
return Qnil;
|
|
@@ -1725,6 +1739,7 @@ static VALUE cState_array_nl(VALUE self)
|
|
|
1725
1739
|
*/
|
|
1726
1740
|
static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
|
|
1727
1741
|
{
|
|
1742
|
+
rb_check_frozen(self);
|
|
1728
1743
|
GET_STATE(self);
|
|
1729
1744
|
RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
|
|
1730
1745
|
return Qnil;
|
|
@@ -1748,6 +1763,7 @@ static VALUE cState_as_json(VALUE self)
|
|
|
1748
1763
|
*/
|
|
1749
1764
|
static VALUE cState_as_json_set(VALUE self, VALUE as_json)
|
|
1750
1765
|
{
|
|
1766
|
+
rb_check_frozen(self);
|
|
1751
1767
|
GET_STATE(self);
|
|
1752
1768
|
RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
|
|
1753
1769
|
return Qnil;
|
|
@@ -1790,6 +1806,7 @@ static long long_config(VALUE num)
|
|
|
1790
1806
|
*/
|
|
1791
1807
|
static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
|
|
1792
1808
|
{
|
|
1809
|
+
rb_check_frozen(self);
|
|
1793
1810
|
GET_STATE(self);
|
|
1794
1811
|
state->max_nesting = long_config(depth);
|
|
1795
1812
|
return Qnil;
|
|
@@ -1815,6 +1832,7 @@ static VALUE cState_script_safe(VALUE self)
|
|
|
1815
1832
|
*/
|
|
1816
1833
|
static VALUE cState_script_safe_set(VALUE self, VALUE enable)
|
|
1817
1834
|
{
|
|
1835
|
+
rb_check_frozen(self);
|
|
1818
1836
|
GET_STATE(self);
|
|
1819
1837
|
state->script_safe = RTEST(enable);
|
|
1820
1838
|
return Qnil;
|
|
@@ -1846,6 +1864,7 @@ static VALUE cState_strict(VALUE self)
|
|
|
1846
1864
|
*/
|
|
1847
1865
|
static VALUE cState_strict_set(VALUE self, VALUE enable)
|
|
1848
1866
|
{
|
|
1867
|
+
rb_check_frozen(self);
|
|
1849
1868
|
GET_STATE(self);
|
|
1850
1869
|
state->strict = RTEST(enable);
|
|
1851
1870
|
return Qnil;
|
|
@@ -1870,6 +1889,7 @@ static VALUE cState_allow_nan_p(VALUE self)
|
|
|
1870
1889
|
*/
|
|
1871
1890
|
static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
|
|
1872
1891
|
{
|
|
1892
|
+
rb_check_frozen(self);
|
|
1873
1893
|
GET_STATE(self);
|
|
1874
1894
|
state->allow_nan = RTEST(enable);
|
|
1875
1895
|
return Qnil;
|
|
@@ -1894,11 +1914,25 @@ static VALUE cState_ascii_only_p(VALUE self)
|
|
|
1894
1914
|
*/
|
|
1895
1915
|
static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
|
|
1896
1916
|
{
|
|
1917
|
+
rb_check_frozen(self);
|
|
1897
1918
|
GET_STATE(self);
|
|
1898
1919
|
state->ascii_only = RTEST(enable);
|
|
1899
1920
|
return Qnil;
|
|
1900
1921
|
}
|
|
1901
1922
|
|
|
1923
|
+
static VALUE cState_allow_duplicate_key_p(VALUE self)
|
|
1924
|
+
{
|
|
1925
|
+
GET_STATE(self);
|
|
1926
|
+
switch (state->on_duplicate_key) {
|
|
1927
|
+
case JSON_IGNORE:
|
|
1928
|
+
return Qtrue;
|
|
1929
|
+
case JSON_DEPRECATED:
|
|
1930
|
+
return Qnil;
|
|
1931
|
+
default:
|
|
1932
|
+
return Qfalse;
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
|
|
1902
1936
|
/*
|
|
1903
1937
|
* call-seq: depth
|
|
1904
1938
|
*
|
|
@@ -1918,6 +1952,7 @@ static VALUE cState_depth(VALUE self)
|
|
|
1918
1952
|
*/
|
|
1919
1953
|
static VALUE cState_depth_set(VALUE self, VALUE depth)
|
|
1920
1954
|
{
|
|
1955
|
+
rb_check_frozen(self);
|
|
1921
1956
|
GET_STATE(self);
|
|
1922
1957
|
state->depth = long_config(depth);
|
|
1923
1958
|
return Qnil;
|
|
@@ -1951,20 +1986,36 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_
|
|
|
1951
1986
|
*/
|
|
1952
1987
|
static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
|
|
1953
1988
|
{
|
|
1989
|
+
rb_check_frozen(self);
|
|
1954
1990
|
GET_STATE(self);
|
|
1955
1991
|
buffer_initial_length_set(state, buffer_initial_length);
|
|
1956
1992
|
return Qnil;
|
|
1957
1993
|
}
|
|
1958
1994
|
|
|
1995
|
+
struct configure_state_data {
|
|
1996
|
+
JSON_Generator_State *state;
|
|
1997
|
+
VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
|
|
1998
|
+
};
|
|
1999
|
+
|
|
2000
|
+
static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
|
|
2001
|
+
{
|
|
2002
|
+
if (RTEST(data->vstate)) {
|
|
2003
|
+
RB_OBJ_WRITE(data->vstate, field, value);
|
|
2004
|
+
} else {
|
|
2005
|
+
*field = value;
|
|
2006
|
+
}
|
|
2007
|
+
}
|
|
2008
|
+
|
|
1959
2009
|
static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
|
1960
2010
|
{
|
|
1961
|
-
|
|
2011
|
+
struct configure_state_data *data = (struct configure_state_data *)_arg;
|
|
2012
|
+
JSON_Generator_State *state = data->state;
|
|
1962
2013
|
|
|
1963
|
-
if (key == sym_indent) { state->indent
|
|
1964
|
-
else if (key == sym_space) { state->space
|
|
1965
|
-
else if (key == sym_space_before) { state->space_before
|
|
1966
|
-
else if (key == sym_object_nl) { state->object_nl
|
|
1967
|
-
else if (key == sym_array_nl) { state->array_nl
|
|
2014
|
+
if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
|
|
2015
|
+
else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
|
|
2016
|
+
else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
|
|
2017
|
+
else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
|
|
2018
|
+
else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
|
|
1968
2019
|
else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
|
|
1969
2020
|
else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
|
|
1970
2021
|
else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
|
|
@@ -1973,11 +2024,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1973
2024
|
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
|
|
1974
2025
|
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
|
|
1975
2026
|
else if (key == sym_strict) { state->strict = RTEST(val); }
|
|
1976
|
-
else if (key ==
|
|
2027
|
+
else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
2028
|
+
else if (key == sym_as_json) {
|
|
2029
|
+
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
|
|
2030
|
+
state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
|
|
2031
|
+
state_write_value(data, &state->as_json, proc);
|
|
2032
|
+
}
|
|
1977
2033
|
return ST_CONTINUE;
|
|
1978
2034
|
}
|
|
1979
2035
|
|
|
1980
|
-
static void configure_state(JSON_Generator_State *state, VALUE config)
|
|
2036
|
+
static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
|
|
1981
2037
|
{
|
|
1982
2038
|
if (!RTEST(config)) return;
|
|
1983
2039
|
|
|
@@ -1985,15 +2041,21 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
|
|
|
1985
2041
|
|
|
1986
2042
|
if (!RHASH_SIZE(config)) return;
|
|
1987
2043
|
|
|
2044
|
+
struct configure_state_data data = {
|
|
2045
|
+
.state = state,
|
|
2046
|
+
.vstate = vstate
|
|
2047
|
+
};
|
|
2048
|
+
|
|
1988
2049
|
// We assume in most cases few keys are set so it's faster to go over
|
|
1989
2050
|
// the provided keys than to check all possible keys.
|
|
1990
|
-
rb_hash_foreach(config, configure_state_i, (VALUE)
|
|
2051
|
+
rb_hash_foreach(config, configure_state_i, (VALUE)&data);
|
|
1991
2052
|
}
|
|
1992
2053
|
|
|
1993
2054
|
static VALUE cState_configure(VALUE self, VALUE opts)
|
|
1994
2055
|
{
|
|
2056
|
+
rb_check_frozen(self);
|
|
1995
2057
|
GET_STATE(self);
|
|
1996
|
-
configure_state(state, opts);
|
|
2058
|
+
configure_state(state, self, opts);
|
|
1997
2059
|
return self;
|
|
1998
2060
|
}
|
|
1999
2061
|
|
|
@@ -2001,7 +2063,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
|
|
2001
2063
|
{
|
|
2002
2064
|
JSON_Generator_State state = {0};
|
|
2003
2065
|
state_init(&state);
|
|
2004
|
-
configure_state(&state, opts);
|
|
2066
|
+
configure_state(&state, Qfalse, opts);
|
|
2005
2067
|
|
|
2006
2068
|
char stack_buffer[FBUFFER_STACK_SIZE];
|
|
2007
2069
|
FBuffer buffer = {
|
|
@@ -2013,12 +2075,11 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
|
|
2013
2075
|
.buffer = &buffer,
|
|
2014
2076
|
.vstate = Qfalse,
|
|
2015
2077
|
.state = &state,
|
|
2078
|
+
.depth = state.depth,
|
|
2016
2079
|
.obj = obj,
|
|
2017
2080
|
.func = generate_json,
|
|
2018
2081
|
};
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
return fbuffer_finalize(&buffer);
|
|
2082
|
+
return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
|
|
2022
2083
|
}
|
|
2023
2084
|
|
|
2024
2085
|
/*
|
|
@@ -2088,7 +2149,8 @@ void Init_generator(void)
|
|
|
2088
2149
|
rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
|
|
2089
2150
|
rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
|
|
2090
2151
|
rb_define_method(cState, "generate", cState_generate, -1);
|
|
2091
|
-
|
|
2152
|
+
|
|
2153
|
+
rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
|
|
2092
2154
|
|
|
2093
2155
|
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
|
|
2094
2156
|
|
|
@@ -2117,13 +2179,7 @@ void Init_generator(void)
|
|
|
2117
2179
|
rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
|
|
2118
2180
|
|
|
2119
2181
|
VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
|
|
2120
|
-
rb_define_singleton_method(mString, "included", mString_included_s, 1);
|
|
2121
2182
|
rb_define_method(mString, "to_json", mString_to_json, -1);
|
|
2122
|
-
rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
|
|
2123
|
-
rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
|
|
2124
|
-
|
|
2125
|
-
mString_Extend = rb_define_module_under(mString, "Extend");
|
|
2126
|
-
rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
|
|
2127
2183
|
|
|
2128
2184
|
VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
|
|
2129
2185
|
rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
|
|
@@ -2140,10 +2196,6 @@ void Init_generator(void)
|
|
|
2140
2196
|
i_to_s = rb_intern("to_s");
|
|
2141
2197
|
i_to_json = rb_intern("to_json");
|
|
2142
2198
|
i_new = rb_intern("new");
|
|
2143
|
-
i_pack = rb_intern("pack");
|
|
2144
|
-
i_unpack = rb_intern("unpack");
|
|
2145
|
-
i_create_id = rb_intern("create_id");
|
|
2146
|
-
i_extend = rb_intern("extend");
|
|
2147
2199
|
i_encode = rb_intern("encode");
|
|
2148
2200
|
|
|
2149
2201
|
sym_indent = ID2SYM(rb_intern("indent"));
|
|
@@ -2160,6 +2212,7 @@ void Init_generator(void)
|
|
|
2160
2212
|
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
|
|
2161
2213
|
sym_strict = ID2SYM(rb_intern("strict"));
|
|
2162
2214
|
sym_as_json = ID2SYM(rb_intern("as_json"));
|
|
2215
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
2163
2216
|
|
|
2164
2217
|
usascii_encindex = rb_usascii_encindex();
|
|
2165
2218
|
utf8_encindex = rb_utf8_encindex();
|
|
@@ -2167,22 +2220,5 @@ void Init_generator(void)
|
|
|
2167
2220
|
|
|
2168
2221
|
rb_require("json/ext/generator/state");
|
|
2169
2222
|
|
|
2170
|
-
|
|
2171
|
-
switch(find_simd_implementation()) {
|
|
2172
|
-
#ifdef HAVE_SIMD
|
|
2173
|
-
#ifdef HAVE_SIMD_NEON
|
|
2174
|
-
case SIMD_NEON:
|
|
2175
|
-
search_escape_basic_impl = search_escape_basic_neon;
|
|
2176
|
-
break;
|
|
2177
|
-
#endif /* HAVE_SIMD_NEON */
|
|
2178
|
-
#ifdef HAVE_SIMD_SSE2
|
|
2179
|
-
case SIMD_SSE2:
|
|
2180
|
-
search_escape_basic_impl = search_escape_basic_sse2;
|
|
2181
|
-
break;
|
|
2182
|
-
#endif /* HAVE_SIMD_SSE2 */
|
|
2183
|
-
#endif /* HAVE_SIMD */
|
|
2184
|
-
default:
|
|
2185
|
-
search_escape_basic_impl = search_escape_basic;
|
|
2186
|
-
break;
|
|
2187
|
-
}
|
|
2223
|
+
simd_impl = find_simd_implementation();
|
|
2188
2224
|
}
|