json 2.12.2 → 2.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +70 -8
- data/LEGAL +12 -0
- data/README.md +19 -1
- data/ext/json/ext/fbuffer/fbuffer.h +26 -49
- data/ext/json/ext/generator/extconf.rb +1 -25
- data/ext/json/ext/generator/generator.c +347 -313
- data/ext/json/ext/json.h +92 -0
- data/ext/json/ext/parser/extconf.rb +7 -1
- data/ext/json/ext/parser/parser.c +557 -332
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +191 -0
- data/ext/json/ext/vendor/fpconv.c +12 -11
- data/ext/json/ext/vendor/ryu.h +819 -0
- data/json.gemspec +2 -3
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +57 -36
- data/lib/json/ext/generator/state.rb +11 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +96 -50
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +55 -0
- metadata +8 -4
- data/ext/json/ext/generator/simd.h +0 -112
|
@@ -1,14 +1,20 @@
|
|
|
1
|
-
#include "
|
|
1
|
+
#include "../json.h"
|
|
2
2
|
#include "../fbuffer/fbuffer.h"
|
|
3
3
|
#include "../vendor/fpconv.c"
|
|
4
4
|
|
|
5
5
|
#include <math.h>
|
|
6
6
|
#include <ctype.h>
|
|
7
7
|
|
|
8
|
-
#include "simd.h"
|
|
8
|
+
#include "../simd/simd.h"
|
|
9
9
|
|
|
10
10
|
/* ruby api and some helpers */
|
|
11
11
|
|
|
12
|
+
enum duplicate_key_action {
|
|
13
|
+
JSON_DEPRECATED = 0,
|
|
14
|
+
JSON_IGNORE,
|
|
15
|
+
JSON_RAISE,
|
|
16
|
+
};
|
|
17
|
+
|
|
12
18
|
typedef struct JSON_Generator_StateStruct {
|
|
13
19
|
VALUE indent;
|
|
14
20
|
VALUE space;
|
|
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
|
|
|
21
27
|
long depth;
|
|
22
28
|
long buffer_initial_length;
|
|
23
29
|
|
|
30
|
+
enum duplicate_key_action on_duplicate_key;
|
|
31
|
+
|
|
32
|
+
bool as_json_single_arg;
|
|
24
33
|
bool allow_nan;
|
|
25
34
|
bool ascii_only;
|
|
26
35
|
bool script_safe;
|
|
27
36
|
bool strict;
|
|
28
37
|
} JSON_Generator_State;
|
|
29
38
|
|
|
30
|
-
|
|
31
|
-
#define RB_UNLIKELY(cond) (cond)
|
|
32
|
-
#endif
|
|
33
|
-
|
|
34
|
-
static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
|
|
39
|
+
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
|
|
35
40
|
|
|
36
41
|
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
|
|
37
|
-
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
|
|
42
|
+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
|
|
38
43
|
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
|
|
39
44
|
|
|
40
45
|
|
|
@@ -76,23 +81,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
|
|
|
76
81
|
|
|
77
82
|
static int usascii_encindex, utf8_encindex, binary_encindex;
|
|
78
83
|
|
|
79
|
-
|
|
80
|
-
RBIMPL_ATTR_NORETURN()
|
|
81
|
-
#endif
|
|
82
|
-
static void raise_generator_error_str(VALUE invalid_object, VALUE str)
|
|
84
|
+
NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
|
|
83
85
|
{
|
|
86
|
+
rb_enc_associate_index(str, utf8_encindex);
|
|
84
87
|
VALUE exc = rb_exc_new_str(eGeneratorError, str);
|
|
85
88
|
rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
|
|
86
89
|
rb_exc_raise(exc);
|
|
87
90
|
}
|
|
88
91
|
|
|
89
|
-
#ifdef RBIMPL_ATTR_NORETURN
|
|
90
|
-
RBIMPL_ATTR_NORETURN()
|
|
91
|
-
#endif
|
|
92
92
|
#ifdef RBIMPL_ATTR_FORMAT
|
|
93
93
|
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
|
|
94
94
|
#endif
|
|
95
|
-
static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
|
|
95
|
+
NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
|
|
96
96
|
{
|
|
97
97
|
va_list args;
|
|
98
98
|
va_start(args, fmt);
|
|
@@ -127,18 +127,12 @@ typedef struct _search_state {
|
|
|
127
127
|
#endif /* HAVE_SIMD */
|
|
128
128
|
} search_state;
|
|
129
129
|
|
|
130
|
-
|
|
131
|
-
#define FORCE_INLINE __attribute__((always_inline))
|
|
132
|
-
#else
|
|
133
|
-
#define FORCE_INLINE
|
|
134
|
-
#endif
|
|
135
|
-
|
|
136
|
-
static inline FORCE_INLINE void search_flush(search_state *search)
|
|
130
|
+
static ALWAYS_INLINE() void search_flush(search_state *search)
|
|
137
131
|
{
|
|
138
132
|
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
|
139
133
|
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
|
140
|
-
// For back-to-back characters that need to be escaped,
|
|
141
|
-
// will be called just before calling escape_UTF8_char_basic. There will be no
|
|
134
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
|
135
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
|
142
136
|
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
|
143
137
|
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
|
144
138
|
if (search->ptr > search->cursor) {
|
|
@@ -176,7 +170,7 @@ static inline unsigned char search_escape_basic(search_state *search)
|
|
|
176
170
|
return 0;
|
|
177
171
|
}
|
|
178
172
|
|
|
179
|
-
static
|
|
173
|
+
static ALWAYS_INLINE() void escape_UTF8_char_basic(search_state *search)
|
|
180
174
|
{
|
|
181
175
|
const unsigned char ch = (unsigned char)*search->ptr;
|
|
182
176
|
switch (ch) {
|
|
@@ -263,7 +257,7 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
|
263
257
|
|
|
264
258
|
#ifdef HAVE_SIMD
|
|
265
259
|
|
|
266
|
-
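static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)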
|
|
260
|
+
static ALWAYS_INLINE() char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
|
267
261
|
{
|
|
268
262
|
// Flush the buffer so everything up until the last 'len' characters are unflushed.
|
|
269
263
|
search_flush(search);
|
|
@@ -286,7 +280,7 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
|
|
|
286
280
|
|
|
287
281
|
#ifdef HAVE_SIMD_NEON
|
|
288
282
|
|
|
289
|
-
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
|
283
|
+
static ALWAYS_INLINE() unsigned char neon_next_match(search_state *search)
|
|
290
284
|
{
|
|
291
285
|
uint64_t mask = search->matches_mask;
|
|
292
286
|
uint32_t index = trailing_zeros64(mask) >> 2;
|
|
@@ -304,28 +298,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
|
|
304
298
|
return 1;
|
|
305
299
|
}
|
|
306
300
|
|
|
307
|
-
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
|
308
|
-
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
|
309
|
-
{
|
|
310
|
-
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
|
311
|
-
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
|
312
|
-
return mask & 0x8888888888888888ull;
|
|
313
|
-
}
|
|
314
|
-
|
|
315
|
-
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
|
316
|
-
{
|
|
317
|
-
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
|
318
|
-
|
|
319
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
|
320
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
|
321
|
-
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
|
322
|
-
|
|
323
|
-
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
|
324
|
-
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
|
325
|
-
|
|
326
|
-
return neon_match_mask(needs_escape);
|
|
327
|
-
}
|
|
328
|
-
|
|
329
301
|
static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
330
302
|
{
|
|
331
303
|
if (RB_UNLIKELY(search->has_matches)) {
|
|
@@ -333,7 +305,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
|
333
305
|
if (search->matches_mask > 0) {
|
|
334
306
|
return neon_next_match(search);
|
|
335
307
|
} else {
|
|
336
|
-
// neon_next_match will only advance search->ptr up to the last matching character.
|
|
308
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
|
337
309
|
// Skip over any characters in the last chunk that occur after the last match.
|
|
338
310
|
search->has_matches = false;
|
|
339
311
|
search->ptr = search->chunk_end;
|
|
@@ -342,67 +314,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
|
342
314
|
|
|
343
315
|
/*
|
|
344
316
|
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
|
345
|
-
* need to be escaped.
|
|
346
|
-
*
|
|
317
|
+
* need to be escaped.
|
|
318
|
+
*
|
|
347
319
|
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
|
348
|
-
*
|
|
320
|
+
*
|
|
349
321
|
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
|
350
322
|
* the vector instructions may work on larger vectors.
|
|
351
|
-
*
|
|
323
|
+
*
|
|
352
324
|
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
|
353
|
-
*
|
|
354
|
-
* lower_bound: [20 20 20 20 20 20 20 20]
|
|
355
|
-
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
|
356
|
-
* dblquote: [22 22 22 22 22 22 22 22]
|
|
357
|
-
*
|
|
358
|
-
* Next we load the first chunk of the ptr:
|
|
325
|
+
*
|
|
326
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
|
327
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
|
328
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
|
329
|
+
*
|
|
330
|
+
* Next we load the first chunk of the ptr:
|
|
359
331
|
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
|
360
|
-
*
|
|
332
|
+
*
|
|
361
333
|
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
|
362
334
|
* as no bytes are less than 32 (0x20):
|
|
363
335
|
* [0 0 0 0 0 0 0 0]
|
|
364
|
-
*
|
|
336
|
+
*
|
|
365
337
|
* Next, we check if any byte in chunk is equal to a backslash:
|
|
366
338
|
* [0 0 0 FF 0 0 0 0]
|
|
367
|
-
*
|
|
339
|
+
*
|
|
368
340
|
* Finally we check if any byte in chunk is equal to a double quote:
|
|
369
|
-
* [FF 0 0 0 0 0 0 0]
|
|
370
|
-
*
|
|
341
|
+
* [FF 0 0 0 0 0 0 0]
|
|
342
|
+
*
|
|
371
343
|
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
|
372
344
|
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
|
373
345
|
* This is the needs_escape vector and it is equal to:
|
|
374
|
-
* [FF 0 0 FF 0 0 0 0]
|
|
375
|
-
*
|
|
346
|
+
* [FF 0 0 FF 0 0 0 0]
|
|
347
|
+
*
|
|
376
348
|
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
|
377
349
|
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
|
378
|
-
*
|
|
350
|
+
*
|
|
379
351
|
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
|
380
352
|
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
|
381
353
|
* have at least one byte that needs to be escaped.
|
|
382
354
|
*/
|
|
383
|
-
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
|
384
|
-
uint64_t mask = neon_rules_update(search->ptr);
|
|
385
355
|
|
|
386
|
-
|
|
387
|
-
search->ptr += sizeof(uint8x16_t);
|
|
388
|
-
continue;
|
|
389
|
-
}
|
|
390
|
-
search->matches_mask = mask;
|
|
356
|
+
if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
|
|
391
357
|
search->has_matches = true;
|
|
392
358
|
search->chunk_base = search->ptr;
|
|
393
359
|
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
|
394
360
|
return neon_next_match(search);
|
|
395
361
|
}
|
|
396
362
|
|
|
397
|
-
// There are fewer than 16 bytes left.
|
|
363
|
+
// There are fewer than 16 bytes left.
|
|
398
364
|
unsigned long remaining = (search->end - search->ptr);
|
|
399
365
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
|
400
366
|
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
|
401
367
|
|
|
402
|
-
uint64_t mask =
|
|
368
|
+
uint64_t mask = compute_chunk_mask_neon(s);
|
|
403
369
|
|
|
404
370
|
if (!mask) {
|
|
405
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
371
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
406
372
|
// search->cursor to search->ptr.
|
|
407
373
|
fbuffer_consumed(search->buffer, remaining);
|
|
408
374
|
search->ptr = search->end;
|
|
@@ -428,12 +394,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
|
428
394
|
|
|
429
395
|
#ifdef HAVE_SIMD_SSE2
|
|
430
396
|
|
|
431
|
-
|
|
432
|
-
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
|
433
|
-
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
|
434
|
-
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
|
435
|
-
|
|
436
|
-
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
|
397
|
+
static ALWAYS_INLINE() unsigned char sse2_next_match(search_state *search)
|
|
437
398
|
{
|
|
438
399
|
int mask = search->matches_mask;
|
|
439
400
|
int index = trailing_zeros(mask);
|
|
@@ -457,26 +418,14 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
|
|
457
418
|
#define TARGET_SSE2
|
|
458
419
|
#endif
|
|
459
420
|
|
|
460
|
-
static
|
|
461
|
-
{
|
|
462
|
-
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
|
463
|
-
|
|
464
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
|
465
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
|
466
|
-
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
|
467
|
-
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
|
468
|
-
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
|
469
|
-
return _mm_movemask_epi8(needs_escape);
|
|
470
|
-
}
|
|
471
|
-
|
|
472
|
-
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
|
421
|
+
static TARGET_SSE2 ALWAYS_INLINE() unsigned char search_escape_basic_sse2(search_state *search)
|
|
473
422
|
{
|
|
474
423
|
if (RB_UNLIKELY(search->has_matches)) {
|
|
475
424
|
// There are more matches if search->matches_mask > 0.
|
|
476
425
|
if (search->matches_mask > 0) {
|
|
477
426
|
return sse2_next_match(search);
|
|
478
427
|
} else {
|
|
479
|
-
// sse2_next_match will only advance search->ptr up to the last matching character.
|
|
428
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
|
480
429
|
// Skip over any characters in the last chunk that occur after the last match.
|
|
481
430
|
search->has_matches = false;
|
|
482
431
|
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
|
@@ -487,29 +436,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
|
|
|
487
436
|
}
|
|
488
437
|
}
|
|
489
438
|
|
|
490
|
-
|
|
491
|
-
int needs_escape_mask = sse2_update(search->ptr);
|
|
492
|
-
|
|
493
|
-
if (needs_escape_mask == 0) {
|
|
494
|
-
search->ptr += sizeof(__m128i);
|
|
495
|
-
continue;
|
|
496
|
-
}
|
|
497
|
-
|
|
439
|
+
if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
|
|
498
440
|
search->has_matches = true;
|
|
499
|
-
search->matches_mask = needs_escape_mask;
|
|
500
441
|
search->chunk_base = search->ptr;
|
|
442
|
+
search->chunk_end = search->ptr + sizeof(__m128i);
|
|
501
443
|
return sse2_next_match(search);
|
|
502
444
|
}
|
|
503
445
|
|
|
504
|
-
// There are fewer than 16 bytes left.
|
|
446
|
+
// There are fewer than 16 bytes left.
|
|
505
447
|
unsigned long remaining = (search->end - search->ptr);
|
|
506
448
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
|
507
449
|
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
|
508
450
|
|
|
509
|
-
int needs_escape_mask =
|
|
451
|
+
int needs_escape_mask = compute_chunk_mask_sse2(s);
|
|
510
452
|
|
|
511
453
|
if (needs_escape_mask == 0) {
|
|
512
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
454
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
513
455
|
// search->cursor to search->ptr.
|
|
514
456
|
fbuffer_consumed(search->buffer, remaining);
|
|
515
457
|
search->ptr = search->end;
|
|
@@ -638,7 +580,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
|
|
|
638
580
|
return 0;
|
|
639
581
|
}
|
|
640
582
|
|
|
641
|
-
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
583
|
+
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
584
|
+
{
|
|
642
585
|
const unsigned char ch = (unsigned char)*search->ptr;
|
|
643
586
|
switch (ch_len) {
|
|
644
587
|
case 1: {
|
|
@@ -668,7 +611,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
|
|
|
668
611
|
|
|
669
612
|
uint32_t wchar = 0;
|
|
670
613
|
|
|
671
|
-
switch(ch_len) {
|
|
614
|
+
switch (ch_len) {
|
|
672
615
|
case 2:
|
|
673
616
|
wchar = ch & 0x1F;
|
|
674
617
|
break;
|
|
@@ -828,7 +771,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
828
771
|
* _state_ is a JSON::State object, that can also be used to configure the
|
|
829
772
|
* produced JSON string output further.
|
|
830
773
|
*/
|
|
831
|
-
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
|
774
|
+
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
|
775
|
+
{
|
|
832
776
|
rb_check_arity(argc, 0, 1);
|
|
833
777
|
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
|
834
778
|
return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
|
|
@@ -885,17 +829,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
885
829
|
return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
|
|
886
830
|
}
|
|
887
831
|
|
|
888
|
-
/*
|
|
889
|
-
* call-seq: String.included(modul)
|
|
890
|
-
*
|
|
891
|
-
* Extends _modul_ with the String::Extend module.
|
|
892
|
-
*/
|
|
893
|
-
static VALUE mString_included_s(VALUE self, VALUE modul) {
|
|
894
|
-
VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
|
|
895
|
-
rb_call_super(1, &modul);
|
|
896
|
-
return result;
|
|
897
|
-
}
|
|
898
|
-
|
|
899
832
|
/*
|
|
900
833
|
* call-seq: to_json(*)
|
|
901
834
|
*
|
|
@@ -910,51 +843,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
910
843
|
return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
|
|
911
844
|
}
|
|
912
845
|
|
|
913
|
-
/*
|
|
914
|
-
* call-seq: to_json_raw_object()
|
|
915
|
-
*
|
|
916
|
-
* This method creates a raw object hash, that can be nested into
|
|
917
|
-
* other data structures and will be generated as a raw string. This
|
|
918
|
-
* method should be used, if you want to convert raw strings to JSON
|
|
919
|
-
* instead of UTF-8 strings, e. g. binary data.
|
|
920
|
-
*/
|
|
921
|
-
static VALUE mString_to_json_raw_object(VALUE self)
|
|
922
|
-
{
|
|
923
|
-
VALUE ary;
|
|
924
|
-
VALUE result = rb_hash_new();
|
|
925
|
-
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
|
|
926
|
-
ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
|
|
927
|
-
rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
|
|
928
|
-
return result;
|
|
929
|
-
}
|
|
930
|
-
|
|
931
|
-
/*
|
|
932
|
-
* call-seq: to_json_raw(*args)
|
|
933
|
-
*
|
|
934
|
-
* This method creates a JSON text from the result of a call to
|
|
935
|
-
* to_json_raw_object of this String.
|
|
936
|
-
*/
|
|
937
|
-
static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
|
|
938
|
-
{
|
|
939
|
-
VALUE obj = mString_to_json_raw_object(self);
|
|
940
|
-
Check_Type(obj, T_HASH);
|
|
941
|
-
return mHash_to_json(argc, argv, obj);
|
|
942
|
-
}
|
|
943
|
-
|
|
944
|
-
/*
|
|
945
|
-
* call-seq: json_create(o)
|
|
946
|
-
*
|
|
947
|
-
* Raw Strings are JSON Objects (the raw bytes are stored in an array for the
|
|
948
|
-
* key "raw"). The Ruby String can be created by this module method.
|
|
949
|
-
*/
|
|
950
|
-
static VALUE mString_Extend_json_create(VALUE self, VALUE o)
|
|
951
|
-
{
|
|
952
|
-
VALUE ary;
|
|
953
|
-
Check_Type(o, T_HASH);
|
|
954
|
-
ary = rb_hash_aref(o, rb_str_new2("raw"));
|
|
955
|
-
return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
|
|
956
|
-
}
|
|
957
|
-
|
|
958
846
|
/*
|
|
959
847
|
* call-seq: to_json(*)
|
|
960
848
|
*
|
|
@@ -1092,10 +980,12 @@ static inline VALUE vstate_get(struct generate_json_data *data)
|
|
|
1092
980
|
return data->vstate;
|
|
1093
981
|
}
|
|
1094
982
|
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
};
|
|
983
|
+
static VALUE
|
|
984
|
+
json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
|
|
985
|
+
{
|
|
986
|
+
VALUE proc_args[2] = {object, is_key};
|
|
987
|
+
return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
|
|
988
|
+
}
|
|
1099
989
|
|
|
1100
990
|
static VALUE
|
|
1101
991
|
convert_string_subclass(VALUE key)
|
|
@@ -1112,6 +1002,145 @@ convert_string_subclass(VALUE key)
|
|
|
1112
1002
|
return key_to_s;
|
|
1113
1003
|
}
|
|
1114
1004
|
|
|
1005
|
+
static bool enc_utf8_compatible_p(int enc_idx)
|
|
1006
|
+
{
|
|
1007
|
+
if (enc_idx == usascii_encindex) return true;
|
|
1008
|
+
if (enc_idx == utf8_encindex) return true;
|
|
1009
|
+
return false;
|
|
1010
|
+
}
|
|
1011
|
+
|
|
1012
|
+
static VALUE encode_json_string_try(VALUE str)
|
|
1013
|
+
{
|
|
1014
|
+
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
|
|
1018
|
+
{
|
|
1019
|
+
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
|
|
1020
|
+
return Qundef;
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
static inline bool valid_json_string_p(VALUE str)
|
|
1024
|
+
{
|
|
1025
|
+
int coderange = rb_enc_str_coderange(str);
|
|
1026
|
+
|
|
1027
|
+
if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
|
|
1028
|
+
return true;
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
|
|
1032
|
+
return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
|
|
1033
|
+
}
|
|
1034
|
+
|
|
1035
|
+
return false;
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
|
|
1039
|
+
{
|
|
1040
|
+
if (RB_LIKELY(valid_json_string_p(str))) {
|
|
1041
|
+
return str;
|
|
1042
|
+
}
|
|
1043
|
+
|
|
1044
|
+
if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
|
|
1045
|
+
VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
|
|
1046
|
+
if (coerced_str != str) {
|
|
1047
|
+
if (RB_TYPE_P(coerced_str, T_STRING)) {
|
|
1048
|
+
if (!valid_json_string_p(coerced_str)) {
|
|
1049
|
+
raise_generator_error(str, "source sequence is illegal/malformed utf-8");
|
|
1050
|
+
}
|
|
1051
|
+
} else {
|
|
1052
|
+
// as_json could return another type than T_STRING
|
|
1053
|
+
if (is_key) {
|
|
1054
|
+
raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
|
|
1055
|
+
}
|
|
1056
|
+
}
|
|
1057
|
+
|
|
1058
|
+
return coerced_str;
|
|
1059
|
+
}
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1062
|
+
if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
|
|
1063
|
+
VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
|
|
1064
|
+
switch (rb_enc_str_coderange(utf8_string)) {
|
|
1065
|
+
case ENC_CODERANGE_7BIT:
|
|
1066
|
+
return utf8_string;
|
|
1067
|
+
case ENC_CODERANGE_VALID:
|
|
1068
|
+
// For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
|
|
1069
|
+
// TODO: Raise in 3.0.0
|
|
1070
|
+
rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
|
|
1071
|
+
return utf8_string;
|
|
1072
|
+
break;
|
|
1073
|
+
}
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1080
|
+
{
|
|
1081
|
+
fbuffer_append_char(buffer, '"');
|
|
1082
|
+
|
|
1083
|
+
long len;
|
|
1084
|
+
search_state search;
|
|
1085
|
+
search.buffer = buffer;
|
|
1086
|
+
RSTRING_GETMEM(obj, search.ptr, len);
|
|
1087
|
+
search.cursor = search.ptr;
|
|
1088
|
+
search.end = search.ptr + len;
|
|
1089
|
+
|
|
1090
|
+
#ifdef HAVE_SIMD
|
|
1091
|
+
search.matches_mask = 0;
|
|
1092
|
+
search.has_matches = false;
|
|
1093
|
+
search.chunk_base = NULL;
|
|
1094
|
+
#endif /* HAVE_SIMD */
|
|
1095
|
+
|
|
1096
|
+
switch (rb_enc_str_coderange(obj)) {
|
|
1097
|
+
case ENC_CODERANGE_7BIT:
|
|
1098
|
+
case ENC_CODERANGE_VALID:
|
|
1099
|
+
if (RB_UNLIKELY(data->state->ascii_only)) {
|
|
1100
|
+
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
|
1101
|
+
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
|
1102
|
+
convert_UTF8_to_script_safe_JSON(&search);
|
|
1103
|
+
} else {
|
|
1104
|
+
convert_UTF8_to_JSON(&search);
|
|
1105
|
+
}
|
|
1106
|
+
break;
|
|
1107
|
+
default:
|
|
1108
|
+
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
|
1109
|
+
break;
|
|
1110
|
+
}
|
|
1111
|
+
fbuffer_append_char(buffer, '"');
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1115
|
+
{
|
|
1116
|
+
obj = ensure_valid_encoding(data, obj, false, false);
|
|
1117
|
+
raw_generate_json_string(buffer, data, obj);
|
|
1118
|
+
}
|
|
1119
|
+
|
|
1120
|
+
struct hash_foreach_arg {
|
|
1121
|
+
VALUE hash;
|
|
1122
|
+
struct generate_json_data *data;
|
|
1123
|
+
int first_key_type;
|
|
1124
|
+
bool first;
|
|
1125
|
+
bool mixed_keys_encountered;
|
|
1126
|
+
};
|
|
1127
|
+
|
|
1128
|
+
NOINLINE()
|
|
1129
|
+
static void
|
|
1130
|
+
json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
|
|
1131
|
+
{
|
|
1132
|
+
if (arg->mixed_keys_encountered) {
|
|
1133
|
+
return;
|
|
1134
|
+
}
|
|
1135
|
+
arg->mixed_keys_encountered = true;
|
|
1136
|
+
|
|
1137
|
+
JSON_Generator_State *state = arg->data->state;
|
|
1138
|
+
if (state->on_duplicate_key != JSON_IGNORE) {
|
|
1139
|
+
VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
|
|
1140
|
+
rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
|
|
1141
|
+
}
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1115
1144
|
static int
|
|
1116
1145
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
1117
1146
|
{
|
|
@@ -1122,21 +1151,33 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1122
1151
|
JSON_Generator_State *state = data->state;
|
|
1123
1152
|
|
|
1124
1153
|
long depth = state->depth;
|
|
1125
|
-
int
|
|
1154
|
+
int key_type = rb_type(key);
|
|
1155
|
+
|
|
1156
|
+
if (arg->first) {
|
|
1157
|
+
arg->first = false;
|
|
1158
|
+
arg->first_key_type = key_type;
|
|
1159
|
+
}
|
|
1160
|
+
else {
|
|
1161
|
+
fbuffer_append_char(buffer, ',');
|
|
1162
|
+
}
|
|
1126
1163
|
|
|
1127
|
-
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
|
|
1128
1164
|
if (RB_UNLIKELY(data->state->object_nl)) {
|
|
1129
1165
|
fbuffer_append_str(buffer, data->state->object_nl);
|
|
1130
1166
|
}
|
|
1131
1167
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1132
|
-
|
|
1133
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1134
|
-
}
|
|
1168
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1135
1169
|
}
|
|
1136
1170
|
|
|
1137
1171
|
VALUE key_to_s;
|
|
1138
|
-
|
|
1172
|
+
bool as_json_called = false;
|
|
1173
|
+
|
|
1174
|
+
start:
|
|
1175
|
+
switch (key_type) {
|
|
1139
1176
|
case T_STRING:
|
|
1177
|
+
if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
|
|
1178
|
+
json_inspect_hash_with_mixed_keys(arg);
|
|
1179
|
+
}
|
|
1180
|
+
|
|
1140
1181
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
|
1141
1182
|
key_to_s = key;
|
|
1142
1183
|
} else {
|
|
@@ -1144,15 +1185,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1144
1185
|
}
|
|
1145
1186
|
break;
|
|
1146
1187
|
case T_SYMBOL:
|
|
1188
|
+
if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
|
|
1189
|
+
json_inspect_hash_with_mixed_keys(arg);
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1147
1192
|
key_to_s = rb_sym2str(key);
|
|
1148
1193
|
break;
|
|
1149
1194
|
default:
|
|
1195
|
+
if (data->state->strict) {
|
|
1196
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
|
1197
|
+
key = json_call_as_json(data->state, key, Qtrue);
|
|
1198
|
+
key_type = rb_type(key);
|
|
1199
|
+
as_json_called = true;
|
|
1200
|
+
goto start;
|
|
1201
|
+
} else {
|
|
1202
|
+
raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
|
|
1203
|
+
}
|
|
1204
|
+
}
|
|
1150
1205
|
key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
|
|
1151
1206
|
break;
|
|
1152
1207
|
}
|
|
1153
1208
|
|
|
1209
|
+
key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
|
|
1210
|
+
|
|
1154
1211
|
if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
|
|
1155
|
-
|
|
1212
|
+
raw_generate_json_string(buffer, data, key_to_s);
|
|
1156
1213
|
} else {
|
|
1157
1214
|
generate_json(buffer, data, key_to_s);
|
|
1158
1215
|
}
|
|
@@ -1161,7 +1218,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1161
1218
|
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
|
|
1162
1219
|
generate_json(buffer, data, val);
|
|
1163
1220
|
|
|
1164
|
-
arg->iter++;
|
|
1165
1221
|
return ST_CONTINUE;
|
|
1166
1222
|
}
|
|
1167
1223
|
|
|
@@ -1170,14 +1226,13 @@ static inline long increase_depth(struct generate_json_data *data)
|
|
|
1170
1226
|
JSON_Generator_State *state = data->state;
|
|
1171
1227
|
long depth = ++state->depth;
|
|
1172
1228
|
if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
|
|
1173
|
-
rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
|
|
1229
|
+
rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --state->depth);
|
|
1174
1230
|
}
|
|
1175
1231
|
return depth;
|
|
1176
1232
|
}
|
|
1177
1233
|
|
|
1178
1234
|
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1179
1235
|
{
|
|
1180
|
-
int j;
|
|
1181
1236
|
long depth = increase_depth(data);
|
|
1182
1237
|
|
|
1183
1238
|
if (RHASH_SIZE(obj) == 0) {
|
|
@@ -1189,8 +1244,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
|
1189
1244
|
fbuffer_append_char(buffer, '{');
|
|
1190
1245
|
|
|
1191
1246
|
struct hash_foreach_arg arg = {
|
|
1247
|
+
.hash = obj,
|
|
1192
1248
|
.data = data,
|
|
1193
|
-
.
|
|
1249
|
+
.first = true,
|
|
1194
1250
|
};
|
|
1195
1251
|
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
|
|
1196
1252
|
|
|
@@ -1198,9 +1254,7 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
|
1198
1254
|
if (RB_UNLIKELY(data->state->object_nl)) {
|
|
1199
1255
|
fbuffer_append_str(buffer, data->state->object_nl);
|
|
1200
1256
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1201
|
-
|
|
1202
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1203
|
-
}
|
|
1257
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1204
1258
|
}
|
|
1205
1259
|
}
|
|
1206
1260
|
fbuffer_append_char(buffer, '}');
|
|
@@ -1208,7 +1262,6 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
|
1208
1262
|
|
|
1209
1263
|
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1210
1264
|
{
|
|
1211
|
-
int i, j;
|
|
1212
1265
|
long depth = increase_depth(data);
|
|
1213
1266
|
|
|
1214
1267
|
if (RARRAY_LEN(obj) == 0) {
|
|
@@ -1219,15 +1272,13 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
|
|
|
1219
1272
|
|
|
1220
1273
|
fbuffer_append_char(buffer, '[');
|
|
1221
1274
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
|
1222
|
-
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
|
1275
|
+
for (int i = 0; i < RARRAY_LEN(obj); i++) {
|
|
1223
1276
|
if (i > 0) {
|
|
1224
1277
|
fbuffer_append_char(buffer, ',');
|
|
1225
1278
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
|
1226
1279
|
}
|
|
1227
1280
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1228
|
-
|
|
1229
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1230
|
-
}
|
|
1281
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1231
1282
|
}
|
|
1232
1283
|
generate_json(buffer, data, RARRAY_AREF(obj, i));
|
|
1233
1284
|
}
|
|
@@ -1235,93 +1286,12 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
|
|
|
1235
1286
|
if (RB_UNLIKELY(data->state->array_nl)) {
|
|
1236
1287
|
fbuffer_append_str(buffer, data->state->array_nl);
|
|
1237
1288
|
if (RB_UNLIKELY(data->state->indent)) {
|
|
1238
|
-
|
|
1239
|
-
fbuffer_append_str(buffer, data->state->indent);
|
|
1240
|
-
}
|
|
1289
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
1241
1290
|
}
|
|
1242
1291
|
}
|
|
1243
1292
|
fbuffer_append_char(buffer, ']');
|
|
1244
1293
|
}
|
|
1245
1294
|
|
|
1246
|
-
static inline int enc_utf8_compatible_p(int enc_idx)
|
|
1247
|
-
{
|
|
1248
|
-
if (enc_idx == usascii_encindex) return 1;
|
|
1249
|
-
if (enc_idx == utf8_encindex) return 1;
|
|
1250
|
-
return 0;
|
|
1251
|
-
}
|
|
1252
|
-
|
|
1253
|
-
static VALUE encode_json_string_try(VALUE str)
|
|
1254
|
-
{
|
|
1255
|
-
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
|
|
1256
|
-
}
|
|
1257
|
-
|
|
1258
|
-
static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
|
|
1259
|
-
{
|
|
1260
|
-
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
|
|
1261
|
-
return Qundef;
|
|
1262
|
-
}
|
|
1263
|
-
|
|
1264
|
-
static inline VALUE ensure_valid_encoding(VALUE str)
|
|
1265
|
-
{
|
|
1266
|
-
int encindex = RB_ENCODING_GET(str);
|
|
1267
|
-
VALUE utf8_string;
|
|
1268
|
-
if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
|
|
1269
|
-
if (encindex == binary_encindex) {
|
|
1270
|
-
utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
|
|
1271
|
-
switch (rb_enc_str_coderange(utf8_string)) {
|
|
1272
|
-
case ENC_CODERANGE_7BIT:
|
|
1273
|
-
return utf8_string;
|
|
1274
|
-
case ENC_CODERANGE_VALID:
|
|
1275
|
-
// For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
|
|
1276
|
-
// TODO: Raise in 3.0.0
|
|
1277
|
-
rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
|
|
1278
|
-
return utf8_string;
|
|
1279
|
-
break;
|
|
1280
|
-
}
|
|
1281
|
-
}
|
|
1282
|
-
|
|
1283
|
-
str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
|
|
1284
|
-
}
|
|
1285
|
-
return str;
|
|
1286
|
-
}
|
|
1287
|
-
|
|
1288
|
-
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1289
|
-
{
|
|
1290
|
-
obj = ensure_valid_encoding(obj);
|
|
1291
|
-
|
|
1292
|
-
fbuffer_append_char(buffer, '"');
|
|
1293
|
-
|
|
1294
|
-
long len;
|
|
1295
|
-
search_state search;
|
|
1296
|
-
search.buffer = buffer;
|
|
1297
|
-
RSTRING_GETMEM(obj, search.ptr, len);
|
|
1298
|
-
search.cursor = search.ptr;
|
|
1299
|
-
search.end = search.ptr + len;
|
|
1300
|
-
|
|
1301
|
-
#ifdef HAVE_SIMD
|
|
1302
|
-
search.matches_mask = 0;
|
|
1303
|
-
search.has_matches = false;
|
|
1304
|
-
search.chunk_base = NULL;
|
|
1305
|
-
#endif /* HAVE_SIMD */
|
|
1306
|
-
|
|
1307
|
-
switch(rb_enc_str_coderange(obj)) {
|
|
1308
|
-
case ENC_CODERANGE_7BIT:
|
|
1309
|
-
case ENC_CODERANGE_VALID:
|
|
1310
|
-
if (RB_UNLIKELY(data->state->ascii_only)) {
|
|
1311
|
-
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
|
1312
|
-
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
|
1313
|
-
convert_UTF8_to_script_safe_JSON(&search);
|
|
1314
|
-
} else {
|
|
1315
|
-
convert_UTF8_to_JSON(&search);
|
|
1316
|
-
}
|
|
1317
|
-
break;
|
|
1318
|
-
default:
|
|
1319
|
-
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
|
1320
|
-
break;
|
|
1321
|
-
}
|
|
1322
|
-
fbuffer_append_char(buffer, '"');
|
|
1323
|
-
}
|
|
1324
|
-
|
|
1325
1295
|
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1326
1296
|
{
|
|
1327
1297
|
VALUE tmp;
|
|
@@ -1389,7 +1359,7 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
|
1389
1359
|
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
|
|
1390
1360
|
if (!allow_nan) {
|
|
1391
1361
|
if (data->state->strict && data->state->as_json) {
|
|
1392
|
-
VALUE casted_obj =
|
|
1362
|
+
VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
|
|
1393
1363
|
if (casted_obj != obj) {
|
|
1394
1364
|
increase_depth(data);
|
|
1395
1365
|
generate_json(buffer, data, casted_obj);
|
|
@@ -1406,12 +1376,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
|
1406
1376
|
}
|
|
1407
1377
|
|
|
1408
1378
|
/* This implementation writes directly into the buffer. We reserve
|
|
1409
|
-
* the
|
|
1379
|
+
* the 32 characters that fpconv_dtoa states as its maximum.
|
|
1410
1380
|
*/
|
|
1411
|
-
fbuffer_inc_capa(buffer,
|
|
1381
|
+
fbuffer_inc_capa(buffer, 32);
|
|
1412
1382
|
char* d = buffer->ptr + buffer->len;
|
|
1413
1383
|
int len = fpconv_dtoa(value, d);
|
|
1414
|
-
|
|
1415
1384
|
/* fpconv_dtoa converts a float to its shortest string representation,
|
|
1416
1385
|
* but it adds a ".0" if this is a plain integer.
|
|
1417
1386
|
*/
|
|
@@ -1461,7 +1430,16 @@ start:
|
|
|
1461
1430
|
break;
|
|
1462
1431
|
case T_STRING:
|
|
1463
1432
|
if (klass != rb_cString) goto general;
|
|
1464
|
-
|
|
1433
|
+
|
|
1434
|
+
if (RB_LIKELY(valid_json_string_p(obj))) {
|
|
1435
|
+
raw_generate_json_string(buffer, data, obj);
|
|
1436
|
+
} else if (as_json_called) {
|
|
1437
|
+
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
|
1438
|
+
} else {
|
|
1439
|
+
obj = ensure_valid_encoding(data, obj, false, false);
|
|
1440
|
+
as_json_called = true;
|
|
1441
|
+
goto start;
|
|
1442
|
+
}
|
|
1465
1443
|
break;
|
|
1466
1444
|
case T_SYMBOL:
|
|
1467
1445
|
generate_json_symbol(buffer, data, obj);
|
|
@@ -1478,7 +1456,7 @@ start:
|
|
|
1478
1456
|
general:
|
|
1479
1457
|
if (data->state->strict) {
|
|
1480
1458
|
if (RTEST(data->state->as_json) && !as_json_called) {
|
|
1481
|
-
obj =
|
|
1459
|
+
obj = json_call_as_json(data->state, obj, Qfalse);
|
|
1482
1460
|
as_json_called = true;
|
|
1483
1461
|
goto start;
|
|
1484
1462
|
} else {
|
|
@@ -1497,16 +1475,14 @@ static VALUE generate_json_try(VALUE d)
|
|
|
1497
1475
|
|
|
1498
1476
|
data->func(data->buffer, data, data->obj);
|
|
1499
1477
|
|
|
1500
|
-
return
|
|
1478
|
+
return fbuffer_finalize(data->buffer);
|
|
1501
1479
|
}
|
|
1502
1480
|
|
|
1503
|
-
static VALUE
|
|
1481
|
+
static VALUE generate_json_ensure(VALUE d)
|
|
1504
1482
|
{
|
|
1505
1483
|
struct generate_json_data *data = (struct generate_json_data *)d;
|
|
1506
1484
|
fbuffer_free(data->buffer);
|
|
1507
1485
|
|
|
1508
|
-
rb_exc_raise(exc);
|
|
1509
|
-
|
|
1510
1486
|
return Qundef;
|
|
1511
1487
|
}
|
|
1512
1488
|
|
|
@@ -1527,9 +1503,7 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
|
|
|
1527
1503
|
.obj = obj,
|
|
1528
1504
|
.func = func
|
|
1529
1505
|
};
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
return fbuffer_finalize(&buffer);
|
|
1506
|
+
return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
|
|
1533
1507
|
}
|
|
1534
1508
|
|
|
1535
1509
|
/* call-seq:
|
|
@@ -1545,10 +1519,37 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
|
|
|
1545
1519
|
rb_check_arity(argc, 1, 2);
|
|
1546
1520
|
VALUE obj = argv[0];
|
|
1547
1521
|
VALUE io = argc > 1 ? argv[1] : Qnil;
|
|
1548
|
-
|
|
1522
|
+
return cState_partial_generate(self, obj, generate_json, io);
|
|
1523
|
+
}
|
|
1524
|
+
|
|
1525
|
+
static VALUE cState_generate_new(int argc, VALUE *argv, VALUE self)
|
|
1526
|
+
{
|
|
1527
|
+
rb_check_arity(argc, 1, 2);
|
|
1528
|
+
VALUE obj = argv[0];
|
|
1529
|
+
VALUE io = argc > 1 ? argv[1] : Qnil;
|
|
1530
|
+
|
|
1549
1531
|
GET_STATE(self);
|
|
1550
|
-
|
|
1551
|
-
|
|
1532
|
+
|
|
1533
|
+
JSON_Generator_State new_state;
|
|
1534
|
+
MEMCPY(&new_state, state, JSON_Generator_State, 1);
|
|
1535
|
+
|
|
1536
|
+
// FIXME: depth shouldn't be part of JSON_Generator_State, as that prevents it from being used concurrently.
|
|
1537
|
+
new_state.depth = 0;
|
|
1538
|
+
|
|
1539
|
+
char stack_buffer[FBUFFER_STACK_SIZE];
|
|
1540
|
+
FBuffer buffer = {
|
|
1541
|
+
.io = RTEST(io) ? io : Qfalse,
|
|
1542
|
+
};
|
|
1543
|
+
fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
|
|
1544
|
+
|
|
1545
|
+
struct generate_json_data data = {
|
|
1546
|
+
.buffer = &buffer,
|
|
1547
|
+
.vstate = Qfalse,
|
|
1548
|
+
.state = &new_state,
|
|
1549
|
+
.obj = obj,
|
|
1550
|
+
.func = generate_json
|
|
1551
|
+
};
|
|
1552
|
+
return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
|
|
1552
1553
|
}
|
|
1553
1554
|
|
|
1554
1555
|
static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
|
|
@@ -1899,6 +1900,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
|
|
|
1899
1900
|
return Qnil;
|
|
1900
1901
|
}
|
|
1901
1902
|
|
|
1903
|
+
static VALUE cState_allow_duplicate_key_p(VALUE self)
|
|
1904
|
+
{
|
|
1905
|
+
GET_STATE(self);
|
|
1906
|
+
switch (state->on_duplicate_key) {
|
|
1907
|
+
case JSON_IGNORE:
|
|
1908
|
+
return Qtrue;
|
|
1909
|
+
case JSON_DEPRECATED:
|
|
1910
|
+
return Qnil;
|
|
1911
|
+
default:
|
|
1912
|
+
return Qfalse;
|
|
1913
|
+
}
|
|
1914
|
+
}
|
|
1915
|
+
|
|
1902
1916
|
/*
|
|
1903
1917
|
* call-seq: depth
|
|
1904
1918
|
*
|
|
@@ -1956,15 +1970,30 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
|
|
|
1956
1970
|
return Qnil;
|
|
1957
1971
|
}
|
|
1958
1972
|
|
|
1973
|
+
struct configure_state_data {
|
|
1974
|
+
JSON_Generator_State *state;
|
|
1975
|
+
VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
|
|
1976
|
+
};
|
|
1977
|
+
|
|
1978
|
+
static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
|
|
1979
|
+
{
|
|
1980
|
+
if (RTEST(data->vstate)) {
|
|
1981
|
+
RB_OBJ_WRITE(data->vstate, field, value);
|
|
1982
|
+
} else {
|
|
1983
|
+
*field = value;
|
|
1984
|
+
}
|
|
1985
|
+
}
|
|
1986
|
+
|
|
1959
1987
|
static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
|
1960
1988
|
{
|
|
1961
|
-
|
|
1989
|
+
struct configure_state_data *data = (struct configure_state_data *)_arg;
|
|
1990
|
+
JSON_Generator_State *state = data->state;
|
|
1962
1991
|
|
|
1963
|
-
if (key == sym_indent) { state->indent
|
|
1964
|
-
else if (key == sym_space) { state->space
|
|
1965
|
-
else if (key == sym_space_before) { state->space_before
|
|
1966
|
-
else if (key == sym_object_nl) { state->object_nl
|
|
1967
|
-
else if (key == sym_array_nl) { state->array_nl
|
|
1992
|
+
if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
|
|
1993
|
+
else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
|
|
1994
|
+
else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
|
|
1995
|
+
else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
|
|
1996
|
+
else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
|
|
1968
1997
|
else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
|
|
1969
1998
|
else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
|
|
1970
1999
|
else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
|
|
@@ -1973,11 +2002,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1973
2002
|
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
|
|
1974
2003
|
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
|
|
1975
2004
|
else if (key == sym_strict) { state->strict = RTEST(val); }
|
|
1976
|
-
else if (key ==
|
|
2005
|
+
else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
2006
|
+
else if (key == sym_as_json) {
|
|
2007
|
+
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
|
|
2008
|
+
state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
|
|
2009
|
+
state_write_value(data, &state->as_json, proc);
|
|
2010
|
+
}
|
|
1977
2011
|
return ST_CONTINUE;
|
|
1978
2012
|
}
|
|
1979
2013
|
|
|
1980
|
-
static void configure_state(JSON_Generator_State *state, VALUE config)
|
|
2014
|
+
static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
|
|
1981
2015
|
{
|
|
1982
2016
|
if (!RTEST(config)) return;
|
|
1983
2017
|
|
|
@@ -1985,15 +2019,20 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
|
|
|
1985
2019
|
|
|
1986
2020
|
if (!RHASH_SIZE(config)) return;
|
|
1987
2021
|
|
|
2022
|
+
struct configure_state_data data = {
|
|
2023
|
+
.state = state,
|
|
2024
|
+
.vstate = vstate
|
|
2025
|
+
};
|
|
2026
|
+
|
|
1988
2027
|
// We assume in most cases few keys are set so it's faster to go over
|
|
1989
2028
|
// the provided keys than to check all possible keys.
|
|
1990
|
-
rb_hash_foreach(config, configure_state_i, (VALUE)
|
|
2029
|
+
rb_hash_foreach(config, configure_state_i, (VALUE)&data);
|
|
1991
2030
|
}
|
|
1992
2031
|
|
|
1993
2032
|
static VALUE cState_configure(VALUE self, VALUE opts)
|
|
1994
2033
|
{
|
|
1995
2034
|
GET_STATE(self);
|
|
1996
|
-
configure_state(state, opts);
|
|
2035
|
+
configure_state(state, self, opts);
|
|
1997
2036
|
return self;
|
|
1998
2037
|
}
|
|
1999
2038
|
|
|
@@ -2001,7 +2040,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
|
|
2001
2040
|
{
|
|
2002
2041
|
JSON_Generator_State state = {0};
|
|
2003
2042
|
state_init(&state);
|
|
2004
|
-
configure_state(&state, opts);
|
|
2043
|
+
configure_state(&state, Qfalse, opts);
|
|
2005
2044
|
|
|
2006
2045
|
char stack_buffer[FBUFFER_STACK_SIZE];
|
|
2007
2046
|
FBuffer buffer = {
|
|
@@ -2016,9 +2055,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
|
|
2016
2055
|
.obj = obj,
|
|
2017
2056
|
.func = generate_json,
|
|
2018
2057
|
};
|
|
2019
|
-
|
|
2020
|
-
|
|
2021
|
-
return fbuffer_finalize(&buffer);
|
|
2058
|
+
return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
|
|
2022
2059
|
}
|
|
2023
2060
|
|
|
2024
2061
|
/*
|
|
@@ -2088,7 +2125,9 @@ void Init_generator(void)
|
|
|
2088
2125
|
rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
|
|
2089
2126
|
rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
|
|
2090
2127
|
rb_define_method(cState, "generate", cState_generate, -1);
|
|
2091
|
-
|
|
2128
|
+
rb_define_method(cState, "generate_new", cState_generate_new, -1); // :nodoc:
|
|
2129
|
+
|
|
2130
|
+
rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
|
|
2092
2131
|
|
|
2093
2132
|
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
|
|
2094
2133
|
|
|
@@ -2117,13 +2156,7 @@ void Init_generator(void)
|
|
|
2117
2156
|
rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
|
|
2118
2157
|
|
|
2119
2158
|
VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
|
|
2120
|
-
rb_define_singleton_method(mString, "included", mString_included_s, 1);
|
|
2121
2159
|
rb_define_method(mString, "to_json", mString_to_json, -1);
|
|
2122
|
-
rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
|
|
2123
|
-
rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
|
|
2124
|
-
|
|
2125
|
-
mString_Extend = rb_define_module_under(mString, "Extend");
|
|
2126
|
-
rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
|
|
2127
2160
|
|
|
2128
2161
|
VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
|
|
2129
2162
|
rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
|
|
@@ -2160,6 +2193,7 @@ void Init_generator(void)
|
|
|
2160
2193
|
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
|
|
2161
2194
|
sym_strict = ID2SYM(rb_intern("strict"));
|
|
2162
2195
|
sym_as_json = ID2SYM(rb_intern("as_json"));
|
|
2196
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
2163
2197
|
|
|
2164
2198
|
usascii_encindex = rb_usascii_encindex();
|
|
2165
2199
|
utf8_encindex = rb_utf8_encindex();
|
|
@@ -2168,7 +2202,7 @@ void Init_generator(void)
|
|
|
2168
2202
|
rb_require("json/ext/generator/state");
|
|
2169
2203
|
|
|
2170
2204
|
|
|
2171
|
-
switch(find_simd_implementation()) {
|
|
2205
|
+
switch (find_simd_implementation()) {
|
|
2172
2206
|
#ifdef HAVE_SIMD
|
|
2173
2207
|
#ifdef HAVE_SIMD_NEON
|
|
2174
2208
|
case SIMD_NEON:
|