json 2.12.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +51 -8
- data/README.md +3 -1
- data/ext/json/ext/fbuffer/fbuffer.h +31 -5
- data/ext/json/ext/generator/extconf.rb +1 -25
- data/ext/json/ext/generator/generator.c +171 -190
- data/ext/json/ext/parser/extconf.rb +5 -1
- data/ext/json/ext/parser/parser.c +177 -36
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +188 -0
- data/ext/json/ext/vendor/fpconv.c +12 -11
- data/json.gemspec +2 -3
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +50 -24
- data/lib/json/ext/generator/state.rb +7 -14
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +63 -45
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +55 -0
- metadata +6 -4
- data/ext/json/ext/generator/simd.h +0 -112
@@ -5,10 +5,16 @@
|
|
5
5
|
#include <math.h>
|
6
6
|
#include <ctype.h>
|
7
7
|
|
8
|
-
#include "simd.h"
|
8
|
+
#include "../simd/simd.h"
|
9
9
|
|
10
10
|
/* ruby api and some helpers */
|
11
11
|
|
12
|
+
enum duplicate_key_action {
|
13
|
+
JSON_DEPRECATED = 0,
|
14
|
+
JSON_IGNORE,
|
15
|
+
JSON_RAISE,
|
16
|
+
};
|
17
|
+
|
12
18
|
typedef struct JSON_Generator_StateStruct {
|
13
19
|
VALUE indent;
|
14
20
|
VALUE space;
|
@@ -21,6 +27,9 @@ typedef struct JSON_Generator_StateStruct {
|
|
21
27
|
long depth;
|
22
28
|
long buffer_initial_length;
|
23
29
|
|
30
|
+
enum duplicate_key_action on_duplicate_key;
|
31
|
+
|
32
|
+
bool as_json_single_arg;
|
24
33
|
bool allow_nan;
|
25
34
|
bool ascii_only;
|
26
35
|
bool script_safe;
|
@@ -31,10 +40,10 @@ typedef struct JSON_Generator_StateStruct {
|
|
31
40
|
#define RB_UNLIKELY(cond) (cond)
|
32
41
|
#endif
|
33
42
|
|
34
|
-
static VALUE mJSON, cState, cFragment,
|
43
|
+
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
|
35
44
|
|
36
45
|
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
|
37
|
-
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
|
46
|
+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
|
38
47
|
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
|
39
48
|
|
40
49
|
|
@@ -137,8 +146,8 @@ static inline FORCE_INLINE void search_flush(search_state *search)
|
|
137
146
|
{
|
138
147
|
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
139
148
|
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
140
|
-
// For back-to-back characters that need to be escaped,
|
141
|
-
// will be called just before calling escape_UTF8_char_basic. There will be no
|
149
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
150
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
142
151
|
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
143
152
|
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
144
153
|
if (search->ptr > search->cursor) {
|
@@ -304,28 +313,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
|
304
313
|
return 1;
|
305
314
|
}
|
306
315
|
|
307
|
-
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
308
|
-
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
309
|
-
{
|
310
|
-
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
311
|
-
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
312
|
-
return mask & 0x8888888888888888ull;
|
313
|
-
}
|
314
|
-
|
315
|
-
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
316
|
-
{
|
317
|
-
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
318
|
-
|
319
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
320
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
321
|
-
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
322
|
-
|
323
|
-
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
324
|
-
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
325
|
-
|
326
|
-
return neon_match_mask(needs_escape);
|
327
|
-
}
|
328
|
-
|
329
316
|
static inline unsigned char search_escape_basic_neon(search_state *search)
|
330
317
|
{
|
331
318
|
if (RB_UNLIKELY(search->has_matches)) {
|
@@ -333,7 +320,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
333
320
|
if (search->matches_mask > 0) {
|
334
321
|
return neon_next_match(search);
|
335
322
|
} else {
|
336
|
-
// neon_next_match will only advance search->ptr up to the last matching character.
|
323
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
337
324
|
// Skip over any characters in the last chunk that occur after the last match.
|
338
325
|
search->has_matches = false;
|
339
326
|
search->ptr = search->chunk_end;
|
@@ -342,67 +329,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
342
329
|
|
343
330
|
/*
|
344
331
|
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
345
|
-
* need to be escaped.
|
346
|
-
*
|
332
|
+
* need to be escaped.
|
333
|
+
*
|
347
334
|
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
348
|
-
*
|
335
|
+
*
|
349
336
|
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
350
337
|
* the vector instructions may work on larger vectors.
|
351
|
-
*
|
338
|
+
*
|
352
339
|
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
353
|
-
*
|
354
|
-
* lower_bound: [20 20 20 20 20 20 20 20]
|
355
|
-
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
356
|
-
* dblquote: [22 22 22 22 22 22 22 22]
|
357
|
-
*
|
358
|
-
* Next we load the first chunk of the ptr:
|
340
|
+
*
|
341
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
342
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
343
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
344
|
+
*
|
345
|
+
* Next we load the first chunk of the ptr:
|
359
346
|
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
360
|
-
*
|
347
|
+
*
|
361
348
|
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
362
349
|
* as no bytes are less than 32 (0x20):
|
363
350
|
* [0 0 0 0 0 0 0 0]
|
364
|
-
*
|
351
|
+
*
|
365
352
|
* Next, we check if any byte in chunk is equal to a backslash:
|
366
353
|
* [0 0 0 FF 0 0 0 0]
|
367
|
-
*
|
354
|
+
*
|
368
355
|
* Finally we check if any byte in chunk is equal to a double quote:
|
369
|
-
* [FF 0 0 0 0 0 0 0]
|
370
|
-
*
|
356
|
+
* [FF 0 0 0 0 0 0 0]
|
357
|
+
*
|
371
358
|
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
372
359
|
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
373
360
|
* This is the needs_escape vector and it is equal to:
|
374
|
-
* [FF 0 0 FF 0 0 0 0]
|
375
|
-
*
|
361
|
+
* [FF 0 0 FF 0 0 0 0]
|
362
|
+
*
|
376
363
|
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
377
364
|
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
378
|
-
*
|
365
|
+
*
|
379
366
|
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
380
367
|
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
381
368
|
* have at least one byte that needs to be escaped.
|
382
369
|
*/
|
383
|
-
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
384
|
-
uint64_t mask = neon_rules_update(search->ptr);
|
385
370
|
|
386
|
-
|
387
|
-
search->ptr += sizeof(uint8x16_t);
|
388
|
-
continue;
|
389
|
-
}
|
390
|
-
search->matches_mask = mask;
|
371
|
+
if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
|
391
372
|
search->has_matches = true;
|
392
373
|
search->chunk_base = search->ptr;
|
393
374
|
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
394
375
|
return neon_next_match(search);
|
395
376
|
}
|
396
377
|
|
397
|
-
// There are fewer than 16 bytes left.
|
378
|
+
// There are fewer than 16 bytes left.
|
398
379
|
unsigned long remaining = (search->end - search->ptr);
|
399
380
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
400
381
|
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
401
382
|
|
402
|
-
uint64_t mask =
|
383
|
+
uint64_t mask = compute_chunk_mask_neon(s);
|
403
384
|
|
404
385
|
if (!mask) {
|
405
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
386
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
406
387
|
// search->cursor to search->ptr.
|
407
388
|
fbuffer_consumed(search->buffer, remaining);
|
408
389
|
search->ptr = search->end;
|
@@ -428,11 +409,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
428
409
|
|
429
410
|
#ifdef HAVE_SIMD_SSE2
|
430
411
|
|
431
|
-
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
432
|
-
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
433
|
-
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
434
|
-
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
435
|
-
|
436
412
|
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
437
413
|
{
|
438
414
|
int mask = search->matches_mask;
|
@@ -457,18 +433,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
|
457
433
|
#define TARGET_SSE2
|
458
434
|
#endif
|
459
435
|
|
460
|
-
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
|
461
|
-
{
|
462
|
-
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
463
|
-
|
464
|
-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
465
|
-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
466
|
-
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
467
|
-
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
468
|
-
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
469
|
-
return _mm_movemask_epi8(needs_escape);
|
470
|
-
}
|
471
|
-
|
472
436
|
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
473
437
|
{
|
474
438
|
if (RB_UNLIKELY(search->has_matches)) {
|
@@ -476,7 +440,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
|
|
476
440
|
if (search->matches_mask > 0) {
|
477
441
|
return sse2_next_match(search);
|
478
442
|
} else {
|
479
|
-
// sse2_next_match will only advance search->ptr up to the last matching character.
|
443
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
480
444
|
// Skip over any characters in the last chunk that occur after the last match.
|
481
445
|
search->has_matches = false;
|
482
446
|
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
@@ -487,29 +451,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
|
|
487
451
|
}
|
488
452
|
}
|
489
453
|
|
490
|
-
|
491
|
-
int needs_escape_mask = sse2_update(search->ptr);
|
492
|
-
|
493
|
-
if (needs_escape_mask == 0) {
|
494
|
-
search->ptr += sizeof(__m128i);
|
495
|
-
continue;
|
496
|
-
}
|
497
|
-
|
454
|
+
if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
|
498
455
|
search->has_matches = true;
|
499
|
-
search->matches_mask = needs_escape_mask;
|
500
456
|
search->chunk_base = search->ptr;
|
457
|
+
search->chunk_end = search->ptr + sizeof(__m128i);
|
501
458
|
return sse2_next_match(search);
|
502
459
|
}
|
503
460
|
|
504
|
-
// There are fewer than 16 bytes left.
|
461
|
+
// There are fewer than 16 bytes left.
|
505
462
|
unsigned long remaining = (search->end - search->ptr);
|
506
463
|
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
507
464
|
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
508
465
|
|
509
|
-
int needs_escape_mask =
|
466
|
+
int needs_escape_mask = compute_chunk_mask_sse2(s);
|
510
467
|
|
511
468
|
if (needs_escape_mask == 0) {
|
512
|
-
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
469
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
513
470
|
// search->cursor to search->ptr.
|
514
471
|
fbuffer_consumed(search->buffer, remaining);
|
515
472
|
search->ptr = search->end;
|
@@ -638,7 +595,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
|
|
638
595
|
return 0;
|
639
596
|
}
|
640
597
|
|
641
|
-
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
598
|
+
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
599
|
+
{
|
642
600
|
const unsigned char ch = (unsigned char)*search->ptr;
|
643
601
|
switch (ch_len) {
|
644
602
|
case 1: {
|
@@ -668,7 +626,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
|
|
668
626
|
|
669
627
|
uint32_t wchar = 0;
|
670
628
|
|
671
|
-
switch(ch_len) {
|
629
|
+
switch (ch_len) {
|
672
630
|
case 2:
|
673
631
|
wchar = ch & 0x1F;
|
674
632
|
break;
|
@@ -828,7 +786,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
|
828
786
|
* _state_ is a JSON::State object, that can also be used to configure the
|
829
787
|
* produced JSON string output further.
|
830
788
|
*/
|
831
|
-
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
789
|
+
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
790
|
+
{
|
832
791
|
rb_check_arity(argc, 0, 1);
|
833
792
|
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
834
793
|
return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
|
@@ -885,17 +844,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
|
|
885
844
|
return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
|
886
845
|
}
|
887
846
|
|
888
|
-
/*
|
889
|
-
* call-seq: String.included(modul)
|
890
|
-
*
|
891
|
-
* Extends _modul_ with the String::Extend module.
|
892
|
-
*/
|
893
|
-
static VALUE mString_included_s(VALUE self, VALUE modul) {
|
894
|
-
VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
|
895
|
-
rb_call_super(1, &modul);
|
896
|
-
return result;
|
897
|
-
}
|
898
|
-
|
899
847
|
/*
|
900
848
|
* call-seq: to_json(*)
|
901
849
|
*
|
@@ -910,51 +858,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
|
|
910
858
|
return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
|
911
859
|
}
|
912
860
|
|
913
|
-
/*
|
914
|
-
* call-seq: to_json_raw_object()
|
915
|
-
*
|
916
|
-
* This method creates a raw object hash, that can be nested into
|
917
|
-
* other data structures and will be generated as a raw string. This
|
918
|
-
* method should be used, if you want to convert raw strings to JSON
|
919
|
-
* instead of UTF-8 strings, e. g. binary data.
|
920
|
-
*/
|
921
|
-
static VALUE mString_to_json_raw_object(VALUE self)
|
922
|
-
{
|
923
|
-
VALUE ary;
|
924
|
-
VALUE result = rb_hash_new();
|
925
|
-
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
|
926
|
-
ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
|
927
|
-
rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
|
928
|
-
return result;
|
929
|
-
}
|
930
|
-
|
931
|
-
/*
|
932
|
-
* call-seq: to_json_raw(*args)
|
933
|
-
*
|
934
|
-
* This method creates a JSON text from the result of a call to
|
935
|
-
* to_json_raw_object of this String.
|
936
|
-
*/
|
937
|
-
static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
|
938
|
-
{
|
939
|
-
VALUE obj = mString_to_json_raw_object(self);
|
940
|
-
Check_Type(obj, T_HASH);
|
941
|
-
return mHash_to_json(argc, argv, obj);
|
942
|
-
}
|
943
|
-
|
944
|
-
/*
|
945
|
-
* call-seq: json_create(o)
|
946
|
-
*
|
947
|
-
* Raw Strings are JSON Objects (the raw bytes are stored in an array for the
|
948
|
-
* key "raw"). The Ruby String can be created by this module method.
|
949
|
-
*/
|
950
|
-
static VALUE mString_Extend_json_create(VALUE self, VALUE o)
|
951
|
-
{
|
952
|
-
VALUE ary;
|
953
|
-
Check_Type(o, T_HASH);
|
954
|
-
ary = rb_hash_aref(o, rb_str_new2("raw"));
|
955
|
-
return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
|
956
|
-
}
|
957
|
-
|
958
861
|
/*
|
959
862
|
* call-seq: to_json(*)
|
960
863
|
*
|
@@ -1093,8 +996,11 @@ static inline VALUE vstate_get(struct generate_json_data *data)
|
|
1093
996
|
}
|
1094
997
|
|
1095
998
|
struct hash_foreach_arg {
|
999
|
+
VALUE hash;
|
1096
1000
|
struct generate_json_data *data;
|
1097
|
-
int
|
1001
|
+
int first_key_type;
|
1002
|
+
bool first;
|
1003
|
+
bool mixed_keys_encountered;
|
1098
1004
|
};
|
1099
1005
|
|
1100
1006
|
static VALUE
|
@@ -1112,6 +1018,29 @@ convert_string_subclass(VALUE key)
|
|
1112
1018
|
return key_to_s;
|
1113
1019
|
}
|
1114
1020
|
|
1021
|
+
NOINLINE()
|
1022
|
+
static void
|
1023
|
+
json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
|
1024
|
+
{
|
1025
|
+
if (arg->mixed_keys_encountered) {
|
1026
|
+
return;
|
1027
|
+
}
|
1028
|
+
arg->mixed_keys_encountered = true;
|
1029
|
+
|
1030
|
+
JSON_Generator_State *state = arg->data->state;
|
1031
|
+
if (state->on_duplicate_key != JSON_IGNORE) {
|
1032
|
+
VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
|
1033
|
+
rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
|
1034
|
+
}
|
1035
|
+
}
|
1036
|
+
|
1037
|
+
static VALUE
|
1038
|
+
json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
|
1039
|
+
{
|
1040
|
+
VALUE proc_args[2] = {object, is_key};
|
1041
|
+
return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
|
1042
|
+
}
|
1043
|
+
|
1115
1044
|
static int
|
1116
1045
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
1117
1046
|
{
|
@@ -1122,21 +1051,33 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
1122
1051
|
JSON_Generator_State *state = data->state;
|
1123
1052
|
|
1124
1053
|
long depth = state->depth;
|
1125
|
-
int
|
1054
|
+
int key_type = rb_type(key);
|
1055
|
+
|
1056
|
+
if (arg->first) {
|
1057
|
+
arg->first = false;
|
1058
|
+
arg->first_key_type = key_type;
|
1059
|
+
}
|
1060
|
+
else {
|
1061
|
+
fbuffer_append_char(buffer, ',');
|
1062
|
+
}
|
1126
1063
|
|
1127
|
-
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
|
1128
1064
|
if (RB_UNLIKELY(data->state->object_nl)) {
|
1129
1065
|
fbuffer_append_str(buffer, data->state->object_nl);
|
1130
1066
|
}
|
1131
1067
|
if (RB_UNLIKELY(data->state->indent)) {
|
1132
|
-
|
1133
|
-
fbuffer_append_str(buffer, data->state->indent);
|
1134
|
-
}
|
1068
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
1135
1069
|
}
|
1136
1070
|
|
1137
1071
|
VALUE key_to_s;
|
1138
|
-
|
1072
|
+
bool as_json_called = false;
|
1073
|
+
|
1074
|
+
start:
|
1075
|
+
switch (key_type) {
|
1139
1076
|
case T_STRING:
|
1077
|
+
if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
|
1078
|
+
json_inspect_hash_with_mixed_keys(arg);
|
1079
|
+
}
|
1080
|
+
|
1140
1081
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
1141
1082
|
key_to_s = key;
|
1142
1083
|
} else {
|
@@ -1144,9 +1085,23 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
1144
1085
|
}
|
1145
1086
|
break;
|
1146
1087
|
case T_SYMBOL:
|
1088
|
+
if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
|
1089
|
+
json_inspect_hash_with_mixed_keys(arg);
|
1090
|
+
}
|
1091
|
+
|
1147
1092
|
key_to_s = rb_sym2str(key);
|
1148
1093
|
break;
|
1149
1094
|
default:
|
1095
|
+
if (data->state->strict) {
|
1096
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
1097
|
+
key = json_call_as_json(data->state, key, Qtrue);
|
1098
|
+
key_type = rb_type(key);
|
1099
|
+
as_json_called = true;
|
1100
|
+
goto start;
|
1101
|
+
} else {
|
1102
|
+
raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
|
1103
|
+
}
|
1104
|
+
}
|
1150
1105
|
key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
|
1151
1106
|
break;
|
1152
1107
|
}
|
@@ -1161,7 +1116,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
1161
1116
|
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
|
1162
1117
|
generate_json(buffer, data, val);
|
1163
1118
|
|
1164
|
-
arg->iter++;
|
1165
1119
|
return ST_CONTINUE;
|
1166
1120
|
}
|
1167
1121
|
|
@@ -1177,7 +1131,6 @@ static inline long increase_depth(struct generate_json_data *data)
|
|
1177
1131
|
|
1178
1132
|
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1179
1133
|
{
|
1180
|
-
int j;
|
1181
1134
|
long depth = increase_depth(data);
|
1182
1135
|
|
1183
1136
|
if (RHASH_SIZE(obj) == 0) {
|
@@ -1189,8 +1142,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
1189
1142
|
fbuffer_append_char(buffer, '{');
|
1190
1143
|
|
1191
1144
|
struct hash_foreach_arg arg = {
|
1145
|
+
.hash = obj,
|
1192
1146
|
.data = data,
|
1193
|
-
.
|
1147
|
+
.first = true,
|
1194
1148
|
};
|
1195
1149
|
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
|
1196
1150
|
|
@@ -1198,9 +1152,7 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
1198
1152
|
if (RB_UNLIKELY(data->state->object_nl)) {
|
1199
1153
|
fbuffer_append_str(buffer, data->state->object_nl);
|
1200
1154
|
if (RB_UNLIKELY(data->state->indent)) {
|
1201
|
-
|
1202
|
-
fbuffer_append_str(buffer, data->state->indent);
|
1203
|
-
}
|
1155
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
1204
1156
|
}
|
1205
1157
|
}
|
1206
1158
|
fbuffer_append_char(buffer, '}');
|
@@ -1208,7 +1160,6 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
|
|
1208
1160
|
|
1209
1161
|
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1210
1162
|
{
|
1211
|
-
int i, j;
|
1212
1163
|
long depth = increase_depth(data);
|
1213
1164
|
|
1214
1165
|
if (RARRAY_LEN(obj) == 0) {
|
@@ -1219,15 +1170,13 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
|
|
1219
1170
|
|
1220
1171
|
fbuffer_append_char(buffer, '[');
|
1221
1172
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
1222
|
-
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
1173
|
+
for (int i = 0; i < RARRAY_LEN(obj); i++) {
|
1223
1174
|
if (i > 0) {
|
1224
1175
|
fbuffer_append_char(buffer, ',');
|
1225
1176
|
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
1226
1177
|
}
|
1227
1178
|
if (RB_UNLIKELY(data->state->indent)) {
|
1228
|
-
|
1229
|
-
fbuffer_append_str(buffer, data->state->indent);
|
1230
|
-
}
|
1179
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
1231
1180
|
}
|
1232
1181
|
generate_json(buffer, data, RARRAY_AREF(obj, i));
|
1233
1182
|
}
|
@@ -1235,9 +1184,7 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
|
|
1235
1184
|
if (RB_UNLIKELY(data->state->array_nl)) {
|
1236
1185
|
fbuffer_append_str(buffer, data->state->array_nl);
|
1237
1186
|
if (RB_UNLIKELY(data->state->indent)) {
|
1238
|
-
|
1239
|
-
fbuffer_append_str(buffer, data->state->indent);
|
1240
|
-
}
|
1187
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
1241
1188
|
}
|
1242
1189
|
}
|
1243
1190
|
fbuffer_append_char(buffer, ']');
|
@@ -1304,7 +1251,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
1304
1251
|
search.chunk_base = NULL;
|
1305
1252
|
#endif /* HAVE_SIMD */
|
1306
1253
|
|
1307
|
-
switch(rb_enc_str_coderange(obj)) {
|
1254
|
+
switch (rb_enc_str_coderange(obj)) {
|
1308
1255
|
case ENC_CODERANGE_7BIT:
|
1309
1256
|
case ENC_CODERANGE_VALID:
|
1310
1257
|
if (RB_UNLIKELY(data->state->ascii_only)) {
|
@@ -1389,7 +1336,7 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
1389
1336
|
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
|
1390
1337
|
if (!allow_nan) {
|
1391
1338
|
if (data->state->strict && data->state->as_json) {
|
1392
|
-
VALUE casted_obj =
|
1339
|
+
VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
|
1393
1340
|
if (casted_obj != obj) {
|
1394
1341
|
increase_depth(data);
|
1395
1342
|
generate_json(buffer, data, casted_obj);
|
@@ -1406,12 +1353,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
|
|
1406
1353
|
}
|
1407
1354
|
|
1408
1355
|
/* This implementation writes directly into the buffer. We reserve
|
1409
|
-
* the
|
1356
|
+
* the 32 characters that fpconv_dtoa states as its maximum.
|
1410
1357
|
*/
|
1411
|
-
fbuffer_inc_capa(buffer,
|
1358
|
+
fbuffer_inc_capa(buffer, 32);
|
1412
1359
|
char* d = buffer->ptr + buffer->len;
|
1413
1360
|
int len = fpconv_dtoa(value, d);
|
1414
|
-
|
1415
1361
|
/* fpconv_dtoa converts a float to its shortest string representation,
|
1416
1362
|
* but it adds a ".0" if this is a plain integer.
|
1417
1363
|
*/
|
@@ -1478,7 +1424,7 @@ start:
|
|
1478
1424
|
general:
|
1479
1425
|
if (data->state->strict) {
|
1480
1426
|
if (RTEST(data->state->as_json) && !as_json_called) {
|
1481
|
-
obj =
|
1427
|
+
obj = json_call_as_json(data->state, obj, Qfalse);
|
1482
1428
|
as_json_called = true;
|
1483
1429
|
goto start;
|
1484
1430
|
} else {
|
@@ -1899,6 +1845,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
|
|
1899
1845
|
return Qnil;
|
1900
1846
|
}
|
1901
1847
|
|
1848
|
+
static VALUE cState_allow_duplicate_key_p(VALUE self)
|
1849
|
+
{
|
1850
|
+
GET_STATE(self);
|
1851
|
+
switch (state->on_duplicate_key) {
|
1852
|
+
case JSON_IGNORE:
|
1853
|
+
return Qtrue;
|
1854
|
+
case JSON_DEPRECATED:
|
1855
|
+
return Qnil;
|
1856
|
+
default:
|
1857
|
+
return Qfalse;
|
1858
|
+
}
|
1859
|
+
}
|
1860
|
+
|
1902
1861
|
/*
|
1903
1862
|
* call-seq: depth
|
1904
1863
|
*
|
@@ -1956,15 +1915,30 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
|
|
1956
1915
|
return Qnil;
|
1957
1916
|
}
|
1958
1917
|
|
1918
|
+
struct configure_state_data {
|
1919
|
+
JSON_Generator_State *state;
|
1920
|
+
VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
|
1921
|
+
};
|
1922
|
+
|
1923
|
+
static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
|
1924
|
+
{
|
1925
|
+
if (RTEST(data->vstate)) {
|
1926
|
+
RB_OBJ_WRITE(data->vstate, field, value);
|
1927
|
+
} else {
|
1928
|
+
*field = value;
|
1929
|
+
}
|
1930
|
+
}
|
1931
|
+
|
1959
1932
|
static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
1960
1933
|
{
|
1961
|
-
|
1934
|
+
struct configure_state_data *data = (struct configure_state_data *)_arg;
|
1935
|
+
JSON_Generator_State *state = data->state;
|
1962
1936
|
|
1963
|
-
if (key == sym_indent) { state->indent
|
1964
|
-
else if (key == sym_space) { state->space
|
1965
|
-
else if (key == sym_space_before) { state->space_before
|
1966
|
-
else if (key == sym_object_nl) { state->object_nl
|
1967
|
-
else if (key == sym_array_nl) { state->array_nl
|
1937
|
+
if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
|
1938
|
+
else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
|
1939
|
+
else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
|
1940
|
+
else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
|
1941
|
+
else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
|
1968
1942
|
else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
|
1969
1943
|
else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
|
1970
1944
|
else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
|
@@ -1973,11 +1947,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
|
1973
1947
|
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
|
1974
1948
|
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
|
1975
1949
|
else if (key == sym_strict) { state->strict = RTEST(val); }
|
1976
|
-
else if (key ==
|
1950
|
+
else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
1951
|
+
else if (key == sym_as_json) {
|
1952
|
+
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
|
1953
|
+
state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
|
1954
|
+
state_write_value(data, &state->as_json, proc);
|
1955
|
+
}
|
1977
1956
|
return ST_CONTINUE;
|
1978
1957
|
}
|
1979
1958
|
|
1980
|
-
static void configure_state(JSON_Generator_State *state, VALUE config)
|
1959
|
+
static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
|
1981
1960
|
{
|
1982
1961
|
if (!RTEST(config)) return;
|
1983
1962
|
|
@@ -1985,15 +1964,20 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
|
|
1985
1964
|
|
1986
1965
|
if (!RHASH_SIZE(config)) return;
|
1987
1966
|
|
1967
|
+
struct configure_state_data data = {
|
1968
|
+
.state = state,
|
1969
|
+
.vstate = vstate
|
1970
|
+
};
|
1971
|
+
|
1988
1972
|
// We assume in most cases few keys are set so it's faster to go over
|
1989
1973
|
// the provided keys than to check all possible keys.
|
1990
|
-
rb_hash_foreach(config, configure_state_i, (VALUE)
|
1974
|
+
rb_hash_foreach(config, configure_state_i, (VALUE)&data);
|
1991
1975
|
}
|
1992
1976
|
|
1993
1977
|
static VALUE cState_configure(VALUE self, VALUE opts)
|
1994
1978
|
{
|
1995
1979
|
GET_STATE(self);
|
1996
|
-
configure_state(state, opts);
|
1980
|
+
configure_state(state, self, opts);
|
1997
1981
|
return self;
|
1998
1982
|
}
|
1999
1983
|
|
@@ -2001,7 +1985,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
|
2001
1985
|
{
|
2002
1986
|
JSON_Generator_State state = {0};
|
2003
1987
|
state_init(&state);
|
2004
|
-
configure_state(&state, opts);
|
1988
|
+
configure_state(&state, Qfalse, opts);
|
2005
1989
|
|
2006
1990
|
char stack_buffer[FBUFFER_STACK_SIZE];
|
2007
1991
|
FBuffer buffer = {
|
@@ -2090,6 +2074,8 @@ void Init_generator(void)
|
|
2090
2074
|
rb_define_method(cState, "generate", cState_generate, -1);
|
2091
2075
|
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
|
2092
2076
|
|
2077
|
+
rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
|
2078
|
+
|
2093
2079
|
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
|
2094
2080
|
|
2095
2081
|
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
|
@@ -2117,13 +2103,7 @@ void Init_generator(void)
|
|
2117
2103
|
rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
|
2118
2104
|
|
2119
2105
|
VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
|
2120
|
-
rb_define_singleton_method(mString, "included", mString_included_s, 1);
|
2121
2106
|
rb_define_method(mString, "to_json", mString_to_json, -1);
|
2122
|
-
rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
|
2123
|
-
rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
|
2124
|
-
|
2125
|
-
mString_Extend = rb_define_module_under(mString, "Extend");
|
2126
|
-
rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
|
2127
2107
|
|
2128
2108
|
VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
|
2129
2109
|
rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
|
@@ -2160,6 +2140,7 @@ void Init_generator(void)
|
|
2160
2140
|
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
|
2161
2141
|
sym_strict = ID2SYM(rb_intern("strict"));
|
2162
2142
|
sym_as_json = ID2SYM(rb_intern("as_json"));
|
2143
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
2163
2144
|
|
2164
2145
|
usascii_encindex = rb_usascii_encindex();
|
2165
2146
|
utf8_encindex = rb_utf8_encindex();
|
@@ -2168,7 +2149,7 @@ void Init_generator(void)
|
|
2168
2149
|
rb_require("json/ext/generator/state");
|
2169
2150
|
|
2170
2151
|
|
2171
|
-
switch(find_simd_implementation()) {
|
2152
|
+
switch (find_simd_implementation()) {
|
2172
2153
|
#ifdef HAVE_SIMD
|
2173
2154
|
#ifdef HAVE_SIMD_NEON
|
2174
2155
|
case SIMD_NEON:
|