json 2.12.2 → 2.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,20 @@
1
- #include "ruby.h"
1
+ #include "../json.h"
2
2
  #include "../fbuffer/fbuffer.h"
3
3
  #include "../vendor/fpconv.c"
4
4
 
5
5
  #include <math.h>
6
6
  #include <ctype.h>
7
7
 
8
- #include "simd.h"
8
+ #include "../simd/simd.h"
9
9
 
10
10
  /* ruby api and some helpers */
11
11
 
12
+ enum duplicate_key_action {
13
+ JSON_DEPRECATED = 0,
14
+ JSON_IGNORE,
15
+ JSON_RAISE,
16
+ };
17
+
12
18
  typedef struct JSON_Generator_StateStruct {
13
19
  VALUE indent;
14
20
  VALUE space;
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
21
27
  long depth;
22
28
  long buffer_initial_length;
23
29
 
30
+ enum duplicate_key_action on_duplicate_key;
31
+
32
+ bool as_json_single_arg;
24
33
  bool allow_nan;
25
34
  bool ascii_only;
26
35
  bool script_safe;
27
36
  bool strict;
28
37
  } JSON_Generator_State;
29
38
 
30
- #ifndef RB_UNLIKELY
31
- #define RB_UNLIKELY(cond) (cond)
32
- #endif
33
-
34
- static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
39
+ static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
35
40
 
36
- static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
- static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
41
+ static ID i_to_s, i_to_json, i_new, i_encode;
42
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
38
43
  sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
44
 
40
45
 
@@ -55,8 +60,11 @@ struct generate_json_data {
55
60
  JSON_Generator_State *state;
56
61
  VALUE obj;
57
62
  generator_func func;
63
+ long depth;
58
64
  };
59
65
 
66
+ static SIMD_Implementation simd_impl;
67
+
60
68
  static VALUE cState_from_state_s(VALUE self, VALUE opts);
61
69
  static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
62
70
  static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
@@ -76,23 +84,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
76
84
 
77
85
  static int usascii_encindex, utf8_encindex, binary_encindex;
78
86
 
79
- #ifdef RBIMPL_ATTR_NORETURN
80
- RBIMPL_ATTR_NORETURN()
81
- #endif
82
- static void raise_generator_error_str(VALUE invalid_object, VALUE str)
87
+ NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
83
88
  {
89
+ rb_enc_associate_index(str, utf8_encindex);
84
90
  VALUE exc = rb_exc_new_str(eGeneratorError, str);
85
91
  rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
86
92
  rb_exc_raise(exc);
87
93
  }
88
94
 
89
- #ifdef RBIMPL_ATTR_NORETURN
90
- RBIMPL_ATTR_NORETURN()
91
- #endif
92
95
  #ifdef RBIMPL_ATTR_FORMAT
93
96
  RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
94
97
  #endif
95
- static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
98
+ NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
99
  {
97
100
  va_list args;
98
101
  va_start(args, fmt);
@@ -127,18 +130,12 @@ typedef struct _search_state {
127
130
  #endif /* HAVE_SIMD */
128
131
  } search_state;
129
132
 
130
- #if (defined(__GNUC__ ) || defined(__clang__))
131
- #define FORCE_INLINE __attribute__((always_inline))
132
- #else
133
- #define FORCE_INLINE
134
- #endif
135
-
136
- static inline FORCE_INLINE void search_flush(search_state *search)
133
+ ALWAYS_INLINE(static) void search_flush(search_state *search)
137
134
  {
138
135
  // Do not remove this conditional without profiling, specifically escape-heavy text.
139
136
  // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
- // For back-to-back characters that need to be escaped, specifcally for the SIMD code paths, this method
141
- // will be called just before calling escape_UTF8_char_basic. There will be no characers to append for the
137
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
138
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
139
  // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
140
  // nothing needs to be flushed, we can save a few memory references with this conditional.
144
141
  if (search->ptr > search->cursor) {
@@ -160,8 +157,6 @@ static const unsigned char escape_table_basic[256] = {
160
157
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
161
158
  };
162
159
 
163
- static unsigned char (*search_escape_basic_impl)(search_state *);
164
-
165
160
  static inline unsigned char search_escape_basic(search_state *search)
166
161
  {
167
162
  while (search->ptr < search->end) {
@@ -176,7 +171,7 @@ static inline unsigned char search_escape_basic(search_state *search)
176
171
  return 0;
177
172
  }
178
173
 
179
- static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
174
+ ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
180
175
  {
181
176
  const unsigned char ch = (unsigned char)*search->ptr;
182
177
  switch (ch) {
@@ -217,11 +212,39 @@ static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
217
212
  * Everything else (should be UTF-8) is just passed through and
218
213
  * appended to the result.
219
214
  */
215
+
216
+
217
+ #if defined(HAVE_SIMD_NEON)
218
+ static inline unsigned char search_escape_basic_neon(search_state *search);
219
+ #elif defined(HAVE_SIMD_SSE2)
220
+ static inline unsigned char search_escape_basic_sse2(search_state *search);
221
+ #endif
222
+
223
+ static inline unsigned char search_escape_basic(search_state *search);
224
+
220
225
  static inline void convert_UTF8_to_JSON(search_state *search)
221
226
  {
222
- while (search_escape_basic_impl(search)) {
227
+ #ifdef HAVE_SIMD
228
+ #if defined(HAVE_SIMD_NEON)
229
+ while (search_escape_basic_neon(search)) {
230
+ escape_UTF8_char_basic(search);
231
+ }
232
+ #elif defined(HAVE_SIMD_SSE2)
233
+ if (simd_impl == SIMD_SSE2) {
234
+ while (search_escape_basic_sse2(search)) {
235
+ escape_UTF8_char_basic(search);
236
+ }
237
+ return;
238
+ }
239
+ while (search_escape_basic(search)) {
223
240
  escape_UTF8_char_basic(search);
224
241
  }
242
+ #endif
243
+ #else
244
+ while (search_escape_basic(search)) {
245
+ escape_UTF8_char_basic(search);
246
+ }
247
+ #endif /* HAVE_SIMD */
225
248
  }
226
249
 
227
250
  static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
@@ -263,8 +286,10 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
263
286
 
264
287
  #ifdef HAVE_SIMD
265
288
 
266
- static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
289
+ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
290
  {
291
+ RBIMPL_ASSERT_OR_ASSUME(len < vec_len);
292
+
268
293
  // Flush the buffer so everything up until the last 'len' characters are unflushed.
269
294
  search_flush(search);
270
295
 
@@ -274,19 +299,25 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
274
299
  char *s = (buf->ptr + buf->len);
275
300
 
276
301
  // Pad the buffer with dummy characters that won't need escaping.
277
- // This seem wateful at first sight, but memset of vector length is very fast.
278
- memset(s, 'X', vec_len);
302
+ // This seems wasteful at first sight, but memset of vector length is very fast.
303
+ // This is a space as it can be directly represented as an immediate on AArch64.
304
+ memset(s, ' ', vec_len);
279
305
 
280
306
  // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
281
307
  // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
282
- MEMCPY(s, search->ptr, char, len);
308
+ if (vec_len == 16) {
309
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
310
+ json_fast_memcpy16(s, search->ptr, len);
311
+ } else {
312
+ MEMCPY(s, search->ptr, char, len);
313
+ }
283
314
 
284
315
  return s;
285
316
  }
286
317
 
287
318
  #ifdef HAVE_SIMD_NEON
288
319
 
289
- static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
320
+ ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search)
290
321
  {
291
322
  uint64_t mask = search->matches_mask;
292
323
  uint32_t index = trailing_zeros64(mask) >> 2;
@@ -304,28 +335,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304
335
  return 1;
305
336
  }
306
337
 
307
- // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
- static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309
- {
310
- const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
- const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
- return mask & 0x8888888888888888ull;
313
- }
314
-
315
- static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316
- {
317
- uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
-
319
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
- const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
-
323
- uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
- uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325
-
326
- return neon_match_mask(needs_escape);
327
- }
328
-
329
338
  static inline unsigned char search_escape_basic_neon(search_state *search)
330
339
  {
331
340
  if (RB_UNLIKELY(search->has_matches)) {
@@ -333,7 +342,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
333
342
  if (search->matches_mask > 0) {
334
343
  return neon_next_match(search);
335
344
  } else {
336
- // neon_next_match will only advance search->ptr up to the last matching character.
345
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
346
  // Skip over any characters in the last chunk that occur after the last match.
338
347
  search->has_matches = false;
339
348
  search->ptr = search->chunk_end;
@@ -342,67 +351,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
342
351
 
343
352
  /*
344
353
  * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
- * need to be escaped.
346
- *
354
+ * need to be escaped.
355
+ *
347
356
  * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
- *
357
+ *
349
358
  * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
359
  * the vector instructions may work on larger vectors.
351
- *
360
+ *
352
361
  * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
353
- *
354
- * lower_bound: [20 20 20 20 20 20 20 20]
355
- * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
- * dblquote: [22 22 22 22 22 22 22 22]
357
- *
358
- * Next we load the first chunk of the ptr:
362
+ *
363
+ * lower_bound: [20 20 20 20 20 20 20 20]
364
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
365
+ * dblquote: [22 22 22 22 22 22 22 22]
366
+ *
367
+ * Next we load the first chunk of the ptr:
359
368
  * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
- *
369
+ *
361
370
  * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
371
  * as no bytes are less than 32 (0x20):
363
372
  * [0 0 0 0 0 0 0 0]
364
- *
373
+ *
365
374
  * Next, we check if any byte in chunk is equal to a backslash:
366
375
  * [0 0 0 FF 0 0 0 0]
367
- *
376
+ *
368
377
  * Finally we check if any byte in chunk is equal to a double quote:
369
- * [FF 0 0 0 0 0 0 0]
370
- *
378
+ * [FF 0 0 0 0 0 0 0]
379
+ *
371
380
  * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
381
  * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
382
  * This is the needs_escape vector and it is equal to:
374
- * [FF 0 0 FF 0 0 0 0]
375
- *
383
+ * [FF 0 0 FF 0 0 0 0]
384
+ *
376
385
  * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
386
  * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
- *
387
+ *
379
388
  * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
389
  * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
390
  * have at least one byte that needs to be escaped.
382
391
  */
383
- while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
- uint64_t mask = neon_rules_update(search->ptr);
385
392
 
386
- if (!mask) {
387
- search->ptr += sizeof(uint8x16_t);
388
- continue;
389
- }
390
- search->matches_mask = mask;
393
+ if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
391
394
  search->has_matches = true;
392
395
  search->chunk_base = search->ptr;
393
396
  search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
397
  return neon_next_match(search);
395
398
  }
396
399
 
397
- // There are fewer than 16 bytes left.
400
+ // There are fewer than 16 bytes left.
398
401
  unsigned long remaining = (search->end - search->ptr);
399
402
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
403
  char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
404
 
402
- uint64_t mask = neon_rules_update(s);
405
+ uint64_t mask = compute_chunk_mask_neon(s);
403
406
 
404
407
  if (!mask) {
405
- // Nothing to escape, ensure search_flush doesn't do anything by setting
408
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
409
  // search->cursor to search->ptr.
407
410
  fbuffer_consumed(search->buffer, remaining);
408
411
  search->ptr = search->end;
@@ -428,12 +431,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428
431
 
429
432
  #ifdef HAVE_SIMD_SSE2
430
433
 
431
- #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
- #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
- #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
- #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435
-
436
- static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
434
+ ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search)
437
435
  {
438
436
  int mask = search->matches_mask;
439
437
  int index = trailing_zeros(mask);
@@ -457,26 +455,14 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457
455
  #define TARGET_SSE2
458
456
  #endif
459
457
 
460
- static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
- {
462
- __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463
-
464
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
- __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
- __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
- __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
- return _mm_movemask_epi8(needs_escape);
470
- }
471
-
472
- static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
458
+ ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search)
473
459
  {
474
460
  if (RB_UNLIKELY(search->has_matches)) {
475
461
  // There are more matches if search->matches_mask > 0.
476
462
  if (search->matches_mask > 0) {
477
463
  return sse2_next_match(search);
478
464
  } else {
479
- // sse2_next_match will only advance search->ptr up to the last matching character.
465
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
466
  // Skip over any characters in the last chunk that occur after the last match.
481
467
  search->has_matches = false;
482
468
  if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
@@ -487,29 +473,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487
473
  }
488
474
  }
489
475
 
490
- while (search->ptr + sizeof(__m128i) <= search->end) {
491
- int needs_escape_mask = sse2_update(search->ptr);
492
-
493
- if (needs_escape_mask == 0) {
494
- search->ptr += sizeof(__m128i);
495
- continue;
496
- }
497
-
476
+ if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
498
477
  search->has_matches = true;
499
- search->matches_mask = needs_escape_mask;
500
478
  search->chunk_base = search->ptr;
479
+ search->chunk_end = search->ptr + sizeof(__m128i);
501
480
  return sse2_next_match(search);
502
481
  }
503
482
 
504
- // There are fewer than 16 bytes left.
483
+ // There are fewer than 16 bytes left.
505
484
  unsigned long remaining = (search->end - search->ptr);
506
485
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
486
  char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
487
 
509
- int needs_escape_mask = sse2_update(s);
488
+ int needs_escape_mask = compute_chunk_mask_sse2(s);
510
489
 
511
490
  if (needs_escape_mask == 0) {
512
- // Nothing to escape, ensure search_flush doesn't do anything by setting
491
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
492
  // search->cursor to search->ptr.
514
493
  fbuffer_consumed(search->buffer, remaining);
515
494
  search->ptr = search->end;
@@ -638,7 +617,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
638
617
  return 0;
639
618
  }
640
619
 
641
- static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
620
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
621
+ {
642
622
  const unsigned char ch = (unsigned char)*search->ptr;
643
623
  switch (ch_len) {
644
624
  case 1: {
@@ -668,7 +648,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
668
648
 
669
649
  uint32_t wchar = 0;
670
650
 
671
- switch(ch_len) {
651
+ switch (ch_len) {
672
652
  case 2:
673
653
  wchar = ch & 0x1F;
674
654
  break;
@@ -828,7 +808,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
828
808
  * _state_ is a JSON::State object, that can also be used to configure the
829
809
  * produced JSON string output further.
830
810
  */
831
- static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
811
+ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
812
+ {
832
813
  rb_check_arity(argc, 0, 1);
833
814
  VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
834
815
  return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
@@ -885,17 +866,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
885
866
  return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
886
867
  }
887
868
 
888
- /*
889
- * call-seq: String.included(modul)
890
- *
891
- * Extends _modul_ with the String::Extend module.
892
- */
893
- static VALUE mString_included_s(VALUE self, VALUE modul) {
894
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
895
- rb_call_super(1, &modul);
896
- return result;
897
- }
898
-
899
869
  /*
900
870
  * call-seq: to_json(*)
901
871
  *
@@ -910,51 +880,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
910
880
  return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
911
881
  }
912
882
 
913
- /*
914
- * call-seq: to_json_raw_object()
915
- *
916
- * This method creates a raw object hash, that can be nested into
917
- * other data structures and will be generated as a raw string. This
918
- * method should be used, if you want to convert raw strings to JSON
919
- * instead of UTF-8 strings, e. g. binary data.
920
- */
921
- static VALUE mString_to_json_raw_object(VALUE self)
922
- {
923
- VALUE ary;
924
- VALUE result = rb_hash_new();
925
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
926
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
927
- rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
928
- return result;
929
- }
930
-
931
- /*
932
- * call-seq: to_json_raw(*args)
933
- *
934
- * This method creates a JSON text from the result of a call to
935
- * to_json_raw_object of this String.
936
- */
937
- static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
938
- {
939
- VALUE obj = mString_to_json_raw_object(self);
940
- Check_Type(obj, T_HASH);
941
- return mHash_to_json(argc, argv, obj);
942
- }
943
-
944
- /*
945
- * call-seq: json_create(o)
946
- *
947
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
948
- * key "raw"). The Ruby String can be created by this module method.
949
- */
950
- static VALUE mString_Extend_json_create(VALUE self, VALUE o)
951
- {
952
- VALUE ary;
953
- Check_Type(o, T_HASH);
954
- ary = rb_hash_aref(o, rb_str_new2("raw"));
955
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
956
- }
957
-
958
883
  /*
959
884
  * call-seq: to_json(*)
960
885
  *
@@ -1038,11 +963,6 @@ static size_t State_memsize(const void *ptr)
1038
963
  return sizeof(JSON_Generator_State);
1039
964
  }
1040
965
 
1041
- #ifndef HAVE_RB_EXT_RACTOR_SAFE
1042
- # undef RUBY_TYPED_FROZEN_SHAREABLE
1043
- # define RUBY_TYPED_FROZEN_SHAREABLE 0
1044
- #endif
1045
-
1046
966
  static const rb_data_type_t JSON_Generator_State_type = {
1047
967
  "JSON/Generator/State",
1048
968
  {
@@ -1084,18 +1004,24 @@ static void vstate_spill(struct generate_json_data *data)
1084
1004
  RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
1085
1005
  }
1086
1006
 
1087
- static inline VALUE vstate_get(struct generate_json_data *data)
1007
+ static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj)
1088
1008
  {
1089
1009
  if (RB_UNLIKELY(!data->vstate)) {
1090
1010
  vstate_spill(data);
1091
1011
  }
1092
- return data->vstate;
1012
+ GET_STATE(data->vstate);
1013
+ state->depth = data->depth;
1014
+ VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate);
1015
+ // no need to restore state->depth, vstate is just a temporary State
1016
+ return tmp;
1093
1017
  }
1094
1018
 
1095
- struct hash_foreach_arg {
1096
- struct generate_json_data *data;
1097
- int iter;
1098
- };
1019
+ static VALUE
1020
+ json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
1021
+ {
1022
+ VALUE proc_args[2] = {object, is_key};
1023
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
1024
+ }
1099
1025
 
1100
1026
  static VALUE
1101
1027
  convert_string_subclass(VALUE key)
@@ -1112,6 +1038,145 @@ convert_string_subclass(VALUE key)
1112
1038
  return key_to_s;
1113
1039
  }
1114
1040
 
1041
+ static bool enc_utf8_compatible_p(int enc_idx)
1042
+ {
1043
+ if (enc_idx == usascii_encindex) return true;
1044
+ if (enc_idx == utf8_encindex) return true;
1045
+ return false;
1046
+ }
1047
+
1048
+ static VALUE encode_json_string_try(VALUE str)
1049
+ {
1050
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1051
+ }
1052
+
1053
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1054
+ {
1055
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1056
+ return Qundef;
1057
+ }
1058
+
1059
+ static inline bool valid_json_string_p(VALUE str)
1060
+ {
1061
+ int coderange = rb_enc_str_coderange(str);
1062
+
1063
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
1064
+ return true;
1065
+ }
1066
+
1067
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
1068
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
1069
+ }
1070
+
1071
+ return false;
1072
+ }
1073
+
1074
+ static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
1075
+ {
1076
+ if (RB_LIKELY(valid_json_string_p(str))) {
1077
+ return str;
1078
+ }
1079
+
1080
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
1081
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
1082
+ if (coerced_str != str) {
1083
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
1084
+ if (!valid_json_string_p(coerced_str)) {
1085
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
1086
+ }
1087
+ } else {
1088
+ // as_json could return another type than T_STRING
1089
+ if (is_key) {
1090
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
1091
+ }
1092
+ }
1093
+
1094
+ return coerced_str;
1095
+ }
1096
+ }
1097
+
1098
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
1099
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1100
+ switch (rb_enc_str_coderange(utf8_string)) {
1101
+ case ENC_CODERANGE_7BIT:
1102
+ return utf8_string;
1103
+ case ENC_CODERANGE_VALID:
1104
+ // For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
1105
+ // TODO: Raise in 3.0.0
1106
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1107
+ return utf8_string;
1108
+ break;
1109
+ }
1110
+ }
1111
+
1112
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1113
+ }
1114
+
1115
+ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1116
+ {
1117
+ fbuffer_append_char(buffer, '"');
1118
+
1119
+ long len;
1120
+ search_state search;
1121
+ search.buffer = buffer;
1122
+ RSTRING_GETMEM(obj, search.ptr, len);
1123
+ search.cursor = search.ptr;
1124
+ search.end = search.ptr + len;
1125
+
1126
+ #ifdef HAVE_SIMD
1127
+ search.matches_mask = 0;
1128
+ search.has_matches = false;
1129
+ search.chunk_base = NULL;
1130
+ search.chunk_end = NULL;
1131
+ #endif /* HAVE_SIMD */
1132
+
1133
+ switch (rb_enc_str_coderange(obj)) {
1134
+ case ENC_CODERANGE_7BIT:
1135
+ case ENC_CODERANGE_VALID:
1136
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1137
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1138
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1139
+ convert_UTF8_to_script_safe_JSON(&search);
1140
+ } else {
1141
+ convert_UTF8_to_JSON(&search);
1142
+ }
1143
+ break;
1144
+ default:
1145
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1146
+ break;
1147
+ }
1148
+ fbuffer_append_char(buffer, '"');
1149
+ }
1150
+
1151
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1152
+ {
1153
+ obj = ensure_valid_encoding(data, obj, false, false);
1154
+ raw_generate_json_string(buffer, data, obj);
1155
+ }
1156
+
1157
+ struct hash_foreach_arg {
1158
+ VALUE hash;
1159
+ struct generate_json_data *data;
1160
+ int first_key_type;
1161
+ bool first;
1162
+ bool mixed_keys_encountered;
1163
+ };
1164
+
1165
+ NOINLINE(static) void
1166
+ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1167
+ {
1168
+ if (arg->mixed_keys_encountered) {
1169
+ return;
1170
+ }
1171
+ arg->mixed_keys_encountered = true;
1172
+
1173
+ JSON_Generator_State *state = arg->data->state;
1174
+ if (state->on_duplicate_key != JSON_IGNORE) {
1175
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1176
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1177
+ }
1178
+ }
1179
+
1115
1180
  static int
1116
1181
  json_object_i(VALUE key, VALUE val, VALUE _arg)
1117
1182
  {
@@ -1121,22 +1186,34 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1121
1186
  FBuffer *buffer = data->buffer;
1122
1187
  JSON_Generator_State *state = data->state;
1123
1188
 
1124
- long depth = state->depth;
1125
- int j;
1189
+ long depth = data->depth;
1190
+ int key_type = rb_type(key);
1191
+
1192
+ if (arg->first) {
1193
+ arg->first = false;
1194
+ arg->first_key_type = key_type;
1195
+ }
1196
+ else {
1197
+ fbuffer_append_char(buffer, ',');
1198
+ }
1126
1199
 
1127
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
1128
1200
  if (RB_UNLIKELY(data->state->object_nl)) {
1129
1201
  fbuffer_append_str(buffer, data->state->object_nl);
1130
1202
  }
1131
1203
  if (RB_UNLIKELY(data->state->indent)) {
1132
- for (j = 0; j < depth; j++) {
1133
- fbuffer_append_str(buffer, data->state->indent);
1134
- }
1204
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1135
1205
  }
1136
1206
 
1137
1207
  VALUE key_to_s;
1138
- switch(rb_type(key)) {
1208
+ bool as_json_called = false;
1209
+
1210
+ start:
1211
+ switch (key_type) {
1139
1212
  case T_STRING:
1213
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1214
+ json_inspect_hash_with_mixed_keys(arg);
1215
+ }
1216
+
1140
1217
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1141
1218
  key_to_s = key;
1142
1219
  } else {
@@ -1144,15 +1221,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1144
1221
  }
1145
1222
  break;
1146
1223
  case T_SYMBOL:
1224
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1225
+ json_inspect_hash_with_mixed_keys(arg);
1226
+ }
1227
+
1147
1228
  key_to_s = rb_sym2str(key);
1148
1229
  break;
1149
1230
  default:
1231
+ if (data->state->strict) {
1232
+ if (RTEST(data->state->as_json) && !as_json_called) {
1233
+ key = json_call_as_json(data->state, key, Qtrue);
1234
+ key_type = rb_type(key);
1235
+ as_json_called = true;
1236
+ goto start;
1237
+ } else {
1238
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
1239
+ }
1240
+ }
1150
1241
  key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
1151
1242
  break;
1152
1243
  }
1153
1244
 
1245
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
1246
+
1154
1247
  if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
1155
- generate_json_string(buffer, data, key_to_s);
1248
+ raw_generate_json_string(buffer, data, key_to_s);
1156
1249
  } else {
1157
1250
  generate_json(buffer, data, key_to_s);
1158
1251
  }
@@ -1161,46 +1254,43 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1161
1254
  if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1162
1255
  generate_json(buffer, data, val);
1163
1256
 
1164
- arg->iter++;
1165
1257
  return ST_CONTINUE;
1166
1258
  }
1167
1259
 
1168
1260
  static inline long increase_depth(struct generate_json_data *data)
1169
1261
  {
1170
1262
  JSON_Generator_State *state = data->state;
1171
- long depth = ++state->depth;
1263
+ long depth = ++data->depth;
1172
1264
  if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1173
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1265
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth);
1174
1266
  }
1175
1267
  return depth;
1176
1268
  }
1177
1269
 
1178
1270
  static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1179
1271
  {
1180
- int j;
1181
1272
  long depth = increase_depth(data);
1182
1273
 
1183
1274
  if (RHASH_SIZE(obj) == 0) {
1184
1275
  fbuffer_append(buffer, "{}", 2);
1185
- --data->state->depth;
1276
+ --data->depth;
1186
1277
  return;
1187
1278
  }
1188
1279
 
1189
1280
  fbuffer_append_char(buffer, '{');
1190
1281
 
1191
1282
  struct hash_foreach_arg arg = {
1283
+ .hash = obj,
1192
1284
  .data = data,
1193
- .iter = 0,
1285
+ .first = true,
1194
1286
  };
1195
1287
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
1196
1288
 
1197
- depth = --data->state->depth;
1289
+ depth = --data->depth;
1198
1290
  if (RB_UNLIKELY(data->state->object_nl)) {
1199
1291
  fbuffer_append_str(buffer, data->state->object_nl);
1200
1292
  if (RB_UNLIKELY(data->state->indent)) {
1201
- for (j = 0; j < depth; j++) {
1202
- fbuffer_append_str(buffer, data->state->indent);
1203
- }
1293
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1204
1294
  }
1205
1295
  }
1206
1296
  fbuffer_append_char(buffer, '}');
@@ -1208,125 +1298,41 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1208
1298
 
1209
1299
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1210
1300
  {
1211
- int i, j;
1212
1301
  long depth = increase_depth(data);
1213
1302
 
1214
1303
  if (RARRAY_LEN(obj) == 0) {
1215
1304
  fbuffer_append(buffer, "[]", 2);
1216
- --data->state->depth;
1305
+ --data->depth;
1217
1306
  return;
1218
1307
  }
1219
1308
 
1220
1309
  fbuffer_append_char(buffer, '[');
1221
1310
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1222
- for(i = 0; i < RARRAY_LEN(obj); i++) {
1311
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
1223
1312
  if (i > 0) {
1224
1313
  fbuffer_append_char(buffer, ',');
1225
1314
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1226
1315
  }
1227
1316
  if (RB_UNLIKELY(data->state->indent)) {
1228
- for (j = 0; j < depth; j++) {
1229
- fbuffer_append_str(buffer, data->state->indent);
1230
- }
1317
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1231
1318
  }
1232
1319
  generate_json(buffer, data, RARRAY_AREF(obj, i));
1233
1320
  }
1234
- data->state->depth = --depth;
1321
+ data->depth = --depth;
1235
1322
  if (RB_UNLIKELY(data->state->array_nl)) {
1236
1323
  fbuffer_append_str(buffer, data->state->array_nl);
1237
1324
  if (RB_UNLIKELY(data->state->indent)) {
1238
- for (j = 0; j < depth; j++) {
1239
- fbuffer_append_str(buffer, data->state->indent);
1240
- }
1325
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1241
1326
  }
1242
1327
  }
1243
1328
  fbuffer_append_char(buffer, ']');
1244
1329
  }
1245
1330
 
1246
- static inline int enc_utf8_compatible_p(int enc_idx)
1247
- {
1248
- if (enc_idx == usascii_encindex) return 1;
1249
- if (enc_idx == utf8_encindex) return 1;
1250
- return 0;
1251
- }
1252
-
1253
- static VALUE encode_json_string_try(VALUE str)
1254
- {
1255
- return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1256
- }
1257
-
1258
- static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1259
- {
1260
- raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1261
- return Qundef;
1262
- }
1263
-
1264
- static inline VALUE ensure_valid_encoding(VALUE str)
1265
- {
1266
- int encindex = RB_ENCODING_GET(str);
1267
- VALUE utf8_string;
1268
- if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
1269
- if (encindex == binary_encindex) {
1270
- utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1271
- switch (rb_enc_str_coderange(utf8_string)) {
1272
- case ENC_CODERANGE_7BIT:
1273
- return utf8_string;
1274
- case ENC_CODERANGE_VALID:
1275
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1276
- // TODO: Raise in 3.0.0
1277
- rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1278
- return utf8_string;
1279
- break;
1280
- }
1281
- }
1282
-
1283
- str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1284
- }
1285
- return str;
1286
- }
1287
-
1288
- static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1289
- {
1290
- obj = ensure_valid_encoding(obj);
1291
-
1292
- fbuffer_append_char(buffer, '"');
1293
-
1294
- long len;
1295
- search_state search;
1296
- search.buffer = buffer;
1297
- RSTRING_GETMEM(obj, search.ptr, len);
1298
- search.cursor = search.ptr;
1299
- search.end = search.ptr + len;
1300
-
1301
- #ifdef HAVE_SIMD
1302
- search.matches_mask = 0;
1303
- search.has_matches = false;
1304
- search.chunk_base = NULL;
1305
- #endif /* HAVE_SIMD */
1306
-
1307
- switch(rb_enc_str_coderange(obj)) {
1308
- case ENC_CODERANGE_7BIT:
1309
- case ENC_CODERANGE_VALID:
1310
- if (RB_UNLIKELY(data->state->ascii_only)) {
1311
- convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1312
- } else if (RB_UNLIKELY(data->state->script_safe)) {
1313
- convert_UTF8_to_script_safe_JSON(&search);
1314
- } else {
1315
- convert_UTF8_to_JSON(&search);
1316
- }
1317
- break;
1318
- default:
1319
- raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1320
- break;
1321
- }
1322
- fbuffer_append_char(buffer, '"');
1323
- }
1324
-
1325
1331
  static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1326
1332
  {
1327
1333
  VALUE tmp;
1328
1334
  if (rb_respond_to(obj, i_to_json)) {
1329
- tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1335
+ tmp = json_call_to_json(data, obj);
1330
1336
  Check_Type(tmp, T_STRING);
1331
1337
  fbuffer_append_str(buffer, tmp);
1332
1338
  } else {
@@ -1368,7 +1374,7 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat
1368
1374
  static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1369
1375
  {
1370
1376
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
1371
- fbuffer_append_str(buffer, tmp);
1377
+ fbuffer_append_str(buffer, StringValue(tmp));
1372
1378
  }
1373
1379
 
1374
1380
  #ifdef RUBY_INTEGER_UNIFICATION
@@ -1389,11 +1395,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1389
1395
  /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1390
1396
  if (!allow_nan) {
1391
1397
  if (data->state->strict && data->state->as_json) {
1392
- VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1398
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
1393
1399
  if (casted_obj != obj) {
1394
1400
  increase_depth(data);
1395
1401
  generate_json(buffer, data, casted_obj);
1396
- data->state->depth--;
1402
+ data->depth--;
1397
1403
  return;
1398
1404
  }
1399
1405
  }
@@ -1406,12 +1412,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1406
1412
  }
1407
1413
 
1408
1414
  /* This implementation writes directly into the buffer. We reserve
1409
- * the 28 characters that fpconv_dtoa states as its maximum.
1415
+ * the 32 characters that fpconv_dtoa states as its maximum.
1410
1416
  */
1411
- fbuffer_inc_capa(buffer, 28);
1417
+ fbuffer_inc_capa(buffer, 32);
1412
1418
  char* d = buffer->ptr + buffer->len;
1413
1419
  int len = fpconv_dtoa(value, d);
1414
-
1415
1420
  /* fpconv_dtoa converts a float to its shortest string representation,
1416
1421
  * but it adds a ".0" if this is a plain integer.
1417
1422
  */
@@ -1461,7 +1466,16 @@ start:
1461
1466
  break;
1462
1467
  case T_STRING:
1463
1468
  if (klass != rb_cString) goto general;
1464
- generate_json_string(buffer, data, obj);
1469
+
1470
+ if (RB_LIKELY(valid_json_string_p(obj))) {
1471
+ raw_generate_json_string(buffer, data, obj);
1472
+ } else if (as_json_called) {
1473
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1474
+ } else {
1475
+ obj = ensure_valid_encoding(data, obj, false, false);
1476
+ as_json_called = true;
1477
+ goto start;
1478
+ }
1465
1479
  break;
1466
1480
  case T_SYMBOL:
1467
1481
  generate_json_symbol(buffer, data, obj);
@@ -1478,7 +1492,7 @@ start:
1478
1492
  general:
1479
1493
  if (data->state->strict) {
1480
1494
  if (RTEST(data->state->as_json) && !as_json_called) {
1481
- obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1495
+ obj = json_call_as_json(data->state, obj, Qfalse);
1482
1496
  as_json_called = true;
1483
1497
  goto start;
1484
1498
  } else {
@@ -1497,16 +1511,14 @@ static VALUE generate_json_try(VALUE d)
1497
1511
 
1498
1512
  data->func(data->buffer, data, data->obj);
1499
1513
 
1500
- return Qnil;
1514
+ return fbuffer_finalize(data->buffer);
1501
1515
  }
1502
1516
 
1503
- static VALUE generate_json_rescue(VALUE d, VALUE exc)
1517
+ static VALUE generate_json_ensure(VALUE d)
1504
1518
  {
1505
1519
  struct generate_json_data *data = (struct generate_json_data *)d;
1506
1520
  fbuffer_free(data->buffer);
1507
1521
 
1508
- rb_exc_raise(exc);
1509
-
1510
1522
  return Qundef;
1511
1523
  }
1512
1524
 
@@ -1522,14 +1534,15 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1522
1534
 
1523
1535
  struct generate_json_data data = {
1524
1536
  .buffer = &buffer,
1525
- .vstate = self,
1537
+ .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json
1526
1538
  .state = state,
1539
+ .depth = state->depth,
1527
1540
  .obj = obj,
1528
1541
  .func = func
1529
1542
  };
1530
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1531
-
1532
- return fbuffer_finalize(&buffer);
1543
+ VALUE result = rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1544
+ RB_GC_GUARD(self);
1545
+ return result;
1533
1546
  }
1534
1547
 
1535
1548
  /* call-seq:
@@ -1545,10 +1558,7 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1545
1558
  rb_check_arity(argc, 1, 2);
1546
1559
  VALUE obj = argv[0];
1547
1560
  VALUE io = argc > 1 ? argv[1] : Qnil;
1548
- VALUE result = cState_partial_generate(self, obj, generate_json, io);
1549
- GET_STATE(self);
1550
- (void)state;
1551
- return result;
1561
+ return cState_partial_generate(self, obj, generate_json, io);
1552
1562
  }
1553
1563
 
1554
1564
  static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
@@ -1629,6 +1639,7 @@ static VALUE string_config(VALUE config)
1629
1639
  */
1630
1640
  static VALUE cState_indent_set(VALUE self, VALUE indent)
1631
1641
  {
1642
+ rb_check_frozen(self);
1632
1643
  GET_STATE(self);
1633
1644
  RB_OBJ_WRITE(self, &state->indent, string_config(indent));
1634
1645
  return Qnil;
@@ -1654,6 +1665,7 @@ static VALUE cState_space(VALUE self)
1654
1665
  */
1655
1666
  static VALUE cState_space_set(VALUE self, VALUE space)
1656
1667
  {
1668
+ rb_check_frozen(self);
1657
1669
  GET_STATE(self);
1658
1670
  RB_OBJ_WRITE(self, &state->space, string_config(space));
1659
1671
  return Qnil;
@@ -1677,6 +1689,7 @@ static VALUE cState_space_before(VALUE self)
1677
1689
  */
1678
1690
  static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1679
1691
  {
1692
+ rb_check_frozen(self);
1680
1693
  GET_STATE(self);
1681
1694
  RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
1682
1695
  return Qnil;
@@ -1702,6 +1715,7 @@ static VALUE cState_object_nl(VALUE self)
1702
1715
  */
1703
1716
  static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1704
1717
  {
1718
+ rb_check_frozen(self);
1705
1719
  GET_STATE(self);
1706
1720
  RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
1707
1721
  return Qnil;
@@ -1725,6 +1739,7 @@ static VALUE cState_array_nl(VALUE self)
1725
1739
  */
1726
1740
  static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1727
1741
  {
1742
+ rb_check_frozen(self);
1728
1743
  GET_STATE(self);
1729
1744
  RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
1730
1745
  return Qnil;
@@ -1748,6 +1763,7 @@ static VALUE cState_as_json(VALUE self)
1748
1763
  */
1749
1764
  static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1750
1765
  {
1766
+ rb_check_frozen(self);
1751
1767
  GET_STATE(self);
1752
1768
  RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1753
1769
  return Qnil;
@@ -1790,6 +1806,7 @@ static long long_config(VALUE num)
1790
1806
  */
1791
1807
  static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1792
1808
  {
1809
+ rb_check_frozen(self);
1793
1810
  GET_STATE(self);
1794
1811
  state->max_nesting = long_config(depth);
1795
1812
  return Qnil;
@@ -1815,6 +1832,7 @@ static VALUE cState_script_safe(VALUE self)
1815
1832
  */
1816
1833
  static VALUE cState_script_safe_set(VALUE self, VALUE enable)
1817
1834
  {
1835
+ rb_check_frozen(self);
1818
1836
  GET_STATE(self);
1819
1837
  state->script_safe = RTEST(enable);
1820
1838
  return Qnil;
@@ -1846,6 +1864,7 @@ static VALUE cState_strict(VALUE self)
1846
1864
  */
1847
1865
  static VALUE cState_strict_set(VALUE self, VALUE enable)
1848
1866
  {
1867
+ rb_check_frozen(self);
1849
1868
  GET_STATE(self);
1850
1869
  state->strict = RTEST(enable);
1851
1870
  return Qnil;
@@ -1870,6 +1889,7 @@ static VALUE cState_allow_nan_p(VALUE self)
1870
1889
  */
1871
1890
  static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1872
1891
  {
1892
+ rb_check_frozen(self);
1873
1893
  GET_STATE(self);
1874
1894
  state->allow_nan = RTEST(enable);
1875
1895
  return Qnil;
@@ -1894,11 +1914,25 @@ static VALUE cState_ascii_only_p(VALUE self)
1894
1914
  */
1895
1915
  static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1896
1916
  {
1917
+ rb_check_frozen(self);
1897
1918
  GET_STATE(self);
1898
1919
  state->ascii_only = RTEST(enable);
1899
1920
  return Qnil;
1900
1921
  }
1901
1922
 
1923
+ static VALUE cState_allow_duplicate_key_p(VALUE self)
1924
+ {
1925
+ GET_STATE(self);
1926
+ switch (state->on_duplicate_key) {
1927
+ case JSON_IGNORE:
1928
+ return Qtrue;
1929
+ case JSON_DEPRECATED:
1930
+ return Qnil;
1931
+ default:
1932
+ return Qfalse;
1933
+ }
1934
+ }
1935
+
1902
1936
  /*
1903
1937
  * call-seq: depth
1904
1938
  *
@@ -1918,6 +1952,7 @@ static VALUE cState_depth(VALUE self)
1918
1952
  */
1919
1953
  static VALUE cState_depth_set(VALUE self, VALUE depth)
1920
1954
  {
1955
+ rb_check_frozen(self);
1921
1956
  GET_STATE(self);
1922
1957
  state->depth = long_config(depth);
1923
1958
  return Qnil;
@@ -1951,20 +1986,36 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_
1951
1986
  */
1952
1987
  static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
1953
1988
  {
1989
+ rb_check_frozen(self);
1954
1990
  GET_STATE(self);
1955
1991
  buffer_initial_length_set(state, buffer_initial_length);
1956
1992
  return Qnil;
1957
1993
  }
1958
1994
 
1995
+ struct configure_state_data {
1996
+ JSON_Generator_State *state;
1997
+ VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
1998
+ };
1999
+
2000
+ static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
2001
+ {
2002
+ if (RTEST(data->vstate)) {
2003
+ RB_OBJ_WRITE(data->vstate, field, value);
2004
+ } else {
2005
+ *field = value;
2006
+ }
2007
+ }
2008
+
1959
2009
  static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1960
2010
  {
1961
- JSON_Generator_State *state = (JSON_Generator_State *)_arg;
2011
+ struct configure_state_data *data = (struct configure_state_data *)_arg;
2012
+ JSON_Generator_State *state = data->state;
1962
2013
 
1963
- if (key == sym_indent) { state->indent = string_config(val); }
1964
- else if (key == sym_space) { state->space = string_config(val); }
1965
- else if (key == sym_space_before) { state->space_before = string_config(val); }
1966
- else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1967
- else if (key == sym_array_nl) { state->array_nl = string_config(val); }
2014
+ if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
2015
+ else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
2016
+ else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
2017
+ else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
2018
+ else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
1968
2019
  else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1969
2020
  else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1970
2021
  else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
@@ -1973,11 +2024,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1973
2024
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1974
2025
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1975
2026
  else if (key == sym_strict) { state->strict = RTEST(val); }
1976
- else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
2027
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
2028
+ else if (key == sym_as_json) {
2029
+ VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
2030
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
2031
+ state_write_value(data, &state->as_json, proc);
2032
+ }
1977
2033
  return ST_CONTINUE;
1978
2034
  }
1979
2035
 
1980
- static void configure_state(JSON_Generator_State *state, VALUE config)
2036
+ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
1981
2037
  {
1982
2038
  if (!RTEST(config)) return;
1983
2039
 
@@ -1985,15 +2041,21 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
1985
2041
 
1986
2042
  if (!RHASH_SIZE(config)) return;
1987
2043
 
2044
+ struct configure_state_data data = {
2045
+ .state = state,
2046
+ .vstate = vstate
2047
+ };
2048
+
1988
2049
  // We assume in most cases few keys are set so it's faster to go over
1989
2050
  // the provided keys than to check all possible keys.
1990
- rb_hash_foreach(config, configure_state_i, (VALUE)state);
2051
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data);
1991
2052
  }
1992
2053
 
1993
2054
  static VALUE cState_configure(VALUE self, VALUE opts)
1994
2055
  {
2056
+ rb_check_frozen(self);
1995
2057
  GET_STATE(self);
1996
- configure_state(state, opts);
2058
+ configure_state(state, self, opts);
1997
2059
  return self;
1998
2060
  }
1999
2061
 
@@ -2001,7 +2063,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2001
2063
  {
2002
2064
  JSON_Generator_State state = {0};
2003
2065
  state_init(&state);
2004
- configure_state(&state, opts);
2066
+ configure_state(&state, Qfalse, opts);
2005
2067
 
2006
2068
  char stack_buffer[FBUFFER_STACK_SIZE];
2007
2069
  FBuffer buffer = {
@@ -2013,12 +2075,11 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2013
2075
  .buffer = &buffer,
2014
2076
  .vstate = Qfalse,
2015
2077
  .state = &state,
2078
+ .depth = state.depth,
2016
2079
  .obj = obj,
2017
2080
  .func = generate_json,
2018
2081
  };
2019
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
2020
-
2021
- return fbuffer_finalize(&buffer);
2082
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
2022
2083
  }
2023
2084
 
2024
2085
  /*
@@ -2088,7 +2149,8 @@ void Init_generator(void)
2088
2149
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
2089
2150
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
2090
2151
  rb_define_method(cState, "generate", cState_generate, -1);
2091
- rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2152
+
2153
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2092
2154
 
2093
2155
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
2094
2156
 
@@ -2117,13 +2179,7 @@ void Init_generator(void)
2117
2179
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
2118
2180
 
2119
2181
  VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
2120
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
2121
2182
  rb_define_method(mString, "to_json", mString_to_json, -1);
2122
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
2123
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
2124
-
2125
- mString_Extend = rb_define_module_under(mString, "Extend");
2126
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
2127
2183
 
2128
2184
  VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
2129
2185
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
@@ -2140,10 +2196,6 @@ void Init_generator(void)
2140
2196
  i_to_s = rb_intern("to_s");
2141
2197
  i_to_json = rb_intern("to_json");
2142
2198
  i_new = rb_intern("new");
2143
- i_pack = rb_intern("pack");
2144
- i_unpack = rb_intern("unpack");
2145
- i_create_id = rb_intern("create_id");
2146
- i_extend = rb_intern("extend");
2147
2199
  i_encode = rb_intern("encode");
2148
2200
 
2149
2201
  sym_indent = ID2SYM(rb_intern("indent"));
@@ -2160,6 +2212,7 @@ void Init_generator(void)
2160
2212
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2161
2213
  sym_strict = ID2SYM(rb_intern("strict"));
2162
2214
  sym_as_json = ID2SYM(rb_intern("as_json"));
2215
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
2163
2216
 
2164
2217
  usascii_encindex = rb_usascii_encindex();
2165
2218
  utf8_encindex = rb_utf8_encindex();
@@ -2167,22 +2220,5 @@ void Init_generator(void)
2167
2220
 
2168
2221
  rb_require("json/ext/generator/state");
2169
2222
 
2170
-
2171
- switch(find_simd_implementation()) {
2172
- #ifdef HAVE_SIMD
2173
- #ifdef HAVE_SIMD_NEON
2174
- case SIMD_NEON:
2175
- search_escape_basic_impl = search_escape_basic_neon;
2176
- break;
2177
- #endif /* HAVE_SIMD_NEON */
2178
- #ifdef HAVE_SIMD_SSE2
2179
- case SIMD_SSE2:
2180
- search_escape_basic_impl = search_escape_basic_sse2;
2181
- break;
2182
- #endif /* HAVE_SIMD_SSE2 */
2183
- #endif /* HAVE_SIMD */
2184
- default:
2185
- search_escape_basic_impl = search_escape_basic;
2186
- break;
2187
- }
2223
+ simd_impl = find_simd_implementation();
2188
2224
  }