json 2.12.2 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,20 @@
1
- #include "ruby.h"
1
+ #include "../json.h"
2
2
  #include "../fbuffer/fbuffer.h"
3
3
  #include "../vendor/fpconv.c"
4
4
 
5
5
  #include <math.h>
6
6
  #include <ctype.h>
7
7
 
8
- #include "simd.h"
8
+ #include "../simd/simd.h"
9
9
 
10
10
  /* ruby api and some helpers */
11
11
 
12
+ enum duplicate_key_action {
13
+ JSON_DEPRECATED = 0,
14
+ JSON_IGNORE,
15
+ JSON_RAISE,
16
+ };
17
+
12
18
  typedef struct JSON_Generator_StateStruct {
13
19
  VALUE indent;
14
20
  VALUE space;
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
21
27
  long depth;
22
28
  long buffer_initial_length;
23
29
 
30
+ enum duplicate_key_action on_duplicate_key;
31
+
32
+ bool as_json_single_arg;
24
33
  bool allow_nan;
25
34
  bool ascii_only;
26
35
  bool script_safe;
27
36
  bool strict;
28
37
  } JSON_Generator_State;
29
38
 
30
- #ifndef RB_UNLIKELY
31
- #define RB_UNLIKELY(cond) (cond)
32
- #endif
33
-
34
- static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
39
+ static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
35
40
 
36
41
  static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
- static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
42
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
38
43
  sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
44
 
40
45
 
@@ -76,23 +81,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
76
81
 
77
82
  static int usascii_encindex, utf8_encindex, binary_encindex;
78
83
 
79
- #ifdef RBIMPL_ATTR_NORETURN
80
- RBIMPL_ATTR_NORETURN()
81
- #endif
82
- static void raise_generator_error_str(VALUE invalid_object, VALUE str)
84
+ NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
83
85
  {
86
+ rb_enc_associate_index(str, utf8_encindex);
84
87
  VALUE exc = rb_exc_new_str(eGeneratorError, str);
85
88
  rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
86
89
  rb_exc_raise(exc);
87
90
  }
88
91
 
89
- #ifdef RBIMPL_ATTR_NORETURN
90
- RBIMPL_ATTR_NORETURN()
91
- #endif
92
92
  #ifdef RBIMPL_ATTR_FORMAT
93
93
  RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
94
94
  #endif
95
- static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
95
+ NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
96
  {
97
97
  va_list args;
98
98
  va_start(args, fmt);
@@ -127,18 +127,12 @@ typedef struct _search_state {
127
127
  #endif /* HAVE_SIMD */
128
128
  } search_state;
129
129
 
130
- #if (defined(__GNUC__ ) || defined(__clang__))
131
- #define FORCE_INLINE __attribute__((always_inline))
132
- #else
133
- #define FORCE_INLINE
134
- #endif
135
-
136
- static inline FORCE_INLINE void search_flush(search_state *search)
130
+ static ALWAYS_INLINE() void search_flush(search_state *search)
137
131
  {
138
132
  // Do not remove this conditional without profiling, specifically escape-heavy text.
139
133
  // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
- // For back-to-back characters that need to be escaped, specifcally for the SIMD code paths, this method
141
- // will be called just before calling escape_UTF8_char_basic. There will be no characers to append for the
134
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
135
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
136
  // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
137
  // nothing needs to be flushed, we can save a few memory references with this conditional.
144
138
  if (search->ptr > search->cursor) {
@@ -176,7 +170,7 @@ static inline unsigned char search_escape_basic(search_state *search)
176
170
  return 0;
177
171
  }
178
172
 
179
- static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
173
+ static ALWAYS_INLINE() void escape_UTF8_char_basic(search_state *search)
180
174
  {
181
175
  const unsigned char ch = (unsigned char)*search->ptr;
182
176
  switch (ch) {
@@ -263,7 +257,7 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
263
257
 
264
258
  #ifdef HAVE_SIMD
265
259
 
266
- static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
260
+ static ALWAYS_INLINE() char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
261
  {
268
262
  // Flush the buffer so everything up until the last 'len' characters are unflushed.
269
263
  search_flush(search);
@@ -286,7 +280,7 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
286
280
 
287
281
  #ifdef HAVE_SIMD_NEON
288
282
 
289
- static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
283
+ static ALWAYS_INLINE() unsigned char neon_next_match(search_state *search)
290
284
  {
291
285
  uint64_t mask = search->matches_mask;
292
286
  uint32_t index = trailing_zeros64(mask) >> 2;
@@ -304,28 +298,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304
298
  return 1;
305
299
  }
306
300
 
307
- // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
- static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309
- {
310
- const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
- const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
- return mask & 0x8888888888888888ull;
313
- }
314
-
315
- static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316
- {
317
- uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
-
319
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
- const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
-
323
- uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
- uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325
-
326
- return neon_match_mask(needs_escape);
327
- }
328
-
329
301
  static inline unsigned char search_escape_basic_neon(search_state *search)
330
302
  {
331
303
  if (RB_UNLIKELY(search->has_matches)) {
@@ -333,7 +305,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
333
305
  if (search->matches_mask > 0) {
334
306
  return neon_next_match(search);
335
307
  } else {
336
- // neon_next_match will only advance search->ptr up to the last matching character.
308
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
309
  // Skip over any characters in the last chunk that occur after the last match.
338
310
  search->has_matches = false;
339
311
  search->ptr = search->chunk_end;
@@ -342,67 +314,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
342
314
 
343
315
  /*
344
316
  * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
- * need to be escaped.
346
- *
317
+ * need to be escaped.
318
+ *
347
319
  * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
- *
320
+ *
349
321
  * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
322
  * the vector insructions may work on larger vectors.
351
- *
323
+ *
352
324
  * First, we load three constants 'lower_bound', 'backslash' and 'dblquote" in vector registers.
353
- *
354
- * lower_bound: [20 20 20 20 20 20 20 20]
355
- * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
- * dblquote: [22 22 22 22 22 22 22 22]
357
- *
358
- * Next we load the first chunk of the ptr:
325
+ *
326
+ * lower_bound: [20 20 20 20 20 20 20 20]
327
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
328
+ * dblquote: [22 22 22 22 22 22 22 22]
329
+ *
330
+ * Next we load the first chunk of the ptr:
359
331
  * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
- *
332
+ *
361
333
  * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
334
  * as no bytes are less than 32 (0x20):
363
335
  * [0 0 0 0 0 0 0 0]
364
- *
336
+ *
365
337
  * Next, we check if any byte in chunk is equal to a backslash:
366
338
  * [0 0 0 FF 0 0 0 0]
367
- *
339
+ *
368
340
  * Finally we check if any byte in chunk is equal to a double quote:
369
- * [FF 0 0 0 0 0 0 0]
370
- *
341
+ * [FF 0 0 0 0 0 0 0]
342
+ *
371
343
  * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
344
  * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
345
  * This is the needs_escape vector and it is equal to:
374
- * [FF 0 0 FF 0 0 0 0]
375
- *
346
+ * [FF 0 0 FF 0 0 0 0]
347
+ *
376
348
  * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
349
  * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
- *
350
+ *
379
351
  * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
352
  * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
353
  * have at least one byte that needs to be escaped.
382
354
  */
383
- while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
- uint64_t mask = neon_rules_update(search->ptr);
385
355
 
386
- if (!mask) {
387
- search->ptr += sizeof(uint8x16_t);
388
- continue;
389
- }
390
- search->matches_mask = mask;
356
+ if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
391
357
  search->has_matches = true;
392
358
  search->chunk_base = search->ptr;
393
359
  search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
360
  return neon_next_match(search);
395
361
  }
396
362
 
397
- // There are fewer than 16 bytes left.
363
+ // There are fewer than 16 bytes left.
398
364
  unsigned long remaining = (search->end - search->ptr);
399
365
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
366
  char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
367
 
402
- uint64_t mask = neon_rules_update(s);
368
+ uint64_t mask = compute_chunk_mask_neon(s);
403
369
 
404
370
  if (!mask) {
405
- // Nothing to escape, ensure search_flush doesn't do anything by setting
371
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
372
  // search->cursor to search->ptr.
407
373
  fbuffer_consumed(search->buffer, remaining);
408
374
  search->ptr = search->end;
@@ -428,12 +394,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428
394
 
429
395
  #ifdef HAVE_SIMD_SSE2
430
396
 
431
- #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
- #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
- #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
- #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435
-
436
- static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
397
+ static ALWAYS_INLINE() unsigned char sse2_next_match(search_state *search)
437
398
  {
438
399
  int mask = search->matches_mask;
439
400
  int index = trailing_zeros(mask);
@@ -457,26 +418,14 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457
418
  #define TARGET_SSE2
458
419
  #endif
459
420
 
460
- static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
- {
462
- __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463
-
464
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
- __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
- __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
- __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
- return _mm_movemask_epi8(needs_escape);
470
- }
471
-
472
- static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
421
+ static TARGET_SSE2 ALWAYS_INLINE() unsigned char search_escape_basic_sse2(search_state *search)
473
422
  {
474
423
  if (RB_UNLIKELY(search->has_matches)) {
475
424
  // There are more matches if search->matches_mask > 0.
476
425
  if (search->matches_mask > 0) {
477
426
  return sse2_next_match(search);
478
427
  } else {
479
- // sse2_next_match will only advance search->ptr up to the last matching character.
428
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
429
  // Skip over any characters in the last chunk that occur after the last match.
481
430
  search->has_matches = false;
482
431
  if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
@@ -487,29 +436,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487
436
  }
488
437
  }
489
438
 
490
- while (search->ptr + sizeof(__m128i) <= search->end) {
491
- int needs_escape_mask = sse2_update(search->ptr);
492
-
493
- if (needs_escape_mask == 0) {
494
- search->ptr += sizeof(__m128i);
495
- continue;
496
- }
497
-
439
+ if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
498
440
  search->has_matches = true;
499
- search->matches_mask = needs_escape_mask;
500
441
  search->chunk_base = search->ptr;
442
+ search->chunk_end = search->ptr + sizeof(__m128i);
501
443
  return sse2_next_match(search);
502
444
  }
503
445
 
504
- // There are fewer than 16 bytes left.
446
+ // There are fewer than 16 bytes left.
505
447
  unsigned long remaining = (search->end - search->ptr);
506
448
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
449
  char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
450
 
509
- int needs_escape_mask = sse2_update(s);
451
+ int needs_escape_mask = compute_chunk_mask_sse2(s);
510
452
 
511
453
  if (needs_escape_mask == 0) {
512
- // Nothing to escape, ensure search_flush doesn't do anything by setting
454
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
455
  // search->cursor to search->ptr.
514
456
  fbuffer_consumed(search->buffer, remaining);
515
457
  search->ptr = search->end;
@@ -638,7 +580,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
638
580
  return 0;
639
581
  }
640
582
 
641
- static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
583
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
584
+ {
642
585
  const unsigned char ch = (unsigned char)*search->ptr;
643
586
  switch (ch_len) {
644
587
  case 1: {
@@ -668,7 +611,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
668
611
 
669
612
  uint32_t wchar = 0;
670
613
 
671
- switch(ch_len) {
614
+ switch (ch_len) {
672
615
  case 2:
673
616
  wchar = ch & 0x1F;
674
617
  break;
@@ -828,7 +771,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
828
771
  * _state_ is a JSON::State object, that can also be used to configure the
829
772
  * produced JSON string output further.
830
773
  */
831
- static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
774
+ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
775
+ {
832
776
  rb_check_arity(argc, 0, 1);
833
777
  VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
834
778
  return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
@@ -885,17 +829,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
885
829
  return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
886
830
  }
887
831
 
888
- /*
889
- * call-seq: String.included(modul)
890
- *
891
- * Extends _modul_ with the String::Extend module.
892
- */
893
- static VALUE mString_included_s(VALUE self, VALUE modul) {
894
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
895
- rb_call_super(1, &modul);
896
- return result;
897
- }
898
-
899
832
  /*
900
833
  * call-seq: to_json(*)
901
834
  *
@@ -910,51 +843,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
910
843
  return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
911
844
  }
912
845
 
913
- /*
914
- * call-seq: to_json_raw_object()
915
- *
916
- * This method creates a raw object hash, that can be nested into
917
- * other data structures and will be generated as a raw string. This
918
- * method should be used, if you want to convert raw strings to JSON
919
- * instead of UTF-8 strings, e. g. binary data.
920
- */
921
- static VALUE mString_to_json_raw_object(VALUE self)
922
- {
923
- VALUE ary;
924
- VALUE result = rb_hash_new();
925
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
926
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
927
- rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
928
- return result;
929
- }
930
-
931
- /*
932
- * call-seq: to_json_raw(*args)
933
- *
934
- * This method creates a JSON text from the result of a call to
935
- * to_json_raw_object of this String.
936
- */
937
- static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
938
- {
939
- VALUE obj = mString_to_json_raw_object(self);
940
- Check_Type(obj, T_HASH);
941
- return mHash_to_json(argc, argv, obj);
942
- }
943
-
944
- /*
945
- * call-seq: json_create(o)
946
- *
947
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
948
- * key "raw"). The Ruby String can be created by this module method.
949
- */
950
- static VALUE mString_Extend_json_create(VALUE self, VALUE o)
951
- {
952
- VALUE ary;
953
- Check_Type(o, T_HASH);
954
- ary = rb_hash_aref(o, rb_str_new2("raw"));
955
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
956
- }
957
-
958
846
  /*
959
847
  * call-seq: to_json(*)
960
848
  *
@@ -1092,10 +980,12 @@ static inline VALUE vstate_get(struct generate_json_data *data)
1092
980
  return data->vstate;
1093
981
  }
1094
982
 
1095
- struct hash_foreach_arg {
1096
- struct generate_json_data *data;
1097
- int iter;
1098
- };
983
+ static VALUE
984
+ json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
985
+ {
986
+ VALUE proc_args[2] = {object, is_key};
987
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
988
+ }
1099
989
 
1100
990
  static VALUE
1101
991
  convert_string_subclass(VALUE key)
@@ -1112,6 +1002,145 @@ convert_string_subclass(VALUE key)
1112
1002
  return key_to_s;
1113
1003
  }
1114
1004
 
1005
+ static bool enc_utf8_compatible_p(int enc_idx)
1006
+ {
1007
+ if (enc_idx == usascii_encindex) return true;
1008
+ if (enc_idx == utf8_encindex) return true;
1009
+ return false;
1010
+ }
1011
+
1012
+ static VALUE encode_json_string_try(VALUE str)
1013
+ {
1014
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1015
+ }
1016
+
1017
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1018
+ {
1019
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1020
+ return Qundef;
1021
+ }
1022
+
1023
+ static inline bool valid_json_string_p(VALUE str)
1024
+ {
1025
+ int coderange = rb_enc_str_coderange(str);
1026
+
1027
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
1028
+ return true;
1029
+ }
1030
+
1031
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
1032
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
1033
+ }
1034
+
1035
+ return false;
1036
+ }
1037
+
1038
+ static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
1039
+ {
1040
+ if (RB_LIKELY(valid_json_string_p(str))) {
1041
+ return str;
1042
+ }
1043
+
1044
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
1045
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
1046
+ if (coerced_str != str) {
1047
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
1048
+ if (!valid_json_string_p(coerced_str)) {
1049
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
1050
+ }
1051
+ } else {
1052
+ // as_json could return another type than T_STRING
1053
+ if (is_key) {
1054
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
1055
+ }
1056
+ }
1057
+
1058
+ return coerced_str;
1059
+ }
1060
+ }
1061
+
1062
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
1063
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1064
+ switch (rb_enc_str_coderange(utf8_string)) {
1065
+ case ENC_CODERANGE_7BIT:
1066
+ return utf8_string;
1067
+ case ENC_CODERANGE_VALID:
1068
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1069
+ // TODO: Raise in 3.0.0
1070
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1071
+ return utf8_string;
1072
+ break;
1073
+ }
1074
+ }
1075
+
1076
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1077
+ }
1078
+
1079
+ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1080
+ {
1081
+ fbuffer_append_char(buffer, '"');
1082
+
1083
+ long len;
1084
+ search_state search;
1085
+ search.buffer = buffer;
1086
+ RSTRING_GETMEM(obj, search.ptr, len);
1087
+ search.cursor = search.ptr;
1088
+ search.end = search.ptr + len;
1089
+
1090
+ #ifdef HAVE_SIMD
1091
+ search.matches_mask = 0;
1092
+ search.has_matches = false;
1093
+ search.chunk_base = NULL;
1094
+ #endif /* HAVE_SIMD */
1095
+
1096
+ switch (rb_enc_str_coderange(obj)) {
1097
+ case ENC_CODERANGE_7BIT:
1098
+ case ENC_CODERANGE_VALID:
1099
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1100
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1101
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1102
+ convert_UTF8_to_script_safe_JSON(&search);
1103
+ } else {
1104
+ convert_UTF8_to_JSON(&search);
1105
+ }
1106
+ break;
1107
+ default:
1108
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1109
+ break;
1110
+ }
1111
+ fbuffer_append_char(buffer, '"');
1112
+ }
1113
+
1114
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1115
+ {
1116
+ obj = ensure_valid_encoding(data, obj, false, false);
1117
+ raw_generate_json_string(buffer, data, obj);
1118
+ }
1119
+
1120
+ struct hash_foreach_arg {
1121
+ VALUE hash;
1122
+ struct generate_json_data *data;
1123
+ int first_key_type;
1124
+ bool first;
1125
+ bool mixed_keys_encountered;
1126
+ };
1127
+
1128
+ NOINLINE()
1129
+ static void
1130
+ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1131
+ {
1132
+ if (arg->mixed_keys_encountered) {
1133
+ return;
1134
+ }
1135
+ arg->mixed_keys_encountered = true;
1136
+
1137
+ JSON_Generator_State *state = arg->data->state;
1138
+ if (state->on_duplicate_key != JSON_IGNORE) {
1139
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1140
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1141
+ }
1142
+ }
1143
+
1115
1144
  static int
1116
1145
  json_object_i(VALUE key, VALUE val, VALUE _arg)
1117
1146
  {
@@ -1122,21 +1151,33 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1122
1151
  JSON_Generator_State *state = data->state;
1123
1152
 
1124
1153
  long depth = state->depth;
1125
- int j;
1154
+ int key_type = rb_type(key);
1155
+
1156
+ if (arg->first) {
1157
+ arg->first = false;
1158
+ arg->first_key_type = key_type;
1159
+ }
1160
+ else {
1161
+ fbuffer_append_char(buffer, ',');
1162
+ }
1126
1163
 
1127
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
1128
1164
  if (RB_UNLIKELY(data->state->object_nl)) {
1129
1165
  fbuffer_append_str(buffer, data->state->object_nl);
1130
1166
  }
1131
1167
  if (RB_UNLIKELY(data->state->indent)) {
1132
- for (j = 0; j < depth; j++) {
1133
- fbuffer_append_str(buffer, data->state->indent);
1134
- }
1168
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1135
1169
  }
1136
1170
 
1137
1171
  VALUE key_to_s;
1138
- switch(rb_type(key)) {
1172
+ bool as_json_called = false;
1173
+
1174
+ start:
1175
+ switch (key_type) {
1139
1176
  case T_STRING:
1177
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1178
+ json_inspect_hash_with_mixed_keys(arg);
1179
+ }
1180
+
1140
1181
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1141
1182
  key_to_s = key;
1142
1183
  } else {
@@ -1144,15 +1185,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1144
1185
  }
1145
1186
  break;
1146
1187
  case T_SYMBOL:
1188
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1189
+ json_inspect_hash_with_mixed_keys(arg);
1190
+ }
1191
+
1147
1192
  key_to_s = rb_sym2str(key);
1148
1193
  break;
1149
1194
  default:
1195
+ if (data->state->strict) {
1196
+ if (RTEST(data->state->as_json) && !as_json_called) {
1197
+ key = json_call_as_json(data->state, key, Qtrue);
1198
+ key_type = rb_type(key);
1199
+ as_json_called = true;
1200
+ goto start;
1201
+ } else {
1202
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
1203
+ }
1204
+ }
1150
1205
  key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
1151
1206
  break;
1152
1207
  }
1153
1208
 
1209
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
1210
+
1154
1211
  if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
1155
- generate_json_string(buffer, data, key_to_s);
1212
+ raw_generate_json_string(buffer, data, key_to_s);
1156
1213
  } else {
1157
1214
  generate_json(buffer, data, key_to_s);
1158
1215
  }
@@ -1161,7 +1218,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1161
1218
  if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1162
1219
  generate_json(buffer, data, val);
1163
1220
 
1164
- arg->iter++;
1165
1221
  return ST_CONTINUE;
1166
1222
  }
1167
1223
 
@@ -1170,14 +1226,13 @@ static inline long increase_depth(struct generate_json_data *data)
1170
1226
  JSON_Generator_State *state = data->state;
1171
1227
  long depth = ++state->depth;
1172
1228
  if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1173
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1229
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --state->depth);
1174
1230
  }
1175
1231
  return depth;
1176
1232
  }
1177
1233
 
1178
1234
  static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1179
1235
  {
1180
- int j;
1181
1236
  long depth = increase_depth(data);
1182
1237
 
1183
1238
  if (RHASH_SIZE(obj) == 0) {
@@ -1189,8 +1244,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1189
1244
  fbuffer_append_char(buffer, '{');
1190
1245
 
1191
1246
  struct hash_foreach_arg arg = {
1247
+ .hash = obj,
1192
1248
  .data = data,
1193
- .iter = 0,
1249
+ .first = true,
1194
1250
  };
1195
1251
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
1196
1252
 
@@ -1198,9 +1254,7 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1198
1254
  if (RB_UNLIKELY(data->state->object_nl)) {
1199
1255
  fbuffer_append_str(buffer, data->state->object_nl);
1200
1256
  if (RB_UNLIKELY(data->state->indent)) {
1201
- for (j = 0; j < depth; j++) {
1202
- fbuffer_append_str(buffer, data->state->indent);
1203
- }
1257
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1204
1258
  }
1205
1259
  }
1206
1260
  fbuffer_append_char(buffer, '}');
@@ -1208,7 +1262,6 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1208
1262
 
1209
1263
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1210
1264
  {
1211
- int i, j;
1212
1265
  long depth = increase_depth(data);
1213
1266
 
1214
1267
  if (RARRAY_LEN(obj) == 0) {
@@ -1219,15 +1272,13 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
1219
1272
 
1220
1273
  fbuffer_append_char(buffer, '[');
1221
1274
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1222
- for(i = 0; i < RARRAY_LEN(obj); i++) {
1275
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
1223
1276
  if (i > 0) {
1224
1277
  fbuffer_append_char(buffer, ',');
1225
1278
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1226
1279
  }
1227
1280
  if (RB_UNLIKELY(data->state->indent)) {
1228
- for (j = 0; j < depth; j++) {
1229
- fbuffer_append_str(buffer, data->state->indent);
1230
- }
1281
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1231
1282
  }
1232
1283
  generate_json(buffer, data, RARRAY_AREF(obj, i));
1233
1284
  }
@@ -1235,93 +1286,12 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
1235
1286
  if (RB_UNLIKELY(data->state->array_nl)) {
1236
1287
  fbuffer_append_str(buffer, data->state->array_nl);
1237
1288
  if (RB_UNLIKELY(data->state->indent)) {
1238
- for (j = 0; j < depth; j++) {
1239
- fbuffer_append_str(buffer, data->state->indent);
1240
- }
1289
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1241
1290
  }
1242
1291
  }
1243
1292
  fbuffer_append_char(buffer, ']');
1244
1293
  }
1245
1294
 
1246
- static inline int enc_utf8_compatible_p(int enc_idx)
1247
- {
1248
- if (enc_idx == usascii_encindex) return 1;
1249
- if (enc_idx == utf8_encindex) return 1;
1250
- return 0;
1251
- }
1252
-
1253
- static VALUE encode_json_string_try(VALUE str)
1254
- {
1255
- return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1256
- }
1257
-
1258
- static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1259
- {
1260
- raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1261
- return Qundef;
1262
- }
1263
-
1264
- static inline VALUE ensure_valid_encoding(VALUE str)
1265
- {
1266
- int encindex = RB_ENCODING_GET(str);
1267
- VALUE utf8_string;
1268
- if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
1269
- if (encindex == binary_encindex) {
1270
- utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1271
- switch (rb_enc_str_coderange(utf8_string)) {
1272
- case ENC_CODERANGE_7BIT:
1273
- return utf8_string;
1274
- case ENC_CODERANGE_VALID:
1275
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1276
- // TODO: Raise in 3.0.0
1277
- rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1278
- return utf8_string;
1279
- break;
1280
- }
1281
- }
1282
-
1283
- str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1284
- }
1285
- return str;
1286
- }
1287
-
1288
- static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1289
- {
1290
- obj = ensure_valid_encoding(obj);
1291
-
1292
- fbuffer_append_char(buffer, '"');
1293
-
1294
- long len;
1295
- search_state search;
1296
- search.buffer = buffer;
1297
- RSTRING_GETMEM(obj, search.ptr, len);
1298
- search.cursor = search.ptr;
1299
- search.end = search.ptr + len;
1300
-
1301
- #ifdef HAVE_SIMD
1302
- search.matches_mask = 0;
1303
- search.has_matches = false;
1304
- search.chunk_base = NULL;
1305
- #endif /* HAVE_SIMD */
1306
-
1307
- switch(rb_enc_str_coderange(obj)) {
1308
- case ENC_CODERANGE_7BIT:
1309
- case ENC_CODERANGE_VALID:
1310
- if (RB_UNLIKELY(data->state->ascii_only)) {
1311
- convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1312
- } else if (RB_UNLIKELY(data->state->script_safe)) {
1313
- convert_UTF8_to_script_safe_JSON(&search);
1314
- } else {
1315
- convert_UTF8_to_JSON(&search);
1316
- }
1317
- break;
1318
- default:
1319
- raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1320
- break;
1321
- }
1322
- fbuffer_append_char(buffer, '"');
1323
- }
1324
-
1325
1295
  static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1326
1296
  {
1327
1297
  VALUE tmp;
@@ -1389,7 +1359,7 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1389
1359
  /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1390
1360
  if (!allow_nan) {
1391
1361
  if (data->state->strict && data->state->as_json) {
1392
- VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1362
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
1393
1363
  if (casted_obj != obj) {
1394
1364
  increase_depth(data);
1395
1365
  generate_json(buffer, data, casted_obj);
@@ -1406,12 +1376,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1406
1376
  }
1407
1377
 
1408
1378
  /* This implementation writes directly into the buffer. We reserve
1409
- * the 28 characters that fpconv_dtoa states as its maximum.
1379
+ * the 32 characters that fpconv_dtoa states as its maximum.
1410
1380
  */
1411
- fbuffer_inc_capa(buffer, 28);
1381
+ fbuffer_inc_capa(buffer, 32);
1412
1382
  char* d = buffer->ptr + buffer->len;
1413
1383
  int len = fpconv_dtoa(value, d);
1414
-
1415
1384
  /* fpconv_dtoa converts a float to its shortest string representation,
1416
1385
  * but it adds a ".0" if this is a plain integer.
1417
1386
  */
@@ -1461,7 +1430,16 @@ start:
1461
1430
  break;
1462
1431
  case T_STRING:
1463
1432
  if (klass != rb_cString) goto general;
1464
- generate_json_string(buffer, data, obj);
1433
+
1434
+ if (RB_LIKELY(valid_json_string_p(obj))) {
1435
+ raw_generate_json_string(buffer, data, obj);
1436
+ } else if (as_json_called) {
1437
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1438
+ } else {
1439
+ obj = ensure_valid_encoding(data, obj, false, false);
1440
+ as_json_called = true;
1441
+ goto start;
1442
+ }
1465
1443
  break;
1466
1444
  case T_SYMBOL:
1467
1445
  generate_json_symbol(buffer, data, obj);
@@ -1478,7 +1456,7 @@ start:
1478
1456
  general:
1479
1457
  if (data->state->strict) {
1480
1458
  if (RTEST(data->state->as_json) && !as_json_called) {
1481
- obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1459
+ obj = json_call_as_json(data->state, obj, Qfalse);
1482
1460
  as_json_called = true;
1483
1461
  goto start;
1484
1462
  } else {
@@ -1497,16 +1475,14 @@ static VALUE generate_json_try(VALUE d)
1497
1475
 
1498
1476
  data->func(data->buffer, data, data->obj);
1499
1477
 
1500
- return Qnil;
1478
+ return fbuffer_finalize(data->buffer);
1501
1479
  }
1502
1480
 
1503
- static VALUE generate_json_rescue(VALUE d, VALUE exc)
1481
+ static VALUE generate_json_ensure(VALUE d)
1504
1482
  {
1505
1483
  struct generate_json_data *data = (struct generate_json_data *)d;
1506
1484
  fbuffer_free(data->buffer);
1507
1485
 
1508
- rb_exc_raise(exc);
1509
-
1510
1486
  return Qundef;
1511
1487
  }
1512
1488
 
@@ -1527,9 +1503,7 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1527
1503
  .obj = obj,
1528
1504
  .func = func
1529
1505
  };
1530
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1531
-
1532
- return fbuffer_finalize(&buffer);
1506
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1533
1507
  }
1534
1508
 
1535
1509
  /* call-seq:
@@ -1545,10 +1519,37 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1545
1519
  rb_check_arity(argc, 1, 2);
1546
1520
  VALUE obj = argv[0];
1547
1521
  VALUE io = argc > 1 ? argv[1] : Qnil;
1548
- VALUE result = cState_partial_generate(self, obj, generate_json, io);
1522
+ return cState_partial_generate(self, obj, generate_json, io);
1523
+ }
1524
+
1525
+ static VALUE cState_generate_new(int argc, VALUE *argv, VALUE self)
1526
+ {
1527
+ rb_check_arity(argc, 1, 2);
1528
+ VALUE obj = argv[0];
1529
+ VALUE io = argc > 1 ? argv[1] : Qnil;
1530
+
1549
1531
  GET_STATE(self);
1550
- (void)state;
1551
- return result;
1532
+
1533
+ JSON_Generator_State new_state;
1534
+ MEMCPY(&new_state, state, JSON_Generator_State, 1);
1535
+
1536
+ // FIXME: depth shouldn't be part of JSON_Generator_State, as that prevents it from being used concurrently.
1537
+ new_state.depth = 0;
1538
+
1539
+ char stack_buffer[FBUFFER_STACK_SIZE];
1540
+ FBuffer buffer = {
1541
+ .io = RTEST(io) ? io : Qfalse,
1542
+ };
1543
+ fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
1544
+
1545
+ struct generate_json_data data = {
1546
+ .buffer = &buffer,
1547
+ .vstate = Qfalse,
1548
+ .state = &new_state,
1549
+ .obj = obj,
1550
+ .func = generate_json
1551
+ };
1552
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1552
1553
  }
1553
1554
 
1554
1555
  static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
@@ -1899,6 +1900,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1899
1900
  return Qnil;
1900
1901
  }
1901
1902
 
1903
+ static VALUE cState_allow_duplicate_key_p(VALUE self)
1904
+ {
1905
+ GET_STATE(self);
1906
+ switch (state->on_duplicate_key) {
1907
+ case JSON_IGNORE:
1908
+ return Qtrue;
1909
+ case JSON_DEPRECATED:
1910
+ return Qnil;
1911
+ default:
1912
+ return Qfalse;
1913
+ }
1914
+ }
1915
+
1902
1916
  /*
1903
1917
  * call-seq: depth
1904
1918
  *
@@ -1956,15 +1970,30 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
1956
1970
  return Qnil;
1957
1971
  }
1958
1972
 
1973
+ struct configure_state_data {
1974
+ JSON_Generator_State *state;
1975
+ VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
1976
+ };
1977
+
1978
+ static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
1979
+ {
1980
+ if (RTEST(data->vstate)) {
1981
+ RB_OBJ_WRITE(data->vstate, field, value);
1982
+ } else {
1983
+ *field = value;
1984
+ }
1985
+ }
1986
+
1959
1987
  static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1960
1988
  {
1961
- JSON_Generator_State *state = (JSON_Generator_State *)_arg;
1989
+ struct configure_state_data *data = (struct configure_state_data *)_arg;
1990
+ JSON_Generator_State *state = data->state;
1962
1991
 
1963
- if (key == sym_indent) { state->indent = string_config(val); }
1964
- else if (key == sym_space) { state->space = string_config(val); }
1965
- else if (key == sym_space_before) { state->space_before = string_config(val); }
1966
- else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1967
- else if (key == sym_array_nl) { state->array_nl = string_config(val); }
1992
+ if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
1993
+ else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
1994
+ else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
1995
+ else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
1996
+ else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
1968
1997
  else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1969
1998
  else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1970
1999
  else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
@@ -1973,11 +2002,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1973
2002
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1974
2003
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1975
2004
  else if (key == sym_strict) { state->strict = RTEST(val); }
1976
- else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
2005
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
2006
+ else if (key == sym_as_json) {
2007
+ VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
2008
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
2009
+ state_write_value(data, &state->as_json, proc);
2010
+ }
1977
2011
  return ST_CONTINUE;
1978
2012
  }
1979
2013
 
1980
- static void configure_state(JSON_Generator_State *state, VALUE config)
2014
+ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
1981
2015
  {
1982
2016
  if (!RTEST(config)) return;
1983
2017
 
@@ -1985,15 +2019,20 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
1985
2019
 
1986
2020
  if (!RHASH_SIZE(config)) return;
1987
2021
 
2022
+ struct configure_state_data data = {
2023
+ .state = state,
2024
+ .vstate = vstate
2025
+ };
2026
+
1988
2027
  // We assume in most cases few keys are set so it's faster to go over
1989
2028
  // the provided keys than to check all possible keys.
1990
- rb_hash_foreach(config, configure_state_i, (VALUE)state);
2029
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data);
1991
2030
  }
1992
2031
 
1993
2032
  static VALUE cState_configure(VALUE self, VALUE opts)
1994
2033
  {
1995
2034
  GET_STATE(self);
1996
- configure_state(state, opts);
2035
+ configure_state(state, self, opts);
1997
2036
  return self;
1998
2037
  }
1999
2038
 
@@ -2001,7 +2040,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2001
2040
  {
2002
2041
  JSON_Generator_State state = {0};
2003
2042
  state_init(&state);
2004
- configure_state(&state, opts);
2043
+ configure_state(&state, Qfalse, opts);
2005
2044
 
2006
2045
  char stack_buffer[FBUFFER_STACK_SIZE];
2007
2046
  FBuffer buffer = {
@@ -2016,9 +2055,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2016
2055
  .obj = obj,
2017
2056
  .func = generate_json,
2018
2057
  };
2019
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
2020
-
2021
- return fbuffer_finalize(&buffer);
2058
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
2022
2059
  }
2023
2060
 
2024
2061
  /*
@@ -2088,7 +2125,9 @@ void Init_generator(void)
2088
2125
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
2089
2126
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
2090
2127
  rb_define_method(cState, "generate", cState_generate, -1);
2091
- rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2128
+ rb_define_method(cState, "generate_new", cState_generate_new, -1); // :nodoc:
2129
+
2130
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2092
2131
 
2093
2132
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
2094
2133
 
@@ -2117,13 +2156,7 @@ void Init_generator(void)
2117
2156
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
2118
2157
 
2119
2158
  VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
2120
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
2121
2159
  rb_define_method(mString, "to_json", mString_to_json, -1);
2122
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
2123
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
2124
-
2125
- mString_Extend = rb_define_module_under(mString, "Extend");
2126
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
2127
2160
 
2128
2161
  VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
2129
2162
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
@@ -2160,6 +2193,7 @@ void Init_generator(void)
2160
2193
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2161
2194
  sym_strict = ID2SYM(rb_intern("strict"));
2162
2195
  sym_as_json = ID2SYM(rb_intern("as_json"));
2196
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
2163
2197
 
2164
2198
  usascii_encindex = rb_usascii_encindex();
2165
2199
  utf8_encindex = rb_utf8_encindex();
@@ -2168,7 +2202,7 @@ void Init_generator(void)
2168
2202
  rb_require("json/ext/generator/state");
2169
2203
 
2170
2204
 
2171
- switch(find_simd_implementation()) {
2205
+ switch (find_simd_implementation()) {
2172
2206
  #ifdef HAVE_SIMD
2173
2207
  #ifdef HAVE_SIMD_NEON
2174
2208
  case SIMD_NEON: