json 2.12.2 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,20 @@
1
- #include "ruby.h"
1
+ #include "../json.h"
2
2
  #include "../fbuffer/fbuffer.h"
3
3
  #include "../vendor/fpconv.c"
4
4
 
5
5
  #include <math.h>
6
6
  #include <ctype.h>
7
7
 
8
- #include "simd.h"
8
+ #include "../simd/simd.h"
9
9
 
10
10
  /* ruby api and some helpers */
11
11
 
12
+ enum duplicate_key_action {
13
+ JSON_DEPRECATED = 0,
14
+ JSON_IGNORE,
15
+ JSON_RAISE,
16
+ };
17
+
12
18
  typedef struct JSON_Generator_StateStruct {
13
19
  VALUE indent;
14
20
  VALUE space;
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
21
27
  long depth;
22
28
  long buffer_initial_length;
23
29
 
30
+ enum duplicate_key_action on_duplicate_key;
31
+
32
+ bool as_json_single_arg;
24
33
  bool allow_nan;
25
34
  bool ascii_only;
26
35
  bool script_safe;
27
36
  bool strict;
28
37
  } JSON_Generator_State;
29
38
 
30
- #ifndef RB_UNLIKELY
31
- #define RB_UNLIKELY(cond) (cond)
32
- #endif
33
-
34
- static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
39
+ static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
35
40
 
36
- static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
- static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
41
+ static ID i_to_s, i_to_json, i_new, i_encode;
42
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
38
43
  sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
44
 
40
45
 
@@ -55,6 +60,7 @@ struct generate_json_data {
55
60
  JSON_Generator_State *state;
56
61
  VALUE obj;
57
62
  generator_func func;
63
+ long depth;
58
64
  };
59
65
 
60
66
  static VALUE cState_from_state_s(VALUE self, VALUE opts);
@@ -76,23 +82,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
76
82
 
77
83
  static int usascii_encindex, utf8_encindex, binary_encindex;
78
84
 
79
- #ifdef RBIMPL_ATTR_NORETURN
80
- RBIMPL_ATTR_NORETURN()
81
- #endif
82
- static void raise_generator_error_str(VALUE invalid_object, VALUE str)
85
+ NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
83
86
  {
87
+ rb_enc_associate_index(str, utf8_encindex);
84
88
  VALUE exc = rb_exc_new_str(eGeneratorError, str);
85
89
  rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
86
90
  rb_exc_raise(exc);
87
91
  }
88
92
 
89
- #ifdef RBIMPL_ATTR_NORETURN
90
- RBIMPL_ATTR_NORETURN()
91
- #endif
92
93
  #ifdef RBIMPL_ATTR_FORMAT
93
94
  RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
94
95
  #endif
95
- static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
+ NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
97
  {
97
98
  va_list args;
98
99
  va_start(args, fmt);
@@ -127,18 +128,12 @@ typedef struct _search_state {
127
128
  #endif /* HAVE_SIMD */
128
129
  } search_state;
129
130
 
130
- #if (defined(__GNUC__ ) || defined(__clang__))
131
- #define FORCE_INLINE __attribute__((always_inline))
132
- #else
133
- #define FORCE_INLINE
134
- #endif
135
-
136
- static inline FORCE_INLINE void search_flush(search_state *search)
131
+ ALWAYS_INLINE(static) void search_flush(search_state *search)
137
132
  {
138
133
  // Do not remove this conditional without profiling, specifically escape-heavy text.
139
134
  // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
- // For back-to-back characters that need to be escaped, specifcally for the SIMD code paths, this method
141
- // will be called just before calling escape_UTF8_char_basic. There will be no characers to append for the
135
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
136
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
137
  // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
138
  // nothing needs to be flushed, we can save a few memory references with this conditional.
144
139
  if (search->ptr > search->cursor) {
@@ -176,7 +171,7 @@ static inline unsigned char search_escape_basic(search_state *search)
176
171
  return 0;
177
172
  }
178
173
 
179
- static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
174
+ ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
180
175
  {
181
176
  const unsigned char ch = (unsigned char)*search->ptr;
182
177
  switch (ch) {
@@ -263,7 +258,7 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
263
258
 
264
259
  #ifdef HAVE_SIMD
265
260
 
266
- static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
261
+ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
262
  {
268
263
  // Flush the buffer so everything up until the last 'len' characters are unflushed.
269
264
  search_flush(search);
@@ -286,7 +281,7 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
286
281
 
287
282
  #ifdef HAVE_SIMD_NEON
288
283
 
289
- static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
284
+ ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search)
290
285
  {
291
286
  uint64_t mask = search->matches_mask;
292
287
  uint32_t index = trailing_zeros64(mask) >> 2;
@@ -304,28 +299,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304
299
  return 1;
305
300
  }
306
301
 
307
- // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
- static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309
- {
310
- const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
- const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
- return mask & 0x8888888888888888ull;
313
- }
314
-
315
- static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316
- {
317
- uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
-
319
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
- const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
-
323
- uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
- uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325
-
326
- return neon_match_mask(needs_escape);
327
- }
328
-
329
302
  static inline unsigned char search_escape_basic_neon(search_state *search)
330
303
  {
331
304
  if (RB_UNLIKELY(search->has_matches)) {
@@ -333,7 +306,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
333
306
  if (search->matches_mask > 0) {
334
307
  return neon_next_match(search);
335
308
  } else {
336
- // neon_next_match will only advance search->ptr up to the last matching character.
309
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
310
  // Skip over any characters in the last chunk that occur after the last match.
338
311
  search->has_matches = false;
339
312
  search->ptr = search->chunk_end;
@@ -342,67 +315,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
342
315
 
343
316
  /*
344
317
  * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
- * need to be escaped.
346
- *
318
+ * need to be escaped.
319
+ *
347
320
  * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
- *
321
+ *
349
322
  * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
323
  * the vector instructions may work on larger vectors.
351
- *
324
+ *
352
325
  * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
353
- *
354
- * lower_bound: [20 20 20 20 20 20 20 20]
355
- * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
- * dblquote: [22 22 22 22 22 22 22 22]
357
- *
358
- * Next we load the first chunk of the ptr:
326
+ *
327
+ * lower_bound: [20 20 20 20 20 20 20 20]
328
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
329
+ * dblquote: [22 22 22 22 22 22 22 22]
330
+ *
331
+ * Next we load the first chunk of the ptr:
359
332
  * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
- *
333
+ *
361
334
  * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
335
  * as no bytes are less than 32 (0x20):
363
336
  * [0 0 0 0 0 0 0 0]
364
- *
337
+ *
365
338
  * Next, we check if any byte in chunk is equal to a backslash:
366
339
  * [0 0 0 FF 0 0 0 0]
367
- *
340
+ *
368
341
  * Finally we check if any byte in chunk is equal to a double quote:
369
- * [FF 0 0 0 0 0 0 0]
370
- *
342
+ * [FF 0 0 0 0 0 0 0]
343
+ *
371
344
  * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
345
  * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
346
  * This is the needs_escape vector and it is equal to:
374
- * [FF 0 0 FF 0 0 0 0]
375
- *
347
+ * [FF 0 0 FF 0 0 0 0]
348
+ *
376
349
  * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
350
  * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
- *
351
+ *
379
352
  * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
353
  * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
354
  * have at least one byte that needs to be escaped.
382
355
  */
383
- while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
- uint64_t mask = neon_rules_update(search->ptr);
385
356
 
386
- if (!mask) {
387
- search->ptr += sizeof(uint8x16_t);
388
- continue;
389
- }
390
- search->matches_mask = mask;
357
+ if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
391
358
  search->has_matches = true;
392
359
  search->chunk_base = search->ptr;
393
360
  search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
361
  return neon_next_match(search);
395
362
  }
396
363
 
397
- // There are fewer than 16 bytes left.
364
+ // There are fewer than 16 bytes left.
398
365
  unsigned long remaining = (search->end - search->ptr);
399
366
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
367
  char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
368
 
402
- uint64_t mask = neon_rules_update(s);
369
+ uint64_t mask = compute_chunk_mask_neon(s);
403
370
 
404
371
  if (!mask) {
405
- // Nothing to escape, ensure search_flush doesn't do anything by setting
372
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
373
  // search->cursor to search->ptr.
407
374
  fbuffer_consumed(search->buffer, remaining);
408
375
  search->ptr = search->end;
@@ -428,12 +395,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428
395
 
429
396
  #ifdef HAVE_SIMD_SSE2
430
397
 
431
- #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
- #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
- #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
- #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435
-
436
- static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
398
+ ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search)
437
399
  {
438
400
  int mask = search->matches_mask;
439
401
  int index = trailing_zeros(mask);
@@ -457,26 +419,14 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457
419
  #define TARGET_SSE2
458
420
  #endif
459
421
 
460
- static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
- {
462
- __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463
-
464
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
- __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
- __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
- __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
- return _mm_movemask_epi8(needs_escape);
470
- }
471
-
472
- static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
422
+ ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search)
473
423
  {
474
424
  if (RB_UNLIKELY(search->has_matches)) {
475
425
  // There are more matches if search->matches_mask > 0.
476
426
  if (search->matches_mask > 0) {
477
427
  return sse2_next_match(search);
478
428
  } else {
479
- // sse2_next_match will only advance search->ptr up to the last matching character.
429
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
430
  // Skip over any characters in the last chunk that occur after the last match.
481
431
  search->has_matches = false;
482
432
  if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
@@ -487,29 +437,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487
437
  }
488
438
  }
489
439
 
490
- while (search->ptr + sizeof(__m128i) <= search->end) {
491
- int needs_escape_mask = sse2_update(search->ptr);
492
-
493
- if (needs_escape_mask == 0) {
494
- search->ptr += sizeof(__m128i);
495
- continue;
496
- }
497
-
440
+ if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
498
441
  search->has_matches = true;
499
- search->matches_mask = needs_escape_mask;
500
442
  search->chunk_base = search->ptr;
443
+ search->chunk_end = search->ptr + sizeof(__m128i);
501
444
  return sse2_next_match(search);
502
445
  }
503
446
 
504
- // There are fewer than 16 bytes left.
447
+ // There are fewer than 16 bytes left.
505
448
  unsigned long remaining = (search->end - search->ptr);
506
449
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
450
  char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
451
 
509
- int needs_escape_mask = sse2_update(s);
452
+ int needs_escape_mask = compute_chunk_mask_sse2(s);
510
453
 
511
454
  if (needs_escape_mask == 0) {
512
- // Nothing to escape, ensure search_flush doesn't do anything by setting
455
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
456
  // search->cursor to search->ptr.
514
457
  fbuffer_consumed(search->buffer, remaining);
515
458
  search->ptr = search->end;
@@ -638,7 +581,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
638
581
  return 0;
639
582
  }
640
583
 
641
- static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
584
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
585
+ {
642
586
  const unsigned char ch = (unsigned char)*search->ptr;
643
587
  switch (ch_len) {
644
588
  case 1: {
@@ -668,7 +612,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
668
612
 
669
613
  uint32_t wchar = 0;
670
614
 
671
- switch(ch_len) {
615
+ switch (ch_len) {
672
616
  case 2:
673
617
  wchar = ch & 0x1F;
674
618
  break;
@@ -828,7 +772,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
828
772
  * _state_ is a JSON::State object, that can also be used to configure the
829
773
  * produced JSON string output further.
830
774
  */
831
- static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
775
+ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
776
+ {
832
777
  rb_check_arity(argc, 0, 1);
833
778
  VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
834
779
  return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
@@ -885,17 +830,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
885
830
  return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
886
831
  }
887
832
 
888
- /*
889
- * call-seq: String.included(modul)
890
- *
891
- * Extends _modul_ with the String::Extend module.
892
- */
893
- static VALUE mString_included_s(VALUE self, VALUE modul) {
894
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
895
- rb_call_super(1, &modul);
896
- return result;
897
- }
898
-
899
833
  /*
900
834
  * call-seq: to_json(*)
901
835
  *
@@ -910,51 +844,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
910
844
  return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
911
845
  }
912
846
 
913
- /*
914
- * call-seq: to_json_raw_object()
915
- *
916
- * This method creates a raw object hash, that can be nested into
917
- * other data structures and will be generated as a raw string. This
918
- * method should be used, if you want to convert raw strings to JSON
919
- * instead of UTF-8 strings, e. g. binary data.
920
- */
921
- static VALUE mString_to_json_raw_object(VALUE self)
922
- {
923
- VALUE ary;
924
- VALUE result = rb_hash_new();
925
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
926
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
927
- rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
928
- return result;
929
- }
930
-
931
- /*
932
- * call-seq: to_json_raw(*args)
933
- *
934
- * This method creates a JSON text from the result of a call to
935
- * to_json_raw_object of this String.
936
- */
937
- static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
938
- {
939
- VALUE obj = mString_to_json_raw_object(self);
940
- Check_Type(obj, T_HASH);
941
- return mHash_to_json(argc, argv, obj);
942
- }
943
-
944
- /*
945
- * call-seq: json_create(o)
946
- *
947
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
948
- * key "raw"). The Ruby String can be created by this module method.
949
- */
950
- static VALUE mString_Extend_json_create(VALUE self, VALUE o)
951
- {
952
- VALUE ary;
953
- Check_Type(o, T_HASH);
954
- ary = rb_hash_aref(o, rb_str_new2("raw"));
955
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
956
- }
957
-
958
847
  /*
959
848
  * call-seq: to_json(*)
960
849
  *
@@ -1038,11 +927,6 @@ static size_t State_memsize(const void *ptr)
1038
927
  return sizeof(JSON_Generator_State);
1039
928
  }
1040
929
 
1041
- #ifndef HAVE_RB_EXT_RACTOR_SAFE
1042
- # undef RUBY_TYPED_FROZEN_SHAREABLE
1043
- # define RUBY_TYPED_FROZEN_SHAREABLE 0
1044
- #endif
1045
-
1046
930
  static const rb_data_type_t JSON_Generator_State_type = {
1047
931
  "JSON/Generator/State",
1048
932
  {
@@ -1084,18 +968,24 @@ static void vstate_spill(struct generate_json_data *data)
1084
968
  RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
1085
969
  }
1086
970
 
1087
- static inline VALUE vstate_get(struct generate_json_data *data)
971
+ static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj)
1088
972
  {
1089
973
  if (RB_UNLIKELY(!data->vstate)) {
1090
974
  vstate_spill(data);
1091
975
  }
1092
- return data->vstate;
976
+ GET_STATE(data->vstate);
977
+ state->depth = data->depth;
978
+ VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate);
979
+ // no need to restore state->depth, vstate is just a temporary State
980
+ return tmp;
1093
981
  }
1094
982
 
1095
- struct hash_foreach_arg {
1096
- struct generate_json_data *data;
1097
- int iter;
1098
- };
983
+ static VALUE
984
+ json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
985
+ {
986
+ VALUE proc_args[2] = {object, is_key};
987
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
988
+ }
1099
989
 
1100
990
  static VALUE
1101
991
  convert_string_subclass(VALUE key)
@@ -1112,6 +1002,144 @@ convert_string_subclass(VALUE key)
1112
1002
  return key_to_s;
1113
1003
  }
1114
1004
 
1005
+ static bool enc_utf8_compatible_p(int enc_idx)
1006
+ {
1007
+ if (enc_idx == usascii_encindex) return true;
1008
+ if (enc_idx == utf8_encindex) return true;
1009
+ return false;
1010
+ }
1011
+
1012
+ static VALUE encode_json_string_try(VALUE str)
1013
+ {
1014
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1015
+ }
1016
+
1017
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1018
+ {
1019
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1020
+ return Qundef;
1021
+ }
1022
+
1023
+ static inline bool valid_json_string_p(VALUE str)
1024
+ {
1025
+ int coderange = rb_enc_str_coderange(str);
1026
+
1027
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
1028
+ return true;
1029
+ }
1030
+
1031
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
1032
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
1033
+ }
1034
+
1035
+ return false;
1036
+ }
1037
+
1038
+ static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
1039
+ {
1040
+ if (RB_LIKELY(valid_json_string_p(str))) {
1041
+ return str;
1042
+ }
1043
+
1044
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
1045
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
1046
+ if (coerced_str != str) {
1047
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
1048
+ if (!valid_json_string_p(coerced_str)) {
1049
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
1050
+ }
1051
+ } else {
1052
+ // as_json could return another type than T_STRING
1053
+ if (is_key) {
1054
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
1055
+ }
1056
+ }
1057
+
1058
+ return coerced_str;
1059
+ }
1060
+ }
1061
+
1062
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
1063
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1064
+ switch (rb_enc_str_coderange(utf8_string)) {
1065
+ case ENC_CODERANGE_7BIT:
1066
+ return utf8_string;
1067
+ case ENC_CODERANGE_VALID:
1068
+ // For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work.
1069
+ // TODO: Raise in 3.0.0
1070
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1071
+ return utf8_string;
1072
+ break;
1073
+ }
1074
+ }
1075
+
1076
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1077
+ }
1078
+
1079
+ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1080
+ {
1081
+ fbuffer_append_char(buffer, '"');
1082
+
1083
+ long len;
1084
+ search_state search;
1085
+ search.buffer = buffer;
1086
+ RSTRING_GETMEM(obj, search.ptr, len);
1087
+ search.cursor = search.ptr;
1088
+ search.end = search.ptr + len;
1089
+
1090
+ #ifdef HAVE_SIMD
1091
+ search.matches_mask = 0;
1092
+ search.has_matches = false;
1093
+ search.chunk_base = NULL;
1094
+ #endif /* HAVE_SIMD */
1095
+
1096
+ switch (rb_enc_str_coderange(obj)) {
1097
+ case ENC_CODERANGE_7BIT:
1098
+ case ENC_CODERANGE_VALID:
1099
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1100
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1101
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1102
+ convert_UTF8_to_script_safe_JSON(&search);
1103
+ } else {
1104
+ convert_UTF8_to_JSON(&search);
1105
+ }
1106
+ break;
1107
+ default:
1108
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1109
+ break;
1110
+ }
1111
+ fbuffer_append_char(buffer, '"');
1112
+ }
1113
+
1114
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1115
+ {
1116
+ obj = ensure_valid_encoding(data, obj, false, false);
1117
+ raw_generate_json_string(buffer, data, obj);
1118
+ }
1119
+
1120
+ struct hash_foreach_arg {
1121
+ VALUE hash;
1122
+ struct generate_json_data *data;
1123
+ int first_key_type;
1124
+ bool first;
1125
+ bool mixed_keys_encountered;
1126
+ };
1127
+
1128
+ NOINLINE(static) void
1129
+ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1130
+ {
1131
+ if (arg->mixed_keys_encountered) {
1132
+ return;
1133
+ }
1134
+ arg->mixed_keys_encountered = true;
1135
+
1136
+ JSON_Generator_State *state = arg->data->state;
1137
+ if (state->on_duplicate_key != JSON_IGNORE) {
1138
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1139
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1140
+ }
1141
+ }
1142
+
1115
1143
  static int
1116
1144
  json_object_i(VALUE key, VALUE val, VALUE _arg)
1117
1145
  {
@@ -1121,22 +1149,34 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1121
1149
  FBuffer *buffer = data->buffer;
1122
1150
  JSON_Generator_State *state = data->state;
1123
1151
 
1124
- long depth = state->depth;
1125
- int j;
1152
+ long depth = data->depth;
1153
+ int key_type = rb_type(key);
1154
+
1155
+ if (arg->first) {
1156
+ arg->first = false;
1157
+ arg->first_key_type = key_type;
1158
+ }
1159
+ else {
1160
+ fbuffer_append_char(buffer, ',');
1161
+ }
1126
1162
 
1127
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
1128
1163
  if (RB_UNLIKELY(data->state->object_nl)) {
1129
1164
  fbuffer_append_str(buffer, data->state->object_nl);
1130
1165
  }
1131
1166
  if (RB_UNLIKELY(data->state->indent)) {
1132
- for (j = 0; j < depth; j++) {
1133
- fbuffer_append_str(buffer, data->state->indent);
1134
- }
1167
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1135
1168
  }
1136
1169
 
1137
1170
  VALUE key_to_s;
1138
- switch(rb_type(key)) {
1171
+ bool as_json_called = false;
1172
+
1173
+ start:
1174
+ switch (key_type) {
1139
1175
  case T_STRING:
1176
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1177
+ json_inspect_hash_with_mixed_keys(arg);
1178
+ }
1179
+
1140
1180
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1141
1181
  key_to_s = key;
1142
1182
  } else {
@@ -1144,15 +1184,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1144
1184
  }
1145
1185
  break;
1146
1186
  case T_SYMBOL:
1187
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1188
+ json_inspect_hash_with_mixed_keys(arg);
1189
+ }
1190
+
1147
1191
  key_to_s = rb_sym2str(key);
1148
1192
  break;
1149
1193
  default:
1194
+ if (data->state->strict) {
1195
+ if (RTEST(data->state->as_json) && !as_json_called) {
1196
+ key = json_call_as_json(data->state, key, Qtrue);
1197
+ key_type = rb_type(key);
1198
+ as_json_called = true;
1199
+ goto start;
1200
+ } else {
1201
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
1202
+ }
1203
+ }
1150
1204
  key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
1151
1205
  break;
1152
1206
  }
1153
1207
 
1208
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
1209
+
1154
1210
  if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
1155
- generate_json_string(buffer, data, key_to_s);
1211
+ raw_generate_json_string(buffer, data, key_to_s);
1156
1212
  } else {
1157
1213
  generate_json(buffer, data, key_to_s);
1158
1214
  }
@@ -1161,46 +1217,43 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1161
1217
  if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1162
1218
  generate_json(buffer, data, val);
1163
1219
 
1164
- arg->iter++;
1165
1220
  return ST_CONTINUE;
1166
1221
  }
1167
1222
 
1168
1223
  static inline long increase_depth(struct generate_json_data *data)
1169
1224
  {
1170
1225
  JSON_Generator_State *state = data->state;
1171
- long depth = ++state->depth;
1226
+ long depth = ++data->depth;
1172
1227
  if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1173
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1228
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth);
1174
1229
  }
1175
1230
  return depth;
1176
1231
  }
1177
1232
 
1178
1233
  static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1179
1234
  {
1180
- int j;
1181
1235
  long depth = increase_depth(data);
1182
1236
 
1183
1237
  if (RHASH_SIZE(obj) == 0) {
1184
1238
  fbuffer_append(buffer, "{}", 2);
1185
- --data->state->depth;
1239
+ --data->depth;
1186
1240
  return;
1187
1241
  }
1188
1242
 
1189
1243
  fbuffer_append_char(buffer, '{');
1190
1244
 
1191
1245
  struct hash_foreach_arg arg = {
1246
+ .hash = obj,
1192
1247
  .data = data,
1193
- .iter = 0,
1248
+ .first = true,
1194
1249
  };
1195
1250
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
1196
1251
 
1197
- depth = --data->state->depth;
1252
+ depth = --data->depth;
1198
1253
  if (RB_UNLIKELY(data->state->object_nl)) {
1199
1254
  fbuffer_append_str(buffer, data->state->object_nl);
1200
1255
  if (RB_UNLIKELY(data->state->indent)) {
1201
- for (j = 0; j < depth; j++) {
1202
- fbuffer_append_str(buffer, data->state->indent);
1203
- }
1256
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1204
1257
  }
1205
1258
  }
1206
1259
  fbuffer_append_char(buffer, '}');
@@ -1208,125 +1261,41 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1208
1261
 
1209
1262
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1210
1263
  {
1211
- int i, j;
1212
1264
  long depth = increase_depth(data);
1213
1265
 
1214
1266
  if (RARRAY_LEN(obj) == 0) {
1215
1267
  fbuffer_append(buffer, "[]", 2);
1216
- --data->state->depth;
1268
+ --data->depth;
1217
1269
  return;
1218
1270
  }
1219
1271
 
1220
1272
  fbuffer_append_char(buffer, '[');
1221
1273
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1222
- for(i = 0; i < RARRAY_LEN(obj); i++) {
1274
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
1223
1275
  if (i > 0) {
1224
1276
  fbuffer_append_char(buffer, ',');
1225
1277
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1226
1278
  }
1227
1279
  if (RB_UNLIKELY(data->state->indent)) {
1228
- for (j = 0; j < depth; j++) {
1229
- fbuffer_append_str(buffer, data->state->indent);
1230
- }
1280
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1231
1281
  }
1232
1282
  generate_json(buffer, data, RARRAY_AREF(obj, i));
1233
1283
  }
1234
- data->state->depth = --depth;
1284
+ data->depth = --depth;
1235
1285
  if (RB_UNLIKELY(data->state->array_nl)) {
1236
1286
  fbuffer_append_str(buffer, data->state->array_nl);
1237
1287
  if (RB_UNLIKELY(data->state->indent)) {
1238
- for (j = 0; j < depth; j++) {
1239
- fbuffer_append_str(buffer, data->state->indent);
1240
- }
1288
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1241
1289
  }
1242
1290
  }
1243
1291
  fbuffer_append_char(buffer, ']');
1244
1292
  }
1245
1293
 
1246
- static inline int enc_utf8_compatible_p(int enc_idx)
1247
- {
1248
- if (enc_idx == usascii_encindex) return 1;
1249
- if (enc_idx == utf8_encindex) return 1;
1250
- return 0;
1251
- }
1252
-
1253
- static VALUE encode_json_string_try(VALUE str)
1254
- {
1255
- return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1256
- }
1257
-
1258
- static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1259
- {
1260
- raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1261
- return Qundef;
1262
- }
1263
-
1264
- static inline VALUE ensure_valid_encoding(VALUE str)
1265
- {
1266
- int encindex = RB_ENCODING_GET(str);
1267
- VALUE utf8_string;
1268
- if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
1269
- if (encindex == binary_encindex) {
1270
- utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1271
- switch (rb_enc_str_coderange(utf8_string)) {
1272
- case ENC_CODERANGE_7BIT:
1273
- return utf8_string;
1274
- case ENC_CODERANGE_VALID:
1275
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1276
- // TODO: Raise in 3.0.0
1277
- rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1278
- return utf8_string;
1279
- break;
1280
- }
1281
- }
1282
-
1283
- str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1284
- }
1285
- return str;
1286
- }
1287
-
1288
- static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1289
- {
1290
- obj = ensure_valid_encoding(obj);
1291
-
1292
- fbuffer_append_char(buffer, '"');
1293
-
1294
- long len;
1295
- search_state search;
1296
- search.buffer = buffer;
1297
- RSTRING_GETMEM(obj, search.ptr, len);
1298
- search.cursor = search.ptr;
1299
- search.end = search.ptr + len;
1300
-
1301
- #ifdef HAVE_SIMD
1302
- search.matches_mask = 0;
1303
- search.has_matches = false;
1304
- search.chunk_base = NULL;
1305
- #endif /* HAVE_SIMD */
1306
-
1307
- switch(rb_enc_str_coderange(obj)) {
1308
- case ENC_CODERANGE_7BIT:
1309
- case ENC_CODERANGE_VALID:
1310
- if (RB_UNLIKELY(data->state->ascii_only)) {
1311
- convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1312
- } else if (RB_UNLIKELY(data->state->script_safe)) {
1313
- convert_UTF8_to_script_safe_JSON(&search);
1314
- } else {
1315
- convert_UTF8_to_JSON(&search);
1316
- }
1317
- break;
1318
- default:
1319
- raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1320
- break;
1321
- }
1322
- fbuffer_append_char(buffer, '"');
1323
- }
1324
-
1325
1294
  static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1326
1295
  {
1327
1296
  VALUE tmp;
1328
1297
  if (rb_respond_to(obj, i_to_json)) {
1329
- tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1298
+ tmp = json_call_to_json(data, obj);
1330
1299
  Check_Type(tmp, T_STRING);
1331
1300
  fbuffer_append_str(buffer, tmp);
1332
1301
  } else {
@@ -1389,11 +1358,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1389
1358
  /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1390
1359
  if (!allow_nan) {
1391
1360
  if (data->state->strict && data->state->as_json) {
1392
- VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1361
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
1393
1362
  if (casted_obj != obj) {
1394
1363
  increase_depth(data);
1395
1364
  generate_json(buffer, data, casted_obj);
1396
- data->state->depth--;
1365
+ data->depth--;
1397
1366
  return;
1398
1367
  }
1399
1368
  }
@@ -1406,12 +1375,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1406
1375
  }
1407
1376
 
1408
1377
  /* This implementation writes directly into the buffer. We reserve
1409
- * the 28 characters that fpconv_dtoa states as its maximum.
1378
+ * the 32 characters that fpconv_dtoa states as its maximum.
1410
1379
  */
1411
- fbuffer_inc_capa(buffer, 28);
1380
+ fbuffer_inc_capa(buffer, 32);
1412
1381
  char* d = buffer->ptr + buffer->len;
1413
1382
  int len = fpconv_dtoa(value, d);
1414
-
1415
1383
  /* fpconv_dtoa converts a float to its shortest string representation,
1416
1384
  * but it adds a ".0" if this is a plain integer.
1417
1385
  */
@@ -1461,7 +1429,16 @@ start:
1461
1429
  break;
1462
1430
  case T_STRING:
1463
1431
  if (klass != rb_cString) goto general;
1464
- generate_json_string(buffer, data, obj);
1432
+
1433
+ if (RB_LIKELY(valid_json_string_p(obj))) {
1434
+ raw_generate_json_string(buffer, data, obj);
1435
+ } else if (as_json_called) {
1436
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1437
+ } else {
1438
+ obj = ensure_valid_encoding(data, obj, false, false);
1439
+ as_json_called = true;
1440
+ goto start;
1441
+ }
1465
1442
  break;
1466
1443
  case T_SYMBOL:
1467
1444
  generate_json_symbol(buffer, data, obj);
@@ -1478,7 +1455,7 @@ start:
1478
1455
  general:
1479
1456
  if (data->state->strict) {
1480
1457
  if (RTEST(data->state->as_json) && !as_json_called) {
1481
- obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1458
+ obj = json_call_as_json(data->state, obj, Qfalse);
1482
1459
  as_json_called = true;
1483
1460
  goto start;
1484
1461
  } else {
@@ -1497,16 +1474,14 @@ static VALUE generate_json_try(VALUE d)
1497
1474
 
1498
1475
  data->func(data->buffer, data, data->obj);
1499
1476
 
1500
- return Qnil;
1477
+ return fbuffer_finalize(data->buffer);
1501
1478
  }
1502
1479
 
1503
- static VALUE generate_json_rescue(VALUE d, VALUE exc)
1480
+ static VALUE generate_json_ensure(VALUE d)
1504
1481
  {
1505
1482
  struct generate_json_data *data = (struct generate_json_data *)d;
1506
1483
  fbuffer_free(data->buffer);
1507
1484
 
1508
- rb_exc_raise(exc);
1509
-
1510
1485
  return Qundef;
1511
1486
  }
1512
1487
 
@@ -1522,14 +1497,13 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1522
1497
 
1523
1498
  struct generate_json_data data = {
1524
1499
  .buffer = &buffer,
1525
- .vstate = self,
1500
+ .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json
1526
1501
  .state = state,
1502
+ .depth = state->depth,
1527
1503
  .obj = obj,
1528
1504
  .func = func
1529
1505
  };
1530
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1531
-
1532
- return fbuffer_finalize(&buffer);
1506
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1533
1507
  }
1534
1508
 
1535
1509
  /* call-seq:
@@ -1545,10 +1519,7 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1545
1519
  rb_check_arity(argc, 1, 2);
1546
1520
  VALUE obj = argv[0];
1547
1521
  VALUE io = argc > 1 ? argv[1] : Qnil;
1548
- VALUE result = cState_partial_generate(self, obj, generate_json, io);
1549
- GET_STATE(self);
1550
- (void)state;
1551
- return result;
1522
+ return cState_partial_generate(self, obj, generate_json, io);
1552
1523
  }
1553
1524
 
1554
1525
  static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
@@ -1629,6 +1600,7 @@ static VALUE string_config(VALUE config)
1629
1600
  */
1630
1601
  static VALUE cState_indent_set(VALUE self, VALUE indent)
1631
1602
  {
1603
+ rb_check_frozen(self);
1632
1604
  GET_STATE(self);
1633
1605
  RB_OBJ_WRITE(self, &state->indent, string_config(indent));
1634
1606
  return Qnil;
@@ -1654,6 +1626,7 @@ static VALUE cState_space(VALUE self)
1654
1626
  */
1655
1627
  static VALUE cState_space_set(VALUE self, VALUE space)
1656
1628
  {
1629
+ rb_check_frozen(self);
1657
1630
  GET_STATE(self);
1658
1631
  RB_OBJ_WRITE(self, &state->space, string_config(space));
1659
1632
  return Qnil;
@@ -1677,6 +1650,7 @@ static VALUE cState_space_before(VALUE self)
1677
1650
  */
1678
1651
  static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1679
1652
  {
1653
+ rb_check_frozen(self);
1680
1654
  GET_STATE(self);
1681
1655
  RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
1682
1656
  return Qnil;
@@ -1702,6 +1676,7 @@ static VALUE cState_object_nl(VALUE self)
1702
1676
  */
1703
1677
  static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1704
1678
  {
1679
+ rb_check_frozen(self);
1705
1680
  GET_STATE(self);
1706
1681
  RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
1707
1682
  return Qnil;
@@ -1725,6 +1700,7 @@ static VALUE cState_array_nl(VALUE self)
1725
1700
  */
1726
1701
  static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1727
1702
  {
1703
+ rb_check_frozen(self);
1728
1704
  GET_STATE(self);
1729
1705
  RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
1730
1706
  return Qnil;
@@ -1748,6 +1724,7 @@ static VALUE cState_as_json(VALUE self)
1748
1724
  */
1749
1725
  static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1750
1726
  {
1727
+ rb_check_frozen(self);
1751
1728
  GET_STATE(self);
1752
1729
  RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1753
1730
  return Qnil;
@@ -1790,6 +1767,7 @@ static long long_config(VALUE num)
1790
1767
  */
1791
1768
  static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1792
1769
  {
1770
+ rb_check_frozen(self);
1793
1771
  GET_STATE(self);
1794
1772
  state->max_nesting = long_config(depth);
1795
1773
  return Qnil;
@@ -1815,6 +1793,7 @@ static VALUE cState_script_safe(VALUE self)
1815
1793
  */
1816
1794
  static VALUE cState_script_safe_set(VALUE self, VALUE enable)
1817
1795
  {
1796
+ rb_check_frozen(self);
1818
1797
  GET_STATE(self);
1819
1798
  state->script_safe = RTEST(enable);
1820
1799
  return Qnil;
@@ -1846,6 +1825,7 @@ static VALUE cState_strict(VALUE self)
1846
1825
  */
1847
1826
  static VALUE cState_strict_set(VALUE self, VALUE enable)
1848
1827
  {
1828
+ rb_check_frozen(self);
1849
1829
  GET_STATE(self);
1850
1830
  state->strict = RTEST(enable);
1851
1831
  return Qnil;
@@ -1870,6 +1850,7 @@ static VALUE cState_allow_nan_p(VALUE self)
1870
1850
  */
1871
1851
  static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1872
1852
  {
1853
+ rb_check_frozen(self);
1873
1854
  GET_STATE(self);
1874
1855
  state->allow_nan = RTEST(enable);
1875
1856
  return Qnil;
@@ -1894,11 +1875,25 @@ static VALUE cState_ascii_only_p(VALUE self)
1894
1875
  */
1895
1876
  static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1896
1877
  {
1878
+ rb_check_frozen(self);
1897
1879
  GET_STATE(self);
1898
1880
  state->ascii_only = RTEST(enable);
1899
1881
  return Qnil;
1900
1882
  }
1901
1883
 
1884
+ static VALUE cState_allow_duplicate_key_p(VALUE self)
1885
+ {
1886
+ GET_STATE(self);
1887
+ switch (state->on_duplicate_key) {
1888
+ case JSON_IGNORE:
1889
+ return Qtrue;
1890
+ case JSON_DEPRECATED:
1891
+ return Qnil;
1892
+ default:
1893
+ return Qfalse;
1894
+ }
1895
+ }
1896
+
1902
1897
  /*
1903
1898
  * call-seq: depth
1904
1899
  *
@@ -1918,6 +1913,7 @@ static VALUE cState_depth(VALUE self)
1918
1913
  */
1919
1914
  static VALUE cState_depth_set(VALUE self, VALUE depth)
1920
1915
  {
1916
+ rb_check_frozen(self);
1921
1917
  GET_STATE(self);
1922
1918
  state->depth = long_config(depth);
1923
1919
  return Qnil;
@@ -1951,20 +1947,36 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_
1951
1947
  */
1952
1948
  static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
1953
1949
  {
1950
+ rb_check_frozen(self);
1954
1951
  GET_STATE(self);
1955
1952
  buffer_initial_length_set(state, buffer_initial_length);
1956
1953
  return Qnil;
1957
1954
  }
1958
1955
 
1956
+ struct configure_state_data {
1957
+ JSON_Generator_State *state;
1958
+ VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
1959
+ };
1960
+
1961
+ static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
1962
+ {
1963
+ if (RTEST(data->vstate)) {
1964
+ RB_OBJ_WRITE(data->vstate, field, value);
1965
+ } else {
1966
+ *field = value;
1967
+ }
1968
+ }
1969
+
1959
1970
  static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1960
1971
  {
1961
- JSON_Generator_State *state = (JSON_Generator_State *)_arg;
1972
+ struct configure_state_data *data = (struct configure_state_data *)_arg;
1973
+ JSON_Generator_State *state = data->state;
1962
1974
 
1963
- if (key == sym_indent) { state->indent = string_config(val); }
1964
- else if (key == sym_space) { state->space = string_config(val); }
1965
- else if (key == sym_space_before) { state->space_before = string_config(val); }
1966
- else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1967
- else if (key == sym_array_nl) { state->array_nl = string_config(val); }
1975
+ if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
1976
+ else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
1977
+ else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
1978
+ else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
1979
+ else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
1968
1980
  else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1969
1981
  else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1970
1982
  else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
@@ -1973,11 +1985,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1973
1985
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1974
1986
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1975
1987
  else if (key == sym_strict) { state->strict = RTEST(val); }
1976
- else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
1988
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1989
+ else if (key == sym_as_json) {
1990
+ VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
1991
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
1992
+ state_write_value(data, &state->as_json, proc);
1993
+ }
1977
1994
  return ST_CONTINUE;
1978
1995
  }
1979
1996
 
1980
- static void configure_state(JSON_Generator_State *state, VALUE config)
1997
+ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
1981
1998
  {
1982
1999
  if (!RTEST(config)) return;
1983
2000
 
@@ -1985,15 +2002,21 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
1985
2002
 
1986
2003
  if (!RHASH_SIZE(config)) return;
1987
2004
 
2005
+ struct configure_state_data data = {
2006
+ .state = state,
2007
+ .vstate = vstate
2008
+ };
2009
+
1988
2010
  // We assume in most cases few keys are set so it's faster to go over
1989
2011
  // the provided keys than to check all possible keys.
1990
- rb_hash_foreach(config, configure_state_i, (VALUE)state);
2012
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data);
1991
2013
  }
1992
2014
 
1993
2015
  static VALUE cState_configure(VALUE self, VALUE opts)
1994
2016
  {
2017
+ rb_check_frozen(self);
1995
2018
  GET_STATE(self);
1996
- configure_state(state, opts);
2019
+ configure_state(state, self, opts);
1997
2020
  return self;
1998
2021
  }
1999
2022
 
@@ -2001,7 +2024,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2001
2024
  {
2002
2025
  JSON_Generator_State state = {0};
2003
2026
  state_init(&state);
2004
- configure_state(&state, opts);
2027
+ configure_state(&state, Qfalse, opts);
2005
2028
 
2006
2029
  char stack_buffer[FBUFFER_STACK_SIZE];
2007
2030
  FBuffer buffer = {
@@ -2013,12 +2036,11 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2013
2036
  .buffer = &buffer,
2014
2037
  .vstate = Qfalse,
2015
2038
  .state = &state,
2039
+ .depth = state.depth,
2016
2040
  .obj = obj,
2017
2041
  .func = generate_json,
2018
2042
  };
2019
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
2020
-
2021
- return fbuffer_finalize(&buffer);
2043
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
2022
2044
  }
2023
2045
 
2024
2046
  /*
@@ -2088,7 +2110,8 @@ void Init_generator(void)
2088
2110
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
2089
2111
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
2090
2112
  rb_define_method(cState, "generate", cState_generate, -1);
2091
- rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2113
+
2114
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2092
2115
 
2093
2116
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
2094
2117
 
@@ -2117,13 +2140,7 @@ void Init_generator(void)
2117
2140
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
2118
2141
 
2119
2142
  VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
2120
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
2121
2143
  rb_define_method(mString, "to_json", mString_to_json, -1);
2122
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
2123
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
2124
-
2125
- mString_Extend = rb_define_module_under(mString, "Extend");
2126
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
2127
2144
 
2128
2145
  VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
2129
2146
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
@@ -2140,10 +2157,6 @@ void Init_generator(void)
2140
2157
  i_to_s = rb_intern("to_s");
2141
2158
  i_to_json = rb_intern("to_json");
2142
2159
  i_new = rb_intern("new");
2143
- i_pack = rb_intern("pack");
2144
- i_unpack = rb_intern("unpack");
2145
- i_create_id = rb_intern("create_id");
2146
- i_extend = rb_intern("extend");
2147
2160
  i_encode = rb_intern("encode");
2148
2161
 
2149
2162
  sym_indent = ID2SYM(rb_intern("indent"));
@@ -2160,6 +2173,7 @@ void Init_generator(void)
2160
2173
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2161
2174
  sym_strict = ID2SYM(rb_intern("strict"));
2162
2175
  sym_as_json = ID2SYM(rb_intern("as_json"));
2176
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
2163
2177
 
2164
2178
  usascii_encindex = rb_usascii_encindex();
2165
2179
  utf8_encindex = rb_utf8_encindex();
@@ -2168,7 +2182,7 @@ void Init_generator(void)
2168
2182
  rb_require("json/ext/generator/state");
2169
2183
 
2170
2184
 
2171
- switch(find_simd_implementation()) {
2185
+ switch (find_simd_implementation()) {
2172
2186
  #ifdef HAVE_SIMD
2173
2187
  #ifdef HAVE_SIMD_NEON
2174
2188
  case SIMD_NEON: