json 2.12.2 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,10 +5,16 @@
5
5
  #include <math.h>
6
6
  #include <ctype.h>
7
7
 
8
- #include "simd.h"
8
+ #include "../simd/simd.h"
9
9
 
10
10
  /* ruby api and some helpers */
11
11
 
12
+ enum duplicate_key_action {
13
+ JSON_DEPRECATED = 0,
14
+ JSON_IGNORE,
15
+ JSON_RAISE,
16
+ };
17
+
12
18
  typedef struct JSON_Generator_StateStruct {
13
19
  VALUE indent;
14
20
  VALUE space;
@@ -21,6 +27,9 @@ typedef struct JSON_Generator_StateStruct {
21
27
  long depth;
22
28
  long buffer_initial_length;
23
29
 
30
+ enum duplicate_key_action on_duplicate_key;
31
+
32
+ bool as_json_single_arg;
24
33
  bool allow_nan;
25
34
  bool ascii_only;
26
35
  bool script_safe;
@@ -31,10 +40,10 @@ typedef struct JSON_Generator_StateStruct {
31
40
  #define RB_UNLIKELY(cond) (cond)
32
41
  #endif
33
42
 
34
- static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
43
+ static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
35
44
 
36
45
  static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
- static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
46
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
38
47
  sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
48
 
40
49
 
@@ -137,8 +146,8 @@ static inline FORCE_INLINE void search_flush(search_state *search)
137
146
  {
138
147
  // Do not remove this conditional without profiling, specifically escape-heavy text.
139
148
  // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
- // For back-to-back characters that need to be escaped, specifcally for the SIMD code paths, this method
141
- // will be called just before calling escape_UTF8_char_basic. There will be no characers to append for the
149
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
150
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
151
  // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
152
  // nothing needs to be flushed, we can save a few memory references with this conditional.
144
153
  if (search->ptr > search->cursor) {
@@ -304,28 +313,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304
313
  return 1;
305
314
  }
306
315
 
307
- // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
- static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309
- {
310
- const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
- const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
- return mask & 0x8888888888888888ull;
313
- }
314
-
315
- static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316
- {
317
- uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
-
319
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
- const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
-
323
- uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
- uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325
-
326
- return neon_match_mask(needs_escape);
327
- }
328
-
329
316
  static inline unsigned char search_escape_basic_neon(search_state *search)
330
317
  {
331
318
  if (RB_UNLIKELY(search->has_matches)) {
@@ -333,7 +320,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
333
320
  if (search->matches_mask > 0) {
334
321
  return neon_next_match(search);
335
322
  } else {
336
- // neon_next_match will only advance search->ptr up to the last matching character.
323
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
324
  // Skip over any characters in the last chunk that occur after the last match.
338
325
  search->has_matches = false;
339
326
  search->ptr = search->chunk_end;
@@ -342,67 +329,61 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
342
329
 
343
330
  /*
344
331
  * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
- * need to be escaped.
346
- *
332
+ * need to be escaped.
333
+ *
347
334
  * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
- *
335
+ *
349
336
  * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
337
  * the vector instructions may work on larger vectors.
351
- *
338
+ *
352
339
  * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
353
- *
354
- * lower_bound: [20 20 20 20 20 20 20 20]
355
- * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
- * dblquote: [22 22 22 22 22 22 22 22]
357
- *
358
- * Next we load the first chunk of the ptr:
340
+ *
341
+ * lower_bound: [20 20 20 20 20 20 20 20]
342
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
343
+ * dblquote: [22 22 22 22 22 22 22 22]
344
+ *
345
+ * Next we load the first chunk of the ptr:
359
346
  * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
- *
347
+ *
361
348
  * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
349
  * as no bytes are less than 32 (0x20):
363
350
  * [0 0 0 0 0 0 0 0]
364
- *
351
+ *
365
352
  * Next, we check if any byte in chunk is equal to a backslash:
366
353
  * [0 0 0 FF 0 0 0 0]
367
- *
354
+ *
368
355
  * Finally we check if any byte in chunk is equal to a double quote:
369
- * [FF 0 0 0 0 0 0 0]
370
- *
356
+ * [FF 0 0 0 0 0 0 0]
357
+ *
371
358
  * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
359
  * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
360
  * This is the needs_escape vector and it is equal to:
374
- * [FF 0 0 FF 0 0 0 0]
375
- *
361
+ * [FF 0 0 FF 0 0 0 0]
362
+ *
376
363
  * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
364
  * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
- *
365
+ *
379
366
  * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
367
  * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
368
  * have at least one byte that needs to be escaped.
382
369
  */
383
- while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
- uint64_t mask = neon_rules_update(search->ptr);
385
370
 
386
- if (!mask) {
387
- search->ptr += sizeof(uint8x16_t);
388
- continue;
389
- }
390
- search->matches_mask = mask;
371
+ if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
391
372
  search->has_matches = true;
392
373
  search->chunk_base = search->ptr;
393
374
  search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
375
  return neon_next_match(search);
395
376
  }
396
377
 
397
- // There are fewer than 16 bytes left.
378
+ // There are fewer than 16 bytes left.
398
379
  unsigned long remaining = (search->end - search->ptr);
399
380
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
381
  char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
382
 
402
- uint64_t mask = neon_rules_update(s);
383
+ uint64_t mask = compute_chunk_mask_neon(s);
403
384
 
404
385
  if (!mask) {
405
- // Nothing to escape, ensure search_flush doesn't do anything by setting
386
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
387
  // search->cursor to search->ptr.
407
388
  fbuffer_consumed(search->buffer, remaining);
408
389
  search->ptr = search->end;
@@ -428,11 +409,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428
409
 
429
410
  #ifdef HAVE_SIMD_SSE2
430
411
 
431
- #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
- #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
- #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
- #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435
-
436
412
  static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
437
413
  {
438
414
  int mask = search->matches_mask;
@@ -457,18 +433,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457
433
  #define TARGET_SSE2
458
434
  #endif
459
435
 
460
- static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
- {
462
- __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463
-
464
- // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
- // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
- __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
- __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
- __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
- return _mm_movemask_epi8(needs_escape);
470
- }
471
-
472
436
  static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
473
437
  {
474
438
  if (RB_UNLIKELY(search->has_matches)) {
@@ -476,7 +440,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
476
440
  if (search->matches_mask > 0) {
477
441
  return sse2_next_match(search);
478
442
  } else {
479
- // sse2_next_match will only advance search->ptr up to the last matching character.
443
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
444
  // Skip over any characters in the last chunk that occur after the last match.
481
445
  search->has_matches = false;
482
446
  if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
@@ -487,29 +451,22 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487
451
  }
488
452
  }
489
453
 
490
- while (search->ptr + sizeof(__m128i) <= search->end) {
491
- int needs_escape_mask = sse2_update(search->ptr);
492
-
493
- if (needs_escape_mask == 0) {
494
- search->ptr += sizeof(__m128i);
495
- continue;
496
- }
497
-
454
+ if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
498
455
  search->has_matches = true;
499
- search->matches_mask = needs_escape_mask;
500
456
  search->chunk_base = search->ptr;
457
+ search->chunk_end = search->ptr + sizeof(__m128i);
501
458
  return sse2_next_match(search);
502
459
  }
503
460
 
504
- // There are fewer than 16 bytes left.
461
+ // There are fewer than 16 bytes left.
505
462
  unsigned long remaining = (search->end - search->ptr);
506
463
  if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
464
  char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
465
 
509
- int needs_escape_mask = sse2_update(s);
466
+ int needs_escape_mask = compute_chunk_mask_sse2(s);
510
467
 
511
468
  if (needs_escape_mask == 0) {
512
- // Nothing to escape, ensure search_flush doesn't do anything by setting
469
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
470
  // search->cursor to search->ptr.
514
471
  fbuffer_consumed(search->buffer, remaining);
515
472
  search->ptr = search->end;
@@ -638,7 +595,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
638
595
  return 0;
639
596
  }
640
597
 
641
- static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
598
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
599
+ {
642
600
  const unsigned char ch = (unsigned char)*search->ptr;
643
601
  switch (ch_len) {
644
602
  case 1: {
@@ -668,7 +626,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
668
626
 
669
627
  uint32_t wchar = 0;
670
628
 
671
- switch(ch_len) {
629
+ switch (ch_len) {
672
630
  case 2:
673
631
  wchar = ch & 0x1F;
674
632
  break;
@@ -828,7 +786,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
828
786
  * _state_ is a JSON::State object, that can also be used to configure the
829
787
  * produced JSON string output further.
830
788
  */
831
- static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
789
+ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
790
+ {
832
791
  rb_check_arity(argc, 0, 1);
833
792
  VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
834
793
  return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
@@ -885,17 +844,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
885
844
  return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
886
845
  }
887
846
 
888
- /*
889
- * call-seq: String.included(modul)
890
- *
891
- * Extends _modul_ with the String::Extend module.
892
- */
893
- static VALUE mString_included_s(VALUE self, VALUE modul) {
894
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
895
- rb_call_super(1, &modul);
896
- return result;
897
- }
898
-
899
847
  /*
900
848
  * call-seq: to_json(*)
901
849
  *
@@ -910,51 +858,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
910
858
  return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
911
859
  }
912
860
 
913
- /*
914
- * call-seq: to_json_raw_object()
915
- *
916
- * This method creates a raw object hash, that can be nested into
917
- * other data structures and will be generated as a raw string. This
918
- * method should be used, if you want to convert raw strings to JSON
919
- * instead of UTF-8 strings, e. g. binary data.
920
- */
921
- static VALUE mString_to_json_raw_object(VALUE self)
922
- {
923
- VALUE ary;
924
- VALUE result = rb_hash_new();
925
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
926
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
927
- rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
928
- return result;
929
- }
930
-
931
- /*
932
- * call-seq: to_json_raw(*args)
933
- *
934
- * This method creates a JSON text from the result of a call to
935
- * to_json_raw_object of this String.
936
- */
937
- static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
938
- {
939
- VALUE obj = mString_to_json_raw_object(self);
940
- Check_Type(obj, T_HASH);
941
- return mHash_to_json(argc, argv, obj);
942
- }
943
-
944
- /*
945
- * call-seq: json_create(o)
946
- *
947
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
948
- * key "raw"). The Ruby String can be created by this module method.
949
- */
950
- static VALUE mString_Extend_json_create(VALUE self, VALUE o)
951
- {
952
- VALUE ary;
953
- Check_Type(o, T_HASH);
954
- ary = rb_hash_aref(o, rb_str_new2("raw"));
955
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
956
- }
957
-
958
861
  /*
959
862
  * call-seq: to_json(*)
960
863
  *
@@ -1093,8 +996,11 @@ static inline VALUE vstate_get(struct generate_json_data *data)
1093
996
  }
1094
997
 
1095
998
  struct hash_foreach_arg {
999
+ VALUE hash;
1096
1000
  struct generate_json_data *data;
1097
- int iter;
1001
+ int first_key_type;
1002
+ bool first;
1003
+ bool mixed_keys_encountered;
1098
1004
  };
1099
1005
 
1100
1006
  static VALUE
@@ -1112,6 +1018,29 @@ convert_string_subclass(VALUE key)
1112
1018
  return key_to_s;
1113
1019
  }
1114
1020
 
1021
+ NOINLINE()
1022
+ static void
1023
+ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1024
+ {
1025
+ if (arg->mixed_keys_encountered) {
1026
+ return;
1027
+ }
1028
+ arg->mixed_keys_encountered = true;
1029
+
1030
+ JSON_Generator_State *state = arg->data->state;
1031
+ if (state->on_duplicate_key != JSON_IGNORE) {
1032
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1033
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1034
+ }
1035
+ }
1036
+
1037
+ static VALUE
1038
+ json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
1039
+ {
1040
+ VALUE proc_args[2] = {object, is_key};
1041
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
1042
+ }
1043
+
1115
1044
  static int
1116
1045
  json_object_i(VALUE key, VALUE val, VALUE _arg)
1117
1046
  {
@@ -1122,21 +1051,33 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1122
1051
  JSON_Generator_State *state = data->state;
1123
1052
 
1124
1053
  long depth = state->depth;
1125
- int j;
1054
+ int key_type = rb_type(key);
1055
+
1056
+ if (arg->first) {
1057
+ arg->first = false;
1058
+ arg->first_key_type = key_type;
1059
+ }
1060
+ else {
1061
+ fbuffer_append_char(buffer, ',');
1062
+ }
1126
1063
 
1127
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
1128
1064
  if (RB_UNLIKELY(data->state->object_nl)) {
1129
1065
  fbuffer_append_str(buffer, data->state->object_nl);
1130
1066
  }
1131
1067
  if (RB_UNLIKELY(data->state->indent)) {
1132
- for (j = 0; j < depth; j++) {
1133
- fbuffer_append_str(buffer, data->state->indent);
1134
- }
1068
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1135
1069
  }
1136
1070
 
1137
1071
  VALUE key_to_s;
1138
- switch(rb_type(key)) {
1072
+ bool as_json_called = false;
1073
+
1074
+ start:
1075
+ switch (key_type) {
1139
1076
  case T_STRING:
1077
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1078
+ json_inspect_hash_with_mixed_keys(arg);
1079
+ }
1080
+
1140
1081
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1141
1082
  key_to_s = key;
1142
1083
  } else {
@@ -1144,9 +1085,23 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1144
1085
  }
1145
1086
  break;
1146
1087
  case T_SYMBOL:
1088
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1089
+ json_inspect_hash_with_mixed_keys(arg);
1090
+ }
1091
+
1147
1092
  key_to_s = rb_sym2str(key);
1148
1093
  break;
1149
1094
  default:
1095
+ if (data->state->strict) {
1096
+ if (RTEST(data->state->as_json) && !as_json_called) {
1097
+ key = json_call_as_json(data->state, key, Qtrue);
1098
+ key_type = rb_type(key);
1099
+ as_json_called = true;
1100
+ goto start;
1101
+ } else {
1102
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
1103
+ }
1104
+ }
1150
1105
  key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
1151
1106
  break;
1152
1107
  }
@@ -1161,7 +1116,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1161
1116
  if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1162
1117
  generate_json(buffer, data, val);
1163
1118
 
1164
- arg->iter++;
1165
1119
  return ST_CONTINUE;
1166
1120
  }
1167
1121
 
@@ -1177,7 +1131,6 @@ static inline long increase_depth(struct generate_json_data *data)
1177
1131
 
1178
1132
  static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1179
1133
  {
1180
- int j;
1181
1134
  long depth = increase_depth(data);
1182
1135
 
1183
1136
  if (RHASH_SIZE(obj) == 0) {
@@ -1189,8 +1142,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1189
1142
  fbuffer_append_char(buffer, '{');
1190
1143
 
1191
1144
  struct hash_foreach_arg arg = {
1145
+ .hash = obj,
1192
1146
  .data = data,
1193
- .iter = 0,
1147
+ .first = true,
1194
1148
  };
1195
1149
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
1196
1150
 
@@ -1198,9 +1152,7 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1198
1152
  if (RB_UNLIKELY(data->state->object_nl)) {
1199
1153
  fbuffer_append_str(buffer, data->state->object_nl);
1200
1154
  if (RB_UNLIKELY(data->state->indent)) {
1201
- for (j = 0; j < depth; j++) {
1202
- fbuffer_append_str(buffer, data->state->indent);
1203
- }
1155
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1204
1156
  }
1205
1157
  }
1206
1158
  fbuffer_append_char(buffer, '}');
@@ -1208,7 +1160,6 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1208
1160
 
1209
1161
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1210
1162
  {
1211
- int i, j;
1212
1163
  long depth = increase_depth(data);
1213
1164
 
1214
1165
  if (RARRAY_LEN(obj) == 0) {
@@ -1219,15 +1170,13 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
1219
1170
 
1220
1171
  fbuffer_append_char(buffer, '[');
1221
1172
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1222
- for(i = 0; i < RARRAY_LEN(obj); i++) {
1173
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
1223
1174
  if (i > 0) {
1224
1175
  fbuffer_append_char(buffer, ',');
1225
1176
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1226
1177
  }
1227
1178
  if (RB_UNLIKELY(data->state->indent)) {
1228
- for (j = 0; j < depth; j++) {
1229
- fbuffer_append_str(buffer, data->state->indent);
1230
- }
1179
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1231
1180
  }
1232
1181
  generate_json(buffer, data, RARRAY_AREF(obj, i));
1233
1182
  }
@@ -1235,9 +1184,7 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
1235
1184
  if (RB_UNLIKELY(data->state->array_nl)) {
1236
1185
  fbuffer_append_str(buffer, data->state->array_nl);
1237
1186
  if (RB_UNLIKELY(data->state->indent)) {
1238
- for (j = 0; j < depth; j++) {
1239
- fbuffer_append_str(buffer, data->state->indent);
1240
- }
1187
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1241
1188
  }
1242
1189
  }
1243
1190
  fbuffer_append_char(buffer, ']');
@@ -1304,7 +1251,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
1304
1251
  search.chunk_base = NULL;
1305
1252
  #endif /* HAVE_SIMD */
1306
1253
 
1307
- switch(rb_enc_str_coderange(obj)) {
1254
+ switch (rb_enc_str_coderange(obj)) {
1308
1255
  case ENC_CODERANGE_7BIT:
1309
1256
  case ENC_CODERANGE_VALID:
1310
1257
  if (RB_UNLIKELY(data->state->ascii_only)) {
@@ -1389,7 +1336,7 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1389
1336
  /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1390
1337
  if (!allow_nan) {
1391
1338
  if (data->state->strict && data->state->as_json) {
1392
- VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1339
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
1393
1340
  if (casted_obj != obj) {
1394
1341
  increase_depth(data);
1395
1342
  generate_json(buffer, data, casted_obj);
@@ -1406,12 +1353,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1406
1353
  }
1407
1354
 
1408
1355
  /* This implementation writes directly into the buffer. We reserve
1409
- * the 28 characters that fpconv_dtoa states as its maximum.
1356
+ * the 32 characters that fpconv_dtoa states as its maximum.
1410
1357
  */
1411
- fbuffer_inc_capa(buffer, 28);
1358
+ fbuffer_inc_capa(buffer, 32);
1412
1359
  char* d = buffer->ptr + buffer->len;
1413
1360
  int len = fpconv_dtoa(value, d);
1414
-
1415
1361
  /* fpconv_dtoa converts a float to its shortest string representation,
1416
1362
  * but it adds a ".0" if this is a plain integer.
1417
1363
  */
@@ -1478,7 +1424,7 @@ start:
1478
1424
  general:
1479
1425
  if (data->state->strict) {
1480
1426
  if (RTEST(data->state->as_json) && !as_json_called) {
1481
- obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1427
+ obj = json_call_as_json(data->state, obj, Qfalse);
1482
1428
  as_json_called = true;
1483
1429
  goto start;
1484
1430
  } else {
@@ -1899,6 +1845,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1899
1845
  return Qnil;
1900
1846
  }
1901
1847
 
1848
+ static VALUE cState_allow_duplicate_key_p(VALUE self)
1849
+ {
1850
+ GET_STATE(self);
1851
+ switch (state->on_duplicate_key) {
1852
+ case JSON_IGNORE:
1853
+ return Qtrue;
1854
+ case JSON_DEPRECATED:
1855
+ return Qnil;
1856
+ default:
1857
+ return Qfalse;
1858
+ }
1859
+ }
1860
+
1902
1861
  /*
1903
1862
  * call-seq: depth
1904
1863
  *
@@ -1956,15 +1915,30 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
1956
1915
  return Qnil;
1957
1916
  }
1958
1917
 
1918
+ struct configure_state_data {
1919
+ JSON_Generator_State *state;
1920
+ VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
1921
+ };
1922
+
1923
+ static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
1924
+ {
1925
+ if (RTEST(data->vstate)) {
1926
+ RB_OBJ_WRITE(data->vstate, field, value);
1927
+ } else {
1928
+ *field = value;
1929
+ }
1930
+ }
1931
+
1959
1932
  static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1960
1933
  {
1961
- JSON_Generator_State *state = (JSON_Generator_State *)_arg;
1934
+ struct configure_state_data *data = (struct configure_state_data *)_arg;
1935
+ JSON_Generator_State *state = data->state;
1962
1936
 
1963
- if (key == sym_indent) { state->indent = string_config(val); }
1964
- else if (key == sym_space) { state->space = string_config(val); }
1965
- else if (key == sym_space_before) { state->space_before = string_config(val); }
1966
- else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1967
- else if (key == sym_array_nl) { state->array_nl = string_config(val); }
1937
+ if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
1938
+ else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
1939
+ else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
1940
+ else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
1941
+ else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
1968
1942
  else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1969
1943
  else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1970
1944
  else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
@@ -1973,11 +1947,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1973
1947
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1974
1948
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1975
1949
  else if (key == sym_strict) { state->strict = RTEST(val); }
1976
- else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
1950
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1951
+ else if (key == sym_as_json) {
1952
+ VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
1953
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
1954
+ state_write_value(data, &state->as_json, proc);
1955
+ }
1977
1956
  return ST_CONTINUE;
1978
1957
  }
1979
1958
 
1980
- static void configure_state(JSON_Generator_State *state, VALUE config)
1959
+ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
1981
1960
  {
1982
1961
  if (!RTEST(config)) return;
1983
1962
 
@@ -1985,15 +1964,20 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
1985
1964
 
1986
1965
  if (!RHASH_SIZE(config)) return;
1987
1966
 
1967
+ struct configure_state_data data = {
1968
+ .state = state,
1969
+ .vstate = vstate
1970
+ };
1971
+
1988
1972
  // We assume in most cases few keys are set so it's faster to go over
1989
1973
  // the provided keys than to check all possible keys.
1990
- rb_hash_foreach(config, configure_state_i, (VALUE)state);
1974
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data);
1991
1975
  }
1992
1976
 
1993
1977
  static VALUE cState_configure(VALUE self, VALUE opts)
1994
1978
  {
1995
1979
  GET_STATE(self);
1996
- configure_state(state, opts);
1980
+ configure_state(state, self, opts);
1997
1981
  return self;
1998
1982
  }
1999
1983
 
@@ -2001,7 +1985,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2001
1985
  {
2002
1986
  JSON_Generator_State state = {0};
2003
1987
  state_init(&state);
2004
- configure_state(&state, opts);
1988
+ configure_state(&state, Qfalse, opts);
2005
1989
 
2006
1990
  char stack_buffer[FBUFFER_STACK_SIZE];
2007
1991
  FBuffer buffer = {
@@ -2090,6 +2074,8 @@ void Init_generator(void)
2090
2074
  rb_define_method(cState, "generate", cState_generate, -1);
2091
2075
  rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2092
2076
 
2077
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2078
+
2093
2079
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
2094
2080
 
2095
2081
  VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
@@ -2117,13 +2103,7 @@ void Init_generator(void)
2117
2103
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
2118
2104
 
2119
2105
  VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
2120
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
2121
2106
  rb_define_method(mString, "to_json", mString_to_json, -1);
2122
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
2123
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
2124
-
2125
- mString_Extend = rb_define_module_under(mString, "Extend");
2126
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
2127
2107
 
2128
2108
  VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
2129
2109
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
@@ -2160,6 +2140,7 @@ void Init_generator(void)
2160
2140
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2161
2141
  sym_strict = ID2SYM(rb_intern("strict"));
2162
2142
  sym_as_json = ID2SYM(rb_intern("as_json"));
2143
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
2163
2144
 
2164
2145
  usascii_encindex = rb_usascii_encindex();
2165
2146
  utf8_encindex = rb_utf8_encindex();
@@ -2168,7 +2149,7 @@ void Init_generator(void)
2168
2149
  rb_require("json/ext/generator/state");
2169
2150
 
2170
2151
 
2171
- switch(find_simd_implementation()) {
2152
+ switch (find_simd_implementation()) {
2172
2153
  #ifdef HAVE_SIMD
2173
2154
  #ifdef HAVE_SIMD_NEON
2174
2155
  case SIMD_NEON: