json 2.13.2 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- #include "ruby.h"
1
+ #include "../json.h"
2
2
  #include "../fbuffer/fbuffer.h"
3
3
  #include "../vendor/fpconv.c"
4
4
 
@@ -9,6 +9,12 @@
9
9
 
10
10
  /* ruby api and some helpers */
11
11
 
12
+ enum duplicate_key_action {
13
+ JSON_DEPRECATED = 0,
14
+ JSON_IGNORE,
15
+ JSON_RAISE,
16
+ };
17
+
12
18
  typedef struct JSON_Generator_StateStruct {
13
19
  VALUE indent;
14
20
  VALUE space;
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
21
27
  long depth;
22
28
  long buffer_initial_length;
23
29
 
30
+ enum duplicate_key_action on_duplicate_key;
31
+
32
+ bool as_json_single_arg;
24
33
  bool allow_nan;
25
34
  bool ascii_only;
26
35
  bool script_safe;
27
36
  bool strict;
28
37
  } JSON_Generator_State;
29
38
 
30
- #ifndef RB_UNLIKELY
31
- #define RB_UNLIKELY(cond) (cond)
32
- #endif
33
-
34
- static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
39
+ static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
35
40
 
36
41
  static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
- static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
42
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
38
43
  sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
44
 
40
45
 
@@ -76,23 +81,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
76
81
 
77
82
  static int usascii_encindex, utf8_encindex, binary_encindex;
78
83
 
79
- #ifdef RBIMPL_ATTR_NORETURN
80
- RBIMPL_ATTR_NORETURN()
81
- #endif
82
- static void raise_generator_error_str(VALUE invalid_object, VALUE str)
84
+ NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
83
85
  {
86
+ rb_enc_associate_index(str, utf8_encindex);
84
87
  VALUE exc = rb_exc_new_str(eGeneratorError, str);
85
88
  rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
86
89
  rb_exc_raise(exc);
87
90
  }
88
91
 
89
- #ifdef RBIMPL_ATTR_NORETURN
90
- RBIMPL_ATTR_NORETURN()
91
- #endif
92
92
  #ifdef RBIMPL_ATTR_FORMAT
93
93
  RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
94
94
  #endif
95
- static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
95
+ NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
96
  {
97
97
  va_list args;
98
98
  va_start(args, fmt);
@@ -127,18 +127,12 @@ typedef struct _search_state {
127
127
  #endif /* HAVE_SIMD */
128
128
  } search_state;
129
129
 
130
- #if (defined(__GNUC__ ) || defined(__clang__))
131
- #define FORCE_INLINE __attribute__((always_inline))
132
- #else
133
- #define FORCE_INLINE
134
- #endif
135
-
136
- static inline FORCE_INLINE void search_flush(search_state *search)
130
+ static ALWAYS_INLINE() void search_flush(search_state *search)
137
131
  {
138
132
  // Do not remove this conditional without profiling, specifically escape-heavy text.
139
133
  // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
- // For back-to-back characters that need to be escaped, specifcally for the SIMD code paths, this method
141
- // will be called just before calling escape_UTF8_char_basic. There will be no characers to append for the
134
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
135
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
136
  // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
137
  // nothing needs to be flushed, we can save a few memory references with this conditional.
144
138
  if (search->ptr > search->cursor) {
@@ -176,7 +170,7 @@ static inline unsigned char search_escape_basic(search_state *search)
176
170
  return 0;
177
171
  }
178
172
 
179
- static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
173
+ static ALWAYS_INLINE() void escape_UTF8_char_basic(search_state *search)
180
174
  {
181
175
  const unsigned char ch = (unsigned char)*search->ptr;
182
176
  switch (ch) {
@@ -263,7 +257,7 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
263
257
 
264
258
  #ifdef HAVE_SIMD
265
259
 
266
- static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
260
+ static ALWAYS_INLINE() char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
261
  {
268
262
  // Flush the buffer so everything up until the last 'len' characters are unflushed.
269
263
  search_flush(search);
@@ -286,7 +280,7 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
286
280
 
287
281
  #ifdef HAVE_SIMD_NEON
288
282
 
289
- static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
283
+ static ALWAYS_INLINE() unsigned char neon_next_match(search_state *search)
290
284
  {
291
285
  uint64_t mask = search->matches_mask;
292
286
  uint32_t index = trailing_zeros64(mask) >> 2;
@@ -400,7 +394,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
400
394
 
401
395
  #ifdef HAVE_SIMD_SSE2
402
396
 
403
- static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
397
+ static ALWAYS_INLINE() unsigned char sse2_next_match(search_state *search)
404
398
  {
405
399
  int mask = search->matches_mask;
406
400
  int index = trailing_zeros(mask);
@@ -424,7 +418,7 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
424
418
  #define TARGET_SSE2
425
419
  #endif
426
420
 
427
- static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
421
+ static TARGET_SSE2 ALWAYS_INLINE() unsigned char search_escape_basic_sse2(search_state *search)
428
422
  {
429
423
  if (RB_UNLIKELY(search->has_matches)) {
430
424
  // There are more matches if search->matches_mask > 0.
@@ -835,18 +829,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
835
829
  return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
836
830
  }
837
831
 
838
- /*
839
- * call-seq: String.included(modul)
840
- *
841
- * Extends _modul_ with the String::Extend module.
842
- */
843
- static VALUE mString_included_s(VALUE self, VALUE modul)
844
- {
845
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
846
- rb_call_super(1, &modul);
847
- return result;
848
- }
849
-
850
832
  /*
851
833
  * call-seq: to_json(*)
852
834
  *
@@ -861,51 +843,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
861
843
  return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
862
844
  }
863
845
 
864
- /*
865
- * call-seq: to_json_raw_object()
866
- *
867
- * This method creates a raw object hash, that can be nested into
868
- * other data structures and will be generated as a raw string. This
869
- * method should be used, if you want to convert raw strings to JSON
870
- * instead of UTF-8 strings, e. g. binary data.
871
- */
872
- static VALUE mString_to_json_raw_object(VALUE self)
873
- {
874
- VALUE ary;
875
- VALUE result = rb_hash_new();
876
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
877
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
878
- rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
879
- return result;
880
- }
881
-
882
- /*
883
- * call-seq: to_json_raw(*args)
884
- *
885
- * This method creates a JSON text from the result of a call to
886
- * to_json_raw_object of this String.
887
- */
888
- static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
889
- {
890
- VALUE obj = mString_to_json_raw_object(self);
891
- Check_Type(obj, T_HASH);
892
- return mHash_to_json(argc, argv, obj);
893
- }
894
-
895
- /*
896
- * call-seq: json_create(o)
897
- *
898
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
899
- * key "raw"). The Ruby String can be created by this module method.
900
- */
901
- static VALUE mString_Extend_json_create(VALUE self, VALUE o)
902
- {
903
- VALUE ary;
904
- Check_Type(o, T_HASH);
905
- ary = rb_hash_aref(o, rb_str_new2("raw"));
906
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
907
- }
908
-
909
846
  /*
910
847
  * call-seq: to_json(*)
911
848
  *
@@ -1043,10 +980,12 @@ static inline VALUE vstate_get(struct generate_json_data *data)
1043
980
  return data->vstate;
1044
981
  }
1045
982
 
1046
- struct hash_foreach_arg {
1047
- struct generate_json_data *data;
1048
- int iter;
1049
- };
983
+ static VALUE
984
+ json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
985
+ {
986
+ VALUE proc_args[2] = {object, is_key};
987
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
988
+ }
1050
989
 
1051
990
  static VALUE
1052
991
  convert_string_subclass(VALUE key)
@@ -1063,6 +1002,145 @@ convert_string_subclass(VALUE key)
1063
1002
  return key_to_s;
1064
1003
  }
1065
1004
 
1005
+ static bool enc_utf8_compatible_p(int enc_idx)
1006
+ {
1007
+ if (enc_idx == usascii_encindex) return true;
1008
+ if (enc_idx == utf8_encindex) return true;
1009
+ return false;
1010
+ }
1011
+
1012
+ static VALUE encode_json_string_try(VALUE str)
1013
+ {
1014
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1015
+ }
1016
+
1017
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1018
+ {
1019
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1020
+ return Qundef;
1021
+ }
1022
+
1023
+ static inline bool valid_json_string_p(VALUE str)
1024
+ {
1025
+ int coderange = rb_enc_str_coderange(str);
1026
+
1027
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
1028
+ return true;
1029
+ }
1030
+
1031
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
1032
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
1033
+ }
1034
+
1035
+ return false;
1036
+ }
1037
+
1038
+ static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
1039
+ {
1040
+ if (RB_LIKELY(valid_json_string_p(str))) {
1041
+ return str;
1042
+ }
1043
+
1044
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
1045
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
1046
+ if (coerced_str != str) {
1047
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
1048
+ if (!valid_json_string_p(coerced_str)) {
1049
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
1050
+ }
1051
+ } else {
1052
+ // as_json could return another type than T_STRING
1053
+ if (is_key) {
1054
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
1055
+ }
1056
+ }
1057
+
1058
+ return coerced_str;
1059
+ }
1060
+ }
1061
+
1062
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
1063
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1064
+ switch (rb_enc_str_coderange(utf8_string)) {
1065
+ case ENC_CODERANGE_7BIT:
1066
+ return utf8_string;
1067
+ case ENC_CODERANGE_VALID:
1068
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1069
+ // TODO: Raise in 3.0.0
1070
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1071
+ return utf8_string;
1072
+ break;
1073
+ }
1074
+ }
1075
+
1076
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1077
+ }
1078
+
1079
+ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1080
+ {
1081
+ fbuffer_append_char(buffer, '"');
1082
+
1083
+ long len;
1084
+ search_state search;
1085
+ search.buffer = buffer;
1086
+ RSTRING_GETMEM(obj, search.ptr, len);
1087
+ search.cursor = search.ptr;
1088
+ search.end = search.ptr + len;
1089
+
1090
+ #ifdef HAVE_SIMD
1091
+ search.matches_mask = 0;
1092
+ search.has_matches = false;
1093
+ search.chunk_base = NULL;
1094
+ #endif /* HAVE_SIMD */
1095
+
1096
+ switch (rb_enc_str_coderange(obj)) {
1097
+ case ENC_CODERANGE_7BIT:
1098
+ case ENC_CODERANGE_VALID:
1099
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1100
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1101
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1102
+ convert_UTF8_to_script_safe_JSON(&search);
1103
+ } else {
1104
+ convert_UTF8_to_JSON(&search);
1105
+ }
1106
+ break;
1107
+ default:
1108
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1109
+ break;
1110
+ }
1111
+ fbuffer_append_char(buffer, '"');
1112
+ }
1113
+
1114
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1115
+ {
1116
+ obj = ensure_valid_encoding(data, obj, false, false);
1117
+ raw_generate_json_string(buffer, data, obj);
1118
+ }
1119
+
1120
+ struct hash_foreach_arg {
1121
+ VALUE hash;
1122
+ struct generate_json_data *data;
1123
+ int first_key_type;
1124
+ bool first;
1125
+ bool mixed_keys_encountered;
1126
+ };
1127
+
1128
+ NOINLINE()
1129
+ static void
1130
+ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1131
+ {
1132
+ if (arg->mixed_keys_encountered) {
1133
+ return;
1134
+ }
1135
+ arg->mixed_keys_encountered = true;
1136
+
1137
+ JSON_Generator_State *state = arg->data->state;
1138
+ if (state->on_duplicate_key != JSON_IGNORE) {
1139
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1140
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1141
+ }
1142
+ }
1143
+
1066
1144
  static int
1067
1145
  json_object_i(VALUE key, VALUE val, VALUE _arg)
1068
1146
  {
@@ -1073,21 +1151,33 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1073
1151
  JSON_Generator_State *state = data->state;
1074
1152
 
1075
1153
  long depth = state->depth;
1076
- int j;
1154
+ int key_type = rb_type(key);
1155
+
1156
+ if (arg->first) {
1157
+ arg->first = false;
1158
+ arg->first_key_type = key_type;
1159
+ }
1160
+ else {
1161
+ fbuffer_append_char(buffer, ',');
1162
+ }
1077
1163
 
1078
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
1079
1164
  if (RB_UNLIKELY(data->state->object_nl)) {
1080
1165
  fbuffer_append_str(buffer, data->state->object_nl);
1081
1166
  }
1082
1167
  if (RB_UNLIKELY(data->state->indent)) {
1083
- for (j = 0; j < depth; j++) {
1084
- fbuffer_append_str(buffer, data->state->indent);
1085
- }
1168
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1086
1169
  }
1087
1170
 
1088
1171
  VALUE key_to_s;
1089
- switch (rb_type(key)) {
1172
+ bool as_json_called = false;
1173
+
1174
+ start:
1175
+ switch (key_type) {
1090
1176
  case T_STRING:
1177
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1178
+ json_inspect_hash_with_mixed_keys(arg);
1179
+ }
1180
+
1091
1181
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1092
1182
  key_to_s = key;
1093
1183
  } else {
@@ -1095,15 +1185,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1095
1185
  }
1096
1186
  break;
1097
1187
  case T_SYMBOL:
1188
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1189
+ json_inspect_hash_with_mixed_keys(arg);
1190
+ }
1191
+
1098
1192
  key_to_s = rb_sym2str(key);
1099
1193
  break;
1100
1194
  default:
1195
+ if (data->state->strict) {
1196
+ if (RTEST(data->state->as_json) && !as_json_called) {
1197
+ key = json_call_as_json(data->state, key, Qtrue);
1198
+ key_type = rb_type(key);
1199
+ as_json_called = true;
1200
+ goto start;
1201
+ } else {
1202
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
1203
+ }
1204
+ }
1101
1205
  key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
1102
1206
  break;
1103
1207
  }
1104
1208
 
1209
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
1210
+
1105
1211
  if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
1106
- generate_json_string(buffer, data, key_to_s);
1212
+ raw_generate_json_string(buffer, data, key_to_s);
1107
1213
  } else {
1108
1214
  generate_json(buffer, data, key_to_s);
1109
1215
  }
@@ -1112,7 +1218,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1112
1218
  if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1113
1219
  generate_json(buffer, data, val);
1114
1220
 
1115
- arg->iter++;
1116
1221
  return ST_CONTINUE;
1117
1222
  }
1118
1223
 
@@ -1121,14 +1226,13 @@ static inline long increase_depth(struct generate_json_data *data)
1121
1226
  JSON_Generator_State *state = data->state;
1122
1227
  long depth = ++state->depth;
1123
1228
  if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1124
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1229
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --state->depth);
1125
1230
  }
1126
1231
  return depth;
1127
1232
  }
1128
1233
 
1129
1234
  static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1130
1235
  {
1131
- int j;
1132
1236
  long depth = increase_depth(data);
1133
1237
 
1134
1238
  if (RHASH_SIZE(obj) == 0) {
@@ -1140,8 +1244,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1140
1244
  fbuffer_append_char(buffer, '{');
1141
1245
 
1142
1246
  struct hash_foreach_arg arg = {
1247
+ .hash = obj,
1143
1248
  .data = data,
1144
- .iter = 0,
1249
+ .first = true,
1145
1250
  };
1146
1251
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
1147
1252
 
@@ -1149,9 +1254,7 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1149
1254
  if (RB_UNLIKELY(data->state->object_nl)) {
1150
1255
  fbuffer_append_str(buffer, data->state->object_nl);
1151
1256
  if (RB_UNLIKELY(data->state->indent)) {
1152
- for (j = 0; j < depth; j++) {
1153
- fbuffer_append_str(buffer, data->state->indent);
1154
- }
1257
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1155
1258
  }
1156
1259
  }
1157
1260
  fbuffer_append_char(buffer, '}');
@@ -1159,7 +1262,6 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1159
1262
 
1160
1263
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1161
1264
  {
1162
- int i, j;
1163
1265
  long depth = increase_depth(data);
1164
1266
 
1165
1267
  if (RARRAY_LEN(obj) == 0) {
@@ -1170,15 +1272,13 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
1170
1272
 
1171
1273
  fbuffer_append_char(buffer, '[');
1172
1274
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1173
- for (i = 0; i < RARRAY_LEN(obj); i++) {
1275
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
1174
1276
  if (i > 0) {
1175
1277
  fbuffer_append_char(buffer, ',');
1176
1278
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1177
1279
  }
1178
1280
  if (RB_UNLIKELY(data->state->indent)) {
1179
- for (j = 0; j < depth; j++) {
1180
- fbuffer_append_str(buffer, data->state->indent);
1181
- }
1281
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1182
1282
  }
1183
1283
  generate_json(buffer, data, RARRAY_AREF(obj, i));
1184
1284
  }
@@ -1186,93 +1286,12 @@ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data
1186
1286
  if (RB_UNLIKELY(data->state->array_nl)) {
1187
1287
  fbuffer_append_str(buffer, data->state->array_nl);
1188
1288
  if (RB_UNLIKELY(data->state->indent)) {
1189
- for (j = 0; j < depth; j++) {
1190
- fbuffer_append_str(buffer, data->state->indent);
1191
- }
1289
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1192
1290
  }
1193
1291
  }
1194
1292
  fbuffer_append_char(buffer, ']');
1195
1293
  }
1196
1294
 
1197
- static inline int enc_utf8_compatible_p(int enc_idx)
1198
- {
1199
- if (enc_idx == usascii_encindex) return 1;
1200
- if (enc_idx == utf8_encindex) return 1;
1201
- return 0;
1202
- }
1203
-
1204
- static VALUE encode_json_string_try(VALUE str)
1205
- {
1206
- return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1207
- }
1208
-
1209
- static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1210
- {
1211
- raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1212
- return Qundef;
1213
- }
1214
-
1215
- static inline VALUE ensure_valid_encoding(VALUE str)
1216
- {
1217
- int encindex = RB_ENCODING_GET(str);
1218
- VALUE utf8_string;
1219
- if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
1220
- if (encindex == binary_encindex) {
1221
- utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1222
- switch (rb_enc_str_coderange(utf8_string)) {
1223
- case ENC_CODERANGE_7BIT:
1224
- return utf8_string;
1225
- case ENC_CODERANGE_VALID:
1226
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1227
- // TODO: Raise in 3.0.0
1228
- rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1229
- return utf8_string;
1230
- break;
1231
- }
1232
- }
1233
-
1234
- str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1235
- }
1236
- return str;
1237
- }
1238
-
1239
- static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1240
- {
1241
- obj = ensure_valid_encoding(obj);
1242
-
1243
- fbuffer_append_char(buffer, '"');
1244
-
1245
- long len;
1246
- search_state search;
1247
- search.buffer = buffer;
1248
- RSTRING_GETMEM(obj, search.ptr, len);
1249
- search.cursor = search.ptr;
1250
- search.end = search.ptr + len;
1251
-
1252
- #ifdef HAVE_SIMD
1253
- search.matches_mask = 0;
1254
- search.has_matches = false;
1255
- search.chunk_base = NULL;
1256
- #endif /* HAVE_SIMD */
1257
-
1258
- switch (rb_enc_str_coderange(obj)) {
1259
- case ENC_CODERANGE_7BIT:
1260
- case ENC_CODERANGE_VALID:
1261
- if (RB_UNLIKELY(data->state->ascii_only)) {
1262
- convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1263
- } else if (RB_UNLIKELY(data->state->script_safe)) {
1264
- convert_UTF8_to_script_safe_JSON(&search);
1265
- } else {
1266
- convert_UTF8_to_JSON(&search);
1267
- }
1268
- break;
1269
- default:
1270
- raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1271
- break;
1272
- }
1273
- fbuffer_append_char(buffer, '"');
1274
- }
1275
-
1276
1295
  static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1277
1296
  {
1278
1297
  VALUE tmp;
@@ -1340,7 +1359,7 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1340
1359
  /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1341
1360
  if (!allow_nan) {
1342
1361
  if (data->state->strict && data->state->as_json) {
1343
- VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1362
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
1344
1363
  if (casted_obj != obj) {
1345
1364
  increase_depth(data);
1346
1365
  generate_json(buffer, data, casted_obj);
@@ -1357,12 +1376,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1357
1376
  }
1358
1377
 
1359
1378
  /* This implementation writes directly into the buffer. We reserve
1360
- * the 28 characters that fpconv_dtoa states as its maximum.
1379
+ * the 32 characters that fpconv_dtoa states as its maximum.
1361
1380
  */
1362
- fbuffer_inc_capa(buffer, 28);
1381
+ fbuffer_inc_capa(buffer, 32);
1363
1382
  char* d = buffer->ptr + buffer->len;
1364
1383
  int len = fpconv_dtoa(value, d);
1365
-
1366
1384
  /* fpconv_dtoa converts a float to its shortest string representation,
1367
1385
  * but it adds a ".0" if this is a plain integer.
1368
1386
  */
@@ -1412,7 +1430,16 @@ start:
1412
1430
  break;
1413
1431
  case T_STRING:
1414
1432
  if (klass != rb_cString) goto general;
1415
- generate_json_string(buffer, data, obj);
1433
+
1434
+ if (RB_LIKELY(valid_json_string_p(obj))) {
1435
+ raw_generate_json_string(buffer, data, obj);
1436
+ } else if (as_json_called) {
1437
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1438
+ } else {
1439
+ obj = ensure_valid_encoding(data, obj, false, false);
1440
+ as_json_called = true;
1441
+ goto start;
1442
+ }
1416
1443
  break;
1417
1444
  case T_SYMBOL:
1418
1445
  generate_json_symbol(buffer, data, obj);
@@ -1429,7 +1456,7 @@ start:
1429
1456
  general:
1430
1457
  if (data->state->strict) {
1431
1458
  if (RTEST(data->state->as_json) && !as_json_called) {
1432
- obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1459
+ obj = json_call_as_json(data->state, obj, Qfalse);
1433
1460
  as_json_called = true;
1434
1461
  goto start;
1435
1462
  } else {
@@ -1448,16 +1475,14 @@ static VALUE generate_json_try(VALUE d)
1448
1475
 
1449
1476
  data->func(data->buffer, data, data->obj);
1450
1477
 
1451
- return Qnil;
1478
+ return fbuffer_finalize(data->buffer);
1452
1479
  }
1453
1480
 
1454
- static VALUE generate_json_rescue(VALUE d, VALUE exc)
1481
+ static VALUE generate_json_ensure(VALUE d)
1455
1482
  {
1456
1483
  struct generate_json_data *data = (struct generate_json_data *)d;
1457
1484
  fbuffer_free(data->buffer);
1458
1485
 
1459
- rb_exc_raise(exc);
1460
-
1461
1486
  return Qundef;
1462
1487
  }
1463
1488
 
@@ -1478,9 +1503,7 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1478
1503
  .obj = obj,
1479
1504
  .func = func
1480
1505
  };
1481
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1482
-
1483
- return fbuffer_finalize(&buffer);
1506
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1484
1507
  }
1485
1508
 
1486
1509
  /* call-seq:
@@ -1496,10 +1519,37 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1496
1519
  rb_check_arity(argc, 1, 2);
1497
1520
  VALUE obj = argv[0];
1498
1521
  VALUE io = argc > 1 ? argv[1] : Qnil;
1499
- VALUE result = cState_partial_generate(self, obj, generate_json, io);
1522
+ return cState_partial_generate(self, obj, generate_json, io);
1523
+ }
1524
+
1525
+ static VALUE cState_generate_new(int argc, VALUE *argv, VALUE self)
1526
+ {
1527
+ rb_check_arity(argc, 1, 2);
1528
+ VALUE obj = argv[0];
1529
+ VALUE io = argc > 1 ? argv[1] : Qnil;
1530
+
1500
1531
  GET_STATE(self);
1501
- (void)state;
1502
- return result;
1532
+
1533
+ JSON_Generator_State new_state;
1534
+ MEMCPY(&new_state, state, JSON_Generator_State, 1);
1535
+
1536
+ // FIXME: depth shouldn't be part of JSON_Generator_State, as that prevents it from being used concurrently.
1537
+ new_state.depth = 0;
1538
+
1539
+ char stack_buffer[FBUFFER_STACK_SIZE];
1540
+ FBuffer buffer = {
1541
+ .io = RTEST(io) ? io : Qfalse,
1542
+ };
1543
+ fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
1544
+
1545
+ struct generate_json_data data = {
1546
+ .buffer = &buffer,
1547
+ .vstate = Qfalse,
1548
+ .state = &new_state,
1549
+ .obj = obj,
1550
+ .func = generate_json
1551
+ };
1552
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1503
1553
  }
1504
1554
 
1505
1555
  static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
@@ -1850,6 +1900,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1850
1900
  return Qnil;
1851
1901
  }
1852
1902
 
1903
+ static VALUE cState_allow_duplicate_key_p(VALUE self)
1904
+ {
1905
+ GET_STATE(self);
1906
+ switch (state->on_duplicate_key) {
1907
+ case JSON_IGNORE:
1908
+ return Qtrue;
1909
+ case JSON_DEPRECATED:
1910
+ return Qnil;
1911
+ default:
1912
+ return Qfalse;
1913
+ }
1914
+ }
1915
+
1853
1916
  /*
1854
1917
  * call-seq: depth
1855
1918
  *
@@ -1939,8 +2002,10 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1939
2002
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1940
2003
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1941
2004
  else if (key == sym_strict) { state->strict = RTEST(val); }
2005
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1942
2006
  else if (key == sym_as_json) {
1943
2007
  VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
2008
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
1944
2009
  state_write_value(data, &state->as_json, proc);
1945
2010
  }
1946
2011
  return ST_CONTINUE;
@@ -1990,9 +2055,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
1990
2055
  .obj = obj,
1991
2056
  .func = generate_json,
1992
2057
  };
1993
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1994
-
1995
- return fbuffer_finalize(&buffer);
2058
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1996
2059
  }
1997
2060
 
1998
2061
  /*
@@ -2062,7 +2125,9 @@ void Init_generator(void)
2062
2125
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
2063
2126
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
2064
2127
  rb_define_method(cState, "generate", cState_generate, -1);
2065
- rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2128
+ rb_define_method(cState, "generate_new", cState_generate_new, -1); // :nodoc:
2129
+
2130
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2066
2131
 
2067
2132
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
2068
2133
 
@@ -2091,13 +2156,7 @@ void Init_generator(void)
2091
2156
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
2092
2157
 
2093
2158
  VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
2094
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
2095
2159
  rb_define_method(mString, "to_json", mString_to_json, -1);
2096
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
2097
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
2098
-
2099
- mString_Extend = rb_define_module_under(mString, "Extend");
2100
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
2101
2160
 
2102
2161
  VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
2103
2162
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
@@ -2134,6 +2193,7 @@ void Init_generator(void)
2134
2193
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2135
2194
  sym_strict = ID2SYM(rb_intern("strict"));
2136
2195
  sym_as_json = ID2SYM(rb_intern("as_json"));
2196
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
2137
2197
 
2138
2198
  usascii_encindex = rb_usascii_encindex();
2139
2199
  utf8_encindex = rb_utf8_encindex();