json 2.13.2 → 2.17.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- #include "ruby.h"
1
+ #include "../json.h"
2
2
  #include "../fbuffer/fbuffer.h"
3
3
  #include "../vendor/fpconv.c"
4
4
 
@@ -9,6 +9,12 @@
9
9
 
10
10
  /* ruby api and some helpers */
11
11
 
12
+ enum duplicate_key_action {
13
+ JSON_DEPRECATED = 0,
14
+ JSON_IGNORE,
15
+ JSON_RAISE,
16
+ };
17
+
12
18
  typedef struct JSON_Generator_StateStruct {
13
19
  VALUE indent;
14
20
  VALUE space;
@@ -21,20 +27,19 @@ typedef struct JSON_Generator_StateStruct {
21
27
  long depth;
22
28
  long buffer_initial_length;
23
29
 
30
+ enum duplicate_key_action on_duplicate_key;
31
+
32
+ bool as_json_single_arg;
24
33
  bool allow_nan;
25
34
  bool ascii_only;
26
35
  bool script_safe;
27
36
  bool strict;
28
37
  } JSON_Generator_State;
29
38
 
30
- #ifndef RB_UNLIKELY
31
- #define RB_UNLIKELY(cond) (cond)
32
- #endif
39
+ static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
33
40
 
34
- static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
35
-
36
- static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
- static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
41
+ static ID i_to_s, i_to_json, i_new, i_encode;
42
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
38
43
  sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
44
 
40
45
 
@@ -55,6 +60,7 @@ struct generate_json_data {
55
60
  JSON_Generator_State *state;
56
61
  VALUE obj;
57
62
  generator_func func;
63
+ long depth;
58
64
  };
59
65
 
60
66
  static VALUE cState_from_state_s(VALUE self, VALUE opts);
@@ -76,23 +82,18 @@ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *d
76
82
 
77
83
  static int usascii_encindex, utf8_encindex, binary_encindex;
78
84
 
79
- #ifdef RBIMPL_ATTR_NORETURN
80
- RBIMPL_ATTR_NORETURN()
81
- #endif
82
- static void raise_generator_error_str(VALUE invalid_object, VALUE str)
85
+ NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
83
86
  {
87
+ rb_enc_associate_index(str, utf8_encindex);
84
88
  VALUE exc = rb_exc_new_str(eGeneratorError, str);
85
89
  rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
86
90
  rb_exc_raise(exc);
87
91
  }
88
92
 
89
- #ifdef RBIMPL_ATTR_NORETURN
90
- RBIMPL_ATTR_NORETURN()
91
- #endif
92
93
  #ifdef RBIMPL_ATTR_FORMAT
93
94
  RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
94
95
  #endif
95
- static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
+ NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
97
  {
97
98
  va_list args;
98
99
  va_start(args, fmt);
@@ -127,18 +128,12 @@ typedef struct _search_state {
127
128
  #endif /* HAVE_SIMD */
128
129
  } search_state;
129
130
 
130
- #if (defined(__GNUC__ ) || defined(__clang__))
131
- #define FORCE_INLINE __attribute__((always_inline))
132
- #else
133
- #define FORCE_INLINE
134
- #endif
135
-
136
- static inline FORCE_INLINE void search_flush(search_state *search)
131
+ ALWAYS_INLINE(static) void search_flush(search_state *search)
137
132
  {
138
133
  // Do not remove this conditional without profiling, specifically escape-heavy text.
139
134
  // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
- // For back-to-back characters that need to be escaped, specifcally for the SIMD code paths, this method
141
- // will be called just before calling escape_UTF8_char_basic. There will be no characers to append for the
135
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
136
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
137
  // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
138
  // nothing needs to be flushed, we can save a few memory references with this conditional.
144
139
  if (search->ptr > search->cursor) {
@@ -176,7 +171,7 @@ static inline unsigned char search_escape_basic(search_state *search)
176
171
  return 0;
177
172
  }
178
173
 
179
- static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
174
+ ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
180
175
  {
181
176
  const unsigned char ch = (unsigned char)*search->ptr;
182
177
  switch (ch) {
@@ -263,7 +258,7 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
263
258
 
264
259
  #ifdef HAVE_SIMD
265
260
 
266
- static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
261
+ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
262
  {
268
263
  // Flush the buffer so everything up until the last 'len' characters are unflushed.
269
264
  search_flush(search);
@@ -286,7 +281,7 @@ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsi
286
281
 
287
282
  #ifdef HAVE_SIMD_NEON
288
283
 
289
- static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
284
+ ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search)
290
285
  {
291
286
  uint64_t mask = search->matches_mask;
292
287
  uint32_t index = trailing_zeros64(mask) >> 2;
@@ -400,7 +395,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
400
395
 
401
396
  #ifdef HAVE_SIMD_SSE2
402
397
 
403
- static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
398
+ ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search)
404
399
  {
405
400
  int mask = search->matches_mask;
406
401
  int index = trailing_zeros(mask);
@@ -424,7 +419,7 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
424
419
  #define TARGET_SSE2
425
420
  #endif
426
421
 
427
- static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
422
+ ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search)
428
423
  {
429
424
  if (RB_UNLIKELY(search->has_matches)) {
430
425
  // There are more matches if search->matches_mask > 0.
@@ -835,18 +830,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
835
830
  return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
836
831
  }
837
832
 
838
- /*
839
- * call-seq: String.included(modul)
840
- *
841
- * Extends _modul_ with the String::Extend module.
842
- */
843
- static VALUE mString_included_s(VALUE self, VALUE modul)
844
- {
845
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
846
- rb_call_super(1, &modul);
847
- return result;
848
- }
849
-
850
833
  /*
851
834
  * call-seq: to_json(*)
852
835
  *
@@ -861,51 +844,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
861
844
  return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
862
845
  }
863
846
 
864
- /*
865
- * call-seq: to_json_raw_object()
866
- *
867
- * This method creates a raw object hash, that can be nested into
868
- * other data structures and will be generated as a raw string. This
869
- * method should be used, if you want to convert raw strings to JSON
870
- * instead of UTF-8 strings, e. g. binary data.
871
- */
872
- static VALUE mString_to_json_raw_object(VALUE self)
873
- {
874
- VALUE ary;
875
- VALUE result = rb_hash_new();
876
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
877
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
878
- rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
879
- return result;
880
- }
881
-
882
- /*
883
- * call-seq: to_json_raw(*args)
884
- *
885
- * This method creates a JSON text from the result of a call to
886
- * to_json_raw_object of this String.
887
- */
888
- static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
889
- {
890
- VALUE obj = mString_to_json_raw_object(self);
891
- Check_Type(obj, T_HASH);
892
- return mHash_to_json(argc, argv, obj);
893
- }
894
-
895
- /*
896
- * call-seq: json_create(o)
897
- *
898
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
899
- * key "raw"). The Ruby String can be created by this module method.
900
- */
901
- static VALUE mString_Extend_json_create(VALUE self, VALUE o)
902
- {
903
- VALUE ary;
904
- Check_Type(o, T_HASH);
905
- ary = rb_hash_aref(o, rb_str_new2("raw"));
906
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
907
- }
908
-
909
847
  /*
910
848
  * call-seq: to_json(*)
911
849
  *
@@ -989,11 +927,6 @@ static size_t State_memsize(const void *ptr)
989
927
  return sizeof(JSON_Generator_State);
990
928
  }
991
929
 
992
- #ifndef HAVE_RB_EXT_RACTOR_SAFE
993
- # undef RUBY_TYPED_FROZEN_SHAREABLE
994
- # define RUBY_TYPED_FROZEN_SHAREABLE 0
995
- #endif
996
-
997
930
  static const rb_data_type_t JSON_Generator_State_type = {
998
931
  "JSON/Generator/State",
999
932
  {
@@ -1035,18 +968,24 @@ static void vstate_spill(struct generate_json_data *data)
1035
968
  RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
1036
969
  }
1037
970
 
1038
- static inline VALUE vstate_get(struct generate_json_data *data)
971
+ static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj)
1039
972
  {
1040
973
  if (RB_UNLIKELY(!data->vstate)) {
1041
974
  vstate_spill(data);
1042
975
  }
1043
- return data->vstate;
976
+ GET_STATE(data->vstate);
977
+ state->depth = data->depth;
978
+ VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate);
979
+ // no need to restore state->depth, vstate is just a temporary State
980
+ return tmp;
1044
981
  }
1045
982
 
1046
- struct hash_foreach_arg {
1047
- struct generate_json_data *data;
1048
- int iter;
1049
- };
983
+ static VALUE
984
+ json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
985
+ {
986
+ VALUE proc_args[2] = {object, is_key};
987
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
988
+ }
1050
989
 
1051
990
  static VALUE
1052
991
  convert_string_subclass(VALUE key)
@@ -1063,6 +1002,144 @@ convert_string_subclass(VALUE key)
1063
1002
  return key_to_s;
1064
1003
  }
1065
1004
 
1005
+ static bool enc_utf8_compatible_p(int enc_idx)
1006
+ {
1007
+ if (enc_idx == usascii_encindex) return true;
1008
+ if (enc_idx == utf8_encindex) return true;
1009
+ return false;
1010
+ }
1011
+
1012
+ static VALUE encode_json_string_try(VALUE str)
1013
+ {
1014
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1015
+ }
1016
+
1017
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1018
+ {
1019
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1020
+ return Qundef;
1021
+ }
1022
+
1023
+ static inline bool valid_json_string_p(VALUE str)
1024
+ {
1025
+ int coderange = rb_enc_str_coderange(str);
1026
+
1027
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
1028
+ return true;
1029
+ }
1030
+
1031
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
1032
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
1033
+ }
1034
+
1035
+ return false;
1036
+ }
1037
+
1038
+ static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
1039
+ {
1040
+ if (RB_LIKELY(valid_json_string_p(str))) {
1041
+ return str;
1042
+ }
1043
+
1044
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
1045
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
1046
+ if (coerced_str != str) {
1047
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
1048
+ if (!valid_json_string_p(coerced_str)) {
1049
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
1050
+ }
1051
+ } else {
1052
+ // as_json could return another type than T_STRING
1053
+ if (is_key) {
1054
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
1055
+ }
1056
+ }
1057
+
1058
+ return coerced_str;
1059
+ }
1060
+ }
1061
+
1062
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
1063
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1064
+ switch (rb_enc_str_coderange(utf8_string)) {
1065
+ case ENC_CODERANGE_7BIT:
1066
+ return utf8_string;
1067
+ case ENC_CODERANGE_VALID:
1068
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1069
+ // TODO: Raise in 3.0.0
1070
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1071
+ return utf8_string;
1072
+ break;
1073
+ }
1074
+ }
1075
+
1076
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1077
+ }
1078
+
1079
+ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1080
+ {
1081
+ fbuffer_append_char(buffer, '"');
1082
+
1083
+ long len;
1084
+ search_state search;
1085
+ search.buffer = buffer;
1086
+ RSTRING_GETMEM(obj, search.ptr, len);
1087
+ search.cursor = search.ptr;
1088
+ search.end = search.ptr + len;
1089
+
1090
+ #ifdef HAVE_SIMD
1091
+ search.matches_mask = 0;
1092
+ search.has_matches = false;
1093
+ search.chunk_base = NULL;
1094
+ #endif /* HAVE_SIMD */
1095
+
1096
+ switch (rb_enc_str_coderange(obj)) {
1097
+ case ENC_CODERANGE_7BIT:
1098
+ case ENC_CODERANGE_VALID:
1099
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1100
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1101
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1102
+ convert_UTF8_to_script_safe_JSON(&search);
1103
+ } else {
1104
+ convert_UTF8_to_JSON(&search);
1105
+ }
1106
+ break;
1107
+ default:
1108
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1109
+ break;
1110
+ }
1111
+ fbuffer_append_char(buffer, '"');
1112
+ }
1113
+
1114
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1115
+ {
1116
+ obj = ensure_valid_encoding(data, obj, false, false);
1117
+ raw_generate_json_string(buffer, data, obj);
1118
+ }
1119
+
1120
+ struct hash_foreach_arg {
1121
+ VALUE hash;
1122
+ struct generate_json_data *data;
1123
+ int first_key_type;
1124
+ bool first;
1125
+ bool mixed_keys_encountered;
1126
+ };
1127
+
1128
+ NOINLINE(static) void
1129
+ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1130
+ {
1131
+ if (arg->mixed_keys_encountered) {
1132
+ return;
1133
+ }
1134
+ arg->mixed_keys_encountered = true;
1135
+
1136
+ JSON_Generator_State *state = arg->data->state;
1137
+ if (state->on_duplicate_key != JSON_IGNORE) {
1138
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1139
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1140
+ }
1141
+ }
1142
+
1066
1143
  static int
1067
1144
  json_object_i(VALUE key, VALUE val, VALUE _arg)
1068
1145
  {
@@ -1072,22 +1149,34 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1072
1149
  FBuffer *buffer = data->buffer;
1073
1150
  JSON_Generator_State *state = data->state;
1074
1151
 
1075
- long depth = state->depth;
1076
- int j;
1152
+ long depth = data->depth;
1153
+ int key_type = rb_type(key);
1154
+
1155
+ if (arg->first) {
1156
+ arg->first = false;
1157
+ arg->first_key_type = key_type;
1158
+ }
1159
+ else {
1160
+ fbuffer_append_char(buffer, ',');
1161
+ }
1077
1162
 
1078
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
1079
1163
  if (RB_UNLIKELY(data->state->object_nl)) {
1080
1164
  fbuffer_append_str(buffer, data->state->object_nl);
1081
1165
  }
1082
1166
  if (RB_UNLIKELY(data->state->indent)) {
1083
- for (j = 0; j < depth; j++) {
1084
- fbuffer_append_str(buffer, data->state->indent);
1085
- }
1167
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1086
1168
  }
1087
1169
 
1088
1170
  VALUE key_to_s;
1089
- switch (rb_type(key)) {
1171
+ bool as_json_called = false;
1172
+
1173
+ start:
1174
+ switch (key_type) {
1090
1175
  case T_STRING:
1176
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1177
+ json_inspect_hash_with_mixed_keys(arg);
1178
+ }
1179
+
1091
1180
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1092
1181
  key_to_s = key;
1093
1182
  } else {
@@ -1095,15 +1184,31 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1095
1184
  }
1096
1185
  break;
1097
1186
  case T_SYMBOL:
1187
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1188
+ json_inspect_hash_with_mixed_keys(arg);
1189
+ }
1190
+
1098
1191
  key_to_s = rb_sym2str(key);
1099
1192
  break;
1100
1193
  default:
1194
+ if (data->state->strict) {
1195
+ if (RTEST(data->state->as_json) && !as_json_called) {
1196
+ key = json_call_as_json(data->state, key, Qtrue);
1197
+ key_type = rb_type(key);
1198
+ as_json_called = true;
1199
+ goto start;
1200
+ } else {
1201
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
1202
+ }
1203
+ }
1101
1204
  key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
1102
1205
  break;
1103
1206
  }
1104
1207
 
1208
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
1209
+
1105
1210
  if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
1106
- generate_json_string(buffer, data, key_to_s);
1211
+ raw_generate_json_string(buffer, data, key_to_s);
1107
1212
  } else {
1108
1213
  generate_json(buffer, data, key_to_s);
1109
1214
  }
@@ -1112,46 +1217,43 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
1112
1217
  if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1113
1218
  generate_json(buffer, data, val);
1114
1219
 
1115
- arg->iter++;
1116
1220
  return ST_CONTINUE;
1117
1221
  }
1118
1222
 
1119
1223
  static inline long increase_depth(struct generate_json_data *data)
1120
1224
  {
1121
1225
  JSON_Generator_State *state = data->state;
1122
- long depth = ++state->depth;
1226
+ long depth = ++data->depth;
1123
1227
  if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1124
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1228
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth);
1125
1229
  }
1126
1230
  return depth;
1127
1231
  }
1128
1232
 
1129
1233
  static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1130
1234
  {
1131
- int j;
1132
1235
  long depth = increase_depth(data);
1133
1236
 
1134
1237
  if (RHASH_SIZE(obj) == 0) {
1135
1238
  fbuffer_append(buffer, "{}", 2);
1136
- --data->state->depth;
1239
+ --data->depth;
1137
1240
  return;
1138
1241
  }
1139
1242
 
1140
1243
  fbuffer_append_char(buffer, '{');
1141
1244
 
1142
1245
  struct hash_foreach_arg arg = {
1246
+ .hash = obj,
1143
1247
  .data = data,
1144
- .iter = 0,
1248
+ .first = true,
1145
1249
  };
1146
1250
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
1147
1251
 
1148
- depth = --data->state->depth;
1252
+ depth = --data->depth;
1149
1253
  if (RB_UNLIKELY(data->state->object_nl)) {
1150
1254
  fbuffer_append_str(buffer, data->state->object_nl);
1151
1255
  if (RB_UNLIKELY(data->state->indent)) {
1152
- for (j = 0; j < depth; j++) {
1153
- fbuffer_append_str(buffer, data->state->indent);
1154
- }
1256
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1155
1257
  }
1156
1258
  }
1157
1259
  fbuffer_append_char(buffer, '}');
@@ -1159,125 +1261,41 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
1159
1261
 
1160
1262
  static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1161
1263
  {
1162
- int i, j;
1163
1264
  long depth = increase_depth(data);
1164
1265
 
1165
1266
  if (RARRAY_LEN(obj) == 0) {
1166
1267
  fbuffer_append(buffer, "[]", 2);
1167
- --data->state->depth;
1268
+ --data->depth;
1168
1269
  return;
1169
1270
  }
1170
1271
 
1171
1272
  fbuffer_append_char(buffer, '[');
1172
1273
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1173
- for (i = 0; i < RARRAY_LEN(obj); i++) {
1274
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
1174
1275
  if (i > 0) {
1175
1276
  fbuffer_append_char(buffer, ',');
1176
1277
  if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1177
1278
  }
1178
1279
  if (RB_UNLIKELY(data->state->indent)) {
1179
- for (j = 0; j < depth; j++) {
1180
- fbuffer_append_str(buffer, data->state->indent);
1181
- }
1280
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1182
1281
  }
1183
1282
  generate_json(buffer, data, RARRAY_AREF(obj, i));
1184
1283
  }
1185
- data->state->depth = --depth;
1284
+ data->depth = --depth;
1186
1285
  if (RB_UNLIKELY(data->state->array_nl)) {
1187
1286
  fbuffer_append_str(buffer, data->state->array_nl);
1188
1287
  if (RB_UNLIKELY(data->state->indent)) {
1189
- for (j = 0; j < depth; j++) {
1190
- fbuffer_append_str(buffer, data->state->indent);
1191
- }
1288
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
1192
1289
  }
1193
1290
  }
1194
1291
  fbuffer_append_char(buffer, ']');
1195
1292
  }
1196
1293
 
1197
- static inline int enc_utf8_compatible_p(int enc_idx)
1198
- {
1199
- if (enc_idx == usascii_encindex) return 1;
1200
- if (enc_idx == utf8_encindex) return 1;
1201
- return 0;
1202
- }
1203
-
1204
- static VALUE encode_json_string_try(VALUE str)
1205
- {
1206
- return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1207
- }
1208
-
1209
- static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1210
- {
1211
- raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1212
- return Qundef;
1213
- }
1214
-
1215
- static inline VALUE ensure_valid_encoding(VALUE str)
1216
- {
1217
- int encindex = RB_ENCODING_GET(str);
1218
- VALUE utf8_string;
1219
- if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
1220
- if (encindex == binary_encindex) {
1221
- utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1222
- switch (rb_enc_str_coderange(utf8_string)) {
1223
- case ENC_CODERANGE_7BIT:
1224
- return utf8_string;
1225
- case ENC_CODERANGE_VALID:
1226
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1227
- // TODO: Raise in 3.0.0
1228
- rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1229
- return utf8_string;
1230
- break;
1231
- }
1232
- }
1233
-
1234
- str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1235
- }
1236
- return str;
1237
- }
1238
-
1239
- static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1240
- {
1241
- obj = ensure_valid_encoding(obj);
1242
-
1243
- fbuffer_append_char(buffer, '"');
1244
-
1245
- long len;
1246
- search_state search;
1247
- search.buffer = buffer;
1248
- RSTRING_GETMEM(obj, search.ptr, len);
1249
- search.cursor = search.ptr;
1250
- search.end = search.ptr + len;
1251
-
1252
- #ifdef HAVE_SIMD
1253
- search.matches_mask = 0;
1254
- search.has_matches = false;
1255
- search.chunk_base = NULL;
1256
- #endif /* HAVE_SIMD */
1257
-
1258
- switch (rb_enc_str_coderange(obj)) {
1259
- case ENC_CODERANGE_7BIT:
1260
- case ENC_CODERANGE_VALID:
1261
- if (RB_UNLIKELY(data->state->ascii_only)) {
1262
- convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1263
- } else if (RB_UNLIKELY(data->state->script_safe)) {
1264
- convert_UTF8_to_script_safe_JSON(&search);
1265
- } else {
1266
- convert_UTF8_to_JSON(&search);
1267
- }
1268
- break;
1269
- default:
1270
- raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1271
- break;
1272
- }
1273
- fbuffer_append_char(buffer, '"');
1274
- }
1275
-
1276
1294
  static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1277
1295
  {
1278
1296
  VALUE tmp;
1279
1297
  if (rb_respond_to(obj, i_to_json)) {
1280
- tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1298
+ tmp = json_call_to_json(data, obj);
1281
1299
  Check_Type(tmp, T_STRING);
1282
1300
  fbuffer_append_str(buffer, tmp);
1283
1301
  } else {
@@ -1340,11 +1358,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1340
1358
  /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1341
1359
  if (!allow_nan) {
1342
1360
  if (data->state->strict && data->state->as_json) {
1343
- VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1361
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
1344
1362
  if (casted_obj != obj) {
1345
1363
  increase_depth(data);
1346
1364
  generate_json(buffer, data, casted_obj);
1347
- data->state->depth--;
1365
+ data->depth--;
1348
1366
  return;
1349
1367
  }
1350
1368
  }
@@ -1357,12 +1375,11 @@ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data
1357
1375
  }
1358
1376
 
1359
1377
  /* This implementation writes directly into the buffer. We reserve
1360
- * the 28 characters that fpconv_dtoa states as its maximum.
1378
+ * the 32 characters that fpconv_dtoa states as its maximum.
1361
1379
  */
1362
- fbuffer_inc_capa(buffer, 28);
1380
+ fbuffer_inc_capa(buffer, 32);
1363
1381
  char* d = buffer->ptr + buffer->len;
1364
1382
  int len = fpconv_dtoa(value, d);
1365
-
1366
1383
  /* fpconv_dtoa converts a float to its shortest string representation,
1367
1384
  * but it adds a ".0" if this is a plain integer.
1368
1385
  */
@@ -1412,7 +1429,16 @@ start:
1412
1429
  break;
1413
1430
  case T_STRING:
1414
1431
  if (klass != rb_cString) goto general;
1415
- generate_json_string(buffer, data, obj);
1432
+
1433
+ if (RB_LIKELY(valid_json_string_p(obj))) {
1434
+ raw_generate_json_string(buffer, data, obj);
1435
+ } else if (as_json_called) {
1436
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1437
+ } else {
1438
+ obj = ensure_valid_encoding(data, obj, false, false);
1439
+ as_json_called = true;
1440
+ goto start;
1441
+ }
1416
1442
  break;
1417
1443
  case T_SYMBOL:
1418
1444
  generate_json_symbol(buffer, data, obj);
@@ -1429,7 +1455,7 @@ start:
1429
1455
  general:
1430
1456
  if (data->state->strict) {
1431
1457
  if (RTEST(data->state->as_json) && !as_json_called) {
1432
- obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1458
+ obj = json_call_as_json(data->state, obj, Qfalse);
1433
1459
  as_json_called = true;
1434
1460
  goto start;
1435
1461
  } else {
@@ -1448,16 +1474,14 @@ static VALUE generate_json_try(VALUE d)
1448
1474
 
1449
1475
  data->func(data->buffer, data, data->obj);
1450
1476
 
1451
- return Qnil;
1477
+ return fbuffer_finalize(data->buffer);
1452
1478
  }
1453
1479
 
1454
- static VALUE generate_json_rescue(VALUE d, VALUE exc)
1480
+ static VALUE generate_json_ensure(VALUE d)
1455
1481
  {
1456
1482
  struct generate_json_data *data = (struct generate_json_data *)d;
1457
1483
  fbuffer_free(data->buffer);
1458
1484
 
1459
- rb_exc_raise(exc);
1460
-
1461
1485
  return Qundef;
1462
1486
  }
1463
1487
 
@@ -1473,14 +1497,13 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1473
1497
 
1474
1498
  struct generate_json_data data = {
1475
1499
  .buffer = &buffer,
1476
- .vstate = self,
1500
+ .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json
1477
1501
  .state = state,
1502
+ .depth = state->depth,
1478
1503
  .obj = obj,
1479
1504
  .func = func
1480
1505
  };
1481
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1482
-
1483
- return fbuffer_finalize(&buffer);
1506
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1484
1507
  }
1485
1508
 
1486
1509
  /* call-seq:
@@ -1496,10 +1519,7 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1496
1519
  rb_check_arity(argc, 1, 2);
1497
1520
  VALUE obj = argv[0];
1498
1521
  VALUE io = argc > 1 ? argv[1] : Qnil;
1499
- VALUE result = cState_partial_generate(self, obj, generate_json, io);
1500
- GET_STATE(self);
1501
- (void)state;
1502
- return result;
1522
+ return cState_partial_generate(self, obj, generate_json, io);
1503
1523
  }
1504
1524
 
1505
1525
  static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
@@ -1580,6 +1600,7 @@ static VALUE string_config(VALUE config)
1580
1600
  */
1581
1601
  static VALUE cState_indent_set(VALUE self, VALUE indent)
1582
1602
  {
1603
+ rb_check_frozen(self);
1583
1604
  GET_STATE(self);
1584
1605
  RB_OBJ_WRITE(self, &state->indent, string_config(indent));
1585
1606
  return Qnil;
@@ -1605,6 +1626,7 @@ static VALUE cState_space(VALUE self)
1605
1626
  */
1606
1627
  static VALUE cState_space_set(VALUE self, VALUE space)
1607
1628
  {
1629
+ rb_check_frozen(self);
1608
1630
  GET_STATE(self);
1609
1631
  RB_OBJ_WRITE(self, &state->space, string_config(space));
1610
1632
  return Qnil;
@@ -1628,6 +1650,7 @@ static VALUE cState_space_before(VALUE self)
1628
1650
  */
1629
1651
  static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1630
1652
  {
1653
+ rb_check_frozen(self);
1631
1654
  GET_STATE(self);
1632
1655
  RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
1633
1656
  return Qnil;
@@ -1653,6 +1676,7 @@ static VALUE cState_object_nl(VALUE self)
1653
1676
  */
1654
1677
  static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1655
1678
  {
1679
+ rb_check_frozen(self);
1656
1680
  GET_STATE(self);
1657
1681
  RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
1658
1682
  return Qnil;
@@ -1676,6 +1700,7 @@ static VALUE cState_array_nl(VALUE self)
1676
1700
  */
1677
1701
  static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1678
1702
  {
1703
+ rb_check_frozen(self);
1679
1704
  GET_STATE(self);
1680
1705
  RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
1681
1706
  return Qnil;
@@ -1699,6 +1724,7 @@ static VALUE cState_as_json(VALUE self)
1699
1724
  */
1700
1725
  static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1701
1726
  {
1727
+ rb_check_frozen(self);
1702
1728
  GET_STATE(self);
1703
1729
  RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1704
1730
  return Qnil;
@@ -1741,6 +1767,7 @@ static long long_config(VALUE num)
1741
1767
  */
1742
1768
  static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1743
1769
  {
1770
+ rb_check_frozen(self);
1744
1771
  GET_STATE(self);
1745
1772
  state->max_nesting = long_config(depth);
1746
1773
  return Qnil;
@@ -1766,6 +1793,7 @@ static VALUE cState_script_safe(VALUE self)
1766
1793
  */
1767
1794
  static VALUE cState_script_safe_set(VALUE self, VALUE enable)
1768
1795
  {
1796
+ rb_check_frozen(self);
1769
1797
  GET_STATE(self);
1770
1798
  state->script_safe = RTEST(enable);
1771
1799
  return Qnil;
@@ -1797,6 +1825,7 @@ static VALUE cState_strict(VALUE self)
1797
1825
  */
1798
1826
  static VALUE cState_strict_set(VALUE self, VALUE enable)
1799
1827
  {
1828
+ rb_check_frozen(self);
1800
1829
  GET_STATE(self);
1801
1830
  state->strict = RTEST(enable);
1802
1831
  return Qnil;
@@ -1821,6 +1850,7 @@ static VALUE cState_allow_nan_p(VALUE self)
1821
1850
  */
1822
1851
  static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1823
1852
  {
1853
+ rb_check_frozen(self);
1824
1854
  GET_STATE(self);
1825
1855
  state->allow_nan = RTEST(enable);
1826
1856
  return Qnil;
@@ -1845,11 +1875,25 @@ static VALUE cState_ascii_only_p(VALUE self)
1845
1875
  */
1846
1876
  static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1847
1877
  {
1878
+ rb_check_frozen(self);
1848
1879
  GET_STATE(self);
1849
1880
  state->ascii_only = RTEST(enable);
1850
1881
  return Qnil;
1851
1882
  }
1852
1883
 
1884
+ static VALUE cState_allow_duplicate_key_p(VALUE self)
1885
+ {
1886
+ GET_STATE(self);
1887
+ switch (state->on_duplicate_key) {
1888
+ case JSON_IGNORE:
1889
+ return Qtrue;
1890
+ case JSON_DEPRECATED:
1891
+ return Qnil;
1892
+ default:
1893
+ return Qfalse;
1894
+ }
1895
+ }
1896
+
1853
1897
  /*
1854
1898
  * call-seq: depth
1855
1899
  *
@@ -1869,6 +1913,7 @@ static VALUE cState_depth(VALUE self)
1869
1913
  */
1870
1914
  static VALUE cState_depth_set(VALUE self, VALUE depth)
1871
1915
  {
1916
+ rb_check_frozen(self);
1872
1917
  GET_STATE(self);
1873
1918
  state->depth = long_config(depth);
1874
1919
  return Qnil;
@@ -1902,6 +1947,7 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_
1902
1947
  */
1903
1948
  static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
1904
1949
  {
1950
+ rb_check_frozen(self);
1905
1951
  GET_STATE(self);
1906
1952
  buffer_initial_length_set(state, buffer_initial_length);
1907
1953
  return Qnil;
@@ -1939,8 +1985,10 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1939
1985
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1940
1986
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1941
1987
  else if (key == sym_strict) { state->strict = RTEST(val); }
1988
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1942
1989
  else if (key == sym_as_json) {
1943
1990
  VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
1991
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
1944
1992
  state_write_value(data, &state->as_json, proc);
1945
1993
  }
1946
1994
  return ST_CONTINUE;
@@ -1966,6 +2014,7 @@ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE con
1966
2014
 
1967
2015
  static VALUE cState_configure(VALUE self, VALUE opts)
1968
2016
  {
2017
+ rb_check_frozen(self);
1969
2018
  GET_STATE(self);
1970
2019
  configure_state(state, self, opts);
1971
2020
  return self;
@@ -1987,12 +2036,11 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
1987
2036
  .buffer = &buffer,
1988
2037
  .vstate = Qfalse,
1989
2038
  .state = &state,
2039
+ .depth = state.depth,
1990
2040
  .obj = obj,
1991
2041
  .func = generate_json,
1992
2042
  };
1993
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1994
-
1995
- return fbuffer_finalize(&buffer);
2043
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1996
2044
  }
1997
2045
 
1998
2046
  /*
@@ -2062,7 +2110,8 @@ void Init_generator(void)
2062
2110
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
2063
2111
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
2064
2112
  rb_define_method(cState, "generate", cState_generate, -1);
2065
- rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2113
+
2114
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2066
2115
 
2067
2116
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
2068
2117
 
@@ -2091,13 +2140,7 @@ void Init_generator(void)
2091
2140
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
2092
2141
 
2093
2142
  VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
2094
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
2095
2143
  rb_define_method(mString, "to_json", mString_to_json, -1);
2096
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
2097
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
2098
-
2099
- mString_Extend = rb_define_module_under(mString, "Extend");
2100
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
2101
2144
 
2102
2145
  VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
2103
2146
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
@@ -2114,10 +2157,6 @@ void Init_generator(void)
2114
2157
  i_to_s = rb_intern("to_s");
2115
2158
  i_to_json = rb_intern("to_json");
2116
2159
  i_new = rb_intern("new");
2117
- i_pack = rb_intern("pack");
2118
- i_unpack = rb_intern("unpack");
2119
- i_create_id = rb_intern("create_id");
2120
- i_extend = rb_intern("extend");
2121
2160
  i_encode = rb_intern("encode");
2122
2161
 
2123
2162
  sym_indent = ID2SYM(rb_intern("indent"));
@@ -2134,6 +2173,7 @@ void Init_generator(void)
2134
2173
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2135
2174
  sym_strict = ID2SYM(rb_intern("strict"));
2136
2175
  sym_as_json = ID2SYM(rb_intern("as_json"));
2176
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
2137
2177
 
2138
2178
  usascii_encindex = rb_usascii_encindex();
2139
2179
  utf8_encindex = rb_utf8_encindex();