json 2.9.1 → 2.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,37 +1,46 @@
1
- #include "ruby.h"
1
+ #include "../json.h"
2
2
  #include "../fbuffer/fbuffer.h"
3
+ #include "../vendor/fpconv.c"
3
4
 
4
5
  #include <math.h>
5
6
  #include <ctype.h>
6
7
 
8
+ #include "../simd/simd.h"
9
+
7
10
  /* ruby api and some helpers */
8
11
 
12
+ enum duplicate_key_action {
13
+ JSON_DEPRECATED = 0,
14
+ JSON_IGNORE,
15
+ JSON_RAISE,
16
+ };
17
+
9
18
  typedef struct JSON_Generator_StateStruct {
10
19
  VALUE indent;
11
20
  VALUE space;
12
21
  VALUE space_before;
13
22
  VALUE object_nl;
14
23
  VALUE array_nl;
24
+ VALUE as_json;
15
25
 
16
26
  long max_nesting;
17
27
  long depth;
18
28
  long buffer_initial_length;
19
29
 
30
+ enum duplicate_key_action on_duplicate_key;
31
+
32
+ bool as_json_single_arg;
20
33
  bool allow_nan;
21
34
  bool ascii_only;
22
35
  bool script_safe;
23
36
  bool strict;
24
37
  } JSON_Generator_State;
25
38
 
26
- #ifndef RB_UNLIKELY
27
- #define RB_UNLIKELY(cond) (cond)
28
- #endif
29
-
30
- static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
39
+ static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
31
40
 
32
- static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
33
- static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
34
- sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict;
41
+ static ID i_to_s, i_to_json, i_new, i_encode;
42
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
43
+ sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
35
44
 
36
45
 
37
46
  #define GET_STATE_TO(self, state) \
@@ -43,7 +52,7 @@ static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl,
43
52
 
44
53
  struct generate_json_data;
45
54
 
46
- typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
55
+ typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
47
56
 
48
57
  struct generate_json_data {
49
58
  FBuffer *buffer;
@@ -51,43 +60,42 @@ struct generate_json_data {
51
60
  JSON_Generator_State *state;
52
61
  VALUE obj;
53
62
  generator_func func;
63
+ long depth;
54
64
  };
55
65
 
66
+ static SIMD_Implementation simd_impl;
67
+
56
68
  static VALUE cState_from_state_s(VALUE self, VALUE opts);
57
69
  static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
58
- static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
59
- static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
60
- static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
61
- static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
62
- static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
63
- static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
64
- static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
70
+ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
71
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
72
+ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
73
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
74
+ static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
75
+ static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
76
+ static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
65
77
  #ifdef RUBY_INTEGER_UNIFICATION
66
- static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
78
+ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
67
79
  #endif
68
- static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
69
- static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
70
- static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
80
+ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
81
+ static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
82
+ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
83
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
71
84
 
72
85
  static int usascii_encindex, utf8_encindex, binary_encindex;
73
86
 
74
- #ifdef RBIMPL_ATTR_NORETURN
75
- RBIMPL_ATTR_NORETURN()
76
- #endif
77
- static void raise_generator_error_str(VALUE invalid_object, VALUE str)
87
+ NORETURN(static void) raise_generator_error_str(VALUE invalid_object, VALUE str)
78
88
  {
89
+ rb_enc_associate_index(str, utf8_encindex);
79
90
  VALUE exc = rb_exc_new_str(eGeneratorError, str);
80
91
  rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
81
92
  rb_exc_raise(exc);
82
93
  }
83
94
 
84
- #ifdef RBIMPL_ATTR_NORETURN
85
- RBIMPL_ATTR_NORETURN()
86
- #endif
87
95
  #ifdef RBIMPL_ATTR_FORMAT
88
96
  RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
89
97
  #endif
90
- static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
98
+ NORETURN(static void) raise_generator_error(VALUE invalid_object, const char *fmt, ...)
91
99
  {
92
100
  va_list args;
93
101
  va_start(args, fmt);
@@ -96,6 +104,98 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
104
  raise_generator_error_str(invalid_object, str);
97
105
  }
98
106
 
107
+ // 0 - single byte char that doesn't need to be escaped.
108
+ // (x | 8) - char that needs to be escaped.
109
+ static const unsigned char CHAR_LENGTH_MASK = 7;
110
+ static const unsigned char ESCAPE_MASK = 8;
111
+
112
+ typedef struct _search_state {
113
+ const char *ptr;
114
+ const char *end;
115
+ const char *cursor;
116
+ FBuffer *buffer;
117
+
118
+ #ifdef HAVE_SIMD
119
+ const char *chunk_base;
120
+ const char *chunk_end;
121
+ bool has_matches;
122
+
123
+ #if defined(HAVE_SIMD_NEON)
124
+ uint64_t matches_mask;
125
+ #elif defined(HAVE_SIMD_SSE2)
126
+ int matches_mask;
127
+ #else
128
+ #error "Unknown SIMD Implementation."
129
+ #endif /* HAVE_SIMD_NEON */
130
+ #endif /* HAVE_SIMD */
131
+ } search_state;
132
+
133
+ ALWAYS_INLINE(static) void search_flush(search_state *search)
134
+ {
135
+ // Do not remove this conditional without profiling, specifically escape-heavy text.
136
+ // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
137
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
138
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
139
+ // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
140
+ // nothing needs to be flushed, we can save a few memory references with this conditional.
141
+ if (search->ptr > search->cursor) {
142
+ fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
143
+ search->cursor = search->ptr;
144
+ }
145
+ }
146
+
147
+ static const unsigned char escape_table_basic[256] = {
148
+ // ASCII Control Characters
149
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
150
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
151
+ // ASCII Characters
152
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
153
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
154
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
155
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
156
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
157
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158
+ };
159
+
160
+ static inline unsigned char search_escape_basic(search_state *search)
161
+ {
162
+ while (search->ptr < search->end) {
163
+ if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
164
+ search_flush(search);
165
+ return 1;
166
+ } else {
167
+ search->ptr++;
168
+ }
169
+ }
170
+ search_flush(search);
171
+ return 0;
172
+ }
173
+
174
+ ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
175
+ {
176
+ const unsigned char ch = (unsigned char)*search->ptr;
177
+ switch (ch) {
178
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
179
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
180
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
181
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
182
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
183
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
184
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
185
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
186
+ default: {
187
+ const char *hexdig = "0123456789abcdef";
188
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
189
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
190
+ scratch[5] = hexdig[ch & 0xf];
191
+ fbuffer_append(search->buffer, scratch, 6);
192
+ break;
193
+ }
194
+ }
195
+ search->ptr++;
196
+ search->cursor = search->ptr;
197
+ }
198
+
99
199
  /* Converts in_string to a JSON string (without the wrapping '"'
100
200
  * characters) in FBuffer out_buffer.
101
201
  *
@@ -106,282 +206,501 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
106
206
  *
107
207
  * - If out_ascii_only: non-ASCII characters (>0x7F)
108
208
  *
109
- * - If out_script_safe: forwardslash, line separator (U+2028), and
209
+ * - If script_safe: forwardslash (/), line separator (U+2028), and
110
210
  * paragraph separator (U+2029)
111
211
  *
112
212
  * Everything else (should be UTF-8) is just passed through and
113
213
  * appended to the result.
114
214
  */
115
- static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
116
- {
117
- const char *hexdig = "0123456789abcdef";
118
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
119
215
 
120
- const char *ptr = RSTRING_PTR(str);
121
- unsigned long len = RSTRING_LEN(str);
122
216
 
123
- unsigned long beg = 0, pos = 0;
217
+ #if defined(HAVE_SIMD_NEON)
218
+ static inline unsigned char search_escape_basic_neon(search_state *search);
219
+ #elif defined(HAVE_SIMD_SSE2)
220
+ static inline unsigned char search_escape_basic_sse2(search_state *search);
221
+ #endif
124
222
 
125
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
223
+ static inline unsigned char search_escape_basic(search_state *search);
126
224
 
127
- while (pos < len) {
128
- unsigned char ch = ptr[pos];
129
- unsigned char ch_len = escape_table[ch];
130
- /* JSON encoding */
225
+ static inline void convert_UTF8_to_JSON(search_state *search)
226
+ {
227
+ #ifdef HAVE_SIMD
228
+ #if defined(HAVE_SIMD_NEON)
229
+ while (search_escape_basic_neon(search)) {
230
+ escape_UTF8_char_basic(search);
231
+ }
232
+ #elif defined(HAVE_SIMD_SSE2)
233
+ if (simd_impl == SIMD_SSE2) {
234
+ while (search_escape_basic_sse2(search)) {
235
+ escape_UTF8_char_basic(search);
236
+ }
237
+ return;
238
+ }
239
+ while (search_escape_basic(search)) {
240
+ escape_UTF8_char_basic(search);
241
+ }
242
+ #endif
243
+ #else
244
+ while (search_escape_basic(search)) {
245
+ escape_UTF8_char_basic(search);
246
+ }
247
+ #endif /* HAVE_SIMD */
248
+ }
131
249
 
132
- if (RB_UNLIKELY(ch_len)) {
133
- switch (ch_len) {
134
- case 1: {
135
- FLUSH_POS(1);
136
- switch (ch) {
137
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
138
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
139
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
140
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
141
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
142
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
143
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
144
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
145
- default: {
146
- scratch[2] = '0';
147
- scratch[3] = '0';
148
- scratch[4] = hexdig[(ch >> 4) & 0xf];
149
- scratch[5] = hexdig[ch & 0xf];
150
- fbuffer_append(out_buffer, scratch, 6);
151
- break;
152
- }
153
- }
250
+ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
251
+ {
252
+ const unsigned char ch = (unsigned char)*search->ptr;
253
+ switch (ch_len) {
254
+ case 1: {
255
+ switch (ch) {
256
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
257
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
258
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
259
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
260
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
261
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
262
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
263
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
264
+ default: {
265
+ const char *hexdig = "0123456789abcdef";
266
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
267
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
268
+ scratch[5] = hexdig[ch & 0xf];
269
+ fbuffer_append(search->buffer, scratch, 6);
154
270
  break;
155
271
  }
156
- case 3: {
157
- unsigned char b2 = ptr[pos + 1];
158
- if (RB_UNLIKELY(out_script_safe && ch == 0xE2 && b2 == 0x80)) {
159
- unsigned char b3 = ptr[pos + 2];
160
- if (b3 == 0xA8) {
161
- FLUSH_POS(3);
162
- fbuffer_append(out_buffer, "\\u2028", 6);
163
- break;
164
- } else if (b3 == 0xA9) {
165
- FLUSH_POS(3);
166
- fbuffer_append(out_buffer, "\\u2029", 6);
167
- break;
168
- }
169
- }
170
- // fallthrough
171
- }
172
- default:
173
- pos += ch_len;
174
- break;
175
272
  }
176
- } else {
177
- pos++;
273
+ break;
274
+ }
275
+ case 3: {
276
+ if (search->ptr[2] & 1) {
277
+ fbuffer_append(search->buffer, "\\u2029", 6);
278
+ } else {
279
+ fbuffer_append(search->buffer, "\\u2028", 6);
280
+ }
281
+ break;
178
282
  }
179
283
  }
180
- #undef FLUSH_POS
284
+ search->cursor = (search->ptr += ch_len);
285
+ }
286
+
287
+ #ifdef HAVE_SIMD
288
+
289
+ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
290
+ {
291
+ RBIMPL_ASSERT_OR_ASSUME(len < vec_len);
292
+
293
+ // Flush the buffer so everything up until the last 'len' characters are unflushed.
294
+ search_flush(search);
295
+
296
+ FBuffer *buf = search->buffer;
297
+ fbuffer_inc_capa(buf, vec_len);
298
+
299
+ char *s = (buf->ptr + buf->len);
181
300
 
182
- if (beg < len) {
183
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
301
+ // Pad the buffer with dummy characters that won't need escaping.
302
+ // This seems wasteful at first sight, but memset of vector length is very fast.
303
+ // This is a space as it can be directly represented as an immediate on AArch64.
304
+ memset(s, ' ', vec_len);
305
+
306
+ // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
307
+ // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
308
+ if (vec_len == 16) {
309
+ RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
310
+ json_fast_memcpy16(s, search->ptr, len);
311
+ } else {
312
+ MEMCPY(s, search->ptr, char, len);
184
313
  }
185
314
 
186
- RB_GC_GUARD(str);
315
+ return s;
187
316
  }
188
317
 
189
- static const char escape_table[256] = {
190
- // ASCII Control Characters
191
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
192
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
193
- // ASCII Characters
194
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
195
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
196
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
197
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
198
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
199
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
200
- // Continuation byte
201
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
202
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
203
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
204
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
205
- // First byte of a 2-byte code point
206
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
207
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
208
- // First byte of a 4-byte code point
209
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
210
- //First byte of a 4+byte code point
211
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
212
- };
318
+ #ifdef HAVE_SIMD_NEON
213
319
 
214
- static const char script_safe_escape_table[256] = {
215
- // ASCII Control Characters
216
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
217
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
218
- // ASCII Characters
219
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
220
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
221
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
222
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
223
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
224
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
225
- // Continuation byte
226
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
227
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
228
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
229
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
230
- // First byte of a 2-byte code point
231
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
232
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
233
- // First byte of a 4-byte code point
234
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
235
- //First byte of a 4+byte code point
236
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
237
- };
320
+ ALWAYS_INLINE(static) unsigned char neon_next_match(search_state *search)
321
+ {
322
+ uint64_t mask = search->matches_mask;
323
+ uint32_t index = trailing_zeros64(mask) >> 2;
238
324
 
239
- static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256])
325
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
326
+ // If we want to use a similar approach for full escaping we'll need to ensure:
327
+ // search->chunk_base + index >= search->ptr
328
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
329
+ // is one byte after the previous match then:
330
+ // search->chunk_base + index == search->ptr
331
+ search->ptr = search->chunk_base + index;
332
+ mask &= mask - 1;
333
+ search->matches_mask = mask;
334
+ search_flush(search);
335
+ return 1;
336
+ }
337
+
338
+ static inline unsigned char search_escape_basic_neon(search_state *search)
240
339
  {
241
- const char *hexdig = "0123456789abcdef";
242
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
340
+ if (RB_UNLIKELY(search->has_matches)) {
341
+ // There are more matches if search->matches_mask > 0.
342
+ if (search->matches_mask > 0) {
343
+ return neon_next_match(search);
344
+ } else {
345
+ // neon_next_match will only advance search->ptr up to the last matching character.
346
+ // Skip over any characters in the last chunk that occur after the last match.
347
+ search->has_matches = false;
348
+ search->ptr = search->chunk_end;
349
+ }
350
+ }
243
351
 
244
- const char *ptr = RSTRING_PTR(str);
245
- unsigned long len = RSTRING_LEN(str);
352
+ /*
353
+ * The code below implements an SIMD-based algorithm to determine if N bytes at a time
354
+ * need to be escaped.
355
+ *
356
+ * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
357
+ *
358
+ * The explanation will be limited to the first 8 bytes of the string for simplicity. However
359
+ * the vector instructions may work on larger vectors.
360
+ *
361
+ * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
362
+ *
363
+ * lower_bound: [20 20 20 20 20 20 20 20]
364
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
365
+ * dblquote: [22 22 22 22 22 22 22 22]
366
+ *
367
+ * Next we load the first chunk of the ptr:
368
+ * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
369
+ *
370
+ * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
371
+ * as no bytes are less than 32 (0x20):
372
+ * [0 0 0 0 0 0 0 0]
373
+ *
374
+ * Next, we check if any byte in chunk is equal to a backslash:
375
+ * [0 0 0 FF 0 0 0 0]
376
+ *
377
+ * Finally we check if any byte in chunk is equal to a double quote:
378
+ * [FF 0 0 0 0 0 0 0]
379
+ *
380
+ * Now we have three vectors where each byte indicates if the corresponding byte in chunk
381
+ * needs to be escaped. We combine these vectors with a series of logical OR instructions.
382
+ * This is the needs_escape vector and it is equal to:
383
+ * [FF 0 0 FF 0 0 0 0]
384
+ *
385
+ * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
386
+ * the values in the vector. This computes how many bytes need to be escaped within this chunk.
387
+ *
388
+ * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
389
+ * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
390
+ * have at least one byte that needs to be escaped.
391
+ */
392
+
393
+ if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
394
+ search->has_matches = true;
395
+ search->chunk_base = search->ptr;
396
+ search->chunk_end = search->ptr + sizeof(uint8x16_t);
397
+ return neon_next_match(search);
398
+ }
246
399
 
247
- unsigned long beg = 0, pos;
400
+ // There are fewer than 16 bytes left.
401
+ unsigned long remaining = (search->end - search->ptr);
402
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
403
+ char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
248
404
 
249
- for (pos = 0; pos < len;) {
250
- unsigned char ch = ptr[pos];
251
- /* JSON encoding */
252
- if (escape_table[ch]) {
253
- if (pos > beg) {
254
- fbuffer_append(out_buffer, &ptr[beg], pos - beg);
255
- }
405
+ uint64_t mask = compute_chunk_mask_neon(s);
256
406
 
257
- beg = pos + 1;
258
- switch (ch) {
259
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
260
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
261
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
262
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
263
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
264
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
265
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
266
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
267
- default:
268
- scratch[2] = '0';
269
- scratch[3] = '0';
270
- scratch[4] = hexdig[(ch >> 4) & 0xf];
271
- scratch[5] = hexdig[ch & 0xf];
272
- fbuffer_append(out_buffer, scratch, 6);
273
- }
407
+ if (!mask) {
408
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
409
+ // search->cursor to search->ptr.
410
+ fbuffer_consumed(search->buffer, remaining);
411
+ search->ptr = search->end;
412
+ search->cursor = search->end;
413
+ return 0;
274
414
  }
275
415
 
276
- pos++;
416
+ search->matches_mask = mask;
417
+ search->has_matches = true;
418
+ search->chunk_end = search->end;
419
+ search->chunk_base = search->ptr;
420
+ return neon_next_match(search);
277
421
  }
278
422
 
279
- if (beg < len) {
280
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
423
+ if (search->ptr < search->end) {
424
+ return search_escape_basic(search);
281
425
  }
282
426
 
283
- RB_GC_GUARD(str);
427
+ search_flush(search);
428
+ return 0;
429
+ }
430
+ #endif /* HAVE_SIMD_NEON */
431
+
432
+ #ifdef HAVE_SIMD_SSE2
433
+
434
+ ALWAYS_INLINE(static) unsigned char sse2_next_match(search_state *search)
435
+ {
436
+ int mask = search->matches_mask;
437
+ int index = trailing_zeros(mask);
438
+
439
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
440
+ // If we want to use a similar approach for full escaping we'll need to ensure:
441
+ // search->chunk_base + index >= search->ptr
442
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
443
+ // is one byte after the previous match then:
444
+ // search->chunk_base + index == search->ptr
445
+ search->ptr = search->chunk_base + index;
446
+ mask &= mask - 1;
447
+ search->matches_mask = mask;
448
+ search_flush(search);
449
+ return 1;
284
450
  }
285
451
 
286
- static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
452
+ #if defined(__clang__) || defined(__GNUC__)
453
+ #define TARGET_SSE2 __attribute__((target("sse2")))
454
+ #else
455
+ #define TARGET_SSE2
456
+ #endif
457
+
458
+ ALWAYS_INLINE(static) TARGET_SSE2 unsigned char search_escape_basic_sse2(search_state *search)
287
459
  {
288
- const char *hexdig = "0123456789abcdef";
289
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
460
+ if (RB_UNLIKELY(search->has_matches)) {
461
+ // There are more matches if search->matches_mask > 0.
462
+ if (search->matches_mask > 0) {
463
+ return sse2_next_match(search);
464
+ } else {
465
+ // sse2_next_match will only advance search->ptr up to the last matching character.
466
+ // Skip over any characters in the last chunk that occur after the last match.
467
+ search->has_matches = false;
468
+ if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
469
+ search->ptr = search->end;
470
+ } else {
471
+ search->ptr = search->chunk_base + sizeof(__m128i);
472
+ }
473
+ }
474
+ }
290
475
 
291
- const char *ptr = RSTRING_PTR(str);
292
- unsigned long len = RSTRING_LEN(str);
476
+ if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
477
+ search->has_matches = true;
478
+ search->chunk_base = search->ptr;
479
+ search->chunk_end = search->ptr + sizeof(__m128i);
480
+ return sse2_next_match(search);
481
+ }
293
482
 
294
- unsigned long beg = 0, pos = 0;
483
+ // There are fewer than 16 bytes left.
484
+ unsigned long remaining = (search->end - search->ptr);
485
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
486
+ char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
295
487
 
296
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
488
+ int needs_escape_mask = compute_chunk_mask_sse2(s);
297
489
 
298
- while (pos < len) {
299
- unsigned char ch = ptr[pos];
300
- unsigned char ch_len = escape_table[ch];
490
+ if (needs_escape_mask == 0) {
491
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
492
+ // search->cursor to search->ptr.
493
+ fbuffer_consumed(search->buffer, remaining);
494
+ search->ptr = search->end;
495
+ search->cursor = search->end;
496
+ return 0;
497
+ }
498
+
499
+ search->has_matches = true;
500
+ search->matches_mask = needs_escape_mask;
501
+ search->chunk_base = search->ptr;
502
+ return sse2_next_match(search);
503
+ }
504
+
505
+ if (search->ptr < search->end) {
506
+ return search_escape_basic(search);
507
+ }
508
+
509
+ search_flush(search);
510
+ return 0;
511
+ }
512
+
513
+ #endif /* HAVE_SIMD_SSE2 */
514
+
515
+ #endif /* HAVE_SIMD */
516
+
517
+ static const unsigned char script_safe_escape_table[256] = {
518
+ // ASCII Control Characters
519
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
520
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
521
+ // ASCII Characters
522
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
523
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
524
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
525
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
526
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528
+ // Continuation byte
529
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
530
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
531
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
532
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
533
+ // First byte of a 2-byte code point
534
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
535
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
536
+ // First byte of a 3-byte code point
537
+ 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029
538
+ //First byte of a 4+ byte code point
539
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
540
+ };
541
+
542
+ static inline unsigned char search_script_safe_escape(search_state *search)
543
+ {
544
+ while (search->ptr < search->end) {
545
+ unsigned char ch = (unsigned char)*search->ptr;
546
+ unsigned char ch_len = script_safe_escape_table[ch];
301
547
 
302
548
  if (RB_UNLIKELY(ch_len)) {
303
- switch (ch_len) {
304
- case 1: {
305
- FLUSH_POS(1);
306
- switch (ch) {
307
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
308
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
309
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
310
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
311
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
312
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
313
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
314
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
315
- default: {
316
- scratch[2] = '0';
317
- scratch[3] = '0';
318
- scratch[4] = hexdig[(ch >> 4) & 0xf];
319
- scratch[5] = hexdig[ch & 0xf];
320
- fbuffer_append(out_buffer, scratch, 6);
321
- break;
322
- }
549
+ if (ch_len & ESCAPE_MASK) {
550
+ if (RB_UNLIKELY(ch_len == 11)) {
551
+ const unsigned char *uptr = (const unsigned char *)search->ptr;
552
+ if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
553
+ search->ptr += 3;
554
+ continue;
323
555
  }
324
- break;
325
556
  }
326
- default: {
327
- uint32_t wchar = 0;
328
- switch(ch_len) {
329
- case 2:
330
- wchar = ptr[pos] & 0x1F;
331
- break;
332
- case 3:
333
- wchar = ptr[pos] & 0x0F;
334
- break;
335
- case 4:
336
- wchar = ptr[pos] & 0x07;
337
- break;
338
- }
557
+ search_flush(search);
558
+ return ch_len & CHAR_LENGTH_MASK;
559
+ } else {
560
+ search->ptr += ch_len;
561
+ }
562
+ } else {
563
+ search->ptr++;
564
+ }
565
+ }
566
+ search_flush(search);
567
+ return 0;
568
+ }
339
569
 
340
- for (short i = 1; i < ch_len; i++) {
341
- wchar = (wchar << 6) | (ptr[pos+i] & 0x3F);
342
- }
570
+ static void convert_UTF8_to_script_safe_JSON(search_state *search)
571
+ {
572
+ unsigned char ch_len;
573
+ while ((ch_len = search_script_safe_escape(search))) {
574
+ escape_UTF8_char(search, ch_len);
575
+ }
576
+ }
577
+
578
+ static const unsigned char ascii_only_escape_table[256] = {
579
+ // ASCII Control Characters
580
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
581
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
582
+ // ASCII Characters
583
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
584
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
585
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
586
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
587
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589
+ // Continuation byte
590
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
591
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
592
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
593
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
594
+ // First byte of a 2-byte code point
595
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
596
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
597
+ // First byte of a 3-byte code point
598
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
599
+ //First byte of a 4+ byte code point
600
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
601
+ };
343
602
 
344
- FLUSH_POS(ch_len);
603
+ static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
604
+ {
605
+ while (search->ptr < search->end) {
606
+ unsigned char ch = (unsigned char)*search->ptr;
607
+ unsigned char ch_len = escape_table[ch];
345
608
 
346
- if (wchar <= 0xFFFF) {
347
- scratch[2] = hexdig[wchar >> 12];
348
- scratch[3] = hexdig[(wchar >> 8) & 0xf];
349
- scratch[4] = hexdig[(wchar >> 4) & 0xf];
350
- scratch[5] = hexdig[wchar & 0xf];
351
- fbuffer_append(out_buffer, scratch, 6);
352
- } else {
353
- uint16_t hi, lo;
354
- wchar -= 0x10000;
355
- hi = 0xD800 + (uint16_t)(wchar >> 10);
356
- lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
357
-
358
- scratch[2] = hexdig[hi >> 12];
359
- scratch[3] = hexdig[(hi >> 8) & 0xf];
360
- scratch[4] = hexdig[(hi >> 4) & 0xf];
361
- scratch[5] = hexdig[hi & 0xf];
362
-
363
- scratch[8] = hexdig[lo >> 12];
364
- scratch[9] = hexdig[(lo >> 8) & 0xf];
365
- scratch[10] = hexdig[(lo >> 4) & 0xf];
366
- scratch[11] = hexdig[lo & 0xf];
367
-
368
- fbuffer_append(out_buffer, scratch, 12);
369
- }
609
+ if (RB_UNLIKELY(ch_len)) {
610
+ search_flush(search);
611
+ return ch_len & CHAR_LENGTH_MASK;
612
+ } else {
613
+ search->ptr++;
614
+ }
615
+ }
616
+ search_flush(search);
617
+ return 0;
618
+ }
370
619
 
620
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
621
+ {
622
+ const unsigned char ch = (unsigned char)*search->ptr;
623
+ switch (ch_len) {
624
+ case 1: {
625
+ switch (ch) {
626
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
627
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
628
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
629
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
630
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
631
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
632
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
633
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
634
+ default: {
635
+ const char *hexdig = "0123456789abcdef";
636
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
637
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
638
+ scratch[5] = hexdig[ch & 0xf];
639
+ fbuffer_append(search->buffer, scratch, 6);
371
640
  break;
372
641
  }
373
642
  }
374
- } else {
375
- pos++;
643
+ break;
376
644
  }
377
- }
378
- #undef FLUSH_POS
645
+ default: {
646
+ const char *hexdig = "0123456789abcdef";
647
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
648
+
649
+ uint32_t wchar = 0;
650
+
651
+ switch (ch_len) {
652
+ case 2:
653
+ wchar = ch & 0x1F;
654
+ break;
655
+ case 3:
656
+ wchar = ch & 0x0F;
657
+ break;
658
+ case 4:
659
+ wchar = ch & 0x07;
660
+ break;
661
+ }
662
+
663
+ for (short i = 1; i < ch_len; i++) {
664
+ wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
665
+ }
666
+
667
+ if (wchar <= 0xFFFF) {
668
+ scratch[2] = hexdig[wchar >> 12];
669
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
670
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
671
+ scratch[5] = hexdig[wchar & 0xf];
672
+ fbuffer_append(search->buffer, scratch, 6);
673
+ } else {
674
+ uint16_t hi, lo;
675
+ wchar -= 0x10000;
676
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
677
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
678
+
679
+ scratch[2] = hexdig[hi >> 12];
680
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
681
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
682
+ scratch[5] = hexdig[hi & 0xf];
683
+
684
+ scratch[8] = hexdig[lo >> 12];
685
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
686
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
687
+ scratch[11] = hexdig[lo & 0xf];
688
+
689
+ fbuffer_append(search->buffer, scratch, 12);
690
+ }
379
691
 
380
- if (beg < len) {
381
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
692
+ break;
693
+ }
382
694
  }
695
+ search->cursor = (search->ptr += ch_len);
696
+ }
383
697
 
384
- RB_GC_GUARD(str);
698
+ static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
699
+ {
700
+ unsigned char ch_len;
701
+ while ((ch_len = search_ascii_only_escape(search, escape_table))) {
702
+ full_escape_UTF8_char(search, ch_len);
703
+ }
385
704
  }
386
705
 
387
706
  /*
@@ -489,7 +808,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
489
808
  * _state_ is a JSON::State object, that can also be used to configure the
490
809
  * produced JSON string output further.
491
810
  */
492
- static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
811
+ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
812
+ {
493
813
  rb_check_arity(argc, 0, 1);
494
814
  VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
495
815
  return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
@@ -546,17 +866,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
546
866
  return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
547
867
  }
548
868
 
549
- /*
550
- * call-seq: String.included(modul)
551
- *
552
- * Extends _modul_ with the String::Extend module.
553
- */
554
- static VALUE mString_included_s(VALUE self, VALUE modul) {
555
- VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
556
- rb_call_super(1, &modul);
557
- return result;
558
- }
559
-
560
869
  /*
561
870
  * call-seq: to_json(*)
562
871
  *
@@ -571,51 +880,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
571
880
  return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
572
881
  }
573
882
 
574
- /*
575
- * call-seq: to_json_raw_object()
576
- *
577
- * This method creates a raw object hash, that can be nested into
578
- * other data structures and will be generated as a raw string. This
579
- * method should be used, if you want to convert raw strings to JSON
580
- * instead of UTF-8 strings, e. g. binary data.
581
- */
582
- static VALUE mString_to_json_raw_object(VALUE self)
583
- {
584
- VALUE ary;
585
- VALUE result = rb_hash_new();
586
- rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
587
- ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
588
- rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
589
- return result;
590
- }
591
-
592
- /*
593
- * call-seq: to_json_raw(*args)
594
- *
595
- * This method creates a JSON text from the result of a call to
596
- * to_json_raw_object of this String.
597
- */
598
- static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
599
- {
600
- VALUE obj = mString_to_json_raw_object(self);
601
- Check_Type(obj, T_HASH);
602
- return mHash_to_json(argc, argv, obj);
603
- }
604
-
605
- /*
606
- * call-seq: json_create(o)
607
- *
608
- * Raw Strings are JSON Objects (the raw bytes are stored in an array for the
609
- * key "raw"). The Ruby String can be created by this module method.
610
- */
611
- static VALUE mString_Extend_json_create(VALUE self, VALUE o)
612
- {
613
- VALUE ary;
614
- Check_Type(o, T_HASH);
615
- ary = rb_hash_aref(o, rb_str_new2("raw"));
616
- return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
617
- }
618
-
619
883
  /*
620
884
  * call-seq: to_json(*)
621
885
  *
@@ -674,6 +938,7 @@ static void State_mark(void *ptr)
674
938
  rb_gc_mark_movable(state->space_before);
675
939
  rb_gc_mark_movable(state->object_nl);
676
940
  rb_gc_mark_movable(state->array_nl);
941
+ rb_gc_mark_movable(state->as_json);
677
942
  }
678
943
 
679
944
  static void State_compact(void *ptr)
@@ -684,6 +949,7 @@ static void State_compact(void *ptr)
684
949
  state->space_before = rb_gc_location(state->space_before);
685
950
  state->object_nl = rb_gc_location(state->object_nl);
686
951
  state->array_nl = rb_gc_location(state->array_nl);
952
+ state->as_json = rb_gc_location(state->as_json);
687
953
  }
688
954
 
689
955
  static void State_free(void *ptr)
@@ -697,11 +963,6 @@ static size_t State_memsize(const void *ptr)
697
963
  return sizeof(JSON_Generator_State);
698
964
  }
699
965
 
700
- #ifndef HAVE_RB_EXT_RACTOR_SAFE
701
- # undef RUBY_TYPED_FROZEN_SHAREABLE
702
- # define RUBY_TYPED_FROZEN_SHAREABLE 0
703
- #endif
704
-
705
966
  static const rb_data_type_t JSON_Generator_State_type = {
706
967
  "JSON/Generator/State",
707
968
  {
@@ -740,21 +1001,182 @@ static void vstate_spill(struct generate_json_data *data)
740
1001
  RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
741
1002
  RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
742
1003
  RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
1004
+ RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
743
1005
  }
744
1006
 
745
- static inline VALUE vstate_get(struct generate_json_data *data)
1007
+ static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj)
746
1008
  {
747
1009
  if (RB_UNLIKELY(!data->vstate)) {
748
1010
  vstate_spill(data);
749
1011
  }
750
- return data->vstate;
1012
+ GET_STATE(data->vstate);
1013
+ state->depth = data->depth;
1014
+ VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate);
1015
+ // no need to restore state->depth, vstate is just a temporary State
1016
+ return tmp;
1017
+ }
1018
+
1019
+ static VALUE
1020
+ json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
1021
+ {
1022
+ VALUE proc_args[2] = {object, is_key};
1023
+ return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
1024
+ }
1025
+
1026
+ static VALUE
1027
+ convert_string_subclass(VALUE key)
1028
+ {
1029
+ VALUE key_to_s = rb_funcall(key, i_to_s, 0);
1030
+
1031
+ if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
1032
+ VALUE cname = rb_obj_class(key);
1033
+ rb_raise(rb_eTypeError,
1034
+ "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
1035
+ cname, "String", cname, "to_s", rb_obj_class(key_to_s));
1036
+ }
1037
+
1038
+ return key_to_s;
1039
+ }
1040
+
1041
+ static bool enc_utf8_compatible_p(int enc_idx)
1042
+ {
1043
+ if (enc_idx == usascii_encindex) return true;
1044
+ if (enc_idx == utf8_encindex) return true;
1045
+ return false;
1046
+ }
1047
+
1048
+ static VALUE encode_json_string_try(VALUE str)
1049
+ {
1050
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1051
+ }
1052
+
1053
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1054
+ {
1055
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1056
+ return Qundef;
1057
+ }
1058
+
1059
+ static inline bool valid_json_string_p(VALUE str)
1060
+ {
1061
+ int coderange = rb_enc_str_coderange(str);
1062
+
1063
+ if (RB_LIKELY(coderange == ENC_CODERANGE_7BIT)) {
1064
+ return true;
1065
+ }
1066
+
1067
+ if (RB_LIKELY(coderange == ENC_CODERANGE_VALID)) {
1068
+ return enc_utf8_compatible_p(RB_ENCODING_GET_INLINED(str));
1069
+ }
1070
+
1071
+ return false;
1072
+ }
1073
+
1074
+ static inline VALUE ensure_valid_encoding(struct generate_json_data *data, VALUE str, bool as_json_called, bool is_key)
1075
+ {
1076
+ if (RB_LIKELY(valid_json_string_p(str))) {
1077
+ return str;
1078
+ }
1079
+
1080
+ if (!as_json_called && data->state->strict && RTEST(data->state->as_json)) {
1081
+ VALUE coerced_str = json_call_as_json(data->state, str, Qfalse);
1082
+ if (coerced_str != str) {
1083
+ if (RB_TYPE_P(coerced_str, T_STRING)) {
1084
+ if (!valid_json_string_p(coerced_str)) {
1085
+ raise_generator_error(str, "source sequence is illegal/malformed utf-8");
1086
+ }
1087
+ } else {
1088
+ // as_json could return another type than T_STRING
1089
+ if (is_key) {
1090
+ raise_generator_error(coerced_str, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(coerced_str));
1091
+ }
1092
+ }
1093
+
1094
+ return coerced_str;
1095
+ }
1096
+ }
1097
+
1098
+ if (RB_ENCODING_GET_INLINED(str) == binary_encindex) {
1099
+ VALUE utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1100
+ switch (rb_enc_str_coderange(utf8_string)) {
1101
+ case ENC_CODERANGE_7BIT:
1102
+ return utf8_string;
1103
+ case ENC_CODERANGE_VALID:
1104
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1105
+ // TODO: Raise in 3.0.0
1106
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1107
+ return utf8_string;
1108
+ break;
1109
+ }
1110
+ }
1111
+
1112
+ return rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1113
+ }
1114
+
1115
+ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1116
+ {
1117
+ fbuffer_append_char(buffer, '"');
1118
+
1119
+ long len;
1120
+ search_state search;
1121
+ search.buffer = buffer;
1122
+ RSTRING_GETMEM(obj, search.ptr, len);
1123
+ search.cursor = search.ptr;
1124
+ search.end = search.ptr + len;
1125
+
1126
+ #ifdef HAVE_SIMD
1127
+ search.matches_mask = 0;
1128
+ search.has_matches = false;
1129
+ search.chunk_base = NULL;
1130
+ search.chunk_end = NULL;
1131
+ #endif /* HAVE_SIMD */
1132
+
1133
+ switch (rb_enc_str_coderange(obj)) {
1134
+ case ENC_CODERANGE_7BIT:
1135
+ case ENC_CODERANGE_VALID:
1136
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1137
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1138
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1139
+ convert_UTF8_to_script_safe_JSON(&search);
1140
+ } else {
1141
+ convert_UTF8_to_JSON(&search);
1142
+ }
1143
+ break;
1144
+ default:
1145
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1146
+ break;
1147
+ }
1148
+ fbuffer_append_char(buffer, '"');
1149
+ }
1150
+
1151
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1152
+ {
1153
+ obj = ensure_valid_encoding(data, obj, false, false);
1154
+ raw_generate_json_string(buffer, data, obj);
751
1155
  }
752
1156
 
753
1157
  struct hash_foreach_arg {
1158
+ VALUE hash;
754
1159
  struct generate_json_data *data;
755
- int iter;
1160
+ int first_key_type;
1161
+ bool first;
1162
+ bool mixed_keys_encountered;
756
1163
  };
757
1164
 
1165
+ NOINLINE(static) void
1166
+ json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1167
+ {
1168
+ if (arg->mixed_keys_encountered) {
1169
+ return;
1170
+ }
1171
+ arg->mixed_keys_encountered = true;
1172
+
1173
+ JSON_Generator_State *state = arg->data->state;
1174
+ if (state->on_duplicate_key != JSON_IGNORE) {
1175
+ VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1176
+ rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1177
+ }
1178
+ }
1179
+
758
1180
  static int
759
1181
  json_object_i(VALUE key, VALUE val, VALUE _arg)
760
1182
  {
@@ -764,256 +1186,267 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
764
1186
  FBuffer *buffer = data->buffer;
765
1187
  JSON_Generator_State *state = data->state;
766
1188
 
767
- long depth = state->depth;
768
- int j;
1189
+ long depth = data->depth;
1190
+ int key_type = rb_type(key);
769
1191
 
770
- if (arg->iter > 0) fbuffer_append_char(buffer, ',');
771
- if (RB_UNLIKELY(state->object_nl)) {
772
- fbuffer_append_str(buffer, state->object_nl);
1192
+ if (arg->first) {
1193
+ arg->first = false;
1194
+ arg->first_key_type = key_type;
773
1195
  }
774
- if (RB_UNLIKELY(state->indent)) {
775
- for (j = 0; j < depth; j++) {
776
- fbuffer_append_str(buffer, state->indent);
777
- }
1196
+ else {
1197
+ fbuffer_append_char(buffer, ',');
1198
+ }
1199
+
1200
+ if (RB_UNLIKELY(data->state->object_nl)) {
1201
+ fbuffer_append_str(buffer, data->state->object_nl);
1202
+ }
1203
+ if (RB_UNLIKELY(data->state->indent)) {
1204
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
778
1205
  }
779
1206
 
780
1207
  VALUE key_to_s;
781
- switch(rb_type(key)) {
1208
+ bool as_json_called = false;
1209
+
1210
+ start:
1211
+ switch (key_type) {
782
1212
  case T_STRING:
1213
+ if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1214
+ json_inspect_hash_with_mixed_keys(arg);
1215
+ }
1216
+
783
1217
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
784
1218
  key_to_s = key;
785
1219
  } else {
786
- key_to_s = rb_funcall(key, i_to_s, 0);
1220
+ key_to_s = convert_string_subclass(key);
787
1221
  }
788
1222
  break;
789
1223
  case T_SYMBOL:
1224
+ if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1225
+ json_inspect_hash_with_mixed_keys(arg);
1226
+ }
1227
+
790
1228
  key_to_s = rb_sym2str(key);
791
1229
  break;
792
1230
  default:
1231
+ if (data->state->strict) {
1232
+ if (RTEST(data->state->as_json) && !as_json_called) {
1233
+ key = json_call_as_json(data->state, key, Qtrue);
1234
+ key_type = rb_type(key);
1235
+ as_json_called = true;
1236
+ goto start;
1237
+ } else {
1238
+ raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
1239
+ }
1240
+ }
793
1241
  key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
794
1242
  break;
795
1243
  }
796
1244
 
1245
+ key_to_s = ensure_valid_encoding(data, key_to_s, as_json_called, true);
1246
+
797
1247
  if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
798
- generate_json_string(buffer, data, state, key_to_s);
1248
+ raw_generate_json_string(buffer, data, key_to_s);
799
1249
  } else {
800
- generate_json(buffer, data, state, key_to_s);
1250
+ generate_json(buffer, data, key_to_s);
801
1251
  }
802
- if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before);
1252
+ if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
803
1253
  fbuffer_append_char(buffer, ':');
804
- if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space);
805
- generate_json(buffer, data, state, val);
1254
+ if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1255
+ generate_json(buffer, data, val);
806
1256
 
807
- arg->iter++;
808
1257
  return ST_CONTINUE;
809
1258
  }
810
1259
 
811
- static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1260
+ static inline long increase_depth(struct generate_json_data *data)
812
1261
  {
813
- long max_nesting = state->max_nesting;
814
- long depth = ++state->depth;
815
- int j;
816
-
817
- if (max_nesting != 0 && depth > max_nesting) {
818
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1262
+ JSON_Generator_State *state = data->state;
1263
+ long depth = ++data->depth;
1264
+ if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1265
+ rb_raise(eNestingError, "nesting of %ld is too deep. Did you try to serialize objects with circular references?", --data->depth);
819
1266
  }
1267
+ return depth;
1268
+ }
1269
+
1270
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1271
+ {
1272
+ long depth = increase_depth(data);
820
1273
 
821
1274
  if (RHASH_SIZE(obj) == 0) {
822
1275
  fbuffer_append(buffer, "{}", 2);
823
- --state->depth;
1276
+ --data->depth;
824
1277
  return;
825
1278
  }
826
1279
 
827
1280
  fbuffer_append_char(buffer, '{');
828
1281
 
829
1282
  struct hash_foreach_arg arg = {
1283
+ .hash = obj,
830
1284
  .data = data,
831
- .iter = 0,
1285
+ .first = true,
832
1286
  };
833
1287
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
834
1288
 
835
- depth = --state->depth;
836
- if (RB_UNLIKELY(state->object_nl)) {
837
- fbuffer_append_str(buffer, state->object_nl);
838
- if (RB_UNLIKELY(state->indent)) {
839
- for (j = 0; j < depth; j++) {
840
- fbuffer_append_str(buffer, state->indent);
841
- }
1289
+ depth = --data->depth;
1290
+ if (RB_UNLIKELY(data->state->object_nl)) {
1291
+ fbuffer_append_str(buffer, data->state->object_nl);
1292
+ if (RB_UNLIKELY(data->state->indent)) {
1293
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
842
1294
  }
843
1295
  }
844
1296
  fbuffer_append_char(buffer, '}');
845
1297
  }
846
1298
 
847
- static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1299
+ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
848
1300
  {
849
- long max_nesting = state->max_nesting;
850
- long depth = ++state->depth;
851
- int i, j;
852
- if (max_nesting != 0 && depth > max_nesting) {
853
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
854
- }
1301
+ long depth = increase_depth(data);
855
1302
 
856
1303
  if (RARRAY_LEN(obj) == 0) {
857
1304
  fbuffer_append(buffer, "[]", 2);
858
- --state->depth;
1305
+ --data->depth;
859
1306
  return;
860
1307
  }
861
1308
 
862
1309
  fbuffer_append_char(buffer, '[');
863
- if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
864
- for(i = 0; i < RARRAY_LEN(obj); i++) {
1310
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1311
+ for (int i = 0; i < RARRAY_LEN(obj); i++) {
865
1312
  if (i > 0) {
866
1313
  fbuffer_append_char(buffer, ',');
867
- if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
1314
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
868
1315
  }
869
- if (RB_UNLIKELY(state->indent)) {
870
- for (j = 0; j < depth; j++) {
871
- fbuffer_append_str(buffer, state->indent);
872
- }
1316
+ if (RB_UNLIKELY(data->state->indent)) {
1317
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
873
1318
  }
874
- generate_json(buffer, data, state, RARRAY_AREF(obj, i));
1319
+ generate_json(buffer, data, RARRAY_AREF(obj, i));
875
1320
  }
876
- state->depth = --depth;
877
- if (RB_UNLIKELY(state->array_nl)) {
878
- fbuffer_append_str(buffer, state->array_nl);
879
- if (RB_UNLIKELY(state->indent)) {
880
- for (j = 0; j < depth; j++) {
881
- fbuffer_append_str(buffer, state->indent);
882
- }
1321
+ data->depth = --depth;
1322
+ if (RB_UNLIKELY(data->state->array_nl)) {
1323
+ fbuffer_append_str(buffer, data->state->array_nl);
1324
+ if (RB_UNLIKELY(data->state->indent)) {
1325
+ fbuffer_append_str_repeat(buffer, data->state->indent, depth);
883
1326
  }
884
1327
  }
885
1328
  fbuffer_append_char(buffer, ']');
886
1329
  }
887
1330
 
888
- static inline int enc_utf8_compatible_p(int enc_idx)
1331
+ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
889
1332
  {
890
- if (enc_idx == usascii_encindex) return 1;
891
- if (enc_idx == utf8_encindex) return 1;
892
- return 0;
893
- }
894
-
895
- static VALUE encode_json_string_try(VALUE str)
896
- {
897
- return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
898
- }
899
-
900
- static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
901
- {
902
- raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
903
- return Qundef;
904
- }
905
-
906
- static inline VALUE ensure_valid_encoding(VALUE str)
907
- {
908
- int encindex = RB_ENCODING_GET(str);
909
- VALUE utf8_string;
910
- if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
911
- if (encindex == binary_encindex) {
912
- utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
913
- switch (rb_enc_str_coderange(utf8_string)) {
914
- case ENC_CODERANGE_7BIT:
915
- return utf8_string;
916
- case ENC_CODERANGE_VALID:
917
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
918
- // TODO: Raise in 3.0.0
919
- rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
920
- return utf8_string;
921
- break;
922
- }
923
- }
924
-
925
- str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1333
+ VALUE tmp;
1334
+ if (rb_respond_to(obj, i_to_json)) {
1335
+ tmp = json_call_to_json(data, obj);
1336
+ Check_Type(tmp, T_STRING);
1337
+ fbuffer_append_str(buffer, tmp);
1338
+ } else {
1339
+ tmp = rb_funcall(obj, i_to_s, 0);
1340
+ Check_Type(tmp, T_STRING);
1341
+ generate_json_string(buffer, data, tmp);
926
1342
  }
927
- return str;
928
1343
  }
929
1344
 
930
- static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1345
+ static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
931
1346
  {
932
- obj = ensure_valid_encoding(obj);
933
-
934
- fbuffer_append_char(buffer, '"');
935
-
936
- switch(rb_enc_str_coderange(obj)) {
937
- case ENC_CODERANGE_7BIT:
938
- convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
939
- break;
940
- case ENC_CODERANGE_VALID:
941
- if (RB_UNLIKELY(state->ascii_only)) {
942
- convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
943
- } else {
944
- convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
945
- }
946
- break;
947
- default:
948
- raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
949
- break;
1347
+ if (data->state->strict) {
1348
+ generate_json_string(buffer, data, rb_sym2str(obj));
1349
+ } else {
1350
+ generate_json_fallback(buffer, data, obj);
950
1351
  }
951
- fbuffer_append_char(buffer, '"');
952
1352
  }
953
1353
 
954
- static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1354
+ static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
955
1355
  {
956
1356
  fbuffer_append(buffer, "null", 4);
957
1357
  }
958
1358
 
959
- static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1359
+ static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
960
1360
  {
961
1361
  fbuffer_append(buffer, "false", 5);
962
1362
  }
963
1363
 
964
- static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1364
+ static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
965
1365
  {
966
1366
  fbuffer_append(buffer, "true", 4);
967
1367
  }
968
1368
 
969
- static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1369
+ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
970
1370
  {
971
1371
  fbuffer_append_long(buffer, FIX2LONG(obj));
972
1372
  }
973
1373
 
974
- static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1374
+ static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
975
1375
  {
976
1376
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
977
- fbuffer_append_str(buffer, tmp);
1377
+ fbuffer_append_str(buffer, StringValue(tmp));
978
1378
  }
979
1379
 
980
1380
  #ifdef RUBY_INTEGER_UNIFICATION
981
- static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1381
+ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
982
1382
  {
983
1383
  if (FIXNUM_P(obj))
984
- generate_json_fixnum(buffer, data, state, obj);
1384
+ generate_json_fixnum(buffer, data, obj);
985
1385
  else
986
- generate_json_bignum(buffer, data, state, obj);
1386
+ generate_json_bignum(buffer, data, obj);
987
1387
  }
988
1388
  #endif
989
1389
 
990
- static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1390
+ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
991
1391
  {
992
1392
  double value = RFLOAT_VALUE(obj);
993
- char allow_nan = state->allow_nan;
994
- VALUE tmp = rb_funcall(obj, i_to_s, 0);
995
- if (!allow_nan) {
996
- if (isinf(value) || isnan(value)) {
997
- raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", tmp);
1393
+ char allow_nan = data->state->allow_nan;
1394
+ if (isinf(value) || isnan(value)) {
1395
+ /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1396
+ if (!allow_nan) {
1397
+ if (data->state->strict && data->state->as_json) {
1398
+ VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
1399
+ if (casted_obj != obj) {
1400
+ increase_depth(data);
1401
+ generate_json(buffer, data, casted_obj);
1402
+ data->depth--;
1403
+ return;
1404
+ }
1405
+ }
1406
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
998
1407
  }
1408
+
1409
+ VALUE tmp = rb_funcall(obj, i_to_s, 0);
1410
+ fbuffer_append_str(buffer, tmp);
1411
+ return;
999
1412
  }
1000
- fbuffer_append_str(buffer, tmp);
1413
+
1414
+ /* This implementation writes directly into the buffer. We reserve
1415
+ * the 32 characters that fpconv_dtoa states as its maximum.
1416
+ */
1417
+ fbuffer_inc_capa(buffer, 32);
1418
+ char* d = buffer->ptr + buffer->len;
1419
+ int len = fpconv_dtoa(value, d);
1420
+ /* fpconv_dtoa converts a float to its shortest string representation,
1421
+ * but it adds a ".0" if this is a plain integer.
1422
+ */
1423
+ fbuffer_consumed(buffer, len);
1001
1424
  }
1002
1425
 
1003
- static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
1426
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1004
1427
  {
1005
- VALUE tmp;
1428
+ VALUE fragment = RSTRUCT_GET(obj, 0);
1429
+ Check_Type(fragment, T_STRING);
1430
+ fbuffer_append_str(buffer, fragment);
1431
+ }
1432
+
1433
+ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1434
+ {
1435
+ bool as_json_called = false;
1436
+ start:
1006
1437
  if (obj == Qnil) {
1007
- generate_json_null(buffer, data, state, obj);
1438
+ generate_json_null(buffer, data, obj);
1008
1439
  } else if (obj == Qfalse) {
1009
- generate_json_false(buffer, data, state, obj);
1440
+ generate_json_false(buffer, data, obj);
1010
1441
  } else if (obj == Qtrue) {
1011
- generate_json_true(buffer, data, state, obj);
1442
+ generate_json_true(buffer, data, obj);
1012
1443
  } else if (RB_SPECIAL_CONST_P(obj)) {
1013
1444
  if (RB_FIXNUM_P(obj)) {
1014
- generate_json_fixnum(buffer, data, state, obj);
1445
+ generate_json_fixnum(buffer, data, obj);
1015
1446
  } else if (RB_FLONUM_P(obj)) {
1016
- generate_json_float(buffer, data, state, obj);
1447
+ generate_json_float(buffer, data, obj);
1448
+ } else if (RB_STATIC_SYM_P(obj)) {
1449
+ generate_json_symbol(buffer, data, obj);
1017
1450
  } else {
1018
1451
  goto general;
1019
1452
  }
@@ -1021,36 +1454,52 @@ static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON
1021
1454
  VALUE klass = RBASIC_CLASS(obj);
1022
1455
  switch (RB_BUILTIN_TYPE(obj)) {
1023
1456
  case T_BIGNUM:
1024
- generate_json_bignum(buffer, data, state, obj);
1457
+ generate_json_bignum(buffer, data, obj);
1025
1458
  break;
1026
1459
  case T_HASH:
1027
1460
  if (klass != rb_cHash) goto general;
1028
- generate_json_object(buffer, data, state, obj);
1461
+ generate_json_object(buffer, data, obj);
1029
1462
  break;
1030
1463
  case T_ARRAY:
1031
1464
  if (klass != rb_cArray) goto general;
1032
- generate_json_array(buffer, data, state, obj);
1465
+ generate_json_array(buffer, data, obj);
1033
1466
  break;
1034
1467
  case T_STRING:
1035
1468
  if (klass != rb_cString) goto general;
1036
- generate_json_string(buffer, data, state, obj);
1469
+
1470
+ if (RB_LIKELY(valid_json_string_p(obj))) {
1471
+ raw_generate_json_string(buffer, data, obj);
1472
+ } else if (as_json_called) {
1473
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1474
+ } else {
1475
+ obj = ensure_valid_encoding(data, obj, false, false);
1476
+ as_json_called = true;
1477
+ goto start;
1478
+ }
1479
+ break;
1480
+ case T_SYMBOL:
1481
+ generate_json_symbol(buffer, data, obj);
1037
1482
  break;
1038
1483
  case T_FLOAT:
1039
1484
  if (klass != rb_cFloat) goto general;
1040
- generate_json_float(buffer, data, state, obj);
1485
+ generate_json_float(buffer, data, obj);
1486
+ break;
1487
+ case T_STRUCT:
1488
+ if (klass != cFragment) goto general;
1489
+ generate_json_fragment(buffer, data, obj);
1041
1490
  break;
1042
1491
  default:
1043
1492
  general:
1044
- if (state->strict) {
1045
- raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1046
- } else if (rb_respond_to(obj, i_to_json)) {
1047
- tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1048
- Check_Type(tmp, T_STRING);
1049
- fbuffer_append_str(buffer, tmp);
1493
+ if (data->state->strict) {
1494
+ if (RTEST(data->state->as_json) && !as_json_called) {
1495
+ obj = json_call_as_json(data->state, obj, Qfalse);
1496
+ as_json_called = true;
1497
+ goto start;
1498
+ } else {
1499
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1500
+ }
1050
1501
  } else {
1051
- tmp = rb_funcall(obj, i_to_s, 0);
1052
- Check_Type(tmp, T_STRING);
1053
- generate_json_string(buffer, data, state, tmp);
1502
+ generate_json_fallback(buffer, data, obj);
1054
1503
  }
1055
1504
  }
1056
1505
  }
@@ -1060,18 +1509,16 @@ static VALUE generate_json_try(VALUE d)
1060
1509
  {
1061
1510
  struct generate_json_data *data = (struct generate_json_data *)d;
1062
1511
 
1063
- data->func(data->buffer, data, data->state, data->obj);
1512
+ data->func(data->buffer, data, data->obj);
1064
1513
 
1065
- return Qnil;
1514
+ return fbuffer_finalize(data->buffer);
1066
1515
  }
1067
1516
 
1068
- static VALUE generate_json_rescue(VALUE d, VALUE exc)
1517
+ static VALUE generate_json_ensure(VALUE d)
1069
1518
  {
1070
1519
  struct generate_json_data *data = (struct generate_json_data *)d;
1071
1520
  fbuffer_free(data->buffer);
1072
1521
 
1073
- rb_exc_raise(exc);
1074
-
1075
1522
  return Qundef;
1076
1523
  }
1077
1524
 
@@ -1087,22 +1534,31 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
1087
1534
 
1088
1535
  struct generate_json_data data = {
1089
1536
  .buffer = &buffer,
1090
- .vstate = self,
1537
+ .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json
1091
1538
  .state = state,
1539
+ .depth = state->depth,
1092
1540
  .obj = obj,
1093
1541
  .func = func
1094
1542
  };
1095
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1096
-
1097
- return fbuffer_finalize(&buffer);
1543
+ VALUE result = rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1544
+ RB_GC_GUARD(self);
1545
+ return result;
1098
1546
  }
1099
1547
 
1100
- static VALUE cState_generate(VALUE self, VALUE obj, VALUE io)
1548
+ /* call-seq:
1549
+ * generate(obj) -> String
1550
+ * generate(obj, anIO) -> anIO
1551
+ *
1552
+ * Generates a valid JSON document from object +obj+ and returns the
1553
+ * result. If no valid JSON document can be created this method raises a
1554
+ * GeneratorError exception.
1555
+ */
1556
+ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1101
1557
  {
1102
- VALUE result = cState_partial_generate(self, obj, generate_json, io);
1103
- GET_STATE(self);
1104
- (void)state;
1105
- return result;
1558
+ rb_check_arity(argc, 1, 2);
1559
+ VALUE obj = argv[0];
1560
+ VALUE io = argc > 1 ? argv[1] : Qnil;
1561
+ return cState_partial_generate(self, obj, generate_json, io);
1106
1562
  }
1107
1563
 
1108
1564
  static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
@@ -1132,6 +1588,7 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
1132
1588
  objState->space_before = origState->space_before;
1133
1589
  objState->object_nl = origState->object_nl;
1134
1590
  objState->array_nl = origState->array_nl;
1591
+ objState->as_json = origState->as_json;
1135
1592
  return obj;
1136
1593
  }
1137
1594
 
@@ -1182,6 +1639,7 @@ static VALUE string_config(VALUE config)
1182
1639
  */
1183
1640
  static VALUE cState_indent_set(VALUE self, VALUE indent)
1184
1641
  {
1642
+ rb_check_frozen(self);
1185
1643
  GET_STATE(self);
1186
1644
  RB_OBJ_WRITE(self, &state->indent, string_config(indent));
1187
1645
  return Qnil;
@@ -1207,6 +1665,7 @@ static VALUE cState_space(VALUE self)
1207
1665
  */
1208
1666
  static VALUE cState_space_set(VALUE self, VALUE space)
1209
1667
  {
1668
+ rb_check_frozen(self);
1210
1669
  GET_STATE(self);
1211
1670
  RB_OBJ_WRITE(self, &state->space, string_config(space));
1212
1671
  return Qnil;
@@ -1230,6 +1689,7 @@ static VALUE cState_space_before(VALUE self)
1230
1689
  */
1231
1690
  static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1232
1691
  {
1692
+ rb_check_frozen(self);
1233
1693
  GET_STATE(self);
1234
1694
  RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
1235
1695
  return Qnil;
@@ -1255,6 +1715,7 @@ static VALUE cState_object_nl(VALUE self)
1255
1715
  */
1256
1716
  static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1257
1717
  {
1718
+ rb_check_frozen(self);
1258
1719
  GET_STATE(self);
1259
1720
  RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
1260
1721
  return Qnil;
@@ -1278,11 +1739,35 @@ static VALUE cState_array_nl(VALUE self)
1278
1739
  */
1279
1740
  static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1280
1741
  {
1742
+ rb_check_frozen(self);
1281
1743
  GET_STATE(self);
1282
1744
  RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
1283
1745
  return Qnil;
1284
1746
  }
1285
1747
 
1748
+ /*
1749
+ * call-seq: as_json()
1750
+ *
1751
+ * Returns the as_json Proc, which strict generation calls to convert objects that have no native JSON representation.
1752
+ */
1753
+ static VALUE cState_as_json(VALUE self)
1754
+ {
1755
+ GET_STATE(self);
1756
+ return state->as_json;
1757
+ }
1758
+
1759
+ /*
1760
+ * call-seq: as_json=(as_json)
1761
+ *
1762
+ * Sets the Proc that strict generation calls to convert objects that have no native JSON representation; the argument is converted with to_proc.
1763
+ */
1764
+ static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1765
+ {
1766
+ rb_check_frozen(self);
1767
+ GET_STATE(self);
1768
+ RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1769
+ return Qnil;
1770
+ }
1286
1771
 
1287
1772
  /*
1288
1773
  * call-seq: check_circular?
@@ -1321,6 +1806,7 @@ static long long_config(VALUE num)
1321
1806
  */
1322
1807
  static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1323
1808
  {
1809
+ rb_check_frozen(self);
1324
1810
  GET_STATE(self);
1325
1811
  state->max_nesting = long_config(depth);
1326
1812
  return Qnil;
@@ -1346,6 +1832,7 @@ static VALUE cState_script_safe(VALUE self)
1346
1832
  */
1347
1833
  static VALUE cState_script_safe_set(VALUE self, VALUE enable)
1348
1834
  {
1835
+ rb_check_frozen(self);
1349
1836
  GET_STATE(self);
1350
1837
  state->script_safe = RTEST(enable);
1351
1838
  return Qnil;
@@ -1377,6 +1864,7 @@ static VALUE cState_strict(VALUE self)
1377
1864
  */
1378
1865
  static VALUE cState_strict_set(VALUE self, VALUE enable)
1379
1866
  {
1867
+ rb_check_frozen(self);
1380
1868
  GET_STATE(self);
1381
1869
  state->strict = RTEST(enable);
1382
1870
  return Qnil;
@@ -1401,6 +1889,7 @@ static VALUE cState_allow_nan_p(VALUE self)
1401
1889
  */
1402
1890
  static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1403
1891
  {
1892
+ rb_check_frozen(self);
1404
1893
  GET_STATE(self);
1405
1894
  state->allow_nan = RTEST(enable);
1406
1895
  return Qnil;
@@ -1425,11 +1914,25 @@ static VALUE cState_ascii_only_p(VALUE self)
1425
1914
  */
1426
1915
  static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1427
1916
  {
1917
+ rb_check_frozen(self);
1428
1918
  GET_STATE(self);
1429
1919
  state->ascii_only = RTEST(enable);
1430
1920
  return Qnil;
1431
1921
  }
1432
1922
 
1923
+ static VALUE cState_allow_duplicate_key_p(VALUE self)
1924
+ {
1925
+ GET_STATE(self);
1926
+ switch (state->on_duplicate_key) {
1927
+ case JSON_IGNORE:
1928
+ return Qtrue;
1929
+ case JSON_DEPRECATED:
1930
+ return Qnil;
1931
+ default:
1932
+ return Qfalse;
1933
+ }
1934
+ }
1935
+
1433
1936
  /*
1434
1937
  * call-seq: depth
1435
1938
  *
@@ -1449,6 +1952,7 @@ static VALUE cState_depth(VALUE self)
1449
1952
  */
1450
1953
  static VALUE cState_depth_set(VALUE self, VALUE depth)
1451
1954
  {
1955
+ rb_check_frozen(self);
1452
1956
  GET_STATE(self);
1453
1957
  state->depth = long_config(depth);
1454
1958
  return Qnil;
@@ -1482,20 +1986,36 @@ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_
1482
1986
  */
1483
1987
  static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
1484
1988
  {
1989
+ rb_check_frozen(self);
1485
1990
  GET_STATE(self);
1486
1991
  buffer_initial_length_set(state, buffer_initial_length);
1487
1992
  return Qnil;
1488
1993
  }
1489
1994
 
1995
+ struct configure_state_data {
1996
+ JSON_Generator_State *state;
1997
+ VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
1998
+ };
1999
+
2000
+ static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
2001
+ {
2002
+ if (RTEST(data->vstate)) {
2003
+ RB_OBJ_WRITE(data->vstate, field, value);
2004
+ } else {
2005
+ *field = value;
2006
+ }
2007
+ }
2008
+
1490
2009
  static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1491
2010
  {
1492
- JSON_Generator_State *state = (JSON_Generator_State *)_arg;
2011
+ struct configure_state_data *data = (struct configure_state_data *)_arg;
2012
+ JSON_Generator_State *state = data->state;
1493
2013
 
1494
- if (key == sym_indent) { state->indent = string_config(val); }
1495
- else if (key == sym_space) { state->space = string_config(val); }
1496
- else if (key == sym_space_before) { state->space_before = string_config(val); }
1497
- else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1498
- else if (key == sym_array_nl) { state->array_nl = string_config(val); }
2014
+ if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
2015
+ else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
2016
+ else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
2017
+ else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
2018
+ else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
1499
2019
  else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1500
2020
  else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1501
2021
  else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
@@ -1504,10 +2024,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1504
2024
  else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1505
2025
  else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1506
2026
  else if (key == sym_strict) { state->strict = RTEST(val); }
2027
+ else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
2028
+ else if (key == sym_as_json) {
2029
+ VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
2030
+ state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
2031
+ state_write_value(data, &state->as_json, proc);
2032
+ }
1507
2033
  return ST_CONTINUE;
1508
2034
  }
1509
2035
 
1510
- static void configure_state(JSON_Generator_State *state, VALUE config)
2036
+ static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
1511
2037
  {
1512
2038
  if (!RTEST(config)) return;
1513
2039
 
@@ -1515,15 +2041,21 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
1515
2041
 
1516
2042
  if (!RHASH_SIZE(config)) return;
1517
2043
 
2044
+ struct configure_state_data data = {
2045
+ .state = state,
2046
+ .vstate = vstate
2047
+ };
2048
+
1518
2049
  // We assume in most cases few keys are set so it's faster to go over
1519
2050
  // the provided keys than to check all possible keys.
1520
- rb_hash_foreach(config, configure_state_i, (VALUE)state);
2051
+ rb_hash_foreach(config, configure_state_i, (VALUE)&data);
1521
2052
  }
1522
2053
 
1523
2054
  static VALUE cState_configure(VALUE self, VALUE opts)
1524
2055
  {
2056
+ rb_check_frozen(self);
1525
2057
  GET_STATE(self);
1526
- configure_state(state, opts);
2058
+ configure_state(state, self, opts);
1527
2059
  return self;
1528
2060
  }
1529
2061
 
@@ -1531,7 +2063,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
1531
2063
  {
1532
2064
  JSON_Generator_State state = {0};
1533
2065
  state_init(&state);
1534
- configure_state(&state, opts);
2066
+ configure_state(&state, Qfalse, opts);
1535
2067
 
1536
2068
  char stack_buffer[FBUFFER_STACK_SIZE];
1537
2069
  FBuffer buffer = {
@@ -1543,12 +2075,11 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
1543
2075
  .buffer = &buffer,
1544
2076
  .vstate = Qfalse,
1545
2077
  .state = &state,
2078
+ .depth = state.depth,
1546
2079
  .obj = obj,
1547
2080
  .func = generate_json,
1548
2081
  };
1549
- rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1550
-
1551
- return fbuffer_finalize(&buffer);
2082
+ return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
1552
2083
  }
1553
2084
 
1554
2085
  /*
@@ -1564,6 +2095,10 @@ void Init_generator(void)
1564
2095
  rb_require("json/common");
1565
2096
 
1566
2097
  mJSON = rb_define_module("JSON");
2098
+
2099
+ rb_global_variable(&cFragment);
2100
+ cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
2101
+
1567
2102
  VALUE mExt = rb_define_module_under(mJSON, "Ext");
1568
2103
  VALUE mGenerator = rb_define_module_under(mExt, "Generator");
1569
2104
 
@@ -1591,6 +2126,8 @@ void Init_generator(void)
1591
2126
  rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
1592
2127
  rb_define_method(cState, "array_nl", cState_array_nl, 0);
1593
2128
  rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
2129
+ rb_define_method(cState, "as_json", cState_as_json, 0);
2130
+ rb_define_method(cState, "as_json=", cState_as_json_set, 1);
1594
2131
  rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
1595
2132
  rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
1596
2133
  rb_define_method(cState, "script_safe", cState_script_safe, 0);
@@ -1611,7 +2148,9 @@ void Init_generator(void)
1611
2148
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1612
2149
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1613
2150
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1614
- rb_define_private_method(cState, "_generate", cState_generate, 2);
2151
+ rb_define_method(cState, "generate", cState_generate, -1);
2152
+
2153
+ rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
1615
2154
 
1616
2155
  rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
1617
2156
 
@@ -1640,13 +2179,7 @@ void Init_generator(void)
1640
2179
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
1641
2180
 
1642
2181
  VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
1643
- rb_define_singleton_method(mString, "included", mString_included_s, 1);
1644
2182
  rb_define_method(mString, "to_json", mString_to_json, -1);
1645
- rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
1646
- rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
1647
-
1648
- mString_Extend = rb_define_module_under(mString, "Extend");
1649
- rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
1650
2183
 
1651
2184
  VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1652
2185
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
@@ -1663,10 +2196,6 @@ void Init_generator(void)
1663
2196
  i_to_s = rb_intern("to_s");
1664
2197
  i_to_json = rb_intern("to_json");
1665
2198
  i_new = rb_intern("new");
1666
- i_pack = rb_intern("pack");
1667
- i_unpack = rb_intern("unpack");
1668
- i_create_id = rb_intern("create_id");
1669
- i_extend = rb_intern("extend");
1670
2199
  i_encode = rb_intern("encode");
1671
2200
 
1672
2201
  sym_indent = ID2SYM(rb_intern("indent"));
@@ -1682,10 +2211,14 @@ void Init_generator(void)
1682
2211
  sym_script_safe = ID2SYM(rb_intern("script_safe"));
1683
2212
  sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
1684
2213
  sym_strict = ID2SYM(rb_intern("strict"));
2214
+ sym_as_json = ID2SYM(rb_intern("as_json"));
2215
+ sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
1685
2216
 
1686
2217
  usascii_encindex = rb_usascii_encindex();
1687
2218
  utf8_encindex = rb_utf8_encindex();
1688
2219
  binary_encindex = rb_ascii8bit_encindex();
1689
2220
 
1690
2221
  rb_require("json/ext/generator/state");
2222
+
2223
+ simd_impl = find_simd_implementation();
1691
2224
  }