json 2.7.3 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,205 @@
1
+ #include "ruby.h"
1
2
  #include "../fbuffer/fbuffer.h"
2
- #include "generator.h"
3
+ #include "../vendor/fpconv.c"
4
+
5
+ #include <math.h>
6
+ #include <ctype.h>
7
+
8
+ #include "simd.h"
9
+
10
+ /* ruby api and some helpers */
11
+
12
+ typedef struct JSON_Generator_StateStruct {
13
+ VALUE indent;
14
+ VALUE space;
15
+ VALUE space_before;
16
+ VALUE object_nl;
17
+ VALUE array_nl;
18
+ VALUE as_json;
19
+
20
+ long max_nesting;
21
+ long depth;
22
+ long buffer_initial_length;
23
+
24
+ bool allow_nan;
25
+ bool ascii_only;
26
+ bool script_safe;
27
+ bool strict;
28
+ } JSON_Generator_State;
3
29
 
4
30
  #ifndef RB_UNLIKELY
5
31
  #define RB_UNLIKELY(cond) (cond)
6
32
  #endif
7
33
 
8
- static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
34
+ static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
9
35
 
10
36
  static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
38
+ sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
+
40
+
41
+ #define GET_STATE_TO(self, state) \
42
+ TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state)
43
+
44
+ #define GET_STATE(self) \
45
+ JSON_Generator_State *state; \
46
+ GET_STATE_TO(self, state)
47
+
48
+ struct generate_json_data;
49
+
50
+ typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
51
+
52
+ struct generate_json_data {
53
+ FBuffer *buffer;
54
+ VALUE vstate;
55
+ JSON_Generator_State *state;
56
+ VALUE obj;
57
+ generator_func func;
58
+ };
59
+
60
+ static VALUE cState_from_state_s(VALUE self, VALUE opts);
61
+ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
62
+ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
63
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
64
+ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
65
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
66
+ static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
67
+ static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
68
+ static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
69
+ #ifdef RUBY_INTEGER_UNIFICATION
70
+ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
71
+ #endif
72
+ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
73
+ static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
74
+ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
75
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
76
+
77
+ static int usascii_encindex, utf8_encindex, binary_encindex;
78
+
79
+ #ifdef RBIMPL_ATTR_NORETURN
80
+ RBIMPL_ATTR_NORETURN()
81
+ #endif
82
+ static void raise_generator_error_str(VALUE invalid_object, VALUE str)
83
+ {
84
+ VALUE exc = rb_exc_new_str(eGeneratorError, str);
85
+ rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
86
+ rb_exc_raise(exc);
87
+ }
88
+
89
+ #ifdef RBIMPL_ATTR_NORETURN
90
+ RBIMPL_ATTR_NORETURN()
91
+ #endif
92
+ #ifdef RBIMPL_ATTR_FORMAT
93
+ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
94
+ #endif
95
+ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
+ {
97
+ va_list args;
98
+ va_start(args, fmt);
99
+ VALUE str = rb_vsprintf(fmt, args);
100
+ va_end(args);
101
+ raise_generator_error_str(invalid_object, str);
102
+ }
103
+
104
+ // 0 - single-byte char that doesn't need to be escaped.
105
+ // (x | 8) - char that needs to be escaped.
106
+ static const unsigned char CHAR_LENGTH_MASK = 7;
107
+ static const unsigned char ESCAPE_MASK = 8;
108
+
109
+ typedef struct _search_state {
110
+ const char *ptr;
111
+ const char *end;
112
+ const char *cursor;
113
+ FBuffer *buffer;
114
+
115
+ #ifdef HAVE_SIMD
116
+ const char *chunk_base;
117
+ const char *chunk_end;
118
+ bool has_matches;
119
+
120
+ #if defined(HAVE_SIMD_NEON)
121
+ uint64_t matches_mask;
122
+ #elif defined(HAVE_SIMD_SSE2)
123
+ int matches_mask;
124
+ #else
125
+ #error "Unknown SIMD Implementation."
126
+ #endif /* HAVE_SIMD_NEON */
127
+ #endif /* HAVE_SIMD */
128
+ } search_state;
129
+
130
+ #if (defined(__GNUC__ ) || defined(__clang__))
131
+ #define FORCE_INLINE __attribute__((always_inline))
132
+ #else
133
+ #define FORCE_INLINE
134
+ #endif
135
+
136
+ static inline FORCE_INLINE void search_flush(search_state *search)
137
+ {
138
+ // Do not remove this conditional without profiling, specifically escape-heavy text.
139
+ // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
141
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
+ // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
+ // nothing needs to be flushed, we can save a few memory references with this conditional.
144
+ if (search->ptr > search->cursor) { // only when there are pending bytes in [cursor, ptr)
145
+ fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); // copy the pending span to the output buffer
146
+ search->cursor = search->ptr; // nothing is pending after the append
147
+ }
148
+ }
149
+
150
+ static const unsigned char escape_table_basic[256] = {
151
+ // ASCII Control Characters
152
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
153
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
154
+ // ASCII Characters
155
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
156
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
157
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
159
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
160
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
161
+ };
162
+
163
+ static unsigned char (*search_escape_basic_impl)(search_state *);
164
+
165
+ static inline unsigned char search_escape_basic(search_state *search)
166
+ {
167
+ while (search->ptr < search->end) {
168
+ if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
169
+ search_flush(search);
170
+ return 1;
171
+ } else {
172
+ search->ptr++;
173
+ }
174
+ }
175
+ search_flush(search);
176
+ return 0;
177
+ }
178
+
179
+ static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
180
+ {
181
+ const unsigned char ch = (unsigned char)*search->ptr;
182
+ switch (ch) {
183
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
184
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
185
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
186
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
187
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
188
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
189
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
190
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
191
+ default: {
192
+ const char *hexdig = "0123456789abcdef";
193
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
194
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
195
+ scratch[5] = hexdig[ch & 0xf];
196
+ fbuffer_append(search->buffer, scratch, 6);
197
+ break;
198
+ }
199
+ }
200
+ search->ptr++;
201
+ search->cursor = search->ptr;
202
+ }
11
203
 
12
204
  /* Converts in_string to a JSON string (without the wrapping '"'
13
205
  * characters) in FBuffer out_buffer.
@@ -19,296 +211,516 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_e
19
211
  *
20
212
  * - If out_ascii_only: non-ASCII characters (>0x7F)
21
213
  *
22
- * - If out_script_safe: forwardslash, line separator (U+2028), and
214
+ * - If script_safe: forwardslash (/), line separator (U+2028), and
23
215
  * paragraph separator (U+2029)
24
216
  *
25
217
  * Everything else (should be UTF-8) is just passed through and
26
218
  * appended to the result.
27
219
  */
28
- static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
220
+ static inline void convert_UTF8_to_JSON(search_state *search)
29
221
  {
30
- const char *hexdig = "0123456789abcdef";
31
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
222
+ while (search_escape_basic_impl(search)) {
223
+ escape_UTF8_char_basic(search);
224
+ }
225
+ }
32
226
 
33
- const char *ptr = RSTRING_PTR(str);
34
- unsigned long len = RSTRING_LEN(str);
227
+ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) // Appends the JSON escape for the ch_len-byte sequence at search->ptr, then steps past it.
228
+ {
229
+ const unsigned char ch = (unsigned char)*search->ptr;
230
+ switch (ch_len) {
231
+ case 1: {
232
+ switch (ch) {
233
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
234
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
235
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
236
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
237
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
238
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
239
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
240
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
241
+ default: { // any other single byte is written as \u00XX
242
+ const char *hexdig = "0123456789abcdef";
243
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
244
+ scratch[4] = hexdig[(ch >> 4) & 0xf]; // high nibble
245
+ scratch[5] = hexdig[ch & 0xf]; // low nibble
246
+ fbuffer_append(search->buffer, scratch, 6);
247
+ break;
248
+ }
249
+ }
250
+ break;
251
+ }
252
+ case 3: { // search_script_safe_escape reports length 3 only for U+2028 / U+2029 (E2 80 A8 / E2 80 A9)
253
+ if (search->ptr[2] & 1) { // low bit of the third byte: set for 0xA9 (U+2029), clear for 0xA8 (U+2028)
254
+ fbuffer_append(search->buffer, "\\u2029", 6);
255
+ } else {
256
+ fbuffer_append(search->buffer, "\\u2028", 6);
257
+ }
258
+ break;
259
+ }
260
+ } // no default: any other ch_len appends nothing here
261
+ search->cursor = (search->ptr += ch_len); // step past the ch_len source bytes; nothing is pending afterwards
262
+ }
35
263
 
36
- unsigned long beg = 0, pos = 0;
264
+ #ifdef HAVE_SIMD
37
265
 
38
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
266
+ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
+ {
268
+ // Flush the pending input so that only the last 'len' characters remain unflushed.
269
+ search_flush(search);
39
270
 
40
- while (pos < len) {
41
- unsigned char ch = ptr[pos];
42
- unsigned char ch_len = escape_table[ch];
43
- /* JSON encoding */
271
+ FBuffer *buf = search->buffer;
272
+ fbuffer_inc_capa(buf, vec_len);
44
273
 
45
- if (RB_UNLIKELY(ch_len)) {
46
- switch (ch_len) {
47
- case 0:
48
- pos++;
49
- break;
50
- case 1: {
51
- FLUSH_POS(1);
52
- switch (ch) {
53
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
54
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
55
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
56
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
57
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
58
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
59
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
60
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
61
- default: {
62
- scratch[2] = hexdig[ch >> 12];
63
- scratch[3] = hexdig[(ch >> 8) & 0xf];
64
- scratch[4] = hexdig[(ch >> 4) & 0xf];
65
- scratch[5] = hexdig[ch & 0xf];
66
- fbuffer_append(out_buffer, scratch, 6);
67
- break;
68
- }
69
- }
70
- break;
71
- }
72
- case 3: {
73
- unsigned char b2 = ptr[pos + 1];
74
- if (RB_UNLIKELY(out_script_safe && b2 == 0x80)) {
75
- unsigned char b3 = ptr[pos + 2];
76
- if (b3 == 0xA8) {
77
- FLUSH_POS(3);
78
- fbuffer_append(out_buffer, "\\u2028", 6);
79
- break;
80
- } else if (b3 == 0xA9) {
81
- FLUSH_POS(3);
82
- fbuffer_append(out_buffer, "\\u2029", 6);
83
- break;
84
- }
85
- }
86
- // fallthrough
87
- }
88
- default:
89
- pos += ch_len;
90
- break;
91
- }
274
+ char *s = (buf->ptr + buf->len);
275
+
276
+ // Pad the buffer with dummy characters that won't need escaping.
277
+ // This seems wasteful at first sight, but a memset of vector length is very fast.
278
+ memset(s, 'X', vec_len);
279
+
280
+ // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
281
+ // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
282
+ MEMCPY(s, search->ptr, char, len);
283
+
284
+ return s;
285
+ }
286
+
287
+ #ifdef HAVE_SIMD_NEON
288
+
289
+ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
290
+ {
291
+ uint64_t mask = search->matches_mask;
292
+ uint32_t index = trailing_zeros64(mask) >> 2;
293
+
294
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
295
+ // If we want to use a similar approach for full escaping we'll need to ensure:
296
+ // search->chunk_base + index >= search->ptr
297
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
298
+ // is one byte after the previous match then:
299
+ // search->chunk_base + index == search->ptr
300
+ search->ptr = search->chunk_base + index;
301
+ mask &= mask - 1;
302
+ search->matches_mask = mask;
303
+ search_flush(search);
304
+ return 1;
305
+ }
306
+
307
+ // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
+ static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309
+ {
310
+ const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
+ const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
+ return mask & 0x8888888888888888ull;
313
+ }
314
+
315
+ static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316
+ {
317
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
+
319
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
+ const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
+
323
+ uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
+ uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325
+
326
+ return neon_match_mask(needs_escape);
327
+ }
328
+
329
+ static inline unsigned char search_escape_basic_neon(search_state *search)
330
+ {
331
+ if (RB_UNLIKELY(search->has_matches)) {
332
+ // There are more matches if search->matches_mask > 0.
333
+ if (search->matches_mask > 0) {
334
+ return neon_next_match(search);
92
335
  } else {
93
- pos++;
336
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
+ // Skip over any characters in the last chunk that occur after the last match.
338
+ search->has_matches = false;
339
+ search->ptr = search->chunk_end;
340
+ }
341
+ }
342
+
343
+ /*
344
+ * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
+ * need to be escaped.
346
+ *
347
+ * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
+ *
349
+ * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
+ * the vector instructions may work on larger vectors.
351
+ *
352
+ * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
353
+ *
354
+ * lower_bound: [20 20 20 20 20 20 20 20]
355
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
+ * dblquote: [22 22 22 22 22 22 22 22]
357
+ *
358
+ * Next we load the first chunk of the ptr:
359
+ * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
+ *
361
+ * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
+ * as no bytes are less than 32 (0x20):
363
+ * [0 0 0 0 0 0 0 0]
364
+ *
365
+ * Next, we check if any byte in chunk is equal to a backslash:
366
+ * [0 0 0 FF 0 0 0 0]
367
+ *
368
+ * Finally we check if any byte in chunk is equal to a double quote:
369
+ * [FF 0 0 0 0 0 0 0]
370
+ *
371
+ * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
+ * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
+ * This is the needs_escape vector and it is equal to:
374
+ * [FF 0 0 FF 0 0 0 0]
375
+ *
376
+ * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
+ * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
+ *
379
+ * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
+ * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
+ * have at least one byte that needs to be escaped.
382
+ */
383
+ while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
+ uint64_t mask = neon_rules_update(search->ptr);
385
+
386
+ if (!mask) {
387
+ search->ptr += sizeof(uint8x16_t);
388
+ continue;
389
+ }
390
+ search->matches_mask = mask;
391
+ search->has_matches = true;
392
+ search->chunk_base = search->ptr;
393
+ search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
+ return neon_next_match(search);
395
+ }
396
+
397
+ // There are fewer than 16 bytes left.
398
+ unsigned long remaining = (search->end - search->ptr);
399
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
+ char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
+
402
+ uint64_t mask = neon_rules_update(s);
403
+
404
+ if (!mask) {
405
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
+ // search->cursor to search->ptr.
407
+ search->buffer->len += remaining;
408
+ search->ptr = search->end;
409
+ search->cursor = search->end;
410
+ return 0;
94
411
  }
412
+
413
+ search->matches_mask = mask;
414
+ search->has_matches = true;
415
+ search->chunk_end = search->end;
416
+ search->chunk_base = search->ptr;
417
+ return neon_next_match(search);
95
418
  }
96
- #undef FLUSH_POS
97
419
 
98
- if (beg < len) {
99
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
420
+ if (search->ptr < search->end) {
421
+ return search_escape_basic(search);
100
422
  }
101
423
 
102
- RB_GC_GUARD(str);
424
+ search_flush(search);
425
+ return 0;
103
426
  }
427
+ #endif /* HAVE_SIMD_NEON */
104
428
 
105
- static const char escape_table[256] = {
106
- // ASCII Control Characters
107
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
108
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
109
- // ASCII Characters
110
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
111
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
112
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
113
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
114
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
115
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
116
- // Continuation byte
117
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
118
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
119
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
120
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
121
- // First byte of a 2-byte code point
122
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
123
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
124
- // First byte of a 4-byte code point
125
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
126
- //First byte of a 4+byte code point
127
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
128
- };
429
+ #ifdef HAVE_SIMD_SSE2
129
430
 
130
- static const char script_safe_escape_table[256] = {
131
- // ASCII Control Characters
132
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
133
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
134
- // ASCII Characters
135
- 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
136
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
137
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
138
- 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
139
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
140
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
141
- // Continuation byte
142
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
143
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
144
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
145
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
146
- // First byte of a 2-byte code point
147
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
148
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
149
- // First byte of a 4-byte code point
150
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
151
- //First byte of a 4+byte code point
152
- 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
153
- };
431
+ #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
+ #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
+ #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
+ #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
154
435
 
155
- static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256])
436
+ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
156
437
  {
157
- const char *hexdig = "0123456789abcdef";
158
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
438
+ int mask = search->matches_mask;
439
+ int index = trailing_zeros(mask);
159
440
 
160
- const char *ptr = RSTRING_PTR(str);
161
- unsigned long len = RSTRING_LEN(str);
441
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
442
+ // If we want to use a similar approach for full escaping we'll need to ensure:
443
+ // search->chunk_base + index >= search->ptr
444
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
445
+ // is one byte after the previous match then:
446
+ // search->chunk_base + index == search->ptr
447
+ search->ptr = search->chunk_base + index;
448
+ mask &= mask - 1;
449
+ search->matches_mask = mask;
450
+ search_flush(search);
451
+ return 1;
452
+ }
162
453
 
163
- unsigned long beg = 0, pos;
454
+ #if defined(__clang__) || defined(__GNUC__)
455
+ #define TARGET_SSE2 __attribute__((target("sse2")))
456
+ #else
457
+ #define TARGET_SSE2
458
+ #endif
164
459
 
165
- for (pos = 0; pos < len;) {
166
- unsigned char ch = ptr[pos];
167
- /* JSON encoding */
168
- if (escape_table[ch]) {
169
- if (pos > beg) {
170
- fbuffer_append(out_buffer, &ptr[beg], pos - beg);
171
- }
460
+ static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
+ {
462
+ __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
172
463
 
173
- beg = pos + 1;
174
- switch (ch) {
175
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
176
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
177
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
178
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
179
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
180
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
181
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
182
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
183
- default:
184
- scratch[2] = hexdig[ch >> 12];
185
- scratch[3] = hexdig[(ch >> 8) & 0xf];
186
- scratch[4] = hexdig[(ch >> 4) & 0xf];
187
- scratch[5] = hexdig[ch & 0xf];
188
- fbuffer_append(out_buffer, scratch, 6);
464
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
+ __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
+ __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
+ __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
+ return _mm_movemask_epi8(needs_escape);
470
+ }
471
+
472
+ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
473
+ {
474
+ if (RB_UNLIKELY(search->has_matches)) {
475
+ // There are more matches if search->matches_mask > 0.
476
+ if (search->matches_mask > 0) {
477
+ return sse2_next_match(search);
478
+ } else {
479
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
+ // Skip over any characters in the last chunk that occur after the last match.
481
+ search->has_matches = false;
482
+ if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
483
+ search->ptr = search->end;
484
+ } else {
485
+ search->ptr = search->chunk_base + sizeof(__m128i);
189
486
  }
190
487
  }
488
+ }
489
+
490
+ while (search->ptr + sizeof(__m128i) <= search->end) {
491
+ int needs_escape_mask = sse2_update(search->ptr);
492
+
493
+ if (needs_escape_mask == 0) {
494
+ search->ptr += sizeof(__m128i);
495
+ continue;
496
+ }
191
497
 
192
- pos++;
498
+ search->has_matches = true;
499
+ search->matches_mask = needs_escape_mask;
500
+ search->chunk_base = search->ptr;
501
+ return sse2_next_match(search);
193
502
  }
194
503
 
195
- if (beg < len) {
196
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
504
+ // There are fewer than 16 bytes left.
505
+ unsigned long remaining = (search->end - search->ptr);
506
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
+ char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
+
509
+ int needs_escape_mask = sse2_update(s);
510
+
511
+ if (needs_escape_mask == 0) {
512
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
+ // search->cursor to search->ptr.
514
+ search->buffer->len += remaining;
515
+ search->ptr = search->end;
516
+ search->cursor = search->end;
517
+ return 0;
518
+ }
519
+
520
+ search->has_matches = true;
521
+ search->matches_mask = needs_escape_mask;
522
+ search->chunk_base = search->ptr;
523
+ return sse2_next_match(search);
197
524
  }
198
525
 
199
- RB_GC_GUARD(str);
200
- }
526
+ if (search->ptr < search->end) {
527
+ return search_escape_basic(search);
528
+ }
201
529
 
202
- static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, const char escape_table[256], bool out_script_safe)
203
- {
204
- const char *hexdig = "0123456789abcdef";
205
- char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
530
+ search_flush(search);
531
+ return 0;
532
+ }
206
533
 
207
- const char *ptr = RSTRING_PTR(str);
208
- unsigned long len = RSTRING_LEN(str);
534
+ #endif /* HAVE_SIMD_SSE2 */
209
535
 
210
- unsigned long beg = 0, pos = 0;
536
+ #endif /* HAVE_SIMD */
211
537
 
212
- #define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
538
+ static const unsigned char script_safe_escape_table[256] = { // Indexed by byte value. 0 = plain byte; low 3 bits (CHAR_LENGTH_MASK) = sequence length; bit 8 (ESCAPE_MASK) = needs escaping.
539
+ // ASCII Control Characters
540
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 9 = 1 | ESCAPE_MASK: single byte that must be escaped
541
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
542
+ // ASCII Characters
543
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
544
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
546
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
547
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
549
+ // Continuation byte
550
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // no ESCAPE_MASK bit: search_script_safe_escape skips these by their length
551
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
552
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
553
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
554
+ // First byte of a 2-byte code point
555
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
556
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
557
+ // First byte of a 3-byte code point
558
+ 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029 (11 = 3 | ESCAPE_MASK; the following bytes are checked by search_script_safe_escape)
559
+ // First byte of a 4+ byte code point
560
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, // 0xFE and 0xFF carry 9, so they are reported for escaping as single bytes
561
+ };
213
562
 
214
- while (pos < len) {
215
- unsigned char ch = ptr[pos];
216
- unsigned char ch_len = escape_table[ch];
563
+ static inline unsigned char search_script_safe_escape(search_state *search)
564
+ {
565
+ while (search->ptr < search->end) {
566
+ unsigned char ch = (unsigned char)*search->ptr;
567
+ unsigned char ch_len = script_safe_escape_table[ch];
217
568
 
218
569
  if (RB_UNLIKELY(ch_len)) {
219
- switch (ch_len) {
220
- case 0:
221
- pos++;
222
- break;
223
- case 1: {
224
- FLUSH_POS(1);
225
- switch (ch) {
226
- case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
227
- case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
228
- case '/': fbuffer_append(out_buffer, "\\/", 2); break;
229
- case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
230
- case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
231
- case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
232
- case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
233
- case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
234
- default: {
235
- scratch[2] = hexdig[ch >> 12];
236
- scratch[3] = hexdig[(ch >> 8) & 0xf];
237
- scratch[4] = hexdig[(ch >> 4) & 0xf];
238
- scratch[5] = hexdig[ch & 0xf];
239
- fbuffer_append(out_buffer, scratch, 6);
240
- break;
241
- }
570
+ if (ch_len & ESCAPE_MASK) {
571
+ if (RB_UNLIKELY(ch_len == 11)) {
572
+ const unsigned char *uptr = (const unsigned char *)search->ptr;
573
+ if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
574
+ search->ptr += 3;
575
+ continue;
242
576
  }
243
- break;
244
577
  }
245
- default: {
246
- uint32_t wchar = 0;
247
- switch(ch_len) {
248
- case 2:
249
- wchar = ptr[pos] & 0x1F;
250
- break;
251
- case 3:
252
- wchar = ptr[pos] & 0x0F;
253
- break;
254
- case 4:
255
- wchar = ptr[pos] & 0x07;
256
- break;
257
- }
578
+ search_flush(search);
579
+ return ch_len & CHAR_LENGTH_MASK;
580
+ } else {
581
+ search->ptr += ch_len;
582
+ }
583
+ } else {
584
+ search->ptr++;
585
+ }
586
+ }
587
+ search_flush(search);
588
+ return 0;
589
+ }
258
590
 
259
- for (short i = 1; i < ch_len; i++) {
260
- wchar = (wchar << 6) | (ptr[pos+i] & 0x3F);
261
- }
591
+ static void convert_UTF8_to_script_safe_JSON(search_state *search)
592
+ {
593
+ unsigned char ch_len;
594
+ while ((ch_len = search_script_safe_escape(search))) {
595
+ escape_UTF8_char(search, ch_len);
596
+ }
597
+ }
262
598
 
263
- FLUSH_POS(ch_len);
599
+ static const unsigned char ascii_only_escape_table[256] = { // Indexed by byte value. 0 = plain ASCII byte; low 3 bits (CHAR_LENGTH_MASK) = sequence length. NOTE(review): presumably the table handed to search_ascii_only_escape, which stops on any nonzero entry - confirm at the call site.
600
+ // ASCII Control Characters
601
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 9 = 1 | ESCAPE_MASK: single byte that must be escaped
602
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
603
+ // ASCII Characters
604
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
605
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
606
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
607
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
608
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
609
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610
+ // Continuation byte
611
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
612
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
613
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
614
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
615
+ // First byte of a 2-byte code point
616
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
617
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
618
+ // First byte of a 3-byte code point
619
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
620
+ // First byte of a 4+ byte code point
621
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9, // 0xFE and 0xFF carry 9 (1 | ESCAPE_MASK), i.e. length 1
622
+ };
264
623
 
265
- if (wchar <= 0xFFFF) {
266
- scratch[2] = hexdig[wchar >> 12];
267
- scratch[3] = hexdig[(wchar >> 8) & 0xf];
268
- scratch[4] = hexdig[(wchar >> 4) & 0xf];
269
- scratch[5] = hexdig[wchar & 0xf];
270
- fbuffer_append(out_buffer, scratch, 6);
271
- } else {
272
- uint16_t hi, lo;
273
- wchar -= 0x10000;
274
- hi = 0xD800 + (uint16_t)(wchar >> 10);
275
- lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
276
-
277
- scratch[2] = hexdig[hi >> 12];
278
- scratch[3] = hexdig[(hi >> 8) & 0xf];
279
- scratch[4] = hexdig[(hi >> 4) & 0xf];
280
- scratch[5] = hexdig[hi & 0xf];
281
-
282
- scratch[8] = hexdig[lo >> 12];
283
- scratch[9] = hexdig[(lo >> 8) & 0xf];
284
- scratch[10] = hexdig[(lo >> 4) & 0xf];
285
- scratch[11] = hexdig[lo & 0xf];
286
-
287
- fbuffer_append(out_buffer, scratch, 12);
288
- }
624
+ static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
625
+ {
626
+ while (search->ptr < search->end) {
627
+ unsigned char ch = (unsigned char)*search->ptr;
628
+ unsigned char ch_len = escape_table[ch];
289
629
 
630
+ if (RB_UNLIKELY(ch_len)) {
631
+ search_flush(search);
632
+ return ch_len & CHAR_LENGTH_MASK;
633
+ } else {
634
+ search->ptr++;
635
+ }
636
+ }
637
+ search_flush(search);
638
+ return 0;
639
+ }
640
+
641
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
642
+ const unsigned char ch = (unsigned char)*search->ptr;
643
+ switch (ch_len) {
644
+ case 1: {
645
+ switch (ch) {
646
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
647
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
648
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
649
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
650
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
651
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
652
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
653
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
654
+ default: {
655
+ const char *hexdig = "0123456789abcdef";
656
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
657
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
658
+ scratch[5] = hexdig[ch & 0xf];
659
+ fbuffer_append(search->buffer, scratch, 6);
290
660
  break;
291
661
  }
292
662
  }
293
- } else {
294
- pos++;
663
+ break;
295
664
  }
296
- }
297
- #undef FLUSH_POS
665
+ default: {
666
+ const char *hexdig = "0123456789abcdef";
667
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
298
668
 
299
- if (beg < len) {
300
- fbuffer_append(out_buffer, &ptr[beg], len - beg);
301
- }
669
+ uint32_t wchar = 0;
670
+
671
+ switch(ch_len) {
672
+ case 2:
673
+ wchar = ch & 0x1F;
674
+ break;
675
+ case 3:
676
+ wchar = ch & 0x0F;
677
+ break;
678
+ case 4:
679
+ wchar = ch & 0x07;
680
+ break;
681
+ }
682
+
683
+ for (short i = 1; i < ch_len; i++) {
684
+ wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
685
+ }
686
+
687
+ if (wchar <= 0xFFFF) {
688
+ scratch[2] = hexdig[wchar >> 12];
689
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
690
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
691
+ scratch[5] = hexdig[wchar & 0xf];
692
+ fbuffer_append(search->buffer, scratch, 6);
693
+ } else {
694
+ uint16_t hi, lo;
695
+ wchar -= 0x10000;
696
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
697
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
698
+
699
+ scratch[2] = hexdig[hi >> 12];
700
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
701
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
702
+ scratch[5] = hexdig[hi & 0xf];
703
+
704
+ scratch[8] = hexdig[lo >> 12];
705
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
706
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
707
+ scratch[11] = hexdig[lo & 0xf];
708
+
709
+ fbuffer_append(search->buffer, scratch, 12);
710
+ }
302
711
 
303
- RB_GC_GUARD(str);
712
+ break;
713
+ }
714
+ }
715
+ search->cursor = (search->ptr += ch_len);
304
716
  }
305
717
 
306
- static char *fstrndup(const char *ptr, unsigned long len) {
307
- char *result;
308
- if (len <= 0) return NULL;
309
- result = ALLOC_N(char, len);
310
- memcpy(result, ptr, len);
311
- return result;
718
+ static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
719
+ {
720
+ unsigned char ch_len;
721
+ while ((ch_len = search_ascii_only_escape(search, escape_table))) {
722
+ full_escape_UTF8_char(search, ch_len);
723
+ }
312
724
  }
313
725
 
314
726
  /*
@@ -403,7 +815,9 @@ static char *fstrndup(const char *ptr, unsigned long len) {
403
815
  */
404
816
  static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
405
817
  {
406
- GENERATE_JSON(object);
818
+ rb_check_arity(argc, 0, 1);
819
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
820
+ return cState_partial_generate(Vstate, self, generate_json_object, Qfalse);
407
821
  }
408
822
 
409
823
  /*
@@ -415,7 +829,9 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
415
829
  * produced JSON string output further.
416
830
  */
417
831
  static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
418
- GENERATE_JSON(array);
832
+ rb_check_arity(argc, 0, 1);
833
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
834
+ return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
419
835
  }
420
836
 
421
837
  #ifdef RUBY_INTEGER_UNIFICATION
@@ -426,7 +842,9 @@ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
426
842
  */
427
843
  static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
428
844
  {
429
- GENERATE_JSON(integer);
845
+ rb_check_arity(argc, 0, 1);
846
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
847
+ return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse);
430
848
  }
431
849
 
432
850
  #else
@@ -437,7 +855,9 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
437
855
  */
438
856
  static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
439
857
  {
440
- GENERATE_JSON(fixnum);
858
+ rb_check_arity(argc, 0, 1);
859
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
860
+ return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse);
441
861
  }
442
862
 
443
863
  /*
@@ -447,7 +867,9 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
447
867
  */
448
868
  static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
449
869
  {
450
- GENERATE_JSON(bignum);
870
+ rb_check_arity(argc, 0, 1);
871
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
872
+ return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse);
451
873
  }
452
874
  #endif
453
875
 
@@ -458,7 +880,9 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
458
880
  */
459
881
  static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
460
882
  {
461
- GENERATE_JSON(float);
883
+ rb_check_arity(argc, 0, 1);
884
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
885
+ return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
462
886
  }
463
887
 
464
888
  /*
@@ -481,7 +905,9 @@ static VALUE mString_included_s(VALUE self, VALUE modul) {
481
905
  */
482
906
  static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
483
907
  {
484
- GENERATE_JSON(string);
908
+ rb_check_arity(argc, 0, 1);
909
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
910
+ return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
485
911
  }
486
912
 
487
913
  /*
@@ -498,7 +924,7 @@ static VALUE mString_to_json_raw_object(VALUE self)
498
924
  VALUE result = rb_hash_new();
499
925
  rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
500
926
  ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
501
- rb_hash_aset(result, rb_str_new2("raw"), ary);
927
+ rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
502
928
  return result;
503
929
  }
504
930
 
@@ -536,7 +962,8 @@ static VALUE mString_Extend_json_create(VALUE self, VALUE o)
536
962
  */
537
963
  static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
538
964
  {
539
- GENERATE_JSON(true);
965
+ rb_check_arity(argc, 0, 1);
966
+ return rb_utf8_str_new("true", 4);
540
967
  }
541
968
 
542
969
  /*
@@ -546,7 +973,8 @@ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
546
973
  */
547
974
  static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
548
975
  {
549
- GENERATE_JSON(false);
976
+ rb_check_arity(argc, 0, 1);
977
+ return rb_utf8_str_new("false", 5);
550
978
  }
551
979
 
552
980
  /*
@@ -556,7 +984,8 @@ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
556
984
  */
557
985
  static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
558
986
  {
559
- GENERATE_JSON(null);
987
+ rb_check_arity(argc, 0, 1);
988
+ return rb_utf8_str_new("null", 4);
560
989
  }
561
990
 
562
991
  /*
@@ -573,30 +1002,40 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
573
1002
  rb_scan_args(argc, argv, "01", &state);
574
1003
  Check_Type(string, T_STRING);
575
1004
  state = cState_from_state_s(cState, state);
576
- return cState_partial_generate(state, string);
1005
+ return cState_partial_generate(state, string, generate_json_string, Qfalse);
1006
+ }
1007
+
1008
+ static void State_mark(void *ptr)
1009
+ {
1010
+ JSON_Generator_State *state = ptr;
1011
+ rb_gc_mark_movable(state->indent);
1012
+ rb_gc_mark_movable(state->space);
1013
+ rb_gc_mark_movable(state->space_before);
1014
+ rb_gc_mark_movable(state->object_nl);
1015
+ rb_gc_mark_movable(state->array_nl);
1016
+ rb_gc_mark_movable(state->as_json);
1017
+ }
1018
+
1019
+ static void State_compact(void *ptr)
1020
+ {
1021
+ JSON_Generator_State *state = ptr;
1022
+ state->indent = rb_gc_location(state->indent);
1023
+ state->space = rb_gc_location(state->space);
1024
+ state->space_before = rb_gc_location(state->space_before);
1025
+ state->object_nl = rb_gc_location(state->object_nl);
1026
+ state->array_nl = rb_gc_location(state->array_nl);
1027
+ state->as_json = rb_gc_location(state->as_json);
577
1028
  }
578
1029
 
579
1030
  static void State_free(void *ptr)
580
1031
  {
581
1032
  JSON_Generator_State *state = ptr;
582
- if (state->indent) ruby_xfree(state->indent);
583
- if (state->space) ruby_xfree(state->space);
584
- if (state->space_before) ruby_xfree(state->space_before);
585
- if (state->object_nl) ruby_xfree(state->object_nl);
586
- if (state->array_nl) ruby_xfree(state->array_nl);
587
1033
  ruby_xfree(state);
588
1034
  }
589
1035
 
590
1036
  static size_t State_memsize(const void *ptr)
591
1037
  {
592
- const JSON_Generator_State *state = ptr;
593
- size_t size = sizeof(*state);
594
- if (state->indent) size += state->indent_len + 1;
595
- if (state->space) size += state->space_len + 1;
596
- if (state->space_before) size += state->space_before_len + 1;
597
- if (state->object_nl) size += state->object_nl_len + 1;
598
- if (state->array_nl) size += state->array_nl_len + 1;
599
- return size;
1038
+ return sizeof(JSON_Generator_State);
600
1039
  }
601
1040
 
602
1041
  #ifndef HAVE_RB_EXT_RACTOR_SAFE
@@ -606,52 +1045,103 @@ static size_t State_memsize(const void *ptr)
606
1045
 
607
1046
  static const rb_data_type_t JSON_Generator_State_type = {
608
1047
  "JSON/Generator/State",
609
- {NULL, State_free, State_memsize,},
1048
+ {
1049
+ .dmark = State_mark,
1050
+ .dfree = State_free,
1051
+ .dsize = State_memsize,
1052
+ .dcompact = State_compact,
1053
+ },
610
1054
  0, 0,
611
1055
  RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
612
1056
  };
613
1057
 
1058
+ static void state_init(JSON_Generator_State *state)
1059
+ {
1060
+ state->max_nesting = 100;
1061
+ state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1062
+ }
1063
+
614
1064
  static VALUE cState_s_allocate(VALUE klass)
615
1065
  {
616
1066
  JSON_Generator_State *state;
617
1067
  VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
618
- state->max_nesting = 100;
619
- state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1068
+ state_init(state);
620
1069
  return obj;
621
1070
  }
622
1071
 
1072
+ static void vstate_spill(struct generate_json_data *data)
1073
+ {
1074
+ VALUE vstate = cState_s_allocate(cState);
1075
+ GET_STATE(vstate);
1076
+ MEMCPY(state, data->state, JSON_Generator_State, 1);
1077
+ data->state = state;
1078
+ data->vstate = vstate;
1079
+ RB_OBJ_WRITTEN(vstate, Qundef, state->indent);
1080
+ RB_OBJ_WRITTEN(vstate, Qundef, state->space);
1081
+ RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
1082
+ RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
1083
+ RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
1084
+ RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
1085
+ }
1086
+
1087
+ static inline VALUE vstate_get(struct generate_json_data *data)
1088
+ {
1089
+ if (RB_UNLIKELY(!data->vstate)) {
1090
+ vstate_spill(data);
1091
+ }
1092
+ return data->vstate;
1093
+ }
1094
+
623
1095
  struct hash_foreach_arg {
624
- FBuffer *buffer;
625
- JSON_Generator_State *state;
626
- VALUE Vstate;
1096
+ struct generate_json_data *data;
627
1097
  int iter;
628
1098
  };
629
1099
 
1100
+ static VALUE
1101
+ convert_string_subclass(VALUE key)
1102
+ {
1103
+ VALUE key_to_s = rb_funcall(key, i_to_s, 0);
1104
+
1105
+ if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
1106
+ VALUE cname = rb_obj_class(key);
1107
+ rb_raise(rb_eTypeError,
1108
+ "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
1109
+ cname, "String", cname, "to_s", rb_obj_class(key_to_s));
1110
+ }
1111
+
1112
+ return key_to_s;
1113
+ }
1114
+
630
1115
  static int
631
1116
  json_object_i(VALUE key, VALUE val, VALUE _arg)
632
1117
  {
633
1118
  struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg;
634
- FBuffer *buffer = arg->buffer;
635
- JSON_Generator_State *state = arg->state;
636
- VALUE Vstate = arg->Vstate;
1119
+ struct generate_json_data *data = arg->data;
1120
+
1121
+ FBuffer *buffer = data->buffer;
1122
+ JSON_Generator_State *state = data->state;
637
1123
 
638
1124
  long depth = state->depth;
639
1125
  int j;
640
1126
 
641
1127
  if (arg->iter > 0) fbuffer_append_char(buffer, ',');
642
- if (RB_UNLIKELY(state->object_nl)) {
643
- fbuffer_append(buffer, state->object_nl, state->object_nl_len);
1128
+ if (RB_UNLIKELY(data->state->object_nl)) {
1129
+ fbuffer_append_str(buffer, data->state->object_nl);
644
1130
  }
645
- if (RB_UNLIKELY(state->indent)) {
1131
+ if (RB_UNLIKELY(data->state->indent)) {
646
1132
  for (j = 0; j < depth; j++) {
647
- fbuffer_append(buffer, state->indent, state->indent_len);
1133
+ fbuffer_append_str(buffer, data->state->indent);
648
1134
  }
649
1135
  }
650
1136
 
651
1137
  VALUE key_to_s;
652
1138
  switch(rb_type(key)) {
653
1139
  case T_STRING:
654
- key_to_s = key;
1140
+ if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1141
+ key_to_s = key;
1142
+ } else {
1143
+ key_to_s = convert_string_subclass(key);
1144
+ }
655
1145
  break;
656
1146
  case T_SYMBOL:
657
1147
  key_to_s = rb_sym2str(key);
@@ -661,82 +1151,98 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
661
1151
  break;
662
1152
  }
663
1153
 
664
- generate_json_string(buffer, Vstate, state, key_to_s);
665
- if (RB_UNLIKELY(state->space_before)) fbuffer_append(buffer, state->space_before, state->space_before_len);
1154
+ if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
1155
+ generate_json_string(buffer, data, key_to_s);
1156
+ } else {
1157
+ generate_json(buffer, data, key_to_s);
1158
+ }
1159
+ if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
666
1160
  fbuffer_append_char(buffer, ':');
667
- if (RB_UNLIKELY(state->space)) fbuffer_append(buffer, state->space, state->space_len);
668
- generate_json(buffer, Vstate, state, val);
1161
+ if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1162
+ generate_json(buffer, data, val);
669
1163
 
670
1164
  arg->iter++;
671
1165
  return ST_CONTINUE;
672
1166
  }
673
1167
 
674
- static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1168
+ static inline long increase_depth(struct generate_json_data *data)
675
1169
  {
676
- long max_nesting = state->max_nesting;
1170
+ JSON_Generator_State *state = data->state;
677
1171
  long depth = ++state->depth;
1172
+ if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1173
+ rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1174
+ }
1175
+ return depth;
1176
+ }
1177
+
1178
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1179
+ {
678
1180
  int j;
679
- struct hash_foreach_arg arg;
1181
+ long depth = increase_depth(data);
680
1182
 
681
- if (max_nesting != 0 && depth > max_nesting) {
682
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1183
+ if (RHASH_SIZE(obj) == 0) {
1184
+ fbuffer_append(buffer, "{}", 2);
1185
+ --data->state->depth;
1186
+ return;
683
1187
  }
1188
+
684
1189
  fbuffer_append_char(buffer, '{');
685
1190
 
686
- arg.buffer = buffer;
687
- arg.state = state;
688
- arg.Vstate = Vstate;
689
- arg.iter = 0;
1191
+ struct hash_foreach_arg arg = {
1192
+ .data = data,
1193
+ .iter = 0,
1194
+ };
690
1195
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
691
1196
 
692
- depth = --state->depth;
693
- if (RB_UNLIKELY(state->object_nl)) {
694
- fbuffer_append(buffer, state->object_nl, state->object_nl_len);
695
- if (RB_UNLIKELY(state->indent)) {
1197
+ depth = --data->state->depth;
1198
+ if (RB_UNLIKELY(data->state->object_nl)) {
1199
+ fbuffer_append_str(buffer, data->state->object_nl);
1200
+ if (RB_UNLIKELY(data->state->indent)) {
696
1201
  for (j = 0; j < depth; j++) {
697
- fbuffer_append(buffer, state->indent, state->indent_len);
1202
+ fbuffer_append_str(buffer, data->state->indent);
698
1203
  }
699
1204
  }
700
1205
  }
701
1206
  fbuffer_append_char(buffer, '}');
702
1207
  }
703
1208
 
704
- static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1209
+ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
705
1210
  {
706
- long max_nesting = state->max_nesting;
707
- long depth = ++state->depth;
708
1211
  int i, j;
709
- if (max_nesting != 0 && depth > max_nesting) {
710
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1212
+ long depth = increase_depth(data);
1213
+
1214
+ if (RARRAY_LEN(obj) == 0) {
1215
+ fbuffer_append(buffer, "[]", 2);
1216
+ --data->state->depth;
1217
+ return;
711
1218
  }
1219
+
712
1220
  fbuffer_append_char(buffer, '[');
713
- if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
1221
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
714
1222
  for(i = 0; i < RARRAY_LEN(obj); i++) {
715
1223
  if (i > 0) {
716
1224
  fbuffer_append_char(buffer, ',');
717
- if (RB_UNLIKELY(state->array_nl)) fbuffer_append(buffer, state->array_nl, state->array_nl_len);
1225
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
718
1226
  }
719
- if (RB_UNLIKELY(state->indent)) {
1227
+ if (RB_UNLIKELY(data->state->indent)) {
720
1228
  for (j = 0; j < depth; j++) {
721
- fbuffer_append(buffer, state->indent, state->indent_len);
1229
+ fbuffer_append_str(buffer, data->state->indent);
722
1230
  }
723
1231
  }
724
- generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i));
1232
+ generate_json(buffer, data, RARRAY_AREF(obj, i));
725
1233
  }
726
- state->depth = --depth;
727
- if (RB_UNLIKELY(state->array_nl)) {
728
- fbuffer_append(buffer, state->array_nl, state->array_nl_len);
729
- if (RB_UNLIKELY(state->indent)) {
1234
+ data->state->depth = --depth;
1235
+ if (RB_UNLIKELY(data->state->array_nl)) {
1236
+ fbuffer_append_str(buffer, data->state->array_nl);
1237
+ if (RB_UNLIKELY(data->state->indent)) {
730
1238
  for (j = 0; j < depth; j++) {
731
- fbuffer_append(buffer, state->indent, state->indent_len);
1239
+ fbuffer_append_str(buffer, data->state->indent);
732
1240
  }
733
1241
  }
734
1242
  }
735
1243
  fbuffer_append_char(buffer, ']');
736
1244
  }
737
1245
 
738
- static int usascii_encindex, utf8_encindex, binary_encindex;
739
-
740
1246
  static inline int enc_utf8_compatible_p(int enc_idx)
741
1247
  {
742
1248
  if (enc_idx == usascii_encindex) return 1;
@@ -744,117 +1250,199 @@ static inline int enc_utf8_compatible_p(int enc_idx)
744
1250
  return 0;
745
1251
  }
746
1252
 
1253
+ static VALUE encode_json_string_try(VALUE str)
1254
+ {
1255
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1256
+ }
1257
+
1258
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
1259
+ {
1260
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1261
+ return Qundef;
1262
+ }
1263
+
747
1264
  static inline VALUE ensure_valid_encoding(VALUE str)
748
1265
  {
749
1266
  int encindex = RB_ENCODING_GET(str);
750
1267
  VALUE utf8_string;
751
1268
  if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
752
1269
  if (encindex == binary_encindex) {
753
- // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
754
- // TODO: Deprecate in 2.8.0
755
- // TODO: Remove in 3.0.0
756
1270
  utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
757
1271
  switch (rb_enc_str_coderange(utf8_string)) {
758
1272
  case ENC_CODERANGE_7BIT:
1273
+ return utf8_string;
759
1274
  case ENC_CODERANGE_VALID:
1275
+ // For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
1276
+ // TODO: Raise in 3.0.0
1277
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
760
1278
  return utf8_string;
761
1279
  break;
762
1280
  }
763
1281
  }
764
1282
 
765
- str = rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1283
+ str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
766
1284
  }
767
1285
  return str;
768
1286
  }
769
1287
 
770
- static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1288
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
771
1289
  {
772
1290
  obj = ensure_valid_encoding(obj);
773
1291
 
774
1292
  fbuffer_append_char(buffer, '"');
775
1293
 
1294
+ long len;
1295
+ search_state search;
1296
+ search.buffer = buffer;
1297
+ RSTRING_GETMEM(obj, search.ptr, len);
1298
+ search.cursor = search.ptr;
1299
+ search.end = search.ptr + len;
1300
+
1301
+ #ifdef HAVE_SIMD
1302
+ search.matches_mask = 0;
1303
+ search.has_matches = false;
1304
+ search.chunk_base = NULL;
1305
+ #endif /* HAVE_SIMD */
1306
+
776
1307
  switch(rb_enc_str_coderange(obj)) {
777
1308
  case ENC_CODERANGE_7BIT:
778
- convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
779
- break;
780
1309
  case ENC_CODERANGE_VALID:
781
- if (RB_UNLIKELY(state->ascii_only)) {
782
- convert_UTF8_to_ASCII_only_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
1310
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1311
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1312
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1313
+ convert_UTF8_to_script_safe_JSON(&search);
783
1314
  } else {
784
- convert_UTF8_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table, state->script_safe);
1315
+ convert_UTF8_to_JSON(&search);
785
1316
  }
786
1317
  break;
787
1318
  default:
788
- rb_raise(rb_path2class("JSON::GeneratorError"), "source sequence is illegal/malformed utf-8");
1319
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
789
1320
  break;
790
1321
  }
791
1322
  fbuffer_append_char(buffer, '"');
792
1323
  }
793
1324
 
794
- static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1325
+ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1326
+ {
1327
+ VALUE tmp;
1328
+ if (rb_respond_to(obj, i_to_json)) {
1329
+ tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1330
+ Check_Type(tmp, T_STRING);
1331
+ fbuffer_append_str(buffer, tmp);
1332
+ } else {
1333
+ tmp = rb_funcall(obj, i_to_s, 0);
1334
+ Check_Type(tmp, T_STRING);
1335
+ generate_json_string(buffer, data, tmp);
1336
+ }
1337
+ }
1338
+
1339
+ static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1340
+ {
1341
+ if (data->state->strict) {
1342
+ generate_json_string(buffer, data, rb_sym2str(obj));
1343
+ } else {
1344
+ generate_json_fallback(buffer, data, obj);
1345
+ }
1346
+ }
1347
+
1348
+ static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
795
1349
  {
796
1350
  fbuffer_append(buffer, "null", 4);
797
1351
  }
798
1352
 
799
- static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1353
+ static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
800
1354
  {
801
1355
  fbuffer_append(buffer, "false", 5);
802
1356
  }
803
1357
 
804
- static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1358
+ static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
805
1359
  {
806
1360
  fbuffer_append(buffer, "true", 4);
807
1361
  }
808
1362
 
809
- static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1363
+ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
810
1364
  {
811
1365
  fbuffer_append_long(buffer, FIX2LONG(obj));
812
1366
  }
813
1367
 
814
- static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1368
+ static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
815
1369
  {
816
1370
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
817
1371
  fbuffer_append_str(buffer, tmp);
818
1372
  }
819
1373
 
820
1374
  #ifdef RUBY_INTEGER_UNIFICATION
821
- static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1375
+ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
822
1376
  {
823
1377
  if (FIXNUM_P(obj))
824
- generate_json_fixnum(buffer, Vstate, state, obj);
1378
+ generate_json_fixnum(buffer, data, obj);
825
1379
  else
826
- generate_json_bignum(buffer, Vstate, state, obj);
1380
+ generate_json_bignum(buffer, data, obj);
827
1381
  }
828
1382
  #endif
829
- static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1383
+
1384
+ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
830
1385
  {
831
1386
  double value = RFLOAT_VALUE(obj);
832
- char allow_nan = state->allow_nan;
833
- VALUE tmp = rb_funcall(obj, i_to_s, 0);
834
- if (!allow_nan) {
835
- if (isinf(value)) {
836
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
837
- } else if (isnan(value)) {
838
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", tmp);
1387
+ char allow_nan = data->state->allow_nan;
1388
+ if (isinf(value) || isnan(value)) {
1389
+ /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1390
+ if (!allow_nan) {
1391
+ if (data->state->strict && data->state->as_json) {
1392
+ VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1393
+ if (casted_obj != obj) {
1394
+ increase_depth(data);
1395
+ generate_json(buffer, data, casted_obj);
1396
+ data->state->depth--;
1397
+ return;
1398
+ }
1399
+ }
1400
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
839
1401
  }
1402
+
1403
+ VALUE tmp = rb_funcall(obj, i_to_s, 0);
1404
+ fbuffer_append_str(buffer, tmp);
1405
+ return;
840
1406
  }
841
- fbuffer_append_str(buffer, tmp);
1407
+
1408
+ /* This implementation writes directly into the buffer. We reserve
1409
+ * the 24 characters that fpconv_dtoa states as its maximum, plus
1410
+ * 2 more characters for the potential ".0" suffix.
1411
+ */
1412
+ fbuffer_inc_capa(buffer, 26);
1413
+ char* d = buffer->ptr + buffer->len;
1414
+ int len = fpconv_dtoa(value, d);
1415
+
1416
+ /* fpconv_dtoa converts a float to its shortest string representation,
1417
+ * but it adds a ".0" if this is a plain integer.
1418
+ */
1419
+ buffer->len += len;
842
1420
  }
843
1421
 
844
- static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1422
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
845
1423
  {
846
- VALUE tmp;
1424
+ VALUE fragment = RSTRUCT_GET(obj, 0);
1425
+ Check_Type(fragment, T_STRING);
1426
+ fbuffer_append_str(buffer, fragment);
1427
+ }
1428
+
1429
+ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1430
+ {
1431
+ bool as_json_called = false;
1432
+ start:
847
1433
  if (obj == Qnil) {
848
- generate_json_null(buffer, Vstate, state, obj);
1434
+ generate_json_null(buffer, data, obj);
849
1435
  } else if (obj == Qfalse) {
850
- generate_json_false(buffer, Vstate, state, obj);
1436
+ generate_json_false(buffer, data, obj);
851
1437
  } else if (obj == Qtrue) {
852
- generate_json_true(buffer, Vstate, state, obj);
1438
+ generate_json_true(buffer, data, obj);
853
1439
  } else if (RB_SPECIAL_CONST_P(obj)) {
854
1440
  if (RB_FIXNUM_P(obj)) {
855
- generate_json_fixnum(buffer, Vstate, state, obj);
1441
+ generate_json_fixnum(buffer, data, obj);
856
1442
  } else if (RB_FLONUM_P(obj)) {
857
- generate_json_float(buffer, Vstate, state, obj);
1443
+ generate_json_float(buffer, data, obj);
1444
+ } else if (RB_STATIC_SYM_P(obj)) {
1445
+ generate_json_symbol(buffer, data, obj);
858
1446
  } else {
859
1447
  goto general;
860
1448
  }
@@ -862,62 +1450,53 @@ static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *s
862
1450
  VALUE klass = RBASIC_CLASS(obj);
863
1451
  switch (RB_BUILTIN_TYPE(obj)) {
864
1452
  case T_BIGNUM:
865
- generate_json_bignum(buffer, Vstate, state, obj);
1453
+ generate_json_bignum(buffer, data, obj);
866
1454
  break;
867
1455
  case T_HASH:
868
1456
  if (klass != rb_cHash) goto general;
869
- generate_json_object(buffer, Vstate, state, obj);
1457
+ generate_json_object(buffer, data, obj);
870
1458
  break;
871
1459
  case T_ARRAY:
872
1460
  if (klass != rb_cArray) goto general;
873
- generate_json_array(buffer, Vstate, state, obj);
1461
+ generate_json_array(buffer, data, obj);
874
1462
  break;
875
1463
  case T_STRING:
876
1464
  if (klass != rb_cString) goto general;
877
- generate_json_string(buffer, Vstate, state, obj);
1465
+ generate_json_string(buffer, data, obj);
1466
+ break;
1467
+ case T_SYMBOL:
1468
+ generate_json_symbol(buffer, data, obj);
878
1469
  break;
879
1470
  case T_FLOAT:
880
1471
  if (klass != rb_cFloat) goto general;
881
- generate_json_float(buffer, Vstate, state, obj);
1472
+ generate_json_float(buffer, data, obj);
1473
+ break;
1474
+ case T_STRUCT:
1475
+ if (klass != cFragment) goto general;
1476
+ generate_json_fragment(buffer, data, obj);
882
1477
  break;
883
1478
  default:
884
1479
  general:
885
- if (state->strict) {
886
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
887
- } else if (rb_respond_to(obj, i_to_json)) {
888
- tmp = rb_funcall(obj, i_to_json, 1, Vstate);
889
- Check_Type(tmp, T_STRING);
890
- fbuffer_append_str(buffer, tmp);
1480
+ if (data->state->strict) {
1481
+ if (RTEST(data->state->as_json) && !as_json_called) {
1482
+ obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1483
+ as_json_called = true;
1484
+ goto start;
1485
+ } else {
1486
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1487
+ }
891
1488
  } else {
892
- tmp = rb_funcall(obj, i_to_s, 0);
893
- Check_Type(tmp, T_STRING);
894
- generate_json_string(buffer, Vstate, state, tmp);
1489
+ generate_json_fallback(buffer, data, obj);
895
1490
  }
896
1491
  }
897
1492
  }
898
1493
  }
899
1494
 
900
- static FBuffer *cState_prepare_buffer(VALUE self)
901
- {
902
- FBuffer *buffer;
903
- GET_STATE(self);
904
- buffer = fbuffer_alloc(state->buffer_initial_length);
905
-
906
- return buffer;
907
- }
908
-
909
- struct generate_json_data {
910
- FBuffer *buffer;
911
- VALUE vstate;
912
- JSON_Generator_State *state;
913
- VALUE obj;
914
- };
915
-
916
1495
  static VALUE generate_json_try(VALUE d)
917
1496
  {
918
1497
  struct generate_json_data *data = (struct generate_json_data *)d;
919
1498
 
920
- generate_json(data->buffer, data->vstate, data->state, data->obj);
1499
+ data->func(data->buffer, data, data->obj);
921
1500
 
922
1501
  return Qnil;
923
1502
  }
@@ -932,37 +1511,53 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
932
1511
  return Qundef;
933
1512
  }
934
1513
 
935
- static VALUE cState_partial_generate(VALUE self, VALUE obj)
1514
+ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
936
1515
  {
937
- FBuffer *buffer = cState_prepare_buffer(self);
938
1516
  GET_STATE(self);
939
1517
 
1518
+ char stack_buffer[FBUFFER_STACK_SIZE];
1519
+ FBuffer buffer = {
1520
+ .io = RTEST(io) ? io : Qfalse,
1521
+ };
1522
+ fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
1523
+
940
1524
  struct generate_json_data data = {
941
- .buffer = buffer,
1525
+ .buffer = &buffer,
942
1526
  .vstate = self,
943
1527
  .state = state,
944
- .obj = obj
1528
+ .obj = obj,
1529
+ .func = func
945
1530
  };
946
1531
  rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
947
1532
 
948
- return fbuffer_to_s(buffer);
1533
+ return fbuffer_finalize(&buffer);
949
1534
  }
950
1535
 
951
- /*
952
- * call-seq: generate(obj)
1536
+ /* call-seq:
1537
+ * generate(obj) -> String
1538
+ * generate(obj, anIO) -> anIO
953
1539
  *
954
1540
  * Generates a valid JSON document from object +obj+ and returns the
955
1541
  * result. If no valid JSON document can be created this method raises a
956
1542
  * GeneratorError exception.
957
1543
  */
958
- static VALUE cState_generate(VALUE self, VALUE obj)
1544
+ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
959
1545
  {
960
- VALUE result = cState_partial_generate(self, obj);
1546
+ rb_check_arity(argc, 1, 2);
1547
+ VALUE obj = argv[0];
1548
+ VALUE io = argc > 1 ? argv[1] : Qnil;
1549
+ VALUE result = cState_partial_generate(self, obj, generate_json, io);
961
1550
  GET_STATE(self);
962
1551
  (void)state;
963
1552
  return result;
964
1553
  }
965
1554
 
1555
+ static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
1556
+ {
1557
+ rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`");
1558
+ return self;
1559
+ }
1560
+
966
1561
  /*
967
1562
  * call-seq: initialize_copy(orig)
968
1563
  *
@@ -979,11 +1574,12 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
979
1574
  if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State");
980
1575
 
981
1576
  MEMCPY(objState, origState, JSON_Generator_State, 1);
982
- objState->indent = fstrndup(origState->indent, origState->indent_len);
983
- objState->space = fstrndup(origState->space, origState->space_len);
984
- objState->space_before = fstrndup(origState->space_before, origState->space_before_len);
985
- objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len);
986
- objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len);
1577
+ objState->indent = origState->indent;
1578
+ objState->space = origState->space;
1579
+ objState->space_before = origState->space_before;
1580
+ objState->object_nl = origState->object_nl;
1581
+ objState->array_nl = origState->array_nl;
1582
+ objState->as_json = origState->as_json;
987
1583
  return obj;
988
1584
  }
989
1585
 
@@ -1013,7 +1609,18 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts)
1013
1609
  static VALUE cState_indent(VALUE self)
1014
1610
  {
1015
1611
  GET_STATE(self);
1016
- return state->indent ? rb_str_new(state->indent, state->indent_len) : rb_str_new2("");
1612
+ return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0));
1613
+ }
1614
+
1615
+ static VALUE string_config(VALUE config)
1616
+ {
1617
+ if (RTEST(config)) {
1618
+ Check_Type(config, T_STRING);
1619
+ if (RSTRING_LEN(config)) {
1620
+ return rb_str_new_frozen(config);
1621
+ }
1622
+ }
1623
+ return Qfalse;
1017
1624
  }
1018
1625
 
1019
1626
  /*
@@ -1023,21 +1630,8 @@ static VALUE cState_indent(VALUE self)
1023
1630
  */
1024
1631
  static VALUE cState_indent_set(VALUE self, VALUE indent)
1025
1632
  {
1026
- unsigned long len;
1027
1633
  GET_STATE(self);
1028
- Check_Type(indent, T_STRING);
1029
- len = RSTRING_LEN(indent);
1030
- if (len == 0) {
1031
- if (state->indent) {
1032
- ruby_xfree(state->indent);
1033
- state->indent = NULL;
1034
- state->indent_len = 0;
1035
- }
1036
- } else {
1037
- if (state->indent) ruby_xfree(state->indent);
1038
- state->indent = fstrndup(RSTRING_PTR(indent), len);
1039
- state->indent_len = len;
1040
- }
1634
+ RB_OBJ_WRITE(self, &state->indent, string_config(indent));
1041
1635
  return Qnil;
1042
1636
  }
1043
1637
 
@@ -1050,7 +1644,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent)
1050
1644
  static VALUE cState_space(VALUE self)
1051
1645
  {
1052
1646
  GET_STATE(self);
1053
- return state->space ? rb_str_new(state->space, state->space_len) : rb_str_new2("");
1647
+ return state->space ? state->space : rb_str_freeze(rb_utf8_str_new("", 0));
1054
1648
  }
1055
1649
 
1056
1650
  /*
@@ -1061,21 +1655,8 @@ static VALUE cState_space(VALUE self)
1061
1655
  */
1062
1656
  static VALUE cState_space_set(VALUE self, VALUE space)
1063
1657
  {
1064
- unsigned long len;
1065
1658
  GET_STATE(self);
1066
- Check_Type(space, T_STRING);
1067
- len = RSTRING_LEN(space);
1068
- if (len == 0) {
1069
- if (state->space) {
1070
- ruby_xfree(state->space);
1071
- state->space = NULL;
1072
- state->space_len = 0;
1073
- }
1074
- } else {
1075
- if (state->space) ruby_xfree(state->space);
1076
- state->space = fstrndup(RSTRING_PTR(space), len);
1077
- state->space_len = len;
1078
- }
1659
+ RB_OBJ_WRITE(self, &state->space, string_config(space));
1079
1660
  return Qnil;
1080
1661
  }
1081
1662
 
@@ -1087,7 +1668,7 @@ static VALUE cState_space_set(VALUE self, VALUE space)
1087
1668
  static VALUE cState_space_before(VALUE self)
1088
1669
  {
1089
1670
  GET_STATE(self);
1090
- return state->space_before ? rb_str_new(state->space_before, state->space_before_len) : rb_str_new2("");
1671
+ return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0));
1091
1672
  }
1092
1673
 
1093
1674
  /*
@@ -1097,21 +1678,8 @@ static VALUE cState_space_before(VALUE self)
1097
1678
  */
1098
1679
  static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1099
1680
  {
1100
- unsigned long len;
1101
1681
  GET_STATE(self);
1102
- Check_Type(space_before, T_STRING);
1103
- len = RSTRING_LEN(space_before);
1104
- if (len == 0) {
1105
- if (state->space_before) {
1106
- ruby_xfree(state->space_before);
1107
- state->space_before = NULL;
1108
- state->space_before_len = 0;
1109
- }
1110
- } else {
1111
- if (state->space_before) ruby_xfree(state->space_before);
1112
- state->space_before = fstrndup(RSTRING_PTR(space_before), len);
1113
- state->space_before_len = len;
1114
- }
1682
+ RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
1115
1683
  return Qnil;
1116
1684
  }
1117
1685
 
@@ -1124,7 +1692,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1124
1692
  static VALUE cState_object_nl(VALUE self)
1125
1693
  {
1126
1694
  GET_STATE(self);
1127
- return state->object_nl ? rb_str_new(state->object_nl, state->object_nl_len) : rb_str_new2("");
1695
+ return state->object_nl ? state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0));
1128
1696
  }
1129
1697
 
1130
1698
  /*
@@ -1135,20 +1703,8 @@ static VALUE cState_object_nl(VALUE self)
1135
1703
  */
1136
1704
  static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1137
1705
  {
1138
- unsigned long len;
1139
1706
  GET_STATE(self);
1140
- Check_Type(object_nl, T_STRING);
1141
- len = RSTRING_LEN(object_nl);
1142
- if (len == 0) {
1143
- if (state->object_nl) {
1144
- ruby_xfree(state->object_nl);
1145
- state->object_nl = NULL;
1146
- }
1147
- } else {
1148
- if (state->object_nl) ruby_xfree(state->object_nl);
1149
- state->object_nl = fstrndup(RSTRING_PTR(object_nl), len);
1150
- state->object_nl_len = len;
1151
- }
1707
+ RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
1152
1708
  return Qnil;
1153
1709
  }
1154
1710
 
@@ -1160,7 +1716,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1160
1716
  static VALUE cState_array_nl(VALUE self)
1161
1717
  {
1162
1718
  GET_STATE(self);
1163
- return state->array_nl ? rb_str_new(state->array_nl, state->array_nl_len) : rb_str_new2("");
1719
+ return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0));
1164
1720
  }
1165
1721
 
1166
1722
  /*
@@ -1170,23 +1726,33 @@ static VALUE cState_array_nl(VALUE self)
1170
1726
  */
1171
1727
  static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1172
1728
  {
1173
- unsigned long len;
1174
1729
  GET_STATE(self);
1175
- Check_Type(array_nl, T_STRING);
1176
- len = RSTRING_LEN(array_nl);
1177
- if (len == 0) {
1178
- if (state->array_nl) {
1179
- ruby_xfree(state->array_nl);
1180
- state->array_nl = NULL;
1181
- }
1182
- } else {
1183
- if (state->array_nl) ruby_xfree(state->array_nl);
1184
- state->array_nl = fstrndup(RSTRING_PTR(array_nl), len);
1185
- state->array_nl_len = len;
1186
- }
1730
+ RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
1187
1731
  return Qnil;
1188
1732
  }
1189
1733
 
1734
+ /*
1735
+ * call-seq: as_json()
1736
+ *
1737
+ * Returns the as_json Proc (if any) that strict mode calls to convert an object it cannot serialize natively.
1738
+ */
1739
+ static VALUE cState_as_json(VALUE self)
1740
+ {
1741
+ GET_STATE(self);
1742
+ return state->as_json;
1743
+ }
1744
+
1745
+ /*
1746
+ * call-seq: as_json=(as_json)
1747
+ *
1748
+ * Sets the as_json Proc; the argument is converted with to_proc if it is not already a Proc.
1749
+ */
1750
+ static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1751
+ {
1752
+ GET_STATE(self);
1753
+ RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1754
+ return Qnil;
1755
+ }
1190
1756
 
1191
1757
  /*
1192
1758
  * call-seq: check_circular?
@@ -1212,6 +1778,11 @@ static VALUE cState_max_nesting(VALUE self)
1212
1778
  return LONG2FIX(state->max_nesting);
1213
1779
  }
1214
1780
 
1781
+ static long long_config(VALUE num)
1782
+ {
1783
+ return RTEST(num) ? FIX2LONG(num) : 0;
1784
+ }
1785
+
1215
1786
  /*
1216
1787
  * call-seq: max_nesting=(depth)
1217
1788
  *
@@ -1221,8 +1792,7 @@ static VALUE cState_max_nesting(VALUE self)
1221
1792
  static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1222
1793
  {
1223
1794
  GET_STATE(self);
1224
- Check_Type(depth, T_FIXNUM);
1225
- state->max_nesting = FIX2LONG(depth);
1795
+ state->max_nesting = long_config(depth);
1226
1796
  return Qnil;
1227
1797
  }
1228
1798
 
@@ -1350,8 +1920,7 @@ static VALUE cState_depth(VALUE self)
1350
1920
  static VALUE cState_depth_set(VALUE self, VALUE depth)
1351
1921
  {
1352
1922
  GET_STATE(self);
1353
- Check_Type(depth, T_FIXNUM);
1354
- state->depth = FIX2LONG(depth);
1923
+ state->depth = long_config(depth);
1355
1924
  return Qnil;
1356
1925
  }
1357
1926
 
@@ -1366,6 +1935,15 @@ static VALUE cState_buffer_initial_length(VALUE self)
1366
1935
  return LONG2FIX(state->buffer_initial_length);
1367
1936
  }
1368
1937
 
1938
+ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length)
1939
+ {
1940
+ Check_Type(buffer_initial_length, T_FIXNUM);
1941
+ long initial_length = FIX2LONG(buffer_initial_length);
1942
+ if (initial_length > 0) {
1943
+ state->buffer_initial_length = initial_length;
1944
+ }
1945
+ }
1946
+
1369
1947
  /*
1370
1948
  * call-seq: buffer_initial_length=(length)
1371
1949
  *
@@ -1374,16 +1952,76 @@ static VALUE cState_buffer_initial_length(VALUE self)
1374
1952
  */
1375
1953
  static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
1376
1954
  {
1377
- long initial_length;
1378
1955
  GET_STATE(self);
1379
- Check_Type(buffer_initial_length, T_FIXNUM);
1380
- initial_length = FIX2LONG(buffer_initial_length);
1381
- if (initial_length > 0) {
1382
- state->buffer_initial_length = initial_length;
1383
- }
1956
+ buffer_initial_length_set(state, buffer_initial_length);
1384
1957
  return Qnil;
1385
1958
  }
1386
1959
 
1960
+ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1961
+ {
1962
+ JSON_Generator_State *state = (JSON_Generator_State *)_arg;
1963
+
1964
+ if (key == sym_indent) { state->indent = string_config(val); }
1965
+ else if (key == sym_space) { state->space = string_config(val); }
1966
+ else if (key == sym_space_before) { state->space_before = string_config(val); }
1967
+ else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1968
+ else if (key == sym_array_nl) { state->array_nl = string_config(val); }
1969
+ else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1970
+ else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1971
+ else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
1972
+ else if (key == sym_depth) { state->depth = long_config(val); }
1973
+ else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); }
1974
+ else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1975
+ else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1976
+ else if (key == sym_strict) { state->strict = RTEST(val); }
1977
+ else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
1978
+ return ST_CONTINUE;
1979
+ }
1980
+
1981
+ static void configure_state(JSON_Generator_State *state, VALUE config)
1982
+ {
1983
+ if (!RTEST(config)) return;
1984
+
1985
+ Check_Type(config, T_HASH);
1986
+
1987
+ if (!RHASH_SIZE(config)) return;
1988
+
1989
+ // We assume in most cases few keys are set so it's faster to go over
1990
+ // the provided keys than to check all possible keys.
1991
+ rb_hash_foreach(config, configure_state_i, (VALUE)state);
1992
+ }
1993
+
1994
+ static VALUE cState_configure(VALUE self, VALUE opts)
1995
+ {
1996
+ GET_STATE(self);
1997
+ configure_state(state, opts);
1998
+ return self;
1999
+ }
2000
+
2001
+ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2002
+ {
2003
+ JSON_Generator_State state = {0};
2004
+ state_init(&state);
2005
+ configure_state(&state, opts);
2006
+
2007
+ char stack_buffer[FBUFFER_STACK_SIZE];
2008
+ FBuffer buffer = {
2009
+ .io = RTEST(io) ? io : Qfalse,
2010
+ };
2011
+ fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
2012
+
2013
+ struct generate_json_data data = {
2014
+ .buffer = &buffer,
2015
+ .vstate = Qfalse,
2016
+ .state = &state,
2017
+ .obj = obj,
2018
+ .func = generate_json,
2019
+ };
2020
+ rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
2021
+
2022
+ return fbuffer_finalize(&buffer);
2023
+ }
2024
+
1387
2025
  /*
1388
2026
  *
1389
2027
  */
@@ -1397,17 +2035,26 @@ void Init_generator(void)
1397
2035
  rb_require("json/common");
1398
2036
 
1399
2037
  mJSON = rb_define_module("JSON");
2038
+
2039
+ rb_global_variable(&cFragment);
2040
+ cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
2041
+
1400
2042
  VALUE mExt = rb_define_module_under(mJSON, "Ext");
1401
2043
  VALUE mGenerator = rb_define_module_under(mExt, "Generator");
1402
2044
 
2045
+ rb_global_variable(&eGeneratorError);
1403
2046
  eGeneratorError = rb_path2class("JSON::GeneratorError");
2047
+
2048
+ rb_global_variable(&eNestingError);
1404
2049
  eNestingError = rb_path2class("JSON::NestingError");
1405
- rb_gc_register_mark_object(eGeneratorError);
1406
- rb_gc_register_mark_object(eNestingError);
1407
2050
 
1408
2051
  cState = rb_define_class_under(mGenerator, "State", rb_cObject);
1409
2052
  rb_define_alloc_func(cState, cState_s_allocate);
1410
2053
  rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
2054
+ rb_define_method(cState, "initialize", cState_initialize, -1);
2055
+ rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings
2056
+ rb_define_private_method(cState, "_configure", cState_configure, 1);
2057
+
1411
2058
  rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
1412
2059
  rb_define_method(cState, "indent", cState_indent, 0);
1413
2060
  rb_define_method(cState, "indent=", cState_indent_set, 1);
@@ -1419,6 +2066,8 @@ void Init_generator(void)
1419
2066
  rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
1420
2067
  rb_define_method(cState, "array_nl", cState_array_nl, 0);
1421
2068
  rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
2069
+ rb_define_method(cState, "as_json", cState_as_json, 0);
2070
+ rb_define_method(cState, "as_json=", cState_as_json_set, 1);
1422
2071
  rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
1423
2072
  rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
1424
2073
  rb_define_method(cState, "script_safe", cState_script_safe, 0);
@@ -1439,7 +2088,10 @@ void Init_generator(void)
1439
2088
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1440
2089
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1441
2090
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1442
- rb_define_method(cState, "generate", cState_generate, 1);
2091
+ rb_define_method(cState, "generate", cState_generate, -1);
2092
+ rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2093
+
2094
+ rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
1443
2095
 
1444
2096
  VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1445
2097
 
@@ -1495,7 +2147,43 @@ void Init_generator(void)
1495
2147
  i_extend = rb_intern("extend");
1496
2148
  i_encode = rb_intern("encode");
1497
2149
 
2150
+ sym_indent = ID2SYM(rb_intern("indent"));
2151
+ sym_space = ID2SYM(rb_intern("space"));
2152
+ sym_space_before = ID2SYM(rb_intern("space_before"));
2153
+ sym_object_nl = ID2SYM(rb_intern("object_nl"));
2154
+ sym_array_nl = ID2SYM(rb_intern("array_nl"));
2155
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
2156
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
2157
+ sym_ascii_only = ID2SYM(rb_intern("ascii_only"));
2158
+ sym_depth = ID2SYM(rb_intern("depth"));
2159
+ sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length"));
2160
+ sym_script_safe = ID2SYM(rb_intern("script_safe"));
2161
+ sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2162
+ sym_strict = ID2SYM(rb_intern("strict"));
2163
+ sym_as_json = ID2SYM(rb_intern("as_json"));
2164
+
1498
2165
  usascii_encindex = rb_usascii_encindex();
1499
2166
  utf8_encindex = rb_utf8_encindex();
1500
2167
  binary_encindex = rb_ascii8bit_encindex();
2168
+
2169
+ rb_require("json/ext/generator/state");
2170
+
2171
+
2172
+ switch(find_simd_implementation()) {
2173
+ #ifdef HAVE_SIMD
2174
+ #ifdef HAVE_SIMD_NEON
2175
+ case SIMD_NEON:
2176
+ search_escape_basic_impl = search_escape_basic_neon;
2177
+ break;
2178
+ #endif /* HAVE_SIMD_NEON */
2179
+ #ifdef HAVE_SIMD_SSE2
2180
+ case SIMD_SSE2:
2181
+ search_escape_basic_impl = search_escape_basic_sse2;
2182
+ break;
2183
+ #endif /* HAVE_SIMD_SSE2 */
2184
+ #endif /* HAVE_SIMD */
2185
+ default:
2186
+ search_escape_basic_impl = search_escape_basic;
2187
+ break;
2188
+ }
1501
2189
  }