json 2.7.2 → 2.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/BSDL +22 -0
  3. data/CHANGES.md +160 -17
  4. data/LEGAL +8 -0
  5. data/README.md +76 -211
  6. data/ext/json/ext/fbuffer/fbuffer.h +178 -95
  7. data/ext/json/ext/generator/extconf.rb +38 -2
  8. data/ext/json/ext/generator/generator.c +1311 -826
  9. data/ext/json/ext/generator/simd.h +112 -0
  10. data/ext/json/ext/parser/extconf.rb +6 -27
  11. data/ext/json/ext/parser/parser.c +1176 -1971
  12. data/ext/json/ext/vendor/fpconv.c +479 -0
  13. data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
  14. data/json.gemspec +44 -49
  15. data/lib/json/add/bigdecimal.rb +2 -2
  16. data/lib/json/add/complex.rb +1 -1
  17. data/lib/json/add/core.rb +1 -1
  18. data/lib/json/add/date.rb +1 -1
  19. data/lib/json/add/date_time.rb +1 -1
  20. data/lib/json/add/exception.rb +1 -1
  21. data/lib/json/add/ostruct.rb +1 -1
  22. data/lib/json/add/range.rb +1 -1
  23. data/lib/json/add/rational.rb +1 -1
  24. data/lib/json/add/regexp.rb +1 -1
  25. data/lib/json/add/struct.rb +1 -1
  26. data/lib/json/add/symbol.rb +8 -4
  27. data/lib/json/add/time.rb +3 -10
  28. data/lib/json/common.rb +647 -241
  29. data/lib/json/ext/generator/state.rb +106 -0
  30. data/lib/json/ext.rb +35 -5
  31. data/lib/json/generic_object.rb +1 -1
  32. data/lib/json/{pure → truffle_ruby}/generator.rb +322 -145
  33. data/lib/json/version.rb +3 -7
  34. data/lib/json.rb +16 -21
  35. metadata +18 -22
  36. data/ext/json/ext/generator/depend +0 -1
  37. data/ext/json/ext/generator/generator.h +0 -177
  38. data/ext/json/ext/parser/depend +0 -1
  39. data/ext/json/ext/parser/parser.h +0 -96
  40. data/ext/json/ext/parser/parser.rl +0 -971
  41. data/ext/json/extconf.rb +0 -3
  42. data/lib/json/pure/parser.rb +0 -337
  43. data/lib/json/pure.rb +0 -15
  44. /data/{LICENSE → COPYING} +0 -0
@@ -1,340 +1,726 @@
1
+ #include "ruby.h"
1
2
  #include "../fbuffer/fbuffer.h"
2
- #include "generator.h"
3
+ #include "../vendor/fpconv.c"
3
4
 
4
- static VALUE mJSON, mExt, mGenerator, cState, mGeneratorMethods, mObject,
5
- mHash, mArray,
5
+ #include <math.h>
6
+ #include <ctype.h>
7
+
8
+ #include "simd.h"
9
+
10
+ /* ruby api and some helpers */
11
+
12
+ typedef struct JSON_Generator_StateStruct {
13
+ VALUE indent;
14
+ VALUE space;
15
+ VALUE space_before;
16
+ VALUE object_nl;
17
+ VALUE array_nl;
18
+ VALUE as_json;
19
+
20
+ long max_nesting;
21
+ long depth;
22
+ long buffer_initial_length;
23
+
24
+ bool allow_nan;
25
+ bool ascii_only;
26
+ bool script_safe;
27
+ bool strict;
28
+ } JSON_Generator_State;
29
+
30
+ #ifndef RB_UNLIKELY
31
+ #define RB_UNLIKELY(cond) (cond)
32
+ #endif
33
+
34
+ static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
35
+
36
+ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37
+ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
38
+ sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
39
+
40
+
41
+ #define GET_STATE_TO(self, state) \
42
+ TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state)
43
+
44
+ #define GET_STATE(self) \
45
+ JSON_Generator_State *state; \
46
+ GET_STATE_TO(self, state)
47
+
48
+ struct generate_json_data;
49
+
50
+ typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
51
+
52
+ struct generate_json_data {
53
+ FBuffer *buffer;
54
+ VALUE vstate;
55
+ JSON_Generator_State *state;
56
+ VALUE obj;
57
+ generator_func func;
58
+ };
59
+
60
+ static VALUE cState_from_state_s(VALUE self, VALUE opts);
61
+ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
62
+ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
63
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
64
+ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
65
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
66
+ static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
67
+ static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
68
+ static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
6
69
  #ifdef RUBY_INTEGER_UNIFICATION
7
- mInteger,
70
+ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
71
+ #endif
72
+ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
73
+ static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
74
+ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
75
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
76
+
77
+ static int usascii_encindex, utf8_encindex, binary_encindex;
78
+
79
+ #ifdef RBIMPL_ATTR_NORETURN
80
+ RBIMPL_ATTR_NORETURN()
81
+ #endif
82
+ static void raise_generator_error_str(VALUE invalid_object, VALUE str)
83
+ {
84
+ VALUE exc = rb_exc_new_str(eGeneratorError, str);
85
+ rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
86
+ rb_exc_raise(exc);
87
+ }
88
+
89
+ #ifdef RBIMPL_ATTR_NORETURN
90
+ RBIMPL_ATTR_NORETURN()
91
+ #endif
92
+ #ifdef RBIMPL_ATTR_FORMAT
93
+ RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
94
+ #endif
95
+ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
96
+ {
97
+ va_list args;
98
+ va_start(args, fmt);
99
+ VALUE str = rb_vsprintf(fmt, args);
100
+ va_end(args);
101
+ raise_generator_error_str(invalid_object, str);
102
+ }
103
+
104
+ // 0 - single byte char that doesn't need to be escaped.
105
+ // (x | 8) - char that needs to be escaped.
106
+ static const unsigned char CHAR_LENGTH_MASK = 7;
107
+ static const unsigned char ESCAPE_MASK = 8;
108
+
109
+ typedef struct _search_state {
110
+ const char *ptr;
111
+ const char *end;
112
+ const char *cursor;
113
+ FBuffer *buffer;
114
+
115
+ #ifdef HAVE_SIMD
116
+ const char *chunk_base;
117
+ const char *chunk_end;
118
+ bool has_matches;
119
+
120
+ #if defined(HAVE_SIMD_NEON)
121
+ uint64_t matches_mask;
122
+ #elif defined(HAVE_SIMD_SSE2)
123
+ int matches_mask;
124
+ #else
125
+ #error "Unknown SIMD Implementation."
126
+ #endif /* HAVE_SIMD_NEON */
127
+ #endif /* HAVE_SIMD */
128
+ } search_state;
129
+
130
+ #if (defined(__GNUC__ ) || defined(__clang__))
131
+ #define FORCE_INLINE __attribute__((always_inline))
8
132
  #else
9
- mFixnum, mBignum,
133
+ #define FORCE_INLINE
10
134
  #endif
11
- mFloat, mString, mString_Extend,
12
- mTrueClass, mFalseClass, mNilClass, eGeneratorError,
13
- eNestingError;
14
135
 
15
- static ID i_to_s, i_to_json, i_new, i_indent, i_space, i_space_before,
16
- i_object_nl, i_array_nl, i_max_nesting, i_allow_nan, i_ascii_only,
17
- i_pack, i_unpack, i_create_id, i_extend, i_key_p,
18
- i_aref, i_send, i_respond_to_p, i_match, i_keys, i_depth,
19
- i_buffer_initial_length, i_dup, i_script_safe, i_escape_slash, i_strict;
136
+ static inline FORCE_INLINE void search_flush(search_state *search)
137
+ {
138
+ // Do not remove this conditional without profiling, specifically escape-heavy text.
139
+ // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
141
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
+ // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
+ // nothing needs to be flushed, we can save a few memory references with this conditional.
144
+ if (search->ptr > search->cursor) {
145
+ fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
146
+ search->cursor = search->ptr;
147
+ }
148
+ }
20
149
 
21
- /*
22
- * Copyright 2001-2004 Unicode, Inc.
150
+ static const unsigned char escape_table_basic[256] = {
151
+ // ASCII Control Characters
152
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
153
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
154
+ // ASCII Characters
155
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
156
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
157
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
159
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
160
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
161
+ };
162
+
163
+ static unsigned char (*search_escape_basic_impl)(search_state *);
164
+
165
+ static inline unsigned char search_escape_basic(search_state *search)
166
+ {
167
+ while (search->ptr < search->end) {
168
+ if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
169
+ search_flush(search);
170
+ return 1;
171
+ } else {
172
+ search->ptr++;
173
+ }
174
+ }
175
+ search_flush(search);
176
+ return 0;
177
+ }
178
+
179
+ static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
180
+ {
181
+ const unsigned char ch = (unsigned char)*search->ptr;
182
+ switch (ch) {
183
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
184
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
185
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
186
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
187
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
188
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
189
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
190
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
191
+ default: {
192
+ const char *hexdig = "0123456789abcdef";
193
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
194
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
195
+ scratch[5] = hexdig[ch & 0xf];
196
+ fbuffer_append(search->buffer, scratch, 6);
197
+ break;
198
+ }
199
+ }
200
+ search->ptr++;
201
+ search->cursor = search->ptr;
202
+ }
203
+
204
+ /* Converts in_string to a JSON string (without the wrapping '"'
205
+ * characters) in FBuffer out_buffer.
206
+ *
207
+ * Characters are JSON-escaped according to:
23
208
  *
24
- * Disclaimer
209
+ * - Always: ASCII control characters (0x00-0x1F), dquote, and
210
+ * backslash.
25
211
  *
26
- * This source code is provided as is by Unicode, Inc. No claims are
27
- * made as to fitness for any particular purpose. No warranties of any
28
- * kind are expressed or implied. The recipient agrees to determine
29
- * applicability of information provided. If this file has been
30
- * purchased on magnetic or optical media from Unicode, Inc., the
31
- * sole remedy for any claim will be exchange of defective media
32
- * within 90 days of receipt.
212
+ * - If out_ascii_only: non-ASCII characters (>0x7F)
33
213
  *
34
- * Limitations on Rights to Redistribute This Code
214
+ * - If script_safe: forwardslash (/), line separator (U+2028), and
215
+ * paragraph separator (U+2029)
35
216
  *
36
- * Unicode, Inc. hereby grants the right to freely use the information
37
- * supplied in this file in the creation of products supporting the
38
- * Unicode Standard, and to make copies of this file in any form
39
- * for internal or external distribution as long as this notice
40
- * remains attached.
217
+ * Everything else (should be UTF-8) is just passed through and
218
+ * appended to the result.
41
219
  */
220
+ static inline void convert_UTF8_to_JSON(search_state *search)
221
+ {
222
+ while (search_escape_basic_impl(search)) {
223
+ escape_UTF8_char_basic(search);
224
+ }
225
+ }
42
226
 
43
- /*
44
- * Index into the table below with the first byte of a UTF-8 sequence to
45
- * get the number of trailing bytes that are supposed to follow it.
46
- * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
47
- * left as-is for anyone who may want to do such conversion, which was
48
- * allowed in earlier algorithms.
49
- */
50
- static const char trailingBytesForUTF8[256] = {
51
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
52
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
53
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
54
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
55
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
56
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
57
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
59
- };
227
+ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
228
+ {
229
+ const unsigned char ch = (unsigned char)*search->ptr;
230
+ switch (ch_len) {
231
+ case 1: {
232
+ switch (ch) {
233
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
234
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
235
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
236
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
237
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
238
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
239
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
240
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
241
+ default: {
242
+ const char *hexdig = "0123456789abcdef";
243
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
244
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
245
+ scratch[5] = hexdig[ch & 0xf];
246
+ fbuffer_append(search->buffer, scratch, 6);
247
+ break;
248
+ }
249
+ }
250
+ break;
251
+ }
252
+ case 3: {
253
+ if (search->ptr[2] & 1) {
254
+ fbuffer_append(search->buffer, "\\u2029", 6);
255
+ } else {
256
+ fbuffer_append(search->buffer, "\\u2028", 6);
257
+ }
258
+ break;
259
+ }
260
+ }
261
+ search->cursor = (search->ptr += ch_len);
262
+ }
60
263
 
61
- /*
62
- * Magic values subtracted from a buffer value during UTF8 conversion.
63
- * This table contains as many values as there might be trailing bytes
64
- * in a UTF-8 sequence.
65
- */
66
- static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
67
- 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
264
+ #ifdef HAVE_SIMD
68
265
 
69
- /*
70
- * Utility routine to tell whether a sequence of bytes is legal UTF-8.
71
- * This must be called with the length pre-determined by the first byte.
72
- * If not calling this from ConvertUTF8to*, then the length can be set by:
73
- * length = trailingBytesForUTF8[*source]+1;
74
- * and the sequence is illegal right away if there aren't that many bytes
75
- * available.
76
- * If presented with a length > 4, this returns 0. The Unicode
77
- * definition of UTF-8 goes up to 4-byte sequences.
78
- */
79
- static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
80
- {
81
- UTF8 a;
82
- const UTF8 *srcptr = source+length;
83
- switch (length) {
84
- default: return 0;
85
- /* Everything else falls through when "1"... */
86
- case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
87
- case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
88
- case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
89
-
90
- switch (*source) {
91
- /* no fall-through in this inner switch */
92
- case 0xE0: if (a < 0xA0) return 0; break;
93
- case 0xED: if (a > 0x9F) return 0; break;
94
- case 0xF0: if (a < 0x90) return 0; break;
95
- case 0xF4: if (a > 0x8F) return 0; break;
96
- default: if (a < 0x80) return 0;
97
- }
266
+ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
+ {
268
+ // Flush the buffer so everything up until the last 'len' characters are unflushed.
269
+ search_flush(search);
98
270
 
99
- case 1: if (*source >= 0x80 && *source < 0xC2) return 0;
100
- }
101
- if (*source > 0xF4) return 0;
102
- return 1;
271
+ FBuffer *buf = search->buffer;
272
+ fbuffer_inc_capa(buf, vec_len);
273
+
274
+ char *s = (buf->ptr + buf->len);
275
+
276
+ // Pad the buffer with dummy characters that won't need escaping.
277
+ // This seems wasteful at first sight, but memset of vector length is very fast.
278
+ memset(s, 'X', vec_len);
279
+
280
+ // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
281
+ // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
282
+ MEMCPY(s, search->ptr, char, len);
283
+
284
+ return s;
103
285
  }
104
286
 
105
- /* Escapes the UTF16 character and stores the result in the buffer buf. */
106
- static void unicode_escape(char *buf, UTF16 character)
287
+ #ifdef HAVE_SIMD_NEON
288
+
289
+ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
107
290
  {
108
- const char *digits = "0123456789abcdef";
291
+ uint64_t mask = search->matches_mask;
292
+ uint32_t index = trailing_zeros64(mask) >> 2;
109
293
 
110
- buf[2] = digits[character >> 12];
111
- buf[3] = digits[(character >> 8) & 0xf];
112
- buf[4] = digits[(character >> 4) & 0xf];
113
- buf[5] = digits[character & 0xf];
294
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
295
+ // If we want to use a similar approach for full escaping we'll need to ensure:
296
+ // search->chunk_base + index >= search->ptr
297
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
298
+ // is one byte after the previous match then:
299
+ // search->chunk_base + index == search->ptr
300
+ search->ptr = search->chunk_base + index;
301
+ mask &= mask - 1;
302
+ search->matches_mask = mask;
303
+ search_flush(search);
304
+ return 1;
114
305
  }
115
306
 
116
- /* Escapes the UTF16 character and stores the result in the buffer buf, then
117
- * the buffer buf is appended to the FBuffer buffer. */
118
- static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
119
- character)
307
+ // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
+ static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
120
309
  {
121
- unicode_escape(buf, character);
122
- fbuffer_append(buffer, buf, 6);
310
+ const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
+ const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
+ return mask & 0x8888888888888888ull;
123
313
  }
124
314
 
125
- /* Converts string to a JSON string in FBuffer buffer, where all but the ASCII
126
- * and control characters are JSON escaped. */
127
- static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
315
+ static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
128
316
  {
129
- const UTF8 *source = (UTF8 *) RSTRING_PTR(string);
130
- const UTF8 *sourceEnd = source + RSTRING_LEN(string);
131
- char buf[6] = { '\\', 'u' };
317
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
+
319
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
+ const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
+
323
+ uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
+ uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
132
325
 
133
- while (source < sourceEnd) {
134
- UTF32 ch = 0;
135
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
136
- if (source + extraBytesToRead >= sourceEnd) {
137
- rb_raise(rb_path2class("JSON::GeneratorError"),
138
- "partial character in source, but hit end");
326
+ return neon_match_mask(needs_escape);
327
+ }
328
+
329
+ static inline unsigned char search_escape_basic_neon(search_state *search)
330
+ {
331
+ if (RB_UNLIKELY(search->has_matches)) {
332
+ // There are more matches if search->matches_mask > 0.
333
+ if (search->matches_mask > 0) {
334
+ return neon_next_match(search);
335
+ } else {
336
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
+ // Skip over any characters in the last chunk that occur after the last match.
338
+ search->has_matches = false;
339
+ search->ptr = search->chunk_end;
139
340
  }
140
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
141
- rb_raise(rb_path2class("JSON::GeneratorError"),
142
- "source sequence is illegal/malformed utf-8");
341
+ }
342
+
343
+ /*
344
+ * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
+ * need to be escaped.
346
+ *
347
+ * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
+ *
349
+ * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
+ * the vector instructions may work on larger vectors.
351
+ *
352
+ * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
353
+ *
354
+ * lower_bound: [20 20 20 20 20 20 20 20]
355
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
+ * dblquote: [22 22 22 22 22 22 22 22]
357
+ *
358
+ * Next we load the first chunk of the ptr:
359
+ * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
+ *
361
+ * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
+ * as no bytes are less than 32 (0x20):
363
+ * [0 0 0 0 0 0 0 0]
364
+ *
365
+ * Next, we check if any byte in chunk is equal to a backslash:
366
+ * [0 0 0 FF 0 0 0 0]
367
+ *
368
+ * Finally we check if any byte in chunk is equal to a double quote:
369
+ * [FF 0 0 0 0 0 0 0]
370
+ *
371
+ * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
+ * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
+ * This is the needs_escape vector and it is equal to:
374
+ * [FF 0 0 FF 0 0 0 0]
375
+ *
376
+ * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
+ * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
+ *
379
+ * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
+ * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
+ * have at least one byte that needs to be escaped.
382
+ */
383
+ while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
+ uint64_t mask = neon_rules_update(search->ptr);
385
+
386
+ if (!mask) {
387
+ search->ptr += sizeof(uint8x16_t);
388
+ continue;
143
389
  }
144
- /*
145
- * The cases all fall through. See "Note A" below.
146
- */
147
- switch (extraBytesToRead) {
148
- case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
149
- case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
150
- case 3: ch += *source++; ch <<= 6;
151
- case 2: ch += *source++; ch <<= 6;
152
- case 1: ch += *source++; ch <<= 6;
153
- case 0: ch += *source++;
390
+ search->matches_mask = mask;
391
+ search->has_matches = true;
392
+ search->chunk_base = search->ptr;
393
+ search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
+ return neon_next_match(search);
395
+ }
396
+
397
+ // There are fewer than 16 bytes left.
398
+ unsigned long remaining = (search->end - search->ptr);
399
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
+ char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
+
402
+ uint64_t mask = neon_rules_update(s);
403
+
404
+ if (!mask) {
405
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
+ // search->cursor to search->ptr.
407
+ fbuffer_consumed(search->buffer, remaining);
408
+ search->ptr = search->end;
409
+ search->cursor = search->end;
410
+ return 0;
154
411
  }
155
- ch -= offsetsFromUTF8[extraBytesToRead];
156
-
157
- if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
158
- /* UTF-16 surrogate values are illegal in UTF-32 */
159
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
160
- #if UNI_STRICT_CONVERSION
161
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
162
- rb_raise(rb_path2class("JSON::GeneratorError"),
163
- "source sequence is illegal/malformed utf-8");
412
+
413
+ search->matches_mask = mask;
414
+ search->has_matches = true;
415
+ search->chunk_end = search->end;
416
+ search->chunk_base = search->ptr;
417
+ return neon_next_match(search);
418
+ }
419
+
420
+ if (search->ptr < search->end) {
421
+ return search_escape_basic(search);
422
+ }
423
+
424
+ search_flush(search);
425
+ return 0;
426
+ }
427
+ #endif /* HAVE_SIMD_NEON */
428
+
429
+ #ifdef HAVE_SIMD_SSE2
430
+
431
+ #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
+ #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
+ #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
+ #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435
+
436
+ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
437
+ {
438
+ int mask = search->matches_mask;
439
+ int index = trailing_zeros(mask);
440
+
441
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
442
+ // If we want to use a similar approach for full escaping we'll need to ensure:
443
+ // search->chunk_base + index >= search->ptr
444
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
445
+ // is one byte after the previous match then:
446
+ // search->chunk_base + index == search->ptr
447
+ search->ptr = search->chunk_base + index;
448
+ mask &= mask - 1;
449
+ search->matches_mask = mask;
450
+ search_flush(search);
451
+ return 1;
452
+ }
453
+
454
+ #if defined(__clang__) || defined(__GNUC__)
455
+ #define TARGET_SSE2 __attribute__((target("sse2")))
164
456
  #else
165
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
457
+ #define TARGET_SSE2
166
458
  #endif
459
+
460
+ static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
+ {
462
+ __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463
+
464
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
+ __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
+ __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
+ __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
+ return _mm_movemask_epi8(needs_escape);
470
+ }
471
+
472
+ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
473
+ {
474
+ if (RB_UNLIKELY(search->has_matches)) {
475
+ // There are more matches if search->matches_mask > 0.
476
+ if (search->matches_mask > 0) {
477
+ return sse2_next_match(search);
478
+ } else {
479
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
+ // Skip over any characters in the last chunk that occur after the last match.
481
+ search->has_matches = false;
482
+ if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
483
+ search->ptr = search->end;
167
484
  } else {
168
- /* normal case */
169
- if (ch >= 0x20 && ch <= 0x7f) {
170
- switch (ch) {
171
- case '\\':
172
- fbuffer_append(buffer, "\\\\", 2);
173
- break;
174
- case '"':
175
- fbuffer_append(buffer, "\\\"", 2);
176
- break;
177
- case '/':
178
- if(script_safe) {
179
- fbuffer_append(buffer, "\\/", 2);
180
- break;
181
- }
182
- default:
183
- fbuffer_append_char(buffer, (char)ch);
184
- break;
185
- }
186
- } else {
187
- switch (ch) {
188
- case '\n':
189
- fbuffer_append(buffer, "\\n", 2);
190
- break;
191
- case '\r':
192
- fbuffer_append(buffer, "\\r", 2);
193
- break;
194
- case '\t':
195
- fbuffer_append(buffer, "\\t", 2);
196
- break;
197
- case '\f':
198
- fbuffer_append(buffer, "\\f", 2);
199
- break;
200
- case '\b':
201
- fbuffer_append(buffer, "\\b", 2);
202
- break;
203
- default:
204
- unicode_escape_to_buffer(buffer, buf, (UTF16) ch);
205
- break;
485
+ search->ptr = search->chunk_base + sizeof(__m128i);
486
+ }
487
+ }
488
+ }
489
+
490
+ while (search->ptr + sizeof(__m128i) <= search->end) {
491
+ int needs_escape_mask = sse2_update(search->ptr);
492
+
493
+ if (needs_escape_mask == 0) {
494
+ search->ptr += sizeof(__m128i);
495
+ continue;
496
+ }
497
+
498
+ search->has_matches = true;
499
+ search->matches_mask = needs_escape_mask;
500
+ search->chunk_base = search->ptr;
501
+ return sse2_next_match(search);
502
+ }
503
+
504
+ // There are fewer than 16 bytes left.
505
+ unsigned long remaining = (search->end - search->ptr);
506
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
+ char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
+
509
+ int needs_escape_mask = sse2_update(s);
510
+
511
+ if (needs_escape_mask == 0) {
512
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
+ // search->cursor to search->ptr.
514
+ fbuffer_consumed(search->buffer, remaining);
515
+ search->ptr = search->end;
516
+ search->cursor = search->end;
517
+ return 0;
518
+ }
519
+
520
+ search->has_matches = true;
521
+ search->matches_mask = needs_escape_mask;
522
+ search->chunk_base = search->ptr;
523
+ return sse2_next_match(search);
524
+ }
525
+
526
+ if (search->ptr < search->end) {
527
+ return search_escape_basic(search);
528
+ }
529
+
530
+ search_flush(search);
531
+ return 0;
532
+ }
533
+
534
+ #endif /* HAVE_SIMD_SSE2 */
535
+
536
+ #endif /* HAVE_SIMD */
537
+
538
+ static const unsigned char script_safe_escape_table[256] = {
539
+ // ASCII Control Characters
540
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
541
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
542
+ // ASCII Characters
543
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
544
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
546
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
547
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
548
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
549
+ // Continuation byte
550
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
551
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
552
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
553
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
554
+ // First byte of a 2-byte code point
555
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
556
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
557
+ // First byte of a 3-byte code point
558
+ 3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029
559
+ //First byte of a 4+ byte code point
560
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
561
+ };
562
+
563
+ static inline unsigned char search_script_safe_escape(search_state *search)
564
+ {
565
+ while (search->ptr < search->end) {
566
+ unsigned char ch = (unsigned char)*search->ptr;
567
+ unsigned char ch_len = script_safe_escape_table[ch];
568
+
569
+ if (RB_UNLIKELY(ch_len)) {
570
+ if (ch_len & ESCAPE_MASK) {
571
+ if (RB_UNLIKELY(ch_len == 11)) {
572
+ const unsigned char *uptr = (const unsigned char *)search->ptr;
573
+ if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
574
+ search->ptr += 3;
575
+ continue;
206
576
  }
207
577
  }
578
+ search_flush(search);
579
+ return ch_len & CHAR_LENGTH_MASK;
580
+ } else {
581
+ search->ptr += ch_len;
208
582
  }
209
- } else if (ch > UNI_MAX_UTF16) {
210
- #if UNI_STRICT_CONVERSION
211
- source -= (extraBytesToRead+1); /* return to the start */
212
- rb_raise(rb_path2class("JSON::GeneratorError"),
213
- "source sequence is illegal/malformed utf8");
214
- #else
215
- unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
216
- #endif
217
583
  } else {
218
- /* target is a character in range 0xFFFF - 0x10FFFF. */
219
- ch -= halfBase;
220
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
221
- unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
584
+ search->ptr++;
222
585
  }
223
586
  }
224
- RB_GC_GUARD(string);
225
- }
226
-
227
- /* Converts string to a JSON string in FBuffer buffer, where only the
228
- * characters required by the JSON standard are JSON escaped. The remaining
229
- * characters (should be UTF8) are just passed through and appended to the
230
- * result. */
231
- static void convert_UTF8_to_JSON(FBuffer *buffer, VALUE string, char script_safe)
232
- {
233
- const char *ptr = RSTRING_PTR(string), *p;
234
- unsigned long len = RSTRING_LEN(string), start = 0, end = 0;
235
- const char *escape = NULL;
236
- int escape_len;
237
- unsigned char c;
238
- char buf[6] = { '\\', 'u' };
239
- int ascii_only = rb_enc_str_asciionly_p(string);
240
-
241
- for (start = 0, end = 0; end < len;) {
242
- p = ptr + end;
243
- c = (unsigned char) *p;
244
- if (c < 0x20) {
245
- switch (c) {
246
- case '\n':
247
- escape = "\\n";
248
- escape_len = 2;
249
- break;
250
- case '\r':
251
- escape = "\\r";
252
- escape_len = 2;
253
- break;
254
- case '\t':
255
- escape = "\\t";
256
- escape_len = 2;
257
- break;
258
- case '\f':
259
- escape = "\\f";
260
- escape_len = 2;
261
- break;
262
- case '\b':
263
- escape = "\\b";
264
- escape_len = 2;
265
- break;
266
- default:
267
- unicode_escape(buf, (UTF16) *p);
268
- escape = buf;
269
- escape_len = 6;
587
+ search_flush(search);
588
+ return 0;
589
+ }
590
+
591
+ static void convert_UTF8_to_script_safe_JSON(search_state *search)
592
+ {
593
+ unsigned char ch_len;
594
+ while ((ch_len = search_script_safe_escape(search))) {
595
+ escape_UTF8_char(search, ch_len);
596
+ }
597
+ }
598
+
599
+ static const unsigned char ascii_only_escape_table[256] = {
600
+ // ASCII Control Characters
601
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
602
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
603
+ // ASCII Characters
604
+ 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
605
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
606
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
607
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
608
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
609
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610
+ // Continuation byte
611
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
612
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
613
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
614
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
615
+ // First byte of a 2-byte code point
616
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
617
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
618
+ // First byte of a 3-byte code point
619
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
620
+ //First byte of a 4+ byte code point
621
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
622
+ };
623
+
624
+ static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
625
+ {
626
+ while (search->ptr < search->end) {
627
+ unsigned char ch = (unsigned char)*search->ptr;
628
+ unsigned char ch_len = escape_table[ch];
629
+
630
+ if (RB_UNLIKELY(ch_len)) {
631
+ search_flush(search);
632
+ return ch_len & CHAR_LENGTH_MASK;
633
+ } else {
634
+ search->ptr++;
635
+ }
636
+ }
637
+ search_flush(search);
638
+ return 0;
639
+ }
640
+
641
+ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
642
+ const unsigned char ch = (unsigned char)*search->ptr;
643
+ switch (ch_len) {
644
+ case 1: {
645
+ switch (ch) {
646
+ case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
647
+ case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
648
+ case '/': fbuffer_append(search->buffer, "\\/", 2); break;
649
+ case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
650
+ case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
651
+ case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
652
+ case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
653
+ case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
654
+ default: {
655
+ const char *hexdig = "0123456789abcdef";
656
+ char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
657
+ scratch[4] = hexdig[(ch >> 4) & 0xf];
658
+ scratch[5] = hexdig[ch & 0xf];
659
+ fbuffer_append(search->buffer, scratch, 6);
270
660
  break;
661
+ }
271
662
  }
272
- } else {
273
- switch (c) {
274
- case '\\':
275
- escape = "\\\\";
276
- escape_len = 2;
663
+ break;
664
+ }
665
+ default: {
666
+ const char *hexdig = "0123456789abcdef";
667
+ char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
668
+
669
+ uint32_t wchar = 0;
670
+
671
+ switch(ch_len) {
672
+ case 2:
673
+ wchar = ch & 0x1F;
277
674
  break;
278
- case '"':
279
- escape = "\\\"";
280
- escape_len = 2;
675
+ case 3:
676
+ wchar = ch & 0x0F;
281
677
  break;
282
- case '/':
283
- if(script_safe) {
284
- escape = "\\/";
285
- escape_len = 2;
286
- break;
287
- }
288
- default:
289
- {
290
- unsigned short clen = 1;
291
- if (!ascii_only) {
292
- clen += trailingBytesForUTF8[c];
293
- if (end + clen > len) {
294
- rb_raise(rb_path2class("JSON::GeneratorError"),
295
- "partial character in source, but hit end");
296
- }
297
-
298
- if (script_safe && c == 0xE2) {
299
- unsigned char c2 = (unsigned char) *(p+1);
300
- unsigned char c3 = (unsigned char) *(p+2);
301
- if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
302
- fbuffer_append(buffer, ptr + start, end - start);
303
- start = end = (end + clen);
304
- if (c3 == 0xA8) {
305
- fbuffer_append(buffer, "\\u2028", 6);
306
- } else {
307
- fbuffer_append(buffer, "\\u2029", 6);
308
- }
309
- continue;
310
- }
311
- }
312
-
313
- if (!isLegalUTF8((UTF8 *) p, clen)) {
314
- rb_raise(rb_path2class("JSON::GeneratorError"),
315
- "source sequence is illegal/malformed utf-8");
316
- }
317
- }
318
- end += clen;
319
- }
320
- continue;
678
+ case 4:
679
+ wchar = ch & 0x07;
321
680
  break;
322
681
  }
682
+
683
+ for (short i = 1; i < ch_len; i++) {
684
+ wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
685
+ }
686
+
687
+ if (wchar <= 0xFFFF) {
688
+ scratch[2] = hexdig[wchar >> 12];
689
+ scratch[3] = hexdig[(wchar >> 8) & 0xf];
690
+ scratch[4] = hexdig[(wchar >> 4) & 0xf];
691
+ scratch[5] = hexdig[wchar & 0xf];
692
+ fbuffer_append(search->buffer, scratch, 6);
693
+ } else {
694
+ uint16_t hi, lo;
695
+ wchar -= 0x10000;
696
+ hi = 0xD800 + (uint16_t)(wchar >> 10);
697
+ lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
698
+
699
+ scratch[2] = hexdig[hi >> 12];
700
+ scratch[3] = hexdig[(hi >> 8) & 0xf];
701
+ scratch[4] = hexdig[(hi >> 4) & 0xf];
702
+ scratch[5] = hexdig[hi & 0xf];
703
+
704
+ scratch[8] = hexdig[lo >> 12];
705
+ scratch[9] = hexdig[(lo >> 8) & 0xf];
706
+ scratch[10] = hexdig[(lo >> 4) & 0xf];
707
+ scratch[11] = hexdig[lo & 0xf];
708
+
709
+ fbuffer_append(search->buffer, scratch, 12);
710
+ }
711
+
712
+ break;
323
713
  }
324
- fbuffer_append(buffer, ptr + start, end - start);
325
- fbuffer_append(buffer, escape, escape_len);
326
- start = ++end;
327
- escape = NULL;
328
714
  }
329
- fbuffer_append(buffer, ptr + start, end - start);
715
+ search->cursor = (search->ptr += ch_len);
330
716
  }
331
717
 
332
- static char *fstrndup(const char *ptr, unsigned long len) {
333
- char *result;
334
- if (len <= 0) return NULL;
335
- result = ALLOC_N(char, len);
336
- memcpy(result, ptr, len);
337
- return result;
718
+ static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
719
+ {
720
+ unsigned char ch_len;
721
+ while ((ch_len = search_ascii_only_escape(search, escape_table))) {
722
+ full_escape_UTF8_char(search, ch_len);
723
+ }
338
724
  }
339
725
 
340
726
  /*
@@ -429,7 +815,9 @@ static char *fstrndup(const char *ptr, unsigned long len) {
429
815
  */
430
816
  static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
431
817
  {
432
- GENERATE_JSON(object);
818
+ rb_check_arity(argc, 0, 1);
819
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
820
+ return cState_partial_generate(Vstate, self, generate_json_object, Qfalse);
433
821
  }
434
822
 
435
823
  /*
@@ -441,7 +829,9 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
441
829
  * produced JSON string output further.
442
830
  */
443
831
  static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
444
- GENERATE_JSON(array);
832
+ rb_check_arity(argc, 0, 1);
833
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
834
+ return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
445
835
  }
446
836
 
447
837
  #ifdef RUBY_INTEGER_UNIFICATION
@@ -452,7 +842,9 @@ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
452
842
  */
453
843
  static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
454
844
  {
455
- GENERATE_JSON(integer);
845
+ rb_check_arity(argc, 0, 1);
846
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
847
+ return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse);
456
848
  }
457
849
 
458
850
  #else
@@ -463,7 +855,9 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
463
855
  */
464
856
  static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
465
857
  {
466
- GENERATE_JSON(fixnum);
858
+ rb_check_arity(argc, 0, 1);
859
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
860
+ return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse);
467
861
  }
468
862
 
469
863
  /*
@@ -473,7 +867,9 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
473
867
  */
474
868
  static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
475
869
  {
476
- GENERATE_JSON(bignum);
870
+ rb_check_arity(argc, 0, 1);
871
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
872
+ return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse);
477
873
  }
478
874
  #endif
479
875
 
@@ -484,7 +880,9 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
484
880
  */
485
881
  static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
486
882
  {
487
- GENERATE_JSON(float);
883
+ rb_check_arity(argc, 0, 1);
884
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
885
+ return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
488
886
  }
489
887
 
490
888
  /*
@@ -507,7 +905,9 @@ static VALUE mString_included_s(VALUE self, VALUE modul) {
507
905
  */
508
906
  static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
509
907
  {
510
- GENERATE_JSON(string);
908
+ rb_check_arity(argc, 0, 1);
909
+ VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
910
+ return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
511
911
  }
512
912
 
513
913
  /*
@@ -524,7 +924,7 @@ static VALUE mString_to_json_raw_object(VALUE self)
524
924
  VALUE result = rb_hash_new();
525
925
  rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
526
926
  ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
527
- rb_hash_aset(result, rb_str_new2("raw"), ary);
927
+ rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
528
928
  return result;
529
929
  }
530
930
 
@@ -562,7 +962,8 @@ static VALUE mString_Extend_json_create(VALUE self, VALUE o)
562
962
  */
563
963
  static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
564
964
  {
565
- GENERATE_JSON(true);
965
+ rb_check_arity(argc, 0, 1);
966
+ return rb_utf8_str_new("true", 4);
566
967
  }
567
968
 
568
969
  /*
@@ -572,7 +973,8 @@ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
572
973
  */
573
974
  static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
574
975
  {
575
- GENERATE_JSON(false);
976
+ rb_check_arity(argc, 0, 1);
977
+ return rb_utf8_str_new("false", 5);
576
978
  }
577
979
 
578
980
  /*
@@ -582,7 +984,8 @@ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
582
984
  */
583
985
  static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
584
986
  {
585
- GENERATE_JSON(null);
987
+ rb_check_arity(argc, 0, 1);
988
+ return rb_utf8_str_new("null", 4);
586
989
  }
587
990
 
588
991
  /*
@@ -599,36 +1002,40 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
599
1002
  rb_scan_args(argc, argv, "01", &state);
600
1003
  Check_Type(string, T_STRING);
601
1004
  state = cState_from_state_s(cState, state);
602
- return cState_partial_generate(state, string);
1005
+ return cState_partial_generate(state, string, generate_json_string, Qfalse);
1006
+ }
1007
+
1008
+ static void State_mark(void *ptr)
1009
+ {
1010
+ JSON_Generator_State *state = ptr;
1011
+ rb_gc_mark_movable(state->indent);
1012
+ rb_gc_mark_movable(state->space);
1013
+ rb_gc_mark_movable(state->space_before);
1014
+ rb_gc_mark_movable(state->object_nl);
1015
+ rb_gc_mark_movable(state->array_nl);
1016
+ rb_gc_mark_movable(state->as_json);
1017
+ }
1018
+
1019
+ static void State_compact(void *ptr)
1020
+ {
1021
+ JSON_Generator_State *state = ptr;
1022
+ state->indent = rb_gc_location(state->indent);
1023
+ state->space = rb_gc_location(state->space);
1024
+ state->space_before = rb_gc_location(state->space_before);
1025
+ state->object_nl = rb_gc_location(state->object_nl);
1026
+ state->array_nl = rb_gc_location(state->array_nl);
1027
+ state->as_json = rb_gc_location(state->as_json);
603
1028
  }
604
1029
 
605
1030
  static void State_free(void *ptr)
606
1031
  {
607
1032
  JSON_Generator_State *state = ptr;
608
- if (state->indent) ruby_xfree(state->indent);
609
- if (state->space) ruby_xfree(state->space);
610
- if (state->space_before) ruby_xfree(state->space_before);
611
- if (state->object_nl) ruby_xfree(state->object_nl);
612
- if (state->array_nl) ruby_xfree(state->array_nl);
613
- if (state->array_delim) fbuffer_free(state->array_delim);
614
- if (state->object_delim) fbuffer_free(state->object_delim);
615
- if (state->object_delim2) fbuffer_free(state->object_delim2);
616
1033
  ruby_xfree(state);
617
1034
  }
618
1035
 
619
1036
  static size_t State_memsize(const void *ptr)
620
1037
  {
621
- const JSON_Generator_State *state = ptr;
622
- size_t size = sizeof(*state);
623
- if (state->indent) size += state->indent_len + 1;
624
- if (state->space) size += state->space_len + 1;
625
- if (state->space_before) size += state->space_before_len + 1;
626
- if (state->object_nl) size += state->object_nl_len + 1;
627
- if (state->array_nl) size += state->array_nl_len + 1;
628
- if (state->array_delim) size += FBUFFER_CAPA(state->array_delim);
629
- if (state->object_delim) size += FBUFFER_CAPA(state->object_delim);
630
- if (state->object_delim2) size += FBUFFER_CAPA(state->object_delim2);
631
- return size;
1038
+ return sizeof(JSON_Generator_State);
632
1039
  }
633
1040
 
634
1041
  #ifndef HAVE_RB_EXT_RACTOR_SAFE
@@ -636,474 +1043,459 @@ static size_t State_memsize(const void *ptr)
636
1043
  # define RUBY_TYPED_FROZEN_SHAREABLE 0
637
1044
  #endif
638
1045
 
639
- #ifdef NEW_TYPEDDATA_WRAPPER
640
1046
  static const rb_data_type_t JSON_Generator_State_type = {
641
1047
  "JSON/Generator/State",
642
- {NULL, State_free, State_memsize,},
643
- #ifdef RUBY_TYPED_FREE_IMMEDIATELY
1048
+ {
1049
+ .dmark = State_mark,
1050
+ .dfree = State_free,
1051
+ .dsize = State_memsize,
1052
+ .dcompact = State_compact,
1053
+ },
644
1054
  0, 0,
645
- RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
646
- #endif
1055
+ RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
647
1056
  };
648
- #endif
649
1057
 
650
- static VALUE cState_s_allocate(VALUE klass)
1058
+ static void state_init(JSON_Generator_State *state)
651
1059
  {
652
- JSON_Generator_State *state;
653
- return TypedData_Make_Struct(klass, JSON_Generator_State,
654
- &JSON_Generator_State_type, state);
655
- }
656
-
657
- /*
658
- * call-seq: configure(opts)
659
- *
660
- * Configure this State instance with the Hash _opts_, and return
661
- * itself.
662
- */
663
- static VALUE cState_configure(VALUE self, VALUE opts)
664
- {
665
- VALUE tmp;
666
- GET_STATE(self);
667
- tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash");
668
- if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
669
- opts = tmp;
670
- tmp = rb_hash_aref(opts, ID2SYM(i_indent));
671
- if (RTEST(tmp)) {
672
- unsigned long len;
673
- Check_Type(tmp, T_STRING);
674
- len = RSTRING_LEN(tmp);
675
- state->indent = fstrndup(RSTRING_PTR(tmp), len + 1);
676
- state->indent_len = len;
677
- }
678
- tmp = rb_hash_aref(opts, ID2SYM(i_space));
679
- if (RTEST(tmp)) {
680
- unsigned long len;
681
- Check_Type(tmp, T_STRING);
682
- len = RSTRING_LEN(tmp);
683
- state->space = fstrndup(RSTRING_PTR(tmp), len + 1);
684
- state->space_len = len;
685
- }
686
- tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
687
- if (RTEST(tmp)) {
688
- unsigned long len;
689
- Check_Type(tmp, T_STRING);
690
- len = RSTRING_LEN(tmp);
691
- state->space_before = fstrndup(RSTRING_PTR(tmp), len + 1);
692
- state->space_before_len = len;
693
- }
694
- tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
695
- if (RTEST(tmp)) {
696
- unsigned long len;
697
- Check_Type(tmp, T_STRING);
698
- len = RSTRING_LEN(tmp);
699
- state->array_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
700
- state->array_nl_len = len;
701
- }
702
- tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
703
- if (RTEST(tmp)) {
704
- unsigned long len;
705
- Check_Type(tmp, T_STRING);
706
- len = RSTRING_LEN(tmp);
707
- state->object_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
708
- state->object_nl_len = len;
709
- }
710
- tmp = ID2SYM(i_max_nesting);
711
1060
  state->max_nesting = 100;
712
- if (option_given_p(opts, tmp)) {
713
- VALUE max_nesting = rb_hash_aref(opts, tmp);
714
- if (RTEST(max_nesting)) {
715
- Check_Type(max_nesting, T_FIXNUM);
716
- state->max_nesting = FIX2LONG(max_nesting);
717
- } else {
718
- state->max_nesting = 0;
719
- }
720
- }
721
- tmp = ID2SYM(i_depth);
722
- state->depth = 0;
723
- if (option_given_p(opts, tmp)) {
724
- VALUE depth = rb_hash_aref(opts, tmp);
725
- if (RTEST(depth)) {
726
- Check_Type(depth, T_FIXNUM);
727
- state->depth = FIX2LONG(depth);
728
- } else {
729
- state->depth = 0;
730
- }
731
- }
732
- tmp = ID2SYM(i_buffer_initial_length);
733
- if (option_given_p(opts, tmp)) {
734
- VALUE buffer_initial_length = rb_hash_aref(opts, tmp);
735
- if (RTEST(buffer_initial_length)) {
736
- long initial_length;
737
- Check_Type(buffer_initial_length, T_FIXNUM);
738
- initial_length = FIX2LONG(buffer_initial_length);
739
- if (initial_length > 0) state->buffer_initial_length = initial_length;
740
- }
741
- }
742
- tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
743
- state->allow_nan = RTEST(tmp);
744
- tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
745
- state->ascii_only = RTEST(tmp);
746
- tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
747
- state->script_safe = RTEST(tmp);
748
- if (!state->script_safe) {
749
- tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
750
- state->script_safe = RTEST(tmp);
751
- }
752
- tmp = rb_hash_aref(opts, ID2SYM(i_strict));
753
- state->strict = RTEST(tmp);
754
- return self;
1061
+ state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
755
1062
  }
756
1063
 
757
- static void set_state_ivars(VALUE hash, VALUE state)
1064
+ static VALUE cState_s_allocate(VALUE klass)
758
1065
  {
759
- VALUE ivars = rb_obj_instance_variables(state);
760
- int i = 0;
761
- for (i = 0; i < RARRAY_LEN(ivars); i++) {
762
- VALUE key = rb_funcall(rb_ary_entry(ivars, i), i_to_s, 0);
763
- long key_len = RSTRING_LEN(key);
764
- VALUE value = rb_iv_get(state, StringValueCStr(key));
765
- rb_hash_aset(hash, rb_str_intern(rb_str_substr(key, 1, key_len - 1)), value);
766
- }
1066
+ JSON_Generator_State *state;
1067
+ VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
1068
+ state_init(state);
1069
+ return obj;
767
1070
  }
768
1071
 
769
- /*
770
- * call-seq: to_h
771
- *
772
- * Returns the configuration instance variables as a hash, that can be
773
- * passed to the configure method.
774
- */
775
- static VALUE cState_to_h(VALUE self)
1072
+ static void vstate_spill(struct generate_json_data *data)
776
1073
  {
777
- VALUE result = rb_hash_new();
778
- GET_STATE(self);
779
- set_state_ivars(result, self);
780
- rb_hash_aset(result, ID2SYM(i_indent), rb_str_new(state->indent, state->indent_len));
781
- rb_hash_aset(result, ID2SYM(i_space), rb_str_new(state->space, state->space_len));
782
- rb_hash_aset(result, ID2SYM(i_space_before), rb_str_new(state->space_before, state->space_before_len));
783
- rb_hash_aset(result, ID2SYM(i_object_nl), rb_str_new(state->object_nl, state->object_nl_len));
784
- rb_hash_aset(result, ID2SYM(i_array_nl), rb_str_new(state->array_nl, state->array_nl_len));
785
- rb_hash_aset(result, ID2SYM(i_allow_nan), state->allow_nan ? Qtrue : Qfalse);
786
- rb_hash_aset(result, ID2SYM(i_ascii_only), state->ascii_only ? Qtrue : Qfalse);
787
- rb_hash_aset(result, ID2SYM(i_max_nesting), LONG2FIX(state->max_nesting));
788
- rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
789
- rb_hash_aset(result, ID2SYM(i_strict), state->strict ? Qtrue : Qfalse);
790
- rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
791
- rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
792
- return result;
1074
+ VALUE vstate = cState_s_allocate(cState);
1075
+ GET_STATE(vstate);
1076
+ MEMCPY(state, data->state, JSON_Generator_State, 1);
1077
+ data->state = state;
1078
+ data->vstate = vstate;
1079
+ RB_OBJ_WRITTEN(vstate, Qundef, state->indent);
1080
+ RB_OBJ_WRITTEN(vstate, Qundef, state->space);
1081
+ RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
1082
+ RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
1083
+ RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
1084
+ RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
793
1085
  }
794
1086
 
795
- /*
796
- * call-seq: [](name)
797
- *
798
- * Returns the value returned by method +name+.
799
- */
800
- static VALUE cState_aref(VALUE self, VALUE name)
1087
+ static inline VALUE vstate_get(struct generate_json_data *data)
801
1088
  {
802
- name = rb_funcall(name, i_to_s, 0);
803
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name))) {
804
- return rb_funcall(self, i_send, 1, name);
805
- } else {
806
- return rb_attr_get(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)));
1089
+ if (RB_UNLIKELY(!data->vstate)) {
1090
+ vstate_spill(data);
807
1091
  }
1092
+ return data->vstate;
808
1093
  }
809
1094
 
810
- /*
811
- * call-seq: []=(name, value)
812
- *
813
- * Sets the attribute name to value.
814
- */
815
- static VALUE cState_aset(VALUE self, VALUE name, VALUE value)
1095
+ struct hash_foreach_arg {
1096
+ struct generate_json_data *data;
1097
+ int iter;
1098
+ };
1099
+
1100
+ static VALUE
1101
+ convert_string_subclass(VALUE key)
816
1102
  {
817
- VALUE name_writer;
1103
+ VALUE key_to_s = rb_funcall(key, i_to_s, 0);
818
1104
 
819
- name = rb_funcall(name, i_to_s, 0);
820
- name_writer = rb_str_cat2(rb_str_dup(name), "=");
821
- if (RTEST(rb_funcall(self, i_respond_to_p, 1, name_writer))) {
822
- return rb_funcall(self, i_send, 2, name_writer, value);
823
- } else {
824
- rb_ivar_set(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)), value);
1105
+ if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
1106
+ VALUE cname = rb_obj_class(key);
1107
+ rb_raise(rb_eTypeError,
1108
+ "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
1109
+ cname, "String", cname, "to_s", rb_obj_class(key_to_s));
825
1110
  }
826
- return Qnil;
827
- }
828
1111
 
829
- struct hash_foreach_arg {
830
- FBuffer *buffer;
831
- JSON_Generator_State *state;
832
- VALUE Vstate;
833
- int iter;
834
- };
1112
+ return key_to_s;
1113
+ }
835
1114
 
836
1115
  static int
837
1116
  json_object_i(VALUE key, VALUE val, VALUE _arg)
838
1117
  {
839
1118
  struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg;
840
- FBuffer *buffer = arg->buffer;
841
- JSON_Generator_State *state = arg->state;
842
- VALUE Vstate = arg->Vstate;
843
-
844
- char *object_nl = state->object_nl;
845
- long object_nl_len = state->object_nl_len;
846
- char *indent = state->indent;
847
- long indent_len = state->indent_len;
848
- char *delim = FBUFFER_PTR(state->object_delim);
849
- long delim_len = FBUFFER_LEN(state->object_delim);
850
- char *delim2 = FBUFFER_PTR(state->object_delim2);
851
- long delim2_len = FBUFFER_LEN(state->object_delim2);
1119
+ struct generate_json_data *data = arg->data;
1120
+
1121
+ FBuffer *buffer = data->buffer;
1122
+ JSON_Generator_State *state = data->state;
1123
+
852
1124
  long depth = state->depth;
853
1125
  int j;
854
- VALUE klass, key_to_s;
855
1126
 
856
- if (arg->iter > 0) fbuffer_append(buffer, delim, delim_len);
857
- if (object_nl) {
858
- fbuffer_append(buffer, object_nl, object_nl_len);
1127
+ if (arg->iter > 0) fbuffer_append_char(buffer, ',');
1128
+ if (RB_UNLIKELY(data->state->object_nl)) {
1129
+ fbuffer_append_str(buffer, data->state->object_nl);
859
1130
  }
860
- if (indent) {
1131
+ if (RB_UNLIKELY(data->state->indent)) {
861
1132
  for (j = 0; j < depth; j++) {
862
- fbuffer_append(buffer, indent, indent_len);
1133
+ fbuffer_append_str(buffer, data->state->indent);
863
1134
  }
864
1135
  }
865
1136
 
866
- klass = CLASS_OF(key);
867
- if (klass == rb_cString) {
868
- key_to_s = key;
869
- } else if (klass == rb_cSymbol) {
870
- key_to_s = rb_sym2str(key);
1137
+ VALUE key_to_s;
1138
+ switch(rb_type(key)) {
1139
+ case T_STRING:
1140
+ if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
1141
+ key_to_s = key;
1142
+ } else {
1143
+ key_to_s = convert_string_subclass(key);
1144
+ }
1145
+ break;
1146
+ case T_SYMBOL:
1147
+ key_to_s = rb_sym2str(key);
1148
+ break;
1149
+ default:
1150
+ key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
1151
+ break;
1152
+ }
1153
+
1154
+ if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
1155
+ generate_json_string(buffer, data, key_to_s);
871
1156
  } else {
872
- key_to_s = rb_funcall(key, i_to_s, 0);
1157
+ generate_json(buffer, data, key_to_s);
873
1158
  }
874
- Check_Type(key_to_s, T_STRING);
875
- generate_json(buffer, Vstate, state, key_to_s);
876
- fbuffer_append(buffer, delim2, delim2_len);
877
- generate_json(buffer, Vstate, state, val);
1159
+ if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
1160
+ fbuffer_append_char(buffer, ':');
1161
+ if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
1162
+ generate_json(buffer, data, val);
878
1163
 
879
1164
  arg->iter++;
880
1165
  return ST_CONTINUE;
881
1166
  }
882
1167
 
883
- static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1168
+ static inline long increase_depth(struct generate_json_data *data)
884
1169
  {
885
- char *object_nl = state->object_nl;
886
- long object_nl_len = state->object_nl_len;
887
- char *indent = state->indent;
888
- long indent_len = state->indent_len;
889
- long max_nesting = state->max_nesting;
1170
+ JSON_Generator_State *state = data->state;
890
1171
  long depth = ++state->depth;
1172
+ if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
1173
+ rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1174
+ }
1175
+ return depth;
1176
+ }
1177
+
1178
+ static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1179
+ {
891
1180
  int j;
892
- struct hash_foreach_arg arg;
1181
+ long depth = increase_depth(data);
893
1182
 
894
- if (max_nesting != 0 && depth > max_nesting) {
895
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1183
+ if (RHASH_SIZE(obj) == 0) {
1184
+ fbuffer_append(buffer, "{}", 2);
1185
+ --data->state->depth;
1186
+ return;
896
1187
  }
1188
+
897
1189
  fbuffer_append_char(buffer, '{');
898
1190
 
899
- arg.buffer = buffer;
900
- arg.state = state;
901
- arg.Vstate = Vstate;
902
- arg.iter = 0;
1191
+ struct hash_foreach_arg arg = {
1192
+ .data = data,
1193
+ .iter = 0,
1194
+ };
903
1195
  rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
904
1196
 
905
- depth = --state->depth;
906
- if (object_nl) {
907
- fbuffer_append(buffer, object_nl, object_nl_len);
908
- if (indent) {
1197
+ depth = --data->state->depth;
1198
+ if (RB_UNLIKELY(data->state->object_nl)) {
1199
+ fbuffer_append_str(buffer, data->state->object_nl);
1200
+ if (RB_UNLIKELY(data->state->indent)) {
909
1201
  for (j = 0; j < depth; j++) {
910
- fbuffer_append(buffer, indent, indent_len);
1202
+ fbuffer_append_str(buffer, data->state->indent);
911
1203
  }
912
1204
  }
913
1205
  }
914
1206
  fbuffer_append_char(buffer, '}');
915
1207
  }
916
1208
 
917
- static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1209
+ static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
918
1210
  {
919
- char *array_nl = state->array_nl;
920
- long array_nl_len = state->array_nl_len;
921
- char *indent = state->indent;
922
- long indent_len = state->indent_len;
923
- long max_nesting = state->max_nesting;
924
- char *delim = FBUFFER_PTR(state->array_delim);
925
- long delim_len = FBUFFER_LEN(state->array_delim);
926
- long depth = ++state->depth;
927
1211
  int i, j;
928
- if (max_nesting != 0 && depth > max_nesting) {
929
- rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
1212
+ long depth = increase_depth(data);
1213
+
1214
+ if (RARRAY_LEN(obj) == 0) {
1215
+ fbuffer_append(buffer, "[]", 2);
1216
+ --data->state->depth;
1217
+ return;
930
1218
  }
1219
+
931
1220
  fbuffer_append_char(buffer, '[');
932
- if (array_nl) fbuffer_append(buffer, array_nl, array_nl_len);
1221
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
933
1222
  for(i = 0; i < RARRAY_LEN(obj); i++) {
934
- if (i > 0) fbuffer_append(buffer, delim, delim_len);
935
- if (indent) {
1223
+ if (i > 0) {
1224
+ fbuffer_append_char(buffer, ',');
1225
+ if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
1226
+ }
1227
+ if (RB_UNLIKELY(data->state->indent)) {
936
1228
  for (j = 0; j < depth; j++) {
937
- fbuffer_append(buffer, indent, indent_len);
1229
+ fbuffer_append_str(buffer, data->state->indent);
938
1230
  }
939
1231
  }
940
- generate_json(buffer, Vstate, state, rb_ary_entry(obj, i));
1232
+ generate_json(buffer, data, RARRAY_AREF(obj, i));
941
1233
  }
942
- state->depth = --depth;
943
- if (array_nl) {
944
- fbuffer_append(buffer, array_nl, array_nl_len);
945
- if (indent) {
1234
+ data->state->depth = --depth;
1235
+ if (RB_UNLIKELY(data->state->array_nl)) {
1236
+ fbuffer_append_str(buffer, data->state->array_nl);
1237
+ if (RB_UNLIKELY(data->state->indent)) {
946
1238
  for (j = 0; j < depth; j++) {
947
- fbuffer_append(buffer, indent, indent_len);
1239
+ fbuffer_append_str(buffer, data->state->indent);
948
1240
  }
949
1241
  }
950
1242
  }
951
1243
  fbuffer_append_char(buffer, ']');
952
1244
  }
953
1245
 
954
- #ifdef HAVE_RUBY_ENCODING_H
955
- static int enc_utf8_compatible_p(rb_encoding *enc)
1246
+ static inline int enc_utf8_compatible_p(int enc_idx)
956
1247
  {
957
- if (enc == rb_usascii_encoding()) return 1;
958
- if (enc == rb_utf8_encoding()) return 1;
1248
+ if (enc_idx == usascii_encindex) return 1;
1249
+ if (enc_idx == utf8_encindex) return 1;
959
1250
  return 0;
960
1251
  }
961
- #endif
962
1252
 
963
- static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1253
+ static VALUE encode_json_string_try(VALUE str)
1254
+ {
1255
+ return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
1256
+ }
1257
+
1258
+ static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
964
1259
  {
1260
+ raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
1261
+ return Qundef;
1262
+ }
1263
+
1264
+ static inline VALUE ensure_valid_encoding(VALUE str)
1265
+ {
1266
+ int encindex = RB_ENCODING_GET(str);
1267
+ VALUE utf8_string;
1268
+ if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
1269
+ if (encindex == binary_encindex) {
1270
+ utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
1271
+ switch (rb_enc_str_coderange(utf8_string)) {
1272
+ case ENC_CODERANGE_7BIT:
1273
+ return utf8_string;
1274
+ case ENC_CODERANGE_VALID:
1275
+ // For historical reasons, a binary string whose bytes are valid UTF-8 is reinterpreted as UTF-8 (with a deprecation warning) instead of raising.
1276
+ // TODO: Raise in 3.0.0
1277
+ rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
1278
+ return utf8_string;
1279
+ break;
1280
+ }
1281
+ }
1282
+
1283
+ str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
1284
+ }
1285
+ return str;
1286
+ }
1287
+
1288
+ static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1289
+ {
1290
+ obj = ensure_valid_encoding(obj);
1291
+
965
1292
  fbuffer_append_char(buffer, '"');
966
- #ifdef HAVE_RUBY_ENCODING_H
967
- if (!enc_utf8_compatible_p(rb_enc_get(obj))) {
968
- obj = rb_str_export_to_enc(obj, rb_utf8_encoding());
1293
+
1294
+ long len;
1295
+ search_state search;
1296
+ search.buffer = buffer;
1297
+ RSTRING_GETMEM(obj, search.ptr, len);
1298
+ search.cursor = search.ptr;
1299
+ search.end = search.ptr + len;
1300
+
1301
+ #ifdef HAVE_SIMD
1302
+ search.matches_mask = 0;
1303
+ search.has_matches = false;
1304
+ search.chunk_base = NULL;
1305
+ #endif /* HAVE_SIMD */
1306
+
1307
+ switch(rb_enc_str_coderange(obj)) {
1308
+ case ENC_CODERANGE_7BIT:
1309
+ case ENC_CODERANGE_VALID:
1310
+ if (RB_UNLIKELY(data->state->ascii_only)) {
1311
+ convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
1312
+ } else if (RB_UNLIKELY(data->state->script_safe)) {
1313
+ convert_UTF8_to_script_safe_JSON(&search);
1314
+ } else {
1315
+ convert_UTF8_to_JSON(&search);
1316
+ }
1317
+ break;
1318
+ default:
1319
+ raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
1320
+ break;
969
1321
  }
970
- #endif
971
- if (state->ascii_only) {
972
- convert_UTF8_to_JSON_ASCII(buffer, obj, state->script_safe);
1322
+ fbuffer_append_char(buffer, '"');
1323
+ }
1324
+
1325
+ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1326
+ {
1327
+ VALUE tmp;
1328
+ if (rb_respond_to(obj, i_to_json)) {
1329
+ tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
1330
+ Check_Type(tmp, T_STRING);
1331
+ fbuffer_append_str(buffer, tmp);
973
1332
  } else {
974
- convert_UTF8_to_JSON(buffer, obj, state->script_safe);
1333
+ tmp = rb_funcall(obj, i_to_s, 0);
1334
+ Check_Type(tmp, T_STRING);
1335
+ generate_json_string(buffer, data, tmp);
1336
+ }
1337
+ }
1338
+
1339
+ static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1340
+ {
1341
+ if (data->state->strict) {
1342
+ generate_json_string(buffer, data, rb_sym2str(obj));
1343
+ } else {
1344
+ generate_json_fallback(buffer, data, obj);
975
1345
  }
976
- fbuffer_append_char(buffer, '"');
977
1346
  }
978
1347
 
979
- static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1348
+ static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
980
1349
  {
981
1350
  fbuffer_append(buffer, "null", 4);
982
1351
  }
983
1352
 
984
- static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1353
+ static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
985
1354
  {
986
1355
  fbuffer_append(buffer, "false", 5);
987
1356
  }
988
1357
 
989
- static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1358
+ static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
990
1359
  {
991
1360
  fbuffer_append(buffer, "true", 4);
992
1361
  }
993
1362
 
994
- static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1363
+ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
995
1364
  {
996
1365
  fbuffer_append_long(buffer, FIX2LONG(obj));
997
1366
  }
998
1367
 
999
- static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1368
+ static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1000
1369
  {
1001
1370
  VALUE tmp = rb_funcall(obj, i_to_s, 0);
1002
1371
  fbuffer_append_str(buffer, tmp);
1003
1372
  }
1004
1373
 
1005
1374
  #ifdef RUBY_INTEGER_UNIFICATION
1006
- static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1375
+ static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1007
1376
  {
1008
1377
  if (FIXNUM_P(obj))
1009
- generate_json_fixnum(buffer, Vstate, state, obj);
1378
+ generate_json_fixnum(buffer, data, obj);
1010
1379
  else
1011
- generate_json_bignum(buffer, Vstate, state, obj);
1380
+ generate_json_bignum(buffer, data, obj);
1012
1381
  }
1013
1382
  #endif
1014
- static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1383
+
1384
+ static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1015
1385
  {
1016
1386
  double value = RFLOAT_VALUE(obj);
1017
- char allow_nan = state->allow_nan;
1018
- VALUE tmp = rb_funcall(obj, i_to_s, 0);
1019
- if (!allow_nan) {
1020
- if (isinf(value)) {
1021
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
1022
- } else if (isnan(value)) {
1023
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(tmp));
1387
+ char allow_nan = data->state->allow_nan;
1388
+ if (isinf(value) || isnan(value)) {
1389
+ /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
1390
+ if (!allow_nan) {
1391
+ if (data->state->strict && data->state->as_json) {
1392
+ VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1393
+ if (casted_obj != obj) {
1394
+ increase_depth(data);
1395
+ generate_json(buffer, data, casted_obj);
1396
+ data->state->depth--;
1397
+ return;
1398
+ }
1399
+ }
1400
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
1024
1401
  }
1025
- }
1026
- fbuffer_append_str(buffer, tmp);
1027
- }
1028
1402
 
1029
- static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
1030
- {
1031
- VALUE tmp;
1032
- VALUE klass = CLASS_OF(obj);
1033
- if (klass == rb_cHash) {
1034
- generate_json_object(buffer, Vstate, state, obj);
1035
- } else if (klass == rb_cArray) {
1036
- generate_json_array(buffer, Vstate, state, obj);
1037
- } else if (klass == rb_cString) {
1038
- generate_json_string(buffer, Vstate, state, obj);
1039
- } else if (obj == Qnil) {
1040
- generate_json_null(buffer, Vstate, state, obj);
1041
- } else if (obj == Qfalse) {
1042
- generate_json_false(buffer, Vstate, state, obj);
1043
- } else if (obj == Qtrue) {
1044
- generate_json_true(buffer, Vstate, state, obj);
1045
- } else if (FIXNUM_P(obj)) {
1046
- generate_json_fixnum(buffer, Vstate, state, obj);
1047
- } else if (RB_TYPE_P(obj, T_BIGNUM)) {
1048
- generate_json_bignum(buffer, Vstate, state, obj);
1049
- } else if (klass == rb_cFloat) {
1050
- generate_json_float(buffer, Vstate, state, obj);
1051
- } else if (state->strict) {
1052
- rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(CLASS_OF(obj)));
1053
- } else if (rb_respond_to(obj, i_to_json)) {
1054
- tmp = rb_funcall(obj, i_to_json, 1, Vstate);
1055
- Check_Type(tmp, T_STRING);
1403
+ VALUE tmp = rb_funcall(obj, i_to_s, 0);
1056
1404
  fbuffer_append_str(buffer, tmp);
1057
- } else {
1058
- tmp = rb_funcall(obj, i_to_s, 0);
1059
- Check_Type(tmp, T_STRING);
1060
- generate_json_string(buffer, Vstate, state, tmp);
1405
+ return;
1061
1406
  }
1407
+
1408
+ /* This implementation writes directly into the buffer. We reserve
1409
+ * the 28 characters that fpconv_dtoa states as its maximum.
1410
+ */
1411
+ fbuffer_inc_capa(buffer, 28);
1412
+ char* d = buffer->ptr + buffer->len;
1413
+ int len = fpconv_dtoa(value, d);
1414
+
1415
+ /* fpconv_dtoa converts a float to its shortest string representation,
1416
+ * but it adds a ".0" if this is a plain integer.
1417
+ */
1418
+ fbuffer_consumed(buffer, len);
1062
1419
  }
1063
1420
 
1064
- static FBuffer *cState_prepare_buffer(VALUE self)
1421
+ static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1065
1422
  {
1066
- FBuffer *buffer;
1067
- GET_STATE(self);
1068
- buffer = fbuffer_alloc(state->buffer_initial_length);
1069
-
1070
- if (state->object_delim) {
1071
- fbuffer_clear(state->object_delim);
1072
- } else {
1073
- state->object_delim = fbuffer_alloc(16);
1074
- }
1075
- fbuffer_append_char(state->object_delim, ',');
1076
- if (state->object_delim2) {
1077
- fbuffer_clear(state->object_delim2);
1078
- } else {
1079
- state->object_delim2 = fbuffer_alloc(16);
1080
- }
1081
- if (state->space_before) fbuffer_append(state->object_delim2, state->space_before, state->space_before_len);
1082
- fbuffer_append_char(state->object_delim2, ':');
1083
- if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len);
1423
+ VALUE fragment = RSTRUCT_GET(obj, 0);
1424
+ Check_Type(fragment, T_STRING);
1425
+ fbuffer_append_str(buffer, fragment);
1426
+ }
1084
1427
 
1085
- if (state->array_delim) {
1086
- fbuffer_clear(state->array_delim);
1428
+ static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
1429
+ {
1430
+ bool as_json_called = false;
1431
+ start:
1432
+ if (obj == Qnil) {
1433
+ generate_json_null(buffer, data, obj);
1434
+ } else if (obj == Qfalse) {
1435
+ generate_json_false(buffer, data, obj);
1436
+ } else if (obj == Qtrue) {
1437
+ generate_json_true(buffer, data, obj);
1438
+ } else if (RB_SPECIAL_CONST_P(obj)) {
1439
+ if (RB_FIXNUM_P(obj)) {
1440
+ generate_json_fixnum(buffer, data, obj);
1441
+ } else if (RB_FLONUM_P(obj)) {
1442
+ generate_json_float(buffer, data, obj);
1443
+ } else if (RB_STATIC_SYM_P(obj)) {
1444
+ generate_json_symbol(buffer, data, obj);
1445
+ } else {
1446
+ goto general;
1447
+ }
1087
1448
  } else {
1088
- state->array_delim = fbuffer_alloc(16);
1449
+ VALUE klass = RBASIC_CLASS(obj);
1450
+ switch (RB_BUILTIN_TYPE(obj)) {
1451
+ case T_BIGNUM:
1452
+ generate_json_bignum(buffer, data, obj);
1453
+ break;
1454
+ case T_HASH:
1455
+ if (klass != rb_cHash) goto general;
1456
+ generate_json_object(buffer, data, obj);
1457
+ break;
1458
+ case T_ARRAY:
1459
+ if (klass != rb_cArray) goto general;
1460
+ generate_json_array(buffer, data, obj);
1461
+ break;
1462
+ case T_STRING:
1463
+ if (klass != rb_cString) goto general;
1464
+ generate_json_string(buffer, data, obj);
1465
+ break;
1466
+ case T_SYMBOL:
1467
+ generate_json_symbol(buffer, data, obj);
1468
+ break;
1469
+ case T_FLOAT:
1470
+ if (klass != rb_cFloat) goto general;
1471
+ generate_json_float(buffer, data, obj);
1472
+ break;
1473
+ case T_STRUCT:
1474
+ if (klass != cFragment) goto general;
1475
+ generate_json_fragment(buffer, data, obj);
1476
+ break;
1477
+ default:
1478
+ general:
1479
+ if (data->state->strict) {
1480
+ if (RTEST(data->state->as_json) && !as_json_called) {
1481
+ obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
1482
+ as_json_called = true;
1483
+ goto start;
1484
+ } else {
1485
+ raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
1486
+ }
1487
+ } else {
1488
+ generate_json_fallback(buffer, data, obj);
1489
+ }
1490
+ }
1089
1491
  }
1090
- fbuffer_append_char(state->array_delim, ',');
1091
- if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len);
1092
- return buffer;
1093
1492
  }
1094
1493
 
1095
- struct generate_json_data {
1096
- FBuffer *buffer;
1097
- VALUE vstate;
1098
- JSON_Generator_State *state;
1099
- VALUE obj;
1100
- };
1101
-
1102
1494
  static VALUE generate_json_try(VALUE d)
1103
1495
  {
1104
1496
  struct generate_json_data *data = (struct generate_json_data *)d;
1105
1497
 
1106
- generate_json(data->buffer, data->vstate, data->state, data->obj);
1498
+ data->func(data->buffer, data, data->obj);
1107
1499
 
1108
1500
  return Qnil;
1109
1501
  }
@@ -1118,65 +1510,50 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
1118
1510
  return Qundef;
1119
1511
  }
1120
1512
 
1121
- static VALUE cState_partial_generate(VALUE self, VALUE obj)
1513
+ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
1122
1514
  {
1123
- FBuffer *buffer = cState_prepare_buffer(self);
1124
1515
  GET_STATE(self);
1125
1516
 
1517
+ char stack_buffer[FBUFFER_STACK_SIZE];
1518
+ FBuffer buffer = {
1519
+ .io = RTEST(io) ? io : Qfalse,
1520
+ };
1521
+ fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
1522
+
1126
1523
  struct generate_json_data data = {
1127
- .buffer = buffer,
1524
+ .buffer = &buffer,
1128
1525
  .vstate = self,
1129
1526
  .state = state,
1130
- .obj = obj
1527
+ .obj = obj,
1528
+ .func = func
1131
1529
  };
1132
1530
  rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
1133
1531
 
1134
- return fbuffer_to_s(buffer);
1532
+ return fbuffer_finalize(&buffer);
1135
1533
  }
1136
1534
 
1137
- /*
1138
- * call-seq: generate(obj)
1535
+ /* call-seq:
1536
+ * generate(obj) -> String
1537
+ * generate(obj, anIO) -> anIO
1139
1538
  *
1140
1539
  * Generates a valid JSON document from object +obj+ and returns the
1141
1540
  * result. If no valid JSON document can be created this method raises a
1142
1541
  * GeneratorError exception.
1143
1542
  */
1144
- static VALUE cState_generate(VALUE self, VALUE obj)
1543
+ static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
1145
1544
  {
1146
- VALUE result = cState_partial_generate(self, obj);
1545
+ rb_check_arity(argc, 1, 2);
1546
+ VALUE obj = argv[0];
1547
+ VALUE io = argc > 1 ? argv[1] : Qnil;
1548
+ VALUE result = cState_partial_generate(self, obj, generate_json, io);
1147
1549
  GET_STATE(self);
1148
1550
  (void)state;
1149
1551
  return result;
1150
1552
  }
1151
1553
 
1152
- /*
1153
- * call-seq: new(opts = {})
1154
- *
1155
- * Instantiates a new State object, configured by _opts_.
1156
- *
1157
- * _opts_ can have the following keys:
1158
- *
1159
- * * *indent*: a string used to indent levels (default: ''),
1160
- * * *space*: a string that is put after, a : or , delimiter (default: ''),
1161
- * * *space_before*: a string that is put before a : pair delimiter (default: ''),
1162
- * * *object_nl*: a string that is put at the end of a JSON object (default: ''),
1163
- * * *array_nl*: a string that is put at the end of a JSON array (default: ''),
1164
- * * *allow_nan*: true if NaN, Infinity, and -Infinity should be
1165
- * generated, otherwise an exception is thrown, if these values are
1166
- * encountered. This options defaults to false.
1167
- * * *ascii_only*: true if only ASCII characters should be generated. This
1168
- * option defaults to false.
1169
- * * *buffer_initial_length*: sets the initial length of the generator's
1170
- * internal buffer.
1171
- */
1172
1554
  static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
1173
1555
  {
1174
- VALUE opts;
1175
- GET_STATE(self);
1176
- state->max_nesting = 100;
1177
- state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
1178
- rb_scan_args(argc, argv, "01", &opts);
1179
- if (!NIL_P(opts)) cState_configure(self, opts);
1556
+ rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`");
1180
1557
  return self;
1181
1558
  }
1182
1559
 
@@ -1196,14 +1573,12 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
1196
1573
  if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State");
1197
1574
 
1198
1575
  MEMCPY(objState, origState, JSON_Generator_State, 1);
1199
- objState->indent = fstrndup(origState->indent, origState->indent_len);
1200
- objState->space = fstrndup(origState->space, origState->space_len);
1201
- objState->space_before = fstrndup(origState->space_before, origState->space_before_len);
1202
- objState->object_nl = fstrndup(origState->object_nl, origState->object_nl_len);
1203
- objState->array_nl = fstrndup(origState->array_nl, origState->array_nl_len);
1204
- if (origState->array_delim) objState->array_delim = fbuffer_dup(origState->array_delim);
1205
- if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim);
1206
- if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2);
1576
+ objState->indent = origState->indent;
1577
+ objState->space = origState->space;
1578
+ objState->space_before = origState->space_before;
1579
+ objState->object_nl = origState->object_nl;
1580
+ objState->array_nl = origState->array_nl;
1581
+ objState->as_json = origState->as_json;
1207
1582
  return obj;
1208
1583
  }
1209
1584
 
@@ -1233,7 +1608,18 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts)
1233
1608
  static VALUE cState_indent(VALUE self)
1234
1609
  {
1235
1610
  GET_STATE(self);
1236
- return state->indent ? rb_str_new(state->indent, state->indent_len) : rb_str_new2("");
1611
+ return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0));
1612
+ }
1613
+
1614
+ static VALUE string_config(VALUE config)
1615
+ {
1616
+ if (RTEST(config)) {
1617
+ Check_Type(config, T_STRING);
1618
+ if (RSTRING_LEN(config)) {
1619
+ return rb_str_new_frozen(config);
1620
+ }
1621
+ }
1622
+ return Qfalse;
1237
1623
  }
1238
1624
 
1239
1625
  /*
@@ -1243,21 +1629,8 @@ static VALUE cState_indent(VALUE self)
1243
1629
  */
1244
1630
  static VALUE cState_indent_set(VALUE self, VALUE indent)
1245
1631
  {
1246
- unsigned long len;
1247
1632
  GET_STATE(self);
1248
- Check_Type(indent, T_STRING);
1249
- len = RSTRING_LEN(indent);
1250
- if (len == 0) {
1251
- if (state->indent) {
1252
- ruby_xfree(state->indent);
1253
- state->indent = NULL;
1254
- state->indent_len = 0;
1255
- }
1256
- } else {
1257
- if (state->indent) ruby_xfree(state->indent);
1258
- state->indent = fstrndup(RSTRING_PTR(indent), len);
1259
- state->indent_len = len;
1260
- }
1633
+ RB_OBJ_WRITE(self, &state->indent, string_config(indent));
1261
1634
  return Qnil;
1262
1635
  }
1263
1636
 
@@ -1270,7 +1643,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent)
1270
1643
  static VALUE cState_space(VALUE self)
1271
1644
  {
1272
1645
  GET_STATE(self);
1273
- return state->space ? rb_str_new(state->space, state->space_len) : rb_str_new2("");
1646
+ return state->space ? state->space : rb_str_freeze(rb_utf8_str_new("", 0));
1274
1647
  }
1275
1648
 
1276
1649
  /*
@@ -1281,21 +1654,8 @@ static VALUE cState_space(VALUE self)
1281
1654
  */
1282
1655
  static VALUE cState_space_set(VALUE self, VALUE space)
1283
1656
  {
1284
- unsigned long len;
1285
1657
  GET_STATE(self);
1286
- Check_Type(space, T_STRING);
1287
- len = RSTRING_LEN(space);
1288
- if (len == 0) {
1289
- if (state->space) {
1290
- ruby_xfree(state->space);
1291
- state->space = NULL;
1292
- state->space_len = 0;
1293
- }
1294
- } else {
1295
- if (state->space) ruby_xfree(state->space);
1296
- state->space = fstrndup(RSTRING_PTR(space), len);
1297
- state->space_len = len;
1298
- }
1658
+ RB_OBJ_WRITE(self, &state->space, string_config(space));
1299
1659
  return Qnil;
1300
1660
  }
1301
1661
 
@@ -1307,7 +1667,7 @@ static VALUE cState_space_set(VALUE self, VALUE space)
1307
1667
  static VALUE cState_space_before(VALUE self)
1308
1668
  {
1309
1669
  GET_STATE(self);
1310
- return state->space_before ? rb_str_new(state->space_before, state->space_before_len) : rb_str_new2("");
1670
+ return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0));
1311
1671
  }
1312
1672
 
1313
1673
  /*
@@ -1317,21 +1677,8 @@ static VALUE cState_space_before(VALUE self)
1317
1677
  */
1318
1678
  static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1319
1679
  {
1320
- unsigned long len;
1321
1680
  GET_STATE(self);
1322
- Check_Type(space_before, T_STRING);
1323
- len = RSTRING_LEN(space_before);
1324
- if (len == 0) {
1325
- if (state->space_before) {
1326
- ruby_xfree(state->space_before);
1327
- state->space_before = NULL;
1328
- state->space_before_len = 0;
1329
- }
1330
- } else {
1331
- if (state->space_before) ruby_xfree(state->space_before);
1332
- state->space_before = fstrndup(RSTRING_PTR(space_before), len);
1333
- state->space_before_len = len;
1334
- }
1681
+ RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
1335
1682
  return Qnil;
1336
1683
  }
1337
1684
 
@@ -1344,7 +1691,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before)
1344
1691
  static VALUE cState_object_nl(VALUE self)
1345
1692
  {
1346
1693
  GET_STATE(self);
1347
- return state->object_nl ? rb_str_new(state->object_nl, state->object_nl_len) : rb_str_new2("");
1694
+ return state->object_nl ? state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0));
1348
1695
  }
1349
1696
 
1350
1697
  /*
@@ -1355,20 +1702,8 @@ static VALUE cState_object_nl(VALUE self)
1355
1702
  */
1356
1703
  static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1357
1704
  {
1358
- unsigned long len;
1359
1705
  GET_STATE(self);
1360
- Check_Type(object_nl, T_STRING);
1361
- len = RSTRING_LEN(object_nl);
1362
- if (len == 0) {
1363
- if (state->object_nl) {
1364
- ruby_xfree(state->object_nl);
1365
- state->object_nl = NULL;
1366
- }
1367
- } else {
1368
- if (state->object_nl) ruby_xfree(state->object_nl);
1369
- state->object_nl = fstrndup(RSTRING_PTR(object_nl), len);
1370
- state->object_nl_len = len;
1371
- }
1706
+ RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
1372
1707
  return Qnil;
1373
1708
  }
1374
1709
 
@@ -1380,7 +1715,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
1380
1715
  static VALUE cState_array_nl(VALUE self)
1381
1716
  {
1382
1717
  GET_STATE(self);
1383
- return state->array_nl ? rb_str_new(state->array_nl, state->array_nl_len) : rb_str_new2("");
1718
+ return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0));
1384
1719
  }
1385
1720
 
1386
1721
  /*
@@ -1390,23 +1725,33 @@ static VALUE cState_array_nl(VALUE self)
1390
1725
  */
1391
1726
  static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
1392
1727
  {
1393
- unsigned long len;
1394
1728
  GET_STATE(self);
1395
- Check_Type(array_nl, T_STRING);
1396
- len = RSTRING_LEN(array_nl);
1397
- if (len == 0) {
1398
- if (state->array_nl) {
1399
- ruby_xfree(state->array_nl);
1400
- state->array_nl = NULL;
1401
- }
1402
- } else {
1403
- if (state->array_nl) ruby_xfree(state->array_nl);
1404
- state->array_nl = fstrndup(RSTRING_PTR(array_nl), len);
1405
- state->array_nl_len = len;
1406
- }
1729
+ RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
1407
1730
  return Qnil;
1408
1731
  }
1409
1732
 
1733
+ /*
1734
+ * call-seq: as_json()
1735
+ *
1736
+ * Returns the Proc that strict-mode generation calls to convert values it cannot serialize directly.
1737
+ */
1738
+ static VALUE cState_as_json(VALUE self)
1739
+ {
1740
+ GET_STATE(self);
1741
+ return state->as_json;
1742
+ }
1743
+
1744
+ /*
1745
+ * call-seq: as_json=(as_json)
1746
+ *
1747
+ * Sets the Proc (or an object convertible via to_proc) that strict-mode generation calls to convert values it cannot serialize directly.
1748
+ */
1749
+ static VALUE cState_as_json_set(VALUE self, VALUE as_json)
1750
+ {
1751
+ GET_STATE(self);
1752
+ RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
1753
+ return Qnil;
1754
+ }
1410
1755
 
1411
1756
  /*
1412
1757
  * call-seq: check_circular?
@@ -1432,6 +1777,11 @@ static VALUE cState_max_nesting(VALUE self)
1432
1777
  return LONG2FIX(state->max_nesting);
1433
1778
  }
1434
1779
 
1780
+ static long long_config(VALUE num)
1781
+ {
1782
+ return RTEST(num) ? FIX2LONG(num) : 0;
1783
+ }
1784
+
1435
1785
  /*
1436
1786
  * call-seq: max_nesting=(depth)
1437
1787
  *
@@ -1441,8 +1791,8 @@ static VALUE cState_max_nesting(VALUE self)
1441
1791
  static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
1442
1792
  {
1443
1793
  GET_STATE(self);
1444
- Check_Type(depth, T_FIXNUM);
1445
- return state->max_nesting = FIX2LONG(depth);
1794
+ state->max_nesting = long_config(depth);
1795
+ return Qnil;
1446
1796
  }
1447
1797
 
1448
1798
  /*
@@ -1513,6 +1863,18 @@ static VALUE cState_allow_nan_p(VALUE self)
1513
1863
  return state->allow_nan ? Qtrue : Qfalse;
1514
1864
  }
1515
1865
 
1866
+ /*
1867
+ * call-seq: allow_nan=(enable)
1868
+ *
1869
+ * This sets whether or not to serialize NaN, Infinity, and -Infinity
1870
+ */
1871
+ static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
1872
+ {
1873
+ GET_STATE(self);
1874
+ state->allow_nan = RTEST(enable);
1875
+ return Qnil;
1876
+ }
1877
+
1516
1878
  /*
1517
1879
  * call-seq: ascii_only?
1518
1880
  *
@@ -1525,6 +1887,18 @@ static VALUE cState_ascii_only_p(VALUE self)
1525
1887
  return state->ascii_only ? Qtrue : Qfalse;
1526
1888
  }
1527
1889
 
1890
+ /*
1891
+ * call-seq: ascii_only=(enable)
1892
+ *
1893
+ * This sets whether only ASCII characters should be generated.
1894
+ */
1895
+ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
1896
+ {
1897
+ GET_STATE(self);
1898
+ state->ascii_only = RTEST(enable);
1899
+ return Qnil;
1900
+ }
1901
+
1528
1902
  /*
1529
1903
  * call-seq: depth
1530
1904
  *
@@ -1545,8 +1919,7 @@ static VALUE cState_depth(VALUE self)
1545
1919
  static VALUE cState_depth_set(VALUE self, VALUE depth)
1546
1920
  {
1547
1921
  GET_STATE(self);
1548
- Check_Type(depth, T_FIXNUM);
1549
- state->depth = FIX2LONG(depth);
1922
+ state->depth = long_config(depth);
1550
1923
  return Qnil;
1551
1924
  }
1552
1925
 
@@ -1561,6 +1934,15 @@ static VALUE cState_buffer_initial_length(VALUE self)
1561
1934
  return LONG2FIX(state->buffer_initial_length);
1562
1935
  }
1563
1936
 
1937
+ static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length)
1938
+ {
1939
+ Check_Type(buffer_initial_length, T_FIXNUM);
1940
+ long initial_length = FIX2LONG(buffer_initial_length);
1941
+ if (initial_length > 0) {
1942
+ state->buffer_initial_length = initial_length;
1943
+ }
1944
+ }
1945
+
1564
1946
  /*
1565
1947
  * call-seq: buffer_initial_length=(length)
1566
1948
  *
@@ -1569,16 +1951,76 @@ static VALUE cState_buffer_initial_length(VALUE self)
1569
1951
  */
1570
1952
  static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
1571
1953
  {
1572
- long initial_length;
1573
1954
  GET_STATE(self);
1574
- Check_Type(buffer_initial_length, T_FIXNUM);
1575
- initial_length = FIX2LONG(buffer_initial_length);
1576
- if (initial_length > 0) {
1577
- state->buffer_initial_length = initial_length;
1578
- }
1955
+ buffer_initial_length_set(state, buffer_initial_length);
1579
1956
  return Qnil;
1580
1957
  }
1581
1958
 
1959
+ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
1960
+ {
1961
+ JSON_Generator_State *state = (JSON_Generator_State *)_arg;
1962
+
1963
+ if (key == sym_indent) { state->indent = string_config(val); }
1964
+ else if (key == sym_space) { state->space = string_config(val); }
1965
+ else if (key == sym_space_before) { state->space_before = string_config(val); }
1966
+ else if (key == sym_object_nl) { state->object_nl = string_config(val); }
1967
+ else if (key == sym_array_nl) { state->array_nl = string_config(val); }
1968
+ else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
1969
+ else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
1970
+ else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
1971
+ else if (key == sym_depth) { state->depth = long_config(val); }
1972
+ else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); }
1973
+ else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
1974
+ else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
1975
+ else if (key == sym_strict) { state->strict = RTEST(val); }
1976
+ else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
1977
+ return ST_CONTINUE;
1978
+ }
1979
+
1980
+ static void configure_state(JSON_Generator_State *state, VALUE config)
1981
+ {
1982
+ if (!RTEST(config)) return;
1983
+
1984
+ Check_Type(config, T_HASH);
1985
+
1986
+ if (!RHASH_SIZE(config)) return;
1987
+
1988
+ // We assume in most cases few keys are set so it's faster to go over
1989
+ // the provided keys than to check all possible keys.
1990
+ rb_hash_foreach(config, configure_state_i, (VALUE)state);
1991
+ }
1992
+
1993
+ static VALUE cState_configure(VALUE self, VALUE opts)
1994
+ {
1995
+ GET_STATE(self);
1996
+ configure_state(state, opts);
1997
+ return self;
1998
+ }
1999
+
2000
+ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
2001
+ {
2002
+ JSON_Generator_State state = {0};
2003
+ state_init(&state);
2004
+ configure_state(&state, opts);
2005
+
2006
+ char stack_buffer[FBUFFER_STACK_SIZE];
2007
+ FBuffer buffer = {
2008
+ .io = RTEST(io) ? io : Qfalse,
2009
+ };
2010
+ fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
2011
+
2012
+ struct generate_json_data data = {
2013
+ .buffer = &buffer,
2014
+ .vstate = Qfalse,
2015
+ .state = &state,
2016
+ .obj = obj,
2017
+ .func = generate_json,
2018
+ };
2019
+ rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
2020
+
2021
+ return fbuffer_finalize(&buffer);
2022
+ }
2023
+
1582
2024
  /*
1583
2025
  *
1584
2026
  */
@@ -1592,18 +2034,26 @@ void Init_generator(void)
1592
2034
  rb_require("json/common");
1593
2035
 
1594
2036
  mJSON = rb_define_module("JSON");
1595
- mExt = rb_define_module_under(mJSON, "Ext");
1596
- mGenerator = rb_define_module_under(mExt, "Generator");
1597
2037
 
2038
+ rb_global_variable(&cFragment);
2039
+ cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
2040
+
2041
+ VALUE mExt = rb_define_module_under(mJSON, "Ext");
2042
+ VALUE mGenerator = rb_define_module_under(mExt, "Generator");
2043
+
2044
+ rb_global_variable(&eGeneratorError);
1598
2045
  eGeneratorError = rb_path2class("JSON::GeneratorError");
2046
+
2047
+ rb_global_variable(&eNestingError);
1599
2048
  eNestingError = rb_path2class("JSON::NestingError");
1600
- rb_gc_register_mark_object(eGeneratorError);
1601
- rb_gc_register_mark_object(eNestingError);
1602
2049
 
1603
2050
  cState = rb_define_class_under(mGenerator, "State", rb_cObject);
1604
2051
  rb_define_alloc_func(cState, cState_s_allocate);
1605
2052
  rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
1606
2053
  rb_define_method(cState, "initialize", cState_initialize, -1);
2054
+ rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings
2055
+ rb_define_private_method(cState, "_configure", cState_configure, 1);
2056
+
1607
2057
  rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
1608
2058
  rb_define_method(cState, "indent", cState_indent, 0);
1609
2059
  rb_define_method(cState, "indent=", cState_indent_set, 1);
@@ -1615,6 +2065,8 @@ void Init_generator(void)
1615
2065
  rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
1616
2066
  rb_define_method(cState, "array_nl", cState_array_nl, 0);
1617
2067
  rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
2068
+ rb_define_method(cState, "as_json", cState_as_json, 0);
2069
+ rb_define_method(cState, "as_json=", cState_as_json_set, 1);
1618
2070
  rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
1619
2071
  rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
1620
2072
  rb_define_method(cState, "script_safe", cState_script_safe, 0);
@@ -1628,76 +2080,109 @@ void Init_generator(void)
1628
2080
  rb_define_method(cState, "strict=", cState_strict_set, 1);
1629
2081
  rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
1630
2082
  rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
2083
+ rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1);
1631
2084
  rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
2085
+ rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1);
1632
2086
  rb_define_method(cState, "depth", cState_depth, 0);
1633
2087
  rb_define_method(cState, "depth=", cState_depth_set, 1);
1634
2088
  rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
1635
2089
  rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
1636
- rb_define_method(cState, "configure", cState_configure, 1);
1637
- rb_define_alias(cState, "merge", "configure");
1638
- rb_define_method(cState, "to_h", cState_to_h, 0);
1639
- rb_define_alias(cState, "to_hash", "to_h");
1640
- rb_define_method(cState, "[]", cState_aref, 1);
1641
- rb_define_method(cState, "[]=", cState_aset, 2);
1642
- rb_define_method(cState, "generate", cState_generate, 1);
1643
-
1644
- mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
1645
- mObject = rb_define_module_under(mGeneratorMethods, "Object");
2090
+ rb_define_method(cState, "generate", cState_generate, -1);
2091
+ rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
2092
+
2093
+ rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
2094
+
2095
+ VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
2096
+
2097
+ VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object");
1646
2098
  rb_define_method(mObject, "to_json", mObject_to_json, -1);
1647
- mHash = rb_define_module_under(mGeneratorMethods, "Hash");
2099
+
2100
+ VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash");
1648
2101
  rb_define_method(mHash, "to_json", mHash_to_json, -1);
1649
- mArray = rb_define_module_under(mGeneratorMethods, "Array");
2102
+
2103
+ VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array");
1650
2104
  rb_define_method(mArray, "to_json", mArray_to_json, -1);
2105
+
1651
2106
  #ifdef RUBY_INTEGER_UNIFICATION
1652
- mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
2107
+ VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
1653
2108
  rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
1654
2109
  #else
1655
- mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
2110
+ VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
1656
2111
  rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
1657
- mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
2112
+
2113
+ VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
1658
2114
  rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
1659
2115
  #endif
1660
- mFloat = rb_define_module_under(mGeneratorMethods, "Float");
2116
+ VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float");
1661
2117
  rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
1662
- mString = rb_define_module_under(mGeneratorMethods, "String");
2118
+
2119
+ VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
1663
2120
  rb_define_singleton_method(mString, "included", mString_included_s, 1);
1664
2121
  rb_define_method(mString, "to_json", mString_to_json, -1);
1665
2122
  rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
1666
2123
  rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
2124
+
1667
2125
  mString_Extend = rb_define_module_under(mString, "Extend");
1668
2126
  rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
1669
- mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
2127
+
2128
+ VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
1670
2129
  rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
1671
- mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
2130
+
2131
+ VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
1672
2132
  rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
1673
- mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
2133
+
2134
+ VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
1674
2135
  rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
1675
2136
 
2137
+ rb_global_variable(&Encoding_UTF_8);
2138
+ Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
2139
+
1676
2140
  i_to_s = rb_intern("to_s");
1677
2141
  i_to_json = rb_intern("to_json");
1678
2142
  i_new = rb_intern("new");
1679
- i_indent = rb_intern("indent");
1680
- i_space = rb_intern("space");
1681
- i_space_before = rb_intern("space_before");
1682
- i_object_nl = rb_intern("object_nl");
1683
- i_array_nl = rb_intern("array_nl");
1684
- i_max_nesting = rb_intern("max_nesting");
1685
- i_script_safe = rb_intern("script_safe");
1686
- i_escape_slash = rb_intern("escape_slash");
1687
- i_strict = rb_intern("strict");
1688
- i_allow_nan = rb_intern("allow_nan");
1689
- i_ascii_only = rb_intern("ascii_only");
1690
- i_depth = rb_intern("depth");
1691
- i_buffer_initial_length = rb_intern("buffer_initial_length");
1692
2143
  i_pack = rb_intern("pack");
1693
2144
  i_unpack = rb_intern("unpack");
1694
2145
  i_create_id = rb_intern("create_id");
1695
2146
  i_extend = rb_intern("extend");
1696
- i_key_p = rb_intern("key?");
1697
- i_aref = rb_intern("[]");
1698
- i_send = rb_intern("__send__");
1699
- i_respond_to_p = rb_intern("respond_to?");
1700
- i_match = rb_intern("match");
1701
- i_keys = rb_intern("keys");
1702
- i_dup = rb_intern("dup");
2147
+ i_encode = rb_intern("encode");
2148
+
2149
+ sym_indent = ID2SYM(rb_intern("indent"));
2150
+ sym_space = ID2SYM(rb_intern("space"));
2151
+ sym_space_before = ID2SYM(rb_intern("space_before"));
2152
+ sym_object_nl = ID2SYM(rb_intern("object_nl"));
2153
+ sym_array_nl = ID2SYM(rb_intern("array_nl"));
2154
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
2155
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
2156
+ sym_ascii_only = ID2SYM(rb_intern("ascii_only"));
2157
+ sym_depth = ID2SYM(rb_intern("depth"));
2158
+ sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length"));
2159
+ sym_script_safe = ID2SYM(rb_intern("script_safe"));
2160
+ sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
2161
+ sym_strict = ID2SYM(rb_intern("strict"));
2162
+ sym_as_json = ID2SYM(rb_intern("as_json"));
2163
+
2164
+ usascii_encindex = rb_usascii_encindex();
2165
+ utf8_encindex = rb_utf8_encindex();
2166
+ binary_encindex = rb_ascii8bit_encindex();
2167
+
2168
+ rb_require("json/ext/generator/state");
2169
+
2170
+
2171
+ switch(find_simd_implementation()) {
2172
+ #ifdef HAVE_SIMD
2173
+ #ifdef HAVE_SIMD_NEON
2174
+ case SIMD_NEON:
2175
+ search_escape_basic_impl = search_escape_basic_neon;
2176
+ break;
2177
+ #endif /* HAVE_SIMD_NEON */
2178
+ #ifdef HAVE_SIMD_SSE2
2179
+ case SIMD_SSE2:
2180
+ search_escape_basic_impl = search_escape_basic_sse2;
2181
+ break;
2182
+ #endif /* HAVE_SIMD_SSE2 */
2183
+ #endif /* HAVE_SIMD */
2184
+ default:
2185
+ search_escape_basic_impl = search_escape_basic;
2186
+ break;
2187
+ }
1703
2188
  }