json 2.7.3 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +114 -0
- data/LEGAL +0 -52
- data/README.md +61 -57
- data/ext/json/ext/fbuffer/fbuffer.h +156 -66
- data/ext/json/ext/generator/extconf.rb +29 -0
- data/ext/json/ext/generator/generator.c +1166 -478
- data/ext/json/ext/generator/simd.h +112 -0
- data/ext/json/ext/parser/extconf.rb +4 -27
- data/ext/json/ext/parser/parser.c +1138 -1971
- data/ext/json/ext/vendor/fpconv.c +479 -0
- data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
- data/json.gemspec +7 -5
- data/lib/json/add/bigdecimal.rb +1 -1
- data/lib/json/add/symbol.rb +7 -2
- data/lib/json/common.rb +594 -219
- data/lib/json/ext/generator/state.rb +2 -31
- data/lib/json/ext.rb +28 -11
- data/lib/json/{pure → truffle_ruby}/generator.rb +264 -154
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +15 -20
- metadata +11 -17
- data/ext/json/ext/generator/generator.h +0 -129
- data/ext/json/ext/parser/parser.h +0 -60
- data/ext/json/ext/parser/parser.rl +0 -997
- data/lib/json/pure/parser.rb +0 -331
- data/lib/json/pure.rb +0 -16
@@ -1,13 +1,205 @@
|
|
1
|
+
#include "ruby.h"
|
1
2
|
#include "../fbuffer/fbuffer.h"
|
2
|
-
#include "
|
3
|
+
#include "../vendor/fpconv.c"
|
4
|
+
|
5
|
+
#include <math.h>
|
6
|
+
#include <ctype.h>
|
7
|
+
|
8
|
+
#include "simd.h"
|
9
|
+
|
10
|
+
/* ruby api and some helpers */
|
11
|
+
|
12
|
+
typedef struct JSON_Generator_StateStruct {
|
13
|
+
VALUE indent;
|
14
|
+
VALUE space;
|
15
|
+
VALUE space_before;
|
16
|
+
VALUE object_nl;
|
17
|
+
VALUE array_nl;
|
18
|
+
VALUE as_json;
|
19
|
+
|
20
|
+
long max_nesting;
|
21
|
+
long depth;
|
22
|
+
long buffer_initial_length;
|
23
|
+
|
24
|
+
bool allow_nan;
|
25
|
+
bool ascii_only;
|
26
|
+
bool script_safe;
|
27
|
+
bool strict;
|
28
|
+
} JSON_Generator_State;
|
3
29
|
|
4
30
|
#ifndef RB_UNLIKELY
|
5
31
|
#define RB_UNLIKELY(cond) (cond)
|
6
32
|
#endif
|
7
33
|
|
8
|
-
static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
|
34
|
+
static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
|
9
35
|
|
10
36
|
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
|
37
|
+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
|
38
|
+
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
|
39
|
+
|
40
|
+
|
41
|
+
#define GET_STATE_TO(self, state) \
|
42
|
+
TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state)
|
43
|
+
|
44
|
+
#define GET_STATE(self) \
|
45
|
+
JSON_Generator_State *state; \
|
46
|
+
GET_STATE_TO(self, state)
|
47
|
+
|
48
|
+
struct generate_json_data;
|
49
|
+
|
50
|
+
typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
51
|
+
|
52
|
+
struct generate_json_data {
|
53
|
+
FBuffer *buffer;
|
54
|
+
VALUE vstate;
|
55
|
+
JSON_Generator_State *state;
|
56
|
+
VALUE obj;
|
57
|
+
generator_func func;
|
58
|
+
};
|
59
|
+
|
60
|
+
static VALUE cState_from_state_s(VALUE self, VALUE opts);
|
61
|
+
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
|
62
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
63
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
64
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
65
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
66
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
67
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
68
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
69
|
+
#ifdef RUBY_INTEGER_UNIFICATION
|
70
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
71
|
+
#endif
|
72
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
73
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
74
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
75
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
76
|
+
|
77
|
+
static int usascii_encindex, utf8_encindex, binary_encindex;
|
78
|
+
|
79
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
80
|
+
RBIMPL_ATTR_NORETURN()
|
81
|
+
#endif
|
82
|
+
static void raise_generator_error_str(VALUE invalid_object, VALUE str)
|
83
|
+
{
|
84
|
+
VALUE exc = rb_exc_new_str(eGeneratorError, str);
|
85
|
+
rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
|
86
|
+
rb_exc_raise(exc);
|
87
|
+
}
|
88
|
+
|
89
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
90
|
+
RBIMPL_ATTR_NORETURN()
|
91
|
+
#endif
|
92
|
+
#ifdef RBIMPL_ATTR_FORMAT
|
93
|
+
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
|
94
|
+
#endif
|
95
|
+
static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
|
96
|
+
{
|
97
|
+
va_list args;
|
98
|
+
va_start(args, fmt);
|
99
|
+
VALUE str = rb_vsprintf(fmt, args);
|
100
|
+
va_end(args);
|
101
|
+
raise_generator_error_str(invalid_object, str);
|
102
|
+
}
|
103
|
+
|
104
|
+
// 0 - single-byte chars that don't need to be escaped.
|
105
|
+
// (x | 8) - char that needs to be escaped.
|
106
|
+
static const unsigned char CHAR_LENGTH_MASK = 7;
|
107
|
+
static const unsigned char ESCAPE_MASK = 8;
|
108
|
+
|
109
|
+
typedef struct _search_state {
|
110
|
+
const char *ptr;
|
111
|
+
const char *end;
|
112
|
+
const char *cursor;
|
113
|
+
FBuffer *buffer;
|
114
|
+
|
115
|
+
#ifdef HAVE_SIMD
|
116
|
+
const char *chunk_base;
|
117
|
+
const char *chunk_end;
|
118
|
+
bool has_matches;
|
119
|
+
|
120
|
+
#if defined(HAVE_SIMD_NEON)
|
121
|
+
uint64_t matches_mask;
|
122
|
+
#elif defined(HAVE_SIMD_SSE2)
|
123
|
+
int matches_mask;
|
124
|
+
#else
|
125
|
+
#error "Unknown SIMD Implementation."
|
126
|
+
#endif /* HAVE_SIMD_NEON */
|
127
|
+
#endif /* HAVE_SIMD */
|
128
|
+
} search_state;
|
129
|
+
|
130
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
131
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
132
|
+
#else
|
133
|
+
#define FORCE_INLINE
|
134
|
+
#endif
|
135
|
+
|
136
|
+
static inline FORCE_INLINE void search_flush(search_state *search)
|
137
|
+
{
|
138
|
+
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
139
|
+
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
140
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
141
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
142
|
+
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
143
|
+
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
144
|
+
if (search->ptr > search->cursor) {
|
145
|
+
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
|
146
|
+
search->cursor = search->ptr;
|
147
|
+
}
|
148
|
+
}
|
149
|
+
|
150
|
+
static const unsigned char escape_table_basic[256] = {
|
151
|
+
// ASCII Control Characters
|
152
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
153
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
154
|
+
// ASCII Characters
|
155
|
+
0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
|
156
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
157
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
158
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
|
159
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
160
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
161
|
+
};
|
162
|
+
|
163
|
+
static unsigned char (*search_escape_basic_impl)(search_state *);
|
164
|
+
|
165
|
+
static inline unsigned char search_escape_basic(search_state *search)
|
166
|
+
{
|
167
|
+
while (search->ptr < search->end) {
|
168
|
+
if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
|
169
|
+
search_flush(search);
|
170
|
+
return 1;
|
171
|
+
} else {
|
172
|
+
search->ptr++;
|
173
|
+
}
|
174
|
+
}
|
175
|
+
search_flush(search);
|
176
|
+
return 0;
|
177
|
+
}
|
178
|
+
|
179
|
+
static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
|
180
|
+
{
|
181
|
+
const unsigned char ch = (unsigned char)*search->ptr;
|
182
|
+
switch (ch) {
|
183
|
+
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
184
|
+
case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
|
185
|
+
case '/': fbuffer_append(search->buffer, "\\/", 2); break;
|
186
|
+
case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
|
187
|
+
case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
|
188
|
+
case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
|
189
|
+
case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
|
190
|
+
case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
|
191
|
+
default: {
|
192
|
+
const char *hexdig = "0123456789abcdef";
|
193
|
+
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
|
194
|
+
scratch[4] = hexdig[(ch >> 4) & 0xf];
|
195
|
+
scratch[5] = hexdig[ch & 0xf];
|
196
|
+
fbuffer_append(search->buffer, scratch, 6);
|
197
|
+
break;
|
198
|
+
}
|
199
|
+
}
|
200
|
+
search->ptr++;
|
201
|
+
search->cursor = search->ptr;
|
202
|
+
}
|
11
203
|
|
12
204
|
/* Converts in_string to a JSON string (without the wrapping '"'
|
13
205
|
* characters) in FBuffer out_buffer.
|
@@ -19,296 +211,516 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_e
|
|
19
211
|
*
|
20
212
|
* - If out_ascii_only: non-ASCII characters (>0x7F)
|
21
213
|
*
|
22
|
-
* - If
|
214
|
+
* - If script_safe: forwardslash (/), line separator (U+2028), and
|
23
215
|
* paragraph separator (U+2029)
|
24
216
|
*
|
25
217
|
* Everything else (should be UTF-8) is just passed through and
|
26
218
|
* appended to the result.
|
27
219
|
*/
|
28
|
-
static void convert_UTF8_to_JSON(
|
220
|
+
static inline void convert_UTF8_to_JSON(search_state *search)
|
29
221
|
{
|
30
|
-
|
31
|
-
|
222
|
+
while (search_escape_basic_impl(search)) {
|
223
|
+
escape_UTF8_char_basic(search);
|
224
|
+
}
|
225
|
+
}
|
32
226
|
|
33
|
-
|
34
|
-
|
227
|
+
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
228
|
+
{
|
229
|
+
const unsigned char ch = (unsigned char)*search->ptr;
|
230
|
+
switch (ch_len) {
|
231
|
+
case 1: {
|
232
|
+
switch (ch) {
|
233
|
+
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
234
|
+
case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
|
235
|
+
case '/': fbuffer_append(search->buffer, "\\/", 2); break;
|
236
|
+
case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
|
237
|
+
case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
|
238
|
+
case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
|
239
|
+
case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
|
240
|
+
case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
|
241
|
+
default: {
|
242
|
+
const char *hexdig = "0123456789abcdef";
|
243
|
+
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
|
244
|
+
scratch[4] = hexdig[(ch >> 4) & 0xf];
|
245
|
+
scratch[5] = hexdig[ch & 0xf];
|
246
|
+
fbuffer_append(search->buffer, scratch, 6);
|
247
|
+
break;
|
248
|
+
}
|
249
|
+
}
|
250
|
+
break;
|
251
|
+
}
|
252
|
+
case 3: {
|
253
|
+
if (search->ptr[2] & 1) {
|
254
|
+
fbuffer_append(search->buffer, "\\u2029", 6);
|
255
|
+
} else {
|
256
|
+
fbuffer_append(search->buffer, "\\u2028", 6);
|
257
|
+
}
|
258
|
+
break;
|
259
|
+
}
|
260
|
+
}
|
261
|
+
search->cursor = (search->ptr += ch_len);
|
262
|
+
}
|
35
263
|
|
36
|
-
|
264
|
+
#ifdef HAVE_SIMD
|
37
265
|
|
38
|
-
|
266
|
+
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
267
|
+
{
|
268
|
+
// Flush the buffer so everything up until the last 'len' characters are unflushed.
|
269
|
+
search_flush(search);
|
39
270
|
|
40
|
-
|
41
|
-
|
42
|
-
unsigned char ch_len = escape_table[ch];
|
43
|
-
/* JSON encoding */
|
271
|
+
FBuffer *buf = search->buffer;
|
272
|
+
fbuffer_inc_capa(buf, vec_len);
|
44
273
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
274
|
+
char *s = (buf->ptr + buf->len);
|
275
|
+
|
276
|
+
// Pad the buffer with dummy characters that won't need escaping.
|
277
|
+
// This seems wasteful at first sight, but memset of vector length is very fast.
|
278
|
+
memset(s, 'X', vec_len);
|
279
|
+
|
280
|
+
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
|
281
|
+
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
|
282
|
+
MEMCPY(s, search->ptr, char, len);
|
283
|
+
|
284
|
+
return s;
|
285
|
+
}
|
286
|
+
|
287
|
+
#ifdef HAVE_SIMD_NEON
|
288
|
+
|
289
|
+
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
290
|
+
{
|
291
|
+
uint64_t mask = search->matches_mask;
|
292
|
+
uint32_t index = trailing_zeros64(mask) >> 2;
|
293
|
+
|
294
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
295
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
296
|
+
// search->chunk_base + index >= search->ptr
|
297
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
298
|
+
// is one byte after the previous match then:
|
299
|
+
// search->chunk_base + index == search->ptr
|
300
|
+
search->ptr = search->chunk_base + index;
|
301
|
+
mask &= mask - 1;
|
302
|
+
search->matches_mask = mask;
|
303
|
+
search_flush(search);
|
304
|
+
return 1;
|
305
|
+
}
|
306
|
+
|
307
|
+
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
308
|
+
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
309
|
+
{
|
310
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
311
|
+
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
312
|
+
return mask & 0x8888888888888888ull;
|
313
|
+
}
|
314
|
+
|
315
|
+
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
316
|
+
{
|
317
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
318
|
+
|
319
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
320
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
321
|
+
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
322
|
+
|
323
|
+
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
324
|
+
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
325
|
+
|
326
|
+
return neon_match_mask(needs_escape);
|
327
|
+
}
|
328
|
+
|
329
|
+
static inline unsigned char search_escape_basic_neon(search_state *search)
|
330
|
+
{
|
331
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
332
|
+
// There are more matches if search->matches_mask > 0.
|
333
|
+
if (search->matches_mask > 0) {
|
334
|
+
return neon_next_match(search);
|
92
335
|
} else {
|
93
|
-
|
336
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
337
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
338
|
+
search->has_matches = false;
|
339
|
+
search->ptr = search->chunk_end;
|
340
|
+
}
|
341
|
+
}
|
342
|
+
|
343
|
+
/*
|
344
|
+
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
345
|
+
* need to be escaped.
|
346
|
+
*
|
347
|
+
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
348
|
+
*
|
349
|
+
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
350
|
+
* the vector instructions may work on larger vectors.
|
351
|
+
*
|
352
|
+
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
353
|
+
*
|
354
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
355
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
356
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
357
|
+
*
|
358
|
+
* Next we load the first chunk of the ptr:
|
359
|
+
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
360
|
+
*
|
361
|
+
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
362
|
+
* as no bytes are less than 32 (0x20):
|
363
|
+
* [0 0 0 0 0 0 0 0]
|
364
|
+
*
|
365
|
+
* Next, we check if any byte in chunk is equal to a backslash:
|
366
|
+
* [0 0 0 FF 0 0 0 0]
|
367
|
+
*
|
368
|
+
* Finally we check if any byte in chunk is equal to a double quote:
|
369
|
+
* [FF 0 0 0 0 0 0 0]
|
370
|
+
*
|
371
|
+
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
372
|
+
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
373
|
+
* This is the needs_escape vector and it is equal to:
|
374
|
+
* [FF 0 0 FF 0 0 0 0]
|
375
|
+
*
|
376
|
+
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
377
|
+
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
378
|
+
*
|
379
|
+
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
380
|
+
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
381
|
+
* have at least one byte that needs to be escaped.
|
382
|
+
*/
|
383
|
+
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
384
|
+
uint64_t mask = neon_rules_update(search->ptr);
|
385
|
+
|
386
|
+
if (!mask) {
|
387
|
+
search->ptr += sizeof(uint8x16_t);
|
388
|
+
continue;
|
389
|
+
}
|
390
|
+
search->matches_mask = mask;
|
391
|
+
search->has_matches = true;
|
392
|
+
search->chunk_base = search->ptr;
|
393
|
+
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
394
|
+
return neon_next_match(search);
|
395
|
+
}
|
396
|
+
|
397
|
+
// There are fewer than 16 bytes left.
|
398
|
+
unsigned long remaining = (search->end - search->ptr);
|
399
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
400
|
+
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
401
|
+
|
402
|
+
uint64_t mask = neon_rules_update(s);
|
403
|
+
|
404
|
+
if (!mask) {
|
405
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
406
|
+
// search->cursor to search->ptr.
|
407
|
+
search->buffer->len += remaining;
|
408
|
+
search->ptr = search->end;
|
409
|
+
search->cursor = search->end;
|
410
|
+
return 0;
|
94
411
|
}
|
412
|
+
|
413
|
+
search->matches_mask = mask;
|
414
|
+
search->has_matches = true;
|
415
|
+
search->chunk_end = search->end;
|
416
|
+
search->chunk_base = search->ptr;
|
417
|
+
return neon_next_match(search);
|
95
418
|
}
|
96
|
-
#undef FLUSH_POS
|
97
419
|
|
98
|
-
if (
|
99
|
-
|
420
|
+
if (search->ptr < search->end) {
|
421
|
+
return search_escape_basic(search);
|
100
422
|
}
|
101
423
|
|
102
|
-
|
424
|
+
search_flush(search);
|
425
|
+
return 0;
|
103
426
|
}
|
427
|
+
#endif /* HAVE_SIMD_NEON */
|
104
428
|
|
105
|
-
|
106
|
-
// ASCII Control Characters
|
107
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
108
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
109
|
-
// ASCII Characters
|
110
|
-
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0, // '"'
|
111
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
112
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
113
|
-
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
|
114
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
115
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
116
|
-
// Continuation byte
|
117
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
118
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
119
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
120
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
121
|
-
// First byte of a 2-byte code point
|
122
|
-
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
123
|
-
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
124
|
-
// First byte of a 4-byte code point
|
125
|
-
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
126
|
-
//First byte of a 4+byte code point
|
127
|
-
4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
|
128
|
-
};
|
429
|
+
#ifdef HAVE_SIMD_SSE2
|
129
430
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
// ASCII Characters
|
135
|
-
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1, // '"' and '/'
|
136
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
137
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
138
|
-
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, // '\\'
|
139
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
140
|
-
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
141
|
-
// Continuation byte
|
142
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
143
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
144
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
145
|
-
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
146
|
-
// First byte of a 2-byte code point
|
147
|
-
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
148
|
-
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
149
|
-
// First byte of a 4-byte code point
|
150
|
-
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
|
151
|
-
//First byte of a 4+byte code point
|
152
|
-
4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
|
153
|
-
};
|
431
|
+
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
432
|
+
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
433
|
+
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
434
|
+
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
154
435
|
|
155
|
-
static
|
436
|
+
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
156
437
|
{
|
157
|
-
|
158
|
-
|
438
|
+
int mask = search->matches_mask;
|
439
|
+
int index = trailing_zeros(mask);
|
159
440
|
|
160
|
-
|
161
|
-
|
441
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
442
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
443
|
+
// search->chunk_base + index >= search->ptr
|
444
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
445
|
+
// is one byte after the previous match then:
|
446
|
+
// search->chunk_base + index == search->ptr
|
447
|
+
search->ptr = search->chunk_base + index;
|
448
|
+
mask &= mask - 1;
|
449
|
+
search->matches_mask = mask;
|
450
|
+
search_flush(search);
|
451
|
+
return 1;
|
452
|
+
}
|
162
453
|
|
163
|
-
|
454
|
+
#if defined(__clang__) || defined(__GNUC__)
|
455
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
456
|
+
#else
|
457
|
+
#define TARGET_SSE2
|
458
|
+
#endif
|
164
459
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
if (escape_table[ch]) {
|
169
|
-
if (pos > beg) {
|
170
|
-
fbuffer_append(out_buffer, &ptr[beg], pos - beg);
|
171
|
-
}
|
460
|
+
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
|
461
|
+
{
|
462
|
+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
172
463
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
464
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
465
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
466
|
+
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
467
|
+
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
468
|
+
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
469
|
+
return _mm_movemask_epi8(needs_escape);
|
470
|
+
}
|
471
|
+
|
472
|
+
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
473
|
+
{
|
474
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
475
|
+
// There are more matches if search->matches_mask > 0.
|
476
|
+
if (search->matches_mask > 0) {
|
477
|
+
return sse2_next_match(search);
|
478
|
+
} else {
|
479
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
480
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
481
|
+
search->has_matches = false;
|
482
|
+
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
483
|
+
search->ptr = search->end;
|
484
|
+
} else {
|
485
|
+
search->ptr = search->chunk_base + sizeof(__m128i);
|
189
486
|
}
|
190
487
|
}
|
488
|
+
}
|
489
|
+
|
490
|
+
while (search->ptr + sizeof(__m128i) <= search->end) {
|
491
|
+
int needs_escape_mask = sse2_update(search->ptr);
|
492
|
+
|
493
|
+
if (needs_escape_mask == 0) {
|
494
|
+
search->ptr += sizeof(__m128i);
|
495
|
+
continue;
|
496
|
+
}
|
191
497
|
|
192
|
-
|
498
|
+
search->has_matches = true;
|
499
|
+
search->matches_mask = needs_escape_mask;
|
500
|
+
search->chunk_base = search->ptr;
|
501
|
+
return sse2_next_match(search);
|
193
502
|
}
|
194
503
|
|
195
|
-
|
196
|
-
|
504
|
+
// There are fewer than 16 bytes left.
|
505
|
+
unsigned long remaining = (search->end - search->ptr);
|
506
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
507
|
+
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
508
|
+
|
509
|
+
int needs_escape_mask = sse2_update(s);
|
510
|
+
|
511
|
+
if (needs_escape_mask == 0) {
|
512
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
513
|
+
// search->cursor to search->ptr.
|
514
|
+
search->buffer->len += remaining;
|
515
|
+
search->ptr = search->end;
|
516
|
+
search->cursor = search->end;
|
517
|
+
return 0;
|
518
|
+
}
|
519
|
+
|
520
|
+
search->has_matches = true;
|
521
|
+
search->matches_mask = needs_escape_mask;
|
522
|
+
search->chunk_base = search->ptr;
|
523
|
+
return sse2_next_match(search);
|
197
524
|
}
|
198
525
|
|
199
|
-
|
200
|
-
|
526
|
+
if (search->ptr < search->end) {
|
527
|
+
return search_escape_basic(search);
|
528
|
+
}
|
201
529
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
|
530
|
+
search_flush(search);
|
531
|
+
return 0;
|
532
|
+
}
|
206
533
|
|
207
|
-
|
208
|
-
unsigned long len = RSTRING_LEN(str);
|
534
|
+
#endif /* HAVE_SIMD_SSE2 */
|
209
535
|
|
210
|
-
|
536
|
+
#endif /* HAVE_SIMD */
|
211
537
|
|
212
|
-
|
538
|
+
static const unsigned char script_safe_escape_table[256] = {
|
539
|
+
// ASCII Control Characters
|
540
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
541
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
542
|
+
// ASCII Characters
|
543
|
+
0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
|
544
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
545
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
546
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
|
547
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
548
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
549
|
+
// Continuation byte
|
550
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
551
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
552
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
553
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
554
|
+
// First byte of a 2-byte code point
|
555
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
556
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
557
|
+
// First byte of a 3-byte code point
|
558
|
+
3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029
|
559
|
+
//First byte of a 4+ byte code point
|
560
|
+
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
|
561
|
+
};
|
213
562
|
|
214
|
-
|
215
|
-
|
216
|
-
|
563
|
+
static inline unsigned char search_script_safe_escape(search_state *search)
|
564
|
+
{
|
565
|
+
while (search->ptr < search->end) {
|
566
|
+
unsigned char ch = (unsigned char)*search->ptr;
|
567
|
+
unsigned char ch_len = script_safe_escape_table[ch];
|
217
568
|
|
218
569
|
if (RB_UNLIKELY(ch_len)) {
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
switch (ch) {
|
226
|
-
case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
|
227
|
-
case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
|
228
|
-
case '/': fbuffer_append(out_buffer, "\\/", 2); break;
|
229
|
-
case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
|
230
|
-
case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
|
231
|
-
case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
|
232
|
-
case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
|
233
|
-
case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
|
234
|
-
default: {
|
235
|
-
scratch[2] = hexdig[ch >> 12];
|
236
|
-
scratch[3] = hexdig[(ch >> 8) & 0xf];
|
237
|
-
scratch[4] = hexdig[(ch >> 4) & 0xf];
|
238
|
-
scratch[5] = hexdig[ch & 0xf];
|
239
|
-
fbuffer_append(out_buffer, scratch, 6);
|
240
|
-
break;
|
241
|
-
}
|
570
|
+
if (ch_len & ESCAPE_MASK) {
|
571
|
+
if (RB_UNLIKELY(ch_len == 11)) {
|
572
|
+
const unsigned char *uptr = (const unsigned char *)search->ptr;
|
573
|
+
if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
|
574
|
+
search->ptr += 3;
|
575
|
+
continue;
|
242
576
|
}
|
243
|
-
break;
|
244
577
|
}
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
}
|
578
|
+
search_flush(search);
|
579
|
+
return ch_len & CHAR_LENGTH_MASK;
|
580
|
+
} else {
|
581
|
+
search->ptr += ch_len;
|
582
|
+
}
|
583
|
+
} else {
|
584
|
+
search->ptr++;
|
585
|
+
}
|
586
|
+
}
|
587
|
+
search_flush(search);
|
588
|
+
return 0;
|
589
|
+
}
|
258
590
|
|
259
|
-
|
260
|
-
|
261
|
-
|
591
|
+
static void convert_UTF8_to_script_safe_JSON(search_state *search)
|
592
|
+
{
|
593
|
+
unsigned char ch_len;
|
594
|
+
while ((ch_len = search_script_safe_escape(search))) {
|
595
|
+
escape_UTF8_char(search, ch_len);
|
596
|
+
}
|
597
|
+
}
|
262
598
|
|
263
|
-
|
599
|
+
static const unsigned char ascii_only_escape_table[256] = {
|
600
|
+
// ASCII Control Characters
|
601
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
602
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
603
|
+
// ASCII Characters
|
604
|
+
0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
|
605
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
606
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
607
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
|
608
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
609
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
610
|
+
// Continuation byte
|
611
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
612
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
613
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
614
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
615
|
+
// First byte of a 2-byte code point
|
616
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
617
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
618
|
+
// First byte of a 3-byte code point
|
619
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
620
|
+
//First byte of a 4+ byte code point
|
621
|
+
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
|
622
|
+
};
|
264
623
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
fbuffer_append(out_buffer, scratch, 6);
|
271
|
-
} else {
|
272
|
-
uint16_t hi, lo;
|
273
|
-
wchar -= 0x10000;
|
274
|
-
hi = 0xD800 + (uint16_t)(wchar >> 10);
|
275
|
-
lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
|
276
|
-
|
277
|
-
scratch[2] = hexdig[hi >> 12];
|
278
|
-
scratch[3] = hexdig[(hi >> 8) & 0xf];
|
279
|
-
scratch[4] = hexdig[(hi >> 4) & 0xf];
|
280
|
-
scratch[5] = hexdig[hi & 0xf];
|
281
|
-
|
282
|
-
scratch[8] = hexdig[lo >> 12];
|
283
|
-
scratch[9] = hexdig[(lo >> 8) & 0xf];
|
284
|
-
scratch[10] = hexdig[(lo >> 4) & 0xf];
|
285
|
-
scratch[11] = hexdig[lo & 0xf];
|
286
|
-
|
287
|
-
fbuffer_append(out_buffer, scratch, 12);
|
288
|
-
}
|
624
|
+
static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
|
625
|
+
{
|
626
|
+
while (search->ptr < search->end) {
|
627
|
+
unsigned char ch = (unsigned char)*search->ptr;
|
628
|
+
unsigned char ch_len = escape_table[ch];
|
289
629
|
|
630
|
+
if (RB_UNLIKELY(ch_len)) {
|
631
|
+
search_flush(search);
|
632
|
+
return ch_len & CHAR_LENGTH_MASK;
|
633
|
+
} else {
|
634
|
+
search->ptr++;
|
635
|
+
}
|
636
|
+
}
|
637
|
+
search_flush(search);
|
638
|
+
return 0;
|
639
|
+
}
|
640
|
+
|
641
|
+
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
|
642
|
+
const unsigned char ch = (unsigned char)*search->ptr;
|
643
|
+
switch (ch_len) {
|
644
|
+
case 1: {
|
645
|
+
switch (ch) {
|
646
|
+
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
647
|
+
case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
|
648
|
+
case '/': fbuffer_append(search->buffer, "\\/", 2); break;
|
649
|
+
case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
|
650
|
+
case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
|
651
|
+
case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
|
652
|
+
case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
|
653
|
+
case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
|
654
|
+
default: {
|
655
|
+
const char *hexdig = "0123456789abcdef";
|
656
|
+
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
|
657
|
+
scratch[4] = hexdig[(ch >> 4) & 0xf];
|
658
|
+
scratch[5] = hexdig[ch & 0xf];
|
659
|
+
fbuffer_append(search->buffer, scratch, 6);
|
290
660
|
break;
|
291
661
|
}
|
292
662
|
}
|
293
|
-
|
294
|
-
pos++;
|
663
|
+
break;
|
295
664
|
}
|
296
|
-
|
297
|
-
|
665
|
+
default: {
|
666
|
+
const char *hexdig = "0123456789abcdef";
|
667
|
+
char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
|
298
668
|
|
299
|
-
|
300
|
-
|
301
|
-
|
669
|
+
uint32_t wchar = 0;
|
670
|
+
|
671
|
+
switch(ch_len) {
|
672
|
+
case 2:
|
673
|
+
wchar = ch & 0x1F;
|
674
|
+
break;
|
675
|
+
case 3:
|
676
|
+
wchar = ch & 0x0F;
|
677
|
+
break;
|
678
|
+
case 4:
|
679
|
+
wchar = ch & 0x07;
|
680
|
+
break;
|
681
|
+
}
|
682
|
+
|
683
|
+
for (short i = 1; i < ch_len; i++) {
|
684
|
+
wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
|
685
|
+
}
|
686
|
+
|
687
|
+
if (wchar <= 0xFFFF) {
|
688
|
+
scratch[2] = hexdig[wchar >> 12];
|
689
|
+
scratch[3] = hexdig[(wchar >> 8) & 0xf];
|
690
|
+
scratch[4] = hexdig[(wchar >> 4) & 0xf];
|
691
|
+
scratch[5] = hexdig[wchar & 0xf];
|
692
|
+
fbuffer_append(search->buffer, scratch, 6);
|
693
|
+
} else {
|
694
|
+
uint16_t hi, lo;
|
695
|
+
wchar -= 0x10000;
|
696
|
+
hi = 0xD800 + (uint16_t)(wchar >> 10);
|
697
|
+
lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
|
698
|
+
|
699
|
+
scratch[2] = hexdig[hi >> 12];
|
700
|
+
scratch[3] = hexdig[(hi >> 8) & 0xf];
|
701
|
+
scratch[4] = hexdig[(hi >> 4) & 0xf];
|
702
|
+
scratch[5] = hexdig[hi & 0xf];
|
703
|
+
|
704
|
+
scratch[8] = hexdig[lo >> 12];
|
705
|
+
scratch[9] = hexdig[(lo >> 8) & 0xf];
|
706
|
+
scratch[10] = hexdig[(lo >> 4) & 0xf];
|
707
|
+
scratch[11] = hexdig[lo & 0xf];
|
708
|
+
|
709
|
+
fbuffer_append(search->buffer, scratch, 12);
|
710
|
+
}
|
302
711
|
|
303
|
-
|
712
|
+
break;
|
713
|
+
}
|
714
|
+
}
|
715
|
+
search->cursor = (search->ptr += ch_len);
|
304
716
|
}
|
305
717
|
|
306
|
-
static
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
718
|
+
static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
|
719
|
+
{
|
720
|
+
unsigned char ch_len;
|
721
|
+
while ((ch_len = search_ascii_only_escape(search, escape_table))) {
|
722
|
+
full_escape_UTF8_char(search, ch_len);
|
723
|
+
}
|
312
724
|
}
|
313
725
|
|
314
726
|
/*
|
@@ -403,7 +815,9 @@ static char *fstrndup(const char *ptr, unsigned long len) {
|
|
403
815
|
*/
|
404
816
|
static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
405
817
|
{
|
406
|
-
|
818
|
+
rb_check_arity(argc, 0, 1);
|
819
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
820
|
+
return cState_partial_generate(Vstate, self, generate_json_object, Qfalse);
|
407
821
|
}
|
408
822
|
|
409
823
|
/*
|
@@ -415,7 +829,9 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
|
415
829
|
* produced JSON string output further.
|
416
830
|
*/
|
417
831
|
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
|
418
|
-
|
832
|
+
rb_check_arity(argc, 0, 1);
|
833
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
834
|
+
return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
|
419
835
|
}
|
420
836
|
|
421
837
|
#ifdef RUBY_INTEGER_UNIFICATION
|
@@ -426,7 +842,9 @@ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
|
|
426
842
|
*/
|
427
843
|
static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
|
428
844
|
{
|
429
|
-
|
845
|
+
rb_check_arity(argc, 0, 1);
|
846
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
847
|
+
return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse);
|
430
848
|
}
|
431
849
|
|
432
850
|
#else
|
@@ -437,7 +855,9 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
|
|
437
855
|
*/
|
438
856
|
static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
|
439
857
|
{
|
440
|
-
|
858
|
+
rb_check_arity(argc, 0, 1);
|
859
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
860
|
+
return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse);
|
441
861
|
}
|
442
862
|
|
443
863
|
/*
|
@@ -447,7 +867,9 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
|
|
447
867
|
*/
|
448
868
|
static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
|
449
869
|
{
|
450
|
-
|
870
|
+
rb_check_arity(argc, 0, 1);
|
871
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
872
|
+
return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse);
|
451
873
|
}
|
452
874
|
#endif
|
453
875
|
|
@@ -458,7 +880,9 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
|
|
458
880
|
*/
|
459
881
|
static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
|
460
882
|
{
|
461
|
-
|
883
|
+
rb_check_arity(argc, 0, 1);
|
884
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
885
|
+
return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
|
462
886
|
}
|
463
887
|
|
464
888
|
/*
|
@@ -481,7 +905,9 @@ static VALUE mString_included_s(VALUE self, VALUE modul) {
|
|
481
905
|
*/
|
482
906
|
static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
|
483
907
|
{
|
484
|
-
|
908
|
+
rb_check_arity(argc, 0, 1);
|
909
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
910
|
+
return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
|
485
911
|
}
|
486
912
|
|
487
913
|
/*
|
@@ -498,7 +924,7 @@ static VALUE mString_to_json_raw_object(VALUE self)
|
|
498
924
|
VALUE result = rb_hash_new();
|
499
925
|
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
|
500
926
|
ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
|
501
|
-
rb_hash_aset(result,
|
927
|
+
rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
|
502
928
|
return result;
|
503
929
|
}
|
504
930
|
|
@@ -536,7 +962,8 @@ static VALUE mString_Extend_json_create(VALUE self, VALUE o)
|
|
536
962
|
*/
|
537
963
|
static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
|
538
964
|
{
|
539
|
-
|
965
|
+
rb_check_arity(argc, 0, 1);
|
966
|
+
return rb_utf8_str_new("true", 4);
|
540
967
|
}
|
541
968
|
|
542
969
|
/*
|
@@ -546,7 +973,8 @@ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
|
|
546
973
|
*/
|
547
974
|
static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
|
548
975
|
{
|
549
|
-
|
976
|
+
rb_check_arity(argc, 0, 1);
|
977
|
+
return rb_utf8_str_new("false", 5);
|
550
978
|
}
|
551
979
|
|
552
980
|
/*
|
@@ -556,7 +984,8 @@ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
|
|
556
984
|
*/
|
557
985
|
static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
|
558
986
|
{
|
559
|
-
|
987
|
+
rb_check_arity(argc, 0, 1);
|
988
|
+
return rb_utf8_str_new("null", 4);
|
560
989
|
}
|
561
990
|
|
562
991
|
/*
|
@@ -573,30 +1002,40 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
|
|
573
1002
|
rb_scan_args(argc, argv, "01", &state);
|
574
1003
|
Check_Type(string, T_STRING);
|
575
1004
|
state = cState_from_state_s(cState, state);
|
576
|
-
return cState_partial_generate(state, string);
|
1005
|
+
return cState_partial_generate(state, string, generate_json_string, Qfalse);
|
1006
|
+
}
|
1007
|
+
|
1008
|
+
static void State_mark(void *ptr)
|
1009
|
+
{
|
1010
|
+
JSON_Generator_State *state = ptr;
|
1011
|
+
rb_gc_mark_movable(state->indent);
|
1012
|
+
rb_gc_mark_movable(state->space);
|
1013
|
+
rb_gc_mark_movable(state->space_before);
|
1014
|
+
rb_gc_mark_movable(state->object_nl);
|
1015
|
+
rb_gc_mark_movable(state->array_nl);
|
1016
|
+
rb_gc_mark_movable(state->as_json);
|
1017
|
+
}
|
1018
|
+
|
1019
|
+
static void State_compact(void *ptr)
|
1020
|
+
{
|
1021
|
+
JSON_Generator_State *state = ptr;
|
1022
|
+
state->indent = rb_gc_location(state->indent);
|
1023
|
+
state->space = rb_gc_location(state->space);
|
1024
|
+
state->space_before = rb_gc_location(state->space_before);
|
1025
|
+
state->object_nl = rb_gc_location(state->object_nl);
|
1026
|
+
state->array_nl = rb_gc_location(state->array_nl);
|
1027
|
+
state->as_json = rb_gc_location(state->as_json);
|
577
1028
|
}
|
578
1029
|
|
579
1030
|
static void State_free(void *ptr)
|
580
1031
|
{
|
581
1032
|
JSON_Generator_State *state = ptr;
|
582
|
-
if (state->indent) ruby_xfree(state->indent);
|
583
|
-
if (state->space) ruby_xfree(state->space);
|
584
|
-
if (state->space_before) ruby_xfree(state->space_before);
|
585
|
-
if (state->object_nl) ruby_xfree(state->object_nl);
|
586
|
-
if (state->array_nl) ruby_xfree(state->array_nl);
|
587
1033
|
ruby_xfree(state);
|
588
1034
|
}
|
589
1035
|
|
590
1036
|
static size_t State_memsize(const void *ptr)
|
591
1037
|
{
|
592
|
-
|
593
|
-
size_t size = sizeof(*state);
|
594
|
-
if (state->indent) size += state->indent_len + 1;
|
595
|
-
if (state->space) size += state->space_len + 1;
|
596
|
-
if (state->space_before) size += state->space_before_len + 1;
|
597
|
-
if (state->object_nl) size += state->object_nl_len + 1;
|
598
|
-
if (state->array_nl) size += state->array_nl_len + 1;
|
599
|
-
return size;
|
1038
|
+
return sizeof(JSON_Generator_State);
|
600
1039
|
}
|
601
1040
|
|
602
1041
|
#ifndef HAVE_RB_EXT_RACTOR_SAFE
|
@@ -606,52 +1045,103 @@ static size_t State_memsize(const void *ptr)
|
|
606
1045
|
|
607
1046
|
static const rb_data_type_t JSON_Generator_State_type = {
|
608
1047
|
"JSON/Generator/State",
|
609
|
-
{
|
1048
|
+
{
|
1049
|
+
.dmark = State_mark,
|
1050
|
+
.dfree = State_free,
|
1051
|
+
.dsize = State_memsize,
|
1052
|
+
.dcompact = State_compact,
|
1053
|
+
},
|
610
1054
|
0, 0,
|
611
1055
|
RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
|
612
1056
|
};
|
613
1057
|
|
1058
|
+
static void state_init(JSON_Generator_State *state)
|
1059
|
+
{
|
1060
|
+
state->max_nesting = 100;
|
1061
|
+
state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
|
1062
|
+
}
|
1063
|
+
|
614
1064
|
static VALUE cState_s_allocate(VALUE klass)
|
615
1065
|
{
|
616
1066
|
JSON_Generator_State *state;
|
617
1067
|
VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
|
618
|
-
state
|
619
|
-
state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
|
1068
|
+
state_init(state);
|
620
1069
|
return obj;
|
621
1070
|
}
|
622
1071
|
|
1072
|
+
static void vstate_spill(struct generate_json_data *data)
|
1073
|
+
{
|
1074
|
+
VALUE vstate = cState_s_allocate(cState);
|
1075
|
+
GET_STATE(vstate);
|
1076
|
+
MEMCPY(state, data->state, JSON_Generator_State, 1);
|
1077
|
+
data->state = state;
|
1078
|
+
data->vstate = vstate;
|
1079
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->indent);
|
1080
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->space);
|
1081
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
|
1082
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
|
1083
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
|
1084
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
|
1085
|
+
}
|
1086
|
+
|
1087
|
+
static inline VALUE vstate_get(struct generate_json_data *data)
|
1088
|
+
{
|
1089
|
+
if (RB_UNLIKELY(!data->vstate)) {
|
1090
|
+
vstate_spill(data);
|
1091
|
+
}
|
1092
|
+
return data->vstate;
|
1093
|
+
}
|
1094
|
+
|
623
1095
|
struct hash_foreach_arg {
|
624
|
-
|
625
|
-
JSON_Generator_State *state;
|
626
|
-
VALUE Vstate;
|
1096
|
+
struct generate_json_data *data;
|
627
1097
|
int iter;
|
628
1098
|
};
|
629
1099
|
|
1100
|
+
static VALUE
|
1101
|
+
convert_string_subclass(VALUE key)
|
1102
|
+
{
|
1103
|
+
VALUE key_to_s = rb_funcall(key, i_to_s, 0);
|
1104
|
+
|
1105
|
+
if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
|
1106
|
+
VALUE cname = rb_obj_class(key);
|
1107
|
+
rb_raise(rb_eTypeError,
|
1108
|
+
"can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
|
1109
|
+
cname, "String", cname, "to_s", rb_obj_class(key_to_s));
|
1110
|
+
}
|
1111
|
+
|
1112
|
+
return key_to_s;
|
1113
|
+
}
|
1114
|
+
|
630
1115
|
static int
|
631
1116
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
632
1117
|
{
|
633
1118
|
struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg;
|
634
|
-
|
635
|
-
|
636
|
-
|
1119
|
+
struct generate_json_data *data = arg->data;
|
1120
|
+
|
1121
|
+
FBuffer *buffer = data->buffer;
|
1122
|
+
JSON_Generator_State *state = data->state;
|
637
1123
|
|
638
1124
|
long depth = state->depth;
|
639
1125
|
int j;
|
640
1126
|
|
641
1127
|
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
|
642
|
-
if (RB_UNLIKELY(state->object_nl)) {
|
643
|
-
|
1128
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
1129
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
644
1130
|
}
|
645
|
-
if (RB_UNLIKELY(state->indent)) {
|
1131
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
646
1132
|
for (j = 0; j < depth; j++) {
|
647
|
-
|
1133
|
+
fbuffer_append_str(buffer, data->state->indent);
|
648
1134
|
}
|
649
1135
|
}
|
650
1136
|
|
651
1137
|
VALUE key_to_s;
|
652
1138
|
switch(rb_type(key)) {
|
653
1139
|
case T_STRING:
|
654
|
-
|
1140
|
+
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
1141
|
+
key_to_s = key;
|
1142
|
+
} else {
|
1143
|
+
key_to_s = convert_string_subclass(key);
|
1144
|
+
}
|
655
1145
|
break;
|
656
1146
|
case T_SYMBOL:
|
657
1147
|
key_to_s = rb_sym2str(key);
|
@@ -661,82 +1151,98 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
661
1151
|
break;
|
662
1152
|
}
|
663
1153
|
|
664
|
-
|
665
|
-
|
1154
|
+
if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
|
1155
|
+
generate_json_string(buffer, data, key_to_s);
|
1156
|
+
} else {
|
1157
|
+
generate_json(buffer, data, key_to_s);
|
1158
|
+
}
|
1159
|
+
if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
|
666
1160
|
fbuffer_append_char(buffer, ':');
|
667
|
-
if (RB_UNLIKELY(state->space))
|
668
|
-
generate_json(buffer,
|
1161
|
+
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
|
1162
|
+
generate_json(buffer, data, val);
|
669
1163
|
|
670
1164
|
arg->iter++;
|
671
1165
|
return ST_CONTINUE;
|
672
1166
|
}
|
673
1167
|
|
674
|
-
static
|
1168
|
+
static inline long increase_depth(struct generate_json_data *data)
|
675
1169
|
{
|
676
|
-
|
1170
|
+
JSON_Generator_State *state = data->state;
|
677
1171
|
long depth = ++state->depth;
|
1172
|
+
if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
|
1173
|
+
rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
|
1174
|
+
}
|
1175
|
+
return depth;
|
1176
|
+
}
|
1177
|
+
|
1178
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1179
|
+
{
|
678
1180
|
int j;
|
679
|
-
|
1181
|
+
long depth = increase_depth(data);
|
680
1182
|
|
681
|
-
if (
|
682
|
-
|
1183
|
+
if (RHASH_SIZE(obj) == 0) {
|
1184
|
+
fbuffer_append(buffer, "{}", 2);
|
1185
|
+
--data->state->depth;
|
1186
|
+
return;
|
683
1187
|
}
|
1188
|
+
|
684
1189
|
fbuffer_append_char(buffer, '{');
|
685
1190
|
|
686
|
-
arg
|
687
|
-
|
688
|
-
|
689
|
-
|
1191
|
+
struct hash_foreach_arg arg = {
|
1192
|
+
.data = data,
|
1193
|
+
.iter = 0,
|
1194
|
+
};
|
690
1195
|
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
|
691
1196
|
|
692
|
-
depth = --state->depth;
|
693
|
-
if (RB_UNLIKELY(state->object_nl)) {
|
694
|
-
|
695
|
-
if (RB_UNLIKELY(state->indent)) {
|
1197
|
+
depth = --data->state->depth;
|
1198
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
1199
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
1200
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
696
1201
|
for (j = 0; j < depth; j++) {
|
697
|
-
|
1202
|
+
fbuffer_append_str(buffer, data->state->indent);
|
698
1203
|
}
|
699
1204
|
}
|
700
1205
|
}
|
701
1206
|
fbuffer_append_char(buffer, '}');
|
702
1207
|
}
|
703
1208
|
|
704
|
-
static void generate_json_array(FBuffer *buffer,
|
1209
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
705
1210
|
{
|
706
|
-
long max_nesting = state->max_nesting;
|
707
|
-
long depth = ++state->depth;
|
708
1211
|
int i, j;
|
709
|
-
|
710
|
-
|
1212
|
+
long depth = increase_depth(data);
|
1213
|
+
|
1214
|
+
if (RARRAY_LEN(obj) == 0) {
|
1215
|
+
fbuffer_append(buffer, "[]", 2);
|
1216
|
+
--data->state->depth;
|
1217
|
+
return;
|
711
1218
|
}
|
1219
|
+
|
712
1220
|
fbuffer_append_char(buffer, '[');
|
713
|
-
if (RB_UNLIKELY(state->array_nl))
|
1221
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
714
1222
|
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
715
1223
|
if (i > 0) {
|
716
1224
|
fbuffer_append_char(buffer, ',');
|
717
|
-
if (RB_UNLIKELY(state->array_nl))
|
1225
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
718
1226
|
}
|
719
|
-
if (RB_UNLIKELY(state->indent)) {
|
1227
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
720
1228
|
for (j = 0; j < depth; j++) {
|
721
|
-
|
1229
|
+
fbuffer_append_str(buffer, data->state->indent);
|
722
1230
|
}
|
723
1231
|
}
|
724
|
-
generate_json(buffer,
|
1232
|
+
generate_json(buffer, data, RARRAY_AREF(obj, i));
|
725
1233
|
}
|
726
|
-
state->depth = --depth;
|
727
|
-
if (RB_UNLIKELY(state->array_nl)) {
|
728
|
-
|
729
|
-
if (RB_UNLIKELY(state->indent)) {
|
1234
|
+
data->state->depth = --depth;
|
1235
|
+
if (RB_UNLIKELY(data->state->array_nl)) {
|
1236
|
+
fbuffer_append_str(buffer, data->state->array_nl);
|
1237
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
730
1238
|
for (j = 0; j < depth; j++) {
|
731
|
-
|
1239
|
+
fbuffer_append_str(buffer, data->state->indent);
|
732
1240
|
}
|
733
1241
|
}
|
734
1242
|
}
|
735
1243
|
fbuffer_append_char(buffer, ']');
|
736
1244
|
}
|
737
1245
|
|
738
|
-
static int usascii_encindex, utf8_encindex, binary_encindex;
|
739
|
-
|
740
1246
|
static inline int enc_utf8_compatible_p(int enc_idx)
|
741
1247
|
{
|
742
1248
|
if (enc_idx == usascii_encindex) return 1;
|
@@ -744,117 +1250,199 @@ static inline int enc_utf8_compatible_p(int enc_idx)
|
|
744
1250
|
return 0;
|
745
1251
|
}
|
746
1252
|
|
1253
|
+
static VALUE encode_json_string_try(VALUE str)
|
1254
|
+
{
|
1255
|
+
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
|
1256
|
+
}
|
1257
|
+
|
1258
|
+
static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
|
1259
|
+
{
|
1260
|
+
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
|
1261
|
+
return Qundef;
|
1262
|
+
}
|
1263
|
+
|
747
1264
|
static inline VALUE ensure_valid_encoding(VALUE str)
|
748
1265
|
{
|
749
1266
|
int encindex = RB_ENCODING_GET(str);
|
750
1267
|
VALUE utf8_string;
|
751
1268
|
if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
|
752
1269
|
if (encindex == binary_encindex) {
|
753
|
-
// For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
|
754
|
-
// TODO: Deprecate in 2.8.0
|
755
|
-
// TODO: Remove in 3.0.0
|
756
1270
|
utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
|
757
1271
|
switch (rb_enc_str_coderange(utf8_string)) {
|
758
1272
|
case ENC_CODERANGE_7BIT:
|
1273
|
+
return utf8_string;
|
759
1274
|
case ENC_CODERANGE_VALID:
|
1275
|
+
// For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
|
1276
|
+
// TODO: Raise in 3.0.0
|
1277
|
+
rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
|
760
1278
|
return utf8_string;
|
761
1279
|
break;
|
762
1280
|
}
|
763
1281
|
}
|
764
1282
|
|
765
|
-
str =
|
1283
|
+
str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
|
766
1284
|
}
|
767
1285
|
return str;
|
768
1286
|
}
|
769
1287
|
|
770
|
-
static void generate_json_string(FBuffer *buffer,
|
1288
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
771
1289
|
{
|
772
1290
|
obj = ensure_valid_encoding(obj);
|
773
1291
|
|
774
1292
|
fbuffer_append_char(buffer, '"');
|
775
1293
|
|
1294
|
+
long len;
|
1295
|
+
search_state search;
|
1296
|
+
search.buffer = buffer;
|
1297
|
+
RSTRING_GETMEM(obj, search.ptr, len);
|
1298
|
+
search.cursor = search.ptr;
|
1299
|
+
search.end = search.ptr + len;
|
1300
|
+
|
1301
|
+
#ifdef HAVE_SIMD
|
1302
|
+
search.matches_mask = 0;
|
1303
|
+
search.has_matches = false;
|
1304
|
+
search.chunk_base = NULL;
|
1305
|
+
#endif /* HAVE_SIMD */
|
1306
|
+
|
776
1307
|
switch(rb_enc_str_coderange(obj)) {
|
777
1308
|
case ENC_CODERANGE_7BIT:
|
778
|
-
convert_ASCII_to_JSON(buffer, obj, state->script_safe ? script_safe_escape_table : escape_table);
|
779
|
-
break;
|
780
1309
|
case ENC_CODERANGE_VALID:
|
781
|
-
if (RB_UNLIKELY(state->ascii_only)) {
|
782
|
-
convert_UTF8_to_ASCII_only_JSON(
|
1310
|
+
if (RB_UNLIKELY(data->state->ascii_only)) {
|
1311
|
+
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
1312
|
+
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
1313
|
+
convert_UTF8_to_script_safe_JSON(&search);
|
783
1314
|
} else {
|
784
|
-
convert_UTF8_to_JSON(
|
1315
|
+
convert_UTF8_to_JSON(&search);
|
785
1316
|
}
|
786
1317
|
break;
|
787
1318
|
default:
|
788
|
-
|
1319
|
+
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
789
1320
|
break;
|
790
1321
|
}
|
791
1322
|
fbuffer_append_char(buffer, '"');
|
792
1323
|
}
|
793
1324
|
|
794
|
-
static void
|
1325
|
+
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1326
|
+
{
|
1327
|
+
VALUE tmp;
|
1328
|
+
if (rb_respond_to(obj, i_to_json)) {
|
1329
|
+
tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
|
1330
|
+
Check_Type(tmp, T_STRING);
|
1331
|
+
fbuffer_append_str(buffer, tmp);
|
1332
|
+
} else {
|
1333
|
+
tmp = rb_funcall(obj, i_to_s, 0);
|
1334
|
+
Check_Type(tmp, T_STRING);
|
1335
|
+
generate_json_string(buffer, data, tmp);
|
1336
|
+
}
|
1337
|
+
}
|
1338
|
+
|
1339
|
+
static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1340
|
+
{
|
1341
|
+
if (data->state->strict) {
|
1342
|
+
generate_json_string(buffer, data, rb_sym2str(obj));
|
1343
|
+
} else {
|
1344
|
+
generate_json_fallback(buffer, data, obj);
|
1345
|
+
}
|
1346
|
+
}
|
1347
|
+
|
1348
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
795
1349
|
{
|
796
1350
|
fbuffer_append(buffer, "null", 4);
|
797
1351
|
}
|
798
1352
|
|
799
|
-
static void generate_json_false(FBuffer *buffer,
|
1353
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
800
1354
|
{
|
801
1355
|
fbuffer_append(buffer, "false", 5);
|
802
1356
|
}
|
803
1357
|
|
804
|
-
static void generate_json_true(FBuffer *buffer,
|
1358
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
805
1359
|
{
|
806
1360
|
fbuffer_append(buffer, "true", 4);
|
807
1361
|
}
|
808
1362
|
|
809
|
-
static void generate_json_fixnum(FBuffer *buffer,
|
1363
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
810
1364
|
{
|
811
1365
|
fbuffer_append_long(buffer, FIX2LONG(obj));
|
812
1366
|
}
|
813
1367
|
|
814
|
-
static void generate_json_bignum(FBuffer *buffer,
|
1368
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
815
1369
|
{
|
816
1370
|
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
817
1371
|
fbuffer_append_str(buffer, tmp);
|
818
1372
|
}
|
819
1373
|
|
820
1374
|
#ifdef RUBY_INTEGER_UNIFICATION
|
821
|
-
static void generate_json_integer(FBuffer *buffer,
|
1375
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
822
1376
|
{
|
823
1377
|
if (FIXNUM_P(obj))
|
824
|
-
generate_json_fixnum(buffer,
|
1378
|
+
generate_json_fixnum(buffer, data, obj);
|
825
1379
|
else
|
826
|
-
generate_json_bignum(buffer,
|
1380
|
+
generate_json_bignum(buffer, data, obj);
|
827
1381
|
}
|
828
1382
|
#endif
|
829
|
-
|
1383
|
+
|
1384
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
830
1385
|
{
|
831
1386
|
double value = RFLOAT_VALUE(obj);
|
832
|
-
char allow_nan = state->allow_nan;
|
833
|
-
|
834
|
-
|
835
|
-
if (
|
836
|
-
|
837
|
-
|
838
|
-
|
1387
|
+
char allow_nan = data->state->allow_nan;
|
1388
|
+
if (isinf(value) || isnan(value)) {
|
1389
|
+
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
|
1390
|
+
if (!allow_nan) {
|
1391
|
+
if (data->state->strict && data->state->as_json) {
|
1392
|
+
VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
|
1393
|
+
if (casted_obj != obj) {
|
1394
|
+
increase_depth(data);
|
1395
|
+
generate_json(buffer, data, casted_obj);
|
1396
|
+
data->state->depth--;
|
1397
|
+
return;
|
1398
|
+
}
|
1399
|
+
}
|
1400
|
+
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
|
839
1401
|
}
|
1402
|
+
|
1403
|
+
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
1404
|
+
fbuffer_append_str(buffer, tmp);
|
1405
|
+
return;
|
840
1406
|
}
|
841
|
-
|
1407
|
+
|
1408
|
+
/* This implementation writes directly into the buffer. We reserve
|
1409
|
+
* the 24 characters that fpconv_dtoa states as its maximum, plus
|
1410
|
+
* 2 more characters for the potential ".0" suffix.
|
1411
|
+
*/
|
1412
|
+
fbuffer_inc_capa(buffer, 26);
|
1413
|
+
char* d = buffer->ptr + buffer->len;
|
1414
|
+
int len = fpconv_dtoa(value, d);
|
1415
|
+
|
1416
|
+
/* fpconv_dtoa converts a float to its shortest string representation,
|
1417
|
+
* but it adds a ".0" if this is a plain integer.
|
1418
|
+
*/
|
1419
|
+
buffer->len += len;
|
842
1420
|
}
|
843
1421
|
|
844
|
-
static void
|
1422
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
845
1423
|
{
|
846
|
-
VALUE
|
1424
|
+
VALUE fragment = RSTRUCT_GET(obj, 0);
|
1425
|
+
Check_Type(fragment, T_STRING);
|
1426
|
+
fbuffer_append_str(buffer, fragment);
|
1427
|
+
}
|
1428
|
+
|
1429
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1430
|
+
{
|
1431
|
+
bool as_json_called = false;
|
1432
|
+
start:
|
847
1433
|
if (obj == Qnil) {
|
848
|
-
generate_json_null(buffer,
|
1434
|
+
generate_json_null(buffer, data, obj);
|
849
1435
|
} else if (obj == Qfalse) {
|
850
|
-
generate_json_false(buffer,
|
1436
|
+
generate_json_false(buffer, data, obj);
|
851
1437
|
} else if (obj == Qtrue) {
|
852
|
-
generate_json_true(buffer,
|
1438
|
+
generate_json_true(buffer, data, obj);
|
853
1439
|
} else if (RB_SPECIAL_CONST_P(obj)) {
|
854
1440
|
if (RB_FIXNUM_P(obj)) {
|
855
|
-
generate_json_fixnum(buffer,
|
1441
|
+
generate_json_fixnum(buffer, data, obj);
|
856
1442
|
} else if (RB_FLONUM_P(obj)) {
|
857
|
-
generate_json_float(buffer,
|
1443
|
+
generate_json_float(buffer, data, obj);
|
1444
|
+
} else if (RB_STATIC_SYM_P(obj)) {
|
1445
|
+
generate_json_symbol(buffer, data, obj);
|
858
1446
|
} else {
|
859
1447
|
goto general;
|
860
1448
|
}
|
@@ -862,62 +1450,53 @@ static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *s
|
|
862
1450
|
VALUE klass = RBASIC_CLASS(obj);
|
863
1451
|
switch (RB_BUILTIN_TYPE(obj)) {
|
864
1452
|
case T_BIGNUM:
|
865
|
-
generate_json_bignum(buffer,
|
1453
|
+
generate_json_bignum(buffer, data, obj);
|
866
1454
|
break;
|
867
1455
|
case T_HASH:
|
868
1456
|
if (klass != rb_cHash) goto general;
|
869
|
-
generate_json_object(buffer,
|
1457
|
+
generate_json_object(buffer, data, obj);
|
870
1458
|
break;
|
871
1459
|
case T_ARRAY:
|
872
1460
|
if (klass != rb_cArray) goto general;
|
873
|
-
generate_json_array(buffer,
|
1461
|
+
generate_json_array(buffer, data, obj);
|
874
1462
|
break;
|
875
1463
|
case T_STRING:
|
876
1464
|
if (klass != rb_cString) goto general;
|
877
|
-
generate_json_string(buffer,
|
1465
|
+
generate_json_string(buffer, data, obj);
|
1466
|
+
break;
|
1467
|
+
case T_SYMBOL:
|
1468
|
+
generate_json_symbol(buffer, data, obj);
|
878
1469
|
break;
|
879
1470
|
case T_FLOAT:
|
880
1471
|
if (klass != rb_cFloat) goto general;
|
881
|
-
generate_json_float(buffer,
|
1472
|
+
generate_json_float(buffer, data, obj);
|
1473
|
+
break;
|
1474
|
+
case T_STRUCT:
|
1475
|
+
if (klass != cFragment) goto general;
|
1476
|
+
generate_json_fragment(buffer, data, obj);
|
882
1477
|
break;
|
883
1478
|
default:
|
884
1479
|
general:
|
885
|
-
if (state->strict) {
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
1480
|
+
if (data->state->strict) {
|
1481
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
1482
|
+
obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
|
1483
|
+
as_json_called = true;
|
1484
|
+
goto start;
|
1485
|
+
} else {
|
1486
|
+
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
|
1487
|
+
}
|
891
1488
|
} else {
|
892
|
-
|
893
|
-
Check_Type(tmp, T_STRING);
|
894
|
-
generate_json_string(buffer, Vstate, state, tmp);
|
1489
|
+
generate_json_fallback(buffer, data, obj);
|
895
1490
|
}
|
896
1491
|
}
|
897
1492
|
}
|
898
1493
|
}
|
899
1494
|
|
900
|
-
static FBuffer *cState_prepare_buffer(VALUE self)
|
901
|
-
{
|
902
|
-
FBuffer *buffer;
|
903
|
-
GET_STATE(self);
|
904
|
-
buffer = fbuffer_alloc(state->buffer_initial_length);
|
905
|
-
|
906
|
-
return buffer;
|
907
|
-
}
|
908
|
-
|
909
|
-
struct generate_json_data {
|
910
|
-
FBuffer *buffer;
|
911
|
-
VALUE vstate;
|
912
|
-
JSON_Generator_State *state;
|
913
|
-
VALUE obj;
|
914
|
-
};
|
915
|
-
|
916
1495
|
static VALUE generate_json_try(VALUE d)
|
917
1496
|
{
|
918
1497
|
struct generate_json_data *data = (struct generate_json_data *)d;
|
919
1498
|
|
920
|
-
|
1499
|
+
data->func(data->buffer, data, data->obj);
|
921
1500
|
|
922
1501
|
return Qnil;
|
923
1502
|
}
|
@@ -932,37 +1511,53 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
|
|
932
1511
|
return Qundef;
|
933
1512
|
}
|
934
1513
|
|
935
|
-
static VALUE cState_partial_generate(VALUE self, VALUE obj)
|
1514
|
+
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
|
936
1515
|
{
|
937
|
-
FBuffer *buffer = cState_prepare_buffer(self);
|
938
1516
|
GET_STATE(self);
|
939
1517
|
|
1518
|
+
char stack_buffer[FBUFFER_STACK_SIZE];
|
1519
|
+
FBuffer buffer = {
|
1520
|
+
.io = RTEST(io) ? io : Qfalse,
|
1521
|
+
};
|
1522
|
+
fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
|
1523
|
+
|
940
1524
|
struct generate_json_data data = {
|
941
|
-
.buffer = buffer,
|
1525
|
+
.buffer = &buffer,
|
942
1526
|
.vstate = self,
|
943
1527
|
.state = state,
|
944
|
-
.obj = obj
|
1528
|
+
.obj = obj,
|
1529
|
+
.func = func
|
945
1530
|
};
|
946
1531
|
rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
|
947
1532
|
|
948
|
-
return
|
1533
|
+
return fbuffer_finalize(&buffer);
|
949
1534
|
}
|
950
1535
|
|
951
|
-
/*
|
952
|
-
*
|
1536
|
+
/* call-seq:
|
1537
|
+
* generate(obj) -> String
|
1538
|
+
* generate(obj, anIO) -> anIO
|
953
1539
|
*
|
954
1540
|
* Generates a valid JSON document from object +obj+ and returns the
|
955
1541
|
* result. If no valid JSON document can be created this method raises a
|
956
1542
|
* GeneratorError exception.
|
957
1543
|
*/
|
958
|
-
static VALUE cState_generate(VALUE
|
1544
|
+
static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
|
959
1545
|
{
|
960
|
-
|
1546
|
+
rb_check_arity(argc, 1, 2);
|
1547
|
+
VALUE obj = argv[0];
|
1548
|
+
VALUE io = argc > 1 ? argv[1] : Qnil;
|
1549
|
+
VALUE result = cState_partial_generate(self, obj, generate_json, io);
|
961
1550
|
GET_STATE(self);
|
962
1551
|
(void)state;
|
963
1552
|
return result;
|
964
1553
|
}
|
965
1554
|
|
1555
|
+
static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
|
1556
|
+
{
|
1557
|
+
rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`");
|
1558
|
+
return self;
|
1559
|
+
}
|
1560
|
+
|
966
1561
|
/*
|
967
1562
|
* call-seq: initialize_copy(orig)
|
968
1563
|
*
|
@@ -979,11 +1574,12 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
|
|
979
1574
|
if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State");
|
980
1575
|
|
981
1576
|
MEMCPY(objState, origState, JSON_Generator_State, 1);
|
982
|
-
objState->indent =
|
983
|
-
objState->space =
|
984
|
-
objState->space_before =
|
985
|
-
objState->object_nl =
|
986
|
-
objState->array_nl =
|
1577
|
+
objState->indent = origState->indent;
|
1578
|
+
objState->space = origState->space;
|
1579
|
+
objState->space_before = origState->space_before;
|
1580
|
+
objState->object_nl = origState->object_nl;
|
1581
|
+
objState->array_nl = origState->array_nl;
|
1582
|
+
objState->as_json = origState->as_json;
|
987
1583
|
return obj;
|
988
1584
|
}
|
989
1585
|
|
@@ -1013,7 +1609,18 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts)
|
|
1013
1609
|
static VALUE cState_indent(VALUE self)
|
1014
1610
|
{
|
1015
1611
|
GET_STATE(self);
|
1016
|
-
return state->indent ?
|
1612
|
+
return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0));
|
1613
|
+
}
|
1614
|
+
|
1615
|
+
static VALUE string_config(VALUE config)
|
1616
|
+
{
|
1617
|
+
if (RTEST(config)) {
|
1618
|
+
Check_Type(config, T_STRING);
|
1619
|
+
if (RSTRING_LEN(config)) {
|
1620
|
+
return rb_str_new_frozen(config);
|
1621
|
+
}
|
1622
|
+
}
|
1623
|
+
return Qfalse;
|
1017
1624
|
}
|
1018
1625
|
|
1019
1626
|
/*
|
@@ -1023,21 +1630,8 @@ static VALUE cState_indent(VALUE self)
|
|
1023
1630
|
*/
|
1024
1631
|
static VALUE cState_indent_set(VALUE self, VALUE indent)
|
1025
1632
|
{
|
1026
|
-
unsigned long len;
|
1027
1633
|
GET_STATE(self);
|
1028
|
-
|
1029
|
-
len = RSTRING_LEN(indent);
|
1030
|
-
if (len == 0) {
|
1031
|
-
if (state->indent) {
|
1032
|
-
ruby_xfree(state->indent);
|
1033
|
-
state->indent = NULL;
|
1034
|
-
state->indent_len = 0;
|
1035
|
-
}
|
1036
|
-
} else {
|
1037
|
-
if (state->indent) ruby_xfree(state->indent);
|
1038
|
-
state->indent = fstrndup(RSTRING_PTR(indent), len);
|
1039
|
-
state->indent_len = len;
|
1040
|
-
}
|
1634
|
+
RB_OBJ_WRITE(self, &state->indent, string_config(indent));
|
1041
1635
|
return Qnil;
|
1042
1636
|
}
|
1043
1637
|
|
@@ -1050,7 +1644,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent)
|
|
1050
1644
|
static VALUE cState_space(VALUE self)
|
1051
1645
|
{
|
1052
1646
|
GET_STATE(self);
|
1053
|
-
return state->space ?
|
1647
|
+
return state->space ? state->space : rb_str_freeze(rb_utf8_str_new("", 0));
|
1054
1648
|
}
|
1055
1649
|
|
1056
1650
|
/*
|
@@ -1061,21 +1655,8 @@ static VALUE cState_space(VALUE self)
|
|
1061
1655
|
*/
|
1062
1656
|
static VALUE cState_space_set(VALUE self, VALUE space)
|
1063
1657
|
{
|
1064
|
-
unsigned long len;
|
1065
1658
|
GET_STATE(self);
|
1066
|
-
|
1067
|
-
len = RSTRING_LEN(space);
|
1068
|
-
if (len == 0) {
|
1069
|
-
if (state->space) {
|
1070
|
-
ruby_xfree(state->space);
|
1071
|
-
state->space = NULL;
|
1072
|
-
state->space_len = 0;
|
1073
|
-
}
|
1074
|
-
} else {
|
1075
|
-
if (state->space) ruby_xfree(state->space);
|
1076
|
-
state->space = fstrndup(RSTRING_PTR(space), len);
|
1077
|
-
state->space_len = len;
|
1078
|
-
}
|
1659
|
+
RB_OBJ_WRITE(self, &state->space, string_config(space));
|
1079
1660
|
return Qnil;
|
1080
1661
|
}
|
1081
1662
|
|
@@ -1087,7 +1668,7 @@ static VALUE cState_space_set(VALUE self, VALUE space)
|
|
1087
1668
|
static VALUE cState_space_before(VALUE self)
|
1088
1669
|
{
|
1089
1670
|
GET_STATE(self);
|
1090
|
-
return state->space_before ?
|
1671
|
+
return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0));
|
1091
1672
|
}
|
1092
1673
|
|
1093
1674
|
/*
|
@@ -1097,21 +1678,8 @@ static VALUE cState_space_before(VALUE self)
|
|
1097
1678
|
*/
|
1098
1679
|
static VALUE cState_space_before_set(VALUE self, VALUE space_before)
|
1099
1680
|
{
|
1100
|
-
unsigned long len;
|
1101
1681
|
GET_STATE(self);
|
1102
|
-
|
1103
|
-
len = RSTRING_LEN(space_before);
|
1104
|
-
if (len == 0) {
|
1105
|
-
if (state->space_before) {
|
1106
|
-
ruby_xfree(state->space_before);
|
1107
|
-
state->space_before = NULL;
|
1108
|
-
state->space_before_len = 0;
|
1109
|
-
}
|
1110
|
-
} else {
|
1111
|
-
if (state->space_before) ruby_xfree(state->space_before);
|
1112
|
-
state->space_before = fstrndup(RSTRING_PTR(space_before), len);
|
1113
|
-
state->space_before_len = len;
|
1114
|
-
}
|
1682
|
+
RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
|
1115
1683
|
return Qnil;
|
1116
1684
|
}
|
1117
1685
|
|
@@ -1124,7 +1692,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before)
|
|
1124
1692
|
static VALUE cState_object_nl(VALUE self)
|
1125
1693
|
{
|
1126
1694
|
GET_STATE(self);
|
1127
|
-
return state->object_nl ?
|
1695
|
+
return state->object_nl ? state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0));
|
1128
1696
|
}
|
1129
1697
|
|
1130
1698
|
/*
|
@@ -1135,20 +1703,8 @@ static VALUE cState_object_nl(VALUE self)
|
|
1135
1703
|
*/
|
1136
1704
|
static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
|
1137
1705
|
{
|
1138
|
-
unsigned long len;
|
1139
1706
|
GET_STATE(self);
|
1140
|
-
|
1141
|
-
len = RSTRING_LEN(object_nl);
|
1142
|
-
if (len == 0) {
|
1143
|
-
if (state->object_nl) {
|
1144
|
-
ruby_xfree(state->object_nl);
|
1145
|
-
state->object_nl = NULL;
|
1146
|
-
}
|
1147
|
-
} else {
|
1148
|
-
if (state->object_nl) ruby_xfree(state->object_nl);
|
1149
|
-
state->object_nl = fstrndup(RSTRING_PTR(object_nl), len);
|
1150
|
-
state->object_nl_len = len;
|
1151
|
-
}
|
1707
|
+
RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
|
1152
1708
|
return Qnil;
|
1153
1709
|
}
|
1154
1710
|
|
@@ -1160,7 +1716,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
|
|
1160
1716
|
static VALUE cState_array_nl(VALUE self)
|
1161
1717
|
{
|
1162
1718
|
GET_STATE(self);
|
1163
|
-
return state->array_nl ?
|
1719
|
+
return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0));
|
1164
1720
|
}
|
1165
1721
|
|
1166
1722
|
/*
|
@@ -1170,23 +1726,33 @@ static VALUE cState_array_nl(VALUE self)
|
|
1170
1726
|
*/
|
1171
1727
|
static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
|
1172
1728
|
{
|
1173
|
-
unsigned long len;
|
1174
1729
|
GET_STATE(self);
|
1175
|
-
|
1176
|
-
len = RSTRING_LEN(array_nl);
|
1177
|
-
if (len == 0) {
|
1178
|
-
if (state->array_nl) {
|
1179
|
-
ruby_xfree(state->array_nl);
|
1180
|
-
state->array_nl = NULL;
|
1181
|
-
}
|
1182
|
-
} else {
|
1183
|
-
if (state->array_nl) ruby_xfree(state->array_nl);
|
1184
|
-
state->array_nl = fstrndup(RSTRING_PTR(array_nl), len);
|
1185
|
-
state->array_nl_len = len;
|
1186
|
-
}
|
1730
|
+
RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
|
1187
1731
|
return Qnil;
|
1188
1732
|
}
|
1189
1733
|
|
1734
|
+
/*
|
1735
|
+
* call-seq: as_json()
|
1736
|
+
*
|
1737
|
+
* This proc is called in strict mode with an object that cannot be generated natively; its return value is generated in its place.
|
1738
|
+
*/
|
1739
|
+
static VALUE cState_as_json(VALUE self)
|
1740
|
+
{
|
1741
|
+
GET_STATE(self);
|
1742
|
+
return state->as_json;
|
1743
|
+
}
|
1744
|
+
|
1745
|
+
/*
|
1746
|
+
* call-seq: as_json=(as_json)
|
1747
|
+
*
|
1748
|
+
* Sets the proc (converted with to_proc) that is called in strict mode with an object that cannot be generated natively; its return value is generated in its place.
|
1749
|
+
*/
|
1750
|
+
static VALUE cState_as_json_set(VALUE self, VALUE as_json)
|
1751
|
+
{
|
1752
|
+
GET_STATE(self);
|
1753
|
+
RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
|
1754
|
+
return Qnil;
|
1755
|
+
}
|
1190
1756
|
|
1191
1757
|
/*
|
1192
1758
|
* call-seq: check_circular?
|
@@ -1212,6 +1778,11 @@ static VALUE cState_max_nesting(VALUE self)
|
|
1212
1778
|
return LONG2FIX(state->max_nesting);
|
1213
1779
|
}
|
1214
1780
|
|
1781
|
+
static long long_config(VALUE num)
|
1782
|
+
{
|
1783
|
+
return RTEST(num) ? FIX2LONG(num) : 0;
|
1784
|
+
}
|
1785
|
+
|
1215
1786
|
/*
|
1216
1787
|
* call-seq: max_nesting=(depth)
|
1217
1788
|
*
|
@@ -1221,8 +1792,7 @@ static VALUE cState_max_nesting(VALUE self)
|
|
1221
1792
|
static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
|
1222
1793
|
{
|
1223
1794
|
GET_STATE(self);
|
1224
|
-
|
1225
|
-
state->max_nesting = FIX2LONG(depth);
|
1795
|
+
state->max_nesting = long_config(depth);
|
1226
1796
|
return Qnil;
|
1227
1797
|
}
|
1228
1798
|
|
@@ -1350,8 +1920,7 @@ static VALUE cState_depth(VALUE self)
|
|
1350
1920
|
static VALUE cState_depth_set(VALUE self, VALUE depth)
|
1351
1921
|
{
|
1352
1922
|
GET_STATE(self);
|
1353
|
-
|
1354
|
-
state->depth = FIX2LONG(depth);
|
1923
|
+
state->depth = long_config(depth);
|
1355
1924
|
return Qnil;
|
1356
1925
|
}
|
1357
1926
|
|
@@ -1366,6 +1935,15 @@ static VALUE cState_buffer_initial_length(VALUE self)
|
|
1366
1935
|
return LONG2FIX(state->buffer_initial_length);
|
1367
1936
|
}
|
1368
1937
|
|
1938
|
+
static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length)
|
1939
|
+
{
|
1940
|
+
Check_Type(buffer_initial_length, T_FIXNUM);
|
1941
|
+
long initial_length = FIX2LONG(buffer_initial_length);
|
1942
|
+
if (initial_length > 0) {
|
1943
|
+
state->buffer_initial_length = initial_length;
|
1944
|
+
}
|
1945
|
+
}
|
1946
|
+
|
1369
1947
|
/*
|
1370
1948
|
* call-seq: buffer_initial_length=(length)
|
1371
1949
|
*
|
@@ -1374,16 +1952,76 @@ static VALUE cState_buffer_initial_length(VALUE self)
|
|
1374
1952
|
*/
|
1375
1953
|
static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
|
1376
1954
|
{
|
1377
|
-
long initial_length;
|
1378
1955
|
GET_STATE(self);
|
1379
|
-
|
1380
|
-
initial_length = FIX2LONG(buffer_initial_length);
|
1381
|
-
if (initial_length > 0) {
|
1382
|
-
state->buffer_initial_length = initial_length;
|
1383
|
-
}
|
1956
|
+
buffer_initial_length_set(state, buffer_initial_length);
|
1384
1957
|
return Qnil;
|
1385
1958
|
}
|
1386
1959
|
|
1960
|
+
static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
1961
|
+
{
|
1962
|
+
JSON_Generator_State *state = (JSON_Generator_State *)_arg;
|
1963
|
+
|
1964
|
+
if (key == sym_indent) { state->indent = string_config(val); }
|
1965
|
+
else if (key == sym_space) { state->space = string_config(val); }
|
1966
|
+
else if (key == sym_space_before) { state->space_before = string_config(val); }
|
1967
|
+
else if (key == sym_object_nl) { state->object_nl = string_config(val); }
|
1968
|
+
else if (key == sym_array_nl) { state->array_nl = string_config(val); }
|
1969
|
+
else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
|
1970
|
+
else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
|
1971
|
+
else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
|
1972
|
+
else if (key == sym_depth) { state->depth = long_config(val); }
|
1973
|
+
else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); }
|
1974
|
+
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
|
1975
|
+
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
|
1976
|
+
else if (key == sym_strict) { state->strict = RTEST(val); }
|
1977
|
+
else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
|
1978
|
+
return ST_CONTINUE;
|
1979
|
+
}
|
1980
|
+
|
1981
|
+
static void configure_state(JSON_Generator_State *state, VALUE config)
|
1982
|
+
{
|
1983
|
+
if (!RTEST(config)) return;
|
1984
|
+
|
1985
|
+
Check_Type(config, T_HASH);
|
1986
|
+
|
1987
|
+
if (!RHASH_SIZE(config)) return;
|
1988
|
+
|
1989
|
+
// We assume in most cases few keys are set so it's faster to go over
|
1990
|
+
// the provided keys than to check all possible keys.
|
1991
|
+
rb_hash_foreach(config, configure_state_i, (VALUE)state);
|
1992
|
+
}
|
1993
|
+
|
1994
|
+
static VALUE cState_configure(VALUE self, VALUE opts)
|
1995
|
+
{
|
1996
|
+
GET_STATE(self);
|
1997
|
+
configure_state(state, opts);
|
1998
|
+
return self;
|
1999
|
+
}
|
2000
|
+
|
2001
|
+
static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
2002
|
+
{
|
2003
|
+
JSON_Generator_State state = {0};
|
2004
|
+
state_init(&state);
|
2005
|
+
configure_state(&state, opts);
|
2006
|
+
|
2007
|
+
char stack_buffer[FBUFFER_STACK_SIZE];
|
2008
|
+
FBuffer buffer = {
|
2009
|
+
.io = RTEST(io) ? io : Qfalse,
|
2010
|
+
};
|
2011
|
+
fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
|
2012
|
+
|
2013
|
+
struct generate_json_data data = {
|
2014
|
+
.buffer = &buffer,
|
2015
|
+
.vstate = Qfalse,
|
2016
|
+
.state = &state,
|
2017
|
+
.obj = obj,
|
2018
|
+
.func = generate_json,
|
2019
|
+
};
|
2020
|
+
rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
|
2021
|
+
|
2022
|
+
return fbuffer_finalize(&buffer);
|
2023
|
+
}
|
2024
|
+
|
1387
2025
|
/*
|
1388
2026
|
*
|
1389
2027
|
*/
|
@@ -1397,17 +2035,26 @@ void Init_generator(void)
|
|
1397
2035
|
rb_require("json/common");
|
1398
2036
|
|
1399
2037
|
mJSON = rb_define_module("JSON");
|
2038
|
+
|
2039
|
+
rb_global_variable(&cFragment);
|
2040
|
+
cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
|
2041
|
+
|
1400
2042
|
VALUE mExt = rb_define_module_under(mJSON, "Ext");
|
1401
2043
|
VALUE mGenerator = rb_define_module_under(mExt, "Generator");
|
1402
2044
|
|
2045
|
+
rb_global_variable(&eGeneratorError);
|
1403
2046
|
eGeneratorError = rb_path2class("JSON::GeneratorError");
|
2047
|
+
|
2048
|
+
rb_global_variable(&eNestingError);
|
1404
2049
|
eNestingError = rb_path2class("JSON::NestingError");
|
1405
|
-
rb_gc_register_mark_object(eGeneratorError);
|
1406
|
-
rb_gc_register_mark_object(eNestingError);
|
1407
2050
|
|
1408
2051
|
cState = rb_define_class_under(mGenerator, "State", rb_cObject);
|
1409
2052
|
rb_define_alloc_func(cState, cState_s_allocate);
|
1410
2053
|
rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
|
2054
|
+
rb_define_method(cState, "initialize", cState_initialize, -1);
|
2055
|
+
rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings
|
2056
|
+
rb_define_private_method(cState, "_configure", cState_configure, 1);
|
2057
|
+
|
1411
2058
|
rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
|
1412
2059
|
rb_define_method(cState, "indent", cState_indent, 0);
|
1413
2060
|
rb_define_method(cState, "indent=", cState_indent_set, 1);
|
@@ -1419,6 +2066,8 @@ void Init_generator(void)
|
|
1419
2066
|
rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
|
1420
2067
|
rb_define_method(cState, "array_nl", cState_array_nl, 0);
|
1421
2068
|
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
|
2069
|
+
rb_define_method(cState, "as_json", cState_as_json, 0);
|
2070
|
+
rb_define_method(cState, "as_json=", cState_as_json_set, 1);
|
1422
2071
|
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
|
1423
2072
|
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
|
1424
2073
|
rb_define_method(cState, "script_safe", cState_script_safe, 0);
|
@@ -1439,7 +2088,10 @@ void Init_generator(void)
|
|
1439
2088
|
rb_define_method(cState, "depth=", cState_depth_set, 1);
|
1440
2089
|
rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
|
1441
2090
|
rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
|
1442
|
-
rb_define_method(cState, "generate", cState_generate, 1);
|
2091
|
+
rb_define_method(cState, "generate", cState_generate, -1);
|
2092
|
+
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
|
2093
|
+
|
2094
|
+
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
|
1443
2095
|
|
1444
2096
|
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
|
1445
2097
|
|
@@ -1495,7 +2147,43 @@ void Init_generator(void)
|
|
1495
2147
|
i_extend = rb_intern("extend");
|
1496
2148
|
i_encode = rb_intern("encode");
|
1497
2149
|
|
2150
|
+
sym_indent = ID2SYM(rb_intern("indent"));
|
2151
|
+
sym_space = ID2SYM(rb_intern("space"));
|
2152
|
+
sym_space_before = ID2SYM(rb_intern("space_before"));
|
2153
|
+
sym_object_nl = ID2SYM(rb_intern("object_nl"));
|
2154
|
+
sym_array_nl = ID2SYM(rb_intern("array_nl"));
|
2155
|
+
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
2156
|
+
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
2157
|
+
sym_ascii_only = ID2SYM(rb_intern("ascii_only"));
|
2158
|
+
sym_depth = ID2SYM(rb_intern("depth"));
|
2159
|
+
sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length"));
|
2160
|
+
sym_script_safe = ID2SYM(rb_intern("script_safe"));
|
2161
|
+
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
|
2162
|
+
sym_strict = ID2SYM(rb_intern("strict"));
|
2163
|
+
sym_as_json = ID2SYM(rb_intern("as_json"));
|
2164
|
+
|
1498
2165
|
usascii_encindex = rb_usascii_encindex();
|
1499
2166
|
utf8_encindex = rb_utf8_encindex();
|
1500
2167
|
binary_encindex = rb_ascii8bit_encindex();
|
2168
|
+
|
2169
|
+
rb_require("json/ext/generator/state");
|
2170
|
+
|
2171
|
+
|
2172
|
+
switch(find_simd_implementation()) {
|
2173
|
+
#ifdef HAVE_SIMD
|
2174
|
+
#ifdef HAVE_SIMD_NEON
|
2175
|
+
case SIMD_NEON:
|
2176
|
+
search_escape_basic_impl = search_escape_basic_neon;
|
2177
|
+
break;
|
2178
|
+
#endif /* HAVE_SIMD_NEON */
|
2179
|
+
#ifdef HAVE_SIMD_SSE2
|
2180
|
+
case SIMD_SSE2:
|
2181
|
+
search_escape_basic_impl = search_escape_basic_sse2;
|
2182
|
+
break;
|
2183
|
+
#endif /* HAVE_SIMD_SSE2 */
|
2184
|
+
#endif /* HAVE_SIMD */
|
2185
|
+
default:
|
2186
|
+
search_escape_basic_impl = search_escape_basic;
|
2187
|
+
break;
|
2188
|
+
}
|
1501
2189
|
}
|