json 2.7.2 → 2.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BSDL +22 -0
- data/CHANGES.md +160 -17
- data/LEGAL +8 -0
- data/README.md +76 -211
- data/ext/json/ext/fbuffer/fbuffer.h +178 -95
- data/ext/json/ext/generator/extconf.rb +38 -2
- data/ext/json/ext/generator/generator.c +1311 -826
- data/ext/json/ext/generator/simd.h +112 -0
- data/ext/json/ext/parser/extconf.rb +6 -27
- data/ext/json/ext/parser/parser.c +1176 -1971
- data/ext/json/ext/vendor/fpconv.c +479 -0
- data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
- data/json.gemspec +44 -49
- data/lib/json/add/bigdecimal.rb +2 -2
- data/lib/json/add/complex.rb +1 -1
- data/lib/json/add/core.rb +1 -1
- data/lib/json/add/date.rb +1 -1
- data/lib/json/add/date_time.rb +1 -1
- data/lib/json/add/exception.rb +1 -1
- data/lib/json/add/ostruct.rb +1 -1
- data/lib/json/add/range.rb +1 -1
- data/lib/json/add/rational.rb +1 -1
- data/lib/json/add/regexp.rb +1 -1
- data/lib/json/add/struct.rb +1 -1
- data/lib/json/add/symbol.rb +8 -4
- data/lib/json/add/time.rb +3 -10
- data/lib/json/common.rb +647 -241
- data/lib/json/ext/generator/state.rb +106 -0
- data/lib/json/ext.rb +35 -5
- data/lib/json/generic_object.rb +1 -1
- data/lib/json/{pure → truffle_ruby}/generator.rb +322 -145
- data/lib/json/version.rb +3 -7
- data/lib/json.rb +16 -21
- metadata +18 -22
- data/ext/json/ext/generator/depend +0 -1
- data/ext/json/ext/generator/generator.h +0 -177
- data/ext/json/ext/parser/depend +0 -1
- data/ext/json/ext/parser/parser.h +0 -96
- data/ext/json/ext/parser/parser.rl +0 -971
- data/ext/json/extconf.rb +0 -3
- data/lib/json/pure/parser.rb +0 -337
- data/lib/json/pure.rb +0 -15
- /data/{LICENSE → COPYING} +0 -0
@@ -1,340 +1,726 @@
|
|
1
|
+
#include "ruby.h"
|
1
2
|
#include "../fbuffer/fbuffer.h"
|
2
|
-
#include "
|
3
|
+
#include "../vendor/fpconv.c"
|
3
4
|
|
4
|
-
|
5
|
-
|
5
|
+
#include <math.h>
|
6
|
+
#include <ctype.h>
|
7
|
+
|
8
|
+
#include "simd.h"
|
9
|
+
|
10
|
+
/* ruby api and some helpers */
|
11
|
+
|
12
|
+
typedef struct JSON_Generator_StateStruct {
|
13
|
+
VALUE indent;
|
14
|
+
VALUE space;
|
15
|
+
VALUE space_before;
|
16
|
+
VALUE object_nl;
|
17
|
+
VALUE array_nl;
|
18
|
+
VALUE as_json;
|
19
|
+
|
20
|
+
long max_nesting;
|
21
|
+
long depth;
|
22
|
+
long buffer_initial_length;
|
23
|
+
|
24
|
+
bool allow_nan;
|
25
|
+
bool ascii_only;
|
26
|
+
bool script_safe;
|
27
|
+
bool strict;
|
28
|
+
} JSON_Generator_State;
|
29
|
+
|
30
|
+
#ifndef RB_UNLIKELY
|
31
|
+
#define RB_UNLIKELY(cond) (cond)
|
32
|
+
#endif
|
33
|
+
|
34
|
+
static VALUE mJSON, cState, cFragment, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8;
|
35
|
+
|
36
|
+
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
|
37
|
+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
|
38
|
+
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
|
39
|
+
|
40
|
+
|
41
|
+
#define GET_STATE_TO(self, state) \
|
42
|
+
TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state)
|
43
|
+
|
44
|
+
#define GET_STATE(self) \
|
45
|
+
JSON_Generator_State *state; \
|
46
|
+
GET_STATE_TO(self, state)
|
47
|
+
|
48
|
+
struct generate_json_data;
|
49
|
+
|
50
|
+
typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
51
|
+
|
52
|
+
struct generate_json_data {
|
53
|
+
FBuffer *buffer;
|
54
|
+
VALUE vstate;
|
55
|
+
JSON_Generator_State *state;
|
56
|
+
VALUE obj;
|
57
|
+
generator_func func;
|
58
|
+
};
|
59
|
+
|
60
|
+
static VALUE cState_from_state_s(VALUE self, VALUE opts);
|
61
|
+
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
|
62
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
63
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
64
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
65
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
66
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
67
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
68
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
6
69
|
#ifdef RUBY_INTEGER_UNIFICATION
|
7
|
-
|
70
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
71
|
+
#endif
|
72
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
73
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
74
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
75
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
76
|
+
|
77
|
+
static int usascii_encindex, utf8_encindex, binary_encindex;
|
78
|
+
|
79
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
80
|
+
RBIMPL_ATTR_NORETURN()
|
81
|
+
#endif
|
82
|
+
static void raise_generator_error_str(VALUE invalid_object, VALUE str)
|
83
|
+
{
|
84
|
+
VALUE exc = rb_exc_new_str(eGeneratorError, str);
|
85
|
+
rb_ivar_set(exc, rb_intern("@invalid_object"), invalid_object);
|
86
|
+
rb_exc_raise(exc);
|
87
|
+
}
|
88
|
+
|
89
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
90
|
+
RBIMPL_ATTR_NORETURN()
|
91
|
+
#endif
|
92
|
+
#ifdef RBIMPL_ATTR_FORMAT
|
93
|
+
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
|
94
|
+
#endif
|
95
|
+
static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
|
96
|
+
{
|
97
|
+
va_list args;
|
98
|
+
va_start(args, fmt);
|
99
|
+
VALUE str = rb_vsprintf(fmt, args);
|
100
|
+
va_end(args);
|
101
|
+
raise_generator_error_str(invalid_object, str);
|
102
|
+
}
|
103
|
+
|
104
|
+
// 0 - single-byte char that doesn't need to be escaped.
|
105
|
+
// (x | 8) - char that needs to be escaped.
|
106
|
+
static const unsigned char CHAR_LENGTH_MASK = 7;
|
107
|
+
static const unsigned char ESCAPE_MASK = 8;
|
108
|
+
|
109
|
+
typedef struct _search_state {
|
110
|
+
const char *ptr;
|
111
|
+
const char *end;
|
112
|
+
const char *cursor;
|
113
|
+
FBuffer *buffer;
|
114
|
+
|
115
|
+
#ifdef HAVE_SIMD
|
116
|
+
const char *chunk_base;
|
117
|
+
const char *chunk_end;
|
118
|
+
bool has_matches;
|
119
|
+
|
120
|
+
#if defined(HAVE_SIMD_NEON)
|
121
|
+
uint64_t matches_mask;
|
122
|
+
#elif defined(HAVE_SIMD_SSE2)
|
123
|
+
int matches_mask;
|
124
|
+
#else
|
125
|
+
#error "Unknown SIMD Implementation."
|
126
|
+
#endif /* HAVE_SIMD_NEON */
|
127
|
+
#endif /* HAVE_SIMD */
|
128
|
+
} search_state;
|
129
|
+
|
130
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
131
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
8
132
|
#else
|
9
|
-
|
133
|
+
#define FORCE_INLINE
|
10
134
|
#endif
|
11
|
-
mFloat, mString, mString_Extend,
|
12
|
-
mTrueClass, mFalseClass, mNilClass, eGeneratorError,
|
13
|
-
eNestingError;
|
14
135
|
|
15
|
-
static
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
136
|
+
static inline FORCE_INLINE void search_flush(search_state *search)
|
137
|
+
{
|
138
|
+
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
139
|
+
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
140
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
141
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
142
|
+
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
143
|
+
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
144
|
+
if (search->ptr > search->cursor) {
|
145
|
+
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
|
146
|
+
search->cursor = search->ptr;
|
147
|
+
}
|
148
|
+
}
|
20
149
|
|
21
|
-
|
22
|
-
|
150
|
+
static const unsigned char escape_table_basic[256] = {
|
151
|
+
// ASCII Control Characters
|
152
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
153
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
154
|
+
// ASCII Characters
|
155
|
+
0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
|
156
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
157
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
158
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
|
159
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
160
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
161
|
+
};
|
162
|
+
|
163
|
+
static unsigned char (*search_escape_basic_impl)(search_state *);
|
164
|
+
|
165
|
+
static inline unsigned char search_escape_basic(search_state *search)
|
166
|
+
{
|
167
|
+
while (search->ptr < search->end) {
|
168
|
+
if (RB_UNLIKELY(escape_table_basic[(const unsigned char)*search->ptr])) {
|
169
|
+
search_flush(search);
|
170
|
+
return 1;
|
171
|
+
} else {
|
172
|
+
search->ptr++;
|
173
|
+
}
|
174
|
+
}
|
175
|
+
search_flush(search);
|
176
|
+
return 0;
|
177
|
+
}
|
178
|
+
|
179
|
+
static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
|
180
|
+
{
|
181
|
+
const unsigned char ch = (unsigned char)*search->ptr;
|
182
|
+
switch (ch) {
|
183
|
+
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
184
|
+
case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
|
185
|
+
case '/': fbuffer_append(search->buffer, "\\/", 2); break;
|
186
|
+
case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
|
187
|
+
case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
|
188
|
+
case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
|
189
|
+
case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
|
190
|
+
case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
|
191
|
+
default: {
|
192
|
+
const char *hexdig = "0123456789abcdef";
|
193
|
+
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
|
194
|
+
scratch[4] = hexdig[(ch >> 4) & 0xf];
|
195
|
+
scratch[5] = hexdig[ch & 0xf];
|
196
|
+
fbuffer_append(search->buffer, scratch, 6);
|
197
|
+
break;
|
198
|
+
}
|
199
|
+
}
|
200
|
+
search->ptr++;
|
201
|
+
search->cursor = search->ptr;
|
202
|
+
}
|
203
|
+
|
204
|
+
/* Converts in_string to a JSON string (without the wrapping '"'
|
205
|
+
* characters) in FBuffer out_buffer.
|
206
|
+
*
|
207
|
+
* Characters are JSON-escaped according to:
|
23
208
|
*
|
24
|
-
*
|
209
|
+
* - Always: ASCII control characters (0x00-0x1F), dquote, and
|
210
|
+
* backslash.
|
25
211
|
*
|
26
|
-
*
|
27
|
-
* made as to fitness for any particular purpose. No warranties of any
|
28
|
-
* kind are expressed or implied. The recipient agrees to determine
|
29
|
-
* applicability of information provided. If this file has been
|
30
|
-
* purchased on magnetic or optical media from Unicode, Inc., the
|
31
|
-
* sole remedy for any claim will be exchange of defective media
|
32
|
-
* within 90 days of receipt.
|
212
|
+
* - If out_ascii_only: non-ASCII characters (>0x7F)
|
33
213
|
*
|
34
|
-
*
|
214
|
+
* - If script_safe: forwardslash (/), line separator (U+2028), and
|
215
|
+
* paragraph separator (U+2029)
|
35
216
|
*
|
36
|
-
*
|
37
|
-
*
|
38
|
-
* Unicode Standard, and to make copies of this file in any form
|
39
|
-
* for internal or external distribution as long as this notice
|
40
|
-
* remains attached.
|
217
|
+
* Everything else (should be UTF-8) is just passed through and
|
218
|
+
* appended to the result.
|
41
219
|
*/
|
220
|
+
static inline void convert_UTF8_to_JSON(search_state *search)
|
221
|
+
{
|
222
|
+
while (search_escape_basic_impl(search)) {
|
223
|
+
escape_UTF8_char_basic(search);
|
224
|
+
}
|
225
|
+
}
|
42
226
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
};
|
227
|
+
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
228
|
+
{
|
229
|
+
const unsigned char ch = (unsigned char)*search->ptr;
|
230
|
+
switch (ch_len) {
|
231
|
+
case 1: {
|
232
|
+
switch (ch) {
|
233
|
+
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
234
|
+
case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
|
235
|
+
case '/': fbuffer_append(search->buffer, "\\/", 2); break;
|
236
|
+
case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
|
237
|
+
case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
|
238
|
+
case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
|
239
|
+
case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
|
240
|
+
case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
|
241
|
+
default: {
|
242
|
+
const char *hexdig = "0123456789abcdef";
|
243
|
+
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
|
244
|
+
scratch[4] = hexdig[(ch >> 4) & 0xf];
|
245
|
+
scratch[5] = hexdig[ch & 0xf];
|
246
|
+
fbuffer_append(search->buffer, scratch, 6);
|
247
|
+
break;
|
248
|
+
}
|
249
|
+
}
|
250
|
+
break;
|
251
|
+
}
|
252
|
+
case 3: {
|
253
|
+
if (search->ptr[2] & 1) {
|
254
|
+
fbuffer_append(search->buffer, "\\u2029", 6);
|
255
|
+
} else {
|
256
|
+
fbuffer_append(search->buffer, "\\u2028", 6);
|
257
|
+
}
|
258
|
+
break;
|
259
|
+
}
|
260
|
+
}
|
261
|
+
search->cursor = (search->ptr += ch_len);
|
262
|
+
}
|
60
263
|
|
61
|
-
|
62
|
-
* Magic values subtracted from a buffer value during UTF8 conversion.
|
63
|
-
* This table contains as many values as there might be trailing bytes
|
64
|
-
* in a UTF-8 sequence.
|
65
|
-
*/
|
66
|
-
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
67
|
-
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
264
|
+
#ifdef HAVE_SIMD
|
68
265
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
* length = trailingBytesForUTF8[*source]+1;
|
74
|
-
* and the sequence is illegal right away if there aren't that many bytes
|
75
|
-
* available.
|
76
|
-
* If presented with a length > 4, this returns 0. The Unicode
|
77
|
-
* definition of UTF-8 goes up to 4-byte sequences.
|
78
|
-
*/
|
79
|
-
static unsigned char isLegalUTF8(const UTF8 *source, unsigned long length)
|
80
|
-
{
|
81
|
-
UTF8 a;
|
82
|
-
const UTF8 *srcptr = source+length;
|
83
|
-
switch (length) {
|
84
|
-
default: return 0;
|
85
|
-
/* Everything else falls through when "1"... */
|
86
|
-
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
|
87
|
-
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
|
88
|
-
case 2: if ((a = (*--srcptr)) > 0xBF) return 0;
|
89
|
-
|
90
|
-
switch (*source) {
|
91
|
-
/* no fall-through in this inner switch */
|
92
|
-
case 0xE0: if (a < 0xA0) return 0; break;
|
93
|
-
case 0xED: if (a > 0x9F) return 0; break;
|
94
|
-
case 0xF0: if (a < 0x90) return 0; break;
|
95
|
-
case 0xF4: if (a > 0x8F) return 0; break;
|
96
|
-
default: if (a < 0x80) return 0;
|
97
|
-
}
|
266
|
+
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
267
|
+
{
|
268
|
+
// Flush the buffer so that only the last 'len' characters remain unflushed.
|
269
|
+
search_flush(search);
|
98
270
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
271
|
+
FBuffer *buf = search->buffer;
|
272
|
+
fbuffer_inc_capa(buf, vec_len);
|
273
|
+
|
274
|
+
char *s = (buf->ptr + buf->len);
|
275
|
+
|
276
|
+
// Pad the buffer with dummy characters that won't need escaping.
|
277
|
+
// This seems wasteful at first sight, but memset of vector length is very fast.
|
278
|
+
memset(s, 'X', vec_len);
|
279
|
+
|
280
|
+
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
|
281
|
+
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
|
282
|
+
MEMCPY(s, search->ptr, char, len);
|
283
|
+
|
284
|
+
return s;
|
103
285
|
}
|
104
286
|
|
105
|
-
|
106
|
-
|
287
|
+
#ifdef HAVE_SIMD_NEON
|
288
|
+
|
289
|
+
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
107
290
|
{
|
108
|
-
|
291
|
+
uint64_t mask = search->matches_mask;
|
292
|
+
uint32_t index = trailing_zeros64(mask) >> 2;
|
109
293
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
294
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
295
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
296
|
+
// search->chunk_base + index >= search->ptr
|
297
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
298
|
+
// is one byte after the previous match then:
|
299
|
+
// search->chunk_base + index == search->ptr
|
300
|
+
search->ptr = search->chunk_base + index;
|
301
|
+
mask &= mask - 1;
|
302
|
+
search->matches_mask = mask;
|
303
|
+
search_flush(search);
|
304
|
+
return 1;
|
114
305
|
}
|
115
306
|
|
116
|
-
|
117
|
-
|
118
|
-
static void unicode_escape_to_buffer(FBuffer *buffer, char buf[6], UTF16
|
119
|
-
character)
|
307
|
+
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
308
|
+
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
120
309
|
{
|
121
|
-
|
122
|
-
|
310
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
311
|
+
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
312
|
+
return mask & 0x8888888888888888ull;
|
123
313
|
}
|
124
314
|
|
125
|
-
|
126
|
-
* and control characters are JSON escaped. */
|
127
|
-
static void convert_UTF8_to_JSON_ASCII(FBuffer *buffer, VALUE string, char script_safe)
|
315
|
+
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
128
316
|
{
|
129
|
-
|
130
|
-
|
131
|
-
|
317
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
318
|
+
|
319
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
320
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
321
|
+
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
322
|
+
|
323
|
+
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
324
|
+
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
132
325
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
326
|
+
return neon_match_mask(needs_escape);
|
327
|
+
}
|
328
|
+
|
329
|
+
static inline unsigned char search_escape_basic_neon(search_state *search)
|
330
|
+
{
|
331
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
332
|
+
// There are more matches if search->matches_mask > 0.
|
333
|
+
if (search->matches_mask > 0) {
|
334
|
+
return neon_next_match(search);
|
335
|
+
} else {
|
336
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
337
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
338
|
+
search->has_matches = false;
|
339
|
+
search->ptr = search->chunk_end;
|
139
340
|
}
|
140
|
-
|
141
|
-
|
142
|
-
|
341
|
+
}
|
342
|
+
|
343
|
+
/*
|
344
|
+
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
345
|
+
* need to be escaped.
|
346
|
+
*
|
347
|
+
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
348
|
+
*
|
349
|
+
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
350
|
+
* the vector instructions may work on larger vectors.
|
351
|
+
*
|
352
|
+
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' into vector registers.
|
353
|
+
*
|
354
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
355
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
356
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
357
|
+
*
|
358
|
+
* Next we load the first chunk of the ptr:
|
359
|
+
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
360
|
+
*
|
361
|
+
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
362
|
+
* as no bytes are less than 32 (0x20):
|
363
|
+
* [0 0 0 0 0 0 0 0]
|
364
|
+
*
|
365
|
+
* Next, we check if any byte in chunk is equal to a backslash:
|
366
|
+
* [0 0 0 FF 0 0 0 0]
|
367
|
+
*
|
368
|
+
* Finally we check if any byte in chunk is equal to a double quote:
|
369
|
+
* [FF 0 0 0 0 0 0 0]
|
370
|
+
*
|
371
|
+
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
372
|
+
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
373
|
+
* This is the needs_escape vector and it is equal to:
|
374
|
+
* [FF 0 0 FF 0 0 0 0]
|
375
|
+
*
|
376
|
+
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
377
|
+
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
378
|
+
*
|
379
|
+
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
380
|
+
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
381
|
+
* have at least one byte that needs to be escaped.
|
382
|
+
*/
|
383
|
+
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
384
|
+
uint64_t mask = neon_rules_update(search->ptr);
|
385
|
+
|
386
|
+
if (!mask) {
|
387
|
+
search->ptr += sizeof(uint8x16_t);
|
388
|
+
continue;
|
143
389
|
}
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
390
|
+
search->matches_mask = mask;
|
391
|
+
search->has_matches = true;
|
392
|
+
search->chunk_base = search->ptr;
|
393
|
+
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
394
|
+
return neon_next_match(search);
|
395
|
+
}
|
396
|
+
|
397
|
+
// There are fewer than 16 bytes left.
|
398
|
+
unsigned long remaining = (search->end - search->ptr);
|
399
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
400
|
+
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
401
|
+
|
402
|
+
uint64_t mask = neon_rules_update(s);
|
403
|
+
|
404
|
+
if (!mask) {
|
405
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
406
|
+
// search->cursor to search->ptr.
|
407
|
+
fbuffer_consumed(search->buffer, remaining);
|
408
|
+
search->ptr = search->end;
|
409
|
+
search->cursor = search->end;
|
410
|
+
return 0;
|
154
411
|
}
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
412
|
+
|
413
|
+
search->matches_mask = mask;
|
414
|
+
search->has_matches = true;
|
415
|
+
search->chunk_end = search->end;
|
416
|
+
search->chunk_base = search->ptr;
|
417
|
+
return neon_next_match(search);
|
418
|
+
}
|
419
|
+
|
420
|
+
if (search->ptr < search->end) {
|
421
|
+
return search_escape_basic(search);
|
422
|
+
}
|
423
|
+
|
424
|
+
search_flush(search);
|
425
|
+
return 0;
|
426
|
+
}
|
427
|
+
#endif /* HAVE_SIMD_NEON */
|
428
|
+
|
429
|
+
#ifdef HAVE_SIMD_SSE2
|
430
|
+
|
431
|
+
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
432
|
+
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
433
|
+
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
434
|
+
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
435
|
+
|
436
|
+
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
437
|
+
{
|
438
|
+
int mask = search->matches_mask;
|
439
|
+
int index = trailing_zeros(mask);
|
440
|
+
|
441
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
442
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
443
|
+
// search->chunk_base + index >= search->ptr
|
444
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
445
|
+
// is one byte after the previous match then:
|
446
|
+
// search->chunk_base + index == search->ptr
|
447
|
+
search->ptr = search->chunk_base + index;
|
448
|
+
mask &= mask - 1;
|
449
|
+
search->matches_mask = mask;
|
450
|
+
search_flush(search);
|
451
|
+
return 1;
|
452
|
+
}
|
453
|
+
|
454
|
+
#if defined(__clang__) || defined(__GNUC__)
|
455
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
164
456
|
#else
|
165
|
-
|
457
|
+
#define TARGET_SSE2
|
166
458
|
#endif
|
459
|
+
|
460
|
+
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
|
461
|
+
{
|
462
|
+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
463
|
+
|
464
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
465
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
466
|
+
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
467
|
+
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
468
|
+
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
469
|
+
return _mm_movemask_epi8(needs_escape);
|
470
|
+
}
|
471
|
+
|
472
|
+
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
473
|
+
{
|
474
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
475
|
+
// There are more matches if search->matches_mask > 0.
|
476
|
+
if (search->matches_mask > 0) {
|
477
|
+
return sse2_next_match(search);
|
478
|
+
} else {
|
479
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
480
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
481
|
+
search->has_matches = false;
|
482
|
+
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
483
|
+
search->ptr = search->end;
|
167
484
|
} else {
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
485
|
+
search->ptr = search->chunk_base + sizeof(__m128i);
|
486
|
+
}
|
487
|
+
}
|
488
|
+
}
|
489
|
+
|
490
|
+
while (search->ptr + sizeof(__m128i) <= search->end) {
|
491
|
+
int needs_escape_mask = sse2_update(search->ptr);
|
492
|
+
|
493
|
+
if (needs_escape_mask == 0) {
|
494
|
+
search->ptr += sizeof(__m128i);
|
495
|
+
continue;
|
496
|
+
}
|
497
|
+
|
498
|
+
search->has_matches = true;
|
499
|
+
search->matches_mask = needs_escape_mask;
|
500
|
+
search->chunk_base = search->ptr;
|
501
|
+
return sse2_next_match(search);
|
502
|
+
}
|
503
|
+
|
504
|
+
// There are fewer than 16 bytes left.
|
505
|
+
unsigned long remaining = (search->end - search->ptr);
|
506
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
507
|
+
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
508
|
+
|
509
|
+
int needs_escape_mask = sse2_update(s);
|
510
|
+
|
511
|
+
if (needs_escape_mask == 0) {
|
512
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
513
|
+
// search->cursor to search->ptr.
|
514
|
+
fbuffer_consumed(search->buffer, remaining);
|
515
|
+
search->ptr = search->end;
|
516
|
+
search->cursor = search->end;
|
517
|
+
return 0;
|
518
|
+
}
|
519
|
+
|
520
|
+
search->has_matches = true;
|
521
|
+
search->matches_mask = needs_escape_mask;
|
522
|
+
search->chunk_base = search->ptr;
|
523
|
+
return sse2_next_match(search);
|
524
|
+
}
|
525
|
+
|
526
|
+
if (search->ptr < search->end) {
|
527
|
+
return search_escape_basic(search);
|
528
|
+
}
|
529
|
+
|
530
|
+
search_flush(search);
|
531
|
+
return 0;
|
532
|
+
}
|
533
|
+
|
534
|
+
#endif /* HAVE_SIMD_SSE2 */
|
535
|
+
|
536
|
+
#endif /* HAVE_SIMD */
|
537
|
+
|
538
|
+
static const unsigned char script_safe_escape_table[256] = {
|
539
|
+
// ASCII Control Characters
|
540
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
541
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
542
|
+
// ASCII Characters
|
543
|
+
0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, // '"' and '/'
|
544
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
545
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
546
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
|
547
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
548
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
549
|
+
// Continuation byte
|
550
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
551
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
552
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
553
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
554
|
+
// First byte of a 2-byte code point
|
555
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
556
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
557
|
+
// First byte of a 3-byte code point
|
558
|
+
3, 3,11, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE2 is the start of \u2028 and \u2029
|
559
|
+
//First byte of a 4+ byte code point
|
560
|
+
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
|
561
|
+
};
|
562
|
+
|
563
|
+
static inline unsigned char search_script_safe_escape(search_state *search)
|
564
|
+
{
|
565
|
+
while (search->ptr < search->end) {
|
566
|
+
unsigned char ch = (unsigned char)*search->ptr;
|
567
|
+
unsigned char ch_len = script_safe_escape_table[ch];
|
568
|
+
|
569
|
+
if (RB_UNLIKELY(ch_len)) {
|
570
|
+
if (ch_len & ESCAPE_MASK) {
|
571
|
+
if (RB_UNLIKELY(ch_len == 11)) {
|
572
|
+
const unsigned char *uptr = (const unsigned char *)search->ptr;
|
573
|
+
if (!(uptr[1] == 0x80 && (uptr[2] >> 1) == 0x54)) {
|
574
|
+
search->ptr += 3;
|
575
|
+
continue;
|
206
576
|
}
|
207
577
|
}
|
578
|
+
search_flush(search);
|
579
|
+
return ch_len & CHAR_LENGTH_MASK;
|
580
|
+
} else {
|
581
|
+
search->ptr += ch_len;
|
208
582
|
}
|
209
|
-
} else if (ch > UNI_MAX_UTF16) {
|
210
|
-
#if UNI_STRICT_CONVERSION
|
211
|
-
source -= (extraBytesToRead+1); /* return to the start */
|
212
|
-
rb_raise(rb_path2class("JSON::GeneratorError"),
|
213
|
-
"source sequence is illegal/malformed utf8");
|
214
|
-
#else
|
215
|
-
unicode_escape_to_buffer(buffer, buf, UNI_REPLACEMENT_CHAR);
|
216
|
-
#endif
|
217
583
|
} else {
|
218
|
-
|
219
|
-
ch -= halfBase;
|
220
|
-
unicode_escape_to_buffer(buffer, buf, (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START));
|
221
|
-
unicode_escape_to_buffer(buffer, buf, (UTF16)((ch & halfMask) + UNI_SUR_LOW_START));
|
584
|
+
search->ptr++;
|
222
585
|
}
|
223
586
|
}
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
587
|
+
search_flush(search);
|
588
|
+
return 0;
|
589
|
+
}
|
590
|
+
|
591
|
+
static void convert_UTF8_to_script_safe_JSON(search_state *search)
|
592
|
+
{
|
593
|
+
unsigned char ch_len;
|
594
|
+
while ((ch_len = search_script_safe_escape(search))) {
|
595
|
+
escape_UTF8_char(search, ch_len);
|
596
|
+
}
|
597
|
+
}
|
598
|
+
|
599
|
+
static const unsigned char ascii_only_escape_table[256] = {
|
600
|
+
// ASCII Control Characters
|
601
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
602
|
+
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
603
|
+
// ASCII Characters
|
604
|
+
0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '"'
|
605
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
606
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
607
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, // '\\'
|
608
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
609
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
610
|
+
// Continuation byte
|
611
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
612
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
613
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
614
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
615
|
+
// First byte of a 2-byte code point
|
616
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
617
|
+
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
618
|
+
// First byte of a 3-byte code point
|
619
|
+
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
620
|
+
//First byte of a 4+ byte code point
|
621
|
+
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 9, 9,
|
622
|
+
};
|
623
|
+
|
624
|
+
static inline unsigned char search_ascii_only_escape(search_state *search, const unsigned char escape_table[256])
|
625
|
+
{
|
626
|
+
while (search->ptr < search->end) {
|
627
|
+
unsigned char ch = (unsigned char)*search->ptr;
|
628
|
+
unsigned char ch_len = escape_table[ch];
|
629
|
+
|
630
|
+
if (RB_UNLIKELY(ch_len)) {
|
631
|
+
search_flush(search);
|
632
|
+
return ch_len & CHAR_LENGTH_MASK;
|
633
|
+
} else {
|
634
|
+
search->ptr++;
|
635
|
+
}
|
636
|
+
}
|
637
|
+
search_flush(search);
|
638
|
+
return 0;
|
639
|
+
}
|
640
|
+
|
641
|
+
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
|
642
|
+
const unsigned char ch = (unsigned char)*search->ptr;
|
643
|
+
switch (ch_len) {
|
644
|
+
case 1: {
|
645
|
+
switch (ch) {
|
646
|
+
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
647
|
+
case '\\': fbuffer_append(search->buffer, "\\\\", 2); break;
|
648
|
+
case '/': fbuffer_append(search->buffer, "\\/", 2); break;
|
649
|
+
case '\b': fbuffer_append(search->buffer, "\\b", 2); break;
|
650
|
+
case '\f': fbuffer_append(search->buffer, "\\f", 2); break;
|
651
|
+
case '\n': fbuffer_append(search->buffer, "\\n", 2); break;
|
652
|
+
case '\r': fbuffer_append(search->buffer, "\\r", 2); break;
|
653
|
+
case '\t': fbuffer_append(search->buffer, "\\t", 2); break;
|
654
|
+
default: {
|
655
|
+
const char *hexdig = "0123456789abcdef";
|
656
|
+
char scratch[6] = { '\\', 'u', '0', '0', 0, 0 };
|
657
|
+
scratch[4] = hexdig[(ch >> 4) & 0xf];
|
658
|
+
scratch[5] = hexdig[ch & 0xf];
|
659
|
+
fbuffer_append(search->buffer, scratch, 6);
|
270
660
|
break;
|
661
|
+
}
|
271
662
|
}
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
663
|
+
break;
|
664
|
+
}
|
665
|
+
default: {
|
666
|
+
const char *hexdig = "0123456789abcdef";
|
667
|
+
char scratch[12] = { '\\', 'u', 0, 0, 0, 0, '\\', 'u' };
|
668
|
+
|
669
|
+
uint32_t wchar = 0;
|
670
|
+
|
671
|
+
switch(ch_len) {
|
672
|
+
case 2:
|
673
|
+
wchar = ch & 0x1F;
|
277
674
|
break;
|
278
|
-
case
|
279
|
-
|
280
|
-
escape_len = 2;
|
675
|
+
case 3:
|
676
|
+
wchar = ch & 0x0F;
|
281
677
|
break;
|
282
|
-
case
|
283
|
-
|
284
|
-
escape = "\\/";
|
285
|
-
escape_len = 2;
|
286
|
-
break;
|
287
|
-
}
|
288
|
-
default:
|
289
|
-
{
|
290
|
-
unsigned short clen = 1;
|
291
|
-
if (!ascii_only) {
|
292
|
-
clen += trailingBytesForUTF8[c];
|
293
|
-
if (end + clen > len) {
|
294
|
-
rb_raise(rb_path2class("JSON::GeneratorError"),
|
295
|
-
"partial character in source, but hit end");
|
296
|
-
}
|
297
|
-
|
298
|
-
if (script_safe && c == 0xE2) {
|
299
|
-
unsigned char c2 = (unsigned char) *(p+1);
|
300
|
-
unsigned char c3 = (unsigned char) *(p+2);
|
301
|
-
if (c2 == 0x80 && (c3 == 0xA8 || c3 == 0xA9)) {
|
302
|
-
fbuffer_append(buffer, ptr + start, end - start);
|
303
|
-
start = end = (end + clen);
|
304
|
-
if (c3 == 0xA8) {
|
305
|
-
fbuffer_append(buffer, "\\u2028", 6);
|
306
|
-
} else {
|
307
|
-
fbuffer_append(buffer, "\\u2029", 6);
|
308
|
-
}
|
309
|
-
continue;
|
310
|
-
}
|
311
|
-
}
|
312
|
-
|
313
|
-
if (!isLegalUTF8((UTF8 *) p, clen)) {
|
314
|
-
rb_raise(rb_path2class("JSON::GeneratorError"),
|
315
|
-
"source sequence is illegal/malformed utf-8");
|
316
|
-
}
|
317
|
-
}
|
318
|
-
end += clen;
|
319
|
-
}
|
320
|
-
continue;
|
678
|
+
case 4:
|
679
|
+
wchar = ch & 0x07;
|
321
680
|
break;
|
322
681
|
}
|
682
|
+
|
683
|
+
for (short i = 1; i < ch_len; i++) {
|
684
|
+
wchar = (wchar << 6) | (search->ptr[i] & 0x3F);
|
685
|
+
}
|
686
|
+
|
687
|
+
if (wchar <= 0xFFFF) {
|
688
|
+
scratch[2] = hexdig[wchar >> 12];
|
689
|
+
scratch[3] = hexdig[(wchar >> 8) & 0xf];
|
690
|
+
scratch[4] = hexdig[(wchar >> 4) & 0xf];
|
691
|
+
scratch[5] = hexdig[wchar & 0xf];
|
692
|
+
fbuffer_append(search->buffer, scratch, 6);
|
693
|
+
} else {
|
694
|
+
uint16_t hi, lo;
|
695
|
+
wchar -= 0x10000;
|
696
|
+
hi = 0xD800 + (uint16_t)(wchar >> 10);
|
697
|
+
lo = 0xDC00 + (uint16_t)(wchar & 0x3FF);
|
698
|
+
|
699
|
+
scratch[2] = hexdig[hi >> 12];
|
700
|
+
scratch[3] = hexdig[(hi >> 8) & 0xf];
|
701
|
+
scratch[4] = hexdig[(hi >> 4) & 0xf];
|
702
|
+
scratch[5] = hexdig[hi & 0xf];
|
703
|
+
|
704
|
+
scratch[8] = hexdig[lo >> 12];
|
705
|
+
scratch[9] = hexdig[(lo >> 8) & 0xf];
|
706
|
+
scratch[10] = hexdig[(lo >> 4) & 0xf];
|
707
|
+
scratch[11] = hexdig[lo & 0xf];
|
708
|
+
|
709
|
+
fbuffer_append(search->buffer, scratch, 12);
|
710
|
+
}
|
711
|
+
|
712
|
+
break;
|
323
713
|
}
|
324
|
-
fbuffer_append(buffer, ptr + start, end - start);
|
325
|
-
fbuffer_append(buffer, escape, escape_len);
|
326
|
-
start = ++end;
|
327
|
-
escape = NULL;
|
328
714
|
}
|
329
|
-
|
715
|
+
search->cursor = (search->ptr += ch_len);
|
330
716
|
}
|
331
717
|
|
332
|
-
static
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
718
|
+
static void convert_UTF8_to_ASCII_only_JSON(search_state *search, const unsigned char escape_table[256])
|
719
|
+
{
|
720
|
+
unsigned char ch_len;
|
721
|
+
while ((ch_len = search_ascii_only_escape(search, escape_table))) {
|
722
|
+
full_escape_UTF8_char(search, ch_len);
|
723
|
+
}
|
338
724
|
}
|
339
725
|
|
340
726
|
/*
|
@@ -429,7 +815,9 @@ static char *fstrndup(const char *ptr, unsigned long len) {
|
|
429
815
|
*/
|
430
816
|
static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
431
817
|
{
|
432
|
-
|
818
|
+
rb_check_arity(argc, 0, 1);
|
819
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
820
|
+
return cState_partial_generate(Vstate, self, generate_json_object, Qfalse);
|
433
821
|
}
|
434
822
|
|
435
823
|
/*
|
@@ -441,7 +829,9 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
|
441
829
|
* produced JSON string output further.
|
442
830
|
*/
|
443
831
|
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
|
444
|
-
|
832
|
+
rb_check_arity(argc, 0, 1);
|
833
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
834
|
+
return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
|
445
835
|
}
|
446
836
|
|
447
837
|
#ifdef RUBY_INTEGER_UNIFICATION
|
@@ -452,7 +842,9 @@ static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
|
|
452
842
|
*/
|
453
843
|
static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
|
454
844
|
{
|
455
|
-
|
845
|
+
rb_check_arity(argc, 0, 1);
|
846
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
847
|
+
return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse);
|
456
848
|
}
|
457
849
|
|
458
850
|
#else
|
@@ -463,7 +855,9 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self)
|
|
463
855
|
*/
|
464
856
|
static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
|
465
857
|
{
|
466
|
-
|
858
|
+
rb_check_arity(argc, 0, 1);
|
859
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
860
|
+
return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse);
|
467
861
|
}
|
468
862
|
|
469
863
|
/*
|
@@ -473,7 +867,9 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self)
|
|
473
867
|
*/
|
474
868
|
static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
|
475
869
|
{
|
476
|
-
|
870
|
+
rb_check_arity(argc, 0, 1);
|
871
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
872
|
+
return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse);
|
477
873
|
}
|
478
874
|
#endif
|
479
875
|
|
@@ -484,7 +880,9 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self)
|
|
484
880
|
*/
|
485
881
|
static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
|
486
882
|
{
|
487
|
-
|
883
|
+
rb_check_arity(argc, 0, 1);
|
884
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
885
|
+
return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
|
488
886
|
}
|
489
887
|
|
490
888
|
/*
|
@@ -507,7 +905,9 @@ static VALUE mString_included_s(VALUE self, VALUE modul) {
|
|
507
905
|
*/
|
508
906
|
static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
|
509
907
|
{
|
510
|
-
|
908
|
+
rb_check_arity(argc, 0, 1);
|
909
|
+
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
910
|
+
return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
|
511
911
|
}
|
512
912
|
|
513
913
|
/*
|
@@ -524,7 +924,7 @@ static VALUE mString_to_json_raw_object(VALUE self)
|
|
524
924
|
VALUE result = rb_hash_new();
|
525
925
|
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
|
526
926
|
ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
|
527
|
-
rb_hash_aset(result,
|
927
|
+
rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
|
528
928
|
return result;
|
529
929
|
}
|
530
930
|
|
@@ -562,7 +962,8 @@ static VALUE mString_Extend_json_create(VALUE self, VALUE o)
|
|
562
962
|
*/
|
563
963
|
static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
|
564
964
|
{
|
565
|
-
|
965
|
+
rb_check_arity(argc, 0, 1);
|
966
|
+
return rb_utf8_str_new("true", 4);
|
566
967
|
}
|
567
968
|
|
568
969
|
/*
|
@@ -572,7 +973,8 @@ static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self)
|
|
572
973
|
*/
|
573
974
|
static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
|
574
975
|
{
|
575
|
-
|
976
|
+
rb_check_arity(argc, 0, 1);
|
977
|
+
return rb_utf8_str_new("false", 5);
|
576
978
|
}
|
577
979
|
|
578
980
|
/*
|
@@ -582,7 +984,8 @@ static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self)
|
|
582
984
|
*/
|
583
985
|
static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self)
|
584
986
|
{
|
585
|
-
|
987
|
+
rb_check_arity(argc, 0, 1);
|
988
|
+
return rb_utf8_str_new("null", 4);
|
586
989
|
}
|
587
990
|
|
588
991
|
/*
|
@@ -599,36 +1002,40 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self)
|
|
599
1002
|
rb_scan_args(argc, argv, "01", &state);
|
600
1003
|
Check_Type(string, T_STRING);
|
601
1004
|
state = cState_from_state_s(cState, state);
|
602
|
-
return cState_partial_generate(state, string);
|
1005
|
+
return cState_partial_generate(state, string, generate_json_string, Qfalse);
|
1006
|
+
}
|
1007
|
+
|
1008
|
+
static void State_mark(void *ptr)
|
1009
|
+
{
|
1010
|
+
JSON_Generator_State *state = ptr;
|
1011
|
+
rb_gc_mark_movable(state->indent);
|
1012
|
+
rb_gc_mark_movable(state->space);
|
1013
|
+
rb_gc_mark_movable(state->space_before);
|
1014
|
+
rb_gc_mark_movable(state->object_nl);
|
1015
|
+
rb_gc_mark_movable(state->array_nl);
|
1016
|
+
rb_gc_mark_movable(state->as_json);
|
1017
|
+
}
|
1018
|
+
|
1019
|
+
static void State_compact(void *ptr)
|
1020
|
+
{
|
1021
|
+
JSON_Generator_State *state = ptr;
|
1022
|
+
state->indent = rb_gc_location(state->indent);
|
1023
|
+
state->space = rb_gc_location(state->space);
|
1024
|
+
state->space_before = rb_gc_location(state->space_before);
|
1025
|
+
state->object_nl = rb_gc_location(state->object_nl);
|
1026
|
+
state->array_nl = rb_gc_location(state->array_nl);
|
1027
|
+
state->as_json = rb_gc_location(state->as_json);
|
603
1028
|
}
|
604
1029
|
|
605
1030
|
static void State_free(void *ptr)
|
606
1031
|
{
|
607
1032
|
JSON_Generator_State *state = ptr;
|
608
|
-
if (state->indent) ruby_xfree(state->indent);
|
609
|
-
if (state->space) ruby_xfree(state->space);
|
610
|
-
if (state->space_before) ruby_xfree(state->space_before);
|
611
|
-
if (state->object_nl) ruby_xfree(state->object_nl);
|
612
|
-
if (state->array_nl) ruby_xfree(state->array_nl);
|
613
|
-
if (state->array_delim) fbuffer_free(state->array_delim);
|
614
|
-
if (state->object_delim) fbuffer_free(state->object_delim);
|
615
|
-
if (state->object_delim2) fbuffer_free(state->object_delim2);
|
616
1033
|
ruby_xfree(state);
|
617
1034
|
}
|
618
1035
|
|
619
1036
|
static size_t State_memsize(const void *ptr)
|
620
1037
|
{
|
621
|
-
|
622
|
-
size_t size = sizeof(*state);
|
623
|
-
if (state->indent) size += state->indent_len + 1;
|
624
|
-
if (state->space) size += state->space_len + 1;
|
625
|
-
if (state->space_before) size += state->space_before_len + 1;
|
626
|
-
if (state->object_nl) size += state->object_nl_len + 1;
|
627
|
-
if (state->array_nl) size += state->array_nl_len + 1;
|
628
|
-
if (state->array_delim) size += FBUFFER_CAPA(state->array_delim);
|
629
|
-
if (state->object_delim) size += FBUFFER_CAPA(state->object_delim);
|
630
|
-
if (state->object_delim2) size += FBUFFER_CAPA(state->object_delim2);
|
631
|
-
return size;
|
1038
|
+
return sizeof(JSON_Generator_State);
|
632
1039
|
}
|
633
1040
|
|
634
1041
|
#ifndef HAVE_RB_EXT_RACTOR_SAFE
|
@@ -636,474 +1043,459 @@ static size_t State_memsize(const void *ptr)
|
|
636
1043
|
# define RUBY_TYPED_FROZEN_SHAREABLE 0
|
637
1044
|
#endif
|
638
1045
|
|
639
|
-
#ifdef NEW_TYPEDDATA_WRAPPER
|
640
1046
|
static const rb_data_type_t JSON_Generator_State_type = {
|
641
1047
|
"JSON/Generator/State",
|
642
|
-
{
|
643
|
-
|
1048
|
+
{
|
1049
|
+
.dmark = State_mark,
|
1050
|
+
.dfree = State_free,
|
1051
|
+
.dsize = State_memsize,
|
1052
|
+
.dcompact = State_compact,
|
1053
|
+
},
|
644
1054
|
0, 0,
|
645
|
-
RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
|
646
|
-
#endif
|
1055
|
+
RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
|
647
1056
|
};
|
648
|
-
#endif
|
649
1057
|
|
650
|
-
static
|
1058
|
+
static void state_init(JSON_Generator_State *state)
|
651
1059
|
{
|
652
|
-
JSON_Generator_State *state;
|
653
|
-
return TypedData_Make_Struct(klass, JSON_Generator_State,
|
654
|
-
&JSON_Generator_State_type, state);
|
655
|
-
}
|
656
|
-
|
657
|
-
/*
|
658
|
-
* call-seq: configure(opts)
|
659
|
-
*
|
660
|
-
* Configure this State instance with the Hash _opts_, and return
|
661
|
-
* itself.
|
662
|
-
*/
|
663
|
-
static VALUE cState_configure(VALUE self, VALUE opts)
|
664
|
-
{
|
665
|
-
VALUE tmp;
|
666
|
-
GET_STATE(self);
|
667
|
-
tmp = rb_check_convert_type(opts, T_HASH, "Hash", "to_hash");
|
668
|
-
if (NIL_P(tmp)) tmp = rb_convert_type(opts, T_HASH, "Hash", "to_h");
|
669
|
-
opts = tmp;
|
670
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_indent));
|
671
|
-
if (RTEST(tmp)) {
|
672
|
-
unsigned long len;
|
673
|
-
Check_Type(tmp, T_STRING);
|
674
|
-
len = RSTRING_LEN(tmp);
|
675
|
-
state->indent = fstrndup(RSTRING_PTR(tmp), len + 1);
|
676
|
-
state->indent_len = len;
|
677
|
-
}
|
678
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_space));
|
679
|
-
if (RTEST(tmp)) {
|
680
|
-
unsigned long len;
|
681
|
-
Check_Type(tmp, T_STRING);
|
682
|
-
len = RSTRING_LEN(tmp);
|
683
|
-
state->space = fstrndup(RSTRING_PTR(tmp), len + 1);
|
684
|
-
state->space_len = len;
|
685
|
-
}
|
686
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_space_before));
|
687
|
-
if (RTEST(tmp)) {
|
688
|
-
unsigned long len;
|
689
|
-
Check_Type(tmp, T_STRING);
|
690
|
-
len = RSTRING_LEN(tmp);
|
691
|
-
state->space_before = fstrndup(RSTRING_PTR(tmp), len + 1);
|
692
|
-
state->space_before_len = len;
|
693
|
-
}
|
694
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_array_nl));
|
695
|
-
if (RTEST(tmp)) {
|
696
|
-
unsigned long len;
|
697
|
-
Check_Type(tmp, T_STRING);
|
698
|
-
len = RSTRING_LEN(tmp);
|
699
|
-
state->array_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
|
700
|
-
state->array_nl_len = len;
|
701
|
-
}
|
702
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_object_nl));
|
703
|
-
if (RTEST(tmp)) {
|
704
|
-
unsigned long len;
|
705
|
-
Check_Type(tmp, T_STRING);
|
706
|
-
len = RSTRING_LEN(tmp);
|
707
|
-
state->object_nl = fstrndup(RSTRING_PTR(tmp), len + 1);
|
708
|
-
state->object_nl_len = len;
|
709
|
-
}
|
710
|
-
tmp = ID2SYM(i_max_nesting);
|
711
1060
|
state->max_nesting = 100;
|
712
|
-
|
713
|
-
VALUE max_nesting = rb_hash_aref(opts, tmp);
|
714
|
-
if (RTEST(max_nesting)) {
|
715
|
-
Check_Type(max_nesting, T_FIXNUM);
|
716
|
-
state->max_nesting = FIX2LONG(max_nesting);
|
717
|
-
} else {
|
718
|
-
state->max_nesting = 0;
|
719
|
-
}
|
720
|
-
}
|
721
|
-
tmp = ID2SYM(i_depth);
|
722
|
-
state->depth = 0;
|
723
|
-
if (option_given_p(opts, tmp)) {
|
724
|
-
VALUE depth = rb_hash_aref(opts, tmp);
|
725
|
-
if (RTEST(depth)) {
|
726
|
-
Check_Type(depth, T_FIXNUM);
|
727
|
-
state->depth = FIX2LONG(depth);
|
728
|
-
} else {
|
729
|
-
state->depth = 0;
|
730
|
-
}
|
731
|
-
}
|
732
|
-
tmp = ID2SYM(i_buffer_initial_length);
|
733
|
-
if (option_given_p(opts, tmp)) {
|
734
|
-
VALUE buffer_initial_length = rb_hash_aref(opts, tmp);
|
735
|
-
if (RTEST(buffer_initial_length)) {
|
736
|
-
long initial_length;
|
737
|
-
Check_Type(buffer_initial_length, T_FIXNUM);
|
738
|
-
initial_length = FIX2LONG(buffer_initial_length);
|
739
|
-
if (initial_length > 0) state->buffer_initial_length = initial_length;
|
740
|
-
}
|
741
|
-
}
|
742
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_allow_nan));
|
743
|
-
state->allow_nan = RTEST(tmp);
|
744
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_ascii_only));
|
745
|
-
state->ascii_only = RTEST(tmp);
|
746
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_script_safe));
|
747
|
-
state->script_safe = RTEST(tmp);
|
748
|
-
if (!state->script_safe) {
|
749
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_escape_slash));
|
750
|
-
state->script_safe = RTEST(tmp);
|
751
|
-
}
|
752
|
-
tmp = rb_hash_aref(opts, ID2SYM(i_strict));
|
753
|
-
state->strict = RTEST(tmp);
|
754
|
-
return self;
|
1061
|
+
state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
|
755
1062
|
}
|
756
1063
|
|
757
|
-
static
|
1064
|
+
static VALUE cState_s_allocate(VALUE klass)
|
758
1065
|
{
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
long key_len = RSTRING_LEN(key);
|
764
|
-
VALUE value = rb_iv_get(state, StringValueCStr(key));
|
765
|
-
rb_hash_aset(hash, rb_str_intern(rb_str_substr(key, 1, key_len - 1)), value);
|
766
|
-
}
|
1066
|
+
JSON_Generator_State *state;
|
1067
|
+
VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
|
1068
|
+
state_init(state);
|
1069
|
+
return obj;
|
767
1070
|
}
|
768
1071
|
|
769
|
-
|
770
|
-
* call-seq: to_h
|
771
|
-
*
|
772
|
-
* Returns the configuration instance variables as a hash, that can be
|
773
|
-
* passed to the configure method.
|
774
|
-
*/
|
775
|
-
static VALUE cState_to_h(VALUE self)
|
1072
|
+
static void vstate_spill(struct generate_json_data *data)
|
776
1073
|
{
|
777
|
-
VALUE
|
778
|
-
GET_STATE(
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
rb_hash_aset(result, ID2SYM(i_script_safe), state->script_safe ? Qtrue : Qfalse);
|
789
|
-
rb_hash_aset(result, ID2SYM(i_strict), state->strict ? Qtrue : Qfalse);
|
790
|
-
rb_hash_aset(result, ID2SYM(i_depth), LONG2FIX(state->depth));
|
791
|
-
rb_hash_aset(result, ID2SYM(i_buffer_initial_length), LONG2FIX(state->buffer_initial_length));
|
792
|
-
return result;
|
1074
|
+
VALUE vstate = cState_s_allocate(cState);
|
1075
|
+
GET_STATE(vstate);
|
1076
|
+
MEMCPY(state, data->state, JSON_Generator_State, 1);
|
1077
|
+
data->state = state;
|
1078
|
+
data->vstate = vstate;
|
1079
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->indent);
|
1080
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->space);
|
1081
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->space_before);
|
1082
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl);
|
1083
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl);
|
1084
|
+
RB_OBJ_WRITTEN(vstate, Qundef, state->as_json);
|
793
1085
|
}
|
794
1086
|
|
795
|
-
|
796
|
-
* call-seq: [](name)
|
797
|
-
*
|
798
|
-
* Returns the value returned by method +name+.
|
799
|
-
*/
|
800
|
-
static VALUE cState_aref(VALUE self, VALUE name)
|
1087
|
+
static inline VALUE vstate_get(struct generate_json_data *data)
|
801
1088
|
{
|
802
|
-
|
803
|
-
|
804
|
-
return rb_funcall(self, i_send, 1, name);
|
805
|
-
} else {
|
806
|
-
return rb_attr_get(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)));
|
1089
|
+
if (RB_UNLIKELY(!data->vstate)) {
|
1090
|
+
vstate_spill(data);
|
807
1091
|
}
|
1092
|
+
return data->vstate;
|
808
1093
|
}
|
809
1094
|
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
static VALUE
|
1095
|
+
struct hash_foreach_arg {
|
1096
|
+
struct generate_json_data *data;
|
1097
|
+
int iter;
|
1098
|
+
};
|
1099
|
+
|
1100
|
+
static VALUE
|
1101
|
+
convert_string_subclass(VALUE key)
|
816
1102
|
{
|
817
|
-
VALUE
|
1103
|
+
VALUE key_to_s = rb_funcall(key, i_to_s, 0);
|
818
1104
|
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
rb_ivar_set(self, rb_intern_str(rb_str_concat(rb_str_new2("@"), name)), value);
|
1105
|
+
if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
|
1106
|
+
VALUE cname = rb_obj_class(key);
|
1107
|
+
rb_raise(rb_eTypeError,
|
1108
|
+
"can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
|
1109
|
+
cname, "String", cname, "to_s", rb_obj_class(key_to_s));
|
825
1110
|
}
|
826
|
-
return Qnil;
|
827
|
-
}
|
828
1111
|
|
829
|
-
|
830
|
-
|
831
|
-
JSON_Generator_State *state;
|
832
|
-
VALUE Vstate;
|
833
|
-
int iter;
|
834
|
-
};
|
1112
|
+
return key_to_s;
|
1113
|
+
}
|
835
1114
|
|
836
1115
|
static int
|
837
1116
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
838
1117
|
{
|
839
1118
|
struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg;
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
long object_nl_len = state->object_nl_len;
|
846
|
-
char *indent = state->indent;
|
847
|
-
long indent_len = state->indent_len;
|
848
|
-
char *delim = FBUFFER_PTR(state->object_delim);
|
849
|
-
long delim_len = FBUFFER_LEN(state->object_delim);
|
850
|
-
char *delim2 = FBUFFER_PTR(state->object_delim2);
|
851
|
-
long delim2_len = FBUFFER_LEN(state->object_delim2);
|
1119
|
+
struct generate_json_data *data = arg->data;
|
1120
|
+
|
1121
|
+
FBuffer *buffer = data->buffer;
|
1122
|
+
JSON_Generator_State *state = data->state;
|
1123
|
+
|
852
1124
|
long depth = state->depth;
|
853
1125
|
int j;
|
854
|
-
VALUE klass, key_to_s;
|
855
1126
|
|
856
|
-
if (arg->iter > 0)
|
857
|
-
if (object_nl) {
|
858
|
-
|
1127
|
+
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
|
1128
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
1129
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
859
1130
|
}
|
860
|
-
if (indent) {
|
1131
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
861
1132
|
for (j = 0; j < depth; j++) {
|
862
|
-
|
1133
|
+
fbuffer_append_str(buffer, data->state->indent);
|
863
1134
|
}
|
864
1135
|
}
|
865
1136
|
|
866
|
-
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
1137
|
+
VALUE key_to_s;
|
1138
|
+
switch(rb_type(key)) {
|
1139
|
+
case T_STRING:
|
1140
|
+
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
1141
|
+
key_to_s = key;
|
1142
|
+
} else {
|
1143
|
+
key_to_s = convert_string_subclass(key);
|
1144
|
+
}
|
1145
|
+
break;
|
1146
|
+
case T_SYMBOL:
|
1147
|
+
key_to_s = rb_sym2str(key);
|
1148
|
+
break;
|
1149
|
+
default:
|
1150
|
+
key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
|
1151
|
+
break;
|
1152
|
+
}
|
1153
|
+
|
1154
|
+
if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
|
1155
|
+
generate_json_string(buffer, data, key_to_s);
|
871
1156
|
} else {
|
872
|
-
|
1157
|
+
generate_json(buffer, data, key_to_s);
|
873
1158
|
}
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
generate_json(buffer,
|
1159
|
+
if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
|
1160
|
+
fbuffer_append_char(buffer, ':');
|
1161
|
+
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
|
1162
|
+
generate_json(buffer, data, val);
|
878
1163
|
|
879
1164
|
arg->iter++;
|
880
1165
|
return ST_CONTINUE;
|
881
1166
|
}
|
882
1167
|
|
883
|
-
static
|
1168
|
+
static inline long increase_depth(struct generate_json_data *data)
|
884
1169
|
{
|
885
|
-
|
886
|
-
long object_nl_len = state->object_nl_len;
|
887
|
-
char *indent = state->indent;
|
888
|
-
long indent_len = state->indent_len;
|
889
|
-
long max_nesting = state->max_nesting;
|
1170
|
+
JSON_Generator_State *state = data->state;
|
890
1171
|
long depth = ++state->depth;
|
1172
|
+
if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
|
1173
|
+
rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
|
1174
|
+
}
|
1175
|
+
return depth;
|
1176
|
+
}
|
1177
|
+
|
1178
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1179
|
+
{
|
891
1180
|
int j;
|
892
|
-
|
1181
|
+
long depth = increase_depth(data);
|
893
1182
|
|
894
|
-
if (
|
895
|
-
|
1183
|
+
if (RHASH_SIZE(obj) == 0) {
|
1184
|
+
fbuffer_append(buffer, "{}", 2);
|
1185
|
+
--data->state->depth;
|
1186
|
+
return;
|
896
1187
|
}
|
1188
|
+
|
897
1189
|
fbuffer_append_char(buffer, '{');
|
898
1190
|
|
899
|
-
arg
|
900
|
-
|
901
|
-
|
902
|
-
|
1191
|
+
struct hash_foreach_arg arg = {
|
1192
|
+
.data = data,
|
1193
|
+
.iter = 0,
|
1194
|
+
};
|
903
1195
|
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
|
904
1196
|
|
905
|
-
depth = --state->depth;
|
906
|
-
if (object_nl) {
|
907
|
-
|
908
|
-
if (indent) {
|
1197
|
+
depth = --data->state->depth;
|
1198
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
1199
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
1200
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
909
1201
|
for (j = 0; j < depth; j++) {
|
910
|
-
|
1202
|
+
fbuffer_append_str(buffer, data->state->indent);
|
911
1203
|
}
|
912
1204
|
}
|
913
1205
|
}
|
914
1206
|
fbuffer_append_char(buffer, '}');
|
915
1207
|
}
|
916
1208
|
|
917
|
-
static void generate_json_array(FBuffer *buffer,
|
1209
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
918
1210
|
{
|
919
|
-
char *array_nl = state->array_nl;
|
920
|
-
long array_nl_len = state->array_nl_len;
|
921
|
-
char *indent = state->indent;
|
922
|
-
long indent_len = state->indent_len;
|
923
|
-
long max_nesting = state->max_nesting;
|
924
|
-
char *delim = FBUFFER_PTR(state->array_delim);
|
925
|
-
long delim_len = FBUFFER_LEN(state->array_delim);
|
926
|
-
long depth = ++state->depth;
|
927
1211
|
int i, j;
|
928
|
-
|
929
|
-
|
1212
|
+
long depth = increase_depth(data);
|
1213
|
+
|
1214
|
+
if (RARRAY_LEN(obj) == 0) {
|
1215
|
+
fbuffer_append(buffer, "[]", 2);
|
1216
|
+
--data->state->depth;
|
1217
|
+
return;
|
930
1218
|
}
|
1219
|
+
|
931
1220
|
fbuffer_append_char(buffer, '[');
|
932
|
-
if (array_nl)
|
1221
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
933
1222
|
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
934
|
-
if (i > 0)
|
935
|
-
|
1223
|
+
if (i > 0) {
|
1224
|
+
fbuffer_append_char(buffer, ',');
|
1225
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
1226
|
+
}
|
1227
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
936
1228
|
for (j = 0; j < depth; j++) {
|
937
|
-
|
1229
|
+
fbuffer_append_str(buffer, data->state->indent);
|
938
1230
|
}
|
939
1231
|
}
|
940
|
-
generate_json(buffer,
|
1232
|
+
generate_json(buffer, data, RARRAY_AREF(obj, i));
|
941
1233
|
}
|
942
|
-
state->depth = --depth;
|
943
|
-
if (array_nl) {
|
944
|
-
|
945
|
-
if (indent) {
|
1234
|
+
data->state->depth = --depth;
|
1235
|
+
if (RB_UNLIKELY(data->state->array_nl)) {
|
1236
|
+
fbuffer_append_str(buffer, data->state->array_nl);
|
1237
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
946
1238
|
for (j = 0; j < depth; j++) {
|
947
|
-
|
1239
|
+
fbuffer_append_str(buffer, data->state->indent);
|
948
1240
|
}
|
949
1241
|
}
|
950
1242
|
}
|
951
1243
|
fbuffer_append_char(buffer, ']');
|
952
1244
|
}
|
953
1245
|
|
954
|
-
|
955
|
-
static int enc_utf8_compatible_p(rb_encoding *enc)
|
1246
|
+
static inline int enc_utf8_compatible_p(int enc_idx)
|
956
1247
|
{
|
957
|
-
if (
|
958
|
-
if (
|
1248
|
+
if (enc_idx == usascii_encindex) return 1;
|
1249
|
+
if (enc_idx == utf8_encindex) return 1;
|
959
1250
|
return 0;
|
960
1251
|
}
|
961
|
-
#endif
|
962
1252
|
|
963
|
-
static
|
1253
|
+
static VALUE encode_json_string_try(VALUE str)
|
1254
|
+
{
|
1255
|
+
return rb_funcall(str, i_encode, 1, Encoding_UTF_8);
|
1256
|
+
}
|
1257
|
+
|
1258
|
+
static VALUE encode_json_string_rescue(VALUE str, VALUE exception)
|
964
1259
|
{
|
1260
|
+
raise_generator_error_str(str, rb_funcall(exception, rb_intern("message"), 0));
|
1261
|
+
return Qundef;
|
1262
|
+
}
|
1263
|
+
|
1264
|
+
static inline VALUE ensure_valid_encoding(VALUE str)
|
1265
|
+
{
|
1266
|
+
int encindex = RB_ENCODING_GET(str);
|
1267
|
+
VALUE utf8_string;
|
1268
|
+
if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) {
|
1269
|
+
if (encindex == binary_encindex) {
|
1270
|
+
utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex);
|
1271
|
+
switch (rb_enc_str_coderange(utf8_string)) {
|
1272
|
+
case ENC_CODERANGE_7BIT:
|
1273
|
+
return utf8_string;
|
1274
|
+
case ENC_CODERANGE_VALID:
|
1275
|
+
// For historical reason, we silently reinterpret binary strings as UTF-8 if it would work.
|
1276
|
+
// TODO: Raise in 3.0.0
|
1277
|
+
rb_warn("JSON.generate: UTF-8 string passed as BINARY, this will raise an encoding error in json 3.0");
|
1278
|
+
return utf8_string;
|
1279
|
+
break;
|
1280
|
+
}
|
1281
|
+
}
|
1282
|
+
|
1283
|
+
str = rb_rescue(encode_json_string_try, str, encode_json_string_rescue, str);
|
1284
|
+
}
|
1285
|
+
return str;
|
1286
|
+
}
|
1287
|
+
|
1288
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1289
|
+
{
|
1290
|
+
obj = ensure_valid_encoding(obj);
|
1291
|
+
|
965
1292
|
fbuffer_append_char(buffer, '"');
|
966
|
-
|
967
|
-
|
968
|
-
|
1293
|
+
|
1294
|
+
long len;
|
1295
|
+
search_state search;
|
1296
|
+
search.buffer = buffer;
|
1297
|
+
RSTRING_GETMEM(obj, search.ptr, len);
|
1298
|
+
search.cursor = search.ptr;
|
1299
|
+
search.end = search.ptr + len;
|
1300
|
+
|
1301
|
+
#ifdef HAVE_SIMD
|
1302
|
+
search.matches_mask = 0;
|
1303
|
+
search.has_matches = false;
|
1304
|
+
search.chunk_base = NULL;
|
1305
|
+
#endif /* HAVE_SIMD */
|
1306
|
+
|
1307
|
+
switch(rb_enc_str_coderange(obj)) {
|
1308
|
+
case ENC_CODERANGE_7BIT:
|
1309
|
+
case ENC_CODERANGE_VALID:
|
1310
|
+
if (RB_UNLIKELY(data->state->ascii_only)) {
|
1311
|
+
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
1312
|
+
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
1313
|
+
convert_UTF8_to_script_safe_JSON(&search);
|
1314
|
+
} else {
|
1315
|
+
convert_UTF8_to_JSON(&search);
|
1316
|
+
}
|
1317
|
+
break;
|
1318
|
+
default:
|
1319
|
+
raise_generator_error(obj, "source sequence is illegal/malformed utf-8");
|
1320
|
+
break;
|
969
1321
|
}
|
970
|
-
|
971
|
-
|
972
|
-
|
1322
|
+
fbuffer_append_char(buffer, '"');
|
1323
|
+
}
|
1324
|
+
|
1325
|
+
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1326
|
+
{
|
1327
|
+
VALUE tmp;
|
1328
|
+
if (rb_respond_to(obj, i_to_json)) {
|
1329
|
+
tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
|
1330
|
+
Check_Type(tmp, T_STRING);
|
1331
|
+
fbuffer_append_str(buffer, tmp);
|
973
1332
|
} else {
|
974
|
-
|
1333
|
+
tmp = rb_funcall(obj, i_to_s, 0);
|
1334
|
+
Check_Type(tmp, T_STRING);
|
1335
|
+
generate_json_string(buffer, data, tmp);
|
1336
|
+
}
|
1337
|
+
}
|
1338
|
+
|
1339
|
+
static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1340
|
+
{
|
1341
|
+
if (data->state->strict) {
|
1342
|
+
generate_json_string(buffer, data, rb_sym2str(obj));
|
1343
|
+
} else {
|
1344
|
+
generate_json_fallback(buffer, data, obj);
|
975
1345
|
}
|
976
|
-
fbuffer_append_char(buffer, '"');
|
977
1346
|
}
|
978
1347
|
|
979
|
-
static void generate_json_null(FBuffer *buffer,
|
1348
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
980
1349
|
{
|
981
1350
|
fbuffer_append(buffer, "null", 4);
|
982
1351
|
}
|
983
1352
|
|
984
|
-
static void generate_json_false(FBuffer *buffer,
|
1353
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
985
1354
|
{
|
986
1355
|
fbuffer_append(buffer, "false", 5);
|
987
1356
|
}
|
988
1357
|
|
989
|
-
static void generate_json_true(FBuffer *buffer,
|
1358
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
990
1359
|
{
|
991
1360
|
fbuffer_append(buffer, "true", 4);
|
992
1361
|
}
|
993
1362
|
|
994
|
-
static void generate_json_fixnum(FBuffer *buffer,
|
1363
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
995
1364
|
{
|
996
1365
|
fbuffer_append_long(buffer, FIX2LONG(obj));
|
997
1366
|
}
|
998
1367
|
|
999
|
-
static void generate_json_bignum(FBuffer *buffer,
|
1368
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1000
1369
|
{
|
1001
1370
|
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
1002
1371
|
fbuffer_append_str(buffer, tmp);
|
1003
1372
|
}
|
1004
1373
|
|
1005
1374
|
#ifdef RUBY_INTEGER_UNIFICATION
|
1006
|
-
static void generate_json_integer(FBuffer *buffer,
|
1375
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1007
1376
|
{
|
1008
1377
|
if (FIXNUM_P(obj))
|
1009
|
-
generate_json_fixnum(buffer,
|
1378
|
+
generate_json_fixnum(buffer, data, obj);
|
1010
1379
|
else
|
1011
|
-
generate_json_bignum(buffer,
|
1380
|
+
generate_json_bignum(buffer, data, obj);
|
1012
1381
|
}
|
1013
1382
|
#endif
|
1014
|
-
|
1383
|
+
|
1384
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1015
1385
|
{
|
1016
1386
|
double value = RFLOAT_VALUE(obj);
|
1017
|
-
char allow_nan = state->allow_nan;
|
1018
|
-
|
1019
|
-
|
1020
|
-
if (
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1387
|
+
char allow_nan = data->state->allow_nan;
|
1388
|
+
if (isinf(value) || isnan(value)) {
|
1389
|
+
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
|
1390
|
+
if (!allow_nan) {
|
1391
|
+
if (data->state->strict && data->state->as_json) {
|
1392
|
+
VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
|
1393
|
+
if (casted_obj != obj) {
|
1394
|
+
increase_depth(data);
|
1395
|
+
generate_json(buffer, data, casted_obj);
|
1396
|
+
data->state->depth--;
|
1397
|
+
return;
|
1398
|
+
}
|
1399
|
+
}
|
1400
|
+
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
|
1024
1401
|
}
|
1025
|
-
}
|
1026
|
-
fbuffer_append_str(buffer, tmp);
|
1027
|
-
}
|
1028
1402
|
|
1029
|
-
|
1030
|
-
{
|
1031
|
-
VALUE tmp;
|
1032
|
-
VALUE klass = CLASS_OF(obj);
|
1033
|
-
if (klass == rb_cHash) {
|
1034
|
-
generate_json_object(buffer, Vstate, state, obj);
|
1035
|
-
} else if (klass == rb_cArray) {
|
1036
|
-
generate_json_array(buffer, Vstate, state, obj);
|
1037
|
-
} else if (klass == rb_cString) {
|
1038
|
-
generate_json_string(buffer, Vstate, state, obj);
|
1039
|
-
} else if (obj == Qnil) {
|
1040
|
-
generate_json_null(buffer, Vstate, state, obj);
|
1041
|
-
} else if (obj == Qfalse) {
|
1042
|
-
generate_json_false(buffer, Vstate, state, obj);
|
1043
|
-
} else if (obj == Qtrue) {
|
1044
|
-
generate_json_true(buffer, Vstate, state, obj);
|
1045
|
-
} else if (FIXNUM_P(obj)) {
|
1046
|
-
generate_json_fixnum(buffer, Vstate, state, obj);
|
1047
|
-
} else if (RB_TYPE_P(obj, T_BIGNUM)) {
|
1048
|
-
generate_json_bignum(buffer, Vstate, state, obj);
|
1049
|
-
} else if (klass == rb_cFloat) {
|
1050
|
-
generate_json_float(buffer, Vstate, state, obj);
|
1051
|
-
} else if (state->strict) {
|
1052
|
-
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", RB_OBJ_STRING(CLASS_OF(obj)));
|
1053
|
-
} else if (rb_respond_to(obj, i_to_json)) {
|
1054
|
-
tmp = rb_funcall(obj, i_to_json, 1, Vstate);
|
1055
|
-
Check_Type(tmp, T_STRING);
|
1403
|
+
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
1056
1404
|
fbuffer_append_str(buffer, tmp);
|
1057
|
-
|
1058
|
-
tmp = rb_funcall(obj, i_to_s, 0);
|
1059
|
-
Check_Type(tmp, T_STRING);
|
1060
|
-
generate_json_string(buffer, Vstate, state, tmp);
|
1405
|
+
return;
|
1061
1406
|
}
|
1407
|
+
|
1408
|
+
/* This implementation writes directly into the buffer. We reserve
|
1409
|
+
* the 28 characters that fpconv_dtoa states as its maximum.
|
1410
|
+
*/
|
1411
|
+
fbuffer_inc_capa(buffer, 28);
|
1412
|
+
char* d = buffer->ptr + buffer->len;
|
1413
|
+
int len = fpconv_dtoa(value, d);
|
1414
|
+
|
1415
|
+
/* fpconv_dtoa converts a float to its shortest string representation,
|
1416
|
+
* but it adds a ".0" if this is a plain integer.
|
1417
|
+
*/
|
1418
|
+
fbuffer_consumed(buffer, len);
|
1062
1419
|
}
|
1063
1420
|
|
1064
|
-
static FBuffer *
|
1421
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1065
1422
|
{
|
1066
|
-
|
1067
|
-
|
1068
|
-
buffer
|
1069
|
-
|
1070
|
-
if (state->object_delim) {
|
1071
|
-
fbuffer_clear(state->object_delim);
|
1072
|
-
} else {
|
1073
|
-
state->object_delim = fbuffer_alloc(16);
|
1074
|
-
}
|
1075
|
-
fbuffer_append_char(state->object_delim, ',');
|
1076
|
-
if (state->object_delim2) {
|
1077
|
-
fbuffer_clear(state->object_delim2);
|
1078
|
-
} else {
|
1079
|
-
state->object_delim2 = fbuffer_alloc(16);
|
1080
|
-
}
|
1081
|
-
if (state->space_before) fbuffer_append(state->object_delim2, state->space_before, state->space_before_len);
|
1082
|
-
fbuffer_append_char(state->object_delim2, ':');
|
1083
|
-
if (state->space) fbuffer_append(state->object_delim2, state->space, state->space_len);
|
1423
|
+
VALUE fragment = RSTRUCT_GET(obj, 0);
|
1424
|
+
Check_Type(fragment, T_STRING);
|
1425
|
+
fbuffer_append_str(buffer, fragment);
|
1426
|
+
}
|
1084
1427
|
|
1085
|
-
|
1086
|
-
|
1428
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
1429
|
+
{
|
1430
|
+
bool as_json_called = false;
|
1431
|
+
start:
|
1432
|
+
if (obj == Qnil) {
|
1433
|
+
generate_json_null(buffer, data, obj);
|
1434
|
+
} else if (obj == Qfalse) {
|
1435
|
+
generate_json_false(buffer, data, obj);
|
1436
|
+
} else if (obj == Qtrue) {
|
1437
|
+
generate_json_true(buffer, data, obj);
|
1438
|
+
} else if (RB_SPECIAL_CONST_P(obj)) {
|
1439
|
+
if (RB_FIXNUM_P(obj)) {
|
1440
|
+
generate_json_fixnum(buffer, data, obj);
|
1441
|
+
} else if (RB_FLONUM_P(obj)) {
|
1442
|
+
generate_json_float(buffer, data, obj);
|
1443
|
+
} else if (RB_STATIC_SYM_P(obj)) {
|
1444
|
+
generate_json_symbol(buffer, data, obj);
|
1445
|
+
} else {
|
1446
|
+
goto general;
|
1447
|
+
}
|
1087
1448
|
} else {
|
1088
|
-
|
1449
|
+
VALUE klass = RBASIC_CLASS(obj);
|
1450
|
+
switch (RB_BUILTIN_TYPE(obj)) {
|
1451
|
+
case T_BIGNUM:
|
1452
|
+
generate_json_bignum(buffer, data, obj);
|
1453
|
+
break;
|
1454
|
+
case T_HASH:
|
1455
|
+
if (klass != rb_cHash) goto general;
|
1456
|
+
generate_json_object(buffer, data, obj);
|
1457
|
+
break;
|
1458
|
+
case T_ARRAY:
|
1459
|
+
if (klass != rb_cArray) goto general;
|
1460
|
+
generate_json_array(buffer, data, obj);
|
1461
|
+
break;
|
1462
|
+
case T_STRING:
|
1463
|
+
if (klass != rb_cString) goto general;
|
1464
|
+
generate_json_string(buffer, data, obj);
|
1465
|
+
break;
|
1466
|
+
case T_SYMBOL:
|
1467
|
+
generate_json_symbol(buffer, data, obj);
|
1468
|
+
break;
|
1469
|
+
case T_FLOAT:
|
1470
|
+
if (klass != rb_cFloat) goto general;
|
1471
|
+
generate_json_float(buffer, data, obj);
|
1472
|
+
break;
|
1473
|
+
case T_STRUCT:
|
1474
|
+
if (klass != cFragment) goto general;
|
1475
|
+
generate_json_fragment(buffer, data, obj);
|
1476
|
+
break;
|
1477
|
+
default:
|
1478
|
+
general:
|
1479
|
+
if (data->state->strict) {
|
1480
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
1481
|
+
obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
|
1482
|
+
as_json_called = true;
|
1483
|
+
goto start;
|
1484
|
+
} else {
|
1485
|
+
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
|
1486
|
+
}
|
1487
|
+
} else {
|
1488
|
+
generate_json_fallback(buffer, data, obj);
|
1489
|
+
}
|
1490
|
+
}
|
1089
1491
|
}
|
1090
|
-
fbuffer_append_char(state->array_delim, ',');
|
1091
|
-
if (state->array_nl) fbuffer_append(state->array_delim, state->array_nl, state->array_nl_len);
|
1092
|
-
return buffer;
|
1093
1492
|
}
|
1094
1493
|
|
1095
|
-
struct generate_json_data {
|
1096
|
-
FBuffer *buffer;
|
1097
|
-
VALUE vstate;
|
1098
|
-
JSON_Generator_State *state;
|
1099
|
-
VALUE obj;
|
1100
|
-
};
|
1101
|
-
|
1102
1494
|
static VALUE generate_json_try(VALUE d)
|
1103
1495
|
{
|
1104
1496
|
struct generate_json_data *data = (struct generate_json_data *)d;
|
1105
1497
|
|
1106
|
-
|
1498
|
+
data->func(data->buffer, data, data->obj);
|
1107
1499
|
|
1108
1500
|
return Qnil;
|
1109
1501
|
}
|
@@ -1118,65 +1510,50 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
|
|
1118
1510
|
return Qundef;
|
1119
1511
|
}
|
1120
1512
|
|
1121
|
-
static VALUE cState_partial_generate(VALUE self, VALUE obj)
|
1513
|
+
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io)
|
1122
1514
|
{
|
1123
|
-
FBuffer *buffer = cState_prepare_buffer(self);
|
1124
1515
|
GET_STATE(self);
|
1125
1516
|
|
1517
|
+
char stack_buffer[FBUFFER_STACK_SIZE];
|
1518
|
+
FBuffer buffer = {
|
1519
|
+
.io = RTEST(io) ? io : Qfalse,
|
1520
|
+
};
|
1521
|
+
fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
|
1522
|
+
|
1126
1523
|
struct generate_json_data data = {
|
1127
|
-
.buffer = buffer,
|
1524
|
+
.buffer = &buffer,
|
1128
1525
|
.vstate = self,
|
1129
1526
|
.state = state,
|
1130
|
-
.obj = obj
|
1527
|
+
.obj = obj,
|
1528
|
+
.func = func
|
1131
1529
|
};
|
1132
1530
|
rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
|
1133
1531
|
|
1134
|
-
return
|
1532
|
+
return fbuffer_finalize(&buffer);
|
1135
1533
|
}
|
1136
1534
|
|
1137
|
-
/*
|
1138
|
-
*
|
1535
|
+
/* call-seq:
|
1536
|
+
* generate(obj) -> String
|
1537
|
+
* generate(obj, anIO) -> anIO
|
1139
1538
|
*
|
1140
1539
|
* Generates a valid JSON document from object +obj+ and returns the
|
1141
1540
|
* result. If no valid JSON document can be created this method raises a
|
1142
1541
|
* GeneratorError exception.
|
1143
1542
|
*/
|
1144
|
-
static VALUE cState_generate(VALUE
|
1543
|
+
static VALUE cState_generate(int argc, VALUE *argv, VALUE self)
|
1145
1544
|
{
|
1146
|
-
|
1545
|
+
rb_check_arity(argc, 1, 2);
|
1546
|
+
VALUE obj = argv[0];
|
1547
|
+
VALUE io = argc > 1 ? argv[1] : Qnil;
|
1548
|
+
VALUE result = cState_partial_generate(self, obj, generate_json, io);
|
1147
1549
|
GET_STATE(self);
|
1148
1550
|
(void)state;
|
1149
1551
|
return result;
|
1150
1552
|
}
|
1151
1553
|
|
1152
|
-
/*
|
1153
|
-
* call-seq: new(opts = {})
|
1154
|
-
*
|
1155
|
-
* Instantiates a new State object, configured by _opts_.
|
1156
|
-
*
|
1157
|
-
* _opts_ can have the following keys:
|
1158
|
-
*
|
1159
|
-
* * *indent*: a string used to indent levels (default: ''),
|
1160
|
-
* * *space*: a string that is put after, a : or , delimiter (default: ''),
|
1161
|
-
* * *space_before*: a string that is put before a : pair delimiter (default: ''),
|
1162
|
-
* * *object_nl*: a string that is put at the end of a JSON object (default: ''),
|
1163
|
-
* * *array_nl*: a string that is put at the end of a JSON array (default: ''),
|
1164
|
-
* * *allow_nan*: true if NaN, Infinity, and -Infinity should be
|
1165
|
-
* generated, otherwise an exception is thrown, if these values are
|
1166
|
-
* encountered. This options defaults to false.
|
1167
|
-
* * *ascii_only*: true if only ASCII characters should be generated. This
|
1168
|
-
* option defaults to false.
|
1169
|
-
* * *buffer_initial_length*: sets the initial length of the generator's
|
1170
|
-
* internal buffer.
|
1171
|
-
*/
|
1172
1554
|
static VALUE cState_initialize(int argc, VALUE *argv, VALUE self)
|
1173
1555
|
{
|
1174
|
-
|
1175
|
-
GET_STATE(self);
|
1176
|
-
state->max_nesting = 100;
|
1177
|
-
state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
|
1178
|
-
rb_scan_args(argc, argv, "01", &opts);
|
1179
|
-
if (!NIL_P(opts)) cState_configure(self, opts);
|
1556
|
+
rb_warn("The json gem extension was loaded with the stdlib ruby code. You should upgrade rubygems with `gem update --system`");
|
1180
1557
|
return self;
|
1181
1558
|
}
|
1182
1559
|
|
@@ -1196,14 +1573,12 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig)
|
|
1196
1573
|
if (!objState) rb_raise(rb_eArgError, "unallocated JSON::State");
|
1197
1574
|
|
1198
1575
|
MEMCPY(objState, origState, JSON_Generator_State, 1);
|
1199
|
-
objState->indent =
|
1200
|
-
objState->space =
|
1201
|
-
objState->space_before =
|
1202
|
-
objState->object_nl =
|
1203
|
-
objState->array_nl =
|
1204
|
-
|
1205
|
-
if (origState->object_delim) objState->object_delim = fbuffer_dup(origState->object_delim);
|
1206
|
-
if (origState->object_delim2) objState->object_delim2 = fbuffer_dup(origState->object_delim2);
|
1576
|
+
objState->indent = origState->indent;
|
1577
|
+
objState->space = origState->space;
|
1578
|
+
objState->space_before = origState->space_before;
|
1579
|
+
objState->object_nl = origState->object_nl;
|
1580
|
+
objState->array_nl = origState->array_nl;
|
1581
|
+
objState->as_json = origState->as_json;
|
1207
1582
|
return obj;
|
1208
1583
|
}
|
1209
1584
|
|
@@ -1233,7 +1608,18 @@ static VALUE cState_from_state_s(VALUE self, VALUE opts)
|
|
1233
1608
|
static VALUE cState_indent(VALUE self)
|
1234
1609
|
{
|
1235
1610
|
GET_STATE(self);
|
1236
|
-
return state->indent ?
|
1611
|
+
return state->indent ? state->indent : rb_str_freeze(rb_utf8_str_new("", 0));
|
1612
|
+
}
|
1613
|
+
|
1614
|
+
static VALUE string_config(VALUE config)
|
1615
|
+
{
|
1616
|
+
if (RTEST(config)) {
|
1617
|
+
Check_Type(config, T_STRING);
|
1618
|
+
if (RSTRING_LEN(config)) {
|
1619
|
+
return rb_str_new_frozen(config);
|
1620
|
+
}
|
1621
|
+
}
|
1622
|
+
return Qfalse;
|
1237
1623
|
}
|
1238
1624
|
|
1239
1625
|
/*
|
@@ -1243,21 +1629,8 @@ static VALUE cState_indent(VALUE self)
|
|
1243
1629
|
*/
|
1244
1630
|
static VALUE cState_indent_set(VALUE self, VALUE indent)
|
1245
1631
|
{
|
1246
|
-
unsigned long len;
|
1247
1632
|
GET_STATE(self);
|
1248
|
-
|
1249
|
-
len = RSTRING_LEN(indent);
|
1250
|
-
if (len == 0) {
|
1251
|
-
if (state->indent) {
|
1252
|
-
ruby_xfree(state->indent);
|
1253
|
-
state->indent = NULL;
|
1254
|
-
state->indent_len = 0;
|
1255
|
-
}
|
1256
|
-
} else {
|
1257
|
-
if (state->indent) ruby_xfree(state->indent);
|
1258
|
-
state->indent = fstrndup(RSTRING_PTR(indent), len);
|
1259
|
-
state->indent_len = len;
|
1260
|
-
}
|
1633
|
+
RB_OBJ_WRITE(self, &state->indent, string_config(indent));
|
1261
1634
|
return Qnil;
|
1262
1635
|
}
|
1263
1636
|
|
@@ -1270,7 +1643,7 @@ static VALUE cState_indent_set(VALUE self, VALUE indent)
|
|
1270
1643
|
static VALUE cState_space(VALUE self)
|
1271
1644
|
{
|
1272
1645
|
GET_STATE(self);
|
1273
|
-
return state->space ?
|
1646
|
+
return state->space ? state->space : rb_str_freeze(rb_utf8_str_new("", 0));
|
1274
1647
|
}
|
1275
1648
|
|
1276
1649
|
/*
|
@@ -1281,21 +1654,8 @@ static VALUE cState_space(VALUE self)
|
|
1281
1654
|
*/
|
1282
1655
|
static VALUE cState_space_set(VALUE self, VALUE space)
|
1283
1656
|
{
|
1284
|
-
unsigned long len;
|
1285
1657
|
GET_STATE(self);
|
1286
|
-
|
1287
|
-
len = RSTRING_LEN(space);
|
1288
|
-
if (len == 0) {
|
1289
|
-
if (state->space) {
|
1290
|
-
ruby_xfree(state->space);
|
1291
|
-
state->space = NULL;
|
1292
|
-
state->space_len = 0;
|
1293
|
-
}
|
1294
|
-
} else {
|
1295
|
-
if (state->space) ruby_xfree(state->space);
|
1296
|
-
state->space = fstrndup(RSTRING_PTR(space), len);
|
1297
|
-
state->space_len = len;
|
1298
|
-
}
|
1658
|
+
RB_OBJ_WRITE(self, &state->space, string_config(space));
|
1299
1659
|
return Qnil;
|
1300
1660
|
}
|
1301
1661
|
|
@@ -1307,7 +1667,7 @@ static VALUE cState_space_set(VALUE self, VALUE space)
|
|
1307
1667
|
static VALUE cState_space_before(VALUE self)
|
1308
1668
|
{
|
1309
1669
|
GET_STATE(self);
|
1310
|
-
return state->space_before ?
|
1670
|
+
return state->space_before ? state->space_before : rb_str_freeze(rb_utf8_str_new("", 0));
|
1311
1671
|
}
|
1312
1672
|
|
1313
1673
|
/*
|
@@ -1317,21 +1677,8 @@ static VALUE cState_space_before(VALUE self)
|
|
1317
1677
|
*/
|
1318
1678
|
static VALUE cState_space_before_set(VALUE self, VALUE space_before)
|
1319
1679
|
{
|
1320
|
-
unsigned long len;
|
1321
1680
|
GET_STATE(self);
|
1322
|
-
|
1323
|
-
len = RSTRING_LEN(space_before);
|
1324
|
-
if (len == 0) {
|
1325
|
-
if (state->space_before) {
|
1326
|
-
ruby_xfree(state->space_before);
|
1327
|
-
state->space_before = NULL;
|
1328
|
-
state->space_before_len = 0;
|
1329
|
-
}
|
1330
|
-
} else {
|
1331
|
-
if (state->space_before) ruby_xfree(state->space_before);
|
1332
|
-
state->space_before = fstrndup(RSTRING_PTR(space_before), len);
|
1333
|
-
state->space_before_len = len;
|
1334
|
-
}
|
1681
|
+
RB_OBJ_WRITE(self, &state->space_before, string_config(space_before));
|
1335
1682
|
return Qnil;
|
1336
1683
|
}
|
1337
1684
|
|
@@ -1344,7 +1691,7 @@ static VALUE cState_space_before_set(VALUE self, VALUE space_before)
|
|
1344
1691
|
static VALUE cState_object_nl(VALUE self)
|
1345
1692
|
{
|
1346
1693
|
GET_STATE(self);
|
1347
|
-
return state->object_nl ?
|
1694
|
+
return state->object_nl ? state->object_nl : rb_str_freeze(rb_utf8_str_new("", 0));
|
1348
1695
|
}
|
1349
1696
|
|
1350
1697
|
/*
|
@@ -1355,20 +1702,8 @@ static VALUE cState_object_nl(VALUE self)
|
|
1355
1702
|
*/
|
1356
1703
|
static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
|
1357
1704
|
{
|
1358
|
-
unsigned long len;
|
1359
1705
|
GET_STATE(self);
|
1360
|
-
|
1361
|
-
len = RSTRING_LEN(object_nl);
|
1362
|
-
if (len == 0) {
|
1363
|
-
if (state->object_nl) {
|
1364
|
-
ruby_xfree(state->object_nl);
|
1365
|
-
state->object_nl = NULL;
|
1366
|
-
}
|
1367
|
-
} else {
|
1368
|
-
if (state->object_nl) ruby_xfree(state->object_nl);
|
1369
|
-
state->object_nl = fstrndup(RSTRING_PTR(object_nl), len);
|
1370
|
-
state->object_nl_len = len;
|
1371
|
-
}
|
1706
|
+
RB_OBJ_WRITE(self, &state->object_nl, string_config(object_nl));
|
1372
1707
|
return Qnil;
|
1373
1708
|
}
|
1374
1709
|
|
@@ -1380,7 +1715,7 @@ static VALUE cState_object_nl_set(VALUE self, VALUE object_nl)
|
|
1380
1715
|
static VALUE cState_array_nl(VALUE self)
|
1381
1716
|
{
|
1382
1717
|
GET_STATE(self);
|
1383
|
-
return state->array_nl ?
|
1718
|
+
return state->array_nl ? state->array_nl : rb_str_freeze(rb_utf8_str_new("", 0));
|
1384
1719
|
}
|
1385
1720
|
|
1386
1721
|
/*
|
@@ -1390,23 +1725,33 @@ static VALUE cState_array_nl(VALUE self)
|
|
1390
1725
|
*/
|
1391
1726
|
static VALUE cState_array_nl_set(VALUE self, VALUE array_nl)
|
1392
1727
|
{
|
1393
|
-
unsigned long len;
|
1394
1728
|
GET_STATE(self);
|
1395
|
-
|
1396
|
-
len = RSTRING_LEN(array_nl);
|
1397
|
-
if (len == 0) {
|
1398
|
-
if (state->array_nl) {
|
1399
|
-
ruby_xfree(state->array_nl);
|
1400
|
-
state->array_nl = NULL;
|
1401
|
-
}
|
1402
|
-
} else {
|
1403
|
-
if (state->array_nl) ruby_xfree(state->array_nl);
|
1404
|
-
state->array_nl = fstrndup(RSTRING_PTR(array_nl), len);
|
1405
|
-
state->array_nl_len = len;
|
1406
|
-
}
|
1729
|
+
RB_OBJ_WRITE(self, &state->array_nl, string_config(array_nl));
|
1407
1730
|
return Qnil;
|
1408
1731
|
}
|
1409
1732
|
|
1733
|
+
/*
|
1734
|
+
* call-seq: as_json()
|
1735
|
+
*
|
1736
|
+
* Returns the Proc that strict mode calls to convert objects it cannot serialize natively, or a falsy value if none is set.
|
1737
|
+
*/
|
1738
|
+
static VALUE cState_as_json(VALUE self)
|
1739
|
+
{
|
1740
|
+
GET_STATE(self);
|
1741
|
+
return state->as_json;
|
1742
|
+
}
|
1743
|
+
|
1744
|
+
/*
|
1745
|
+
* call-seq: as_json=(as_json)
|
1746
|
+
*
|
1747
|
+
* Sets the Proc (or any object convertible with to_proc) that strict mode calls to convert objects it cannot serialize natively.
|
1748
|
+
*/
|
1749
|
+
static VALUE cState_as_json_set(VALUE self, VALUE as_json)
|
1750
|
+
{
|
1751
|
+
GET_STATE(self);
|
1752
|
+
RB_OBJ_WRITE(self, &state->as_json, rb_convert_type(as_json, T_DATA, "Proc", "to_proc"));
|
1753
|
+
return Qnil;
|
1754
|
+
}
|
1410
1755
|
|
1411
1756
|
/*
|
1412
1757
|
* call-seq: check_circular?
|
@@ -1432,6 +1777,11 @@ static VALUE cState_max_nesting(VALUE self)
|
|
1432
1777
|
return LONG2FIX(state->max_nesting);
|
1433
1778
|
}
|
1434
1779
|
|
1780
|
+
static long long_config(VALUE num)
|
1781
|
+
{
|
1782
|
+
return RTEST(num) ? FIX2LONG(num) : 0;
|
1783
|
+
}
|
1784
|
+
|
1435
1785
|
/*
|
1436
1786
|
* call-seq: max_nesting=(depth)
|
1437
1787
|
*
|
@@ -1441,8 +1791,8 @@ static VALUE cState_max_nesting(VALUE self)
|
|
1441
1791
|
static VALUE cState_max_nesting_set(VALUE self, VALUE depth)
|
1442
1792
|
{
|
1443
1793
|
GET_STATE(self);
|
1444
|
-
|
1445
|
-
return
|
1794
|
+
state->max_nesting = long_config(depth);
|
1795
|
+
return Qnil;
|
1446
1796
|
}
|
1447
1797
|
|
1448
1798
|
/*
|
@@ -1513,6 +1863,18 @@ static VALUE cState_allow_nan_p(VALUE self)
|
|
1513
1863
|
return state->allow_nan ? Qtrue : Qfalse;
|
1514
1864
|
}
|
1515
1865
|
|
1866
|
+
/*
|
1867
|
+
* call-seq: allow_nan=(enable)
|
1868
|
+
*
|
1869
|
+
* This sets whether or not to serialize NaN, Infinity, and -Infinity.
|
1870
|
+
*/
|
1871
|
+
static VALUE cState_allow_nan_set(VALUE self, VALUE enable)
|
1872
|
+
{
|
1873
|
+
GET_STATE(self);
|
1874
|
+
state->allow_nan = RTEST(enable);
|
1875
|
+
return Qnil;
|
1876
|
+
}
|
1877
|
+
|
1516
1878
|
/*
|
1517
1879
|
* call-seq: ascii_only?
|
1518
1880
|
*
|
@@ -1525,6 +1887,18 @@ static VALUE cState_ascii_only_p(VALUE self)
|
|
1525
1887
|
return state->ascii_only ? Qtrue : Qfalse;
|
1526
1888
|
}
|
1527
1889
|
|
1890
|
+
/*
|
1891
|
+
* call-seq: ascii_only=(enable)
|
1892
|
+
*
|
1893
|
+
* This sets whether only ASCII characters should be generated.
|
1894
|
+
*/
|
1895
|
+
static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
|
1896
|
+
{
|
1897
|
+
GET_STATE(self);
|
1898
|
+
state->ascii_only = RTEST(enable);
|
1899
|
+
return Qnil;
|
1900
|
+
}
|
1901
|
+
|
1528
1902
|
/*
|
1529
1903
|
* call-seq: depth
|
1530
1904
|
*
|
@@ -1545,8 +1919,7 @@ static VALUE cState_depth(VALUE self)
|
|
1545
1919
|
static VALUE cState_depth_set(VALUE self, VALUE depth)
|
1546
1920
|
{
|
1547
1921
|
GET_STATE(self);
|
1548
|
-
|
1549
|
-
state->depth = FIX2LONG(depth);
|
1922
|
+
state->depth = long_config(depth);
|
1550
1923
|
return Qnil;
|
1551
1924
|
}
|
1552
1925
|
|
@@ -1561,6 +1934,15 @@ static VALUE cState_buffer_initial_length(VALUE self)
|
|
1561
1934
|
return LONG2FIX(state->buffer_initial_length);
|
1562
1935
|
}
|
1563
1936
|
|
1937
|
+
static void buffer_initial_length_set(JSON_Generator_State *state, VALUE buffer_initial_length)
|
1938
|
+
{
|
1939
|
+
Check_Type(buffer_initial_length, T_FIXNUM);
|
1940
|
+
long initial_length = FIX2LONG(buffer_initial_length);
|
1941
|
+
if (initial_length > 0) {
|
1942
|
+
state->buffer_initial_length = initial_length;
|
1943
|
+
}
|
1944
|
+
}
|
1945
|
+
|
1564
1946
|
/*
|
1565
1947
|
* call-seq: buffer_initial_length=(length)
|
1566
1948
|
*
|
@@ -1569,16 +1951,76 @@ static VALUE cState_buffer_initial_length(VALUE self)
|
|
1569
1951
|
*/
|
1570
1952
|
static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_length)
|
1571
1953
|
{
|
1572
|
-
long initial_length;
|
1573
1954
|
GET_STATE(self);
|
1574
|
-
|
1575
|
-
initial_length = FIX2LONG(buffer_initial_length);
|
1576
|
-
if (initial_length > 0) {
|
1577
|
-
state->buffer_initial_length = initial_length;
|
1578
|
-
}
|
1955
|
+
buffer_initial_length_set(state, buffer_initial_length);
|
1579
1956
|
return Qnil;
|
1580
1957
|
}
|
1581
1958
|
|
1959
|
+
static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
1960
|
+
{
|
1961
|
+
JSON_Generator_State *state = (JSON_Generator_State *)_arg;
|
1962
|
+
|
1963
|
+
if (key == sym_indent) { state->indent = string_config(val); }
|
1964
|
+
else if (key == sym_space) { state->space = string_config(val); }
|
1965
|
+
else if (key == sym_space_before) { state->space_before = string_config(val); }
|
1966
|
+
else if (key == sym_object_nl) { state->object_nl = string_config(val); }
|
1967
|
+
else if (key == sym_array_nl) { state->array_nl = string_config(val); }
|
1968
|
+
else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
|
1969
|
+
else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
|
1970
|
+
else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
|
1971
|
+
else if (key == sym_depth) { state->depth = long_config(val); }
|
1972
|
+
else if (key == sym_buffer_initial_length) { buffer_initial_length_set(state, val); }
|
1973
|
+
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
|
1974
|
+
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
|
1975
|
+
else if (key == sym_strict) { state->strict = RTEST(val); }
|
1976
|
+
else if (key == sym_as_json) { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
|
1977
|
+
return ST_CONTINUE;
|
1978
|
+
}
|
1979
|
+
|
1980
|
+
static void configure_state(JSON_Generator_State *state, VALUE config)
|
1981
|
+
{
|
1982
|
+
if (!RTEST(config)) return;
|
1983
|
+
|
1984
|
+
Check_Type(config, T_HASH);
|
1985
|
+
|
1986
|
+
if (!RHASH_SIZE(config)) return;
|
1987
|
+
|
1988
|
+
// We assume in most cases few keys are set so it's faster to go over
|
1989
|
+
// the provided keys than to check all possible keys.
|
1990
|
+
rb_hash_foreach(config, configure_state_i, (VALUE)state);
|
1991
|
+
}
|
1992
|
+
|
1993
|
+
static VALUE cState_configure(VALUE self, VALUE opts)
|
1994
|
+
{
|
1995
|
+
GET_STATE(self);
|
1996
|
+
configure_state(state, opts);
|
1997
|
+
return self;
|
1998
|
+
}
|
1999
|
+
|
2000
|
+
static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
2001
|
+
{
|
2002
|
+
JSON_Generator_State state = {0};
|
2003
|
+
state_init(&state);
|
2004
|
+
configure_state(&state, opts);
|
2005
|
+
|
2006
|
+
char stack_buffer[FBUFFER_STACK_SIZE];
|
2007
|
+
FBuffer buffer = {
|
2008
|
+
.io = RTEST(io) ? io : Qfalse,
|
2009
|
+
};
|
2010
|
+
fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
|
2011
|
+
|
2012
|
+
struct generate_json_data data = {
|
2013
|
+
.buffer = &buffer,
|
2014
|
+
.vstate = Qfalse,
|
2015
|
+
.state = &state,
|
2016
|
+
.obj = obj,
|
2017
|
+
.func = generate_json,
|
2018
|
+
};
|
2019
|
+
rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
|
2020
|
+
|
2021
|
+
return fbuffer_finalize(&buffer);
|
2022
|
+
}
|
2023
|
+
|
1582
2024
|
/*
|
1583
2025
|
*
|
1584
2026
|
*/
|
@@ -1592,18 +2034,26 @@ void Init_generator(void)
|
|
1592
2034
|
rb_require("json/common");
|
1593
2035
|
|
1594
2036
|
mJSON = rb_define_module("JSON");
|
1595
|
-
mExt = rb_define_module_under(mJSON, "Ext");
|
1596
|
-
mGenerator = rb_define_module_under(mExt, "Generator");
|
1597
2037
|
|
2038
|
+
rb_global_variable(&cFragment);
|
2039
|
+
cFragment = rb_const_get(mJSON, rb_intern("Fragment"));
|
2040
|
+
|
2041
|
+
VALUE mExt = rb_define_module_under(mJSON, "Ext");
|
2042
|
+
VALUE mGenerator = rb_define_module_under(mExt, "Generator");
|
2043
|
+
|
2044
|
+
rb_global_variable(&eGeneratorError);
|
1598
2045
|
eGeneratorError = rb_path2class("JSON::GeneratorError");
|
2046
|
+
|
2047
|
+
rb_global_variable(&eNestingError);
|
1599
2048
|
eNestingError = rb_path2class("JSON::NestingError");
|
1600
|
-
rb_gc_register_mark_object(eGeneratorError);
|
1601
|
-
rb_gc_register_mark_object(eNestingError);
|
1602
2049
|
|
1603
2050
|
cState = rb_define_class_under(mGenerator, "State", rb_cObject);
|
1604
2051
|
rb_define_alloc_func(cState, cState_s_allocate);
|
1605
2052
|
rb_define_singleton_method(cState, "from_state", cState_from_state_s, 1);
|
1606
2053
|
rb_define_method(cState, "initialize", cState_initialize, -1);
|
2054
|
+
rb_define_alias(cState, "initialize", "initialize"); // avoid method redefinition warnings
|
2055
|
+
rb_define_private_method(cState, "_configure", cState_configure, 1);
|
2056
|
+
|
1607
2057
|
rb_define_method(cState, "initialize_copy", cState_init_copy, 1);
|
1608
2058
|
rb_define_method(cState, "indent", cState_indent, 0);
|
1609
2059
|
rb_define_method(cState, "indent=", cState_indent_set, 1);
|
@@ -1615,6 +2065,8 @@ void Init_generator(void)
|
|
1615
2065
|
rb_define_method(cState, "object_nl=", cState_object_nl_set, 1);
|
1616
2066
|
rb_define_method(cState, "array_nl", cState_array_nl, 0);
|
1617
2067
|
rb_define_method(cState, "array_nl=", cState_array_nl_set, 1);
|
2068
|
+
rb_define_method(cState, "as_json", cState_as_json, 0);
|
2069
|
+
rb_define_method(cState, "as_json=", cState_as_json_set, 1);
|
1618
2070
|
rb_define_method(cState, "max_nesting", cState_max_nesting, 0);
|
1619
2071
|
rb_define_method(cState, "max_nesting=", cState_max_nesting_set, 1);
|
1620
2072
|
rb_define_method(cState, "script_safe", cState_script_safe, 0);
|
@@ -1628,76 +2080,109 @@ void Init_generator(void)
|
|
1628
2080
|
rb_define_method(cState, "strict=", cState_strict_set, 1);
|
1629
2081
|
rb_define_method(cState, "check_circular?", cState_check_circular_p, 0);
|
1630
2082
|
rb_define_method(cState, "allow_nan?", cState_allow_nan_p, 0);
|
2083
|
+
rb_define_method(cState, "allow_nan=", cState_allow_nan_set, 1);
|
1631
2084
|
rb_define_method(cState, "ascii_only?", cState_ascii_only_p, 0);
|
2085
|
+
rb_define_method(cState, "ascii_only=", cState_ascii_only_set, 1);
|
1632
2086
|
rb_define_method(cState, "depth", cState_depth, 0);
|
1633
2087
|
rb_define_method(cState, "depth=", cState_depth_set, 1);
|
1634
2088
|
rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0);
|
1635
2089
|
rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
|
1636
|
-
rb_define_method(cState, "
|
1637
|
-
rb_define_alias(cState, "
|
1638
|
-
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1642
|
-
|
1643
|
-
|
1644
|
-
mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
|
1645
|
-
mObject = rb_define_module_under(mGeneratorMethods, "Object");
|
2090
|
+
rb_define_method(cState, "generate", cState_generate, -1);
|
2091
|
+
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
|
2092
|
+
|
2093
|
+
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
|
2094
|
+
|
2095
|
+
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
|
2096
|
+
|
2097
|
+
VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object");
|
1646
2098
|
rb_define_method(mObject, "to_json", mObject_to_json, -1);
|
1647
|
-
|
2099
|
+
|
2100
|
+
VALUE mHash = rb_define_module_under(mGeneratorMethods, "Hash");
|
1648
2101
|
rb_define_method(mHash, "to_json", mHash_to_json, -1);
|
1649
|
-
|
2102
|
+
|
2103
|
+
VALUE mArray = rb_define_module_under(mGeneratorMethods, "Array");
|
1650
2104
|
rb_define_method(mArray, "to_json", mArray_to_json, -1);
|
2105
|
+
|
1651
2106
|
#ifdef RUBY_INTEGER_UNIFICATION
|
1652
|
-
mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
|
2107
|
+
VALUE mInteger = rb_define_module_under(mGeneratorMethods, "Integer");
|
1653
2108
|
rb_define_method(mInteger, "to_json", mInteger_to_json, -1);
|
1654
2109
|
#else
|
1655
|
-
mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
|
2110
|
+
VALUE mFixnum = rb_define_module_under(mGeneratorMethods, "Fixnum");
|
1656
2111
|
rb_define_method(mFixnum, "to_json", mFixnum_to_json, -1);
|
1657
|
-
|
2112
|
+
|
2113
|
+
VALUE mBignum = rb_define_module_under(mGeneratorMethods, "Bignum");
|
1658
2114
|
rb_define_method(mBignum, "to_json", mBignum_to_json, -1);
|
1659
2115
|
#endif
|
1660
|
-
mFloat = rb_define_module_under(mGeneratorMethods, "Float");
|
2116
|
+
VALUE mFloat = rb_define_module_under(mGeneratorMethods, "Float");
|
1661
2117
|
rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
|
1662
|
-
|
2118
|
+
|
2119
|
+
VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
|
1663
2120
|
rb_define_singleton_method(mString, "included", mString_included_s, 1);
|
1664
2121
|
rb_define_method(mString, "to_json", mString_to_json, -1);
|
1665
2122
|
rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
|
1666
2123
|
rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
|
2124
|
+
|
1667
2125
|
mString_Extend = rb_define_module_under(mString, "Extend");
|
1668
2126
|
rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
|
1669
|
-
|
2127
|
+
|
2128
|
+
VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
|
1670
2129
|
rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
|
1671
|
-
|
2130
|
+
|
2131
|
+
VALUE mFalseClass = rb_define_module_under(mGeneratorMethods, "FalseClass");
|
1672
2132
|
rb_define_method(mFalseClass, "to_json", mFalseClass_to_json, -1);
|
1673
|
-
|
2133
|
+
|
2134
|
+
VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass");
|
1674
2135
|
rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1);
|
1675
2136
|
|
2137
|
+
rb_global_variable(&Encoding_UTF_8);
|
2138
|
+
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
|
2139
|
+
|
1676
2140
|
i_to_s = rb_intern("to_s");
|
1677
2141
|
i_to_json = rb_intern("to_json");
|
1678
2142
|
i_new = rb_intern("new");
|
1679
|
-
i_indent = rb_intern("indent");
|
1680
|
-
i_space = rb_intern("space");
|
1681
|
-
i_space_before = rb_intern("space_before");
|
1682
|
-
i_object_nl = rb_intern("object_nl");
|
1683
|
-
i_array_nl = rb_intern("array_nl");
|
1684
|
-
i_max_nesting = rb_intern("max_nesting");
|
1685
|
-
i_script_safe = rb_intern("script_safe");
|
1686
|
-
i_escape_slash = rb_intern("escape_slash");
|
1687
|
-
i_strict = rb_intern("strict");
|
1688
|
-
i_allow_nan = rb_intern("allow_nan");
|
1689
|
-
i_ascii_only = rb_intern("ascii_only");
|
1690
|
-
i_depth = rb_intern("depth");
|
1691
|
-
i_buffer_initial_length = rb_intern("buffer_initial_length");
|
1692
2143
|
i_pack = rb_intern("pack");
|
1693
2144
|
i_unpack = rb_intern("unpack");
|
1694
2145
|
i_create_id = rb_intern("create_id");
|
1695
2146
|
i_extend = rb_intern("extend");
|
1696
|
-
|
1697
|
-
|
1698
|
-
|
1699
|
-
|
1700
|
-
|
1701
|
-
|
1702
|
-
|
2147
|
+
i_encode = rb_intern("encode");
|
2148
|
+
|
2149
|
+
sym_indent = ID2SYM(rb_intern("indent"));
|
2150
|
+
sym_space = ID2SYM(rb_intern("space"));
|
2151
|
+
sym_space_before = ID2SYM(rb_intern("space_before"));
|
2152
|
+
sym_object_nl = ID2SYM(rb_intern("object_nl"));
|
2153
|
+
sym_array_nl = ID2SYM(rb_intern("array_nl"));
|
2154
|
+
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
|
2155
|
+
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
|
2156
|
+
sym_ascii_only = ID2SYM(rb_intern("ascii_only"));
|
2157
|
+
sym_depth = ID2SYM(rb_intern("depth"));
|
2158
|
+
sym_buffer_initial_length = ID2SYM(rb_intern("buffer_initial_length"));
|
2159
|
+
sym_script_safe = ID2SYM(rb_intern("script_safe"));
|
2160
|
+
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
|
2161
|
+
sym_strict = ID2SYM(rb_intern("strict"));
|
2162
|
+
sym_as_json = ID2SYM(rb_intern("as_json"));
|
2163
|
+
|
2164
|
+
usascii_encindex = rb_usascii_encindex();
|
2165
|
+
utf8_encindex = rb_utf8_encindex();
|
2166
|
+
binary_encindex = rb_ascii8bit_encindex();
|
2167
|
+
|
2168
|
+
rb_require("json/ext/generator/state");
|
2169
|
+
|
2170
|
+
|
2171
|
+
switch(find_simd_implementation()) {
|
2172
|
+
#ifdef HAVE_SIMD
|
2173
|
+
#ifdef HAVE_SIMD_NEON
|
2174
|
+
case SIMD_NEON:
|
2175
|
+
search_escape_basic_impl = search_escape_basic_neon;
|
2176
|
+
break;
|
2177
|
+
#endif /* HAVE_SIMD_NEON */
|
2178
|
+
#ifdef HAVE_SIMD_SSE2
|
2179
|
+
case SIMD_SSE2:
|
2180
|
+
search_escape_basic_impl = search_escape_basic_sse2;
|
2181
|
+
break;
|
2182
|
+
#endif /* HAVE_SIMD_SSE2 */
|
2183
|
+
#endif /* HAVE_SIMD */
|
2184
|
+
default:
|
2185
|
+
search_escape_basic_impl = search_escape_basic;
|
2186
|
+
break;
|
2187
|
+
}
|
1703
2188
|
}
|