json 2.10.2 → 2.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +110 -7
- data/README.md +16 -1
- data/ext/json/ext/fbuffer/fbuffer.h +110 -19
- data/ext/json/ext/generator/extconf.rb +6 -0
- data/ext/json/ext/generator/generator.c +543 -196
- data/ext/json/ext/parser/extconf.rb +5 -2
- data/ext/json/ext/parser/parser.c +339 -268
- data/ext/json/ext/simd/conf.rb +24 -0
- data/ext/json/ext/simd/simd.h +188 -0
- data/ext/json/ext/vendor/fpconv.c +480 -0
- data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
- data/json.gemspec +2 -3
- data/lib/json/add/core.rb +1 -0
- data/lib/json/add/string.rb +35 -0
- data/lib/json/common.rb +312 -169
- data/lib/json/ext/generator/state.rb +7 -14
- data/lib/json/ext.rb +2 -2
- data/lib/json/generic_object.rb +0 -8
- data/lib/json/truffle_ruby/generator.rb +64 -46
- data/lib/json/version.rb +1 -1
- data/lib/json.rb +55 -0
- metadata +8 -3
|
@@ -1,11 +1,20 @@
|
|
|
1
1
|
#include "ruby.h"
|
|
2
2
|
#include "../fbuffer/fbuffer.h"
|
|
3
|
+
#include "../vendor/fpconv.c"
|
|
3
4
|
|
|
4
5
|
#include <math.h>
|
|
5
6
|
#include <ctype.h>
|
|
6
7
|
|
|
8
|
+
#include "../simd/simd.h"
|
|
9
|
+
|
|
7
10
|
/* ruby api and some helpers */
|
|
8
11
|
|
|
12
|
+
enum duplicate_key_action {
|
|
13
|
+
JSON_DEPRECATED = 0,
|
|
14
|
+
JSON_IGNORE,
|
|
15
|
+
JSON_RAISE,
|
|
16
|
+
};
|
|
17
|
+
|
|
9
18
|
typedef struct JSON_Generator_StateStruct {
|
|
10
19
|
VALUE indent;
|
|
11
20
|
VALUE space;
|
|
@@ -18,6 +27,9 @@ typedef struct JSON_Generator_StateStruct {
|
|
|
18
27
|
long depth;
|
|
19
28
|
long buffer_initial_length;
|
|
20
29
|
|
|
30
|
+
enum duplicate_key_action on_duplicate_key;
|
|
31
|
+
|
|
32
|
+
bool as_json_single_arg;
|
|
21
33
|
bool allow_nan;
|
|
22
34
|
bool ascii_only;
|
|
23
35
|
bool script_safe;
|
|
@@ -28,10 +40,10 @@ typedef struct JSON_Generator_StateStruct {
|
|
|
28
40
|
#define RB_UNLIKELY(cond) (cond)
|
|
29
41
|
#endif
|
|
30
42
|
|
|
31
|
-
static VALUE mJSON, cState, cFragment,
|
|
43
|
+
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
|
|
32
44
|
|
|
33
45
|
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
|
|
34
|
-
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
|
|
46
|
+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
|
|
35
47
|
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
|
|
36
48
|
|
|
37
49
|
|
|
@@ -44,7 +56,7 @@ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_n
|
|
|
44
56
|
|
|
45
57
|
struct generate_json_data;
|
|
46
58
|
|
|
47
|
-
typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data,
|
|
59
|
+
typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
48
60
|
|
|
49
61
|
struct generate_json_data {
|
|
50
62
|
FBuffer *buffer;
|
|
@@ -56,20 +68,20 @@ struct generate_json_data {
|
|
|
56
68
|
|
|
57
69
|
static VALUE cState_from_state_s(VALUE self, VALUE opts);
|
|
58
70
|
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
|
|
59
|
-
static void generate_json(FBuffer *buffer, struct generate_json_data *data,
|
|
60
|
-
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data,
|
|
61
|
-
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data,
|
|
62
|
-
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data,
|
|
63
|
-
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data,
|
|
64
|
-
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data,
|
|
65
|
-
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data,
|
|
71
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
72
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
73
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
74
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
75
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
76
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
77
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
66
78
|
#ifdef RUBY_INTEGER_UNIFICATION
|
|
67
|
-
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data,
|
|
79
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
68
80
|
#endif
|
|
69
|
-
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data,
|
|
70
|
-
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data,
|
|
71
|
-
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data,
|
|
72
|
-
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data,
|
|
81
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
82
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
83
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
84
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
|
|
73
85
|
|
|
74
86
|
static int usascii_encindex, utf8_encindex, binary_encindex;
|
|
75
87
|
|
|
@@ -108,12 +120,40 @@ typedef struct _search_state {
|
|
|
108
120
|
const char *end;
|
|
109
121
|
const char *cursor;
|
|
110
122
|
FBuffer *buffer;
|
|
123
|
+
|
|
124
|
+
#ifdef HAVE_SIMD
|
|
125
|
+
const char *chunk_base;
|
|
126
|
+
const char *chunk_end;
|
|
127
|
+
bool has_matches;
|
|
128
|
+
|
|
129
|
+
#if defined(HAVE_SIMD_NEON)
|
|
130
|
+
uint64_t matches_mask;
|
|
131
|
+
#elif defined(HAVE_SIMD_SSE2)
|
|
132
|
+
int matches_mask;
|
|
133
|
+
#else
|
|
134
|
+
#error "Unknown SIMD Implementation."
|
|
135
|
+
#endif /* HAVE_SIMD_NEON */
|
|
136
|
+
#endif /* HAVE_SIMD */
|
|
111
137
|
} search_state;
|
|
112
138
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
139
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
|
140
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
|
141
|
+
#else
|
|
142
|
+
#define FORCE_INLINE
|
|
143
|
+
#endif
|
|
144
|
+
|
|
145
|
+
static inline FORCE_INLINE void search_flush(search_state *search)
|
|
146
|
+
{
|
|
147
|
+
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
|
148
|
+
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
|
149
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
|
150
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
|
151
|
+
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
|
152
|
+
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
|
153
|
+
if (search->ptr > search->cursor) {
|
|
154
|
+
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
|
|
155
|
+
search->cursor = search->ptr;
|
|
156
|
+
}
|
|
117
157
|
}
|
|
118
158
|
|
|
119
159
|
static const unsigned char escape_table_basic[256] = {
|
|
@@ -129,6 +169,8 @@ static const unsigned char escape_table_basic[256] = {
|
|
|
129
169
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
130
170
|
};
|
|
131
171
|
|
|
172
|
+
static unsigned char (*search_escape_basic_impl)(search_state *);
|
|
173
|
+
|
|
132
174
|
static inline unsigned char search_escape_basic(search_state *search)
|
|
133
175
|
{
|
|
134
176
|
while (search->ptr < search->end) {
|
|
@@ -143,7 +185,8 @@ static inline unsigned char search_escape_basic(search_state *search)
|
|
|
143
185
|
return 0;
|
|
144
186
|
}
|
|
145
187
|
|
|
146
|
-
static inline void escape_UTF8_char_basic(search_state *search)
|
|
188
|
+
static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
|
|
189
|
+
{
|
|
147
190
|
const unsigned char ch = (unsigned char)*search->ptr;
|
|
148
191
|
switch (ch) {
|
|
149
192
|
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
|
@@ -185,12 +228,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
|
|
|
185
228
|
*/
|
|
186
229
|
static inline void convert_UTF8_to_JSON(search_state *search)
|
|
187
230
|
{
|
|
188
|
-
while (
|
|
231
|
+
while (search_escape_basic_impl(search)) {
|
|
189
232
|
escape_UTF8_char_basic(search);
|
|
190
233
|
}
|
|
191
234
|
}
|
|
192
235
|
|
|
193
|
-
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
236
|
+
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
237
|
+
{
|
|
194
238
|
const unsigned char ch = (unsigned char)*search->ptr;
|
|
195
239
|
switch (ch_len) {
|
|
196
240
|
case 1: {
|
|
@@ -226,6 +270,228 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
|
226
270
|
search->cursor = (search->ptr += ch_len);
|
|
227
271
|
}
|
|
228
272
|
|
|
273
|
+
#ifdef HAVE_SIMD
|
|
274
|
+
|
|
275
|
+
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
|
276
|
+
{
|
|
277
|
+
// Flush the buffer so everything up until the last 'len' characters are unflushed.
|
|
278
|
+
search_flush(search);
|
|
279
|
+
|
|
280
|
+
FBuffer *buf = search->buffer;
|
|
281
|
+
fbuffer_inc_capa(buf, vec_len);
|
|
282
|
+
|
|
283
|
+
char *s = (buf->ptr + buf->len);
|
|
284
|
+
|
|
285
|
+
// Pad the buffer with dummy characters that won't need escaping.
|
|
286
|
+
// This seems wasteful at first sight, but a memset of vector length is very fast.
|
|
287
|
+
memset(s, 'X', vec_len);
|
|
288
|
+
|
|
289
|
+
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
|
|
290
|
+
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
|
|
291
|
+
MEMCPY(s, search->ptr, char, len);
|
|
292
|
+
|
|
293
|
+
return s;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
#ifdef HAVE_SIMD_NEON
|
|
297
|
+
|
|
298
|
+
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
|
299
|
+
{
|
|
300
|
+
uint64_t mask = search->matches_mask;
|
|
301
|
+
uint32_t index = trailing_zeros64(mask) >> 2;
|
|
302
|
+
|
|
303
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
|
304
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
|
305
|
+
// search->chunk_base + index >= search->ptr
|
|
306
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
|
307
|
+
// is one byte after the previous match then:
|
|
308
|
+
// search->chunk_base + index == search->ptr
|
|
309
|
+
search->ptr = search->chunk_base + index;
|
|
310
|
+
mask &= mask - 1;
|
|
311
|
+
search->matches_mask = mask;
|
|
312
|
+
search_flush(search);
|
|
313
|
+
return 1;
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
static inline unsigned char search_escape_basic_neon(search_state *search)
|
|
317
|
+
{
|
|
318
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
|
319
|
+
// There are more matches if search->matches_mask > 0.
|
|
320
|
+
if (search->matches_mask > 0) {
|
|
321
|
+
return neon_next_match(search);
|
|
322
|
+
} else {
|
|
323
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
|
324
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
|
325
|
+
search->has_matches = false;
|
|
326
|
+
search->ptr = search->chunk_end;
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/*
|
|
331
|
+
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
|
332
|
+
* need to be escaped.
|
|
333
|
+
*
|
|
334
|
+
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
|
335
|
+
*
|
|
336
|
+
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
|
337
|
+
* the vector instructions may work on larger vectors.
|
|
338
|
+
*
|
|
339
|
+
* First, we load three constants 'lower_bound', 'backslash', and 'dblquote' in vector registers.
|
|
340
|
+
*
|
|
341
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
|
342
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
|
343
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
|
344
|
+
*
|
|
345
|
+
* Next we load the first chunk of the ptr:
|
|
346
|
+
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
|
347
|
+
*
|
|
348
|
+
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
|
349
|
+
* as no bytes are less than 32 (0x20):
|
|
350
|
+
* [0 0 0 0 0 0 0 0]
|
|
351
|
+
*
|
|
352
|
+
* Next, we check if any byte in chunk is equal to a backslash:
|
|
353
|
+
* [0 0 0 FF 0 0 0 0]
|
|
354
|
+
*
|
|
355
|
+
* Finally we check if any byte in chunk is equal to a double quote:
|
|
356
|
+
* [FF 0 0 0 0 0 0 0]
|
|
357
|
+
*
|
|
358
|
+
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
|
359
|
+
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
|
360
|
+
* This is the needs_escape vector and it is equal to:
|
|
361
|
+
* [FF 0 0 FF 0 0 0 0]
|
|
362
|
+
*
|
|
363
|
+
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
|
364
|
+
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
|
365
|
+
*
|
|
366
|
+
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0, then
|
|
367
|
+
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
|
368
|
+
* have at least one byte that needs to be escaped.
|
|
369
|
+
*/
|
|
370
|
+
|
|
371
|
+
if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
|
|
372
|
+
search->has_matches = true;
|
|
373
|
+
search->chunk_base = search->ptr;
|
|
374
|
+
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
|
375
|
+
return neon_next_match(search);
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
// There are fewer than 16 bytes left.
|
|
379
|
+
unsigned long remaining = (search->end - search->ptr);
|
|
380
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
|
381
|
+
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
|
382
|
+
|
|
383
|
+
uint64_t mask = compute_chunk_mask_neon(s);
|
|
384
|
+
|
|
385
|
+
if (!mask) {
|
|
386
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
387
|
+
// search->cursor to search->ptr.
|
|
388
|
+
fbuffer_consumed(search->buffer, remaining);
|
|
389
|
+
search->ptr = search->end;
|
|
390
|
+
search->cursor = search->end;
|
|
391
|
+
return 0;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
search->matches_mask = mask;
|
|
395
|
+
search->has_matches = true;
|
|
396
|
+
search->chunk_end = search->end;
|
|
397
|
+
search->chunk_base = search->ptr;
|
|
398
|
+
return neon_next_match(search);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
if (search->ptr < search->end) {
|
|
402
|
+
return search_escape_basic(search);
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
search_flush(search);
|
|
406
|
+
return 0;
|
|
407
|
+
}
|
|
408
|
+
#endif /* HAVE_SIMD_NEON */
|
|
409
|
+
|
|
410
|
+
#ifdef HAVE_SIMD_SSE2
|
|
411
|
+
|
|
412
|
+
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
|
413
|
+
{
|
|
414
|
+
int mask = search->matches_mask;
|
|
415
|
+
int index = trailing_zeros(mask);
|
|
416
|
+
|
|
417
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
|
418
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
|
419
|
+
// search->chunk_base + index >= search->ptr
|
|
420
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
|
421
|
+
// is one byte after the previous match then:
|
|
422
|
+
// search->chunk_base + index == search->ptr
|
|
423
|
+
search->ptr = search->chunk_base + index;
|
|
424
|
+
mask &= mask - 1;
|
|
425
|
+
search->matches_mask = mask;
|
|
426
|
+
search_flush(search);
|
|
427
|
+
return 1;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
#if defined(__clang__) || defined(__GNUC__)
|
|
431
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
|
432
|
+
#else
|
|
433
|
+
#define TARGET_SSE2
|
|
434
|
+
#endif
|
|
435
|
+
|
|
436
|
+
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
|
437
|
+
{
|
|
438
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
|
439
|
+
// There are more matches if search->matches_mask > 0.
|
|
440
|
+
if (search->matches_mask > 0) {
|
|
441
|
+
return sse2_next_match(search);
|
|
442
|
+
} else {
|
|
443
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
|
444
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
|
445
|
+
search->has_matches = false;
|
|
446
|
+
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
|
447
|
+
search->ptr = search->end;
|
|
448
|
+
} else {
|
|
449
|
+
search->ptr = search->chunk_base + sizeof(__m128i);
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
|
|
455
|
+
search->has_matches = true;
|
|
456
|
+
search->chunk_base = search->ptr;
|
|
457
|
+
search->chunk_end = search->ptr + sizeof(__m128i);
|
|
458
|
+
return sse2_next_match(search);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
// There are fewer than 16 bytes left.
|
|
462
|
+
unsigned long remaining = (search->end - search->ptr);
|
|
463
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
|
464
|
+
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
|
465
|
+
|
|
466
|
+
int needs_escape_mask = compute_chunk_mask_sse2(s);
|
|
467
|
+
|
|
468
|
+
if (needs_escape_mask == 0) {
|
|
469
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
|
470
|
+
// search->cursor to search->ptr.
|
|
471
|
+
fbuffer_consumed(search->buffer, remaining);
|
|
472
|
+
search->ptr = search->end;
|
|
473
|
+
search->cursor = search->end;
|
|
474
|
+
return 0;
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
search->has_matches = true;
|
|
478
|
+
search->matches_mask = needs_escape_mask;
|
|
479
|
+
search->chunk_base = search->ptr;
|
|
480
|
+
return sse2_next_match(search);
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
if (search->ptr < search->end) {
|
|
484
|
+
return search_escape_basic(search);
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
search_flush(search);
|
|
488
|
+
return 0;
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
#endif /* HAVE_SIMD_SSE2 */
|
|
492
|
+
|
|
493
|
+
#endif /* HAVE_SIMD */
|
|
494
|
+
|
|
229
495
|
static const unsigned char script_safe_escape_table[256] = {
|
|
230
496
|
// ASCII Control Characters
|
|
231
497
|
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
|
@@ -329,7 +595,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
|
|
|
329
595
|
return 0;
|
|
330
596
|
}
|
|
331
597
|
|
|
332
|
-
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
598
|
+
static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
599
|
+
{
|
|
333
600
|
const unsigned char ch = (unsigned char)*search->ptr;
|
|
334
601
|
switch (ch_len) {
|
|
335
602
|
case 1: {
|
|
@@ -359,7 +626,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
|
|
|
359
626
|
|
|
360
627
|
uint32_t wchar = 0;
|
|
361
628
|
|
|
362
|
-
switch(ch_len) {
|
|
629
|
+
switch (ch_len) {
|
|
363
630
|
case 2:
|
|
364
631
|
wchar = ch & 0x1F;
|
|
365
632
|
break;
|
|
@@ -519,7 +786,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
519
786
|
* _state_ is a JSON::State object, that can also be used to configure the
|
|
520
787
|
* produced JSON string output further.
|
|
521
788
|
*/
|
|
522
|
-
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
|
789
|
+
static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
|
|
790
|
+
{
|
|
523
791
|
rb_check_arity(argc, 0, 1);
|
|
524
792
|
VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
|
|
525
793
|
return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
|
|
@@ -576,17 +844,6 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
576
844
|
return cState_partial_generate(Vstate, self, generate_json_float, Qfalse);
|
|
577
845
|
}
|
|
578
846
|
|
|
579
|
-
/*
|
|
580
|
-
* call-seq: String.included(modul)
|
|
581
|
-
*
|
|
582
|
-
* Extends _modul_ with the String::Extend module.
|
|
583
|
-
*/
|
|
584
|
-
static VALUE mString_included_s(VALUE self, VALUE modul) {
|
|
585
|
-
VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
|
|
586
|
-
rb_call_super(1, &modul);
|
|
587
|
-
return result;
|
|
588
|
-
}
|
|
589
|
-
|
|
590
847
|
/*
|
|
591
848
|
* call-seq: to_json(*)
|
|
592
849
|
*
|
|
@@ -601,51 +858,6 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self)
|
|
|
601
858
|
return cState_partial_generate(Vstate, self, generate_json_string, Qfalse);
|
|
602
859
|
}
|
|
603
860
|
|
|
604
|
-
/*
|
|
605
|
-
* call-seq: to_json_raw_object()
|
|
606
|
-
*
|
|
607
|
-
* This method creates a raw object hash, that can be nested into
|
|
608
|
-
* other data structures and will be generated as a raw string. This
|
|
609
|
-
* method should be used, if you want to convert raw strings to JSON
|
|
610
|
-
* instead of UTF-8 strings, e. g. binary data.
|
|
611
|
-
*/
|
|
612
|
-
static VALUE mString_to_json_raw_object(VALUE self)
|
|
613
|
-
{
|
|
614
|
-
VALUE ary;
|
|
615
|
-
VALUE result = rb_hash_new();
|
|
616
|
-
rb_hash_aset(result, rb_funcall(mJSON, i_create_id, 0), rb_class_name(rb_obj_class(self)));
|
|
617
|
-
ary = rb_funcall(self, i_unpack, 1, rb_str_new2("C*"));
|
|
618
|
-
rb_hash_aset(result, rb_utf8_str_new_lit("raw"), ary);
|
|
619
|
-
return result;
|
|
620
|
-
}
|
|
621
|
-
|
|
622
|
-
/*
|
|
623
|
-
* call-seq: to_json_raw(*args)
|
|
624
|
-
*
|
|
625
|
-
* This method creates a JSON text from the result of a call to
|
|
626
|
-
* to_json_raw_object of this String.
|
|
627
|
-
*/
|
|
628
|
-
static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self)
|
|
629
|
-
{
|
|
630
|
-
VALUE obj = mString_to_json_raw_object(self);
|
|
631
|
-
Check_Type(obj, T_HASH);
|
|
632
|
-
return mHash_to_json(argc, argv, obj);
|
|
633
|
-
}
|
|
634
|
-
|
|
635
|
-
/*
|
|
636
|
-
* call-seq: json_create(o)
|
|
637
|
-
*
|
|
638
|
-
* Raw Strings are JSON Objects (the raw bytes are stored in an array for the
|
|
639
|
-
* key "raw"). The Ruby String can be created by this module method.
|
|
640
|
-
*/
|
|
641
|
-
static VALUE mString_Extend_json_create(VALUE self, VALUE o)
|
|
642
|
-
{
|
|
643
|
-
VALUE ary;
|
|
644
|
-
Check_Type(o, T_HASH);
|
|
645
|
-
ary = rb_hash_aref(o, rb_str_new2("raw"));
|
|
646
|
-
return rb_funcall(ary, i_pack, 1, rb_str_new2("C*"));
|
|
647
|
-
}
|
|
648
|
-
|
|
649
861
|
/*
|
|
650
862
|
* call-seq: to_json(*)
|
|
651
863
|
*
|
|
@@ -784,10 +996,51 @@ static inline VALUE vstate_get(struct generate_json_data *data)
|
|
|
784
996
|
}
|
|
785
997
|
|
|
786
998
|
struct hash_foreach_arg {
|
|
999
|
+
VALUE hash;
|
|
787
1000
|
struct generate_json_data *data;
|
|
788
|
-
int
|
|
1001
|
+
int first_key_type;
|
|
1002
|
+
bool first;
|
|
1003
|
+
bool mixed_keys_encountered;
|
|
789
1004
|
};
|
|
790
1005
|
|
|
1006
|
+
static VALUE
|
|
1007
|
+
convert_string_subclass(VALUE key)
|
|
1008
|
+
{
|
|
1009
|
+
VALUE key_to_s = rb_funcall(key, i_to_s, 0);
|
|
1010
|
+
|
|
1011
|
+
if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
|
|
1012
|
+
VALUE cname = rb_obj_class(key);
|
|
1013
|
+
rb_raise(rb_eTypeError,
|
|
1014
|
+
"can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
|
|
1015
|
+
cname, "String", cname, "to_s", rb_obj_class(key_to_s));
|
|
1016
|
+
}
|
|
1017
|
+
|
|
1018
|
+
return key_to_s;
|
|
1019
|
+
}
|
|
1020
|
+
|
|
1021
|
+
NOINLINE()
|
|
1022
|
+
static void
|
|
1023
|
+
json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
|
|
1024
|
+
{
|
|
1025
|
+
if (arg->mixed_keys_encountered) {
|
|
1026
|
+
return;
|
|
1027
|
+
}
|
|
1028
|
+
arg->mixed_keys_encountered = true;
|
|
1029
|
+
|
|
1030
|
+
JSON_Generator_State *state = arg->data->state;
|
|
1031
|
+
if (state->on_duplicate_key != JSON_IGNORE) {
|
|
1032
|
+
VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
|
|
1033
|
+
rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
|
|
1034
|
+
}
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
static VALUE
|
|
1038
|
+
json_call_as_json(JSON_Generator_State *state, VALUE object, VALUE is_key)
|
|
1039
|
+
{
|
|
1040
|
+
VALUE proc_args[2] = {object, is_key};
|
|
1041
|
+
return rb_proc_call_with_block(state->as_json, 2, proc_args, Qnil);
|
|
1042
|
+
}
|
|
1043
|
+
|
|
791
1044
|
static int
|
|
792
1045
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
793
1046
|
{
|
|
@@ -798,51 +1051,77 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
798
1051
|
JSON_Generator_State *state = data->state;
|
|
799
1052
|
|
|
800
1053
|
long depth = state->depth;
|
|
801
|
-
int
|
|
1054
|
+
int key_type = rb_type(key);
|
|
802
1055
|
|
|
803
|
-
if (arg->
|
|
804
|
-
|
|
805
|
-
|
|
1056
|
+
if (arg->first) {
|
|
1057
|
+
arg->first = false;
|
|
1058
|
+
arg->first_key_type = key_type;
|
|
806
1059
|
}
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
1060
|
+
else {
|
|
1061
|
+
fbuffer_append_char(buffer, ',');
|
|
1062
|
+
}
|
|
1063
|
+
|
|
1064
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
|
1065
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
|
1066
|
+
}
|
|
1067
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
|
1068
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
811
1069
|
}
|
|
812
1070
|
|
|
813
1071
|
VALUE key_to_s;
|
|
814
|
-
|
|
1072
|
+
bool as_json_called = false;
|
|
1073
|
+
|
|
1074
|
+
start:
|
|
1075
|
+
switch (key_type) {
|
|
815
1076
|
case T_STRING:
|
|
1077
|
+
if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
|
|
1078
|
+
json_inspect_hash_with_mixed_keys(arg);
|
|
1079
|
+
}
|
|
1080
|
+
|
|
816
1081
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
|
817
1082
|
key_to_s = key;
|
|
818
1083
|
} else {
|
|
819
|
-
key_to_s =
|
|
1084
|
+
key_to_s = convert_string_subclass(key);
|
|
820
1085
|
}
|
|
821
1086
|
break;
|
|
822
1087
|
case T_SYMBOL:
|
|
1088
|
+
if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
|
|
1089
|
+
json_inspect_hash_with_mixed_keys(arg);
|
|
1090
|
+
}
|
|
1091
|
+
|
|
823
1092
|
key_to_s = rb_sym2str(key);
|
|
824
1093
|
break;
|
|
825
1094
|
default:
|
|
1095
|
+
if (data->state->strict) {
|
|
1096
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
|
1097
|
+
key = json_call_as_json(data->state, key, Qtrue);
|
|
1098
|
+
key_type = rb_type(key);
|
|
1099
|
+
as_json_called = true;
|
|
1100
|
+
goto start;
|
|
1101
|
+
} else {
|
|
1102
|
+
raise_generator_error(key, "%"PRIsVALUE" not allowed as object key in JSON", CLASS_OF(key));
|
|
1103
|
+
}
|
|
1104
|
+
}
|
|
826
1105
|
key_to_s = rb_convert_type(key, T_STRING, "String", "to_s");
|
|
827
1106
|
break;
|
|
828
1107
|
}
|
|
829
1108
|
|
|
830
1109
|
if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
|
|
831
|
-
generate_json_string(buffer, data,
|
|
1110
|
+
generate_json_string(buffer, data, key_to_s);
|
|
832
1111
|
} else {
|
|
833
|
-
generate_json(buffer, data,
|
|
1112
|
+
generate_json(buffer, data, key_to_s);
|
|
834
1113
|
}
|
|
835
|
-
if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before);
|
|
1114
|
+
if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
|
|
836
1115
|
fbuffer_append_char(buffer, ':');
|
|
837
|
-
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space);
|
|
838
|
-
generate_json(buffer, data,
|
|
1116
|
+
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
|
|
1117
|
+
generate_json(buffer, data, val);
|
|
839
1118
|
|
|
840
|
-
arg->iter++;
|
|
841
1119
|
return ST_CONTINUE;
|
|
842
1120
|
}
|
|
843
1121
|
|
|
844
|
-
static inline long increase_depth(
|
|
1122
|
+
static inline long increase_depth(struct generate_json_data *data)
|
|
845
1123
|
{
|
|
1124
|
+
JSON_Generator_State *state = data->state;
|
|
846
1125
|
long depth = ++state->depth;
|
|
847
1126
|
if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
|
|
848
1127
|
rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
|
|
@@ -850,69 +1129,62 @@ static inline long increase_depth(JSON_Generator_State *state)
|
|
|
850
1129
|
return depth;
|
|
851
1130
|
}
|
|
852
1131
|
|
|
853
|
-
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data,
|
|
1132
|
+
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
854
1133
|
{
|
|
855
|
-
|
|
856
|
-
long depth = increase_depth(state);
|
|
1134
|
+
long depth = increase_depth(data);
|
|
857
1135
|
|
|
858
1136
|
if (RHASH_SIZE(obj) == 0) {
|
|
859
1137
|
fbuffer_append(buffer, "{}", 2);
|
|
860
|
-
--state->depth;
|
|
1138
|
+
--data->state->depth;
|
|
861
1139
|
return;
|
|
862
1140
|
}
|
|
863
1141
|
|
|
864
1142
|
fbuffer_append_char(buffer, '{');
|
|
865
1143
|
|
|
866
1144
|
struct hash_foreach_arg arg = {
|
|
1145
|
+
.hash = obj,
|
|
867
1146
|
.data = data,
|
|
868
|
-
.
|
|
1147
|
+
.first = true,
|
|
869
1148
|
};
|
|
870
1149
|
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
|
|
871
1150
|
|
|
872
|
-
depth = --state->depth;
|
|
873
|
-
if (RB_UNLIKELY(state->object_nl)) {
|
|
874
|
-
fbuffer_append_str(buffer, state->object_nl);
|
|
875
|
-
if (RB_UNLIKELY(state->indent)) {
|
|
876
|
-
|
|
877
|
-
fbuffer_append_str(buffer, state->indent);
|
|
878
|
-
}
|
|
1151
|
+
depth = --data->state->depth;
|
|
1152
|
+
if (RB_UNLIKELY(data->state->object_nl)) {
|
|
1153
|
+
fbuffer_append_str(buffer, data->state->object_nl);
|
|
1154
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
|
1155
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
879
1156
|
}
|
|
880
1157
|
}
|
|
881
1158
|
fbuffer_append_char(buffer, '}');
|
|
882
1159
|
}
|
|
883
1160
|
|
|
884
|
-
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data,
|
|
1161
|
+
static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
885
1162
|
{
|
|
886
|
-
|
|
887
|
-
long depth = increase_depth(state);
|
|
1163
|
+
long depth = increase_depth(data);
|
|
888
1164
|
|
|
889
1165
|
if (RARRAY_LEN(obj) == 0) {
|
|
890
1166
|
fbuffer_append(buffer, "[]", 2);
|
|
891
|
-
--state->depth;
|
|
1167
|
+
--data->state->depth;
|
|
892
1168
|
return;
|
|
893
1169
|
}
|
|
894
1170
|
|
|
895
1171
|
fbuffer_append_char(buffer, '[');
|
|
896
|
-
if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
|
|
897
|
-
for(i = 0; i < RARRAY_LEN(obj); i++) {
|
|
1172
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
|
1173
|
+
for (int i = 0; i < RARRAY_LEN(obj); i++) {
|
|
898
1174
|
if (i > 0) {
|
|
899
1175
|
fbuffer_append_char(buffer, ',');
|
|
900
|
-
if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
|
|
1176
|
+
if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
|
|
901
1177
|
}
|
|
902
|
-
if (RB_UNLIKELY(state->indent)) {
|
|
903
|
-
|
|
904
|
-
fbuffer_append_str(buffer, state->indent);
|
|
905
|
-
}
|
|
1178
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
|
1179
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
906
1180
|
}
|
|
907
|
-
generate_json(buffer, data,
|
|
1181
|
+
generate_json(buffer, data, RARRAY_AREF(obj, i));
|
|
908
1182
|
}
|
|
909
|
-
state->depth = --depth;
|
|
910
|
-
if (RB_UNLIKELY(state->array_nl)) {
|
|
911
|
-
fbuffer_append_str(buffer, state->array_nl);
|
|
912
|
-
if (RB_UNLIKELY(state->indent)) {
|
|
913
|
-
|
|
914
|
-
fbuffer_append_str(buffer, state->indent);
|
|
915
|
-
}
|
|
1183
|
+
data->state->depth = --depth;
|
|
1184
|
+
if (RB_UNLIKELY(data->state->array_nl)) {
|
|
1185
|
+
fbuffer_append_str(buffer, data->state->array_nl);
|
|
1186
|
+
if (RB_UNLIKELY(data->state->indent)) {
|
|
1187
|
+
fbuffer_append_str_repeat(buffer, data->state->indent, depth);
|
|
916
1188
|
}
|
|
917
1189
|
}
|
|
918
1190
|
fbuffer_append_char(buffer, ']');
|
|
@@ -960,7 +1232,7 @@ static inline VALUE ensure_valid_encoding(VALUE str)
|
|
|
960
1232
|
return str;
|
|
961
1233
|
}
|
|
962
1234
|
|
|
963
|
-
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data,
|
|
1235
|
+
static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
964
1236
|
{
|
|
965
1237
|
obj = ensure_valid_encoding(obj);
|
|
966
1238
|
|
|
@@ -973,12 +1245,18 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
|
973
1245
|
search.cursor = search.ptr;
|
|
974
1246
|
search.end = search.ptr + len;
|
|
975
1247
|
|
|
976
|
-
|
|
1248
|
+
#ifdef HAVE_SIMD
|
|
1249
|
+
search.matches_mask = 0;
|
|
1250
|
+
search.has_matches = false;
|
|
1251
|
+
search.chunk_base = NULL;
|
|
1252
|
+
#endif /* HAVE_SIMD */
|
|
1253
|
+
|
|
1254
|
+
switch (rb_enc_str_coderange(obj)) {
|
|
977
1255
|
case ENC_CODERANGE_7BIT:
|
|
978
1256
|
case ENC_CODERANGE_VALID:
|
|
979
|
-
if (RB_UNLIKELY(state->ascii_only)) {
|
|
980
|
-
convert_UTF8_to_ASCII_only_JSON(&search, state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
|
981
|
-
} else if (RB_UNLIKELY(state->script_safe)) {
|
|
1257
|
+
if (RB_UNLIKELY(data->state->ascii_only)) {
|
|
1258
|
+
convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
|
|
1259
|
+
} else if (RB_UNLIKELY(data->state->script_safe)) {
|
|
982
1260
|
convert_UTF8_to_script_safe_JSON(&search);
|
|
983
1261
|
} else {
|
|
984
1262
|
convert_UTF8_to_JSON(&search);
|
|
@@ -991,7 +1269,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
|
991
1269
|
fbuffer_append_char(buffer, '"');
|
|
992
1270
|
}
|
|
993
1271
|
|
|
994
|
-
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data,
|
|
1272
|
+
static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
995
1273
|
{
|
|
996
1274
|
VALUE tmp;
|
|
997
1275
|
if (rb_respond_to(obj, i_to_json)) {
|
|
@@ -1001,100 +1279,115 @@ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *d
|
|
|
1001
1279
|
} else {
|
|
1002
1280
|
tmp = rb_funcall(obj, i_to_s, 0);
|
|
1003
1281
|
Check_Type(tmp, T_STRING);
|
|
1004
|
-
generate_json_string(buffer, data,
|
|
1282
|
+
generate_json_string(buffer, data, tmp);
|
|
1005
1283
|
}
|
|
1006
1284
|
}
|
|
1007
1285
|
|
|
1008
|
-
static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data,
|
|
1286
|
+
static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1009
1287
|
{
|
|
1010
|
-
if (state->strict) {
|
|
1011
|
-
generate_json_string(buffer, data,
|
|
1288
|
+
if (data->state->strict) {
|
|
1289
|
+
generate_json_string(buffer, data, rb_sym2str(obj));
|
|
1012
1290
|
} else {
|
|
1013
|
-
generate_json_fallback(buffer, data,
|
|
1291
|
+
generate_json_fallback(buffer, data, obj);
|
|
1014
1292
|
}
|
|
1015
1293
|
}
|
|
1016
1294
|
|
|
1017
|
-
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data,
|
|
1295
|
+
static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1018
1296
|
{
|
|
1019
1297
|
fbuffer_append(buffer, "null", 4);
|
|
1020
1298
|
}
|
|
1021
1299
|
|
|
1022
|
-
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data,
|
|
1300
|
+
static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1023
1301
|
{
|
|
1024
1302
|
fbuffer_append(buffer, "false", 5);
|
|
1025
1303
|
}
|
|
1026
1304
|
|
|
1027
|
-
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data,
|
|
1305
|
+
static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1028
1306
|
{
|
|
1029
1307
|
fbuffer_append(buffer, "true", 4);
|
|
1030
1308
|
}
|
|
1031
1309
|
|
|
1032
|
-
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data,
|
|
1310
|
+
static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1033
1311
|
{
|
|
1034
1312
|
fbuffer_append_long(buffer, FIX2LONG(obj));
|
|
1035
1313
|
}
|
|
1036
1314
|
|
|
1037
|
-
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data,
|
|
1315
|
+
static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1038
1316
|
{
|
|
1039
1317
|
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
|
1040
1318
|
fbuffer_append_str(buffer, tmp);
|
|
1041
1319
|
}
|
|
1042
1320
|
|
|
1043
1321
|
#ifdef RUBY_INTEGER_UNIFICATION
|
|
1044
|
-
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data,
|
|
1322
|
+
static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1045
1323
|
{
|
|
1046
1324
|
if (FIXNUM_P(obj))
|
|
1047
|
-
generate_json_fixnum(buffer, data,
|
|
1325
|
+
generate_json_fixnum(buffer, data, obj);
|
|
1048
1326
|
else
|
|
1049
|
-
generate_json_bignum(buffer, data,
|
|
1327
|
+
generate_json_bignum(buffer, data, obj);
|
|
1050
1328
|
}
|
|
1051
1329
|
#endif
|
|
1052
1330
|
|
|
1053
|
-
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data,
|
|
1331
|
+
static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1054
1332
|
{
|
|
1055
1333
|
double value = RFLOAT_VALUE(obj);
|
|
1056
|
-
char allow_nan = state->allow_nan;
|
|
1057
|
-
if (
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1334
|
+
char allow_nan = data->state->allow_nan;
|
|
1335
|
+
if (isinf(value) || isnan(value)) {
|
|
1336
|
+
/* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
|
|
1337
|
+
if (!allow_nan) {
|
|
1338
|
+
if (data->state->strict && data->state->as_json) {
|
|
1339
|
+
VALUE casted_obj = json_call_as_json(data->state, obj, Qfalse);
|
|
1061
1340
|
if (casted_obj != obj) {
|
|
1062
|
-
increase_depth(
|
|
1063
|
-
generate_json(buffer, data,
|
|
1064
|
-
state->depth--;
|
|
1341
|
+
increase_depth(data);
|
|
1342
|
+
generate_json(buffer, data, casted_obj);
|
|
1343
|
+
data->state->depth--;
|
|
1065
1344
|
return;
|
|
1066
1345
|
}
|
|
1067
1346
|
}
|
|
1068
1347
|
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
|
|
1069
1348
|
}
|
|
1349
|
+
|
|
1350
|
+
VALUE tmp = rb_funcall(obj, i_to_s, 0);
|
|
1351
|
+
fbuffer_append_str(buffer, tmp);
|
|
1352
|
+
return;
|
|
1070
1353
|
}
|
|
1071
|
-
|
|
1354
|
+
|
|
1355
|
+
/* This implementation writes directly into the buffer. We reserve
|
|
1356
|
+
* the 32 characters that fpconv_dtoa states as its maximum.
|
|
1357
|
+
*/
|
|
1358
|
+
fbuffer_inc_capa(buffer, 32);
|
|
1359
|
+
char* d = buffer->ptr + buffer->len;
|
|
1360
|
+
int len = fpconv_dtoa(value, d);
|
|
1361
|
+
/* fpconv_dtoa converts a float to its shortest string representation,
|
|
1362
|
+
* but it adds a ".0" if this is a plain integer.
|
|
1363
|
+
*/
|
|
1364
|
+
fbuffer_consumed(buffer, len);
|
|
1072
1365
|
}
|
|
1073
1366
|
|
|
1074
|
-
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data,
|
|
1367
|
+
static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1075
1368
|
{
|
|
1076
1369
|
VALUE fragment = RSTRUCT_GET(obj, 0);
|
|
1077
1370
|
Check_Type(fragment, T_STRING);
|
|
1078
1371
|
fbuffer_append_str(buffer, fragment);
|
|
1079
1372
|
}
|
|
1080
1373
|
|
|
1081
|
-
static void generate_json(FBuffer *buffer, struct generate_json_data *data,
|
|
1374
|
+
static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
|
|
1082
1375
|
{
|
|
1083
1376
|
bool as_json_called = false;
|
|
1084
1377
|
start:
|
|
1085
1378
|
if (obj == Qnil) {
|
|
1086
|
-
generate_json_null(buffer, data,
|
|
1379
|
+
generate_json_null(buffer, data, obj);
|
|
1087
1380
|
} else if (obj == Qfalse) {
|
|
1088
|
-
generate_json_false(buffer, data,
|
|
1381
|
+
generate_json_false(buffer, data, obj);
|
|
1089
1382
|
} else if (obj == Qtrue) {
|
|
1090
|
-
generate_json_true(buffer, data,
|
|
1383
|
+
generate_json_true(buffer, data, obj);
|
|
1091
1384
|
} else if (RB_SPECIAL_CONST_P(obj)) {
|
|
1092
1385
|
if (RB_FIXNUM_P(obj)) {
|
|
1093
|
-
generate_json_fixnum(buffer, data,
|
|
1386
|
+
generate_json_fixnum(buffer, data, obj);
|
|
1094
1387
|
} else if (RB_FLONUM_P(obj)) {
|
|
1095
|
-
generate_json_float(buffer, data,
|
|
1388
|
+
generate_json_float(buffer, data, obj);
|
|
1096
1389
|
} else if (RB_STATIC_SYM_P(obj)) {
|
|
1097
|
-
generate_json_symbol(buffer, data,
|
|
1390
|
+
generate_json_symbol(buffer, data, obj);
|
|
1098
1391
|
} else {
|
|
1099
1392
|
goto general;
|
|
1100
1393
|
}
|
|
@@ -1102,43 +1395,43 @@ start:
|
|
|
1102
1395
|
VALUE klass = RBASIC_CLASS(obj);
|
|
1103
1396
|
switch (RB_BUILTIN_TYPE(obj)) {
|
|
1104
1397
|
case T_BIGNUM:
|
|
1105
|
-
generate_json_bignum(buffer, data,
|
|
1398
|
+
generate_json_bignum(buffer, data, obj);
|
|
1106
1399
|
break;
|
|
1107
1400
|
case T_HASH:
|
|
1108
1401
|
if (klass != rb_cHash) goto general;
|
|
1109
|
-
generate_json_object(buffer, data,
|
|
1402
|
+
generate_json_object(buffer, data, obj);
|
|
1110
1403
|
break;
|
|
1111
1404
|
case T_ARRAY:
|
|
1112
1405
|
if (klass != rb_cArray) goto general;
|
|
1113
|
-
generate_json_array(buffer, data,
|
|
1406
|
+
generate_json_array(buffer, data, obj);
|
|
1114
1407
|
break;
|
|
1115
1408
|
case T_STRING:
|
|
1116
1409
|
if (klass != rb_cString) goto general;
|
|
1117
|
-
generate_json_string(buffer, data,
|
|
1410
|
+
generate_json_string(buffer, data, obj);
|
|
1118
1411
|
break;
|
|
1119
1412
|
case T_SYMBOL:
|
|
1120
|
-
generate_json_symbol(buffer, data,
|
|
1413
|
+
generate_json_symbol(buffer, data, obj);
|
|
1121
1414
|
break;
|
|
1122
1415
|
case T_FLOAT:
|
|
1123
1416
|
if (klass != rb_cFloat) goto general;
|
|
1124
|
-
generate_json_float(buffer, data,
|
|
1417
|
+
generate_json_float(buffer, data, obj);
|
|
1125
1418
|
break;
|
|
1126
1419
|
case T_STRUCT:
|
|
1127
1420
|
if (klass != cFragment) goto general;
|
|
1128
|
-
generate_json_fragment(buffer, data,
|
|
1421
|
+
generate_json_fragment(buffer, data, obj);
|
|
1129
1422
|
break;
|
|
1130
1423
|
default:
|
|
1131
1424
|
general:
|
|
1132
|
-
if (state->strict) {
|
|
1133
|
-
if (RTEST(state->as_json) && !as_json_called) {
|
|
1134
|
-
obj =
|
|
1425
|
+
if (data->state->strict) {
|
|
1426
|
+
if (RTEST(data->state->as_json) && !as_json_called) {
|
|
1427
|
+
obj = json_call_as_json(data->state, obj, Qfalse);
|
|
1135
1428
|
as_json_called = true;
|
|
1136
1429
|
goto start;
|
|
1137
1430
|
} else {
|
|
1138
1431
|
raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
|
|
1139
1432
|
}
|
|
1140
1433
|
} else {
|
|
1141
|
-
generate_json_fallback(buffer, data,
|
|
1434
|
+
generate_json_fallback(buffer, data, obj);
|
|
1142
1435
|
}
|
|
1143
1436
|
}
|
|
1144
1437
|
}
|
|
@@ -1148,7 +1441,7 @@ static VALUE generate_json_try(VALUE d)
|
|
|
1148
1441
|
{
|
|
1149
1442
|
struct generate_json_data *data = (struct generate_json_data *)d;
|
|
1150
1443
|
|
|
1151
|
-
data->func(data->buffer, data, data->
|
|
1444
|
+
data->func(data->buffer, data, data->obj);
|
|
1152
1445
|
|
|
1153
1446
|
return Qnil;
|
|
1154
1447
|
}
|
|
@@ -1552,6 +1845,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
|
|
|
1552
1845
|
return Qnil;
|
|
1553
1846
|
}
|
|
1554
1847
|
|
|
1848
|
+
static VALUE cState_allow_duplicate_key_p(VALUE self)
|
|
1849
|
+
{
|
|
1850
|
+
GET_STATE(self);
|
|
1851
|
+
switch (state->on_duplicate_key) {
|
|
1852
|
+
case JSON_IGNORE:
|
|
1853
|
+
return Qtrue;
|
|
1854
|
+
case JSON_DEPRECATED:
|
|
1855
|
+
return Qnil;
|
|
1856
|
+
default:
|
|
1857
|
+
return Qfalse;
|
|
1858
|
+
}
|
|
1859
|
+
}
|
|
1860
|
+
|
|
1555
1861
|
/*
|
|
1556
1862
|
* call-seq: depth
|
|
1557
1863
|
*
|
|
@@ -1609,15 +1915,30 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
|
|
|
1609
1915
|
return Qnil;
|
|
1610
1916
|
}
|
|
1611
1917
|
|
|
1918
|
+
struct configure_state_data {
|
|
1919
|
+
JSON_Generator_State *state;
|
|
1920
|
+
VALUE vstate; // Ruby object that owns the state, or Qfalse if stack-allocated
|
|
1921
|
+
};
|
|
1922
|
+
|
|
1923
|
+
static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
|
|
1924
|
+
{
|
|
1925
|
+
if (RTEST(data->vstate)) {
|
|
1926
|
+
RB_OBJ_WRITE(data->vstate, field, value);
|
|
1927
|
+
} else {
|
|
1928
|
+
*field = value;
|
|
1929
|
+
}
|
|
1930
|
+
}
|
|
1931
|
+
|
|
1612
1932
|
static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
|
1613
1933
|
{
|
|
1614
|
-
|
|
1934
|
+
struct configure_state_data *data = (struct configure_state_data *)_arg;
|
|
1935
|
+
JSON_Generator_State *state = data->state;
|
|
1615
1936
|
|
|
1616
|
-
if (key == sym_indent) { state->indent
|
|
1617
|
-
else if (key == sym_space) { state->space
|
|
1618
|
-
else if (key == sym_space_before) { state->space_before
|
|
1619
|
-
else if (key == sym_object_nl) { state->object_nl
|
|
1620
|
-
else if (key == sym_array_nl) { state->array_nl
|
|
1937
|
+
if (key == sym_indent) { state_write_value(data, &state->indent, string_config(val)); }
|
|
1938
|
+
else if (key == sym_space) { state_write_value(data, &state->space, string_config(val)); }
|
|
1939
|
+
else if (key == sym_space_before) { state_write_value(data, &state->space_before, string_config(val)); }
|
|
1940
|
+
else if (key == sym_object_nl) { state_write_value(data, &state->object_nl, string_config(val)); }
|
|
1941
|
+
else if (key == sym_array_nl) { state_write_value(data, &state->array_nl, string_config(val)); }
|
|
1621
1942
|
else if (key == sym_max_nesting) { state->max_nesting = long_config(val); }
|
|
1622
1943
|
else if (key == sym_allow_nan) { state->allow_nan = RTEST(val); }
|
|
1623
1944
|
else if (key == sym_ascii_only) { state->ascii_only = RTEST(val); }
|
|
@@ -1626,11 +1947,16 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
|
|
|
1626
1947
|
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
|
|
1627
1948
|
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
|
|
1628
1949
|
else if (key == sym_strict) { state->strict = RTEST(val); }
|
|
1629
|
-
else if (key ==
|
|
1950
|
+
else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
|
|
1951
|
+
else if (key == sym_as_json) {
|
|
1952
|
+
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
|
|
1953
|
+
state->as_json_single_arg = proc && rb_proc_arity(proc) == 1;
|
|
1954
|
+
state_write_value(data, &state->as_json, proc);
|
|
1955
|
+
}
|
|
1630
1956
|
return ST_CONTINUE;
|
|
1631
1957
|
}
|
|
1632
1958
|
|
|
1633
|
-
static void configure_state(JSON_Generator_State *state, VALUE config)
|
|
1959
|
+
static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
|
|
1634
1960
|
{
|
|
1635
1961
|
if (!RTEST(config)) return;
|
|
1636
1962
|
|
|
@@ -1638,15 +1964,20 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
|
|
|
1638
1964
|
|
|
1639
1965
|
if (!RHASH_SIZE(config)) return;
|
|
1640
1966
|
|
|
1967
|
+
struct configure_state_data data = {
|
|
1968
|
+
.state = state,
|
|
1969
|
+
.vstate = vstate
|
|
1970
|
+
};
|
|
1971
|
+
|
|
1641
1972
|
// We assume in most cases few keys are set so it's faster to go over
|
|
1642
1973
|
// the provided keys than to check all possible keys.
|
|
1643
|
-
rb_hash_foreach(config, configure_state_i, (VALUE)
|
|
1974
|
+
rb_hash_foreach(config, configure_state_i, (VALUE)&data);
|
|
1644
1975
|
}
|
|
1645
1976
|
|
|
1646
1977
|
static VALUE cState_configure(VALUE self, VALUE opts)
|
|
1647
1978
|
{
|
|
1648
1979
|
GET_STATE(self);
|
|
1649
|
-
configure_state(state, opts);
|
|
1980
|
+
configure_state(state, self, opts);
|
|
1650
1981
|
return self;
|
|
1651
1982
|
}
|
|
1652
1983
|
|
|
@@ -1654,7 +1985,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
|
|
|
1654
1985
|
{
|
|
1655
1986
|
JSON_Generator_State state = {0};
|
|
1656
1987
|
state_init(&state);
|
|
1657
|
-
configure_state(&state, opts);
|
|
1988
|
+
configure_state(&state, Qfalse, opts);
|
|
1658
1989
|
|
|
1659
1990
|
char stack_buffer[FBUFFER_STACK_SIZE];
|
|
1660
1991
|
FBuffer buffer = {
|
|
@@ -1743,6 +2074,8 @@ void Init_generator(void)
|
|
|
1743
2074
|
rb_define_method(cState, "generate", cState_generate, -1);
|
|
1744
2075
|
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
|
|
1745
2076
|
|
|
2077
|
+
rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
|
|
2078
|
+
|
|
1746
2079
|
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
|
|
1747
2080
|
|
|
1748
2081
|
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
|
|
@@ -1770,13 +2103,7 @@ void Init_generator(void)
|
|
|
1770
2103
|
rb_define_method(mFloat, "to_json", mFloat_to_json, -1);
|
|
1771
2104
|
|
|
1772
2105
|
VALUE mString = rb_define_module_under(mGeneratorMethods, "String");
|
|
1773
|
-
rb_define_singleton_method(mString, "included", mString_included_s, 1);
|
|
1774
2106
|
rb_define_method(mString, "to_json", mString_to_json, -1);
|
|
1775
|
-
rb_define_method(mString, "to_json_raw", mString_to_json_raw, -1);
|
|
1776
|
-
rb_define_method(mString, "to_json_raw_object", mString_to_json_raw_object, 0);
|
|
1777
|
-
|
|
1778
|
-
mString_Extend = rb_define_module_under(mString, "Extend");
|
|
1779
|
-
rb_define_method(mString_Extend, "json_create", mString_Extend_json_create, 1);
|
|
1780
2107
|
|
|
1781
2108
|
VALUE mTrueClass = rb_define_module_under(mGeneratorMethods, "TrueClass");
|
|
1782
2109
|
rb_define_method(mTrueClass, "to_json", mTrueClass_to_json, -1);
|
|
@@ -1813,10 +2140,30 @@ void Init_generator(void)
|
|
|
1813
2140
|
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
|
|
1814
2141
|
sym_strict = ID2SYM(rb_intern("strict"));
|
|
1815
2142
|
sym_as_json = ID2SYM(rb_intern("as_json"));
|
|
2143
|
+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
|
|
1816
2144
|
|
|
1817
2145
|
usascii_encindex = rb_usascii_encindex();
|
|
1818
2146
|
utf8_encindex = rb_utf8_encindex();
|
|
1819
2147
|
binary_encindex = rb_ascii8bit_encindex();
|
|
1820
2148
|
|
|
1821
2149
|
rb_require("json/ext/generator/state");
|
|
2150
|
+
|
|
2151
|
+
|
|
2152
|
+
switch (find_simd_implementation()) {
|
|
2153
|
+
#ifdef HAVE_SIMD
|
|
2154
|
+
#ifdef HAVE_SIMD_NEON
|
|
2155
|
+
case SIMD_NEON:
|
|
2156
|
+
search_escape_basic_impl = search_escape_basic_neon;
|
|
2157
|
+
break;
|
|
2158
|
+
#endif /* HAVE_SIMD_NEON */
|
|
2159
|
+
#ifdef HAVE_SIMD_SSE2
|
|
2160
|
+
case SIMD_SSE2:
|
|
2161
|
+
search_escape_basic_impl = search_escape_basic_sse2;
|
|
2162
|
+
break;
|
|
2163
|
+
#endif /* HAVE_SIMD_SSE2 */
|
|
2164
|
+
#endif /* HAVE_SIMD */
|
|
2165
|
+
default:
|
|
2166
|
+
search_escape_basic_impl = search_escape_basic;
|
|
2167
|
+
break;
|
|
2168
|
+
}
|
|
1822
2169
|
}
|