json 2.11.3 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +9 -0
- data/ext/json/ext/generator/extconf.rb +29 -0
- data/ext/json/ext/generator/generator.c +356 -8
- data/ext/json/ext/generator/simd.h +112 -0
- data/ext/json/ext/parser/parser.c +131 -92
- data/ext/json/ext/vendor/fpconv.c +5 -5
- data/lib/json/common.rb +3 -1
- data/lib/json/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e71f977a9d4c1316007814d62236fd185f5aaade7a79f3e5d48a9ffde32f520
|
4
|
+
data.tar.gz: f1be8ac3136a6dcf48aa15c7ec08fa4dfcedb6f89b1b6ad8944727708a16e074
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23f2d490dfb7ea60b189f8227787fde0c53844f62c8e9023ba1d413a72b46b7a3b77836d1a6050dd0a2fa925370bd260da0a52d738bd1231c81ad1ef4a17adda
|
7
|
+
data.tar.gz: 22326ad3f75f99e20c7f1ad3cc0f519ffc56b7c85c94aa124a2ea47c8d0c86f604307fe504f216b347651d3c82df83623798dbdbabc45be78a1e4721cc7b8cbe
|
data/CHANGES.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Changes
|
2
2
|
|
3
|
+
### Unreleased
|
4
|
+
|
5
|
+
### 2025-05-12 (2.12.0)
|
6
|
+
|
7
|
+
* Improve floating point generation to not use scientific notation as much.
|
8
|
+
* Include line and column in parser errors. Both in the message and as exception attributes.
|
9
|
+
* Handle non-string hash keys with broken `to_s` implementations.
|
10
|
+
* `JSON.generate` now uses SSE2 (x86) or NEON (arm64) instructions when available to escape strings.
|
11
|
+
|
3
12
|
### 2025-04-25 (2.11.3)
|
4
13
|
|
5
14
|
* Fix a regression in `JSON.pretty_generate` that could cause indentation to be off once some `#to_json` has been called.
|
@@ -6,5 +6,34 @@ if RUBY_ENGINE == 'truffleruby'
|
|
6
6
|
else
|
7
7
|
append_cflags("-std=c99")
|
8
8
|
$defs << "-DJSON_GENERATOR"
|
9
|
+
|
10
|
+
if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
|
11
|
+
if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
|
12
|
+
# Try to compile a small program using NEON instructions
|
13
|
+
if have_header('arm_neon.h')
|
14
|
+
have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
|
15
|
+
#include <arm_neon.h>
|
16
|
+
int main() {
|
17
|
+
uint8x16_t test = vdupq_n_u8(32);
|
18
|
+
return 0;
|
19
|
+
}
|
20
|
+
SRC
|
21
|
+
$defs.push("-DJSON_ENABLE_SIMD")
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
|
26
|
+
#include <x86intrin.h>
|
27
|
+
int main() {
|
28
|
+
__m128i test = _mm_set1_epi8(32);
|
29
|
+
return 0;
|
30
|
+
}
|
31
|
+
SRC
|
32
|
+
$defs.push("-DJSON_ENABLE_SIMD")
|
33
|
+
end
|
34
|
+
|
35
|
+
have_header('cpuid.h')
|
36
|
+
end
|
37
|
+
|
9
38
|
create_makefile 'json/ext/generator'
|
10
39
|
end
|
@@ -5,6 +5,8 @@
|
|
5
5
|
#include <math.h>
|
6
6
|
#include <ctype.h>
|
7
7
|
|
8
|
+
#include "simd.h"
|
9
|
+
|
8
10
|
/* ruby api and some helpers */
|
9
11
|
|
10
12
|
typedef struct JSON_Generator_StateStruct {
|
@@ -109,12 +111,40 @@ typedef struct _search_state {
|
|
109
111
|
const char *end;
|
110
112
|
const char *cursor;
|
111
113
|
FBuffer *buffer;
|
114
|
+
|
115
|
+
#ifdef HAVE_SIMD
|
116
|
+
const char *chunk_base;
|
117
|
+
const char *chunk_end;
|
118
|
+
bool has_matches;
|
119
|
+
|
120
|
+
#if defined(HAVE_SIMD_NEON)
|
121
|
+
uint64_t matches_mask;
|
122
|
+
#elif defined(HAVE_SIMD_SSE2)
|
123
|
+
int matches_mask;
|
124
|
+
#else
|
125
|
+
#error "Unknown SIMD Implementation."
|
126
|
+
#endif /* HAVE_SIMD_NEON */
|
127
|
+
#endif /* HAVE_SIMD */
|
112
128
|
} search_state;
|
113
129
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
130
|
+
#if (defined(__GNUC__ ) || defined(__clang__))
|
131
|
+
#define FORCE_INLINE __attribute__((always_inline))
|
132
|
+
#else
|
133
|
+
#define FORCE_INLINE
|
134
|
+
#endif
|
135
|
+
|
136
|
+
static inline FORCE_INLINE void search_flush(search_state *search)
|
137
|
+
{
|
138
|
+
// Do not remove this conditional without profiling, specifically escape-heavy text.
|
139
|
+
// escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
|
140
|
+
// For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
|
141
|
+
// will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
|
142
|
+
// consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
|
143
|
+
// nothing needs to be flushed, we can save a few memory references with this conditional.
|
144
|
+
if (search->ptr > search->cursor) {
|
145
|
+
fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
|
146
|
+
search->cursor = search->ptr;
|
147
|
+
}
|
118
148
|
}
|
119
149
|
|
120
150
|
static const unsigned char escape_table_basic[256] = {
|
@@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
|
|
130
160
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
131
161
|
};
|
132
162
|
|
163
|
+
static unsigned char (*search_escape_basic_impl)(search_state *);
|
164
|
+
|
133
165
|
static inline unsigned char search_escape_basic(search_state *search)
|
134
166
|
{
|
135
167
|
while (search->ptr < search->end) {
|
@@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
|
|
144
176
|
return 0;
|
145
177
|
}
|
146
178
|
|
147
|
-
static inline void escape_UTF8_char_basic(search_state *search)
|
179
|
+
static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
|
180
|
+
{
|
148
181
|
const unsigned char ch = (unsigned char)*search->ptr;
|
149
182
|
switch (ch) {
|
150
183
|
case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
|
@@ -186,12 +219,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
|
|
186
219
|
*/
|
187
220
|
static inline void convert_UTF8_to_JSON(search_state *search)
|
188
221
|
{
|
189
|
-
while (
|
222
|
+
while (search_escape_basic_impl(search)) {
|
190
223
|
escape_UTF8_char_basic(search);
|
191
224
|
}
|
192
225
|
}
|
193
226
|
|
194
|
-
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
227
|
+
static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
228
|
+
{
|
195
229
|
const unsigned char ch = (unsigned char)*search->ptr;
|
196
230
|
switch (ch_len) {
|
197
231
|
case 1: {
|
@@ -227,6 +261,280 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
|
|
227
261
|
search->cursor = (search->ptr += ch_len);
|
228
262
|
}
|
229
263
|
|
264
|
+
#ifdef HAVE_SIMD
|
265
|
+
|
266
|
+
static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
|
267
|
+
{
|
268
|
+
// Flush the buffer so everything up until the last 'len' characters are unflushed.
|
269
|
+
search_flush(search);
|
270
|
+
|
271
|
+
FBuffer *buf = search->buffer;
|
272
|
+
fbuffer_inc_capa(buf, vec_len);
|
273
|
+
|
274
|
+
char *s = (buf->ptr + buf->len);
|
275
|
+
|
276
|
+
// Pad the buffer with dummy characters that won't need escaping.
|
277
|
+
// This seems wasteful at first sight, but memset of vector length is very fast.
|
278
|
+
memset(s, 'X', vec_len);
|
279
|
+
|
280
|
+
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
|
281
|
+
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
|
282
|
+
MEMCPY(s, search->ptr, char, len);
|
283
|
+
|
284
|
+
return s;
|
285
|
+
}
|
286
|
+
|
287
|
+
#ifdef HAVE_SIMD_NEON
|
288
|
+
|
289
|
+
static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
|
290
|
+
{
|
291
|
+
uint64_t mask = search->matches_mask;
|
292
|
+
uint32_t index = trailing_zeros64(mask) >> 2;
|
293
|
+
|
294
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
295
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
296
|
+
// search->chunk_base + index >= search->ptr
|
297
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
298
|
+
// is one byte after the previous match then:
|
299
|
+
// search->chunk_base + index == search->ptr
|
300
|
+
search->ptr = search->chunk_base + index;
|
301
|
+
mask &= mask - 1;
|
302
|
+
search->matches_mask = mask;
|
303
|
+
search_flush(search);
|
304
|
+
return 1;
|
305
|
+
}
|
306
|
+
|
307
|
+
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
308
|
+
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
|
309
|
+
{
|
310
|
+
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
|
311
|
+
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
|
312
|
+
return mask & 0x8888888888888888ull;
|
313
|
+
}
|
314
|
+
|
315
|
+
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
|
316
|
+
{
|
317
|
+
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
|
318
|
+
|
319
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
320
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
321
|
+
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
|
322
|
+
|
323
|
+
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
|
324
|
+
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
|
325
|
+
|
326
|
+
return neon_match_mask(needs_escape);
|
327
|
+
}
|
328
|
+
|
329
|
+
static inline unsigned char search_escape_basic_neon(search_state *search)
|
330
|
+
{
|
331
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
332
|
+
// There are more matches if search->matches_mask > 0.
|
333
|
+
if (search->matches_mask > 0) {
|
334
|
+
return neon_next_match(search);
|
335
|
+
} else {
|
336
|
+
// neon_next_match will only advance search->ptr up to the last matching character.
|
337
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
338
|
+
search->has_matches = false;
|
339
|
+
search->ptr = search->chunk_end;
|
340
|
+
}
|
341
|
+
}
|
342
|
+
|
343
|
+
/*
|
344
|
+
* The code below implements an SIMD-based algorithm to determine if N bytes at a time
|
345
|
+
* need to be escaped.
|
346
|
+
*
|
347
|
+
* Assume the ptr = "Te\sting!" (the double quotes are included in the string)
|
348
|
+
*
|
349
|
+
* The explanation will be limited to the first 8 bytes of the string for simplicity. However
|
350
|
+
* the vector instructions may work on larger vectors.
|
351
|
+
*
|
352
|
+
* First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
|
353
|
+
*
|
354
|
+
* lower_bound: [20 20 20 20 20 20 20 20]
|
355
|
+
* backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
|
356
|
+
* dblquote: [22 22 22 22 22 22 22 22]
|
357
|
+
*
|
358
|
+
* Next we load the first chunk of the ptr:
|
359
|
+
* [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
|
360
|
+
*
|
361
|
+
* First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
|
362
|
+
* as no bytes are less than 32 (0x20):
|
363
|
+
* [0 0 0 0 0 0 0 0]
|
364
|
+
*
|
365
|
+
* Next, we check if any byte in chunk is equal to a backslash:
|
366
|
+
* [0 0 0 FF 0 0 0 0]
|
367
|
+
*
|
368
|
+
* Finally we check if any byte in chunk is equal to a double quote:
|
369
|
+
* [FF 0 0 0 0 0 0 0]
|
370
|
+
*
|
371
|
+
* Now we have three vectors where each byte indicates if the corresponding byte in chunk
|
372
|
+
* needs to be escaped. We combine these vectors with a series of logical OR instructions.
|
373
|
+
* This is the needs_escape vector and it is equal to:
|
374
|
+
* [FF 0 0 FF 0 0 0 0]
|
375
|
+
*
|
376
|
+
* Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
|
377
|
+
* the values in the vector. This computes how many bytes need to be escaped within this chunk.
|
378
|
+
*
|
379
|
+
* Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
|
380
|
+
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
|
381
|
+
* have at least one byte that needs to be escaped.
|
382
|
+
*/
|
383
|
+
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
|
384
|
+
uint64_t mask = neon_rules_update(search->ptr);
|
385
|
+
|
386
|
+
if (!mask) {
|
387
|
+
search->ptr += sizeof(uint8x16_t);
|
388
|
+
continue;
|
389
|
+
}
|
390
|
+
search->matches_mask = mask;
|
391
|
+
search->has_matches = true;
|
392
|
+
search->chunk_base = search->ptr;
|
393
|
+
search->chunk_end = search->ptr + sizeof(uint8x16_t);
|
394
|
+
return neon_next_match(search);
|
395
|
+
}
|
396
|
+
|
397
|
+
// There are fewer than 16 bytes left.
|
398
|
+
unsigned long remaining = (search->end - search->ptr);
|
399
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
400
|
+
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
|
401
|
+
|
402
|
+
uint64_t mask = neon_rules_update(s);
|
403
|
+
|
404
|
+
if (!mask) {
|
405
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
406
|
+
// search->cursor to search->ptr.
|
407
|
+
search->buffer->len += remaining;
|
408
|
+
search->ptr = search->end;
|
409
|
+
search->cursor = search->end;
|
410
|
+
return 0;
|
411
|
+
}
|
412
|
+
|
413
|
+
search->matches_mask = mask;
|
414
|
+
search->has_matches = true;
|
415
|
+
search->chunk_end = search->end;
|
416
|
+
search->chunk_base = search->ptr;
|
417
|
+
return neon_next_match(search);
|
418
|
+
}
|
419
|
+
|
420
|
+
if (search->ptr < search->end) {
|
421
|
+
return search_escape_basic(search);
|
422
|
+
}
|
423
|
+
|
424
|
+
search_flush(search);
|
425
|
+
return 0;
|
426
|
+
}
|
427
|
+
#endif /* HAVE_SIMD_NEON */
|
428
|
+
|
429
|
+
#ifdef HAVE_SIMD_SSE2
|
430
|
+
|
431
|
+
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
|
432
|
+
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
|
433
|
+
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
|
434
|
+
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
|
435
|
+
|
436
|
+
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
|
437
|
+
{
|
438
|
+
int mask = search->matches_mask;
|
439
|
+
int index = trailing_zeros(mask);
|
440
|
+
|
441
|
+
// It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
|
442
|
+
// If we want to use a similar approach for full escaping we'll need to ensure:
|
443
|
+
// search->chunk_base + index >= search->ptr
|
444
|
+
// However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
|
445
|
+
// is one byte after the previous match then:
|
446
|
+
// search->chunk_base + index == search->ptr
|
447
|
+
search->ptr = search->chunk_base + index;
|
448
|
+
mask &= mask - 1;
|
449
|
+
search->matches_mask = mask;
|
450
|
+
search_flush(search);
|
451
|
+
return 1;
|
452
|
+
}
|
453
|
+
|
454
|
+
#if defined(__clang__) || defined(__GNUC__)
|
455
|
+
#define TARGET_SSE2 __attribute__((target("sse2")))
|
456
|
+
#else
|
457
|
+
#define TARGET_SSE2
|
458
|
+
#endif
|
459
|
+
|
460
|
+
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
|
461
|
+
{
|
462
|
+
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
|
463
|
+
|
464
|
+
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
|
465
|
+
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
|
466
|
+
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
|
467
|
+
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
|
468
|
+
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
|
469
|
+
return _mm_movemask_epi8(needs_escape);
|
470
|
+
}
|
471
|
+
|
472
|
+
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
|
473
|
+
{
|
474
|
+
if (RB_UNLIKELY(search->has_matches)) {
|
475
|
+
// There are more matches if search->matches_mask > 0.
|
476
|
+
if (search->matches_mask > 0) {
|
477
|
+
return sse2_next_match(search);
|
478
|
+
} else {
|
479
|
+
// sse2_next_match will only advance search->ptr up to the last matching character.
|
480
|
+
// Skip over any characters in the last chunk that occur after the last match.
|
481
|
+
search->has_matches = false;
|
482
|
+
if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
|
483
|
+
search->ptr = search->end;
|
484
|
+
} else {
|
485
|
+
search->ptr = search->chunk_base + sizeof(__m128i);
|
486
|
+
}
|
487
|
+
}
|
488
|
+
}
|
489
|
+
|
490
|
+
while (search->ptr + sizeof(__m128i) <= search->end) {
|
491
|
+
int needs_escape_mask = sse2_update(search->ptr);
|
492
|
+
|
493
|
+
if (needs_escape_mask == 0) {
|
494
|
+
search->ptr += sizeof(__m128i);
|
495
|
+
continue;
|
496
|
+
}
|
497
|
+
|
498
|
+
search->has_matches = true;
|
499
|
+
search->matches_mask = needs_escape_mask;
|
500
|
+
search->chunk_base = search->ptr;
|
501
|
+
return sse2_next_match(search);
|
502
|
+
}
|
503
|
+
|
504
|
+
// There are fewer than 16 bytes left.
|
505
|
+
unsigned long remaining = (search->end - search->ptr);
|
506
|
+
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
|
507
|
+
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
|
508
|
+
|
509
|
+
int needs_escape_mask = sse2_update(s);
|
510
|
+
|
511
|
+
if (needs_escape_mask == 0) {
|
512
|
+
// Nothing to escape, ensure search_flush doesn't do anything by setting
|
513
|
+
// search->cursor to search->ptr.
|
514
|
+
search->buffer->len += remaining;
|
515
|
+
search->ptr = search->end;
|
516
|
+
search->cursor = search->end;
|
517
|
+
return 0;
|
518
|
+
}
|
519
|
+
|
520
|
+
search->has_matches = true;
|
521
|
+
search->matches_mask = needs_escape_mask;
|
522
|
+
search->chunk_base = search->ptr;
|
523
|
+
return sse2_next_match(search);
|
524
|
+
}
|
525
|
+
|
526
|
+
if (search->ptr < search->end) {
|
527
|
+
return search_escape_basic(search);
|
528
|
+
}
|
529
|
+
|
530
|
+
search_flush(search);
|
531
|
+
return 0;
|
532
|
+
}
|
533
|
+
|
534
|
+
#endif /* HAVE_SIMD_SSE2 */
|
535
|
+
|
536
|
+
#endif /* HAVE_SIMD */
|
537
|
+
|
230
538
|
static const unsigned char script_safe_escape_table[256] = {
|
231
539
|
// ASCII Control Characters
|
232
540
|
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
@@ -789,6 +1097,21 @@ struct hash_foreach_arg {
|
|
789
1097
|
int iter;
|
790
1098
|
};
|
791
1099
|
|
1100
|
+
static VALUE
|
1101
|
+
convert_string_subclass(VALUE key)
|
1102
|
+
{
|
1103
|
+
VALUE key_to_s = rb_funcall(key, i_to_s, 0);
|
1104
|
+
|
1105
|
+
if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
|
1106
|
+
VALUE cname = rb_obj_class(key);
|
1107
|
+
rb_raise(rb_eTypeError,
|
1108
|
+
"can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
|
1109
|
+
cname, "String", cname, "to_s", rb_obj_class(key_to_s));
|
1110
|
+
}
|
1111
|
+
|
1112
|
+
return key_to_s;
|
1113
|
+
}
|
1114
|
+
|
792
1115
|
static int
|
793
1116
|
json_object_i(VALUE key, VALUE val, VALUE _arg)
|
794
1117
|
{
|
@@ -817,7 +1140,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
|
|
817
1140
|
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
|
818
1141
|
key_to_s = key;
|
819
1142
|
} else {
|
820
|
-
key_to_s =
|
1143
|
+
key_to_s = convert_string_subclass(key);
|
821
1144
|
}
|
822
1145
|
break;
|
823
1146
|
case T_SYMBOL:
|
@@ -975,6 +1298,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
|
|
975
1298
|
search.cursor = search.ptr;
|
976
1299
|
search.end = search.ptr + len;
|
977
1300
|
|
1301
|
+
#ifdef HAVE_SIMD
|
1302
|
+
search.matches_mask = 0;
|
1303
|
+
search.has_matches = false;
|
1304
|
+
search.chunk_base = NULL;
|
1305
|
+
#endif /* HAVE_SIMD */
|
1306
|
+
|
978
1307
|
switch(rb_enc_str_coderange(obj)) {
|
979
1308
|
case ENC_CODERANGE_7BIT:
|
980
1309
|
case ENC_CODERANGE_VALID:
|
@@ -1838,4 +2167,23 @@ void Init_generator(void)
|
|
1838
2167
|
binary_encindex = rb_ascii8bit_encindex();
|
1839
2168
|
|
1840
2169
|
rb_require("json/ext/generator/state");
|
2170
|
+
|
2171
|
+
|
2172
|
+
switch(find_simd_implementation()) {
|
2173
|
+
#ifdef HAVE_SIMD
|
2174
|
+
#ifdef HAVE_SIMD_NEON
|
2175
|
+
case SIMD_NEON:
|
2176
|
+
search_escape_basic_impl = search_escape_basic_neon;
|
2177
|
+
break;
|
2178
|
+
#endif /* HAVE_SIMD_NEON */
|
2179
|
+
#ifdef HAVE_SIMD_SSE2
|
2180
|
+
case SIMD_SSE2:
|
2181
|
+
search_escape_basic_impl = search_escape_basic_sse2;
|
2182
|
+
break;
|
2183
|
+
#endif /* HAVE_SIMD_SSE2 */
|
2184
|
+
#endif /* HAVE_SIMD */
|
2185
|
+
default:
|
2186
|
+
search_escape_basic_impl = search_escape_basic;
|
2187
|
+
break;
|
2188
|
+
}
|
1841
2189
|
}
|
@@ -0,0 +1,112 @@
|
|
1
|
+
typedef enum {
|
2
|
+
SIMD_NONE,
|
3
|
+
SIMD_NEON,
|
4
|
+
SIMD_SSE2
|
5
|
+
} SIMD_Implementation;
|
6
|
+
|
7
|
+
#ifdef JSON_ENABLE_SIMD
|
8
|
+
|
9
|
+
#ifdef __clang__
|
10
|
+
#if __has_builtin(__builtin_ctzll)
|
11
|
+
#define HAVE_BUILTIN_CTZLL 1
|
12
|
+
#else
|
13
|
+
#define HAVE_BUILTIN_CTZLL 0
|
14
|
+
#endif
|
15
|
+
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
|
16
|
+
#define HAVE_BUILTIN_CTZLL 1
|
17
|
+
#else
|
18
|
+
#define HAVE_BUILTIN_CTZLL 0
|
19
|
+
#endif
|
20
|
+
|
21
|
+
static inline uint32_t trailing_zeros64(uint64_t input) {
|
22
|
+
#if HAVE_BUILTIN_CTZLL
|
23
|
+
return __builtin_ctzll(input);
|
24
|
+
#else
|
25
|
+
uint32_t trailing_zeros = 0;
|
26
|
+
uint64_t temp = input;
|
27
|
+
while ((temp & 1) == 0 && temp > 0) {
|
28
|
+
trailing_zeros++;
|
29
|
+
temp >>= 1;
|
30
|
+
}
|
31
|
+
return trailing_zeros;
|
32
|
+
#endif
|
33
|
+
}
|
34
|
+
|
35
|
+
static inline int trailing_zeros(int input) {
|
36
|
+
#if HAVE_BUILTIN_CTZLL
|
37
|
+
return __builtin_ctz(input);
|
38
|
+
#else
|
39
|
+
int trailing_zeros = 0;
|
40
|
+
int temp = input;
|
41
|
+
while ((temp & 1) == 0 && temp > 0) {
|
42
|
+
trailing_zeros++;
|
43
|
+
temp >>= 1;
|
44
|
+
}
|
45
|
+
return trailing_zeros;
|
46
|
+
#endif
|
47
|
+
}
|
48
|
+
|
49
|
+
#define SIMD_MINIMUM_THRESHOLD 6
|
50
|
+
|
51
|
+
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
|
52
|
+
#include <arm_neon.h>
|
53
|
+
|
54
|
+
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
55
|
+
static SIMD_Implementation find_simd_implementation(void) {
|
56
|
+
return SIMD_NEON;
|
57
|
+
}
|
58
|
+
|
59
|
+
#define HAVE_SIMD 1
|
60
|
+
#define HAVE_SIMD_NEON 1
|
61
|
+
|
62
|
+
uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
|
63
|
+
uint8x16x4_t tab;
|
64
|
+
tab.val[0] = vld1q_u8(table);
|
65
|
+
tab.val[1] = vld1q_u8(table+16);
|
66
|
+
tab.val[2] = vld1q_u8(table+32);
|
67
|
+
tab.val[3] = vld1q_u8(table+48);
|
68
|
+
return tab;
|
69
|
+
}
|
70
|
+
|
71
|
+
#endif /* ARM Neon Support.*/
|
72
|
+
|
73
|
+
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
|
74
|
+
|
75
|
+
#ifdef HAVE_X86INTRIN_H
|
76
|
+
#include <x86intrin.h>
|
77
|
+
|
78
|
+
#define HAVE_SIMD 1
|
79
|
+
#define HAVE_SIMD_SSE2 1
|
80
|
+
|
81
|
+
#ifdef HAVE_CPUID_H
|
82
|
+
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
|
83
|
+
|
84
|
+
#include <cpuid.h>
|
85
|
+
#endif /* HAVE_CPUID_H */
|
86
|
+
|
87
|
+
static SIMD_Implementation find_simd_implementation(void) {
|
88
|
+
|
89
|
+
#if defined(__GNUC__ ) || defined(__clang__)
|
90
|
+
#ifdef __GNUC__
|
91
|
+
__builtin_cpu_init();
|
92
|
+
#endif /* __GNUC__ */
|
93
|
+
|
94
|
+
// TODO Revisit. I think the SSE version now only uses SSE2 instructions.
|
95
|
+
if (__builtin_cpu_supports("sse2")) {
|
96
|
+
return SIMD_SSE2;
|
97
|
+
}
|
98
|
+
#endif /* __GNUC__ || __clang__*/
|
99
|
+
|
100
|
+
return SIMD_NONE;
|
101
|
+
}
|
102
|
+
|
103
|
+
#endif /* HAVE_X86INTRIN_H */
|
104
|
+
#endif /* X86_64 Support */
|
105
|
+
|
106
|
+
#endif /* JSON_ENABLE_SIMD */
|
107
|
+
|
108
|
+
#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
|
109
|
+
static SIMD_Implementation find_simd_implementation(void) {
|
110
|
+
return SIMD_NONE;
|
111
|
+
}
|
112
|
+
#endif
|
@@ -337,19 +337,86 @@ static size_t strnlen(const char *s, size_t maxlen)
|
|
337
337
|
}
|
338
338
|
#endif
|
339
339
|
|
340
|
+
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
341
|
+
{
|
342
|
+
int len = 1;
|
343
|
+
if (ch <= 0x7F) {
|
344
|
+
buf[0] = (char) ch;
|
345
|
+
} else if (ch <= 0x07FF) {
|
346
|
+
buf[0] = (char) ((ch >> 6) | 0xC0);
|
347
|
+
buf[1] = (char) ((ch & 0x3F) | 0x80);
|
348
|
+
len++;
|
349
|
+
} else if (ch <= 0xFFFF) {
|
350
|
+
buf[0] = (char) ((ch >> 12) | 0xE0);
|
351
|
+
buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
|
352
|
+
buf[2] = (char) ((ch & 0x3F) | 0x80);
|
353
|
+
len += 2;
|
354
|
+
} else if (ch <= 0x1fffff) {
|
355
|
+
buf[0] =(char) ((ch >> 18) | 0xF0);
|
356
|
+
buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
|
357
|
+
buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
|
358
|
+
buf[3] =(char) ((ch & 0x3F) | 0x80);
|
359
|
+
len += 3;
|
360
|
+
} else {
|
361
|
+
buf[0] = '?';
|
362
|
+
}
|
363
|
+
return len;
|
364
|
+
}
|
365
|
+
|
366
|
+
typedef struct JSON_ParserStruct {
|
367
|
+
VALUE on_load_proc;
|
368
|
+
VALUE decimal_class;
|
369
|
+
ID decimal_method_id;
|
370
|
+
int max_nesting;
|
371
|
+
bool allow_nan;
|
372
|
+
bool allow_trailing_comma;
|
373
|
+
bool parsing_name;
|
374
|
+
bool symbolize_names;
|
375
|
+
bool freeze;
|
376
|
+
} JSON_ParserConfig;
|
377
|
+
|
378
|
+
typedef struct JSON_ParserStateStruct {
|
379
|
+
VALUE stack_handle;
|
380
|
+
const char *start;
|
381
|
+
const char *cursor;
|
382
|
+
const char *end;
|
383
|
+
rvalue_stack *stack;
|
384
|
+
rvalue_cache name_cache;
|
385
|
+
int in_array;
|
386
|
+
int current_nesting;
|
387
|
+
} JSON_ParserState;
|
388
|
+
|
389
|
+
|
340
390
|
#define PARSE_ERROR_FRAGMENT_LEN 32
|
341
391
|
#ifdef RBIMPL_ATTR_NORETURN
|
342
392
|
RBIMPL_ATTR_NORETURN()
|
343
393
|
#endif
|
344
|
-
static void raise_parse_error(const char *format,
|
394
|
+
static void raise_parse_error(const char *format, JSON_ParserState *state)
|
345
395
|
{
|
346
396
|
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
|
347
397
|
|
348
|
-
|
349
|
-
|
398
|
+
const char *cursor = state->cursor;
|
399
|
+
long column = 0;
|
400
|
+
long line = 1;
|
401
|
+
|
402
|
+
while (cursor >= state->start) {
|
403
|
+
if (*cursor-- == '\n') {
|
404
|
+
break;
|
405
|
+
}
|
406
|
+
column++;
|
407
|
+
}
|
408
|
+
|
409
|
+
while (cursor >= state->start) {
|
410
|
+
if (*cursor-- == '\n') {
|
411
|
+
line++;
|
412
|
+
}
|
413
|
+
}
|
414
|
+
|
415
|
+
const char *ptr = state->cursor;
|
416
|
+
size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
|
350
417
|
|
351
418
|
if (len == PARSE_ERROR_FRAGMENT_LEN) {
|
352
|
-
MEMCPY(buffer,
|
419
|
+
MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
|
353
420
|
|
354
421
|
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
|
355
422
|
len--;
|
@@ -363,7 +430,23 @@ static void raise_parse_error(const char *format, const char *start)
|
|
363
430
|
ptr = (const char *)buffer;
|
364
431
|
}
|
365
432
|
|
366
|
-
|
433
|
+
VALUE msg = rb_sprintf(format, ptr);
|
434
|
+
VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
|
435
|
+
RB_GC_GUARD(msg);
|
436
|
+
|
437
|
+
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
|
438
|
+
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
|
439
|
+
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
|
440
|
+
rb_exc_raise(exc);
|
441
|
+
}
|
442
|
+
|
443
|
+
#ifdef RBIMPL_ATTR_NORETURN
|
444
|
+
RBIMPL_ATTR_NORETURN()
|
445
|
+
#endif
|
446
|
+
static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
|
447
|
+
{
|
448
|
+
state->cursor = at;
|
449
|
+
raise_parse_error(format, state);
|
367
450
|
}
|
368
451
|
|
369
452
|
/* unicode */
|
@@ -385,73 +468,25 @@ static const signed char digit_values[256] = {
|
|
385
468
|
-1, -1, -1, -1, -1, -1, -1
|
386
469
|
};
|
387
470
|
|
388
|
-
static uint32_t unescape_unicode(const unsigned char *p)
|
471
|
+
static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
|
389
472
|
{
|
390
473
|
signed char b;
|
391
474
|
uint32_t result = 0;
|
392
475
|
b = digit_values[p[0]];
|
393
|
-
if (b < 0)
|
476
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
394
477
|
result = (result << 4) | (unsigned char)b;
|
395
478
|
b = digit_values[p[1]];
|
396
|
-
if (b < 0)
|
479
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
397
480
|
result = (result << 4) | (unsigned char)b;
|
398
481
|
b = digit_values[p[2]];
|
399
|
-
if (b < 0)
|
482
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
400
483
|
result = (result << 4) | (unsigned char)b;
|
401
484
|
b = digit_values[p[3]];
|
402
|
-
if (b < 0)
|
485
|
+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
|
403
486
|
result = (result << 4) | (unsigned char)b;
|
404
487
|
return result;
|
405
488
|
}
|
406
489
|
|
407
|
-
static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
|
408
|
-
{
|
409
|
-
int len = 1;
|
410
|
-
if (ch <= 0x7F) {
|
411
|
-
buf[0] = (char) ch;
|
412
|
-
} else if (ch <= 0x07FF) {
|
413
|
-
buf[0] = (char) ((ch >> 6) | 0xC0);
|
414
|
-
buf[1] = (char) ((ch & 0x3F) | 0x80);
|
415
|
-
len++;
|
416
|
-
} else if (ch <= 0xFFFF) {
|
417
|
-
buf[0] = (char) ((ch >> 12) | 0xE0);
|
418
|
-
buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
|
419
|
-
buf[2] = (char) ((ch & 0x3F) | 0x80);
|
420
|
-
len += 2;
|
421
|
-
} else if (ch <= 0x1fffff) {
|
422
|
-
buf[0] =(char) ((ch >> 18) | 0xF0);
|
423
|
-
buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
|
424
|
-
buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
|
425
|
-
buf[3] =(char) ((ch & 0x3F) | 0x80);
|
426
|
-
len += 3;
|
427
|
-
} else {
|
428
|
-
buf[0] = '?';
|
429
|
-
}
|
430
|
-
return len;
|
431
|
-
}
|
432
|
-
|
433
|
-
typedef struct JSON_ParserStruct {
|
434
|
-
VALUE on_load_proc;
|
435
|
-
VALUE decimal_class;
|
436
|
-
ID decimal_method_id;
|
437
|
-
int max_nesting;
|
438
|
-
bool allow_nan;
|
439
|
-
bool allow_trailing_comma;
|
440
|
-
bool parsing_name;
|
441
|
-
bool symbolize_names;
|
442
|
-
bool freeze;
|
443
|
-
} JSON_ParserConfig;
|
444
|
-
|
445
|
-
typedef struct JSON_ParserStateStruct {
|
446
|
-
VALUE stack_handle;
|
447
|
-
const char *cursor;
|
448
|
-
const char *end;
|
449
|
-
rvalue_stack *stack;
|
450
|
-
rvalue_cache name_cache;
|
451
|
-
int in_array;
|
452
|
-
int current_nesting;
|
453
|
-
} JSON_ParserState;
|
454
|
-
|
455
490
|
#define GET_PARSER_CONFIG \
|
456
491
|
JSON_ParserConfig *config; \
|
457
492
|
TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
|
@@ -485,8 +520,7 @@ json_eat_comments(JSON_ParserState *state)
|
|
485
520
|
while (true) {
|
486
521
|
state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
|
487
522
|
if (!state->cursor) {
|
488
|
-
|
489
|
-
raise_parse_error("unexpected end of input, expected closing '*/'", state->cursor);
|
523
|
+
raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
|
490
524
|
} else {
|
491
525
|
state->cursor++;
|
492
526
|
if (state->cursor < state->end && *state->cursor == '/') {
|
@@ -498,11 +532,11 @@ json_eat_comments(JSON_ParserState *state)
|
|
498
532
|
break;
|
499
533
|
}
|
500
534
|
default:
|
501
|
-
raise_parse_error("unexpected token
|
535
|
+
raise_parse_error("unexpected token '%s'", state);
|
502
536
|
break;
|
503
537
|
}
|
504
538
|
} else {
|
505
|
-
raise_parse_error("unexpected token
|
539
|
+
raise_parse_error("unexpected token '%s'", state);
|
506
540
|
}
|
507
541
|
}
|
508
542
|
|
@@ -621,9 +655,9 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
621
655
|
break;
|
622
656
|
case 'u':
|
623
657
|
if (pe > stringEnd - 5) {
|
624
|
-
|
658
|
+
raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
|
625
659
|
} else {
|
626
|
-
uint32_t ch = unescape_unicode((unsigned char *) ++pe);
|
660
|
+
uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
|
627
661
|
pe += 3;
|
628
662
|
/* To handle values above U+FFFF, we take a sequence of
|
629
663
|
* \uXXXX escapes in the U+D800..U+DBFF then
|
@@ -638,10 +672,10 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
|
|
638
672
|
if ((ch & 0xFC00) == 0xD800) {
|
639
673
|
pe++;
|
640
674
|
if (pe > stringEnd - 6) {
|
641
|
-
|
675
|
+
raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
|
642
676
|
}
|
643
677
|
if (pe[0] == '\\' && pe[1] == 'u') {
|
644
|
-
uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
|
678
|
+
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
|
645
679
|
ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
|
646
680
|
| (sur & 0x3FF));
|
647
681
|
pe += 5;
|
@@ -829,12 +863,12 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
|
|
829
863
|
state->cursor++;
|
830
864
|
escaped = true;
|
831
865
|
if ((unsigned char)*state->cursor < 0x20) {
|
832
|
-
raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
|
866
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
833
867
|
}
|
834
868
|
break;
|
835
869
|
}
|
836
870
|
default:
|
837
|
-
raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
|
871
|
+
raise_parse_error("invalid ASCII control character in string: %s", state);
|
838
872
|
break;
|
839
873
|
}
|
840
874
|
}
|
@@ -842,7 +876,7 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
|
|
842
876
|
state->cursor++;
|
843
877
|
}
|
844
878
|
|
845
|
-
raise_parse_error("unexpected end of input, expected closing \"", state->cursor);
|
879
|
+
raise_parse_error("unexpected end of input, expected closing \"", state);
|
846
880
|
return Qfalse;
|
847
881
|
}
|
848
882
|
|
@@ -850,7 +884,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
850
884
|
{
|
851
885
|
json_eat_whitespace(state);
|
852
886
|
if (state->cursor >= state->end) {
|
853
|
-
raise_parse_error("unexpected end of input", state->cursor);
|
887
|
+
raise_parse_error("unexpected end of input", state);
|
854
888
|
}
|
855
889
|
|
856
890
|
switch (*state->cursor) {
|
@@ -860,7 +894,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
860
894
|
return json_push_value(state, config, Qnil);
|
861
895
|
}
|
862
896
|
|
863
|
-
raise_parse_error("unexpected token
|
897
|
+
raise_parse_error("unexpected token '%s'", state);
|
864
898
|
break;
|
865
899
|
case 't':
|
866
900
|
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
|
@@ -868,7 +902,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
868
902
|
return json_push_value(state, config, Qtrue);
|
869
903
|
}
|
870
904
|
|
871
|
-
raise_parse_error("unexpected token
|
905
|
+
raise_parse_error("unexpected token '%s'", state);
|
872
906
|
break;
|
873
907
|
case 'f':
|
874
908
|
// Note: memcmp with a small power of two compile to an integer comparison
|
@@ -877,7 +911,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
877
911
|
return json_push_value(state, config, Qfalse);
|
878
912
|
}
|
879
913
|
|
880
|
-
raise_parse_error("unexpected token
|
914
|
+
raise_parse_error("unexpected token '%s'", state);
|
881
915
|
break;
|
882
916
|
case 'N':
|
883
917
|
// Note: memcmp with a small power of two compile to an integer comparison
|
@@ -886,7 +920,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
886
920
|
return json_push_value(state, config, CNaN);
|
887
921
|
}
|
888
922
|
|
889
|
-
raise_parse_error("unexpected token
|
923
|
+
raise_parse_error("unexpected token '%s'", state);
|
890
924
|
break;
|
891
925
|
case 'I':
|
892
926
|
if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
|
@@ -894,7 +928,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
894
928
|
return json_push_value(state, config, CInfinity);
|
895
929
|
}
|
896
930
|
|
897
|
-
raise_parse_error("unexpected token
|
931
|
+
raise_parse_error("unexpected token '%s'", state);
|
898
932
|
break;
|
899
933
|
case '-':
|
900
934
|
// Note: memcmp with a small power of two compile to an integer comparison
|
@@ -903,7 +937,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
903
937
|
state->cursor += 9;
|
904
938
|
return json_push_value(state, config, CMinusInfinity);
|
905
939
|
} else {
|
906
|
-
raise_parse_error("unexpected token
|
940
|
+
raise_parse_error("unexpected token '%s'", state);
|
907
941
|
}
|
908
942
|
}
|
909
943
|
// Fallthrough
|
@@ -921,11 +955,11 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
921
955
|
long integer_length = state->cursor - start;
|
922
956
|
|
923
957
|
if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
|
924
|
-
|
958
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
925
959
|
} else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
|
926
|
-
|
960
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
927
961
|
} else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
|
928
|
-
|
962
|
+
raise_parse_error_at("invalid number: %s", state, start);
|
929
963
|
}
|
930
964
|
|
931
965
|
if ((state->cursor < state->end) && (*state->cursor == '.')) {
|
@@ -933,7 +967,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
933
967
|
state->cursor++;
|
934
968
|
|
935
969
|
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
936
|
-
raise_parse_error("invalid number: %s", state->cursor);
|
970
|
+
raise_parse_error("invalid number: %s", state);
|
937
971
|
}
|
938
972
|
|
939
973
|
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
@@ -949,7 +983,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
949
983
|
}
|
950
984
|
|
951
985
|
if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
|
952
|
-
raise_parse_error("invalid number: %s", state->cursor);
|
986
|
+
raise_parse_error("invalid number: %s", state);
|
953
987
|
}
|
954
988
|
|
955
989
|
while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
|
@@ -1009,7 +1043,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1009
1043
|
}
|
1010
1044
|
}
|
1011
1045
|
|
1012
|
-
raise_parse_error("expected ',' or ']' after array value", state->cursor);
|
1046
|
+
raise_parse_error("expected ',' or ']' after array value", state);
|
1013
1047
|
}
|
1014
1048
|
break;
|
1015
1049
|
}
|
@@ -1028,13 +1062,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1028
1062
|
}
|
1029
1063
|
|
1030
1064
|
if (*state->cursor != '"') {
|
1031
|
-
raise_parse_error("expected object key, got '%s", state->cursor);
|
1065
|
+
raise_parse_error("expected object key, got '%s", state);
|
1032
1066
|
}
|
1033
1067
|
json_parse_string(state, config, true);
|
1034
1068
|
|
1035
1069
|
json_eat_whitespace(state);
|
1036
1070
|
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
|
1037
|
-
raise_parse_error("expected ':' after object key", state->cursor);
|
1071
|
+
raise_parse_error("expected ':' after object key", state);
|
1038
1072
|
}
|
1039
1073
|
state->cursor++;
|
1040
1074
|
|
@@ -1063,13 +1097,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1063
1097
|
}
|
1064
1098
|
|
1065
1099
|
if (*state->cursor != '"') {
|
1066
|
-
raise_parse_error("expected object key, got: '%s'", state->cursor);
|
1100
|
+
raise_parse_error("expected object key, got: '%s'", state);
|
1067
1101
|
}
|
1068
1102
|
json_parse_string(state, config, true);
|
1069
1103
|
|
1070
1104
|
json_eat_whitespace(state);
|
1071
1105
|
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
|
1072
|
-
raise_parse_error("expected ':' after object key, got: '%s", state->cursor);
|
1106
|
+
raise_parse_error("expected ':' after object key, got: '%s", state);
|
1073
1107
|
}
|
1074
1108
|
state->cursor++;
|
1075
1109
|
|
@@ -1079,24 +1113,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
|
|
1079
1113
|
}
|
1080
1114
|
}
|
1081
1115
|
|
1082
|
-
raise_parse_error("expected ',' or '}' after object value, got: '%s'", state->cursor);
|
1116
|
+
raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
|
1083
1117
|
}
|
1084
1118
|
break;
|
1085
1119
|
}
|
1086
1120
|
|
1087
1121
|
default:
|
1088
|
-
raise_parse_error("unexpected character: '%s'", state->cursor);
|
1122
|
+
raise_parse_error("unexpected character: '%s'", state);
|
1089
1123
|
break;
|
1090
1124
|
}
|
1091
1125
|
|
1092
|
-
raise_parse_error("unreacheable: '%s'", state->cursor);
|
1126
|
+
raise_parse_error("unreacheable: '%s'", state);
|
1093
1127
|
}
|
1094
1128
|
|
1095
1129
|
static void json_ensure_eof(JSON_ParserState *state)
|
1096
1130
|
{
|
1097
1131
|
json_eat_whitespace(state);
|
1098
1132
|
if (state->cursor != state->end) {
|
1099
|
-
raise_parse_error("unexpected token at end of stream '%s'", state->cursor);
|
1133
|
+
raise_parse_error("unexpected token at end of stream '%s'", state);
|
1100
1134
|
}
|
1101
1135
|
}
|
1102
1136
|
|
@@ -1232,9 +1266,14 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
|
|
1232
1266
|
.capa = RVALUE_STACK_INITIAL_CAPA,
|
1233
1267
|
};
|
1234
1268
|
|
1269
|
+
long len;
|
1270
|
+
const char *start;
|
1271
|
+
RSTRING_GETMEM(Vsource, start, len);
|
1272
|
+
|
1235
1273
|
JSON_ParserState _state = {
|
1236
|
-
.
|
1237
|
-
.
|
1274
|
+
.start = start,
|
1275
|
+
.cursor = start,
|
1276
|
+
.end = start + len,
|
1238
1277
|
.stack = &stack,
|
1239
1278
|
};
|
1240
1279
|
JSON_ParserState *state = &_state;
|
@@ -41,7 +41,7 @@ typedef struct Fp {
|
|
41
41
|
int exp;
|
42
42
|
} Fp;
|
43
43
|
|
44
|
-
static Fp powers_ten[] = {
|
44
|
+
static const Fp powers_ten[] = {
|
45
45
|
{ 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 },
|
46
46
|
{ 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 },
|
47
47
|
{ 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 },
|
@@ -123,7 +123,7 @@ static Fp find_cachedpow10(int exp, int* k)
|
|
123
123
|
#define absv(n) ((n) < 0 ? -(n) : (n))
|
124
124
|
#define minv(a, b) ((a) < (b) ? (a) : (b))
|
125
125
|
|
126
|
-
static uint64_t tens[] = {
|
126
|
+
static const uint64_t tens[] = {
|
127
127
|
10000000000000000000U, 1000000000000000000U, 100000000000000000U,
|
128
128
|
10000000000000000U, 1000000000000000U, 100000000000000U,
|
129
129
|
10000000000000U, 1000000000000U, 100000000000U,
|
@@ -244,7 +244,7 @@ static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
|
|
244
244
|
uint64_t part2 = upper->frac & (one.frac - 1);
|
245
245
|
|
246
246
|
int idx = 0, kappa = 10;
|
247
|
-
uint64_t* divp;
|
247
|
+
const uint64_t* divp;
|
248
248
|
/* 1000000000 */
|
249
249
|
for(divp = tens + 10; kappa > 0; divp++) {
|
250
250
|
|
@@ -268,7 +268,7 @@ static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
|
|
268
268
|
}
|
269
269
|
|
270
270
|
/* 10 */
|
271
|
-
uint64_t* unit = tens + 18;
|
271
|
+
const uint64_t* unit = tens + 18;
|
272
272
|
|
273
273
|
while(true) {
|
274
274
|
part2 *= 10;
|
@@ -340,7 +340,7 @@ static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
|
|
340
340
|
}
|
341
341
|
|
342
342
|
/* write decimal w/o scientific notation */
|
343
|
-
if(K < 0 && (K > -7 || exp <
|
343
|
+
if(K < 0 && (K > -7 || exp < 10)) {
|
344
344
|
int offset = ndigits - absv(K);
|
345
345
|
/* fp < 1.0 -> write leading zero */
|
346
346
|
if(offset <= 0) {
|
data/lib/json/common.rb
CHANGED
@@ -230,7 +230,9 @@ module JSON
|
|
230
230
|
class JSONError < StandardError; end
|
231
231
|
|
232
232
|
# This exception is raised if a parser error occurs.
|
233
|
-
class ParserError < JSONError
|
233
|
+
class ParserError < JSONError
|
234
|
+
attr_reader :line, :column
|
235
|
+
end
|
234
236
|
|
235
237
|
# This exception is raised if the nesting of parsed data structures is too
|
236
238
|
# deep.
|
data/lib/json/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.11.3
|
4
|
+
version: 2.12.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Frank
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-
|
10
|
+
date: 2025-05-12 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: This is a JSON implementation as a Ruby extension in C.
|
13
13
|
email: flori@ping.de
|
@@ -26,6 +26,7 @@ files:
|
|
26
26
|
- ext/json/ext/fbuffer/fbuffer.h
|
27
27
|
- ext/json/ext/generator/extconf.rb
|
28
28
|
- ext/json/ext/generator/generator.c
|
29
|
+
- ext/json/ext/generator/simd.h
|
29
30
|
- ext/json/ext/parser/extconf.rb
|
30
31
|
- ext/json/ext/parser/parser.c
|
31
32
|
- ext/json/ext/vendor/fpconv.c
|