json 2.11.3 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1919e2040a180b81eba1f475c511ace075b32015997b7d58098f93103941f8b2
4
- data.tar.gz: d958784bea1136d935835d3e602fae96f97d25208cafec8a68db03619e6d34d0
3
+ metadata.gz: 8e71f977a9d4c1316007814d62236fd185f5aaade7a79f3e5d48a9ffde32f520
4
+ data.tar.gz: f1be8ac3136a6dcf48aa15c7ec08fa4dfcedb6f89b1b6ad8944727708a16e074
5
5
  SHA512:
6
- metadata.gz: 742da3e909b2b6d8c1c9de5833b11be0f80e3b50f5296973b57f03cd45ae584162ac33bcbeb5b99fa767714b8531fef71b6d7ff559da40c3b04e75026ba3158f
7
- data.tar.gz: e55ae407cc5b0da66922a41119b000da925391b58ea9da154a058b15f034334fae9e9c813cda12f4db75936c50e281df00acf8a4053808923d89d5efaa6927af
6
+ metadata.gz: 23f2d490dfb7ea60b189f8227787fde0c53844f62c8e9023ba1d413a72b46b7a3b77836d1a6050dd0a2fa925370bd260da0a52d738bd1231c81ad1ef4a17adda
7
+ data.tar.gz: 22326ad3f75f99e20c7f1ad3cc0f519ffc56b7c85c94aa124a2ea47c8d0c86f604307fe504f216b347651d3c82df83623798dbdbabc45be78a1e4721cc7b8cbe
data/CHANGES.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Changes
2
2
 
3
+ ### Unreleased
4
+
5
+ ### 2025-05-12 (2.12.0)
6
+
7
+ * Improve floating point generation to not use scientific notation as much.
8
+ * Include line and column in parser errors. Both in the message and as exception attributes.
9
+ * Handle non-string hash keys with broken `to_s` implementations.
10
+ * `JSON.generate` now uses SSE2 (x86) or NEON (arm64) instructions when available to escape strings.
11
+
3
12
  ### 2025-04-25 (2.11.3)
4
13
 
5
14
  * Fix a regression in `JSON.pretty_generate` that could cause indentation to be off once some `#to_json` has been called.
@@ -6,5 +6,34 @@ if RUBY_ENGINE == 'truffleruby'
6
6
  else
7
7
  append_cflags("-std=c99")
8
8
  $defs << "-DJSON_GENERATOR"
9
+
10
+ if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
11
+ if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
12
+ # Try to compile a small program using NEON instructions
13
+ if have_header('arm_neon.h')
14
+ have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
15
+ #include <arm_neon.h>
16
+ int main() {
17
+ uint8x16_t test = vdupq_n_u8(32);
18
+ return 0;
19
+ }
20
+ SRC
21
+ $defs.push("-DJSON_ENABLE_SIMD")
22
+ end
23
+ end
24
+
25
+ if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
26
+ #include <x86intrin.h>
27
+ int main() {
28
+ __m128i test = _mm_set1_epi8(32);
29
+ return 0;
30
+ }
31
+ SRC
32
+ $defs.push("-DJSON_ENABLE_SIMD")
33
+ end
34
+
35
+ have_header('cpuid.h')
36
+ end
37
+
9
38
  create_makefile 'json/ext/generator'
10
39
  end
@@ -5,6 +5,8 @@
5
5
  #include <math.h>
6
6
  #include <ctype.h>
7
7
 
8
+ #include "simd.h"
9
+
8
10
  /* ruby api and some helpers */
9
11
 
10
12
  typedef struct JSON_Generator_StateStruct {
@@ -109,12 +111,40 @@ typedef struct _search_state {
109
111
  const char *end;
110
112
  const char *cursor;
111
113
  FBuffer *buffer;
114
+
115
+ #ifdef HAVE_SIMD
116
+ const char *chunk_base;
117
+ const char *chunk_end;
118
+ bool has_matches;
119
+
120
+ #if defined(HAVE_SIMD_NEON)
121
+ uint64_t matches_mask;
122
+ #elif defined(HAVE_SIMD_SSE2)
123
+ int matches_mask;
124
+ #else
125
+ #error "Unknown SIMD Implementation."
126
+ #endif /* HAVE_SIMD_NEON */
127
+ #endif /* HAVE_SIMD */
112
128
  } search_state;
113
129
 
114
- static inline void search_flush(search_state *search)
115
- {
116
- fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
117
- search->cursor = search->ptr;
130
+ #if (defined(__GNUC__ ) || defined(__clang__))
131
+ #define FORCE_INLINE __attribute__((always_inline))
132
+ #else
133
+ #define FORCE_INLINE
134
+ #endif
135
+
136
+ static inline FORCE_INLINE void search_flush(search_state *search)
137
+ {
138
+ // Do not remove this conditional without profiling, specifically escape-heavy text.
139
+ // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
140
+ // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
141
+ // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
142
+ // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
143
+ // nothing needs to be flushed, we can save a few memory references with this conditional.
144
+ if (search->ptr > search->cursor) {
145
+ fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
146
+ search->cursor = search->ptr;
147
+ }
118
148
  }
119
149
 
120
150
  static const unsigned char escape_table_basic[256] = {
@@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
130
160
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
131
161
  };
132
162
 
163
+ static unsigned char (*search_escape_basic_impl)(search_state *);
164
+
133
165
  static inline unsigned char search_escape_basic(search_state *search)
134
166
  {
135
167
  while (search->ptr < search->end) {
@@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
144
176
  return 0;
145
177
  }
146
178
 
147
- static inline void escape_UTF8_char_basic(search_state *search) {
179
+ static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
180
+ {
148
181
  const unsigned char ch = (unsigned char)*search->ptr;
149
182
  switch (ch) {
150
183
  case '"': fbuffer_append(search->buffer, "\\\"", 2); break;
@@ -186,12 +219,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
186
219
  */
187
220
  static inline void convert_UTF8_to_JSON(search_state *search)
188
221
  {
189
- while (search_escape_basic(search)) {
222
+ while (search_escape_basic_impl(search)) {
190
223
  escape_UTF8_char_basic(search);
191
224
  }
192
225
  }
193
226
 
194
- static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
227
+ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
228
+ {
195
229
  const unsigned char ch = (unsigned char)*search->ptr;
196
230
  switch (ch_len) {
197
231
  case 1: {
@@ -227,6 +261,280 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
227
261
  search->cursor = (search->ptr += ch_len);
228
262
  }
229
263
 
264
+ #ifdef HAVE_SIMD
265
+
266
+ static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
267
+ {
268
+ // Flush the buffer so everything up until the last 'len' characters are unflushed.
269
+ search_flush(search);
270
+
271
+ FBuffer *buf = search->buffer;
272
+ fbuffer_inc_capa(buf, vec_len);
273
+
274
+ char *s = (buf->ptr + buf->len);
275
+
276
+ // Pad the buffer with dummy characters that won't need escaping.
277
+ // This seems wasteful at first sight, but memset of vector length is very fast.
278
+ memset(s, 'X', vec_len);
279
+
280
+ // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
281
+ // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
282
+ MEMCPY(s, search->ptr, char, len);
283
+
284
+ return s;
285
+ }
286
+
287
+ #ifdef HAVE_SIMD_NEON
288
+
289
+ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
290
+ {
291
+ uint64_t mask = search->matches_mask;
292
+ uint32_t index = trailing_zeros64(mask) >> 2;
293
+
294
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
295
+ // If we want to use a similar approach for full escaping we'll need to ensure:
296
+ // search->chunk_base + index >= search->ptr
297
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
298
+ // is one byte after the previous match then:
299
+ // search->chunk_base + index == search->ptr
300
+ search->ptr = search->chunk_base + index;
301
+ mask &= mask - 1;
302
+ search->matches_mask = mask;
303
+ search_flush(search);
304
+ return 1;
305
+ }
306
+
307
+ // See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308
+ static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309
+ {
310
+ const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311
+ const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312
+ return mask & 0x8888888888888888ull;
313
+ }
314
+
315
+ static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316
+ {
317
+ uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318
+
319
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321
+ const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322
+
323
+ uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324
+ uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325
+
326
+ return neon_match_mask(needs_escape);
327
+ }
328
+
329
+ static inline unsigned char search_escape_basic_neon(search_state *search)
330
+ {
331
+ if (RB_UNLIKELY(search->has_matches)) {
332
+ // There are more matches if search->matches_mask > 0.
333
+ if (search->matches_mask > 0) {
334
+ return neon_next_match(search);
335
+ } else {
336
+ // neon_next_match will only advance search->ptr up to the last matching character.
337
+ // Skip over any characters in the last chunk that occur after the last match.
338
+ search->has_matches = false;
339
+ search->ptr = search->chunk_end;
340
+ }
341
+ }
342
+
343
+ /*
344
+ * The code below implements an SIMD-based algorithm to determine if N bytes at a time
345
+ * need to be escaped.
346
+ *
347
+ * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
348
+ *
349
+ * The explanation will be limited to the first 8 bytes of the string for simplicity. However
350
+ * the vector instructions may work on larger vectors.
351
+ *
352
+ * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
353
+ *
354
+ * lower_bound: [20 20 20 20 20 20 20 20]
355
+ * backslash: [5C 5C 5C 5C 5C 5C 5C 5C]
356
+ * dblquote: [22 22 22 22 22 22 22 22]
357
+ *
358
+ * Next we load the first chunk of the ptr:
359
+ * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n)
360
+ *
361
+ * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
362
+ * as no bytes are less than 32 (0x20):
363
+ * [0 0 0 0 0 0 0 0]
364
+ *
365
+ * Next, we check if any byte in chunk is equal to a backslash:
366
+ * [0 0 0 FF 0 0 0 0]
367
+ *
368
+ * Finally we check if any byte in chunk is equal to a double quote:
369
+ * [FF 0 0 0 0 0 0 0]
370
+ *
371
+ * Now we have three vectors where each byte indicates if the corresponding byte in chunk
372
+ * needs to be escaped. We combine these vectors with a series of logical OR instructions.
373
+ * This is the needs_escape vector and it is equal to:
374
+ * [FF 0 0 FF 0 0 0 0]
375
+ *
376
+ * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
377
+ * the values in the vector. This computes how many bytes need to be escaped within this chunk.
378
+ *
379
+ * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
380
+ * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381
+ * have at least one byte that needs to be escaped.
382
+ */
383
+ while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384
+ uint64_t mask = neon_rules_update(search->ptr);
385
+
386
+ if (!mask) {
387
+ search->ptr += sizeof(uint8x16_t);
388
+ continue;
389
+ }
390
+ search->matches_mask = mask;
391
+ search->has_matches = true;
392
+ search->chunk_base = search->ptr;
393
+ search->chunk_end = search->ptr + sizeof(uint8x16_t);
394
+ return neon_next_match(search);
395
+ }
396
+
397
+ // There are fewer than 16 bytes left.
398
+ unsigned long remaining = (search->end - search->ptr);
399
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400
+ char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401
+
402
+ uint64_t mask = neon_rules_update(s);
403
+
404
+ if (!mask) {
405
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
406
+ // search->cursor to search->ptr.
407
+ search->buffer->len += remaining;
408
+ search->ptr = search->end;
409
+ search->cursor = search->end;
410
+ return 0;
411
+ }
412
+
413
+ search->matches_mask = mask;
414
+ search->has_matches = true;
415
+ search->chunk_end = search->end;
416
+ search->chunk_base = search->ptr;
417
+ return neon_next_match(search);
418
+ }
419
+
420
+ if (search->ptr < search->end) {
421
+ return search_escape_basic(search);
422
+ }
423
+
424
+ search_flush(search);
425
+ return 0;
426
+ }
427
+ #endif /* HAVE_SIMD_NEON */
428
+
429
+ #ifdef HAVE_SIMD_SSE2
430
+
431
+ #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432
+ #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433
+ #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434
+ #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435
+
436
+ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
437
+ {
438
+ int mask = search->matches_mask;
439
+ int index = trailing_zeros(mask);
440
+
441
+ // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
442
+ // If we want to use a similar approach for full escaping we'll need to ensure:
443
+ // search->chunk_base + index >= search->ptr
444
+ // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
445
+ // is one byte after the previous match then:
446
+ // search->chunk_base + index == search->ptr
447
+ search->ptr = search->chunk_base + index;
448
+ mask &= mask - 1;
449
+ search->matches_mask = mask;
450
+ search_flush(search);
451
+ return 1;
452
+ }
453
+
454
+ #if defined(__clang__) || defined(__GNUC__)
455
+ #define TARGET_SSE2 __attribute__((target("sse2")))
456
+ #else
457
+ #define TARGET_SSE2
458
+ #endif
459
+
460
+ static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461
+ {
462
+ __m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463
+
464
+ // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465
+ // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466
+ __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467
+ __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468
+ __m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469
+ return _mm_movemask_epi8(needs_escape);
470
+ }
471
+
472
+ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
473
+ {
474
+ if (RB_UNLIKELY(search->has_matches)) {
475
+ // There are more matches if search->matches_mask > 0.
476
+ if (search->matches_mask > 0) {
477
+ return sse2_next_match(search);
478
+ } else {
479
+ // sse2_next_match will only advance search->ptr up to the last matching character.
480
+ // Skip over any characters in the last chunk that occur after the last match.
481
+ search->has_matches = false;
482
+ if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
483
+ search->ptr = search->end;
484
+ } else {
485
+ search->ptr = search->chunk_base + sizeof(__m128i);
486
+ }
487
+ }
488
+ }
489
+
490
+ while (search->ptr + sizeof(__m128i) <= search->end) {
491
+ int needs_escape_mask = sse2_update(search->ptr);
492
+
493
+ if (needs_escape_mask == 0) {
494
+ search->ptr += sizeof(__m128i);
495
+ continue;
496
+ }
497
+
498
+ search->has_matches = true;
499
+ search->matches_mask = needs_escape_mask;
500
+ search->chunk_base = search->ptr;
501
+ return sse2_next_match(search);
502
+ }
503
+
504
+ // There are fewer than 16 bytes left.
505
+ unsigned long remaining = (search->end - search->ptr);
506
+ if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507
+ char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508
+
509
+ int needs_escape_mask = sse2_update(s);
510
+
511
+ if (needs_escape_mask == 0) {
512
+ // Nothing to escape, ensure search_flush doesn't do anything by setting
513
+ // search->cursor to search->ptr.
514
+ search->buffer->len += remaining;
515
+ search->ptr = search->end;
516
+ search->cursor = search->end;
517
+ return 0;
518
+ }
519
+
520
+ search->has_matches = true;
521
+ search->matches_mask = needs_escape_mask;
522
+ search->chunk_base = search->ptr;
523
+ return sse2_next_match(search);
524
+ }
525
+
526
+ if (search->ptr < search->end) {
527
+ return search_escape_basic(search);
528
+ }
529
+
530
+ search_flush(search);
531
+ return 0;
532
+ }
533
+
534
+ #endif /* HAVE_SIMD_SSE2 */
535
+
536
+ #endif /* HAVE_SIMD */
537
+
230
538
  static const unsigned char script_safe_escape_table[256] = {
231
539
  // ASCII Control Characters
232
540
  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@@ -789,6 +1097,21 @@ struct hash_foreach_arg {
789
1097
  int iter;
790
1098
  };
791
1099
 
1100
+ static VALUE
1101
+ convert_string_subclass(VALUE key)
1102
+ {
1103
+ VALUE key_to_s = rb_funcall(key, i_to_s, 0);
1104
+
1105
+ if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
1106
+ VALUE cname = rb_obj_class(key);
1107
+ rb_raise(rb_eTypeError,
1108
+ "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
1109
+ cname, "String", cname, "to_s", rb_obj_class(key_to_s));
1110
+ }
1111
+
1112
+ return key_to_s;
1113
+ }
1114
+
792
1115
  static int
793
1116
  json_object_i(VALUE key, VALUE val, VALUE _arg)
794
1117
  {
@@ -817,7 +1140,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
817
1140
  if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
818
1141
  key_to_s = key;
819
1142
  } else {
820
- key_to_s = rb_funcall(key, i_to_s, 0);
1143
+ key_to_s = convert_string_subclass(key);
821
1144
  }
822
1145
  break;
823
1146
  case T_SYMBOL:
@@ -975,6 +1298,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
975
1298
  search.cursor = search.ptr;
976
1299
  search.end = search.ptr + len;
977
1300
 
1301
+ #ifdef HAVE_SIMD
1302
+ search.matches_mask = 0;
1303
+ search.has_matches = false;
1304
+ search.chunk_base = NULL;
1305
+ #endif /* HAVE_SIMD */
1306
+
978
1307
  switch(rb_enc_str_coderange(obj)) {
979
1308
  case ENC_CODERANGE_7BIT:
980
1309
  case ENC_CODERANGE_VALID:
@@ -1838,4 +2167,23 @@ void Init_generator(void)
1838
2167
  binary_encindex = rb_ascii8bit_encindex();
1839
2168
 
1840
2169
  rb_require("json/ext/generator/state");
2170
+
2171
+
2172
+ switch(find_simd_implementation()) {
2173
+ #ifdef HAVE_SIMD
2174
+ #ifdef HAVE_SIMD_NEON
2175
+ case SIMD_NEON:
2176
+ search_escape_basic_impl = search_escape_basic_neon;
2177
+ break;
2178
+ #endif /* HAVE_SIMD_NEON */
2179
+ #ifdef HAVE_SIMD_SSE2
2180
+ case SIMD_SSE2:
2181
+ search_escape_basic_impl = search_escape_basic_sse2;
2182
+ break;
2183
+ #endif /* HAVE_SIMD_SSE2 */
2184
+ #endif /* HAVE_SIMD */
2185
+ default:
2186
+ search_escape_basic_impl = search_escape_basic;
2187
+ break;
2188
+ }
1841
2189
  }
@@ -0,0 +1,112 @@
1
+ typedef enum {
2
+ SIMD_NONE,
3
+ SIMD_NEON,
4
+ SIMD_SSE2
5
+ } SIMD_Implementation;
6
+
7
+ #ifdef JSON_ENABLE_SIMD
8
+
9
+ #ifdef __clang__
10
+ #if __has_builtin(__builtin_ctzll)
11
+ #define HAVE_BUILTIN_CTZLL 1
12
+ #else
13
+ #define HAVE_BUILTIN_CTZLL 0
14
+ #endif
15
+ #elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
16
+ #define HAVE_BUILTIN_CTZLL 1
17
+ #else
18
+ #define HAVE_BUILTIN_CTZLL 0
19
+ #endif
20
+
21
+ static inline uint32_t trailing_zeros64(uint64_t input) {
22
+ #if HAVE_BUILTIN_CTZLL
23
+ return __builtin_ctzll(input);
24
+ #else
25
+ uint32_t trailing_zeros = 0;
26
+ uint64_t temp = input;
27
+ while ((temp & 1) == 0 && temp > 0) {
28
+ trailing_zeros++;
29
+ temp >>= 1;
30
+ }
31
+ return trailing_zeros;
32
+ #endif
33
+ }
34
+
35
+ static inline int trailing_zeros(int input) {
36
+ #if HAVE_BUILTIN_CTZLL
37
+ return __builtin_ctz(input);
38
+ #else
39
+ int trailing_zeros = 0;
40
+ int temp = input;
41
+ while ((temp & 1) == 0 && temp > 0) {
42
+ trailing_zeros++;
43
+ temp >>= 1;
44
+ }
45
+ return trailing_zeros;
46
+ #endif
47
+ }
48
+
49
+ #define SIMD_MINIMUM_THRESHOLD 6
50
+
51
+ #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
52
+ #include <arm_neon.h>
53
+
54
+ #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
55
+ static SIMD_Implementation find_simd_implementation(void) {
56
+ return SIMD_NEON;
57
+ }
58
+
59
+ #define HAVE_SIMD 1
60
+ #define HAVE_SIMD_NEON 1
61
+
62
+ uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
63
+ uint8x16x4_t tab;
64
+ tab.val[0] = vld1q_u8(table);
65
+ tab.val[1] = vld1q_u8(table+16);
66
+ tab.val[2] = vld1q_u8(table+32);
67
+ tab.val[3] = vld1q_u8(table+48);
68
+ return tab;
69
+ }
70
+
71
+ #endif /* ARM Neon Support.*/
72
+
73
+ #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
74
+
75
+ #ifdef HAVE_X86INTRIN_H
76
+ #include <x86intrin.h>
77
+
78
+ #define HAVE_SIMD 1
79
+ #define HAVE_SIMD_SSE2 1
80
+
81
+ #ifdef HAVE_CPUID_H
82
+ #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
83
+
84
+ #include <cpuid.h>
85
+ #endif /* HAVE_CPUID_H */
86
+
87
+ static SIMD_Implementation find_simd_implementation(void) {
88
+
89
+ #if defined(__GNUC__ ) || defined(__clang__)
90
+ #ifdef __GNUC__
91
+ __builtin_cpu_init();
92
+ #endif /* __GNUC__ */
93
+
94
+ // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
95
+ if (__builtin_cpu_supports("sse2")) {
96
+ return SIMD_SSE2;
97
+ }
98
+ #endif /* __GNUC__ || __clang__*/
99
+
100
+ return SIMD_NONE;
101
+ }
102
+
103
+ #endif /* HAVE_X86INTRIN_H */
104
+ #endif /* X86_64 Support */
105
+
106
+ #endif /* JSON_ENABLE_SIMD */
107
+
108
+ #ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
109
+ static SIMD_Implementation find_simd_implementation(void) {
110
+ return SIMD_NONE;
111
+ }
112
+ #endif
@@ -337,19 +337,86 @@ static size_t strnlen(const char *s, size_t maxlen)
337
337
  }
338
338
  #endif
339
339
 
340
+ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
341
+ {
342
+ int len = 1;
343
+ if (ch <= 0x7F) {
344
+ buf[0] = (char) ch;
345
+ } else if (ch <= 0x07FF) {
346
+ buf[0] = (char) ((ch >> 6) | 0xC0);
347
+ buf[1] = (char) ((ch & 0x3F) | 0x80);
348
+ len++;
349
+ } else if (ch <= 0xFFFF) {
350
+ buf[0] = (char) ((ch >> 12) | 0xE0);
351
+ buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
352
+ buf[2] = (char) ((ch & 0x3F) | 0x80);
353
+ len += 2;
354
+ } else if (ch <= 0x1fffff) {
355
+ buf[0] =(char) ((ch >> 18) | 0xF0);
356
+ buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
357
+ buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
358
+ buf[3] =(char) ((ch & 0x3F) | 0x80);
359
+ len += 3;
360
+ } else {
361
+ buf[0] = '?';
362
+ }
363
+ return len;
364
+ }
365
+
366
+ typedef struct JSON_ParserStruct {
367
+ VALUE on_load_proc;
368
+ VALUE decimal_class;
369
+ ID decimal_method_id;
370
+ int max_nesting;
371
+ bool allow_nan;
372
+ bool allow_trailing_comma;
373
+ bool parsing_name;
374
+ bool symbolize_names;
375
+ bool freeze;
376
+ } JSON_ParserConfig;
377
+
378
+ typedef struct JSON_ParserStateStruct {
379
+ VALUE stack_handle;
380
+ const char *start;
381
+ const char *cursor;
382
+ const char *end;
383
+ rvalue_stack *stack;
384
+ rvalue_cache name_cache;
385
+ int in_array;
386
+ int current_nesting;
387
+ } JSON_ParserState;
388
+
389
+
340
390
  #define PARSE_ERROR_FRAGMENT_LEN 32
341
391
  #ifdef RBIMPL_ATTR_NORETURN
342
392
  RBIMPL_ATTR_NORETURN()
343
393
  #endif
344
- static void raise_parse_error(const char *format, const char *start)
394
+ static void raise_parse_error(const char *format, JSON_ParserState *state)
345
395
  {
346
396
  unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
347
397
 
348
- size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
349
- const char *ptr = start;
398
+ const char *cursor = state->cursor;
399
+ long column = 0;
400
+ long line = 1;
401
+
402
+ while (cursor >= state->start) {
403
+ if (*cursor-- == '\n') {
404
+ break;
405
+ }
406
+ column++;
407
+ }
408
+
409
+ while (cursor >= state->start) {
410
+ if (*cursor-- == '\n') {
411
+ line++;
412
+ }
413
+ }
414
+
415
+ const char *ptr = state->cursor;
416
+ size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
350
417
 
351
418
  if (len == PARSE_ERROR_FRAGMENT_LEN) {
352
- MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
419
+ MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
353
420
 
354
421
  while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
355
422
  len--;
@@ -363,7 +430,23 @@ static void raise_parse_error(const char *format, const char *start)
363
430
  ptr = (const char *)buffer;
364
431
  }
365
432
 
366
- rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
433
+ VALUE msg = rb_sprintf(format, ptr);
434
+ VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
435
+ RB_GC_GUARD(msg);
436
+
437
+ VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
438
+ rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
439
+ rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
440
+ rb_exc_raise(exc);
441
+ }
442
+
443
+ #ifdef RBIMPL_ATTR_NORETURN
444
+ RBIMPL_ATTR_NORETURN()
445
+ #endif
446
+ static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
447
+ {
448
+ state->cursor = at;
449
+ raise_parse_error(format, state);
367
450
  }
368
451
 
369
452
  /* unicode */
@@ -385,73 +468,25 @@ static const signed char digit_values[256] = {
385
468
  -1, -1, -1, -1, -1, -1, -1
386
469
  };
387
470
 
388
- static uint32_t unescape_unicode(const unsigned char *p)
471
+ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
389
472
  {
390
473
  signed char b;
391
474
  uint32_t result = 0;
392
475
  b = digit_values[p[0]];
393
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
476
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
394
477
  result = (result << 4) | (unsigned char)b;
395
478
  b = digit_values[p[1]];
396
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
479
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
397
480
  result = (result << 4) | (unsigned char)b;
398
481
  b = digit_values[p[2]];
399
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
482
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
400
483
  result = (result << 4) | (unsigned char)b;
401
484
  b = digit_values[p[3]];
402
- if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
485
+ if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
403
486
  result = (result << 4) | (unsigned char)b;
404
487
  return result;
405
488
  }
406
489
 
407
- static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
408
- {
409
- int len = 1;
410
- if (ch <= 0x7F) {
411
- buf[0] = (char) ch;
412
- } else if (ch <= 0x07FF) {
413
- buf[0] = (char) ((ch >> 6) | 0xC0);
414
- buf[1] = (char) ((ch & 0x3F) | 0x80);
415
- len++;
416
- } else if (ch <= 0xFFFF) {
417
- buf[0] = (char) ((ch >> 12) | 0xE0);
418
- buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
419
- buf[2] = (char) ((ch & 0x3F) | 0x80);
420
- len += 2;
421
- } else if (ch <= 0x1fffff) {
422
- buf[0] =(char) ((ch >> 18) | 0xF0);
423
- buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
424
- buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
425
- buf[3] =(char) ((ch & 0x3F) | 0x80);
426
- len += 3;
427
- } else {
428
- buf[0] = '?';
429
- }
430
- return len;
431
- }
432
-
433
- typedef struct JSON_ParserStruct {
434
- VALUE on_load_proc;
435
- VALUE decimal_class;
436
- ID decimal_method_id;
437
- int max_nesting;
438
- bool allow_nan;
439
- bool allow_trailing_comma;
440
- bool parsing_name;
441
- bool symbolize_names;
442
- bool freeze;
443
- } JSON_ParserConfig;
444
-
445
- typedef struct JSON_ParserStateStruct {
446
- VALUE stack_handle;
447
- const char *cursor;
448
- const char *end;
449
- rvalue_stack *stack;
450
- rvalue_cache name_cache;
451
- int in_array;
452
- int current_nesting;
453
- } JSON_ParserState;
454
-
455
490
  #define GET_PARSER_CONFIG \
456
491
  JSON_ParserConfig *config; \
457
492
  TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
@@ -485,8 +520,7 @@ json_eat_comments(JSON_ParserState *state)
485
520
  while (true) {
486
521
  state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
487
522
  if (!state->cursor) {
488
- state->cursor = state->end;
489
- raise_parse_error("unexpected end of input, expected closing '*/'", state->cursor);
523
+ raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
490
524
  } else {
491
525
  state->cursor++;
492
526
  if (state->cursor < state->end && *state->cursor == '/') {
@@ -498,11 +532,11 @@ json_eat_comments(JSON_ParserState *state)
498
532
  break;
499
533
  }
500
534
  default:
501
- raise_parse_error("unexpected token at '%s'", state->cursor);
535
+ raise_parse_error("unexpected token '%s'", state);
502
536
  break;
503
537
  }
504
538
  } else {
505
- raise_parse_error("unexpected token at '%s'", state->cursor);
539
+ raise_parse_error("unexpected token '%s'", state);
506
540
  }
507
541
  }
508
542
 
@@ -621,9 +655,9 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
621
655
  break;
622
656
  case 'u':
623
657
  if (pe > stringEnd - 5) {
624
- raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
658
+ raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
625
659
  } else {
626
- uint32_t ch = unescape_unicode((unsigned char *) ++pe);
660
+ uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
627
661
  pe += 3;
628
662
  /* To handle values above U+FFFF, we take a sequence of
629
663
  * \uXXXX escapes in the U+D800..U+DBFF then
@@ -638,10 +672,10 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
638
672
  if ((ch & 0xFC00) == 0xD800) {
639
673
  pe++;
640
674
  if (pe > stringEnd - 6) {
641
- raise_parse_error("incomplete surrogate pair at '%s'", p);
675
+ raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
642
676
  }
643
677
  if (pe[0] == '\\' && pe[1] == 'u') {
644
- uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
678
+ uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
645
679
  ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
646
680
  | (sur & 0x3FF));
647
681
  pe += 5;
@@ -829,12 +863,12 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
829
863
  state->cursor++;
830
864
  escaped = true;
831
865
  if ((unsigned char)*state->cursor < 0x20) {
832
- raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
866
+ raise_parse_error("invalid ASCII control character in string: %s", state);
833
867
  }
834
868
  break;
835
869
  }
836
870
  default:
837
- raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
871
+ raise_parse_error("invalid ASCII control character in string: %s", state);
838
872
  break;
839
873
  }
840
874
  }
@@ -842,7 +876,7 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
842
876
  state->cursor++;
843
877
  }
844
878
 
845
- raise_parse_error("unexpected end of input, expected closing \"", state->cursor);
879
+ raise_parse_error("unexpected end of input, expected closing \"", state);
846
880
  return Qfalse;
847
881
  }
848
882
 
@@ -850,7 +884,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
850
884
  {
851
885
  json_eat_whitespace(state);
852
886
  if (state->cursor >= state->end) {
853
- raise_parse_error("unexpected end of input", state->cursor);
887
+ raise_parse_error("unexpected end of input", state);
854
888
  }
855
889
 
856
890
  switch (*state->cursor) {
@@ -860,7 +894,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
860
894
  return json_push_value(state, config, Qnil);
861
895
  }
862
896
 
863
- raise_parse_error("unexpected token at '%s'", state->cursor);
897
+ raise_parse_error("unexpected token '%s'", state);
864
898
  break;
865
899
  case 't':
866
900
  if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
@@ -868,7 +902,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
868
902
  return json_push_value(state, config, Qtrue);
869
903
  }
870
904
 
871
- raise_parse_error("unexpected token at '%s'", state->cursor);
905
+ raise_parse_error("unexpected token '%s'", state);
872
906
  break;
873
907
  case 'f':
874
908
  // Note: memcmp with a small power of two compile to an integer comparison
@@ -877,7 +911,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
877
911
  return json_push_value(state, config, Qfalse);
878
912
  }
879
913
 
880
- raise_parse_error("unexpected token at '%s'", state->cursor);
914
+ raise_parse_error("unexpected token '%s'", state);
881
915
  break;
882
916
  case 'N':
883
917
  // Note: memcmp with a small power of two compile to an integer comparison
@@ -886,7 +920,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
886
920
  return json_push_value(state, config, CNaN);
887
921
  }
888
922
 
889
- raise_parse_error("unexpected token at '%s'", state->cursor);
923
+ raise_parse_error("unexpected token '%s'", state);
890
924
  break;
891
925
  case 'I':
892
926
  if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
@@ -894,7 +928,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
894
928
  return json_push_value(state, config, CInfinity);
895
929
  }
896
930
 
897
- raise_parse_error("unexpected token at '%s'", state->cursor);
931
+ raise_parse_error("unexpected token '%s'", state);
898
932
  break;
899
933
  case '-':
900
934
  // Note: memcmp with a small power of two compile to an integer comparison
@@ -903,7 +937,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
903
937
  state->cursor += 9;
904
938
  return json_push_value(state, config, CMinusInfinity);
905
939
  } else {
906
- raise_parse_error("unexpected token at '%s'", state->cursor);
940
+ raise_parse_error("unexpected token '%s'", state);
907
941
  }
908
942
  }
909
943
  // Fallthrough
@@ -921,11 +955,11 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
921
955
  long integer_length = state->cursor - start;
922
956
 
923
957
  if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
924
- raise_parse_error("invalid number: %s", start);
958
+ raise_parse_error_at("invalid number: %s", state, start);
925
959
  } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
926
- raise_parse_error("invalid number: %s", start);
960
+ raise_parse_error_at("invalid number: %s", state, start);
927
961
  } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
928
- raise_parse_error("invalid number: %s", start);
962
+ raise_parse_error_at("invalid number: %s", state, start);
929
963
  }
930
964
 
931
965
  if ((state->cursor < state->end) && (*state->cursor == '.')) {
@@ -933,7 +967,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
933
967
  state->cursor++;
934
968
 
935
969
  if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
936
- raise_parse_error("invalid number: %s", state->cursor);
970
+ raise_parse_error("invalid number: %s", state);
937
971
  }
938
972
 
939
973
  while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
@@ -949,7 +983,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
949
983
  }
950
984
 
951
985
  if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
952
- raise_parse_error("invalid number: %s", state->cursor);
986
+ raise_parse_error("invalid number: %s", state);
953
987
  }
954
988
 
955
989
  while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
@@ -1009,7 +1043,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1009
1043
  }
1010
1044
  }
1011
1045
 
1012
- raise_parse_error("expected ',' or ']' after array value", state->cursor);
1046
+ raise_parse_error("expected ',' or ']' after array value", state);
1013
1047
  }
1014
1048
  break;
1015
1049
  }
@@ -1028,13 +1062,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1028
1062
  }
1029
1063
 
1030
1064
  if (*state->cursor != '"') {
1031
- raise_parse_error("expected object key, got '%s", state->cursor);
1065
+ raise_parse_error("expected object key, got '%s", state);
1032
1066
  }
1033
1067
  json_parse_string(state, config, true);
1034
1068
 
1035
1069
  json_eat_whitespace(state);
1036
1070
  if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1037
- raise_parse_error("expected ':' after object key", state->cursor);
1071
+ raise_parse_error("expected ':' after object key", state);
1038
1072
  }
1039
1073
  state->cursor++;
1040
1074
 
@@ -1063,13 +1097,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1063
1097
  }
1064
1098
 
1065
1099
  if (*state->cursor != '"') {
1066
- raise_parse_error("expected object key, got: '%s'", state->cursor);
1100
+ raise_parse_error("expected object key, got: '%s'", state);
1067
1101
  }
1068
1102
  json_parse_string(state, config, true);
1069
1103
 
1070
1104
  json_eat_whitespace(state);
1071
1105
  if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1072
- raise_parse_error("expected ':' after object key, got: '%s", state->cursor);
1106
+ raise_parse_error("expected ':' after object key, got: '%s", state);
1073
1107
  }
1074
1108
  state->cursor++;
1075
1109
 
@@ -1079,24 +1113,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
1079
1113
  }
1080
1114
  }
1081
1115
 
1082
- raise_parse_error("expected ',' or '}' after object value, got: '%s'", state->cursor);
1116
+ raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
1083
1117
  }
1084
1118
  break;
1085
1119
  }
1086
1120
 
1087
1121
  default:
1088
- raise_parse_error("unexpected character: '%s'", state->cursor);
1122
+ raise_parse_error("unexpected character: '%s'", state);
1089
1123
  break;
1090
1124
  }
1091
1125
 
1092
- raise_parse_error("unreacheable: '%s'", state->cursor);
1126
+ raise_parse_error("unreacheable: '%s'", state);
1093
1127
  }
1094
1128
 
1095
1129
  static void json_ensure_eof(JSON_ParserState *state)
1096
1130
  {
1097
1131
  json_eat_whitespace(state);
1098
1132
  if (state->cursor != state->end) {
1099
- raise_parse_error("unexpected token at end of stream '%s'", state->cursor);
1133
+ raise_parse_error("unexpected token at end of stream '%s'", state);
1100
1134
  }
1101
1135
  }
1102
1136
 
@@ -1232,9 +1266,14 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
1232
1266
  .capa = RVALUE_STACK_INITIAL_CAPA,
1233
1267
  };
1234
1268
 
1269
+ long len;
1270
+ const char *start;
1271
+ RSTRING_GETMEM(Vsource, start, len);
1272
+
1235
1273
  JSON_ParserState _state = {
1236
- .cursor = RSTRING_PTR(Vsource),
1237
- .end = RSTRING_END(Vsource),
1274
+ .start = start,
1275
+ .cursor = start,
1276
+ .end = start + len,
1238
1277
  .stack = &stack,
1239
1278
  };
1240
1279
  JSON_ParserState *state = &_state;
@@ -41,7 +41,7 @@ typedef struct Fp {
41
41
  int exp;
42
42
  } Fp;
43
43
 
44
- static Fp powers_ten[] = {
44
+ static const Fp powers_ten[] = {
45
45
  { 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 },
46
46
  { 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 },
47
47
  { 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 },
@@ -123,7 +123,7 @@ static Fp find_cachedpow10(int exp, int* k)
123
123
  #define absv(n) ((n) < 0 ? -(n) : (n))
124
124
  #define minv(a, b) ((a) < (b) ? (a) : (b))
125
125
 
126
- static uint64_t tens[] = {
126
+ static const uint64_t tens[] = {
127
127
  10000000000000000000U, 1000000000000000000U, 100000000000000000U,
128
128
  10000000000000000U, 1000000000000000U, 100000000000000U,
129
129
  10000000000000U, 1000000000000U, 100000000000U,
@@ -244,7 +244,7 @@ static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
244
244
  uint64_t part2 = upper->frac & (one.frac - 1);
245
245
 
246
246
  int idx = 0, kappa = 10;
247
- uint64_t* divp;
247
+ const uint64_t* divp;
248
248
  /* 1000000000 */
249
249
  for(divp = tens + 10; kappa > 0; divp++) {
250
250
 
@@ -268,7 +268,7 @@ static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
268
268
  }
269
269
 
270
270
  /* 10 */
271
- uint64_t* unit = tens + 18;
271
+ const uint64_t* unit = tens + 18;
272
272
 
273
273
  while(true) {
274
274
  part2 *= 10;
@@ -340,7 +340,7 @@ static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
340
340
  }
341
341
 
342
342
  /* write decimal w/o scientific notation */
343
- if(K < 0 && (K > -7 || exp < 4)) {
343
+ if(K < 0 && (K > -7 || exp < 10)) {
344
344
  int offset = ndigits - absv(K);
345
345
  /* fp < 1.0 -> write leading zero */
346
346
  if(offset <= 0) {
data/lib/json/common.rb CHANGED
@@ -230,7 +230,9 @@ module JSON
230
230
  class JSONError < StandardError; end
231
231
 
232
232
  # This exception is raised if a parser error occurs.
233
- class ParserError < JSONError; end
233
+ class ParserError < JSONError
234
+ attr_reader :line, :column
235
+ end
234
236
 
235
237
  # This exception is raised if the nesting of parsed data structures is too
236
238
  # deep.
data/lib/json/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JSON
4
- VERSION = '2.11.3'
4
+ VERSION = '2.12.0'
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.11.3
4
+ version: 2.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-04-25 00:00:00.000000000 Z
10
+ date: 2025-05-12 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: This is a JSON implementation as a Ruby extension in C.
13
13
  email: flori@ping.de
@@ -26,6 +26,7 @@ files:
26
26
  - ext/json/ext/fbuffer/fbuffer.h
27
27
  - ext/json/ext/generator/extconf.rb
28
28
  - ext/json/ext/generator/generator.c
29
+ - ext/json/ext/generator/simd.h
29
30
  - ext/json/ext/parser/extconf.rb
30
31
  - ext/json/ext/parser/parser.c
31
32
  - ext/json/ext/vendor/fpconv.c