RubyGems - json - Versions diffs - 2.11.3 → 2.12.0 - Mend

json 2.11.3 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/CHANGES.md +9 -0
data/ext/json/ext/generator/extconf.rb +29 -0
data/ext/json/ext/generator/generator.c +356 -8
data/ext/json/ext/generator/simd.h +112 -0
data/ext/json/ext/parser/parser.c +131 -92
data/ext/json/ext/vendor/fpconv.c +5 -5
data/lib/json/common.rb +3 -1
data/lib/json/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1919e2040a180b81eba1f475c511ace075b32015997b7d58098f93103941f8b2
-  data.tar.gz: d958784bea1136d935835d3e602fae96f97d25208cafec8a68db03619e6d34d0
+  metadata.gz: 8e71f977a9d4c1316007814d62236fd185f5aaade7a79f3e5d48a9ffde32f520
+  data.tar.gz: f1be8ac3136a6dcf48aa15c7ec08fa4dfcedb6f89b1b6ad8944727708a16e074
 SHA512:
-  metadata.gz: 742da3e909b2b6d8c1c9de5833b11be0f80e3b50f5296973b57f03cd45ae584162ac33bcbeb5b99fa767714b8531fef71b6d7ff559da40c3b04e75026ba3158f
-  data.tar.gz: e55ae407cc5b0da66922a41119b000da925391b58ea9da154a058b15f034334fae9e9c813cda12f4db75936c50e281df00acf8a4053808923d89d5efaa6927af
+  metadata.gz: 23f2d490dfb7ea60b189f8227787fde0c53844f62c8e9023ba1d413a72b46b7a3b77836d1a6050dd0a2fa925370bd260da0a52d738bd1231c81ad1ef4a17adda
+  data.tar.gz: 22326ad3f75f99e20c7f1ad3cc0f519ffc56b7c85c94aa124a2ea47c8d0c86f604307fe504f216b347651d3c82df83623798dbdbabc45be78a1e4721cc7b8cbe

data/CHANGES.md CHANGED Viewed

@@ -1,5 +1,14 @@
 # Changes
+### Unreleased
+### 2025-05-12 (2.12.0)
+* Improve floating point generation to not use scientific notation as much.
+* Include line and column in parser errors. Both in the message and as exception attributes.
+* Handle non-string hash keys with broken `to_s` implementations.
+* `JSON.generate` now uses SSE2 (x86) or NEON (arm64) instructions when available to escape strings.
 ### 2025-04-25 (2.11.3)
 * Fix a regression in `JSON.pretty_generate` that could cause indentation to be off once some `#to_json` has been called.

data/ext/json/ext/generator/extconf.rb CHANGED Viewed

@@ -6,5 +6,34 @@ if RUBY_ENGINE == 'truffleruby'
 else
   append_cflags("-std=c99")
   $defs << "-DJSON_GENERATOR"
+  if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
+    if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
+      # Try to compile a small program using NEON instructions
+      if have_header('arm_neon.h')
+        have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
+          #include <arm_neon.h>
+          int main() {
+              uint8x16_t test = vdupq_n_u8(32);
+              return 0;
+          }
+        SRC
+          $defs.push("-DJSON_ENABLE_SIMD")
+      end
+    end
+    if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
+      #include <x86intrin.h>
+      int main() {
+          __m128i test = _mm_set1_epi8(32);
+          return 0;
+      }
+      SRC
+        $defs.push("-DJSON_ENABLE_SIMD")
+    end
+    have_header('cpuid.h')
+  end
   create_makefile 'json/ext/generator'
 end

data/ext/json/ext/generator/generator.c CHANGED Viewed

@@ -5,6 +5,8 @@
 #include <math.h>
 #include <ctype.h>
+#include "simd.h"
 /* ruby api and some helpers */
 typedef struct JSON_Generator_StateStruct {
@@ -109,12 +111,40 @@ typedef struct _search_state {
     const char *end;
     const char *cursor;
     FBuffer *buffer;
+#ifdef HAVE_SIMD
+    const char *chunk_base;
+    const char *chunk_end;
+    bool has_matches;
+#if defined(HAVE_SIMD_NEON)
+    uint64_t matches_mask;
+#elif defined(HAVE_SIMD_SSE2)
+    int matches_mask;
+#else
+#error "Unknown SIMD Implementation."
+#endif /* HAVE_SIMD_NEON */
+#endif /* HAVE_SIMD */
 } search_state;
-static inline void search_flush(search_state *search)
-{
-    fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
-    search->cursor = search->ptr;
+#if (defined(__GNUC__ ) || defined(__clang__))
+#define FORCE_INLINE __attribute__((always_inline))
+#else
+#define FORCE_INLINE
+#endif
+static inline FORCE_INLINE void search_flush(search_state *search)
+{
+    // Do not remove this conditional without profiling, specifically escape-heavy text.
+    // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
+    // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
+    // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
+    // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
+    // nothing needs to be flushed, we can save a few memory references with this conditional.
+    if (search->ptr > search->cursor) {
+        fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
+        search->cursor = search->ptr;
+    }
 }
 static const unsigned char escape_table_basic[256] = {
@@ -130,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };
+static unsigned char (*search_escape_basic_impl)(search_state *);
 static inline unsigned char search_escape_basic(search_state *search)
 {
     while (search->ptr < search->end) {
@@ -144,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
     return 0;
 }
-static inline void escape_UTF8_char_basic(search_state *search) {
+static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
+{
     const unsigned char ch = (unsigned char)*search->ptr;
     switch (ch) {
         case '"':  fbuffer_append(search->buffer, "\\\"", 2); break;
@@ -186,12 +219,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
  */
 static inline void convert_UTF8_to_JSON(search_state *search)
 {
-    while (search_escape_basic(search)) {
+    while (search_escape_basic_impl(search)) {
         escape_UTF8_char_basic(search);
     }
 }
-static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
+static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
+{
     const unsigned char ch = (unsigned char)*search->ptr;
     switch (ch_len) {
         case 1: {
@@ -227,6 +261,280 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
     search->cursor = (search->ptr += ch_len);
 }
+#ifdef HAVE_SIMD
+static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
+{
+    // Flush the buffer so everything up until the last 'len' characters are unflushed.
+    search_flush(search);
+    FBuffer *buf = search->buffer;
+    fbuffer_inc_capa(buf, vec_len);
+    char *s = (buf->ptr + buf->len);
+    // Pad the buffer with dummy characters that won't need escaping.
+    // This seems wasteful at first sight, but memset of vector length is very fast.
+    memset(s, 'X', vec_len);
+    // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
+    // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
+    MEMCPY(s, search->ptr, char, len);
+    return s;
+}
+#ifdef HAVE_SIMD_NEON
+static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
+{
+    uint64_t mask = search->matches_mask;
+    uint32_t index = trailing_zeros64(mask) >> 2;
+    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+    // If we want to use a similar approach for full escaping we'll need to ensure:
+    //     search->chunk_base + index >= search->ptr
+    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+    // is one byte after the previous match then:
+    //     search->chunk_base + index == search->ptr
+    search->ptr = search->chunk_base + index;
+    mask &= mask - 1;
+    search->matches_mask = mask;
+    search_flush(search);
+    return 1;
+}
+// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
+static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
+{
+    const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
+    const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
+    return mask & 0x8888888888888888ull;
+}
+static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
+{
+    uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
+    // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
+    // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
+    const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
+    uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
+    uint8x16_t needs_escape  = vorrq_u8(too_low_or_dbl_quote, has_backslash);
+    return neon_match_mask(needs_escape);
+}
+static inline unsigned char search_escape_basic_neon(search_state *search)
+{
+    if (RB_UNLIKELY(search->has_matches)) {
+        // There are more matches if search->matches_mask > 0.
+        if (search->matches_mask > 0) {
+            return neon_next_match(search);
+        } else {
+            // neon_next_match will only advance search->ptr up to the last matching character.
+            // Skip over any characters in the last chunk that occur after the last match.
+            search->has_matches = false;
+            search->ptr = search->chunk_end;
+        }
+    }
+    /*
+    * The code below implements a SIMD-based algorithm to determine if N bytes at a time
+    * need to be escaped.
+    *
+    * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
+    *
+    * The explanation will be limited to the first 8 bytes of the string for simplicity. However
+    * the vector instructions may work on larger vectors.
+    *
+    * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
+    *
+    * lower_bound: [20 20 20 20 20 20 20 20]
+    * backslash:   [5C 5C 5C 5C 5C 5C 5C 5C]
+    * dblquote:    [22 22 22 22 22 22 22 22]
+    *
+    * Next we load the first chunk of the ptr:
+    * [22 54 65 5C 73 74 69 6E] ("  T  e  \  s  t  i  n)
+    *
+    * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
+    * as no bytes are less than 32 (0x20):
+    * [0 0 0 0 0 0 0 0]
+    *
+    * Next, we check if any byte in chunk is equal to a backslash:
+    * [0 0 0 FF 0 0 0 0]
+    *
+    * Finally we check if any byte in chunk is equal to a double quote:
+    * [FF 0 0 0 0 0 0 0]
+    *
+    * Now we have three vectors where each byte indicates if the corresponding byte in chunk
+    * needs to be escaped. We combine these vectors with a series of logical OR instructions.
+    * This is the needs_escape vector and it is equal to:
+    * [FF 0 0 FF 0 0 0 0]
+    *
+    * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
+    * the values in the vector. This computes how many bytes need to be escaped within this chunk.
+    *
+    * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0, then
+    * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
+    * have at least one byte that needs to be escaped.
+    */
+    while (search->ptr + sizeof(uint8x16_t) <= search->end) {
+        uint64_t mask = neon_rules_update(search->ptr);
+        if (!mask) {
+            search->ptr += sizeof(uint8x16_t);
+            continue;
+        }
+        search->matches_mask = mask;
+        search->has_matches = true;
+        search->chunk_base = search->ptr;
+        search->chunk_end = search->ptr + sizeof(uint8x16_t);
+        return neon_next_match(search);
+    }
+    // There are fewer than 16 bytes left.
+    unsigned long remaining = (search->end - search->ptr);
+    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+        char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
+        uint64_t mask = neon_rules_update(s);
+        if (!mask) {
+            // Nothing to escape, ensure search_flush doesn't do anything by setting
+            // search->cursor to search->ptr.
+            search->buffer->len += remaining;
+            search->ptr = search->end;
+            search->cursor = search->end;
+            return 0;
+        }
+        search->matches_mask = mask;
+        search->has_matches = true;
+        search->chunk_end = search->end;
+        search->chunk_base = search->ptr;
+        return neon_next_match(search);
+    }
+    if (search->ptr < search->end) {
+        return search_escape_basic(search);
+    }
+    search_flush(search);
+    return 0;
+}
+#endif /* HAVE_SIMD_NEON */
+#ifdef HAVE_SIMD_SSE2
+#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
+#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
+#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
+#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
+static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
+{
+    int mask = search->matches_mask;
+    int index = trailing_zeros(mask);
+    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+    // If we want to use a similar approach for full escaping we'll need to ensure:
+    //     search->chunk_base + index >= search->ptr
+    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+    // is one byte after the previous match then:
+    //     search->chunk_base + index == search->ptr
+    search->ptr = search->chunk_base + index;
+    mask &= mask - 1;
+    search->matches_mask = mask;
+    search_flush(search);
+    return 1;
+}
+#if defined(__clang__) || defined(__GNUC__)
+#define TARGET_SSE2 __attribute__((target("sse2")))
+#else
+#define TARGET_SSE2
+#endif
+static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
+{
+    __m128i chunk         = _mm_loadu_si128((__m128i const*)ptr);
+    // Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
+    // https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
+    __m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
+    __m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
+    __m128i needs_escape  = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
+    return _mm_movemask_epi8(needs_escape);
+}
+static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
+{
+    if (RB_UNLIKELY(search->has_matches)) {
+        // There are more matches if search->matches_mask > 0.
+        if (search->matches_mask > 0) {
+            return sse2_next_match(search);
+        } else {
+            // sse2_next_match will only advance search->ptr up to the last matching character.
+            // Skip over any characters in the last chunk that occur after the last match.
+            search->has_matches = false;
+            if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
+                search->ptr = search->end;
+            } else {
+                search->ptr = search->chunk_base + sizeof(__m128i);
+            }
+        }
+    }
+    while (search->ptr + sizeof(__m128i) <= search->end) {
+        int needs_escape_mask = sse2_update(search->ptr);
+        if (needs_escape_mask == 0) {
+            search->ptr += sizeof(__m128i);
+            continue;
+        }
+        search->has_matches = true;
+        search->matches_mask = needs_escape_mask;
+        search->chunk_base = search->ptr;
+        return sse2_next_match(search);
+    }
+    // There are fewer than 16 bytes left.
+    unsigned long remaining = (search->end - search->ptr);
+    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+        char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
+        int needs_escape_mask = sse2_update(s);
+        if (needs_escape_mask == 0) {
+            // Nothing to escape, ensure search_flush doesn't do anything by setting
+            // search->cursor to search->ptr.
+            search->buffer->len += remaining;
+            search->ptr = search->end;
+            search->cursor = search->end;
+            return 0;
+        }
+        search->has_matches = true;
+        search->matches_mask = needs_escape_mask;
+        search->chunk_base = search->ptr;
+        return sse2_next_match(search);
+    }
+    if (search->ptr < search->end) {
+        return search_escape_basic(search);
+    }
+    search_flush(search);
+    return 0;
+}
+#endif /* HAVE_SIMD_SSE2 */
+#endif /* HAVE_SIMD */
 static const unsigned char script_safe_escape_table[256] = {
     // ASCII Control Characters
      9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@@ -789,6 +1097,21 @@ struct hash_foreach_arg {
     int iter;
 };
+static VALUE
+convert_string_subclass(VALUE key)
+{
+    VALUE key_to_s = rb_funcall(key, i_to_s, 0);
+    if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
+        VALUE cname = rb_obj_class(key);
+        rb_raise(rb_eTypeError,
+                 "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
+                 cname, "String", cname, "to_s", rb_obj_class(key_to_s));
+    }
+    return key_to_s;
+}
 static int
 json_object_i(VALUE key, VALUE val, VALUE _arg)
 {
@@ -817,7 +1140,7 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
             if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
                 key_to_s = key;
             } else {
-                key_to_s = rb_funcall(key, i_to_s, 0);
+                key_to_s = convert_string_subclass(key);
             }
             break;
         case T_SYMBOL:
@@ -975,6 +1298,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
     search.cursor = search.ptr;
     search.end = search.ptr + len;
+#ifdef HAVE_SIMD
+    search.matches_mask = 0;
+    search.has_matches = false;
+    search.chunk_base = NULL;
+#endif /* HAVE_SIMD */
     switch(rb_enc_str_coderange(obj)) {
         case ENC_CODERANGE_7BIT:
         case ENC_CODERANGE_VALID:
@@ -1838,4 +2167,23 @@ void Init_generator(void)
     binary_encindex = rb_ascii8bit_encindex();
     rb_require("json/ext/generator/state");
+    switch(find_simd_implementation()) {
+#ifdef HAVE_SIMD
+#ifdef HAVE_SIMD_NEON
+        case SIMD_NEON:
+            search_escape_basic_impl = search_escape_basic_neon;
+            break;
+#endif /* HAVE_SIMD_NEON */
+#ifdef HAVE_SIMD_SSE2
+        case SIMD_SSE2:
+            search_escape_basic_impl = search_escape_basic_sse2;
+            break;
+#endif /* HAVE_SIMD_SSE2 */
+#endif /* HAVE_SIMD */
+        default:
+            search_escape_basic_impl = search_escape_basic;
+            break;
+    }
 }

data/ext/json/ext/generator/simd.h ADDED Viewed

@@ -0,0 +1,112 @@
+typedef enum {
+    SIMD_NONE,
+    SIMD_NEON,
+    SIMD_SSE2
+} SIMD_Implementation;
+#ifdef JSON_ENABLE_SIMD
+#ifdef __clang__
+  #if __has_builtin(__builtin_ctzll)
+    #define HAVE_BUILTIN_CTZLL 1
+  #else
+    #define HAVE_BUILTIN_CTZLL 0
+  #endif
+#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+  #define HAVE_BUILTIN_CTZLL 1
+#else
+  #define HAVE_BUILTIN_CTZLL 0
+#endif
+static inline uint32_t trailing_zeros64(uint64_t input) {
+#if HAVE_BUILTIN_CTZLL
+  return __builtin_ctzll(input);
+#else
+  uint32_t trailing_zeros = 0;
+  uint64_t temp = input;
+  while ((temp & 1) == 0 && temp > 0) {
+    trailing_zeros++;
+    temp >>= 1;
+  }
+  return trailing_zeros;
+#endif
+}
+static inline int trailing_zeros(int input) {
+  #if HAVE_BUILTIN_CTZLL
+    return __builtin_ctz(input);
+  #else
+    int trailing_zeros = 0;
+    int temp = input;
+    while ((temp & 1) == 0 && temp > 0) {
+      trailing_zeros++;
+      temp >>= 1;
+    }
+    return trailing_zeros;
+  #endif
+}
+#define SIMD_MINIMUM_THRESHOLD 6
+#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
+#include <arm_neon.h>
+#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
+static SIMD_Implementation find_simd_implementation(void) {
+    return SIMD_NEON;
+}
+#define HAVE_SIMD 1
+#define HAVE_SIMD_NEON 1
+uint8x16x4_t load_uint8x16_4(const unsigned char *table) {
+  uint8x16x4_t tab;
+  tab.val[0] = vld1q_u8(table);
+  tab.val[1] = vld1q_u8(table+16);
+  tab.val[2] = vld1q_u8(table+32);
+  tab.val[3] = vld1q_u8(table+48);
+  return tab;
+}
+#endif /* ARM Neon Support.*/
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#ifdef HAVE_X86INTRIN_H
+#include <x86intrin.h>
+#define HAVE_SIMD 1
+#define HAVE_SIMD_SSE2 1
+#ifdef HAVE_CPUID_H
+#define FIND_SIMD_IMPLEMENTATION_DEFINED 1
+#include <cpuid.h>
+#endif /* HAVE_CPUID_H */
+static SIMD_Implementation find_simd_implementation(void) {
+#if defined(__GNUC__ ) || defined(__clang__)
+#ifdef __GNUC__
+    __builtin_cpu_init();
+#endif /* __GNUC__  */
+    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
+    if (__builtin_cpu_supports("sse2")) {
+        return SIMD_SSE2;
+    }
+#endif /* __GNUC__ || __clang__*/
+    return SIMD_NONE;
+}
+#endif /* HAVE_X86INTRIN_H */
+#endif /* X86_64 Support */
+#endif /* JSON_ENABLE_SIMD */
+#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED
+static SIMD_Implementation find_simd_implementation(void) {
+    return SIMD_NONE;
+}
+#endif

data/ext/json/ext/parser/parser.c CHANGED Viewed

@@ -337,19 +337,86 @@ static size_t strnlen(const char *s, size_t maxlen)
 }
 #endif
+static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
+{
+    int len = 1;
+    if (ch <= 0x7F) {
+        buf[0] = (char) ch;
+    } else if (ch <= 0x07FF) {
+        buf[0] = (char) ((ch >> 6) | 0xC0);
+        buf[1] = (char) ((ch & 0x3F) | 0x80);
+        len++;
+    } else if (ch <= 0xFFFF) {
+        buf[0] = (char) ((ch >> 12) | 0xE0);
+        buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
+        buf[2] = (char) ((ch & 0x3F) | 0x80);
+        len += 2;
+    } else if (ch <= 0x1fffff) {
+        buf[0] =(char) ((ch >> 18) | 0xF0);
+        buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
+        buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
+        buf[3] =(char) ((ch & 0x3F) | 0x80);
+        len += 3;
+    } else {
+        buf[0] = '?';
+    }
+    return len;
+}
+typedef struct JSON_ParserStruct {
+    VALUE on_load_proc;
+    VALUE decimal_class;
+    ID decimal_method_id;
+    int max_nesting;
+    bool allow_nan;
+    bool allow_trailing_comma;
+    bool parsing_name;
+    bool symbolize_names;
+    bool freeze;
+} JSON_ParserConfig;
+typedef struct JSON_ParserStateStruct {
+    VALUE stack_handle;
+    const char *start;
+    const char *cursor;
+    const char *end;
+    rvalue_stack *stack;
+    rvalue_cache name_cache;
+    int in_array;
+    int current_nesting;
+} JSON_ParserState;
 #define PARSE_ERROR_FRAGMENT_LEN 32
 #ifdef RBIMPL_ATTR_NORETURN
 RBIMPL_ATTR_NORETURN()
 #endif
-static void raise_parse_error(const char *format, const char *start)
+static void raise_parse_error(const char *format, JSON_ParserState *state)
 {
     unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
-    size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
-    const char *ptr = start;
+    const char *cursor = state->cursor;
+    long column = 0;
+    long line = 1;
+    while (cursor >= state->start) {
+        if (*cursor-- == '\n') {
+            break;
+        }
+        column++;
+    }
+    while (cursor >= state->start) {
+        if (*cursor-- == '\n') {
+            line++;
+        }
+    }
+    const char *ptr = state->cursor;
+    size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
     if (len == PARSE_ERROR_FRAGMENT_LEN) {
-        MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
+        MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
         while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
             len--;
@@ -363,7 +430,23 @@ static void raise_parse_error(const char *format, const char *start)
         ptr = (const char *)buffer;
     }
-    rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
+    VALUE msg = rb_sprintf(format, ptr);
+    VALUE message = rb_enc_sprintf(enc_utf8, "%s at line %ld column %ld", RSTRING_PTR(msg), line, column);
+    RB_GC_GUARD(msg);
+    VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
+    rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
+    rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
+    rb_exc_raise(exc);
+}
+#ifdef RBIMPL_ATTR_NORETURN
+RBIMPL_ATTR_NORETURN()
+#endif
+static void raise_parse_error_at(const char *format, JSON_ParserState *state, const char *at)
+{
+    state->cursor = at;
+    raise_parse_error(format, state);
 }
 /* unicode */
@@ -385,73 +468,25 @@ static const signed char digit_values[256] = {
     -1, -1, -1, -1, -1, -1, -1
 };
-static uint32_t unescape_unicode(const unsigned char *p)
+static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
 {
     signed char b;
     uint32_t result = 0;
     b = digit_values[p[0]];
-    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
+    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     b = digit_values[p[1]];
-    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
+    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     b = digit_values[p[2]];
-    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
+    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     b = digit_values[p[3]];
-    if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
+    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
     result = (result << 4) | (unsigned char)b;
     return result;
 }
-static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
-{
-    int len = 1;
-    if (ch <= 0x7F) {
-        buf[0] = (char) ch;
-    } else if (ch <= 0x07FF) {
-        buf[0] = (char) ((ch >> 6) | 0xC0);
-        buf[1] = (char) ((ch & 0x3F) | 0x80);
-        len++;
-    } else if (ch <= 0xFFFF) {
-        buf[0] = (char) ((ch >> 12) | 0xE0);
-        buf[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
-        buf[2] = (char) ((ch & 0x3F) | 0x80);
-        len += 2;
-    } else if (ch <= 0x1fffff) {
-        buf[0] =(char) ((ch >> 18) | 0xF0);
-        buf[1] =(char) (((ch >> 12) & 0x3F) | 0x80);
-        buf[2] =(char) (((ch >> 6) & 0x3F) | 0x80);
-        buf[3] =(char) ((ch & 0x3F) | 0x80);
-        len += 3;
-    } else {
-        buf[0] = '?';
-    }
-    return len;
-}
-typedef struct JSON_ParserStruct {
-    VALUE on_load_proc;
-    VALUE decimal_class;
-    ID decimal_method_id;
-    int max_nesting;
-    bool allow_nan;
-    bool allow_trailing_comma;
-    bool parsing_name;
-    bool symbolize_names;
-    bool freeze;
-} JSON_ParserConfig;
-typedef struct JSON_ParserStateStruct {
-    VALUE stack_handle;
-    const char *cursor;
-    const char *end;
-    rvalue_stack *stack;
-    rvalue_cache name_cache;
-    int in_array;
-    int current_nesting;
-} JSON_ParserState;
 #define GET_PARSER_CONFIG                          \
     JSON_ParserConfig *config;                      \
     TypedData_Get_Struct(self, JSON_ParserConfig, &JSON_ParserConfig_type, config)
@@ -485,8 +520,7 @@ json_eat_comments(JSON_ParserState *state)
                 while (true) {
                     state->cursor = memchr(state->cursor, '*', state->end - state->cursor);
                     if (!state->cursor) {
-                        state->cursor = state->end;
-                        raise_parse_error("unexpected end of input, expected closing '*/'", state->cursor);
+                        raise_parse_error_at("unexpected end of input, expected closing '*/'", state, state->end);
                     } else {
                         state->cursor++;
                         if (state->cursor < state->end && *state->cursor == '/') {
@@ -498,11 +532,11 @@ json_eat_comments(JSON_ParserState *state)
                 break;
             }
             default:
-                raise_parse_error("unexpected token at '%s'", state->cursor);
+                raise_parse_error("unexpected token '%s'", state);
                 break;
         }
     } else {
-        raise_parse_error("unexpected token at '%s'", state->cursor);
+        raise_parse_error("unexpected token '%s'", state);
     }
 }
@@ -621,9 +655,9 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
                 break;
             case 'u':
                 if (pe > stringEnd - 5) {
-                  raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
+                    raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
                 } else {
-                    uint32_t ch = unescape_unicode((unsigned char *) ++pe);
+                    uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
                     pe += 3;
                     /* To handle values above U+FFFF, we take a sequence of
                      * \uXXXX escapes in the U+D800..U+DBFF then
@@ -638,10 +672,10 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
                     if ((ch & 0xFC00) == 0xD800) {
                         pe++;
                         if (pe > stringEnd - 6) {
-                          raise_parse_error("incomplete surrogate pair at '%s'", p);
+                            raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
                         }
                         if (pe[0] == '\\' && pe[1] == 'u') {
-                            uint32_t sur = unescape_unicode((unsigned char *) pe + 2);
+                            uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
                             ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
                                     | (sur & 0x3FF));
                             pe += 5;
@@ -829,12 +863,12 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
                     state->cursor++;
                     escaped = true;
                     if ((unsigned char)*state->cursor < 0x20) {
-                        raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
+                        raise_parse_error("invalid ASCII control character in string: %s", state);
                     }
                     break;
                 }
                 default:
-                    raise_parse_error("invalid ASCII control character in string: %s", state->cursor);
+                    raise_parse_error("invalid ASCII control character in string: %s", state);
                     break;
             }
         }
@@ -842,7 +876,7 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
         state->cursor++;
     }
-    raise_parse_error("unexpected end of input, expected closing \"", state->cursor);
+    raise_parse_error("unexpected end of input, expected closing \"", state);
     return Qfalse;
 }
@@ -850,7 +884,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
 {
     json_eat_whitespace(state);
     if (state->cursor >= state->end) {
-        raise_parse_error("unexpected end of input", state->cursor);
+        raise_parse_error("unexpected end of input", state);
     }
     switch (*state->cursor) {
@@ -860,7 +894,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 return json_push_value(state, config, Qnil);
             }
-            raise_parse_error("unexpected token at '%s'", state->cursor);
+            raise_parse_error("unexpected token '%s'", state);
             break;
         case 't':
             if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
@@ -868,7 +902,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 return json_push_value(state, config, Qtrue);
             }
-            raise_parse_error("unexpected token at '%s'", state->cursor);
+            raise_parse_error("unexpected token '%s'", state);
             break;
         case 'f':
             // Note: memcmp with a small power of two compile to an integer comparison
@@ -877,7 +911,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 return json_push_value(state, config, Qfalse);
             }
-            raise_parse_error("unexpected token at '%s'", state->cursor);
+            raise_parse_error("unexpected token '%s'", state);
             break;
         case 'N':
             // Note: memcmp with a small power of two compile to an integer comparison
@@ -886,7 +920,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 return json_push_value(state, config, CNaN);
             }
-            raise_parse_error("unexpected token at '%s'", state->cursor);
+            raise_parse_error("unexpected token '%s'", state);
             break;
         case 'I':
             if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
@@ -894,7 +928,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 return json_push_value(state, config, CInfinity);
             }
-            raise_parse_error("unexpected token at '%s'", state->cursor);
+            raise_parse_error("unexpected token '%s'", state);
             break;
         case '-':
             // Note: memcmp with a small power of two compile to an integer comparison
@@ -903,7 +937,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                     state->cursor += 9;
                     return json_push_value(state, config, CMinusInfinity);
                 } else {
-                    raise_parse_error("unexpected token at '%s'", state->cursor);
+                    raise_parse_error("unexpected token '%s'", state);
                 }
             }
             // Fallthrough
@@ -921,11 +955,11 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
             long integer_length = state->cursor - start;
             if (RB_UNLIKELY(start[0] == '0' && integer_length > 1)) {
-                raise_parse_error("invalid number: %s", start);
+                raise_parse_error_at("invalid number: %s", state, start);
             } else if (RB_UNLIKELY(integer_length > 2 && start[0] == '-' && start[1] == '0')) {
-                raise_parse_error("invalid number: %s", start);
+                raise_parse_error_at("invalid number: %s", state, start);
             } else if (RB_UNLIKELY(integer_length == 1 && start[0] == '-')) {
-                raise_parse_error("invalid number: %s", start);
+                raise_parse_error_at("invalid number: %s", state, start);
             }
             if ((state->cursor < state->end) && (*state->cursor == '.')) {
@@ -933,7 +967,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 state->cursor++;
                 if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
-                    raise_parse_error("invalid number: %s", state->cursor);
+                    raise_parse_error("invalid number: %s", state);
                 }
                 while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
@@ -949,7 +983,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 }
                 if (state->cursor == state->end || *state->cursor < '0' || *state->cursor > '9') {
-                    raise_parse_error("invalid number: %s", state->cursor);
+                    raise_parse_error("invalid number: %s", state);
                 }
                 while ((state->cursor < state->end) && (*state->cursor >= '0') && (*state->cursor <= '9')) {
@@ -1009,7 +1043,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                     }
                 }
-                raise_parse_error("expected ',' or ']' after array value", state->cursor);
+                raise_parse_error("expected ',' or ']' after array value", state);
             }
             break;
         }
@@ -1028,13 +1062,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                 }
                 if (*state->cursor != '"') {
-                    raise_parse_error("expected object key, got '%s", state->cursor);
+                    raise_parse_error("expected object key, got '%s", state);
                 }
                 json_parse_string(state, config, true);
                 json_eat_whitespace(state);
                 if ((state->cursor >= state->end) || (*state->cursor != ':')) {
-                    raise_parse_error("expected ':' after object key", state->cursor);
+                    raise_parse_error("expected ':' after object key", state);
                 }
                 state->cursor++;
@@ -1063,13 +1097,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                         }
                         if (*state->cursor != '"') {
-                            raise_parse_error("expected object key, got: '%s'", state->cursor);
+                            raise_parse_error("expected object key, got: '%s'", state);
                         }
                         json_parse_string(state, config, true);
                         json_eat_whitespace(state);
                         if ((state->cursor >= state->end) || (*state->cursor != ':')) {
-                            raise_parse_error("expected ':' after object key, got: '%s", state->cursor);
+                            raise_parse_error("expected ':' after object key, got: '%s", state);
                         }
                         state->cursor++;
@@ -1079,24 +1113,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
                     }
                 }
-                raise_parse_error("expected ',' or '}' after object value, got: '%s'", state->cursor);
+                raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
             }
             break;
         }
         default:
-            raise_parse_error("unexpected character: '%s'", state->cursor);
+            raise_parse_error("unexpected character: '%s'", state);
             break;
     }
-    raise_parse_error("unreacheable: '%s'", state->cursor);
+    raise_parse_error("unreacheable: '%s'", state);
 }
 static void json_ensure_eof(JSON_ParserState *state)
 {
     json_eat_whitespace(state);
     if (state->cursor != state->end) {
-        raise_parse_error("unexpected token at end of stream '%s'", state->cursor);
+        raise_parse_error("unexpected token at end of stream '%s'", state);
     }
 }
@@ -1232,9 +1266,14 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE Vsource)
         .capa = RVALUE_STACK_INITIAL_CAPA,
     };
+    long len;
+    const char *start;
+    RSTRING_GETMEM(Vsource, start, len);
     JSON_ParserState _state = {
-        .cursor = RSTRING_PTR(Vsource),
-        .end = RSTRING_END(Vsource),
+        .start = start,
+        .cursor = start,
+        .end = start + len,
         .stack = &stack,
     };
     JSON_ParserState *state = &_state;

data/ext/json/ext/vendor/fpconv.c CHANGED Viewed

@@ -41,7 +41,7 @@ typedef struct Fp {
     int exp;
 } Fp;
-static Fp powers_ten[] = {
+static const Fp powers_ten[] = {
     { 18054884314459144840U, -1220 }, { 13451937075301367670U, -1193 },
     { 10022474136428063862U, -1166 }, { 14934650266808366570U, -1140 },
     { 11127181549972568877U, -1113 }, { 16580792590934885855U, -1087 },
@@ -123,7 +123,7 @@ static Fp find_cachedpow10(int exp, int* k)
 #define absv(n) ((n) < 0 ? -(n) : (n))
 #define minv(a, b) ((a) < (b) ? (a) : (b))
-static uint64_t tens[] = {
+static const uint64_t tens[] = {
     10000000000000000000U, 1000000000000000000U, 100000000000000000U,
     10000000000000000U, 1000000000000000U, 100000000000000U,
     10000000000000U, 1000000000000U, 100000000000U,
@@ -244,7 +244,7 @@ static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
     uint64_t part2 = upper->frac & (one.frac - 1);
     int idx = 0, kappa = 10;
-    uint64_t* divp;
+    const uint64_t* divp;
     /* 1000000000 */
     for(divp = tens + 10; kappa > 0; divp++) {
@@ -268,7 +268,7 @@ static int generate_digits(Fp* fp, Fp* upper, Fp* lower, char* digits, int* K)
     }
     /* 10 */
-    uint64_t* unit = tens + 18;
+    const uint64_t* unit = tens + 18;
     while(true) {
         part2 *= 10;
@@ -340,7 +340,7 @@ static int emit_digits(char* digits, int ndigits, char* dest, int K, bool neg)
     }
     /* write decimal w/o scientific notation */
-    if(K < 0 && (K > -7 || exp < 4)) {
+    if(K < 0 && (K > -7 || exp < 10)) {
         int offset = ndigits - absv(K);
         /* fp < 1.0 -> write leading zero */
         if(offset <= 0) {

data/lib/json/common.rb CHANGED Viewed

@@ -230,7 +230,9 @@ module JSON
   class JSONError < StandardError; end
   # This exception is raised if a parser error occurs.
-  class ParserError < JSONError; end
+  class ParserError < JSONError
+    attr_reader :line, :column
+  end
   # This exception is raised if the nesting of parsed data structures is too
   # deep.

data/lib/json/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module JSON
-  VERSION = '2.11.3'
+  VERSION = '2.12.0'
 end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: json
 version: !ruby/object:Gem::Version
-  version: 2.11.3
+  version: 2.12.0
 platform: ruby
 authors:
 - Florian Frank
 bindir: bin
 cert_chain: []
-date: 2025-04-25 00:00:00.000000000 Z
+date: 2025-05-12 00:00:00.000000000 Z
 dependencies: []
 description: This is a JSON implementation as a Ruby extension in C.
 email: flori@ping.de
@@ -26,6 +26,7 @@ files:
 - ext/json/ext/fbuffer/fbuffer.h
 - ext/json/ext/generator/extconf.rb
 - ext/json/ext/generator/generator.c
+- ext/json/ext/generator/simd.h
 - ext/json/ext/parser/extconf.rb
 - ext/json/ext/parser/parser.c
 - ext/json/ext/vendor/fpconv.c