RubyGems - json - Versions diffs - 2.18.0 → 2.18.1 - Mend

json 2.18.0 → 2.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml +4 -4
data/CHANGES.md +6 -1
data/ext/json/ext/fbuffer/fbuffer.h +8 -6
data/ext/json/ext/generator/generator.c +48 -26
data/ext/json/ext/json.h +4 -0
data/ext/json/ext/parser/parser.c +53 -60
data/ext/json/ext/simd/simd.h +28 -1
data/ext/json/ext/vendor/fpconv.c +1 -1
data/lib/json/version.rb +1 -1
data/lib/json.rb +21 -0
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 51eab66896e862b679d424133f11e1367d5d8e71add943e67cf0673d0d562fcd
-  data.tar.gz: 7b69d4a42137897fe9a45bd60a21b759d133c112cb4ba16020099f27074ac2fd
+  metadata.gz: cb2890db4c527125d27bc7c21fc64d3ac532ffbec8080f89a678daf48c36e09e
+  data.tar.gz: c4b37d085d05d3c43df97b3c24898dc6be61c76ba64c749b5a8a86bf4fc1198d
 SHA512:
-  metadata.gz: ea3b026c8ccd6cb477858bf06f07f8b5adc5bcf7b52a175487c19dc2835ef63db2e4f87074a00ec2fe2c70e588c205f679116536da40f15e767f35351a52fc5c
-  data.tar.gz: f58144a5329ad95128e00bbc5670280f6a699e04cf05060bdfc7acaf112e62eb44c14c36328a9683d86e138c6eb2f3599f5ec35347867ac99ab9f0e16813c4df
+  metadata.gz: fb55ef5a0aa6961ef0fe3bb30f398834820357045ad27a8fdb7e53eaba3af7c4d356ef26c0e73b7a87d2d9d51e500eae7193d7d1ae3aa1058c7973bcc462674b
+  data.tar.gz: bfb499789bbcee7f5f8d67e32ded664dc62c632ae39fe80bf4bff3d6aec16eee3730a7c4883216a393326f2ab33e94e5f9c58da4c5b31627347108c36c2b211c

data/CHANGES.md CHANGED Viewed

@@ -2,6 +2,11 @@
 ### Unreleased
+### 2026-02-03 (2.18.1)
+* Fix a potential crash in very specific circumstance if GC triggers during a call to `to_json`
+  without first invoking a user defined `#to_json` method.
 ### 2025-12-11 (2.18.0)
 * Add `:allow_control_characters` parser options, to allow JSON strings containing unescaped ASCII control characters (e.g. newlines).
@@ -66,7 +71,7 @@
 * Fix `JSON.generate` `strict: true` mode to also restrict hash keys.
 * Fix `JSON::Coder` to also invoke block for hash keys that aren't strings nor symbols.
 * Fix `JSON.unsafe_load` usage with proc
-* Fix the parser to more consistently reject invalid UTF-16 surogate pairs.
+* Fix the parser to more consistently reject invalid UTF-16 surogate pairs.
 * Stop defining `String.json_create`, `String#to_json_raw`, `String#to_json_raw_object` when `json/add` isn't loaded.
 ### 2025-07-28 (2.13.2)

data/ext/json/ext/fbuffer/fbuffer.h CHANGED Viewed

@@ -161,23 +161,25 @@ static inline void fbuffer_append_reserved_char(FBuffer *fb, char chr)
 static void fbuffer_append_str(FBuffer *fb, VALUE str)
 {
-    const char *newstr = StringValuePtr(str);
-    unsigned long len = RSTRING_LEN(str);
+    const char *ptr;
+    unsigned long len;
+    RSTRING_GETMEM(str, ptr, len);
-    fbuffer_append(fb, newstr, len);
+    fbuffer_append(fb, ptr, len);
 }
 static void fbuffer_append_str_repeat(FBuffer *fb, VALUE str, size_t repeat)
 {
-    const char *newstr = StringValuePtr(str);
-    unsigned long len = RSTRING_LEN(str);
+    const char *ptr;
+    unsigned long len;
+    RSTRING_GETMEM(str, ptr, len);
     fbuffer_inc_capa(fb, repeat * len);
     while (repeat) {
 #if JSON_DEBUG
         fb->requested = len;
 #endif
-        fbuffer_append_reserved(fb, newstr, len);
+        fbuffer_append_reserved(fb, ptr, len);
         repeat--;
     }
 }

data/ext/json/ext/generator/generator.c CHANGED Viewed

@@ -63,6 +63,8 @@ struct generate_json_data {
     long depth;
 };
+static SIMD_Implementation simd_impl;
 static VALUE cState_from_state_s(VALUE self, VALUE opts);
 static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
 static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
@@ -155,8 +157,6 @@ static const unsigned char escape_table_basic[256] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };
-static unsigned char (*search_escape_basic_impl)(search_state *);
 static inline unsigned char search_escape_basic(search_state *search)
 {
     while (search->ptr < search->end) {
@@ -212,11 +212,39 @@ ALWAYS_INLINE(static) void escape_UTF8_char_basic(search_state *search)
  * Everything else (should be UTF-8) is just passed through and
  * appended to the result.
  */
+#if defined(HAVE_SIMD_NEON)
+static inline unsigned char search_escape_basic_neon(search_state *search);
+#elif defined(HAVE_SIMD_SSE2)
+static inline unsigned char search_escape_basic_sse2(search_state *search);
+#endif
+static inline unsigned char search_escape_basic(search_state *search);
 static inline void convert_UTF8_to_JSON(search_state *search)
 {
-    while (search_escape_basic_impl(search)) {
+#ifdef HAVE_SIMD
+#if defined(HAVE_SIMD_NEON)
+    while (search_escape_basic_neon(search)) {
+        escape_UTF8_char_basic(search);
+    }
+#elif defined(HAVE_SIMD_SSE2)
+    if (simd_impl == SIMD_SSE2) {
+        while (search_escape_basic_sse2(search)) {
+            escape_UTF8_char_basic(search);
+        }
+        return;
+    }
+    while (search_escape_basic(search)) {
+        escape_UTF8_char_basic(search);
+    }
+#endif
+#else
+    while (search_escape_basic(search)) {
         escape_UTF8_char_basic(search);
     }
+#endif /* HAVE_SIMD */
 }
 static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
@@ -260,6 +288,8 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
 ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
 {
+    RBIMPL_ASSERT_OR_ASSUME(len < vec_len);
     // Flush the buffer so everything up until the last 'len' characters are unflushed.
     search_flush(search);
@@ -269,12 +299,18 @@ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned
     char *s = (buf->ptr + buf->len);
     // Pad the buffer with dummy characters that won't need escaping.
-    // This seem wateful at first sight, but memset of vector length is very fast.
-    memset(s, 'X', vec_len);
+    // This seem wasteful at first sight, but memset of vector length is very fast.
+    // This is a space as it can be directly represented as an immediate on AArch64.
+    memset(s, ' ', vec_len);
     // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
     // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
-    MEMCPY(s, search->ptr, char, len);
+    if (vec_len == 16) {
+        RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD);
+        json_fast_memcpy16(s, search->ptr, len);
+    } else {
+        MEMCPY(s, search->ptr, char, len);
+    }
     return s;
 }
@@ -1091,6 +1127,7 @@ static void raw_generate_json_string(FBuffer *buffer, struct generate_json_data
     search.matches_mask = 0;
     search.has_matches = false;
     search.chunk_base = NULL;
+    search.chunk_end = NULL;
 #endif /* HAVE_SIMD */
     switch (rb_enc_str_coderange(obj)) {
@@ -1337,7 +1374,7 @@ static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *dat
 static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     VALUE tmp = rb_funcall(obj, i_to_s, 0);
-    fbuffer_append_str(buffer, tmp);
+    fbuffer_append_str(buffer, StringValue(tmp));
 }
 #ifdef RUBY_INTEGER_UNIFICATION
@@ -1503,7 +1540,9 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func,
         .obj = obj,
         .func = func
     };
-    return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
+    VALUE result = rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data);
+    RB_GC_GUARD(self);
+    return result;
 }
 /* call-seq:
@@ -2181,22 +2220,5 @@ void Init_generator(void)
     rb_require("json/ext/generator/state");
-    switch (find_simd_implementation()) {
-#ifdef HAVE_SIMD
-#ifdef HAVE_SIMD_NEON
-        case SIMD_NEON:
-            search_escape_basic_impl = search_escape_basic_neon;
-            break;
-#endif /* HAVE_SIMD_NEON */
-#ifdef HAVE_SIMD_SSE2
-        case SIMD_SSE2:
-            search_escape_basic_impl = search_escape_basic_sse2;
-            break;
-#endif /* HAVE_SIMD_SSE2 */
-#endif /* HAVE_SIMD */
-        default:
-            search_escape_basic_impl = search_escape_basic;
-            break;
-    }
+    simd_impl = find_simd_implementation();
 }

data/ext/json/ext/json.h CHANGED Viewed

@@ -5,6 +5,10 @@
 #include "ruby/encoding.h"
 #include <stdint.h>
+#ifndef RBIMPL_ASSERT_OR_ASSUME
+# define RBIMPL_ASSERT_OR_ASSUME(x)
+#endif
 #if defined(RUBY_DEBUG) && RUBY_DEBUG
 # define JSON_ASSERT RUBY_ASSERT
 #else

data/ext/json/ext/parser/parser.c CHANGED Viewed

@@ -477,23 +477,24 @@ static const signed char digit_values[256] = {
     -1, -1, -1, -1, -1, -1, -1
 };
-static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
+static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
 {
-    signed char b;
-    uint32_t result = 0;
-    b = digit_values[p[0]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[1]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[2]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[3]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    return result;
+    if (RB_UNLIKELY(sp > spe - 4)) {
+        raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+    }
+    const unsigned char *p = (const unsigned char *)sp;
+    const signed char b0 = digit_values[p[0]];
+    const signed char b1 = digit_values[p[1]];
+    const signed char b2 = digit_values[p[2]];
+    const signed char b3 = digit_values[p[3]];
+    if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) {
+        raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+    }
+    return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3;
 }
 #define GET_PARSER_CONFIG                          \
@@ -643,7 +644,7 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserCon
 typedef struct _json_unescape_positions {
     long size;
     const char **positions;
-    bool has_more;
+    unsigned long additional_backslashes;
 } JSON_UnescapePositions;
 static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
@@ -657,7 +658,8 @@ static inline const char *json_next_backslash(const char *pe, const char *string
         }
     }
-    if (positions->has_more) {
+    if (positions->additional_backslashes) {
+        positions->additional_backslashes--;
         return memchr(pe, '\\', stringEnd - pe);
     }
@@ -707,50 +709,41 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
             case 'f':
                 APPEND_CHAR('\f');
                 break;
-            case 'u':
-                if (pe > stringEnd - 5) {
-                    raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
-                } else {
-                    uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
-                    pe += 3;
-                    /* To handle values above U+FFFF, we take a sequence of
-                     * \uXXXX escapes in the U+D800..U+DBFF then
-                     * U+DC00..U+DFFF ranges, take the low 10 bits from each
-                     * to make a 20-bit number, then add 0x10000 to get the
-                     * final codepoint.
-                     *
-                     * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
-                     * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
-                     * Area".
-                     */
-                    if ((ch & 0xFC00) == 0xD800) {
-                        pe++;
-                        if (pe > stringEnd - 6) {
-                            raise_parse_error_at("incomplete surrogate pair at %s", state, p);
-                        }
-                        if (pe[0] == '\\' && pe[1] == 'u') {
-                            uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
-                            if ((sur & 0xFC00) != 0xDC00) {
-                                raise_parse_error_at("invalid surrogate pair at %s", state, p);
-                            }
-                            ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
-                                    | (sur & 0x3FF));
-                            pe += 5;
-                        } else {
-                            raise_parse_error_at("incomplete surrogate pair at %s", state, p);
-                            break;
+            case 'u': {
+                uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
+                pe += 3;
+                /* To handle values above U+FFFF, we take a sequence of
+                 * \uXXXX escapes in the U+D800..U+DBFF then
+                 * U+DC00..U+DFFF ranges, take the low 10 bits from each
+                 * to make a 20-bit number, then add 0x10000 to get the
+                 * final codepoint.
+                 *
+                 * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
+                 * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
+                 * Area".
+                 */
+                if ((ch & 0xFC00) == 0xD800) {
+                    pe++;
+                    if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
+                        uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
+                        if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
+                            raise_parse_error_at("invalid surrogate pair at %s", state, p);
                         }
-                    }
-                    char buf[4];
-                    int unescape_len = convert_UTF32_to_UTF8(buf, ch);
-                    MEMCPY(buffer, buf, char, unescape_len);
-                    buffer += unescape_len;
-                    p = ++pe;
+                        ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
+                        pe += 5;
+                    } else {
+                        raise_parse_error_at("incomplete surrogate pair at %s", state, p);
+                        break;
+                    }
                 }
+                int unescape_len = convert_UTF32_to_UTF8(buffer, ch);
+                buffer += unescape_len;
+                p = ++pe;
                 break;
+            }
             default:
                 if ((unsigned char)*pe < 0x20) {
                     if (!config->allow_control_characters) {
@@ -992,7 +985,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
     JSON_UnescapePositions positions = {
         .size = 0,
         .positions = backslashes,
-        .has_more = false,
+        .additional_backslashes = 0,
     };
     do {
@@ -1007,7 +1000,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
                     backslashes[positions.size] = state->cursor;
                     positions.size++;
                 } else {
-                    positions.has_more = true;
+                    positions.additional_backslashes++;
                 }
                 state->cursor++;
                 break;

data/ext/json/ext/simd/simd.h CHANGED Viewed

@@ -58,7 +58,34 @@ static inline int trailing_zeros(int input)
 #ifdef JSON_ENABLE_SIMD
-#define SIMD_MINIMUM_THRESHOLD 6
+#define SIMD_MINIMUM_THRESHOLD 4
+ALWAYS_INLINE(static) void json_fast_memcpy16(char *dst, const char *src, size_t len)
+{
+    RBIMPL_ASSERT_OR_ASSUME(len < 16);
+    RBIMPL_ASSERT_OR_ASSUME(len >= SIMD_MINIMUM_THRESHOLD); // 4
+#if defined(__has_builtin) && __has_builtin(__builtin_memcpy)
+    // If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD (4) and vec_len-1 (15) bytes.
+    // These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
+    // the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
+    // position in both copies.
+    // Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
+    // generated assembly. On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
+    // when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
+    // select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
+    // plus two loads and stores generated when using __builtin_memcpy.
+    if (len >= 8) {
+        __builtin_memcpy(dst, src, 8);
+        __builtin_memcpy(dst + len - 8, src + len - 8, 8);
+    } else {
+        __builtin_memcpy(dst, src, 4);
+        __builtin_memcpy(dst + len - 4, src + len - 4, 4);
+    }
+#else
+    MEMCPY(dst, src, char, len);
+#endif
+}
 #if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
 #include <arm_neon.h>

data/ext/json/ext/vendor/fpconv.c CHANGED Viewed

@@ -449,7 +449,7 @@ static int filter_special(double fp, char* dest)
  * }
  *
  */
-static int fpconv_dtoa(double d, char dest[28])
+static int fpconv_dtoa(double d, char dest[32])
 {
     char digits[18];

data/lib/json/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module JSON
-  VERSION = '2.18.0'
+  VERSION = '2.18.1'
 end

data/lib/json.rb CHANGED Viewed

@@ -6,6 +6,15 @@ require 'json/common'
 #
 # \JSON is a lightweight data-interchange format.
 #
+# \JSON is easy for us humans to read and write,
+# and equally simple for machines to read (parse) and write (generate).
+#
+# \JSON is language-independent, making it an ideal interchange format
+# for applications in differing programming languages
+# and on differing operating systems.
+#
+# == \JSON Values
+#
 # A \JSON value is one of the following:
 # - Double-quoted text:  <tt>"foo"</tt>.
 # - Number:  +1+, +1.0+, +2.0e2+.
@@ -173,6 +182,18 @@ require 'json/common'
 # When enabled:
 #   JSON.parse('[1,]', allow_trailing_comma: true) # => [1]
 #
+# ---
+#
+# Option +allow_control_characters+ (boolean) specifies whether to allow
+# unescaped ASCII control characters, such as newlines, in strings;
+# defaults to +false+.
+#
+# With the default, +false+:
+#   JSON.parse(%{"Hello\nWorld"}) # invalid ASCII control character in string (JSON::ParserError)
+#
+# When enabled:
+#   JSON.parse(%{"Hello\nWorld"}, allow_control_characters: true) # => "Hello\nWorld"
+#
 # ====== Output Options
 #
 # Option +freeze+ (boolean) specifies whether the returned objects will be frozen;

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: json
 version: !ruby/object:Gem::Version
-  version: 2.18.0
+  version: 2.18.1
 platform: ruby
 authors:
 - Florian Frank
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.6.9
+rubygems_version: 4.1.0.dev
 specification_version: 4
 summary: JSON Implementation for Ruby
 test_files: []