RubyGems - json - Versions diffs - 2.10.2 → 2.13.2 - Mend

json 2.10.2 → 2.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +4 -4
data/CHANGES.md +70 -0
data/README.md +13 -0
data/ext/json/ext/fbuffer/fbuffer.h +80 -15
data/ext/json/ext/generator/extconf.rb +6 -0
data/ext/json/ext/generator/generator.c +458 -118
data/ext/json/ext/parser/extconf.rb +5 -2
data/ext/json/ext/parser/parser.c +333 -267
data/ext/json/ext/simd/conf.rb +24 -0
data/ext/json/ext/simd/simd.h +188 -0
data/ext/json/ext/vendor/fpconv.c +479 -0
data/ext/json/ext/vendor/jeaiii-ltoa.h +267 -0
data/json.gemspec +2 -3
data/lib/json/common.rb +282 -164
data/lib/json/ext.rb +2 -2
data/lib/json/truffle_ruby/generator.rb +1 -1
data/lib/json/version.rb +1 -1
data/lib/json.rb +33 -0
metadata +6 -2

data/ext/json/ext/generator/generator.c CHANGED Viewed

@@ -1,9 +1,12 @@
 #include "ruby.h"
 #include "../fbuffer/fbuffer.h"
+#include "../vendor/fpconv.c"
 #include <math.h>
 #include <ctype.h>
+#include "../simd/simd.h"
 /* ruby api and some helpers */
 typedef struct JSON_Generator_StateStruct {
@@ -44,7 +47,7 @@ static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_n
 struct generate_json_data;
-typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
 struct generate_json_data {
     FBuffer *buffer;
@@ -56,20 +59,20 @@ struct generate_json_data {
 static VALUE cState_from_state_s(VALUE self, VALUE opts);
 static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io);
-static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
 #ifdef RUBY_INTEGER_UNIFICATION
-static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
 #endif
-static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
-static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
+static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj);
 static int usascii_encindex, utf8_encindex, binary_encindex;
@@ -108,12 +111,40 @@ typedef struct _search_state {
     const char *end;
     const char *cursor;
     FBuffer *buffer;
+#ifdef HAVE_SIMD
+    const char *chunk_base;
+    const char *chunk_end;
+    bool has_matches;
+#if defined(HAVE_SIMD_NEON)
+    uint64_t matches_mask;
+#elif defined(HAVE_SIMD_SSE2)
+    int matches_mask;
+#else
+#error "Unknown SIMD Implementation."
+#endif /* HAVE_SIMD_NEON */
+#endif /* HAVE_SIMD */
 } search_state;
-static inline void search_flush(search_state *search)
-{
-    fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
-    search->cursor = search->ptr;
+#if (defined(__GNUC__ ) || defined(__clang__))
+#define FORCE_INLINE __attribute__((always_inline))
+#else
+#define FORCE_INLINE
+#endif
+static inline FORCE_INLINE void search_flush(search_state *search)
+{
+    // Do not remove this conditional without profiling, specifically escape-heavy text.
+    // escape_UTF8_char_basic will advance search->ptr and search->cursor (effectively a search_flush).
+    // For back-to-back characters that need to be escaped, specifically for the SIMD code paths, this method
+    // will be called just before calling escape_UTF8_char_basic. There will be no characters to append for the
+    // consecutive characters that need to be escaped. While the fbuffer_append is a no-op if
+    // nothing needs to be flushed, we can save a few memory references with this conditional.
+    if (search->ptr > search->cursor) {
+        fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor);
+        search->cursor = search->ptr;
+    }
 }
 static const unsigned char escape_table_basic[256] = {
@@ -129,6 +160,8 @@ static const unsigned char escape_table_basic[256] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };
+static unsigned char (*search_escape_basic_impl)(search_state *);
 static inline unsigned char search_escape_basic(search_state *search)
 {
     while (search->ptr < search->end) {
@@ -143,7 +176,8 @@ static inline unsigned char search_escape_basic(search_state *search)
     return 0;
 }
-static inline void escape_UTF8_char_basic(search_state *search) {
+static inline FORCE_INLINE void escape_UTF8_char_basic(search_state *search)
+{
     const unsigned char ch = (unsigned char)*search->ptr;
     switch (ch) {
         case '"':  fbuffer_append(search->buffer, "\\\"", 2); break;
@@ -185,12 +219,13 @@ static inline void escape_UTF8_char_basic(search_state *search) {
  */
 static inline void convert_UTF8_to_JSON(search_state *search)
 {
-    while (search_escape_basic(search)) {
+    while (search_escape_basic_impl(search)) {
         escape_UTF8_char_basic(search);
     }
 }
-static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) {
+static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
+{
     const unsigned char ch = (unsigned char)*search->ptr;
     switch (ch_len) {
         case 1: {
@@ -226,6 +261,228 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len)
     search->cursor = (search->ptr += ch_len);
 }
+#ifdef HAVE_SIMD
+static inline FORCE_INLINE char *copy_remaining_bytes(search_state *search, unsigned long vec_len, unsigned long len)
+{
+    // Flush the buffer so that only the last 'len' characters remain unflushed.
+    search_flush(search);
+    FBuffer *buf = search->buffer;
+    fbuffer_inc_capa(buf, vec_len);
+    char *s = (buf->ptr + buf->len);
+    // Pad the buffer with dummy characters that won't need escaping.
+    // This seems wasteful at first sight, but memset of vector length is very fast.
+    memset(s, 'X', vec_len);
+    // Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
+    // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
+    MEMCPY(s, search->ptr, char, len);
+    return s;
+}
+#ifdef HAVE_SIMD_NEON
+static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
+{
+    uint64_t mask = search->matches_mask;
+    uint32_t index = trailing_zeros64(mask) >> 2;
+    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+    // If we want to use a similar approach for full escaping we'll need to ensure:
+    //     search->chunk_base + index >= search->ptr
+    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+    // is one byte after the previous match then:
+    //     search->chunk_base + index == search->ptr
+    search->ptr = search->chunk_base + index;
+    mask &= mask - 1;
+    search->matches_mask = mask;
+    search_flush(search);
+    return 1;
+}
+static inline unsigned char search_escape_basic_neon(search_state *search)
+{
+    if (RB_UNLIKELY(search->has_matches)) {
+        // There are more matches if search->matches_mask > 0.
+        if (search->matches_mask > 0) {
+            return neon_next_match(search);
+        } else {
+            // neon_next_match will only advance search->ptr up to the last matching character.
+            // Skip over any characters in the last chunk that occur after the last match.
+            search->has_matches = false;
+            search->ptr = search->chunk_end;
+        }
+    }
+    /*
+    * The code below implements an SIMD-based algorithm to determine if N bytes at a time
+    * need to be escaped.
+    *
+    * Assume the ptr = "Te\sting!" (the double quotes are included in the string)
+    *
+    * The explanation will be limited to the first 8 bytes of the string for simplicity. However
+    * the vector instructions may work on larger vectors.
+    *
+    * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers.
+    *
+    * lower_bound: [20 20 20 20 20 20 20 20]
+    * backslash:   [5C 5C 5C 5C 5C 5C 5C 5C]
+    * dblquote:    [22 22 22 22 22 22 22 22]
+    *
+    * Next we load the first chunk of the ptr:
+    * [22 54 65 5C 73 74 69 6E] ("  T  e  \  s  t  i  n)
+    *
+    * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector
+    * as no bytes are less than 32 (0x20):
+    * [0 0 0 0 0 0 0 0]
+    *
+    * Next, we check if any byte in chunk is equal to a backslash:
+    * [0 0 0 FF 0 0 0 0]
+    *
+    * Finally we check if any byte in chunk is equal to a double quote:
+    * [FF 0 0 0 0 0 0 0]
+    *
+    * Now we have three vectors where each byte indicates if the corresponding byte in chunk
+    * needs to be escaped. We combine these vectors with a series of logical OR instructions.
+    * This is the needs_escape vector and it is equal to:
+    * [FF 0 0 FF 0 0 0 0]
+    *
+    * Next we compute the bitwise AND between each byte and 0x1 and compute the horizontal sum of
+    * the values in the vector. This computes how many bytes need to be escaped within this chunk.
+    *
+    * Finally we compute a mask that indicates which bytes need to be escaped. If the mask is 0 then,
+    * no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
+    * have at least one byte that needs to be escaped.
+    */
+    if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
+        search->has_matches = true;
+        search->chunk_base = search->ptr;
+        search->chunk_end = search->ptr + sizeof(uint8x16_t);
+        return neon_next_match(search);
+    }
+    // There are fewer than 16 bytes left.
+    unsigned long remaining = (search->end - search->ptr);
+    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+        char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
+        uint64_t mask = compute_chunk_mask_neon(s);
+        if (!mask) {
+            // Nothing to escape, ensure search_flush doesn't do anything by setting
+            // search->cursor to search->ptr.
+            fbuffer_consumed(search->buffer, remaining);
+            search->ptr = search->end;
+            search->cursor = search->end;
+            return 0;
+        }
+        search->matches_mask = mask;
+        search->has_matches = true;
+        search->chunk_end = search->end;
+        search->chunk_base = search->ptr;
+        return neon_next_match(search);
+    }
+    if (search->ptr < search->end) {
+        return search_escape_basic(search);
+    }
+    search_flush(search);
+    return 0;
+}
+#endif /* HAVE_SIMD_NEON */
+#ifdef HAVE_SIMD_SSE2
+static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
+{
+    int mask = search->matches_mask;
+    int index = trailing_zeros(mask);
+    // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character.
+    // If we want to use a similar approach for full escaping we'll need to ensure:
+    //     search->chunk_base + index >= search->ptr
+    // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match
+    // is one byte after the previous match then:
+    //     search->chunk_base + index == search->ptr
+    search->ptr = search->chunk_base + index;
+    mask &= mask - 1;
+    search->matches_mask = mask;
+    search_flush(search);
+    return 1;
+}
+#if defined(__clang__) || defined(__GNUC__)
+#define TARGET_SSE2 __attribute__((target("sse2")))
+#else
+#define TARGET_SSE2
+#endif
+static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
+{
+    if (RB_UNLIKELY(search->has_matches)) {
+        // There are more matches if search->matches_mask > 0.
+        if (search->matches_mask > 0) {
+            return sse2_next_match(search);
+        } else {
+            // sse2_next_match will only advance search->ptr up to the last matching character.
+            // Skip over any characters in the last chunk that occur after the last match.
+            search->has_matches = false;
+            if (RB_UNLIKELY(search->chunk_base + sizeof(__m128i) >= search->end)) {
+                search->ptr = search->end;
+            } else {
+                search->ptr = search->chunk_base + sizeof(__m128i);
+            }
+        }
+    }
+    if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
+        search->has_matches = true;
+        search->chunk_base = search->ptr;
+        search->chunk_end = search->ptr + sizeof(__m128i);
+        return sse2_next_match(search);
+    }
+    // There are fewer than 16 bytes left.
+    unsigned long remaining = (search->end - search->ptr);
+    if (remaining >= SIMD_MINIMUM_THRESHOLD) {
+        char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
+        int needs_escape_mask = compute_chunk_mask_sse2(s);
+        if (needs_escape_mask == 0) {
+            // Nothing to escape, ensure search_flush doesn't do anything by setting
+            // search->cursor to search->ptr.
+            fbuffer_consumed(search->buffer, remaining);
+            search->ptr = search->end;
+            search->cursor = search->end;
+            return 0;
+        }
+        search->has_matches = true;
+        search->matches_mask = needs_escape_mask;
+        search->chunk_base = search->ptr;
+        return sse2_next_match(search);
+    }
+    if (search->ptr < search->end) {
+        return search_escape_basic(search);
+    }
+    search_flush(search);
+    return 0;
+}
+#endif /* HAVE_SIMD_SSE2 */
+#endif /* HAVE_SIMD */
 static const unsigned char script_safe_escape_table[256] = {
     // ASCII Control Characters
      9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@@ -329,7 +586,8 @@ static inline unsigned char search_ascii_only_escape(search_state *search, const
     return 0;
 }
-static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len) {
+static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_len)
+{
     const unsigned char ch = (unsigned char)*search->ptr;
     switch (ch_len) {
         case 1: {
@@ -359,7 +617,7 @@ static inline void full_escape_UTF8_char(search_state *search, unsigned char ch_
             uint32_t wchar = 0;
-            switch(ch_len) {
+            switch (ch_len) {
                 case 2:
                     wchar = ch & 0x1F;
                     break;
@@ -519,7 +777,8 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self)
  * _state_ is a JSON::State object, that can also be used to configure the
  * produced JSON string output further.
  */
-static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) {
+static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self)
+{
     rb_check_arity(argc, 0, 1);
     VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil);
     return cState_partial_generate(Vstate, self, generate_json_array, Qfalse);
@@ -581,7 +840,8 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self)
  *
  * Extends _modul_ with the String::Extend module.
  */
-static VALUE mString_included_s(VALUE self, VALUE modul) {
+static VALUE mString_included_s(VALUE self, VALUE modul)
+{
     VALUE result = rb_funcall(modul, i_extend, 1, mString_Extend);
     rb_call_super(1, &modul);
     return result;
@@ -788,6 +1048,21 @@ struct hash_foreach_arg {
     int iter;
 };
+static VALUE
+convert_string_subclass(VALUE key)
+{
+    VALUE key_to_s = rb_funcall(key, i_to_s, 0);
+    if (RB_UNLIKELY(!RB_TYPE_P(key_to_s, T_STRING))) {
+        VALUE cname = rb_obj_class(key);
+        rb_raise(rb_eTypeError,
+                 "can't convert %"PRIsVALUE" to %s (%"PRIsVALUE"#%s gives %"PRIsVALUE")",
+                 cname, "String", cname, "to_s", rb_obj_class(key_to_s));
+    }
+    return key_to_s;
+}
 static int
 json_object_i(VALUE key, VALUE val, VALUE _arg)
 {
@@ -801,22 +1076,22 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
     int j;
     if (arg->iter > 0) fbuffer_append_char(buffer, ',');
-    if (RB_UNLIKELY(state->object_nl)) {
-        fbuffer_append_str(buffer, state->object_nl);
+    if (RB_UNLIKELY(data->state->object_nl)) {
+        fbuffer_append_str(buffer, data->state->object_nl);
     }
-    if (RB_UNLIKELY(state->indent)) {
+    if (RB_UNLIKELY(data->state->indent)) {
         for (j = 0; j < depth; j++) {
-            fbuffer_append_str(buffer, state->indent);
+            fbuffer_append_str(buffer, data->state->indent);
         }
     }
     VALUE key_to_s;
-    switch(rb_type(key)) {
+    switch (rb_type(key)) {
         case T_STRING:
             if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
                 key_to_s = key;
             } else {
-                key_to_s = rb_funcall(key, i_to_s, 0);
+                key_to_s = convert_string_subclass(key);
             }
             break;
         case T_SYMBOL:
@@ -828,21 +1103,22 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
     }
     if (RB_LIKELY(RBASIC_CLASS(key_to_s) == rb_cString)) {
-        generate_json_string(buffer, data, state, key_to_s);
+        generate_json_string(buffer, data, key_to_s);
     } else {
-        generate_json(buffer, data, state, key_to_s);
+        generate_json(buffer, data, key_to_s);
     }
-    if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before);
+    if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, data->state->space_before);
     fbuffer_append_char(buffer, ':');
-    if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space);
-    generate_json(buffer, data, state, val);
+    if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
+    generate_json(buffer, data, val);
     arg->iter++;
     return ST_CONTINUE;
 }
-static inline long increase_depth(JSON_Generator_State *state)
+static inline long increase_depth(struct generate_json_data *data)
 {
+    JSON_Generator_State *state = data->state;
     long depth = ++state->depth;
     if (RB_UNLIKELY(depth > state->max_nesting && state->max_nesting)) {
         rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
@@ -850,14 +1126,14 @@ static inline long increase_depth(JSON_Generator_State *state)
     return depth;
 }
-static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     int j;
-    long depth = increase_depth(state);
+    long depth = increase_depth(data);
     if (RHASH_SIZE(obj) == 0) {
         fbuffer_append(buffer, "{}", 2);
-        --state->depth;
+        --data->state->depth;
         return;
     }
@@ -869,49 +1145,49 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
     };
     rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
-    depth = --state->depth;
-    if (RB_UNLIKELY(state->object_nl)) {
-        fbuffer_append_str(buffer, state->object_nl);
-        if (RB_UNLIKELY(state->indent)) {
+    depth = --data->state->depth;
+    if (RB_UNLIKELY(data->state->object_nl)) {
+        fbuffer_append_str(buffer, data->state->object_nl);
+        if (RB_UNLIKELY(data->state->indent)) {
             for (j = 0; j < depth; j++) {
-                fbuffer_append_str(buffer, state->indent);
+                fbuffer_append_str(buffer, data->state->indent);
             }
         }
     }
     fbuffer_append_char(buffer, '}');
 }
-static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     int i, j;
-    long depth = increase_depth(state);
+    long depth = increase_depth(data);
     if (RARRAY_LEN(obj) == 0) {
         fbuffer_append(buffer, "[]", 2);
-        --state->depth;
+        --data->state->depth;
         return;
     }
     fbuffer_append_char(buffer, '[');
-    if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
-    for(i = 0; i < RARRAY_LEN(obj); i++) {
+    if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
+    for (i = 0; i < RARRAY_LEN(obj); i++) {
         if (i > 0) {
             fbuffer_append_char(buffer, ',');
-            if (RB_UNLIKELY(state->array_nl)) fbuffer_append_str(buffer, state->array_nl);
+            if (RB_UNLIKELY(data->state->array_nl)) fbuffer_append_str(buffer, data->state->array_nl);
         }
-        if (RB_UNLIKELY(state->indent)) {
+        if (RB_UNLIKELY(data->state->indent)) {
             for (j = 0; j < depth; j++) {
-                fbuffer_append_str(buffer, state->indent);
+                fbuffer_append_str(buffer, data->state->indent);
             }
         }
-        generate_json(buffer, data, state, RARRAY_AREF(obj, i));
+        generate_json(buffer, data, RARRAY_AREF(obj, i));
     }
-    state->depth = --depth;
-    if (RB_UNLIKELY(state->array_nl)) {
-        fbuffer_append_str(buffer, state->array_nl);
-        if (RB_UNLIKELY(state->indent)) {
+    data->state->depth = --depth;
+    if (RB_UNLIKELY(data->state->array_nl)) {
+        fbuffer_append_str(buffer, data->state->array_nl);
+        if (RB_UNLIKELY(data->state->indent)) {
             for (j = 0; j < depth; j++) {
-                fbuffer_append_str(buffer, state->indent);
+                fbuffer_append_str(buffer, data->state->indent);
             }
         }
     }
@@ -960,7 +1236,7 @@ static inline VALUE ensure_valid_encoding(VALUE str)
     return str;
 }
-static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     obj = ensure_valid_encoding(obj);
@@ -973,12 +1249,18 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
     search.cursor = search.ptr;
     search.end = search.ptr + len;
-    switch(rb_enc_str_coderange(obj)) {
+#ifdef HAVE_SIMD
+    search.matches_mask = 0;
+    search.has_matches = false;
+    search.chunk_base = NULL;
+#endif /* HAVE_SIMD */
+    switch (rb_enc_str_coderange(obj)) {
         case ENC_CODERANGE_7BIT:
         case ENC_CODERANGE_VALID:
-            if (RB_UNLIKELY(state->ascii_only)) {
-                convert_UTF8_to_ASCII_only_JSON(&search, state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
-            } else if (RB_UNLIKELY(state->script_safe)) {
+            if (RB_UNLIKELY(data->state->ascii_only)) {
+                convert_UTF8_to_ASCII_only_JSON(&search, data->state->script_safe ? script_safe_escape_table : ascii_only_escape_table);
+            } else if (RB_UNLIKELY(data->state->script_safe)) {
                 convert_UTF8_to_script_safe_JSON(&search);
             } else {
                 convert_UTF8_to_JSON(&search);
@@ -991,7 +1273,7 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
     fbuffer_append_char(buffer, '"');
 }
-static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     VALUE tmp;
     if (rb_respond_to(obj, i_to_json)) {
@@ -1001,100 +1283,116 @@ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *d
     } else {
         tmp = rb_funcall(obj, i_to_s, 0);
         Check_Type(tmp, T_STRING);
-        generate_json_string(buffer, data, state, tmp);
+        generate_json_string(buffer, data, tmp);
     }
 }
-static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static inline void generate_json_symbol(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
-    if (state->strict) {
-        generate_json_string(buffer, data, state, rb_sym2str(obj));
+    if (data->state->strict) {
+        generate_json_string(buffer, data, rb_sym2str(obj));
     } else {
-        generate_json_fallback(buffer, data, state, obj);
+        generate_json_fallback(buffer, data, obj);
     }
 }
-static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     fbuffer_append(buffer, "null", 4);
 }
-static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     fbuffer_append(buffer, "false", 5);
 }
-static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     fbuffer_append(buffer, "true", 4);
 }
-static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     fbuffer_append_long(buffer, FIX2LONG(obj));
 }
-static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     VALUE tmp = rb_funcall(obj, i_to_s, 0);
     fbuffer_append_str(buffer, tmp);
 }
 #ifdef RUBY_INTEGER_UNIFICATION
-static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     if (FIXNUM_P(obj))
-        generate_json_fixnum(buffer, data, state, obj);
+        generate_json_fixnum(buffer, data, obj);
     else
-        generate_json_bignum(buffer, data, state, obj);
+        generate_json_bignum(buffer, data, obj);
 }
 #endif
-static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     double value = RFLOAT_VALUE(obj);
-    char allow_nan = state->allow_nan;
-    if (!allow_nan) {
-        if (isinf(value) || isnan(value)) {
-            if (state->strict && state->as_json) {
-                VALUE casted_obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
+    char allow_nan = data->state->allow_nan;
+    if (isinf(value) || isnan(value)) {
+        /* for NaN and Infinity values we either raise an error or rely on Float#to_s. */
+        if (!allow_nan) {
+            if (data->state->strict && data->state->as_json) {
+                VALUE casted_obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
                 if (casted_obj != obj) {
-                    increase_depth(state);
-                    generate_json(buffer, data, state, casted_obj);
-                    state->depth--;
+                    increase_depth(data);
+                    generate_json(buffer, data, casted_obj);
+                    data->state->depth--;
                     return;
                 }
             }
             raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", rb_funcall(obj, i_to_s, 0));
         }
+        VALUE tmp = rb_funcall(obj, i_to_s, 0);
+        fbuffer_append_str(buffer, tmp);
+        return;
     }
-    fbuffer_append_str(buffer, rb_funcall(obj, i_to_s, 0));
+    /* This implementation writes directly into the buffer. We reserve
+     * the 28 characters that fpconv_dtoa states as its maximum.
+     */
+    fbuffer_inc_capa(buffer, 28);
+    char* d = buffer->ptr + buffer->len;
+    int len = fpconv_dtoa(value, d);
+    /* fpconv_dtoa converts a float to its shortest string representation,
+     * but it adds a ".0" if this is a plain integer.
+     */
+    fbuffer_consumed(buffer, len);
 }
-static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json_fragment(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     VALUE fragment = RSTRUCT_GET(obj, 0);
     Check_Type(fragment, T_STRING);
     fbuffer_append_str(buffer, fragment);
 }
-static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
+static void generate_json(FBuffer *buffer, struct generate_json_data *data, VALUE obj)
 {
     /* Central dispatcher: selects the type-specific generate_json_* routine
      * for obj. Values without a dedicated routine end up at the `general`
      * label, where the state's strict flag decides between the as_json proc
      * (or an error) and generate_json_fallback. */
     bool as_json_called = false; /* lets the as_json proc be applied at most once per call */
 start: /* dispatch restarts here with the value returned by the as_json proc */
     if (obj == Qnil) {
-        generate_json_null(buffer, data, state, obj);
+        generate_json_null(buffer, data, obj);
     } else if (obj == Qfalse) {
-        generate_json_false(buffer, data, state, obj);
+        generate_json_false(buffer, data, obj);
     } else if (obj == Qtrue) {
-        generate_json_true(buffer, data, state, obj);
+        generate_json_true(buffer, data, obj);
     } else if (RB_SPECIAL_CONST_P(obj)) {
         /* Remaining special constants: fixnum, flonum and static symbol have
          * dedicated routines; any other kind jumps to `general`. */
         if (RB_FIXNUM_P(obj)) {
-            generate_json_fixnum(buffer, data, state, obj);
+            generate_json_fixnum(buffer, data, obj);
         } else if (RB_FLONUM_P(obj)) {
-            generate_json_float(buffer, data, state, obj);
+            generate_json_float(buffer, data, obj);
         } else if (RB_STATIC_SYM_P(obj)) {
-            generate_json_symbol(buffer, data, state, obj);
+            generate_json_symbol(buffer, data, obj);
         } else {
             goto general;
         }
@@ -1102,43 +1400,43 @@ start:
         VALUE klass = RBASIC_CLASS(obj);
         /* Hash, Array, String, Float and struct objects use their dedicated
          * routine only when klass is exactly rb_cHash / rb_cArray /
          * rb_cString / rb_cFloat / cFragment; any other class jumps to
          * `general`. Bignum and Symbol are dispatched without a class check. */
         switch (RB_BUILTIN_TYPE(obj)) {
             case T_BIGNUM:
-                generate_json_bignum(buffer, data, state, obj);
+                generate_json_bignum(buffer, data, obj);
                 break;
             case T_HASH:
                 if (klass != rb_cHash) goto general;
-                generate_json_object(buffer, data, state, obj);
+                generate_json_object(buffer, data, obj);
                 break;
             case T_ARRAY:
                 if (klass != rb_cArray) goto general;
-                generate_json_array(buffer, data, state, obj);
+                generate_json_array(buffer, data, obj);
                 break;
             case T_STRING:
                 if (klass != rb_cString) goto general;
-                generate_json_string(buffer, data, state, obj);
+                generate_json_string(buffer, data, obj);
                 break;
             case T_SYMBOL:
-                generate_json_symbol(buffer, data, state, obj);
+                generate_json_symbol(buffer, data, obj);
                 break;
             case T_FLOAT:
                 if (klass != rb_cFloat) goto general;
-                generate_json_float(buffer, data, state, obj);
+                generate_json_float(buffer, data, obj);
                 break;
             case T_STRUCT:
                 if (klass != cFragment) goto general;
-                generate_json_fragment(buffer, data, state, obj);
+                generate_json_fragment(buffer, data, obj);
                 break;
             default:
             general:
                 /* No dedicated routine applies. In strict mode: if an as_json
                  * proc is set and has not been used yet in this call, replace
                  * obj with the proc's result and restart at `start`;
                  * otherwise raise a generator error. Outside strict mode,
                  * hand the value to generate_json_fallback. */
-                if (state->strict) {
-                    if (RTEST(state->as_json) && !as_json_called) {
-                        obj = rb_proc_call_with_block(state->as_json, 1, &obj, Qnil);
+                if (data->state->strict) {
+                    if (RTEST(data->state->as_json) && !as_json_called) {
+                        obj = rb_proc_call_with_block(data->state->as_json, 1, &obj, Qnil);
                         as_json_called = true;
                         goto start;
                     } else {
                         raise_generator_error(obj, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
                     }
                 } else {
-                    generate_json_fallback(buffer, data, state, obj);
+                    generate_json_fallback(buffer, data, obj);
                 }
         }
     }
@@ -1148,7 +1446,7 @@ static VALUE generate_json_try(VALUE d)
 {
     /* d is a struct generate_json_data pointer smuggled through a VALUE.
      * Invokes the stored generator callback on the stored buffer and object
      * and always returns Qnil.
      * NOTE(review): the VALUE-in/VALUE-out shape and the `_try` name suggest
      * this is run through a protecting wrapper -- confirm at the call site. */
     struct generate_json_data *data = (struct generate_json_data *)d;
-    data->func(data->buffer, data, data->state, data->obj);
+    data->func(data->buffer, data, data->obj);
     return Qnil;
 }
@@ -1609,15 +1907,30 @@ static VALUE cState_buffer_initial_length_set(VALUE self, VALUE buffer_initial_l
     return Qnil;
 }
+struct configure_state_data {  // argument bundle that configure_state passes to configure_state_i via rb_hash_foreach
+    JSON_Generator_State *state;  // the generator state whose fields are being set
+    VALUE vstate;  // Ruby object that owns the state, or Qfalse if stack-allocated
+};
+static inline void state_write_value(struct configure_state_data *data, VALUE *field, VALUE value)
+{  // Stores `value` into `*field`, a VALUE slot of data->state (callers pass e.g. &state->indent).
+    if (RTEST(data->vstate)) {  // state belongs to a Ruby object: store through RB_OBJ_WRITE with that object as owner
+        RB_OBJ_WRITE(data->vstate, field, value);
+    } else {  // vstate is Qfalse (stack-allocated state, no owning object): plain assignment
+        *field = value;
+    }
+}
 static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
 {
     /* rb_hash_foreach callback used by configure_state: applies a single
      * option (key, val) to the generator state by comparing key against the
      * known option symbols. None of the branches visible here rejects an
      * unrecognised key. Always returns ST_CONTINUE so the iteration moves on
      * to the next pair. */
-    JSON_Generator_State *state = (JSON_Generator_State *)_arg;
+    struct configure_state_data *data = (struct configure_state_data *)_arg;  // _arg is the &data supplied by configure_state
+    JSON_Generator_State *state = data->state;
-         if (key == sym_indent)                { state->indent = string_config(val); }
-    else if (key == sym_space)                 { state->space = string_config(val); }
-    else if (key == sym_space_before)          { state->space_before = string_config(val); }
-    else if (key == sym_object_nl)             { state->object_nl = string_config(val); }
-    else if (key == sym_array_nl)              { state->array_nl = string_config(val); }
+         if (key == sym_indent)                { state_write_value(data, &state->indent, string_config(val)); }
+    else if (key == sym_space)                 { state_write_value(data, &state->space, string_config(val)); }
+    else if (key == sym_space_before)          { state_write_value(data, &state->space_before, string_config(val)); }
+    else if (key == sym_object_nl)             { state_write_value(data, &state->object_nl, string_config(val)); }
+    else if (key == sym_array_nl)              { state_write_value(data, &state->array_nl, string_config(val)); }
     /* The integer and boolean options below are plain struct members and are
      * assigned directly. */
     else if (key == sym_max_nesting)           { state->max_nesting = long_config(val); }
     else if (key == sym_allow_nan)             { state->allow_nan = RTEST(val); }
     else if (key == sym_ascii_only)            { state->ascii_only = RTEST(val); }
@@ -1626,11 +1939,14 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
     else if (key == sym_script_safe)           { state->script_safe = RTEST(val); }
     else if (key == sym_escape_slash)          { state->script_safe = RTEST(val); } /* escape_slash sets the same flag as script_safe */
     else if (key == sym_strict)                { state->strict = RTEST(val); }
-    else if (key == sym_as_json)               { state->as_json = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse; }
+    else if (key == sym_as_json)               {
+        VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;  // nil/false clears the hook; anything else is converted with to_proc
+        state_write_value(data, &state->as_json, proc);
+    }
     return ST_CONTINUE;
 }
-static void configure_state(JSON_Generator_State *state, VALUE config)
+static void configure_state(JSON_Generator_State *state, VALUE vstate, VALUE config)
 {
     /* Applies the options in `config` to `state` by running
      * configure_state_i over each key/value pair. A nil or false config is
      * ignored. */
     if (!RTEST(config)) return;
@@ -1638,15 +1954,20 @@ static void configure_state(JSON_Generator_State *state, VALUE config)
     if (!RHASH_SIZE(config)) return; /* empty hash: nothing to apply */
+    struct configure_state_data data = {  // local to this call; its address is only handed to rb_hash_foreach below
+        .state = state,
+        .vstate = vstate  // owning Ruby object, or Qfalse when the state is stack-allocated
+    };
     // We assume in most cases few keys are set so it's faster to go over
     // the provided keys than to check all possible keys.
-    rb_hash_foreach(config, configure_state_i, (VALUE)state);
+    rb_hash_foreach(config, configure_state_i, (VALUE)&data);
 }
 static VALUE cState_configure(VALUE self, VALUE opts)
 {
     /* Applies the options in opts to the generator state behind self and
      * returns self.
      * NOTE(review): GET_STATE is defined outside this excerpt; it is expected
      * to bring the `state` pointer used below into scope -- confirm. */
     GET_STATE(self);
-    configure_state(state, opts);
+    configure_state(state, self, opts);
     return self;
 }
@@ -1654,7 +1975,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io)
 {
     JSON_Generator_State state = {0};
     state_init(&state);
-    configure_state(&state, opts);
+    configure_state(&state, Qfalse, opts);
     char stack_buffer[FBUFFER_STACK_SIZE];
     FBuffer buffer = {
@@ -1819,4 +2140,23 @@ void Init_generator(void)
     binary_encindex = rb_ascii8bit_encindex();
     rb_require("json/ext/generator/state");
+    switch (find_simd_implementation()) {  // pick the search_escape_basic_impl routine once, during extension init
+#ifdef HAVE_SIMD
+#ifdef HAVE_SIMD_NEON
+        case SIMD_NEON:
+            search_escape_basic_impl = search_escape_basic_neon;
+            break;
+#endif /* HAVE_SIMD_NEON */
+#ifdef HAVE_SIMD_SSE2
+        case SIMD_SSE2:
+            search_escape_basic_impl = search_escape_basic_sse2;
+            break;
+#endif /* HAVE_SIMD_SSE2 */
+#endif /* HAVE_SIMD */
+        default:  // no compiled-in case matched the reported implementation: use plain search_escape_basic
+            search_escape_basic_impl = search_escape_basic;
+            break;
+    }
 }