RubyGems - oj - Versions diffs - 3.13.3 → 3.13.7 - Mend

oj 3.13.3 → 3.13.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

data/README.md CHANGED Viewed

@@ -64,7 +64,7 @@ links.
 ## Releases
-See [{file:CHANGELOG.md}](CHANGELOG.md)
+See [{file:CHANGELOG.md}](CHANGELOG.md) and [{file:RELEASE_NOTES.md}](RELEASE_NOTES.md)
 ## Links

data/RELEASE_NOTES.md ADDED Viewed

@@ -0,0 +1,61 @@
+# RELEASE NOTES
+The release notes here are organized by release. For a list of changes
+see the [{file:CHANGELOG.md}](CHANGELOG.md) file. In this file are
+the steps to take to aid in keeping things rolling after updating to
+the latest version.
+## 3.13.7
+The default for JSON when mimicked by Oj is now to set
+`:allow_invalid_unicode`. To change that behavior for JSON.load, set that
+option to false.
+## 3.13.x
+This release included a new cache that performs better than the
+earlier cache and a new high performance parser.
+### Cache
+The new cache includes a least recently used expiration to reduce
+memory use. The cache is also self adjusting and will expand as needed
+for better performance. It also handles Hash keys and string values
+with two options, `:cache_keys`, a boolean and `:cache_str` an
+integer. The `:cache_str` if set to more than zero is the limit for
+the length of string values to cache. The maximum value is 35 which
+allows strings up to 34 bytes to be cached.
+One interesting aspect of the cache is not so much the string caching
+which performs similarly to the Ruby intern functions but the caching of
+symbols and object attribute names. There is a significant gain for
+symbols and object attributes.
+If the cache is not desired then setting the default options to turn
+it off can be done with this line:
+``` ruby
+Oj.default_options = { cache_keys: false, cache_str: 0 }
+```
+### Oj::Parser
+The new parser uses a different core that follows the approach taken
+by [OjC](https://github.com/ohler55/ojc) and
+[OjG](https://github.com/ohler55/ojg). It also takes advantage of the
+bulk Array and Hash functions. Another issue the new parser addresses
+is option management. Instead of a single global default_options each
+parser instance maintains its own options.
+There is a price to be paid when using the Oj::Parser. The API is not
+the same as the older parser. A single parser can only be used in a
+single thread. This allows reuse of internal buffers for additional
+improvements in performance.
+The performance advantage of the Oj::Parser is that it is more than 3
+times faster than the Oj::compat_load call and 6 times faster than the
+JSON gem.
+### Dump Performance
+Thanks to Watson1978 Oj.dump also received a speed boost.

data/ext/oj/cache.c CHANGED Viewed

@@ -4,9 +4,14 @@
 #if HAVE_PTHREAD_MUTEX_INIT
 #include <pthread.h>
 #endif
+#include <stdlib.h>
 #include "cache.h"
+// The stdlib calloc, realloc, and free are used instead of the Ruby ALLOC,
+// ALLOC_N, REALLOC, and xfree since the latter could trigger a GC which will
+// either corrupt memory or, if the mark function locks, deadlock.
 #define REHASH_LIMIT 4
 #define MIN_SHIFT 8
 #define REUSE_MAX 8192
@@ -23,23 +28,23 @@
 #define M 0x5bd1e995
 typedef struct _slot {
-    struct _slot *next;
-    VALUE         val;
-    uint64_t      hash;
-    uint32_t      use_cnt;
-    uint8_t       klen;
-    char          key[CACHE_MAX_KEY];
+    struct _slot *    next;
+    VALUE             val;
+    uint64_t          hash;
+    volatile uint32_t use_cnt;
+    uint8_t           klen;
+    char              key[CACHE_MAX_KEY];
 } * Slot;
 typedef struct _cache {
-    Slot * slots;
-    size_t cnt;
+    volatile Slot * slots;
+    volatile size_t cnt;
     VALUE (*form)(const char *str, size_t len);
     uint64_t size;
     uint64_t mask;
     VALUE (*intern)(struct _cache *c, const char *key, size_t len);
-    Slot   reuse;
-    size_t rcnt;
+    volatile Slot reuse;
+    size_t        rcnt;
 #if HAVE_PTHREAD_MUTEX_INIT
     pthread_mutex_t mutex;
 #else
@@ -53,22 +58,6 @@ void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
     c->form = form;
 }
-#if 0
-// For debugging only.
-static void cache_print(Cache c) {
-    for (uint64_t i = 0; i < c->size; i++) {
-        printf("%4d:", i);
-        for (Slot s = c->slots[i]; NULL != s; s = s->next) {
-            char buf[40];
-            strncpy(buf, s->key, s->klen);
-            buf[s->klen] = '\0';
-            printf(" %s", buf);
-        }
-        printf("\n");
-    }
-}
-#endif
 static uint64_t hash_calc(const uint8_t *key, size_t len) {
     const uint8_t *end     = key + len;
     const uint8_t *endless = key + (len & 0xFFFFFFFC);
@@ -104,23 +93,24 @@ static uint64_t hash_calc(const uint8_t *key, size_t len) {
 }
 static void rehash(Cache c) {
-    uint64_t osize = c->size;
+    uint64_t osize;
     Slot *   end;
     Slot *   sp;
-    c->size = osize * 4;
-    c->mask = c->size - 1;
-    REALLOC_N(c->slots, Slot, c->size);
-    memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
-    end = c->slots + osize;
-    for (sp = c->slots; sp < end; sp++) {
+    osize    = c->size;
+    c->size  = osize * 4;
+    c->mask  = c->size - 1;
+    c->slots = realloc((void *)c->slots, sizeof(Slot) * c->size);
+    memset((Slot *)c->slots + osize, 0, sizeof(Slot) * osize * 3);
+    end = (Slot *)c->slots + osize;
+    for (sp = (Slot *)c->slots; sp < end; sp++) {
         Slot s    = *sp;
         Slot next = NULL;
         *sp = NULL;
         for (; NULL != s; s = next) {
             uint64_t h      = s->hash & c->mask;
-            Slot *   bucket = c->slots + h;
+            Slot *   bucket = (Slot *)c->slots + h;
             next    = s->next;
             s->next = *bucket;
@@ -130,14 +120,15 @@ static void rehash(Cache c) {
 }
 static VALUE lockless_intern(Cache c, const char *key, size_t len) {
-    uint64_t h      = hash_calc((const uint8_t *)key, len);
-    Slot *   bucket = c->slots + (h & c->mask);
-    Slot     b;
+    uint64_t       h      = hash_calc((const uint8_t *)key, len);
+    Slot *         bucket = (Slot *)c->slots + (h & c->mask);
+    Slot           b;
+    volatile VALUE rkey;
     while (REUSE_MAX < c->rcnt) {
         if (NULL != (b = c->reuse)) {
             c->reuse = b->next;
-            xfree(b);
+            free(b);
             c->rcnt--;
         } else {
             // An accounting error occured somewhere so correct it.
@@ -146,46 +137,44 @@ static VALUE lockless_intern(Cache c, const char *key, size_t len) {
     }
     for (b = *bucket; NULL != b; b = b->next) {
         if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
-            b->use_cnt += 4;
+            b->use_cnt += 16;
             return b->val;
         }
     }
-    {
-        volatile VALUE rkey = c->form(key, len);
-        if (NULL == (b = c->reuse)) {
-            b = ALLOC(struct _slot);
-        } else {
-            c->reuse = b->next;
-            c->rcnt--;
-        }
-        b->hash = h;
-        memcpy(b->key, key, len);
-        b->klen     = (uint8_t)len;
-        b->key[len] = '\0';
-        b->val      = rkey;
-        b->use_cnt  = 4;
-        b->next     = *bucket;
-        *bucket     = b;
-        c->cnt++;  // Don't worry about wrapping. Worse case is the entry is removed and recreated.
-        if (REHASH_LIMIT < c->cnt / c->size) {
-            rehash(c);
-        }
+    rkey = c->form(key, len);
+    if (NULL == (b = c->reuse)) {
+        b = calloc(1, sizeof(struct _slot));
+    } else {
+        c->reuse = b->next;
+        c->rcnt--;
+    }
+    b->hash = h;
+    memcpy(b->key, key, len);
+    b->klen     = (uint8_t)len;
+    b->key[len] = '\0';
+    b->val      = rkey;
+    b->use_cnt  = 4;
+    b->next     = *bucket;
+    *bucket     = b;
+    c->cnt++;  // Don't worry about wrapping. Worse case is the entry is removed and recreated.
+    if (REHASH_LIMIT < c->cnt / c->size) {
+        rehash(c);
     }
-    return b->val;
+    return rkey;
 }
 static VALUE locking_intern(Cache c, const char *key, size_t len) {
-    uint64_t h;
-    Slot *   bucket;
-    Slot     b;
-    uint64_t old_size;
+    uint64_t       h;
+    Slot *         bucket;
+    Slot           b;
+    uint64_t       old_size;
+    volatile VALUE rkey;
     CACHE_LOCK(c);
     while (REUSE_MAX < c->rcnt) {
         if (NULL != (b = c->reuse)) {
             c->reuse = b->next;
-            xfree(b);
+            free(b);
             c->rcnt--;
         } else {
             // An accounting error occured somewhere so correct it.
@@ -193,53 +182,53 @@ static VALUE locking_intern(Cache c, const char *key, size_t len) {
         }
     }
     h      = hash_calc((const uint8_t *)key, len);
-    bucket = c->slots + (h & c->mask);
+    bucket = (Slot *)c->slots + (h & c->mask);
     for (b = *bucket; NULL != b; b = b->next) {
         if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
             b->use_cnt += 4;
             CACHE_UNLOCK(c);
             return b->val;
         }
     }
     old_size = c->size;
     // The creation of a new value may trigger a GC which be a problem if the
     // cache is locked so make sure it is unlocked for the key value creation.
-    if (NULL == (b = c->reuse)) {
-        b = ALLOC(struct _slot);
-    } else {
+    if (NULL != (b = c->reuse)) {
         c->reuse = b->next;
         c->rcnt--;
     }
     CACHE_UNLOCK(c);
-    {
-        volatile VALUE rkey = c->form(key, len);
-        b->hash = h;
-        memcpy(b->key, key, len);
-        b->klen     = (uint8_t)len;
-        b->key[len] = '\0';
-        b->val      = rkey;
-        b->use_cnt  = 4;
-        // Lock again to add the new entry.
-        CACHE_LOCK(c);
-        if (old_size != c->size) {
-            h      = hash_calc((const uint8_t *)key, len);
-            bucket = c->slots + (h & c->mask);
-        }
-        b->next = *bucket;
-        *bucket = b;
-        c->cnt++;  // Don't worry about wrapping. Worse case is the entry is removed and recreated.
-        if (REHASH_LIMIT < c->cnt / c->size) {
-            rehash(c);
-        }
-        CACHE_UNLOCK(c);
+    if (NULL == b) {
+        b = calloc(1, sizeof(struct _slot));
+    }
+    rkey    = c->form(key, len);
+    b->hash = h;
+    memcpy(b->key, key, len);
+    b->klen     = (uint8_t)len;
+    b->key[len] = '\0';
+    b->val      = rkey;
+    b->use_cnt  = 16;
+    // Lock again to add the new entry.
+    CACHE_LOCK(c);
+    if (old_size != c->size) {
+        h      = hash_calc((const uint8_t *)key, len);
+        bucket = (Slot *)c->slots + (h & c->mask);
+    }
+    b->next = *bucket;
+    *bucket = b;
+    c->cnt++;  // Don't worry about wrapping. Worse case is the entry is removed and recreated.
+    if (REHASH_LIMIT < c->cnt / c->size) {
+        rehash(c);
     }
-    return b->val;
+    CACHE_UNLOCK(c);
+    return rkey;
 }
 Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
-    Cache c     = ALLOC(struct _cache);
+    Cache c     = calloc(1, sizeof(struct _cache));
     int   shift = 0;
     for (; REHASH_LIMIT < size; size /= 2, shift++) {
@@ -252,16 +241,12 @@ Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool
 #else
     c->mutex = rb_mutex_new();
 #endif
-    c->size  = 1 << shift;
-    c->mask  = c->size - 1;
-    c->slots = ALLOC_N(Slot, c->size);
-    memset(c->slots, 0, sizeof(Slot) * c->size);
-    c->form  = form;
-    c->cnt   = 0;
-    c->xrate = 1;  // low
-    c->mark  = mark;
-    c->reuse = NULL;
-    c->rcnt  = 0;
+    c->size    = 1 << shift;
+    c->mask    = c->size - 1;
+    c->slots   = calloc(c->size, sizeof(Slot));
+    c->form    = form;
+    c->xrate   = 1;  // low
+    c->mark    = mark;
     if (locking) {
         c->intern = locking_intern;
     } else {
@@ -283,11 +268,11 @@ void cache_free(Cache c) {
         for (s = c->slots[i]; NULL != s; s = next) {
             next = s->next;
-            xfree(s);
+            free(s);
         }
     }
-    xfree(c->slots);
-    xfree(c);
+    free((void *)c->slots);
+    free(c);
 }
 void cache_mark(Cache c) {
@@ -334,7 +319,7 @@ void cache_mark(Cache c) {
 VALUE
 cache_intern(Cache c, const char *key, size_t len) {
-    if (CACHE_MAX_KEY < len) {
+    if (CACHE_MAX_KEY <= len) {
         return c->form(key, len);
     }
     return c->intern(c, key, len);

data/ext/oj/custom.c CHANGED Viewed

@@ -955,8 +955,8 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
             }
         }
     } else {
-	//volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
-        volatile VALUE rstr = rb_utf8_str_new(str, len);
+	volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
+        //volatile VALUE rstr = rb_utf8_str_new(str, len);
         if (Qundef == rkey) {
             if (Yes == pi->options.sym_key) {

data/ext/oj/intern.c CHANGED Viewed

@@ -51,7 +51,7 @@ static VALUE form_str(const char *str, size_t len) {
 }
 static VALUE form_sym(const char *str, size_t len) {
-    return rb_str_intern(rb_utf8_str_new(str, len));
+    return rb_to_symbol(rb_str_intern(rb_utf8_str_new(str, len)));
 }
 static VALUE form_attr(const char *str, size_t len) {
@@ -112,7 +112,14 @@ void oj_hash_init() {
 VALUE
 oj_str_intern(const char *key, size_t len) {
+    // For huge cache sizes over half a million the rb_enc_interned_str
+    // performs slightly better but at more "normal" size of a several
+    // thousands the cache intern performs about 20% better.
+#if HAVE_RB_ENC_INTERNED_STR && 0
+    return rb_enc_interned_str(key, len, rb_utf8_encoding());
+#else
     return cache_intern(str_cache, key, len);
+#endif
 }
 VALUE

data/ext/oj/mimic_json.c CHANGED Viewed

@@ -516,7 +516,7 @@ static VALUE mimic_parse_core(int argc, VALUE *argv, VALUE self, bool bang) {
     pi.options               = oj_default_options;
     pi.options.auto_define   = No;
     pi.options.quirks_mode   = Yes;
-    pi.options.allow_invalid = No;
+    pi.options.allow_invalid = Yes;
     pi.options.empty_string  = No;
     pi.options.create_ok     = No;
     pi.options.allow_nan     = (bang ? Yes : No);
@@ -573,8 +573,7 @@ static VALUE mimic_parse_core(int argc, VALUE *argv, VALUE self, bool bang) {
             }
         }
         if (oj_hash_has_key(ropts, oj_decimal_class_sym)) {
-            pi.options.compat_bigdec = (oj_bigdecimal_class ==
-                                        rb_hash_lookup(ropts, oj_decimal_class_sym));
+            pi.options.compat_bigdec = (oj_bigdecimal_class == rb_hash_lookup(ropts, oj_decimal_class_sym));
         }
         v = rb_hash_lookup(ropts, oj_max_nesting_sym);
         if (Qtrue == v) {
@@ -682,7 +681,7 @@ static VALUE mimic_set_create_id(VALUE self, VALUE id) {
  */
 static VALUE mimic_create_id(VALUE self) {
     if (NULL != oj_default_options.create_id) {
-        return  rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
+        return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
     }
     return rb_str_new_cstr(oj_json_class);
 }
@@ -706,7 +705,7 @@ static struct _options mimic_object_to_json_options = {0,              // indent
                                                        No,             // empty_string
                                                        Yes,            // allow_gc
                                                        Yes,            // quirks_mode
-                                                       No,             // allow_invalid
+                                                       Yes,            // allow_invalid
                                                        No,             // create_ok
                                                        No,             // allow_nan
                                                        No,             // trace