oj 3.13.3 → 3.13.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -64,7 +64,7 @@ links.
64
64
 
65
65
  ## Releases
66
66
 
67
- See [{file:CHANGELOG.md}](CHANGELOG.md)
67
+ See [{file:CHANGELOG.md}](CHANGELOG.md) and [{file:RELEASE_NOTES.md}](RELEASE_NOTES.md)
68
68
 
69
69
  ## Links
70
70
 
data/RELEASE_NOTES.md ADDED
@@ -0,0 +1,61 @@
1
+ # RELEASE NOTES
2
+
3
+ The release notes here are organized by release. For a list of changes
4
+ see the [{file:CHANGELOG.md}](CHANGELOG.md) file. In this file are
5
+ the steps to take to aid in keeping things rolling after updating to
6
+ the latest version.
7
+
8
+ ## 3.13.7
9
+
10
+ The default for JSON when mimicked by Oj is now to set
11
+ `:allow_invalid_unicode`. To change that behavior for JSON.load, set that
12
+ option to false.
13
+
14
+ ## 3.13.x
15
+
16
+ This release included a new cache that performs better than the
17
+ earlier cache and a new high performance parser.
18
+
19
+ ### Cache
20
+
21
+ The new cache includes a least recently used expiration to reduce
22
+ memory use. The cache is also self adjusting and will expand as needed
23
+ for better performance. It also handles Hash keys and string values
24
+ with two options, `:cache_keys`, a boolean and `:cache_str` an
25
+ integer. The `:cache_str` option, if set to more than zero, is the limit for
26
+ the length of string values to cache. The maximum value is 35 which
27
+ allows strings up to 34 bytes to be cached.
28
+
29
+ One interesting aspect of the cache is not so much the string caching
30
+ which performs similar to the Ruby intern functions but the caching of
31
+ symbols and object attribute names. There is a significant gain for
32
+ symbols and object attributes.
33
+
34
+ If the cache is not desired then setting the default options to turn
35
+ it off can be done with this line:
36
+
37
+ ``` ruby
38
+ Oj.default_options = { cache_keys: false, cache_str: 0 }
39
+ ```
40
+
41
+ ### Oj::Parser
42
+
43
+ The new parser uses a different core that follows the approach taken
44
+ by [OjC](https://github.com/ohler55/ojc) and
45
+ [OjG](https://github.com/ohler55/ojg). It also takes advantage of the
46
+ bulk Array and Hash functions. Another issue the new parser addresses
47
+ is option management. Instead of a single global default_options each
48
+ parser instance maintains its own options.
49
+
50
+ There is a price to be paid when using the Oj::Parser. The API is not
51
+ the same as the older parser. A single parser can only be used in a
52
+ single thread. This allows reuse of internal buffers for additional
53
+ improvements in performance.
54
+
55
+ The performance advantage of the Oj::Parser is that it is more than 3
56
+ times faster than the Oj::compat_load call and 6 times faster than the
57
+ JSON gem.
58
+
59
+ ### Dump Performance
60
+
61
+ Thanks to Watson1978, Oj.dump also received a speed boost.
data/ext/oj/cache.c CHANGED
@@ -4,9 +4,14 @@
4
4
  #if HAVE_PTHREAD_MUTEX_INIT
5
5
  #include <pthread.h>
6
6
  #endif
7
+ #include <stdlib.h>
7
8
 
8
9
  #include "cache.h"
9
10
 
11
+ // The stdlib calloc, realloc, and free are used instead of the Ruby ALLOC,
12
+ // ALLOC_N, REALLOC, and xfree since the later could trigger a GC which will
13
+ // either corrupt memory or if the mark function locks will deadlock.
14
+
10
15
  #define REHASH_LIMIT 4
11
16
  #define MIN_SHIFT 8
12
17
  #define REUSE_MAX 8192
@@ -23,23 +28,23 @@
23
28
  #define M 0x5bd1e995
24
29
 
25
30
  typedef struct _slot {
26
- struct _slot *next;
27
- VALUE val;
28
- uint64_t hash;
29
- uint32_t use_cnt;
30
- uint8_t klen;
31
- char key[CACHE_MAX_KEY];
31
+ struct _slot * next;
32
+ VALUE val;
33
+ uint64_t hash;
34
+ volatile uint32_t use_cnt;
35
+ uint8_t klen;
36
+ char key[CACHE_MAX_KEY];
32
37
  } * Slot;
33
38
 
34
39
  typedef struct _cache {
35
- Slot * slots;
36
- size_t cnt;
40
+ volatile Slot * slots;
41
+ volatile size_t cnt;
37
42
  VALUE (*form)(const char *str, size_t len);
38
43
  uint64_t size;
39
44
  uint64_t mask;
40
45
  VALUE (*intern)(struct _cache *c, const char *key, size_t len);
41
- Slot reuse;
42
- size_t rcnt;
46
+ volatile Slot reuse;
47
+ size_t rcnt;
43
48
  #if HAVE_PTHREAD_MUTEX_INIT
44
49
  pthread_mutex_t mutex;
45
50
  #else
@@ -53,22 +58,6 @@ void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
53
58
  c->form = form;
54
59
  }
55
60
 
56
- #if 0
57
- // For debugging only.
58
- static void cache_print(Cache c) {
59
- for (uint64_t i = 0; i < c->size; i++) {
60
- printf("%4d:", i);
61
- for (Slot s = c->slots[i]; NULL != s; s = s->next) {
62
- char buf[40];
63
- strncpy(buf, s->key, s->klen);
64
- buf[s->klen] = '\0';
65
- printf(" %s", buf);
66
- }
67
- printf("\n");
68
- }
69
- }
70
- #endif
71
-
72
61
  static uint64_t hash_calc(const uint8_t *key, size_t len) {
73
62
  const uint8_t *end = key + len;
74
63
  const uint8_t *endless = key + (len & 0xFFFFFFFC);
@@ -104,23 +93,24 @@ static uint64_t hash_calc(const uint8_t *key, size_t len) {
104
93
  }
105
94
 
106
95
  static void rehash(Cache c) {
107
- uint64_t osize = c->size;
96
+ uint64_t osize;
108
97
  Slot * end;
109
98
  Slot * sp;
110
99
 
111
- c->size = osize * 4;
112
- c->mask = c->size - 1;
113
- REALLOC_N(c->slots, Slot, c->size);
114
- memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
115
- end = c->slots + osize;
116
- for (sp = c->slots; sp < end; sp++) {
100
+ osize = c->size;
101
+ c->size = osize * 4;
102
+ c->mask = c->size - 1;
103
+ c->slots = realloc((void *)c->slots, sizeof(Slot) * c->size);
104
+ memset((Slot *)c->slots + osize, 0, sizeof(Slot) * osize * 3);
105
+ end = (Slot *)c->slots + osize;
106
+ for (sp = (Slot *)c->slots; sp < end; sp++) {
117
107
  Slot s = *sp;
118
108
  Slot next = NULL;
119
109
 
120
110
  *sp = NULL;
121
111
  for (; NULL != s; s = next) {
122
112
  uint64_t h = s->hash & c->mask;
123
- Slot * bucket = c->slots + h;
113
+ Slot * bucket = (Slot *)c->slots + h;
124
114
 
125
115
  next = s->next;
126
116
  s->next = *bucket;
@@ -130,14 +120,15 @@ static void rehash(Cache c) {
130
120
  }
131
121
 
132
122
  static VALUE lockless_intern(Cache c, const char *key, size_t len) {
133
- uint64_t h = hash_calc((const uint8_t *)key, len);
134
- Slot * bucket = c->slots + (h & c->mask);
135
- Slot b;
123
+ uint64_t h = hash_calc((const uint8_t *)key, len);
124
+ Slot * bucket = (Slot *)c->slots + (h & c->mask);
125
+ Slot b;
126
+ volatile VALUE rkey;
136
127
 
137
128
  while (REUSE_MAX < c->rcnt) {
138
129
  if (NULL != (b = c->reuse)) {
139
130
  c->reuse = b->next;
140
- xfree(b);
131
+ free(b);
141
132
  c->rcnt--;
142
133
  } else {
143
134
  // An accounting error occured somewhere so correct it.
@@ -146,46 +137,44 @@ static VALUE lockless_intern(Cache c, const char *key, size_t len) {
146
137
  }
147
138
  for (b = *bucket; NULL != b; b = b->next) {
148
139
  if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
149
- b->use_cnt += 4;
140
+ b->use_cnt += 16;
150
141
  return b->val;
151
142
  }
152
143
  }
153
- {
154
- volatile VALUE rkey = c->form(key, len);
155
-
156
- if (NULL == (b = c->reuse)) {
157
- b = ALLOC(struct _slot);
158
- } else {
159
- c->reuse = b->next;
160
- c->rcnt--;
161
- }
162
- b->hash = h;
163
- memcpy(b->key, key, len);
164
- b->klen = (uint8_t)len;
165
- b->key[len] = '\0';
166
- b->val = rkey;
167
- b->use_cnt = 4;
168
- b->next = *bucket;
169
- *bucket = b;
170
- c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
171
- if (REHASH_LIMIT < c->cnt / c->size) {
172
- rehash(c);
173
- }
144
+ rkey = c->form(key, len);
145
+ if (NULL == (b = c->reuse)) {
146
+ b = calloc(1, sizeof(struct _slot));
147
+ } else {
148
+ c->reuse = b->next;
149
+ c->rcnt--;
150
+ }
151
+ b->hash = h;
152
+ memcpy(b->key, key, len);
153
+ b->klen = (uint8_t)len;
154
+ b->key[len] = '\0';
155
+ b->val = rkey;
156
+ b->use_cnt = 4;
157
+ b->next = *bucket;
158
+ *bucket = b;
159
+ c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
160
+ if (REHASH_LIMIT < c->cnt / c->size) {
161
+ rehash(c);
174
162
  }
175
- return b->val;
163
+ return rkey;
176
164
  }
177
165
 
178
166
  static VALUE locking_intern(Cache c, const char *key, size_t len) {
179
- uint64_t h;
180
- Slot * bucket;
181
- Slot b;
182
- uint64_t old_size;
167
+ uint64_t h;
168
+ Slot * bucket;
169
+ Slot b;
170
+ uint64_t old_size;
171
+ volatile VALUE rkey;
183
172
 
184
173
  CACHE_LOCK(c);
185
174
  while (REUSE_MAX < c->rcnt) {
186
175
  if (NULL != (b = c->reuse)) {
187
176
  c->reuse = b->next;
188
- xfree(b);
177
+ free(b);
189
178
  c->rcnt--;
190
179
  } else {
191
180
  // An accounting error occured somewhere so correct it.
@@ -193,53 +182,53 @@ static VALUE locking_intern(Cache c, const char *key, size_t len) {
193
182
  }
194
183
  }
195
184
  h = hash_calc((const uint8_t *)key, len);
196
- bucket = c->slots + (h & c->mask);
185
+ bucket = (Slot *)c->slots + (h & c->mask);
197
186
  for (b = *bucket; NULL != b; b = b->next) {
198
187
  if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
199
188
  b->use_cnt += 4;
200
189
  CACHE_UNLOCK(c);
190
+
201
191
  return b->val;
202
192
  }
203
193
  }
204
194
  old_size = c->size;
205
195
  // The creation of a new value may trigger a GC which be a problem if the
206
196
  // cache is locked so make sure it is unlocked for the key value creation.
207
- if (NULL == (b = c->reuse)) {
208
- b = ALLOC(struct _slot);
209
- } else {
197
+ if (NULL != (b = c->reuse)) {
210
198
  c->reuse = b->next;
211
199
  c->rcnt--;
212
200
  }
213
201
  CACHE_UNLOCK(c);
214
- {
215
- volatile VALUE rkey = c->form(key, len);
216
-
217
- b->hash = h;
218
- memcpy(b->key, key, len);
219
- b->klen = (uint8_t)len;
220
- b->key[len] = '\0';
221
- b->val = rkey;
222
- b->use_cnt = 4;
223
-
224
- // Lock again to add the new entry.
225
- CACHE_LOCK(c);
226
- if (old_size != c->size) {
227
- h = hash_calc((const uint8_t *)key, len);
228
- bucket = c->slots + (h & c->mask);
229
- }
230
- b->next = *bucket;
231
- *bucket = b;
232
- c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
233
- if (REHASH_LIMIT < c->cnt / c->size) {
234
- rehash(c);
235
- }
236
- CACHE_UNLOCK(c);
202
+ if (NULL == b) {
203
+ b = calloc(1, sizeof(struct _slot));
204
+ }
205
+ rkey = c->form(key, len);
206
+ b->hash = h;
207
+ memcpy(b->key, key, len);
208
+ b->klen = (uint8_t)len;
209
+ b->key[len] = '\0';
210
+ b->val = rkey;
211
+ b->use_cnt = 16;
212
+
213
+ // Lock again to add the new entry.
214
+ CACHE_LOCK(c);
215
+ if (old_size != c->size) {
216
+ h = hash_calc((const uint8_t *)key, len);
217
+ bucket = (Slot *)c->slots + (h & c->mask);
218
+ }
219
+ b->next = *bucket;
220
+ *bucket = b;
221
+ c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
222
+ if (REHASH_LIMIT < c->cnt / c->size) {
223
+ rehash(c);
237
224
  }
238
- return b->val;
225
+ CACHE_UNLOCK(c);
226
+
227
+ return rkey;
239
228
  }
240
229
 
241
230
  Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
242
- Cache c = ALLOC(struct _cache);
231
+ Cache c = calloc(1, sizeof(struct _cache));
243
232
  int shift = 0;
244
233
 
245
234
  for (; REHASH_LIMIT < size; size /= 2, shift++) {
@@ -252,16 +241,12 @@ Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool
252
241
  #else
253
242
  c->mutex = rb_mutex_new();
254
243
  #endif
255
- c->size = 1 << shift;
256
- c->mask = c->size - 1;
257
- c->slots = ALLOC_N(Slot, c->size);
258
- memset(c->slots, 0, sizeof(Slot) * c->size);
259
- c->form = form;
260
- c->cnt = 0;
261
- c->xrate = 1; // low
262
- c->mark = mark;
263
- c->reuse = NULL;
264
- c->rcnt = 0;
244
+ c->size = 1 << shift;
245
+ c->mask = c->size - 1;
246
+ c->slots = calloc(c->size, sizeof(Slot));
247
+ c->form = form;
248
+ c->xrate = 1; // low
249
+ c->mark = mark;
265
250
  if (locking) {
266
251
  c->intern = locking_intern;
267
252
  } else {
@@ -283,11 +268,11 @@ void cache_free(Cache c) {
283
268
 
284
269
  for (s = c->slots[i]; NULL != s; s = next) {
285
270
  next = s->next;
286
- xfree(s);
271
+ free(s);
287
272
  }
288
273
  }
289
- xfree(c->slots);
290
- xfree(c);
274
+ free((void *)c->slots);
275
+ free(c);
291
276
  }
292
277
 
293
278
  void cache_mark(Cache c) {
@@ -334,7 +319,7 @@ void cache_mark(Cache c) {
334
319
 
335
320
  VALUE
336
321
  cache_intern(Cache c, const char *key, size_t len) {
337
- if (CACHE_MAX_KEY < len) {
322
+ if (CACHE_MAX_KEY <= len) {
338
323
  return c->form(key, len);
339
324
  }
340
325
  return c->intern(c, key, len);
data/ext/oj/custom.c CHANGED
@@ -955,8 +955,8 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
955
955
  }
956
956
  }
957
957
  } else {
958
- //volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
959
- volatile VALUE rstr = rb_utf8_str_new(str, len);
958
+ volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
959
+ //volatile VALUE rstr = rb_utf8_str_new(str, len);
960
960
 
961
961
  if (Qundef == rkey) {
962
962
  if (Yes == pi->options.sym_key) {
data/ext/oj/intern.c CHANGED
@@ -51,7 +51,7 @@ static VALUE form_str(const char *str, size_t len) {
51
51
  }
52
52
 
53
53
  static VALUE form_sym(const char *str, size_t len) {
54
- return rb_str_intern(rb_utf8_str_new(str, len));
54
+ return rb_to_symbol(rb_str_intern(rb_utf8_str_new(str, len)));
55
55
  }
56
56
 
57
57
  static VALUE form_attr(const char *str, size_t len) {
@@ -112,7 +112,14 @@ void oj_hash_init() {
112
112
 
113
113
  VALUE
114
114
  oj_str_intern(const char *key, size_t len) {
115
+ // For huge cache sizes over half a million the rb_enc_interned_str
116
+ // performs slightly better but at more "normal" size of a several
117
+ // thousands the cache intern performs about 20% better.
118
+ #if HAVE_RB_ENC_INTERNED_STR && 0
119
+ return rb_enc_interned_str(key, len, rb_utf8_encoding());
120
+ #else
115
121
  return cache_intern(str_cache, key, len);
122
+ #endif
116
123
  }
117
124
 
118
125
  VALUE
data/ext/oj/mimic_json.c CHANGED
@@ -516,7 +516,7 @@ static VALUE mimic_parse_core(int argc, VALUE *argv, VALUE self, bool bang) {
516
516
  pi.options = oj_default_options;
517
517
  pi.options.auto_define = No;
518
518
  pi.options.quirks_mode = Yes;
519
- pi.options.allow_invalid = No;
519
+ pi.options.allow_invalid = Yes;
520
520
  pi.options.empty_string = No;
521
521
  pi.options.create_ok = No;
522
522
  pi.options.allow_nan = (bang ? Yes : No);
@@ -573,8 +573,7 @@ static VALUE mimic_parse_core(int argc, VALUE *argv, VALUE self, bool bang) {
573
573
  }
574
574
  }
575
575
  if (oj_hash_has_key(ropts, oj_decimal_class_sym)) {
576
- pi.options.compat_bigdec = (oj_bigdecimal_class ==
577
- rb_hash_lookup(ropts, oj_decimal_class_sym));
576
+ pi.options.compat_bigdec = (oj_bigdecimal_class == rb_hash_lookup(ropts, oj_decimal_class_sym));
578
577
  }
579
578
  v = rb_hash_lookup(ropts, oj_max_nesting_sym);
580
579
  if (Qtrue == v) {
@@ -682,7 +681,7 @@ static VALUE mimic_set_create_id(VALUE self, VALUE id) {
682
681
  */
683
682
  static VALUE mimic_create_id(VALUE self) {
684
683
  if (NULL != oj_default_options.create_id) {
685
- return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
684
+ return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
686
685
  }
687
686
  return rb_str_new_cstr(oj_json_class);
688
687
  }
@@ -706,7 +705,7 @@ static struct _options mimic_object_to_json_options = {0, // indent
706
705
  No, // empty_string
707
706
  Yes, // allow_gc
708
707
  Yes, // quirks_mode
709
- No, // allow_invalid
708
+ Yes, // allow_invalid
710
709
  No, // create_ok
711
710
  No, // allow_nan
712
711
  No, // trace