oj 3.13.3 → 3.13.7

data/README.md CHANGED
@@ -64,7 +64,7 @@ links.
 
  ## Releases
 
- See [{file:CHANGELOG.md}](CHANGELOG.md)
+ See [{file:CHANGELOG.md}](CHANGELOG.md) and [{file:RELEASE_NOTES.md}](RELEASE_NOTES.md)
 
  ## Links
 
data/RELEASE_NOTES.md ADDED
@@ -0,0 +1,61 @@
+ # RELEASE NOTES
+
+ The release notes here are organized by release. For a list of changes
+ see the [{file:CHANGELOG.md}](CHANGELOG.md) file. This file describes
+ the steps to take to keep things rolling after updating to the
+ latest version.
+
+ ## 3.13.7
+
+ The default for JSON when mimicked by Oj is now to set
+ `:allow_invalid_unicode`. To change that behavior for JSON.load, set
+ that option to false.
+
+ ## 3.13.x
+
+ This release included a new cache that performs better than the
+ earlier cache and a new high performance parser.
+
+ ### Cache
+
+ The new cache includes least recently used expiration to reduce
+ memory use. The cache is also self adjusting and will expand as needed
+ for better performance. It handles Hash keys and string values
+ with two options: `:cache_keys`, a boolean, and `:cache_str`, an
+ integer. If `:cache_str` is set to more than zero it is the limit on
+ the length of string values to cache. The maximum value is 35, which
+ allows strings up to 34 bytes to be cached.
+
+ One interesting aspect of the cache is not so much the string caching,
+ which performs similarly to the Ruby intern functions, but the caching
+ of symbols and object attribute names. There is a significant gain for
+ symbols and object attributes.
+
+ If the cache is not desired, it can be turned off in the default
+ options with this line:
+
+ ``` ruby
+ Oj.default_options = { cache_keys: false, cache_str: 0 }
+ ```
+
+ ### Oj::Parser
+
+ The new parser uses a different core that follows the approach taken
+ by [OjC](https://github.com/ohler55/ojc) and
+ [OjG](https://github.com/ohler55/ojg). It also takes advantage of the
+ bulk Array and Hash functions. Another issue the new parser addresses
+ is option management. Instead of a single global default_options, each
+ parser instance maintains its own options.
+
+ There is a price to be paid when using Oj::Parser. The API is not
+ the same as the older parser's, and a single parser can only be used
+ in a single thread. This allows reuse of internal buffers for
+ additional improvements in performance.
+
+ The performance advantage of Oj::Parser is that it is more than 3
+ times faster than the Oj.compat_load call and 6 times faster than the
+ JSON gem.
+
+ ### Dump Performance
+
+ Thanks to Watson1978, Oj.dump also received a speed boost.
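
The notes above describe Oj::Parser only in outline, so here is a minimal usage sketch. It assumes the `:usual` parser mode and the per-instance option writers that Oj documents for the new parser:

``` ruby
require "oj"

# Each parser instance carries its own options; nothing global changes.
parser = Oj::Parser.new(:usual)
parser.symbol_keys = true        # set on this instance, not on Oj.default_options

parser.parse('{"alpha":1,"beta":[2,3]}')
# => {:alpha=>1, :beta=>[2, 3]}

# The parser reuses internal buffers between calls, which is where part
# of the speedup comes from, so confine each instance to a single thread.
```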
data/ext/oj/cache.c CHANGED
@@ -4,9 +4,14 @@
  #if HAVE_PTHREAD_MUTEX_INIT
  #include <pthread.h>
  #endif
+ #include <stdlib.h>
 
  #include "cache.h"
 
+ // The stdlib calloc, realloc, and free are used instead of the Ruby ALLOC,
+ // ALLOC_N, REALLOC, and xfree since the latter could trigger a GC, which
+ // would either corrupt memory or, if the mark function locks, deadlock.
+
  #define REHASH_LIMIT 4
  #define MIN_SHIFT 8
  #define REUSE_MAX 8192
@@ -23,23 +28,23 @@
  #define M 0x5bd1e995
 
  typedef struct _slot {
- struct _slot *next;
- VALUE val;
- uint64_t hash;
- uint32_t use_cnt;
- uint8_t klen;
- char key[CACHE_MAX_KEY];
+ struct _slot * next;
+ VALUE val;
+ uint64_t hash;
+ volatile uint32_t use_cnt;
+ uint8_t klen;
+ char key[CACHE_MAX_KEY];
  } * Slot;
 
  typedef struct _cache {
- Slot * slots;
- size_t cnt;
+ volatile Slot * slots;
+ volatile size_t cnt;
  VALUE (*form)(const char *str, size_t len);
  uint64_t size;
  uint64_t mask;
  VALUE (*intern)(struct _cache *c, const char *key, size_t len);
- Slot reuse;
- size_t rcnt;
+ volatile Slot reuse;
+ size_t rcnt;
  #if HAVE_PTHREAD_MUTEX_INIT
  pthread_mutex_t mutex;
  #else
@@ -53,22 +58,6 @@ void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
  c->form = form;
  }
 
- #if 0
- // For debugging only.
- static void cache_print(Cache c) {
- for (uint64_t i = 0; i < c->size; i++) {
- printf("%4d:", i);
- for (Slot s = c->slots[i]; NULL != s; s = s->next) {
- char buf[40];
- strncpy(buf, s->key, s->klen);
- buf[s->klen] = '\0';
- printf(" %s", buf);
- }
- printf("\n");
- }
- }
- #endif
-
  static uint64_t hash_calc(const uint8_t *key, size_t len) {
  const uint8_t *end = key + len;
  const uint8_t *endless = key + (len & 0xFFFFFFFC);
@@ -104,23 +93,24 @@ static uint64_t hash_calc(const uint8_t *key, size_t len) {
  }
 
  static void rehash(Cache c) {
- uint64_t osize = c->size;
+ uint64_t osize;
  Slot * end;
  Slot * sp;
 
- c->size = osize * 4;
- c->mask = c->size - 1;
- REALLOC_N(c->slots, Slot, c->size);
- memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
- end = c->slots + osize;
- for (sp = c->slots; sp < end; sp++) {
+ osize = c->size;
+ c->size = osize * 4;
+ c->mask = c->size - 1;
+ c->slots = realloc((void *)c->slots, sizeof(Slot) * c->size);
+ memset((Slot *)c->slots + osize, 0, sizeof(Slot) * osize * 3);
+ end = (Slot *)c->slots + osize;
+ for (sp = (Slot *)c->slots; sp < end; sp++) {
  Slot s = *sp;
  Slot next = NULL;
 
  *sp = NULL;
  for (; NULL != s; s = next) {
  uint64_t h = s->hash & c->mask;
- Slot * bucket = c->slots + h;
+ Slot * bucket = (Slot *)c->slots + h;
 
  next = s->next;
  s->next = *bucket;
@@ -130,14 +120,15 @@ static void rehash(Cache c) {
  }
  }
 
  static VALUE lockless_intern(Cache c, const char *key, size_t len) {
- uint64_t h = hash_calc((const uint8_t *)key, len);
- Slot * bucket = c->slots + (h & c->mask);
- Slot b;
+ uint64_t h = hash_calc((const uint8_t *)key, len);
+ Slot * bucket = (Slot *)c->slots + (h & c->mask);
+ Slot b;
+ volatile VALUE rkey;
 
  while (REUSE_MAX < c->rcnt) {
  if (NULL != (b = c->reuse)) {
  c->reuse = b->next;
- xfree(b);
+ free(b);
  c->rcnt--;
  } else {
  // An accounting error occurred somewhere so correct it.
@@ -146,46 +137,44 @@ static VALUE lockless_intern(Cache c, const char *key, size_t len) {
  }
  for (b = *bucket; NULL != b; b = b->next) {
  if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
- b->use_cnt += 4;
+ b->use_cnt += 16;
  return b->val;
  }
  }
- {
- volatile VALUE rkey = c->form(key, len);
-
- if (NULL == (b = c->reuse)) {
- b = ALLOC(struct _slot);
- } else {
- c->reuse = b->next;
- c->rcnt--;
- }
- b->hash = h;
- memcpy(b->key, key, len);
- b->klen = (uint8_t)len;
- b->key[len] = '\0';
- b->val = rkey;
- b->use_cnt = 4;
- b->next = *bucket;
- *bucket = b;
- c->cnt++; // Don't worry about wrapping. Worst case is the entry is removed and recreated.
- if (REHASH_LIMIT < c->cnt / c->size) {
- rehash(c);
- }
+ rkey = c->form(key, len);
+ if (NULL == (b = c->reuse)) {
+ b = calloc(1, sizeof(struct _slot));
+ } else {
+ c->reuse = b->next;
+ c->rcnt--;
+ }
+ b->hash = h;
+ memcpy(b->key, key, len);
+ b->klen = (uint8_t)len;
+ b->key[len] = '\0';
+ b->val = rkey;
+ b->use_cnt = 4;
+ b->next = *bucket;
+ *bucket = b;
+ c->cnt++; // Don't worry about wrapping. Worst case is the entry is removed and recreated.
+ if (REHASH_LIMIT < c->cnt / c->size) {
+ rehash(c);
  }
- return b->val;
+ return rkey;
  }
 
  static VALUE locking_intern(Cache c, const char *key, size_t len) {
- uint64_t h;
- Slot * bucket;
- Slot b;
- uint64_t old_size;
+ uint64_t h;
+ Slot * bucket;
+ Slot b;
+ uint64_t old_size;
+ volatile VALUE rkey;
 
  CACHE_LOCK(c);
  while (REUSE_MAX < c->rcnt) {
  if (NULL != (b = c->reuse)) {
  c->reuse = b->next;
- xfree(b);
+ free(b);
  c->rcnt--;
  } else {
  // An accounting error occurred somewhere so correct it.
@@ -193,53 +182,53 @@ static VALUE locking_intern(Cache c, const char *key, size_t len) {
  }
  }
  h = hash_calc((const uint8_t *)key, len);
- bucket = c->slots + (h & c->mask);
+ bucket = (Slot *)c->slots + (h & c->mask);
  for (b = *bucket; NULL != b; b = b->next) {
  if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
  b->use_cnt += 4;
  CACHE_UNLOCK(c);
+
  return b->val;
  }
  }
  old_size = c->size;
  // The creation of a new value may trigger a GC which would be a problem if the
  // cache is locked so make sure it is unlocked for the key value creation.
- if (NULL == (b = c->reuse)) {
- b = ALLOC(struct _slot);
- } else {
+ if (NULL != (b = c->reuse)) {
  c->reuse = b->next;
  c->rcnt--;
  }
  CACHE_UNLOCK(c);
- {
- volatile VALUE rkey = c->form(key, len);
-
- b->hash = h;
- memcpy(b->key, key, len);
- b->klen = (uint8_t)len;
- b->key[len] = '\0';
- b->val = rkey;
- b->use_cnt = 4;
-
- // Lock again to add the new entry.
- CACHE_LOCK(c);
- if (old_size != c->size) {
- h = hash_calc((const uint8_t *)key, len);
- bucket = c->slots + (h & c->mask);
- }
- b->next = *bucket;
- *bucket = b;
- c->cnt++; // Don't worry about wrapping. Worst case is the entry is removed and recreated.
- if (REHASH_LIMIT < c->cnt / c->size) {
- rehash(c);
- }
- CACHE_UNLOCK(c);
+ if (NULL == b) {
+ b = calloc(1, sizeof(struct _slot));
+ }
+ rkey = c->form(key, len);
+ b->hash = h;
+ memcpy(b->key, key, len);
+ b->klen = (uint8_t)len;
+ b->key[len] = '\0';
+ b->val = rkey;
+ b->use_cnt = 16;
+
+ // Lock again to add the new entry.
+ CACHE_LOCK(c);
+ if (old_size != c->size) {
+ h = hash_calc((const uint8_t *)key, len);
+ bucket = (Slot *)c->slots + (h & c->mask);
+ }
+ b->next = *bucket;
+ *bucket = b;
+ c->cnt++; // Don't worry about wrapping. Worst case is the entry is removed and recreated.
+ if (REHASH_LIMIT < c->cnt / c->size) {
+ rehash(c);
  }
- return b->val;
+ CACHE_UNLOCK(c);
+
+ return rkey;
  }
 
  Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
- Cache c = ALLOC(struct _cache);
+ Cache c = calloc(1, sizeof(struct _cache));
  int shift = 0;
 
  for (; REHASH_LIMIT < size; size /= 2, shift++) {
@@ -252,16 +241,12 @@ Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool
  #else
  c->mutex = rb_mutex_new();
  #endif
- c->size = 1 << shift;
- c->mask = c->size - 1;
- c->slots = ALLOC_N(Slot, c->size);
- memset(c->slots, 0, sizeof(Slot) * c->size);
- c->form = form;
- c->cnt = 0;
- c->xrate = 1; // low
- c->mark = mark;
- c->reuse = NULL;
- c->rcnt = 0;
+ c->size = 1 << shift;
+ c->mask = c->size - 1;
+ c->slots = calloc(c->size, sizeof(Slot));
+ c->form = form;
+ c->xrate = 1; // low
+ c->mark = mark;
  if (locking) {
  c->intern = locking_intern;
  } else {
@@ -283,11 +268,11 @@ void cache_free(Cache c) {
 
  for (s = c->slots[i]; NULL != s; s = next) {
  next = s->next;
- xfree(s);
+ free(s);
  }
  }
- xfree(c->slots);
- xfree(c);
+ free((void *)c->slots);
+ free(c);
  }
 
  void cache_mark(Cache c) {
@@ -334,7 +319,7 @@ void cache_mark(Cache c) {
 
  VALUE
  cache_intern(Cache c, const char *key, size_t len) {
- if (CACHE_MAX_KEY < len) {
+ if (CACHE_MAX_KEY <= len) {
  return c->form(key, len);
  }
  return c->intern(c, key, len);
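
From Ruby, the cache in cache.c is visible only through the `:cache_keys` and `:cache_str` options described in the release notes above. A minimal sketch of those knobs (the key-length cutoff of 35 comes from the notes; the caching itself is internal and not directly observable):

``` ruby
require "oj"

# Cache Hash keys, and cache string values up to 34 bytes long.
Oj.default_options = { cache_keys: true, cache_str: 35 }

Oj.load('{"name":"short"}')            # "name" and "short" are cache candidates
Oj.load(%({"blob":"#{"x" * 64}"}))     # the 64-byte value bypasses the cache
```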
data/ext/oj/custom.c CHANGED
@@ -955,8 +955,8 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
  }
  }
  } else {
- //volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
- volatile VALUE rstr = rb_utf8_str_new(str, len);
+ volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
+ //volatile VALUE rstr = rb_utf8_str_new(str, len);
 
  if (Qundef == rkey) {
  if (Yes == pi->options.sym_key) {
data/ext/oj/intern.c CHANGED
@@ -51,7 +51,7 @@ static VALUE form_str(const char *str, size_t len) {
  }
 
  static VALUE form_sym(const char *str, size_t len) {
- return rb_str_intern(rb_utf8_str_new(str, len));
+ return rb_to_symbol(rb_str_intern(rb_utf8_str_new(str, len)));
  }
 
  static VALUE form_attr(const char *str, size_t len) {
@@ -112,7 +112,14 @@ void oj_hash_init() {
 
  VALUE
  oj_str_intern(const char *key, size_t len) {
+ // For huge cache sizes over half a million entries rb_enc_interned_str
+ // performs slightly better, but at more "normal" sizes of several
+ // thousand the cache intern performs about 20% better.
+ #if HAVE_RB_ENC_INTERNED_STR && 0
+ return rb_enc_interned_str(key, len, rb_utf8_encoding());
+ #else
  return cache_intern(str_cache, key, len);
+ #endif
  }
 
  VALUE
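
The trade-off described in the comment above (the cache intern versus `rb_enc_interned_str`) can be probed from Ruby by toggling key caching. A rough benchmark sketch, assuming `:cache_keys` and `:symbol_keys` are honored as per-call load options; the numbers will vary with the Ruby version and how full the cache is:

``` ruby
require "oj"
require "benchmark"

json = '{"alpha":1,"beta":2,"gamma":3,"delta":4}'

Benchmark.bm(10) do |b|
  b.report("cached")   { 200_000.times { Oj.load(json, symbol_keys: true, cache_keys: true) } }
  b.report("uncached") { 200_000.times { Oj.load(json, symbol_keys: true, cache_keys: false) } }
end
```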
data/ext/oj/mimic_json.c CHANGED
@@ -516,7 +516,7 @@ static VALUE mimic_parse_core(int argc, VALUE *argv, VALUE self, bool bang) {
  pi.options = oj_default_options;
  pi.options.auto_define = No;
  pi.options.quirks_mode = Yes;
- pi.options.allow_invalid = No;
+ pi.options.allow_invalid = Yes;
  pi.options.empty_string = No;
  pi.options.create_ok = No;
  pi.options.allow_nan = (bang ? Yes : No);
@@ -573,8 +573,7 @@ static VALUE mimic_parse_core(int argc, VALUE *argv, VALUE self, bool bang) {
  }
  }
  if (oj_hash_has_key(ropts, oj_decimal_class_sym)) {
- pi.options.compat_bigdec = (oj_bigdecimal_class ==
- rb_hash_lookup(ropts, oj_decimal_class_sym));
+ pi.options.compat_bigdec = (oj_bigdecimal_class == rb_hash_lookup(ropts, oj_decimal_class_sym));
  }
  v = rb_hash_lookup(ropts, oj_max_nesting_sym);
  if (Qtrue == v) {
@@ -682,7 +681,7 @@ static VALUE mimic_set_create_id(VALUE self, VALUE id) {
  */
  static VALUE mimic_create_id(VALUE self) {
  if (NULL != oj_default_options.create_id) {
- return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
+ return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
  }
  return rb_str_new_cstr(oj_json_class);
  }
@@ -706,7 +705,7 @@ static struct _options mimic_object_to_json_options = {0, // indent
  No, // empty_string
  Yes, // allow_gc
  Yes, // quirks_mode
- No, // allow_invalid
+ Yes, // allow_invalid
  No, // create_ok
  No, // allow_nan
  No, // trace
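
The two `allow_invalid` flips above are what implement the 3.13.7 note at the top of this page: invalid unicode is now tolerated by default when Oj mimics the JSON gem. A minimal sketch of the visible difference, assuming the mimic accepts `:allow_invalid_unicode` as a per-call option as the release notes state:

``` ruby
require "oj"
Oj.mimic_JSON

json = '{"bad":"\ud800"}'   # lone surrogate, invalid unicode

JSON.parse(json)                                 # tolerated under the new default
JSON.parse(json, allow_invalid_unicode: false)   # restores the old strict behavior
```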