oj 3.13.3 → 3.13.4

Sign up to get free protection for your applications and to get access to all the features.
data/ext/oj/cache.c CHANGED
@@ -4,9 +4,14 @@
4
4
  #if HAVE_PTHREAD_MUTEX_INIT
5
5
  #include <pthread.h>
6
6
  #endif
7
+ #include <stdlib.h>
7
8
 
8
9
  #include "cache.h"
9
10
 
11
+ // The stdlib calloc, realloc, and free are used instead of the Ruby ALLOC,
12
+ // ALLOC_N, REALLOC, and xfree since the latter could trigger a GC which will
13
+ // either corrupt memory or if the mark function locks will deadlock.
14
+
10
15
  #define REHASH_LIMIT 4
11
16
  #define MIN_SHIFT 8
12
17
  #define REUSE_MAX 8192
@@ -23,23 +28,23 @@
23
28
  #define M 0x5bd1e995
24
29
 
25
30
  typedef struct _slot {
26
- struct _slot *next;
27
- VALUE val;
28
- uint64_t hash;
29
- uint32_t use_cnt;
30
- uint8_t klen;
31
- char key[CACHE_MAX_KEY];
31
+ struct _slot * next;
32
+ VALUE val;
33
+ uint64_t hash;
34
+ volatile uint32_t use_cnt;
35
+ uint8_t klen;
36
+ char key[CACHE_MAX_KEY];
32
37
  } * Slot;
33
38
 
34
39
  typedef struct _cache {
35
- Slot * slots;
36
- size_t cnt;
40
+ volatile Slot * slots;
41
+ volatile size_t cnt;
37
42
  VALUE (*form)(const char *str, size_t len);
38
43
  uint64_t size;
39
44
  uint64_t mask;
40
45
  VALUE (*intern)(struct _cache *c, const char *key, size_t len);
41
- Slot reuse;
42
- size_t rcnt;
46
+ volatile Slot reuse;
47
+ size_t rcnt;
43
48
  #if HAVE_PTHREAD_MUTEX_INIT
44
49
  pthread_mutex_t mutex;
45
50
  #else
@@ -53,22 +58,6 @@ void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
53
58
  c->form = form;
54
59
  }
55
60
 
56
- #if 0
57
- // For debugging only.
58
- static void cache_print(Cache c) {
59
- for (uint64_t i = 0; i < c->size; i++) {
60
- printf("%4d:", i);
61
- for (Slot s = c->slots[i]; NULL != s; s = s->next) {
62
- char buf[40];
63
- strncpy(buf, s->key, s->klen);
64
- buf[s->klen] = '\0';
65
- printf(" %s", buf);
66
- }
67
- printf("\n");
68
- }
69
- }
70
- #endif
71
-
72
61
  static uint64_t hash_calc(const uint8_t *key, size_t len) {
73
62
  const uint8_t *end = key + len;
74
63
  const uint8_t *endless = key + (len & 0xFFFFFFFC);
@@ -104,23 +93,24 @@ static uint64_t hash_calc(const uint8_t *key, size_t len) {
104
93
  }
105
94
 
106
95
  static void rehash(Cache c) {
107
- uint64_t osize = c->size;
96
+ uint64_t osize;
108
97
  Slot * end;
109
98
  Slot * sp;
110
99
 
111
- c->size = osize * 4;
112
- c->mask = c->size - 1;
113
- REALLOC_N(c->slots, Slot, c->size);
114
- memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
115
- end = c->slots + osize;
116
- for (sp = c->slots; sp < end; sp++) {
100
+ osize = c->size;
101
+ c->size = osize * 4;
102
+ c->mask = c->size - 1;
103
+ c->slots = realloc((void *)c->slots, sizeof(Slot) * c->size);
104
+ memset((Slot *)c->slots + osize, 0, sizeof(Slot) * osize * 3);
105
+ end = (Slot *)c->slots + osize;
106
+ for (sp = (Slot *)c->slots; sp < end; sp++) {
117
107
  Slot s = *sp;
118
108
  Slot next = NULL;
119
109
 
120
110
  *sp = NULL;
121
111
  for (; NULL != s; s = next) {
122
112
  uint64_t h = s->hash & c->mask;
123
- Slot * bucket = c->slots + h;
113
+ Slot * bucket = (Slot *)c->slots + h;
124
114
 
125
115
  next = s->next;
126
116
  s->next = *bucket;
@@ -130,14 +120,15 @@ static void rehash(Cache c) {
130
120
  }
131
121
 
132
122
  static VALUE lockless_intern(Cache c, const char *key, size_t len) {
133
- uint64_t h = hash_calc((const uint8_t *)key, len);
134
- Slot * bucket = c->slots + (h & c->mask);
135
- Slot b;
123
+ uint64_t h = hash_calc((const uint8_t *)key, len);
124
+ Slot * bucket = (Slot *)c->slots + (h & c->mask);
125
+ Slot b;
126
+ volatile VALUE rkey;
136
127
 
137
128
  while (REUSE_MAX < c->rcnt) {
138
129
  if (NULL != (b = c->reuse)) {
139
130
  c->reuse = b->next;
140
- xfree(b);
131
+ free(b);
141
132
  c->rcnt--;
142
133
  } else {
143
134
  // An accounting error occurred somewhere so correct it.
@@ -146,46 +137,44 @@ static VALUE lockless_intern(Cache c, const char *key, size_t len) {
146
137
  }
147
138
  for (b = *bucket; NULL != b; b = b->next) {
148
139
  if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
149
- b->use_cnt += 4;
140
+ b->use_cnt += 16;
150
141
  return b->val;
151
142
  }
152
143
  }
153
- {
154
- volatile VALUE rkey = c->form(key, len);
155
-
156
- if (NULL == (b = c->reuse)) {
157
- b = ALLOC(struct _slot);
158
- } else {
159
- c->reuse = b->next;
160
- c->rcnt--;
161
- }
162
- b->hash = h;
163
- memcpy(b->key, key, len);
164
- b->klen = (uint8_t)len;
165
- b->key[len] = '\0';
166
- b->val = rkey;
167
- b->use_cnt = 4;
168
- b->next = *bucket;
169
- *bucket = b;
170
- c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
171
- if (REHASH_LIMIT < c->cnt / c->size) {
172
- rehash(c);
173
- }
144
+ rkey = c->form(key, len);
145
+ if (NULL == (b = c->reuse)) {
146
+ b = calloc(1, sizeof(struct _slot));
147
+ } else {
148
+ c->reuse = b->next;
149
+ c->rcnt--;
150
+ }
151
+ b->hash = h;
152
+ memcpy(b->key, key, len);
153
+ b->klen = (uint8_t)len;
154
+ b->key[len] = '\0';
155
+ b->val = rkey;
156
+ b->use_cnt = 4;
157
+ b->next = *bucket;
158
+ *bucket = b;
159
+ c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
160
+ if (REHASH_LIMIT < c->cnt / c->size) {
161
+ rehash(c);
174
162
  }
175
- return b->val;
163
+ return rkey;
176
164
  }
177
165
 
178
166
  static VALUE locking_intern(Cache c, const char *key, size_t len) {
179
- uint64_t h;
180
- Slot * bucket;
181
- Slot b;
182
- uint64_t old_size;
167
+ uint64_t h;
168
+ Slot * bucket;
169
+ Slot b;
170
+ uint64_t old_size;
171
+ volatile VALUE rkey;
183
172
 
184
173
  CACHE_LOCK(c);
185
174
  while (REUSE_MAX < c->rcnt) {
186
175
  if (NULL != (b = c->reuse)) {
187
176
  c->reuse = b->next;
188
- xfree(b);
177
+ free(b);
189
178
  c->rcnt--;
190
179
  } else {
191
180
  // An accounting error occurred somewhere so correct it.
@@ -193,53 +182,53 @@ static VALUE locking_intern(Cache c, const char *key, size_t len) {
193
182
  }
194
183
  }
195
184
  h = hash_calc((const uint8_t *)key, len);
196
- bucket = c->slots + (h & c->mask);
185
+ bucket = (Slot *)c->slots + (h & c->mask);
197
186
  for (b = *bucket; NULL != b; b = b->next) {
198
187
  if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
199
188
  b->use_cnt += 4;
200
189
  CACHE_UNLOCK(c);
190
+
201
191
  return b->val;
202
192
  }
203
193
  }
204
194
  old_size = c->size;
205
195
  // The creation of a new value may trigger a GC which may be a problem if the
206
196
  // cache is locked so make sure it is unlocked for the key value creation.
207
- if (NULL == (b = c->reuse)) {
208
- b = ALLOC(struct _slot);
209
- } else {
197
+ if (NULL != (b = c->reuse)) {
210
198
  c->reuse = b->next;
211
199
  c->rcnt--;
212
200
  }
213
201
  CACHE_UNLOCK(c);
214
- {
215
- volatile VALUE rkey = c->form(key, len);
216
-
217
- b->hash = h;
218
- memcpy(b->key, key, len);
219
- b->klen = (uint8_t)len;
220
- b->key[len] = '\0';
221
- b->val = rkey;
222
- b->use_cnt = 4;
223
-
224
- // Lock again to add the new entry.
225
- CACHE_LOCK(c);
226
- if (old_size != c->size) {
227
- h = hash_calc((const uint8_t *)key, len);
228
- bucket = c->slots + (h & c->mask);
229
- }
230
- b->next = *bucket;
231
- *bucket = b;
232
- c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
233
- if (REHASH_LIMIT < c->cnt / c->size) {
234
- rehash(c);
235
- }
236
- CACHE_UNLOCK(c);
202
+ if (NULL == b) {
203
+ b = calloc(1, sizeof(struct _slot));
204
+ }
205
+ rkey = c->form(key, len);
206
+ b->hash = h;
207
+ memcpy(b->key, key, len);
208
+ b->klen = (uint8_t)len;
209
+ b->key[len] = '\0';
210
+ b->val = rkey;
211
+ b->use_cnt = 16;
212
+
213
+ // Lock again to add the new entry.
214
+ CACHE_LOCK(c);
215
+ if (old_size != c->size) {
216
+ h = hash_calc((const uint8_t *)key, len);
217
+ bucket = (Slot *)c->slots + (h & c->mask);
218
+ }
219
+ b->next = *bucket;
220
+ *bucket = b;
221
+ c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
222
+ if (REHASH_LIMIT < c->cnt / c->size) {
223
+ rehash(c);
237
224
  }
238
- return b->val;
225
+ CACHE_UNLOCK(c);
226
+
227
+ return rkey;
239
228
  }
240
229
 
241
230
  Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
242
- Cache c = ALLOC(struct _cache);
231
+ Cache c = calloc(1, sizeof(struct _cache));
243
232
  int shift = 0;
244
233
 
245
234
  for (; REHASH_LIMIT < size; size /= 2, shift++) {
@@ -252,16 +241,12 @@ Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool
252
241
  #else
253
242
  c->mutex = rb_mutex_new();
254
243
  #endif
255
- c->size = 1 << shift;
256
- c->mask = c->size - 1;
257
- c->slots = ALLOC_N(Slot, c->size);
258
- memset(c->slots, 0, sizeof(Slot) * c->size);
259
- c->form = form;
260
- c->cnt = 0;
261
- c->xrate = 1; // low
262
- c->mark = mark;
263
- c->reuse = NULL;
264
- c->rcnt = 0;
244
+ c->size = 1 << shift;
245
+ c->mask = c->size - 1;
246
+ c->slots = calloc(c->size, sizeof(Slot));
247
+ c->form = form;
248
+ c->xrate = 1; // low
249
+ c->mark = mark;
265
250
  if (locking) {
266
251
  c->intern = locking_intern;
267
252
  } else {
@@ -283,11 +268,11 @@ void cache_free(Cache c) {
283
268
 
284
269
  for (s = c->slots[i]; NULL != s; s = next) {
285
270
  next = s->next;
286
- xfree(s);
271
+ free(s);
287
272
  }
288
273
  }
289
- xfree(c->slots);
290
- xfree(c);
274
+ free((void *)c->slots);
275
+ free(c);
291
276
  }
292
277
 
293
278
  void cache_mark(Cache c) {
@@ -334,7 +319,7 @@ void cache_mark(Cache c) {
334
319
 
335
320
  VALUE
336
321
  cache_intern(Cache c, const char *key, size_t len) {
337
- if (CACHE_MAX_KEY < len) {
322
+ if (CACHE_MAX_KEY <= len) {
338
323
  return c->form(key, len);
339
324
  }
340
325
  return c->intern(c, key, len);
data/ext/oj/intern.c CHANGED
@@ -51,7 +51,7 @@ static VALUE form_str(const char *str, size_t len) {
51
51
  }
52
52
 
53
53
  static VALUE form_sym(const char *str, size_t len) {
54
- return rb_str_intern(rb_utf8_str_new(str, len));
54
+ return rb_to_symbol(rb_str_intern(rb_utf8_str_new(str, len)));
55
55
  }
56
56
 
57
57
  static VALUE form_attr(const char *str, size_t len) {
@@ -112,7 +112,14 @@ void oj_hash_init() {
112
112
 
113
113
  VALUE
114
114
  oj_str_intern(const char *key, size_t len) {
115
+ // For huge cache sizes over half a million the rb_enc_interned_str
116
+ // performs slightly better but at more "normal" size of a several
117
+ // thousands the cache intern performs about 20% better.
118
+ #if HAVE_RB_ENC_INTERNED_STR && 0
119
+ return rb_enc_interned_str(key, len, rb_utf8_encoding());
120
+ #else
115
121
  return cache_intern(str_cache, key, len);
122
+ #endif
116
123
  }
117
124
 
118
125
  VALUE
data/ext/oj/oj.c CHANGED
@@ -107,6 +107,7 @@ static VALUE bigdecimal_load_sym;
107
107
  static VALUE bigdecimal_sym;
108
108
  static VALUE cache_keys_sym;
109
109
  static VALUE cache_str_sym;
110
+ static VALUE cache_string_sym;
110
111
  static VALUE circular_sym;
111
112
  static VALUE class_cache_sym;
112
113
  static VALUE compat_bigdecimal_sym;
@@ -287,7 +288,7 @@ struct _options oj_default_options = {
287
288
  * - *:ignore* [_nil_|_Array_] either nil or an Array of classes to ignore when dumping
288
289
  * - *:ignore_under* [_Boolean_] if true then attributes that start with _ are ignored when dumping in
289
290
  *object or custom mode.
290
- * - *:cache_keys* [_Boolean_] if true then hash keys are cached
291
+ * - *:cache_keys* [_Boolean_] if true then hash keys are cached if less than 35 bytes.
291
292
  * - *:cache_str* [_Fixnum_] maximum string value length to cache (strings less than this are cached)
292
293
  * - *:integer_range* [_Range_] Dump integers outside range as strings.
293
294
  * - *:trace* [_true,_|_false_] Trace all load and dump calls, default is false (trace is off)
@@ -692,7 +693,7 @@ static int parse_options_cb(VALUE k, VALUE v, VALUE opts)
692
693
  sprintf(copts->float_fmt, "%%0.%dg", n);
693
694
  copts->float_prec = n;
694
695
  }
695
- } else if (cache_str_sym == k) {
696
+ } else if (cache_str_sym == k || cache_string_sym == k) {
696
697
  int n;
697
698
 
698
699
  #ifdef RUBY_INTEGER_UNIFICATION
@@ -1920,6 +1921,8 @@ void Init_oj() {
1920
1921
  rb_gc_register_address(&cache_keys_sym);
1921
1922
  cache_str_sym = ID2SYM(rb_intern("cache_str"));
1922
1923
  rb_gc_register_address(&cache_str_sym);
1924
+ cache_string_sym = ID2SYM(rb_intern("cache_string"));
1925
+ rb_gc_register_address(&cache_string_sym);
1923
1926
  circular_sym = ID2SYM(rb_intern("circular"));
1924
1927
  rb_gc_register_address(&circular_sym);
1925
1928
  class_cache_sym = ID2SYM(rb_intern("class_cache"));
data/lib/oj/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Oj
3
3
  # Current version of the module.
4
- VERSION = '3.13.3'
4
+ VERSION = '3.13.4'
5
5
  end
data/pages/Options.md CHANGED
@@ -75,12 +75,22 @@ parse option to match the JSON gem. In that case either `Float`,
75
75
  If true Hash keys are cached or interned. There are trade-offs with
76
76
  caching keys. Large caches will use more memory and in extreme cases
77
77
  (like over a million) the cache may be slower than not using
78
- it. Repeated parsing of similar JSON docs is where cache_keys shines.
78
+ it. Repeated parsing of similar JSON docs is where cache_keys shines
79
+ especially with symbol keys.
80
+
81
+ There is a maximum length for cached keys. Any key longer than 34
82
+ bytes is not cached. Everything still works but the key is not cached.
79
83
 
80
84
  ### :cache_strings [Int]
81
85
 
82
86
  Shorter strings can be cached for better performance. A limit,
83
- cache_strings, defines the upper limit on what strings are cached.
87
+ cache_strings, defines the upper limit on what strings are cached. As
88
+ with cached keys only strings less than 35 bytes are cached even if
89
+ the limit is set higher. Setting the limit to zero effectively
90
+ disables the caching of string values.
91
+
92
+ Note that caching for strings is for string values and not Hash keys
93
+ or Object attributes.
84
94
 
85
95
  ### :circular [Boolean]
86
96
 
data/test/benny.rb ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal
3
+
4
+ require 'bundler/inline'
5
+
6
+ gemfile do
7
+ source 'https://rubygems.org'
8
+
9
+ gem 'oj'
10
+ gem 'benchmark-ips', require: 'benchmark/ips'
11
+ end
12
+
13
+ require 'json'
14
+ require 'open-uri'
15
+
16
+ CANADA_DATA_JSON = URI.parse('https://raw.githubusercontent.com/serde-rs/json-benchmark/master/data/canada.json').read
17
+ CANADA_DATA = JSON.parse(CANADA_DATA_JSON)
18
+
19
+ Benchmark.ips do |x|
20
+ x.config(:time => 10, :warmup => 5)
21
+
22
+ x.report("marshall Canada data with Oj") do
23
+ Oj.dump(CANADA_DATA)
24
+ end
25
+
26
+ x.report("marshall Canada data with JSON") do
27
+ JSON.dump(CANADA_DATA)
28
+ end
29
+
30
+ x.compare!
31
+ end
32
+
33
+ Oj.default_options = {
34
+ mode: :strict,
35
+ bigdecimal_load: :fast
36
+ }
37
+
38
+ Benchmark.ips do |x|
39
+ x.config(:time => 10, :warmup => 5)
40
+
41
+ x.report("unmarshall Canada data with Oj") do
42
+ Oj.load(CANADA_DATA_JSON)
43
+ end
44
+
45
+ x.report("unmarshall Canada data with JSON") do
46
+ JSON.parse(CANADA_DATA_JSON)
47
+ end
48
+
49
+ x.compare!
50
+ end