oj 3.13.0 → 3.13.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -53,7 +53,7 @@ For more details on options, modes, advanced features, and more follow these
53
53
  links.
54
54
 
55
55
  - [{file:Options.md}](pages/Options.md) for parse and dump options.
56
- - [{file:Modes.md}](pages/Modes.md) for details on modes for strict JSON compliance, mimicing the JSON gem, and mimicing Rails and ActiveSupport behavior.
56
+ - [{file:Modes.md}](pages/Modes.md) for details on modes for strict JSON compliance, mimicking the JSON gem, and mimicking Rails and ActiveSupport behavior.
57
57
  - [{file:JsonGem.md}](pages/JsonGem.md) includes more details on json gem compatibility and use.
58
58
  - [{file:Rails.md}](pages/Rails.md) includes more details on Rails and ActiveSupport compatibility and use.
59
59
  - [{file:Custom.md}](pages/Custom.md) includes more details on Custom mode.
data/ext/oj/cache.c CHANGED
@@ -1,65 +1,74 @@
1
1
  // Copyright (c) 2011, 2021 Peter Ohler. All rights reserved.
2
2
  // Licensed under the MIT License. See LICENSE file in the project root for license details.
3
3
 
4
+ #if HAVE_PTHREAD_MUTEX_INIT
5
+ #include <pthread.h>
6
+ #endif
7
+ #include <stdlib.h>
8
+
4
9
  #include "cache.h"
5
10
 
6
- #define REHASH_LIMIT 64
11
+ // The stdlib calloc, realloc, and free are used instead of the Ruby ALLOC,
12
+ // ALLOC_N, REALLOC, and xfree since the latter could trigger a GC which will
13
+ // either corrupt memory or, if the mark function locks, will deadlock.
14
+
15
+ #define REHASH_LIMIT 4
7
16
  #define MIN_SHIFT 8
17
+ #define REUSE_MAX 8192
18
+
19
+ #if HAVE_PTHREAD_MUTEX_INIT
20
+ #define CACHE_LOCK(c) pthread_mutex_lock(&((c)->mutex))
21
+ #define CACHE_UNLOCK(c) pthread_mutex_unlock(&((c)->mutex))
22
+ #else
23
+ #define CACHE_LOCK(c) rb_mutex_lock((c)->mutex)
24
+ #define CACHE_UNLOCK(c) rb_mutex_unlock((c)->mutex)
25
+ #endif
26
+
27
+ // almost the Murmur hash algorithm
28
+ #define M 0x5bd1e995
8
29
 
9
30
  typedef struct _slot {
10
- struct _slot *next;
11
- VALUE val;
12
- uint32_t hash;
13
- uint8_t klen;
14
- char key[CACHE_MAX_KEY];
31
+ struct _slot * next;
32
+ VALUE val;
33
+ uint64_t hash;
34
+ volatile uint32_t use_cnt;
35
+ uint8_t klen;
36
+ char key[CACHE_MAX_KEY];
15
37
  } * Slot;
16
38
 
17
39
  typedef struct _cache {
18
- Slot * slots;
19
- size_t cnt;
40
+ volatile Slot * slots;
41
+ volatile size_t cnt;
20
42
  VALUE (*form)(const char *str, size_t len);
21
- uint32_t size;
22
- uint32_t mask;
23
- bool mark;
43
+ uint64_t size;
44
+ uint64_t mask;
45
+ VALUE (*intern)(struct _cache *c, const char *key, size_t len);
46
+ volatile Slot reuse;
47
+ size_t rcnt;
48
+ #if HAVE_PTHREAD_MUTEX_INIT
49
+ pthread_mutex_t mutex;
50
+ #else
51
+ VALUE mutex;
52
+ #endif
53
+ uint8_t xrate;
54
+ bool mark;
24
55
  } * Cache;
25
56
 
26
- // almost the Murmur hash algorithm
27
- #define M 0x5bd1e995
28
- #define C1 0xCC9E2D51
29
- #define C2 0x1B873593
30
- #define N 0xE6546B64
31
-
32
57
  void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
33
58
  c->form = form;
34
59
  }
35
60
 
36
- #if 0
37
- // For debugging only.
38
- static void cache_print(Cache c) {
39
- for (uint32_t i = 0; i < c->size; i++) {
40
- printf("%4d:", i);
41
- for (Slot s = c->slots[i]; NULL != s; s = s->next) {
42
- char buf[40];
43
- strncpy(buf, s->key, s->klen);
44
- buf[s->klen] = '\0';
45
- printf(" %s", buf);
46
- }
47
- printf("\n");
48
- }
49
- }
50
- #endif
51
-
52
- static uint32_t hash_calc(const uint8_t *key, size_t len) {
61
+ static uint64_t hash_calc(const uint8_t *key, size_t len) {
53
62
  const uint8_t *end = key + len;
54
63
  const uint8_t *endless = key + (len & 0xFFFFFFFC);
55
- uint32_t h = (uint32_t)len;
56
- uint32_t k;
64
+ uint64_t h = (uint64_t)len;
65
+ uint64_t k;
57
66
 
58
67
  while (key < endless) {
59
- k = (uint32_t)*key++;
60
- k |= (uint32_t)*key++ << 8;
61
- k |= (uint32_t)*key++ << 16;
62
- k |= (uint32_t)*key++ << 24;
68
+ k = (uint64_t)*key++;
69
+ k |= (uint64_t)*key++ << 8;
70
+ k |= (uint64_t)*key++ << 16;
71
+ k |= (uint64_t)*key++ << 24;
63
72
 
64
73
  k *= M;
65
74
  k ^= k >> 24;
@@ -83,105 +92,235 @@ static uint32_t hash_calc(const uint8_t *key, size_t len) {
83
92
  return h;
84
93
  }
85
94
 
86
- Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark) {
87
- Cache c = ALLOC(struct _cache);
88
- int shift = 0;
89
-
90
- for (; REHASH_LIMIT < size; size /= 2, shift++) {
91
- }
92
- if (shift < MIN_SHIFT) {
93
- shift = MIN_SHIFT;
94
- }
95
- c->size = 1 << shift;
96
- c->mask = c->size - 1;
97
- c->slots = ALLOC_N(Slot, c->size);
98
- memset(c->slots, 0, sizeof(Slot) * c->size);
99
- c->form = form;
100
- c->cnt = 0;
101
- c->mark = mark;
102
-
103
- return c;
104
- }
105
-
106
95
  static void rehash(Cache c) {
107
- uint32_t osize = c->size;
96
+ uint64_t osize;
97
+ Slot * end;
98
+ Slot * sp;
108
99
 
109
- c->size = osize * 4;
110
- c->mask = c->size - 1;
111
- REALLOC_N(c->slots, Slot, c->size);
112
- memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
113
-
114
- Slot *end = c->slots + osize;
115
- for (Slot *sp = c->slots; sp < end; sp++) {
100
+ osize = c->size;
101
+ c->size = osize * 4;
102
+ c->mask = c->size - 1;
103
+ c->slots = realloc((void *)c->slots, sizeof(Slot) * c->size);
104
+ memset((Slot *)c->slots + osize, 0, sizeof(Slot) * osize * 3);
105
+ end = (Slot *)c->slots + osize;
106
+ for (sp = (Slot *)c->slots; sp < end; sp++) {
116
107
  Slot s = *sp;
117
108
  Slot next = NULL;
118
109
 
119
110
  *sp = NULL;
120
111
  for (; NULL != s; s = next) {
121
- next = s->next;
122
-
123
- uint32_t h = s->hash & c->mask;
124
- Slot * bucket = c->slots + h;
112
+ uint64_t h = s->hash & c->mask;
113
+ Slot * bucket = (Slot *)c->slots + h;
125
114
 
115
+ next = s->next;
126
116
  s->next = *bucket;
127
117
  *bucket = s;
128
118
  }
129
119
  }
130
120
  }
131
121
 
132
- void cache_free(Cache c) {
133
- for (uint32_t i = 0; i < c->size; i++) {
134
- Slot next;
135
- for (Slot s = c->slots[i]; NULL != s; s = next) {
136
- next = s->next;
137
- xfree(s);
122
+ static VALUE lockless_intern(Cache c, const char *key, size_t len) {
123
+ uint64_t h = hash_calc((const uint8_t *)key, len);
124
+ Slot * bucket = (Slot *)c->slots + (h & c->mask);
125
+ Slot b;
126
+ volatile VALUE rkey;
127
+
128
+ while (REUSE_MAX < c->rcnt) {
129
+ if (NULL != (b = c->reuse)) {
130
+ c->reuse = b->next;
131
+ free(b);
132
+ c->rcnt--;
133
+ } else {
134
+ // An accounting error occurred somewhere so correct it.
135
+ c->rcnt = 0;
138
136
  }
139
137
  }
140
- xfree(c->slots);
141
- xfree(c);
142
- }
143
-
144
- void cache_mark(Cache c) {
145
- if (c->mark) {
146
- for (uint32_t i = 0; i < c->size; i++) {
147
- for (Slot s = c->slots[i]; NULL != s; s = s->next) {
148
- rb_gc_mark(s->val);
149
- }
138
+ for (b = *bucket; NULL != b; b = b->next) {
139
+ if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
140
+ b->use_cnt += 16;
141
+ return b->val;
150
142
  }
151
143
  }
144
+ rkey = c->form(key, len);
145
+ if (NULL == (b = c->reuse)) {
146
+ b = calloc(1, sizeof(struct _slot));
147
+ } else {
148
+ c->reuse = b->next;
149
+ c->rcnt--;
150
+ }
151
+ b->hash = h;
152
+ memcpy(b->key, key, len);
153
+ b->klen = (uint8_t)len;
154
+ b->key[len] = '\0';
155
+ b->val = rkey;
156
+ b->use_cnt = 4;
157
+ b->next = *bucket;
158
+ *bucket = b;
159
+ c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
160
+ if (REHASH_LIMIT < c->cnt / c->size) {
161
+ rehash(c);
162
+ }
163
+ return rkey;
152
164
  }
153
165
 
154
- VALUE
155
- cache_intern(Cache c, const char *key, size_t len) {
156
- if (CACHE_MAX_KEY < len) {
157
- return c->form(key, len);
158
- }
159
- uint32_t h = hash_calc((const uint8_t *)key, len);
160
- Slot * bucket = c->slots + (h & c->mask);
161
- Slot b;
162
- Slot tail = NULL;
166
+ static VALUE locking_intern(Cache c, const char *key, size_t len) {
167
+ uint64_t h;
168
+ Slot * bucket;
169
+ Slot b;
170
+ uint64_t old_size;
171
+ volatile VALUE rkey;
163
172
 
173
+ CACHE_LOCK(c);
174
+ while (REUSE_MAX < c->rcnt) {
175
+ if (NULL != (b = c->reuse)) {
176
+ c->reuse = b->next;
177
+ free(b);
178
+ c->rcnt--;
179
+ } else {
180
+ // An accounting error occurred somewhere so correct it.
181
+ c->rcnt = 0;
182
+ }
183
+ }
184
+ h = hash_calc((const uint8_t *)key, len);
185
+ bucket = (Slot *)c->slots + (h & c->mask);
164
186
  for (b = *bucket; NULL != b; b = b->next) {
165
187
  if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
188
+ b->use_cnt += 4;
189
+ CACHE_UNLOCK(c);
190
+
166
191
  return b->val;
167
192
  }
168
- tail = b;
169
193
  }
170
- b = ALLOC(struct _slot);
194
+ old_size = c->size;
195
+ // The creation of a new value may trigger a GC which can be a problem if the
196
+ // cache is locked so make sure it is unlocked for the key value creation.
197
+ if (NULL != (b = c->reuse)) {
198
+ c->reuse = b->next;
199
+ c->rcnt--;
200
+ }
201
+ CACHE_UNLOCK(c);
202
+ if (NULL == b) {
203
+ b = calloc(1, sizeof(struct _slot));
204
+ }
205
+ rkey = c->form(key, len);
171
206
  b->hash = h;
172
- b->next = NULL;
173
207
  memcpy(b->key, key, len);
174
208
  b->klen = (uint8_t)len;
175
209
  b->key[len] = '\0';
176
- b->val = c->form(key, len);
177
- if (NULL == tail) {
178
- *bucket = b;
179
- } else {
180
- tail->next = b;
210
+ b->val = rkey;
211
+ b->use_cnt = 16;
212
+
213
+ // Lock again to add the new entry.
214
+ CACHE_LOCK(c);
215
+ if (old_size != c->size) {
216
+ h = hash_calc((const uint8_t *)key, len);
217
+ bucket = (Slot *)c->slots + (h & c->mask);
181
218
  }
182
- c->cnt++;
219
+ b->next = *bucket;
220
+ *bucket = b;
221
+ c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
183
222
  if (REHASH_LIMIT < c->cnt / c->size) {
184
223
  rehash(c);
185
224
  }
186
- return b->val;
225
+ CACHE_UNLOCK(c);
226
+
227
+ return rkey;
228
+ }
229
+
230
+ Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
231
+ Cache c = calloc(1, sizeof(struct _cache));
232
+ int shift = 0;
233
+
234
+ for (; REHASH_LIMIT < size; size /= 2, shift++) {
235
+ }
236
+ if (shift < MIN_SHIFT) {
237
+ shift = MIN_SHIFT;
238
+ }
239
+ #if HAVE_PTHREAD_MUTEX_INIT
240
+ pthread_mutex_init(&c->mutex, NULL);
241
+ #else
242
+ c->mutex = rb_mutex_new();
243
+ #endif
244
+ c->size = 1 << shift;
245
+ c->mask = c->size - 1;
246
+ c->slots = calloc(c->size, sizeof(Slot));
247
+ c->form = form;
248
+ c->xrate = 1; // low
249
+ c->mark = mark;
250
+ if (locking) {
251
+ c->intern = locking_intern;
252
+ } else {
253
+ c->intern = lockless_intern;
254
+ }
255
+ return c;
256
+ }
257
+
258
+ void cache_set_expunge_rate(Cache c, int rate) {
259
+ c->xrate = (uint8_t)rate;
260
+ }
261
+
262
+ void cache_free(Cache c) {
263
+ uint64_t i;
264
+
265
+ for (i = 0; i < c->size; i++) {
266
+ Slot next;
267
+ Slot s;
268
+
269
+ for (s = c->slots[i]; NULL != s; s = next) {
270
+ next = s->next;
271
+ free(s);
272
+ }
273
+ }
274
+ free((void *)c->slots);
275
+ free(c);
276
+ }
277
+
278
+ void cache_mark(Cache c) {
279
+ uint64_t i;
280
+
281
+ #if !HAVE_PTHREAD_MUTEX_INIT
282
+ rb_gc_mark(c->mutex);
283
+ #endif
284
+ if (0 == c->cnt) {
285
+ return;
286
+ }
287
+ for (i = 0; i < c->size; i++) {
288
+ Slot s;
289
+ Slot prev = NULL;
290
+ Slot next;
291
+
292
+ for (s = c->slots[i]; NULL != s; s = next) {
293
+ next = s->next;
294
+ if (0 == s->use_cnt) {
295
+ if (NULL == prev) {
296
+ c->slots[i] = next;
297
+ } else {
298
+ prev->next = next;
299
+ }
300
+ c->cnt--;
301
+ s->next = c->reuse;
302
+ c->reuse = s;
303
+ c->rcnt++;
304
+ continue;
305
+ }
306
+ switch (c->xrate) {
307
+ case 0: break;
308
+ case 2: s->use_cnt -= 2; break;
309
+ case 3: s->use_cnt /= 2; break;
310
+ default: s->use_cnt--; break;
311
+ }
312
+ if (c->mark) {
313
+ rb_gc_mark(s->val);
314
+ }
315
+ prev = s;
316
+ }
317
+ }
318
+ }
319
+
320
+ VALUE
321
+ cache_intern(Cache c, const char *key, size_t len) {
322
+ if (CACHE_MAX_KEY <= len) {
323
+ return c->form(key, len);
324
+ }
325
+ return c->intern(c, key, len);
187
326
  }
data/ext/oj/cache.h CHANGED
@@ -11,10 +11,11 @@
11
11
 
12
12
  struct _cache;
13
13
 
14
- extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark);
14
+ extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking);
15
15
  extern void cache_free(struct _cache *c);
16
16
  extern void cache_mark(struct _cache *c);
17
17
  extern void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
18
18
  extern VALUE cache_intern(struct _cache *c, const char *key, size_t len);
19
+ extern void cache_set_expunge_rate(struct _cache *c, int rate);
19
20
 
20
21
  #endif /* CACHE_H */
data/ext/oj/compat.c CHANGED
@@ -30,8 +30,7 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
30
30
  if (Yes == pi->options.sym_key) {
31
31
  rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
32
32
  } else {
33
- rkey = rb_str_new(key, klen);
34
- rkey = oj_encode(rkey);
33
+ rkey = rb_utf8_str_new(key, klen);
35
34
  }
36
35
  } else if (Yes == pi->options.sym_key) {
37
36
  rkey = oj_sym_intern(key, klen);