oj 3.13.0 → 3.13.4

This diff shows the changes between publicly released versions of the package as published to the supported registries. It is provided for informational purposes only.
data/README.md CHANGED
@@ -53,7 +53,7 @@ For more details on options, modes, advanced features, and more follow these
 links.
 
 - [{file:Options.md}](pages/Options.md) for parse and dump options.
-- [{file:Modes.md}](pages/Modes.md) for details on modes for strict JSON compliance, mimicing the JSON gem, and mimicing Rails and ActiveSupport behavior.
+- [{file:Modes.md}](pages/Modes.md) for details on modes for strict JSON compliance, mimicking the JSON gem, and mimicking Rails and ActiveSupport behavior.
 - [{file:JsonGem.md}](pages/JsonGem.md) includes more details on json gem compatibility and use.
 - [{file:Rails.md}](pages/Rails.md) includes more details on Rails and ActiveSupport compatibility and use.
 - [{file:Custom.md}](pages/Custom.md) includes more details on Custom mode.
data/ext/oj/cache.c CHANGED
@@ -1,65 +1,74 @@
 // Copyright (c) 2011, 2021 Peter Ohler. All rights reserved.
 // Licensed under the MIT License. See LICENSE file in the project root for license details.
 
+#if HAVE_PTHREAD_MUTEX_INIT
+#include <pthread.h>
+#endif
+#include <stdlib.h>
+
 #include "cache.h"
 
-#define REHASH_LIMIT 64
+// The stdlib calloc, realloc, and free are used instead of the Ruby ALLOC,
+// ALLOC_N, REALLOC, and xfree since the later could trigger a GC which will
+// either corrupt memory or if the mark function locks will deadlock.
+
+#define REHASH_LIMIT 4
 #define MIN_SHIFT 8
+#define REUSE_MAX 8192
+
+#if HAVE_PTHREAD_MUTEX_INIT
+#define CACHE_LOCK(c) pthread_mutex_lock(&((c)->mutex))
+#define CACHE_UNLOCK(c) pthread_mutex_unlock(&((c)->mutex))
+#else
+#define CACHE_LOCK(c) rb_mutex_lock((c)->mutex)
+#define CACHE_UNLOCK(c) rb_mutex_unlock((c)->mutex)
+#endif
+
+// almost the Murmur hash algorithm
+#define M 0x5bd1e995
 
 typedef struct _slot {
-    struct _slot *next;
-    VALUE val;
-    uint32_t hash;
-    uint8_t klen;
-    char key[CACHE_MAX_KEY];
+    struct _slot * next;
+    VALUE val;
+    uint64_t hash;
+    volatile uint32_t use_cnt;
+    uint8_t klen;
+    char key[CACHE_MAX_KEY];
 } * Slot;
 
 typedef struct _cache {
-    Slot * slots;
-    size_t cnt;
+    volatile Slot * slots;
+    volatile size_t cnt;
     VALUE (*form)(const char *str, size_t len);
-    uint32_t size;
-    uint32_t mask;
-    bool mark;
+    uint64_t size;
+    uint64_t mask;
+    VALUE (*intern)(struct _cache *c, const char *key, size_t len);
+    volatile Slot reuse;
+    size_t rcnt;
+#if HAVE_PTHREAD_MUTEX_INIT
+    pthread_mutex_t mutex;
+#else
+    VALUE mutex;
+#endif
+    uint8_t xrate;
+    bool mark;
 } * Cache;
 
-// almost the Murmur hash algorithm
-#define M 0x5bd1e995
-#define C1 0xCC9E2D51
-#define C2 0x1B873593
-#define N 0xE6546B64
-
 void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
     c->form = form;
 }
 
-#if 0
-// For debugging only.
-static void cache_print(Cache c) {
-    for (uint32_t i = 0; i < c->size; i++) {
-        printf("%4d:", i);
-        for (Slot s = c->slots[i]; NULL != s; s = s->next) {
-            char buf[40];
-            strncpy(buf, s->key, s->klen);
-            buf[s->klen] = '\0';
-            printf(" %s", buf);
-        }
-        printf("\n");
-    }
-}
-#endif
-
-static uint32_t hash_calc(const uint8_t *key, size_t len) {
+static uint64_t hash_calc(const uint8_t *key, size_t len) {
     const uint8_t *end = key + len;
     const uint8_t *endless = key + (len & 0xFFFFFFFC);
-    uint32_t h = (uint32_t)len;
-    uint32_t k;
+    uint64_t h = (uint64_t)len;
+    uint64_t k;
 
     while (key < endless) {
-        k = (uint32_t)*key++;
-        k |= (uint32_t)*key++ << 8;
-        k |= (uint32_t)*key++ << 16;
-        k |= (uint32_t)*key++ << 24;
+        k = (uint64_t)*key++;
+        k |= (uint64_t)*key++ << 8;
+        k |= (uint64_t)*key++ << 16;
+        k |= (uint64_t)*key++ << 24;
 
         k *= M;
         k ^= k >> 24;
@@ -83,105 +92,235 @@ static uint32_t hash_calc(const uint8_t *key, size_t len) {
     return h;
 }
 
-Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark) {
-    Cache c = ALLOC(struct _cache);
-    int shift = 0;
-
-    for (; REHASH_LIMIT < size; size /= 2, shift++) {
-    }
-    if (shift < MIN_SHIFT) {
-        shift = MIN_SHIFT;
-    }
-    c->size = 1 << shift;
-    c->mask = c->size - 1;
-    c->slots = ALLOC_N(Slot, c->size);
-    memset(c->slots, 0, sizeof(Slot) * c->size);
-    c->form = form;
-    c->cnt = 0;
-    c->mark = mark;
-
-    return c;
-}
-
 static void rehash(Cache c) {
-    uint32_t osize = c->size;
+    uint64_t osize;
+    Slot * end;
+    Slot * sp;
 
-    c->size = osize * 4;
-    c->mask = c->size - 1;
-    REALLOC_N(c->slots, Slot, c->size);
-    memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
-
-    Slot *end = c->slots + osize;
-    for (Slot *sp = c->slots; sp < end; sp++) {
+    osize = c->size;
+    c->size = osize * 4;
+    c->mask = c->size - 1;
+    c->slots = realloc((void *)c->slots, sizeof(Slot) * c->size);
+    memset((Slot *)c->slots + osize, 0, sizeof(Slot) * osize * 3);
+    end = (Slot *)c->slots + osize;
+    for (sp = (Slot *)c->slots; sp < end; sp++) {
         Slot s = *sp;
         Slot next = NULL;
 
         *sp = NULL;
         for (; NULL != s; s = next) {
-            next = s->next;
-
-            uint32_t h = s->hash & c->mask;
-            Slot * bucket = c->slots + h;
+            uint64_t h = s->hash & c->mask;
+            Slot * bucket = (Slot *)c->slots + h;
 
+            next = s->next;
             s->next = *bucket;
             *bucket = s;
         }
     }
 }
 
-void cache_free(Cache c) {
-    for (uint32_t i = 0; i < c->size; i++) {
-        Slot next;
-        for (Slot s = c->slots[i]; NULL != s; s = next) {
-            next = s->next;
-            xfree(s);
+static VALUE lockless_intern(Cache c, const char *key, size_t len) {
+    uint64_t h = hash_calc((const uint8_t *)key, len);
+    Slot * bucket = (Slot *)c->slots + (h & c->mask);
+    Slot b;
+    volatile VALUE rkey;
+
+    while (REUSE_MAX < c->rcnt) {
+        if (NULL != (b = c->reuse)) {
+            c->reuse = b->next;
+            free(b);
+            c->rcnt--;
+        } else {
+            // An accounting error occured somewhere so correct it.
+            c->rcnt = 0;
         }
     }
-    xfree(c->slots);
-    xfree(c);
-}
-
-void cache_mark(Cache c) {
-    if (c->mark) {
-        for (uint32_t i = 0; i < c->size; i++) {
-            for (Slot s = c->slots[i]; NULL != s; s = s->next) {
-                rb_gc_mark(s->val);
-            }
+    for (b = *bucket; NULL != b; b = b->next) {
+        if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
+            b->use_cnt += 16;
+            return b->val;
         }
     }
+    rkey = c->form(key, len);
+    if (NULL == (b = c->reuse)) {
+        b = calloc(1, sizeof(struct _slot));
+    } else {
+        c->reuse = b->next;
+        c->rcnt--;
+    }
+    b->hash = h;
+    memcpy(b->key, key, len);
+    b->klen = (uint8_t)len;
+    b->key[len] = '\0';
+    b->val = rkey;
+    b->use_cnt = 4;
+    b->next = *bucket;
+    *bucket = b;
+    c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
+    if (REHASH_LIMIT < c->cnt / c->size) {
+        rehash(c);
+    }
+    return rkey;
 }
 
-VALUE
-cache_intern(Cache c, const char *key, size_t len) {
-    if (CACHE_MAX_KEY < len) {
-        return c->form(key, len);
-    }
-    uint32_t h = hash_calc((const uint8_t *)key, len);
-    Slot * bucket = c->slots + (h & c->mask);
-    Slot b;
-    Slot tail = NULL;
+static VALUE locking_intern(Cache c, const char *key, size_t len) {
+    uint64_t h;
+    Slot * bucket;
+    Slot b;
+    uint64_t old_size;
+    volatile VALUE rkey;
 
+    CACHE_LOCK(c);
+    while (REUSE_MAX < c->rcnt) {
+        if (NULL != (b = c->reuse)) {
+            c->reuse = b->next;
+            free(b);
+            c->rcnt--;
+        } else {
+            // An accounting error occured somewhere so correct it.
+            c->rcnt = 0;
+        }
+    }
+    h = hash_calc((const uint8_t *)key, len);
+    bucket = (Slot *)c->slots + (h & c->mask);
     for (b = *bucket; NULL != b; b = b->next) {
         if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
+            b->use_cnt += 4;
+            CACHE_UNLOCK(c);
+
             return b->val;
         }
-        tail = b;
     }
-    b = ALLOC(struct _slot);
+    old_size = c->size;
+    // The creation of a new value may trigger a GC which be a problem if the
+    // cache is locked so make sure it is unlocked for the key value creation.
+    if (NULL != (b = c->reuse)) {
+        c->reuse = b->next;
+        c->rcnt--;
+    }
+    CACHE_UNLOCK(c);
+    if (NULL == b) {
+        b = calloc(1, sizeof(struct _slot));
+    }
+    rkey = c->form(key, len);
     b->hash = h;
-    b->next = NULL;
     memcpy(b->key, key, len);
     b->klen = (uint8_t)len;
     b->key[len] = '\0';
-    b->val = c->form(key, len);
-    if (NULL == tail) {
-        *bucket = b;
-    } else {
-        tail->next = b;
+    b->val = rkey;
+    b->use_cnt = 16;
+
+    // Lock again to add the new entry.
+    CACHE_LOCK(c);
+    if (old_size != c->size) {
+        h = hash_calc((const uint8_t *)key, len);
+        bucket = (Slot *)c->slots + (h & c->mask);
     }
-    c->cnt++;
+    b->next = *bucket;
+    *bucket = b;
+    c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
     if (REHASH_LIMIT < c->cnt / c->size) {
         rehash(c);
     }
-    return b->val;
+    CACHE_UNLOCK(c);
+
+    return rkey;
+}
+
+Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
+    Cache c = calloc(1, sizeof(struct _cache));
+    int shift = 0;
+
+    for (; REHASH_LIMIT < size; size /= 2, shift++) {
+    }
+    if (shift < MIN_SHIFT) {
+        shift = MIN_SHIFT;
+    }
+#if HAVE_PTHREAD_MUTEX_INIT
+    pthread_mutex_init(&c->mutex, NULL);
+#else
+    c->mutex = rb_mutex_new();
+#endif
+    c->size = 1 << shift;
+    c->mask = c->size - 1;
+    c->slots = calloc(c->size, sizeof(Slot));
+    c->form = form;
+    c->xrate = 1; // low
+    c->mark = mark;
+    if (locking) {
+        c->intern = locking_intern;
+    } else {
+        c->intern = lockless_intern;
+    }
+    return c;
+}
+
+void cache_set_expunge_rate(Cache c, int rate) {
+    c->xrate = (uint8_t)rate;
+}
+
+void cache_free(Cache c) {
+    uint64_t i;
+
+    for (i = 0; i < c->size; i++) {
+        Slot next;
+        Slot s;
+
+        for (s = c->slots[i]; NULL != s; s = next) {
+            next = s->next;
+            free(s);
+        }
+    }
+    free((void *)c->slots);
+    free(c);
+}
+
+void cache_mark(Cache c) {
+    uint64_t i;
+
+#if !HAVE_PTHREAD_MUTEX_INIT
+    rb_gc_mark(c->mutex);
+#endif
+    if (0 == c->cnt) {
+        return;
+    }
+    for (i = 0; i < c->size; i++) {
+        Slot s;
+        Slot prev = NULL;
+        Slot next;
+
+        for (s = c->slots[i]; NULL != s; s = next) {
+            next = s->next;
+            if (0 == s->use_cnt) {
+                if (NULL == prev) {
+                    c->slots[i] = next;
+                } else {
+                    prev->next = next;
+                }
+                c->cnt--;
+                s->next = c->reuse;
+                c->reuse = s;
+                c->rcnt++;
+                continue;
+            }
+            switch (c->xrate) {
+            case 0: break;
+            case 2: s->use_cnt -= 2; break;
+            case 3: s->use_cnt /= 2; break;
+            default: s->use_cnt--; break;
+            }
+            if (c->mark) {
+                rb_gc_mark(s->val);
+            }
+            prev = s;
+        }
+    }
+}
+
+VALUE
+cache_intern(Cache c, const char *key, size_t len) {
+    if (CACHE_MAX_KEY <= len) {
+        return c->form(key, len);
+    }
+    return c->intern(c, key, len);
 }
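
The rewritten cache.c above trades the old grow-only table for a self-pruning one: every lookup bumps a slot's use_cnt, and each cache_mark() pass (run during GC) ages the counters according to the expunge rate (xrate), unlinking slots that reach zero onto the reuse list for recycling. A minimal standalone sketch of just that aging policy, with hypothetical names and no Ruby dependencies:

#include <stdint.h>
#include <stdio.h>

// Hypothetical stand-in mirroring the xrate switch in cache_mark() above.
static uint32_t age_use_cnt(uint32_t use_cnt, uint8_t xrate) {
    switch (xrate) {
    case 0: return use_cnt;      // rate 0: never age, never expunge
    case 2: return use_cnt - 2;  // faster linear decay
    case 3: return use_cnt / 2;  // halve on every mark pass
    default: return use_cnt - 1; // rate 1, the default set in cache_create()
    }
}

int main(void) {
    // A slot interned through the lockless path starts at use_cnt = 4 and
    // gains 16 per hit, so only untouched entries ever reach zero.
    uint32_t cnt = 4;

    for (int pass = 1; 0 < cnt; pass++) {
        cnt = age_use_cnt(cnt, 1);
        printf("after mark pass %d: use_cnt = %u\n", pass, (unsigned)cnt);
    }
    // At zero, cache_mark() unlinks the slot and pushes it onto the reuse
    // list (capped at REUSE_MAX) instead of freeing it immediately.
    return 0;
}

With the default rate, an entry that is never hit again survives four mark passes; a single hit buys it sixteen more, so hot keys effectively never expire.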
data/ext/oj/cache.h CHANGED
@@ -11,10 +11,11 @@
 
 struct _cache;
 
-extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark);
+extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking);
 extern void cache_free(struct _cache *c);
 extern void cache_mark(struct _cache *c);
 extern void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
 extern VALUE cache_intern(struct _cache *c, const char *key, size_t len);
+extern void cache_set_expunge_rate(struct _cache *c, int rate);
 
 #endif /* CACHE_H */
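
Taken together with cache.c, the header now exposes the whole lifecycle: create a cache with a form callback, intern keys, age entries from the GC mark hook, and free. A sketch of how an extension might wire this up (the callback, cache size, and function names here are illustrative assumptions, not oj's actual call sites):

#include <ruby.h>
#include <ruby/encoding.h>
#include <stdbool.h>

#include "cache.h"

// Example form callback: turn the raw key bytes into a Ruby Symbol.
static VALUE form_sym(const char *str, size_t len) {
    return ID2SYM(rb_intern3(str, (long)len, rb_utf8_encoding()));
}

static struct _cache *sym_cache;

void example_init(void) {
    // mark = true keeps cached VALUEs alive via cache_mark();
    // locking = true selects the mutex-guarded locking_intern path.
    sym_cache = cache_create(1024, form_sym, true, true);
    cache_set_expunge_rate(sym_cache, 1); // slow decay, the default
}

// Called from the owning object's GC mark function so cached VALUEs are
// marked and idle slots age toward expungement.
void example_mark(void *ptr) {
    cache_mark(sym_cache);
}

VALUE example_intern(const char *key, size_t len) {
    // Keys of CACHE_MAX_KEY or longer bypass the cache and go straight
    // to the form callback.
    return cache_intern(sym_cache, key, len);
}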
data/ext/oj/compat.c CHANGED
@@ -30,8 +30,7 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
         if (Yes == pi->options.sym_key) {
             rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
         } else {
-            rkey = rb_str_new(key, klen);
-            rkey = oj_encode(rkey);
+            rkey = rb_utf8_str_new(key, klen);
         }
     } else if (Yes == pi->options.sym_key) {
         rkey = oj_sym_intern(key, klen);
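
The compat.c change collapses two calls into one: rb_utf8_str_new() allocates the string already tagged with the UTF-8 encoding, making the follow-up oj_encode() pass unnecessary. A small before/after sketch (the function names are hypothetical, and it assumes oj_encode simply associates the UTF-8 encoding, which is what the one-line replacement implies):

#include <ruby.h>
#include <ruby/encoding.h>

// Before: build the String, then associate the encoding in a second step.
static VALUE make_key_old(const char *key, long klen) {
    VALUE rkey = rb_str_new(key, klen);

    return rb_enc_associate(rkey, rb_utf8_encoding());
}

// After: one call produces an equivalent UTF-8 String.
static VALUE make_key_new(const char *key, long klen) {
    return rb_utf8_str_new(key, klen);
}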