oj 3.13.2 → 3.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/oj/cache.c +224 -76
- data/ext/oj/cache.h +2 -1
- data/ext/oj/compat.c +1 -2
- data/ext/oj/custom.c +3 -6
- data/ext/oj/extconf.rb +1 -0
- data/ext/oj/intern.c +101 -218
- data/ext/oj/intern.h +0 -1
- data/ext/oj/mimic_json.c +2 -2
- data/ext/oj/object.c +10 -39
- data/ext/oj/oj.c +3 -3
- data/ext/oj/parser.c +94 -123
- data/ext/oj/saj2.c +3 -3
- data/ext/oj/strict.c +1 -2
- data/ext/oj/usual.c +40 -16
- data/ext/oj/wab.c +6 -3
- data/lib/oj/state.rb +8 -7
- data/lib/oj/version.rb +1 -1
- data/test/mem.rb +33 -0
- data/test/perf_once.rb +58 -0
- data/test/perf_parser.rb +6 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f5350547ea8224c68aefa0d8f1539689ffb24c9addc20b3cbad3537af06b9f45
+  data.tar.gz: d27f8a3fa9311685f215f1636f7ebaf4ed3b10d03725531f8b42c4d1e64b0985
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: e3497f0ac6fb20ae4b0aa252855120f2a3e938bbd703dfcebc1d433ea112924b24e3e352080b3f7c09c5cae99d57dde5f26659031126bcb6855d85761e282b1e
+  data.tar.gz: c35a5190c600bcd237041cd30cbe227700fc6aed723fb2d54f13013b4af6f79b96b1c5c2dfbcb4dc9c9db30b2894d97c6242a9b0183376da2c2aaa9fb08d4f0a
data/ext/oj/cache.c
CHANGED
@@ -1,15 +1,32 @@
 // Copyright (c) 2011, 2021 Peter Ohler. All rights reserved.
 // Licensed under the MIT License. See LICENSE file in the project root for license details.
 
+#if HAVE_PTHREAD_MUTEX_INIT
+#include <pthread.h>
+#endif
+
 #include "cache.h"
 
-#define REHASH_LIMIT
+#define REHASH_LIMIT 4
 #define MIN_SHIFT 8
+#define REUSE_MAX 8192
+
+#if HAVE_PTHREAD_MUTEX_INIT
+#define CACHE_LOCK(c) pthread_mutex_lock(&((c)->mutex))
+#define CACHE_UNLOCK(c) pthread_mutex_unlock(&((c)->mutex))
+#else
+#define CACHE_LOCK(c) rb_mutex_lock((c)->mutex)
+#define CACHE_UNLOCK(c) rb_mutex_unlock((c)->mutex)
+#endif
+
+// almost the Murmur hash algorithm
+#define M 0x5bd1e995
 
 typedef struct _slot {
     struct _slot *next;
     VALUE         val;
-
+    uint64_t      hash;
+    uint32_t      use_cnt;
     uint8_t       klen;
     char          key[CACHE_MAX_KEY];
 } * Slot;
@@ -18,17 +35,20 @@ typedef struct _cache {
     Slot * slots;
     size_t cnt;
     VALUE (*form)(const char *str, size_t len);
-
-
-
+    uint64_t size;
+    uint64_t mask;
+    VALUE (*intern)(struct _cache *c, const char *key, size_t len);
+    Slot     reuse;
+    size_t   rcnt;
+#if HAVE_PTHREAD_MUTEX_INIT
+    pthread_mutex_t mutex;
+#else
+    VALUE mutex;
+#endif
+    uint8_t xrate;
+    bool    mark;
 } * Cache;
 
-// almost the Murmur hash algorithm
-#define M 0x5bd1e995
-#define C1 0xCC9E2D51
-#define C2 0x1B873593
-#define N 0xE6546B64
-
 void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
     c->form = form;
 }
@@ -36,7 +56,7 @@ void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
 #if 0
 // For debugging only.
 static void cache_print(Cache c) {
-    for (uint32_t i = 0; i < c->size; i++) {
+    for (uint64_t i = 0; i < c->size; i++) {
         printf("%4d:", i);
         for (Slot s = c->slots[i]; NULL != s; s = s->next) {
             char buf[40];
|
|
49
69
|
}
|
50
70
|
#endif
|
51
71
|
|
52
|
-
static
|
72
|
+
static uint64_t hash_calc(const uint8_t *key, size_t len) {
|
53
73
|
const uint8_t *end = key + len;
|
54
74
|
const uint8_t *endless = key + (len & 0xFFFFFFFC);
|
55
|
-
|
56
|
-
|
75
|
+
uint64_t h = (uint64_t)len;
|
76
|
+
uint64_t k;
|
57
77
|
|
58
78
|
while (key < endless) {
|
59
|
-
k = (
|
60
|
-
k |= (
|
61
|
-
k |= (
|
62
|
-
k |= (
|
79
|
+
k = (uint64_t)*key++;
|
80
|
+
k |= (uint64_t)*key++ << 8;
|
81
|
+
k |= (uint64_t)*key++ << 16;
|
82
|
+
k |= (uint64_t)*key++ << 24;
|
63
83
|
|
64
84
|
k *= M;
|
65
85
|
k ^= k >> 24;
|
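
Note: this hunk and its continuation below widen hash_calc from uint32_t to uint64_t. The visible pieces (the M constant 0x5bd1e995 and the k ^= k >> 24 mix) match the classic MurmurHash2 round, and the source itself comments "almost the Murmur hash algorithm". For comparison only, here is a self-contained sketch of a MurmurHash2-style hash over 4-byte blocks; the middle of the loop, the tail handling, and the final avalanche are not shown in this diff, so those parts follow the reference algorithm and are an assumption:

    #include <stddef.h>
    #include <stdint.h>

    #define M 0x5bd1e995

    /* MurmurHash2-style sketch for comparison with hash_calc; only the
     * block load and the first mix step are confirmed by the diff. */
    static uint64_t murmur_sketch(const uint8_t *key, size_t len) {
        const uint8_t *end     = key + len;
        const uint8_t *endless = key + (len & 0xFFFFFFFC); /* whole 4-byte blocks */
        uint64_t       h       = (uint64_t)len;
        uint64_t       k;

        while (key < endless) {
            /* assemble a little-endian 32-bit block, as in the hunk above */
            k = (uint64_t)*key++;
            k |= (uint64_t)*key++ << 8;
            k |= (uint64_t)*key++ << 16;
            k |= (uint64_t)*key++ << 24;

            k *= M;
            k ^= k >> 24;  /* mix step visible in the diff */
            k *= M;        /* from here on: reference MurmurHash2, assumed */
            h *= M;
            h ^= k;
        }
        switch (end - key) {  /* trailing 1-3 bytes, reference-style */
        case 3: h ^= (uint64_t)key[2] << 16; /* fall through */
        case 2: h ^= (uint64_t)key[1] << 8;  /* fall through */
        case 1: h ^= (uint64_t)key[0]; h *= M;
        }
        h ^= h >> 13;  /* final avalanche, reference-style */
        h *= M;
        h ^= h >> 15;
        return h;
    }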
@@ -83,42 +103,23 @@ static uint32_t hash_calc(const uint8_t *key, size_t len) {
     return h;
 }
 
-Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark) {
-    Cache c     = ALLOC(struct _cache);
-    int   shift = 0;
-
-    for (; REHASH_LIMIT < size; size /= 2, shift++) {
-    }
-    if (shift < MIN_SHIFT) {
-        shift = MIN_SHIFT;
-    }
-    c->size = 1 << shift;
-    c->mask = c->size - 1;
-    c->slots = ALLOC_N(Slot, c->size);
-    memset(c->slots, 0, sizeof(Slot) * c->size);
-    c->form = form;
-    c->cnt = 0;
-    c->mark = mark;
-
-    return c;
-}
-
 static void rehash(Cache c) {
-
-    Slot * end
+    uint64_t osize = c->size;
+    Slot *   end;
     Slot *   sp;
 
     c->size = osize * 4;
     c->mask = c->size - 1;
     REALLOC_N(c->slots, Slot, c->size);
     memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
+    end = c->slots + osize;
     for (sp = c->slots; sp < end; sp++) {
         Slot s    = *sp;
         Slot next = NULL;
 
         *sp = NULL;
         for (; NULL != s; s = next) {
-
+            uint64_t h = s->hash & c->mask;
             Slot *   bucket = c->slots + h;
 
             next = s->next;
@@ -128,8 +129,153 @@ static void rehash(Cache c) {
         }
     }
 }
 
+static VALUE lockless_intern(Cache c, const char *key, size_t len) {
+    uint64_t h      = hash_calc((const uint8_t *)key, len);
+    Slot *   bucket = c->slots + (h & c->mask);
+    Slot     b;
+
+    while (REUSE_MAX < c->rcnt) {
+        if (NULL != (b = c->reuse)) {
+            c->reuse = b->next;
+            xfree(b);
+            c->rcnt--;
+        } else {
+            // An accounting error occured somewhere so correct it.
+            c->rcnt = 0;
+        }
+    }
+    for (b = *bucket; NULL != b; b = b->next) {
+        if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
+            b->use_cnt += 4;
+            return b->val;
+        }
+    }
+    {
+        volatile VALUE rkey = c->form(key, len);
+
+        if (NULL == (b = c->reuse)) {
+            b = ALLOC(struct _slot);
+        } else {
+            c->reuse = b->next;
+            c->rcnt--;
+        }
+        b->hash = h;
+        memcpy(b->key, key, len);
+        b->klen     = (uint8_t)len;
+        b->key[len] = '\0';
+        b->val      = rkey;
+        b->use_cnt  = 4;
+        b->next     = *bucket;
+        *bucket     = b;
+        c->cnt++;  // Don't worry about wrapping. Worse case is the entry is removed and recreated.
+        if (REHASH_LIMIT < c->cnt / c->size) {
+            rehash(c);
+        }
+    }
+    return b->val;
+}
+
+static VALUE locking_intern(Cache c, const char *key, size_t len) {
+    uint64_t h;
+    Slot *   bucket;
+    Slot     b;
+    uint64_t old_size;
+
+    CACHE_LOCK(c);
+    while (REUSE_MAX < c->rcnt) {
+        if (NULL != (b = c->reuse)) {
+            c->reuse = b->next;
+            xfree(b);
+            c->rcnt--;
+        } else {
+            // An accounting error occured somewhere so correct it.
+            c->rcnt = 0;
+        }
+    }
+    h      = hash_calc((const uint8_t *)key, len);
+    bucket = c->slots + (h & c->mask);
+    for (b = *bucket; NULL != b; b = b->next) {
+        if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
+            b->use_cnt += 4;
+            CACHE_UNLOCK(c);
+            return b->val;
+        }
+    }
+    old_size = c->size;
+    // The creation of a new value may trigger a GC which be a problem if the
+    // cache is locked so make sure it is unlocked for the key value creation.
+    if (NULL == (b = c->reuse)) {
+        b = ALLOC(struct _slot);
+    } else {
+        c->reuse = b->next;
+        c->rcnt--;
+    }
+    CACHE_UNLOCK(c);
+    {
+        volatile VALUE rkey = c->form(key, len);
+
+        b->hash = h;
+        memcpy(b->key, key, len);
+        b->klen     = (uint8_t)len;
+        b->key[len] = '\0';
+        b->val      = rkey;
+        b->use_cnt  = 4;
+
+        // Lock again to add the new entry.
+        CACHE_LOCK(c);
+        if (old_size != c->size) {
+            h      = hash_calc((const uint8_t *)key, len);
+            bucket = c->slots + (h & c->mask);
+        }
+        b->next = *bucket;
+        *bucket = b;
+        c->cnt++;  // Don't worry about wrapping. Worse case is the entry is removed and recreated.
+        if (REHASH_LIMIT < c->cnt / c->size) {
+            rehash(c);
+        }
+        CACHE_UNLOCK(c);
+    }
+    return b->val;
+}
+
+Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
+    Cache c     = ALLOC(struct _cache);
+    int   shift = 0;
+
+    for (; REHASH_LIMIT < size; size /= 2, shift++) {
+    }
+    if (shift < MIN_SHIFT) {
+        shift = MIN_SHIFT;
+    }
+#if HAVE_PTHREAD_MUTEX_INIT
+    pthread_mutex_init(&c->mutex, NULL);
+#else
+    c->mutex = rb_mutex_new();
+#endif
+    c->size  = 1 << shift;
+    c->mask  = c->size - 1;
+    c->slots = ALLOC_N(Slot, c->size);
+    memset(c->slots, 0, sizeof(Slot) * c->size);
+    c->form  = form;
+    c->cnt   = 0;
+    c->xrate = 1;  // low
+    c->mark  = mark;
+    c->reuse = NULL;
+    c->rcnt  = 0;
+    if (locking) {
+        c->intern = locking_intern;
+    } else {
+        c->intern = lockless_intern;
+    }
+    return c;
+}
+
+void cache_set_expunge_rate(Cache c, int rate) {
+    c->xrate = (uint8_t)rate;
+}
+
 void cache_free(Cache c) {
-
+    uint64_t i;
 
     for (i = 0; i < c->size; i++) {
         Slot next;
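
Note: in locking_intern above, the new slot is reserved and the mutex released before c->form is called, because form builds a Ruby object and object creation can trigger a GC, which the source comments note is a problem while the cache is locked. After the value exists, the lock is retaken and c->size is compared against the saved old_size so the bucket can be recomputed if another thread rehashed in between. Below is a distilled, standalone sketch of that shape in plain C; toy_cache_t, toy_hash, and make_value are hypothetical stand-ins, not oj code:

    #include <pthread.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct toy_entry {
        struct toy_entry *next;
        char *            val;
        char              key[64];
    } toy_entry_t;

    typedef struct {
        pthread_mutex_t mutex;
        toy_entry_t **  slots;
        size_t          size;  /* power of two; a rehash under the lock may grow it */
    } toy_cache_t;

    static size_t toy_hash(const char *key) {  /* djb2, just for the sketch */
        size_t h = 5381;
        while ('\0' != *key) {
            h = h * 33 + (unsigned char)*key++;
        }
        return h;
    }

    /* Stands in for c->form(): in oj this creates a Ruby VALUE and may trigger GC. */
    static char *make_value(const char *key) {
        return strdup(key);
    }

    char *toy_intern(toy_cache_t *c, const char *key) {
        size_t        h = toy_hash(key);
        toy_entry_t **bucket;
        toy_entry_t * e;
        size_t        old_size;

        pthread_mutex_lock(&c->mutex);
        bucket = c->slots + (h & (c->size - 1));
        for (e = *bucket; NULL != e; e = e->next) {
            if (0 == strcmp(e->key, key)) {
                pthread_mutex_unlock(&c->mutex);
                return e->val;
            }
        }
        old_size = c->size;
        e = malloc(sizeof(toy_entry_t));  /* reserve the slot while locked... */
        pthread_mutex_unlock(&c->mutex);  /* ...but create the value unlocked */

        e->val = make_value(key);
        strncpy(e->key, key, sizeof(e->key) - 1);
        e->key[sizeof(e->key) - 1] = '\0';

        pthread_mutex_lock(&c->mutex);  /* relock only to link the entry in */
        if (old_size != c->size) {
            /* another thread rehashed while we were unlocked; recompute the bucket */
            bucket = c->slots + (h & (c->size - 1));
        }
        e->next = *bucket;
        *bucket = e;
        pthread_mutex_unlock(&c->mutex);
        return e->val;
    }

As in the diff, two threads can race to create the same key; both entries end up chained and lookups return the first, which is the trade the original accepts in exchange for never holding the lock across value creation.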
@@ -145,14 +291,43 @@ void cache_free(Cache c) {
     }
 }
 
 void cache_mark(Cache c) {
-
-    uint32_t i;
+    uint64_t i;
 
-
-
-
+#if !HAVE_PTHREAD_MUTEX_INIT
+    rb_gc_mark(c->mutex);
+#endif
+    if (0 == c->cnt) {
+        return;
+    }
+    for (i = 0; i < c->size; i++) {
+        Slot s;
+        Slot prev = NULL;
+        Slot next;
+
+        for (s = c->slots[i]; NULL != s; s = next) {
+            next = s->next;
+            if (0 == s->use_cnt) {
+                if (NULL == prev) {
+                    c->slots[i] = next;
+                } else {
+                    prev->next = next;
+                }
+                c->cnt--;
+                s->next  = c->reuse;
+                c->reuse = s;
+                c->rcnt++;
+                continue;
+            }
+            switch (c->xrate) {
+            case 0: break;
+            case 2: s->use_cnt -= 2; break;
+            case 3: s->use_cnt /= 2; break;
+            default: s->use_cnt--; break;
+            }
+            if (c->mark) {
                 rb_gc_mark(s->val);
             }
+            prev = s;
         }
     }
 }
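
Note: cache_mark now doubles as the cache's aging sweep. Each GC mark pass moves entries whose use_cnt has reached zero onto the reuse list and decays the survivors according to xrate: 0 never decays, 2 subtracts two, 3 halves, and any other rate (1 is the "low" default set in cache_create) subtracts one, while lookup hits add 4 in the intern functions above. The decay rule, isolated as a sketch:

    #include <stdbool.h>
    #include <stdint.h>

    /* One GC-mark sweep step for a single entry, mirroring the xrate
     * switch in cache_mark above. Returns true when the entry has aged
     * out and should be unlinked onto the reuse list. */
    static bool sweep_use_cnt(uint32_t *use_cnt, uint8_t xrate) {
        if (0 == *use_cnt) {
            return true;  /* expunge: recycle the slot */
        }
        switch (xrate) {
        case 0: break;              /* never expire */
        case 2: *use_cnt -= 2; break;
        case 3: *use_cnt /= 2; break;
        default: *use_cnt -= 1; break;  /* rate 1, the default */
        }
        return false;
    }

With hits worth +4 and the default decay of 1 per sweep, an entry survives roughly four GC cycles per hit before it becomes reclaimable.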
@@ -162,32 +337,5 @@ cache_intern(Cache c, const char *key, size_t len) {
|
|
162
337
|
if (CACHE_MAX_KEY < len) {
|
163
338
|
return c->form(key, len);
|
164
339
|
}
|
165
|
-
|
166
|
-
Slot * bucket = c->slots + (h & c->mask);
|
167
|
-
Slot b;
|
168
|
-
Slot tail = NULL;
|
169
|
-
|
170
|
-
for (b = *bucket; NULL != b; b = b->next) {
|
171
|
-
if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
|
172
|
-
return b->val;
|
173
|
-
}
|
174
|
-
tail = b;
|
175
|
-
}
|
176
|
-
b = ALLOC(struct _slot);
|
177
|
-
b->hash = h;
|
178
|
-
b->next = NULL;
|
179
|
-
memcpy(b->key, key, len);
|
180
|
-
b->klen = (uint8_t)len;
|
181
|
-
b->key[len] = '\0';
|
182
|
-
b->val = c->form(key, len);
|
183
|
-
if (NULL == tail) {
|
184
|
-
*bucket = b;
|
185
|
-
} else {
|
186
|
-
tail->next = b;
|
187
|
-
}
|
188
|
-
c->cnt++;
|
189
|
-
if (REHASH_LIMIT < c->cnt / c->size) {
|
190
|
-
rehash(c);
|
191
|
-
}
|
192
|
-
return b->val;
|
340
|
+
return c->intern(c, key, len);
|
193
341
|
}
|
data/ext/oj/cache.h
CHANGED
@@ -11,10 +11,11 @@
 
 struct _cache;
 
-extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark);
+extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking);
 extern void           cache_free(struct _cache *c);
 extern void           cache_mark(struct _cache *c);
 extern void           cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
 extern VALUE          cache_intern(struct _cache *c, const char *key, size_t len);
+extern void           cache_set_expunge_rate(struct _cache *c, int rate);
 
 #endif /* CACHE_H */
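
Note: the header now threads the locking choice through cache_create and exposes the expunge rate. A minimal sketch of a call site, assuming only the cache.h declarations above; str_form and init_str_cache are illustrative names, not functions from oj:

    #include <stdbool.h>
    #include <ruby.h>
    #include "cache.h"

    /* Illustrative form callback: builds the Ruby value to cache for a key. */
    static VALUE str_form(const char *str, size_t len) {
        return rb_utf8_str_new(str, (long)len);
    }

    static struct _cache *str_cache;

    static void init_str_cache(bool needs_locking) {
        /* mark=true: cache_mark() will rb_gc_mark the cached values.
         * locking=true selects locking_intern; false the lockless path. */
        str_cache = cache_create(1024, str_form, true, needs_locking);
        cache_set_expunge_rate(str_cache, 1);  /* 1 = slow decay, the default */
    }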
data/ext/oj/compat.c
CHANGED
@@ -30,8 +30,7 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
         if (Yes == pi->options.sym_key) {
             rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
         } else {
-
-            rkey = oj_encode(rkey);
+            rkey = rb_utf8_str_new(key, klen);
         }
     } else if (Yes == pi->options.sym_key) {
         rkey = oj_sym_intern(key, klen);
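
Note: this hunk (and the matching ones in custom.c below) replaces a build-then-re-encode pair with a single rb_utf8_str_new call, which allocates the string already associated with the UTF-8 encoding. The removed first line is truncated in this diff, so the "before" body in this sketch is an assumption about its shape, based on oj_encode being oj's helper that tags a string as UTF-8:

    #include "oj.h"  /* assumed to declare oj_encode and pull in the Ruby headers */

    /* Assumed pre-3.13.3 shape: allocate, then re-tag the encoding. */
    static VALUE key_before(const char *key, long klen) {
        volatile VALUE rkey = rb_str_new(key, klen);  /* default ASCII-8BIT encoding */

        return oj_encode(rkey);  /* associate UTF-8 after the fact */
    }

    /* 3.13.3: one call, already UTF-8. */
    static VALUE key_after(const char *key, long klen) {
        return rb_utf8_str_new(key, klen);
    }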
data/ext/oj/custom.c
CHANGED
@@ -956,16 +956,14 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
         }
     } else {
         //volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
-        volatile VALUE rstr =
+        volatile VALUE rstr = rb_utf8_str_new(str, len);
 
         if (Qundef == rkey) {
             if (Yes == pi->options.sym_key) {
                 rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
             } else {
-
-                rkey = oj_encode(rkey);
+                rkey = rb_utf8_str_new(key, klen);
             }
-            rstr = oj_encode(rstr);
         }
         if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
             VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);
@@ -1090,9 +1088,8 @@ static void array_append_num(ParseInfo pi, NumInfo ni) {
 }
 
 static void array_append_cstr(ParseInfo pi, const char *str, size_t len, const char *orig) {
-    volatile VALUE rstr =
+    volatile VALUE rstr = rb_utf8_str_new(str, len);
 
-    rstr = oj_encode(rstr);
     if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
         VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);
 
data/ext/oj/extconf.rb
CHANGED
@@ -31,6 +31,7 @@ have_func('rb_gc_mark_movable')
 have_func('stpcpy')
 have_func('pthread_mutex_init')
 have_func('rb_enc_associate')
+have_func('rb_enc_interned_str')
 have_func('rb_ext_ractor_safe', 'ruby.h')
 # rb_hash_bulk_insert is deep down in a header not included in normal build and that seems to fool have_func.
 have_func('rb_hash_bulk_insert', 'ruby.h') unless '2' == version[0] && '6' == version[1]
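
Note: have_func generates a HAVE_RB_ENC_INTERNED_STR define when the target Ruby provides rb_enc_interned_str (added in Ruby 3.0), which returns a frozen, deduplicated string. A sketch of how such a check is typically consumed, not a quote of oj's actual code:

    #include <ruby.h>
    #include <ruby/encoding.h>

    /* Returns a UTF-8 Ruby string for a parsed key, using the interned
     * (frozen, deduplicated) variant when the running Ruby provides it.
     * HAVE_RB_ENC_INTERNED_STR comes from extconf.rb's have_func check. */
    static VALUE utf8_str(const char *str, size_t len) {
    #ifdef HAVE_RB_ENC_INTERNED_STR
        return rb_enc_interned_str(str, (long)len, rb_utf8_encoding());
    #else
        return rb_utf8_str_new(str, (long)len);
    #endif
    }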