oj 3.13.2 → 3.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/oj/cache.c +224 -76
- data/ext/oj/cache.h +2 -1
- data/ext/oj/compat.c +1 -2
- data/ext/oj/custom.c +3 -6
- data/ext/oj/extconf.rb +1 -0
- data/ext/oj/intern.c +101 -218
- data/ext/oj/intern.h +0 -1
- data/ext/oj/mimic_json.c +2 -2
- data/ext/oj/object.c +10 -39
- data/ext/oj/oj.c +3 -3
- data/ext/oj/parser.c +94 -123
- data/ext/oj/saj2.c +3 -3
- data/ext/oj/strict.c +1 -2
- data/ext/oj/usual.c +40 -16
- data/ext/oj/wab.c +6 -3
- data/lib/oj/state.rb +8 -7
- data/lib/oj/version.rb +1 -1
- data/test/mem.rb +33 -0
- data/test/perf_once.rb +58 -0
- data/test/perf_parser.rb +6 -1
- metadata +6 -2
data/ext/oj/intern.c
CHANGED
@@ -8,11 +8,16 @@
|
|
8
8
|
#if HAVE_PTHREAD_MUTEX_INIT
|
9
9
|
#include <pthread.h>
|
10
10
|
#endif
|
11
|
+
#include "cache.h"
|
11
12
|
#include "parse.h"
|
12
13
|
|
13
|
-
|
14
|
+
// Only used for the class cache so 256 should be sufficient.
|
15
|
+
#define HASH_SLOT_CNT ((uint64_t)256)
|
14
16
|
#define HASH_MASK (HASH_SLOT_CNT - 1)
|
15
17
|
|
18
|
+
// almost the Murmur hash algorithm
|
19
|
+
#define M 0x5bd1e995
|
20
|
+
|
16
21
|
typedef struct _keyVal {
|
17
22
|
struct _keyVal *next;
|
18
23
|
const char * key;
|
@@ -30,256 +35,128 @@ typedef struct _hash {
|
|
30
35
|
} * Hash;
|
31
36
|
|
32
37
|
struct _hash class_hash;
|
33
|
-
struct _hash str_hash;
|
34
|
-
struct _hash sym_hash;
|
35
38
|
struct _hash attr_hash;
|
36
39
|
|
37
|
-
|
38
|
-
|
39
|
-
#define C1 0xCC9E2D51
|
40
|
-
#define C2 0x1B873593
|
41
|
-
#define N 0xE6546B64
|
40
|
+
static struct _cache *str_cache = NULL;
|
41
|
+
static VALUE str_cache_obj;
|
42
42
|
|
43
|
-
static
|
44
|
-
|
45
|
-
const uint8_t *endless = key + (len & 0xFFFFFFFC);
|
46
|
-
uint32_t h = (uint32_t)len;
|
47
|
-
uint32_t k;
|
43
|
+
static struct _cache *sym_cache = NULL;
|
44
|
+
static VALUE sym_cache_obj;
|
48
45
|
|
49
|
-
|
50
|
-
|
51
|
-
k |= (uint32_t)*key++ << 8;
|
52
|
-
k |= (uint32_t)*key++ << 16;
|
53
|
-
k |= (uint32_t)*key++ << 24;
|
46
|
+
static struct _cache *attr_cache = NULL;
|
47
|
+
static VALUE attr_cache_obj;
|
54
48
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
h ^= k * M;
|
59
|
-
}
|
60
|
-
if (1 < end - key) {
|
61
|
-
uint16_t k16 = (uint16_t)*key++;
|
49
|
+
static VALUE form_str(const char *str, size_t len) {
|
50
|
+
return rb_str_freeze(rb_utf8_str_new(str, len));
|
51
|
+
}
|
62
52
|
|
63
|
-
|
64
|
-
|
53
|
+
static VALUE form_sym(const char *str, size_t len) {
|
54
|
+
return rb_str_intern(rb_utf8_str_new(str, len));
|
55
|
+
}
|
56
|
+
|
57
|
+
static VALUE form_attr(const char *str, size_t len) {
|
58
|
+
char buf[256];
|
59
|
+
|
60
|
+
if (sizeof(buf) - 2 <= len) {
|
61
|
+
char *b = ALLOC_N(char, len + 2);
|
62
|
+
ID id;
|
63
|
+
|
64
|
+
if ('~' == *str) {
|
65
|
+
memcpy(b, str + 1, len - 1);
|
66
|
+
b[len - 1] = '\0';
|
67
|
+
len -= 2;
|
68
|
+
} else {
|
69
|
+
*b = '@';
|
70
|
+
memcpy(b + 1, str, len);
|
71
|
+
b[len + 1] = '\0';
|
72
|
+
}
|
73
|
+
id = rb_intern3(buf, len + 1, oj_utf8_encoding);
|
74
|
+
xfree(b);
|
75
|
+
return id;
|
65
76
|
}
|
66
|
-
if (
|
67
|
-
|
77
|
+
if ('~' == *str) {
|
78
|
+
memcpy(buf, str + 1, len - 1);
|
79
|
+
buf[len - 1] = '\0';
|
80
|
+
len -= 2;
|
81
|
+
} else {
|
82
|
+
*buf = '@';
|
83
|
+
memcpy(buf + 1, str, len);
|
84
|
+
buf[len + 1] = '\0';
|
68
85
|
}
|
69
|
-
|
70
|
-
h ^= h >> 13;
|
71
|
-
h *= M;
|
72
|
-
h ^= h >> 15;
|
73
|
-
|
74
|
-
return h;
|
86
|
+
return (VALUE)rb_intern3(buf, len + 1, oj_utf8_encoding);
|
75
87
|
}
|
76
88
|
|
77
89
|
void oj_hash_init() {
|
90
|
+
VALUE cache_class = rb_define_class_under(Oj, "Cache", rb_cObject);
|
91
|
+
|
92
|
+
str_cache = cache_create(0, form_str, true, true);
|
93
|
+
str_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, str_cache);
|
94
|
+
rb_gc_register_address(&str_cache_obj);
|
95
|
+
|
96
|
+
sym_cache = cache_create(0, form_sym, true, true);
|
97
|
+
sym_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, sym_cache);
|
98
|
+
rb_gc_register_address(&sym_cache_obj);
|
99
|
+
|
100
|
+
attr_cache = cache_create(0, form_attr, false, true);
|
101
|
+
attr_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, attr_cache);
|
102
|
+
rb_gc_register_address(&attr_cache_obj);
|
103
|
+
|
78
104
|
memset(class_hash.slots, 0, sizeof(class_hash.slots));
|
79
|
-
memset(str_hash.slots, 0, sizeof(str_hash.slots));
|
80
|
-
memset(sym_hash.slots, 0, sizeof(sym_hash.slots));
|
81
|
-
memset(attr_hash.slots, 0, sizeof(attr_hash.slots));
|
82
105
|
#if HAVE_PTHREAD_MUTEX_INIT
|
83
106
|
pthread_mutex_init(&class_hash.mutex, NULL);
|
84
|
-
pthread_mutex_init(&str_hash.mutex, NULL);
|
85
|
-
pthread_mutex_init(&sym_hash.mutex, NULL);
|
86
|
-
pthread_mutex_init(&attr_hash.mutex, NULL);
|
87
107
|
#else
|
88
108
|
class_hash.mutex = rb_mutex_new();
|
89
109
|
rb_gc_register_address(&class_hash.mutex);
|
90
|
-
str_hash.mutex = rb_mutex_new();
|
91
|
-
rb_gc_register_address(&str_hash.mutex);
|
92
|
-
sym_hash.mutex = rb_mutex_new();
|
93
|
-
rb_gc_register_address(&sym_hash.mutex);
|
94
|
-
attr_hash.mutex = rb_mutex_new();
|
95
|
-
rb_gc_register_address(&attr_hash.mutex);
|
96
110
|
#endif
|
97
111
|
}
|
98
112
|
|
99
|
-
void oj_hash_print() {
|
100
|
-
uint32_t i;
|
101
|
-
KeyVal b;
|
102
|
-
|
103
|
-
for (i = 0; i < HASH_SLOT_CNT; i++) {
|
104
|
-
printf("%4d:", i);
|
105
|
-
for (b = class_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
|
106
|
-
printf(" %s", b->key);
|
107
|
-
}
|
108
|
-
printf("\n");
|
109
|
-
}
|
110
|
-
}
|
111
|
-
|
112
|
-
void oj_hash_sizes() {
|
113
|
-
uint32_t i;
|
114
|
-
KeyVal b;
|
115
|
-
int max = 0;
|
116
|
-
int min = 1000000;
|
117
|
-
|
118
|
-
for (i = 0; i < HASH_SLOT_CNT; i++) {
|
119
|
-
int cnt = 0;
|
120
|
-
|
121
|
-
for (b = str_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
|
122
|
-
cnt++;
|
123
|
-
}
|
124
|
-
// printf(" %4d\n", cnt);
|
125
|
-
if (max < cnt) {
|
126
|
-
max = cnt;
|
127
|
-
}
|
128
|
-
if (cnt < min) {
|
129
|
-
min = cnt;
|
130
|
-
}
|
131
|
-
}
|
132
|
-
printf("min: %d max: %d\n", min, max);
|
133
|
-
}
|
134
|
-
|
135
113
|
VALUE
|
136
114
|
oj_str_intern(const char *key, size_t len) {
|
137
|
-
|
138
|
-
KeyVal bucket = str_hash.slots + h;
|
139
|
-
KeyVal b;
|
140
|
-
|
141
|
-
#if HAVE_PTHREAD_MUTEX_INIT
|
142
|
-
pthread_mutex_lock(&str_hash.mutex);
|
143
|
-
#else
|
144
|
-
rb_mutex_lock(str_hash.mutex);
|
145
|
-
#endif
|
146
|
-
if (NULL != bucket->key) { // not the top slot
|
147
|
-
for (b = bucket; 0 != b; b = b->next) {
|
148
|
-
if (len == b->len && 0 == strncmp(b->key, key, len)) {
|
149
|
-
#if HAVE_PTHREAD_MUTEX_INIT
|
150
|
-
pthread_mutex_unlock(&str_hash.mutex);
|
151
|
-
#else
|
152
|
-
rb_mutex_unlock(str_hash.mutex);
|
153
|
-
#endif
|
154
|
-
return b->val;
|
155
|
-
}
|
156
|
-
bucket = b;
|
157
|
-
}
|
158
|
-
b = ALLOC(struct _keyVal);
|
159
|
-
b->next = NULL;
|
160
|
-
bucket->next = b;
|
161
|
-
bucket = b;
|
162
|
-
}
|
163
|
-
bucket->key = oj_strndup(key, len);
|
164
|
-
bucket->len = len;
|
165
|
-
bucket->val = rb_utf8_str_new(key, len);
|
166
|
-
bucket->val = rb_str_freeze(bucket->val);
|
167
|
-
rb_gc_register_address(&bucket->val);
|
168
|
-
#if HAVE_PTHREAD_MUTEX_INIT
|
169
|
-
pthread_mutex_unlock(&str_hash.mutex);
|
170
|
-
#else
|
171
|
-
rb_mutex_unlock(str_hash.mutex);
|
172
|
-
#endif
|
173
|
-
return bucket->val;
|
115
|
+
return cache_intern(str_cache, key, len);
|
174
116
|
}
|
175
117
|
|
176
118
|
VALUE
|
177
119
|
oj_sym_intern(const char *key, size_t len) {
|
178
|
-
|
179
|
-
|
180
|
-
KeyVal b;
|
120
|
+
return cache_intern(sym_cache, key, len);
|
121
|
+
}
|
181
122
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
rb_mutex_lock(sym_hash.mutex);
|
186
|
-
#endif
|
187
|
-
if (NULL != bucket->key) { // not the top slot
|
188
|
-
for (b = bucket; 0 != b; b = b->next) {
|
189
|
-
if (len == b->len && 0 == strncmp(b->key, key, len)) {
|
190
|
-
#if HAVE_PTHREAD_MUTEX_INIT
|
191
|
-
pthread_mutex_unlock(&sym_hash.mutex);
|
192
|
-
#else
|
193
|
-
rb_mutex_unlock(sym_hash.mutex);
|
194
|
-
#endif
|
195
|
-
return b->val;
|
196
|
-
}
|
197
|
-
bucket = b;
|
198
|
-
}
|
199
|
-
b = ALLOC(struct _keyVal);
|
200
|
-
b->next = NULL;
|
201
|
-
bucket->next = b;
|
202
|
-
bucket = b;
|
203
|
-
}
|
204
|
-
bucket->key = oj_strndup(key, len);
|
205
|
-
bucket->len = len;
|
206
|
-
bucket->val = ID2SYM(rb_intern3(key, len, oj_utf8_encoding));
|
207
|
-
rb_gc_register_address(&bucket->val);
|
208
|
-
#if HAVE_PTHREAD_MUTEX_INIT
|
209
|
-
pthread_mutex_unlock(&sym_hash.mutex);
|
210
|
-
#else
|
211
|
-
rb_mutex_unlock(sym_hash.mutex);
|
212
|
-
#endif
|
213
|
-
return bucket->val;
|
123
|
+
ID
|
124
|
+
oj_attr_intern(const char *key, size_t len) {
|
125
|
+
return cache_intern(attr_cache, key, len);
|
214
126
|
}
|
215
127
|
|
216
|
-
static
|
217
|
-
|
218
|
-
|
128
|
+
static uint64_t hash_calc(const uint8_t *key, size_t len) {
|
129
|
+
const uint8_t *end = key + len;
|
130
|
+
const uint8_t *endless = key + (len & 0xFFFFFFFC);
|
131
|
+
uint64_t h = (uint64_t)len;
|
132
|
+
uint64_t k;
|
219
133
|
|
220
|
-
|
221
|
-
|
134
|
+
while (key < endless) {
|
135
|
+
k = (uint64_t)*key++;
|
136
|
+
k |= (uint64_t)*key++ << 8;
|
137
|
+
k |= (uint64_t)*key++ << 16;
|
138
|
+
k |= (uint64_t)*key++ << 24;
|
222
139
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
*buf = '@';
|
228
|
-
memcpy(buf + 1, key, klen);
|
229
|
-
buf[klen + 1] = '\0';
|
230
|
-
}
|
231
|
-
var_id = rb_intern(buf);
|
232
|
-
xfree(buf);
|
233
|
-
} else {
|
234
|
-
if ('~' == *key) {
|
235
|
-
memcpy(attr, key + 1, klen - 1);
|
236
|
-
attr[klen - 1] = '\0';
|
237
|
-
} else {
|
238
|
-
*attr = '@';
|
239
|
-
memcpy(attr + 1, key, klen);
|
240
|
-
attr[klen + 1] = '\0';
|
241
|
-
}
|
242
|
-
var_id = rb_intern(attr);
|
140
|
+
k *= M;
|
141
|
+
k ^= k >> 24;
|
142
|
+
h *= M;
|
143
|
+
h ^= k * M;
|
243
144
|
}
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
ID oj_attr_intern(const char *key, size_t len) {
|
248
|
-
uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
|
249
|
-
KeyVal bucket = attr_hash.slots + h;
|
250
|
-
KeyVal b;
|
145
|
+
if (1 < end - key) {
|
146
|
+
uint16_t k16 = (uint16_t)*key++;
|
251
147
|
|
252
|
-
|
253
|
-
|
254
|
-
#else
|
255
|
-
rb_mutex_lock(attr_hash.mutex);
|
256
|
-
#endif
|
257
|
-
if (NULL != bucket->key) { // not the top slot
|
258
|
-
for (b = bucket; 0 != b; b = b->next) {
|
259
|
-
if (len == b->len && 0 == strncmp(b->key, key, len)) {
|
260
|
-
#if HAVE_PTHREAD_MUTEX_INIT
|
261
|
-
pthread_mutex_unlock(&attr_hash.mutex);
|
262
|
-
#else
|
263
|
-
rb_mutex_unlock(attr_hash.mutex);
|
264
|
-
#endif
|
265
|
-
return (ID)b->val;
|
266
|
-
}
|
267
|
-
bucket = b;
|
268
|
-
}
|
269
|
-
b = ALLOC(struct _keyVal);
|
270
|
-
b->next = NULL;
|
271
|
-
bucket->next = b;
|
272
|
-
bucket = b;
|
148
|
+
k16 |= (uint16_t)*key++ << 8;
|
149
|
+
h ^= k16 << 8;
|
273
150
|
}
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
return
|
151
|
+
if (key < end) {
|
152
|
+
h ^= *key;
|
153
|
+
}
|
154
|
+
h *= M;
|
155
|
+
h ^= h >> 13;
|
156
|
+
h *= M;
|
157
|
+
h ^= h >> 15;
|
158
|
+
|
159
|
+
return h;
|
283
160
|
}
|
284
161
|
|
285
162
|
static VALUE resolve_classname(VALUE mod, const char *classname, int auto_define) {
|
@@ -333,7 +210,7 @@ static VALUE resolve_classpath(ParseInfo pi, const char *name, size_t len, int a
|
|
333
210
|
}
|
334
211
|
|
335
212
|
VALUE oj_class_intern(const char *key, size_t len, bool safe, ParseInfo pi, int auto_define, VALUE error_class) {
|
336
|
-
|
213
|
+
uint64_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
|
337
214
|
KeyVal bucket = class_hash.slots + h;
|
338
215
|
KeyVal b;
|
339
216
|
|
@@ -396,3 +273,9 @@ char *oj_strndup(const char *s, size_t len) {
|
|
396
273
|
|
397
274
|
return d;
|
398
275
|
}
|
276
|
+
|
277
|
+
void intern_cleanup() {
|
278
|
+
cache_free(str_cache);
|
279
|
+
cache_free(sym_cache);
|
280
|
+
cache_free(attr_cache);
|
281
|
+
}
|
data/ext/oj/intern.h
CHANGED
data/ext/oj/mimic_json.c
CHANGED
@@ -682,7 +682,7 @@ static VALUE mimic_set_create_id(VALUE self, VALUE id) {
|
|
682
682
|
*/
|
683
683
|
static VALUE mimic_create_id(VALUE self) {
|
684
684
|
if (NULL != oj_default_options.create_id) {
|
685
|
-
return
|
685
|
+
return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
|
686
686
|
}
|
687
687
|
return rb_str_new_cstr(oj_json_class);
|
688
688
|
}
|
@@ -714,7 +714,7 @@ static struct _options mimic_object_to_json_options = {0, // indent
|
|
714
714
|
false, // sec_prec_set
|
715
715
|
No, // ignore_under
|
716
716
|
Yes, // cache_keys
|
717
|
-
|
717
|
+
0, // cache_str
|
718
718
|
0, // int_range_min
|
719
719
|
0, // int_range_max
|
720
720
|
oj_json_class, // create_id
|
data/ext/oj/object.c
CHANGED
@@ -30,46 +30,19 @@ inline static long read_long(const char *str, size_t len) {
|
|
30
30
|
|
31
31
|
static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) {
|
32
32
|
volatile VALUE rkey;
|
33
|
-
#if 0
|
34
|
-
VALUE *slot;
|
35
33
|
|
36
34
|
if (':' == k1) {
|
37
|
-
|
38
|
-
rkey = rb_str_new(kval->key + 1, kval->klen - 1);
|
39
|
-
rkey = oj_encode(rkey);
|
40
|
-
rkey = rb_str_intern(rkey);
|
41
|
-
*slot = rkey;
|
42
|
-
rb_gc_register_address(slot);
|
43
|
-
}
|
44
|
-
} else if (Yes == pi->options.sym_key) {
|
45
|
-
if (Qnil == (rkey = oj_sym_hash_get(kval->key, kval->klen, &slot))) {
|
46
|
-
rkey = rb_str_new(kval->key, kval->klen);
|
47
|
-
rkey = oj_encode(rkey);
|
48
|
-
rkey = rb_str_intern(rkey);
|
49
|
-
*slot = rkey;
|
50
|
-
rb_gc_register_address(slot);
|
51
|
-
}
|
52
|
-
} else {
|
53
|
-
if (Qnil == (rkey = oj_str_hash_get(kval->key, kval->klen, &slot))) {
|
54
|
-
rkey = rb_str_new(kval->key, kval->klen);
|
55
|
-
rkey = oj_encode(rkey);
|
56
|
-
*slot = rkey;
|
57
|
-
rb_gc_register_address(slot);
|
58
|
-
}
|
35
|
+
return ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding));
|
59
36
|
}
|
60
|
-
|
61
|
-
|
62
|
-
rkey = ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding));
|
63
|
-
} else {
|
64
|
-
if (Yes == pi->options.sym_key) {
|
65
|
-
rkey = ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding));
|
66
|
-
} else {
|
67
|
-
rkey = rb_str_new(kval->key, kval->klen);
|
68
|
-
rkey = oj_encode(rkey);
|
69
|
-
}
|
37
|
+
if (Yes == pi->options.sym_key) {
|
38
|
+
return ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding));
|
70
39
|
}
|
71
|
-
#
|
40
|
+
#if HAVE_RB_ENC_INTERNED_STR
|
41
|
+
rkey = rb_enc_interned_str(kval->key, kval->klen, oj_utf8_encoding);
|
42
|
+
#else
|
43
|
+
rkey = rb_utf8_str_new(kval->key, kval->klen);
|
72
44
|
OBJ_FREEZE(rkey);
|
45
|
+
#endif
|
73
46
|
return rkey;
|
74
47
|
}
|
75
48
|
|
@@ -87,8 +60,7 @@ static VALUE str_to_value(ParseInfo pi, const char *str, size_t len, const char
|
|
87
60
|
}
|
88
61
|
rstr = oj_circ_array_get(pi->circ_array, i);
|
89
62
|
} else {
|
90
|
-
|
91
|
-
rstr = oj_encode(rstr);
|
63
|
+
rstr = rb_utf8_str_new(str, len);
|
92
64
|
}
|
93
65
|
return rstr;
|
94
66
|
}
|
@@ -259,8 +231,7 @@ static int hat_cstr(ParseInfo pi, Val parent, Val kval, const char *str, size_t
|
|
259
231
|
parent->val = ID2SYM(rb_intern3(str + 1, len - 1, oj_utf8_encoding));
|
260
232
|
break;
|
261
233
|
case 's':
|
262
|
-
|
263
|
-
parent->val = oj_encode(parent->val);
|
234
|
+
parent->val = rb_utf8_str_new(str, len);
|
264
235
|
break;
|
265
236
|
case 'c': // class
|
266
237
|
{
|