oj 3.13.3 → 3.13.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1280 -0
- data/README.md +1 -1
- data/RELEASE_NOTES.md +55 -0
- data/ext/oj/cache.c +94 -109
- data/ext/oj/custom.c +2 -2
- data/ext/oj/intern.c +8 -1
- data/ext/oj/oj.c +5 -2
- data/ext/oj/parser.c +76 -28
- data/ext/oj/strict.c +2 -2
- data/ext/oj/usual.c +15 -15
- data/lib/oj/version.rb +1 -1
- data/pages/Options.md +12 -2
- data/test/bar.rb +9 -28
- data/test/foo.rb +8 -8
- data/test/test_parser_usual.rb +9 -5
- metadata +7 -2
data/README.md
CHANGED
data/RELEASE_NOTES.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# RELEASE NOTES
|
2
|
+
|
3
|
+
The release notes here are organized by release. For a list of changes
|
4
|
+
see the [{file:CHANGELOG.md}](CHANGELOG.md) file. In this file are
|
5
|
+
the steps to take to aid in keeping things rolling after updating to
|
6
|
+
the latest version.
|
7
|
+
|
8
|
+
## 3.13.x
|
9
|
+
|
10
|
+
This release included a new cache that performs better than the
|
11
|
+
earlier cache and a new high performance parser.
|
12
|
+
|
13
|
+
### Cache
|
14
|
+
|
15
|
+
The new cache includes a least recently used expiration to reduce
|
16
|
+
memory use. The cache is also self adjusting and will expand as needed
|
17
|
+
for better performance. It also handles Hash keys and string values
|
18
|
+
with two options, `:cache_keys`, a boolean and `:cache_str` an
|
19
|
+
integer. The `:cache_str` if set to more than zero is the limit for
|
20
|
+
the length of string values to cache. The maximum value is 35 which
|
21
|
+
allows strings up to 34 bytes to be cached.
|
22
|
+
|
23
|
+
One interesting aspect of the cache is not so much the string caching
|
24
|
+
which performs similarly to the Ruby intern functions but the caching of
|
25
|
+
symbols and object attribute names. There is a significant gain for
|
26
|
+
symbols and object attributes.
|
27
|
+
|
28
|
+
If the cache is not desired then setting the default options to turn
|
29
|
+
it off can be done with this line:
|
30
|
+
|
31
|
+
``` ruby
|
32
|
+
Oj.default_options = { cache_keys: false, cache_str: 0 }
|
33
|
+
```
|
34
|
+
|
35
|
+
### Oj::Parser
|
36
|
+
|
37
|
+
The new parser uses a different core that follows the approach taken
|
38
|
+
by [OjC](https://github.com/ohler55/ojc) and
|
39
|
+
[OjG](https://github.com/ohler55/ojg). It also takes advantage of the
|
40
|
+
bulk Array and Hash functions. Another issue the new parser addresses
|
41
|
+
is option management. Instead of a single global default_options each
|
42
|
+
parser instance maintains its own options.
|
43
|
+
|
44
|
+
There is a price to be paid when using the Oj::Parser. The API is not
|
45
|
+
the same as the older parser. A single parser can only be used in a
|
46
|
+
single thread. This allows reuse of internal buffers for additional
|
47
|
+
improvements in performance.
|
48
|
+
|
49
|
+
The performance advantage of the Oj::Parser is that it is more than 3
|
50
|
+
times faster than the Oj::compat_load call and 6 times faster than the
|
51
|
+
JSON gem.
|
52
|
+
|
53
|
+
### Dump Performance
|
54
|
+
|
55
|
+
Thanks to Watson1978 Oj.dump also received a speed boost.
|
data/ext/oj/cache.c
CHANGED
@@ -4,9 +4,14 @@
|
|
4
4
|
#if HAVE_PTHREAD_MUTEX_INIT
|
5
5
|
#include <pthread.h>
|
6
6
|
#endif
|
7
|
+
#include <stdlib.h>
|
7
8
|
|
8
9
|
#include "cache.h"
|
9
10
|
|
11
|
+
// The stdlib calloc, realloc, and free are used instead of the Ruby ALLOC,
|
12
|
+
// ALLOC_N, REALLOC, and xfree since the latter could trigger a GC which will
|
13
|
+
// either corrupt memory or if the mark function locks will deadlock.
|
14
|
+
|
10
15
|
#define REHASH_LIMIT 4
|
11
16
|
#define MIN_SHIFT 8
|
12
17
|
#define REUSE_MAX 8192
|
@@ -23,23 +28,23 @@
|
|
23
28
|
#define M 0x5bd1e995
|
24
29
|
|
25
30
|
typedef struct _slot {
|
26
|
-
struct _slot *next;
|
27
|
-
VALUE
|
28
|
-
uint64_t
|
29
|
-
uint32_t
|
30
|
-
uint8_t
|
31
|
-
char
|
31
|
+
struct _slot * next;
|
32
|
+
VALUE val;
|
33
|
+
uint64_t hash;
|
34
|
+
volatile uint32_t use_cnt;
|
35
|
+
uint8_t klen;
|
36
|
+
char key[CACHE_MAX_KEY];
|
32
37
|
} * Slot;
|
33
38
|
|
34
39
|
typedef struct _cache {
|
35
|
-
Slot * slots;
|
36
|
-
size_t cnt;
|
40
|
+
volatile Slot * slots;
|
41
|
+
volatile size_t cnt;
|
37
42
|
VALUE (*form)(const char *str, size_t len);
|
38
43
|
uint64_t size;
|
39
44
|
uint64_t mask;
|
40
45
|
VALUE (*intern)(struct _cache *c, const char *key, size_t len);
|
41
|
-
Slot
|
42
|
-
size_t
|
46
|
+
volatile Slot reuse;
|
47
|
+
size_t rcnt;
|
43
48
|
#if HAVE_PTHREAD_MUTEX_INIT
|
44
49
|
pthread_mutex_t mutex;
|
45
50
|
#else
|
@@ -53,22 +58,6 @@ void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
|
|
53
58
|
c->form = form;
|
54
59
|
}
|
55
60
|
|
56
|
-
#if 0
|
57
|
-
// For debugging only.
|
58
|
-
static void cache_print(Cache c) {
|
59
|
-
for (uint64_t i = 0; i < c->size; i++) {
|
60
|
-
printf("%4d:", i);
|
61
|
-
for (Slot s = c->slots[i]; NULL != s; s = s->next) {
|
62
|
-
char buf[40];
|
63
|
-
strncpy(buf, s->key, s->klen);
|
64
|
-
buf[s->klen] = '\0';
|
65
|
-
printf(" %s", buf);
|
66
|
-
}
|
67
|
-
printf("\n");
|
68
|
-
}
|
69
|
-
}
|
70
|
-
#endif
|
71
|
-
|
72
61
|
static uint64_t hash_calc(const uint8_t *key, size_t len) {
|
73
62
|
const uint8_t *end = key + len;
|
74
63
|
const uint8_t *endless = key + (len & 0xFFFFFFFC);
|
@@ -104,23 +93,24 @@ static uint64_t hash_calc(const uint8_t *key, size_t len) {
|
|
104
93
|
}
|
105
94
|
|
106
95
|
static void rehash(Cache c) {
|
107
|
-
uint64_t osize
|
96
|
+
uint64_t osize;
|
108
97
|
Slot * end;
|
109
98
|
Slot * sp;
|
110
99
|
|
111
|
-
c->size
|
112
|
-
c->
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
100
|
+
osize = c->size;
|
101
|
+
c->size = osize * 4;
|
102
|
+
c->mask = c->size - 1;
|
103
|
+
c->slots = realloc((void *)c->slots, sizeof(Slot) * c->size);
|
104
|
+
memset((Slot *)c->slots + osize, 0, sizeof(Slot) * osize * 3);
|
105
|
+
end = (Slot *)c->slots + osize;
|
106
|
+
for (sp = (Slot *)c->slots; sp < end; sp++) {
|
117
107
|
Slot s = *sp;
|
118
108
|
Slot next = NULL;
|
119
109
|
|
120
110
|
*sp = NULL;
|
121
111
|
for (; NULL != s; s = next) {
|
122
112
|
uint64_t h = s->hash & c->mask;
|
123
|
-
Slot * bucket = c->slots + h;
|
113
|
+
Slot * bucket = (Slot *)c->slots + h;
|
124
114
|
|
125
115
|
next = s->next;
|
126
116
|
s->next = *bucket;
|
@@ -130,14 +120,15 @@ static void rehash(Cache c) {
|
|
130
120
|
}
|
131
121
|
|
132
122
|
static VALUE lockless_intern(Cache c, const char *key, size_t len) {
|
133
|
-
uint64_t
|
134
|
-
Slot *
|
135
|
-
Slot
|
123
|
+
uint64_t h = hash_calc((const uint8_t *)key, len);
|
124
|
+
Slot * bucket = (Slot *)c->slots + (h & c->mask);
|
125
|
+
Slot b;
|
126
|
+
volatile VALUE rkey;
|
136
127
|
|
137
128
|
while (REUSE_MAX < c->rcnt) {
|
138
129
|
if (NULL != (b = c->reuse)) {
|
139
130
|
c->reuse = b->next;
|
140
|
-
|
131
|
+
free(b);
|
141
132
|
c->rcnt--;
|
142
133
|
} else {
|
143
134
|
// An accounting error occurred somewhere so correct it.
|
@@ -146,46 +137,44 @@ static VALUE lockless_intern(Cache c, const char *key, size_t len) {
|
|
146
137
|
}
|
147
138
|
for (b = *bucket; NULL != b; b = b->next) {
|
148
139
|
if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
|
149
|
-
b->use_cnt +=
|
140
|
+
b->use_cnt += 16;
|
150
141
|
return b->val;
|
151
142
|
}
|
152
143
|
}
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
c
|
171
|
-
if (REHASH_LIMIT < c->cnt / c->size) {
|
172
|
-
rehash(c);
|
173
|
-
}
|
144
|
+
rkey = c->form(key, len);
|
145
|
+
if (NULL == (b = c->reuse)) {
|
146
|
+
b = calloc(1, sizeof(struct _slot));
|
147
|
+
} else {
|
148
|
+
c->reuse = b->next;
|
149
|
+
c->rcnt--;
|
150
|
+
}
|
151
|
+
b->hash = h;
|
152
|
+
memcpy(b->key, key, len);
|
153
|
+
b->klen = (uint8_t)len;
|
154
|
+
b->key[len] = '\0';
|
155
|
+
b->val = rkey;
|
156
|
+
b->use_cnt = 4;
|
157
|
+
b->next = *bucket;
|
158
|
+
*bucket = b;
|
159
|
+
c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
|
160
|
+
if (REHASH_LIMIT < c->cnt / c->size) {
|
161
|
+
rehash(c);
|
174
162
|
}
|
175
|
-
return
|
163
|
+
return rkey;
|
176
164
|
}
|
177
165
|
|
178
166
|
static VALUE locking_intern(Cache c, const char *key, size_t len) {
|
179
|
-
uint64_t
|
180
|
-
Slot *
|
181
|
-
Slot
|
182
|
-
uint64_t
|
167
|
+
uint64_t h;
|
168
|
+
Slot * bucket;
|
169
|
+
Slot b;
|
170
|
+
uint64_t old_size;
|
171
|
+
volatile VALUE rkey;
|
183
172
|
|
184
173
|
CACHE_LOCK(c);
|
185
174
|
while (REUSE_MAX < c->rcnt) {
|
186
175
|
if (NULL != (b = c->reuse)) {
|
187
176
|
c->reuse = b->next;
|
188
|
-
|
177
|
+
free(b);
|
189
178
|
c->rcnt--;
|
190
179
|
} else {
|
191
180
|
// An accounting error occurred somewhere so correct it.
|
@@ -193,53 +182,53 @@ static VALUE locking_intern(Cache c, const char *key, size_t len) {
|
|
193
182
|
}
|
194
183
|
}
|
195
184
|
h = hash_calc((const uint8_t *)key, len);
|
196
|
-
bucket = c->slots + (h & c->mask);
|
185
|
+
bucket = (Slot *)c->slots + (h & c->mask);
|
197
186
|
for (b = *bucket; NULL != b; b = b->next) {
|
198
187
|
if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
|
199
188
|
b->use_cnt += 4;
|
200
189
|
CACHE_UNLOCK(c);
|
190
|
+
|
201
191
|
return b->val;
|
202
192
|
}
|
203
193
|
}
|
204
194
|
old_size = c->size;
|
205
195
|
// The creation of a new value may trigger a GC which would be a problem if the
|
206
196
|
// cache is locked so make sure it is unlocked for the key value creation.
|
207
|
-
if (NULL
|
208
|
-
b = ALLOC(struct _slot);
|
209
|
-
} else {
|
197
|
+
if (NULL != (b = c->reuse)) {
|
210
198
|
c->reuse = b->next;
|
211
199
|
c->rcnt--;
|
212
200
|
}
|
213
201
|
CACHE_UNLOCK(c);
|
214
|
-
{
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
CACHE_UNLOCK(c);
|
202
|
+
if (NULL == b) {
|
203
|
+
b = calloc(1, sizeof(struct _slot));
|
204
|
+
}
|
205
|
+
rkey = c->form(key, len);
|
206
|
+
b->hash = h;
|
207
|
+
memcpy(b->key, key, len);
|
208
|
+
b->klen = (uint8_t)len;
|
209
|
+
b->key[len] = '\0';
|
210
|
+
b->val = rkey;
|
211
|
+
b->use_cnt = 16;
|
212
|
+
|
213
|
+
// Lock again to add the new entry.
|
214
|
+
CACHE_LOCK(c);
|
215
|
+
if (old_size != c->size) {
|
216
|
+
h = hash_calc((const uint8_t *)key, len);
|
217
|
+
bucket = (Slot *)c->slots + (h & c->mask);
|
218
|
+
}
|
219
|
+
b->next = *bucket;
|
220
|
+
*bucket = b;
|
221
|
+
c->cnt++; // Don't worry about wrapping. Worse case is the entry is removed and recreated.
|
222
|
+
if (REHASH_LIMIT < c->cnt / c->size) {
|
223
|
+
rehash(c);
|
237
224
|
}
|
238
|
-
|
225
|
+
CACHE_UNLOCK(c);
|
226
|
+
|
227
|
+
return rkey;
|
239
228
|
}
|
240
229
|
|
241
230
|
Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
|
242
|
-
Cache c =
|
231
|
+
Cache c = calloc(1, sizeof(struct _cache));
|
243
232
|
int shift = 0;
|
244
233
|
|
245
234
|
for (; REHASH_LIMIT < size; size /= 2, shift++) {
|
@@ -252,16 +241,12 @@ Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool
|
|
252
241
|
#else
|
253
242
|
c->mutex = rb_mutex_new();
|
254
243
|
#endif
|
255
|
-
c->size
|
256
|
-
c->mask
|
257
|
-
c->slots
|
258
|
-
|
259
|
-
c->
|
260
|
-
c->
|
261
|
-
c->xrate = 1; // low
|
262
|
-
c->mark = mark;
|
263
|
-
c->reuse = NULL;
|
264
|
-
c->rcnt = 0;
|
244
|
+
c->size = 1 << shift;
|
245
|
+
c->mask = c->size - 1;
|
246
|
+
c->slots = calloc(c->size, sizeof(Slot));
|
247
|
+
c->form = form;
|
248
|
+
c->xrate = 1; // low
|
249
|
+
c->mark = mark;
|
265
250
|
if (locking) {
|
266
251
|
c->intern = locking_intern;
|
267
252
|
} else {
|
@@ -283,11 +268,11 @@ void cache_free(Cache c) {
|
|
283
268
|
|
284
269
|
for (s = c->slots[i]; NULL != s; s = next) {
|
285
270
|
next = s->next;
|
286
|
-
|
271
|
+
free(s);
|
287
272
|
}
|
288
273
|
}
|
289
|
-
|
290
|
-
|
274
|
+
free((void *)c->slots);
|
275
|
+
free(c);
|
291
276
|
}
|
292
277
|
|
293
278
|
void cache_mark(Cache c) {
|
@@ -334,7 +319,7 @@ void cache_mark(Cache c) {
|
|
334
319
|
|
335
320
|
VALUE
|
336
321
|
cache_intern(Cache c, const char *key, size_t len) {
|
337
|
-
if (CACHE_MAX_KEY
|
322
|
+
if (CACHE_MAX_KEY <= len) {
|
338
323
|
return c->form(key, len);
|
339
324
|
}
|
340
325
|
return c->intern(c, key, len);
|
data/ext/oj/custom.c
CHANGED
@@ -955,8 +955,8 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
|
|
955
955
|
}
|
956
956
|
}
|
957
957
|
} else {
|
958
|
-
|
959
|
-
volatile VALUE rstr = rb_utf8_str_new(str, len);
|
958
|
+
volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
|
959
|
+
//volatile VALUE rstr = rb_utf8_str_new(str, len);
|
960
960
|
|
961
961
|
if (Qundef == rkey) {
|
962
962
|
if (Yes == pi->options.sym_key) {
|
data/ext/oj/intern.c
CHANGED
@@ -51,7 +51,7 @@ static VALUE form_str(const char *str, size_t len) {
|
|
51
51
|
}
|
52
52
|
|
53
53
|
static VALUE form_sym(const char *str, size_t len) {
|
54
|
-
return rb_str_intern(rb_utf8_str_new(str, len));
|
54
|
+
return rb_to_symbol(rb_str_intern(rb_utf8_str_new(str, len)));
|
55
55
|
}
|
56
56
|
|
57
57
|
static VALUE form_attr(const char *str, size_t len) {
|
@@ -112,7 +112,14 @@ void oj_hash_init() {
|
|
112
112
|
|
113
113
|
VALUE
|
114
114
|
oj_str_intern(const char *key, size_t len) {
|
115
|
+
// For huge cache sizes over half a million the rb_enc_interned_str
|
116
|
+
// performs slightly better but at a more "normal" size of several
|
117
|
+
// thousands the cache intern performs about 20% better.
|
118
|
+
#if HAVE_RB_ENC_INTERNED_STR && 0
|
119
|
+
return rb_enc_interned_str(key, len, rb_utf8_encoding());
|
120
|
+
#else
|
115
121
|
return cache_intern(str_cache, key, len);
|
122
|
+
#endif
|
116
123
|
}
|
117
124
|
|
118
125
|
VALUE
|
data/ext/oj/oj.c
CHANGED
@@ -107,6 +107,7 @@ static VALUE bigdecimal_load_sym;
|
|
107
107
|
static VALUE bigdecimal_sym;
|
108
108
|
static VALUE cache_keys_sym;
|
109
109
|
static VALUE cache_str_sym;
|
110
|
+
static VALUE cache_string_sym;
|
110
111
|
static VALUE circular_sym;
|
111
112
|
static VALUE class_cache_sym;
|
112
113
|
static VALUE compat_bigdecimal_sym;
|
@@ -287,7 +288,7 @@ struct _options oj_default_options = {
|
|
287
288
|
* - *:ignore* [_nil_|_Array_] either nil or an Array of classes to ignore when dumping
|
288
289
|
* - *:ignore_under* [_Boolean_] if true then attributes that start with _ are ignored when dumping in
|
289
290
|
*object or custom mode.
|
290
|
-
* - *:cache_keys* [_Boolean_] if true then hash keys are cached
|
291
|
+
* - *:cache_keys* [_Boolean_] if true then hash keys are cached if less than 35 bytes.
|
291
292
|
* - *:cache_str* [_Fixnum_] maximum string value length to cache (strings less than this are cached)
|
292
293
|
* - *:integer_range* [_Range_] Dump integers outside range as strings.
|
293
294
|
* - *:trace* [_true,_|_false_] Trace all load and dump calls, default is false (trace is off)
|
@@ -692,7 +693,7 @@ static int parse_options_cb(VALUE k, VALUE v, VALUE opts)
|
|
692
693
|
sprintf(copts->float_fmt, "%%0.%dg", n);
|
693
694
|
copts->float_prec = n;
|
694
695
|
}
|
695
|
-
} else if (cache_str_sym == k) {
|
696
|
+
} else if (cache_str_sym == k || cache_string_sym == k) {
|
696
697
|
int n;
|
697
698
|
|
698
699
|
#ifdef RUBY_INTEGER_UNIFICATION
|
@@ -1920,6 +1921,8 @@ void Init_oj() {
|
|
1920
1921
|
rb_gc_register_address(&cache_keys_sym);
|
1921
1922
|
cache_str_sym = ID2SYM(rb_intern("cache_str"));
|
1922
1923
|
rb_gc_register_address(&cache_str_sym);
|
1924
|
+
cache_string_sym = ID2SYM(rb_intern("cache_string"));
|
1925
|
+
rb_gc_register_address(&cache_string_sym);
|
1923
1926
|
circular_sym = ID2SYM(rb_intern("circular"));
|
1924
1927
|
rb_gc_register_address(&circular_sym);
|
1925
1928
|
class_cache_sym = ID2SYM(rb_intern("class_cache"));
|