oj 3.12.3 → 3.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/oj/buf.h +9 -0
  4. data/ext/oj/cache.c +341 -0
  5. data/ext/oj/cache.h +21 -0
  6. data/ext/oj/compat.c +7 -22
  7. data/ext/oj/custom.c +15 -17
  8. data/ext/oj/debug.c +132 -0
  9. data/ext/oj/dump.c +12 -15
  10. data/ext/oj/dump_compat.c +3 -3
  11. data/ext/oj/dump_object.c +9 -9
  12. data/ext/oj/dump_strict.c +3 -3
  13. data/ext/oj/err.h +19 -0
  14. data/ext/oj/extconf.rb +5 -0
  15. data/ext/oj/fast.c +7 -18
  16. data/ext/oj/intern.c +281 -0
  17. data/ext/oj/intern.h +26 -0
  18. data/ext/oj/mimic_json.c +2 -2
  19. data/ext/oj/object.c +15 -92
  20. data/ext/oj/odd.c +1 -1
  21. data/ext/oj/oj.c +117 -94
  22. data/ext/oj/oj.h +1 -1
  23. data/ext/oj/parse.c +5 -5
  24. data/ext/oj/parser.c +1483 -0
  25. data/ext/oj/parser.h +90 -0
  26. data/ext/oj/rails.c +5 -5
  27. data/ext/oj/resolve.c +2 -20
  28. data/ext/oj/rxclass.c +1 -1
  29. data/ext/oj/saj.c +1 -1
  30. data/ext/oj/saj2.c +348 -0
  31. data/ext/oj/scp.c +1 -1
  32. data/ext/oj/sparse.c +2 -2
  33. data/ext/oj/stream_writer.c +4 -4
  34. data/ext/oj/strict.c +9 -27
  35. data/ext/oj/string_writer.c +2 -2
  36. data/ext/oj/usual.c +1252 -0
  37. data/ext/oj/validate.c +51 -0
  38. data/ext/oj/wab.c +14 -19
  39. data/lib/oj/error.rb +1 -1
  40. data/lib/oj/state.rb +8 -7
  41. data/lib/oj/version.rb +1 -1
  42. data/pages/Options.md +1 -1
  43. data/pages/Parser.md +309 -0
  44. data/pages/Rails.md +2 -2
  45. data/test/json_gem/json_generator_test.rb +1 -1
  46. data/test/mem.rb +33 -0
  47. data/test/perf_once.rb +58 -0
  48. data/test/perf_parser.rb +189 -0
  49. data/test/test_hash.rb +1 -1
  50. data/test/test_parser.rb +27 -0
  51. data/test/test_parser_saj.rb +245 -0
  52. data/test/test_parser_usual.rb +213 -0
  53. metadata +26 -5
  54. data/ext/oj/hash.c +0 -168
  55. data/ext/oj/hash.h +0 -21
  56. data/ext/oj/hash_test.c +0 -491
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 125ec67a260b09db65d47add3487d46c55ee805ebaa721d3bf841ef27fe3ade7
4
- data.tar.gz: a1cd38c40217e8c5ffd24f8759ff00f0d6770dcab56a736bd0c4ba52d5f3a03a
3
+ metadata.gz: f5350547ea8224c68aefa0d8f1539689ffb24c9addc20b3cbad3537af06b9f45
4
+ data.tar.gz: d27f8a3fa9311685f215f1636f7ebaf4ed3b10d03725531f8b42c4d1e64b0985
5
5
  SHA512:
6
- metadata.gz: 63496098643062caf40131db78e9ef1ccd75f9fd56f32e49538ccb6e005a42dcc86198d202562edc0b149b7ee649492376584d21554423846ac078efec700899
7
- data.tar.gz: 9d619857d1f9f217d5e7c27c8a999013516680de8e28bc835ee2e267e60eee5dacde1a69e2de694d99d8caf21eb7e5c0c5a51ab9407d03b509423af894913985
6
+ metadata.gz: e3497f0ac6fb20ae4b0aa252855120f2a3e938bbd703dfcebc1d433ea112924b24e3e352080b3f7c09c5cae99d57dde5f26659031126bcb6855d85761e282b1e
7
+ data.tar.gz: c35a5190c600bcd237041cd30cbe227700fc6aed723fb2d54f13013b4af6f79b96b1c5c2dfbcb4dc9c9db30b2894d97c6242a9b0183376da2c2aaa9fb08d4f0a
data/README.md CHANGED
@@ -8,8 +8,7 @@
8
8
 
9
9
  A *fast* JSON parser and Object marshaller as a Ruby gem.
10
10
 
11
- Version 3.0 is out! 3.0 provides better json gem and Rails compatibility. It
12
- also provides additional optimization options.
11
+ Version 3.13 is out with a much faster parser (`Oj::Parser`) and option isolation.
13
12
 
14
13
  ## Using
15
14
 
@@ -54,7 +53,7 @@ For more details on options, modes, advanced features, and more follow these
54
53
  links.
55
54
 
56
55
  - [{file:Options.md}](pages/Options.md) for parse and dump options.
57
- - [{file:Modes.md}](pages/Modes.md) for details on modes for strict JSON compliance, mimicing the JSON gem, and mimicing Rails and ActiveSupport behavior.
56
+ - [{file:Modes.md}](pages/Modes.md) for details on modes for strict JSON compliance, mimicking the JSON gem, and mimicking Rails and ActiveSupport behavior.
58
57
  - [{file:JsonGem.md}](pages/JsonGem.md) includes more details on json gem compatibility and use.
59
58
  - [{file:Rails.md}](pages/Rails.md) includes more details on Rails and ActiveSupport compatibility and use.
60
59
  - [{file:Custom.md}](pages/Custom.md) includes more details on Custom mode.
data/ext/oj/buf.h CHANGED
@@ -19,6 +19,10 @@ inline static void buf_init(Buf buf) {
19
19
  buf->tail = buf->head;
20
20
  }
21
21
 
22
+ inline static void buf_reset(Buf buf) {
23
+ buf->tail = buf->head;
24
+ }
25
+
22
26
  inline static void buf_cleanup(Buf buf) {
23
27
  if (buf->base != buf->head) {
24
28
  xfree(buf->head);
@@ -29,6 +33,11 @@ inline static size_t buf_len(Buf buf) {
29
33
  return buf->tail - buf->head;
30
34
  }
31
35
 
36
+ inline static const char *buf_str(Buf buf) {
37
+ *buf->tail = '\0';
38
+ return buf->head;
39
+ }
40
+
32
41
  inline static void buf_append_string(Buf buf, const char *s, size_t slen) {
33
42
  if (buf->end <= buf->tail + slen) {
34
43
  size_t len = buf->end - buf->head;
data/ext/oj/cache.c ADDED
@@ -0,0 +1,341 @@
1
+ // Copyright (c) 2011, 2021 Peter Ohler. All rights reserved.
2
+ // Licensed under the MIT License. See LICENSE file in the project root for license details.
3
+
4
+ #if HAVE_PTHREAD_MUTEX_INIT
5
+ #include <pthread.h>
6
+ #endif
7
+
8
+ #include "cache.h"
9
+
10
+ #define REHASH_LIMIT 4
11
+ #define MIN_SHIFT 8
12
+ #define REUSE_MAX 8192
13
+
14
+ #if HAVE_PTHREAD_MUTEX_INIT
15
+ #define CACHE_LOCK(c) pthread_mutex_lock(&((c)->mutex))
16
+ #define CACHE_UNLOCK(c) pthread_mutex_unlock(&((c)->mutex))
17
+ #else
18
+ #define CACHE_LOCK(c) rb_mutex_lock((c)->mutex)
19
+ #define CACHE_UNLOCK(c) rb_mutex_unlock((c)->mutex)
20
+ #endif
21
+
22
+ // almost the Murmur hash algorithm
23
+ #define M 0x5bd1e995
24
+
25
+ typedef struct _slot {
26
+ struct _slot *next;
27
+ VALUE val;
28
+ uint64_t hash;
29
+ uint32_t use_cnt;
30
+ uint8_t klen;
31
+ char key[CACHE_MAX_KEY];
32
+ } * Slot;
33
+
34
+ typedef struct _cache {
35
+ Slot * slots;
36
+ size_t cnt;
37
+ VALUE (*form)(const char *str, size_t len);
38
+ uint64_t size;
39
+ uint64_t mask;
40
+ VALUE (*intern)(struct _cache *c, const char *key, size_t len);
41
+ Slot reuse;
42
+ size_t rcnt;
43
+ #if HAVE_PTHREAD_MUTEX_INIT
44
+ pthread_mutex_t mutex;
45
+ #else
46
+ VALUE mutex;
47
+ #endif
48
+ uint8_t xrate;
49
+ bool mark;
50
+ } * Cache;
51
+
52
+ void cache_set_form(Cache c, VALUE (*form)(const char *str, size_t len)) {
53
+ c->form = form;
54
+ }
55
+
56
+ #if 0
57
+ // For debugging only.
58
+ static void cache_print(Cache c) {
59
+ for (uint64_t i = 0; i < c->size; i++) {
60
+ printf("%4d:", i);
61
+ for (Slot s = c->slots[i]; NULL != s; s = s->next) {
62
+ char buf[40];
63
+ strncpy(buf, s->key, s->klen);
64
+ buf[s->klen] = '\0';
65
+ printf(" %s", buf);
66
+ }
67
+ printf("\n");
68
+ }
69
+ }
70
+ #endif
71
+
72
+ static uint64_t hash_calc(const uint8_t *key, size_t len) {
73
+ const uint8_t *end = key + len;
74
+ const uint8_t *endless = key + (len & 0xFFFFFFFC);
75
+ uint64_t h = (uint64_t)len;
76
+ uint64_t k;
77
+
78
+ while (key < endless) {
79
+ k = (uint64_t)*key++;
80
+ k |= (uint64_t)*key++ << 8;
81
+ k |= (uint64_t)*key++ << 16;
82
+ k |= (uint64_t)*key++ << 24;
83
+
84
+ k *= M;
85
+ k ^= k >> 24;
86
+ h *= M;
87
+ h ^= k * M;
88
+ }
89
+ if (1 < end - key) {
90
+ uint16_t k16 = (uint16_t)*key++;
91
+
92
+ k16 |= (uint16_t)*key++ << 8;
93
+ h ^= k16 << 8;
94
+ }
95
+ if (key < end) {
96
+ h ^= *key;
97
+ }
98
+ h *= M;
99
+ h ^= h >> 13;
100
+ h *= M;
101
+ h ^= h >> 15;
102
+
103
+ return h;
104
+ }
105
+
106
+ static void rehash(Cache c) {
107
+ uint64_t osize = c->size;
108
+ Slot * end;
109
+ Slot * sp;
110
+
111
+ c->size = osize * 4;
112
+ c->mask = c->size - 1;
113
+ REALLOC_N(c->slots, Slot, c->size);
114
+ memset(c->slots + osize, 0, sizeof(Slot) * osize * 3);
115
+ end = c->slots + osize;
116
+ for (sp = c->slots; sp < end; sp++) {
117
+ Slot s = *sp;
118
+ Slot next = NULL;
119
+
120
+ *sp = NULL;
121
+ for (; NULL != s; s = next) {
122
+ uint64_t h = s->hash & c->mask;
123
+ Slot * bucket = c->slots + h;
124
+
125
+ next = s->next;
126
+ s->next = *bucket;
127
+ *bucket = s;
128
+ }
129
+ }
130
+ }
131
+
132
+ static VALUE lockless_intern(Cache c, const char *key, size_t len) {
133
+ uint64_t h = hash_calc((const uint8_t *)key, len);
134
+ Slot * bucket = c->slots + (h & c->mask);
135
+ Slot b;
136
+
137
+ while (REUSE_MAX < c->rcnt) {
138
+ if (NULL != (b = c->reuse)) {
139
+ c->reuse = b->next;
140
+ xfree(b);
141
+ c->rcnt--;
142
+ } else {
143
+ // An accounting error occurred somewhere so correct it.
144
+ c->rcnt = 0;
145
+ }
146
+ }
147
+ for (b = *bucket; NULL != b; b = b->next) {
148
+ if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
149
+ b->use_cnt += 4;
150
+ return b->val;
151
+ }
152
+ }
153
+ {
154
+ volatile VALUE rkey = c->form(key, len);
155
+
156
+ if (NULL == (b = c->reuse)) {
157
+ b = ALLOC(struct _slot);
158
+ } else {
159
+ c->reuse = b->next;
160
+ c->rcnt--;
161
+ }
162
+ b->hash = h;
163
+ memcpy(b->key, key, len);
164
+ b->klen = (uint8_t)len;
165
+ b->key[len] = '\0';
166
+ b->val = rkey;
167
+ b->use_cnt = 4;
168
+ b->next = *bucket;
169
+ *bucket = b;
170
+ c->cnt++; // Don't worry about wrapping. Worst case is the entry is removed and recreated.
171
+ if (REHASH_LIMIT < c->cnt / c->size) {
172
+ rehash(c);
173
+ }
174
+ }
175
+ return b->val;
176
+ }
177
+
178
+ static VALUE locking_intern(Cache c, const char *key, size_t len) {
179
+ uint64_t h;
180
+ Slot * bucket;
181
+ Slot b;
182
+ uint64_t old_size;
183
+
184
+ CACHE_LOCK(c);
185
+ while (REUSE_MAX < c->rcnt) {
186
+ if (NULL != (b = c->reuse)) {
187
+ c->reuse = b->next;
188
+ xfree(b);
189
+ c->rcnt--;
190
+ } else {
191
+ // An accounting error occurred somewhere so correct it.
192
+ c->rcnt = 0;
193
+ }
194
+ }
195
+ h = hash_calc((const uint8_t *)key, len);
196
+ bucket = c->slots + (h & c->mask);
197
+ for (b = *bucket; NULL != b; b = b->next) {
198
+ if ((uint8_t)len == b->klen && 0 == strncmp(b->key, key, len)) {
199
+ b->use_cnt += 4;
200
+ CACHE_UNLOCK(c);
201
+ return b->val;
202
+ }
203
+ }
204
+ old_size = c->size;
205
+ // The creation of a new value may trigger a GC which can be a problem if the
206
+ // cache is locked so make sure it is unlocked for the key value creation.
207
+ if (NULL == (b = c->reuse)) {
208
+ b = ALLOC(struct _slot);
209
+ } else {
210
+ c->reuse = b->next;
211
+ c->rcnt--;
212
+ }
213
+ CACHE_UNLOCK(c);
214
+ {
215
+ volatile VALUE rkey = c->form(key, len);
216
+
217
+ b->hash = h;
218
+ memcpy(b->key, key, len);
219
+ b->klen = (uint8_t)len;
220
+ b->key[len] = '\0';
221
+ b->val = rkey;
222
+ b->use_cnt = 4;
223
+
224
+ // Lock again to add the new entry.
225
+ CACHE_LOCK(c);
226
+ if (old_size != c->size) {
227
+ h = hash_calc((const uint8_t *)key, len);
228
+ bucket = c->slots + (h & c->mask);
229
+ }
230
+ b->next = *bucket;
231
+ *bucket = b;
232
+ c->cnt++; // Don't worry about wrapping. Worst case is the entry is removed and recreated.
233
+ if (REHASH_LIMIT < c->cnt / c->size) {
234
+ rehash(c);
235
+ }
236
+ CACHE_UNLOCK(c);
237
+ }
238
+ return b->val;
239
+ }
240
+
241
+ Cache cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking) {
242
+ Cache c = ALLOC(struct _cache);
243
+ int shift = 0;
244
+
245
+ for (; REHASH_LIMIT < size; size /= 2, shift++) {
246
+ }
247
+ if (shift < MIN_SHIFT) {
248
+ shift = MIN_SHIFT;
249
+ }
250
+ #if HAVE_PTHREAD_MUTEX_INIT
251
+ pthread_mutex_init(&c->mutex, NULL);
252
+ #else
253
+ c->mutex = rb_mutex_new();
254
+ #endif
255
+ c->size = 1 << shift;
256
+ c->mask = c->size - 1;
257
+ c->slots = ALLOC_N(Slot, c->size);
258
+ memset(c->slots, 0, sizeof(Slot) * c->size);
259
+ c->form = form;
260
+ c->cnt = 0;
261
+ c->xrate = 1; // low
262
+ c->mark = mark;
263
+ c->reuse = NULL;
264
+ c->rcnt = 0;
265
+ if (locking) {
266
+ c->intern = locking_intern;
267
+ } else {
268
+ c->intern = lockless_intern;
269
+ }
270
+ return c;
271
+ }
272
+
273
+ void cache_set_expunge_rate(Cache c, int rate) {
274
+ c->xrate = (uint8_t)rate;
275
+ }
276
+
277
+ void cache_free(Cache c) {
278
+ uint64_t i;
279
+
280
+ for (i = 0; i < c->size; i++) {
281
+ Slot next;
282
+ Slot s;
283
+
284
+ for (s = c->slots[i]; NULL != s; s = next) {
285
+ next = s->next;
286
+ xfree(s);
287
+ }
288
+ }
289
+ xfree(c->slots);
290
+ xfree(c);
291
+ }
292
+
293
+ void cache_mark(Cache c) {
294
+ uint64_t i;
295
+
296
+ #if !HAVE_PTHREAD_MUTEX_INIT
297
+ rb_gc_mark(c->mutex);
298
+ #endif
299
+ if (0 == c->cnt) {
300
+ return;
301
+ }
302
+ for (i = 0; i < c->size; i++) {
303
+ Slot s;
304
+ Slot prev = NULL;
305
+ Slot next;
306
+
307
+ for (s = c->slots[i]; NULL != s; s = next) {
308
+ next = s->next;
309
+ if (0 == s->use_cnt) {
310
+ if (NULL == prev) {
311
+ c->slots[i] = next;
312
+ } else {
313
+ prev->next = next;
314
+ }
315
+ c->cnt--;
316
+ s->next = c->reuse;
317
+ c->reuse = s;
318
+ c->rcnt++;
319
+ continue;
320
+ }
321
+ switch (c->xrate) {
322
+ case 0: break;
323
+ case 2: s->use_cnt -= 2; break;
324
+ case 3: s->use_cnt /= 2; break;
325
+ default: s->use_cnt--; break;
326
+ }
327
+ if (c->mark) {
328
+ rb_gc_mark(s->val);
329
+ }
330
+ prev = s;
331
+ }
332
+ }
333
+ }
334
+
335
+ VALUE
336
+ cache_intern(Cache c, const char *key, size_t len) {
337
+ if (CACHE_MAX_KEY < len) {
338
+ return c->form(key, len);
339
+ }
340
+ return c->intern(c, key, len);
341
+ }
data/ext/oj/cache.h ADDED
@@ -0,0 +1,21 @@
1
+ // Copyright (c) 2021 Peter Ohler. All rights reserved.
2
+ // Licensed under the MIT License. See LICENSE file in the project root for license details.
3
+
4
+ #ifndef CACHE_H
5
+ #define CACHE_H
6
+
7
+ #include <ruby.h>
8
+ #include <stdbool.h>
9
+
10
+ #define CACHE_MAX_KEY 35
11
+
12
+ struct _cache;
13
+
14
+ extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking);
15
+ extern void cache_free(struct _cache *c);
16
+ extern void cache_mark(struct _cache *c);
17
+ extern void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
18
+ extern VALUE cache_intern(struct _cache *c, const char *key, size_t len);
19
+ extern void cache_set_expunge_rate(struct _cache *c, int rate);
20
+
21
+ #endif /* CACHE_H */
data/ext/oj/compat.c CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  #include "encode.h"
7
7
  #include "err.h"
8
- #include "hash.h"
8
+ #include "intern.h"
9
9
  #include "oj.h"
10
10
  #include "parse.h"
11
11
  #include "resolve.h"
@@ -27,30 +27,15 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
27
27
 
28
28
  if (Qundef == rkey) {
29
29
  if (Yes != pi->options.cache_keys) {
30
- rkey = rb_str_new(key, klen);
31
- rkey = oj_encode(rkey);
32
30
  if (Yes == pi->options.sym_key) {
33
- rkey = rb_str_intern(rkey);
34
- }
35
- } else {
36
- VALUE *slot;
37
-
38
- if (Yes == pi->options.sym_key) {
39
- if (Qnil == (rkey = oj_sym_hash_get(key, klen, &slot))) {
40
- rkey = rb_str_new(key, klen);
41
- rkey = oj_encode(rkey);
42
- rkey = rb_str_intern(rkey);
43
- *slot = rkey;
44
- rb_gc_register_address(slot);
45
- }
31
+ rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
46
32
  } else {
47
- if (Qnil == (rkey = oj_str_hash_get(key, klen, &slot))) {
48
- rkey = rb_str_new(key, klen);
49
- rkey = oj_encode(rkey);
50
- *slot = rkey;
51
- rb_gc_register_address(slot);
52
- }
33
+ rkey = rb_utf8_str_new(key, klen);
53
34
  }
35
+ } else if (Yes == pi->options.sym_key) {
36
+ rkey = oj_sym_intern(key, klen);
37
+ } else {
38
+ rkey = oj_str_intern(key, klen);
54
39
  }
55
40
  }
56
41
  if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {