oj 3.13.1 → 3.13.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/oj/cache.h CHANGED
@@ -11,10 +11,11 @@
 
 struct _cache;
 
-extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark);
+extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking);
 extern void cache_free(struct _cache *c);
 extern void cache_mark(struct _cache *c);
 extern void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
 extern VALUE cache_intern(struct _cache *c, const char *key, size_t len);
+extern void cache_set_expunge_rate(struct _cache *c, int rate);
 
 #endif /* CACHE_H */
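
The header now gives the cache an explicit thread-safety switch (the new locking argument to cache_create) and a way to tune how aggressively entries are expunged (cache_set_expunge_rate). A minimal usage sketch follows; only the prototypes above come from the header, while the form callback, the flag semantics in the comments, and the rate value are illustrative assumptions inferred from how intern.c (below) calls this API.

#include <stdbool.h>
#include <ruby.h>
#include "cache.h"

/* Form callback: builds the VALUE the cache stores for a key
 * (same shape as the form_str helper added in intern.c below). */
static VALUE form_str(const char *str, size_t len) {
    return rb_str_freeze(rb_utf8_str_new(str, len));
}

static void cache_usage_sketch(void) {
    /* A size of 0 is what intern.c passes (assumed to mean "use the default size");
     * mark appears to control whether cached VALUEs stay visible to the GC,
     * and locking whether cache_intern is guarded for concurrent callers. */
    struct _cache *c = cache_create(0, form_str, true, true);

    cache_set_expunge_rate(c, 90);                    /* hypothetical rate value */
    volatile VALUE v = cache_intern(c, "status", 6);  /* repeat calls return the cached string */

    (void)v;
    cache_free(c);
}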
data/ext/oj/compat.c CHANGED
@@ -30,8 +30,7 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
         if (Yes == pi->options.sym_key) {
             rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
         } else {
-            rkey = rb_str_new(key, klen);
-            rkey = oj_encode(rkey);
+            rkey = rb_utf8_str_new(key, klen);
         }
     } else if (Yes == pi->options.sym_key) {
         rkey = oj_sym_intern(key, klen);
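
The compat.c change collapses the old create-then-encode pair into a single call. A small sketch of the two paths, under the assumption that oj_encode simply tags the string with the UTF-8 encoding (rb_enc_associate and rb_utf8_encoding are the standard Ruby C API calls for that):

#include <ruby.h>
#include <ruby/encoding.h>

/* Sketch only: both helpers are assumed to yield an equivalent UTF-8 String. */
static VALUE key_old_way(const char *key, long klen) {
    VALUE rkey = rb_str_new(key, klen);                /* created as ASCII-8BIT */
    return rb_enc_associate(rkey, rb_utf8_encoding()); /* what oj_encode is assumed to do */
}

static VALUE key_new_way(const char *key, long klen) {
    return rb_utf8_str_new(key, klen);                 /* created as UTF-8 in one step */
}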
data/ext/oj/custom.c CHANGED
@@ -955,17 +955,15 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
             }
         }
     } else {
-        //volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
-        volatile VALUE rstr = rb_str_new(str, len);
+        volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
+        //volatile VALUE rstr = rb_utf8_str_new(str, len);
 
         if (Qundef == rkey) {
             if (Yes == pi->options.sym_key) {
                 rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
             } else {
-                rkey = rb_str_new(key, klen);
-                rkey = oj_encode(rkey);
+                rkey = rb_utf8_str_new(key, klen);
             }
-            rstr = oj_encode(rstr);
         }
         if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
             VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);
@@ -1032,7 +1030,7 @@ static void hash_set_num(struct _parseInfo *pi, Val kval, NumInfo ni) {
         }
         if (86400 == ni->exp) { // UTC time
             parent->val = rb_time_nano_new(ni->i, (long)nsec);
-            // Since the ruby C routines alway create local time, the
+            // Since the ruby C routines always create local time, the
             // offset and then a conversion to UTC keeps makes the time
             // match the expected value.
             parent->val = rb_funcall2(parent->val, oj_utc_id, 0, 0);
@@ -1090,9 +1088,8 @@ static void array_append_num(ParseInfo pi, NumInfo ni) {
 }
 
 static void array_append_cstr(ParseInfo pi, const char *str, size_t len, const char *orig) {
-    volatile VALUE rstr = rb_str_new(str, len);
+    volatile VALUE rstr = rb_utf8_str_new(str, len);
 
-    rstr = oj_encode(rstr);
     if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
         VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);
 
data/ext/oj/debug.c CHANGED
@@ -109,8 +109,9 @@ static void mark(struct _ojParser *p) {
 
 void oj_set_parser_debug(ojParser p) {
     Funcs end = p->funcs + 3;
+    Funcs f;
 
-    for (Funcs f = p->funcs; f < end; f++) {
+    for (f = p->funcs; f < end; f++) {
        f->add_null = add_null;
        f->add_true = add_true;
        f->add_false = add_false;
data/ext/oj/extconf.rb CHANGED
@@ -31,6 +31,7 @@ have_func('rb_gc_mark_movable')
 have_func('stpcpy')
 have_func('pthread_mutex_init')
 have_func('rb_enc_associate')
+have_func('rb_enc_interned_str')
 have_func('rb_ext_ractor_safe', 'ruby.h')
 # rb_hash_bulk_insert is deep down in a header not included in normal build and that seems to fool have_func.
 have_func('rb_hash_bulk_insert', 'ruby.h') unless '2' == version[0] && '6' == version[1]
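
The new have_func('rb_enc_interned_str') check makes mkmf define HAVE_RB_ENC_INTERNED_STR when the function is available (it was added in Ruby 3.0); that macro is what intern.c below tests before choosing between the VM's interned strings and oj's own cache. A minimal sketch of such a guarded call (the fallback here is simplified to a plain frozen string rather than the real cache path):

#include <ruby.h>
#include <ruby/encoding.h>

/* Sketch: use the VM's deduplicated, frozen strings when the configure check
 * found rb_enc_interned_str; otherwise freeze a freshly built UTF-8 string. */
static VALUE intern_utf8(const char *key, long len) {
#ifdef HAVE_RB_ENC_INTERNED_STR
    return rb_enc_interned_str(key, len, rb_utf8_encoding());
#else
    return rb_str_freeze(rb_utf8_str_new(key, len));
#endif
}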
data/ext/oj/fast.c CHANGED
@@ -1472,7 +1472,7 @@ static VALUE doc_move(VALUE self, VALUE str) {
  * to the block on yield is the Doc instance after moving to the child
  * location.
  * @param [String] path if provided it identified the top of the branch to
- * process the chilren of
+ * process the children of
  * @yieldparam [Doc] Doc at the child location
  * @example
  *   Oj::Doc.open('[3,[2,1]]') { |doc|
data/ext/oj/intern.c CHANGED
@@ -8,11 +8,16 @@
 #if HAVE_PTHREAD_MUTEX_INIT
 #include <pthread.h>
 #endif
+#include "cache.h"
 #include "parse.h"
 
-#define HASH_SLOT_CNT ((uint32_t)8192)
+// Only used for the class cache so 256 should be sufficient.
+#define HASH_SLOT_CNT ((uint64_t)256)
 #define HASH_MASK (HASH_SLOT_CNT - 1)
 
+// almost the Murmur hash algorithm
+#define M 0x5bd1e995
+
 typedef struct _keyVal {
     struct _keyVal *next;
     const char *key;
@@ -30,256 +35,135 @@ typedef struct _hash {
 } * Hash;
 
 struct _hash class_hash;
-struct _hash str_hash;
-struct _hash sym_hash;
 struct _hash attr_hash;
 
-// almost the Murmur hash algorithm
-#define M 0x5bd1e995
-#define C1 0xCC9E2D51
-#define C2 0x1B873593
-#define N 0xE6546B64
+static struct _cache *str_cache = NULL;
+static VALUE str_cache_obj;
 
-static uint32_t hash_calc(const uint8_t *key, size_t len) {
-    const uint8_t *end = key + len;
-    const uint8_t *endless = key + (len & 0xFFFFFFFC);
-    uint32_t h = (uint32_t)len;
-    uint32_t k;
+static struct _cache *sym_cache = NULL;
+static VALUE sym_cache_obj;
 
-    while (key < endless) {
-        k = (uint32_t)*key++;
-        k |= (uint32_t)*key++ << 8;
-        k |= (uint32_t)*key++ << 16;
-        k |= (uint32_t)*key++ << 24;
+static struct _cache *attr_cache = NULL;
+static VALUE attr_cache_obj;
 
-        k *= M;
-        k ^= k >> 24;
-        h *= M;
-        h ^= k * M;
-    }
-    if (1 < end - key) {
-        uint16_t k16 = (uint16_t)*key++;
+static VALUE form_str(const char *str, size_t len) {
+    return rb_str_freeze(rb_utf8_str_new(str, len));
+}
 
-        k16 |= (uint16_t)*key++ << 8;
-        h ^= k16 << 8;
+static VALUE form_sym(const char *str, size_t len) {
+    return rb_to_symbol(rb_str_intern(rb_utf8_str_new(str, len)));
+}
+
+static VALUE form_attr(const char *str, size_t len) {
+    char buf[256];
+
+    if (sizeof(buf) - 2 <= len) {
+        char *b = ALLOC_N(char, len + 2);
+        ID id;
+
+        if ('~' == *str) {
+            memcpy(b, str + 1, len - 1);
+            b[len - 1] = '\0';
+            len -= 2;
+        } else {
+            *b = '@';
+            memcpy(b + 1, str, len);
+            b[len + 1] = '\0';
+        }
+        id = rb_intern3(buf, len + 1, oj_utf8_encoding);
+        xfree(b);
+        return id;
     }
-    if (key < end) {
-        h ^= *key;
+    if ('~' == *str) {
+        memcpy(buf, str + 1, len - 1);
+        buf[len - 1] = '\0';
+        len -= 2;
+    } else {
+        *buf = '@';
+        memcpy(buf + 1, str, len);
+        buf[len + 1] = '\0';
     }
-    h *= M;
-    h ^= h >> 13;
-    h *= M;
-    h ^= h >> 15;
-
-    return h;
+    return (VALUE)rb_intern3(buf, len + 1, oj_utf8_encoding);
 }
 
 void oj_hash_init() {
+    VALUE cache_class = rb_define_class_under(Oj, "Cache", rb_cObject);
+
+    str_cache = cache_create(0, form_str, true, true);
+    str_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, str_cache);
+    rb_gc_register_address(&str_cache_obj);
+
+    sym_cache = cache_create(0, form_sym, true, true);
+    sym_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, sym_cache);
+    rb_gc_register_address(&sym_cache_obj);
+
+    attr_cache = cache_create(0, form_attr, false, true);
+    attr_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, attr_cache);
+    rb_gc_register_address(&attr_cache_obj);
+
     memset(class_hash.slots, 0, sizeof(class_hash.slots));
-    memset(str_hash.slots, 0, sizeof(str_hash.slots));
-    memset(sym_hash.slots, 0, sizeof(sym_hash.slots));
-    memset(attr_hash.slots, 0, sizeof(attr_hash.slots));
 #if HAVE_PTHREAD_MUTEX_INIT
     pthread_mutex_init(&class_hash.mutex, NULL);
-    pthread_mutex_init(&str_hash.mutex, NULL);
-    pthread_mutex_init(&sym_hash.mutex, NULL);
-    pthread_mutex_init(&attr_hash.mutex, NULL);
 #else
     class_hash.mutex = rb_mutex_new();
    rb_gc_register_address(&class_hash.mutex);
-    str_hash.mutex = rb_mutex_new();
-    rb_gc_register_address(&str_hash.mutex);
-    sym_hash.mutex = rb_mutex_new();
-    rb_gc_register_address(&sym_hash.mutex);
-    attr_hash.mutex = rb_mutex_new();
-    rb_gc_register_address(&attr_hash.mutex);
 #endif
 }
 
-void oj_hash_print() {
-    uint32_t i;
-    KeyVal b;
-
-    for (i = 0; i < HASH_SLOT_CNT; i++) {
-        printf("%4d:", i);
-        for (b = class_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
-            printf(" %s", b->key);
-        }
-        printf("\n");
-    }
-}
-
-void oj_hash_sizes() {
-    uint32_t i;
-    KeyVal b;
-    int max = 0;
-    int min = 1000000;
-
-    for (i = 0; i < HASH_SLOT_CNT; i++) {
-        int cnt = 0;
-
-        for (b = str_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
-            cnt++;
-        }
-        // printf(" %4d\n", cnt);
-        if (max < cnt) {
-            max = cnt;
-        }
-        if (cnt < min) {
-            min = cnt;
-        }
-    }
-    printf("min: %d max: %d\n", min, max);
-}
-
 VALUE
 oj_str_intern(const char *key, size_t len) {
-    uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
-    KeyVal bucket = str_hash.slots + h;
-    KeyVal b;
-
-#if HAVE_PTHREAD_MUTEX_INIT
-    pthread_mutex_lock(&str_hash.mutex);
-#else
-    rb_mutex_lock(str_hash.mutex);
-#endif
-    if (NULL != bucket->key) { // not the top slot
-        for (b = bucket; 0 != b; b = b->next) {
-            if (len == b->len && 0 == strncmp(b->key, key, len)) {
-#if HAVE_PTHREAD_MUTEX_INIT
-                pthread_mutex_unlock(&str_hash.mutex);
+    // For huge cache sizes over half a million the rb_enc_interned_str
+    // performs slightly better but at more "normal" size of a several
+    // thousands the cache intern performs about 20% better.
+#if HAVE_RB_ENC_INTERNED_STR && 0
+    return rb_enc_interned_str(key, len, rb_utf8_encoding());
 #else
-                rb_mutex_unlock(str_hash.mutex);
+    return cache_intern(str_cache, key, len);
 #endif
-                return b->val;
-            }
-            bucket = b;
-        }
-        b = ALLOC(struct _keyVal);
-        b->next = NULL;
-        bucket->next = b;
-        bucket = b;
-    }
-    bucket->key = oj_strndup(key, len);
-    bucket->len = len;
-    bucket->val = rb_utf8_str_new(key, len);
-    bucket->val = rb_str_freeze(bucket->val);
-    rb_gc_register_address(&bucket->val);
-#if HAVE_PTHREAD_MUTEX_INIT
-    pthread_mutex_unlock(&str_hash.mutex);
-#else
-    rb_mutex_unlock(str_hash.mutex);
-#endif
-    return bucket->val;
 }
 
 VALUE
 oj_sym_intern(const char *key, size_t len) {
-    uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
-    KeyVal bucket = sym_hash.slots + h;
-    KeyVal b;
+    return cache_intern(sym_cache, key, len);
+}
 
-#if HAVE_PTHREAD_MUTEX_INIT
-    pthread_mutex_lock(&sym_hash.mutex);
-#else
-    rb_mutex_lock(sym_hash.mutex);
-#endif
-    if (NULL != bucket->key) { // not the top slot
-        for (b = bucket; 0 != b; b = b->next) {
-            if (len == b->len && 0 == strncmp(b->key, key, len)) {
-#if HAVE_PTHREAD_MUTEX_INIT
-                pthread_mutex_unlock(&sym_hash.mutex);
-#else
-                rb_mutex_unlock(sym_hash.mutex);
-#endif
-                return b->val;
-            }
-            bucket = b;
-        }
-        b = ALLOC(struct _keyVal);
-        b->next = NULL;
-        bucket->next = b;
-        bucket = b;
-    }
-    bucket->key = oj_strndup(key, len);
-    bucket->len = len;
-    bucket->val = ID2SYM(rb_intern3(key, len, oj_utf8_encoding));
-    rb_gc_register_address(&bucket->val);
-#if HAVE_PTHREAD_MUTEX_INIT
-    pthread_mutex_unlock(&sym_hash.mutex);
-#else
-    rb_mutex_unlock(sym_hash.mutex);
-#endif
-    return bucket->val;
+ID
+oj_attr_intern(const char *key, size_t len) {
+    return cache_intern(attr_cache, key, len);
 }
 
-static ID form_attr(const char *key, size_t klen) {
-    char attr[256];
-    ID var_id;
+static uint64_t hash_calc(const uint8_t *key, size_t len) {
+    const uint8_t *end = key + len;
+    const uint8_t *endless = key + (len & 0xFFFFFFFC);
+    uint64_t h = (uint64_t)len;
+    uint64_t k;
 
-    if ((int)sizeof(attr) <= klen + 2) {
-        char *buf = ALLOC_N(char, klen + 2);
+    while (key < endless) {
+        k = (uint64_t)*key++;
+        k |= (uint64_t)*key++ << 8;
+        k |= (uint64_t)*key++ << 16;
+        k |= (uint64_t)*key++ << 24;
 
-        if ('~' == *key) {
-            memcpy(buf, key + 1, klen - 1);
-            buf[klen - 1] = '\0';
-        } else {
-            *buf = '@';
-            memcpy(buf + 1, key, klen);
-            buf[klen + 1] = '\0';
-        }
-        var_id = rb_intern(buf);
-        xfree(buf);
-    } else {
-        if ('~' == *key) {
-            memcpy(attr, key + 1, klen - 1);
-            attr[klen - 1] = '\0';
-        } else {
-            *attr = '@';
-            memcpy(attr + 1, key, klen);
-            attr[klen + 1] = '\0';
-        }
-        var_id = rb_intern(attr);
+        k *= M;
+        k ^= k >> 24;
+        h *= M;
+        h ^= k * M;
     }
-    return var_id;
-}
-
-ID oj_attr_intern(const char *key, size_t len) {
-    uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
-    KeyVal bucket = attr_hash.slots + h;
-    KeyVal b;
+    if (1 < end - key) {
+        uint16_t k16 = (uint16_t)*key++;
 
-#if HAVE_PTHREAD_MUTEX_INIT
-    pthread_mutex_lock(&attr_hash.mutex);
-#else
-    rb_mutex_lock(attr_hash.mutex);
-#endif
-    if (NULL != bucket->key) { // not the top slot
-        for (b = bucket; 0 != b; b = b->next) {
-            if (len == b->len && 0 == strncmp(b->key, key, len)) {
-#if HAVE_PTHREAD_MUTEX_INIT
-                pthread_mutex_unlock(&attr_hash.mutex);
-#else
-                rb_mutex_unlock(attr_hash.mutex);
-#endif
-                return (ID)b->val;
-            }
-            bucket = b;
-        }
-        b = ALLOC(struct _keyVal);
-        b->next = NULL;
-        bucket->next = b;
-        bucket = b;
+        k16 |= (uint16_t)*key++ << 8;
+        h ^= k16 << 8;
     }
-    bucket->key = oj_strndup(key, len);
-    bucket->len = len;
-    bucket->val = (VALUE)form_attr(key, len);
-#if HAVE_PTHREAD_MUTEX_INIT
-    pthread_mutex_unlock(&attr_hash.mutex);
-#else
-    rb_mutex_unlock(attr_hash.mutex);
-#endif
-    return (ID)bucket->val;
+    if (key < end) {
+        h ^= *key;
+    }
+    h *= M;
+    h ^= h >> 13;
+    h *= M;
+    h ^= h >> 15;
+
+    return h;
 }
 
 static VALUE resolve_classname(VALUE mod, const char *classname, int auto_define) {
@@ -333,7 +217,7 @@ static VALUE resolve_classpath(ParseInfo pi, const char *name, size_t len, int a
 }
 
 VALUE oj_class_intern(const char *key, size_t len, bool safe, ParseInfo pi, int auto_define, VALUE error_class) {
-    uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
+    uint64_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
     KeyVal bucket = class_hash.slots + h;
     KeyVal b;
 
@@ -396,3 +280,9 @@ char *oj_strndup(const char *s, size_t len) {
 
     return d;
 }
+
+void intern_cleanup() {
+    cache_free(str_cache);
+    cache_free(sym_cache);
+    cache_free(attr_cache);
+}
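
The net effect of the intern.c rewrite: string, symbol, and attribute interning now all go through the shared cache from cache.h, each with its own form callback, while only the class lookup keeps an open-hash table (hence the much smaller HASH_SLOT_CNT). The public entry points are unchanged; the sketch below only restates, with declarations copied from the diff, which cache each one now consults.

#include <ruby.h>

/* Prototypes as they appear in the diff above; the bodies now delegate to cache_intern(). */
extern VALUE oj_str_intern(const char *key, size_t len);
extern VALUE oj_sym_intern(const char *key, size_t len);
extern ID oj_attr_intern(const char *key, size_t len);

static void intern_flow_sketch(void) {
    VALUE s = oj_str_intern("name", 4);   /* str_cache  -> frozen UTF-8 String "name" */
    VALUE y = oj_sym_intern("name", 4);   /* sym_cache  -> Symbol :name               */
    ID iv = oj_attr_intern("name", 4);    /* attr_cache -> ID for the @name ivar      */

    (void)s; (void)y; (void)iv;
}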