oj 3.13.1 → 3.13.5

Sign up to get free protection for your applications and to get access to all the features.
data/ext/oj/cache.h CHANGED
@@ -11,10 +11,11 @@
11
11
 
12
12
  struct _cache;
13
13
 
14
- extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark);
14
+ extern struct _cache *cache_create(size_t size, VALUE (*form)(const char *str, size_t len), bool mark, bool locking);
15
15
  extern void cache_free(struct _cache *c);
16
16
  extern void cache_mark(struct _cache *c);
17
17
  extern void cache_set_form(struct _cache *c, VALUE (*form)(const char *str, size_t len));
18
18
  extern VALUE cache_intern(struct _cache *c, const char *key, size_t len);
19
+ extern void cache_set_expunge_rate(struct _cache *c, int rate);
19
20
 
20
21
  #endif /* CACHE_H */
data/ext/oj/compat.c CHANGED
@@ -30,8 +30,7 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
30
30
  if (Yes == pi->options.sym_key) {
31
31
  rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
32
32
  } else {
33
- rkey = rb_str_new(key, klen);
34
- rkey = oj_encode(rkey);
33
+ rkey = rb_utf8_str_new(key, klen);
35
34
  }
36
35
  } else if (Yes == pi->options.sym_key) {
37
36
  rkey = oj_sym_intern(key, klen);
data/ext/oj/custom.c CHANGED
@@ -955,17 +955,15 @@ static void hash_set_cstr(ParseInfo pi, Val kval, const char *str, size_t len, c
955
955
  }
956
956
  }
957
957
  } else {
958
- //volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
959
- volatile VALUE rstr = rb_str_new(str, len);
958
+ volatile VALUE rstr = oj_cstr_to_value(str, len, (size_t)pi->options.cache_str);
959
+ //volatile VALUE rstr = rb_utf8_str_new(str, len);
960
960
 
961
961
  if (Qundef == rkey) {
962
962
  if (Yes == pi->options.sym_key) {
963
963
  rkey = ID2SYM(rb_intern3(key, klen, oj_utf8_encoding));
964
964
  } else {
965
- rkey = rb_str_new(key, klen);
966
- rkey = oj_encode(rkey);
965
+ rkey = rb_utf8_str_new(key, klen);
967
966
  }
968
- rstr = oj_encode(rstr);
969
967
  }
970
968
  if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
971
969
  VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);
@@ -1032,7 +1030,7 @@ static void hash_set_num(struct _parseInfo *pi, Val kval, NumInfo ni) {
1032
1030
  }
1033
1031
  if (86400 == ni->exp) { // UTC time
1034
1032
  parent->val = rb_time_nano_new(ni->i, (long)nsec);
1035
- // Since the ruby C routines alway create local time, the
1033
+ // Since the ruby C routines always create local time, the
1036
1034
  // offset and then a conversion to UTC keeps makes the time
1037
1035
  // match the expected value.
1038
1036
  parent->val = rb_funcall2(parent->val, oj_utc_id, 0, 0);
@@ -1090,9 +1088,8 @@ static void array_append_num(ParseInfo pi, NumInfo ni) {
1090
1088
  }
1091
1089
 
1092
1090
  static void array_append_cstr(ParseInfo pi, const char *str, size_t len, const char *orig) {
1093
- volatile VALUE rstr = rb_str_new(str, len);
1091
+ volatile VALUE rstr = rb_utf8_str_new(str, len);
1094
1092
 
1095
- rstr = oj_encode(rstr);
1096
1093
  if (Yes == pi->options.create_ok && NULL != pi->options.str_rx.head) {
1097
1094
  VALUE clas = oj_rxclass_match(&pi->options.str_rx, str, (int)len);
1098
1095
 
data/ext/oj/debug.c CHANGED
@@ -109,8 +109,9 @@ static void mark(struct _ojParser *p) {
109
109
 
110
110
  void oj_set_parser_debug(ojParser p) {
111
111
  Funcs end = p->funcs + 3;
112
+ Funcs f;
112
113
 
113
- for (Funcs f = p->funcs; f < end; f++) {
114
+ for (f = p->funcs; f < end; f++) {
114
115
  f->add_null = add_null;
115
116
  f->add_true = add_true;
116
117
  f->add_false = add_false;
data/ext/oj/extconf.rb CHANGED
@@ -31,6 +31,7 @@ have_func('rb_gc_mark_movable')
31
31
  have_func('stpcpy')
32
32
  have_func('pthread_mutex_init')
33
33
  have_func('rb_enc_associate')
34
+ have_func('rb_enc_interned_str')
34
35
  have_func('rb_ext_ractor_safe', 'ruby.h')
35
36
  # rb_hash_bulk_insert is deep down in a header not included in normal build and that seems to fool have_func.
36
37
  have_func('rb_hash_bulk_insert', 'ruby.h') unless '2' == version[0] && '6' == version[1]
data/ext/oj/fast.c CHANGED
@@ -1472,7 +1472,7 @@ static VALUE doc_move(VALUE self, VALUE str) {
1472
1472
  * to the block on yield is the Doc instance after moving to the child
1473
1473
  * location.
1474
1474
  * @param [String] path if provided it identified the top of the branch to
1475
- * process the chilren of
1475
+ * process the children of
1476
1476
  * @yieldparam [Doc] Doc at the child location
1477
1477
  * @example
1478
1478
  * Oj::Doc.open('[3,[2,1]]') { |doc|
data/ext/oj/intern.c CHANGED
@@ -8,11 +8,16 @@
8
8
  #if HAVE_PTHREAD_MUTEX_INIT
9
9
  #include <pthread.h>
10
10
  #endif
11
+ #include "cache.h"
11
12
  #include "parse.h"
12
13
 
13
- #define HASH_SLOT_CNT ((uint32_t)8192)
14
+ // Only used for the class cache so 256 should be sufficient.
15
+ #define HASH_SLOT_CNT ((uint64_t)256)
14
16
  #define HASH_MASK (HASH_SLOT_CNT - 1)
15
17
 
18
+ // almost the Murmur hash algorithm
19
+ #define M 0x5bd1e995
20
+
16
21
  typedef struct _keyVal {
17
22
  struct _keyVal *next;
18
23
  const char * key;
@@ -30,256 +35,135 @@ typedef struct _hash {
30
35
  } * Hash;
31
36
 
32
37
  struct _hash class_hash;
33
- struct _hash str_hash;
34
- struct _hash sym_hash;
35
38
  struct _hash attr_hash;
36
39
 
37
- // almost the Murmur hash algorithm
38
- #define M 0x5bd1e995
39
- #define C1 0xCC9E2D51
40
- #define C2 0x1B873593
41
- #define N 0xE6546B64
40
+ static struct _cache *str_cache = NULL;
41
+ static VALUE str_cache_obj;
42
42
 
43
- static uint32_t hash_calc(const uint8_t *key, size_t len) {
44
- const uint8_t *end = key + len;
45
- const uint8_t *endless = key + (len & 0xFFFFFFFC);
46
- uint32_t h = (uint32_t)len;
47
- uint32_t k;
43
+ static struct _cache *sym_cache = NULL;
44
+ static VALUE sym_cache_obj;
48
45
 
49
- while (key < endless) {
50
- k = (uint32_t)*key++;
51
- k |= (uint32_t)*key++ << 8;
52
- k |= (uint32_t)*key++ << 16;
53
- k |= (uint32_t)*key++ << 24;
46
+ static struct _cache *attr_cache = NULL;
47
+ static VALUE attr_cache_obj;
54
48
 
55
- k *= M;
56
- k ^= k >> 24;
57
- h *= M;
58
- h ^= k * M;
59
- }
60
- if (1 < end - key) {
61
- uint16_t k16 = (uint16_t)*key++;
49
+ static VALUE form_str(const char *str, size_t len) {
50
+ return rb_str_freeze(rb_utf8_str_new(str, len));
51
+ }
62
52
 
63
- k16 |= (uint16_t)*key++ << 8;
64
- h ^= k16 << 8;
53
+ static VALUE form_sym(const char *str, size_t len) {
54
+ return rb_to_symbol(rb_str_intern(rb_utf8_str_new(str, len)));
55
+ }
56
+
57
+ static VALUE form_attr(const char *str, size_t len) {
58
+ char buf[256];
59
+
60
+ if (sizeof(buf) - 2 <= len) {
61
+ char *b = ALLOC_N(char, len + 2);
62
+ ID id;
63
+
64
+ if ('~' == *str) {
65
+ memcpy(b, str + 1, len - 1);
66
+ b[len - 1] = '\0';
67
+ len -= 2;
68
+ } else {
69
+ *b = '@';
70
+ memcpy(b + 1, str, len);
71
+ b[len + 1] = '\0';
72
+ }
73
+ id = rb_intern3(buf, len + 1, oj_utf8_encoding);
74
+ xfree(b);
75
+ return id;
65
76
  }
66
- if (key < end) {
67
- h ^= *key;
77
+ if ('~' == *str) {
78
+ memcpy(buf, str + 1, len - 1);
79
+ buf[len - 1] = '\0';
80
+ len -= 2;
81
+ } else {
82
+ *buf = '@';
83
+ memcpy(buf + 1, str, len);
84
+ buf[len + 1] = '\0';
68
85
  }
69
- h *= M;
70
- h ^= h >> 13;
71
- h *= M;
72
- h ^= h >> 15;
73
-
74
- return h;
86
+ return (VALUE)rb_intern3(buf, len + 1, oj_utf8_encoding);
75
87
  }
76
88
 
77
89
  void oj_hash_init() {
90
+ VALUE cache_class = rb_define_class_under(Oj, "Cache", rb_cObject);
91
+
92
+ str_cache = cache_create(0, form_str, true, true);
93
+ str_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, str_cache);
94
+ rb_gc_register_address(&str_cache_obj);
95
+
96
+ sym_cache = cache_create(0, form_sym, true, true);
97
+ sym_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, sym_cache);
98
+ rb_gc_register_address(&sym_cache_obj);
99
+
100
+ attr_cache = cache_create(0, form_attr, false, true);
101
+ attr_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, attr_cache);
102
+ rb_gc_register_address(&attr_cache_obj);
103
+
78
104
  memset(class_hash.slots, 0, sizeof(class_hash.slots));
79
- memset(str_hash.slots, 0, sizeof(str_hash.slots));
80
- memset(sym_hash.slots, 0, sizeof(sym_hash.slots));
81
- memset(attr_hash.slots, 0, sizeof(attr_hash.slots));
82
105
  #if HAVE_PTHREAD_MUTEX_INIT
83
106
  pthread_mutex_init(&class_hash.mutex, NULL);
84
- pthread_mutex_init(&str_hash.mutex, NULL);
85
- pthread_mutex_init(&sym_hash.mutex, NULL);
86
- pthread_mutex_init(&attr_hash.mutex, NULL);
87
107
  #else
88
108
  class_hash.mutex = rb_mutex_new();
89
109
  rb_gc_register_address(&class_hash.mutex);
90
- str_hash.mutex = rb_mutex_new();
91
- rb_gc_register_address(&str_hash.mutex);
92
- sym_hash.mutex = rb_mutex_new();
93
- rb_gc_register_address(&sym_hash.mutex);
94
- attr_hash.mutex = rb_mutex_new();
95
- rb_gc_register_address(&attr_hash.mutex);
96
110
  #endif
97
111
  }
98
112
 
99
- void oj_hash_print() {
100
- uint32_t i;
101
- KeyVal b;
102
-
103
- for (i = 0; i < HASH_SLOT_CNT; i++) {
104
- printf("%4d:", i);
105
- for (b = class_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
106
- printf(" %s", b->key);
107
- }
108
- printf("\n");
109
- }
110
- }
111
-
112
- void oj_hash_sizes() {
113
- uint32_t i;
114
- KeyVal b;
115
- int max = 0;
116
- int min = 1000000;
117
-
118
- for (i = 0; i < HASH_SLOT_CNT; i++) {
119
- int cnt = 0;
120
-
121
- for (b = str_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
122
- cnt++;
123
- }
124
- // printf(" %4d\n", cnt);
125
- if (max < cnt) {
126
- max = cnt;
127
- }
128
- if (cnt < min) {
129
- min = cnt;
130
- }
131
- }
132
- printf("min: %d max: %d\n", min, max);
133
- }
134
-
135
113
  VALUE
136
114
  oj_str_intern(const char *key, size_t len) {
137
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
138
- KeyVal bucket = str_hash.slots + h;
139
- KeyVal b;
140
-
141
- #if HAVE_PTHREAD_MUTEX_INIT
142
- pthread_mutex_lock(&str_hash.mutex);
143
- #else
144
- rb_mutex_lock(str_hash.mutex);
145
- #endif
146
- if (NULL != bucket->key) { // not the top slot
147
- for (b = bucket; 0 != b; b = b->next) {
148
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
149
- #if HAVE_PTHREAD_MUTEX_INIT
150
- pthread_mutex_unlock(&str_hash.mutex);
115
+ // For huge cache sizes over half a million the rb_enc_interned_str
116
+ // performs slightly better but at more "normal" size of a several
117
+ // thousands the cache intern performs about 20% better.
118
+ #if HAVE_RB_ENC_INTERNED_STR && 0
119
+ return rb_enc_interned_str(key, len, rb_utf8_encoding());
151
120
  #else
152
- rb_mutex_unlock(str_hash.mutex);
121
+ return cache_intern(str_cache, key, len);
153
122
  #endif
154
- return b->val;
155
- }
156
- bucket = b;
157
- }
158
- b = ALLOC(struct _keyVal);
159
- b->next = NULL;
160
- bucket->next = b;
161
- bucket = b;
162
- }
163
- bucket->key = oj_strndup(key, len);
164
- bucket->len = len;
165
- bucket->val = rb_utf8_str_new(key, len);
166
- bucket->val = rb_str_freeze(bucket->val);
167
- rb_gc_register_address(&bucket->val);
168
- #if HAVE_PTHREAD_MUTEX_INIT
169
- pthread_mutex_unlock(&str_hash.mutex);
170
- #else
171
- rb_mutex_unlock(str_hash.mutex);
172
- #endif
173
- return bucket->val;
174
123
  }
175
124
 
176
125
  VALUE
177
126
  oj_sym_intern(const char *key, size_t len) {
178
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
179
- KeyVal bucket = sym_hash.slots + h;
180
- KeyVal b;
127
+ return cache_intern(sym_cache, key, len);
128
+ }
181
129
 
182
- #if HAVE_PTHREAD_MUTEX_INIT
183
- pthread_mutex_lock(&sym_hash.mutex);
184
- #else
185
- rb_mutex_lock(sym_hash.mutex);
186
- #endif
187
- if (NULL != bucket->key) { // not the top slot
188
- for (b = bucket; 0 != b; b = b->next) {
189
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
190
- #if HAVE_PTHREAD_MUTEX_INIT
191
- pthread_mutex_unlock(&sym_hash.mutex);
192
- #else
193
- rb_mutex_unlock(sym_hash.mutex);
194
- #endif
195
- return b->val;
196
- }
197
- bucket = b;
198
- }
199
- b = ALLOC(struct _keyVal);
200
- b->next = NULL;
201
- bucket->next = b;
202
- bucket = b;
203
- }
204
- bucket->key = oj_strndup(key, len);
205
- bucket->len = len;
206
- bucket->val = ID2SYM(rb_intern3(key, len, oj_utf8_encoding));
207
- rb_gc_register_address(&bucket->val);
208
- #if HAVE_PTHREAD_MUTEX_INIT
209
- pthread_mutex_unlock(&sym_hash.mutex);
210
- #else
211
- rb_mutex_unlock(sym_hash.mutex);
212
- #endif
213
- return bucket->val;
130
+ ID
131
+ oj_attr_intern(const char *key, size_t len) {
132
+ return cache_intern(attr_cache, key, len);
214
133
  }
215
134
 
216
- static ID form_attr(const char *key, size_t klen) {
217
- char attr[256];
218
- ID var_id;
135
+ static uint64_t hash_calc(const uint8_t *key, size_t len) {
136
+ const uint8_t *end = key + len;
137
+ const uint8_t *endless = key + (len & 0xFFFFFFFC);
138
+ uint64_t h = (uint64_t)len;
139
+ uint64_t k;
219
140
 
220
- if ((int)sizeof(attr) <= klen + 2) {
221
- char *buf = ALLOC_N(char, klen + 2);
141
+ while (key < endless) {
142
+ k = (uint64_t)*key++;
143
+ k |= (uint64_t)*key++ << 8;
144
+ k |= (uint64_t)*key++ << 16;
145
+ k |= (uint64_t)*key++ << 24;
222
146
 
223
- if ('~' == *key) {
224
- memcpy(buf, key + 1, klen - 1);
225
- buf[klen - 1] = '\0';
226
- } else {
227
- *buf = '@';
228
- memcpy(buf + 1, key, klen);
229
- buf[klen + 1] = '\0';
230
- }
231
- var_id = rb_intern(buf);
232
- xfree(buf);
233
- } else {
234
- if ('~' == *key) {
235
- memcpy(attr, key + 1, klen - 1);
236
- attr[klen - 1] = '\0';
237
- } else {
238
- *attr = '@';
239
- memcpy(attr + 1, key, klen);
240
- attr[klen + 1] = '\0';
241
- }
242
- var_id = rb_intern(attr);
147
+ k *= M;
148
+ k ^= k >> 24;
149
+ h *= M;
150
+ h ^= k * M;
243
151
  }
244
- return var_id;
245
- }
246
-
247
- ID oj_attr_intern(const char *key, size_t len) {
248
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
249
- KeyVal bucket = attr_hash.slots + h;
250
- KeyVal b;
152
+ if (1 < end - key) {
153
+ uint16_t k16 = (uint16_t)*key++;
251
154
 
252
- #if HAVE_PTHREAD_MUTEX_INIT
253
- pthread_mutex_lock(&attr_hash.mutex);
254
- #else
255
- rb_mutex_lock(attr_hash.mutex);
256
- #endif
257
- if (NULL != bucket->key) { // not the top slot
258
- for (b = bucket; 0 != b; b = b->next) {
259
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
260
- #if HAVE_PTHREAD_MUTEX_INIT
261
- pthread_mutex_unlock(&attr_hash.mutex);
262
- #else
263
- rb_mutex_unlock(attr_hash.mutex);
264
- #endif
265
- return (ID)b->val;
266
- }
267
- bucket = b;
268
- }
269
- b = ALLOC(struct _keyVal);
270
- b->next = NULL;
271
- bucket->next = b;
272
- bucket = b;
155
+ k16 |= (uint16_t)*key++ << 8;
156
+ h ^= k16 << 8;
273
157
  }
274
- bucket->key = oj_strndup(key, len);
275
- bucket->len = len;
276
- bucket->val = (VALUE)form_attr(key, len);
277
- #if HAVE_PTHREAD_MUTEX_INIT
278
- pthread_mutex_unlock(&attr_hash.mutex);
279
- #else
280
- rb_mutex_unlock(attr_hash.mutex);
281
- #endif
282
- return (ID)bucket->val;
158
+ if (key < end) {
159
+ h ^= *key;
160
+ }
161
+ h *= M;
162
+ h ^= h >> 13;
163
+ h *= M;
164
+ h ^= h >> 15;
165
+
166
+ return h;
283
167
  }
284
168
 
285
169
  static VALUE resolve_classname(VALUE mod, const char *classname, int auto_define) {
@@ -333,7 +217,7 @@ static VALUE resolve_classpath(ParseInfo pi, const char *name, size_t len, int a
333
217
  }
334
218
 
335
219
  VALUE oj_class_intern(const char *key, size_t len, bool safe, ParseInfo pi, int auto_define, VALUE error_class) {
336
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
220
+ uint64_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
337
221
  KeyVal bucket = class_hash.slots + h;
338
222
  KeyVal b;
339
223
 
@@ -396,3 +280,9 @@ char *oj_strndup(const char *s, size_t len) {
396
280
 
397
281
  return d;
398
282
  }
283
+
284
+ void intern_cleanup() {
285
+ cache_free(str_cache);
286
+ cache_free(sym_cache);
287
+ cache_free(attr_cache);
288
+ }