oj 3.13.2 → 3.13.3

Sign up to get free protection for your applications and to get access to all the features.
data/ext/oj/intern.c CHANGED
@@ -8,11 +8,16 @@
8
8
  #if HAVE_PTHREAD_MUTEX_INIT
9
9
  #include <pthread.h>
10
10
  #endif
11
+ #include "cache.h"
11
12
  #include "parse.h"
12
13
 
13
- #define HASH_SLOT_CNT ((uint32_t)8192)
14
+ // Only used for the class cache so 256 should be sufficient.
15
+ #define HASH_SLOT_CNT ((uint64_t)256)
14
16
  #define HASH_MASK (HASH_SLOT_CNT - 1)
15
17
 
18
+ // almost the Murmur hash algorithm
19
+ #define M 0x5bd1e995
20
+
16
21
  typedef struct _keyVal {
17
22
  struct _keyVal *next;
18
23
  const char * key;
@@ -30,256 +35,128 @@ typedef struct _hash {
30
35
  } * Hash;
31
36
 
32
37
  struct _hash class_hash;
33
- struct _hash str_hash;
34
- struct _hash sym_hash;
35
38
  struct _hash attr_hash;
36
39
 
37
- // almost the Murmur hash algorithm
38
- #define M 0x5bd1e995
39
- #define C1 0xCC9E2D51
40
- #define C2 0x1B873593
41
- #define N 0xE6546B64
40
+ static struct _cache *str_cache = NULL;
41
+ static VALUE str_cache_obj;
42
42
 
43
- static uint32_t hash_calc(const uint8_t *key, size_t len) {
44
- const uint8_t *end = key + len;
45
- const uint8_t *endless = key + (len & 0xFFFFFFFC);
46
- uint32_t h = (uint32_t)len;
47
- uint32_t k;
43
+ static struct _cache *sym_cache = NULL;
44
+ static VALUE sym_cache_obj;
48
45
 
49
- while (key < endless) {
50
- k = (uint32_t)*key++;
51
- k |= (uint32_t)*key++ << 8;
52
- k |= (uint32_t)*key++ << 16;
53
- k |= (uint32_t)*key++ << 24;
46
+ static struct _cache *attr_cache = NULL;
47
+ static VALUE attr_cache_obj;
54
48
 
55
- k *= M;
56
- k ^= k >> 24;
57
- h *= M;
58
- h ^= k * M;
59
- }
60
- if (1 < end - key) {
61
- uint16_t k16 = (uint16_t)*key++;
49
+ static VALUE form_str(const char *str, size_t len) {
50
+ return rb_str_freeze(rb_utf8_str_new(str, len));
51
+ }
62
52
 
63
- k16 |= (uint16_t)*key++ << 8;
64
- h ^= k16 << 8;
53
+ static VALUE form_sym(const char *str, size_t len) {
54
+ return rb_str_intern(rb_utf8_str_new(str, len));
55
+ }
56
+
57
+ static VALUE form_attr(const char *str, size_t len) {
58
+ char buf[256];
59
+
60
+ if (sizeof(buf) - 2 <= len) {
61
+ char *b = ALLOC_N(char, len + 2);
62
+ ID id;
63
+
64
+ if ('~' == *str) {
65
+ memcpy(b, str + 1, len - 1);
66
+ b[len - 1] = '\0';
67
+ len -= 2;
68
+ } else {
69
+ *b = '@';
70
+ memcpy(b + 1, str, len);
71
+ b[len + 1] = '\0';
72
+ }
73
+ id = rb_intern3(buf, len + 1, oj_utf8_encoding);
74
+ xfree(b);
75
+ return id;
65
76
  }
66
- if (key < end) {
67
- h ^= *key;
77
+ if ('~' == *str) {
78
+ memcpy(buf, str + 1, len - 1);
79
+ buf[len - 1] = '\0';
80
+ len -= 2;
81
+ } else {
82
+ *buf = '@';
83
+ memcpy(buf + 1, str, len);
84
+ buf[len + 1] = '\0';
68
85
  }
69
- h *= M;
70
- h ^= h >> 13;
71
- h *= M;
72
- h ^= h >> 15;
73
-
74
- return h;
86
+ return (VALUE)rb_intern3(buf, len + 1, oj_utf8_encoding);
75
87
  }
76
88
 
77
89
  void oj_hash_init() {
90
+ VALUE cache_class = rb_define_class_under(Oj, "Cache", rb_cObject);
91
+
92
+ str_cache = cache_create(0, form_str, true, true);
93
+ str_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, str_cache);
94
+ rb_gc_register_address(&str_cache_obj);
95
+
96
+ sym_cache = cache_create(0, form_sym, true, true);
97
+ sym_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, sym_cache);
98
+ rb_gc_register_address(&sym_cache_obj);
99
+
100
+ attr_cache = cache_create(0, form_attr, false, true);
101
+ attr_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, attr_cache);
102
+ rb_gc_register_address(&attr_cache_obj);
103
+
78
104
  memset(class_hash.slots, 0, sizeof(class_hash.slots));
79
- memset(str_hash.slots, 0, sizeof(str_hash.slots));
80
- memset(sym_hash.slots, 0, sizeof(sym_hash.slots));
81
- memset(attr_hash.slots, 0, sizeof(attr_hash.slots));
82
105
  #if HAVE_PTHREAD_MUTEX_INIT
83
106
  pthread_mutex_init(&class_hash.mutex, NULL);
84
- pthread_mutex_init(&str_hash.mutex, NULL);
85
- pthread_mutex_init(&sym_hash.mutex, NULL);
86
- pthread_mutex_init(&attr_hash.mutex, NULL);
87
107
  #else
88
108
  class_hash.mutex = rb_mutex_new();
89
109
  rb_gc_register_address(&class_hash.mutex);
90
- str_hash.mutex = rb_mutex_new();
91
- rb_gc_register_address(&str_hash.mutex);
92
- sym_hash.mutex = rb_mutex_new();
93
- rb_gc_register_address(&sym_hash.mutex);
94
- attr_hash.mutex = rb_mutex_new();
95
- rb_gc_register_address(&attr_hash.mutex);
96
110
  #endif
97
111
  }
98
112
 
99
- void oj_hash_print() {
100
- uint32_t i;
101
- KeyVal b;
102
-
103
- for (i = 0; i < HASH_SLOT_CNT; i++) {
104
- printf("%4d:", i);
105
- for (b = class_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
106
- printf(" %s", b->key);
107
- }
108
- printf("\n");
109
- }
110
- }
111
-
112
- void oj_hash_sizes() {
113
- uint32_t i;
114
- KeyVal b;
115
- int max = 0;
116
- int min = 1000000;
117
-
118
- for (i = 0; i < HASH_SLOT_CNT; i++) {
119
- int cnt = 0;
120
-
121
- for (b = str_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
122
- cnt++;
123
- }
124
- // printf(" %4d\n", cnt);
125
- if (max < cnt) {
126
- max = cnt;
127
- }
128
- if (cnt < min) {
129
- min = cnt;
130
- }
131
- }
132
- printf("min: %d max: %d\n", min, max);
133
- }
134
-
135
113
  VALUE
136
114
  oj_str_intern(const char *key, size_t len) {
137
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
138
- KeyVal bucket = str_hash.slots + h;
139
- KeyVal b;
140
-
141
- #if HAVE_PTHREAD_MUTEX_INIT
142
- pthread_mutex_lock(&str_hash.mutex);
143
- #else
144
- rb_mutex_lock(str_hash.mutex);
145
- #endif
146
- if (NULL != bucket->key) { // not the top slot
147
- for (b = bucket; 0 != b; b = b->next) {
148
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
149
- #if HAVE_PTHREAD_MUTEX_INIT
150
- pthread_mutex_unlock(&str_hash.mutex);
151
- #else
152
- rb_mutex_unlock(str_hash.mutex);
153
- #endif
154
- return b->val;
155
- }
156
- bucket = b;
157
- }
158
- b = ALLOC(struct _keyVal);
159
- b->next = NULL;
160
- bucket->next = b;
161
- bucket = b;
162
- }
163
- bucket->key = oj_strndup(key, len);
164
- bucket->len = len;
165
- bucket->val = rb_utf8_str_new(key, len);
166
- bucket->val = rb_str_freeze(bucket->val);
167
- rb_gc_register_address(&bucket->val);
168
- #if HAVE_PTHREAD_MUTEX_INIT
169
- pthread_mutex_unlock(&str_hash.mutex);
170
- #else
171
- rb_mutex_unlock(str_hash.mutex);
172
- #endif
173
- return bucket->val;
115
+ return cache_intern(str_cache, key, len);
174
116
  }
175
117
 
176
118
  VALUE
177
119
  oj_sym_intern(const char *key, size_t len) {
178
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
179
- KeyVal bucket = sym_hash.slots + h;
180
- KeyVal b;
120
+ return cache_intern(sym_cache, key, len);
121
+ }
181
122
 
182
- #if HAVE_PTHREAD_MUTEX_INIT
183
- pthread_mutex_lock(&sym_hash.mutex);
184
- #else
185
- rb_mutex_lock(sym_hash.mutex);
186
- #endif
187
- if (NULL != bucket->key) { // not the top slot
188
- for (b = bucket; 0 != b; b = b->next) {
189
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
190
- #if HAVE_PTHREAD_MUTEX_INIT
191
- pthread_mutex_unlock(&sym_hash.mutex);
192
- #else
193
- rb_mutex_unlock(sym_hash.mutex);
194
- #endif
195
- return b->val;
196
- }
197
- bucket = b;
198
- }
199
- b = ALLOC(struct _keyVal);
200
- b->next = NULL;
201
- bucket->next = b;
202
- bucket = b;
203
- }
204
- bucket->key = oj_strndup(key, len);
205
- bucket->len = len;
206
- bucket->val = ID2SYM(rb_intern3(key, len, oj_utf8_encoding));
207
- rb_gc_register_address(&bucket->val);
208
- #if HAVE_PTHREAD_MUTEX_INIT
209
- pthread_mutex_unlock(&sym_hash.mutex);
210
- #else
211
- rb_mutex_unlock(sym_hash.mutex);
212
- #endif
213
- return bucket->val;
123
+ ID
124
+ oj_attr_intern(const char *key, size_t len) {
125
+ return cache_intern(attr_cache, key, len);
214
126
  }
215
127
 
216
- static ID form_attr(const char *key, size_t klen) {
217
- char attr[256];
218
- ID var_id;
128
+ static uint64_t hash_calc(const uint8_t *key, size_t len) {
129
+ const uint8_t *end = key + len;
130
+ const uint8_t *endless = key + (len & 0xFFFFFFFC);
131
+ uint64_t h = (uint64_t)len;
132
+ uint64_t k;
219
133
 
220
- if ((int)sizeof(attr) <= klen + 2) {
221
- char *buf = ALLOC_N(char, klen + 2);
134
+ while (key < endless) {
135
+ k = (uint64_t)*key++;
136
+ k |= (uint64_t)*key++ << 8;
137
+ k |= (uint64_t)*key++ << 16;
138
+ k |= (uint64_t)*key++ << 24;
222
139
 
223
- if ('~' == *key) {
224
- memcpy(buf, key + 1, klen - 1);
225
- buf[klen - 1] = '\0';
226
- } else {
227
- *buf = '@';
228
- memcpy(buf + 1, key, klen);
229
- buf[klen + 1] = '\0';
230
- }
231
- var_id = rb_intern(buf);
232
- xfree(buf);
233
- } else {
234
- if ('~' == *key) {
235
- memcpy(attr, key + 1, klen - 1);
236
- attr[klen - 1] = '\0';
237
- } else {
238
- *attr = '@';
239
- memcpy(attr + 1, key, klen);
240
- attr[klen + 1] = '\0';
241
- }
242
- var_id = rb_intern(attr);
140
+ k *= M;
141
+ k ^= k >> 24;
142
+ h *= M;
143
+ h ^= k * M;
243
144
  }
244
- return var_id;
245
- }
246
-
247
- ID oj_attr_intern(const char *key, size_t len) {
248
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
249
- KeyVal bucket = attr_hash.slots + h;
250
- KeyVal b;
145
+ if (1 < end - key) {
146
+ uint16_t k16 = (uint16_t)*key++;
251
147
 
252
- #if HAVE_PTHREAD_MUTEX_INIT
253
- pthread_mutex_lock(&attr_hash.mutex);
254
- #else
255
- rb_mutex_lock(attr_hash.mutex);
256
- #endif
257
- if (NULL != bucket->key) { // not the top slot
258
- for (b = bucket; 0 != b; b = b->next) {
259
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
260
- #if HAVE_PTHREAD_MUTEX_INIT
261
- pthread_mutex_unlock(&attr_hash.mutex);
262
- #else
263
- rb_mutex_unlock(attr_hash.mutex);
264
- #endif
265
- return (ID)b->val;
266
- }
267
- bucket = b;
268
- }
269
- b = ALLOC(struct _keyVal);
270
- b->next = NULL;
271
- bucket->next = b;
272
- bucket = b;
148
+ k16 |= (uint16_t)*key++ << 8;
149
+ h ^= k16 << 8;
273
150
  }
274
- bucket->key = oj_strndup(key, len);
275
- bucket->len = len;
276
- bucket->val = (VALUE)form_attr(key, len);
277
- #if HAVE_PTHREAD_MUTEX_INIT
278
- pthread_mutex_unlock(&attr_hash.mutex);
279
- #else
280
- rb_mutex_unlock(attr_hash.mutex);
281
- #endif
282
- return (ID)bucket->val;
151
+ if (key < end) {
152
+ h ^= *key;
153
+ }
154
+ h *= M;
155
+ h ^= h >> 13;
156
+ h *= M;
157
+ h ^= h >> 15;
158
+
159
+ return h;
283
160
  }
284
161
 
285
162
  static VALUE resolve_classname(VALUE mod, const char *classname, int auto_define) {
@@ -333,7 +210,7 @@ static VALUE resolve_classpath(ParseInfo pi, const char *name, size_t len, int a
333
210
  }
334
211
 
335
212
  VALUE oj_class_intern(const char *key, size_t len, bool safe, ParseInfo pi, int auto_define, VALUE error_class) {
336
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
213
+ uint64_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
337
214
  KeyVal bucket = class_hash.slots + h;
338
215
  KeyVal b;
339
216
 
@@ -396,3 +273,9 @@ char *oj_strndup(const char *s, size_t len) {
396
273
 
397
274
  return d;
398
275
  }
276
+
277
+ void intern_cleanup() {
278
+ cache_free(str_cache);
279
+ cache_free(sym_cache);
280
+ cache_free(attr_cache);
281
+ }
data/ext/oj/intern.h CHANGED
@@ -21,7 +21,6 @@ extern VALUE oj_class_intern(const char * key,
21
21
  int auto_define,
22
22
  VALUE error_class);
23
23
 
24
- extern void oj_hash_print();
25
24
  extern char *oj_strndup(const char *s, size_t len);
26
25
 
27
26
  #endif /* OJ_INTERN_H */
data/ext/oj/mimic_json.c CHANGED
@@ -682,7 +682,7 @@ static VALUE mimic_set_create_id(VALUE self, VALUE id) {
682
682
  */
683
683
  static VALUE mimic_create_id(VALUE self) {
684
684
  if (NULL != oj_default_options.create_id) {
685
- return oj_encode(rb_str_new_cstr(oj_default_options.create_id));
685
+ return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
686
686
  }
687
687
  return rb_str_new_cstr(oj_json_class);
688
688
  }
@@ -714,7 +714,7 @@ static struct _options mimic_object_to_json_options = {0, // indent
714
714
  false, // sec_prec_set
715
715
  No, // ignore_under
716
716
  Yes, // cache_keys
717
- 3, // cache_str
717
+ 0, // cache_str
718
718
  0, // int_range_min
719
719
  0, // int_range_max
720
720
  oj_json_class, // create_id
data/ext/oj/object.c CHANGED
@@ -30,46 +30,19 @@ inline static long read_long(const char *str, size_t len) {
30
30
 
31
31
  static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) {
32
32
  volatile VALUE rkey;
33
- #if 0
34
- VALUE *slot;
35
33
 
36
34
  if (':' == k1) {
37
- if (Qnil == (rkey = oj_sym_hash_get(kval->key + 1, kval->klen - 1, &slot))) {
38
- rkey = rb_str_new(kval->key + 1, kval->klen - 1);
39
- rkey = oj_encode(rkey);
40
- rkey = rb_str_intern(rkey);
41
- *slot = rkey;
42
- rb_gc_register_address(slot);
43
- }
44
- } else if (Yes == pi->options.sym_key) {
45
- if (Qnil == (rkey = oj_sym_hash_get(kval->key, kval->klen, &slot))) {
46
- rkey = rb_str_new(kval->key, kval->klen);
47
- rkey = oj_encode(rkey);
48
- rkey = rb_str_intern(rkey);
49
- *slot = rkey;
50
- rb_gc_register_address(slot);
51
- }
52
- } else {
53
- if (Qnil == (rkey = oj_str_hash_get(kval->key, kval->klen, &slot))) {
54
- rkey = rb_str_new(kval->key, kval->klen);
55
- rkey = oj_encode(rkey);
56
- *slot = rkey;
57
- rb_gc_register_address(slot);
58
- }
35
+ return ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding));
59
36
  }
60
- #else
61
- if (':' == k1) {
62
- rkey = ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding));
63
- } else {
64
- if (Yes == pi->options.sym_key) {
65
- rkey = ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding));
66
- } else {
67
- rkey = rb_str_new(kval->key, kval->klen);
68
- rkey = oj_encode(rkey);
69
- }
37
+ if (Yes == pi->options.sym_key) {
38
+ return ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding));
70
39
  }
71
- #endif
40
+ #if HAVE_RB_ENC_INTERNED_STR
41
+ rkey = rb_enc_interned_str(kval->key, kval->klen, oj_utf8_encoding);
42
+ #else
43
+ rkey = rb_utf8_str_new(kval->key, kval->klen);
72
44
  OBJ_FREEZE(rkey);
45
+ #endif
73
46
  return rkey;
74
47
  }
75
48
 
@@ -87,8 +60,7 @@ static VALUE str_to_value(ParseInfo pi, const char *str, size_t len, const char
87
60
  }
88
61
  rstr = oj_circ_array_get(pi->circ_array, i);
89
62
  } else {
90
- rstr = rb_str_new(str, len);
91
- rstr = oj_encode(rstr);
63
+ rstr = rb_utf8_str_new(str, len);
92
64
  }
93
65
  return rstr;
94
66
  }
@@ -259,8 +231,7 @@ static int hat_cstr(ParseInfo pi, Val parent, Val kval, const char *str, size_t
259
231
  parent->val = ID2SYM(rb_intern3(str + 1, len - 1, oj_utf8_encoding));
260
232
  break;
261
233
  case 's':
262
- parent->val = rb_str_new(str, len);
263
- parent->val = oj_encode(parent->val);
234
+ parent->val = rb_utf8_str_new(str, len);
264
235
  break;
265
236
  case 'c': // class
266
237
  {