oj 3.13.2 → 3.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/oj/intern.c CHANGED
@@ -8,11 +8,16 @@
8
8
  #if HAVE_PTHREAD_MUTEX_INIT
9
9
  #include <pthread.h>
10
10
  #endif
11
+ #include "cache.h"
11
12
  #include "parse.h"
12
13
 
13
- #define HASH_SLOT_CNT ((uint32_t)8192)
14
+ // Only used for the class cache so 256 should be sufficient.
15
+ #define HASH_SLOT_CNT ((uint64_t)256)
14
16
  #define HASH_MASK (HASH_SLOT_CNT - 1)
15
17
 
18
+ // almost the Murmur hash algorithm
19
+ #define M 0x5bd1e995
20
+
16
21
  typedef struct _keyVal {
17
22
  struct _keyVal *next;
18
23
  const char * key;
@@ -30,256 +35,128 @@ typedef struct _hash {
30
35
  } * Hash;
31
36
 
32
37
  struct _hash class_hash;
33
- struct _hash str_hash;
34
- struct _hash sym_hash;
35
38
  struct _hash attr_hash;
36
39
 
37
- // almost the Murmur hash algorithm
38
- #define M 0x5bd1e995
39
- #define C1 0xCC9E2D51
40
- #define C2 0x1B873593
41
- #define N 0xE6546B64
40
+ static struct _cache *str_cache = NULL;
41
+ static VALUE str_cache_obj;
42
42
 
43
- static uint32_t hash_calc(const uint8_t *key, size_t len) {
44
- const uint8_t *end = key + len;
45
- const uint8_t *endless = key + (len & 0xFFFFFFFC);
46
- uint32_t h = (uint32_t)len;
47
- uint32_t k;
43
+ static struct _cache *sym_cache = NULL;
44
+ static VALUE sym_cache_obj;
48
45
 
49
- while (key < endless) {
50
- k = (uint32_t)*key++;
51
- k |= (uint32_t)*key++ << 8;
52
- k |= (uint32_t)*key++ << 16;
53
- k |= (uint32_t)*key++ << 24;
46
+ static struct _cache *attr_cache = NULL;
47
+ static VALUE attr_cache_obj;
54
48
 
55
- k *= M;
56
- k ^= k >> 24;
57
- h *= M;
58
- h ^= k * M;
59
- }
60
- if (1 < end - key) {
61
- uint16_t k16 = (uint16_t)*key++;
49
+ static VALUE form_str(const char *str, size_t len) {
50
+ return rb_str_freeze(rb_utf8_str_new(str, len));
51
+ }
62
52
 
63
- k16 |= (uint16_t)*key++ << 8;
64
- h ^= k16 << 8;
53
+ static VALUE form_sym(const char *str, size_t len) {
54
+ return rb_str_intern(rb_utf8_str_new(str, len));
55
+ }
56
+
57
+ static VALUE form_attr(const char *str, size_t len) {
58
+ char buf[256];
59
+
60
+ if (sizeof(buf) - 2 <= len) {
61
+ char *b = ALLOC_N(char, len + 2);
62
+ ID id;
63
+
64
+ if ('~' == *str) {
65
+ memcpy(b, str + 1, len - 1);
66
+ b[len - 1] = '\0';
67
+ len -= 2;
68
+ } else {
69
+ *b = '@';
70
+ memcpy(b + 1, str, len);
71
+ b[len + 1] = '\0';
72
+ }
73
+ id = rb_intern3(buf, len + 1, oj_utf8_encoding);
74
+ xfree(b);
75
+ return id;
65
76
  }
66
- if (key < end) {
67
- h ^= *key;
77
+ if ('~' == *str) {
78
+ memcpy(buf, str + 1, len - 1);
79
+ buf[len - 1] = '\0';
80
+ len -= 2;
81
+ } else {
82
+ *buf = '@';
83
+ memcpy(buf + 1, str, len);
84
+ buf[len + 1] = '\0';
68
85
  }
69
- h *= M;
70
- h ^= h >> 13;
71
- h *= M;
72
- h ^= h >> 15;
73
-
74
- return h;
86
+ return (VALUE)rb_intern3(buf, len + 1, oj_utf8_encoding);
75
87
  }
76
88
 
77
89
  void oj_hash_init() {
90
+ VALUE cache_class = rb_define_class_under(Oj, "Cache", rb_cObject);
91
+
92
+ str_cache = cache_create(0, form_str, true, true);
93
+ str_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, str_cache);
94
+ rb_gc_register_address(&str_cache_obj);
95
+
96
+ sym_cache = cache_create(0, form_sym, true, true);
97
+ sym_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, sym_cache);
98
+ rb_gc_register_address(&sym_cache_obj);
99
+
100
+ attr_cache = cache_create(0, form_attr, false, true);
101
+ attr_cache_obj = Data_Wrap_Struct(cache_class, cache_mark, cache_free, attr_cache);
102
+ rb_gc_register_address(&attr_cache_obj);
103
+
78
104
  memset(class_hash.slots, 0, sizeof(class_hash.slots));
79
- memset(str_hash.slots, 0, sizeof(str_hash.slots));
80
- memset(sym_hash.slots, 0, sizeof(sym_hash.slots));
81
- memset(attr_hash.slots, 0, sizeof(attr_hash.slots));
82
105
  #if HAVE_PTHREAD_MUTEX_INIT
83
106
  pthread_mutex_init(&class_hash.mutex, NULL);
84
- pthread_mutex_init(&str_hash.mutex, NULL);
85
- pthread_mutex_init(&sym_hash.mutex, NULL);
86
- pthread_mutex_init(&attr_hash.mutex, NULL);
87
107
  #else
88
108
  class_hash.mutex = rb_mutex_new();
89
109
  rb_gc_register_address(&class_hash.mutex);
90
- str_hash.mutex = rb_mutex_new();
91
- rb_gc_register_address(&str_hash.mutex);
92
- sym_hash.mutex = rb_mutex_new();
93
- rb_gc_register_address(&sym_hash.mutex);
94
- attr_hash.mutex = rb_mutex_new();
95
- rb_gc_register_address(&attr_hash.mutex);
96
110
  #endif
97
111
  }
98
112
 
99
- void oj_hash_print() {
100
- uint32_t i;
101
- KeyVal b;
102
-
103
- for (i = 0; i < HASH_SLOT_CNT; i++) {
104
- printf("%4d:", i);
105
- for (b = class_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
106
- printf(" %s", b->key);
107
- }
108
- printf("\n");
109
- }
110
- }
111
-
112
- void oj_hash_sizes() {
113
- uint32_t i;
114
- KeyVal b;
115
- int max = 0;
116
- int min = 1000000;
117
-
118
- for (i = 0; i < HASH_SLOT_CNT; i++) {
119
- int cnt = 0;
120
-
121
- for (b = str_hash.slots + i; 0 != b && 0 != b->key; b = b->next) {
122
- cnt++;
123
- }
124
- // printf(" %4d\n", cnt);
125
- if (max < cnt) {
126
- max = cnt;
127
- }
128
- if (cnt < min) {
129
- min = cnt;
130
- }
131
- }
132
- printf("min: %d max: %d\n", min, max);
133
- }
134
-
135
113
  VALUE
136
114
  oj_str_intern(const char *key, size_t len) {
137
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
138
- KeyVal bucket = str_hash.slots + h;
139
- KeyVal b;
140
-
141
- #if HAVE_PTHREAD_MUTEX_INIT
142
- pthread_mutex_lock(&str_hash.mutex);
143
- #else
144
- rb_mutex_lock(str_hash.mutex);
145
- #endif
146
- if (NULL != bucket->key) { // not the top slot
147
- for (b = bucket; 0 != b; b = b->next) {
148
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
149
- #if HAVE_PTHREAD_MUTEX_INIT
150
- pthread_mutex_unlock(&str_hash.mutex);
151
- #else
152
- rb_mutex_unlock(str_hash.mutex);
153
- #endif
154
- return b->val;
155
- }
156
- bucket = b;
157
- }
158
- b = ALLOC(struct _keyVal);
159
- b->next = NULL;
160
- bucket->next = b;
161
- bucket = b;
162
- }
163
- bucket->key = oj_strndup(key, len);
164
- bucket->len = len;
165
- bucket->val = rb_utf8_str_new(key, len);
166
- bucket->val = rb_str_freeze(bucket->val);
167
- rb_gc_register_address(&bucket->val);
168
- #if HAVE_PTHREAD_MUTEX_INIT
169
- pthread_mutex_unlock(&str_hash.mutex);
170
- #else
171
- rb_mutex_unlock(str_hash.mutex);
172
- #endif
173
- return bucket->val;
115
+ return cache_intern(str_cache, key, len);
174
116
  }
175
117
 
176
118
  VALUE
177
119
  oj_sym_intern(const char *key, size_t len) {
178
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
179
- KeyVal bucket = sym_hash.slots + h;
180
- KeyVal b;
120
+ return cache_intern(sym_cache, key, len);
121
+ }
181
122
 
182
- #if HAVE_PTHREAD_MUTEX_INIT
183
- pthread_mutex_lock(&sym_hash.mutex);
184
- #else
185
- rb_mutex_lock(sym_hash.mutex);
186
- #endif
187
- if (NULL != bucket->key) { // not the top slot
188
- for (b = bucket; 0 != b; b = b->next) {
189
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
190
- #if HAVE_PTHREAD_MUTEX_INIT
191
- pthread_mutex_unlock(&sym_hash.mutex);
192
- #else
193
- rb_mutex_unlock(sym_hash.mutex);
194
- #endif
195
- return b->val;
196
- }
197
- bucket = b;
198
- }
199
- b = ALLOC(struct _keyVal);
200
- b->next = NULL;
201
- bucket->next = b;
202
- bucket = b;
203
- }
204
- bucket->key = oj_strndup(key, len);
205
- bucket->len = len;
206
- bucket->val = ID2SYM(rb_intern3(key, len, oj_utf8_encoding));
207
- rb_gc_register_address(&bucket->val);
208
- #if HAVE_PTHREAD_MUTEX_INIT
209
- pthread_mutex_unlock(&sym_hash.mutex);
210
- #else
211
- rb_mutex_unlock(sym_hash.mutex);
212
- #endif
213
- return bucket->val;
123
+ ID
124
+ oj_attr_intern(const char *key, size_t len) {
125
+ return cache_intern(attr_cache, key, len);
214
126
  }
215
127
 
216
- static ID form_attr(const char *key, size_t klen) {
217
- char attr[256];
218
- ID var_id;
128
+ static uint64_t hash_calc(const uint8_t *key, size_t len) {
129
+ const uint8_t *end = key + len;
130
+ const uint8_t *endless = key + (len & 0xFFFFFFFC);
131
+ uint64_t h = (uint64_t)len;
132
+ uint64_t k;
219
133
 
220
- if ((int)sizeof(attr) <= klen + 2) {
221
- char *buf = ALLOC_N(char, klen + 2);
134
+ while (key < endless) {
135
+ k = (uint64_t)*key++;
136
+ k |= (uint64_t)*key++ << 8;
137
+ k |= (uint64_t)*key++ << 16;
138
+ k |= (uint64_t)*key++ << 24;
222
139
 
223
- if ('~' == *key) {
224
- memcpy(buf, key + 1, klen - 1);
225
- buf[klen - 1] = '\0';
226
- } else {
227
- *buf = '@';
228
- memcpy(buf + 1, key, klen);
229
- buf[klen + 1] = '\0';
230
- }
231
- var_id = rb_intern(buf);
232
- xfree(buf);
233
- } else {
234
- if ('~' == *key) {
235
- memcpy(attr, key + 1, klen - 1);
236
- attr[klen - 1] = '\0';
237
- } else {
238
- *attr = '@';
239
- memcpy(attr + 1, key, klen);
240
- attr[klen + 1] = '\0';
241
- }
242
- var_id = rb_intern(attr);
140
+ k *= M;
141
+ k ^= k >> 24;
142
+ h *= M;
143
+ h ^= k * M;
243
144
  }
244
- return var_id;
245
- }
246
-
247
- ID oj_attr_intern(const char *key, size_t len) {
248
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
249
- KeyVal bucket = attr_hash.slots + h;
250
- KeyVal b;
145
+ if (1 < end - key) {
146
+ uint16_t k16 = (uint16_t)*key++;
251
147
 
252
- #if HAVE_PTHREAD_MUTEX_INIT
253
- pthread_mutex_lock(&attr_hash.mutex);
254
- #else
255
- rb_mutex_lock(attr_hash.mutex);
256
- #endif
257
- if (NULL != bucket->key) { // not the top slot
258
- for (b = bucket; 0 != b; b = b->next) {
259
- if (len == b->len && 0 == strncmp(b->key, key, len)) {
260
- #if HAVE_PTHREAD_MUTEX_INIT
261
- pthread_mutex_unlock(&attr_hash.mutex);
262
- #else
263
- rb_mutex_unlock(attr_hash.mutex);
264
- #endif
265
- return (ID)b->val;
266
- }
267
- bucket = b;
268
- }
269
- b = ALLOC(struct _keyVal);
270
- b->next = NULL;
271
- bucket->next = b;
272
- bucket = b;
148
+ k16 |= (uint16_t)*key++ << 8;
149
+ h ^= k16 << 8;
273
150
  }
274
- bucket->key = oj_strndup(key, len);
275
- bucket->len = len;
276
- bucket->val = (VALUE)form_attr(key, len);
277
- #if HAVE_PTHREAD_MUTEX_INIT
278
- pthread_mutex_unlock(&attr_hash.mutex);
279
- #else
280
- rb_mutex_unlock(attr_hash.mutex);
281
- #endif
282
- return (ID)bucket->val;
151
+ if (key < end) {
152
+ h ^= *key;
153
+ }
154
+ h *= M;
155
+ h ^= h >> 13;
156
+ h *= M;
157
+ h ^= h >> 15;
158
+
159
+ return h;
283
160
  }
284
161
 
285
162
  static VALUE resolve_classname(VALUE mod, const char *classname, int auto_define) {
@@ -333,7 +210,7 @@ static VALUE resolve_classpath(ParseInfo pi, const char *name, size_t len, int a
333
210
  }
334
211
 
335
212
  VALUE oj_class_intern(const char *key, size_t len, bool safe, ParseInfo pi, int auto_define, VALUE error_class) {
336
- uint32_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
213
+ uint64_t h = hash_calc((const uint8_t *)key, len) & HASH_MASK;
337
214
  KeyVal bucket = class_hash.slots + h;
338
215
  KeyVal b;
339
216
 
@@ -396,3 +273,9 @@ char *oj_strndup(const char *s, size_t len) {
396
273
 
397
274
  return d;
398
275
  }
276
+
277
+ void intern_cleanup() {
278
+ cache_free(str_cache);
279
+ cache_free(sym_cache);
280
+ cache_free(attr_cache);
281
+ }
data/ext/oj/intern.h CHANGED
@@ -21,7 +21,6 @@ extern VALUE oj_class_intern(const char * key,
21
21
  int auto_define,
22
22
  VALUE error_class);
23
23
 
24
- extern void oj_hash_print();
25
24
  extern char *oj_strndup(const char *s, size_t len);
26
25
 
27
26
  #endif /* OJ_INTERN_H */
data/ext/oj/mimic_json.c CHANGED
@@ -682,7 +682,7 @@ static VALUE mimic_set_create_id(VALUE self, VALUE id) {
682
682
  */
683
683
  static VALUE mimic_create_id(VALUE self) {
684
684
  if (NULL != oj_default_options.create_id) {
685
- return oj_encode(rb_str_new_cstr(oj_default_options.create_id));
685
+ return rb_utf8_str_new(oj_default_options.create_id, oj_default_options.create_id_len);
686
686
  }
687
687
  return rb_str_new_cstr(oj_json_class);
688
688
  }
@@ -714,7 +714,7 @@ static struct _options mimic_object_to_json_options = {0, // indent
714
714
  false, // sec_prec_set
715
715
  No, // ignore_under
716
716
  Yes, // cache_keys
717
- 3, // cache_str
717
+ 0, // cache_str
718
718
  0, // int_range_min
719
719
  0, // int_range_max
720
720
  oj_json_class, // create_id
data/ext/oj/object.c CHANGED
@@ -30,46 +30,19 @@ inline static long read_long(const char *str, size_t len) {
30
30
 
31
31
  static VALUE calc_hash_key(ParseInfo pi, Val kval, char k1) {
32
32
  volatile VALUE rkey;
33
- #if 0
34
- VALUE *slot;
35
33
 
36
34
  if (':' == k1) {
37
- if (Qnil == (rkey = oj_sym_hash_get(kval->key + 1, kval->klen - 1, &slot))) {
38
- rkey = rb_str_new(kval->key + 1, kval->klen - 1);
39
- rkey = oj_encode(rkey);
40
- rkey = rb_str_intern(rkey);
41
- *slot = rkey;
42
- rb_gc_register_address(slot);
43
- }
44
- } else if (Yes == pi->options.sym_key) {
45
- if (Qnil == (rkey = oj_sym_hash_get(kval->key, kval->klen, &slot))) {
46
- rkey = rb_str_new(kval->key, kval->klen);
47
- rkey = oj_encode(rkey);
48
- rkey = rb_str_intern(rkey);
49
- *slot = rkey;
50
- rb_gc_register_address(slot);
51
- }
52
- } else {
53
- if (Qnil == (rkey = oj_str_hash_get(kval->key, kval->klen, &slot))) {
54
- rkey = rb_str_new(kval->key, kval->klen);
55
- rkey = oj_encode(rkey);
56
- *slot = rkey;
57
- rb_gc_register_address(slot);
58
- }
35
+ return ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding));
59
36
  }
60
- #else
61
- if (':' == k1) {
62
- rkey = ID2SYM(rb_intern3(kval->key + 1, kval->klen - 1, oj_utf8_encoding));
63
- } else {
64
- if (Yes == pi->options.sym_key) {
65
- rkey = ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding));
66
- } else {
67
- rkey = rb_str_new(kval->key, kval->klen);
68
- rkey = oj_encode(rkey);
69
- }
37
+ if (Yes == pi->options.sym_key) {
38
+ return ID2SYM(rb_intern3(kval->key, kval->klen, oj_utf8_encoding));
70
39
  }
71
- #endif
40
+ #if HAVE_RB_ENC_INTERNED_STR
41
+ rkey = rb_enc_interned_str(kval->key, kval->klen, oj_utf8_encoding);
42
+ #else
43
+ rkey = rb_utf8_str_new(kval->key, kval->klen);
72
44
  OBJ_FREEZE(rkey);
45
+ #endif
73
46
  return rkey;
74
47
  }
75
48
 
@@ -87,8 +60,7 @@ static VALUE str_to_value(ParseInfo pi, const char *str, size_t len, const char
87
60
  }
88
61
  rstr = oj_circ_array_get(pi->circ_array, i);
89
62
  } else {
90
- rstr = rb_str_new(str, len);
91
- rstr = oj_encode(rstr);
63
+ rstr = rb_utf8_str_new(str, len);
92
64
  }
93
65
  return rstr;
94
66
  }
@@ -259,8 +231,7 @@ static int hat_cstr(ParseInfo pi, Val parent, Val kval, const char *str, size_t
259
231
  parent->val = ID2SYM(rb_intern3(str + 1, len - 1, oj_utf8_encoding));
260
232
  break;
261
233
  case 's':
262
- parent->val = rb_str_new(str, len);
263
- parent->val = oj_encode(parent->val);
234
+ parent->val = rb_utf8_str_new(str, len);
264
235
  break;
265
236
  case 'c': // class
266
237
  {