fast-xml 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,384 @@
1
+ #ifndef _XH_RUBY_HASH_H_
2
+ #define _XH_RUBY_HASH_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+ #include "ccan/list/list.h"
7
+
8
+ struct RHash {
9
+ struct RBasic basic;
10
+ struct st_table *ntbl; /* possibly 0 */
11
+ int iter_lev;
12
+ const VALUE ifnone;
13
+ };
14
+
15
/* View a Hash VALUE as a pointer to the struct RHash declared in this header. */
#define RHASH(obj) (R_CAST(RHash)(obj))

/*
 * If RHASH_ITER_LEV is already defined, discard the existing accessor macros
 * and re-define them on top of the local struct RHash.
 */
#ifdef RHASH_ITER_LEV
#  undef RHASH_ITER_LEV
#  undef RHASH_IFNONE
#  undef RHASH_SIZE
   /* iteration level counter stored in the hash object */
#  define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev)
   /* the `ifnone` slot of the hash object */
#  define RHASH_IFNONE(h) (RHASH(h)->ifnone)
   /* number of entries, or 0 when no table is attached */
#  define RHASH_SIZE(h) (RHASH(h)->ntbl ? (st_index_t)RHASH(h)->ntbl->num_entries : 0)
#endif
25
+
26
+ typedef struct st_table_entry st_table_entry;
27
+
28
+ struct st_table_entry {
29
+ st_index_t hash;
30
+ st_data_t key;
31
+ st_data_t record;
32
+ st_table_entry *next;
33
+ struct list_node olist;
34
+ };
35
+
36
+ typedef struct st_packed_entry {
37
+ st_index_t hash;
38
+ st_data_t key, val;
39
+ } st_packed_entry;
40
+
41
/* add_direct() rehashes once num_entries exceeds this many entries per bin */
#define ST_DEFAULT_MAX_DENSITY          5
/* bin count given to a table when it is unpacked */
#define ST_DEFAULT_INIT_TABLE_SIZE      16
/* size of the packed storage, measured in pointer-sized slots */
#define ST_DEFAULT_PACKED_TABLE_SIZE    18
/* pointer-sized slots occupied by one packed entry */
#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
/* how many packed entries fit into ST_DEFAULT_PACKED_TABLE_SIZE slots */
#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))

/* initial value and 32 bit FNV prime used by the FNV-1a string hashes below */
#define FNV1_32A_INIT 0x811c9dc5
#define FNV_32_PRIME  0x01000193
49
+
50
+ #define type_numhash st_hashtype_num
51
+ const struct st_hash_type st_hashtype_num = {
52
+ st_numcmp,
53
+ st_numhash,
54
+ };
55
+
56
+ static st_index_t
57
+ strcasehash(st_data_t arg)
58
+ {
59
+ register const char *string = (const char *)arg;
60
+ register st_index_t hval = FNV1_32A_INIT;
61
+
62
+ /*
63
+ * FNV-1a hash each octet in the buffer
64
+ */
65
+ while (*string) {
66
+ unsigned int c = (unsigned char)*string++;
67
+ if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
68
+ hval ^= c;
69
+
70
+ /* multiply by the 32 bit FNV magic prime mod 2^32 */
71
+ hval *= FNV_32_PRIME;
72
+ }
73
+ return hval;
74
+ }
75
+
76
+ static st_index_t
77
+ strhash(st_data_t arg)
78
+ {
79
+ register const char *string = (const char *)arg;
80
+ return st_hash(string, strlen(string), FNV1_32A_INIT);
81
+ }
82
+
83
+ /* extern int strcmp(const char *, const char *); */
84
+ static st_index_t strhash(st_data_t);
85
+ static const struct st_hash_type type_strhash = {
86
+ strcmp,
87
+ strhash,
88
+ };
89
+
90
+ static st_index_t strcasehash(st_data_t);
91
+ static const struct st_hash_type type_strcasehash = {
92
+ st_locale_insensitive_strcasecmp,
93
+ strcasehash,
94
+ };
95
+
96
/*
 * EQUAL: true when key `x` is identical to the key of entry `ent`, or when
 * the table's compare callback reports them equal (returns 0).
 */
#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)

/* do_hash: hash `key` with the table's hash callback. */
#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))

/*
 * hash_pos: bin index for hash `h` in a table of `n` bins, computed by
 * masking with n - 1 (so `n` is expected to be a power of two).
 * `n` is parenthesized so that expression arguments are masked correctly.
 */
#define hash_pos(h,n) ((h) & ((n) - 1))

/*
 * PTR_NOT_EQUAL: true when `ptr` is a non-null entry that does NOT match
 * (`hash_val`, `key`); false for a null pointer or a matching entry.
 */
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
    ((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
103
+
104
/*
 * preparation for possible allocation improvements
 *
 * All table memory goes through these wrappers, which currently map straight
 * onto malloc/calloc/free. None of them checks for allocation failure.
 */
#define st_alloc_entry()         (st_table_entry *)malloc(sizeof(st_table_entry))
#define st_free_entry(e)         free(e)
#define st_alloc_table()         (st_table *)malloc(sizeof(st_table))
#define st_dealloc_table(t)      free(t)
/* calloc: every bin starts out as a null chain */
#define st_alloc_bins(n)         (st_table_entry **)calloc(n, sizeof(st_table_entry *))
/* the size argument is accepted but unused */
#define st_free_bins(b, n)       free(b)
111
+ static inline st_table_entry**
112
+ st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
113
+ {
114
+ bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
115
+ MEMZERO(bins, st_table_entry*, newsize);
116
+ return bins;
117
+ }
118
+
119
/*
 * Shortcut
 *
 * After these definitions `t->bins` and `t->real_entries` refer to members
 * of the `as.big` / `as.packed` parts of the table. Because they are plain
 * object-like macros, the identifiers `bins` and `real_entries` are replaced
 * everywhere below this point.
 */
#define bins          as.big.bins
#define real_entries  as.packed.real_entries

/* preparation for possible packing improvements */
#define PACKED_BINS(table)      ((table)->as.packed.entries)
#define PACKED_ENT(table, i)    PACKED_BINS(table)[i]

/* field access for packed slot `i` */
#define PKEY(table, i)          PACKED_ENT((table), (i)).key
#define PVAL(table, i)          PACKED_ENT((table), (i)).val
#define PHASH(table, i)         PACKED_ENT((table), (i)).hash

/* field assignment for packed slot `i` */
#define PKEY_SET(table, i, v)   (PKEY((table), (i)) = (v))
#define PVAL_SET(table, i, v)   (PVAL((table), (i)) = (v))
#define PHASH_SET(table, i, v)  (PHASH((table), (i)) = (v))
132
+
133
+ static struct list_head *
134
+ st_head(const st_table *tbl)
135
+ {
136
+ uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
137
+ return (struct list_head *)addr;
138
+ }
139
+
140
+ static st_index_t
141
+ next_pow2(st_index_t x)
142
+ {
143
+ x |= x >> 1;
144
+ x |= x >> 2;
145
+ x |= x >> 4;
146
+ x |= x >> 8;
147
+ x |= x >> 16;
148
+ #if SIZEOF_ST_INDEX_T == 8
149
+ x |= x >> 32;
150
+ #endif
151
+ return x + 1;
152
+ }
153
+
154
+ static st_index_t
155
+ new_size(st_index_t size)
156
+ {
157
+ st_index_t n;
158
+
159
+ if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
160
+ return size;
161
+
162
+ n = next_pow2(size);
163
+ if (n > size)
164
+ return n;
165
+ #ifndef NOT_RUBY
166
+ rb_raise(rb_eRuntimeError, "st_table too big");
167
+ #endif
168
+ return -1; /* should raise exception */
169
+ }
170
+
171
+ static void
172
+ rehash(register st_table *table)
173
+ {
174
+ register st_table_entry *ptr = 0, **new_bins;
175
+ st_index_t new_num_bins, hash_val;
176
+
177
+ new_num_bins = new_size(table->num_bins+1);
178
+ new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
179
+ table->num_bins = new_num_bins;
180
+ table->bins = new_bins;
181
+
182
+ list_for_each(st_head(table), ptr, olist) {
183
+ hash_val = hash_pos(ptr->hash, new_num_bins);
184
+ ptr->next = new_bins[hash_val];
185
+ new_bins[hash_val] = ptr;
186
+ }
187
+ }
188
+
189
+ static st_table_entry *
190
+ find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
191
+ st_index_t bin_pos)
192
+ {
193
+ register st_table_entry *ptr = table->bins[bin_pos];
194
+ if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
195
+ while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
196
+ ptr = ptr->next;
197
+ }
198
+ ptr = ptr->next;
199
+ }
200
+ return ptr;
201
+ }
202
+
203
+ static inline st_index_t
204
+ find_packed_index_from(const st_table *table, st_index_t hash_val,
205
+ st_data_t key, st_index_t i)
206
+ {
207
+ while (i < table->real_entries &&
208
+ (PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
209
+ i++;
210
+ }
211
+ return i;
212
+ }
213
+
214
+ static inline st_index_t
215
+ find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
216
+ {
217
+ return find_packed_index_from(table, hash_val, key, 0);
218
+ }
219
+
220
+ static inline st_table_entry *
221
+ new_entry(st_table * table, st_data_t key, st_data_t value,
222
+ st_index_t hash_val, register st_index_t bin_pos)
223
+ {
224
+ register st_table_entry *entry = st_alloc_entry();
225
+
226
+ entry->next = table->bins[bin_pos];
227
+ table->bins[bin_pos] = entry;
228
+ entry->hash = hash_val;
229
+ entry->key = key;
230
+ entry->record = value;
231
+
232
+ return entry;
233
+ }
234
+
235
+ static inline st_data_t *
236
+ add_direct(st_table *table, st_data_t key, st_data_t value,
237
+ st_index_t hash_val, register st_index_t bin_pos)
238
+ {
239
+ register st_table_entry *entry;
240
+ if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
241
+ rehash(table);
242
+ bin_pos = hash_pos(hash_val, table->num_bins);
243
+ }
244
+
245
+ entry = new_entry(table, key, value, hash_val, bin_pos);
246
+ list_add_tail(st_head(table), &entry->olist);
247
+ table->num_entries++;
248
+ return &entry->record;
249
+ }
250
+
251
+ static void
252
+ unpack_entries(register st_table *table)
253
+ {
254
+ st_index_t i;
255
+ st_packed_entry packed_bins[MAX_PACKED_HASH];
256
+ register st_table_entry *entry;
257
+ st_table tmp_table = *table;
258
+
259
+ MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
260
+ table->as.packed.entries = packed_bins;
261
+ tmp_table.entries_packed = 0;
262
+ #if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
263
+ MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
264
+ #else
265
+ tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
266
+ tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
267
+ #endif
268
+
269
+ /*
270
+ * order is important here, we need to keep the original table
271
+ * walkable during GC (GC may be triggered by new_entry call)
272
+ */
273
+ i = 0;
274
+ list_head_init(st_head(&tmp_table));
275
+ do {
276
+ st_data_t key = packed_bins[i].key;
277
+ st_data_t val = packed_bins[i].val;
278
+ st_index_t hash = packed_bins[i].hash;
279
+ entry = new_entry(&tmp_table, key, val, hash,
280
+ hash_pos(hash, ST_DEFAULT_INIT_TABLE_SIZE));
281
+ list_add_tail(st_head(&tmp_table), &entry->olist);
282
+ } while (++i < MAX_PACKED_HASH);
283
+ *table = tmp_table;
284
+ list_head_init(st_head(table));
285
+ list_append_list(st_head(table), st_head(&tmp_table));
286
+ }
287
+
288
+ static st_data_t *
289
+ add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
290
+ {
291
+ st_data_t *lval;
292
+
293
+ if (table->real_entries < MAX_PACKED_HASH) {
294
+ st_index_t i = table->real_entries++;
295
+ PKEY_SET(table, i, key);
296
+ PVAL_SET(table, i, value);
297
+ PHASH_SET(table, i, hash_val);
298
+ table->num_entries++;
299
+ lval = &PVAL(table, i);
300
+ }
301
+ else {
302
+ unpack_entries(table);
303
+ lval = add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins));
304
+ }
305
+
306
+ return lval;
307
+ }
308
+
309
+ static st_data_t *
310
+ st_store(register st_table *table, register st_data_t key, st_data_t value, xh_bool_t update)
311
+ {
312
+ st_index_t hash_val;
313
+ register st_index_t bin_pos;
314
+ register st_table_entry *ptr;
315
+ st_data_t *lval;
316
+
317
+ hash_val = do_hash(key, table);
318
+
319
+ if (table->entries_packed) {
320
+ st_index_t i = find_packed_index(table, hash_val, key);
321
+ if (i < table->real_entries) {
322
+ lval = &PVAL(table, i);
323
+ }
324
+ else {
325
+ lval = add_packed_direct(table, key, value, hash_val);
326
+ }
327
+ }
328
+ else {
329
+ ptr = find_entry(table, key, hash_val, bin_pos = hash_pos(hash_val, table->num_bins));
330
+
331
+ if (ptr == 0) {
332
+ lval = add_direct(table, key, value, hash_val, bin_pos);
333
+ }
334
+ else {
335
+ lval = &ptr->record;
336
+ }
337
+ }
338
+
339
+ if (update) *lval = value;
340
+
341
+ return lval;
342
+ }
343
+
344
+ static VALUE *
345
+ hash_store(VALUE hash, const char *key , size_t keylen, VALUE val)
346
+ {
347
+ VALUE key_val = rb_utf8_str_new(key, keylen);
348
+ return st_store(RHASH(hash)->ntbl, (st_data_t) key_val, val, TRUE);
349
+ }
350
+
351
+ static VALUE *
352
+ hash_fetch(VALUE hash, const char *key , size_t keylen, VALUE val)
353
+ {
354
+ VALUE key_val = rb_utf8_str_new(key, keylen);
355
+ VALUE *lval;
356
+ lval = st_store(RHASH(hash)->ntbl, (st_data_t) key_val, val, FALSE);
357
+
358
+ return lval;
359
+ }
360
+
361
+ static VALUE
362
+ hash_new(void)
363
+ {
364
+ VALUE hash = rb_hash_new();
365
+ rb_funcall(hash, xh_id_initialize, 0);
366
+ return hash;
367
+ }
368
+
369
+ static int
370
+ hash_first_value_i(VALUE key, VALUE value, VALUE first)
371
+ {
372
+ *((VALUE *) first) = value;
373
+ return ST_STOP;
374
+ }
375
+
376
+ static VALUE
377
+ hash_first_value(VALUE hash)
378
+ {
379
+ VALUE value = Qnil;
380
+ rb_hash_foreach(hash, hash_first_value_i, (VALUE) &value);
381
+ return value;
382
+ }
383
+
384
+ #endif /* _XH_RUBY_HASH_H_ */