fast-xml 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,25 @@
1
+ #ifndef _XH_RUBY_INTERNAL_H_
2
+ #define _XH_RUBY_INTERNAL_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+
7
+ struct RHash {
8
+ struct RBasic basic;
9
+ struct st_table *ntbl; /* possibly 0 */
10
+ int iter_lev;
11
+ const VALUE ifnone;
12
+ };
13
+
14
/* View OBJ as a pointer to struct RHash. */
#define RHASH(obj)   (R_CAST(RHash)(obj))

/* If RHASH_ITER_LEV is already defined, drop the existing RHASH_*
   accessors and replace them with versions that read the fields of
   struct RHash directly. */
#ifdef RHASH_ITER_LEV
#  undef RHASH_ITER_LEV
#  undef RHASH_IFNONE
#  undef RHASH_SIZE
#  define RHASH_ITER_LEV(h)  (RHASH(h)->iter_lev)
#  define RHASH_IFNONE(h)    (RHASH(h)->ifnone)
/* Entry count of the backing table, or 0 when there is no table. */
#  define RHASH_SIZE(h)      (RHASH(h)->ntbl ? (st_index_t)RHASH(h)->ntbl->num_entries : 0)
#endif
24
+
25
+ #endif /* _XH_RUBY_INTERNAL_H_ */
@@ -0,0 +1,783 @@
1
+ #ifndef _XH_RUBY_ST_H_
2
+ #define _XH_RUBY_ST_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+ #include "xh_ruby_internal.h"
7
+
8
/* Compiler hint wrappers.  GCC-compatible compilers get the real
   builtins/attribute; everywhere else PREFETCH and ATTRIBUTE_UNUSED
   expand to nothing and EXPECT simply yields its expression. */
#ifdef __GNUC__
#  define PREFETCH(addr, write_p)  __builtin_prefetch(addr, write_p)
#  define EXPECT(expr, val)        __builtin_expect(expr, val)
#  define ATTRIBUTE_UNUSED         __attribute__((unused))
#else
#  define PREFETCH(addr, write_p)
#  define EXPECT(expr, val)        (expr)
#  define ATTRIBUTE_UNUSED
#endif
17
+
18
/* st_assert(cond): a real assert() in ST_DEBUG builds.  Otherwise COND
   is still parsed (so it cannot rot) but is never evaluated, because
   it sits behind `0 &&`. */
#ifdef ST_DEBUG
#  define st_assert(cond)  assert(cond)
#else
#  define st_assert(cond)  ((void)(0 && (cond)))
#endif
23
+
24
+ /* The type of hashes. */
25
+ typedef st_index_t st_hash_t;
26
+
27
+ struct st_table_entry {
28
+ st_hash_t hash;
29
+ st_data_t key;
30
+ st_data_t record;
31
+ };
32
+
33
+ #define type_numhash st_hashtype_num
34
+ const struct st_hash_type st_hashtype_num = {
35
+ st_numcmp,
36
+ st_numhash,
37
+ };
38
+
39
+ /* extern int strcmp(const char *, const char *); */
40
+ static st_index_t strhash(st_data_t);
41
+ static const struct st_hash_type type_strhash = {
42
+ strcmp,
43
+ strhash,
44
+ };
45
+
46
+ static st_index_t strcasehash(st_data_t);
47
+ static const struct st_hash_type type_strcasehash = {
48
+ st_locale_insensitive_strcasecmp,
49
+ strcasehash,
50
+ };
51
+
52
/* Value used to catch uninitialized entries/bins during debugging.
   There is a possibility for a false alarm, but its probability is
   extremely small.

   ST_INIT_VAL_BYTE is the single byte which, when replicated over a
   word (for example with memset()), produces ST_INIT_VAL.  It must
   therefore fit in one byte: a wider constant would be truncated by
   memset() and the filled memory would never compare equal to
   ST_INIT_VAL. */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xaf
57
+
58
/* When RUBY is defined, redirect the standard allocator names to the
   ruby_x* allocation functions so that all table memory goes through
   them. */
#ifdef RUBY
#  undef malloc
#  undef realloc
#  undef calloc
#  undef free
#  define malloc   ruby_xmalloc
#  define calloc   ruby_xcalloc
#  define realloc  ruby_xrealloc
#  define free     ruby_xfree
#endif
68
+
69
+ #define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
70
+ #define PTR_EQUAL(tab, ptr, hash_val, key) \
71
+ ((ptr)->hash == (hash_val) && EQUAL((tab), (key), (ptr)->key))
72
+
73
+ /* Features of a table. */
74
+ struct st_features {
75
+ /* Power of 2 used for number of allocated entries. */
76
+ unsigned char entry_power;
77
+ /* Power of 2 used for number of allocated bins. Depending on the
78
+ table size, the number of bins is 2-4 times more than the
79
+ number of entries. */
80
+ unsigned char bin_power;
81
+ /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
82
+ unsigned char size_ind;
83
+ /* Bins are packed in words of type st_index_t. The following is
84
+ a size of bins counted by words. */
85
+ st_index_t bins_words;
86
+ };
87
+
88
+ /* Features of all possible size tables. */
89
+ #if SIZEOF_ST_INDEX_T == 8
90
+ #define MAX_POWER2 62
91
+ static const struct st_features features[] = {
92
+ {0, 1, 0, 0x0},
93
+ {1, 2, 0, 0x1},
94
+ {2, 3, 0, 0x1},
95
+ {3, 4, 0, 0x2},
96
+ {4, 5, 0, 0x4},
97
+ {5, 6, 0, 0x8},
98
+ {6, 7, 0, 0x10},
99
+ {7, 8, 0, 0x20},
100
+ {8, 9, 1, 0x80},
101
+ {9, 10, 1, 0x100},
102
+ {10, 11, 1, 0x200},
103
+ {11, 12, 1, 0x400},
104
+ {12, 13, 1, 0x800},
105
+ {13, 14, 1, 0x1000},
106
+ {14, 15, 1, 0x2000},
107
+ {15, 16, 1, 0x4000},
108
+ {16, 17, 2, 0x10000},
109
+ {17, 18, 2, 0x20000},
110
+ {18, 19, 2, 0x40000},
111
+ {19, 20, 2, 0x80000},
112
+ {20, 21, 2, 0x100000},
113
+ {21, 22, 2, 0x200000},
114
+ {22, 23, 2, 0x400000},
115
+ {23, 24, 2, 0x800000},
116
+ {24, 25, 2, 0x1000000},
117
+ {25, 26, 2, 0x2000000},
118
+ {26, 27, 2, 0x4000000},
119
+ {27, 28, 2, 0x8000000},
120
+ {28, 29, 2, 0x10000000},
121
+ {29, 30, 2, 0x20000000},
122
+ {30, 31, 2, 0x40000000},
123
+ {31, 32, 2, 0x80000000},
124
+ {32, 33, 3, 0x200000000},
125
+ {33, 34, 3, 0x400000000},
126
+ {34, 35, 3, 0x800000000},
127
+ {35, 36, 3, 0x1000000000},
128
+ {36, 37, 3, 0x2000000000},
129
+ {37, 38, 3, 0x4000000000},
130
+ {38, 39, 3, 0x8000000000},
131
+ {39, 40, 3, 0x10000000000},
132
+ {40, 41, 3, 0x20000000000},
133
+ {41, 42, 3, 0x40000000000},
134
+ {42, 43, 3, 0x80000000000},
135
+ {43, 44, 3, 0x100000000000},
136
+ {44, 45, 3, 0x200000000000},
137
+ {45, 46, 3, 0x400000000000},
138
+ {46, 47, 3, 0x800000000000},
139
+ {47, 48, 3, 0x1000000000000},
140
+ {48, 49, 3, 0x2000000000000},
141
+ {49, 50, 3, 0x4000000000000},
142
+ {50, 51, 3, 0x8000000000000},
143
+ {51, 52, 3, 0x10000000000000},
144
+ {52, 53, 3, 0x20000000000000},
145
+ {53, 54, 3, 0x40000000000000},
146
+ {54, 55, 3, 0x80000000000000},
147
+ {55, 56, 3, 0x100000000000000},
148
+ {56, 57, 3, 0x200000000000000},
149
+ {57, 58, 3, 0x400000000000000},
150
+ {58, 59, 3, 0x800000000000000},
151
+ {59, 60, 3, 0x1000000000000000},
152
+ {60, 61, 3, 0x2000000000000000},
153
+ {61, 62, 3, 0x4000000000000000},
154
+ {62, 63, 3, 0x8000000000000000},
155
+ };
156
+
157
+ #else
158
+ #define MAX_POWER2 30
159
+
160
+ static const struct st_features features[] = {
161
+ {0, 1, 0, 0x1},
162
+ {1, 2, 0, 0x1},
163
+ {2, 3, 0, 0x2},
164
+ {3, 4, 0, 0x4},
165
+ {4, 5, 0, 0x8},
166
+ {5, 6, 0, 0x10},
167
+ {6, 7, 0, 0x20},
168
+ {7, 8, 0, 0x40},
169
+ {8, 9, 1, 0x100},
170
+ {9, 10, 1, 0x200},
171
+ {10, 11, 1, 0x400},
172
+ {11, 12, 1, 0x800},
173
+ {12, 13, 1, 0x1000},
174
+ {13, 14, 1, 0x2000},
175
+ {14, 15, 1, 0x4000},
176
+ {15, 16, 1, 0x8000},
177
+ {16, 17, 2, 0x20000},
178
+ {17, 18, 2, 0x40000},
179
+ {18, 19, 2, 0x80000},
180
+ {19, 20, 2, 0x100000},
181
+ {20, 21, 2, 0x200000},
182
+ {21, 22, 2, 0x400000},
183
+ {22, 23, 2, 0x800000},
184
+ {23, 24, 2, 0x1000000},
185
+ {24, 25, 2, 0x2000000},
186
+ {25, 26, 2, 0x4000000},
187
+ {26, 27, 2, 0x8000000},
188
+ {27, 28, 2, 0x10000000},
189
+ {28, 29, 2, 0x20000000},
190
+ {29, 30, 2, 0x40000000},
191
+ {30, 31, 2, 0x80000000},
192
+ };
193
+
194
+ #endif
195
+
196
+ /* The reserved hash value and its substitution. */
197
+ #define RESERVED_HASH_VAL (~(st_hash_t) 0)
198
+ #define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
199
+
200
+ /* Return hash value of KEY for table TAB. */
201
+ static inline st_hash_t
202
+ do_hash(st_data_t key, st_table *tab)
203
+ {
204
+ st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
205
+
206
+ /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
207
+ another value. Such mapping should be extremely rare. */
208
+ return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
209
+ }
210
+
211
/* Smallest entry power: a table never has fewer than
   2^MINIMAL_POWER2 allocated entries.  Values below 2 are rejected at
   compile time. */
#define MINIMAL_POWER2 2

#if MINIMAL_POWER2 < 2
#  error "MINIMAL_POWER2 should be >= 2"
#endif

/* Tables whose entry power is below this value get no bins array and
   are searched linearly instead. */
#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
221
+
222
+ /* Return value of N-th bin in array BINS of table with bins size
223
+ index S. */
224
+ static inline st_index_t
225
+ get_bin(st_index_t *bins, int s, st_index_t n)
226
+ {
227
+ return (s == 0 ? ((unsigned char *) bins)[n]
228
+ : s == 1 ? ((unsigned short *) bins)[n]
229
+ : s == 2 ? ((unsigned int *) bins)[n]
230
+ : ((st_index_t *) bins)[n]);
231
+ }
232
+
233
+ /* Set up N-th bin in array BINS of table with bins size index S to
234
+ value V. */
235
+ static inline void
236
+ set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
237
+ {
238
+ if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
239
+ else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
240
+ else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
241
+ else ((st_index_t *) bins)[n] = v;
242
+ }
243
+
244
/* Reserved bin values: an empty bin and a bin whose entry has been
   deleted.  They are never used as an entry index; real entry indexes
   are stored in bins shifted up by ENTRY_BASE. */
#define EMPTY_BIN    0
#define DELETED_BIN  1
/* Base of a real entry index in the bins. */
#define ENTRY_BASE 2

/* Mark I-th bin of table TAB as empty, in other words not
   corresponding to any entry. */
#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), (i), EMPTY_BIN))

/* Values used for "no such entry" and "no such bin". */
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
#define UNDEFINED_BIN_IND (~(st_index_t) 0)

/* Mark I-th bin of table TAB as corresponding to a deleted table
   entry.  Only the bin itself is written; no table counters are
   touched here.  I is parenthesized so that an expression argument
   such as `x & 1` is checked as a whole. */
#define MARK_BIN_DELETED(tab, i)                                        \
    do {                                                                \
        st_assert((i) != UNDEFINED_BIN_IND);                            \
        st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i));                \
        set_bin((tab)->bins, get_size_ind(tab), (i), DELETED_BIN);      \
    } while (0)

/* Predicates on a bin value B: empty, deleted, or either of the two. */
#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)

/* The same predicates for the bin with index I of table TAB. */
#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), (i))))
#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), (i))))
#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), (i))))

/* Marking and checking deleted entries given by their pointer E_PTR:
   a deleted entry carries the reserved hash value. */
#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
287
+
288
+ /* Return bin size index of table TAB. */
289
+ static inline unsigned int
290
+ get_size_ind(const st_table *tab)
291
+ {
292
+ return tab->size_ind;
293
+ }
294
+
295
+ /* Return the number of allocated bins of table TAB. */
296
+ static inline st_index_t
297
+ get_bins_num(const st_table *tab)
298
+ {
299
+ return ((st_index_t) 1)<<tab->bin_power;
300
+ }
301
+
302
+ /* Return mask for a bin index in table TAB. */
303
+ static inline st_index_t
304
+ bins_mask(const st_table *tab)
305
+ {
306
+ return get_bins_num(tab) - 1;
307
+ }
308
+
309
+ /* Return the index of table TAB bin corresponding to
310
+ HASH_VALUE. */
311
+ static inline st_index_t
312
+ hash_bin(st_hash_t hash_value, st_table *tab)
313
+ {
314
+ return hash_value & bins_mask(tab);
315
+ }
316
+
317
+ /* Return the number of allocated entries of table TAB. */
318
+ static inline st_index_t
319
+ get_allocated_entries(const st_table *tab)
320
+ {
321
+ return ((st_index_t) 1)<<tab->entry_power;
322
+ }
323
+
324
+ /* Return size of the allocated bins of table TAB. */
325
+ static inline st_index_t
326
+ bins_size(const st_table *tab)
327
+ {
328
+ return features[tab->entry_power].bins_words * sizeof (st_index_t);
329
+ }
330
+
331
+ /* Mark all bins of table TAB as empty. */
332
+ static void
333
+ initialize_bins(st_table *tab)
334
+ {
335
+ memset(tab->bins, 0, bins_size(tab));
336
+ }
337
+
338
#ifdef ST_DEBUG
/* Check the consistency of table TAB.  It can be extremely slow, so
   it is compiled only for debugging.

   Only the entry power and the entries_start/entries_bound ordering
   are verified: the function returns right after them. */
static void
st_check(st_table *tab)
{
    st_index_t deleted, ent, idx, count, power;

    /* Recover the entry power from the allocated size. */
    power = 0;
    for (idx = get_allocated_entries(tab); idx > 1; idx >>= 1)
        power++;
    assert(power >= MINIMAL_POWER2);
    assert(tab->entries_bound <= get_allocated_entries(tab)
           && tab->entries_start <= tab->entries_bound);
    count = 0;
    /* NOTE(review): this unconditional return makes everything below
       unreachable.  It may be a deliberate way to skip the expensive
       per-entry and per-bin checks -- confirm before removing it. */
    return;
    if (tab->entries_bound != 0)
        for (idx = tab->entries_start; idx < tab->entries_bound; idx++) {
            assert(tab->entries[idx].hash != (st_hash_t) ST_INIT_VAL
                   && tab->entries[idx].key != ST_INIT_VAL
                   && tab->entries[idx].record != ST_INIT_VAL);
            if (! DELETED_ENTRY_P(&tab->entries[idx]))
                count++;
        }
    assert(count == tab->num_entries);
    if (tab->bins == NULL)
        assert(power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
    else {
        assert(power > MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
        for (count = deleted = idx = 0; idx < get_bins_num(tab); idx++) {
            assert(get_bin(tab->bins, tab->size_ind, idx) != ST_INIT_VAL);
            if (IND_DELETED_BIN_P(tab, idx)) {
                deleted++;
                continue;
            }
            else if (IND_EMPTY_BIN_P(tab, idx))
                continue;
            count++;
            ent = get_bin(tab->bins, tab->size_ind, idx) - ENTRY_BASE;
            assert(tab->entries_start <= ent && ent < tab->entries_bound);
            assert(! DELETED_ENTRY_P(&tab->entries[ent]));
            assert(tab->entries[ent].hash != (st_hash_t) ST_INIT_VAL
                   && tab->entries[ent].key != ST_INIT_VAL
                   && tab->entries[ent].record != ST_INIT_VAL);
        }
        assert(count == tab->num_entries);
        assert(count + deleted < get_bins_num(tab));
    }
}
#endif
388
+
389
+ static st_index_t
390
+ find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
391
+
392
+ static st_index_t
393
+ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
394
+ st_data_t key, st_index_t *bin_ind);
395
+
396
/* Ratio used by rebuild_table() when it chooses between compacting
   the table in place and moving it into a newly sized one.  Values
   below 2 are rejected at compile time. */
#define REBUILD_THRESHOLD 4

#if REBUILD_THRESHOLD < 2
#  error "REBUILD_THRESHOLD should be >= 2"
#endif
404
+
405
+ /* Rebuild table TAB. Rebuilding removes all deleted bins and entries
406
+ and can change size of the table entries and bins arrays.
407
+ Rebuilding is implemented by creation of a new table or by
408
+ compaction of the existing one. */
409
+ static void
410
+ rebuild_table(st_table *tab)
411
+ {
412
+ st_index_t i, ni, bound;
413
+ unsigned int size_ind;
414
+ st_table *new_tab;
415
+ st_table_entry *entries, *new_entries;
416
+ st_table_entry *curr_entry_ptr;
417
+ st_index_t *bins;
418
+ st_index_t bin_ind;
419
+
420
+ st_assert(tab != NULL);
421
+ bound = tab->entries_bound;
422
+ entries = tab->entries;
423
+ if ((2 * tab->num_entries <= get_allocated_entries(tab)
424
+ && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
425
+ || tab->num_entries < (1 << MINIMAL_POWER2)) {
426
+ /* Compaction: */
427
+ tab->num_entries = 0;
428
+ if (tab->bins != NULL)
429
+ initialize_bins(tab);
430
+ new_tab = tab;
431
+ new_entries = entries;
432
+ }
433
+ else {
434
+ new_tab = st_init_table_with_size(tab->type,
435
+ 2 * tab->num_entries - 1);
436
+ new_entries = new_tab->entries;
437
+ }
438
+ ni = 0;
439
+ bins = new_tab->bins;
440
+ size_ind = get_size_ind(new_tab);
441
+ for (i = tab->entries_start; i < bound; i++) {
442
+ curr_entry_ptr = &entries[i];
443
+ PREFETCH(entries + i + 1, 0);
444
+ if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
445
+ continue;
446
+ if (&new_entries[ni] != curr_entry_ptr)
447
+ new_entries[ni] = *curr_entry_ptr;
448
+ if (EXPECT(bins != NULL, 1)) {
449
+ bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
450
+ curr_entry_ptr->key);
451
+ st_assert(bin_ind != UNDEFINED_BIN_IND
452
+ && (tab == new_tab || new_tab->rebuilds_num == 0)
453
+ && IND_EMPTY_BIN_P(new_tab, bin_ind));
454
+ set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
455
+ }
456
+ new_tab->num_entries++;
457
+ ni++;
458
+ }
459
+ if (new_tab != tab) {
460
+ tab->entry_power = new_tab->entry_power;
461
+ tab->bin_power = new_tab->bin_power;
462
+ tab->size_ind = new_tab->size_ind;
463
+ st_assert (tab->num_entries == ni && new_tab->num_entries == ni);
464
+ if (tab->bins != NULL)
465
+ free(tab->bins);
466
+ tab->bins = new_tab->bins;
467
+ free(tab->entries);
468
+ tab->entries = new_tab->entries;
469
+ free(new_tab);
470
+ }
471
+ tab->entries_start = 0;
472
+ tab->entries_bound = tab->num_entries;
473
+ tab->rebuilds_num++;
474
+ #ifdef ST_DEBUG
475
+ st_check(tab);
476
+ #endif
477
+ }
478
+
479
+ /* Return the next secondary hash index for table TAB using previous
480
+ index IND and PERTERB. Finally modulo of the function becomes a
481
+ full *cycle linear congruential generator*, in other words it
482
+ guarantees traversing all table bins in extreme case.
483
+
484
+ According the Hull-Dobell theorem a generator
485
+ "Xnext = (a*Xprev + c) mod m" is a full cycle generator iff
486
+ o m and c are relatively prime
487
+ o a-1 is divisible by all prime factors of m
488
+ o a-1 is divisible by 4 if m is divisible by 4.
489
+
490
+ For our case a is 5, c is 1, and m is a power of two. */
491
+ static inline st_index_t
492
+ secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
493
+ {
494
+ *perterb >>= 11;
495
+ ind = (ind << 2) + ind + *perterb + 1;
496
+ return hash_bin(ind, tab);
497
+ }
498
+
499
+ /* Find an entry with HASH_VALUE and KEY in TABLE using a linear
500
+ search. Return the index of the found entry in array `entries`.
501
+ If it is not found, return UNDEFINED_ENTRY_IND. */
502
+ static inline st_index_t
503
+ find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
504
+ {
505
+ st_index_t i, bound;
506
+ st_table_entry *entries;
507
+
508
+ bound = tab->entries_bound;
509
+ entries = tab->entries;
510
+ for (i = tab->entries_start; i < bound; i++) {
511
+ if (PTR_EQUAL(tab, &entries[i], hash_value, key))
512
+ return i;
513
+ }
514
+ return UNDEFINED_ENTRY_IND;
515
+ }
516
+
517
+ /* Find and return index of table TAB bin corresponding to an entry
518
+ with HASH_VALUE and KEY. The entry should be in the table
519
+ already. */
520
+ static st_index_t
521
+ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
522
+ {
523
+ st_index_t ind;
524
+ #ifdef QUADRATIC_PROBE
525
+ st_index_t d;
526
+ #else
527
+ st_index_t peterb;
528
+ #endif
529
+ st_index_t bin;
530
+ st_table_entry *entries = tab->entries;
531
+
532
+ st_assert(tab != NULL && tab->bins != NULL);
533
+ ind = hash_bin(hash_value, tab);
534
+ #ifdef QUADRATIC_PROBE
535
+ d = 1;
536
+ #else
537
+ peterb = hash_value;
538
+ #endif
539
+ for (;;) {
540
+ bin = get_bin(tab->bins, get_size_ind(tab), ind);
541
+ if (EMPTY_OR_DELETED_BIN_P(bin))
542
+ return ind;
543
+ st_assert (! PTR_EQUAL(tab, &entries[bin - ENTRY_BASE], hash_value, key));
544
+ #ifdef QUADRATIC_PROBE
545
+ ind = hash_bin(ind + d, tab);
546
+ d++;
547
+ #else
548
+ ind = secondary_hash(ind, tab, &peterb);
549
+ #endif
550
+ }
551
+ }
552
+
553
+ /* Return index of table TAB bin for HASH_VALUE and KEY through
554
+ BIN_IND and the pointed value as the function result. Reserve the
555
+ bin for inclusion of the corresponding entry into the table if it
556
+ is not there yet. We always find such bin as bins array length is
557
+ bigger entries array. Although we can reuse a deleted bin, the
558
+ result bin value is always empty if the table has no entry with
559
+ KEY. Return the entries array index of the found entry or
560
+ UNDEFINED_ENTRY_IND if it is not found. */
561
+ static st_index_t
562
+ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
563
+ st_data_t key, st_index_t *bin_ind) {
564
+ st_index_t ind;
565
+ st_hash_t curr_hash_value = *hash_value;
566
+ #ifdef QUADRATIC_PROBE
567
+ st_index_t d;
568
+ #else
569
+ st_index_t peterb;
570
+ #endif
571
+ st_index_t entry_index;
572
+ st_index_t first_deleted_bin_ind;
573
+ st_table_entry *entries;
574
+
575
+ st_assert(tab != NULL && tab->bins != NULL
576
+ && tab->entries_bound <= get_allocated_entries(tab)
577
+ && tab->entries_start <= tab->entries_bound);
578
+ ind = hash_bin(curr_hash_value, tab);
579
+ #ifdef QUADRATIC_PROBE
580
+ d = 1;
581
+ #else
582
+ peterb = curr_hash_value;
583
+ #endif
584
+ first_deleted_bin_ind = UNDEFINED_BIN_IND;
585
+ entries = tab->entries;
586
+ for (;;) {
587
+ entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
588
+ if (EMPTY_BIN_P(entry_index)) {
589
+ tab->num_entries++;
590
+ entry_index = UNDEFINED_ENTRY_IND;
591
+ if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
592
+ /* We can reuse bin of a deleted entry. */
593
+ ind = first_deleted_bin_ind;
594
+ MARK_BIN_EMPTY(tab, ind);
595
+ }
596
+ break;
597
+ } else if (! DELETED_BIN_P(entry_index)) {
598
+ if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
599
+ break;
600
+ } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
601
+ first_deleted_bin_ind = ind;
602
+ #ifdef QUADRATIC_PROBE
603
+ ind = hash_bin(ind + d, tab);
604
+ d++;
605
+ #else
606
+ ind = secondary_hash(ind, tab, &peterb);
607
+ #endif
608
+ }
609
+ *bin_ind = ind;
610
+ return entry_index;
611
+ }
612
+
613
+
614
+ /* Check the table and rebuild it if it is necessary. */
615
+ static inline void
616
+ rebuild_table_if_necessary (st_table *tab)
617
+ {
618
+ st_index_t bound = tab->entries_bound;
619
+
620
+ if (bound == get_allocated_entries(tab))
621
+ rebuild_table(tab);
622
+ st_assert(tab->entries_bound < get_allocated_entries(tab));
623
+ }
624
+
625
/* Initial value of the 32 bit FNV-1a hash. */
#define FNV1_32A_INIT 0x811c9dc5

/*
 * 32 bit magic FNV-1a prime
 */
#define FNV_32_PRIME 0x01000193

/* UNALIGNED_WORD_ACCESS: unless it has been defined already, it is 1
   on the architectures listed below and 0 everywhere else. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
     defined(__powerpc64__) || \
     defined(__mc68020__)
#   define UNALIGNED_WORD_ACCESS 1
# else
#   define UNALIGNED_WORD_ACCESS 0
# endif
#endif
643
+
644
+ /* This hash function is quite simplified MurmurHash3
645
+ * Simplification is legal, cause most of magic still happens in finalizator.
646
+ * And finalizator is almost the same as in MurmurHash3 */
647
+ #define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
648
+ #define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
649
+
650
+ #if ST_INDEX_BITS <= 32
651
+ #define C1 (st_index_t)0xcc9e2d51
652
+ #define C2 (st_index_t)0x1b873593
653
+ #else
654
+ #define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
655
+ #define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
656
+ #endif
657
+ static inline st_index_t
658
+ murmur_step(st_index_t h, st_index_t k)
659
+ {
660
+ #if ST_INDEX_BITS <= 32
661
+ #define r1 (17)
662
+ #define r2 (11)
663
+ #else
664
+ #define r1 (33)
665
+ #define r2 (24)
666
+ #endif
667
+ k *= C1;
668
+ h ^= ROTL(k, r1);
669
+ h *= C2;
670
+ h = ROTL(h, r2);
671
+ return h;
672
+ }
673
+ #undef r1
674
+ #undef r2
675
+
676
+ static inline st_index_t
677
+ murmur_finish(st_index_t h)
678
+ {
679
+ #if ST_INDEX_BITS <= 32
680
+ #define r1 (16)
681
+ #define r2 (13)
682
+ #define r3 (16)
683
+ const st_index_t c1 = 0x85ebca6b;
684
+ const st_index_t c2 = 0xc2b2ae35;
685
+ #else
686
+ /* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
687
+ #define r1 (30)
688
+ #define r2 (27)
689
+ #define r3 (31)
690
+ const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
691
+ const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
692
+ #endif
693
+ #if ST_INDEX_BITS > 64
694
+ h ^= h >> 64;
695
+ h *= c2;
696
+ h ^= h >> 65;
697
+ #endif
698
+ h ^= h >> r1;
699
+ h *= c1;
700
+ h ^= h >> r2;
701
+ h *= c2;
702
+ h ^= h >> r3;
703
+ return h;
704
+ }
705
+ #undef r1
706
+ #undef r2
707
+ #undef r3
708
+
709
+ static st_index_t
710
+ strhash(st_data_t arg)
711
+ {
712
+ register const char *string = (const char *)arg;
713
+ return st_hash(string, strlen(string), FNV1_32A_INIT);
714
+ }
715
+
716
+ PUREFUNC(static st_index_t strcasehash(st_data_t));
717
+ static st_index_t
718
+ strcasehash(st_data_t arg)
719
+ {
720
+ register const char *string = (const char *)arg;
721
+ register st_index_t hval = FNV1_32A_INIT;
722
+
723
+ /*
724
+ * FNV-1a hash each octet in the buffer
725
+ */
726
+ while (*string) {
727
+ unsigned int c = (unsigned char)*string++;
728
+ if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
729
+ hval ^= c;
730
+
731
+ /* multiply by the 32 bit FNV magic prime mod 2^32 */
732
+ hval *= FNV_32_PRIME;
733
+ }
734
+ return hval;
735
+ }
736
+
737
+ static st_data_t *
738
+ st_store(st_table *tab, st_data_t key, st_data_t value, xh_bool_t update)
739
+ {
740
+ st_table_entry *entry;
741
+ st_index_t bin;
742
+ st_index_t ind;
743
+ st_hash_t hash_value;
744
+ st_index_t bin_ind;
745
+ int new_p;
746
+ st_data_t *lval;
747
+
748
+ rebuild_table_if_necessary(tab);
749
+ hash_value = do_hash(key, tab);
750
+ if (tab->bins == NULL) {
751
+ bin = find_entry(tab, hash_value, key);
752
+ new_p = bin == UNDEFINED_ENTRY_IND;
753
+ if (new_p)
754
+ tab->num_entries++;
755
+ bin_ind = UNDEFINED_BIN_IND;
756
+ } else {
757
+ bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
758
+ key, &bin_ind);
759
+ new_p = bin == UNDEFINED_ENTRY_IND;
760
+ bin -= ENTRY_BASE;
761
+ }
762
+
763
+ if (new_p) {
764
+ ind = tab->entries_bound++;
765
+ entry = &tab->entries[ind];
766
+ entry->hash = hash_value;
767
+ entry->key = key;
768
+ entry->record = value;
769
+ if (bin_ind != UNDEFINED_BIN_IND)
770
+ set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
771
+ }
772
+ else {
773
+ entry = &tab->entries[bin];
774
+ }
775
+
776
+ lval = &entry->record;
777
+
778
+ if (update) *lval = value;
779
+
780
+ return lval;
781
+ }
782
+
783
+ #endif /* _XH_RUBY_ST_H_ */