google_hash 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/README +21 -0
  2. data/Rakefile +11 -0
  3. data/VERSION +1 -0
  4. data/ext/extconf.rb +15 -0
  5. data/ext/go.cpp +109 -0
  6. data/ext/sparsehash-1.5.2/AUTHORS +2 -0
  7. data/ext/sparsehash-1.5.2/COPYING +28 -0
  8. data/ext/sparsehash-1.5.2/ChangeLog +167 -0
  9. data/ext/sparsehash-1.5.2/INSTALL +236 -0
  10. data/ext/sparsehash-1.5.2/Makefile.am +157 -0
  11. data/ext/sparsehash-1.5.2/Makefile.in +1019 -0
  12. data/ext/sparsehash-1.5.2/NEWS +0 -0
  13. data/ext/sparsehash-1.5.2/README +149 -0
  14. data/ext/sparsehash-1.5.2/README.windows +25 -0
  15. data/ext/sparsehash-1.5.2/TODO +28 -0
  16. data/ext/sparsehash-1.5.2/aclocal.m4 +868 -0
  17. data/ext/sparsehash-1.5.2/compile +99 -0
  18. data/ext/sparsehash-1.5.2/config.guess +1516 -0
  19. data/ext/sparsehash-1.5.2/config.sub +1626 -0
  20. data/ext/sparsehash-1.5.2/configure +8054 -0
  21. data/ext/sparsehash-1.5.2/configure.ac +74 -0
  22. data/ext/sparsehash-1.5.2/depcomp +530 -0
  23. data/ext/sparsehash-1.5.2/doc/dense_hash_map.html +1591 -0
  24. data/ext/sparsehash-1.5.2/doc/dense_hash_set.html +1445 -0
  25. data/ext/sparsehash-1.5.2/doc/designstyle.css +115 -0
  26. data/ext/sparsehash-1.5.2/doc/implementation.html +365 -0
  27. data/ext/sparsehash-1.5.2/doc/index.html +69 -0
  28. data/ext/sparsehash-1.5.2/doc/performance.html +96 -0
  29. data/ext/sparsehash-1.5.2/doc/sparse_hash_map.html +1527 -0
  30. data/ext/sparsehash-1.5.2/doc/sparse_hash_set.html +1376 -0
  31. data/ext/sparsehash-1.5.2/doc/sparsetable.html +1393 -0
  32. data/ext/sparsehash-1.5.2/experimental/Makefile +9 -0
  33. data/ext/sparsehash-1.5.2/experimental/README +14 -0
  34. data/ext/sparsehash-1.5.2/experimental/example.c +54 -0
  35. data/ext/sparsehash-1.5.2/experimental/libchash.c +1537 -0
  36. data/ext/sparsehash-1.5.2/experimental/libchash.h +252 -0
  37. data/ext/sparsehash-1.5.2/google-sparsehash.sln +47 -0
  38. data/ext/sparsehash-1.5.2/install-sh +323 -0
  39. data/ext/sparsehash-1.5.2/m4/acx_pthread.m4 +363 -0
  40. data/ext/sparsehash-1.5.2/m4/google_namespace.m4 +42 -0
  41. data/ext/sparsehash-1.5.2/m4/namespaces.m4 +15 -0
  42. data/ext/sparsehash-1.5.2/m4/stl_hash.m4 +70 -0
  43. data/ext/sparsehash-1.5.2/m4/stl_hash_fun.m4 +36 -0
  44. data/ext/sparsehash-1.5.2/m4/stl_namespace.m4 +25 -0
  45. data/ext/sparsehash-1.5.2/missing +360 -0
  46. data/ext/sparsehash-1.5.2/mkinstalldirs +158 -0
  47. data/ext/sparsehash-1.5.2/packages/deb.sh +74 -0
  48. data/ext/sparsehash-1.5.2/packages/deb/README +7 -0
  49. data/ext/sparsehash-1.5.2/packages/deb/changelog +107 -0
  50. data/ext/sparsehash-1.5.2/packages/deb/compat +1 -0
  51. data/ext/sparsehash-1.5.2/packages/deb/control +17 -0
  52. data/ext/sparsehash-1.5.2/packages/deb/copyright +35 -0
  53. data/ext/sparsehash-1.5.2/packages/deb/docs +16 -0
  54. data/ext/sparsehash-1.5.2/packages/deb/rules +117 -0
  55. data/ext/sparsehash-1.5.2/packages/deb/sparsehash.dirs +2 -0
  56. data/ext/sparsehash-1.5.2/packages/deb/sparsehash.install +2 -0
  57. data/ext/sparsehash-1.5.2/packages/rpm.sh +86 -0
  58. data/ext/sparsehash-1.5.2/packages/rpm/rpm.spec +61 -0
  59. data/ext/sparsehash-1.5.2/src/config.h.in +131 -0
  60. data/ext/sparsehash-1.5.2/src/config.h.include +23 -0
  61. data/ext/sparsehash-1.5.2/src/google/dense_hash_map +310 -0
  62. data/ext/sparsehash-1.5.2/src/google/dense_hash_set +287 -0
  63. data/ext/sparsehash-1.5.2/src/google/sparse_hash_map +294 -0
  64. data/ext/sparsehash-1.5.2/src/google/sparse_hash_set +275 -0
  65. data/ext/sparsehash-1.5.2/src/google/sparsehash/densehashtable.h +1062 -0
  66. data/ext/sparsehash-1.5.2/src/google/sparsehash/sparsehashtable.h +1015 -0
  67. data/ext/sparsehash-1.5.2/src/google/sparsetable +1468 -0
  68. data/ext/sparsehash-1.5.2/src/google/type_traits.h +250 -0
  69. data/ext/sparsehash-1.5.2/src/hashtable_unittest.cc +1375 -0
  70. data/ext/sparsehash-1.5.2/src/simple_test.cc +103 -0
  71. data/ext/sparsehash-1.5.2/src/sparsetable_unittest.cc +696 -0
  72. data/ext/sparsehash-1.5.2/src/time_hash_map.cc +488 -0
  73. data/ext/sparsehash-1.5.2/src/type_traits_unittest.cc +492 -0
  74. data/ext/sparsehash-1.5.2/src/windows/config.h +149 -0
  75. data/ext/sparsehash-1.5.2/src/windows/google/sparsehash/sparseconfig.h +32 -0
  76. data/ext/sparsehash-1.5.2/src/windows/port.cc +63 -0
  77. data/ext/sparsehash-1.5.2/src/windows/port.h +81 -0
  78. data/ext/sparsehash-1.5.2/src/words +8944 -0
  79. data/ext/sparsehash-1.5.2/vsprojects/hashtable_unittest/hashtable_unittest.vcproj +187 -0
  80. data/ext/sparsehash-1.5.2/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj +172 -0
  81. data/ext/sparsehash-1.5.2/vsprojects/time_hash_map/time_hash_map.vcproj +187 -0
  82. data/ext/sparsehash-1.5.2/vsprojects/type_traits_unittest/type_traits_unittest.vcproj +169 -0
  83. data/ext/test.rb +10 -0
  84. data/test/spec.go +70 -0
  85. metadata +147 -0
@@ -0,0 +1,9 @@
1
+ example: example.o libchash.o
2
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
3
+
4
+ .SUFFIXES: .c .o .h
5
+ .c.o:
6
+ $(CC) -c $(CPPFLAGS) $(CFLAGS) -o $@ $<
7
+
8
+ example.o: example.c libchash.h
9
+ libchash.o: libchash.c libchash.h
@@ -0,0 +1,14 @@
1
+ This is a C version of sparsehash (and also, maybe, densehash) that I
2
+ wrote way back when, and served as the inspiration for the C++
3
+ version. The API for the C version is much uglier than the C++,
4
+ because of the lack of template support. I believe the class works,
5
+ but I'm not convinced it's really flexible or easy enough to use.
6
+
7
+ It would be nice to rework this C class to follow the C++ API as
8
+ closely as possible (eg have a set_deleted_key() instead of using a
9
+ #define like this code does now). I believe the code compiles and
10
+ runs, if anybody is interested in using it now, but it's subject to
11
+ major change in the future, as people work on it.
12
+
13
+ Craig Silverstein
14
+ 20 March 2005
@@ -0,0 +1,54 @@
1
+ #include <stdlib.h>
2
+ #include <stdio.h>
3
+ #include <assert.h>
4
+ #include "libchash.h"
5
+
6
+ static void TestInsert() {
7
+ struct HashTable* ht;
8
+ HTItem* bck;
9
+
10
+ ht = AllocateHashTable(1, 0); /* value is 1 byte, 0: don't copy keys */
11
+
12
+ HashInsert(ht, PTR_KEY(ht, "January"), 31); /* 0: don't overwrite old val */
13
+ bck = HashInsert(ht, PTR_KEY(ht, "February"), 28);
14
+ bck = HashInsert(ht, PTR_KEY(ht, "March"), 31);
15
+
16
+ bck = HashFind(ht, PTR_KEY(ht, "February"));
17
+ assert(bck);
18
+ assert(bck->data == 28);
19
+
20
+ FreeHashTable(ht);
21
+ }
22
+
23
+ static void TestFindOrInsert() {
24
+ struct HashTable* ht;
25
+ int i;
26
+ int iterations = 1000000;
27
+ int range = 30; /* random number between 1 and 30 */
28
+
29
+ ht = AllocateHashTable(4, 0); /* value is 4 bytes, 0: don't copy keys */
30
+
31
+ /* We'll test how good rand() is as a random number generator */
32
+ for (i = 0; i < iterations; ++i) {
33
+ int key = rand() % range;
34
+ HTItem* bck = HashFindOrInsert(ht, key, 0); /* initialize to 0 */
35
+ bck->data++; /* found one more of them */
36
+ }
37
+
38
+ for (i = 0; i < range; ++i) {
39
+ HTItem* bck = HashFind(ht, i);
40
+ if (bck) {
41
+ printf("%3d: %d\n", bck->key, bck->data);
42
+ } else {
43
+ printf("%3d: 0\n", i);
44
+ }
45
+ }
46
+
47
+ FreeHashTable(ht);
48
+ }
49
+
50
/* Program entry point: runs each self-test in turn and reports success.
 * The command-line arguments are accepted but not used. */
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  TestInsert();
  TestFindOrInsert();

  return 0;
}
@@ -0,0 +1,1537 @@
1
+ /* Copyright (c) 1998 - 2005, Google Inc.
2
+ * All rights reserved.
3
+ *
4
+ * Redistribution and use in source and binary forms, with or without
5
+ * modification, are permitted provided that the following conditions are
6
+ * met:
7
+ *
8
+ * * Redistributions of source code must retain the above copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ * * Redistributions in binary form must reproduce the above
11
+ * copyright notice, this list of conditions and the following disclaimer
12
+ * in the documentation and/or other materials provided with the
13
+ * distribution.
14
+ * * Neither the name of Google Inc. nor the names of its
15
+ * contributors may be used to endorse or promote products derived from
16
+ * this software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ *
30
+ * ---
31
+ * Author: Craig Silverstein
32
+ *
33
+ * This library is intended to be used for in-memory hash tables,
34
+ * though it provides rudimentary permanent-storage capabilities.
35
+ * It attempts to be fast, portable, and small. The best algorithm
36
+ * to fulfill these goals is an internal probing hashing algorithm,
37
+ * as in Knuth, _Art of Computer Programming_, vol III. Unlike
38
+ * chained (open) hashing, it doesn't require a pointer for every
39
+ * item, yet it is still constant time lookup in practice.
40
+ *
41
+ * Also to save space, we let the contents (both data and key) that
42
+ * you insert be a union: if the key/data is small, we store it
43
+ * directly in the hashtable, otherwise we store a pointer to it.
44
+ * To keep you from having to figure out which, use KEY_PTR and
45
+ * PTR_KEY to convert between the arguments to these functions and
46
+ * a pointer to the real data. For instance:
47
+ * char key[] = "ab", *key2;
48
+ * HTItem *bck; HashTable *ht;
49
+ * HashInsert(ht, PTR_KEY(ht, key), 0);
50
+ * bck = HashFind(ht, PTR_KEY(ht, "ab"));
51
+ * key2 = KEY_PTR(ht, bck->key);
52
+ *
53
+ * There is a rich set of operations supported:
54
+ * AllocateHashTable() -- Allocates a hashtable structure and
55
+ * returns it.
56
+ * cchKey: if it's a positive number, then each key is a
57
+ * fixed-length record of that length. If it's 0,
58
+ * the key is assumed to be a \0-terminated string.
59
+ * fSaveKey: normally, you are responsible for allocating
60
+ * space for the key. If this is 1, we make a
61
+ * copy of the key for you.
62
+ * ClearHashTable() -- Removes everything from a hashtable
63
+ * FreeHashTable() -- Frees memory used by a hashtable
64
+ *
65
+ * HashFind() -- takes a key (use PTR_KEY) and returns the
66
+ * HTItem containing that key, or NULL if the
67
+ * key is not in the hashtable.
68
+ * HashFindLast() -- returns the item found by last HashFind()
69
+ * HashFindOrInsert() -- inserts the key/data pair if the key
70
+ * is not already in the hashtable, or
71
+ * returns the appropriate HTItem if it is.
72
+ * HashFindOrInsertItem() -- takes key/data as an HTItem.
73
+ * HashInsert() -- adds a key/data pair to the hashtable. What
74
+ * it does if the key is already in the table
75
+ * depends on the value of SAMEKEY_OVERWRITE.
76
+ * HashInsertItem() -- takes key/data as an HTItem.
77
+ * HashDelete() -- removes a key/data pair from the hashtable,
78
+ * if it's there. RETURNS 1 if it was there,
79
+ * 0 else.
80
+ * If you use sparse tables and never delete, the full data
81
+ * space is available. Otherwise we steal -2 (maybe -3),
82
+ * so you can't have data fields with those values.
83
+ * HashDeleteLast() -- deletes the item returned by the last Find().
84
+ *
85
+ * HashFirstBucket() -- used to iterate over the buckets in a
86
+ * hashtable. DON'T INSERT OR DELETE WHILE
87
+ * ITERATING! You can't nest iterations.
88
+ * HashNextBucket() -- RETURNS NULL at the end of iterating.
89
+ *
90
+ * HashSetDeltaGoalSize() -- if you're going to insert 1000 items
91
+ * at once, call this fn with arg 1000.
92
+ * It grows the table more intelligently.
93
+ *
94
+ * HashSave() -- saves the hashtable to a file. It saves keys ok,
95
+ * but it doesn't know how to interpret the data field,
96
+ * so if the data field is a pointer to some complex
97
+ * structure, you must send a function that takes a
98
+ * file pointer and a pointer to the structure, and
99
+ * write whatever you want to write. It should return
100
+ * the number of bytes written. If the file is NULL,
101
+ * it should just return the number of bytes it would
102
+ * write, without writing anything.
103
+ * If your data field is just an integer, not a
104
+ * pointer, just send NULL for the function.
105
+ * HashLoad() -- loads a hashtable. It needs a function that takes
106
+ * a file and the size of the structure, and expects
107
+ * you to read in the structure and return a pointer
108
+ * to it. You must do memory allocation, etc. If
109
+ * the data is just a number, send NULL.
110
+ * HashLoadKeys() -- unlike HashLoad(), doesn't load the data off disk
111
+ * until needed. This saves memory, but if you look
112
+ * up the same key a lot, it does a disk access each
113
+ * time.
114
+ * You can't do Insert() or Delete() on hashtables that were loaded
115
+ * from disk.
116
+ *
117
+ * See libchash.h for parameters you can modify. Make sure LOG_WORD_SIZE
118
+ * is defined correctly for your machine! (5 for 32 bit words, 6 for 64).
119
+ */
120
+
121
+ #include <stdlib.h>
122
+ #include <stdio.h>
123
+ #include <string.h> /* for strcmp, memcmp, etc */
124
+ #include <sys/types.h> /* ULTRIX needs this for in.h */
125
+ #include <netinet/in.h> /* for reading/writing hashtables */
126
+ #include <assert.h>
127
+ #include "libchash.h" /* all the types */
128
+
129
+ /* if keys are stored directly but cchKey is less than sizeof(ulong), */
130
+ /* this cuts off the bits at the end.
   Row n of grgKeyTruncMask is filled in by HTSetupKeyTrunc() with n bytes
   of 255 followed by bytes of 0.  KEY_TRUNC reads row cchKey as a single
   ulong and ANDs it with the key.  When STORES_PTR(ht) is true, or when
   cchKey == sizeof(ulong), the key is returned unchanged.
   NOTE(review): the mask row is read through a (ulong *) cast of a char
   array -- presumably fine on the targeted compilers, but confirm the
   alignment and aliasing assumptions before porting. */
131
+ char grgKeyTruncMask[sizeof(ulong)][sizeof(ulong)];
132
+ #define KEY_TRUNC(ht, key) \
133
+ ( STORES_PTR(ht) || (ht)->cchKey == sizeof(ulong) \
134
+ ? (key) : ((key) & *(ulong *)&(grgKeyTruncMask[(ht)->cchKey][0])) )
135
+
136
/* Round num up to a multiple of the word size in bytes.  LOG_WORD_SIZE
 * counts bits, so LOG_WORD_SIZE-3 is log2 of the bytes per word.  The
 * argument is parenthesized so that an expression such as `n >> 1` is
 * rounded as a whole instead of being torn apart by the `-1`. */
#define WORD_ROUND(num)  ( (((num)-1) | ((1<<(LOG_WORD_SIZE-3))-1)) + 1 )
#define NULL_TERMINATED  0    /* val of cchKey if keys are null-term strings */
139
+
140
/* Useful operations we do to keys: compare them, copy them, free them */

/* KEY_CMP() -- evaluates to 0 when the two keys are equal, non-zero else.
 *    Keys stored directly are compared by subtraction.  Keys stored as
 *    pointers are equal if the pointers match; otherwise they are compared
 *    with strcmp() (null-terminated keys) or memcmp() over HashKeySize(ht)
 *    bytes.  Every use of key1/key2 is parenthesized so that the casts
 *    apply to the whole argument expression.
 */
#define KEY_CMP(ht, key1, key2)                                            \
   ( !STORES_PTR(ht)  ? (key1) - (key2) :                                  \
     (key1) == (key2) ? 0 :                                                \
     HashKeySize(ht) == NULL_TERMINATED ?                                  \
        strcmp((char *)(key1), (char *)(key2)) :                           \
        memcmp((void *)(key1), (void *)(key2), HashKeySize(ht)) )

/* COPY_KEY() -- stores keyFrom into keyTo.
 *    If the table stores keys directly, or does not save keys, the value
 *    is just assigned.  Otherwise a word-rounded copy of the key is
 *    allocated with HTsmalloc() and keyTo is set to point at it.
 */
#define COPY_KEY(ht, keyTo, keyFrom)                                       \
   do {                                                                    \
      if ( !STORES_PTR(ht) || !(ht)->fSaveKeys )                           \
      {                                                                    \
         (keyTo) = (keyFrom);             /* just copy pointer or info */  \
      }                                                                    \
      else if ( (ht)->cchKey == NULL_TERMINATED )                          \
      {                                   /* copy 0-term.ed str */         \
         (keyTo) = (ulong)HTsmalloc(                                       \
                      WORD_ROUND(strlen((char *)(keyFrom))+1) );           \
         strcpy((char *)(keyTo), (char *)(keyFrom));                       \
      }                                                                    \
      else                                                                 \
      {                                   /* copy fixed-length record */   \
         (keyTo) = (ulong) HTsmalloc( WORD_ROUND((ht)->cchKey) );          \
         memcpy( (char *)(keyTo), (char *)(keyFrom), (ht)->cchKey);        \
      }                                                                    \
   } while ( 0 )

/* FREE_KEY() -- releases a key that COPY_KEY() allocated.
 *    Does nothing unless the table stores pointers and saves keys.  The
 *    size handed to HTfree() mirrors the size COPY_KEY() allocated.
 *    Braces make explicit which `if` the `else` belongs to.
 */
#define FREE_KEY(ht, key)                                                  \
   do {                                                                    \
      if ( STORES_PTR(ht) && (ht)->fSaveKeys )                             \
      {                                                                    \
         if ( (ht)->cchKey == NULL_TERMINATED )                            \
         {                                                                 \
            HTfree((char *)(key), WORD_ROUND(strlen((char *)(key))+1));    \
         }                                                                 \
         else                                                              \
         {                                                                 \
            HTfree((char *)(key), WORD_ROUND((ht)->cchKey));               \
         }                                                                 \
      }                                                                    \
   } while ( 0 )
171
+
172
+ /* the following are useful for bitmaps */
173
+ /* Format is like this (if 1 word = 4 bits): 3210 7654 ba98 fedc ...
   That is, bit i of a bitmap lives in word DIV_NUM_ENTRIES(i), at bit
   MOD_NUM_ENTRIES(i) of that word; MODBIT(i) is the matching one-bit mask.
   TEST_BITMAP yields 1 or 0; SET_BITMAP and CLEAR_BITMAP modify bm in
   place.  MOD2(i, logmod) keeps the low logmod bits of i.
   BM_BYTES(cBuckets) is the number of bytes needed for cBuckets bits,
   rounded up to whole words. */
174
+ typedef ulong HTBitmapPart; /* this has to be unsigned, for >> */
175
+ typedef HTBitmapPart HTBitmap[1<<LOG_BM_WORDS];
176
+ typedef ulong HTOffset; /* something big enough to hold offsets */
177
+
178
+ #define BM_BYTES(cBuckets) /* we must ensure it's a multiple of word size */\
179
+ ( (((cBuckets) + 8*sizeof(ulong)-1) >> LOG_WORD_SIZE) << (LOG_WORD_SIZE-3) )
180
+ #define MOD2(i, logmod) ( (i) & ((1<<(logmod))-1) )
181
+ #define DIV_NUM_ENTRIES(i) ( (i) >> LOG_WORD_SIZE )
182
+ #define MOD_NUM_ENTRIES(i) ( MOD2(i, LOG_WORD_SIZE) )
183
+ #define MODBIT(i) ( ((ulong)1) << MOD_NUM_ENTRIES(i) )
184
+
185
+ #define TEST_BITMAP(bm, i) ( (bm)[DIV_NUM_ENTRIES(i)] & MODBIT(i) ? 1 : 0 )
186
+ #define SET_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] |= MODBIT(i)
187
+ #define CLEAR_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] &= ~MODBIT(i)
188
+
189
/* the following are useful for reading and writing hashtables */

/* READ_UL() -- reads one value written by WRITE_UL() from fp into data.
 *    The temporary starts at 0 and is reset to 0 when fread() does not
 *    deliver a complete record, so a failed or short read stores 0 into
 *    data rather than an indeterminate value.
 * NOTE(review): a record is sizeof(long) bytes on disk while ntohl() and
 *    htonl() convert 32 bits only, so saved files are presumably not
 *    portable between platforms whose long differs in size.  The width is
 *    left alone here because changing it would change the file format.
 */
#define READ_UL(fp, data)                       \
   do {                                         \
      long _ul = 0;                             \
      if ( fread(&_ul, sizeof(_ul), 1, (fp)) != 1 ) \
         _ul = 0;                               \
      (data) = ntohl(_ul);                      \
   } while (0)

/* WRITE_UL() -- writes data to fp in network byte order, in the record
 *    layout READ_UL() expects.  Write errors are not reported.
 */
#define WRITE_UL(fp, data)                      \
   do {                                         \
      long _ul = htonl((long)(data));           \
      fwrite(&_ul, sizeof(_ul), 1, (fp));       \
   } while (0)
202
+
203
/* LOAD_AND_RETURN() -- returns the item loadCommand finds, moving its data
 *    from disk to memory if necessary.
 *    ht:          the hashtable; every use is parenthesized, so any
 *                 pointer expression may be passed.
 *    loadCommand: an expression yielding an HTItem *, or NULL if the key
 *                 was not found.
 *    When ht has no fpData the item is returned as is.  Otherwise the data
 *    read for the previous lookup is freed, the item is copied into
 *    ht->bckData, its data field is used as a file offset, and the value
 *    returned by ht->dataRead replaces it; &ht->bckData is returned.
 *    Note dataRead cannot be NULL, because then we might as well (and do)
 *    load the data into memory.
 *    The macro expands to an if/else that always returns, so it must be
 *    the last statement of a function returning HTItem *.
 */
#define LOAD_AND_RETURN(ht, loadCommand)     /* lC returns an HTItem * */ \
   if ( !(ht)->fpData )                      /* data is stored in memory */ \
      return (loadCommand);                                               \
   else                                      /* must read data off of disk */ \
   {                                                                      \
      int cchData;                                                        \
      HTItem *bck;                                                        \
      if ( (ht)->bckData.data )  free((char *)(ht)->bckData.data);        \
      (ht)->bckData.data = (ulong)NULL;   /* needed if loadCommand fails */ \
      bck = (loadCommand);                                                \
      if ( bck == NULL )                  /* loadCommand failed: key not found */ \
         return NULL;                                                     \
      else                                                                \
         (ht)->bckData = *bck;                                            \
      fseek((ht)->fpData, (ht)->bckData.data, SEEK_SET);                  \
      READ_UL((ht)->fpData, cchData);                                     \
      (ht)->bckData.data = (ulong)(ht)->dataRead((ht)->fpData, cchData);  \
      return &((ht)->bckData);                                            \
   }
224
+
225
+
226
+ /* ======================================================================== */
227
+ /* UTILITY ROUTINES */
228
+ /* ---------------------- */
229
+
230
/* HTsmalloc() -- malloc that never reports failure to its caller
 *    A request for 0 bytes yields NULL.  Any other request yields size
 *    bytes of uninitialized memory; if malloc() cannot provide them, a
 *    message is printed to stderr and the program exits with status 1.
 */
static void *HTsmalloc(unsigned long size)
{
   void *block = NULL;

   if ( size != 0 )
   {
      block = malloc(size);
      if ( block == NULL )
      {
         fprintf(stderr, "HTsmalloc: Unable to allocate %lu bytes of memory\n",
                 size);
         exit(1);
      }
   }
   return block;
}
248
+
249
/* HTscalloc() -- calloc that never reports failure to its caller
 *    Returns size bytes of zero-filled memory.  If calloc() fails for a
 *    non-zero size, a message is printed to stderr and the program exits
 *    with status 1.  For size 0 whatever calloc() returned is passed on.
 */
static void *HTscalloc(unsigned long size)
{
   void *zeroed = calloc(size, 1);

   if ( zeroed == NULL && size != 0 )
   {
      fprintf(stderr, "HTscalloc: Unable to allocate %lu bytes of memory\n",
              size);
      exit(1);
   }
   return zeroed;
}
266
+
267
/* HTsrealloc() -- safe realloc
 *    Resizes the block at ptr to new_size bytes, or crashes (message on
 *    stderr, exit status 1) if the allocation fails.
 *    ptr:      block to resize; NULL means "allocate a fresh block", which
 *              is delegated to HTsmalloc().
 *    new_size: requested size in bytes.  A size of 0 frees ptr and returns
 *              NULL, matching what HTsmalloc(0) returns, instead of
 *              relying on realloc()'s implementation-defined treatment of
 *              zero-sized requests.
 *    delta:    accepted for the callers' benefit; not used here.
 *    RETURNS the (possibly moved) block, or NULL when new_size is 0.
 */
static void *HTsrealloc(void *ptr, unsigned long new_size, long delta)
{
   void *resized;

   (void)delta;                     /* kept in the signature, unused */

   if ( ptr == NULL )
      return HTsmalloc(new_size);
   if ( new_size == 0 )
   {
      free(ptr);
      return NULL;
   }
   resized = realloc(ptr, new_size);
   if ( resized == NULL )
   {
      fprintf(stderr, "HTsrealloc: Unable to reallocate %lu bytes of memory\n",
              new_size);
      exit(1);
   }
   return resized;
}
284
+
285
/* HTfree() -- counterpart of the HTs*alloc() routines
 *    Releases ptr with free() when size is greater than 0 and does
 *    nothing for a size of 0.  The size is only used for that test; this
 *    function does not record it anywhere.
 */
static void HTfree(void *ptr, unsigned long size)
{
   if ( size == 0 )          /* nothing was allocated for an empty block */
      return;
   free(ptr);
}
294
+
295
/* HTcopy() -- reads an unsigned long from a possibly unaligned address
 *    ul points at sizeof(unsigned long) bytes holding the value.  The
 *    bytes are copied one at a time into a properly aligned local, which
 *    avoids dereferencing ul as an unsigned long pointer on machines that
 *    require alignment.
 *    RETURNS the value assembled from those bytes.
 */
unsigned long HTcopy(char *ul)
{
   unsigned long value;
   unsigned char *dst = (unsigned char *)&value;
   size_t iByte;

   for ( iByte = 0; iByte < sizeof(value); iByte++ )
      dst[iByte] = (unsigned char)ul[iByte];
   return value;
}
308
+
309
+ /*************************************************************************\
310
+ | HTSetupKeyTrunc() |
311
+ | If keys are stored directly but cchKey is less than |
312
+ | sizeof(ulong), this cuts off the bits at the end. |
313
+ \*************************************************************************/
314
+
315
+ static void HTSetupKeyTrunc(void)
316
+ {
317
+ int i, j;
318
+
319
+ for ( i = 0; i < sizeof(unsigned long); i++ )
320
+ for ( j = 0; j < sizeof(unsigned long); j++ )
321
+ grgKeyTruncMask[i][j] = j < i ? 255 : 0; /* chars have 8 bits */
322
+ }
323
+
324
+
325
+ /* ======================================================================== */
326
+ /* TABLE ROUTINES */
327
+ /* -------------------- */
328
+
329
+ /* The idea is that a hashtable with (logically) t buckets is divided
330
+ * into t/M groups of M buckets each. (M is a constant set in
331
+ * LOG_BM_WORDS for efficiency.) Each group is stored sparsely.
332
+ * Thus, inserting into the table causes some array to grow, which is
333
+ * slow but still constant time. Lookup involves doing a
334
+ * logical-position-to-sparse-position lookup, which is also slow but
335
+ * constant time. The larger M is, the slower these operations are
336
+ * but the less overhead (slightly).
337
+ *
338
+ * To store the sparse array, we store a bitmap B, where B[i] = 1 iff
339
+ * bucket i is non-empty. Then to look up bucket i we really look up
340
+ * array[# of 1s before i in B]. This is constant time for fixed M.
341
+ *
342
+ * Terminology: the position of an item in the overall table (from
343
+ * 1 .. t) is called its "location." The logical position in a group
344
+ * (from 1 .. M ) is called its "position." The actual location in
345
+ * the array (from 1 .. # of non-empty buckets in the group) is
346
+ * called its "offset."
347
+ *
348
+ * The following operations are supported:
349
+ * o Allocate an array with t buckets, all empty
350
+ * o Free an array (but not whatever was stored in the buckets)
351
+ * o Tell whether or not a bucket is empty
352
+ * o Return a bucket with a given location
353
+ * o Set the value of a bucket at a given location
354
+ * o Iterate through all the buckets in the array
355
+ * o Read and write an occupancy bitmap to disk
356
+ * o Return how much memory is being allocated by the array structure
357
+ */
358
+
359
+ #ifndef SparseBucket /* by default, each bucket holds an HTItem */
360
+ #define SparseBucket HTItem
361
+ #endif
362
+
363
+ typedef struct SparseBin {
364
+ SparseBucket *binSparse;
365
+ HTBitmap bmOccupied; /* bmOccupied[i] is 1 if bucket i has an item */
366
+ short cOccupied; /* size of binSparse; useful for iterators, eg */
367
+ } SparseBin;
368
+
369
+ typedef struct SparseIterator {
370
+ long posGroup;
371
+ long posOffset;
372
+ SparseBin *binSparse; /* state info, to avoid args for NextBucket() */
373
+ ulong cBuckets;
374
+ } SparseIterator;
375
+
376
+ #define LOG_LOW_BIN_SIZE ( LOG_BM_WORDS+LOG_WORD_SIZE )
377
+ #define SPARSE_GROUPS(cBuckets) ( (((cBuckets)-1) >> LOG_LOW_BIN_SIZE) + 1 )
378
+
379
+ /* we need a small function to figure out # of items set in the bm */
380
+ static HTOffset EntriesUpto(HTBitmapPart *bm, int i)
381
+ { /* returns # of set bits in 0..i-1 */
382
+ HTOffset retval = 0;
383
+ static HTOffset rgcBits[256] = /* # of bits set in one char */
384
+ {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
385
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
386
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
387
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
388
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
389
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
390
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
391
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
392
+ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
393
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
394
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
395
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
396
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
397
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
398
+ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
399
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
400
+
401
+ if ( i == 0 ) return 0;
402
+ for ( ; i > sizeof(*bm)*8; i -= sizeof(*bm)*8, bm++ )
403
+ { /* think of it as loop unrolling */
404
+ #if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */
405
+ retval += rgcBits[*bm & 255]; /* get the low byte */
406
+ #if LOG_WORD_SIZE >= 4 /* at least 2 bytes */
407
+ retval += rgcBits[(*bm >> 8) & 255];
408
+ #if LOG_WORD_SIZE >= 5 /* at least 4 bytes */
409
+ retval += rgcBits[(*bm >> 16) & 255];
410
+ retval += rgcBits[(*bm >> 24) & 255];
411
+ #if LOG_WORD_SIZE >= 6 /* 8 bytes! */
412
+ retval += rgcBits[(*bm >> 32) & 255];
413
+ retval += rgcBits[(*bm >> 40) & 255];
414
+ retval += rgcBits[(*bm >> 48) & 255];
415
+ retval += rgcBits[(*bm >> 56) & 255];
416
+ #if LOG_WORD_SIZE >= 7 /* not a concern for a while... */
417
+ #error Need to rewrite EntriesUpto to support such big words
418
+ #endif /* >8 bytes */
419
+ #endif /* 8 bytes */
420
+ #endif /* 4 bytes */
421
+ #endif /* 2 bytes */
422
+ #endif /* 1 byte */
423
+ }
424
+ switch ( i ) { /* from 0 to 63 */
425
+ case 0:
426
+ return retval;
427
+ #if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */
428
+ case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
429
+ return (retval + rgcBits[*bm & ((1 << i)-1)]);
430
+ #if LOG_WORD_SIZE >= 4 /* at least 2 bytes */
431
+ case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
432
+ return (retval + rgcBits[*bm & 255] +
433
+ rgcBits[(*bm >> 8) & ((1 << (i-8))-1)]);
434
+ #if LOG_WORD_SIZE >= 5 /* at least 4 bytes */
435
+ case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24:
436
+ return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
437
+ rgcBits[(*bm >> 16) & ((1 << (i-16))-1)]);
438
+ case 25: case 26: case 27: case 28: case 29: case 30: case 31: case 32:
439
+ return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
440
+ rgcBits[(*bm >> 16) & 255] +
441
+ rgcBits[(*bm >> 24) & ((1 << (i-24))-1)]);
442
+ #if LOG_WORD_SIZE >= 6 /* 8 bytes! */
443
+ case 33: case 34: case 35: case 36: case 37: case 38: case 39: case 40:
444
+ return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
445
+ rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
446
+ rgcBits[(*bm >> 32) & ((1 << (i-32))-1)]);
447
+ case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48:
448
+ return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
449
+ rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
450
+ rgcBits[(*bm >> 32) & 255] +
451
+ rgcBits[(*bm >> 40) & ((1 << (i-40))-1)]);
452
+ case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56:
453
+ return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
454
+ rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
455
+ rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
456
+ rgcBits[(*bm >> 48) & ((1 << (i-48))-1)]);
457
+ case 57: case 58: case 59: case 60: case 61: case 62: case 63: case 64:
458
+ return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
459
+ rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
460
+ rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
461
+ rgcBits[(*bm >> 48) & 255] +
462
+ rgcBits[(*bm >> 56) & ((1 << (i-56))-1)]);
463
+ #endif /* 8 bytes */
464
+ #endif /* 4 bytes */
465
+ #endif /* 2 bytes */
466
+ #endif /* 1 byte */
467
+ }
468
+ assert("" == "word size is too big in EntriesUpto()");
469
+ return -1;
470
+ }
471
/* Turns position i within a group into an offset into the group's
 * binSparse array, by counting the occupied positions before i. */
#define SPARSE_POS_TO_OFFSET(bm, i)   ( EntriesUpto(&((bm)[0]), i) )

/* Address in the sparse array that corresponds to `location`: the group's
 * binSparse array plus the offset of the location's position. */
#define SPARSE_BUCKET(bin, location)                                        \
   ( (bin)[(location) >> LOG_LOW_BIN_SIZE].binSparse +                      \
     SPARSE_POS_TO_OFFSET((bin)[(location)>>LOG_LOW_BIN_SIZE].bmOccupied,   \
                          MOD2(location, LOG_LOW_BIN_SIZE)) )
476
+
477
+
478
+ /*************************************************************************\
479
+ | SparseAllocate() |
480
+ | SparseFree() |
481
+ | Allocates, sets-to-empty, and frees a sparse array. All you need |
482
+ | to tell me is how many buckets you want. I return the number of |
483
+ | buckets I actually allocated, setting the array as a parameter. |
484
+ | Note that you have to set auxilliary parameters, like cOccupied. |
485
+ \*************************************************************************/
486
+
487
+ static ulong SparseAllocate(SparseBin **pbinSparse, ulong cBuckets)
488
+ {
489
+ int cGroups = SPARSE_GROUPS(cBuckets);
490
+
491
+ *pbinSparse = (SparseBin *) HTscalloc(sizeof(**pbinSparse) * cGroups);
492
+ return cGroups << LOG_LOW_BIN_SIZE;
493
+ }
494
+
495
+ static SparseBin *SparseFree(SparseBin *binSparse, ulong cBuckets)
496
+ {
497
+ ulong iGroup, cGroups = SPARSE_GROUPS(cBuckets);
498
+
499
+ for ( iGroup = 0; iGroup < cGroups; iGroup++ )
500
+ HTfree(binSparse[iGroup].binSparse, (sizeof(*binSparse[iGroup].binSparse)
501
+ * binSparse[iGroup].cOccupied));
502
+ HTfree(binSparse, sizeof(*binSparse) * cGroups);
503
+ return NULL;
504
+ }
505
+
506
+ /*************************************************************************\
507
+ | SparseIsEmpty() |
508
+ | SparseFind() |
509
+ | You give me a location (ie a number between 1 and t), and I |
510
+ | return the bucket at that location, or NULL if the bucket is |
511
+ | empty. It's OK to call Find() on an empty table. |
512
+ \*************************************************************************/
513
+
514
+ static int SparseIsEmpty(SparseBin *binSparse, ulong location)
515
+ {
516
+ return !TEST_BITMAP(binSparse[location>>LOG_LOW_BIN_SIZE].bmOccupied,
517
+ MOD2(location, LOG_LOW_BIN_SIZE));
518
+ }
519
+
520
+ static SparseBucket *SparseFind(SparseBin *binSparse, ulong location)
521
+ {
522
+ if ( SparseIsEmpty(binSparse, location) )
523
+ return NULL;
524
+ return SPARSE_BUCKET(binSparse, location);
525
+ }
526
+
527
+ /*************************************************************************\
528
+ | SparseInsert() |
529
+ | You give me a location, and contents to put there, and I insert |
530
+ | into that location and RETURN a pointer to the location. If |
531
+ | bucket was already occupied, I write over the contents only if |
532
+ | *pfOverwrite is 1. We set *pfOverwrite to 1 if there was someone |
533
+ | there (whether or not we overwrote) and 0 else. |
534
+ \*************************************************************************/
535
+
536
+ static SparseBucket *SparseInsert(SparseBin *binSparse, SparseBucket *bckInsert,
537
+ ulong location, int *pfOverwrite)
538
+ {
539
+ SparseBucket *bckPlace;
540
+ HTOffset offset;
541
+
542
+ bckPlace = SparseFind(binSparse, location);
543
+ if ( bckPlace ) /* means we replace old contents */
544
+ {
545
+ if ( *pfOverwrite )
546
+ *bckPlace = *bckInsert;
547
+ *pfOverwrite = 1;
548
+ return bckPlace;
549
+ }
550
+
551
+ binSparse += (location >> LOG_LOW_BIN_SIZE);
552
+ offset = SPARSE_POS_TO_OFFSET(binSparse->bmOccupied,
553
+ MOD2(location, LOG_LOW_BIN_SIZE));
554
+ binSparse->binSparse = (SparseBucket *)
555
+ HTsrealloc(binSparse->binSparse,
556
+ sizeof(*binSparse->binSparse) * ++binSparse->cOccupied,
557
+ sizeof(*binSparse->binSparse));
558
+ memmove(binSparse->binSparse + offset+1,
559
+ binSparse->binSparse + offset,
560
+ (binSparse->cOccupied-1 - offset) * sizeof(*binSparse->binSparse));
561
+ binSparse->binSparse[offset] = *bckInsert;
562
+ SET_BITMAP(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE));
563
+ *pfOverwrite = 0;
564
+ return binSparse->binSparse + offset;
565
+ }
566
+
567
+ /*************************************************************************\
568
+ | SparseFirstBucket() |
569
+ | SparseNextBucket() |
570
+ | SparseCurrentBit() |
571
+ | Iterate through the occupied buckets of a sparse hashtable. You |
572
+ | must, of course, have allocated space yourself for the iterator. |
573
+ \*************************************************************************/
574
+
575
+ static SparseBucket *SparseNextBucket(SparseIterator *iter)
576
+ {
577
+ if ( iter->posOffset != -1 && /* not called from FirstBucket()? */
578
+ (++iter->posOffset < iter->binSparse[iter->posGroup].cOccupied) )
579
+ return iter->binSparse[iter->posGroup].binSparse + iter->posOffset;
580
+
581
+ iter->posOffset = 0; /* start the next group */
582
+ for ( iter->posGroup++; iter->posGroup < SPARSE_GROUPS(iter->cBuckets);
583
+ iter->posGroup++ )
584
+ if ( iter->binSparse[iter->posGroup].cOccupied > 0 )
585
+ return iter->binSparse[iter->posGroup].binSparse; /* + 0 */
586
+ return NULL; /* all remaining groups were empty */
587
+ }
588
+
589
+ static SparseBucket *SparseFirstBucket(SparseIterator *iter,
590
+ SparseBin *binSparse, ulong cBuckets)
591
+ {
592
+ iter->binSparse = binSparse; /* set it up for NextBucket() */
593
+ iter->cBuckets = cBuckets;
594
+ iter->posOffset = -1; /* when we advance, we're at 0 */
595
+ iter->posGroup = -1;
596
+ return SparseNextBucket(iter);
597
+ }
598
+
599
+ /*************************************************************************\
600
+ | SparseWrite() |
601
+ | SparseRead() |
602
+ | These are routines for storing a sparse hashtable onto disk. We |
603
+ | store the number of buckets and a bitmap indicating which buckets |
604
+ | are allocated (occupied). The actual contents of the buckets |
605
+ | must be stored separately. |
606
+ \*************************************************************************/
607
+
608
+ static void SparseWrite(FILE *fp, SparseBin *binSparse, ulong cBuckets)
609
+ {
610
+ ulong i, j;
611
+
612
+ WRITE_UL(fp, cBuckets);
613
+ for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )
614
+ for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )
615
+ WRITE_UL(fp, binSparse[i].bmOccupied[j]);
616
+ }
617
+
618
+ static ulong SparseRead(FILE *fp, SparseBin **pbinSparse)
619
+ {
620
+ ulong i, j, cBuckets;
621
+
622
+ READ_UL(fp, cBuckets); /* actually, cBuckets is stored */
623
+ cBuckets = SparseAllocate(pbinSparse, cBuckets);
624
+ for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )
625
+ {
626
+ for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )
627
+ READ_UL(fp, (*pbinSparse)[i].bmOccupied[j]);
628
+ (*pbinSparse)[i].cOccupied =
629
+ SPARSE_POS_TO_OFFSET((*pbinSparse)[i].bmOccupied,1<<LOG_LOW_BIN_SIZE);
630
+ (*pbinSparse)[i].binSparse =
631
+ (SparseBucket *) HTsmalloc(sizeof(*((*pbinSparse)[i].binSparse)) *
632
+ (*pbinSparse)[i].cOccupied);
633
+ }
634
+ return cBuckets;
635
+ }
636
+
637
+ /*************************************************************************\
638
+ | SparseMemory() |
639
+ | SparseMemory() tells us how much memory is being allocated for |
640
+ | the sparse table. You need to tell me not only how many buckets |
641
+ | there are, but how many are occupied. |
642
+ \*************************************************************************/
643
+
644
+ static ulong SparseMemory(ulong cBuckets, ulong cOccupied)
645
+ {
646
+ return ( cOccupied * sizeof(SparseBucket) +
647
+ SPARSE_GROUPS(cBuckets) * sizeof(SparseBin) );
648
+ }
649
+
650
+
651
+ /* Just for fun, I also provide support for dense tables. These are
652
+ * just regular arrays. Access is fast, but they can get big.
653
+ * Use Table(x) at the top of chash.h to decide which you want.
654
+ * A disadvantage is we need to steal more of the data space for
655
+ * indicating empty buckets. We choose -3.
656
+ */
657
+
658
+ #ifndef DenseBucket /* by default, each bucket holds an HTItem */
659
+ #define DenseBucket HTItem
660
+ #endif
661
+
662
+ typedef struct DenseBin { /* needs to be a struct for C typing reasons */
663
+ DenseBucket *rgBuckets; /* A bin is an array of buckets */
664
+ } DenseBin;
665
+
666
+ typedef struct DenseIterator {
667
+ long pos; /* the actual iterator */
668
+ DenseBin *bin; /* state info, to avoid args for NextBucket() */
669
+ ulong cBuckets;
670
+ } DenseIterator;
671
+
672
+ #define DENSE_IS_EMPTY(bin, i) ( (bin)[i].data == EMPTY )
673
+ #define DENSE_SET_EMPTY(bin, i) (bin)[i].data = EMPTY /* fks-hash.h */
674
+ #define DENSE_SET_OCCUPIED(bin, i) (bin)[i].data = 1 /* not EMPTY */
675
+
676
+ static void DenseClear(DenseBin *bin, ulong cBuckets)
677
+ {
678
+ while ( cBuckets-- )
679
+ DENSE_SET_EMPTY(bin->rgBuckets, cBuckets);
680
+ }
681
+
682
+ static ulong DenseAllocate(DenseBin **pbin, ulong cBuckets)
683
+ {
684
+ *pbin = (DenseBin *) HTsmalloc(sizeof(*pbin));
685
+ (*pbin)->rgBuckets = (DenseBucket *) HTsmalloc(sizeof(*(*pbin)->rgBuckets)
686
+ * cBuckets);
687
+ DenseClear(*pbin, cBuckets);
688
+ return cBuckets;
689
+ }
690
+
691
+ static DenseBin *DenseFree(DenseBin *bin, ulong cBuckets)
692
+ {
693
+ HTfree(bin->rgBuckets, sizeof(*bin->rgBuckets) * cBuckets);
694
+ HTfree(bin, sizeof(*bin));
695
+ return NULL;
696
+ }
697
+
698
+ static int DenseIsEmpty(DenseBin *bin, ulong location)
699
+ {
700
+ return DENSE_IS_EMPTY(bin->rgBuckets, location);
701
+ }
702
+
703
+ static DenseBucket *DenseFind(DenseBin *bin, ulong location)
704
+ {
705
+ if ( DenseIsEmpty(bin, location) )
706
+ return NULL;
707
+ return bin->rgBuckets + location;
708
+ }
709
+
710
+ static DenseBucket *DenseInsert(DenseBin *bin, DenseBucket *bckInsert,
711
+ ulong location, int *pfOverwrite)
712
+ {
713
+ DenseBucket *bckPlace;
714
+
715
+ bckPlace = DenseFind(bin, location);
716
+ if ( bckPlace ) /* means something is already there */
717
+ {
718
+ if ( *pfOverwrite )
719
+ *bckPlace = *bckInsert;
720
+ *pfOverwrite = 1; /* set to 1 to indicate someone was there */
721
+ return bckPlace;
722
+ }
723
+ else
724
+ {
725
+ bin->rgBuckets[location] = *bckInsert;
726
+ *pfOverwrite = 0;
727
+ return bin->rgBuckets + location;
728
+ }
729
+ }
730
+
731
+ static DenseBucket *DenseNextBucket(DenseIterator *iter)
732
+ {
733
+ for ( iter->pos++; iter->pos < iter->cBuckets; iter->pos++ )
734
+ if ( !DenseIsEmpty(iter->bin, iter->pos) )
735
+ return iter->bin->rgBuckets + iter->pos;
736
+ return NULL; /* all remaining groups were empty */
737
+ }
738
+
739
+ static DenseBucket *DenseFirstBucket(DenseIterator *iter,
740
+ DenseBin *bin, ulong cBuckets)
741
+ {
742
+ iter->bin = bin; /* set it up for NextBucket() */
743
+ iter->cBuckets = cBuckets;
744
+ iter->pos = -1; /* thus the next bucket will be 0 */
745
+ return DenseNextBucket(iter);
746
+ }
747
+
748
+ static void DenseWrite(FILE *fp, DenseBin *bin, ulong cBuckets)
749
+ {
750
+ ulong pos = 0, bit, bm;
751
+
752
+ WRITE_UL(fp, cBuckets);
753
+ while ( pos < cBuckets )
754
+ {
755
+ bm = 0;
756
+ for ( bit = 0; bit < 8*sizeof(ulong); bit++ )
757
+ {
758
+ if ( !DenseIsEmpty(bin, pos) )
759
+ SET_BITMAP(&bm, bit); /* in fks-hash.h */
760
+ if ( ++pos == cBuckets )
761
+ break;
762
+ }
763
+ WRITE_UL(fp, bm);
764
+ }
765
+ }
766
+
767
+ static ulong DenseRead(FILE *fp, DenseBin **pbin)
768
+ {
769
+ ulong pos = 0, bit, bm, cBuckets;
770
+
771
+ READ_UL(fp, cBuckets);
772
+ cBuckets = DenseAllocate(pbin, cBuckets);
773
+ while ( pos < cBuckets )
774
+ {
775
+ READ_UL(fp, bm);
776
+ for ( bit = 0; bit < 8*sizeof(ulong); bit++ )
777
+ {
778
+ if ( TEST_BITMAP(&bm, bit) ) /* in fks-hash.h */
779
+ DENSE_SET_OCCUPIED((*pbin)->rgBuckets, pos);
780
+ else
781
+ DENSE_SET_EMPTY((*pbin)->rgBuckets, pos);
782
+ if ( ++pos == cBuckets )
783
+ break;
784
+ }
785
+ }
786
+ return cBuckets;
787
+ }
788
+
789
+ static ulong DenseMemory(ulong cBuckets, ulong cOccupied)
790
+ {
791
+ return cBuckets * sizeof(DenseBucket);
792
+ }
793
+
794
+
795
+ /* ======================================================================== */
796
+ /* HASHING ROUTINES */
797
+ /* ---------------------- */
798
+
799
+ /* Implements a simple quadratic hashing scheme. We have a single hash
800
+ * table of size t and a single hash function h(x). When inserting an
801
+ * item, first we try h(x) % t. If it's occupied, we try h(x) +
802
+ * i*(i-1)/2 % t for increasing values of i until we hit a not-occupied
803
+ * space. To make this dynamic, we double the size of the hash table as
804
+ * soon as more than half the cells are occupied. When deleting, we can
805
+ * choose to shrink the hashtable when less than a quarter of the
806
+ * cells are occupied, or we can choose never to shrink the hashtable.
807
+ * For lookup, we check h(x) + i*(i-1)/2 % t (starting with i=0) until
808
+ * we get a match or we hit an empty space. Note that as a result,
809
+ * we can't make a cell empty on deletion, or lookups may end prematurely.
810
+ * Instead we mark the cell as "deleted." We thus steal the value
811
+ * DELETED as a possible "data" value. As long as data are pointers,
812
+ * that's ok.
813
+ * The hash increment we use, i(i-1)/2, is not the standard quadratic
814
+ * hash increment, which is i^2. i(i-1)/2 covers the entire bucket space
815
+ * when the hashtable size is a power of two, as it is for us. In fact,
816
+ * the first n probes cover n distinct buckets; then it repeats. This
817
+ * guarantees insertion will always succeed.
818
+ * If you want linear hashing, set JUMP in chash.h. You can also change
819
+ * various other parameters there.
820
+ */
821
+
822
+ /*************************************************************************\
823
+ | Hash() |
824
+ | The hash function I use is due to Bob Jenkins (see |
825
+ | http://burtleburtle.net/bob/hash/evahash.html |
826
+ | According to http://burtleburtle.net/bob/c/lookup2.c, |
827
+ | his implementation is public domain.) |
828
+ | It takes 36 instructions, in 18 cycles if you're lucky. |
829
+ | hashing depends on the fact the hashtable size is always a |
830
+ | power of 2. cBuckets is probably ht->cBuckets. |
831
+ \*************************************************************************/
832
+
833
+ #if LOG_WORD_SIZE == 5 /* 32 bit words */
834
+
835
+ #define mix(a,b,c) \
836
+ { \
837
+ a -= b; a -= c; a ^= (c>>13); \
838
+ b -= c; b -= a; b ^= (a<<8); \
839
+ c -= a; c -= b; c ^= (b>>13); \
840
+ a -= b; a -= c; a ^= (c>>12); \
841
+ b -= c; b -= a; b ^= (a<<16); \
842
+ c -= a; c -= b; c ^= (b>>5); \
843
+ a -= b; a -= c; a ^= (c>>3); \
844
+ b -= c; b -= a; b ^= (a<<10); \
845
+ c -= a; c -= b; c ^= (b>>15); \
846
+ }
847
+ #ifdef WORD_HASH /* play with this on little-endian machines */
848
+ #define WORD_AT(ptr) ( *(ulong *)(ptr) )
849
+ #else
850
+ #define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
851
+ ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) )
852
+ #endif
853
+
854
+ #elif LOG_WORD_SIZE == 6 /* 64 bit words */
855
+
856
+ #define mix(a,b,c) \
857
+ { \
858
+ a -= b; a -= c; a ^= (c>>43); \
859
+ b -= c; b -= a; b ^= (a<<9); \
860
+ c -= a; c -= b; c ^= (b>>8); \
861
+ a -= b; a -= c; a ^= (c>>38); \
862
+ b -= c; b -= a; b ^= (a<<23); \
863
+ c -= a; c -= b; c ^= (b>>5); \
864
+ a -= b; a -= c; a ^= (c>>35); \
865
+ b -= c; b -= a; b ^= (a<<49); \
866
+ c -= a; c -= b; c ^= (b>>11); \
867
+ a -= b; a -= c; a ^= (c>>12); \
868
+ b -= c; b -= a; b ^= (a<<18); \
869
+ c -= a; c -= b; c ^= (b>>22); \
870
+ }
871
+ #ifdef WORD_HASH /* alpha is little-endian, btw */
872
+ #define WORD_AT(ptr) ( *(ulong *)(ptr) )
873
+ #else
874
+ #define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
875
+ ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) + \
876
+ ((ulong)(ptr)[4]<<32) + ((ulong)(ptr)[5]<<40) + \
877
+ ((ulong)(ptr)[6]<<48) + ((ulong)(ptr)[7]<<56) )
878
+ #endif
879
+
880
+ #else /* neither 32 or 64 bit words */
881
+ #error This hash function can only hash 32 or 64 bit words. Sorry.
882
+ #endif
883
+
884
+ static ulong Hash(HashTable *ht, char *key, ulong cBuckets)
885
+ {
886
+ ulong a, b, c, cchKey, cchKeyOrig;
887
+
888
+ cchKeyOrig = ht->cchKey == NULL_TERMINATED ? strlen(key) : ht->cchKey;
889
+ a = b = c = 0x9e3779b9; /* the golden ratio; an arbitrary value */
890
+
891
+ for ( cchKey = cchKeyOrig; cchKey >= 3 * sizeof(ulong);
892
+ cchKey -= 3 * sizeof(ulong), key += 3 * sizeof(ulong) )
893
+ {
894
+ a += WORD_AT(key);
895
+ b += WORD_AT(key + sizeof(ulong));
896
+ c += WORD_AT(key + sizeof(ulong)*2);
897
+ mix(a,b,c);
898
+ }
899
+
900
+ c += cchKeyOrig;
901
+ switch ( cchKey ) { /* deal with rest. Cases fall through */
902
+ #if LOG_WORD_SIZE == 5
903
+ case 11: c += (ulong)key[10]<<24;
904
+ case 10: c += (ulong)key[9]<<16;
905
+ case 9 : c += (ulong)key[8]<<8;
906
+ /* the first byte of c is reserved for the length */
907
+ case 8 : b += WORD_AT(key+4); a+= WORD_AT(key); break;
908
+ case 7 : b += (ulong)key[6]<<16;
909
+ case 6 : b += (ulong)key[5]<<8;
910
+ case 5 : b += key[4];
911
+ case 4 : a += WORD_AT(key); break;
912
+ case 3 : a += (ulong)key[2]<<16;
913
+ case 2 : a += (ulong)key[1]<<8;
914
+ case 1 : a += key[0];
915
+ /* case 0 : nothing left to add */
916
+ #elif LOG_WORD_SIZE == 6
917
+ case 23: c += (ulong)key[22]<<56;
918
+ case 22: c += (ulong)key[21]<<48;
919
+ case 21: c += (ulong)key[20]<<40;
920
+ case 20: c += (ulong)key[19]<<32;
921
+ case 19: c += (ulong)key[18]<<24;
922
+ case 18: c += (ulong)key[17]<<16;
923
+ case 17: c += (ulong)key[16]<<8;
924
+ /* the first byte of c is reserved for the length */
925
+ case 16: b += WORD_AT(key+8); a+= WORD_AT(key); break;
926
+ case 15: b += (ulong)key[14]<<48;
927
+ case 14: b += (ulong)key[13]<<40;
928
+ case 13: b += (ulong)key[12]<<32;
929
+ case 12: b += (ulong)key[11]<<24;
930
+ case 11: b += (ulong)key[10]<<16;
931
+ case 10: b += (ulong)key[ 9]<<8;
932
+ case 9: b += (ulong)key[ 8];
933
+ case 8: a += WORD_AT(key); break;
934
+ case 7: a += (ulong)key[ 6]<<48;
935
+ case 6: a += (ulong)key[ 5]<<40;
936
+ case 5: a += (ulong)key[ 4]<<32;
937
+ case 4: a += (ulong)key[ 3]<<24;
938
+ case 3: a += (ulong)key[ 2]<<16;
939
+ case 2: a += (ulong)key[ 1]<<8;
940
+ case 1: a += (ulong)key[ 0];
941
+ /* case 0: nothing left to add */
942
+ #endif
943
+ }
944
+ mix(a,b,c);
945
+ return c & (cBuckets-1);
946
+ }
947
+
948
+
949
+ /*************************************************************************\
950
+ | Rehash() |
951
+ | You give me a hashtable, a new size, and a bucket to follow, and |
952
+ | I resize the hashtable's bin to be the new size, rehashing |
953
+ | everything in it. I keep particular track of the bucket you pass |
954
+ | in, and RETURN a pointer to where the item in the bucket got to. |
955
+ | (If you pass in NULL, I return an arbitrary pointer.) |
956
+ \*************************************************************************/
957
+
958
+ static HTItem *Rehash(HashTable *ht, ulong cNewBuckets, HTItem *bckWatch)
959
+ {
960
+ Table *tableNew;
961
+ ulong iBucketFirst;
962
+ HTItem *bck, *bckNew = NULL;
963
+ ulong offset; /* the i in h(x) + i*(i-1)/2 */
964
+ int fOverwrite = 0; /* not an issue: there can be no collisions */
965
+
966
+ assert( ht->table );
967
+ cNewBuckets = Table(Allocate)(&tableNew, cNewBuckets);
968
+ /* Since we RETURN the new position of bckWatch, we want *
969
+ * to make sure it doesn't get moved due to some table *
970
+ * rehashing that comes after it's inserted. Thus, we *
971
+ * have to put it in last. This makes the loop weird. */
972
+ for ( bck = HashFirstBucket(ht); ; bck = HashNextBucket(ht) )
973
+ {
974
+ if ( bck == NULL ) /* we're done iterating, so look at bckWatch */
975
+ {
976
+ bck = bckWatch;
977
+ if ( bck == NULL ) /* I guess bckWatch wasn't specified */
978
+ break;
979
+ }
980
+ else if ( bck == bckWatch )
981
+ continue; /* ignore if we see it during the iteration */
982
+
983
+ offset = 0; /* a new i for a new bucket */
984
+ for ( iBucketFirst = Hash(ht, KEY_PTR(ht, bck->key), cNewBuckets);
985
+ !Table(IsEmpty)(tableNew, iBucketFirst);
986
+ iBucketFirst = (iBucketFirst + JUMP(KEY_PTR(ht,bck->key), offset))
987
+ & (cNewBuckets-1) )
988
+ ;
989
+ bckNew = Table(Insert)(tableNew, bck, iBucketFirst, &fOverwrite);
990
+ if ( bck == bckWatch ) /* we're done with the last thing to do */
991
+ break;
992
+ }
993
+ Table(Free)(ht->table, ht->cBuckets);
994
+ ht->table = tableNew;
995
+ ht->cBuckets = cNewBuckets;
996
+ ht->cDeletedItems = 0;
997
+ return bckNew; /* new position of bckWatch, which was inserted last */
998
+ }
999
+
1000
+ /*************************************************************************\
1001
+ | Find() |
1002
+ | Does the quadratic searching stuff. RETURNS NULL if we don't |
1003
+ | find an object with the given key, and a pointer to the Item |
1004
+ | holding the key, if we do. Also sets posLastFind. If piEmpty is |
1005
+ | non-NULL, we set it to the first open bucket we pass; helpful for |
1006
+ | doing a later insert if the search fails, for instance. |
1007
+ \*************************************************************************/
1008
+
1009
+ static HTItem *Find(HashTable *ht, ulong key, ulong *piEmpty)
1010
+ {
1011
+ ulong iBucketFirst;
1012
+ HTItem *item;
1013
+ ulong offset = 0; /* the i in h(x) + i*(i-1)/2 */
1014
+ int fFoundEmpty = 0; /* set when we pass over an empty bucket */
1015
+
1016
+ ht->posLastFind = NULL; /* set up for failure: a new find starts */
1017
+ if ( ht->table == NULL ) /* empty hash table: find is bound to fail */
1018
+ return NULL;
1019
+
1020
+ iBucketFirst = Hash(ht, KEY_PTR(ht, key), ht->cBuckets);
1021
+ while ( 1 ) /* now try all i > 0 */
1022
+ {
1023
+ item = Table(Find)(ht->table, iBucketFirst);
1024
+ if ( item == NULL ) /* it's not in the table */
1025
+ {
1026
+ if ( piEmpty && !fFoundEmpty ) *piEmpty = iBucketFirst;
1027
+ return NULL;
1028
+ }
1029
+ else
1030
+ {
1031
+ if ( IS_BCK_DELETED(item) ) /* always 0 ifdef INSERT_ONLY */
1032
+ {
1033
+ if ( piEmpty && !fFoundEmpty )
1034
+ {
1035
+ *piEmpty = iBucketFirst;
1036
+ fFoundEmpty = 1;
1037
+ }
1038
+ } else
1039
+ if ( !KEY_CMP(ht, key, item->key) ) /* must be occupied */
1040
+ {
1041
+ ht->posLastFind = item;
1042
+ return item; /* we found it! */
1043
+ }
1044
+ }
1045
+ iBucketFirst = ((iBucketFirst + JUMP(KEY_PTR(ht, key), offset))
1046
+ & (ht->cBuckets-1));
1047
+ }
1048
+ }
1049
+
1050
+ /*************************************************************************\
1051
+ | Insert() |
1052
+ | If an item with the key already exists in the hashtable, RETURNS |
1053
+ | a pointer to the item (replacing its data if fOverwrite is 1). |
1054
+ | If not, we find the first place-to-insert (which Find() is nice |
1055
+ | enough to set for us) and insert the item there, RETURNing a |
1056
+ | pointer to the item. We might grow the hashtable if it's getting |
1057
+ | full. Note we include buckets holding DELETED when determining |
1058
+ | fullness, because they slow down searching. |
1059
+ \*************************************************************************/
1060
+
1061
/* Returns the smallest power of 2 strictly greater than x. If that value
 * would not fit in a ulong, returns the largest power of 2 that does fit.
 * NextPow2(0) is 1, so callers never receive a zero bucket count. */
static ulong NextPow2(ulong x)
{
   if ( x == 0 )                  /* 2^0 is the next power of 2 above 0 */
      return 1;
   if ( ((x << 1) >> 1) != x )    /* top bit set: doubling would overflow */
      x >>= 1;                    /* so settle for the highest power we can */
   while ( (x & (x-1)) != 0 )     /* clear low bits until only the top is left */
      x &= (x-1);
   return x << 1;                 /* makes it the *next* power of 2 */
}
1069
+
1070
+ static HTItem *Insert(HashTable *ht, ulong key, ulong data, int fOverwrite)
1071
+ {
1072
+ HTItem *item, bckInsert;
1073
+ ulong iEmpty; /* first empty bucket key probes */
1074
+
1075
+ if ( ht->table == NULL ) /* empty hash table: find is bound to fail */
1076
+ return NULL;
1077
+ item = Find(ht, key, &iEmpty);
1078
+ ht->posLastFind = NULL; /* last operation is insert, not find */
1079
+ if ( item )
1080
+ {
1081
+ if ( fOverwrite )
1082
+ item->data = data; /* key already matches */
1083
+ return item;
1084
+ }
1085
+
1086
+ COPY_KEY(ht, bckInsert.key, key); /* make our own copy of the key */
1087
+ bckInsert.data = data; /* oh, and the data too */
1088
+ item = Table(Insert)(ht->table, &bckInsert, iEmpty, &fOverwrite);
1089
+ if ( fOverwrite ) /* we overwrote a deleted bucket */
1090
+ ht->cDeletedItems--;
1091
+ ht->cItems++; /* insert couldn't have overwritten */
1092
+ if ( ht->cDeltaGoalSize > 0 ) /* closer to our goal size */
1093
+ ht->cDeltaGoalSize--;
1094
+ if ( ht->cItems + ht->cDeletedItems >= ht->cBuckets * OCCUPANCY_PCT
1095
+ || ht->cDeltaGoalSize < 0 ) /* we must've overestimated # of deletes */
1096
+ item = Rehash(ht,
1097
+ NextPow2((ulong)(((ht->cDeltaGoalSize > 0 ?
1098
+ ht->cDeltaGoalSize : 0)
1099
+ + ht->cItems) / OCCUPANCY_PCT)),
1100
+ item);
1101
+ return item;
1102
+ }
1103
+
1104
+ /*************************************************************************\
1105
+ | Delete() |
1106
+ | Removes the item from the hashtable, and if fShrink is 1, will |
1107
+ | shrink the hashtable if it's too small (ie even after halving, |
1108
+ | the ht would be less than half full, though in order to avoid |
1109
+ | oscillating table size, we insist that after halving the ht would |
1110
+ | be less than 40% full). RETURNS 1 if the item was found, 0 else. |
1111
+ | If fLastFindSet is true, then this function is basically |
1112
+ | DeleteLastFind. |
1113
+ \*************************************************************************/
1114
+
1115
+ static int Delete(HashTable *ht, ulong key, int fShrink, int fLastFindSet)
1116
+ {
1117
+ if ( !fLastFindSet && !Find(ht, key, NULL) )
1118
+ return 0;
1119
+ SET_BCK_DELETED(ht, ht->posLastFind); /* find set this, how nice */
1120
+ ht->cItems--;
1121
+ ht->cDeletedItems++;
1122
+ if ( ht->cDeltaGoalSize < 0 ) /* heading towards our goal of deletion */
1123
+ ht->cDeltaGoalSize++;
1124
+
1125
+ if ( fShrink && ht->cItems < ht->cBuckets * OCCUPANCY_PCT*0.4
1126
+ && ht->cDeltaGoalSize >= 0 /* wait until we're done deleting */
1127
+ && (ht->cBuckets >> 1) >= MIN_HASH_SIZE ) /* shrink */
1128
+ Rehash(ht,
1129
+ NextPow2((ulong)((ht->cItems+ht->cDeltaGoalSize)/OCCUPANCY_PCT)),
1130
+ NULL);
1131
+ ht->posLastFind = NULL; /* last operation is delete, not find */
1132
+ return 1;
1133
+ }
1134
+
1135
+
1136
+ /* ======================================================================== */
1137
+ /* USER-VISIBLE API */
1138
+ /* ---------------------- */
1139
+
1140
+ /*************************************************************************\
1141
+ | AllocateHashTable() |
1142
+ | ClearHashTable() |
1143
+ | FreeHashTable() |
1144
+ | Allocate() allocates a hash table and sets up size parameters. |
1145
+ | Free() frees it. Clear() deletes all the items from the hash |
1146
+ | table, but frees not. |
1147
+ | cchKey is <= 0 if the keys you send me are meant to be pointers |
1148
+ | to \0-terminated strings. Then -cchKey is the maximum key size. |
1149
+ | If cchKey < one word (ulong), the keys you send me are the keys |
1150
+ | themselves; else the keys you send me are pointers to the data. |
1151
+ | If fSaveKeys is 1, we copy any keys given to us to insert. We |
1152
+ | also free these keys when freeing the hash table. If it's 0, the |
1153
+ | user is responsible for key space management. |
1154
+ | AllocateHashTable() RETURNS a hash table; the others TAKE one. |
1155
+ \*************************************************************************/
1156
+
1157
+ HashTable *AllocateHashTable(int cchKey, int fSaveKeys)
1158
+ {
1159
+ HashTable *ht;
1160
+
1161
+ ht = (HashTable *) HTsmalloc(sizeof(*ht)); /* set everything to 0 */
1162
+ ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);
1163
+ ht->cchKey = cchKey <= 0 ? NULL_TERMINATED : cchKey;
1164
+ ht->cItems = 0;
1165
+ ht->cDeletedItems = 0;
1166
+ ht->fSaveKeys = fSaveKeys;
1167
+ ht->cDeltaGoalSize = 0;
1168
+ ht->iter = HTsmalloc( sizeof(TableIterator) );
1169
+
1170
+ ht->fpData = NULL; /* set by HashLoad, maybe */
1171
+ ht->bckData.data = (ulong) NULL; /* this must be done */
1172
+ HTSetupKeyTrunc(); /* in util.c */
1173
+ return ht;
1174
+ }
1175
+
1176
+ void ClearHashTable(HashTable *ht)
1177
+ {
1178
+ HTItem *bck;
1179
+
1180
+ if ( STORES_PTR(ht) && ht->fSaveKeys ) /* need to free keys */
1181
+ for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) )
1182
+ {
1183
+ FREE_KEY(ht, bck->key);
1184
+ if ( ht->fSaveKeys == 2 ) /* this means key stored in one block */
1185
+ break; /* ...so only free once */
1186
+ }
1187
+ Table(Free)(ht->table, ht->cBuckets);
1188
+ ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);
1189
+
1190
+ ht->cItems = 0;
1191
+ ht->cDeletedItems = 0;
1192
+ ht->cDeltaGoalSize = 0;
1193
+ ht->posLastFind = NULL;
1194
+ ht->fpData = NULL; /* no longer HashLoading */
1195
+ if ( ht->bckData.data ) free( (char *)(ht)->bckData.data);
1196
+ ht->bckData.data = (ulong) NULL;
1197
+ }
1198
+
1199
+ void FreeHashTable(HashTable *ht)
1200
+ {
1201
+ ClearHashTable(ht);
1202
+ if ( ht->iter ) HTfree(ht->iter, sizeof(TableIterator));
1203
+ if ( ht->table ) Table(Free)(ht->table, ht->cBuckets);
1204
+ free(ht);
1205
+ }
1206
+
1207
+ /*************************************************************************\
1208
+ | HashFind() |
1209
+ | HashFindLast() |
1210
+ | HashFind(): looks in h(x) + i(i-1)/2 % t as i goes up from 0 |
1211
+ | until we either find the key or hit an empty bucket. RETURNS a |
1212
+ | pointer to the item in the hit bucket, if we find it, else |
1213
+ | RETURNS NULL. |
1214
+ | HashFindLast() returns the item returned by the last |
1215
+ | HashFind(), which may be NULL if the last HashFind() failed. |
1216
+ | LOAD_AND_RETURN reads the data from off disk, if necessary. |
1217
+ \*************************************************************************/
1218
+
1219
/* Public lookup: truncates the caller's key with KEY_TRUNC, searches with
 * Find() (no empty-slot output requested), and passes the result through
 * LOAD_AND_RETURN. */
HTItem *HashFind(HashTable *ht, ulong key)
{
   /* NOTE(review): LOAD_AND_RETURN is a macro defined outside this chunk;
    * it evidently supplies the return statement -- confirm before
    * restructuring this body. */
   LOAD_AND_RETURN(ht, Find(ht, KEY_TRUNC(ht, key), NULL));
}
1223
+
1224
/* Passes ht->posLastFind through LOAD_AND_RETURN. Find() sets that field to
 * the matched item, or to NULL when the search fails. */
HTItem *HashFindLast(HashTable *ht)
{
   /* NOTE(review): LOAD_AND_RETURN is a macro defined outside this chunk;
    * it evidently supplies the return statement -- confirm. */
   LOAD_AND_RETURN(ht, ht->posLastFind);
}
1228
+
1229
+ /*************************************************************************\
1230
+ | HashFindOrInsert() |
1231
+ | HashFindOrInsertItem() |
1232
+ | HashInsert() |
1233
+ | HashInsertItem() |
1234
+ | HashDelete() |
1235
+ | HashDeleteLast() |
1236
+ | Pretty obvious what these guys do. Some take buckets (items), |
1237
+ | some take keys and data separately. All things RETURN the bucket |
1238
+ | (a pointer into the hashtable) if appropriate. |
1239
+ \*************************************************************************/
1240
+
1241
+ HTItem *HashFindOrInsert(HashTable *ht, ulong key, ulong dataInsert)
1242
+ {
1243
+ /* This is equivalent to Insert without samekey-overwrite */
1244
+ return Insert(ht, KEY_TRUNC(ht, key), dataInsert, 0);
1245
+ }
1246
+
1247
+ HTItem *HashFindOrInsertItem(HashTable *ht, HTItem *pItem)
1248
+ {
1249
+ return HashFindOrInsert(ht, pItem->key, pItem->data);
1250
+ }
1251
+
1252
+ HTItem *HashInsert(HashTable *ht, ulong key, ulong data)
1253
+ {
1254
+ return Insert(ht, KEY_TRUNC(ht, key), data, SAMEKEY_OVERWRITE);
1255
+ }
1256
+
1257
+ HTItem *HashInsertItem(HashTable *ht, HTItem *pItem)
1258
+ {
1259
+ return HashInsert(ht, pItem->key, pItem->data);
1260
+ }
1261
+
1262
+ int HashDelete(HashTable *ht, ulong key)
1263
+ {
1264
+ return Delete(ht, KEY_TRUNC(ht, key), !FAST_DELETE, 0);
1265
+ }
1266
+
1267
+ int HashDeleteLast(HashTable *ht)
1268
+ {
1269
+ if ( !ht->posLastFind ) /* last find failed */
1270
+ return 0;
1271
+ return Delete(ht, 0, !FAST_DELETE, 1); /* no need to specify a key */
1272
+ }
1273
+
1274
+ /*************************************************************************\
1275
+ | HashFirstBucket() |
1276
+ | HashNextBucket() |
1277
+ | Iterates through the items in the hashtable by iterating through |
1278
+ | the table. Since we know about deleted buckets and loading data |
1279
+ | off disk, and the table doesn't, our job is to take care of these |
1280
+ | things. RETURNS a bucket, or NULL after the last bucket. |
1281
+ \*************************************************************************/
1282
+
1283
+ HTItem *HashFirstBucket(HashTable *ht)
1284
+ {
1285
+ HTItem *retval;
1286
+
1287
+ for ( retval = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
1288
+ retval; retval = Table(NextBucket)(ht->iter) )
1289
+ if ( !IS_BCK_DELETED(retval) )
1290
+ LOAD_AND_RETURN(ht, retval);
1291
+ return NULL;
1292
+ }
1293
+
1294
+ HTItem *HashNextBucket(HashTable *ht)
1295
+ {
1296
+ HTItem *retval;
1297
+
1298
+ while ( (retval=Table(NextBucket)(ht->iter)) )
1299
+ if ( !IS_BCK_DELETED(retval) )
1300
+ LOAD_AND_RETURN(ht, retval);
1301
+ return NULL;
1302
+ }
1303
+
1304
+ /*************************************************************************\
1305
+ | HashSetDeltaGoalSize() |
1306
+ | If we're going to insert 100 items, set the delta goal size to |
1307
+ | 100 and we take that into account when inserting. Likewise, if |
1308
+ | we're going to delete 100 items, set it to -100 and we won't |
1309
+ | rehash until all 100 have been done. It's ok to be wrong, but |
1310
+ | it's efficient to be right. Returns the delta value. |
1311
+ \*************************************************************************/
1312
+
1313
+ int HashSetDeltaGoalSize(HashTable *ht, int delta)
1314
+ {
1315
+ ht->cDeltaGoalSize = delta;
1316
+ #if FAST_DELETE == 1 || defined INSERT_ONLY
1317
+ if ( ht->cDeltaGoalSize < 0 ) /* for fast delete, we never */
1318
+ ht->cDeltaGoalSize = 0; /* ...rehash after deletion */
1319
+ #endif
1320
+ return ht->cDeltaGoalSize;
1321
+ }
1322
+
1323
+
1324
+ /*************************************************************************\
1325
+ | HashSave() |
1326
+ | HashLoad() |
1327
+ | HashLoadKeys() |
1328
+ | Routines for saving and loading the hashtable from disk. We can |
1329
+ | then use the hashtable in two ways: loading it back into memory |
1330
+ | (HashLoad()) or loading only the keys into memory, in which case |
1331
+ | the data for a given key is loaded off disk when the key is |
1332
+ | retrieved. The data is freed when something new is retrieved in |
1333
+ | its place, so this is not a "lazy-load" scheme. |
1334
+ | The key is saved automatically and restored upon load, but the |
1335
+ | user needs to specify a routine for reading and writing the data. |
1336
+ | fSaveKeys is of course set to 1 when you read in a hashtable. |
1337
+ | HashLoad RETURNS a newly allocated hashtable. |
1338
+ | DATA_WRITE() takes an fp and a char * (representing the data |
1339
+ | field), and must perform two separate tasks. If fp is NULL, |
1340
+ | return the number of bytes written. If not, writes the data to |
1341
+ | disk at the place the fp points to. |
1342
+ | DATA_READ() takes an fp and the number of bytes in the data |
1343
+ | field, and returns a char * which points to wherever you've |
1344
+ | written the data. Thus, you must allocate memory for the data. |
1345
+ | Both dataRead and dataWrite may be NULL if you just wish to |
1346
+ | store the data field directly, as an integer. |
1347
+ \*************************************************************************/
1348
+
1349
+ void HashSave(FILE *fp, HashTable *ht, int (*dataWrite)(FILE *, char *))
1350
+ {
1351
+ long cchData, posStart;
1352
+ HTItem *bck;
1353
+
1354
+ /* File format: magic number (4 bytes)
1355
+ : cchKey (one word)
1356
+ : cItems (one word)
1357
+ : cDeletedItems (one word)
1358
+ : table info (buckets and a bitmap)
1359
+ : cchAllKeys (one word)
1360
+ Then the keys, in a block. If cchKey is NULL_TERMINATED, the keys
1361
+ are null-terminated too, otherwise this takes up cchKey*cItems bytes.
1362
+ Note that keys are not written for DELETED buckets.
1363
+ Then the data:
1364
+ : EITHER DELETED (one word) to indicate it's a deleted bucket,
1365
+ : OR number of bytes for this (non-empty) bucket's data
1366
+ (one word). This is not stored if dataWrite == NULL
1367
+ since the size is known to be sizeof(ul). Plus:
1368
+ : the data for this bucket (variable length)
1369
+ All words are in network byte order. */
1370
+
1371
+ fprintf(fp, "%s", MAGIC_KEY);
1372
+ WRITE_UL(fp, ht->cchKey); /* WRITE_UL, READ_UL, etc in fks-hash.h */
1373
+ WRITE_UL(fp, ht->cItems);
1374
+ WRITE_UL(fp, ht->cDeletedItems);
1375
+ Table(Write)(fp, ht->table, ht->cBuckets); /* writes cBuckets too */
1376
+
1377
+ WRITE_UL(fp, 0); /* to be replaced with sizeof(key block) */
1378
+ posStart = ftell(fp);
1379
+ for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) )
1380
+ fwrite(KEY_PTR(ht, bck->key), 1,
1381
+ (ht->cchKey == NULL_TERMINATED ?
1382
+ strlen(KEY_PTR(ht, bck->key))+1 : ht->cchKey), fp);
1383
+ cchData = ftell(fp) - posStart;
1384
+ fseek(fp, posStart - sizeof(unsigned long), SEEK_SET);
1385
+ WRITE_UL(fp, cchData);
1386
+ fseek(fp, 0, SEEK_END); /* done with our sojourn at the header */
1387
+
1388
+ /* Unlike HashFirstBucket, TableFirstBucket iters through deleted bcks */
1389
+ for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
1390
+ bck; bck = Table(NextBucket)(ht->iter) )
1391
+ if ( dataWrite == NULL || IS_BCK_DELETED(bck) )
1392
+ WRITE_UL(fp, bck->data);
1393
+ else /* write cchData followed by the data */
1394
+ {
1395
+ WRITE_UL(fp, (*dataWrite)(NULL, (char *)bck->data));
1396
+ (*dataWrite)(fp, (char *)bck->data);
1397
+ }
1398
+ }
1399
+
1400
+ static HashTable *HashDoLoad(FILE *fp, char * (*dataRead)(FILE *, int),
1401
+ HashTable *ht)
1402
+ {
1403
+ ulong cchKey;
1404
+ char szMagicKey[4], *rgchKeys;
1405
+ HTItem *bck;
1406
+
1407
+ fread(szMagicKey, 1, 4, fp);
1408
+ if ( strncmp(szMagicKey, MAGIC_KEY, 4) )
1409
+ {
1410
+ fprintf(stderr, "ERROR: not a hash table (magic key is %4.4s, not %s)\n",
1411
+ szMagicKey, MAGIC_KEY);
1412
+ exit(3);
1413
+ }
1414
+ Table(Free)(ht->table, ht->cBuckets); /* allocated in AllocateHashTable */
1415
+
1416
+ READ_UL(fp, ht->cchKey);
1417
+ READ_UL(fp, ht->cItems);
1418
+ READ_UL(fp, ht->cDeletedItems);
1419
+ ht->cBuckets = Table(Read)(fp, &ht->table); /* next is the table info */
1420
+
1421
+ READ_UL(fp, cchKey);
1422
+ rgchKeys = (char *) HTsmalloc( cchKey ); /* stores all the keys */
1423
+ fread(rgchKeys, 1, cchKey, fp);
1424
+ /* We use the table iterator so we don't try to LOAD_AND_RETURN */
1425
+ for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
1426
+ bck; bck = Table(NextBucket)(ht->iter) )
1427
+ {
1428
+ READ_UL(fp, bck->data); /* all we need if dataRead is NULL */
1429
+ if ( IS_BCK_DELETED(bck) ) /* always 0 if defined(INSERT_ONLY) */
1430
+ continue; /* this is why we read the data first */
1431
+ if ( dataRead != NULL ) /* if it's null, we're done */
1432
+ if ( !ht->fpData ) /* load data into memory */
1433
+ bck->data = (ulong)dataRead(fp, bck->data);
1434
+ else /* store location of data on disk */
1435
+ {
1436
+ fseek(fp, bck->data, SEEK_CUR); /* bck->data held size of data */
1437
+ bck->data = ftell(fp) - bck->data - sizeof(unsigned long);
1438
+ }
1439
+
1440
+ if ( ht->cchKey == NULL_TERMINATED ) /* now read the key */
1441
+ {
1442
+ bck->key = (ulong) rgchKeys;
1443
+ rgchKeys = strchr(rgchKeys, '\0') + 1; /* read past the string */
1444
+ }
1445
+ else
1446
+ {
1447
+ if ( STORES_PTR(ht) ) /* small keys stored directly */
1448
+ bck->key = (ulong) rgchKeys;
1449
+ else
1450
+ memcpy(&bck->key, rgchKeys, ht->cchKey);
1451
+ rgchKeys += ht->cchKey;
1452
+ }
1453
+ }
1454
+ if ( !STORES_PTR(ht) ) /* keys are stored directly */
1455
+ HTfree(rgchKeys - cchKey, cchKey); /* we've advanced rgchK to end */
1456
+ return ht;
1457
+ }
1458
+
1459
+ HashTable *HashLoad(FILE *fp, char * (*dataRead)(FILE *, int))
1460
+ {
1461
+ HashTable *ht;
1462
+ ht = AllocateHashTable(0, 2); /* cchKey set later, fSaveKey should be 2! */
1463
+ return HashDoLoad(fp, dataRead, ht);
1464
+ }
1465
+
1466
+ HashTable *HashLoadKeys(FILE *fp, char * (*dataRead)(FILE *, int))
1467
+ {
1468
+ HashTable *ht;
1469
+
1470
+ if ( dataRead == NULL )
1471
+ return HashLoad(fp, NULL); /* no reason not to load the data here */
1472
+ ht = AllocateHashTable(0, 2); /* cchKey set later, fSaveKey should be 2! */
1473
+ ht->fpData = fp; /* tells HashDoLoad() to only load keys */
1474
+ ht->dataRead = dataRead;
1475
+ return HashDoLoad(fp, dataRead, ht);
1476
+ }
1477
+
1478
+ /*************************************************************************\
1479
+ | PrintHashTable() |
1480
+ | A debugging tool. Prints the entire contents of the hash table, |
1481
+ | like so: <bin #>: key of the contents. Returns number of bytes |
1482
+ | allocated. If time is not -1, we print it as the time required |
1483
+ | for the hash. If iForm is 0, we just print the stats. If it's |
1484
+ | 1, we print the keys and data too, but the keys are printed as |
1485
+ | ulongs. If it's 2, we print the keys correctly (as long numbers |
1486
+ | or as strings). |
1487
+ \*************************************************************************/
1488
+
1489
+ ulong PrintHashTable(HashTable *ht, double time, int iForm)
1490
+ {
1491
+ ulong cbData = 0, cbBin = 0, cItems = 0, cOccupied = 0;
1492
+ HTItem *item;
1493
+
1494
+ printf("HASH TABLE.\n");
1495
+ if ( time > -1.0 )
1496
+ {
1497
+ printf("----------\n");
1498
+ printf("Time: %27.2f\n", time);
1499
+ }
1500
+
1501
+ for ( item = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
1502
+ item; item = Table(NextBucket)(ht->iter) )
1503
+ {
1504
+ cOccupied++; /* this includes deleted buckets */
1505
+ if ( IS_BCK_DELETED(item) ) /* we don't need you for anything else */
1506
+ continue;
1507
+ cItems++; /* this is for a sanity check */
1508
+ if ( STORES_PTR(ht) )
1509
+ cbData += ht->cchKey == NULL_TERMINATED ?
1510
+ WORD_ROUND(strlen((char *)item->key)+1) : ht->cchKey;
1511
+ else
1512
+ cbBin -= sizeof(item->key), cbData += sizeof(item->key);
1513
+ cbBin -= sizeof(item->data), cbData += sizeof(item->data);
1514
+ if ( iForm != 0 ) /* we want the actual contents */
1515
+ {
1516
+ if ( iForm == 2 && ht->cchKey == NULL_TERMINATED )
1517
+ printf("%s/%lu\n", (char *)item->key, item->data);
1518
+ else if ( iForm == 2 && STORES_PTR(ht) )
1519
+ printf("%.*s/%lu\n",
1520
+ (int)ht->cchKey, (char *)item->key, item->data);
1521
+ else /* either key actually is a ulong, or iForm == 1 */
1522
+ printf("%lu/%lu\n", item->key, item->data);
1523
+ }
1524
+ }
1525
+ assert( cItems == ht->cItems ); /* sanity check */
1526
+ cbBin = Table(Memory)(ht->cBuckets, cOccupied);
1527
+
1528
+ printf("----------\n");
1529
+ printf("%lu buckets (%lu bytes). %lu empty. %lu hold deleted items.\n"
1530
+ "%lu items (%lu bytes).\n"
1531
+ "%lu bytes total. %lu bytes (%2.1f%%) of this is ht overhead.\n",
1532
+ ht->cBuckets, cbBin, ht->cBuckets - cOccupied, cOccupied - ht->cItems,
1533
+ ht->cItems, cbData,
1534
+ cbData + cbBin, cbBin, cbBin*100.0/(cbBin+cbData));
1535
+
1536
+ return cbData + cbBin;
1537
+ }