google_hash 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +21 -0
- data/Rakefile +11 -0
- data/VERSION +1 -0
- data/ext/extconf.rb +15 -0
- data/ext/go.cpp +109 -0
- data/ext/sparsehash-1.5.2/AUTHORS +2 -0
- data/ext/sparsehash-1.5.2/COPYING +28 -0
- data/ext/sparsehash-1.5.2/ChangeLog +167 -0
- data/ext/sparsehash-1.5.2/INSTALL +236 -0
- data/ext/sparsehash-1.5.2/Makefile.am +157 -0
- data/ext/sparsehash-1.5.2/Makefile.in +1019 -0
- data/ext/sparsehash-1.5.2/NEWS +0 -0
- data/ext/sparsehash-1.5.2/README +149 -0
- data/ext/sparsehash-1.5.2/README.windows +25 -0
- data/ext/sparsehash-1.5.2/TODO +28 -0
- data/ext/sparsehash-1.5.2/aclocal.m4 +868 -0
- data/ext/sparsehash-1.5.2/compile +99 -0
- data/ext/sparsehash-1.5.2/config.guess +1516 -0
- data/ext/sparsehash-1.5.2/config.sub +1626 -0
- data/ext/sparsehash-1.5.2/configure +8054 -0
- data/ext/sparsehash-1.5.2/configure.ac +74 -0
- data/ext/sparsehash-1.5.2/depcomp +530 -0
- data/ext/sparsehash-1.5.2/doc/dense_hash_map.html +1591 -0
- data/ext/sparsehash-1.5.2/doc/dense_hash_set.html +1445 -0
- data/ext/sparsehash-1.5.2/doc/designstyle.css +115 -0
- data/ext/sparsehash-1.5.2/doc/implementation.html +365 -0
- data/ext/sparsehash-1.5.2/doc/index.html +69 -0
- data/ext/sparsehash-1.5.2/doc/performance.html +96 -0
- data/ext/sparsehash-1.5.2/doc/sparse_hash_map.html +1527 -0
- data/ext/sparsehash-1.5.2/doc/sparse_hash_set.html +1376 -0
- data/ext/sparsehash-1.5.2/doc/sparsetable.html +1393 -0
- data/ext/sparsehash-1.5.2/experimental/Makefile +9 -0
- data/ext/sparsehash-1.5.2/experimental/README +14 -0
- data/ext/sparsehash-1.5.2/experimental/example.c +54 -0
- data/ext/sparsehash-1.5.2/experimental/libchash.c +1537 -0
- data/ext/sparsehash-1.5.2/experimental/libchash.h +252 -0
- data/ext/sparsehash-1.5.2/google-sparsehash.sln +47 -0
- data/ext/sparsehash-1.5.2/install-sh +323 -0
- data/ext/sparsehash-1.5.2/m4/acx_pthread.m4 +363 -0
- data/ext/sparsehash-1.5.2/m4/google_namespace.m4 +42 -0
- data/ext/sparsehash-1.5.2/m4/namespaces.m4 +15 -0
- data/ext/sparsehash-1.5.2/m4/stl_hash.m4 +70 -0
- data/ext/sparsehash-1.5.2/m4/stl_hash_fun.m4 +36 -0
- data/ext/sparsehash-1.5.2/m4/stl_namespace.m4 +25 -0
- data/ext/sparsehash-1.5.2/missing +360 -0
- data/ext/sparsehash-1.5.2/mkinstalldirs +158 -0
- data/ext/sparsehash-1.5.2/packages/deb.sh +74 -0
- data/ext/sparsehash-1.5.2/packages/deb/README +7 -0
- data/ext/sparsehash-1.5.2/packages/deb/changelog +107 -0
- data/ext/sparsehash-1.5.2/packages/deb/compat +1 -0
- data/ext/sparsehash-1.5.2/packages/deb/control +17 -0
- data/ext/sparsehash-1.5.2/packages/deb/copyright +35 -0
- data/ext/sparsehash-1.5.2/packages/deb/docs +16 -0
- data/ext/sparsehash-1.5.2/packages/deb/rules +117 -0
- data/ext/sparsehash-1.5.2/packages/deb/sparsehash.dirs +2 -0
- data/ext/sparsehash-1.5.2/packages/deb/sparsehash.install +2 -0
- data/ext/sparsehash-1.5.2/packages/rpm.sh +86 -0
- data/ext/sparsehash-1.5.2/packages/rpm/rpm.spec +61 -0
- data/ext/sparsehash-1.5.2/src/config.h.in +131 -0
- data/ext/sparsehash-1.5.2/src/config.h.include +23 -0
- data/ext/sparsehash-1.5.2/src/google/dense_hash_map +310 -0
- data/ext/sparsehash-1.5.2/src/google/dense_hash_set +287 -0
- data/ext/sparsehash-1.5.2/src/google/sparse_hash_map +294 -0
- data/ext/sparsehash-1.5.2/src/google/sparse_hash_set +275 -0
- data/ext/sparsehash-1.5.2/src/google/sparsehash/densehashtable.h +1062 -0
- data/ext/sparsehash-1.5.2/src/google/sparsehash/sparsehashtable.h +1015 -0
- data/ext/sparsehash-1.5.2/src/google/sparsetable +1468 -0
- data/ext/sparsehash-1.5.2/src/google/type_traits.h +250 -0
- data/ext/sparsehash-1.5.2/src/hashtable_unittest.cc +1375 -0
- data/ext/sparsehash-1.5.2/src/simple_test.cc +103 -0
- data/ext/sparsehash-1.5.2/src/sparsetable_unittest.cc +696 -0
- data/ext/sparsehash-1.5.2/src/time_hash_map.cc +488 -0
- data/ext/sparsehash-1.5.2/src/type_traits_unittest.cc +492 -0
- data/ext/sparsehash-1.5.2/src/windows/config.h +149 -0
- data/ext/sparsehash-1.5.2/src/windows/google/sparsehash/sparseconfig.h +32 -0
- data/ext/sparsehash-1.5.2/src/windows/port.cc +63 -0
- data/ext/sparsehash-1.5.2/src/windows/port.h +81 -0
- data/ext/sparsehash-1.5.2/src/words +8944 -0
- data/ext/sparsehash-1.5.2/vsprojects/hashtable_unittest/hashtable_unittest.vcproj +187 -0
- data/ext/sparsehash-1.5.2/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj +172 -0
- data/ext/sparsehash-1.5.2/vsprojects/time_hash_map/time_hash_map.vcproj +187 -0
- data/ext/sparsehash-1.5.2/vsprojects/type_traits_unittest/type_traits_unittest.vcproj +169 -0
- data/ext/test.rb +10 -0
- data/test/spec.go +70 -0
- metadata +147 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
This is a C version of sparsehash (and also, maybe, densehash) that I
|
2
|
+
wrote way back when, and which served as the inspiration for the C++
|
3
|
+
version. The API for the C version is much uglier than the C++,
|
4
|
+
because of the lack of template support. I believe the class works,
|
5
|
+
but I'm not convinced it's really flexible or easy enough to use.
|
6
|
+
|
7
|
+
It would be nice to rework this C class to follow the C++ API as
|
8
|
+
closely as possible (eg have a set_deleted_key() instead of using a
|
9
|
+
#define like this code does now). I believe the code compiles and
|
10
|
+
runs, if anybody is interested in using it now, but it's subject to
|
11
|
+
major change in the future, as people work on it.
|
12
|
+
|
13
|
+
Craig Silverstein
|
14
|
+
20 March 2005
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <assert.h>
|
4
|
+
#include "libchash.h"
|
5
|
+
|
6
|
+
static void TestInsert() {
|
7
|
+
struct HashTable* ht;
|
8
|
+
HTItem* bck;
|
9
|
+
|
10
|
+
ht = AllocateHashTable(1, 0); /* value is 1 byte, 0: don't copy keys */
|
11
|
+
|
12
|
+
HashInsert(ht, PTR_KEY(ht, "January"), 31); /* 0: don't overwrite old val */
|
13
|
+
bck = HashInsert(ht, PTR_KEY(ht, "February"), 28);
|
14
|
+
bck = HashInsert(ht, PTR_KEY(ht, "March"), 31);
|
15
|
+
|
16
|
+
bck = HashFind(ht, PTR_KEY(ht, "February"));
|
17
|
+
assert(bck);
|
18
|
+
assert(bck->data == 28);
|
19
|
+
|
20
|
+
FreeHashTable(ht);
|
21
|
+
}
|
22
|
+
|
23
|
+
static void TestFindOrInsert() {
|
24
|
+
struct HashTable* ht;
|
25
|
+
int i;
|
26
|
+
int iterations = 1000000;
|
27
|
+
int range = 30; /* random number between 1 and 30 */
|
28
|
+
|
29
|
+
ht = AllocateHashTable(4, 0); /* value is 4 bytes, 0: don't copy keys */
|
30
|
+
|
31
|
+
/* We'll test how good rand() is as a random number generator */
|
32
|
+
for (i = 0; i < iterations; ++i) {
|
33
|
+
int key = rand() % range;
|
34
|
+
HTItem* bck = HashFindOrInsert(ht, key, 0); /* initialize to 0 */
|
35
|
+
bck->data++; /* found one more of them */
|
36
|
+
}
|
37
|
+
|
38
|
+
for (i = 0; i < range; ++i) {
|
39
|
+
HTItem* bck = HashFind(ht, i);
|
40
|
+
if (bck) {
|
41
|
+
printf("%3d: %d\n", bck->key, bck->data);
|
42
|
+
} else {
|
43
|
+
printf("%3d: 0\n", i);
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
FreeHashTable(ht);
|
48
|
+
}
|
49
|
+
|
50
|
+
/* Runs the two example routines in order and reports success. */
int main(int argc, char** argv) {
  (void)argc;   /* command-line arguments are not used */
  (void)argv;

  TestInsert();
  TestFindOrInsert();

  return 0;
}
|
@@ -0,0 +1,1537 @@
|
|
1
|
+
/* Copyright (c) 1998 - 2005, Google Inc.
|
2
|
+
* All rights reserved.
|
3
|
+
*
|
4
|
+
* Redistribution and use in source and binary forms, with or without
|
5
|
+
* modification, are permitted provided that the following conditions are
|
6
|
+
* met:
|
7
|
+
*
|
8
|
+
* * Redistributions of source code must retain the above copyright
|
9
|
+
* notice, this list of conditions and the following disclaimer.
|
10
|
+
* * Redistributions in binary form must reproduce the above
|
11
|
+
* copyright notice, this list of conditions and the following disclaimer
|
12
|
+
* in the documentation and/or other materials provided with the
|
13
|
+
* distribution.
|
14
|
+
* * Neither the name of Google Inc. nor the names of its
|
15
|
+
* contributors may be used to endorse or promote products derived from
|
16
|
+
* this software without specific prior written permission.
|
17
|
+
*
|
18
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
*
|
30
|
+
* ---
|
31
|
+
* Author: Craig Silverstein
|
32
|
+
*
|
33
|
+
* This library is intended to be used for in-memory hash tables,
|
34
|
+
* though it provides rudimentary permanent-storage capabilities.
|
35
|
+
* It attempts to be fast, portable, and small. The best algorithm
|
36
|
+
* to fulfill these goals is an internal probing hashing algorithm,
|
37
|
+
* as in Knuth, _Art of Computer Programming_, vol III. Unlike
|
38
|
+
* chained (open) hashing, it doesn't require a pointer for every
|
39
|
+
* item, yet it is still constant time lookup in practice.
|
40
|
+
*
|
41
|
+
* Also to save space, we let the contents (both data and key) that
|
42
|
+
* you insert be a union: if the key/data is small, we store it
|
43
|
+
* directly in the hashtable, otherwise we store a pointer to it.
|
44
|
+
* To keep you from having to figure out which, use KEY_PTR and
|
45
|
+
* PTR_KEY to convert between the arguments to these functions and
|
46
|
+
* a pointer to the real data. For instance:
|
47
|
+
* char key[] = "ab", *key2;
|
48
|
+
* HTItem *bck; HashTable *ht;
|
49
|
+
* HashInsert(ht, PTR_KEY(ht, key), 0);
|
50
|
+
* bck = HashFind(ht, PTR_KEY(ht, "ab"));
|
51
|
+
* key2 = KEY_PTR(ht, bck->key);
|
52
|
+
*
|
53
|
+
* There is a rich set of operations supported:
|
54
|
+
* AllocateHashTable() -- Allocates a hashtable structure and
|
55
|
+
* returns it.
|
56
|
+
* cchKey: if it's a positive number, then each key is a
|
57
|
+
* fixed-length record of that length. If it's 0,
|
58
|
+
* the key is assumed to be a \0-terminated string.
|
59
|
+
* fSaveKey: normally, you are responsible for allocating
|
60
|
+
* space for the key. If this is 1, we make a
|
61
|
+
* copy of the key for you.
|
62
|
+
* ClearHashTable() -- Removes everything from a hashtable
|
63
|
+
* FreeHashTable() -- Frees memory used by a hashtable
|
64
|
+
*
|
65
|
+
* HashFind() -- takes a key (use PTR_KEY) and returns the
|
66
|
+
* HTItem containing that key, or NULL if the
|
67
|
+
* key is not in the hashtable.
|
68
|
+
* HashFindLast() -- returns the item found by last HashFind()
|
69
|
+
* HashFindOrInsert() -- inserts the key/data pair if the key
|
70
|
+
* is not already in the hashtable, or
|
71
|
+
* returns the appropriate HTItem if it is.
|
72
|
+
* HashFindOrInsertItem() -- takes key/data as an HTItem.
|
73
|
+
* HashInsert() -- adds a key/data pair to the hashtable. What
|
74
|
+
* it does if the key is already in the table
|
75
|
+
* depends on the value of SAMEKEY_OVERWRITE.
|
76
|
+
* HashInsertItem() -- takes key/data as an HTItem.
|
77
|
+
* HashDelete() -- removes a key/data pair from the hashtable,
|
78
|
+
* if it's there. RETURNS 1 if it was there,
|
79
|
+
* 0 else.
|
80
|
+
* If you use sparse tables and never delete, the full data
|
81
|
+
* space is available. Otherwise we steal -2 (maybe -3),
|
82
|
+
* so you can't have data fields with those values.
|
83
|
+
* HashDeleteLast() -- deletes the item returned by the last Find().
|
84
|
+
*
|
85
|
+
* HashFirstBucket() -- used to iterate over the buckets in a
|
86
|
+
* hashtable. DON'T INSERT OR DELETE WHILE
|
87
|
+
* ITERATING! You can't nest iterations.
|
88
|
+
* HashNextBucket() -- RETURNS NULL at the end of iterating.
|
89
|
+
*
|
90
|
+
* HashSetDeltaGoalSize() -- if you're going to insert 1000 items
|
91
|
+
* at once, call this fn with arg 1000.
|
92
|
+
* It grows the table more intelligently.
|
93
|
+
*
|
94
|
+
* HashSave() -- saves the hashtable to a file. It saves keys ok,
|
95
|
+
* but it doesn't know how to interpret the data field,
|
96
|
+
* so if the data field is a pointer to some complex
|
97
|
+
* structure, you must send a function that takes a
|
98
|
+
* file pointer and a pointer to the structure, and
|
99
|
+
* write whatever you want to write. It should return
|
100
|
+
* the number of bytes written. If the file is NULL,
|
101
|
+
* it should just return the number of bytes it would
|
102
|
+
* write, without writing anything.
|
103
|
+
* If your data field is just an integer, not a
|
104
|
+
* pointer, just send NULL for the function.
|
105
|
+
* HashLoad() -- loads a hashtable. It needs a function that takes
|
106
|
+
* a file and the size of the structure, and expects
|
107
|
+
* you to read in the structure and return a pointer
|
108
|
+
* to it. You must do memory allocation, etc. If
|
109
|
+
* the data is just a number, send NULL.
|
110
|
+
* HashLoadKeys() -- unlike HashLoad(), doesn't load the data off disk
|
111
|
+
* until needed. This saves memory, but if you look
|
112
|
+
* up the same key a lot, it does a disk access each
|
113
|
+
* time.
|
114
|
+
* You can't do Insert() or Delete() on hashtables that were loaded
|
115
|
+
* from disk.
|
116
|
+
*
|
117
|
+
* See libchash.h for parameters you can modify. Make sure LOG_WORD_SIZE
|
118
|
+
* is defined correctly for your machine! (5 for 32 bit words, 6 for 64).
|
119
|
+
*/
|
120
|
+
|
121
|
+
#include <stdlib.h>
|
122
|
+
#include <stdio.h>
|
123
|
+
#include <string.h> /* for strcmp, memcmp, etc */
|
124
|
+
#include <sys/types.h> /* ULTRIX needs this for in.h */
|
125
|
+
#include <netinet/in.h> /* for reading/writing hashtables */
|
126
|
+
#include <assert.h>
|
127
|
+
#include "libchash.h" /* all the types */
|
128
|
+
|
129
|
+
/* if keys are stored directly but cchKey is less than sizeof(ulong), */
|
130
|
+
/* this cuts off the bits at the end */
|
131
|
+
char grgKeyTruncMask[sizeof(ulong)][sizeof(ulong)];
|
132
|
+
#define KEY_TRUNC(ht, key) \
|
133
|
+
( STORES_PTR(ht) || (ht)->cchKey == sizeof(ulong) \
|
134
|
+
? (key) : ((key) & *(ulong *)&(grgKeyTruncMask[(ht)->cchKey][0])) )
|
135
|
+
|
136
|
+
/* WORD_ROUND(num) -- round num up to a multiple of the word size in bytes.
 * LOG_WORD_SIZE counts bits, so LOG_WORD_SIZE-3 is log2 of the bytes per
 * word.  num is parenthesized so that an operator expression passed as the
 * argument (for example WORD_ROUND(n >> 1)) rounds the intended value. */
#define WORD_ROUND(num)   ( (((num)-1) | ((1<<(LOG_WORD_SIZE-3))-1)) + 1 )

#define NULL_TERMINATED  0   /* val of cchKey if keys are null-term strings */
|
139
|
+
|
140
|
+
/* Useful operations we do to keys: compare them, copy them, free them */
|
141
|
+
|
142
|
+
/* KEY_CMP(ht, key1, key2) -- evaluates to 0 when the two keys are equal and
 * to a non-zero value otherwise.
 *    - table stores keys directly: the arithmetic difference of the keys;
 *    - table stores pointers and both point at the same place: 0;
 *    - otherwise strcmp() for \0-terminated keys, or memcmp() over
 *      HashKeySize(ht) bytes for fixed-length keys.
 * key1 and key2 are parenthesized inside the casts so that expression
 * arguments are converted as a whole.  Arguments are evaluated more than
 * once. */
#define KEY_CMP(ht, key1, key2)                                               \
   ( !STORES_PTR(ht)  ? (key1) - (key2) :                                     \
     (key1) == (key2) ? 0 :                                                   \
     HashKeySize(ht) == NULL_TERMINATED                                       \
        ? strcmp((char *)(key1), (char *)(key2))                              \
        : memcmp((void *)(key1), (void *)(key2), HashKeySize(ht)) )
|
148
|
+
|
149
|
+
/* COPY_KEY(ht, keyTo, keyFrom) -- stores keyFrom into keyTo.
 * When the table both stores pointers and was asked to save keys, a fresh
 * copy is allocated with HTsmalloc() (sized with WORD_ROUND) and filled
 * with strcpy() for \0-terminated keys or memcpy() of cchKey bytes for
 * fixed-length keys.  In every other case keyFrom is assigned as-is. */
#define COPY_KEY(ht, keyTo, keyFrom)  do {                                    \
   if ( STORES_PTR(ht) && (ht)->fSaveKeys )     /* table keeps its own copy */\
   {                                                                          \
      if ( (ht)->cchKey == NULL_TERMINATED )    /* copy 0-term.ed str */      \
      {                                                                       \
         (keyTo) = (ulong)HTsmalloc( WORD_ROUND(strlen((char *)(keyFrom))+1) ); \
         strcpy((char *)(keyTo), (char *)(keyFrom));                          \
      }                                                                       \
      else                                      /* copy fixed-length record */\
      {                                                                       \
         (keyTo) = (ulong) HTsmalloc( WORD_ROUND((ht)->cchKey) );             \
         memcpy( (char *)(keyTo), (char *)(keyFrom), (ht)->cchKey);           \
      }                                                                       \
   }                                                                          \
   else                                                                       \
      (keyTo) = (keyFrom);                      /* just copy pointer or info */\
} while ( 0 )
|
163
|
+
|
164
|
+
/* FREE_KEY(ht, key) -- releases a key previously stored by COPY_KEY.
 * Does nothing unless the table stores pointers and saves its own copies
 * of keys; in that case the key is handed to HTfree() together with the
 * same WORD_ROUND-ed size that COPY_KEY allocated for it. */
#define FREE_KEY(ht, key)  do {                                               \
   if ( STORES_PTR(ht) && (ht)->fSaveKeys )                                   \
   {                                                                          \
      if ( (ht)->cchKey == NULL_TERMINATED )                                  \
         HTfree((char *)(key), WORD_ROUND(strlen((char *)(key))+1));          \
      else                                                                    \
         HTfree((char *)(key), WORD_ROUND((ht)->cchKey));                     \
   }                                                                          \
} while ( 0 )
|
171
|
+
|
172
|
+
/* the following are useful for bitmaps */
|
173
|
+
/* Format is like this (if 1 word = 4 bits): 3210 7654 ba98 fedc ... */
|
174
|
+
typedef ulong HTBitmapPart; /* this has to be unsigned, for >> */
|
175
|
+
typedef HTBitmapPart HTBitmap[1<<LOG_BM_WORDS];
|
176
|
+
typedef ulong HTOffset; /* something big enough to hold offsets */
|
177
|
+
|
178
|
+
#define BM_BYTES(cBuckets) /* we must ensure it's a multiple of word size */\
|
179
|
+
( (((cBuckets) + 8*sizeof(ulong)-1) >> LOG_WORD_SIZE) << (LOG_WORD_SIZE-3) )
|
180
|
+
#define MOD2(i, logmod) ( (i) & ((1<<(logmod))-1) )
|
181
|
+
#define DIV_NUM_ENTRIES(i) ( (i) >> LOG_WORD_SIZE )
|
182
|
+
#define MOD_NUM_ENTRIES(i) ( MOD2(i, LOG_WORD_SIZE) )
|
183
|
+
#define MODBIT(i) ( ((ulong)1) << MOD_NUM_ENTRIES(i) )
|
184
|
+
|
185
|
+
#define TEST_BITMAP(bm, i) ( (bm)[DIV_NUM_ENTRIES(i)] & MODBIT(i) ? 1 : 0 )
|
186
|
+
#define SET_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] |= MODBIT(i)
|
187
|
+
#define CLEAR_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] &= ~MODBIT(i)
|
188
|
+
|
189
|
+
/* the following are useful for reading and writing hashtables */

/* READ_UL(fp, data) -- reads one long-sized record from fp, passes it
 * through ntohl(), and assigns the result to data.  If the record cannot
 * be read in full, data is set to 0 rather than being computed from an
 * indeterminate value.  The record stays long-sized so that it matches
 * what WRITE_UL emits. */
#define READ_UL(fp, data)                                \
   do {                                                  \
      long _ul = 0;                                      \
      if ( fread(&_ul, sizeof(_ul), 1, (fp)) != 1 )      \
         _ul = 0;            /* short read: discard partial bytes */ \
      (data) = ntohl(_ul);                               \
   } while (0)

/* WRITE_UL(fp, data) -- passes data through htonl() and writes the result
 * to fp as one long-sized record. */
#define WRITE_UL(fp, data)                               \
   do {                                                  \
      long _ul = htonl((long)(data));                    \
      fwrite(&_ul, sizeof(_ul), 1, (fp));                \
   } while (0)
|
202
|
+
|
203
|
+
/* LOAD_AND_RETURN(ht, loadCommand) -- moves data from disk to memory if
 * necessary, then returns from the enclosing function.
 *    loadCommand is an expression yielding an HTItem *.
 *    - If ht has no fpData, the data is already in memory and the result of
 *      loadCommand is returned directly.
 *    - Otherwise the previously loaded ht->bckData.data (if any) is freed,
 *      loadCommand is run, and NULL is returned if it found nothing.  On
 *      success the item is copied into ht->bckData, its data field is used
 *      as a file offset, a length is read with READ_UL, and ht->dataRead is
 *      called to load the real data; &ht->bckData is returned.
 * Note dataRead cannot be NULL here, because then we might as well (and do)
 * load the data into memory.
 * Every use of ht is parenthesized so an expression argument binds as one
 * unit.  The macro expands to an if/else containing return statements. */
#define LOAD_AND_RETURN(ht, loadCommand)     /* lC returns an HTItem * */     \
   if ( !(ht)->fpData )          /* data is stored in memory */               \
      return (loadCommand);                                                   \
   else                          /* must read data off of disk */             \
   {                                                                          \
      int cchData;                                                            \
      HTItem *bck;                                                            \
      if ( (ht)->bckData.data )  free((char *)(ht)->bckData.data);            \
      (ht)->bckData.data = (ulong)NULL;   /* needed if loadCommand fails */   \
      bck = (loadCommand);                                                    \
      if ( bck == NULL )          /* loadCommand failed: key not found */     \
         return NULL;                                                         \
      else                                                                    \
         (ht)->bckData = *bck;                                                \
      fseek((ht)->fpData, (ht)->bckData.data, SEEK_SET);                      \
      READ_UL((ht)->fpData, cchData);                                         \
      (ht)->bckData.data = (ulong)(ht)->dataRead((ht)->fpData, cchData);      \
      return &((ht)->bckData);                                                \
   }
|
224
|
+
|
225
|
+
|
226
|
+
/* ======================================================================== */
|
227
|
+
/* UTILITY ROUTINES */
|
228
|
+
/* ---------------------- */
|
229
|
+
|
230
|
+
/* HTsmalloc() -- safe malloc
 *    A request for 0 bytes returns NULL without calling malloc().
 *    Any other request returns memory from malloc(); if malloc() fails, a
 *    message is printed to stderr and the program exits with status 1, so
 *    a non-zero request never returns NULL.
 */
static void *HTsmalloc(unsigned long size)
{
   void *block = NULL;

   if ( size != 0 )
   {
      block = malloc(size);
      if ( block == NULL )
      {
         fprintf(stderr, "HTsmalloc: Unable to allocate %lu bytes of memory\n",
                 size);
         exit(1);
      }
   }
   return block;
}
|
248
|
+
|
249
|
+
/* HTscalloc() -- safe calloc
 *    Returns size bytes of zero-filled memory from calloc().  If calloc()
 *    returns NULL for a non-zero size, a message is printed to stderr and
 *    the program exits with status 1.  For size 0 the calloc() result is
 *    returned unchanged.
 */
static void *HTscalloc(unsigned long size)
{
   void *block = calloc(size, 1);

   if ( block == NULL && size != 0 )
   {
      fprintf(stderr, "HTscalloc: Unable to allocate %lu bytes of memory\n",
              size);
      exit(1);
   }
   return block;
}
|
266
|
+
|
267
|
+
/* HTsrealloc() -- safe realloc
 *    Resizes the block at ptr to new_size bytes and returns the (possibly
 *    moved) block, or exits with status 1 after printing a message to
 *    stderr if the memory cannot be obtained.
 *       ptr      -- block to resize; NULL requests a fresh allocation.
 *       new_size -- wanted size in bytes.  0 releases ptr and returns NULL,
 *                   handled here explicitly because realloc(ptr, 0) does
 *                   not behave the same way on every C library.
 *       delta    -- not used by this function.
 */
static void *HTsrealloc(void *ptr, unsigned long new_size, long delta)
{
   void *grown;

   (void)delta;

   if ( new_size == 0 )
   {
      free(ptr);                      /* free(NULL) is a no-op */
      return NULL;
   }
   grown = realloc(ptr, new_size);    /* acts like malloc when ptr is NULL */
   if ( grown == NULL )
   {
      fprintf(stderr, "HTsrealloc: Unable to reallocate %lu bytes of memory\n",
              new_size);
      exit(1);
   }
   return grown;
}
|
284
|
+
|
285
|
+
/* HTfree() -- release memory obtained from the HTs*alloc() routines
 *    Calls free() on ptr when size is non-zero.  A size of 0 is treated as
 *    "nothing was allocated" and ptr is left alone.
 */
static void HTfree(void *ptr, unsigned long size)
{
   if ( size == 0 )            /* some systems seem to not like freeing NULL */
      return;
   free(ptr);
}
|
294
|
+
|
295
|
+
/*************************************************************************\
| HTcopy()                                                                |
|     Sometimes we interpret data as a ulong.  But ulongs must be         |
|     aligned on some machines, so instead of casting we copy.            |
|     Reads sizeof(unsigned long) bytes starting at ul and returns them   |
|     as an unsigned long, byte for byte.                                 |
\*************************************************************************/

unsigned long HTcopy(char *ul)
{
   unsigned long value;
   unsigned char *dst = (unsigned char *)&value;
   size_t k;

   for ( k = 0; k < sizeof(value); k++ )     /* bytewise, so ul may be unaligned */
      dst[k] = (unsigned char)ul[k];
   return value;
}
|
308
|
+
|
309
|
+
/*************************************************************************\
|
310
|
+
| HTSetupKeyTrunc() |
|
311
|
+
| If keys are stored directly but cchKey is less than |
|
312
|
+
| sizeof(ulong), this cuts off the bits at the end. |
|
313
|
+
\*************************************************************************/
|
314
|
+
|
315
|
+
static void HTSetupKeyTrunc(void)
|
316
|
+
{
|
317
|
+
int i, j;
|
318
|
+
|
319
|
+
for ( i = 0; i < sizeof(unsigned long); i++ )
|
320
|
+
for ( j = 0; j < sizeof(unsigned long); j++ )
|
321
|
+
grgKeyTruncMask[i][j] = j < i ? 255 : 0; /* chars have 8 bits */
|
322
|
+
}
|
323
|
+
|
324
|
+
|
325
|
+
/* ======================================================================== */
|
326
|
+
/* TABLE ROUTINES */
|
327
|
+
/* -------------------- */
|
328
|
+
|
329
|
+
/* The idea is that a hashtable with (logically) t buckets is divided
|
330
|
+
* into t/M groups of M buckets each. (M is a constant set in
|
331
|
+
* LOG_BM_WORDS for efficiency.) Each group is stored sparsely.
|
332
|
+
* Thus, inserting into the table causes some array to grow, which is
|
333
|
+
* slow but still constant time. Lookup involves doing a
|
334
|
+
* logical-position-to-sparse-position lookup, which is also slow but
|
335
|
+
* constant time. The larger M is, the slower these operations are
|
336
|
+
* but the less overhead (slightly).
|
337
|
+
*
|
338
|
+
* To store the sparse array, we store a bitmap B, where B[i] = 1 iff
|
339
|
+
* bucket i is non-empty. Then to look up bucket i we really look up
|
340
|
+
* array[# of 1s before i in B]. This is constant time for fixed M.
|
341
|
+
*
|
342
|
+
* Terminology: the position of an item in the overall table (from
|
343
|
+
* 1 .. t) is called its "location." The logical position in a group
|
344
|
+
* (from 1 .. M ) is called its "position." The actual location in
|
345
|
+
* the array (from 1 .. # of non-empty buckets in the group) is
|
346
|
+
* called its "offset."
|
347
|
+
*
|
348
|
+
* The following operations are supported:
|
349
|
+
* o Allocate an array with t buckets, all empty
|
350
|
+
* o Free an array (but not whatever was stored in the buckets)
|
351
|
+
* o Tell whether or not a bucket is empty
|
352
|
+
* o Return a bucket with a given location
|
353
|
+
* o Set the value of a bucket at a given location
|
354
|
+
* o Iterate through all the buckets in the array
|
355
|
+
* o Read and write an occupancy bitmap to disk
|
356
|
+
* o Return how much memory is being allocated by the array structure
|
357
|
+
*/
|
358
|
+
|
359
|
+
#ifndef SparseBucket            /* by default, each bucket holds an HTItem */
#define SparseBucket  HTItem
#endif

/* One group of the sparse table: the non-empty buckets of the group, packed
 * into binSparse, plus the bitmap recording which logical positions in the
 * group are occupied. */
typedef struct SparseBin {
   SparseBucket *binSparse;  /* packed array of the occupied buckets */
   HTBitmap bmOccupied;      /* bmOccupied[i] is 1 if bucket i has an item */
   short cOccupied;          /* size of binSparse; useful for iterators, eg */
} SparseBin;
|
368
|
+
|
369
|
+
/* Iteration state over a sparse table.
 * NOTE(review): the functions that use this struct are outside this view;
 * posGroup/posOffset presumably hold the current group index and the offset
 * within that group's packed array -- confirm against the iterator code. */
typedef struct SparseIterator {
   long posGroup;
   long posOffset;
   SparseBin *binSparse;     /* state info, to avoid args for NextBucket() */
   ulong cBuckets;
} SparseIterator;
|
375
|
+
|
376
|
+
/* log2 of the number of buckets in one group: words per bitmap times bits
 * per word. */
#define LOG_LOW_BIN_SIZE        ( LOG_BM_WORDS+LOG_WORD_SIZE )
/* number of groups needed to hold cBuckets buckets (rounds up) */
#define SPARSE_GROUPS(cBuckets) ( (((cBuckets)-1) >> LOG_LOW_BIN_SIZE) + 1 )
|
378
|
+
|
379
|
+
/* we need a small function to figure out # of items set in the bm */
|
380
|
+
static HTOffset EntriesUpto(HTBitmapPart *bm, int i)
|
381
|
+
{ /* returns # of set bits in 0..i-1 */
|
382
|
+
HTOffset retval = 0;
|
383
|
+
static HTOffset rgcBits[256] = /* # of bits set in one char */
|
384
|
+
{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
385
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
386
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
387
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
388
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
389
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
390
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
391
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
392
|
+
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
393
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
394
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
395
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
396
|
+
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
397
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
398
|
+
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
399
|
+
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
|
400
|
+
|
401
|
+
if ( i == 0 ) return 0;
|
402
|
+
for ( ; i > sizeof(*bm)*8; i -= sizeof(*bm)*8, bm++ )
|
403
|
+
{ /* think of it as loop unrolling */
|
404
|
+
#if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */
|
405
|
+
retval += rgcBits[*bm & 255]; /* get the low byte */
|
406
|
+
#if LOG_WORD_SIZE >= 4 /* at least 2 bytes */
|
407
|
+
retval += rgcBits[(*bm >> 8) & 255];
|
408
|
+
#if LOG_WORD_SIZE >= 5 /* at least 4 bytes */
|
409
|
+
retval += rgcBits[(*bm >> 16) & 255];
|
410
|
+
retval += rgcBits[(*bm >> 24) & 255];
|
411
|
+
#if LOG_WORD_SIZE >= 6 /* 8 bytes! */
|
412
|
+
retval += rgcBits[(*bm >> 32) & 255];
|
413
|
+
retval += rgcBits[(*bm >> 40) & 255];
|
414
|
+
retval += rgcBits[(*bm >> 48) & 255];
|
415
|
+
retval += rgcBits[(*bm >> 56) & 255];
|
416
|
+
#if LOG_WORD_SIZE >= 7 /* not a concern for a while... */
|
417
|
+
#error Need to rewrite EntriesUpto to support such big words
|
418
|
+
#endif /* >8 bytes */
|
419
|
+
#endif /* 8 bytes */
|
420
|
+
#endif /* 4 bytes */
|
421
|
+
#endif /* 2 bytes */
|
422
|
+
#endif /* 1 byte */
|
423
|
+
}
|
424
|
+
switch ( i ) { /* from 0 to 63 */
|
425
|
+
case 0:
|
426
|
+
return retval;
|
427
|
+
#if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */
|
428
|
+
case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
|
429
|
+
return (retval + rgcBits[*bm & ((1 << i)-1)]);
|
430
|
+
#if LOG_WORD_SIZE >= 4 /* at least 2 bytes */
|
431
|
+
case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
|
432
|
+
return (retval + rgcBits[*bm & 255] +
|
433
|
+
rgcBits[(*bm >> 8) & ((1 << (i-8))-1)]);
|
434
|
+
#if LOG_WORD_SIZE >= 5 /* at least 4 bytes */
|
435
|
+
case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24:
|
436
|
+
return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
437
|
+
rgcBits[(*bm >> 16) & ((1 << (i-16))-1)]);
|
438
|
+
case 25: case 26: case 27: case 28: case 29: case 30: case 31: case 32:
|
439
|
+
return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
440
|
+
rgcBits[(*bm >> 16) & 255] +
|
441
|
+
rgcBits[(*bm >> 24) & ((1 << (i-24))-1)]);
|
442
|
+
#if LOG_WORD_SIZE >= 6 /* 8 bytes! */
|
443
|
+
case 33: case 34: case 35: case 36: case 37: case 38: case 39: case 40:
|
444
|
+
return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
445
|
+
rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
446
|
+
rgcBits[(*bm >> 32) & ((1 << (i-32))-1)]);
|
447
|
+
case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48:
|
448
|
+
return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
449
|
+
rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
450
|
+
rgcBits[(*bm >> 32) & 255] +
|
451
|
+
rgcBits[(*bm >> 40) & ((1 << (i-40))-1)]);
|
452
|
+
case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56:
|
453
|
+
return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
454
|
+
rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
455
|
+
rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
|
456
|
+
rgcBits[(*bm >> 48) & ((1 << (i-48))-1)]);
|
457
|
+
case 57: case 58: case 59: case 60: case 61: case 62: case 63: case 64:
|
458
|
+
return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +
|
459
|
+
rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] +
|
460
|
+
rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +
|
461
|
+
rgcBits[(*bm >> 48) & 255] +
|
462
|
+
rgcBits[(*bm >> 56) & ((1 << (i-56))-1)]);
|
463
|
+
#endif /* 8 bytes */
|
464
|
+
#endif /* 4 bytes */
|
465
|
+
#endif /* 2 bytes */
|
466
|
+
#endif /* 1 byte */
|
467
|
+
}
|
468
|
+
assert("" == "word size is too big in EntriesUpto()");
|
469
|
+
return -1;
|
470
|
+
}
|
471
|
+
/* converts a logical position within a group into the index of that bucket
 * in the group's packed array: the number of occupied positions before it */
#define SPARSE_POS_TO_OFFSET(bm, i)   ( EntriesUpto(&((bm)[0]), i) )
/* address of the packed-array slot for a table-wide location: picks the
 * group with location >> LOG_LOW_BIN_SIZE, then offsets by the position
 * within that group.  Does not check whether the bucket is occupied. */
#define SPARSE_BUCKET(bin, location)  \
   ( (bin)[(location) >> LOG_LOW_BIN_SIZE].binSparse +                     \
      SPARSE_POS_TO_OFFSET((bin)[(location)>>LOG_LOW_BIN_SIZE].bmOccupied, \
                           MOD2(location, LOG_LOW_BIN_SIZE)) )
|
476
|
+
|
477
|
+
|
478
|
+
/*************************************************************************\
|
479
|
+
| SparseAllocate() |
|
480
|
+
| SparseFree() |
|
481
|
+
| Allocates, sets-to-empty, and frees a sparse array. All you need |
|
482
|
+
| to tell me is how many buckets you want. I return the number of |
|
483
|
+
| buckets I actually allocated, setting the array as a parameter. |
|
484
|
+
| Note that you have to set auxiliary parameters, like cOccupied.  |
|
485
|
+
\*************************************************************************/
|
486
|
+
|
487
|
+
/* Allocates a zero-filled array of groups large enough for cBuckets buckets
 * and stores it in *pbinSparse.  RETURNS the number of buckets the array
 * can actually hold (the group count times the group size).  The group
 * count is kept in a ulong, as SparseFree() does, so it is not narrowed
 * before being shifted into the return value. */
static ulong SparseAllocate(SparseBin **pbinSparse, ulong cBuckets)
{
   ulong cGroups = SPARSE_GROUPS(cBuckets);

   *pbinSparse = (SparseBin *) HTscalloc(sizeof(**pbinSparse) * cGroups);
   return cGroups << LOG_LOW_BIN_SIZE;
}
|
494
|
+
|
495
|
+
/* Releases each group's packed bucket array and then the array of groups
 * itself.  cBuckets must be the bucket count the table was sized for, since
 * the number of groups is derived from it.  Always RETURNS NULL, which the
 * caller can assign back to its table pointer. */
static SparseBin *SparseFree(SparseBin *binSparse, ulong cBuckets)
{
   const ulong cGroups = SPARSE_GROUPS(cBuckets);
   ulong iGroup = 0;

   while ( iGroup < cGroups )
   {
      SparseBin *bin = &binSparse[iGroup];

      HTfree(bin->binSparse, sizeof(*bin->binSparse) * bin->cOccupied);
      iGroup++;
   }
   HTfree(binSparse, sizeof(*binSparse) * cGroups);
   return NULL;
}
|
505
|
+
|
506
|
+
/*************************************************************************\
|
507
|
+
| SparseIsEmpty() |
|
508
|
+
| SparseFind() |
|
509
|
+
| You give me a location (ie a number between 1 and t), and I |
|
510
|
+
| return the bucket at that location, or NULL if the bucket is |
|
511
|
+
| empty. It's OK to call Find() on an empty table. |
|
512
|
+
\*************************************************************************/
|
513
|
+
|
514
|
+
/* RETURNS non-zero when the occupancy bit for `location` is clear in the
 * bitmap of the group that owns it. */
static int SparseIsEmpty(SparseBin *binSparse, ulong location)
{
   SparseBin *group = binSparse + (location >> LOG_LOW_BIN_SIZE);

   return !TEST_BITMAP(group->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE));
}
|
519
|
+
|
520
|
+
/* RETURNS a pointer to the bucket held at `location`, or NULL when the
 * table has nothing stored there. */
static SparseBucket *SparseFind(SparseBin *binSparse, ulong location)
{
   return SparseIsEmpty(binSparse, location)
             ? NULL
             : SPARSE_BUCKET(binSparse, location);
}
|
526
|
+
|
527
|
+
/*************************************************************************\
|
528
|
+
| SparseInsert() |
|
529
|
+
| You give me a location, and contents to put there, and I insert |
|
530
|
+
| into that location and RETURN a pointer to the location. If |
|
531
|
+
| bucket was already occupied, I write over the contents only if |
|
532
|
+
| *pfOverwrite is 1. We set *pfOverwrite to 1 if there was someone |
|
533
|
+
| there (whether or not we overwrote) and 0 else. |
|
534
|
+
\*************************************************************************/
|
535
|
+
|
536
|
+
static SparseBucket *SparseInsert(SparseBin *binSparse, SparseBucket *bckInsert,
|
537
|
+
ulong location, int *pfOverwrite)
|
538
|
+
{
|
539
|
+
SparseBucket *bckPlace;
|
540
|
+
HTOffset offset;
|
541
|
+
|
542
|
+
bckPlace = SparseFind(binSparse, location);
|
543
|
+
if ( bckPlace ) /* means we replace old contents */
|
544
|
+
{
|
545
|
+
if ( *pfOverwrite )
|
546
|
+
*bckPlace = *bckInsert;
|
547
|
+
*pfOverwrite = 1;
|
548
|
+
return bckPlace;
|
549
|
+
}
|
550
|
+
|
551
|
+
binSparse += (location >> LOG_LOW_BIN_SIZE);
|
552
|
+
offset = SPARSE_POS_TO_OFFSET(binSparse->bmOccupied,
|
553
|
+
MOD2(location, LOG_LOW_BIN_SIZE));
|
554
|
+
binSparse->binSparse = (SparseBucket *)
|
555
|
+
HTsrealloc(binSparse->binSparse,
|
556
|
+
sizeof(*binSparse->binSparse) * ++binSparse->cOccupied,
|
557
|
+
sizeof(*binSparse->binSparse));
|
558
|
+
memmove(binSparse->binSparse + offset+1,
|
559
|
+
binSparse->binSparse + offset,
|
560
|
+
(binSparse->cOccupied-1 - offset) * sizeof(*binSparse->binSparse));
|
561
|
+
binSparse->binSparse[offset] = *bckInsert;
|
562
|
+
SET_BITMAP(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE));
|
563
|
+
*pfOverwrite = 0;
|
564
|
+
return binSparse->binSparse + offset;
|
565
|
+
}
|
566
|
+
|
567
|
+
/*************************************************************************\
|
568
|
+
| SparseFirstBucket() |
|
569
|
+
| SparseNextBucket() |
|
570
|
+
| SparseCurrentBit() |
|
571
|
+
| Iterate through the occupied buckets of a sparse hashtable. You |
|
572
|
+
| must, of course, have allocated space yourself for the iterator. |
|
573
|
+
\*************************************************************************/
|
574
|
+
|
575
|
+
/* Advances the iterator and RETURNS the next stored bucket, or NULL once
 * no later group holds anything.  A posOffset of -1 marks an iterator
 * that SparseFirstBucket() has just set up; in that case we go straight
 * to scanning for the first non-empty group. */
static SparseBucket *SparseNextBucket(SparseIterator *iter)
{
   if ( iter->posOffset != -1 )         /* already inside some group */
   {
      ++iter->posOffset;
      if ( iter->posOffset < iter->binSparse[iter->posGroup].cOccupied )
         return iter->binSparse[iter->posGroup].binSparse + iter->posOffset;
   }

   iter->posOffset = 0;                 /* first bucket of a later group */
   iter->posGroup++;
   while ( iter->posGroup < SPARSE_GROUPS(iter->cBuckets) )
   {
      if ( iter->binSparse[iter->posGroup].cOccupied > 0 )
         return iter->binSparse[iter->posGroup].binSparse;
      iter->posGroup++;
   }
   return NULL;                         /* nothing left to visit */
}
|
588
|
+
|
589
|
+
static SparseBucket *SparseFirstBucket(SparseIterator *iter,
|
590
|
+
SparseBin *binSparse, ulong cBuckets)
|
591
|
+
{
|
592
|
+
iter->binSparse = binSparse; /* set it up for NextBucket() */
|
593
|
+
iter->cBuckets = cBuckets;
|
594
|
+
iter->posOffset = -1; /* when we advance, we're at 0 */
|
595
|
+
iter->posGroup = -1;
|
596
|
+
return SparseNextBucket(iter);
|
597
|
+
}
|
598
|
+
|
599
|
+
/*************************************************************************\
|
600
|
+
| SparseWrite() |
|
601
|
+
| SparseRead() |
|
602
|
+
| These are routines for storing a sparse hashtable onto disk. We |
|
603
|
+
| store the number of buckets and a bitmap indicating which buckets |
|
604
|
+
| are allocated (occupied). The actual contents of the buckets |
|
605
|
+
| must be stored separately. |
|
606
|
+
\*************************************************************************/
|
607
|
+
|
608
|
+
/* Writes the table's shape to fp: first cBuckets, then every word of
 * every group's occupancy bitmap, group by group.  Bucket contents are
 * not written here. */
static void SparseWrite(FILE *fp, SparseBin *binSparse, ulong cBuckets)
{
   ulong iGroup, iWord;

   WRITE_UL(fp, cBuckets);
   for ( iGroup = 0; iGroup < SPARSE_GROUPS(cBuckets); iGroup++ )
   {
      SparseBin *group = binSparse + iGroup;

      for ( iWord = 0; iWord < (1<<LOG_BM_WORDS); iWord++ )
      {
         WRITE_UL(fp, group->bmOccupied[iWord]);
      }
   }
}
|
617
|
+
|
618
|
+
/* Rebuilds a table's shape from fp (the inverse of SparseWrite()): reads
 * the bucket count, allocates the groups, then for each group reads its
 * bitmap words, derives cOccupied from the bitmap and allocates a bucket
 * array of that many elements.  The bucket contents themselves are left
 * for the caller to fill in.  RETURNS the bucket count reported by
 * SparseAllocate(). */
static ulong SparseRead(FILE *fp, SparseBin **pbinSparse)
{
   ulong iGroup, iWord, cBuckets;
   SparseBin *group;

   READ_UL(fp, cBuckets);               /* the stored bucket count */
   cBuckets = SparseAllocate(pbinSparse, cBuckets);
   for ( iGroup = 0; iGroup < SPARSE_GROUPS(cBuckets); iGroup++ )
   {
      group = *pbinSparse + iGroup;
      for ( iWord = 0; iWord < (1<<LOG_BM_WORDS); iWord++ )
      {
         READ_UL(fp, group->bmOccupied[iWord]);
      }
      /* offset of the one-past-the-end position == number of set bits */
      group->cOccupied =
         SPARSE_POS_TO_OFFSET(group->bmOccupied, 1<<LOG_LOW_BIN_SIZE);
      group->binSparse =
         (SparseBucket *) HTsmalloc(sizeof(*group->binSparse) *
                                    group->cOccupied);
   }
   return cBuckets;
}
|
636
|
+
|
637
|
+
/*************************************************************************\
|
638
|
+
| SparseMemory() |
|
639
|
+
| SparseMemory() tells us how much memory is being allocated for |
|
640
|
+
| the sparse table. You need to tell me not only how many buckets |
|
641
|
+
| there are, but how many are occupied. |
|
642
|
+
\*************************************************************************/
|
643
|
+
|
644
|
+
/* RETURNS the byte count attributed to a sparse table: one SparseBucket
 * per occupied bucket plus one SparseBin per group. */
static ulong SparseMemory(ulong cBuckets, ulong cOccupied)
{
   ulong cbBuckets = cOccupied * sizeof(SparseBucket);
   ulong cbGroups = SPARSE_GROUPS(cBuckets) * sizeof(SparseBin);

   return cbBuckets + cbGroups;
}
|
649
|
+
|
650
|
+
|
651
|
+
/* Just for fun, I also provide support for dense tables. These are
|
652
|
+
* just regular arrays. Access is fast, but they can get big.
|
653
|
+
* Use Table(x) at the top of chash.h to decide which you want.
|
654
|
+
* A disadvantage is we need to steal more of the data space for
|
655
|
+
* indicating empty buckets. We choose -3.
|
656
|
+
*/
|
657
|
+
|
658
|
+
#ifndef DenseBucket /* by default, each bucket holds an HTItem */
|
659
|
+
#define DenseBucket HTItem
|
660
|
+
#endif
|
661
|
+
|
662
|
+
typedef struct DenseBin { /* needs to be a struct for C typing reasons */
|
663
|
+
DenseBucket *rgBuckets; /* A bin is an array of buckets */
|
664
|
+
} DenseBin;
|
665
|
+
|
666
|
+
typedef struct DenseIterator {
|
667
|
+
long pos; /* the actual iterator */
|
668
|
+
DenseBin *bin; /* state info, to avoid args for NextBucket() */
|
669
|
+
ulong cBuckets;
|
670
|
+
} DenseIterator;
|
671
|
+
|
672
|
+
/* Bucket-state helpers for dense tables.  A bucket is empty when its
 * data field equals EMPTY (fks-hash.h, per the original comment);
 * "occupied" is recorded by storing 1, i.e. anything that is not EMPTY.
 * Each expansion is fully parenthesized so the macros behave as a single
 * expression wherever they are used. */
#define DENSE_IS_EMPTY(bin, i)      ( (bin)[(i)].data == EMPTY )
#define DENSE_SET_EMPTY(bin, i)     ( (bin)[(i)].data = EMPTY )
#define DENSE_SET_OCCUPIED(bin, i)  ( (bin)[(i)].data = 1 )
|
675
|
+
|
676
|
+
/* Marks the first cBuckets buckets of the table as empty, walking from
 * the highest index down to 0. */
static void DenseClear(DenseBin *bin, ulong cBuckets)
{
   ulong cLeft;

   for ( cLeft = cBuckets; cLeft > 0; cLeft-- )
      DENSE_SET_EMPTY(bin->rgBuckets, cLeft - 1);
}
|
681
|
+
|
682
|
+
/* Allocates a DenseBin plus an array of cBuckets buckets, marks every
 * bucket empty and stores the bin in *pbin.  RETURNS cBuckets (a dense
 * table never rounds the size up).
 * The bin is sized as sizeof(**pbin), the DenseBin struct itself, which
 * matches the sizeof(*bin) that DenseFree() releases; sizeof(*pbin)
 * would be the size of a pointer. */
static ulong DenseAllocate(DenseBin **pbin, ulong cBuckets)
{
   *pbin = (DenseBin *) HTsmalloc(sizeof(**pbin));
   (*pbin)->rgBuckets = (DenseBucket *) HTsmalloc(sizeof(*(*pbin)->rgBuckets)
                                                  * cBuckets);
   DenseClear(*pbin, cBuckets);
   return cBuckets;
}
|
690
|
+
|
691
|
+
/* Releases the bucket array (cBuckets elements) and then the bin itself.
 * Always RETURNS NULL. */
static DenseBin *DenseFree(DenseBin *bin, ulong cBuckets)
{
   DenseBucket *buckets = bin->rgBuckets;

   HTfree(buckets, sizeof(*buckets) * cBuckets);
   HTfree(bin, sizeof(*bin));
   return NULL;
}
|
697
|
+
|
698
|
+
/* RETURNS non-zero when the bucket at `location` is marked empty. */
static int DenseIsEmpty(DenseBin *bin, ulong location)
{
   DenseBucket *buckets = bin->rgBuckets;

   return DENSE_IS_EMPTY(buckets, location);
}
|
702
|
+
|
703
|
+
/* RETURNS the bucket at `location`, or NULL when that bucket is empty. */
static DenseBucket *DenseFind(DenseBin *bin, ulong location)
{
   return DenseIsEmpty(bin, location) ? NULL : &bin->rgBuckets[location];
}
|
709
|
+
|
710
|
+
static DenseBucket *DenseInsert(DenseBin *bin, DenseBucket *bckInsert,
|
711
|
+
ulong location, int *pfOverwrite)
|
712
|
+
{
|
713
|
+
DenseBucket *bckPlace;
|
714
|
+
|
715
|
+
bckPlace = DenseFind(bin, location);
|
716
|
+
if ( bckPlace ) /* means something is already there */
|
717
|
+
{
|
718
|
+
if ( *pfOverwrite )
|
719
|
+
*bckPlace = *bckInsert;
|
720
|
+
*pfOverwrite = 1; /* set to 1 to indicate someone was there */
|
721
|
+
return bckPlace;
|
722
|
+
}
|
723
|
+
else
|
724
|
+
{
|
725
|
+
bin->rgBuckets[location] = *bckInsert;
|
726
|
+
*pfOverwrite = 0;
|
727
|
+
return bin->rgBuckets + location;
|
728
|
+
}
|
729
|
+
}
|
730
|
+
|
731
|
+
/* Moves the iterator forward to the next non-empty bucket and RETURNS
 * it, or NULL when the end of the table is reached first. */
static DenseBucket *DenseNextBucket(DenseIterator *iter)
{
   iter->pos++;
   while ( iter->pos < iter->cBuckets )
   {
      if ( !DenseIsEmpty(iter->bin, iter->pos) )
         return iter->bin->rgBuckets + iter->pos;
      iter->pos++;
   }
   return NULL;                         /* every remaining bucket was empty */
}
|
738
|
+
|
739
|
+
static DenseBucket *DenseFirstBucket(DenseIterator *iter,
|
740
|
+
DenseBin *bin, ulong cBuckets)
|
741
|
+
{
|
742
|
+
iter->bin = bin; /* set it up for NextBucket() */
|
743
|
+
iter->cBuckets = cBuckets;
|
744
|
+
iter->pos = -1; /* thus the next bucket will be 0 */
|
745
|
+
return DenseNextBucket(iter);
|
746
|
+
}
|
747
|
+
|
748
|
+
/* Writes the table's shape to fp: cBuckets, followed by an occupancy
 * bitmap packed 8*sizeof(ulong) buckets per word (bit k of a word is set
 * when the k-th bucket of that word's run is non-empty).  The final word
 * may be only partly used.  Bucket contents are not written. */
static void DenseWrite(FILE *fp, DenseBin *bin, ulong cBuckets)
{
   ulong pos, bit, word;

   WRITE_UL(fp, cBuckets);
   for ( pos = 0; pos < cBuckets; )
   {
      word = 0;
      for ( bit = 0; bit < 8*sizeof(ulong) && pos < cBuckets; bit++, pos++ )
      {
         if ( !DenseIsEmpty(bin, pos) )
         {
            SET_BITMAP(&word, bit);
         }
      }
      WRITE_UL(fp, word);
   }
}
|
766
|
+
|
767
|
+
/* Rebuilds a table's shape from fp (the inverse of DenseWrite()): reads
 * the bucket count, allocates the table, then reads one bitmap word per
 * 8*sizeof(ulong) buckets and marks each bucket occupied or empty from
 * its bit.  Occupied buckets only get the placeholder written by
 * DENSE_SET_OCCUPIED; real contents are the caller's job.  RETURNS the
 * bucket count reported by DenseAllocate(). */
static ulong DenseRead(FILE *fp, DenseBin **pbin)
{
   ulong pos = 0, bit, word, cBuckets;

   READ_UL(fp, cBuckets);
   cBuckets = DenseAllocate(pbin, cBuckets);
   while ( pos < cBuckets )
   {
      READ_UL(fp, word);
      for ( bit = 0; bit < 8*sizeof(ulong) && pos < cBuckets; bit++, pos++ )
      {
         if ( TEST_BITMAP(&word, bit) )
         {
            DENSE_SET_OCCUPIED((*pbin)->rgBuckets, pos);
         }
         else
         {
            DENSE_SET_EMPTY((*pbin)->rgBuckets, pos);
         }
      }
   }
   return cBuckets;
}
|
788
|
+
|
789
|
+
/* RETURNS the byte count of the bucket array of a dense table.  The
 * cOccupied argument is accepted for signature parity with
 * SparseMemory() but is not used: a dense table pays for every bucket. */
static ulong DenseMemory(ulong cBuckets, ulong cOccupied)
{
   ulong cbPerBucket = sizeof(DenseBucket);

   return cBuckets * cbPerBucket;
}
|
793
|
+
|
794
|
+
|
795
|
+
/* ======================================================================== */
|
796
|
+
/* HASHING ROUTINES */
|
797
|
+
/* ---------------------- */
|
798
|
+
|
799
|
+
/* Implements a simple quadratic hashing scheme. We have a single hash
|
800
|
+
* table of size t and a single hash function h(x). When inserting an
|
801
|
+
* item, first we try h(x) % t. If it's occupied, we try h(x) +
|
802
|
+
* i*(i-1)/2 % t for increasing values of i until we hit a not-occupied
|
803
|
+
* space. To make this dynamic, we double the size of the hash table as
|
804
|
+
* soon as more than half the cells are occupied. When deleting, we can
|
805
|
+
* choose to shrink the hashtable when less than a quarter of the
|
806
|
+
* cells are occupied, or we can choose never to shrink the hashtable.
|
807
|
+
* For lookup, we check h(x) + i*(i-1)/2 % t (starting with i=0) until
|
808
|
+
* we get a match or we hit an empty space. Note that as a result,
|
809
|
+
* we can't make a cell empty on deletion, or lookups may end prematurely.
|
810
|
+
* Instead we mark the cell as "deleted." We thus steal the value
|
811
|
+
* DELETED as a possible "data" value. As long as data are pointers,
|
812
|
+
* that's ok.
|
813
|
+
* The hash increment we use, i(i-1)/2, is not the standard quadratic
|
814
|
+
* hash increment, which is i^2. i(i-1)/2 covers the entire bucket space
|
815
|
+
* when the hashtable size is a power of two, as it is for us. In fact,
|
816
|
+
* the first n probes cover n distinct buckets; then it repeats. This
|
817
|
+
* guarantees insertion will always succeed.
|
818
|
+
* If you want linear hashing, set JUMP in chash.h. You can also change
|
819
|
+
* various other parameters there.
|
820
|
+
*/
|
821
|
+
|
822
|
+
/*************************************************************************\
|
823
|
+
| Hash() |
|
824
|
+
| The hash function I use is due to Bob Jenkins (see |
|
825
|
+
| http://burtleburtle.net/bob/hash/evahash.html |
|
826
|
+
| According to http://burtleburtle.net/bob/c/lookup2.c, |
|
827
|
+
| his implementation is public domain.) |
|
828
|
+
| It takes 36 instructions, in 18 cycles if you're lucky. |
|
829
|
+
| hashing depends on the fact the hashtable size is always a |
|
830
|
+
| power of 2. cBuckets is probably ht->cBuckets. |
|
831
|
+
\*************************************************************************/
|
832
|
+
|
833
|
+
/* mix(a,b,c) scrambles three ulong accumulators in place; WORD_AT(ptr)
 * loads one ulong-sized word starting at ptr.  With WORD_HASH defined
 * the word is read directly through a ulong pointer; otherwise it is
 * assembled byte by byte, lowest-addressed byte in the low bits.  One
 * variant of each macro exists per supported word size. */
#if LOG_WORD_SIZE == 5                      /* 32 bit words */

#define mix(a,b,c) \
{ \
   a -= b;  a -= c;  a ^= (c >> 13); \
   b -= c;  b -= a;  b ^= (a <<  8); \
   c -= a;  c -= b;  c ^= (b >> 13); \
   a -= b;  a -= c;  a ^= (c >> 12); \
   b -= c;  b -= a;  b ^= (a << 16); \
   c -= a;  c -= b;  c ^= (b >>  5); \
   a -= b;  a -= c;  a ^= (c >>  3); \
   b -= c;  b -= a;  b ^= (a << 10); \
   c -= a;  c -= b;  c ^= (b >> 15); \
}

#ifdef WORD_HASH                 /* play with this on little-endian machines */
#define WORD_AT(ptr)    ( *(ulong *)(ptr) )
#else
#define WORD_AT(ptr)    ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
                          ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) )
#endif

#elif LOG_WORD_SIZE == 6                    /* 64 bit words */

#define mix(a,b,c) \
{ \
   a -= b;  a -= c;  a ^= (c >> 43); \
   b -= c;  b -= a;  b ^= (a <<  9); \
   c -= a;  c -= b;  c ^= (b >>  8); \
   a -= b;  a -= c;  a ^= (c >> 38); \
   b -= c;  b -= a;  b ^= (a << 23); \
   c -= a;  c -= b;  c ^= (b >>  5); \
   a -= b;  a -= c;  a ^= (c >> 35); \
   b -= c;  b -= a;  b ^= (a << 49); \
   c -= a;  c -= b;  c ^= (b >> 11); \
   a -= b;  a -= c;  a ^= (c >> 12); \
   b -= c;  b -= a;  b ^= (a << 18); \
   c -= a;  c -= b;  c ^= (b >> 22); \
}

#ifdef WORD_HASH                 /* alpha is little-endian, btw */
#define WORD_AT(ptr)    ( *(ulong *)(ptr) )
#else
#define WORD_AT(ptr)    ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \
                          ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) + \
                          ((ulong)(ptr)[4]<<32) + ((ulong)(ptr)[5]<<40) + \
                          ((ulong)(ptr)[6]<<48) + ((ulong)(ptr)[7]<<56) )
#endif

#else                                       /* neither 32 or 64 bit words */
#error This hash function can only hash 32 or 64 bit words.  Sorry.
#endif
|
883
|
+
|
884
|
+
/* Hashes `key` into a bucket index in [0, cBuckets), which requires
 * cBuckets to be a power of two because the result is masked with
 * cBuckets-1.  The key length is strlen(key) when ht->cchKey is
 * NULL_TERMINATED, otherwise ht->cchKey bytes.
 * The key is consumed three words at a time through mix(); the length
 * is then added into c and the leftover bytes are folded in by the
 * fall-through switch before a final mix().
 * NOTE(review): key is a plain `char *`, so where char is signed, bytes
 * with the top bit set are sign-extended by the (ulong) casts.  Hash
 * values therefore depend on char signedness -- confirm before relying
 * on them across platforms. */
static ulong Hash(HashTable *ht, char *key, ulong cBuckets)
{
   ulong a, b, c;
   ulong cchTotal, cchLeft;

   if ( ht->cchKey == NULL_TERMINATED )
      cchTotal = strlen(key);
   else
      cchTotal = ht->cchKey;

   a = b = c = 0x9e3779b9;       /* the golden ratio; an arbitrary value */

   cchLeft = cchTotal;
   while ( cchLeft >= 3 * sizeof(ulong) )      /* whole 3-word chunks */
   {
      a += WORD_AT(key);
      b += WORD_AT(key + sizeof(ulong));
      c += WORD_AT(key + sizeof(ulong)*2);
      mix(a,b,c);
      cchLeft -= 3 * sizeof(ulong);
      key += 3 * sizeof(ulong);
   }

   c += cchTotal;
   switch ( cchLeft ) {          /* leftover bytes; every case falls through */
#if LOG_WORD_SIZE == 5
      case 11: c += (ulong)key[10]<<24;
      case 10: c += (ulong)key[9]<<16;
      case 9 : c += (ulong)key[8]<<8;
               /* the first byte of c is reserved for the length */
      case 8 : b += WORD_AT(key+4);  a+= WORD_AT(key);  break;
      case 7 : b += (ulong)key[6]<<16;
      case 6 : b += (ulong)key[5]<<8;
      case 5 : b += key[4];
      case 4 : a += WORD_AT(key);  break;
      case 3 : a += (ulong)key[2]<<16;
      case 2 : a += (ulong)key[1]<<8;
      case 1 : a += key[0];
   /* case 0 : nothing left to add */
#elif LOG_WORD_SIZE == 6
      case 23: c += (ulong)key[22]<<56;
      case 22: c += (ulong)key[21]<<48;
      case 21: c += (ulong)key[20]<<40;
      case 20: c += (ulong)key[19]<<32;
      case 19: c += (ulong)key[18]<<24;
      case 18: c += (ulong)key[17]<<16;
      case 17: c += (ulong)key[16]<<8;
               /* the first byte of c is reserved for the length */
      case 16: b += WORD_AT(key+8);  a+= WORD_AT(key);  break;
      case 15: b += (ulong)key[14]<<48;
      case 14: b += (ulong)key[13]<<40;
      case 13: b += (ulong)key[12]<<32;
      case 12: b += (ulong)key[11]<<24;
      case 11: b += (ulong)key[10]<<16;
      case 10: b += (ulong)key[ 9]<<8;
      case  9: b += (ulong)key[ 8];
      case  8: a += WORD_AT(key);  break;
      case  7: a += (ulong)key[ 6]<<48;
      case  6: a += (ulong)key[ 5]<<40;
      case  5: a += (ulong)key[ 4]<<32;
      case  4: a += (ulong)key[ 3]<<24;
      case  3: a += (ulong)key[ 2]<<16;
      case  2: a += (ulong)key[ 1]<<8;
      case  1: a += (ulong)key[ 0];
   /* case 0: nothing left to add */
#endif
   }
   mix(a,b,c);
   return c & (cBuckets-1);
}
|
947
|
+
|
948
|
+
|
949
|
+
/*************************************************************************\
|
950
|
+
| Rehash() |
|
951
|
+
| You give me a hashtable, a new size, and a bucket to follow, and |
|
952
|
+
| I resize the hashtable's bin to be the new size, rehashing |
|
953
|
+
| everything in it. I keep particular track of the bucket you pass |
|
954
|
+
| in, and RETURN a pointer to where the item in the bucket got to. |
|
955
|
+
| (If you pass in NULL, I return an arbitrary pointer.) |
|
956
|
+
\*************************************************************************/
|
957
|
+
|
958
|
+
/* Moves every live item of ht into a freshly allocated table of (at
 * least) cNewBuckets buckets, frees the old table and resets the
 * deleted-item count.  RETURNS the new address of bckWatch's item, or,
 * when bckWatch is NULL, whatever item happened to be inserted last
 * (NULL if nothing was inserted).
 * bckWatch is skipped during the walk and inserted after everything
 * else, so its returned address is the final one. */
static HTItem *Rehash(HashTable *ht, ulong cNewBuckets, HTItem *bckWatch)
{
   Table *tableNew;
   HTItem *bck;
   HTItem *bckNew = NULL;
   ulong iBucket;
   ulong offset;                 /* the i in h(x) + i*(i-1)/2 */
   int fOverwrite = 0;           /* irrelevant: the new slot is always empty */

   assert( ht->table );
   cNewBuckets = Table(Allocate)(&tableNew, cNewBuckets);

   for ( bck = HashFirstBucket(ht); ; bck = HashNextBucket(ht) )
   {
      if ( bck == NULL )                /* walk finished */
      {
         if ( bckWatch == NULL )        /* and nothing was held back */
            break;
         bck = bckWatch;                /* now place the watched item */
      }
      else if ( bck == bckWatch )
         continue;                      /* hold it back until the end */

      /* probe the new table until a free slot turns up */
      offset = 0;
      iBucket = Hash(ht, KEY_PTR(ht, bck->key), cNewBuckets);
      while ( !Table(IsEmpty)(tableNew, iBucket) )
         iBucket = (iBucket + JUMP(KEY_PTR(ht,bck->key), offset))
                   & (cNewBuckets-1);

      bckNew = Table(Insert)(tableNew, bck, iBucket, &fOverwrite);
      if ( bck == bckWatch )            /* that was the held-back item */
         break;
   }

   Table(Free)(ht->table, ht->cBuckets);
   ht->table = tableNew;
   ht->cBuckets = cNewBuckets;
   ht->cDeletedItems = 0;
   return bckNew;
}
|
999
|
+
|
1000
|
+
/*************************************************************************\
|
1001
|
+
| Find() |
|
1002
|
+
| Does the quadratic searching stuff. RETURNS NULL if we don't |
|
1003
|
+
| find an object with the given key, and a pointer to the Item |
|
1004
|
+
| holding the key, if we do. Also sets posLastFind. If piEmpty is |
|
1005
|
+
| non-NULL, we set it to the first open bucket we pass; helpful for |
|
1006
|
+
| doing a later insert if the search fails, for instance. |
|
1007
|
+
\*************************************************************************/
|
1008
|
+
|
1009
|
+
/* Probes the table for `key`.  RETURNS the matching item and records it
 * in ht->posLastFind, or RETURNS NULL (posLastFind left NULL) when an
 * empty bucket is reached first or the table pointer is NULL.
 * When piEmpty is non-NULL and the search fails, *piEmpty receives the
 * first DELETED bucket passed on the way, or, if none was passed, the
 * empty bucket that ended the search.  It may also be written when the
 * search succeeds after passing a DELETED bucket. */
static HTItem *Find(HashTable *ht, ulong key, ulong *piEmpty)
{
   ulong iBucket;
   HTItem *item;
   ulong offset = 0;             /* the i in h(x) + i*(i-1)/2 */
   int fSlotNoted = 0;           /* *piEmpty already names a DELETED bucket */

   ht->posLastFind = NULL;       /* assume failure until proven otherwise */
   if ( ht->table == NULL )
      return NULL;

   for ( iBucket = Hash(ht, KEY_PTR(ht, key), ht->cBuckets); ;
         iBucket = ((iBucket + JUMP(KEY_PTR(ht, key), offset))
                    & (ht->cBuckets-1)) )
   {
      item = Table(Find)(ht->table, iBucket);
      if ( item == NULL )               /* truly empty: key is absent */
      {
         if ( piEmpty && !fSlotNoted )
            *piEmpty = iBucket;
         return NULL;
      }

      if ( IS_BCK_DELETED(item) )       /* always 0 ifdef INSERT_ONLY */
      {
         if ( piEmpty && !fSlotNoted )
         {
            *piEmpty = iBucket;
            fSlotNoted = 1;
         }
         continue;                      /* keep probing past the tombstone */
      }

      if ( !KEY_CMP(ht, key, item->key) )
      {
         ht->posLastFind = item;
         return item;                   /* found it */
      }
   }
}
|
1049
|
+
|
1050
|
+
/*************************************************************************\
|
1051
|
+
| Insert() |
|
1052
|
+
| If an item with the key already exists in the hashtable, RETURNS |
|
1053
|
+
| a pointer to the item (replacing its data if fOverwrite is 1). |
|
1054
|
+
| If not, we find the first place-to-insert (which Find() is nice |
|
1055
|
+
| enough to set for us) and insert the item there, RETURNing a |
|
1056
|
+
| pointer to the item. We might grow the hashtable if it's getting |
|
1057
|
+
| full. Note we include buckets holding DELETED when determining |
|
1058
|
+
| fullness, because they slow down searching. |
|
1059
|
+
\*************************************************************************/
|
1060
|
+
|
1061
|
+
/* RETURNS the smallest power of two strictly greater than x.  When that
 * value does not fit in a ulong (x has its top bit set), RETURNS the
 * largest power of two that does fit.  NextPow2(0) is 1: a result of 0
 * would not be a valid bucket count. */
static ulong NextPow2(ulong x)
{
   if ( x == 0 )                    /* no bit to isolate: answer is 2^0 */
      return 1;
   if ( ((x << 1) >> 1) != x )      /* next power of 2 overflows */
      x >>= 1;                      /* so we return highest power of 2 we can */
   while ( (x & (x-1)) != 0 )       /* clear low bits until only the top is left */
      x &= (x-1);
   return x << 1;                   /* makes it the *next* power of 2 */
}
|
1069
|
+
|
1070
|
+
/* Inserts (key, data) and RETURNS the item holding the key, or NULL when
 * ht->table is NULL.
 *   - Key already present: its data is replaced only when fOverwrite is
 *     non-zero; the existing item is returned.
 *   - Key absent: a copy of the key (COPY_KEY) and the data are stored
 *     in the slot Find() picked, which is either an empty bucket or the
 *     first DELETED bucket on the probe path.  The table may then be
 *     rehashed to a larger size, in which case the returned pointer is
 *     the item's address in the new table.
 * ht->posLastFind is cleared in every case. */
static HTItem *Insert(HashTable *ht, ulong key, ulong data, int fOverwrite)
{
   HTItem *item, bckInsert;
   ulong iEmpty;                  /* first empty bucket key probes */

   if ( ht->table == NULL )             /* no table to insert into */
      return NULL;
   item = Find(ht, key, &iEmpty);
   ht->posLastFind = NULL;              /* last operation is insert, not find */
   if ( item )
   {
      if ( fOverwrite )
         item->data = data;             /* key already matches */
      return item;
   }

   COPY_KEY(ht, bckInsert.key, key);    /* make our own copy of the key */
   bckInsert.data = data;               /* oh, and the data too */
   /* The key is new, so whatever sits in slot iEmpty can only be a
    * DELETED bucket and must be replaced.  The caller's fOverwrite is
    * about same-key collisions and must not stop this write, otherwise
    * the item is dropped while the counters below still change. */
   fOverwrite = 1;
   item = Table(Insert)(ht->table, &bckInsert, iEmpty, &fOverwrite);
   if ( fOverwrite )                    /* we overwrote a deleted bucket */
      ht->cDeletedItems--;
   ht->cItems++;                        /* a new item is now stored */
   if ( ht->cDeltaGoalSize > 0 )        /* closer to our goal size */
      ht->cDeltaGoalSize--;
   if ( ht->cItems + ht->cDeletedItems >= ht->cBuckets * OCCUPANCY_PCT
        || ht->cDeltaGoalSize < 0 ) /* we must've overestimated # of deletes */
      item = Rehash(ht,
                    NextPow2((ulong)(((ht->cDeltaGoalSize > 0 ?
                                       ht->cDeltaGoalSize : 0)
                                      + ht->cItems) / OCCUPANCY_PCT)),
                    item);
   return item;
}
|
1103
|
+
|
1104
|
+
/*************************************************************************\
|
1105
|
+
| Delete() |
|
1106
|
+
| Removes the item from the hashtable, and if fShrink is 1, will |
|
1107
|
+
| shrink the hashtable if it's too small (ie even after halving, |
|
1108
|
+
| the ht would be less than half full, though in order to avoid |
|
1109
|
+
| oscillating table size, we insist that after halving the ht would |
|
1110
|
+
| be less than 40% full). RETURNS 1 if the item was found, 0 else. |
|
1111
|
+
| If fLastFindSet is true, then this function is basically |
|
1112
|
+
| DeleteLastFind. |
|
1113
|
+
\*************************************************************************/
|
1114
|
+
|
1115
|
+
/* Marks an item DELETED and updates the counters.  When fLastFindSet is
 * zero the item is located with Find(ht, key); otherwise key is ignored
 * and ht->posLastFind is used as-is (the caller must ensure it is set).
 * If fShrink is non-zero the table is rehashed to a smaller size once
 * it is sparse enough, no bulk deletion is still pending
 * (cDeltaGoalSize >= 0) and half the current size is still at least
 * MIN_HASH_SIZE.  RETURNS 1 if an item was deleted, 0 if the key was
 * not found.  ht->posLastFind is cleared after a deletion. */
static int Delete(HashTable *ht, ulong key, int fShrink, int fLastFindSet)
{
   if ( !fLastFindSet )
   {
      if ( Find(ht, key, NULL) == NULL )
         return 0;
   }
   SET_BCK_DELETED(ht, ht->posLastFind);   /* Find() left this for us */
   ht->cItems--;
   ht->cDeletedItems++;
   if ( ht->cDeltaGoalSize < 0 )   /* one step nearer the deletion goal */
      ht->cDeltaGoalSize++;

   if ( fShrink && ht->cItems < ht->cBuckets * OCCUPANCY_PCT*0.4
        && ht->cDeltaGoalSize >= 0       /* wait until we're done deleting */
        && (ht->cBuckets >> 1) >= MIN_HASH_SIZE )                /* shrink */
   {
      Rehash(ht,
             NextPow2((ulong)((ht->cItems+ht->cDeltaGoalSize)/OCCUPANCY_PCT)),
             NULL);
   }
   ht->posLastFind = NULL;         /* last operation is delete, not find */
   return 1;
}
|
1134
|
+
|
1135
|
+
|
1136
|
+
/* ======================================================================== */
|
1137
|
+
/* USER-VISIBLE API */
|
1138
|
+
/* ---------------------- */
|
1139
|
+
|
1140
|
+
/*************************************************************************\
|
1141
|
+
| AllocateHashTable() |
|
1142
|
+
| ClearHashTable() |
|
1143
|
+
| FreeHashTable() |
|
1144
|
+
| Allocate() allocates a hash table and sets up size parameters. |
|
1145
|
+
| Free() frees it. Clear() deletes all the items from the hash |
|
1146
|
+
| table, but frees not. |
|
1147
|
+
| cchKey is < 0 if the keys you send me are meant to be pointers |
|
1148
|
+
| to \0-terminated strings. Then -cchKey is the maximum key size. |
|
1149
|
+
| If cchKey < one word (ulong), the keys you send me are the keys |
|
1150
|
+
| themselves; else the keys you send me are pointers to the data. |
|
1151
|
+
| If fSaveKeys is 1, we copy any keys given to us to insert. We |
|
1152
|
+
| also free these keys when freeing the hash table. If it's 0, the |
|
1153
|
+
| user is responsible for key space management. |
|
1154
|
+
| AllocateHashTable() RETURNS a hash table; the others TAKE one. |
|
1155
|
+
\*************************************************************************/
|
1156
|
+
|
1157
|
+
HashTable *AllocateHashTable(int cchKey, int fSaveKeys)
|
1158
|
+
{
|
1159
|
+
HashTable *ht;
|
1160
|
+
|
1161
|
+
ht = (HashTable *) HTsmalloc(sizeof(*ht)); /* set everything to 0 */
|
1162
|
+
ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);
|
1163
|
+
ht->cchKey = cchKey <= 0 ? NULL_TERMINATED : cchKey;
|
1164
|
+
ht->cItems = 0;
|
1165
|
+
ht->cDeletedItems = 0;
|
1166
|
+
ht->fSaveKeys = fSaveKeys;
|
1167
|
+
ht->cDeltaGoalSize = 0;
|
1168
|
+
ht->iter = HTsmalloc( sizeof(TableIterator) );
|
1169
|
+
|
1170
|
+
ht->fpData = NULL; /* set by HashLoad, maybe */
|
1171
|
+
ht->bckData.data = (ulong) NULL; /* this must be done */
|
1172
|
+
HTSetupKeyTrunc(); /* in util.c */
|
1173
|
+
return ht;
|
1174
|
+
}
|
1175
|
+
|
1176
|
+
/* Empties the table without destroying it: frees stored key copies when
 * the table holds key pointers and fSaveKeys is set (only the first one
 * when fSaveKeys == 2, which the original comment describes as all keys
 * living in one block), replaces the bucket table with a fresh
 * MIN_HASH_SIZE one, zeroes the counters, and releases bckData.data if
 * it is set. */
void ClearHashTable(HashTable *ht)
{
   HTItem *bck;

   if ( STORES_PTR(ht) && ht->fSaveKeys )          /* need to free keys */
   {
      bck = HashFirstBucket(ht);
      while ( bck != NULL )
      {
         FREE_KEY(ht, bck->key);
         if ( ht->fSaveKeys == 2 )   /* one shared block: free only once */
            break;
         bck = HashNextBucket(ht);
      }
   }
   Table(Free)(ht->table, ht->cBuckets);
   ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);

   ht->posLastFind = NULL;
   ht->cDeltaGoalSize = 0;
   ht->cDeletedItems = 0;
   ht->cItems = 0;
   ht->fpData = NULL;                              /* no longer HashLoading */
   if ( ht->bckData.data )  free( (char *)(ht)->bckData.data);
   ht->bckData.data = (ulong) NULL;
}
|
1198
|
+
|
1199
|
+
/* Destroys a table created by AllocateHashTable(): clears it (freeing
 * saved keys), then releases the iterator, the bucket table that
 * ClearHashTable() re-creates, and finally the HashTable itself.
 * A NULL ht is ignored.  The struct is released with HTfree(), the
 * counterpart of the HTsmalloc() that allocated it. */
void FreeHashTable(HashTable *ht)
{
   if ( ht == NULL )
      return;
   ClearHashTable(ht);
   if ( ht->iter )    HTfree(ht->iter, sizeof(TableIterator));
   if ( ht->table )   Table(Free)(ht->table, ht->cBuckets);
   HTfree(ht, sizeof(*ht));
}
|
1206
|
+
|
1207
|
+
/*************************************************************************\
|
1208
|
+
| HashFind() |
|
1209
|
+
| HashFindLast() |
|
1210
|
+
| HashFind(): looks in h(x) + i(i-1)/2 % t as i goes up from 0 |
|
1211
|
+
| until we either find the key or hit an empty bucket. RETURNS a |
|
1212
|
+
| pointer to the item in the hit bucket, if we find it, else |
|
1213
|
+
| RETURNS NULL. |
|
1214
|
+
| HashFindLast() returns the item returned by the last |
|
1215
|
+
| HashFind(), which may be NULL if the last HashFind() failed. |
|
1216
|
+
| LOAD_AND_RETURN reads the data from off disk, if necessary. |
|
1217
|
+
\*************************************************************************/
|
1218
|
+
|
1219
|
+
/* Looks up `key` (after KEY_TRUNC) and hands the result, possibly NULL,
 * to LOAD_AND_RETURN, which produces the function's return value. */
HTItem *HashFind(HashTable *ht, ulong key)
{
   HTItem *item = Find(ht, KEY_TRUNC(ht, key), NULL);

   LOAD_AND_RETURN(ht, item);
}
|
1223
|
+
|
1224
|
+
/* Hands ht->posLastFind, the item recorded by the most recent successful
 * find (NULL otherwise), to LOAD_AND_RETURN. */
HTItem *HashFindLast(HashTable *ht)
{
   HTItem *last = ht->posLastFind;

   LOAD_AND_RETURN(ht, last);
}
|
1228
|
+
|
1229
|
+
/*************************************************************************\
|
1230
|
+
| HashFindOrInsert() |
|
1231
|
+
| HashFindOrInsertItem() |
|
1232
|
+
| HashInsert() |
|
1233
|
+
| HashInsertItem() |
|
1234
|
+
| HashDelete() |
|
1235
|
+
| HashDeleteLast() |
|
1236
|
+
| Pretty obvious what these guys do. Some take buckets (items), |
|
1237
|
+
| some take keys and data separately. All things RETURN the bucket |
|
1238
|
+
| (a pointer into the hashtable) if appropriate. |
|
1239
|
+
\*************************************************************************/
|
1240
|
+
|
1241
|
+
/* RETURNS the item for `key`, inserting (key, dataInsert) if the key is
 * absent.  An existing item's data is left untouched: this is Insert()
 * with same-key overwrite switched off. */
HTItem *HashFindOrInsert(HashTable *ht, ulong key, ulong dataInsert)
{
   ulong keyTrunc = KEY_TRUNC(ht, key);

   return Insert(ht, keyTrunc, dataInsert, 0);
}
|
1246
|
+
|
1247
|
+
/* HashFindOrInsert() taking the key and data from *pItem. */
HTItem *HashFindOrInsertItem(HashTable *ht, HTItem *pItem)
{
   ulong key = pItem->key;
   ulong data = pItem->data;

   return HashFindOrInsert(ht, key, data);
}
|
1251
|
+
|
1252
|
+
/* Inserts (key, data) and RETURNS the item holding the key.  Whether an
 * existing item's data is replaced is decided by SAMEKEY_OVERWRITE. */
HTItem *HashInsert(HashTable *ht, ulong key, ulong data)
{
   ulong keyTrunc = KEY_TRUNC(ht, key);

   return Insert(ht, keyTrunc, data, SAMEKEY_OVERWRITE);
}
|
1256
|
+
|
1257
|
+
/* HashInsert() taking the key and data from *pItem. */
HTItem *HashInsertItem(HashTable *ht, HTItem *pItem)
{
   ulong key = pItem->key;
   ulong data = pItem->data;

   return HashInsert(ht, key, data);
}
|
1261
|
+
|
1262
|
+
/* HashDelete(): deletes by key.  Delegates to Delete() with the truncated
 * key, !FAST_DELETE, and a zero final argument (HashDeleteLast() passes 1
 * there instead), and RETURNS Delete()'s result.
 */
int HashDelete(HashTable *ht, ulong key)
{
   int result;

   result = Delete(ht, KEY_TRUNC(ht, key), !FAST_DELETE, 0);
   return result;
}
|
1266
|
+
|
1267
|
+
/* HashDeleteLast(): deletes the bucket located by the most recent find.
 * RETURNS 0 without calling Delete() when ht->posLastFind is null (the
 * last find failed); otherwise RETURNS Delete()'s result.
 */
int HashDeleteLast(HashTable *ht)
{
   if ( ht->posLastFind )
   {
      /* key argument is a dummy 0: the final argument of 1 selects the
       * "last found" bucket, so no key is needed */
      return Delete(ht, 0, !FAST_DELETE, 1);
   }
   return 0;                               /* nothing was found last time */
}
|
1273
|
+
|
1274
|
+
/*************************************************************************\
|
1275
|
+
| HashFirstBucket() |
|
1276
|
+
| HashNextBucket() |
|
1277
|
+
| Iterates through the items in the hashtable by iterating through |
|
1278
|
+
| the table. Since we know about deleted buckets and loading data |
|
1279
|
+
| off disk, and the table doesn't, our job is to take care of these |
|
1280
|
+
| things. RETURNS a bucket, or NULL after the last bucket. |
|
1281
|
+
\*************************************************************************/
|
1282
|
+
|
1283
|
+
/* HashFirstBucket(): restarts the table iterator (ht->iter) and walks it
 * until a bucket that is not marked deleted turns up; that bucket is
 * handed to LOAD_AND_RETURN.  RETURNS NULL if the iterator runs out.
 *
 * NOTE(review): LOAD_AND_RETURN is used as the unbraced body of the `if`.
 * If the macro expands to more than one statement, only its first
 * statement is guarded -- confirm against the macro definition before
 * adding braces here.
 */
HTItem *HashFirstBucket(HashTable *ht)
{
   HTItem *bck;

   for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
         bck; bck = Table(NextBucket)(ht->iter) )
      if ( !IS_BCK_DELETED(bck) )
         LOAD_AND_RETURN(ht, bck);
   return NULL;
}
|
1293
|
+
|
1294
|
+
/* HashNextBucket(): advances the table iterator (ht->iter) past any
 * buckets marked deleted and hands the next live one to LOAD_AND_RETURN.
 * RETURNS NULL once the iterator is exhausted.
 *
 * NOTE(review): as in HashFirstBucket(), LOAD_AND_RETURN is an unbraced
 * `if` body; confirm the macro is a single statement before restructuring.
 */
HTItem *HashNextBucket(HashTable *ht)
{
   HTItem *bck;

   while ( (bck=Table(NextBucket)(ht->iter)) )
      if ( !IS_BCK_DELETED(bck) )
         LOAD_AND_RETURN(ht, bck);
   return NULL;
}
|
1303
|
+
|
1304
|
+
/*************************************************************************\
|
1305
|
+
| HashSetDeltaGoalSize() |
|
1306
|
+
| If we're going to insert 100 items, set the delta goal size to |
|
1307
|
+
| 100 and we take that into account when inserting. Likewise, if |
|
1308
|
+
| we're going to delete 100 items, set it to -100 and we won't |
|
1309
|
+
| rehash until all 100 have been done. It's ok to be wrong, but |
|
1310
|
+
| it's efficient to be right. Returns the delta value. |
|
1311
|
+
\*************************************************************************/
|
1312
|
+
|
1313
|
+
/* HashSetDeltaGoalSize(): stores `delta` in ht->cDeltaGoalSize.  When
 * built with FAST_DELETE == 1 or INSERT_ONLY, a negative stored value is
 * reset to 0.  RETURNS the value finally stored.
 */
int HashSetDeltaGoalSize(HashTable *ht, int delta)
{
   ht->cDeltaGoalSize = delta;
#if FAST_DELETE == 1 || defined INSERT_ONLY
   /* these builds never rehash after a deletion, so a negative goal
    * has no use */
   if ( ht->cDeltaGoalSize < 0 )
   {
      ht->cDeltaGoalSize = 0;
   }
#endif
   return ht->cDeltaGoalSize;
}
|
1322
|
+
|
1323
|
+
|
1324
|
+
/*************************************************************************\
|
1325
|
+
| HashSave() |
|
1326
|
+
| HashLoad() |
|
1327
|
+
| HashLoadKeys() |
|
1328
|
+
| Routines for saving and loading the hashtable from disk. We can |
|
1329
|
+
| then use the hashtable in two ways: loading it back into memory |
|
1330
|
+
| (HashLoad()) or loading only the keys into memory, in which case |
|
1331
|
+
| the data for a given key is loaded off disk when the key is |
|
1332
|
+
| retrieved. The data is freed when something new is retrieved in |
|
1333
|
+
| its place, so this is not a "lazy-load" scheme. |
|
1334
|
+
| The key is saved automatically and restored upon load, but the |
|
1335
|
+
| user needs to specify a routine for reading and writing the data. |
|
1336
|
+
| fSaveKeys is of course set to 1 when you read in a hashtable. |
|
1337
|
+
| HashLoad RETURNS a newly allocated hashtable. |
|
1338
|
+
| DATA_WRITE() takes an fp and a char * (representing the data |
|
1339
|
+
| field), and must perform two separate tasks. If fp is NULL, |
|
1340
|
+
| return the number of bytes written. If not, writes the data to |
|
1341
|
+
| disk at the place the fp points to. |
|
1342
|
+
| DATA_READ() takes an fp and the number of bytes in the data |
|
1343
|
+
| field, and returns a char * which points to wherever you've |
|
1344
|
+
| written the data. Thus, you must allocate memory for the data. |
|
1345
|
+
| Both dataRead and dataWrite may be NULL if you just wish to |
|
1346
|
+
| store the data field directly, as an integer. |
|
1347
|
+
\*************************************************************************/
|
1348
|
+
|
1349
|
+
/* HashSave(): writes the hashtable `ht` to the stream `fp`.
 *
 *    fp         stream to write to; it is also repositioned with
 *               ftell()/fseek() to back-patch the key-block size.
 *    ht         table to save.
 *    dataWrite  callback for the data field, or NULL to store the data
 *               field directly as a word.  Called with a NULL stream to
 *               obtain the byte count, then with `fp` to write the data.
 *
 * File layout:
 *    magic number (4 bytes)
 *    cchKey, cItems, cDeletedItems (one word each)
 *    table info (buckets and a bitmap)
 *    cchAllKeys (one word)
 *    the keys, in one block: null-terminated when cchKey is
 *       NULL_TERMINATED, otherwise cchKey bytes each.  Keys are not
 *       written for DELETED buckets.
 *    then, per bucket, the data:
 *       EITHER DELETED (one word) for a deleted bucket,
 *       OR the number of bytes of this bucket's data (one word; not
 *          stored when dataWrite == NULL, since the size is then known)
 *          followed by the data itself (variable length).
 * All words are in network byte order.
 */
void HashSave(FILE *fp, HashTable *ht, int (*dataWrite)(FILE *, char *))
{
   long cbKeyBlock, posKeyBlock;
   HTItem *bck;

   /* ---- header ---- */
   fprintf(fp, "%s", MAGIC_KEY);
   WRITE_UL(fp, ht->cchKey);        /* WRITE_UL, READ_UL, etc in fks-hash.h */
   WRITE_UL(fp, ht->cItems);
   WRITE_UL(fp, ht->cDeletedItems);
   Table(Write)(fp, ht->table, ht->cBuckets);        /* writes cBuckets too */

   /* ---- key block, preceded by a size word patched in afterwards ---- */
   WRITE_UL(fp, 0);                        /* placeholder for the size word */
   posKeyBlock = ftell(fp);
   bck = HashFirstBucket(ht);
   while ( bck )
   {
      if ( ht->cchKey == NULL_TERMINATED )
         fwrite(KEY_PTR(ht, bck->key), 1,
                strlen(KEY_PTR(ht, bck->key))+1, fp);
      else
         fwrite(KEY_PTR(ht, bck->key), 1, ht->cchKey, fp);
      bck = HashNextBucket(ht);
   }
   cbKeyBlock = ftell(fp) - posKeyBlock;
   fseek(fp, posKeyBlock - sizeof(unsigned long), SEEK_SET);
   WRITE_UL(fp, cbKeyBlock);
   fseek(fp, 0, SEEK_END);                 /* back to the end of the file */

   /* ---- data: the raw table iterator visits deleted buckets too ---- */
   for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
         bck; bck = Table(NextBucket)(ht->iter) )
   {
      if ( dataWrite != NULL && !IS_BCK_DELETED(bck) )
      {
         /* size word first, then the data itself */
         WRITE_UL(fp, (*dataWrite)(NULL, (char *)bck->data));
         (*dataWrite)(fp, (char *)bck->data);
      }
      else
      {
         WRITE_UL(fp, bck->data);
      }
   }
}
|
1399
|
+
|
1400
|
+
/* HashDoLoad(): shared worker for HashLoad() and HashLoadKeys().  Reads a
 * table in the format written by HashSave() from `fp` into `ht`.
 *
 *    fp        stream positioned at the magic key.
 *    dataRead  callback that reads one bucket's data given its byte
 *              count, or NULL when the data field is stored directly.
 *    ht        table from AllocateHashTable(); its table storage is freed
 *              here and replaced by what Table(Read) produces.  If
 *              ht->fpData is set, data is not loaded: each bucket's data
 *              field instead records a file offset.
 *
 * RETURNS ht.  Prints a message to stderr and calls exit(3) if the magic
 * key is missing or wrong, or if the key block cannot be read in full.
 */
static HashTable *HashDoLoad(FILE *fp, char * (*dataRead)(FILE *, int),
                             HashTable *ht)
{
   ulong cchKey;
   char szMagicKey[4], *rgchKeys;
   HTItem *bck;

   /* A short read would leave szMagicKey uninitialized; refuse it up front */
   if ( fread(szMagicKey, 1, sizeof(szMagicKey), fp) != sizeof(szMagicKey) )
   {
      fprintf(stderr, "ERROR: not a hash table (could not read magic key)\n");
      exit(3);
   }
   if ( strncmp(szMagicKey, MAGIC_KEY, 4) )
   {
      fprintf(stderr, "ERROR: not a hash table (magic key is %4.4s, not %s)\n",
              szMagicKey, MAGIC_KEY);
      exit(3);
   }
   Table(Free)(ht->table, ht->cBuckets);  /* allocated in AllocateHashTable */

   READ_UL(fp, ht->cchKey);
   READ_UL(fp, ht->cItems);
   READ_UL(fp, ht->cDeletedItems);
   ht->cBuckets = Table(Read)(fp, &ht->table);    /* next is the table info */

   READ_UL(fp, cchKey);
   rgchKeys = (char *) HTsmalloc( cchKey );         /* stores all the keys */
   /* The loop below walks rgchKeys assuming all cchKey bytes are present,
    * so a truncated key block must not be accepted. */
   if ( fread(rgchKeys, 1, cchKey, fp) != cchKey )
   {
      fprintf(stderr, "ERROR: hash table file is truncated (key block)\n");
      exit(3);
   }
   /* We use the table iterator so we don't try to LOAD_AND_RETURN */
   for ( bck = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
         bck; bck = Table(NextBucket)(ht->iter) )
   {
      READ_UL(fp, bck->data);         /* all we need if dataRead is NULL */
      if ( IS_BCK_DELETED(bck) )      /* always 0 if defined(INSERT_ONLY) */
         continue;                    /* this is why we read the data first */
      if ( dataRead != NULL )         /* if it's null, we're done */
      {
         if ( !ht->fpData )           /* load data into memory */
            bck->data = (ulong)dataRead(fp, bck->data);
         else                         /* store location of data on disk */
         {
            fseek(fp, bck->data, SEEK_CUR);  /* bck->data held size of data */
            bck->data = ftell(fp) - bck->data - sizeof(unsigned long);
         }
      }

      if ( ht->cchKey == NULL_TERMINATED )   /* now read the key */
      {
         bck->key = (ulong) rgchKeys;
         rgchKeys = strchr(rgchKeys, '\0') + 1;    /* read past the string */
      }
      else
      {
         if ( STORES_PTR(ht) )        /* key field points into the key block */
            bck->key = (ulong) rgchKeys;
         else                         /* small keys stored directly */
            memcpy(&bck->key, rgchKeys, ht->cchKey);
         rgchKeys += ht->cchKey;
      }
   }
   /* When keys were copied into the buckets the block is no longer needed;
    * otherwise the buckets point into it and it must stay allocated. */
   if ( !STORES_PTR(ht) )                     /* keys are stored directly */
      HTfree(rgchKeys - cchKey, cchKey);   /* we've advanced rgchK to end */
   return ht;
}
|
1458
|
+
|
1459
|
+
/* HashLoad(): allocates a fresh table and fills it from `fp` through
 * HashDoLoad().  RETURNS the newly allocated hashtable.
 */
HashTable *HashLoad(FILE *fp, char * (*dataRead)(FILE *, int))
{
   /* cchKey (0 here) is set later by the load; fSaveKey should be 2! */
   return HashDoLoad(fp, dataRead, AllocateHashTable(0, 2));
}
|
1465
|
+
|
1466
|
+
HashTable *HashLoadKeys(FILE *fp, char * (*dataRead)(FILE *, int))
|
1467
|
+
{
|
1468
|
+
HashTable *ht;
|
1469
|
+
|
1470
|
+
if ( dataRead == NULL )
|
1471
|
+
return HashLoad(fp, NULL); /* no reason not to load the data here */
|
1472
|
+
ht = AllocateHashTable(0, 2); /* cchKey set later, fSaveKey should be 2! */
|
1473
|
+
ht->fpData = fp; /* tells HashDoLoad() to only load keys */
|
1474
|
+
ht->dataRead = dataRead;
|
1475
|
+
return HashDoLoad(fp, dataRead, ht);
|
1476
|
+
}
|
1477
|
+
|
1478
|
+
/*************************************************************************\
|
1479
|
+
| PrintHashTable() |
|
1480
|
+
| A debugging tool. Prints the entire contents of the hash table, |
|
1481
|
+
| like so: <bin #>: key of the contents. Returns number of bytes |
|
1482
|
+
| allocated. If time is not -1, we print it as the time required |
|
1483
|
+
| for the hash. If iForm is 0, we just print the stats. If it's |
|
1484
|
+
| 1, we print the keys and data too, but the keys are printed as |
|
1485
|
+
| ulongs. If it's 2, we print the keys correctly (as long numbers |
|
1486
|
+
| or as strings). |
|
1487
|
+
\*************************************************************************/
|
1488
|
+
|
1489
|
+
/* PrintHashTable(): debugging aid that prints statistics for `ht` to
 * stdout and, optionally, every live key/data pair.
 *
 *    ht     table to report on.
 *    time   printed as the elapsed time when greater than -1.0.
 *    iForm  0: statistics only; 1: also print each key and data as
 *           unsigned longs; 2: print keys as strings when the table
 *           stores them that way, otherwise as unsigned longs.
 *
 * RETURNS cbData + cbBin, the byte total shown in the report.
 */
ulong PrintHashTable(HashTable *ht, double time, int iForm)
{
   ulong cbData = 0, cbBin = 0, cItems = 0, cOccupied = 0;
   HTItem *item;

   printf("HASH TABLE.\n");
   if ( time > -1.0 )
   {
      printf("----------\n");
      printf("Time: %27.2f\n", time);
   }

   item = Table(FirstBucket)(ht->iter, ht->table, ht->cBuckets);
   while ( item )
   {
      cOccupied++;                      /* counts deleted buckets as well */
      if ( !IS_BCK_DELETED(item) )      /* deleted buckets need nothing more */
      {
         cItems++;                      /* checked against ht->cItems below */

         /* cbBin is adjusted here but reassigned from Table(Memory)
          * after the loop, before anything reads it. */
         if ( STORES_PTR(ht) )
         {
            if ( ht->cchKey == NULL_TERMINATED )
               cbData += WORD_ROUND(strlen((char *)item->key)+1);
            else
               cbData += ht->cchKey;
         }
         else
         {
            cbBin -= sizeof(item->key);
            cbData += sizeof(item->key);
         }
         cbBin -= sizeof(item->data);
         cbData += sizeof(item->data);

         if ( iForm != 0 )              /* caller wants the contents too */
         {
            const int fRealKeys = (iForm == 2);

            if ( fRealKeys && ht->cchKey == NULL_TERMINATED )
               printf("%s/%lu\n", (char *)item->key, item->data);
            else if ( fRealKeys && STORES_PTR(ht) )
               printf("%.*s/%lu\n",
                      (int)ht->cchKey, (char *)item->key, item->data);
            else         /* either key actually is a ulong, or iForm == 1 */
               printf("%lu/%lu\n", item->key, item->data);
         }
      }
      item = Table(NextBucket)(ht->iter);
   }
   assert( cItems == ht->cItems );                        /* sanity check */
   cbBin = Table(Memory)(ht->cBuckets, cOccupied);

   printf("----------\n");
   printf("%lu buckets (%lu bytes). %lu empty. %lu hold deleted items.\n"
          "%lu items (%lu bytes).\n"
          "%lu bytes total. %lu bytes (%2.1f%%) of this is ht overhead.\n",
          ht->cBuckets, cbBin, ht->cBuckets - cOccupied, cOccupied - ht->cItems,
          ht->cItems, cbData,
          cbData + cbBin, cbBin, cbBin*100.0/(cbBin+cbData));

   return cbData + cbBin;
}
|