whistlepig 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,294 @@
1
+ #include <stdio.h>
2
+ #include <sys/types.h>
3
+ #include <sys/stat.h>
4
+ #include <unistd.h>
5
+ #include "whistlepig.h"
6
+
7
+ #define PATH_BUF_SIZE 4096
8
+
9
+ int wp_index_exists(const char* pathname_base) {
10
+ char buf[PATH_BUF_SIZE];
11
+ snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
12
+ return wp_segment_exists(buf);
13
+ }
14
+
15
+ wp_error* wp_index_create(wp_index** indexptr, const char* pathname_base) {
16
+ char buf[PATH_BUF_SIZE];
17
+
18
+ snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
19
+ if(wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' already exists", pathname_base);
20
+
21
+ wp_index* index = *indexptr = malloc(sizeof(wp_index));
22
+ index->pathname_base = pathname_base;
23
+ index->num_segments = 1;
24
+ index->sizeof_segments = 1;
25
+ index->open = 1;
26
+ index->segments = malloc(sizeof(wp_segment));
27
+ index->docid_offsets = malloc(sizeof(uint64_t));
28
+
29
+ RELAY_ERROR(wp_segment_create(&index->segments[0], buf));
30
+ index->docid_offsets[0] = 0;
31
+
32
+ return NO_ERROR;
33
+ }
34
+
35
+ RAISING_STATIC(ensure_num_segments(wp_index* index)) {
36
+ if(index->num_segments >= index->sizeof_segments) {
37
+ index->sizeof_segments *= 2;
38
+ index->segments = realloc(index->segments, sizeof(wp_segment) * index->sizeof_segments);
39
+ index->docid_offsets = realloc(index->docid_offsets, sizeof(uint64_t) * index->sizeof_segments);
40
+ if(index->segments == NULL) RAISE_ERROR("oom");
41
+ }
42
+
43
+ return NO_ERROR;
44
+ }
45
+
46
+ wp_error* wp_index_load(wp_index** indexptr, const char* pathname_base) {
47
+ char buf[PATH_BUF_SIZE];
48
+ snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
49
+ if(!wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' does not exist", pathname_base);
50
+
51
+ wp_index* index = *indexptr = malloc(sizeof(wp_index));
52
+
53
+ index->pathname_base = pathname_base;
54
+ index->num_segments = 0;
55
+ index->sizeof_segments = 1;
56
+ index->open = 1;
57
+ index->segments = malloc(sizeof(wp_segment));
58
+ index->docid_offsets = malloc(sizeof(uint64_t));
59
+
60
+ // load all the segments we can
61
+ while(index->num_segments < WP_MAX_SEGMENTS) {
62
+ snprintf(buf, PATH_BUF_SIZE, "%s%d", pathname_base, index->num_segments);
63
+ if(!wp_segment_exists(buf)) break;
64
+
65
+ RELAY_ERROR(ensure_num_segments(index));
66
+ DEBUG("loading segment %s", buf);
67
+ RELAY_ERROR(wp_segment_load(&index->segments[index->num_segments], buf));
68
+ if(index->num_segments == 0)
69
+ index->docid_offsets[index->num_segments] = 0;
70
+ else {
71
+ // segments return docids 1 through N, so the num_docs in a segment is
72
+ // also the max document id
73
+ postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 1].postings, postings_region);
74
+ index->docid_offsets[index->num_segments] = prevpr->num_docs + index->docid_offsets[index->num_segments - 1];
75
+ }
76
+
77
+ index->num_segments++;
78
+ }
79
+
80
+ return NO_ERROR;
81
+ }
82
+
83
+ // we have two special values at our disposal to mark where we are in
84
+ // the sequence of segments
85
+ #define SEGMENT_UNINITIALIZED WP_MAX_SEGMENTS
86
+ #define SEGMENT_DONE (WP_MAX_SEGMENTS + 1)
87
+
88
+ wp_error* wp_index_setup_query(wp_index* index, wp_query* query) {
89
+ (void)index;
90
+ query->segment_idx = SEGMENT_UNINITIALIZED;
91
+
92
+ return NO_ERROR;
93
+ }
94
+
95
+ // can be called multiple times to resume
96
+ wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_results, uint32_t* num_results, uint64_t* results) {
97
+ *num_results = 0;
98
+ if(index->num_segments == 0) return NO_ERROR;
99
+
100
+ if(query->segment_idx == SEGMENT_UNINITIALIZED) {
101
+ query->segment_idx = index->num_segments - 1;
102
+ DEBUG("setting up segment %u", query->segment_idx);
103
+ RELAY_ERROR(wp_search_init_search_state(query, &index->segments[query->segment_idx]));
104
+ }
105
+
106
+ // at this point, we assume we're initialized and query->segment_idx is the index
107
+ // of the segment we're searching against
108
+ while((*num_results < max_num_results) && (query->segment_idx != SEGMENT_DONE)) {
109
+ uint32_t want_num_results = max_num_results - *num_results;
110
+ uint32_t got_num_results = 0;
111
+ search_result* segment_results = malloc(sizeof(search_result) * want_num_results);
112
+
113
+ DEBUG("searching segment %d", query->segment_idx);
114
+ RELAY_ERROR(wp_search_run_query_on_segment(query, &index->segments[query->segment_idx], want_num_results, &got_num_results, segment_results));
115
+ DEBUG("asked segment %d for %d results, got %d", query->segment_idx, want_num_results, got_num_results);
116
+
117
+ // extract the per-segment docids from the search results and adjust by
118
+ // each segment's docid offset to form global docids
119
+ for(uint32_t i = 0; i < got_num_results; i++) {
120
+ results[*num_results + i] = index->docid_offsets[query->segment_idx] + segment_results[i].doc_id;
121
+ wp_search_result_free(&segment_results[i]);
122
+ }
123
+ free(segment_results);
124
+ *num_results += got_num_results;
125
+
126
+ if(got_num_results < want_num_results) { // this segment is finished; move to the next one
127
+ DEBUG("releasing index %d", query->segment_idx);
128
+ RELAY_ERROR(wp_search_release_search_state(query));
129
+ if(query->segment_idx > 0) {
130
+ query->segment_idx--;
131
+ DEBUG("setting up index %d", query->segment_idx);
132
+ RELAY_ERROR(wp_search_init_search_state(query, &index->segments[query->segment_idx]));
133
+ }
134
+ else query->segment_idx = SEGMENT_DONE;
135
+ }
136
+ }
137
+
138
+ return NO_ERROR;
139
+ }
140
+
141
+ #define RESULT_BUF_SIZE 1024
142
+ // count the results by just running the query until it stops. slow!
143
+ wp_error* wp_index_count_results(wp_index* index, wp_query* query, uint32_t* num_results) {
144
+ uint64_t results[RESULT_BUF_SIZE];
145
+
146
+ *num_results = 0;
147
+ RELAY_ERROR(wp_index_setup_query(index, query));
148
+ while(1) {
149
+ uint32_t this_num_results;
150
+ RELAY_ERROR(wp_index_run_query(index, query, RESULT_BUF_SIZE, &this_num_results, results));
151
+ *num_results += this_num_results;
152
+ if(this_num_results < RESULT_BUF_SIZE) break; // done
153
+ }
154
+
155
+ RELAY_ERROR(wp_index_teardown_query(index, query));
156
+
157
+ return NO_ERROR;
158
+ }
159
+
160
+ wp_error* wp_index_teardown_query(wp_index* index, wp_query* query) {
161
+ (void)index;
162
+ if((query->segment_idx != SEGMENT_UNINITIALIZED) && (query->segment_idx != SEGMENT_DONE)) {
163
+ RELAY_ERROR(wp_search_release_search_state(query));
164
+ }
165
+ query->segment_idx = SEGMENT_UNINITIALIZED;
166
+
167
+ return NO_ERROR;
168
+ }
169
+
170
+ wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) {
171
+ int success;
172
+ wp_segment* seg = &index->segments[index->num_segments - 1];
173
+
174
+ // first, ensure we have enough space in the current segment
175
+ uint32_t postings_bytes;
176
+ RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
177
+ RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
178
+
179
+ // if not, we need to open a new one
180
+ if(!success) {
181
+ DEBUG("segment %d is full, loading a new one", index->num_segments - 1);
182
+ char buf[PATH_BUF_SIZE];
183
+ snprintf(buf, PATH_BUF_SIZE, "%s%d", index->pathname_base, index->num_segments);
184
+ RELAY_ERROR(ensure_num_segments(index));
185
+ RELAY_ERROR(wp_segment_create(&index->segments[index->num_segments], buf));
186
+ index->num_segments++;
187
+
188
+ // set the docid_offset
189
+ postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 2].postings, postings_region);
190
+ index->docid_offsets[index->num_segments - 1] = prevpr->num_docs + index->docid_offsets[index->num_segments - 2];
191
+
192
+ seg = &index->segments[index->num_segments - 1];
193
+ DEBUG("loaded new segment %d at %p", index->num_segments - 1, &index->segments[index->num_segments - 1]);
194
+
195
+ RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
196
+ RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
197
+ if(!success) RAISE_ERROR("can't fit new entry into fresh segment. that's crazy");
198
+ }
199
+
200
+ docid_t seg_doc_id;
201
+ RELAY_ERROR(wp_segment_grab_docid(seg, &seg_doc_id));
202
+ RELAY_ERROR(wp_entry_write_to_segment(entry, seg, seg_doc_id));
203
+ *doc_id = seg_doc_id + index->docid_offsets[index->num_segments - 1];
204
+
205
+ return NO_ERROR;
206
+ }
207
+
208
+ wp_error* wp_index_unload(wp_index* index) {
209
+ for(uint16_t i = 0; i < index->num_segments; i++) RELAY_ERROR(wp_segment_unload(&index->segments[i]));
210
+ index->open = 0;
211
+
212
+ return NO_ERROR;
213
+ }
214
+
215
+ wp_error* wp_index_free(wp_index* index) {
216
+ if(index->open) RELAY_ERROR(wp_index_unload(index));
217
+ free(index->segments);
218
+ free(index->docid_offsets);
219
+ free(index);
220
+
221
+ return NO_ERROR;
222
+ }
223
+
224
+ wp_error* wp_index_dumpinfo(wp_index* index, FILE* stream) {
225
+ fprintf(stream, "index has %d segments\n", index->num_segments);
226
+ for(int i = 0; i < index->num_segments; i++) {
227
+ fprintf(stream, "\nsegment %d:\n", i);
228
+ RELAY_ERROR(wp_segment_dumpinfo(&index->segments[i], stream));
229
+ }
230
+
231
+ return NO_ERROR;
232
+ }
233
+
234
+ wp_error* wp_index_delete(const char* pathname_base) {
235
+ char buf[PATH_BUF_SIZE];
236
+
237
+ int i = 0;
238
+ while(1) {
239
+ snprintf(buf, PATH_BUF_SIZE, "%s%d", pathname_base, i);
240
+ if(wp_segment_exists(buf)) {
241
+ DEBUG("deleting segment %s", buf);
242
+ RELAY_ERROR(wp_segment_delete(buf));
243
+ i++;
244
+ }
245
+ else break;
246
+ }
247
+
248
+ return NO_ERROR;
249
+ }
250
+
251
+ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id) {
252
+ int found = 0;
253
+
254
+ for(uint32_t i = index->num_segments; i > 0; i--) {
255
+ if(doc_id > index->docid_offsets[i - 1]) {
256
+ DEBUG("found doc %llu in segment %u", doc_id, i - 1);
257
+ RELAY_ERROR(wp_segment_add_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
258
+ found = 1;
259
+ break;
260
+ }
261
+ else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
262
+ }
263
+
264
+ if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
265
+
266
+ return NO_ERROR;
267
+ }
268
+
269
+ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc_id) {
270
+ int found = 0;
271
+
272
+ for(uint32_t i = index->num_segments; i > 0; i--) {
273
+ if(doc_id > index->docid_offsets[i - 1]) {
274
+ DEBUG("found doc %llu in segment %u", doc_id, i - 1);
275
+ RELAY_ERROR(wp_segment_remove_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
276
+ found = 1;
277
+ break;
278
+ }
279
+ else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
280
+ }
281
+
282
+ if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
283
+
284
+ return NO_ERROR;
285
+ }
286
+
287
+ uint64_t wp_index_num_docs(wp_index* index) {
288
+ uint64_t ret = 0;
289
+
290
+ // TODO check for overflow or some shit
291
+ for(uint32_t i = index->num_segments; i > 0; i--) ret += wp_segment_num_docs(&index->segments[i - 1]);
292
+
293
+ return ret;
294
+ }
@@ -0,0 +1,88 @@
1
+ #ifndef WP_INDEX_H_
2
+ #define WP_INDEX_H_
3
+
4
+ // whistlepig index
5
+ // (c) 2011 William Morgan. See COPYING for license terms.
6
+ //
7
+ // the main public interaction point with whistlepig, in addition to the
8
+ // supporting entry and query objects. it holds a collection of segments and
9
+ // essentially relays commands to the appropriate ones, creating new segments
10
+ // as needed.
11
+
12
+ #include "defaults.h"
13
+ #include "segment.h"
14
+ #include "error.h"
15
+ #include "entry.h"
16
+
17
+ #define WP_MAX_SEGMENTS 65534 // max value of wp_search_query->segment_idx - 2 because we need two special numbers
18
+
19
+ typedef struct wp_index { // an index: a growable array of segments plus per-segment docid offsets
20
+ const char* pathname_base; // segment N lives at "<pathname_base>N"; create/load store the caller's pointer without copying it
21
+ uint16_t num_segments; // number of entries of segments/docid_offsets currently in use
22
+ uint16_t sizeof_segments; // allocated capacity (in elements) of segments and docid_offsets
23
+ uint64_t* docid_offsets; // docid_offsets[i] is added to segment i's local docids to form global docids
24
+ struct wp_segment* segments; // the segments, oldest first; new entries go into the last one
25
+ uint8_t open; // set to 1 by create/load and to 0 by unload
26
+ } wp_index;
27
+
28
+ // API methods
29
+
30
+ // public: returns non-zero if an index with base pathname pathname_base
31
+ // exists, zero otherwise
32
+ int wp_index_exists(const char* pathname_base);
33
+
34
+ // public: creates an index, raising an exception if it already exists
35
+ wp_error* wp_index_create(wp_index** index, const char* pathname_base) RAISES_ERROR;
36
+
37
+ // public: loads an existing index, raising an exception if it doesn't exist
38
+ wp_error* wp_index_load(wp_index** index, const char* pathname_base) RAISES_ERROR;
39
+
40
+ // public: releases an index
41
+ wp_error* wp_index_unload(wp_index* index) RAISES_ERROR;
42
+
43
+ // public: frees all memory. can be called after unload, or not. don't call
44
+ // anything on the index after calling this, though...
45
+ wp_error* wp_index_free(wp_index* index) RAISES_ERROR;
46
+
47
+ // public: returns the number of documents in the index.
48
+ uint64_t wp_index_num_docs(wp_index* index);
49
+
50
+ // public: initializes a query for use on the index. must be called before
51
+ // run_query
52
+ wp_error* wp_index_setup_query(wp_index* index, wp_query* query) RAISES_ERROR;
53
+
54
+ // public: tears down a query from use on the index. must be called after
55
+ // run_query, or memory will leak.
56
+ wp_error* wp_index_teardown_query(wp_index* index, wp_query* query) RAISES_ERROR;
57
+
58
+ // public: runs a query on an index. must be called in between setup_query and
59
+ // teardown_query. can be called multiple times and the query will be resumed.
60
+ // when the number of documents returned is < max_num_results, then you're at the
61
+ // end!
62
+ wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_results, uint32_t* num_results, uint64_t* results) RAISES_ERROR;
63
+
64
+ // public: returns the number of results that match a query. note that this is
65
+ // roughly as expensive as just running the query completely, modulo some memory
66
+ // allocations here and there...
67
+ wp_error* wp_index_count_results(wp_index* index, wp_query* query, uint32_t* num_results) RAISES_ERROR;
68
+
69
+ // public: adds an entry to the index. sets doc_id to the new docid.
70
+ wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) RAISES_ERROR;
71
+
72
+ // public: adds a label to a doc_id. throws an exception if the document
73
+ // doesn't exist. does nothing if the label has already been added to the
74
+ // document.
75
+ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id);
76
+
77
+ // public: removes a label from a doc_id. throws an exception if the document
78
+ // doesn't exist. presumably does nothing if the label is not on the
79
+ // document (TODO: confirm against wp_segment_remove_label).
80
+ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc_id);
81
+
82
+ // public: dumps information about the index and each of its segments to the stream.
83
+ wp_error* wp_index_dumpinfo(wp_index* index, FILE* stream) RAISES_ERROR;
84
+
85
+ // public: deletes an index (all of its segment files) from disk.
86
+ wp_error* wp_index_delete(const char* path) RAISES_ERROR;
87
+
88
+ #endif
@@ -0,0 +1,316 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /*
27
+ An example:
28
+
29
+ #include "khash.h"
30
+ KHASH_MAP_INIT_INT(32, char)
31
+ int main() {
32
+ int ret, is_missing;
33
+ khiter_t k;
34
+ khash_t(32) *h = kh_init(32);
35
+ k = kh_put(32, h, 5, &ret);
36
+ if (!ret) kh_del(32, h, k);
37
+ kh_value(h, k) = 10;
38
+ k = kh_get(32, h, 10);
39
+ is_missing = (k == kh_end(h));
40
+ k = kh_get(32, h, 5);
41
+ kh_del(32, h, k);
42
+ for (k = kh_begin(h); k != kh_end(h); ++k)
43
+ if (kh_exist(h, k)) kh_value(h, k) = 1;
44
+ kh_destroy(32, h);
45
+ return 0;
46
+ }
47
+ */
48
+
49
+ /*
50
+ 2008-09-19 (0.2.3):
51
+
52
+ * Corrected the example
53
+ * Improved interfaces
54
+
55
+ 2008-09-11 (0.2.2):
56
+
57
+ * Improved speed a little in kh_put()
58
+
59
+ 2008-09-10 (0.2.1):
60
+
61
+ * Added kh_clear()
62
+ * Fixed a compiling error
63
+
64
+ 2008-09-02 (0.2.0):
65
+
66
+ * Changed to token concatenation which increases flexibility.
67
+
68
+ 2008-08-31 (0.1.2):
69
+
70
+ * Fixed a bug in kh_get(), which has not been tested previously.
71
+
72
+ 2008-08-31 (0.1.1):
73
+
74
+ * Added destructor
75
+ */
76
+
77
+
78
+ #ifndef __AC_KHASH_H
79
+ #define __AC_KHASH_H
80
+
81
+ #define AC_VERSION_KHASH_H "0.2.2"
82
+
83
+ #include <stdint.h>
84
+ #include <stdlib.h>
85
+ #include <string.h>
86
+
87
+ typedef uint32_t khint_t; /* unsigned 32-bit type used for hash values, bucket counts and bucket indices */
88
+ typedef khint_t khiter_t; /* bucket position as returned by kh_put()/kh_get(); same type as khint_t */
89
+
90
+ #define __ac_HASH_PRIME_SIZE 32
91
/* Table of the bucket counts a hash table may have, in increasing order.
 * kh_resize_NAME() scans it from the top for the largest entry that is not
 * greater than the requested size and then uses the entry right after it. */
+ static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
92
+ {
93
+ 0ul, 3ul, 11ul, 23ul, 53ul,
94
+ 97ul, 193ul, 389ul, 769ul, 1543ul,
95
+ 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,
96
+ 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,
97
+ 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,
98
+ 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
99
+ 3221225473ul, 4294967291ul
100
+ };
101
+
102
/* Per-bucket state flags. Every bucket owns two adjacent bits in an array of
 * uint32_t words (16 buckets per word): the higher bit of the pair (value 2)
 * means "empty" and the lower bit (value 1) means "deleted"; when both are
 * clear the bucket holds a live key. `flag` is the word array and `i` the
 * bucket index. Both arguments are fully parenthesised so that any expression
 * can be passed; note that `i` is evaluated twice. */
#define __ac_isempty(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&2)
#define __ac_isdel(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&1)
#define __ac_iseither(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) ((flag)[(i)>>4]&=~(1ul<<(((i)&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) ((flag)[(i)>>4]&=~(2ul<<(((i)&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) ((flag)[(i)>>4]&=~(3ul<<(((i)&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) ((flag)[(i)>>4]|=1ul<<(((i)&0xfU)<<1))
109
+
110
+ static const double __ac_HASH_UPPER = 0.77; /* load-factor limit: upper_bound = n_buckets * this; kh_put resizes once n_occupied reaches upper_bound */
111
+
112
/* KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
 *
 * Instantiates a hash table type kh_<name>_t together with the static inline
 * functions kh_init_<name>, kh_destroy_<name>, kh_clear_<name>,
 * kh_get_<name>, kh_resize_<name>, kh_put_<name> and kh_del_<name>.
 *
 *   name         suffix used in the generated type and function names
 *   khkey_t      key type
 *   khval_t      value type (the vals array is only allocated and moved
 *                around when kh_is_map is non-zero)
 *   kh_is_map    non-zero for a map, zero for a set
 *   __hash_func  maps a key to a khint_t hash
 *   __hash_equal returns non-zero when two keys are equal
 *
 * Buckets are probed starting at hash % n_buckets and advancing by
 * 1 + hash % (n_buckets - 1), wrapping around at n_buckets. The state of each
 * bucket (empty / deleted / live) is kept in the flags array, see the
 * __ac_is... macros.
 *
 * NOTE(review): none of the malloc/realloc results in kh_resize are checked.
 */
+ #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
113
+ typedef struct { \
114
+ khint_t n_buckets, size, n_occupied, upper_bound; /* size: live keys; n_occupied: non-empty buckets (live + deleted) */ \
115
+ uint32_t *flags; \
116
+ khkey_t *keys; \
117
+ khval_t *vals; \
118
+ } kh_##name##_t; \
119
+ static inline kh_##name##_t *kh_init_##name() { /* new table: all fields zero, no buckets yet */ \
120
+ return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
121
+ } \
122
+ static inline void kh_destroy_##name(kh_##name##_t *h) /* frees the arrays and the table itself; NULL is accepted */ \
123
+ { \
124
+ if (h) { \
125
+ free(h->keys); free(h->flags); \
126
+ free(h->vals); \
127
+ free(h); \
128
+ } \
129
+ } \
130
+ static inline void kh_clear_##name(kh_##name##_t *h) /* marks every bucket empty without releasing memory */ \
131
+ { \
132
+ if (h && h->flags) { \
133
+ memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t)); /* 0xaa sets the "empty" bit of every 2-bit pair */ \
134
+ h->size = h->n_occupied = 0; \
135
+ } \
136
+ } \
137
+ static inline khint_t kh_get_##name(kh_##name##_t *h, khkey_t key) /* returns the key's bucket, or n_buckets when absent */ \
138
+ { \
139
+ if (h->n_buckets) { \
140
+ khint_t inc, k, i, last; \
141
+ k = __hash_func(key); i = k % h->n_buckets; \
142
+ inc = 1 + k % (h->n_buckets - 1); last = i; \
143
+ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
144
+ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
145
+ else i += inc; \
146
+ if (i == last) return h->n_buckets; /* probed all the way around: not found */ \
147
+ } \
148
+ return __ac_iseither(h->flags, i)? h->n_buckets : i; \
149
+ } else return 0; /* no buckets: 0 equals n_buckets here, i.e. "absent" */ \
150
+ } \
151
+ static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) /* rehashes into a bucket count taken from __ac_prime_list */ \
152
+ { \
153
+ uint32_t *new_flags = 0; \
154
+ khint_t j = 1; \
155
+ { \
156
+ khint_t t = __ac_HASH_PRIME_SIZE - 1; \
157
+ while (__ac_prime_list[t] > new_n_buckets) --t; \
158
+ new_n_buckets = __ac_prime_list[t+1]; \
159
+ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* live keys would not fit under the load limit: do nothing */ \
160
+ else { \
161
+ new_flags = (uint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \
162
+ memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \
163
+ if (h->n_buckets < new_n_buckets) { /* growing: enlarge the arrays before rehashing */ \
164
+ h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
165
+ if (kh_is_map) \
166
+ h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
167
+ } \
168
+ } \
169
+ } \
170
+ if (j) { \
171
+ for (j = 0; j != h->n_buckets; ++j) { /* rehash in place, one live bucket at a time */ \
172
+ if (__ac_iseither(h->flags, j) == 0) { \
173
+ khkey_t key = h->keys[j]; \
174
+ khval_t val; \
175
+ if (kh_is_map) val = h->vals[j]; \
176
+ __ac_set_isdel_true(h->flags, j); /* old flag "deleted" marks this bucket as already moved out */ \
177
+ while (1) { \
178
+ khint_t inc, k, i; \
179
+ k = __hash_func(key); \
180
+ i = k % new_n_buckets; \
181
+ inc = 1 + k % (new_n_buckets - 1); \
182
+ while (!__ac_isempty(new_flags, i)) { \
183
+ if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
184
+ else i += inc; \
185
+ } \
186
+ __ac_set_isempty_false(new_flags, i); \
187
+ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* target still holds an unmoved key: swap it out and place that one next */ \
188
+ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
189
+ if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
190
+ __ac_set_isdel_true(h->flags, i); \
191
+ } else { \
192
+ h->keys[i] = key; \
193
+ if (kh_is_map) h->vals[i] = val; \
194
+ break; \
195
+ } \
196
+ } \
197
+ } \
198
+ } \
199
+ if (h->n_buckets > new_n_buckets) { /* shrinking: trim the arrays after rehashing */ \
200
+ h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
201
+ if (kh_is_map) \
202
+ h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
203
+ } \
204
+ free(h->flags); \
205
+ h->flags = new_flags; \
206
+ h->n_buckets = new_n_buckets; \
207
+ h->n_occupied = h->size; \
208
+ h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
209
+ } \
210
+ } \
211
+ static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) /* returns the key's bucket; *ret: 0 = already present, 1 = stored in an empty bucket, 2 = stored in a deleted bucket */ \
212
+ { \
213
+ khint_t x; \
214
+ if (h->n_occupied >= h->upper_bound) { \
215
+ if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* mostly deleted buckets: ask for a smaller size */ \
216
+ else kh_resize_##name(h, h->n_buckets + 1); \
217
+ } \
218
+ { \
219
+ khint_t inc, k, i, site, last; \
220
+ x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
221
+ if (__ac_isempty(h->flags, i)) x = i; \
222
+ else { \
223
+ inc = 1 + k % (h->n_buckets - 1); last = i; \
224
+ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
225
+ if (__ac_isdel(h->flags, i)) site = i; /* remember a deleted bucket seen on the way so it can be reused */ \
226
+ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
227
+ else i += inc; \
228
+ if (i == last) { x = site; break; } \
229
+ } \
230
+ if (x == h->n_buckets) { \
231
+ if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
232
+ else x = i; \
233
+ } \
234
+ } \
235
+ } \
236
+ if (__ac_isempty(h->flags, x)) { \
237
+ h->keys[x] = key; \
238
+ __ac_set_isboth_false(h->flags, x); \
239
+ ++h->size; ++h->n_occupied; \
240
+ *ret = 1; \
241
+ } else if (__ac_isdel(h->flags, x)) { \
242
+ h->keys[x] = key; \
243
+ __ac_set_isboth_false(h->flags, x); \
244
+ ++h->size; \
245
+ *ret = 2; \
246
+ } else *ret = 0; \
247
+ return x; \
248
+ } \
249
+ static inline void kh_del_##name(kh_##name##_t *h, khint_t x) /* marks a live bucket deleted; n_occupied is left unchanged */ \
250
+ { \
251
+ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
252
+ __ac_set_isdel_true(h->flags, x); \
253
+ --h->size; \
254
+ } \
255
+ }
256
+
257
+ /* --- BEGIN OF HASH FUNCTIONS --- */
258
+
259
/* Hash and equality helpers for integer keys. 32-bit keys hash to
 * themselves; 64-bit keys are folded to 32 bits by xor-ing the key with
 * itself shifted right by 33 and left by 11. Arguments and results are fully
 * parenthesised so that arbitrary expressions can be passed safely. */
#define kh_int_hash_func(key) ((uint32_t)(key))
#define kh_int_hash_equal(a, b) ((a) == (b))
#define kh_int64_hash_func(key) ((uint32_t)(((key)>>33) ^ (key) ^ ((key)<<11)))
#define kh_int64_hash_equal(a, b) ((a) == (b))
263
+ static inline khint_t __ac_X31_hash_string(const char *s)
264
+ {
265
+ khint_t h = *s;
266
+ if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
267
+ return h;
268
+ }
269
+ #define kh_str_hash_func(key) __ac_X31_hash_string(key)
270
+ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
271
+
272
+ /* --- END OF HASH FUNCTIONS --- */
273
+
274
+ /* Other necessary macros... */
275
+
276
+ #define khash_t(name) kh_##name##_t /* the table type generated by KHASH_INIT for `name` */
277
+
278
+ #define kh_init(name) kh_init_##name() /* allocate an empty table */
279
+ #define kh_destroy(name, h) kh_destroy_##name(h) /* free a table */
280
+ #define kh_clear(name, h) kh_clear_##name(h) /* remove all keys, keep the memory */
281
+ #define kh_resize(name, h, s) kh_resize_##name(h, s) /* rehash for a requested bucket count s */
282
+ #define kh_put(name, h, k, r) kh_put_##name(h, k, r) /* insert key k; *r tells whether it was new */
283
+ #define kh_get(name, h, k) kh_get_##name(h, k) /* look up key k; kh_end(h) when absent */
284
+ #define kh_del(name, h, k) kh_del_##name(h, k) /* delete the bucket at position k (a position, not a key) */
285
+
286
+ #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) /* non-zero when bucket x holds a live key */
287
+ #define kh_key(h, x) ((h)->keys[x]) /* key stored in bucket x */
288
+ #define kh_val(h, x) ((h)->vals[x]) /* value stored in bucket x */
289
+ #define kh_value(h, x) ((h)->vals[x]) /* same as kh_val */
290
+ #define kh_begin(h) (khint_t)(0) /* first bucket position */
291
+ #define kh_end(h) ((h)->n_buckets) /* one past the last bucket position */
292
+ #define kh_size(h) ((h)->size) /* number of live keys */
293
+ #define kh_n_buckets(h) ((h)->n_buckets) /* number of buckets */
294
+
295
+ /* More convenient interfaces */
296
+
297
+ #define KHASH_SET_INIT_INT(name) /* set of uint32_t keys */ \
298
+ KHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
299
+
300
+ #define KHASH_MAP_INIT_INT(name, khval_t) /* map from uint32_t keys to khval_t */ \
301
+ KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
302
+
303
+ #define KHASH_SET_INIT_INT64(name) /* set of uint64_t keys */ \
304
+ KHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
305
+
306
+ #define KHASH_MAP_INIT_INT64(name, khval_t) /* map from uint64_t keys to khval_t */ \
307
+ KHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
308
+
309
+ typedef const char *kh_cstr_t; /* key type of the string tables; only the pointer is stored in the table */
310
+ #define KHASH_SET_INIT_STR(name) /* set of C-string keys, compared with strcmp */ \
311
+ KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
312
+
313
+ #define KHASH_MAP_INIT_STR(name, khval_t) /* map from C-string keys to khval_t */ \
314
+ KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
315
+
316
+ #endif /* __AC_KHASH_H */