whistlepig 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,294 @@
1
+ #include <stdio.h>
2
+ #include <sys/types.h>
3
+ #include <sys/stat.h>
4
+ #include <unistd.h>
5
+ #include "whistlepig.h"
6
+
7
+ #define PATH_BUF_SIZE 4096
8
+
9
+ int wp_index_exists(const char* pathname_base) {
10
+ char buf[PATH_BUF_SIZE];
11
+ snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
12
+ return wp_segment_exists(buf);
13
+ }
14
+
15
+ wp_error* wp_index_create(wp_index** indexptr, const char* pathname_base) {
16
+ char buf[PATH_BUF_SIZE];
17
+
18
+ snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
19
+ if(wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' already exists", pathname_base);
20
+
21
+ wp_index* index = *indexptr = malloc(sizeof(wp_index));
22
+ index->pathname_base = pathname_base;
23
+ index->num_segments = 1;
24
+ index->sizeof_segments = 1;
25
+ index->open = 1;
26
+ index->segments = malloc(sizeof(wp_segment));
27
+ index->docid_offsets = malloc(sizeof(uint64_t));
28
+
29
+ RELAY_ERROR(wp_segment_create(&index->segments[0], buf));
30
+ index->docid_offsets[0] = 0;
31
+
32
+ return NO_ERROR;
33
+ }
34
+
35
+ RAISING_STATIC(ensure_num_segments(wp_index* index)) {
36
+ if(index->num_segments >= index->sizeof_segments) {
37
+ index->sizeof_segments *= 2;
38
+ index->segments = realloc(index->segments, sizeof(wp_segment) * index->sizeof_segments);
39
+ index->docid_offsets = realloc(index->docid_offsets, sizeof(uint64_t) * index->sizeof_segments);
40
+ if(index->segments == NULL) RAISE_ERROR("oom");
41
+ }
42
+
43
+ return NO_ERROR;
44
+ }
45
+
46
+ wp_error* wp_index_load(wp_index** indexptr, const char* pathname_base) {
47
+ char buf[PATH_BUF_SIZE];
48
+ snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
49
+ if(!wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' does not exist", pathname_base);
50
+
51
+ wp_index* index = *indexptr = malloc(sizeof(wp_index));
52
+
53
+ index->pathname_base = pathname_base;
54
+ index->num_segments = 0;
55
+ index->sizeof_segments = 1;
56
+ index->open = 1;
57
+ index->segments = malloc(sizeof(wp_segment));
58
+ index->docid_offsets = malloc(sizeof(uint64_t));
59
+
60
+ // load all the segments we can
61
+ while(index->num_segments < WP_MAX_SEGMENTS) {
62
+ snprintf(buf, PATH_BUF_SIZE, "%s%d", pathname_base, index->num_segments);
63
+ if(!wp_segment_exists(buf)) break;
64
+
65
+ RELAY_ERROR(ensure_num_segments(index));
66
+ DEBUG("loading segment %s", buf);
67
+ RELAY_ERROR(wp_segment_load(&index->segments[index->num_segments], buf));
68
+ if(index->num_segments == 0)
69
+ index->docid_offsets[index->num_segments] = 0;
70
+ else {
71
+ // segments return docids 1 through N, so the num_docs in a segment is
72
+ // also the max document id
73
+ postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 1].postings, postings_region);
74
+ index->docid_offsets[index->num_segments] = prevpr->num_docs + index->docid_offsets[index->num_segments - 1];
75
+ }
76
+
77
+ index->num_segments++;
78
+ }
79
+
80
+ return NO_ERROR;
81
+ }
82
+
83
+ // we have two special values at our disposal to mark where we are in
84
+ // the sequence of segments
85
+ #define SEGMENT_UNINITIALIZED WP_MAX_SEGMENTS
86
+ #define SEGMENT_DONE (WP_MAX_SEGMENTS + 1)
87
+
88
+ wp_error* wp_index_setup_query(wp_index* index, wp_query* query) {
89
+ (void)index;
90
+ query->segment_idx = SEGMENT_UNINITIALIZED;
91
+
92
+ return NO_ERROR;
93
+ }
94
+
95
+ // can be called multiple times to resume
96
+ wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_results, uint32_t* num_results, uint64_t* results) {
97
+ *num_results = 0;
98
+ if(index->num_segments == 0) return NO_ERROR;
99
+
100
+ if(query->segment_idx == SEGMENT_UNINITIALIZED) {
101
+ query->segment_idx = index->num_segments - 1;
102
+ DEBUG("setting up segment %u", query->segment_idx);
103
+ RELAY_ERROR(wp_search_init_search_state(query, &index->segments[query->segment_idx]));
104
+ }
105
+
106
+ // at this point, we assume we're initialized and query->segment_idx is the index
107
+ // of the segment we're searching against
108
+ while((*num_results < max_num_results) && (query->segment_idx != SEGMENT_DONE)) {
109
+ uint32_t want_num_results = max_num_results - *num_results;
110
+ uint32_t got_num_results = 0;
111
+ search_result* segment_results = malloc(sizeof(search_result) * want_num_results);
112
+
113
+ DEBUG("searching segment %d", query->segment_idx);
114
+ RELAY_ERROR(wp_search_run_query_on_segment(query, &index->segments[query->segment_idx], want_num_results, &got_num_results, segment_results));
115
+ DEBUG("asked segment %d for %d results, got %d", query->segment_idx, want_num_results, got_num_results);
116
+
117
+ // extract the per-segment docids from the search results and adjust by
118
+ // each segment's docid offset to form global docids
119
+ for(uint32_t i = 0; i < got_num_results; i++) {
120
+ results[*num_results + i] = index->docid_offsets[query->segment_idx] + segment_results[i].doc_id;
121
+ wp_search_result_free(&segment_results[i]);
122
+ }
123
+ free(segment_results);
124
+ *num_results += got_num_results;
125
+
126
+ if(got_num_results < want_num_results) { // this segment is finished; move to the next one
127
+ DEBUG("releasing index %d", query->segment_idx);
128
+ RELAY_ERROR(wp_search_release_search_state(query));
129
+ if(query->segment_idx > 0) {
130
+ query->segment_idx--;
131
+ DEBUG("setting up index %d", query->segment_idx);
132
+ RELAY_ERROR(wp_search_init_search_state(query, &index->segments[query->segment_idx]));
133
+ }
134
+ else query->segment_idx = SEGMENT_DONE;
135
+ }
136
+ }
137
+
138
+ return NO_ERROR;
139
+ }
140
+
141
+ #define RESULT_BUF_SIZE 1024
142
+ // count the results by just running the query until it stops. slow!
143
+ wp_error* wp_index_count_results(wp_index* index, wp_query* query, uint32_t* num_results) {
144
+ uint64_t results[RESULT_BUF_SIZE];
145
+
146
+ *num_results = 0;
147
+ RELAY_ERROR(wp_index_setup_query(index, query));
148
+ while(1) {
149
+ uint32_t this_num_results;
150
+ RELAY_ERROR(wp_index_run_query(index, query, RESULT_BUF_SIZE, &this_num_results, results));
151
+ *num_results += this_num_results;
152
+ if(this_num_results < RESULT_BUF_SIZE) break; // done
153
+ }
154
+
155
+ RELAY_ERROR(wp_index_teardown_query(index, query));
156
+
157
+ return NO_ERROR;
158
+ }
159
+
160
+ wp_error* wp_index_teardown_query(wp_index* index, wp_query* query) {
161
+ (void)index;
162
+ if((query->segment_idx != SEGMENT_UNINITIALIZED) && (query->segment_idx != SEGMENT_DONE)) {
163
+ RELAY_ERROR(wp_search_release_search_state(query));
164
+ }
165
+ query->segment_idx = SEGMENT_UNINITIALIZED;
166
+
167
+ return NO_ERROR;
168
+ }
169
+
170
+ wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) {
171
+ int success;
172
+ wp_segment* seg = &index->segments[index->num_segments - 1];
173
+
174
+ // first, ensure we have enough space in the current segment
175
+ uint32_t postings_bytes;
176
+ RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
177
+ RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
178
+
179
+ // if not, we need to open a new one
180
+ if(!success) {
181
+ DEBUG("segment %d is full, loading a new one", index->num_segments - 1);
182
+ char buf[PATH_BUF_SIZE];
183
+ snprintf(buf, PATH_BUF_SIZE, "%s%d", index->pathname_base, index->num_segments);
184
+ RELAY_ERROR(ensure_num_segments(index));
185
+ RELAY_ERROR(wp_segment_create(&index->segments[index->num_segments], buf));
186
+ index->num_segments++;
187
+
188
+ // set the docid_offset
189
+ postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 2].postings, postings_region);
190
+ index->docid_offsets[index->num_segments - 1] = prevpr->num_docs + index->docid_offsets[index->num_segments - 2];
191
+
192
+ seg = &index->segments[index->num_segments - 1];
193
+ DEBUG("loaded new segment %d at %p", index->num_segments - 1, &index->segments[index->num_segments - 1]);
194
+
195
+ RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
196
+ RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
197
+ if(!success) RAISE_ERROR("can't fit new entry into fresh segment. that's crazy");
198
+ }
199
+
200
+ docid_t seg_doc_id;
201
+ RELAY_ERROR(wp_segment_grab_docid(seg, &seg_doc_id));
202
+ RELAY_ERROR(wp_entry_write_to_segment(entry, seg, seg_doc_id));
203
+ *doc_id = seg_doc_id + index->docid_offsets[index->num_segments - 1];
204
+
205
+ return NO_ERROR;
206
+ }
207
+
208
+ wp_error* wp_index_unload(wp_index* index) {
209
+ for(uint16_t i = 0; i < index->num_segments; i++) RELAY_ERROR(wp_segment_unload(&index->segments[i]));
210
+ index->open = 0;
211
+
212
+ return NO_ERROR;
213
+ }
214
+
215
+ wp_error* wp_index_free(wp_index* index) {
216
+ if(index->open) RELAY_ERROR(wp_index_unload(index));
217
+ free(index->segments);
218
+ free(index->docid_offsets);
219
+ free(index);
220
+
221
+ return NO_ERROR;
222
+ }
223
+
224
+ wp_error* wp_index_dumpinfo(wp_index* index, FILE* stream) {
225
+ fprintf(stream, "index has %d segments\n", index->num_segments);
226
+ for(int i = 0; i < index->num_segments; i++) {
227
+ fprintf(stream, "\nsegment %d:\n", i);
228
+ RELAY_ERROR(wp_segment_dumpinfo(&index->segments[i], stream));
229
+ }
230
+
231
+ return NO_ERROR;
232
+ }
233
+
234
+ wp_error* wp_index_delete(const char* pathname_base) {
235
+ char buf[PATH_BUF_SIZE];
236
+
237
+ int i = 0;
238
+ while(1) {
239
+ snprintf(buf, PATH_BUF_SIZE, "%s%d", pathname_base, i);
240
+ if(wp_segment_exists(buf)) {
241
+ DEBUG("deleting segment %s", buf);
242
+ RELAY_ERROR(wp_segment_delete(buf));
243
+ i++;
244
+ }
245
+ else break;
246
+ }
247
+
248
+ return NO_ERROR;
249
+ }
250
+
251
+ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id) {
252
+ int found = 0;
253
+
254
+ for(uint32_t i = index->num_segments; i > 0; i--) {
255
+ if(doc_id > index->docid_offsets[i - 1]) {
256
+ DEBUG("found doc %llu in segment %u", doc_id, i - 1);
257
+ RELAY_ERROR(wp_segment_add_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
258
+ found = 1;
259
+ break;
260
+ }
261
+ else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
262
+ }
263
+
264
+ if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
265
+
266
+ return NO_ERROR;
267
+ }
268
+
269
+ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc_id) {
270
+ int found = 0;
271
+
272
+ for(uint32_t i = index->num_segments; i > 0; i--) {
273
+ if(doc_id > index->docid_offsets[i - 1]) {
274
+ DEBUG("found doc %llu in segment %u", doc_id, i - 1);
275
+ RELAY_ERROR(wp_segment_remove_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
276
+ found = 1;
277
+ break;
278
+ }
279
+ else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
280
+ }
281
+
282
+ if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
283
+
284
+ return NO_ERROR;
285
+ }
286
+
287
+ uint64_t wp_index_num_docs(wp_index* index) {
288
+ uint64_t ret = 0;
289
+
290
+ // TODO check for overflow or some shit
291
+ for(uint32_t i = index->num_segments; i > 0; i--) ret += wp_segment_num_docs(&index->segments[i - 1]);
292
+
293
+ return ret;
294
+ }
@@ -0,0 +1,88 @@
1
+ #ifndef WP_INDEX_H_
2
+ #define WP_INDEX_H_
3
+
4
+ // whistlepig index
5
+ // (c) 2011 William Morgan. See COPYING for license terms.
6
+ //
7
+ // the main public interaction point with whistlepig, in addition to the
8
+ // supporting entry and query objects. it holds a collection of segments and
9
+ // essentially relays commands to the appropriate ones, creating new segments
10
+ // as needed.
11
+
12
+ #include "defaults.h"
13
+ #include "segment.h"
14
+ #include "error.h"
15
+ #include "entry.h"
16
+
17
+ #define WP_MAX_SEGMENTS 65534 // max value of wp_search_query->segment_idx - 2 because we need two special numbers
18
+
19
// an index: a growable array of segments plus, for each segment, the offset
// that turns its segment-local docids into index-wide docids.
+ typedef struct wp_index {
20
+ const char* pathname_base; // base path; segment N lives at this path with N appended. stored by pointer (not copied) by create/load
21
+ uint16_t num_segments; // number of entries of segments/docid_offsets currently in use
22
+ uint16_t sizeof_segments; // allocated capacity, in elements, of segments and docid_offsets
23
+ uint64_t* docid_offsets; // per-segment value added to a segment-local docid to form the global docid; 0 for segment 0
24
+ struct wp_segment* segments; // the segments themselves, indexed by segment number
25
+ uint8_t open; // set to 1 by create/load and to 0 by unload; free unloads first when it is still 1
26
+ } wp_index;
27
+
28
+ // API methods
29
+
30
+ // public: returns non-zero if an index with base pathname pathname_base
31
+ // exists, zero otherwise
32
+ int wp_index_exists(const char* pathname_base);
33
+
34
+ // public: creates an index, raising an exception if it already exists
35
+ wp_error* wp_index_create(wp_index** index, const char* pathname_base) RAISES_ERROR;
36
+
37
+ // public: loads an existing index, raising an exception if it doesn't exist
38
+ wp_error* wp_index_load(wp_index** index, const char* pathname_base) RAISES_ERROR;
39
+
40
+ // public: releases an index
41
+ wp_error* wp_index_unload(wp_index* index) RAISES_ERROR;
42
+
43
+ // public: frees all memory. can be called after unload, or not. don't call
44
+ // anything on the index after calling this, though...
45
+ wp_error* wp_index_free(wp_index* index) RAISES_ERROR;
46
+
47
+ // public: returns the number of documents in the index.
48
+ uint64_t wp_index_num_docs(wp_index* index);
49
+
50
+ // public: initializes a query for use on the index. must be called before
51
+ // run_query
52
+ wp_error* wp_index_setup_query(wp_index* index, wp_query* query) RAISES_ERROR;
53
+
54
+ // public: tears down a query from use on the index. must be called after
55
+ // run_query, or memory will leak.
56
+ wp_error* wp_index_teardown_query(wp_index* index, wp_query* query) RAISES_ERROR;
57
+
58
+ // public: runs a query on an index. must be called in between setup_query and
59
+ // teardown_query. can be called multiple times and the query will be resumed.
60
+ // when the number of documents returned is < max_num_results, then you're at the
61
+ // end!
62
+ wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_results, uint32_t* num_results, uint64_t* results) RAISES_ERROR;
63
+
64
+ // public: returns the number of results that match a query. note that this is
65
+ // roughly as expensive as just running the query completely, modulo some memory
66
+ // allocations here and there...
67
+ wp_error* wp_index_count_results(wp_index* index, wp_query* query, uint32_t* num_results) RAISES_ERROR;
68
+
69
+ // public: adds an entry to the index. sets doc_id to the new docid.
70
+ wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) RAISES_ERROR;
71
+
72
+ // public: adds a label to a doc_id. throws an exception if the document
73
+ // doesn't exist. does nothing if the label has already been added to the
74
+ // document.
75
+ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id);
76
+
77
+ // public: removes a label from a doc_id. throws an exception if the document
78
+ // doesn't exist. does nothing if the label has already been added to the
79
+ // document.
80
+ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc_id);
81
+
82
+ // dumps some information about the index to the stream.
83
+ wp_error* wp_index_dumpinfo(wp_index* index, FILE* stream) RAISES_ERROR;
84
+
85
+ // public: deletes an index (all of its segments) from disk.
86
+ wp_error* wp_index_delete(const char* path) RAISES_ERROR;
87
+
88
+ #endif
@@ -0,0 +1,316 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /*
27
+ An example:
28
+
29
+ #include "khash.h"
30
+ KHASH_MAP_INIT_INT(32, char)
31
+ int main() {
32
+ int ret, is_missing;
33
+ khiter_t k;
34
+ khash_t(32) *h = kh_init(32);
35
+ k = kh_put(32, h, 5, &ret);
36
+ if (!ret) kh_del(32, h, k);
37
+ kh_value(h, k) = 10;
38
+ k = kh_get(32, h, 10);
39
+ is_missing = (k == kh_end(h));
40
+ k = kh_get(32, h, 5);
41
+ kh_del(32, h, k);
42
+ for (k = kh_begin(h); k != kh_end(h); ++k)
43
+ if (kh_exist(h, k)) kh_value(h, k) = 1;
44
+ kh_destroy(32, h);
45
+ return 0;
46
+ }
47
+ */
48
+
49
+ /*
50
+ 2008-09-19 (0.2.3):
51
+
52
+ * Corrected the example
53
+ * Improved interfaces
54
+
55
+ 2008-09-11 (0.2.2):
56
+
57
+ * Improved speed a little in kh_put()
58
+
59
+ 2008-09-10 (0.2.1):
60
+
61
+ * Added kh_clear()
62
+ * Fixed a compiling error
63
+
64
+ 2008-09-02 (0.2.0):
65
+
66
+ * Changed to token concatenation which increases flexibility.
67
+
68
+ 2008-08-31 (0.1.2):
69
+
70
+ * Fixed a bug in kh_get(), which has not been tested previously.
71
+
72
+ 2008-08-31 (0.1.1):
73
+
74
+ * Added destructor
75
+ */
76
+
77
+
78
+ #ifndef __AC_KHASH_H
79
+ #define __AC_KHASH_H
80
+
81
+ #define AC_VERSION_KHASH_H "0.2.2"
82
+
83
+ #include <stdint.h>
84
+ #include <stdlib.h>
85
+ #include <string.h>
86
+
87
+ typedef uint32_t khint_t;
88
+ typedef khint_t khiter_t;
89
+
90
#define __ac_HASH_PRIME_SIZE 32

/*
 * Ascending table of bucket counts that kh_resize_* picks from. Entry 0 is a
 * zero sentinel that stops the downward search in the resize code.
 */
static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = {
	0UL,          3UL,          11UL,         23UL,
	53UL,         97UL,         193UL,        389UL,
	769UL,        1543UL,       3079UL,       6151UL,
	12289UL,      24593UL,      49157UL,      98317UL,
	196613UL,     393241UL,     786433UL,     1572869UL,
	3145739UL,    6291469UL,    12582917UL,   25165843UL,
	50331653UL,   100663319UL,  201326611UL,  402653189UL,
	805306457UL,  1610612741UL, 3221225473UL, 4294967291UL
};
101
+
102
/*
 * Bucket state is packed two bits per bucket, sixteen buckets per uint32_t
 * word of the flag array: within each pair, value 2 is the "empty" bit and
 * value 1 is the "deleted" bit. A bucket holding a live key has both clear.
 *
 * Both macro arguments are fully parenthesized so that an expression such as
 * `a | b` can be passed as the bucket index without being misparsed.
 */
#define __ac_isempty(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&2)
#define __ac_isdel(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&1)
#define __ac_iseither(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) ((flag)[(i)>>4]&=~(1ul<<(((i)&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) ((flag)[(i)>>4]&=~(2ul<<(((i)&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) ((flag)[(i)>>4]&=~(3ul<<(((i)&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) ((flag)[(i)>>4]|=1ul<<(((i)&0xfU)<<1))

/* Load factor: fraction of n_buckets used to compute a table's upper_bound. */
static const double __ac_HASH_UPPER = 0.77;
111
+
112
/*
 * KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
 *
 * Instantiates an open-addressing hash table type kh_<name>_t and its
 * static inline operations for the given key type, value type, hash function
 * and equality test. kh_is_map selects whether a value array is maintained
 * (non-zero) or only keys are stored (zero).
 *
 * Generated operations:
 *   kh_init_<name>()        calloc()s a zeroed table (no buckets yet).
 *   kh_destroy_<name>(h)    frees keys, flags, vals and the table; h may be NULL.
 *   kh_clear_<name>(h)      fills the flag words with 0xaa (every bucket marked
 *                           empty) and resets size and n_occupied.
 *   kh_get_<name>(h, key)   returns the bucket holding key, or n_buckets if the
 *                           key is absent. Returns 0 when the table has no
 *                           buckets, which is then also equal to n_buckets.
 *   kh_resize_<name>(h, n)  picks the first entry of __ac_prime_list greater
 *                           than n as the new bucket count; does nothing if the
 *                           current size would already reach the 0.77 load
 *                           limit at that count; otherwise rehashes in place.
 *   kh_put_<name>(h, key, &ret)
 *                           resizes first when n_occupied has reached
 *                           upper_bound, then returns the bucket for key.
 *                           *ret is 1 if an empty bucket was taken, 2 if a
 *                           deleted bucket was reused, 0 if the key was
 *                           already present. The value slot is not written.
 *   kh_del_<name>(h, x)     marks bucket x deleted and decrements size; ignored
 *                           when x == n_buckets or the bucket holds no live key.
 *
 * Probing starts at hash % n_buckets and advances by 1 + hash % (n_buckets - 1),
 * wrapping around at n_buckets.
 *
 * NOTE(review): the malloc/realloc results in kh_resize_<name> are used
 * without a NULL check, and asking for >= 4294967291 buckets would read
 * __ac_prime_list[t+1] one past the end of the table.
 */
+ #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
113
+ typedef struct { \
114
+ khint_t n_buckets, size, n_occupied, upper_bound; \
115
+ uint32_t *flags; \
116
+ khkey_t *keys; \
117
+ khval_t *vals; \
118
+ } kh_##name##_t; \
119
+ static inline kh_##name##_t *kh_init_##name() { \
120
+ return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
121
+ } \
122
+ static inline void kh_destroy_##name(kh_##name##_t *h) \
123
+ { \
124
+ if (h) { \
125
+ free(h->keys); free(h->flags); \
126
+ free(h->vals); \
127
+ free(h); \
128
+ } \
129
+ } \
130
+ static inline void kh_clear_##name(kh_##name##_t *h) \
131
+ { \
132
+ if (h && h->flags) { \
133
+ memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t)); \
134
+ h->size = h->n_occupied = 0; \
135
+ } \
136
+ } \
137
+ static inline khint_t kh_get_##name(kh_##name##_t *h, khkey_t key) \
138
+ { \
139
+ if (h->n_buckets) { \
140
+ khint_t inc, k, i, last; \
141
+ k = __hash_func(key); i = k % h->n_buckets; \
142
+ inc = 1 + k % (h->n_buckets - 1); last = i; \
143
+ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
144
+ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
145
+ else i += inc; \
146
+ if (i == last) return h->n_buckets; \
147
+ } \
148
+ return __ac_iseither(h->flags, i)? h->n_buckets : i; \
149
+ } else return 0; \
150
+ } \
151
+ static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
152
+ { \
153
+ uint32_t *new_flags = 0; \
154
+ khint_t j = 1; \
155
+ { \
156
+ khint_t t = __ac_HASH_PRIME_SIZE - 1; \
157
+ while (__ac_prime_list[t] > new_n_buckets) --t; \
158
+ new_n_buckets = __ac_prime_list[t+1]; \
159
+ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
160
+ else { \
161
+ new_flags = (uint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \
162
+ memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \
163
+ if (h->n_buckets < new_n_buckets) { \
164
+ h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
165
+ if (kh_is_map) \
166
+ h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
167
+ } \
168
+ } \
169
+ } \
170
+ if (j) { \
171
+ for (j = 0; j != h->n_buckets; ++j) { \
172
+ if (__ac_iseither(h->flags, j) == 0) { \
173
+ khkey_t key = h->keys[j]; \
174
+ khval_t val; \
175
+ if (kh_is_map) val = h->vals[j]; \
176
+ __ac_set_isdel_true(h->flags, j); \
177
+ while (1) { \
178
+ khint_t inc, k, i; \
179
+ k = __hash_func(key); \
180
+ i = k % new_n_buckets; \
181
+ inc = 1 + k % (new_n_buckets - 1); \
182
+ while (!__ac_isempty(new_flags, i)) { \
183
+ if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
184
+ else i += inc; \
185
+ } \
186
+ __ac_set_isempty_false(new_flags, i); \
187
+ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
188
+ { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
189
+ if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
190
+ __ac_set_isdel_true(h->flags, i); \
191
+ } else { \
192
+ h->keys[i] = key; \
193
+ if (kh_is_map) h->vals[i] = val; \
194
+ break; \
195
+ } \
196
+ } \
197
+ } \
198
+ } \
199
+ if (h->n_buckets > new_n_buckets) { \
200
+ h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
201
+ if (kh_is_map) \
202
+ h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
203
+ } \
204
+ free(h->flags); \
205
+ h->flags = new_flags; \
206
+ h->n_buckets = new_n_buckets; \
207
+ h->n_occupied = h->size; \
208
+ h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
209
+ } \
210
+ } \
211
+ static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
212
+ { \
213
+ khint_t x; \
214
+ if (h->n_occupied >= h->upper_bound) { \
215
+ if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
216
+ else kh_resize_##name(h, h->n_buckets + 1); \
217
+ } \
218
+ { \
219
+ khint_t inc, k, i, site, last; \
220
+ x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
221
+ if (__ac_isempty(h->flags, i)) x = i; \
222
+ else { \
223
+ inc = 1 + k % (h->n_buckets - 1); last = i; \
224
+ while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
225
+ if (__ac_isdel(h->flags, i)) site = i; \
226
+ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
227
+ else i += inc; \
228
+ if (i == last) { x = site; break; } \
229
+ } \
230
+ if (x == h->n_buckets) { \
231
+ if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
232
+ else x = i; \
233
+ } \
234
+ } \
235
+ } \
236
+ if (__ac_isempty(h->flags, x)) { \
237
+ h->keys[x] = key; \
238
+ __ac_set_isboth_false(h->flags, x); \
239
+ ++h->size; ++h->n_occupied; \
240
+ *ret = 1; \
241
+ } else if (__ac_isdel(h->flags, x)) { \
242
+ h->keys[x] = key; \
243
+ __ac_set_isboth_false(h->flags, x); \
244
+ ++h->size; \
245
+ *ret = 2; \
246
+ } else *ret = 0; \
247
+ return x; \
248
+ } \
249
+ static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \
250
+ { \
251
+ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
252
+ __ac_set_isdel_true(h->flags, x); \
253
+ --h->size; \
254
+ } \
255
+ }
256
+
257
+ /* --- BEGIN OF HASH FUNCTIONS --- */
258
+
259
/*
 * Hash and equality helpers for integer keys. Arguments and whole expansions
 * are parenthesized so that expression arguments (e.g. `a & b`) and use
 * inside larger expressions behave as a function call would.
 */
/* 32-bit keys hash to themselves */
#define kh_int_hash_func(key) ((uint32_t)(key))
#define kh_int_hash_equal(a, b) ((a) == (b))
/* 64-bit keys: fold the key with two shifted copies of itself, keep the low 32 bits */
#define kh_int64_hash_func(key) ((uint32_t)(((key)>>33) ^ (key) ^ ((key)<<11)))
#define kh_int64_hash_equal(a, b) ((a) == (b))
263
+ static inline khint_t __ac_X31_hash_string(const char *s)
264
+ {
265
+ khint_t h = *s;
266
+ if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
267
+ return h;
268
+ }
269
+ #define kh_str_hash_func(key) __ac_X31_hash_string(key)
270
+ #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
271
+
272
+ /* --- END OF HASH FUNCTIONS --- */
273
+
274
+ /* Other necessary macros... */
275
+
276
+ #define khash_t(name) kh_##name##_t /* the table type generated by KHASH_INIT for `name` */
277
+
278
+ #define kh_init(name) kh_init_##name() /* allocate an empty table */
279
+ #define kh_destroy(name, h) kh_destroy_##name(h) /* free a table and its arrays */
280
+ #define kh_clear(name, h) kh_clear_##name(h) /* drop all entries, keep the buckets */
281
+ #define kh_resize(name, h, s) kh_resize_##name(h, s) /* rehash to a bucket count above s */
282
+ #define kh_put(name, h, k, r) kh_put_##name(h, k, r) /* insert key k; *r reports how (0, 1 or 2) */
283
+ #define kh_get(name, h, k) kh_get_##name(h, k) /* bucket of key k, or kh_end(h) if absent */
284
+ #define kh_del(name, h, k) kh_del_##name(h, k) /* delete the entry in bucket k */
285
+
286
+ #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) /* non-zero if bucket x holds a live key */
287
+ #define kh_key(h, x) ((h)->keys[x]) /* key stored in bucket x (lvalue) */
288
+ #define kh_val(h, x) ((h)->vals[x]) /* value stored in bucket x (lvalue) */
289
+ #define kh_value(h, x) ((h)->vals[x]) /* same as kh_val */
290
+ #define kh_begin(h) (khint_t)(0) /* first bucket index for iteration */
291
+ #define kh_end(h) ((h)->n_buckets) /* one past the last bucket index */
292
+ #define kh_size(h) ((h)->size) /* number of live entries */
293
+ #define kh_n_buckets(h) ((h)->n_buckets) /* current bucket count */
294
+
295
+ /* More convenient interfaces */
296
+
297
/* set of uint32_t keys (no values stored) */
+ #define KHASH_SET_INIT_INT(name) \
298
+ KHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
299
+
300
/* map from uint32_t keys to khval_t values */
+ #define KHASH_MAP_INIT_INT(name, khval_t) \
301
+ KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
302
+
303
/* set of uint64_t keys (no values stored) */
+ #define KHASH_SET_INIT_INT64(name) \
304
+ KHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
305
+
306
/* map from uint64_t keys to khval_t values */
+ #define KHASH_MAP_INIT_INT64(name, khval_t) \
307
+ KHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
308
+
309
/* key type for string tables; only the pointer is stored in the table, and keys are compared with strcmp */
+ typedef const char *kh_cstr_t;
310
/* set of C-string keys (no values stored) */
+ #define KHASH_SET_INIT_STR(name) \
311
+ KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
312
+
313
/* map from C-string keys to khval_t values */
+ #define KHASH_MAP_INIT_STR(name, khval_t) \
314
+ KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
315
+
316
+ #endif /* __AC_KHASH_H */