whistlepig 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,137 @@
1
+ #ifndef WP_SEGMENT_H_
2
+ #define WP_SEGMENT_H_
3
+
4
+ // whistlepig segments
5
+ // (c) 2011 William Morgan. See COPYING for license terms.
6
+ //
7
+ // a segment is the basic persistence mechanism for indexed documents.
8
+ // each segment contains a string hash and pool, a term hash, a
9
+ // postings region, and a separate labels posting region.
10
+ //
11
+ // segments store documents until MAX_DOCID or MAX_POSTINGS_REGION_SIZE
12
+ // are reached. then you have to make a new segment.
13
+ //
14
+ // labels are stored in a separate postings region because they're stored in a
15
+ // different, mutable format. regular text is stored in a compressed format
16
+ // that is not amenable to later changes.
17
+
18
+ #include "defaults.h"
19
+ #include "stringmap.h"
20
+ #include "termhash.h"
21
+ #include "query.h"
22
+ #include "error.h"
23
+ #include "search.h"
24
+ #include "mmap-obj.h"
25
+
26
+ // a posting entry. used to represent postings when actively working with them.
27
+ // the actual structure on disk/mmap memory region is delta- and variable-byte
28
+ // encoded.
29
+ typedef struct posting {
30
+ docid_t doc_id;
31
+ uint32_t num_positions;
32
+ uint32_t next_offset;
33
+ pos_t* positions;
34
+ } posting;
35
+
36
+ // a label posting entry. currently this is also the actual representation of
37
+ // label postings on disk.
38
+ typedef struct label_posting {
39
+ docid_t doc_id;
40
+ uint32_t next_offset;
41
+ } label_posting;
42
+
43
// sentinel values. both are fully parenthesized so they behave as a single
// operand in any expression context (e.g. `sizeof OFFSET_NONE`).
#define OFFSET_NONE ((uint32_t)0)
#define DOCID_NONE ((docid_t)0)
45
+
46
+ // docids:
47
+ //
48
+ // docid 0 is reserved as a sentinel value, so the doc ids returned from this
49
+ // segment will be between 1 and num_docs inclusive.
50
+ //
51
+ // docid num_docs + 1 is also a sentinel value for negative queries. also, we
52
+ // reserve one bit of each docid in the posting region as a marker for when
53
+ // there's only one occurrence in the document (this saves us a byte for this
54
+ // case). so the logical maximum number of docs per segment is 2^31 - 2 =
55
+ // 2,147,483,646.
56
+ //
57
+ // we make the segments smaller than that anyways, under the assumption that
58
+ // this will make automatic segment loading and unloading easier, once we have
59
+ // that implemented. (and there are limits to things like the number of unique
60
+ // terms also; see termhash.h.)
61
+
62
// 2^31 - 2; follows from the docid encoding. don't tweak me
#define MAX_LOGICAL_DOCID 2147483646

// upper bound on the size of a postings region (512 MiB). tweak me
#define MAX_POSTINGS_REGION_SIZE (512 * 1024 * 1024)

// path suffix for the posting region
#define WP_SEGMENT_POSTING_REGION_PATH_SUFFIX "pr"
66
+
67
+ // the header for the postings region
68
// header that sits at the start of a postings region; the encoded postings
// follow it directly in the trailing flexible array.
typedef struct postings_region {
  uint32_t index_type_and_flags;  // meaning of the bits is defined elsewhere -- TODO document
  uint32_t num_docs;
  uint32_t num_postings;
  uint32_t postings_head;
  uint32_t postings_tail;
  uint8_t postings[];             // where the postings go yo
} postings_region;
75
+
76
+ // a segment is a bunch of all these things
77
+ typedef struct wp_segment {
78
+ mmap_obj stringmap;
79
+ mmap_obj stringpool;
80
+ mmap_obj termhash;
81
+ mmap_obj postings;
82
+ mmap_obj labels;
83
+ } wp_segment;
84
+
85
+ // API methods
86
+
87
+ // public: does a segment exist with this base pathname?
88
+ int wp_segment_exists(const char* pathname_base);
89
+
90
+ // public: create a segment, raising an error if it already exists
91
+ wp_error* wp_segment_create(wp_segment* segment, const char* pathname_base) RAISES_ERROR;
92
+
93
+ // public: load a segment, raising an error unless it already exists
94
+ wp_error* wp_segment_load(wp_segment* segment, const char* pathname_base) RAISES_ERROR;
95
+
96
+ // public: unload a segment
97
+ wp_error* wp_segment_unload(wp_segment* s) RAISES_ERROR;
98
+
99
+ // public: number of docs in a segment
100
+ uint64_t wp_segment_num_docs(wp_segment* s);
101
+
102
+ // public: delete a segment from disk
103
+ wp_error* wp_segment_delete(const char* pathname_base) RAISES_ERROR;
104
+
105
+ // private: read a posting from the postings region at a given offset
106
+ wp_error* wp_segment_read_posting(wp_segment* s, uint32_t offset, posting* po, int include_positions) RAISES_ERROR;
107
+
108
+ // private: read a label from the label postings region at a given offset
109
+ wp_error* wp_segment_read_label(wp_segment* s, uint32_t offset, posting* po) RAISES_ERROR;
110
+
111
+ // public: add a posting. be sure you've called wp_segment_ensure_fit with the
112
+ // size of the postings list entry before doing this! (you can obtain the size
113
+ // by calling wp_entry_sizeof_postings_region()).
114
+ wp_error* wp_segment_add_posting(wp_segment* s, const char* field, const char* word, docid_t doc_id, uint32_t num_positions, pos_t positions[]) RAISES_ERROR;
115
+
116
+ // public: add a label to an existing document
117
+ wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id) RAISES_ERROR;
118
+
119
+ // public: remove a label from an existing document
120
+ wp_error* wp_segment_remove_label(wp_segment* s, const char* label, docid_t doc_id) RAISES_ERROR;
121
+
122
+ // public: get a new docid
123
+ wp_error* wp_segment_grab_docid(wp_segment* s, docid_t* docid) RAISES_ERROR;
124
+
125
+ // public: dump a lot of info about the segment to a stream
126
+ wp_error* wp_segment_dumpinfo(wp_segment* s, FILE* stream) RAISES_ERROR;
127
+
128
+ // public: ensure that adding a certain number of postings bytes and label
129
+ // postings bytes will still fit within the bounds of the segment. sets success
130
+ // to 1 if true or 0 if false. if false, you should put that stuff in a new
131
+ // segment.
132
+ wp_error* wp_segment_ensure_fit(wp_segment* seg, uint32_t postings_bytes, uint32_t label_bytes, int* success) RAISES_ERROR;
133
+
134
+ // private: return the size on disk of a position array
135
+ wp_error* wp_segment_sizeof_posarray(wp_segment* seg, uint32_t num_positions, pos_t* positions, uint32_t* size) RAISES_ERROR;
136
+
137
+ #endif
@@ -0,0 +1,278 @@
1
+ #include "whistlepig.h"
2
+
3
// number of entries in prime_list
static const int HASH_PRIME_SIZE = 32;

// allowed bucket counts, in increasing order; a stringmap's n_buckets_idx is
// an index into this table.
static const uint32_t prime_list[] = {
  0u,          3u,          11u,         23u,
  53u,         97u,         193u,        389u,
  769u,        1543u,       3079u,       6151u,
  12289u,      24593u,      49157u,      98317u,
  196613u,     393241u,     786433u,     1572869u,
  3145739u,    6291469u,    12582917u,   25165843u,
  50331653u,   100663319u,  201326611u,  402653189u,
  805306457u,  1610612741u, 3221225473u, 4294967291u
};
14
+
15
// per-bucket flag bits. every bucket owns two adjacent bits in the flags
// array (16 buckets per uint32_t word): bit 0 means "deleted" and bit 1 means
// "empty". the is* macros return the raw masked bits (nonzero == true).
//
// all arguments are parenthesized so that arbitrary expressions can be passed
// safely. NOTE: `i` is still evaluated more than once, so don't pass
// expressions with side effects.
#define isempty(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&2)
#define isdel(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&1)
#define iseither(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&3)
#define set_isdel_false(flag, i) ((flag)[(i)>>4]&=~(1ul<<(((i)&0xfU)<<1)))
#define set_isempty_false(flag, i) ((flag)[(i)>>4]&=~(2ul<<(((i)&0xfU)<<1)))
#define set_isboth_false(flag, i) ((flag)[(i)>>4]&=~(3ul<<(((i)&0xfU)<<1)))
#define set_isdel_true(flag, i) ((flag)[(i)>>4]|=1ul<<(((i)&0xfU)<<1))

// load factor: upper_bound is computed as n_buckets * HASH_UPPER
static const double HASH_UPPER = 0.77;
24
+
25
// multiplicative string hash: starts from the first character and folds in
// each following one as hash = hash * 31 + c (arithmetic wraps at 32 bits).
// the empty string hashes to 0.
static inline uint32_t string_hash(const char *s) {
  uint32_t hash = *s;
  if (hash == 0) return hash;

  while (*++s) hash = hash * 31 + *s;
  return hash;
}
30
+
31
// returns 1 when the two NUL-terminated strings are byte-for-byte equal and 0
// otherwise. neither argument may be NULL.
static inline int string_equals(const char* a, const char* b) {
  //DEBUG("comparing '%s' (%p) and '%s' (%p)", a, a, b, b);
  return !strcmp(a, b);
}
35
+
36
+ // set flags, keys and vals to correct locations based on h->n_buckets
37
+ void stringmap_setup(stringmap* h, stringpool* p) {
38
+ h->pool = p;
39
+ h->flags = (uint32_t*)h->boundary;
40
+ h->keys = (uint32_t*)((uint32_t*)h->boundary + ((h->n_buckets >> 4) + 1));
41
+ }
42
+
43
+ void stringmap_init(stringmap* h, stringpool* p) {
44
+ h->n_buckets_idx = INITIAL_N_BUCKETS_IDX;
45
+ h->n_buckets = prime_list[h->n_buckets_idx];
46
+ h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
47
+ h->size = h->n_occupied = 0;
48
+ stringmap_setup(h, p);
49
+ memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
50
+ }
51
+
52
+ /*
53
+ static void kh_destroy_##name(kh_##name##_t *h) {
54
+ if (h) {
55
+ free(h->keys); free(h->flags);
56
+ free(h->vals);
57
+ free(h);
58
+ }
59
+ }
60
+
61
+ static void kh_clear_##name(kh_##name##_t *h) {
62
+ if (h && h->flags) {
63
+ memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
64
+ h->size = h->n_occupied = 0;
65
+ }
66
+ }
67
+ */
68
+
69
+ uint32_t stringmap_get(stringmap *h, const char* key) {
70
+ if(h->n_buckets) {
71
+ uint32_t inc, k, i, last;
72
+ k = string_hash(key); i = k % h->n_buckets;
73
+ inc = 1 + k % (h->n_buckets - 1); last = i;
74
+ while (!isempty(h->flags, i) && (isdel(h->flags, i) || !string_equals(stringpool_lookup(h->pool, h->keys[i]), key))) {
75
+ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
76
+ else i += inc;
77
+ if (i == last) return h->n_buckets;
78
+ }
79
+ return iseither(h->flags, i)? h->n_buckets : i;
80
+ }
81
+ else return 0;
82
+ }
83
+
84
+ wp_error* stringmap_bump_size(stringmap *h) {
85
+ DEBUG("bumping size for string hash at %p with size %u and boundary %p", h, stringmap_size(h), h->boundary);
86
+
87
+ if(h->n_buckets_idx >= (HASH_PRIME_SIZE - 1)) RAISE_ERROR("stringmap can't be this big");
88
+
89
+ h->n_buckets_idx++;
90
+ uint32_t new_n_buckets = prime_list[h->n_buckets_idx];
91
+
92
+ // first make a backup of the oldflags
93
+ size_t oldflagsize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
94
+ uint32_t* oldflags = malloc(oldflagsize);
95
+ memcpy(oldflags, h->flags, oldflagsize);
96
+
97
+ // keep pointers to the old locations
98
+ uint32_t* oldkeys = h->keys;
99
+
100
+ // set pointers to the new locations
101
+ h->keys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
102
+
103
+ // move the keys
104
+ memmove(h->keys, oldkeys, h->n_buckets * sizeof(uint32_t));
105
+
106
+ // clear the new flags
107
+ memset(h->flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
108
+
109
+ // do the complicated stuff from khash.h
110
+ for (unsigned int j = 0; j != h->n_buckets; ++j) {
111
+ if (iseither(oldflags, j) == 0) {
112
+ uint32_t key = h->keys[j];
113
+ set_isdel_true(oldflags, j);
114
+ while (1) {
115
+ uint32_t inc, k, i;
116
+ k = string_hash(stringpool_lookup(h->pool, key));
117
+ i = k % new_n_buckets;
118
+ inc = 1 + k % (new_n_buckets - 1);
119
+ while (!isempty(h->flags, i)) {
120
+ if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets;
121
+ else i += inc;
122
+ }
123
+ set_isempty_false(h->flags, i);
124
+ if (i < h->n_buckets && iseither(oldflags, i) == 0) {
125
+ { uint32_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
126
+ set_isdel_true(oldflags, i);
127
+ } else {
128
+ h->keys[i] = key;
129
+ break;
130
+ }
131
+ }
132
+ }
133
+ }
134
+
135
+ free(oldflags);
136
+ h->n_buckets = new_n_buckets;
137
+ h->n_occupied = h->size;
138
+ h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
139
+
140
+ #ifdef DEBUGOUTPUT
141
+ DEBUG("after bump, string hash at %p has size %u and boundary %p", h, stringmap_size(h), h->boundary);
142
+ #endif
143
+
144
+ return NO_ERROR;
145
+ }
146
+
147
+ uint32_t stringmap_put(stringmap *h, const char* key, int *ret) {
148
+ uint32_t x;
149
+
150
+ {
151
+ #ifdef DEBUGOUTPUT
152
+ int num_loops = 0;
153
+ #endif
154
+ uint32_t inc, k, i, site, last;
155
+ x = site = h->n_buckets; k = string_hash(key); i = k % h->n_buckets;
156
+ //DEBUG("asked to hash '%s'. initial hash is %u => %u and n_occupied is %u", key, k, i, h->n_occupied);
157
+ if (isempty(h->flags, i)) x = i;
158
+ else {
159
+ inc = 1 + k % (h->n_buckets - 1); last = i;
160
+ while (!isempty(h->flags, i) && (isdel(h->flags, i) || !string_equals(stringpool_lookup(h->pool, h->keys[i]), key))) {
161
+ #ifdef DEBUGOUTPUT
162
+ num_loops++;
163
+ #endif
164
+ if (isdel(h->flags, i)) site = i;
165
+ if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
166
+ else i += inc;
167
+ if (i == last) { x = site; break; }
168
+ }
169
+ if ((x == h->n_buckets) && (i == last)) { // out of space
170
+ if(!string_equals(stringpool_lookup(h->pool, h->keys[i]), key)) {
171
+ DEBUG("out of space!");
172
+ *ret = -1;
173
+ return x;
174
+ }
175
+ }
176
+ if (x == h->n_buckets) { // didn't find it on the first try
177
+ if (isempty(h->flags, i) && site != h->n_buckets) x = site;
178
+ else x = i;
179
+ }
180
+ }
181
+ DEBUG("looped %u times to put", num_loops);
182
+ //DEBUG("x is %u, site is %u, n_buckets is %u", x, site, h->n_buckets);
183
+ }
184
+
185
+ //DEBUG("for pos %u, isempty? %d and isdel %d", x, isempty(h->flags, x), isdel(h->flags, x));
186
+
187
+ uint32_t idx;
188
+ if(isempty(h->flags, x) || isdel(h->flags, x)) {
189
+ idx = stringpool_add(h->pool, key);
190
+ if(idx == (uint32_t)-1) {
191
+ *ret = -2;
192
+ return x;
193
+ }
194
+ if (isempty(h->flags, x)) ++h->n_occupied;
195
+ h->keys[x] = idx;
196
+ set_isboth_false(h->flags, x);
197
+ ++h->size;
198
+ *ret = 1;
199
+ }
200
+ else *ret = 0;
201
+
202
+ return x;
203
+ }
204
+
205
+ void stringmap_del(stringmap *h, uint32_t x) {
206
+ if (x != h->n_buckets && !iseither(h->flags, x)) {
207
+ set_isdel_true(h->flags, x);
208
+ --h->size;
209
+ }
210
+ }
211
+
212
+ /*
213
+ uint32_t stringmap_get_val(stringmap* h, string t) {
214
+ uint32_t idx = termhash_get(h, t);
215
+ if(idx == h->n_buckets) return (uint32_t)-1;
216
+ return h->vals[idx];
217
+ }
218
+
219
+ wp_error* termhash_put_val(termhash* h, term t, uint32_t val) {
220
+ int status;
221
+ uint32_t loc = termhash_put(h, t, &status);
222
+ DEBUG("put(%u,%u) has status %d and ret %u (error val is %u)", t.field_s, t.word_s, status, loc, h->n_buckets);
223
+ if(status == -1) RAISE_ERROR("out of space in hash");
224
+ h->vals[loc] = val;
225
+ return NO_ERROR;
226
+ }
227
+ */
228
+
229
+ int stringmap_needs_bump(stringmap* h) {
230
+ return (h->n_occupied >= h->upper_bound);
231
+ }
232
+
233
+ // memory layout: stringmap, then:
234
+ // ((n_buckets >> 4) + 1) uint32_t's for the flags
235
+ // n_buckets uint32_t for the keys
236
+ static uint32_t size(uint32_t n_buckets) {
237
+ uint32_t size = sizeof(stringmap) +
238
+ (((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
239
+ (n_buckets * sizeof(uint32_t));
240
+ return size;
241
+ }
242
+
243
+ // returns the total size in bytes
244
+ uint32_t stringmap_size(stringmap* h) {
245
+ return size(h->n_buckets);
246
+ }
247
+
248
+ uint32_t stringmap_initial_size() {
249
+ return size(prime_list[INITIAL_N_BUCKETS_IDX]);
250
+ }
251
+
252
+ // the size if we embiggen by one notch
253
+ uint32_t stringmap_next_size(stringmap* h) {
254
+ int next_idx = (h->n_buckets_idx < (HASH_PRIME_SIZE - 1)) ? h->n_buckets_idx + 1 : h->n_buckets_idx;
255
+ return size(prime_list[next_idx]);
256
+ }
257
+
258
+ const char* stringmap_int_to_string(stringmap* h, uint32_t i) {
259
+ return stringpool_lookup(h->pool, i);
260
+ }
261
+
262
+ // returns -1 if not found
263
+ uint32_t stringmap_string_to_int(stringmap* h, const char* s) {
264
+ uint32_t idx = stringmap_get(h, s);
265
+ if(idx == h->n_buckets) return (uint32_t)-1; // not there
266
+ return h->keys[idx];
267
+ }
268
+
269
+ wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) {
270
+ int status;
271
+ uint32_t idx = stringmap_put(h, s, &status);
272
+ if(status == -1) RAISE_ERROR("out of space in hash put");
273
+ if(status == -2) RAISE_ERROR("out of space in pool put");
274
+
275
+ *id = h->keys[idx];
276
+
277
+ return NO_ERROR;
278
+ }
@@ -0,0 +1,82 @@
1
+ #ifndef WP_STRINGHASH_H_
2
+ #define WP_STRINGHASH_H_
3
+
4
+ // whistlepig string map
5
+ // (c) 2011 William Morgan. See COPYING for license terms.
6
+ //
7
+ // based on a heavily modified khash.h
8
+ //
9
+ // a stringmap is a bidirectional map from strings to int values. like termhash
10
+ // and stringpool, it uses a slightly funny API that never allocates memory,
11
+ // but instead operates on pointers to preallocated blocks of memory.
12
+ //
13
+ // uses a stringpool internally to do the int->string mapping, so you
14
+ // shouldn't have to interact with the stringpool directly; you can just
15
+ // use this object.
16
+ //
17
+ // like termhash and pool, has a slightly funny API that is designed to work on
18
+ // a pre-allocated chunk of memory rather than allocate any of its own.
19
+
20
+ #include <stdint.h>
21
+ #include "stringpool.h"
22
+ #include "error.h"
23
+
24
+ /* list of primes from khash.h:
25
+ 0ul, 3ul, 11ul, 23ul, 53ul,
26
+ 97ul, 193ul, 389ul, 769ul, 1543ul,
27
+ 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,
28
+ 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,
29
+ 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,
30
+ 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
31
+ 3221225473ul, 4294967291ul
32
+ */
33
+
34
+ #define INITIAL_N_BUCKETS_IDX 1
35
+
36
+ typedef struct stringmap {
37
+ uint8_t n_buckets_idx;
38
+ uint32_t n_buckets, size, n_occupied, upper_bound;
39
+ uint32_t *flags;
40
+ uint32_t *keys;
41
+ stringpool* pool;
42
+ uint8_t boundary[];
43
+ // in memory at this point
44
+ // ((n_buckets >> 4) + 1) uint32_t's for the flags
45
+ // n_buckets uint32_t's for the keys
46
+ } stringmap;
47
+
48
+ // API methods
49
+
50
+ // public: write a new stringmap to memory
51
+ void stringmap_init(stringmap* h, stringpool* p);
52
+
53
+ // public: set up an existing stringmap in memory
54
+ void stringmap_setup(stringmap* h, stringpool* p);
55
+
56
+ // public: add a string. sets id to its id. dupes are fine; will just set the
57
+ // id correctly.
58
+ wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) RAISES_ERROR;
59
+
60
+ // public: get the int value given a string. returns (uint32_t)-1 if not found.
61
+ uint32_t stringmap_string_to_int(stringmap* h, const char* s);
62
+
63
+ // public: get the string value given an int. returns corrupt data if the int
64
+ // is invalid.
65
+ const char* stringmap_int_to_string(stringmap* h, uint32_t i);
66
+
67
+ // public: returns the byte size of the stringmap
68
+ uint32_t stringmap_size(stringmap* h);
69
+
70
+ // public: returns the initial byte size for an empty stringmap
71
+ uint32_t stringmap_initial_size();
72
+
73
+ // public: returns the byte size for the next larger version of a stringmap
74
+ uint32_t stringmap_next_size(stringmap* h);
75
+
76
+ // public: does the stringmap need a size increase?
77
+ int stringmap_needs_bump(stringmap* h);
78
+
79
+ // public: increases the size of the stringmap
80
+ wp_error* stringmap_bump_size(stringmap *h) RAISES_ERROR;
81
+
82
+ #endif
@@ -0,0 +1,44 @@
1
+ #include "whistlepig.h"
2
+
3
+ void stringpool_init(stringpool* p) {
4
+ p->next = 1;
5
+ p->size = INITIAL_POOL_SIZE;
6
+ }
7
+
8
+ uint32_t stringpool_size(stringpool* p) {
9
+ return sizeof(stringpool) + (p->size * sizeof(char));
10
+ }
11
+
12
+ uint32_t stringpool_add(stringpool* p, const char* s) {
13
+ int len = strlen(s) + 1;
14
+ if((p->next + len) >= p->size) {
15
+ DEBUG("out of space in string pool for %s (len %d, next %d, size %d)", s, len, p->next, p->size);
16
+ return (uint32_t)-1;
17
+ }
18
+ uint32_t ret = p->next;
19
+ p->next += len;
20
+ DEBUG("writing %d bytes to %p -- %p", len, &(p->pool[ret]), &(p->pool[ret]) + len);
21
+ strncpy(&(p->pool[ret]), s, len);
22
+ return ret;
23
+ }
24
+
25
+ int stringpool_needs_bump(stringpool* p) {
26
+ return (p->next >= (int)((float)p->size * 0.9) ? 1 : 0);
27
+ }
28
+
29
+ uint32_t stringpool_next_size(stringpool* p) {
30
+ return sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * sizeof(char));
31
+ }
32
+
33
+ uint32_t stringpool_initial_size() {
34
+ return sizeof(stringpool) + INITIAL_POOL_SIZE;
35
+ }
36
+
37
+ void stringpool_bump_size(stringpool* p) {
38
+ p->size = stringpool_next_size(p);
39
+ }
40
+
41
+ char* stringpool_lookup(stringpool* p, uint32_t id) {
42
+ if((id == 0) || (id >= p->next)) return NULL;
43
+ return &p->pool[id];
44
+ }
@@ -0,0 +1,58 @@
1
+ #ifndef WP_STRINGPOOL_H_
2
+ #define WP_STRINGPOOL_H_
3
+
4
+ // whistlepig string pool
5
+ // (c) 2011 William Morgan. See COPYING for license terms.
6
+ //
7
+ // a string pool. adds strings to a big blob and returns an int which can be
8
+ // used to look them up later. in other words, an int->string mapping, where
9
+ // you provide the string and we'll give you an int.
10
+ //
11
+ // does no duplicate detection, if you add the same string twice, you will
12
+ // get two different ints and you will have wasted memory.
13
+ //
14
+ // this is used by stringmap to maintain a bidirectional string<->int mapping
15
+ // and is not really used directly.
16
+ //
17
+ // int 0 is a special case for the null string. passing in invalid ints (i.e.
18
+ // ints i didn't return) will result in garbage data.
19
+ //
20
+ // like termhash and stringmap, has a slightly funny API that is designed to
21
+ // work on a pre-allocated chunk of memory rather than allocate any of its own.
22
+
23
+ #include <stdint.h>
24
+
25
// capacity, in characters, of a freshly initialized pool
#define INITIAL_POOL_SIZE 2048

typedef struct stringpool {
  uint32_t size;  // capacity of pool[] in characters
  uint32_t next;  // offset at which the next string will be written
  char pool[];    // the strings themselves, NUL-terminated, back to back
} stringpool;
31
+
32
+ // API methods
33
+
34
+ // public: create a stringpool
35
+ void stringpool_init(stringpool* p);
36
+
37
+ // public: add a string, returning an int
38
+ uint32_t stringpool_add(stringpool* p, const char* s);
39
+
40
+ // public: does this stringpool need to be increased?
41
+ int stringpool_needs_bump(stringpool* p);
42
+
43
+ // public: increase the size of the stringpool
44
+ void stringpool_bump_size(stringpool* p);
45
+
46
+ // public: given an id, return the string
47
+ char* stringpool_lookup(stringpool* p, uint32_t id);
48
+
49
+ // public: returns the byte size of the pool
50
+ uint32_t stringpool_size(stringpool* p);
51
+
52
+ // public: returns the initial byte size for an empty pool
53
+ uint32_t stringpool_initial_size();
54
+
55
+ // public: returns the byte size for the next larger version of a pool
56
+ uint32_t stringpool_next_size(stringpool* p);
57
+
58
+ #endif