RubyGems - whistlepig - Versions diffs - 0.1 - Mend

whistlepig 0.1

Files changed (36) hide show

data/README +86 -0
data/ext/whistlepig/defaults.h +28 -0
data/ext/whistlepig/entry.c +181 -0
data/ext/whistlepig/entry.h +66 -0
data/ext/whistlepig/error.c +24 -0
data/ext/whistlepig/error.h +94 -0
data/ext/whistlepig/extconf.rb +6 -0
data/ext/whistlepig/index.c +294 -0
data/ext/whistlepig/index.h +88 -0
data/ext/whistlepig/khash.h +316 -0
data/ext/whistlepig/mmap-obj.c +76 -0
data/ext/whistlepig/mmap-obj.h +52 -0
data/ext/whistlepig/query-parser.c +37 -0
data/ext/whistlepig/query-parser.h +25 -0
data/ext/whistlepig/query-parser.lex.c +2249 -0
data/ext/whistlepig/query-parser.lex.h +359 -0
data/ext/whistlepig/query-parser.tab.c +1757 -0
data/ext/whistlepig/query-parser.tab.h +85 -0
data/ext/whistlepig/query.c +194 -0
data/ext/whistlepig/query.h +78 -0
data/ext/whistlepig/search.c +746 -0
data/ext/whistlepig/search.h +76 -0
data/ext/whistlepig/segment.c +615 -0
data/ext/whistlepig/segment.h +137 -0
data/ext/whistlepig/stringmap.c +278 -0
data/ext/whistlepig/stringmap.h +82 -0
data/ext/whistlepig/stringpool.c +44 -0
data/ext/whistlepig/stringpool.h +58 -0
data/ext/whistlepig/termhash.c +294 -0
data/ext/whistlepig/termhash.h +79 -0
data/ext/whistlepig/tokenizer.lex.c +2263 -0
data/ext/whistlepig/tokenizer.lex.h +360 -0
data/ext/whistlepig/whistlepig.h +15 -0
data/ext/whistlepig/whistlepigc.c +537 -0
data/lib/whistlepig.rb +119 -0
metadata +103 -0

data/ext/whistlepig/segment.h ADDED Viewed

@@ -0,0 +1,137 @@
+#ifndef WP_SEGMENT_H_
+#define WP_SEGMENT_H_
+// whistlepig segments
+// (c) 2011 William Morgan. See COPYING for license terms.
+//
+// a segment is the basic persistence mechanism for indexed documents.
+// each segment contains a string hash and pool, a term hash, a
+// postings region, and a separate labels posting region.
+//
+// segments store documents until MAX_DOCID or MAX_POSTINGS_REGION_SIZE
+// are reached. then you have to make a new segment.
+//
+// labels are stored in a separate postings region because they're stored in a
+// different, mutable format. regular text is stored in a compressed format
+// that is not amenable to later changes.
+#include "defaults.h"
+#include "stringmap.h"
+#include "termhash.h"
+#include "query.h"
+#include "error.h"
+#include "search.h"
+#include "mmap-obj.h"
+// a posting entry. used to represent postings when actively working with them.
+// the actual structure on disk/mmap memory region is delta- and variable-byte
+// encoded, so this struct is only the decoded, in-memory view.
+typedef struct posting {
+  // id of the document this posting refers to
+  docid_t doc_id;
+  // number of entries in positions (presumably; matches the num_positions /
+  // positions[] pair taken by wp_segment_add_posting)
+  uint32_t num_positions;
+  // offset of the next posting in the same list. presumably OFFSET_NONE when
+  // there is none -- TODO confirm against segment.c
+  uint32_t next_offset;
+  // position array. NOTE(review): who allocates and frees this is not visible
+  // from this header.
+  pos_t* positions;
+} posting;
+// a label posting entry. currently this is also the actual representation of
+// label postings on disk, so changing the field order or types changes the
+// file format.
+typedef struct label_posting {
+  // id of the document carrying the label
+  docid_t doc_id;
+  // offset of the next label posting in the same list (presumably OFFSET_NONE
+  // at the end of the list -- TODO confirm against segment.c)
+  uint32_t next_offset;
+} label_posting;
+// sentinel values: both are zero, so offset 0 and docid 0 can never refer to a
+// real posting or document. (docid 0 is documented as reserved below;
+// OFFSET_NONE presumably plays the same role for posting offsets.)
+#define OFFSET_NONE (uint32_t)0
+#define DOCID_NONE (docid_t)0
+// docids:
+//
+// docid 0 is reserved as a sentinel value, so the doc ids returned from this
+// segment will be between 1 and num_docs inclusive.
+//
+// docid num_docs + 1 is also a sentinel value for negative queries.  also, we
+// reserve one bit of each docid in the posting region as a marker for when
+// there's only one occurrence in the document (this saves us a byte for this
+// case). so the logical maximum number of docs per segment is 2^31 - 2 =
+// 2,147,483,646.
+//
+// we make the segments smaller than that anyways, under the assumption that
+// this will make automatic segment loading and unloading easier, once we have
+// that implemented. (and there are limits to things like the number of unique
+// terms also; see termhash.h.)
+// 2^31 - 2: the logical ceiling on docids explained in the comment above.
+#define MAX_LOGICAL_DOCID 2147483646 // don't tweak me
+// 512 MiB cap on the postings region of a single segment.
+#define MAX_POSTINGS_REGION_SIZE (512*1024*1024) // tweak me
+// path suffix for the postings region (presumably appended to the segment's
+// base pathname -- confirm in segment.c)
+#define WP_SEGMENT_POSTING_REGION_PATH_SUFFIX "pr"
+// the header for the postings region. the encoded postings follow it directly
+// in the same block of memory via the flexible array member.
+typedef struct postings_region {
+  // type tag and flag bits packed into one word; NOTE(review): the bit layout
+  // is not defined in this header
+  uint32_t index_type_and_flags;
+  // number of documents in this region
+  uint32_t num_docs;
+  // number of postings in this region
+  uint32_t num_postings;
+  // presumably offsets into postings[] bounding the used area -- TODO confirm
+  // against segment.c
+  uint32_t postings_head, postings_tail;
+  uint8_t postings[]; // where the postings go yo
+} postings_region;
+// a segment is a bunch of all these things: one mmap_obj per component named
+// in the file header comment (string hash, string pool, term hash, postings
+// region, label postings region).
+typedef struct wp_segment {
+  mmap_obj stringmap;   // string hash (see stringmap.h)
+  mmap_obj stringpool;  // string storage backing the stringmap (see stringpool.h)
+  mmap_obj termhash;    // term hash (see termhash.h)
+  mmap_obj postings;    // postings region for regular text
+  mmap_obj labels;      // separate postings region for labels
+} wp_segment;
+// API methods
+//
+// functions marked RAISES_ERROR return a wp_error*; the macro and the error
+// conventions are presumably defined in error.h, which is included above.
+//
+// public: does a segment exist with this base pathname?
+int wp_segment_exists(const char* pathname_base);
+// public: create a segment, raising an error if it already exists
+wp_error* wp_segment_create(wp_segment* segment, const char* pathname_base) RAISES_ERROR;
+// public: load a segment, raising an error unless it already exists
+wp_error* wp_segment_load(wp_segment* segment, const char* pathname_base) RAISES_ERROR;
+// public: unload a segment
+wp_error* wp_segment_unload(wp_segment* s) RAISES_ERROR;
+// public: number of docs in a segment. NOTE(review): returned as uint64_t
+// although postings_region.num_docs is a uint32_t.
+uint64_t wp_segment_num_docs(wp_segment* s);
+// public: delete a segment from disk
+wp_error* wp_segment_delete(const char* pathname_base) RAISES_ERROR;
+// private: read a posting from the postings region at a given offset into po.
+// include_positions presumably controls whether po->positions is filled in --
+// TODO confirm against segment.c.
+wp_error* wp_segment_read_posting(wp_segment* s, uint32_t offset, posting* po, int include_positions) RAISES_ERROR;
+// private: read a label from the label postings region at a given offset.
+// NOTE(review): the output parameter is a posting*, not a label_posting*.
+wp_error* wp_segment_read_label(wp_segment* s, uint32_t offset, posting* po) RAISES_ERROR;
+// public: add a posting. be sure you've called wp_segment_ensure_fit with the
+// size of the postings list entry before doing this! (you can obtain the size
+// by calling wp_entry_sizeof_postings_region()).
+wp_error* wp_segment_add_posting(wp_segment* s, const char* field, const char* word, docid_t doc_id, uint32_t num_positions, pos_t positions[]) RAISES_ERROR;
+// public: add a label to an existing document
+wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id) RAISES_ERROR;
+// public: remove a label from an existing document
+wp_error* wp_segment_remove_label(wp_segment* s, const char* label, docid_t doc_id) RAISES_ERROR;
+// public: get a new docid, written to *docid
+wp_error* wp_segment_grab_docid(wp_segment* s, docid_t* docid) RAISES_ERROR;
+// public: dump a lot of info about the segment to a stream
+wp_error* wp_segment_dumpinfo(wp_segment* s, FILE* stream) RAISES_ERROR;
+// public: ensure that adding a certain number of postings bytes and label
+// postings bytes will still fit within the bounds of the segment. sets success
+// to 1 if true or 0 if false. if false, you should put that stuff in a new
+// segment.
+wp_error* wp_segment_ensure_fit(wp_segment* seg, uint32_t postings_bytes, uint32_t label_bytes, int* success) RAISES_ERROR;
+// private: return the size on disk of a position array, written to *size
+wp_error* wp_segment_sizeof_posarray(wp_segment* seg, uint32_t num_positions, pos_t* positions, uint32_t* size) RAISES_ERROR;
+#endif

data/ext/whistlepig/stringmap.c ADDED Viewed

@@ -0,0 +1,278 @@
+#include "whistlepig.h"
+// number of entries in prime_list below.
+static const int HASH_PRIME_SIZE = 32;
+// bucket counts the table may take, in increasing order. a stringmap stores an
+// index into this list (n_buckets_idx) and grows by moving to the next entry.
+// entry 0 is 0, i.e. "no buckets".
+static const uint32_t prime_list[] = {
+  0ul,          3ul,          11ul,         23ul,         53ul,
+  97ul,         193ul,        389ul,        769ul,        1543ul,
+  3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
+  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
+  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
+  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
+  3221225473ul, 4294967291ul
+};
+// per-bucket flag accessors. each bucket owns two bits, so one uint32_t word
+// holds the flags for 16 buckets: i>>4 selects the word and (i&0xf)<<1 the bit
+// offset inside it. the high bit of the pair (value 2) means "empty" and the
+// low bit (value 1) means "deleted"; a live entry has both bits clear.
+// filling the flag words with 0xaa (binary 10101010) therefore marks every
+// bucket as empty and not deleted.
+// note: the arguments are not parenthesised, so pass only simple expressions.
+#define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
+#define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
+#define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
+#define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
+#define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
+#define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
+#define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
+// load factor: upper_bound is n_buckets * HASH_UPPER, and the map reports that
+// it needs a bump once n_occupied reaches upper_bound.
+static const double HASH_UPPER = 0.77;
+// hashes a NUL-terminated string: starts from the first character and folds
+// each following character in as hash * 31 + c, written as a shift and a
+// subtract. the empty string hashes to 0.
+static inline uint32_t string_hash(const char *s) {
+  uint32_t hash = *s;
+  if (hash == 0) return hash;
+  for (const char *p = s + 1; *p; ++p) {
+    hash = (hash << 5) - hash + *p;
+  }
+  return hash;
+}
+// returns 1 when the two NUL-terminated strings have identical contents and 0
+// otherwise. neither argument may be NULL.
+static inline int string_equals(const char* a, const char* b) {
+  int cmp = strcmp(a, b);
+  return cmp == 0;
+}
+// attaches the pool and points flags and keys at their places inside the
+// trailing boundary region: the flag words come first, and the keys start
+// right after the ((n_buckets >> 4) + 1) flag words implied by h->n_buckets.
+void stringmap_setup(stringmap* h, stringpool* p) {
+  uint32_t* region = (uint32_t*)h->boundary;
+  uint32_t flag_words = (h->n_buckets >> 4) + 1;
+  h->pool = p;
+  h->flags = region;
+  h->keys = region + flag_words;
+}
+// writes a fresh, empty stringmap into the memory at h: picks the initial
+// bucket count, derives the load-factor bound, wires up the pointers and
+// marks every bucket as empty.
+void stringmap_init(stringmap* h, stringpool* p) {
+  h->size = 0;
+  h->n_occupied = 0;
+  h->n_buckets_idx = INITIAL_N_BUCKETS_IDX;
+  h->n_buckets = prime_list[h->n_buckets_idx];
+  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
+  stringmap_setup(h, p);
+  // 0xaa sets the "empty" bit of every two-bit flag pair
+  size_t flag_bytes = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
+  memset(h->flags, 0xaa, flag_bytes);
+}
+/*
+static void kh_destroy_##name(kh_##name##_t *h) {
+  if (h) {
+    free(h->keys); free(h->flags);
+    free(h->vals);
+    free(h);
+  }
+}
+static void kh_clear_##name(kh_##name##_t *h) {
+  if (h && h->flags) {
+    memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
+    h->size = h->n_occupied = 0;
+  }
+}
+*/
+// looks up key and returns the index of its bucket, or h->n_buckets when the
+// key is not present (which is 0 for a map with no buckets). probing starts
+// at hash % n_buckets and advances by a hash-derived step, wrapping around,
+// until it hits an empty bucket, a live bucket holding key, or comes back to
+// where it started.
+uint32_t stringmap_get(stringmap *h, const char* key) {
+  if (!h->n_buckets) return 0;
+  const uint32_t hash = string_hash(key);
+  const uint32_t start = hash % h->n_buckets;
+  const uint32_t step = 1 + hash % (h->n_buckets - 1);
+  uint32_t slot = start;
+  for (;;) {
+    if (isempty(h->flags, slot)) break;
+    // deleted buckets are skipped without looking at their key
+    if (!isdel(h->flags, slot) && string_equals(stringpool_lookup(h->pool, h->keys[slot]), key)) break;
+    if (slot + step >= h->n_buckets) slot = slot + step - h->n_buckets;
+    else slot += step;
+    if (slot == start) return h->n_buckets;
+  }
+  if (iseither(h->flags, slot)) return h->n_buckets;
+  return slot;
+}
+// grows the stringmap to the next bucket count in prime_list and rehashes all
+// live keys in place.
+//
+// the new flag words and key array are written directly behind h->boundary at
+// their new, larger offsets without any bounds check, so the caller must have
+// already grown the backing memory (presumably to stringmap_next_size(h) --
+// confirm against the caller).
+//
+// raises an error, leaving the map untouched, if the table is already at the
+// largest size in prime_list or if the temporary flag backup cannot be
+// allocated. deleted buckets are dropped by the rehash, so n_occupied equals
+// size afterwards.
+wp_error* stringmap_bump_size(stringmap *h) {
+  DEBUG("bumping size for string hash at %p with size %u and boundary %p", h, stringmap_size(h), h->boundary);
+  if(h->n_buckets_idx >= (HASH_PRIME_SIZE - 1)) RAISE_ERROR("stringmap can't be this big");
+  // first make a backup of the oldflags. this is done before any state is
+  // changed so that an allocation failure leaves the map as it was.
+  size_t oldflagsize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
+  uint32_t* oldflags = malloc(oldflagsize);
+  if(oldflags == NULL) RAISE_ERROR("out of memory while resizing stringmap");
+  memcpy(oldflags, h->flags, oldflagsize);
+  h->n_buckets_idx++;
+  uint32_t new_n_buckets = prime_list[h->n_buckets_idx];
+  // keep pointers to the old locations
+  uint32_t* oldkeys = h->keys;
+  // set pointers to the new locations
+  h->keys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
+  // move the keys. the old and new key arrays overlap, hence memmove. this
+  // must happen before the new flags are cleared, because the larger flag
+  // area extends over the start of the old key array.
+  memmove(h->keys, oldkeys, h->n_buckets * sizeof(uint32_t));
+  // clear the new flags
+  memset(h->flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
+  // do the complicated stuff from khash.h: every live key is taken out of its
+  // old bucket and re-inserted under the new bucket count. if its new bucket
+  // still holds a not-yet-moved key, the two are swapped and the evicted key
+  // is re-inserted next. oldflags tracks which old buckets are still pending.
+  for (unsigned int j = 0; j != h->n_buckets; ++j) {
+    if (iseither(oldflags, j) == 0) {
+      uint32_t key = h->keys[j];
+      set_isdel_true(oldflags, j);
+      while (1) {
+        uint32_t inc, k, i;
+        k = string_hash(stringpool_lookup(h->pool, key));
+        i = k % new_n_buckets;
+        inc = 1 + k % (new_n_buckets - 1);
+        while (!isempty(h->flags, i)) {
+          if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets;
+          else i += inc;
+        }
+        set_isempty_false(h->flags, i);
+        if (i < h->n_buckets && iseither(oldflags, i) == 0) {
+          { uint32_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
+          set_isdel_true(oldflags, i);
+        } else {
+          h->keys[i] = key;
+          break;
+        }
+      }
+    }
+  }
+  free(oldflags);
+  h->n_buckets = new_n_buckets;
+  h->n_occupied = h->size;
+  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
+#ifdef DEBUGOUTPUT
+  DEBUG("after bump, string hash at %p has size %u and boundary %p", h, stringmap_size(h), h->boundary);
+#endif
+  return NO_ERROR;
+}
+// finds or creates the bucket for key and returns its index.
+//
+// *ret reports what happened:
+//    1  key was not present; it was added to the stringpool and stored in the
+//       returned bucket
+//    0  key was already present in the returned bucket
+//   -1  no usable bucket was found (table full); the return value is
+//       h->n_buckets
+//   -2  stringpool_add failed; the return value is the bucket that would have
+//       been used, and the map is unchanged
+//
+// assumes h->n_buckets is non-zero: the hash is reduced modulo n_buckets
+// without a check.
+uint32_t stringmap_put(stringmap *h, const char* key, int *ret) {
+  uint32_t x;
+  {
+#ifdef DEBUGOUTPUT
+int num_loops = 0;
+#endif
+    uint32_t inc, k, i, site, last;
+    // x is the chosen bucket and site the first reusable (deleted) bucket seen
+    // while probing; both start at n_buckets, meaning "none yet".
+    x = site = h->n_buckets; k = string_hash(key); i = k % h->n_buckets;
+    //DEBUG("asked to hash '%s'. initial hash is %u => %u and n_occupied is %u", key, k, i, h->n_occupied);
+    if (isempty(h->flags, i)) x = i;
+    else {
+      inc = 1 + k % (h->n_buckets - 1); last = i;
+      // probe until we reach an empty bucket or a live bucket holding key
+      while (!isempty(h->flags, i) && (isdel(h->flags, i) || !string_equals(stringpool_lookup(h->pool, h->keys[i]), key))) {
+#ifdef DEBUGOUTPUT
+num_loops++;
+#endif
+        if (isdel(h->flags, i)) site = i;
+        if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
+        else i += inc;
+        // wrapped all the way around: fall back to a deleted bucket, if any
+        if (i == last) { x = site; break; }
+      }
+      // i == last also holds when the loop body never ran, i.e. the very first
+      // bucket already holds key; the string comparison tells the two apart.
+      if ((x == h->n_buckets) && (i == last)) { // out of space
+        if(!string_equals(stringpool_lookup(h->pool, h->keys[i]), key)) {
+          DEBUG("out of space!");
+          *ret = -1;
+          return x;
+        }
+      }
+      if (x == h->n_buckets) { // didn't find it on the first try
+        // prefer reusing a deleted bucket over consuming a fresh empty one
+        if (isempty(h->flags, i) && site != h->n_buckets) x = site;
+        else x = i;
+      }
+    }
+    // NOTE(review): num_loops is only declared under DEBUGOUTPUT; presumably
+    // DEBUG expands to nothing otherwise. it is also an int printed with %u.
+    DEBUG("looped %u times to put", num_loops);
+    //DEBUG("x is %u, site is %u, n_buckets is %u", x, site, h->n_buckets);
+  }
+  //DEBUG("for pos %u, isempty? %d and isdel %d", x, isempty(h->flags, x), isdel(h->flags, x));
+  uint32_t idx;
+  if(isempty(h->flags, x) || isdel(h->flags, x)) {
+    // bucket is free: store the string in the pool and record its pool id
+    idx = stringpool_add(h->pool, key);
+    if(idx == (uint32_t)-1) {
+      *ret = -2;
+      return x;
+    }
+    // reusing a deleted bucket does not raise n_occupied
+    if (isempty(h->flags, x)) ++h->n_occupied;
+    h->keys[x] = idx;
+    set_isboth_false(h->flags, x);
+    ++h->size;
+    *ret = 1;
+  }
+  else *ret = 0;
+  return x;
+}
+// marks bucket x as deleted and decrements the element count. does nothing
+// when x is the "not found" value (h->n_buckets) or the bucket is already
+// empty or deleted. the string stays in the pool.
+void stringmap_del(stringmap *h, uint32_t x) {
+  if (x == h->n_buckets) return;
+  if (iseither(h->flags, x)) return;
+  set_isdel_true(h->flags, x);
+  h->size -= 1;
+}
+/*
+uint32_t stringmap_get_val(stringmap* h, string t) {
+  uint32_t idx = termhash_get(h, t);
+  if(idx == h->n_buckets) return (uint32_t)-1;
+  return h->vals[idx];
+}
+wp_error* termhash_put_val(termhash* h, term t, uint32_t val) {
+  int status;
+  uint32_t loc = termhash_put(h, t, &status);
+  DEBUG("put(%u,%u) has status %d and ret %u (error val is %u)", t.field_s, t.word_s, status, loc, h->n_buckets);
+  if(status == -1) RAISE_ERROR("out of space in hash");
+  h->vals[loc] = val;
+  return NO_ERROR;
+}
+*/
+// returns 1 once the number of occupied buckets has reached the load-factor
+// bound, 0 otherwise.
+int stringmap_needs_bump(stringmap* h) {
+  if (h->n_occupied < h->upper_bound) return 0;
+  return 1;
+}
+// byte size of a stringmap with n_buckets buckets. the memory layout is the
+// stringmap struct itself, followed by
+//   ((n_buckets >> 4) + 1) uint32_t's for the flags, then
+//   n_buckets uint32_t's for the keys.
+static uint32_t size(uint32_t n_buckets) {
+  uint32_t flag_words = (n_buckets >> 4) + 1;
+  return sizeof(stringmap) + (flag_words * sizeof(uint32_t)) + (n_buckets * sizeof(uint32_t));
+}
+// total byte size of this stringmap at its current bucket count
+uint32_t stringmap_size(stringmap* h) {
+  uint32_t buckets = h->n_buckets;
+  return size(buckets);
+}
+// byte size of a freshly initialised stringmap, i.e. one with the bucket
+// count selected by INITIAL_N_BUCKETS_IDX
+uint32_t stringmap_initial_size() {
+  uint32_t initial_buckets = prime_list[INITIAL_N_BUCKETS_IDX];
+  return size(initial_buckets);
+}
+// byte size the stringmap would have after growing by one step. when it is
+// already at the last entry of prime_list, this is its current size.
+uint32_t stringmap_next_size(stringmap* h) {
+  int idx = h->n_buckets_idx;
+  if (idx < (HASH_PRIME_SIZE - 1)) idx += 1;
+  return size(prime_list[idx]);
+}
+// maps an id back to its string by asking the underlying stringpool
+const char* stringmap_int_to_string(stringmap* h, uint32_t i) {
+  const char* str = stringpool_lookup(h->pool, i);
+  return str;
+}
+// returns the id stored for s, or (uint32_t)-1 when s is not in the map
+uint32_t stringmap_string_to_int(stringmap* h, const char* s) {
+  const uint32_t bucket = stringmap_get(h, s);
+  // stringmap_get signals "not there" by returning n_buckets
+  return (bucket != h->n_buckets) ? h->keys[bucket] : (uint32_t)-1;
+}
+// adds s to the map if it is not there yet and writes its id to *id. raises
+// an error when either the hash table or the string pool has no room left.
+wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) {
+  int put_status;
+  const uint32_t bucket = stringmap_put(h, s, &put_status);
+  if (put_status == -1) RAISE_ERROR("out of space in hash put");
+  if (put_status == -2) RAISE_ERROR("out of space in pool put");
+  *id = h->keys[bucket];
+  return NO_ERROR;
+}

data/ext/whistlepig/stringmap.h ADDED Viewed

@@ -0,0 +1,82 @@
+#ifndef WP_STRINGHASH_H_
+#define WP_STRINGHASH_H_
+// whistlepig string map
+// (c) 2011 William Morgan. See COPYING for license terms.
+//
+// based on a heavily modified khash.h
+//
+// a stringmap is a bidirectional map from strings to int values. like termhash
+// and stringpool, it uses a slightly funny API that never allocates memory,
+// but instead operates on pointers to preallocated blocks of memory.
+//
+// uses a stringpool internally to do the int->string mapping, so you
+// shouldn't have to interact with the stringpool directly; you can just
+// use this object.
+//
+// like termhash and pool, has a slightly funny API that is designed to work on
+// a pre-allocated chunk of memory rather than allocate any of its own.
+#include <stdint.h>
+#include "stringpool.h"
+#include "error.h"
+/* list of primes from khash.h:
+  0ul,          3ul,          11ul,         23ul,         53ul,
+  97ul,         193ul,        389ul,        769ul,        1543ul,
+  3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
+  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
+  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
+  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
+  3221225473ul, 4294967291ul
+*/
+// index into the prime list used for a new stringmap; entry 1 is 3 buckets.
+#define INITIAL_N_BUCKETS_IDX 1
+// the stringmap header. the flag words and key array live directly behind it
+// in the same block of memory, starting at boundary.
+typedef struct stringmap {
+  // index of the current bucket count in the prime list
+  uint8_t n_buckets_idx;
+  // n_buckets: current number of buckets
+  // size: number of live entries
+  // n_occupied: number of buckets that are not empty; deleted buckets still
+  //   count until the next resize
+  // upper_bound: n_occupied value at which the map asks for a resize
+  uint32_t n_buckets, size, n_occupied, upper_bound;
+  // pointers into boundary, recomputed by stringmap_setup
+  uint32_t *flags;
+  uint32_t *keys;
+  // pool holding the actual strings; keys[] stores ids into it
+  stringpool* pool;
+  uint8_t boundary[];
+  // in memory at this point
+  // ((n_buckets >> 4) + 1) uint32_t's for the flags
+  // n_buckets uint32_t's for the keys
+} stringmap;
+// API methods
+// public: write a new, empty stringmap to the memory at h, backed by pool p
+void stringmap_init(stringmap* h, stringpool* p);
+// public: set up an existing stringmap in memory. this recomputes the internal
+// flags/keys pointers and attaches pool p; the stored contents are untouched.
+void stringmap_setup(stringmap* h, stringpool* p);
+// public: add a string. sets id to its id. dupes are fine; will just set the
+// id correctly. raises an error if the hash or the pool is out of space.
+wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) RAISES_ERROR;
+// public: get the int value given a string. returns (uint32_t)-1 if not found.
+uint32_t stringmap_string_to_int(stringmap* h, const char* s);
+// public: get the string value given an int. see stringpool_lookup for what
+// happens when the int is invalid.
+const char* stringmap_int_to_string(stringmap* h, uint32_t i);
+// public: returns the byte size of the stringmap (header, flags and keys)
+uint32_t stringmap_size(stringmap* h);
+// public: returns the initial byte size for an empty stringmap
+uint32_t stringmap_initial_size();
+// public: returns the byte size for the next larger version of a stringmap
+uint32_t stringmap_next_size(stringmap* h);
+// public: does the stringmap need a size increase?
+int stringmap_needs_bump(stringmap* h);
+// public: increases the size of the stringmap. the new layout is written in
+// place, so the memory behind h must already be large enough for it.
+wp_error* stringmap_bump_size(stringmap *h) RAISES_ERROR;
+#endif

data/ext/whistlepig/stringpool.c ADDED Viewed

@@ -0,0 +1,44 @@
+#include "whistlepig.h"
+// initialises an empty pool in the memory at p. ids start at 1 because id 0
+// is reserved (stringpool_lookup returns NULL for it).
+void stringpool_init(stringpool* p) {
+  p->size = INITIAL_POOL_SIZE;
+  p->next = 1;
+}
+// total byte size of the pool: the header plus p->size bytes of string data
+uint32_t stringpool_size(stringpool* p) {
+  size_t payload_bytes = p->size * sizeof(char);
+  return sizeof(stringpool) + payload_bytes;
+}
+// copies s, including its terminating NUL, to the end of the pool and returns
+// the id (the offset into p->pool) under which it can be looked up later.
+// returns (uint32_t)-1 and leaves the pool unchanged when the string does not
+// fit. as before, a string that would end exactly at p->size is rejected too.
+// no duplicate detection is done.
+uint32_t stringpool_add(stringpool* p, const char* s) {
+  // keep the length in a size_t so very long strings cannot wrap an int
+  size_t len = strlen(s) + 1;
+  // same condition as (next + len) >= size, rearranged so the sum cannot wrap
+  if((p->next >= p->size) || (len >= (size_t)(p->size - p->next))) {
+    DEBUG("out of space in string pool for %s (len %zu, next %u, size %u)", s, len, p->next, p->size);
+    return (uint32_t)-1;
+  }
+  uint32_t ret = p->next;
+  p->next += (uint32_t)len; // len < p->size here, so it fits in 32 bits
+  DEBUG("writing %zu bytes to %p -- %p", len, (void*)&(p->pool[ret]), (void*)(&(p->pool[ret]) + len));
+  // len already includes the NUL, so a plain copy is enough
+  memcpy(&(p->pool[ret]), s, len);
+  return ret;
+}
+// returns 1 when the pool is at least 90% full, 0 otherwise
+int stringpool_needs_bump(stringpool* p) {
+  const int threshold = (int)((float)p->size * 0.9);
+  if (p->next >= threshold) return 1;
+  return 0;
+}
+// byte size (header included) the pool would have after doubling its string
+// area. an empty string area is treated as having size 1.
+uint32_t stringpool_next_size(stringpool* p) {
+  uint32_t current = p->size;
+  if (current == 0) current = 1;
+  return sizeof(stringpool) + (2 * current * sizeof(char));
+}
+// byte size of a freshly initialised pool: header plus INITIAL_POOL_SIZE
+uint32_t stringpool_initial_size() {
+  const size_t header_bytes = sizeof(stringpool);
+  return header_bytes + INITIAL_POOL_SIZE;
+}
+// records that the pool's string area has been doubled. the memory behind p
+// must already have been grown to stringpool_next_size(p) bytes.
+//
+// p->size counts only the bytes of the string area, not the header (see
+// stringpool_init and stringpool_size), whereas stringpool_next_size returns
+// the total byte size including the header. the header is therefore
+// subtracted again, so that stringpool_size(p) after the bump equals the
+// stringpool_next_size(p) value from before it.
+void stringpool_bump_size(stringpool* p) {
+  p->size = (uint32_t)(stringpool_next_size(p) - sizeof(stringpool));
+}
+// returns the string stored under id, or NULL when id is 0 or lies at or
+// beyond the next free offset. an id inside that range which was not returned
+// by stringpool_add points into the middle of some string.
+char* stringpool_lookup(stringpool* p, uint32_t id) {
+  int in_range = (id != 0) && (id < p->next);
+  return in_range ? &p->pool[id] : NULL;
+}

data/ext/whistlepig/stringpool.h ADDED Viewed

@@ -0,0 +1,58 @@
+#ifndef WP_STRINGPOOL_H_
+#define WP_STRINGPOOL_H_
+// whistlepig string pool
+// (c) 2011 William Morgan. See COPYING for license terms.
+//
+// a string pool. adds strings to a big blob and returns an int which can be
+// used to look them up later. in other words, an int->string mapping, where
+// you provide the string and we'll give you an int.
+//
+// does no duplicate detection, if you add the same string twice, you will
+// get two different ints and you will have wasted memory.
+//
+// this is used by stringmap to maintain a bidirectional string<->int mapping
+// and is not really used directly.
+//
+// int 0 is a special case for the null string. passing in invalid ints (i.e.
+// ints i didn't return) will result in garbage data.
+//
+// like termhash and stringmap, has a slightly funny API that is designed to
+// work on a pre-allocated chunk of memory rather than allocate any of its own.
+#include <stdint.h>
+// size in bytes of the string area of a new pool (the header is not included)
+#define INITIAL_POOL_SIZE 2048
+// the pool header. the string data follows it directly in the same block of
+// memory via the flexible array member.
+typedef struct stringpool {
+  // size: capacity of pool[] in bytes
+  // next: offset in pool[] where the next string will be written; starts at 1
+  uint32_t size, next;
+  // NUL-terminated strings stored back to back; a string's id is its offset
+  char pool[];
+} stringpool;
+// API methods
+// public: create a stringpool in the memory at p
+void stringpool_init(stringpool* p);
+// public: add a string, returning an int. returns (uint32_t)-1 if the string
+// does not fit in the pool.
+uint32_t stringpool_add(stringpool* p, const char* s);
+// public: does this stringpool need to be increased?
+int stringpool_needs_bump(stringpool* p);
+// public: increase the size of the stringpool. this only updates the recorded
+// size; it allocates nothing, so the memory behind p must already be large
+// enough.
+void stringpool_bump_size(stringpool* p);
+// public: given an id, return the string. returns NULL for id 0 and for ids
+// beyond the used part of the pool.
+char* stringpool_lookup(stringpool* p, uint32_t id);
+// public: returns the byte size of the pool
+uint32_t stringpool_size(stringpool* p);
+// public: returns the initial byte size for an empty pool
+uint32_t stringpool_initial_size();
+// public: returns the byte size for the next larger version of a pool
+uint32_t stringpool_next_size(stringpool* p);
+#endif