RubyGems - whistlepig - Versions diffs - 0.9.1 → 0.10 - Mend

whistlepig 0.9.1 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

data/README +40 -12
data/ext/whistlepig/extconf.rb +1 -1
data/ext/whistlepig/index.c +201 -62
data/ext/whistlepig/index.h +11 -2
data/ext/whistlepig/lock.c +153 -0
data/ext/whistlepig/lock.h +18 -0
data/ext/whistlepig/mmap-obj.c +36 -20
data/ext/whistlepig/mmap-obj.h +12 -7
data/ext/whistlepig/search.c +7 -6
data/ext/whistlepig/segment.c +97 -47
data/ext/whistlepig/segment.h +19 -3
data/ext/whistlepig/stringmap.c +61 -56
data/ext/whistlepig/stringmap.h +7 -14
data/ext/whistlepig/termhash.c +60 -62
data/ext/whistlepig/termhash.h +4 -6
data/ext/whistlepig/whistlepig.c +5 -1
data/ext/whistlepig/whistlepig.h +1 -0
metadata +29 -38
data/ext/whistlepig/dump.c +0 -65
data/ext/whistlepig/extconf.h +0 -3
data/ext/whistlepig/test-segment.c +0 -404
data/ext/whistlepig/test-stringmap.c +0 -82
data/ext/whistlepig/test-stringpool.c +0 -67
data/ext/whistlepig/test-termhash.c +0 -95
data/ext/whistlepig/test-tokenizer.c +0 -55
data/ext/whistlepig/test.h +0 -38
data/ext/whistlepig/timer.h +0 -28

data/README CHANGED Viewed

@@ -1,14 +1,14 @@
 = Whistlepig
-Whistlepig is a minimalist realtime full-text search index. Its goal is to be
-as small and feature-free as possible, while still remaining useful, performant
-and scalable to large corpora. If you want realtime full-text search without
-the frills, Whistlepig may be for you.
+Whistlepig is a minimalist realtime full-text search index. Its goal is
+to be as small and maintainable as possible, while still remaining
+useful, performant and scalable to large corpora. If you want realtime
+full-text search without the frills, Whistlepig may be for you.
 Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
 bindings.
-Latest version: 0.9.1, released 2012-03-14.
+Latest version: 0.10, released 2012-04-01.
         Status: beta
           News: http://all-thing.net/label/whistlepig/
       Homepage: http://masanjin.net/whistlepig/
@@ -27,7 +27,7 @@ Roughly speaking, realtime search means:
   reindexing or index merging;
 - later documents are more important than earlier documents.
-Whistlepig takes these principles to an extreme.
+Whistlepig takes these principles at face value.
 - It only returns documents in the reverse (LIFO) order to which they were
   added, and performs no ranking, reordering, or scoring.
 - It only supports incremental indexing. There is no notion of batch indexing
@@ -47,6 +47,17 @@ Features that Whistlepig does provide:
 - Early query termination and resumable queries.
 - A tiny, < 3 KLOC ANSI C99 implementation.
+== Benchmarks
+On my not-particularly-new Linux desktop, I can index 8.5 MB/s of text
+data per process, including some minor parsing.
+Index sizes are roughly 50% of the original corpus size, e.g. the 1.4gb
+Enron email corpus (http://cs.cmu.edu/~enron/) is 753mb in the index.
+Query performance is entirely dependent on the queries and the index
+size. Run the benchmark-queries to see some examples.
 == Synopsis (using Ruby bindings)
   require 'rubygems'
@@ -83,11 +94,28 @@ Features that Whistlepig does provide:
   q4 = Query.new "body", "subject:know hello"
   results4 = index.search q4                   # => [3]
-== A note on concurrency:
+== Concurrency
+Whistlepig supports multi-process concurrency. Multiple reader and
+writer processes can access the same index without mangling data.
+Internally, Whistlepig uses pthread read-write locks to synchronize
+readers and writers. This allows multiple concurrent readers but only a
+single writer.
+While this locking approach guarantees index correctness, it decreases
+read and write performance when one or more writers exist. Systems with
+high write loads may benefit from sharding documents across independent
+indexes rather than sending everything to the same index.
+== Design tradeoffs
+I have generally erred on the side of maintainable code at the expense
+of speed. Simpler implementations have been preferred over more complex,
+faster versions. If you ever have to modify Whistlepig to suit your
+needs, you will appreciate this.
-Whistlepig is currently single-process and single-thread only. However, it is
-built with multi-process access in mind. Per-segment single-writer,
-multi-reader support is planned in the near future. Multi-writer support can be
-accomplished via index striping and may be attempted in the distant future.
+== Bug reports
-Please send bug reports and comments to: wmorgan-whistlepig-readme@masanjin.net.
+Please file bugs here: https://github.com/wmorgan/whistlepig/issues
+Please send comments to: wmorgan-whistlepig-readme@masanjin.net.

data/ext/whistlepig/extconf.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'mkmf'
-$CFLAGS = "-g -O3 -std=c99 $(cflags) -D_ANSI_SOURCE"
+$CFLAGS= "-std=c99 -D_ANSI_SOURCE -D_XOPEN_SOURCE=600 $(cflags)"
 create_header
 create_makefile "whistlepig/whistlepig"

data/ext/whistlepig/index.c CHANGED Viewed

@@ -6,6 +6,7 @@
 #include "whistlepig.h"
 #define PATH_BUF_SIZE 4096
+#define INDEX_VERSION 1
 int wp_index_exists(const char* pathname_base) {
   char buf[PATH_BUF_SIZE];
@@ -13,74 +14,129 @@ int wp_index_exists(const char* pathname_base) {
   return wp_segment_exists(buf);
 }
+RAISING_STATIC(grab_writelock(wp_index* index)) {
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  RELAY_ERROR(wp_lock_grab(&ii->lock, WP_LOCK_WRITELOCK));
+  return NO_ERROR;
+}
+RAISING_STATIC(grab_readlock(wp_index* index)) {
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  RELAY_ERROR(wp_lock_grab(&ii->lock, WP_LOCK_READLOCK));
+  return NO_ERROR;
+}
+RAISING_STATIC(release_lock(wp_index* index)) {
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  RELAY_ERROR(wp_lock_release(&ii->lock));
+  return NO_ERROR;
+}
+RAISING_STATIC(index_info_init(index_info* ii, uint32_t index_version)) {
+  ii->index_version = index_version;
+  ii->num_segments = 0;
+  RELAY_ERROR(wp_lock_setup(&ii->lock));
+  return NO_ERROR;
+}
+RAISING_STATIC(index_info_validate(index_info* ii, uint32_t index_version)) {
+  if(ii->index_version != index_version) RAISE_ERROR("index has type %u; expecting type %u", ii->index_version, index_version);
+  return NO_ERROR;
+}
 wp_error* wp_index_create(wp_index** indexptr, const char* pathname_base) {
   char buf[PATH_BUF_SIZE];
-  snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
-  if(wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' already exists", pathname_base);
   wp_index* index = *indexptr = malloc(sizeof(wp_index));
+  snprintf(buf, PATH_BUF_SIZE, "%s.ii", pathname_base);
+  RELAY_ERROR(mmap_obj_create(&index->indexinfo, "wp/indexinfo", buf, sizeof(index_info)));
+  RELAY_ERROR(index_info_init(MMAP_OBJ(index->indexinfo, index_info), INDEX_VERSION));
   index->pathname_base = pathname_base;
-  index->num_segments = 1;
   index->sizeof_segments = 1;
   index->open = 1;
   index->segments = malloc(sizeof(wp_segment));
   index->docid_offsets = malloc(sizeof(uint64_t));
+  snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
   RELAY_ERROR(wp_segment_create(&index->segments[0], buf));
   index->docid_offsets[0] = 0;
+  index->num_segments = 1;
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  ii->num_segments = 1;
   return NO_ERROR;
 }
-RAISING_STATIC(ensure_num_segments(wp_index* index)) {
+// increases the index->segments array until we have enough
+// space to represent index->num_segments
+RAISING_STATIC(ensure_segment_pointer_fit(wp_index* index)) {
   if(index->num_segments >= index->sizeof_segments) {
-    index->sizeof_segments *= 2;
+    if(index->sizeof_segments == 0) index->sizeof_segments = 1; // lame
+    while(index->sizeof_segments < index->num_segments) index->sizeof_segments *= 2; // lame
     index->segments = realloc(index->segments, sizeof(wp_segment) * index->sizeof_segments);
     index->docid_offsets = realloc(index->docid_offsets, sizeof(uint64_t) * index->sizeof_segments);
     if(index->segments == NULL) RAISE_ERROR("oom");
+    if(index->segments == NULL) RAISE_ERROR("oom");
   }
   return NO_ERROR;
 }
-wp_error* wp_index_load(wp_index** indexptr, const char* pathname_base) {
+// ensures that we know about all segments. should be wrapped
+// in a global read mutex to prevent creation.
+RAISING_STATIC(ensure_all_segments(wp_index* index)) {
   char buf[PATH_BUF_SIZE];
-  snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
-  if(!wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' does not exist", pathname_base);
-  wp_index* index = *indexptr = malloc(sizeof(wp_index));
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  if(ii->num_segments < index->num_segments) RAISE_ERROR("invalid value for num_segments: %u vs %u", index->num_segments, ii->num_segments);
+  if(ii->num_segments == index->num_segments) return NO_ERROR;
-  index->pathname_base = pathname_base;
-  index->num_segments = 0;
-  index->sizeof_segments = 1;
-  index->open = 1;
-  index->segments = malloc(sizeof(wp_segment));
-  index->docid_offsets = malloc(sizeof(uint64_t));
+  // otherwise, we need to load some more segments
+  uint16_t old_num_segments = index->num_segments;
+  index->num_segments = ii->num_segments;
+  RELAY_ERROR(ensure_segment_pointer_fit(index));
-  // load all the segments we can
-  while(index->num_segments < WP_MAX_SEGMENTS) {
-    snprintf(buf, PATH_BUF_SIZE, "%s%d", pathname_base, index->num_segments);
-    if(!wp_segment_exists(buf)) break;
+  for(uint16_t i = old_num_segments; i < index->num_segments; i++) {
+    snprintf(buf, PATH_BUF_SIZE, "%s%u", index->pathname_base, i);
+    DEBUG("trying to loading segment %u from %s", i, buf);
+    RELAY_ERROR(wp_segment_load(&index->segments[i], buf));
-    RELAY_ERROR(ensure_num_segments(index));
-    DEBUG("loading segment %s", buf);
-    RELAY_ERROR(wp_segment_load(&index->segments[index->num_segments], buf));
-    if(index->num_segments == 0)
-      index->docid_offsets[index->num_segments] = 0;
+    if(i == 0) index->docid_offsets[i] = 0;
     else {
       // segments return docids 1 through N, so the num_docs in a segment is
       // also the max document id
-      postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 1].postings, postings_region);
-      index->docid_offsets[index->num_segments] = prevpr->num_docs + index->docid_offsets[index->num_segments - 1];
+      segment_info* prevsi = MMAP_OBJ(index->segments[i - 1].seginfo, segment_info);
+      index->docid_offsets[i] = prevsi->num_docs + index->docid_offsets[i - 1];
     }
-    index->num_segments++;
   }
   return NO_ERROR;
 }
+wp_error* wp_index_load(wp_index** indexptr, const char* pathname_base) {
+  char buf[PATH_BUF_SIZE];
+  wp_index* index = *indexptr = malloc(sizeof(wp_index));
+  snprintf(buf, PATH_BUF_SIZE, "%s.ii", pathname_base);
+  RELAY_ERROR(mmap_obj_load(&index->indexinfo, "wp/indexinfo", buf));
+  RELAY_ERROR(index_info_validate(MMAP_OBJ(index->indexinfo, index_info), INDEX_VERSION));
+  index->pathname_base = pathname_base;
+  index->open = 1;
+  index->num_segments = 0;
+  index->sizeof_segments = 0;
+  index->segments = NULL;
+  index->docid_offsets = NULL;
+  RELAY_ERROR(ensure_all_segments(index));
+  return NO_ERROR;
+}
 // we have two special values at our disposal to mark where we are in
 // the sequence of segments
 #define SEGMENT_UNINITIALIZED WP_MAX_SEGMENTS
@@ -96,12 +152,22 @@ wp_error* wp_index_setup_query(wp_index* index, wp_query* query) {
 // can be called multiple times to resume
 wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_results, uint32_t* num_results, uint64_t* results) {
   *num_results = 0;
+  // make sure we know about all segments (one could've been added by a writer)
+  RELAY_ERROR(grab_readlock(index));
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   if(index->num_segments == 0) return NO_ERROR;
   if(query->segment_idx == SEGMENT_UNINITIALIZED) {
     query->segment_idx = index->num_segments - 1;
     DEBUG("setting up segment %u", query->segment_idx);
-    RELAY_ERROR(wp_search_init_search_state(query, &index->segments[query->segment_idx]));
+    wp_segment* seg = &index->segments[query->segment_idx];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    RELAY_ERROR(wp_segment_reload(seg));
+    RELAY_ERROR(wp_search_init_search_state(query, seg));
+    RELAY_ERROR(wp_segment_release_lock(seg));
   }
   // at this point, we assume we're initialized and query->segment_idx is the index
@@ -112,7 +178,11 @@ wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_
     search_result* segment_results = malloc(sizeof(search_result) * want_num_results);
     DEBUG("searching segment %d", query->segment_idx);
-    RELAY_ERROR(wp_search_run_query_on_segment(query, &index->segments[query->segment_idx], want_num_results, &got_num_results, segment_results));
+    wp_segment* seg = &index->segments[query->segment_idx];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    RELAY_ERROR(wp_segment_reload(seg));
+    RELAY_ERROR(wp_search_run_query_on_segment(query, seg, want_num_results, &got_num_results, segment_results));
+    RELAY_ERROR(wp_segment_release_lock(seg));
     DEBUG("asked segment %d for %d results, got %d", query->segment_idx, want_num_results, got_num_results);
     // extract the per-segment docids from the search results and adjust by
@@ -168,39 +238,73 @@ wp_error* wp_index_teardown_query(wp_index* index, wp_query* query) {
   return NO_ERROR;
 }
-wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) {
-  int success;
-  wp_segment* seg = &index->segments[index->num_segments - 1];
+RAISING_STATIC(get_and_writelock_last_segment(wp_index* index, wp_entry* entry, wp_segment** returned_seg)) {
+  // assume we have a writelock on the index object here, so that no one can
+  // add segments while we're doing this stuff.
-  // first, ensure we have enough space in the current segment
-  uint32_t postings_bytes;
+  int success;
+  RELAY_ERROR(ensure_all_segments(index)); // make sure we know about all segments
+  wp_segment* seg = &index->segments[index->num_segments - 1]; // get last segment
+  RELAY_ERROR(wp_segment_grab_writelock(seg)); // grab the writelock
+  uint32_t postings_bytes; // calculate how much space we'll need to fit this entry in there
   RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
   RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
-  // if not, we need to open a new one
-  if(!success) {
-    DEBUG("segment %d is full, loading a new one", index->num_segments - 1);
-    char buf[PATH_BUF_SIZE];
-    snprintf(buf, PATH_BUF_SIZE, "%s%d", index->pathname_base, index->num_segments);
-    RELAY_ERROR(ensure_num_segments(index));
-    RELAY_ERROR(wp_segment_create(&index->segments[index->num_segments], buf));
-    index->num_segments++;
-    // set the docid_offset
-    postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 2].postings, postings_region);
-    index->docid_offsets[index->num_segments - 1] = prevpr->num_docs + index->docid_offsets[index->num_segments - 2];
-    seg = &index->segments[index->num_segments - 1];
-    DEBUG("loaded new segment %d at %p", index->num_segments - 1, &index->segments[index->num_segments - 1]);
-    RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
-    RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
-    if(!success) RAISE_ERROR("can't fit new entry into fresh segment. that's crazy");
+  // if we can fit in there, then return it! (still locked)
+  if(success) {
+    *returned_seg = seg;
+    return NO_ERROR;
   }
+  RAISE_ERROR("making new");
+  // otherwise, unlock it and let's make a new one
+  RELAY_ERROR(wp_segment_release_lock(seg));
+  char buf[PATH_BUF_SIZE];
+  DEBUG("segment %d is full, loading a new one", index->num_segments - 1);
+  snprintf(buf, PATH_BUF_SIZE, "%s%d", index->pathname_base, index->num_segments);
+  // increase the two counters
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  ii->num_segments++;
+  index->num_segments++;
+  // make sure we have a pointer for this guy
+  RELAY_ERROR(ensure_segment_pointer_fit(index));
+  // create the new segment
+  RELAY_ERROR(wp_segment_create(&index->segments[index->num_segments - 1], buf));
+  // set the docid_offset
+  segment_info* prevsi = MMAP_OBJ(index->segments[index->num_segments - 2].seginfo, segment_info);
+  index->docid_offsets[index->num_segments - 1] = prevsi->num_docs + index->docid_offsets[index->num_segments - 2];
+  seg = &index->segments[index->num_segments - 1];
+  DEBUG("loaded new segment %d at %p", index->num_segments - 1, seg);
+  RELAY_ERROR(wp_segment_grab_writelock(seg)); // lock it
+  RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
+  RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
+  if(!success) RAISE_ERROR("can't fit new entry into fresh segment. that's crazy");
+  *returned_seg = seg;
+  return NO_ERROR;
+}
+wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) {
+  wp_segment* seg = NULL;
   docid_t seg_doc_id;
+  // interleaving lock access -- potential for deadlock is high. :(
+  RELAY_ERROR(grab_writelock(index)); // grab full-index lock
+  RELAY_ERROR(get_and_writelock_last_segment(index, entry, &seg));
+  RELAY_ERROR(release_lock(index)); // release full-index lock
+  RELAY_ERROR(wp_segment_reload(seg));
   RELAY_ERROR(wp_segment_grab_docid(seg, &seg_doc_id));
   RELAY_ERROR(wp_entry_write_to_segment(entry, seg, seg_doc_id));
+  RELAY_ERROR(wp_segment_release_lock(seg));
   *doc_id = seg_doc_id + index->docid_offsets[index->num_segments - 1];
   return NO_ERROR;
@@ -226,7 +330,11 @@ wp_error* wp_index_dumpinfo(wp_index* index, FILE* stream) {
   fprintf(stream, "index has %d segments\n", index->num_segments);
   for(int i = 0; i < index->num_segments; i++) {
     fprintf(stream, "\nsegment %d:\n", i);
-    RELAY_ERROR(wp_segment_dumpinfo(&index->segments[i], stream));
+    wp_segment* seg = &index->segments[i];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    RELAY_ERROR(wp_segment_reload(seg));
+    RELAY_ERROR(wp_segment_dumpinfo(seg, stream));
+    RELAY_ERROR(wp_segment_release_lock(seg));
   }
   return NO_ERROR;
@@ -246,16 +354,28 @@ wp_error* wp_index_delete(const char* pathname_base) {
     else break;
   }
+  snprintf(buf, PATH_BUF_SIZE, "%s.ii", pathname_base);
+  unlink(buf);
   return NO_ERROR;
 }
 wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id) {
   int found = 0;
+  RELAY_ERROR(grab_writelock(index));
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   for(uint32_t i = index->num_segments; i > 0; i--) {
     if(doc_id > index->docid_offsets[i - 1]) {
+      wp_segment* seg = &index->segments[i - 1];
       DEBUG("found doc %llu in segment %u", doc_id, i - 1);
-      RELAY_ERROR(wp_segment_add_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_grab_writelock(seg));
+      RELAY_ERROR(wp_segment_reload(seg));
+      RELAY_ERROR(wp_segment_add_label(seg, label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_release_lock(seg));
       found = 1;
       break;
     }
@@ -270,10 +390,19 @@ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id
 wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc_id) {
   int found = 0;
+  RELAY_ERROR(grab_writelock(index));
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   for(uint32_t i = index->num_segments; i > 0; i--) {
     if(doc_id > index->docid_offsets[i - 1]) {
+      wp_segment* seg = &index->segments[i - 1];
       DEBUG("found doc %llu in segment %u", doc_id, i - 1);
-      RELAY_ERROR(wp_segment_remove_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_grab_writelock(seg));
+      RELAY_ERROR(wp_segment_reload(seg));
+      RELAY_ERROR(wp_segment_remove_label(seg, label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_release_lock(seg));
       found = 1;
       break;
     }
@@ -285,13 +414,23 @@ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc
   return NO_ERROR;
 }
-uint64_t wp_index_num_docs(wp_index* index) {
-  uint64_t ret = 0;
+wp_error* wp_index_num_docs(wp_index* index, uint64_t* num_docs) {
+  *num_docs = 0;
+  RELAY_ERROR(grab_readlock(index));
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   // TODO check for overflow or some shit
-  for(uint32_t i = index->num_segments; i > 0; i--) ret += wp_segment_num_docs(&index->segments[i - 1]);
+  for(uint32_t i = index->num_segments; i > 0; i--) {
+    wp_segment* seg = &index->segments[i - 1];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    RELAY_ERROR(wp_segment_reload(seg));
+    *num_docs += wp_segment_num_docs(seg);
+    RELAY_ERROR(wp_segment_release_lock(seg));
+  }
-  return ret;
+  return NO_ERROR;
 }
 // insane. but i'm putting this here. not defined in c99. don't want to make a

data/ext/whistlepig/index.h CHANGED Viewed

@@ -9,6 +9,8 @@
 // essentially relays commands to the appropriate ones, creating new segments
 // as needed.
+#include <pthread.h>
 #include "defaults.h"
 #include "segment.h"
 #include "error.h"
@@ -16,13 +18,20 @@
 #define WP_MAX_SEGMENTS 65534 // max value of wp_search_query->segment_idx - 2 because we need two special numbers
+typedef struct index_info {
+  uint32_t index_version;
+  uint32_t num_segments;
+  pthread_rwlock_t lock;
+} index_info;
 typedef struct wp_index {
   const char* pathname_base;
   uint16_t num_segments;
   uint16_t sizeof_segments;
   uint64_t* docid_offsets;
-  struct wp_segment* segments;
+  wp_segment* segments;
   uint8_t open;
+  mmap_obj indexinfo;
 } wp_index;
 // API methods
@@ -45,7 +54,7 @@ wp_error* wp_index_unload(wp_index* index) RAISES_ERROR;
 wp_error* wp_index_free(wp_index* index) RAISES_ERROR;
 // public: returns the number of documents in the index.
-uint64_t wp_index_num_docs(wp_index* index);
+wp_error* wp_index_num_docs(wp_index* index, uint64_t* num_docs) RAISES_ERROR;
 // public: initializes a query for use on the index. must be called before
 // run_query

data/ext/whistlepig/lock.c ADDED Viewed

@@ -0,0 +1,153 @@
+#include <unistd.h>
+#include <pthread.h>
+#include "whistlepig.h"
+wp_error* wp_lock_setup(pthread_rwlock_t* lock) {
+  pthread_rwlockattr_t attr;
+  if(pthread_rwlockattr_init(&attr) != 0) RAISE_SYSERROR("cannot initialize pthreads rwlock attr");
+  if(pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) != 0) RAISE_SYSERROR("cannot set pthreads rwlockattr to PTHREAD_PROCESS_SHARED");
+  if(pthread_rwlock_init(lock, &attr) != 0) RAISE_SYSERROR("cannot initialize pthreads rwlock");
+  if(pthread_rwlockattr_destroy(&attr) != 0) RAISE_SYSERROR("cannot destroy pthreads rwlock attr");
+  return NO_ERROR;
+}
+/*
+alternative implementation that uses rdlock. doesn't allow us to detect
+and break stale locks, but gives us a good sense of what the timing should
+be like.
+wp_error* wp_segment_grab_lock2(wp_segment* seg, int lock_type) {
+  segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
+  const char* lock_name = (lock_type == WP_LOCK_READLOCK ? "read" : "write");
+  DEBUG("grabbing %slock for segment %p", lock_name, seg);
+  struct timespec start, end;
+  clock_gettime(CLOCK_MONOTONIC, &start);
+  int ret = 0;
+  switch(lock_type) {
+    case WP_LOCK_READLOCK: ret = pthread_rwlock_rdlock(&si->lock); break;
+    case WP_LOCK_WRITELOCK: ret = pthread_rwlock_wrlock(&si->lock); break;
+  }
+  if(ret != 0) RAISE_SYSERROR("grabbing %slock", lock_name);
+  clock_gettime(CLOCK_MONOTONIC, &end);
+  uint64_t diff_in_ns = ((end.tv_sec * 1000000000) + end.tv_nsec) -
+             ((start.tv_sec * 1000000000) + start.tv_nsec);
+  uint32_t total_delay_ms = diff_in_ns / 1000000;
+  if(total_delay_ms > 0) printf("XXX acquired %slock for segment %p after %ums\n", lock_name, seg, total_delay_ms);
+  return NO_ERROR;
+}
+*/
+// we will wait this many milliseconds before assuming the lock
+// is stale and breaking it.
+#define LOCK_STALE_TIME_MS 2500
+/* here's the best implementation i can find, empirically, of being
+   able to grab pthread read and write locks, while still being able
+   to detect stale locks and repair them.
+   it involves a busyloop, which is lame.
+*/
+wp_error* wp_lock_grab(pthread_rwlock_t* lock, int lock_type) {
+  const char* lock_name = (lock_type == WP_LOCK_READLOCK ? "read" : "write");
+  DEBUG("grabbing %slock at %p", lock_name, lock);
+  unsigned int delay_ms = 1;
+  uint32_t total_delay_ms = 0;
+  while(1) {
+    int ret = 0;
+    switch(lock_type) {
+      case WP_LOCK_READLOCK: ret = pthread_rwlock_tryrdlock(lock); break;
+      case WP_LOCK_WRITELOCK: ret = pthread_rwlock_trywrlock(lock); break;
+      default: RAISE_ERROR("invalid lock type");
+    }
+    if(ret == 0) break; // acquired!
+    // we get EAGAINs here if the writer died before closing the lock.
+    if((ret != EBUSY) && (ret != EAGAIN)) RAISE_SYSERROR("acquiring %slock", lock_name);
+    if(total_delay_ms >= LOCK_STALE_TIME_MS) {
+      //RAISE_ERROR("timeout acquiring %slock: %ums", lock_name, total_delay_ms);
+      DEBUG("assuming lock is stale and breaking it!");
+      RELAY_ERROR(wp_lock_setup(lock));
+    }
+    if(delay_ms > 1000) sleep(delay_ms / 1000);
+    usleep(1000 * (delay_ms % 1000));
+    total_delay_ms += delay_ms;
+  }
+  if(total_delay_ms > 0) DEBUG(":( acquired %slock for after %ums\n", lock_name, total_delay_ms);
+  return NO_ERROR;
+}
+/* an alternative implementation that uses the _timed pthread operations.
+   although this should be the best version, i had many problems with it.  the
+   timeout didn't seem to ever trigger. i would also see an EINVAL whenever a
+   writer had a readlock. in the case of a stale lock, i would just get EINVALS
+   forever rather than a proper ETIMEDOUT.
+   since using this would require implementing my own stale lock detection
+   anyway, i'm just going to use the simpler version above instead.
+*/
+/*
+wp_error* wp_segment_grab_lock3(wp_segment* seg, int lock_type) {
+  segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
+  const char* lock_name = (lock_type == WP_LOCK_READLOCK ? "read" : "write");
+  DEBUG("grabbing %slock for segment %p", lock_name, seg);
+  struct timespec timeout;
+  timeout.tv_sec = 3;//LOCK_STALE_TIME_MS / 1000;
+  timeout.tv_nsec = 0;//(LOCK_STALE_TIME_MS % 1000) * 1000000;
+  struct timeval startt, endt;
+  gettimeofday(&startt, NULL);
+  int acquired = 0;
+  while(!acquired) {
+    int ret = 0;
+    switch(lock_type) {
+      case WP_LOCK_READLOCK: ret = pthread_rwlock_timedrdlock(&si->lock, &timeout); break;
+      case WP_LOCK_WRITELOCK: ret = pthread_rwlock_timedwrlock(&si->lock, &timeout); break;
+      default: RAISE_ERROR("invalid lock type");
+    }
+    switch(ret) {
+      case 0: acquired = 1; break;
+      case ETIMEDOUT:
+        DEBUG("assuming lock is stale and breaking it!");
+        RELAY_ERROR(setup_lock(&si->lock));
+        break;
+      case EAGAIN:
+        // despite the documentation, this seems to happen every time we request a readlock and
+        // the lock is already held by the writer. so we will just busyloop here. this happens
+        // fairly frequently, so this is lame.
+        usleep(1000);
+        break;
+      default:
+        RAISE_SYSERROR("acquiring %slock", lock_name);
+    }
+  }
+  gettimeofday(&endt, NULL);
+  long elapsed = ((endt.tv_sec - startt.tv_sec) * 1000) + ((endt.tv_usec - startt.tv_usec) / 1000);
+  if(elapsed > 0) printf(":( acquired %slock for segment %p after %ldms\n", lock_name, seg, elapsed);
+  return NO_ERROR;
+}
+*/
+wp_error* wp_lock_release(pthread_rwlock_t* lock) {
+  if(pthread_rwlock_unlock(lock) != 0) RAISE_SYSERROR("releasing lock");
+  return NO_ERROR;
+}