RubyGems - whistlepig - Versions diffs - 0.9.1 → 0.10 - Mend

whistlepig 0.9.1 → 0.10

Files changed (27) hide show

data/README +40 -12
data/ext/whistlepig/extconf.rb +1 -1
data/ext/whistlepig/index.c +201 -62
data/ext/whistlepig/index.h +11 -2
data/ext/whistlepig/lock.c +153 -0
data/ext/whistlepig/lock.h +18 -0
data/ext/whistlepig/mmap-obj.c +36 -20
data/ext/whistlepig/mmap-obj.h +12 -7
data/ext/whistlepig/search.c +7 -6
data/ext/whistlepig/segment.c +97 -47
data/ext/whistlepig/segment.h +19 -3
data/ext/whistlepig/stringmap.c +61 -56
data/ext/whistlepig/stringmap.h +7 -14
data/ext/whistlepig/termhash.c +60 -62
data/ext/whistlepig/termhash.h +4 -6
data/ext/whistlepig/whistlepig.c +5 -1
data/ext/whistlepig/whistlepig.h +1 -0
metadata +29 -38
data/ext/whistlepig/dump.c +0 -65
data/ext/whistlepig/extconf.h +0 -3
data/ext/whistlepig/test-segment.c +0 -404
data/ext/whistlepig/test-stringmap.c +0 -82
data/ext/whistlepig/test-stringpool.c +0 -67
data/ext/whistlepig/test-termhash.c +0 -95
data/ext/whistlepig/test-tokenizer.c +0 -55
data/ext/whistlepig/test.h +0 -38
data/ext/whistlepig/timer.h +0 -28

data/README CHANGED Viewed

@@ -1,14 +1,14 @@
 = Whistlepig
-Whistlepig is a minimalist realtime full-text search index. Its goal is to be
-as small and feature-free as possible, while still remaining useful, performant
-and scalable to large corpora. If you want realtime full-text search without
-the frills, Whistlepig may be for you.
+Whistlepig is a minimalist realtime full-text search index. Its goal is
+to be as small and maintainable as possible, while still remaining
+useful, performant and scalable to large corpora. If you want realtime
+full-text search without the frills, Whistlepig may be for you.
 Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
 bindings.
-Latest version: 0.9.1, released 2012-03-14.
+Latest version: 0.10, released 2012-04-01.
         Status: beta
           News: http://all-thing.net/label/whistlepig/
       Homepage: http://masanjin.net/whistlepig/
@@ -27,7 +27,7 @@ Roughly speaking, realtime search means:
   reindexing or index merging;
 - later documents are more important than earlier documents.
-Whistlepig takes these principles to an extreme.
+Whistlepig takes these principles at face value.
 - It only returns documents in the reverse of the order in which they were
   added (LIFO), and performs no ranking, reordering, or scoring.
 - It only supports incremental indexing. There is no notion of batch indexing
@@ -47,6 +47,17 @@ Features that Whistlepig does provide:
 - Early query termination and resumable queries.
 - A tiny, < 3 KLOC ANSI C99 implementation.
+== Benchmarks
+On my not-particularly-new Linux desktop, I can index 8.5 MB/s of text
+data per process, including some minor parsing.
+Index sizes are roughly 50% of the original corpus size, e.g. the 1.4gb
+Enron email corpus (http://cs.cmu.edu/~enron/) is 753mb in the index.
+Query performance is entirely dependent on the queries and the index
+size. Run the benchmark-queries to see some examples.
 == Synopsis (using Ruby bindings)
   require 'rubygems'
@@ -83,11 +94,28 @@ Features that Whistlepig does provide:
   q4 = Query.new "body", "subject:know hello"
   results4 = index.search q4                   # => [3]
-== A note on concurrency:
+== Concurrency
+Whistlepig supports multi-process concurrency. Multiple reader and
+writer processes can access the same index without mangling data.
+Internally, Whistlepig uses pthread read-write locks to synchronize
+readers and writers. This allows multiple concurrent readers but only a
+single writer.
+While this locking approach guarantees index correctness, it decreases
+read and write performance when one or more writers exist. Systems with
+high write loads may benefit from sharding documents across independent
+indexes rather than sending everything to the same index.
+== Design tradeoffs
+I have generally erred on the side of maintainable code at the expense
+of speed. Simpler implementations have been preferred over more complex,
+faster versions. If you ever have to modify Whistlepig to suit your
+needs, you will appreciate this.
-Whistlepig is currently single-process and single-thread only. However, it is
-built with multi-process access in mind. Per-segment single-writer,
-multi-reader support is planned in the near future. Multi-writer support can be
-accomplished via index striping and may be attempted in the distant future.
+== Bug reports
-Please send bug reports and comments to: wmorgan-whistlepig-readme@masanjin.net.
+Please file bugs here: https://github.com/wmorgan/whistlepig/issues
+Please send comments to: wmorgan-whistlepig-readme@masanjin.net.

data/ext/whistlepig/extconf.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require 'mkmf'
-$CFLAGS = "-g -O3 -std=c99 $(cflags) -D_ANSI_SOURCE"
+$CFLAGS= "-std=c99 -D_ANSI_SOURCE -D_XOPEN_SOURCE=600 $(cflags)" # NOTE(review): _XOPEN_SOURCE=600 is presumably what exposes the pthread rwlock API under -std=c99 -- confirm
 create_header
 create_makefile "whistlepig/whistlepig"

data/ext/whistlepig/index.c CHANGED Viewed

@@ -6,6 +6,7 @@
 #include "whistlepig.h"
 #define PATH_BUF_SIZE 4096
+#define INDEX_VERSION 1
 int wp_index_exists(const char* pathname_base) {
   char buf[PATH_BUF_SIZE];
@@ -13,74 +14,129 @@ int wp_index_exists(const char* pathname_base) {
   return wp_segment_exists(buf);
 }
+RAISING_STATIC(grab_writelock(wp_index* index)) { // acquires the index-wide lock in write mode
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info); // the lock is a field of the index_info held by index->indexinfo
+  RELAY_ERROR(wp_lock_grab(&ii->lock, WP_LOCK_WRITELOCK)); // a wp_lock_grab failure is passed on through RELAY_ERROR
+  return NO_ERROR;
+}
+RAISING_STATIC(grab_readlock(wp_index* index)) { // acquires the index-wide lock in read mode
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info); // the lock is a field of the index_info held by index->indexinfo
+  RELAY_ERROR(wp_lock_grab(&ii->lock, WP_LOCK_READLOCK)); // a wp_lock_grab failure is passed on through RELAY_ERROR
+  return NO_ERROR;
+}
+RAISING_STATIC(release_lock(wp_index* index)) { // releases the index-wide lock, whether it was taken by grab_readlock or grab_writelock
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  RELAY_ERROR(wp_lock_release(&ii->lock)); // same lock object that grab_readlock/grab_writelock operate on
+  return NO_ERROR;
+}
+RAISING_STATIC(index_info_init(index_info* ii, uint32_t index_version)) { // initializes a fresh index_info: version stamp, zero segments, new lock
+  ii->index_version = index_version; // later compared by index_info_validate
+  ii->num_segments = 0; // callers bump this once a segment has actually been created
+  RELAY_ERROR(wp_lock_setup(&ii->lock)); // initializes the rwlock stored inside the index_info itself
+  return NO_ERROR;
+}
+RAISING_STATIC(index_info_validate(index_info* ii, uint32_t index_version)) { // checks that an existing index_info carries the expected version
+  if(ii->index_version != index_version) RAISE_ERROR("index has type %u; expecting type %u", ii->index_version, index_version); // the version field is the only thing validated here
+  return NO_ERROR;
+}
 wp_error* wp_index_create(wp_index** indexptr, const char* pathname_base) { // creates a new index; *indexptr receives the malloc'ed wp_index
   char buf[PATH_BUF_SIZE];
-  snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
-  if(wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' already exists", pathname_base);
   wp_index* index = *indexptr = malloc(sizeof(wp_index)); // NOTE(review): malloc result is not checked before use
+  snprintf(buf, PATH_BUF_SIZE, "%s.ii", pathname_base); // index-info file is "<pathname_base>.ii"
+  RELAY_ERROR(mmap_obj_create(&index->indexinfo, "wp/indexinfo", buf, sizeof(index_info)));
+  RELAY_ERROR(index_info_init(MMAP_OBJ(index->indexinfo, index_info), INDEX_VERSION)); // stamps the version, zeroes num_segments, sets up the lock
   index->pathname_base = pathname_base; // pointer is stored, not copied: the caller's string must outlive the index
-  index->num_segments = 1;
   index->sizeof_segments = 1;
   index->open = 1;
   index->segments = malloc(sizeof(wp_segment));
   index->docid_offsets = malloc(sizeof(uint64_t));
+  snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base); // first segment file is "<pathname_base>0"
   RELAY_ERROR(wp_segment_create(&index->segments[0], buf));
   index->docid_offsets[0] = 0;
+  index->num_segments = 1; // both the local counter and the one in index_info are set only after the segment was created
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  ii->num_segments = 1;
   return NO_ERROR;
 }
-RAISING_STATIC(ensure_num_segments(wp_index* index)) {
+// grows the index->segments and index->docid_offsets arrays (in lockstep)
+// until they have enough slots to represent index->num_segments.
+// index->sizeof_segments is the capacity, doubled until it fits.
+// raises "oom" if either realloc fails.
+// NOTE(review): sizeof_segments is a uint16_t, so doubling past 32768 wraps
+// to 0 and the while loop below would never terminate -- confirm that
+// num_segments cannot get that large in practice.
+RAISING_STATIC(ensure_segment_pointer_fit(wp_index* index)) {
   if(index->num_segments >= index->sizeof_segments) {
-    index->sizeof_segments *= 2;
+    if(index->sizeof_segments == 0) index->sizeof_segments = 1; // lame
+    while(index->sizeof_segments < index->num_segments) index->sizeof_segments *= 2; // lame
     index->segments = realloc(index->segments, sizeof(wp_segment) * index->sizeof_segments);
     index->docid_offsets = realloc(index->docid_offsets, sizeof(uint64_t) * index->sizeof_segments);
     if(index->segments == NULL) RAISE_ERROR("oom");
+    if(index->docid_offsets == NULL) RAISE_ERROR("oom"); // both arrays were realloc'ed, so both results must be checked
   }
   return NO_ERROR;
 }
-wp_error* wp_index_load(wp_index** indexptr, const char* pathname_base) {
+// loads every segment listed in the index_info that this wp_index has not loaded yet.
+// callers should hold the index-wide lock so that no segment is created while this runs.
+RAISING_STATIC(ensure_all_segments(wp_index* index)) {
   char buf[PATH_BUF_SIZE];
-  snprintf(buf, PATH_BUF_SIZE, "%s0", pathname_base);
-  if(!wp_segment_exists(buf)) RAISE_ERROR("index with base path '%s' does not exist", pathname_base);
-  wp_index* index = *indexptr = malloc(sizeof(wp_index));
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  if(ii->num_segments < index->num_segments) RAISE_ERROR("invalid value for num_segments: %u vs %u", index->num_segments, ii->num_segments); // the count in index_info is never expected to be below what we already loaded
+  if(ii->num_segments == index->num_segments) return NO_ERROR; // already up to date, nothing to load
-  index->pathname_base = pathname_base;
-  index->num_segments = 0;
-  index->sizeof_segments = 1;
-  index->open = 1;
-  index->segments = malloc(sizeof(wp_segment));
-  index->docid_offsets = malloc(sizeof(uint64_t));
+  // otherwise, we need to load some more segments
+  uint16_t old_num_segments = index->num_segments;
+  index->num_segments = ii->num_segments; // NOTE(review): ii->num_segments is uint32_t, index->num_segments is uint16_t -- this narrows
+  RELAY_ERROR(ensure_segment_pointer_fit(index)); // the arrays are sized from index->num_segments, which is why it is updated first
-  // load all the segments we can
-  while(index->num_segments < WP_MAX_SEGMENTS) {
-    snprintf(buf, PATH_BUF_SIZE, "%s%d", pathname_base, index->num_segments);
-    if(!wp_segment_exists(buf)) break;
+  for(uint16_t i = old_num_segments; i < index->num_segments; i++) {
+    snprintf(buf, PATH_BUF_SIZE, "%s%u", index->pathname_base, i); // segment i lives at "<pathname_base><i>"
+    DEBUG("trying to loading segment %u from %s", i, buf);
+    RELAY_ERROR(wp_segment_load(&index->segments[i], buf));
-    RELAY_ERROR(ensure_num_segments(index));
-    DEBUG("loading segment %s", buf);
-    RELAY_ERROR(wp_segment_load(&index->segments[index->num_segments], buf));
-    if(index->num_segments == 0)
-      index->docid_offsets[index->num_segments] = 0;
+    if(i == 0) index->docid_offsets[i] = 0;
     else {
       // segments return docids 1 through N, so the num_docs in a segment is
       // also the max document id
-      postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 1].postings, postings_region);
-      index->docid_offsets[index->num_segments] = prevpr->num_docs + index->docid_offsets[index->num_segments - 1];
+      segment_info* prevsi = MMAP_OBJ(index->segments[i - 1].seginfo, segment_info);
+      index->docid_offsets[i] = prevsi->num_docs + index->docid_offsets[i - 1]; // offset of segment i = previous segment's offset + previous segment's doc count
     }
-    index->num_segments++;
   }
   return NO_ERROR;
 }
+wp_error* wp_index_load(wp_index** indexptr, const char* pathname_base) { // opens an existing index; *indexptr receives the malloc'ed wp_index
+  char buf[PATH_BUF_SIZE];
+  wp_index* index = *indexptr = malloc(sizeof(wp_index)); // NOTE(review): malloc result is not checked before use
+  snprintf(buf, PATH_BUF_SIZE, "%s.ii", pathname_base); // index-info file is "<pathname_base>.ii"
+  RELAY_ERROR(mmap_obj_load(&index->indexinfo, "wp/indexinfo", buf));
+  RELAY_ERROR(index_info_validate(MMAP_OBJ(index->indexinfo, index_info), INDEX_VERSION)); // raises if the stored version differs from INDEX_VERSION
+  index->pathname_base = pathname_base; // pointer is stored, not copied: the caller's string must outlive the index
+  index->open = 1;
+  index->num_segments = 0; // start with nothing loaded; ensure_all_segments below loads everything listed in index_info
+  index->sizeof_segments = 0;
+  index->segments = NULL;
+  index->docid_offsets = NULL;
+  RELAY_ERROR(ensure_all_segments(index)); // NOTE(review): called here without taking the index-wide lock, unlike the other call sites in this file
+  return NO_ERROR;
+}
 // we have two special values at our disposal to mark where we are in
 // the sequence of segments
 #define SEGMENT_UNINITIALIZED WP_MAX_SEGMENTS
@@ -96,12 +152,22 @@ wp_error* wp_index_setup_query(wp_index* index, wp_query* query) {
 // can be called multiple times to resume
 wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_results, uint32_t* num_results, uint64_t* results) {
   *num_results = 0;
+  // make sure we know about all segments (one could've been added by a writer)
+  RELAY_ERROR(grab_readlock(index));
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   if(index->num_segments == 0) return NO_ERROR;
   if(query->segment_idx == SEGMENT_UNINITIALIZED) {
     query->segment_idx = index->num_segments - 1;
     DEBUG("setting up segment %u", query->segment_idx);
-    RELAY_ERROR(wp_search_init_search_state(query, &index->segments[query->segment_idx]));
+    wp_segment* seg = &index->segments[query->segment_idx];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    RELAY_ERROR(wp_segment_reload(seg));
+    RELAY_ERROR(wp_search_init_search_state(query, seg));
+    RELAY_ERROR(wp_segment_release_lock(seg));
   }
   // at this point, we assume we're initialized and query->segment_idx is the index
@@ -112,7 +178,11 @@ wp_error* wp_index_run_query(wp_index* index, wp_query* query, uint32_t max_num_
     search_result* segment_results = malloc(sizeof(search_result) * want_num_results);
     DEBUG("searching segment %d", query->segment_idx);
-    RELAY_ERROR(wp_search_run_query_on_segment(query, &index->segments[query->segment_idx], want_num_results, &got_num_results, segment_results));
+    wp_segment* seg = &index->segments[query->segment_idx];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    RELAY_ERROR(wp_segment_reload(seg));
+    RELAY_ERROR(wp_search_run_query_on_segment(query, seg, want_num_results, &got_num_results, segment_results));
+    RELAY_ERROR(wp_segment_release_lock(seg));
     DEBUG("asked segment %d for %d results, got %d", query->segment_idx, want_num_results, got_num_results);
     // extract the per-segment docids from the search results and adjust by
@@ -168,39 +238,73 @@ wp_error* wp_index_teardown_query(wp_index* index, wp_query* query) {
   return NO_ERROR;
 }
-wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) {
-  int success;
-  wp_segment* seg = &index->segments[index->num_segments - 1];
+// finds the segment that `entry` should be written to and returns it in
+// *returned_seg with its writelock held; the caller must release that lock.
+// the last known segment is used if the entry fits; otherwise that segment
+// is unlocked and a new one is created, registered in both index_info and
+// the local wp_index, write-locked and returned instead.
+// raises if any step fails or if the entry does not even fit into a fresh
+// segment.
+RAISING_STATIC(get_and_writelock_last_segment(wp_index* index, wp_entry* entry, wp_segment** returned_seg)) {
+  // assume we have a writelock on the index object here, so that no one can
+  // add segments while we're doing this stuff.
-  // first, ensure we have enough space in the current segment
-  uint32_t postings_bytes;
+  int success;
+  RELAY_ERROR(ensure_all_segments(index)); // make sure we know about all segments
+  wp_segment* seg = &index->segments[index->num_segments - 1]; // get last segment
+  RELAY_ERROR(wp_segment_grab_writelock(seg)); // grab the writelock
+  uint32_t postings_bytes; // calculate how much space we'll need to fit this entry in there
   RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
   RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
-  // if not, we need to open a new one
-  if(!success) {
-    DEBUG("segment %d is full, loading a new one", index->num_segments - 1);
-    char buf[PATH_BUF_SIZE];
-    snprintf(buf, PATH_BUF_SIZE, "%s%d", index->pathname_base, index->num_segments);
-    RELAY_ERROR(ensure_num_segments(index));
-    RELAY_ERROR(wp_segment_create(&index->segments[index->num_segments], buf));
-    index->num_segments++;
-    // set the docid_offset
-    postings_region* prevpr = MMAP_OBJ(index->segments[index->num_segments - 2].postings, postings_region);
-    index->docid_offsets[index->num_segments - 1] = prevpr->num_docs + index->docid_offsets[index->num_segments - 2];
-    seg = &index->segments[index->num_segments - 1];
-    DEBUG("loaded new segment %d at %p", index->num_segments - 1, &index->segments[index->num_segments - 1]);
-    RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
-    RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
-    if(!success) RAISE_ERROR("can't fit new entry into fresh segment. that's crazy");
+  // if we can fit in there, then return it! (still locked)
+  if(success) {
+    *returned_seg = seg;
+    return NO_ERROR;
   }
+  // otherwise, unlock it and let's make a new one
+  RELAY_ERROR(wp_segment_release_lock(seg));
+  char buf[PATH_BUF_SIZE];
+  DEBUG("segment %d is full, loading a new one", index->num_segments - 1);
+  snprintf(buf, PATH_BUF_SIZE, "%s%d", index->pathname_base, index->num_segments);
+  // increase the two counters
+  index_info* ii = MMAP_OBJ(index->indexinfo, index_info);
+  ii->num_segments++;
+  index->num_segments++;
+  // make sure we have a pointer for this guy
+  RELAY_ERROR(ensure_segment_pointer_fit(index));
+  // create the new segment
+  RELAY_ERROR(wp_segment_create(&index->segments[index->num_segments - 1], buf));
+  // set the docid_offset
+  segment_info* prevsi = MMAP_OBJ(index->segments[index->num_segments - 2].seginfo, segment_info);
+  index->docid_offsets[index->num_segments - 1] = prevsi->num_docs + index->docid_offsets[index->num_segments - 2];
+  seg = &index->segments[index->num_segments - 1];
+  DEBUG("loaded new segment %d at %p", index->num_segments - 1, seg);
+  RELAY_ERROR(wp_segment_grab_writelock(seg)); // lock it
+  RELAY_ERROR(wp_entry_sizeof_postings_region(entry, seg, &postings_bytes));
+  RELAY_ERROR(wp_segment_ensure_fit(seg, postings_bytes, 0, &success));
+  if(!success) RAISE_ERROR("can't fit new entry into fresh segment. that's crazy");
+  *returned_seg = seg;
+  return NO_ERROR;
+}
+wp_error* wp_index_add_entry(wp_index* index, wp_entry* entry, uint64_t* doc_id) {
+  wp_segment* seg = NULL;
   docid_t seg_doc_id;
+  // interleaving lock access -- potential for deadlock is high. :(
+  RELAY_ERROR(grab_writelock(index)); // grab full-index lock
+  RELAY_ERROR(get_and_writelock_last_segment(index, entry, &seg));
+  RELAY_ERROR(release_lock(index)); // release full-index lock
+  RELAY_ERROR(wp_segment_reload(seg));
   RELAY_ERROR(wp_segment_grab_docid(seg, &seg_doc_id));
   RELAY_ERROR(wp_entry_write_to_segment(entry, seg, seg_doc_id));
+  RELAY_ERROR(wp_segment_release_lock(seg));
   *doc_id = seg_doc_id + index->docid_offsets[index->num_segments - 1];
   return NO_ERROR;
@@ -226,7 +330,11 @@ wp_error* wp_index_dumpinfo(wp_index* index, FILE* stream) {
   fprintf(stream, "index has %d segments\n", index->num_segments);
   for(int i = 0; i < index->num_segments; i++) {
     fprintf(stream, "\nsegment %d:\n", i);
-    RELAY_ERROR(wp_segment_dumpinfo(&index->segments[i], stream));
+    wp_segment* seg = &index->segments[i];
+    RELAY_ERROR(wp_segment_grab_readlock(seg));
+    RELAY_ERROR(wp_segment_reload(seg));
+    RELAY_ERROR(wp_segment_dumpinfo(seg, stream));
+    RELAY_ERROR(wp_segment_release_lock(seg));
   }
   return NO_ERROR;
@@ -246,16 +354,28 @@ wp_error* wp_index_delete(const char* pathname_base) {
     else break;
   }
+  snprintf(buf, PATH_BUF_SIZE, "%s.ii", pathname_base);
+  unlink(buf);
   return NO_ERROR;
 }
 wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id) {
   int found = 0;
+  RELAY_ERROR(grab_writelock(index));
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   for(uint32_t i = index->num_segments; i > 0; i--) {
     if(doc_id > index->docid_offsets[i - 1]) {
+      wp_segment* seg = &index->segments[i - 1];
       DEBUG("found doc %llu in segment %u", doc_id, i - 1);
-      RELAY_ERROR(wp_segment_add_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_grab_writelock(seg));
+      RELAY_ERROR(wp_segment_reload(seg));
+      RELAY_ERROR(wp_segment_add_label(seg, label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_release_lock(seg));
       found = 1;
       break;
     }
@@ -270,10 +390,19 @@ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id
 wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc_id) {
   int found = 0;
+  RELAY_ERROR(grab_writelock(index));
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   for(uint32_t i = index->num_segments; i > 0; i--) {
     if(doc_id > index->docid_offsets[i - 1]) {
+      wp_segment* seg = &index->segments[i - 1];
       DEBUG("found doc %llu in segment %u", doc_id, i - 1);
-      RELAY_ERROR(wp_segment_remove_label(&index->segments[i - 1], label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_grab_writelock(seg));
+      RELAY_ERROR(wp_segment_reload(seg));
+      RELAY_ERROR(wp_segment_remove_label(seg, label, (docid_t)(doc_id - index->docid_offsets[i - 1])));
+      RELAY_ERROR(wp_segment_release_lock(seg));
       found = 1;
       break;
     }
@@ -285,13 +414,23 @@ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc
   return NO_ERROR;
 }
-uint64_t wp_index_num_docs(wp_index* index) {
-  uint64_t ret = 0;
+wp_error* wp_index_num_docs(wp_index* index, uint64_t* num_docs) { // writes the document count summed over all segments to *num_docs
+  *num_docs = 0;
+  RELAY_ERROR(grab_readlock(index)); // the index-wide lock is held only while picking up segments added since the last call
+  RELAY_ERROR(ensure_all_segments(index));
+  RELAY_ERROR(release_lock(index));
   // TODO check for overflow or some shit
-  for(uint32_t i = index->num_segments; i > 0; i--) ret += wp_segment_num_docs(&index->segments[i - 1]);
+  for(uint32_t i = index->num_segments; i > 0; i--) {
+    wp_segment* seg = &index->segments[i - 1];
+    RELAY_ERROR(wp_segment_grab_readlock(seg)); // each segment is read-locked and reloaded before its count is read
+    RELAY_ERROR(wp_segment_reload(seg));
+    *num_docs += wp_segment_num_docs(seg);
+    RELAY_ERROR(wp_segment_release_lock(seg));
+  }
-  return ret;
+  return NO_ERROR;
 }
 // insane. but i'm putting this here. not defined in c99. don't want to make a

data/ext/whistlepig/index.h CHANGED Viewed

@@ -9,6 +9,8 @@
 // essentially relays commands to the appropriate ones, creating new segments
 // as needed.
+#include <pthread.h>
 #include "defaults.h"
 #include "segment.h"
 #include "error.h"
@@ -16,13 +18,20 @@
 #define WP_MAX_SEGMENTS 65534 // max value of wp_search_query->segment_idx - 2 because we need two special numbers
+typedef struct index_info { // index-level state kept in the wp_index's indexinfo mmap_obj
+  uint32_t index_version; // compared against the expected version when an index is loaded
+  uint32_t num_segments; // number of segments in the index; bumped when a segment is created
+  pthread_rwlock_t lock; // index-wide read/write lock
+} index_info;
 typedef struct wp_index { // in-memory handle on an index
   const char* pathname_base; // base path from which segment and index-info file names are derived
   uint16_t num_segments; // number of entries in use in segments[] / docid_offsets[]
   uint16_t sizeof_segments; // allocated capacity of segments[] and docid_offsets[]
   uint64_t* docid_offsets; // per-segment offset added to segment-local doc ids
-  struct wp_segment* segments;
+  wp_segment* segments;
   uint8_t open;
+  mmap_obj indexinfo; // holds the index_info (version, segment count, lock)
 } wp_index;
 // API methods
@@ -45,7 +54,7 @@ wp_error* wp_index_unload(wp_index* index) RAISES_ERROR;
 wp_error* wp_index_free(wp_index* index) RAISES_ERROR;
 // public: returns the number of documents in the index.
-uint64_t wp_index_num_docs(wp_index* index);
+wp_error* wp_index_num_docs(wp_index* index, uint64_t* num_docs) RAISES_ERROR;
 // public: initializes a query for use on the index. must be called before
 // run_query

data/ext/whistlepig/lock.c ADDED Viewed

@@ -0,0 +1,153 @@
+#include <unistd.h>
+#include <pthread.h>
+#include "whistlepig.h"
+wp_error* wp_lock_setup(pthread_rwlock_t* lock) { // initializes *lock as a process-shared pthread rwlock
+  pthread_rwlockattr_t attr;
+  if(pthread_rwlockattr_init(&attr) != 0) RAISE_SYSERROR("cannot initialize pthreads rwlock attr");
+  if(pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) != 0) RAISE_SYSERROR("cannot set pthreads rwlockattr to PTHREAD_PROCESS_SHARED"); // NOTE(review): attr is not destroyed on this or the next error path
+  if(pthread_rwlock_init(lock, &attr) != 0) RAISE_SYSERROR("cannot initialize pthreads rwlock");
+  if(pthread_rwlockattr_destroy(&attr) != 0) RAISE_SYSERROR("cannot destroy pthreads rwlock attr"); // attr is only needed for the init call above
+  return NO_ERROR;
+}
+/*
+alternative implementation that uses rdlock. doesn't allow us to detect
+and break stale locks, but gives us a good sense of what the timing should
+be like.
+wp_error* wp_segment_grab_lock2(wp_segment* seg, int lock_type) {
+  segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
+  const char* lock_name = (lock_type == WP_LOCK_READLOCK ? "read" : "write");
+  DEBUG("grabbing %slock for segment %p", lock_name, seg);
+  struct timespec start, end;
+  clock_gettime(CLOCK_MONOTONIC, &start);
+  int ret = 0;
+  switch(lock_type) {
+    case WP_LOCK_READLOCK: ret = pthread_rwlock_rdlock(&si->lock); break;
+    case WP_LOCK_WRITELOCK: ret = pthread_rwlock_wrlock(&si->lock); break;
+  }
+  if(ret != 0) RAISE_SYSERROR("grabbing %slock", lock_name);
+  clock_gettime(CLOCK_MONOTONIC, &end);
+  uint64_t diff_in_ns = ((end.tv_sec * 1000000000) + end.tv_nsec) -
+             ((start.tv_sec * 1000000000) + start.tv_nsec);
+  uint32_t total_delay_ms = diff_in_ns / 1000000;
+  if(total_delay_ms > 0) printf("XXX acquired %slock for segment %p after %ums\n", lock_name, seg, total_delay_ms);
+  return NO_ERROR;
+}
+*/
+// we will wait this many milliseconds before assuming the lock
+// is stale and breaking it.
+#define LOCK_STALE_TIME_MS 2500
+/* here's the best implementation i can find, empirically, of being
+   able to grab pthread read and write locks, while still being able
+   to detect stale locks and repair them.
+   it involves a busyloop, which is lame.
+*/
+// acquires `lock` in read or write mode. lock_type must be WP_LOCK_READLOCK
+// or WP_LOCK_WRITELOCK; anything else raises "invalid lock type".
+// polls with the non-blocking try-lock calls, sleeping delay_ms between
+// attempts. EBUSY and EAGAIN mean "try again"; any other failure is raised.
+// once LOCK_STALE_TIME_MS of waiting has accumulated, the lock is assumed
+// to be stale and is re-initialized with wp_lock_setup.
+wp_error* wp_lock_grab(pthread_rwlock_t* lock, int lock_type) {
+  const char* lock_name = (lock_type == WP_LOCK_READLOCK ? "read" : "write");
+  DEBUG("grabbing %slock at %p", lock_name, lock);
+  unsigned int delay_ms = 1;
+  uint32_t total_delay_ms = 0; // time waited overall, for the debug message
+  uint32_t stale_wait_ms = 0;  // time waited since the lock was last (re)initialized by us
+  while(1) {
+    int ret = 0;
+    switch(lock_type) {
+      case WP_LOCK_READLOCK: ret = pthread_rwlock_tryrdlock(lock); break;
+      case WP_LOCK_WRITELOCK: ret = pthread_rwlock_trywrlock(lock); break;
+      default: RAISE_ERROR("invalid lock type");
+    }
+    if(ret == 0) break; // acquired!
+    // we get EAGAINs here if the writer died before closing the lock.
+    if((ret != EBUSY) && (ret != EAGAIN)) RAISE_SYSERROR("acquiring %slock", lock_name);
+    if(stale_wait_ms >= LOCK_STALE_TIME_MS) {
+      //RAISE_ERROR("timeout acquiring %slock: %ums", lock_name, total_delay_ms);
+      DEBUG("assuming lock is stale and breaking it!");
+      RELAY_ERROR(wp_lock_setup(lock));
+      // restart the staleness clock. without this, every later failed attempt
+      // would re-initialize the lock again, destroying a lock that another
+      // process legitimately acquired right after we broke the stale one.
+      stale_wait_ms = 0;
+    }
+    if(delay_ms > 1000) sleep(delay_ms / 1000);
+    usleep(1000 * (delay_ms % 1000));
+    total_delay_ms += delay_ms;
+    stale_wait_ms += delay_ms;
+  }
+  if(total_delay_ms > 0) DEBUG(":( acquired %slock for after %ums\n", lock_name, total_delay_ms);
+  return NO_ERROR;
+}
+/* an alternative implementation that uses the _timed pthread operations.
+   although this should be the best version, i had many problems with it.  the
+   timeout didn't seem to ever trigger. i would also see an EINVAL whenever a
+   writer had a readlock. in the case of a stale lock, i would just get EINVALS
+   forever rather than a proper ETIMEDOUT.
+   since using this would require implementing my own stale lock detection
+   anyways, i'm just going to use the simpler version above instead.
+*/
+/*
+wp_error* wp_segment_grab_lock3(wp_segment* seg, int lock_type) {
+  segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
+  const char* lock_name = (lock_type == WP_LOCK_READLOCK ? "read" : "write");
+  DEBUG("grabbing %slock for segment %p", lock_name, seg);
+  struct timespec timeout;
+  timeout.tv_sec = 3;//LOCK_STALE_TIME_MS / 1000;
+  timeout.tv_nsec = 0;//(LOCK_STALE_TIME_MS % 1000) * 1000000;
+  struct timeval startt, endt;
+  gettimeofday(&startt, NULL);
+  int acquired = 0;
+  while(!acquired) {
+    int ret = 0;
+    switch(lock_type) {
+      case WP_LOCK_READLOCK: ret = pthread_rwlock_timedrdlock(&si->lock, &timeout); break;
+      case WP_LOCK_WRITELOCK: ret = pthread_rwlock_timedwrlock(&si->lock, &timeout); break;
+      default: RAISE_ERROR("invalid lock type");
+    }
+    switch(ret) {
+      case 0: acquired = 1; break;
+      case ETIMEDOUT:
+        DEBUG("assuming lock is stale and breaking it!");
+        RELAY_ERROR(setup_lock(&si->lock));
+        break;
+      case EAGAIN:
+        // despite the documentation, this seems to happen every time we request a readlock and
+        // the lock is already held by the writer. so we will just busyloop here. this happens
+        // fairly frequently, so this is lame.
+        usleep(1000);
+        break;
+      default:
+        RAISE_SYSERROR("acquiring %slock", lock_name);
+    }
+  }
+  gettimeofday(&endt, NULL);
+  long elapsed = ((endt.tv_sec - startt.tv_sec) * 1000) + ((endt.tv_usec - startt.tv_usec) / 1000);
+  if(elapsed > 0) printf(":( acquired %slock for segment %p after %ldms\n", lock_name, seg, elapsed);
+  return NO_ERROR;
+}
+*/
+wp_error* wp_lock_release(pthread_rwlock_t* lock) { // releases a held rwlock; the same call serves read and write holders
+  if(pthread_rwlock_unlock(lock) != 0) RAISE_SYSERROR("releasing lock"); // a non-zero return from pthread_rwlock_unlock is raised as an error
+  return NO_ERROR;
+}