RubyGems - ferret - Versions diffs - 0.10.14 → 0.11.0 - Mend

ferret 0.10.14 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

data/TODO +3 -0
data/ext/analysis.c +5 -0
data/ext/compound_io.c +46 -24
data/ext/except.c +14 -0
data/ext/except.h +29 -17
data/ext/ferret.c +22 -1
data/ext/ferret.h +2 -1
data/ext/fs_store.c +9 -12
data/ext/global.c +80 -0
data/ext/global.h +10 -0
data/ext/hash.c +0 -7
data/ext/hash.h +0 -8
data/ext/index.c +1289 -625
data/ext/index.h +59 -14
data/ext/q_boolean.c +12 -5
data/ext/q_parser.c +570 -372
data/ext/r_analysis.c +16 -16
data/ext/r_index.c +41 -43
data/ext/r_qparser.c +37 -36
data/ext/r_search.c +10 -10
data/ext/r_store.c +7 -7
data/ext/ram_store.c +4 -3
data/ext/search.c +3 -2
data/ext/store.c +35 -19
data/ext/store.h +3 -5
data/lib/ferret/index.rb +4 -4
data/lib/ferret_version.rb +1 -1
data/test/threading/thread_safety_read_write_test.rb +76 -0
data/test/threading/thread_safety_test.rb +17 -21
data/test/unit/index/tc_index.rb +6 -2
data/test/unit/index/tc_index_writer.rb +2 -2
data/test/unit/query_parser/tc_query_parser.rb +20 -5
data/test/unit/search/tc_index_searcher.rb +3 -1
data/test/unit/search/tm_searcher.rb +3 -1
metadata +3 -2

data/ext/r_search.c CHANGED Viewed

@@ -160,9 +160,9 @@ frt_get_td(TopDocs *td, VALUE rsearcher)
     VALUE rtop_docs;
     VALUE hit_ary = rb_ary_new2(td->size);
-    RARRAY(hit_ary)->len = td->size;
     for (i = 0; i < td->size; i++) {
         RARRAY(hit_ary)->ptr[i] = frt_get_hit(td->hits[i]);
+        RARRAY(hit_ary)->len++;
     }
     rtop_docs = rb_struct_new(cTopDocs,
@@ -1015,7 +1015,7 @@ frt_bq_add_query(int argc, VALUE *argv, VALUE self)
         rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
                  rb_class2name(klass));
     }
-    return Qnil;
+    return self;
 }
 /****************************************************************************
@@ -1206,7 +1206,7 @@ frt_phq_add(int argc, VALUE *argv, VALUE self)
         default:
             rb_raise(rb_eArgError, "You can only add a string or an array of "
                      "strings to a PhraseQuery, not a %s\n",
-                     RSTRING(rb_obj_as_string(rterm))->ptr);
+                     rs2s(rb_obj_as_string(rterm)));
     }
     return self;
 }
@@ -2258,14 +2258,14 @@ frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
             break;
         case T_SYMBOL:
             rsf = rb_obj_as_string(rsf);
-            sf = sort_field_auto_new(RSTRING(rsf)->ptr, reverse);
+            sf = sort_field_auto_new(rs2s(rsf), reverse);
             /* need to give it a ruby object so it'll be freed when the
              * sort is garbage collected */
             rsf = frt_get_sf(sf);
             sort_add_sort_field(sort, sf);
             break;
         case T_STRING:
-            frt_parse_sort_str(sort, RSTRING(rsf)->ptr);
+            frt_parse_sort_str(sort, rs2s(rsf));
             break;
         default:
             rb_raise(rb_eArgError, "Unknown SortField Type");
@@ -2697,7 +2697,7 @@ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
  *  :num_excerpts::     Default: 2. Number of excerpts to return.
  *  :pre_tag::          Default: "<b>". Tag to place to the left of the match.
  *                      You'll probably want to change this to a "<span>" tag
- *                      with a class "\033[7m" for use in a terminal.
+ *                      with a class. Try "\033[7m" for use in a terminal.
  *  :post_tag::         Default: "</b>". This tag should close the +:pre_tag+.
  *                      Try tag "\033[m" in the terminal.
  *  :ellipsis::         Default: "...". This is the string that is appended at
@@ -2733,13 +2733,13 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
         }
     }
     if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
-        pre_tag = RSTRING(rb_obj_as_string(v))->ptr;
+        pre_tag = rs2s(rb_obj_as_string(v));
     }
     if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
-        post_tag = RSTRING(rb_obj_as_string(v))->ptr;
+        post_tag = rs2s(rb_obj_as_string(v));
     }
     if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
-        ellipsis = RSTRING(rb_obj_as_string(v))->ptr;
+        ellipsis = rs2s(rb_obj_as_string(v));
     }
     if ((excerpts = searcher_highlight(sea,
@@ -2754,10 +2754,10 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
         const int size = ary_size(excerpts);
         int i;
         VALUE rexcerpts = rb_ary_new2(size);
-        RARRAY(rexcerpts)->len = size;
         for (i = 0; i < size; i++) {
             RARRAY(rexcerpts)->ptr[i] = rb_str_new2(excerpts[i]);
+            RARRAY(rexcerpts)->len++;
         }
         ary_destroy(excerpts, &free);
         return rexcerpts;

data/ext/r_store.c CHANGED Viewed

@@ -191,7 +191,7 @@ frt_dir_exists(VALUE self, VALUE rfname)
 {
     Store *store = DATA_PTR(self);
     StringValue(rfname);
-    return store->exists(store, RSTRING(rfname)->ptr) ? Qtrue : Qfalse;
+    return store->exists(store, rs2s(rfname)) ? Qtrue : Qfalse;
 }
 /*
@@ -205,7 +205,7 @@ frt_dir_touch(VALUE self, VALUE rfname)
 {
     Store *store = DATA_PTR(self);
     StringValue(rfname);
-    store->touch(store, RSTRING(rfname)->ptr);
+    store->touch(store, rs2s(rfname));
     return Qnil;
 }
@@ -220,7 +220,7 @@ frt_dir_delete(VALUE self, VALUE rfname)
 {
     Store *store = DATA_PTR(self);
     StringValue(rfname);
-    return (store->remove(store, RSTRING(rfname)->ptr) == 0) ? Qtrue : Qfalse;
+    return (store->remove(store, rs2s(rfname)) == 0) ? Qtrue : Qfalse;
 }
 /*
@@ -263,7 +263,7 @@ frt_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
     Store *store = DATA_PTR(self);
     StringValue(rfrom);
     StringValue(rto);
-    store->rename(store, RSTRING(rfrom)->ptr, RSTRING(rto)->ptr);
+    store->rename(store, rs2s(rfrom), rs2s(rto));
     return self;
 }
@@ -283,7 +283,7 @@ frt_dir_make_lock(VALUE self, VALUE rlock_name)
     Lock *lock;
     Store *store = DATA_PTR(self);
     StringValue(rlock_name);
-    lock = open_lock(store, RSTRING(rlock_name)->ptr);
+    lock = open_lock(store, rs2s(rlock_name));
     rlock = Data_Wrap_Struct(cLock, &frt_lock_mark, &frt_lock_free, lock);
     object_add(lock, rlock);
     return rlock;
@@ -362,9 +362,9 @@ frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
     }
     if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
         rb_raise(rb_eIOError, "No directory <%s> found. Use :create => true"
-                 " to create one.", RSTRING(rpath)->ptr);
+                 " to create one.", rs2s(rpath));
     }
-    store = open_fs_store(RSTRING(rpath)->ptr);
+    store = open_fs_store(rs2s(rpath));
     if (create) store->clear_all(store);
     if ((self = object_get(store)) == Qnil) {
         self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);

data/ext/ram_store.c CHANGED Viewed

@@ -354,13 +354,14 @@ static const struct InStreamMethods RAM_IN_STREAM_METHODS = {
 static InStream *ram_open_input(Store *store, const char *filename)
 {
     RAMFile *rf = (RAMFile *)h_get(store->dir.ht, filename);
-    InStream *is = is_new();
+    InStream *is = NULL;
     if (rf == NULL) {
-        RAISE(IO_ERROR, "tried to open \"%s\" but it doesn't exist", filename);
+        RAISE(FILE_NOT_FOUND_ERROR,
+              "tried to open \"%s\" but it doesn't exist", filename);
     }
     REF(rf);
+    is = is_new();
     is->file.rf = rf;
     is->d.pointer = 0;
     is->m = &RAM_IN_STREAM_METHODS;

data/ext/search.c CHANGED Viewed

@@ -1,4 +1,5 @@
 #include <string.h>
+#include <limits.h>
 #include "search.h"
 #include "array.h"
@@ -1021,7 +1022,7 @@ static TopDocs *isea_search_w(Searcher *self,
                               filter_ft filter_func,
                               bool load_fields)
 {
-    int max_size = first_doc + num_docs;
+    int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
     int i;
     Scorer *scorer;
     Hit **score_docs = NULL;
@@ -1539,7 +1540,7 @@ static TopDocs *msea_search_w(Searcher *self,
                               filter_ft filter_func,
                               bool load_fields)
 {
-    int max_size = first_doc + num_docs;
+    int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
     int i;
     int total_hits = 0;
     Hit **score_docs = NULL;

data/ext/store.c CHANGED Viewed

@@ -568,36 +568,52 @@ void is2os_copy_vints(InStream *is, OutStream *os, int cnt)
 /**
  * Test argument used to test the store->each function
  */
-struct FileNameConcatArg
+struct FileNameListArg
 {
-    char *p;
-    char *end;
+    int count;
+    int size;
+    int total_len;
+    char **files;
 };
 /**
  * Test function used to test store->each function
  */
-static void concat_filenames(char *fname, void *arg)
+static void add_file_name(char *fname, void *arg)
 {
-    struct FileNameConcatArg *fnca = (struct FileNameConcatArg *)arg;
-    if (fnca->p + strlen(fname) + 2 < fnca->end) {
-        strcpy(fnca->p, fname);
-        fnca->p += strlen(fname);
-        *(fnca->p++) = ',';
-        *(fnca->p++) = ' ';
+    struct FileNameListArg *fnl = (struct FileNameListArg *)arg;
+    if (fnl->count >= fnl->size) {
+        fnl->size *= 2;
+        REALLOC_N(fnl->files, char *, fnl->size);
     }
+    fnl->files[fnl->count++] = estrdup(fname);
+    fnl->total_len += strlen(fname) + 2;
 }
-char *store_to_s(Store *store, char *buf, int buf_size)
+char *store_to_s(Store *store)
 {
-    struct FileNameConcatArg fnca;
+    struct FileNameListArg fnl;
+    char *buf, *b;
+    int i;
+    fnl.count = 0;
+    fnl.size = 16;
+    fnl.total_len = 10;
+    fnl.files = ALLOC_N(char *, 16);
+    store->each(store, &add_file_name, &fnl);
+    qsort(fnl.files, fnl.count, sizeof(char *), &scmp);
+    b = buf = ALLOC_N(char, fnl.total_len);
+    for (i = 0; i < fnl.count; i++) {
+        char *fn = fnl.files[i];
+        int len = strlen(fn);
+        memcpy(b, fn, len);
+        b += len;
+        *b++ = '\n';
+        free(fn);
+    }
+    *b = '\0';
+    free(fnl.files);
-    fnca.p = buf;
-    fnca.end = buf + buf_size;
-    store->each(store, &concat_filenames, &fnca);
-    if (fnca.p > buf + 2) {
-        fnca.p[-2] = '\0';
-    }
     return buf;
 }

data/ext/store.h CHANGED Viewed

@@ -292,7 +292,7 @@ struct Store
      *
      * @param store self
      * @param filename the name of the input stream
-     * @raise IO_ERROR if the input stream cannot be opened
+     * @raise FILE_NOT_FOUND_ERROR if the input stream cannot be opened
      */
     InStream *(*open_input)(Store *store, const char *filename);
@@ -728,11 +728,9 @@ extern void is2os_copy_vints(InStream *is, OutStream *os, int cnt);
 /**
  * Print the filenames in a store to a buffer.
  *
- * @param store  the store to get the filenames from
- * @param buf the buffer to print the filenames to
- * @paran len the length of the buffer
+ * @param store the store to get the filenames from
  */
-extern char *store_to_s(Store *store, char *buf, int buf_size);
+extern char *store_to_s(Store *store);
 extern Lock *open_lock(Store *store, char *lockname);
 extern void close_lock(Lock *lock);

data/lib/ferret/index.rb CHANGED Viewed

@@ -171,10 +171,10 @@ module Ferret::Index
     # num_excerpts::     Default: 2. Number of excerpts to return.
     # pre_tag::          Default: "<b>". Tag to place to the left of the
     #                    match.  You'll probably want to change this to a
-    #                    "<span>" tag with a class "\033[36m" for use in a
-    #                    terminal.
+    #                    "<span>" tag with a class. Try "\033[36m" for use in
+    #                    a terminal.
     # post_tag::         Default: "</b>". This tag should close the
-    #                    +:pre_tag+.  Try tag "\033[m" in the terminal.
+    #                    +:pre_tag+. Try tag "\033[m" in the terminal.
     # ellipsis::         Default: "...". This is the string that is appended
     #                    at the beginning and end of excerpts (unless the
     #                    excerpt hits the start or end of the field.
@@ -673,7 +673,7 @@ module Ferret::Index
           latest = false
           begin
             latest = @reader.latest?
-          rescue LockError => le
+          rescue Lock::LockError => le
             sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
             latest = @reader.latest?
           end

data/lib/ferret_version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Ferret
-  VERSION = '0.10.14'
+  VERSION = '0.11.0'
 end

data/test/threading/thread_safety_read_write_test.rb ADDED Viewed

@@ -0,0 +1,76 @@
+require File.dirname(__FILE__) + "/../test_helper"
+require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
+require 'thread'
+class IndexThreadSafetyReadWriteTest < Test::Unit::TestCase
+  include Ferret::Index
+  include Ferret::Document
+  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
+  ITERATIONS = 10000
+  ANALYZER = Ferret::Analysis::Analyzer.new()
+  def setup
+    @index = Index.new(:path => 'index2',
+                       :create => true,
+                       :analyzer => ANALYZER,
+                       :default_field => 'contents')
+  end
+  def search_thread()
+    ITERATIONS.times do
+      do_search()
+      sleep(rand(1))
+    end
+  rescue => e
+    puts e
+    puts e.backtrace
+    @index = nil
+    raise e
+  end
+  def index_thread()
+    ITERATIONS.times do
+      do_add_doc()
+      sleep(rand(1))
+    end
+  rescue => e
+    puts e
+    puts e.backtrace
+    @index = nil
+    raise e
+  end
+  def do_add_doc
+    d = Document.new()
+    n = rand(0xFFFFFFFF)
+    d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
+    d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
+    puts("Adding #{n}")
+    begin
+      @index << d
+    rescue => e
+      puts e
+      puts e.backtrace
+      @index = nil
+      raise e
+    end
+  end
+  def do_search
+    n = rand(0xFFFFFFFF)
+    puts("Searching for #{n}")
+    hits = @index.search_each(n.to_spoken, :num_docs => 3) do |d, s|
+      puts "Hit for #{n}: #{@index[d]["id"]} - #{s}"
+    end
+    puts("Searched for #{n}: total = #{hits}")
+  end
+  def test_threading
+    threads = []
+    threads << Thread.new { search_thread }
+    threads << Thread.new { index_thread }
+    threads.each { |t| t.join }
+  end
+end

data/test/threading/thread_safety_test.rb CHANGED Viewed

@@ -1,20 +1,22 @@
 require File.dirname(__FILE__) + "/../test_helper"
-require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
+require File.join(File.dirname(__FILE__), "number_to_spoken.rb")
 require 'thread'
 class ThreadSafetyTest
   include Ferret::Index
   include Ferret::Search
   include Ferret::Store
-  include Ferret::Document
+  include Ferret
   def initialize(options)
     @options = options
   end
   INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
-  ANALYZER = Ferret::Analysis::Analyzer.new()
-  ITERATIONS = 19
+  ANALYZER = Ferret::Analysis::WhiteSpaceAnalyzer.new()
+  ITERATIONS = 1000
+  QUERY_PARSER = Ferret::QueryParser.new(:analyzer => ANALYZER,
+                                         :default_field => 'contents')
   @@searcher = nil
   def run_index_thread(writer)
@@ -23,10 +25,8 @@ class ThreadSafetyTest
     use_compound_file = false
     (400*ITERATIONS).times do |i|
-      d = Document.new()
       n = rand(0xFFFFFFFF)
-      d << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
-      d << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
+      d = {:id => n.to_s, :contents => n.to_spoken}
       puts("Adding #{n}")
       # Switch between single and multiple file segments
@@ -37,7 +37,7 @@ class ThreadSafetyTest
       if (i % reopen_interval == 0)
         writer.close()
-        writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER)
+        writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER)
       end
     end
@@ -52,17 +52,17 @@ class ThreadSafetyTest
     reopen_interval = 10 + rand(20)
     unless use_global
-      searcher = IndexSearcher.new(INDEX_DIR)
+      searcher = Searcher.new(INDEX_DIR)
     end
     (50*ITERATIONS).times do |i|
       search_for(rand(0xFFFFFFFF), (searcher.nil? ? @@searcher : searcher))
       if (i%reopen_interval == 0)
         if (searcher == nil)
-          @@searcher = IndexSearcher.new(INDEX_DIR)
+          @@searcher = Searcher.new(INDEX_DIR)
         else
           searcher.close()
-          searcher = IndexSearcher.new(INDEX_DIR)
+          searcher = Searcher.new(INDEX_DIR)
         end
       end
     end
@@ -74,30 +74,26 @@ class ThreadSafetyTest
   def search_for(n, searcher)
     puts("Searching for #{n}")
-    hits =
-      searcher.search(Ferret::QueryParser.parse(n.to_spoken, "contents", :analyzer => ANALYZER),
-                      :num_docs => 3)
-    puts("Search for #{n}: total = #{hits.size}")
-    hits.each do |d, s|
-      puts "Hit for #{n}: #{searcher.reader.get_document(d)["id"]} - #{s}"
+    topdocs = searcher.search(QUERY_PARSER.parse(n.to_spoken), :limit => 3)
+    puts("Search for #{n}: total = #{topdocs.total_hits}")
+    topdocs.hits.each do |hit|
+      puts "Hit for #{n}: #{searcher.reader[hit.doc]["id"]} - #{hit.score}"
     end
   end
   def run_test_threads
     threads = []
     unless @options[:read_only]
-      writer = IndexWriter.new(INDEX_DIR, :analyzer => ANALYZER,
+      writer = IndexWriter.new(:path => INDEX_DIR, :analyzer => ANALYZER,
                                :create => !@options[:add])
       threads << Thread.new { run_index_thread(writer) }
       sleep(1)
     end
     threads << Thread.new { run_search_thread(false)}
-    @@searcher = IndexSearcher.new(INDEX_DIR)
+    @@searcher = Searcher.new(INDEX_DIR)
     threads << Thread.new { run_search_thread(true)}
     threads << Thread.new { run_search_thread(true)}