RubyGems - ferret - Versions diffs - 0.10.11 → 0.10.12 - Mend

ferret 0.10.11 → 0.10.12

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

data/CHANGELOG +13 -0
data/Rakefile +1 -1
data/ext/analysis.c +62 -11
data/ext/analysis.h +11 -0
data/ext/bitvector.c +29 -18
data/ext/{defines.h → config.h} +0 -0
data/ext/except.h +1 -1
data/ext/extconf.rb +2 -1
data/ext/fs_store.c +4 -2
data/ext/global.h +1 -1
data/ext/hash.c +15 -12
data/ext/hash.h +1 -0
data/ext/helper.c +2 -2
data/ext/helper.h +1 -1
data/ext/index.c +4 -2
data/ext/index.h +2 -2
data/ext/{mem_pool.c → mempool.c} +1 -1
data/ext/{mem_pool.h → mempool.h} +0 -0
data/ext/multimapper.c +310 -0
data/ext/multimapper.h +51 -0
data/ext/r_analysis.c +200 -22
data/ext/r_search.c +125 -15
data/ext/search.c +1 -1
data/ext/sort.c +1 -1
data/ext/stopwords.c +2 -3
data/lib/ferret/index.rb +2 -1
data/lib/ferret_version.rb +1 -1
data/test/unit/analysis/tc_token_stream.rb +62 -0
data/test/unit/index/tc_index.rb +19 -1
data/test/unit/search/tc_search_and_sort.rb +1 -1
data/test/unit/utils/tc_bit_vector.rb +7 -0
metadata +9 -7

data/ext/search.c CHANGED Viewed

@@ -1690,8 +1690,8 @@ static void msea_close(Searcher *self)
             s = msea->searchers[i];
             s->close(s);
         }
-        free(msea->searchers);
     }
+    free(msea->searchers);
     free(msea->starts);
     free(self);
 }

data/ext/sort.c CHANGED Viewed

@@ -538,7 +538,7 @@ SortField *sort_field_auto_new(char *field, bool reverse)
 void *field_cache_get_index(IndexReader *ir, SortField *sf)
 {
-    void *index = NULL;
+    void *volatile index = NULL;
     int length = 0;
     TermEnum *volatile te = NULL;
     TermDocEnum *volatile tde = NULL;

data/ext/stopwords.c CHANGED Viewed

@@ -10,10 +10,9 @@
 const char *ENGLISH_STOP_WORDS[] = {
     "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
-    "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t",
-        "that",
+    "into", "is", "it", "no", "not", "of", "on", "or", "such", "that",
     "the", "their", "then", "there", "these", "they", "this", "to", "was",
-    "will", "with", NULL
+    "with", NULL
 };
 const char *FULL_ENGLISH_STOP_WORDS[] = {

data/lib/ferret/index.rb CHANGED Viewed

@@ -527,6 +527,7 @@ module Ferret::Index
         @searcher = nil
       end
     end
+    alias :commit :flush
     # optimizes the index. This should only be called when the index will no
     # longer be updated very often, but will be read a lot.
@@ -670,7 +671,7 @@ module Ferret::Index
           latest = false
           begin
             latest = @reader.latest?
-          rescue LockException => le
+          rescue LockError => le
             sleep(@options[:lock_retry_time]) # sleep for 2 seconds and try again
             latest = @reader.latest?
           end

data/lib/ferret_version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Ferret
-  VERSION = '0.10.11'
+  VERSION = '0.10.12'
 end

data/test/unit/analysis/tc_token_stream.rb CHANGED Viewed

@@ -368,6 +368,68 @@ class RegExpTokenizerTest < Test::Unit::TestCase
   end
 end
+class MappingFilterTest < Test::Unit::TestCase
+  include Ferret::Analysis
+  def test_mapping_filter()
+    mapping = {
+      ['à','á','â','ã','ä','å','ā','ă']         => 'a',
+      'æ'                                       => 'ae',
+      ['ď','đ']                                 => 'd',
+      ['ç','ć','č','ĉ','ċ']                     => 'c',
+      ['è','é','ê','ë','ē','ę','ě','ĕ','ė',]    => 'e',
+      ['ƒ']                                     => 'f',
+      ['ĝ','ğ','ġ','ģ']                         => 'g',
+      ['ĥ','ħ']                                 => 'h',
+      ['ì','ì','í','î','ï','ī','ĩ','ĭ']         => 'i',
+      ['į','ı','ĳ','ĵ']                         => 'j',
+      ['ķ','ĸ']                                 => 'k',
+      ['ł','ľ','ĺ','ļ','ŀ']                     => 'l',
+      ['ñ','ń','ň','ņ','ŉ','ŋ']                 => 'n',
+      ['ò','ó','ô','õ','ö','ø','ō','ő','ŏ','ŏ'] => 'o',
+      'œ'                                       => 'oek',
+      'ą'                                       => 'q',
+      ['ŕ','ř','ŗ']                             => 'r',
+      ['ś','š','ş','ŝ','ș']                     => 's',
+      ['ť','ţ','ŧ','ț']                         => 't',
+      ['ù','ú','û','ü','ū','ů','ű','ŭ','ũ','ų'] => 'u',
+      'ŵ'                                       => 'w',
+      ['ý','ÿ','ŷ']                             => 'y',
+      ['ž','ż','ź']                             => 'z'
+    }
+    input = <<END
+aàáâãäåāăb cæd eďđf gçćčĉċh ièéêëēęěĕėj kƒl mĝğġģn oĥħp qììíîïīĩĭr sįıĳĵt uķĸv
+włľĺļŀx yñńňņŉŋz aòóôõöøōőŏŏb cœd eąf gŕřŗh iśšşŝșj kťţŧțl mùúûüūůűŭũųn oŵp
+qýÿŷr sžżźt
+END
+    t = MappingFilter.new(LetterTokenizer.new(input), mapping)
+    assert_equal(Token.new('aaaaaaaaab', 0, 18), t.next)
+    assert_equal(Token.new('caed', 19, 23), t.next)
+    assert_equal(Token.new('eddf', 24, 30), t.next)
+    assert_equal(Token.new('gccccch', 31, 43), t.next)
+    assert_equal(Token.new('ieeeeeeeeej', 44, 64), t.next)
+    assert_equal(Token.new('kfl', 65, 69), t.next)
+    assert_equal(Token.new('mggggn', 70, 80), t.next)
+    assert_equal(Token.new('ohhp', 81, 87), t.next)
+    assert_equal(Token.new('qiiiiiiiir', 88, 106), t.next)
+    assert_equal(Token.new('sjjjjt', 107, 117), t.next)
+    assert_equal(Token.new('ukkv', 118, 124), t.next)
+    assert_equal(Token.new('wlllllx', 125, 137), t.next)
+    assert_equal(Token.new('ynnnnnnz', 138, 152), t.next)
+    assert_equal(Token.new('aoooooooooob', 153, 175), t.next)
+    assert_equal(Token.new('coekd', 176, 180), t.next)
+    assert_equal(Token.new('eqf', 181, 185), t.next)
+    assert_equal(Token.new('grrrh', 186, 194), t.next)
+    assert_equal(Token.new('isssssj', 195, 207), t.next)
+    assert_equal(Token.new('kttttl', 208, 218), t.next)
+    assert_equal(Token.new('muuuuuuuuuun', 219, 241), t.next)
+    assert_equal(Token.new('owp', 242, 246), t.next)
+    assert_equal(Token.new('qyyyr', 247, 255), t.next)
+    assert_equal(Token.new('szzzt', 256, 264), t.next)
+    assert(! t.next())
+  end
+end if (/mswin/i !~ RUBY_PLATFORM)
 class StopFilterTest < Test::Unit::TestCase
   include Ferret::Analysis

data/test/unit/index/tc_index.rb CHANGED Viewed

@@ -525,7 +525,7 @@ class IndexTest < Test::Unit::TestCase
       {:content => "four", :date => "19390912"}
     ].each {|doc| index << doc}
-    sf_date = SortField.new("date", {:sort_type => :integer})
+    sf_date = SortField.new("date", {:type => :integer})
     #top_docs = index.search("one", :sort => [sf_date, SortField::SCORE])
     top_docs = index.search("one", :sort => Sort.new("date"))
     assert_equal(3, top_docs.total_hits)
@@ -773,5 +773,23 @@ class IndexTest < Test::Unit::TestCase
     index.add_document({:content => "Content With Capitals"}, a)
     tv = index.reader.term_vector(0, :content)
     assert_equal("Capitals", tv.terms[0].text)
+    index.close
+  end
+  def test_top_doc_to_json
+    index = Ferret::I.new
+    [
+      {:f1 => "one"},
+      {:f2 => ["two",2,2.0]},
+      {:f3 => 3},
+      {:f4 => 4.0},
+      {:f5 => "five", :funny => '"' * 10_000}
+    ].each {|doc| index << doc}
+    json_str = index.search("one two 3 4.0 five",
+                            :sort => Ferret::Search::Sort::INDEX_ORDER).to_json
+    assert(json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"f5":"five","funny":"' + '\'"\'' * 10_000 + '"}]' ||
+           json_str == '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},{"funny":"' + '\'"\'' * 10_000 + '","f5":"five"}]')
+    assert_equal('[]', index.search("xxx").to_json)
+    index.close
   end
 end

data/test/unit/search/tc_search_and_sort.rb CHANGED Viewed

@@ -128,7 +128,7 @@ class SearchAndSortTest < Test::Unit::TestCase
     ## byte
     do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0],
-                     [SortField.new(:int, :type => :byte)])
+                     SortField.new(:int, :type => :byte))
     do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3],
                      [SortField.new(:int, :type => :byte, :reverse => true)])

data/test/unit/utils/tc_bit_vector.rb CHANGED Viewed

@@ -133,6 +133,13 @@ class BitVectorTest < Test::Unit::TestCase
     assert_equal(bv2, and_bv, "and_bv should be empty")
     assert_equal(0, and_bv.count)
+    bv1 = BitVector.new
+    bv2 = BitVector.new.not!
+    bv1.set(10)
+    bv1.set(11)
+    bv1.set(20)
+    assert_equal(bv1, bv1 & bv2, "bv anded with empty not bv should be same")
   end
   def test_bv_or

metadata CHANGED Viewed

@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
 specification_version: 1
 name: ferret
 version: !ruby/object:Gem::Version
-  version: 0.10.11
-date: 2006-10-11 00:00:00 +09:00
+  version: 0.10.12
+date: 2006-10-20 00:00:00 +09:00
 summary: Ruby indexing library.
 require_paths:
 - lib
@@ -55,7 +55,6 @@ files:
 - ext/stopwords.c
 - ext/array.c
 - ext/index.c
-- ext/mem_pool.c
 - ext/compound_io.c
 - ext/q_prefix.c
 - ext/q_range.c
@@ -72,8 +71,10 @@ files:
 - ext/q_parser.c
 - ext/q_span.c
 - ext/term_vectors.c
+- ext/multimapper.c
+- ext/mempool.c
 - ext/priorityqueue.h
-- ext/defines.h
+- ext/mempool.h
 - ext/posh.h
 - ext/store.h
 - ext/hashset.h
@@ -89,9 +90,10 @@ files:
 - ext/win32.h
 - ext/analysis.h
 - ext/search.h
-- ext/mem_pool.h
 - ext/array.h
 - ext/lang.h
+- ext/config.h
+- ext/multimapper.h
 - ext/stem_UTF_8_norwegian.c
 - ext/stem_UTF_8_danish.c
 - ext/stem_UTF_8_dutch.c
@@ -149,10 +151,10 @@ files:
 - ext/api.h
 - ext/header.h
 - ext/libstemmer.c
-- ext/modules.h
-- ext/libstemmer.h
 - ext/ferret.h
 - ext/ferret.c
+- ext/modules.h
+- ext/libstemmer.h
 - ext/r_analysis.c
 - ext/r_utils.c
 - ext/r_store.c