RubyGems - ferret - Versions diffs - 0.10.4 → 0.10.5 - Mend

ferret 0.10.4 → 0.10.5

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (24)

data/Rakefile +1 -1
data/ext/analysis.c +7 -1
data/ext/bitvector.c +5 -2
data/ext/bitvector.h +1 -0
data/ext/ferret.c +55 -8
data/ext/ferret.h +8 -2
data/ext/index.c +34 -43
data/ext/index.h +1 -1
data/ext/q_boolean.c +1 -1
data/ext/q_multi_term.c +13 -1
data/ext/q_parser.c +33 -18
data/ext/r_analysis.c +68 -45
data/ext/r_index.c +64 -10
data/ext/r_search.c +145 -10
data/ext/search.c +71 -12
data/lib/ferret/index.rb +42 -28
data/lib/ferret_version.rb +1 -1
data/test/unit/analysis/tc_analyzer.rb +1 -1
data/test/unit/analysis/tc_token_stream.rb +0 -1
data/test/unit/index/tc_index.rb +3 -3
data/test/unit/index/tc_index_reader.rb +5 -0
data/test/unit/search/tc_filter.rb +15 -0
data/test/unit/search/tm_searcher.rb +13 -2
metadata +2 -2

data/lib/ferret_version.rb CHANGED

@@ -1,3 +1,3 @@
 module Ferret
-  VERSION = '0.10.4'
+  VERSION = '0.10.5'
 end

data/test/unit/analysis/tc_analyzer.rb CHANGED

@@ -246,7 +246,7 @@ class AsciiStandardAnalyzerTest < Test::Unit::TestCase
     assert_equal(Token.new('tnt', 86, 91), t2.next)
     assert_equal(Token.new('123-1235-asd-1234', 93, 110), t2.next)
     assert(! t2.next())
-    a = AsciiStandardAnalyzer.new(false)
+    a = AsciiStandardAnalyzer.new(ENGLISH_STOP_WORDS, false)
     t = a.token_stream("fieldname", input)
     t2 = a.token_stream("fieldname", input)
     assert_equal(Token.new('DBalmain@gmail.com', 0, 18), t.next)

data/test/unit/analysis/tc_token_stream.rb CHANGED

@@ -486,7 +486,6 @@ class CustomTokenizerTest < Test::Unit::TestCase
     assert(! t.next())
     t = AsciiLowerCaseFilter.new(MyCSVTokenizer.new(input))
     assert_equal(Token.new("first field", 0, 11), t.next)
-    return
     assert_equal(Token.new("2nd field", 12, 21), t.next)
     assert_equal(Token.new("  p a d d e d  f i e l d  ", 22, 48), t.next)
     assert(! t.next())

data/test/unit/index/tc_index.rb CHANGED

@@ -753,14 +753,14 @@ class IndexTest < Test::Unit::TestCase
                                  :excerpt_length => 10,
                                  :num_excerpts => 1)
     assert_equal(1, highlights.size)
-    assert_equal("...<b>for are one and two</b>...", highlights[0])
+    assert_equal("<b>the words</b>...", highlights[0])
     highlights = index.highlight('"the words" "for are one and two" ' +
                                  'words one two', 0,
-                                 :excerpt_length => 10,
+                                 :excerpt_length => 20,
                                  :num_excerpts => 2)
     assert_equal(2, highlights.size)
-    assert_equal("<b>the words</b>...", highlights[0])
+    assert_equal("<b>the words</b> we are...", highlights[0])
     assert_equal("...<b>for are one and two</b>...", highlights[1])

data/test/unit/index/tc_index_reader.rb CHANGED

@@ -198,6 +198,11 @@ module IndexReaderCommon
     assert_equal(4, doc.size)
     [:author, :body, :title, :year].each {|fn| assert(doc.keys.include?(fn))}
+    assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0, 3].collect {|d| d.load})
+    assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61, 100].collect {|d| d.load})
+    assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0..2].collect {|d| d.load})
+    assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61..100].collect {|d| d.load})
+    assert_equal(@ir[-60], @ir[4])
   end
   def test_ir_norms()

data/test/unit/search/tc_filter.rb CHANGED

@@ -98,4 +98,19 @@ class FilterTest < Test::Unit::TestCase
     rf2 = RangeFilter.new(:int, :>= => "3")
     do_test_top_docs(searcher, query, [4,6], rf2)
   end
+  class CustomFilter
+    def bits(ir)
+      bv = Ferret::Utils::BitVector.new
+      bv[0] = bv[2] = bv[4] = true
+      bv
+    end
+  end
+  def test_custom_filter
+    searcher = Searcher.new(@dir)
+    q = MatchAllQuery.new
+    filt = CustomFilter.new
+    do_test_top_docs(searcher, q, [0, 2, 4], filt)
+  end
 end

data/test/unit/search/tm_searcher.rb CHANGED

@@ -23,6 +23,8 @@ module SearcherTests
     assert_equal(10, top_docs.hits.size)
     top_docs = @searcher.search(tq, {:limit => 20})
     assert_equal(@searcher.max_doc, top_docs.hits.size)
+    assert_equal([Ferret::Term.new(:field, "word1")], tq.terms(@searcher))
   end
   def check_docs(query, options, expected=[])
@@ -79,6 +81,12 @@ module SearcherTests
     assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
     mtq << "word3"
     assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
+    terms = mtq.terms(@searcher)
+    assert(terms.index(Ferret::Term.new(:field, "brown")))
+    assert(terms.index(Ferret::Term.new(:field, "word1")))
+    assert(terms.index(Ferret::Term.new(:field, "word2")))
+    assert(terms.index(Ferret::Term.new(:field, "fast")))
   end
   def test_boolean_query
@@ -199,6 +207,9 @@ module SearcherTests
     wq = WildcardQuery.new(:category, "cat1*/su??ub2")
     check_hits(wq, [4, 16])
+    wq = WildcardQuery.new(:category, "*/sub2*")
+    check_hits(wq, [3, 4, 13, 15])
   end
   def test_multi_phrase_query()
@@ -327,13 +338,13 @@ module SearcherTests
                                     :excerpt_length => 10,
                                     :num_excerpts => 1)
     assert_equal(1, highlights.size)
-    assert_equal("...<b>for are one and two</b>...", highlights[0])
+    assert_equal("<b>the words</b>...", highlights[0])
     highlights = searcher.highlight(q, 0, :field,
                                     :excerpt_length => 10,
                                     :num_excerpts => 2)
     assert_equal(2, highlights.size)
     assert_equal("<b>the words</b>...", highlights[0])
-    assert_equal("...<b>for are one and two</b>...", highlights[1])
+    assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
   end
 end

metadata CHANGED

@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
 specification_version: 1
 name: ferret
 version: !ruby/object:Gem::Version
-  version: 0.10.4
-date: 2006-09-09 00:00:00 +09:00
+  version: 0.10.5
+date: 2006-09-19 00:00:00 +09:00
 summary: Ruby indexing library.
 require_paths:
 - lib