RubyGems - ferret - Versions diffs - 0.3.1 → 0.3.2 - Mend

ferret 0.3.1 → 0.3.2

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (17)

data/lib/ferret.rb +1 -1
data/lib/ferret/search/field_cache.rb +3 -3
data/lib/ferret/search/field_sorted_hit_queue.rb +1 -1
data/lib/ferret/search/fuzzy_term_enum.rb +1 -4
data/lib/ferret/search/multi_phrase_query.rb +8 -4
data/lib/ferret/search/score_doc_comparator.rb +3 -3
data/lib/ferret/search/sort.rb +1 -0
data/lib/ferret/search/sort_comparator.rb +2 -2
data/lib/ferret/search/sort_field.rb +2 -2
data/test/unit/index/tc_index.rb +38 -0
data/test/unit/query_parser/tc_query_parser.rb +1 -0
data/test/unit/search/tc_fuzzy_query.rb +3 -0
data/test/unit/search/tc_index_searcher.rb +17 -3
data/test/unit/search/tc_search_and_sort.rb +13 -12
data/test/unit/search/tc_sort.rb +2 -2
data/test/unit/search/tc_sort_field.rb +1 -1
metadata +2 -2

data/lib/ferret.rb CHANGED Viewed

@@ -22,7 +22,7 @@
 #++
 # :include: ../TUTORIAL
 module Ferret
-  VERSION = '0.3.1'
+  VERSION = '0.3.2'
 end
 require 'ferret/utils'

data/lib/ferret/search/field_cache.rb CHANGED Viewed

@@ -85,7 +85,7 @@ module Ferret::Search
               term = term_enum.term
               break if (term.field != field)
               termval = parser.call(term.text)
-              term_docs.seek(term_enum)
+              term_docs.seek(term)
               while term_docs.next?
                 index[term_docs.doc] = termval
               end
@@ -141,7 +141,7 @@ module Ferret::Search
               end
               str_map[t] = term.text
-              term_docs.seek(term_enum)
+              term_docs.seek(term)
               while term_docs.next?
                 str_index[term_docs.doc] = t
               end
@@ -192,7 +192,7 @@ module Ferret::Search
             termtext = term.text.strip
             if (termtext == termtext.to_i.to_s)
-              index = get_index(reader, field, SortField::SortType::INT)
+              index = get_index(reader, field, SortField::SortType::INTEGER)
             elsif (termtext == termtext.to_f.to_s or termtext == "%f"%termtext.to_f)
               index = get_index(reader, field, SortField::SortType::FLOAT)
             else

data/lib/ferret/search/field_sorted_hit_queue.rb CHANGED Viewed

@@ -173,7 +173,7 @@ module Ferret::Search
       if (index.is_a?(FieldCache::StringIndex))
         return StringFieldComparator.new(index)
       elsif (index[0].is_a?(Integer))
-        return SimpleFieldComparator.new(index, SortField::SortType::INT)
+        return SimpleFieldComparator.new(index, SortField::SortType::INTEGER)
       elsif (index[0].is_a?(Float))
         return SimpleFieldComparator.new(index, SortField::SortType::FLOAT)
       else

data/lib/ferret/search/fuzzy_term_enum.rb CHANGED Viewed

@@ -231,10 +231,7 @@ module Ferret::Search
     # m:: the length of the "other value"
     # returns:: the maximum levenshtein distance that we care about
     def max_distance(m)
-      if (m >= @max_distances.length)
-        @max_distances[m] = calculate_max_distance(m)
-      end
-      return @max_distances[m]
+      return @max_distances[m] ||= calculate_max_distance(m)
     end
     def initialize_max_distances()

data/lib/ferret/search/multi_phrase_query.rb CHANGED Viewed

@@ -47,8 +47,12 @@ module Ferret::Search
         end
       end
-      @term_arrays << terms
-      @positions << position
+      if i = @positions.index(position)
+        term_arrays[i] += terms
+      else
+        @term_arrays << terms
+        @positions << position
+      end
     end
     alias :<< :add
@@ -167,10 +171,10 @@ module Ferret::Search
         terms = @term_arrays[0]
         bq = BooleanQuery.new(true)
         terms.each do |term|
-          bq.add(TermQuery.new(term), BooleanClause::Occur::SHOULD)
+          bq.add_query(TermQuery.new(term), BooleanClause::Occur::SHOULD)
         end
         bq.boost = boost()
-        return boq
+        return bq
       else
         return self
       end

data/lib/ferret/search/score_doc_comparator.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Ferret::Search
     RELEVANCE = ScoreDocComparator.new()
     class <<RELEVANCE
       def compare(i, j)
-        return -(i.score <=> j.score)
+        return j.score <=> i.score
       end
       def sort_value(i)
         return i.score
@@ -74,7 +74,7 @@ module Ferret::Search
       @sort_type = sort_type
     end
-    def compare(j, i)
+    def compare(i, j)
       return @index[i.doc] <=> @index[j.doc]
     end
     def sort_value(i)
@@ -90,7 +90,7 @@ module Ferret::Search
       super(index, sort_type)
       @comparator = comparator
     end
-    def compare(j, i)
+    def compare(i, j)
       return @comparator.call(@index[i.doc], @index[j.doc])
     end
   end

data/lib/ferret/search/sort.rb CHANGED Viewed

@@ -81,6 +81,7 @@ module Ferret::Search
                    reverse = false)
       fields = [fields] unless fields.is_a?(Array)
       @fields = fields
+      fields = fields.map {|field| field.is_a?(Symbol) ? field.to_s : field}
       if fields[0].is_a?(String)
         @fields = fields.map do |field|
           SortField.new(field, {:sort_type => SortField::SortType::AUTO,

data/lib/ferret/search/sort_comparator.rb CHANGED Viewed

@@ -2,8 +2,8 @@ module Ferret::Search
   # Abstract base class for sorting hits returned by a Query.
   #
   # This class should only be used if the other SortField types (SCORE, DOC,
-  # STRING, INT, FLOAT) do not provide an adequate sorting.  It maintains an
-  # internal cache of values which could be quite large.  The cache is an
+  # STRING, INTEGER, FLOAT) do not provide an adequate sorting.  It maintains
+  # an internal cache of values which could be quite large.  The cache is an
   # array of Comparable, one for each document in the index.  There is a
   # distinct Comparable for each unique term in the field - if some documents
   # have the same term in the field, the cache array will have entries which

data/lib/ferret/search/sort_field.rb CHANGED Viewed

@@ -37,7 +37,7 @@ module Ferret::Search
       # Sort using term values as encoded Integers.  Sort values are Integer
       # and lower values are at the front.
-      INT = SortType.new("int", lambda{|str| str.to_i})
+      INTEGER = SortType.new("int", lambda{|str| str.to_i})
       # Sort using term values as encoded Floats.  Sort values are Float and
       # lower values are at the front.
@@ -62,7 +62,7 @@ module Ferret::Search
     # comparator:: a proc used to compare two values from the index. You can
     #    also give this value to the SortType object that you pass.
     def initialize(name = nil, args= {})
-      @name = name
+      @name = name.to_s if name
       @sort_type = args[:sort_type]||SortType::AUTO
       @reverse = args[:reverse]||false
       @comparator = args[:comparator]||@sort_type.comparator

data/test/unit/index/tc_index.rb CHANGED Viewed

@@ -3,6 +3,7 @@ require File.dirname(__FILE__) + "/../../test_helper"
 class IndexTest < Test::Unit::TestCase
   include Ferret::Index
+  include Ferret::Search
   include Ferret::Analysis
   include Ferret::Store
   include Ferret::Document
@@ -448,6 +449,43 @@ class IndexTest < Test::Unit::TestCase
     index.close
   end
+  def test_sortby_date
+    data = [
+      {:content => "one", :date => "20051023"},
+      {:content => "two", :date => "19530315"},
+      {:content => "three four", :date => "19390912"},
+      {:content => "one", :date => "19770905"},
+      {:content => "two", :date => "19810831"},
+      {:content => "three", :date => "19790531"},
+      {:content => "one", :date => "19770725"},
+      {:content => "two", :date => "19751226"},
+      {:content => "three", :date => "19390912"}
+    ]
+    index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
+    data.each { |doc|
+      document = Document.new
+      doc.each_pair do |key, value|
+        document << Field.new(key.to_s, value, Field::Store::YES, Field::Index::TOKENIZED)
+      end
+      index << document
+    }
+    sf_date = SortField.new("date", {:sort_type => SortField::SortType::INTEGER})
+    #top_docs = index.search("one", :sort => [sf_date, SortField::FIELD_SCORE])
+    top_docs = index.search("one", :sort => Sort.new("date"))
+    assert_equal(3, top_docs.size)
+    assert_equal("19770725", index[top_docs.score_docs[0].doc][:date])
+    assert_equal("19770905", index[top_docs.score_docs[1].doc][:date])
+    assert_equal("20051023", index[top_docs.score_docs[2].doc][:date])
+    top_docs = index.search("one two three four",
+                            :sort => [sf_date, SortField::FIELD_SCORE])
+    assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
+    assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
+    assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
+    assert_equal("three", index[top_docs.score_docs[1].doc][:content])
+    assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
+    index.close
+  end
   def test_auto_flush
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}

data/test/unit/query_parser/tc_query_parser.rb CHANGED Viewed

@@ -17,6 +17,7 @@ class QueryParserTest < Test::Unit::TestCase
       ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
       ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
       ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
+      ['contents:"testing|trucks"', 'contents:"testing|trucks"'],
       ['[aaa bbb]', '[aaa bbb]'],
       ['{aaa bbb]', '{aaa bbb]'],
       ['field:[aaa bbb}', 'field:[aaa bbb}'],

data/test/unit/search/tc_fuzzy_query.rb CHANGED Viewed

@@ -47,6 +47,8 @@ class FuzzyQueryTest < Test::Unit::TestCase
     add_doc("abbbb", iw)
     add_doc("bbbbb", iw)
     add_doc("ddddd", iw)
+    add_doc("ddddddddddddddddddddd", iw) # test max_distances problem
+    add_doc("aaaaaaaaaaaaaaaaaaaaaaa", iw) # test max_distances problem
     #iw.optimize()
     iw.close()
@@ -55,6 +57,7 @@ class FuzzyQueryTest < Test::Unit::TestCase
     fq = FuzzyQuery.new(Term.new("field", "aaaaa"), FuzzyQuery.default_min_similarity, 5)
+    do_prefix_test(is, "aaaaaaaaaaaaaaaaaaaaaa", 1, [8])
     do_prefix_test(is, "aaaaa", 0, [0,1,2])
     do_prefix_test(is, "aaaaa", 1, [0,1,2])
     do_prefix_test(is, "aaaaa", 2, [0,1,2])

data/test/unit/search/tc_index_searcher.rb CHANGED Viewed

@@ -139,9 +139,6 @@ class IndexSearcherTest < Test::Unit::TestCase
     pq << t1 << t2 << t3
     check_hits(pq, [1])
-    pq.slop = 4
-    check_hits(pq, [1,16,17])
     pq = PhraseQuery.new()
     pq << t1
     pq.add(t3, 2)
@@ -154,6 +151,23 @@ class IndexSearcherTest < Test::Unit::TestCase
     check_hits(pq, [1,11,14,16,17])
   end
+  def test_multi_phrase_query()
+    pq = MultiPhraseQuery.new()
+    t1 = Term.new("field", "quick")
+    t2 = Term.new("field", "brown")
+    t3 = Term.new("field", "fox")
+    pq << t1
+    pq << t2
+    pq << t3
+    check_hits(pq, [1])
+    t1b = Term.new("field", "fast")
+    pq.add(t1b, 0)
+    check_hits(pq, [1, 8])
+  end
   def test_range_query()
     rq = RangeQuery.new("date", "20051006", "20051010", true, true)
     check_hits(rq, [6,7,8,9,10])

data/test/unit/search/tc_search_and_sort.rb CHANGED Viewed

@@ -18,7 +18,7 @@ class SearchAndSortTest < Test::Unit::TestCase
   def setup()
     @dir = RAMDirectory.new()
-    iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
+    iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :min_merge_docs => 3)
     docs = [                                                             # len mod
       {"search"=>"findall","string"=>"a","int"=>"6","float"=>"0.01"},    #  4   0
       {"search"=>"findall","string"=>"c","int"=>"5","float"=>"0.1"},     #  3   3
@@ -56,16 +56,16 @@ class SearchAndSortTest < Test::Unit::TestCase
     do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], [SortField::FIELD_DOC])
     ## int
-    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT})
+    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER, :reverse => true})
     do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [sf_int])
     do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], [sf_int, SortField::FIELD_SCORE])
-    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT, :reverse => true})
+    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER})
     do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], [sf_int])
     ## float
-    sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
-    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
     sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT, :reverse => true})
+    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
+    sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
     do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new([sf_float, SortField::FIELD_SCORE]))
     ## str
@@ -74,11 +74,11 @@ class SearchAndSortTest < Test::Unit::TestCase
     ## auto
     do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
-    do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], Sort.new(["int"]))
-    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float"))
-    do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float", true))
-    do_test_top_docs(is, q, [0,1,6,9,5,8,4,2,7,3], Sort.new(["int", "string"]))
-    do_test_top_docs(is, q, [3,7,2,4,8,5,9,6,1,0], Sort.new(["int", "string"], true))
+    do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], Sort.new(["int"]))
+    do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float"))
+    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float", true))
+    do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new(["int", "string"], true))
+    do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new(["int", "string"]))
   end
   LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
@@ -87,11 +87,12 @@ class SearchAndSortTest < Test::Unit::TestCase
   def test_special_sorts
     is = IndexSearcher.new(@dir)
     q = TermQuery.new(Term.new("search", "findall"))
-    sf = SortField.new("float", {:sort_type => LENGTH})
+    sf = SortField.new("float", {:sort_type => LENGTH, :reverse => true})
     do_test_top_docs(is, q, [9,6,4,8,2,7,0,5,1,3], [sf])
-    sf = SortField.new("float", {:sort_type => LENGTH_MODULO})
+    sf = SortField.new("float", {:sort_type => LENGTH_MODULO, :reverse => true})
     do_test_top_docs(is, q, [1,3,6,4,8,2,7,0,5,9], [sf])
     sf = SortField.new("float", {:sort_type => LENGTH,
+                                 :reverse => true,
                                  :comparator => lambda{|i,j| (j%4) <=> (i%4)}})
     do_test_top_docs(is, q, [0,5,9,2,7,4,8,1,3,6], [sf])
   end

data/test/unit/search/tc_sort.rb CHANGED Viewed

@@ -32,14 +32,14 @@ class SortTest < Test::Unit::TestCase
   end
   def test_multi_fields()
-    sf1 = SortField.new("field", {:sort_type => SortField::SortType::INT,
+    sf1 = SortField.new("field", {:sort_type => SortField::SortType::INTEGER,
                                   :reverse => true})
     sf2 = SortField::FIELD_SCORE
     sf3 = SortField::FIELD_DOC
     s = Sort.new([sf1, sf2, sf3])
     assert_equal(3, s.fields.size)
-    assert_equal(SortField::SortType::INT, s.fields[0].sort_type)
+    assert_equal(SortField::SortType::INTEGER, s.fields[0].sort_type)
     assert_equal("field", s.fields[0].name)
     assert(s.fields[0].reverse?)
     assert_equal(SortField::FIELD_SCORE, s.fields[1])

data/test/unit/search/tc_sort_field.rb CHANGED Viewed

@@ -21,7 +21,7 @@ class SortFieldTest < Test::Unit::TestCase
   def test_error_raised()
     assert_raise(ArgumentError) {
-      fs = SortField.new(nil, {:sort_type => SortField::SortType::INT})
+      fs = SortField.new(nil, {:sort_type => SortField::SortType::INTEGER})
     }
   end
 end

metadata CHANGED Viewed

@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
 specification_version: 1
 name: ferret
 version: !ruby/object:Gem::Version
-  version: 0.3.1
-date: 2005-12-08 00:00:00 +09:00
+  version: 0.3.2
+date: 2005-12-16 00:00:00 +09:00
 summary: Ruby indexing library.
 require_paths:
   - lib