ferret 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ferret.rb CHANGED
@@ -22,7 +22,7 @@
22
22
  #++
23
23
  # :include: ../TUTORIAL
24
24
  module Ferret
25
- VERSION = '0.3.1'
25
+ VERSION = '0.3.2'
26
26
  end
27
27
 
28
28
  require 'ferret/utils'
@@ -85,7 +85,7 @@ module Ferret::Search
85
85
  term = term_enum.term
86
86
  break if (term.field != field)
87
87
  termval = parser.call(term.text)
88
- term_docs.seek(term_enum)
88
+ term_docs.seek(term)
89
89
  while term_docs.next?
90
90
  index[term_docs.doc] = termval
91
91
  end
@@ -141,7 +141,7 @@ module Ferret::Search
141
141
  end
142
142
  str_map[t] = term.text
143
143
 
144
- term_docs.seek(term_enum)
144
+ term_docs.seek(term)
145
145
  while term_docs.next?
146
146
  str_index[term_docs.doc] = t
147
147
  end
@@ -192,7 +192,7 @@ module Ferret::Search
192
192
  termtext = term.text.strip
193
193
 
194
194
  if (termtext == termtext.to_i.to_s)
195
- index = get_index(reader, field, SortField::SortType::INT)
195
+ index = get_index(reader, field, SortField::SortType::INTEGER)
196
196
  elsif (termtext == termtext.to_f.to_s or termtext == "%f"%termtext.to_f)
197
197
  index = get_index(reader, field, SortField::SortType::FLOAT)
198
198
  else
@@ -173,7 +173,7 @@ module Ferret::Search
173
173
  if (index.is_a?(FieldCache::StringIndex))
174
174
  return StringFieldComparator.new(index)
175
175
  elsif (index[0].is_a?(Integer))
176
- return SimpleFieldComparator.new(index, SortField::SortType::INT)
176
+ return SimpleFieldComparator.new(index, SortField::SortType::INTEGER)
177
177
  elsif (index[0].is_a?(Float))
178
178
  return SimpleFieldComparator.new(index, SortField::SortType::FLOAT)
179
179
  else
@@ -231,10 +231,7 @@ module Ferret::Search
231
231
  # m:: the length of the "other value"
232
232
  # returns:: the maximum levenshtein distance that we care about
233
233
  def max_distance(m)
234
- if (m >= @max_distances.length)
235
- @max_distances[m] = calculate_max_distance(m)
236
- end
237
- return @max_distances[m]
234
+ return @max_distances[m] ||= calculate_max_distance(m)
238
235
  end
239
236
 
240
237
  def initialize_max_distances()
@@ -47,8 +47,12 @@ module Ferret::Search
47
47
  end
48
48
  end
49
49
 
50
- @term_arrays << terms
51
- @positions << position
50
+ if i = @positions.index(position)
51
+ term_arrays[i] += terms
52
+ else
53
+ @term_arrays << terms
54
+ @positions << position
55
+ end
52
56
  end
53
57
  alias :<< :add
54
58
 
@@ -167,10 +171,10 @@ module Ferret::Search
167
171
  terms = @term_arrays[0]
168
172
  bq = BooleanQuery.new(true)
169
173
  terms.each do |term|
170
- bq.add(TermQuery.new(term), BooleanClause::Occur::SHOULD)
174
+ bq.add_query(TermQuery.new(term), BooleanClause::Occur::SHOULD)
171
175
  end
172
176
  bq.boost = boost()
173
- return boq
177
+ return bq
174
178
  else
175
179
  return self
176
180
  end
@@ -6,7 +6,7 @@ module Ferret::Search
6
6
  RELEVANCE = ScoreDocComparator.new()
7
7
  class <<RELEVANCE
8
8
  def compare(i, j)
9
- return -(i.score <=> j.score)
9
+ return j.score <=> i.score
10
10
  end
11
11
  def sort_value(i)
12
12
  return i.score
@@ -74,7 +74,7 @@ module Ferret::Search
74
74
  @sort_type = sort_type
75
75
  end
76
76
 
77
- def compare(j, i)
77
+ def compare(i, j)
78
78
  return @index[i.doc] <=> @index[j.doc]
79
79
  end
80
80
  def sort_value(i)
@@ -90,7 +90,7 @@ module Ferret::Search
90
90
  super(index, sort_type)
91
91
  @comparator = comparator
92
92
  end
93
- def compare(j, i)
93
+ def compare(i, j)
94
94
  return @comparator.call(@index[i.doc], @index[j.doc])
95
95
  end
96
96
  end
@@ -81,6 +81,7 @@ module Ferret::Search
81
81
  reverse = false)
82
82
  fields = [fields] unless fields.is_a?(Array)
83
83
  @fields = fields
84
+ fields = fields.map {|field| field.is_a?(Symbol) ? field.to_s : field}
84
85
  if fields[0].is_a?(String)
85
86
  @fields = fields.map do |field|
86
87
  SortField.new(field, {:sort_type => SortField::SortType::AUTO,
@@ -2,8 +2,8 @@ module Ferret::Search
2
2
  # Abstract base class for sorting hits returned by a Query.
3
3
  #
4
4
  # This class should only be used if the other SortField types (SCORE, DOC,
5
- # STRING, INT, FLOAT) do not provide an adequate sorting. It maintains an
6
- # internal cache of values which could be quite large. The cache is an
5
+ # STRING, INTEGER, FLOAT) do not provide an adequate sorting. It maintains
6
+ # an internal cache of values which could be quite large. The cache is an
7
7
  # array of Comparable, one for each document in the index. There is a
8
8
  # distinct Comparable for each unique term in the field - if some documents
9
9
  # have the same term in the field, the cache array will have entries which
@@ -37,7 +37,7 @@ module Ferret::Search
37
37
 
38
38
  # Sort using term values as encoded Integers. Sort values are Integer
39
39
  # and lower values are at the front.
40
- INT = SortType.new("int", lambda{|str| str.to_i})
40
+ INTEGER = SortType.new("int", lambda{|str| str.to_i})
41
41
 
42
42
  # Sort using term values as encoded Floats. Sort values are Float and
43
43
  # lower values are at the front.
@@ -62,7 +62,7 @@ module Ferret::Search
62
62
  # comparator:: a proc used to compare two values from the index. You can
63
63
  # also give this value to the SortType object that you pass.
64
64
  def initialize(name = nil, args= {})
65
- @name = name
65
+ @name = name.to_s if name
66
66
  @sort_type = args[:sort_type]||SortType::AUTO
67
67
  @reverse = args[:reverse]||false
68
68
  @comparator = args[:comparator]||@sort_type.comparator
@@ -3,6 +3,7 @@ require File.dirname(__FILE__) + "/../../test_helper"
3
3
 
4
4
  class IndexTest < Test::Unit::TestCase
5
5
  include Ferret::Index
6
+ include Ferret::Search
6
7
  include Ferret::Analysis
7
8
  include Ferret::Store
8
9
  include Ferret::Document
@@ -448,6 +449,43 @@ class IndexTest < Test::Unit::TestCase
448
449
  index.close
449
450
  end
450
451
 
452
+ def test_sortby_date
453
+ data = [
454
+ {:content => "one", :date => "20051023"},
455
+ {:content => "two", :date => "19530315"},
456
+ {:content => "three four", :date => "19390912"},
457
+ {:content => "one", :date => "19770905"},
458
+ {:content => "two", :date => "19810831"},
459
+ {:content => "three", :date => "19790531"},
460
+ {:content => "one", :date => "19770725"},
461
+ {:content => "two", :date => "19751226"},
462
+ {:content => "three", :date => "19390912"}
463
+ ]
464
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
465
+ data.each { |doc|
466
+ document = Document.new
467
+ doc.each_pair do |key, value|
468
+ document << Field.new(key.to_s, value, Field::Store::YES, Field::Index::TOKENIZED)
469
+ end
470
+ index << document
471
+ }
472
+ sf_date = SortField.new("date", {:sort_type => SortField::SortType::INTEGER})
473
+ #top_docs = index.search("one", :sort => [sf_date, SortField::FIELD_SCORE])
474
+ top_docs = index.search("one", :sort => Sort.new("date"))
475
+ assert_equal(3, top_docs.size)
476
+ assert_equal("19770725", index[top_docs.score_docs[0].doc][:date])
477
+ assert_equal("19770905", index[top_docs.score_docs[1].doc][:date])
478
+ assert_equal("20051023", index[top_docs.score_docs[2].doc][:date])
479
+ top_docs = index.search("one two three four",
480
+ :sort => [sf_date, SortField::FIELD_SCORE])
481
+ assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
482
+ assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
483
+ assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
484
+ assert_equal("three", index[top_docs.score_docs[1].doc][:content])
485
+ assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
486
+ index.close
487
+ end
488
+
451
489
  def test_auto_flush
452
490
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
453
491
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
@@ -17,6 +17,7 @@ class QueryParserTest < Test::Unit::TestCase
17
17
  ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
18
18
  ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
19
19
  ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
20
+ ['contents:"testing|trucks"', 'contents:"testing|trucks"'],
20
21
  ['[aaa bbb]', '[aaa bbb]'],
21
22
  ['{aaa bbb]', '{aaa bbb]'],
22
23
  ['field:[aaa bbb}', 'field:[aaa bbb}'],
@@ -47,6 +47,8 @@ class FuzzyQueryTest < Test::Unit::TestCase
47
47
  add_doc("abbbb", iw)
48
48
  add_doc("bbbbb", iw)
49
49
  add_doc("ddddd", iw)
50
+ add_doc("ddddddddddddddddddddd", iw) # test max_distances problem
51
+ add_doc("aaaaaaaaaaaaaaaaaaaaaaa", iw) # test max_distances problem
50
52
  #iw.optimize()
51
53
  iw.close()
52
54
 
@@ -55,6 +57,7 @@ class FuzzyQueryTest < Test::Unit::TestCase
55
57
 
56
58
  fq = FuzzyQuery.new(Term.new("field", "aaaaa"), FuzzyQuery.default_min_similarity, 5)
57
59
 
60
+ do_prefix_test(is, "aaaaaaaaaaaaaaaaaaaaaa", 1, [8])
58
61
  do_prefix_test(is, "aaaaa", 0, [0,1,2])
59
62
  do_prefix_test(is, "aaaaa", 1, [0,1,2])
60
63
  do_prefix_test(is, "aaaaa", 2, [0,1,2])
@@ -139,9 +139,6 @@ class IndexSearcherTest < Test::Unit::TestCase
139
139
  pq << t1 << t2 << t3
140
140
  check_hits(pq, [1])
141
141
 
142
- pq.slop = 4
143
- check_hits(pq, [1,16,17])
144
-
145
142
  pq = PhraseQuery.new()
146
143
  pq << t1
147
144
  pq.add(t3, 2)
@@ -154,6 +151,23 @@ class IndexSearcherTest < Test::Unit::TestCase
154
151
  check_hits(pq, [1,11,14,16,17])
155
152
  end
156
153
 
154
+ def test_multi_phrase_query()
155
+ pq = MultiPhraseQuery.new()
156
+ t1 = Term.new("field", "quick")
157
+ t2 = Term.new("field", "brown")
158
+ t3 = Term.new("field", "fox")
159
+ pq << t1
160
+ pq << t2
161
+ pq << t3
162
+ check_hits(pq, [1])
163
+
164
+ t1b = Term.new("field", "fast")
165
+ pq.add(t1b, 0)
166
+ check_hits(pq, [1, 8])
167
+ end
168
+
169
+
170
+
157
171
  def test_range_query()
158
172
  rq = RangeQuery.new("date", "20051006", "20051010", true, true)
159
173
  check_hits(rq, [6,7,8,9,10])
@@ -18,7 +18,7 @@ class SearchAndSortTest < Test::Unit::TestCase
18
18
 
19
19
  def setup()
20
20
  @dir = RAMDirectory.new()
21
- iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
21
+ iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :min_merge_docs => 3)
22
22
  docs = [ # len mod
23
23
  {"search"=>"findall","string"=>"a","int"=>"6","float"=>"0.01"}, # 4 0
24
24
  {"search"=>"findall","string"=>"c","int"=>"5","float"=>"0.1"}, # 3 3
@@ -56,16 +56,16 @@ class SearchAndSortTest < Test::Unit::TestCase
56
56
  do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], [SortField::FIELD_DOC])
57
57
 
58
58
  ## int
59
- sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT})
59
+ sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER, :reverse => true})
60
60
  do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [sf_int])
61
61
  do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], [sf_int, SortField::FIELD_SCORE])
62
- sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT, :reverse => true})
62
+ sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER})
63
63
  do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], [sf_int])
64
64
 
65
65
  ## float
66
- sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
67
- do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
68
66
  sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT, :reverse => true})
67
+ do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
68
+ sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
69
69
  do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new([sf_float, SortField::FIELD_SCORE]))
70
70
 
71
71
  ## str
@@ -74,11 +74,11 @@ class SearchAndSortTest < Test::Unit::TestCase
74
74
 
75
75
  ## auto
76
76
  do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
77
- do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], Sort.new(["int"]))
78
- do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float"))
79
- do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float", true))
80
- do_test_top_docs(is, q, [0,1,6,9,5,8,4,2,7,3], Sort.new(["int", "string"]))
81
- do_test_top_docs(is, q, [3,7,2,4,8,5,9,6,1,0], Sort.new(["int", "string"], true))
77
+ do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], Sort.new(["int"]))
78
+ do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float"))
79
+ do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float", true))
80
+ do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new(["int", "string"], true))
81
+ do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new(["int", "string"]))
82
82
  end
83
83
 
84
84
  LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
@@ -87,11 +87,12 @@ class SearchAndSortTest < Test::Unit::TestCase
87
87
  def test_special_sorts
88
88
  is = IndexSearcher.new(@dir)
89
89
  q = TermQuery.new(Term.new("search", "findall"))
90
- sf = SortField.new("float", {:sort_type => LENGTH})
90
+ sf = SortField.new("float", {:sort_type => LENGTH, :reverse => true})
91
91
  do_test_top_docs(is, q, [9,6,4,8,2,7,0,5,1,3], [sf])
92
- sf = SortField.new("float", {:sort_type => LENGTH_MODULO})
92
+ sf = SortField.new("float", {:sort_type => LENGTH_MODULO, :reverse => true})
93
93
  do_test_top_docs(is, q, [1,3,6,4,8,2,7,0,5,9], [sf])
94
94
  sf = SortField.new("float", {:sort_type => LENGTH,
95
+ :reverse => true,
95
96
  :comparator => lambda{|i,j| (j%4) <=> (i%4)}})
96
97
  do_test_top_docs(is, q, [0,5,9,2,7,4,8,1,3,6], [sf])
97
98
  end
@@ -32,14 +32,14 @@ class SortTest < Test::Unit::TestCase
32
32
  end
33
33
 
34
34
  def test_multi_fields()
35
- sf1 = SortField.new("field", {:sort_type => SortField::SortType::INT,
35
+ sf1 = SortField.new("field", {:sort_type => SortField::SortType::INTEGER,
36
36
  :reverse => true})
37
37
  sf2 = SortField::FIELD_SCORE
38
38
  sf3 = SortField::FIELD_DOC
39
39
  s = Sort.new([sf1, sf2, sf3])
40
40
 
41
41
  assert_equal(3, s.fields.size)
42
- assert_equal(SortField::SortType::INT, s.fields[0].sort_type)
42
+ assert_equal(SortField::SortType::INTEGER, s.fields[0].sort_type)
43
43
  assert_equal("field", s.fields[0].name)
44
44
  assert(s.fields[0].reverse?)
45
45
  assert_equal(SortField::FIELD_SCORE, s.fields[1])
@@ -21,7 +21,7 @@ class SortFieldTest < Test::Unit::TestCase
21
21
 
22
22
  def test_error_raised()
23
23
  assert_raise(ArgumentError) {
24
- fs = SortField.new(nil, {:sort_type => SortField::SortType::INT})
24
+ fs = SortField.new(nil, {:sort_type => SortField::SortType::INTEGER})
25
25
  }
26
26
  end
27
27
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.3.1
7
- date: 2005-12-08 00:00:00 +09:00
6
+ version: 0.3.2
7
+ date: 2005-12-16 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib