ferret 0.3.1 → 0.3.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
data/lib/ferret.rb CHANGED
@@ -22,7 +22,7 @@
22
22
  #++
23
23
  # :include: ../TUTORIAL
24
24
  module Ferret
25
- VERSION = '0.3.1'
25
+ VERSION = '0.3.2'
26
26
  end
27
27
 
28
28
  require 'ferret/utils'
@@ -85,7 +85,7 @@ module Ferret::Search
85
85
  term = term_enum.term
86
86
  break if (term.field != field)
87
87
  termval = parser.call(term.text)
88
- term_docs.seek(term_enum)
88
+ term_docs.seek(term)
89
89
  while term_docs.next?
90
90
  index[term_docs.doc] = termval
91
91
  end
@@ -141,7 +141,7 @@ module Ferret::Search
141
141
  end
142
142
  str_map[t] = term.text
143
143
 
144
- term_docs.seek(term_enum)
144
+ term_docs.seek(term)
145
145
  while term_docs.next?
146
146
  str_index[term_docs.doc] = t
147
147
  end
@@ -192,7 +192,7 @@ module Ferret::Search
192
192
  termtext = term.text.strip
193
193
 
194
194
  if (termtext == termtext.to_i.to_s)
195
- index = get_index(reader, field, SortField::SortType::INT)
195
+ index = get_index(reader, field, SortField::SortType::INTEGER)
196
196
  elsif (termtext == termtext.to_f.to_s or termtext == "%f"%termtext.to_f)
197
197
  index = get_index(reader, field, SortField::SortType::FLOAT)
198
198
  else
@@ -173,7 +173,7 @@ module Ferret::Search
173
173
  if (index.is_a?(FieldCache::StringIndex))
174
174
  return StringFieldComparator.new(index)
175
175
  elsif (index[0].is_a?(Integer))
176
- return SimpleFieldComparator.new(index, SortField::SortType::INT)
176
+ return SimpleFieldComparator.new(index, SortField::SortType::INTEGER)
177
177
  elsif (index[0].is_a?(Float))
178
178
  return SimpleFieldComparator.new(index, SortField::SortType::FLOAT)
179
179
  else
@@ -231,10 +231,7 @@ module Ferret::Search
231
231
  # m:: the length of the "other value"
232
232
  # returns:: the maximum levenshtein distance that we care about
233
233
  def max_distance(m)
234
- if (m >= @max_distances.length)
235
- @max_distances[m] = calculate_max_distance(m)
236
- end
237
- return @max_distances[m]
234
+ return @max_distances[m] ||= calculate_max_distance(m)
238
235
  end
239
236
 
240
237
  def initialize_max_distances()
@@ -47,8 +47,12 @@ module Ferret::Search
47
47
  end
48
48
  end
49
49
 
50
- @term_arrays << terms
51
- @positions << position
50
+ if i = @positions.index(position)
51
+ term_arrays[i] += terms
52
+ else
53
+ @term_arrays << terms
54
+ @positions << position
55
+ end
52
56
  end
53
57
  alias :<< :add
54
58
 
@@ -167,10 +171,10 @@ module Ferret::Search
167
171
  terms = @term_arrays[0]
168
172
  bq = BooleanQuery.new(true)
169
173
  terms.each do |term|
170
- bq.add(TermQuery.new(term), BooleanClause::Occur::SHOULD)
174
+ bq.add_query(TermQuery.new(term), BooleanClause::Occur::SHOULD)
171
175
  end
172
176
  bq.boost = boost()
173
- return boq
177
+ return bq
174
178
  else
175
179
  return self
176
180
  end
@@ -6,7 +6,7 @@ module Ferret::Search
6
6
  RELEVANCE = ScoreDocComparator.new()
7
7
  class <<RELEVANCE
8
8
  def compare(i, j)
9
- return -(i.score <=> j.score)
9
+ return j.score <=> i.score
10
10
  end
11
11
  def sort_value(i)
12
12
  return i.score
@@ -74,7 +74,7 @@ module Ferret::Search
74
74
  @sort_type = sort_type
75
75
  end
76
76
 
77
- def compare(j, i)
77
+ def compare(i, j)
78
78
  return @index[i.doc] <=> @index[j.doc]
79
79
  end
80
80
  def sort_value(i)
@@ -90,7 +90,7 @@ module Ferret::Search
90
90
  super(index, sort_type)
91
91
  @comparator = comparator
92
92
  end
93
- def compare(j, i)
93
+ def compare(i, j)
94
94
  return @comparator.call(@index[i.doc], @index[j.doc])
95
95
  end
96
96
  end
@@ -81,6 +81,7 @@ module Ferret::Search
81
81
  reverse = false)
82
82
  fields = [fields] unless fields.is_a?(Array)
83
83
  @fields = fields
84
+ fields = fields.map {|field| field.is_a?(Symbol) ? field.to_s : field}
84
85
  if fields[0].is_a?(String)
85
86
  @fields = fields.map do |field|
86
87
  SortField.new(field, {:sort_type => SortField::SortType::AUTO,
@@ -2,8 +2,8 @@ module Ferret::Search
2
2
  # Abstract base class for sorting hits returned by a Query.
3
3
  #
4
4
  # This class should only be used if the other SortField types (SCORE, DOC,
5
- # STRING, INT, FLOAT) do not provide an adequate sorting. It maintains an
6
- # internal cache of values which could be quite large. The cache is an
5
+ # STRING, INTEGER, FLOAT) do not provide an adequate sorting. It maintains
6
+ # an internal cache of values which could be quite large. The cache is an
7
7
  # array of Comparable, one for each document in the index. There is a
8
8
  # distinct Comparable for each unique term in the field - if some documents
9
9
  # have the same term in the field, the cache array will have entries which
@@ -37,7 +37,7 @@ module Ferret::Search
37
37
 
38
38
  # Sort using term values as encoded Integers. Sort values are Integer
39
39
  # and lower values are at the front.
40
- INT = SortType.new("int", lambda{|str| str.to_i})
40
+ INTEGER = SortType.new("int", lambda{|str| str.to_i})
41
41
 
42
42
  # Sort using term values as encoded Floats. Sort values are Float and
43
43
  # lower values are at the front.
@@ -62,7 +62,7 @@ module Ferret::Search
62
62
  # comparator:: a proc used to compare two values from the index. You can
63
63
  # also give this value to the SortType object that you pass.
64
64
  def initialize(name = nil, args= {})
65
- @name = name
65
+ @name = name.to_s if name
66
66
  @sort_type = args[:sort_type]||SortType::AUTO
67
67
  @reverse = args[:reverse]||false
68
68
  @comparator = args[:comparator]||@sort_type.comparator
@@ -3,6 +3,7 @@ require File.dirname(__FILE__) + "/../../test_helper"
3
3
 
4
4
  class IndexTest < Test::Unit::TestCase
5
5
  include Ferret::Index
6
+ include Ferret::Search
6
7
  include Ferret::Analysis
7
8
  include Ferret::Store
8
9
  include Ferret::Document
@@ -448,6 +449,43 @@ class IndexTest < Test::Unit::TestCase
448
449
  index.close
449
450
  end
450
451
 
452
+ def test_sortby_date
453
+ data = [
454
+ {:content => "one", :date => "20051023"},
455
+ {:content => "two", :date => "19530315"},
456
+ {:content => "three four", :date => "19390912"},
457
+ {:content => "one", :date => "19770905"},
458
+ {:content => "two", :date => "19810831"},
459
+ {:content => "three", :date => "19790531"},
460
+ {:content => "one", :date => "19770725"},
461
+ {:content => "two", :date => "19751226"},
462
+ {:content => "three", :date => "19390912"}
463
+ ]
464
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
465
+ data.each { |doc|
466
+ document = Document.new
467
+ doc.each_pair do |key, value|
468
+ document << Field.new(key.to_s, value, Field::Store::YES, Field::Index::TOKENIZED)
469
+ end
470
+ index << document
471
+ }
472
+ sf_date = SortField.new("date", {:sort_type => SortField::SortType::INTEGER})
473
+ #top_docs = index.search("one", :sort => [sf_date, SortField::FIELD_SCORE])
474
+ top_docs = index.search("one", :sort => Sort.new("date"))
475
+ assert_equal(3, top_docs.size)
476
+ assert_equal("19770725", index[top_docs.score_docs[0].doc][:date])
477
+ assert_equal("19770905", index[top_docs.score_docs[1].doc][:date])
478
+ assert_equal("20051023", index[top_docs.score_docs[2].doc][:date])
479
+ top_docs = index.search("one two three four",
480
+ :sort => [sf_date, SortField::FIELD_SCORE])
481
+ assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
482
+ assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
483
+ assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
484
+ assert_equal("three", index[top_docs.score_docs[1].doc][:content])
485
+ assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
486
+ index.close
487
+ end
488
+
451
489
  def test_auto_flush
452
490
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
453
491
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
@@ -17,6 +17,7 @@ class QueryParserTest < Test::Unit::TestCase
17
17
  ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
18
18
  ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
19
19
  ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
20
+ ['contents:"testing|trucks"', 'contents:"testing|trucks"'],
20
21
  ['[aaa bbb]', '[aaa bbb]'],
21
22
  ['{aaa bbb]', '{aaa bbb]'],
22
23
  ['field:[aaa bbb}', 'field:[aaa bbb}'],
@@ -47,6 +47,8 @@ class FuzzyQueryTest < Test::Unit::TestCase
47
47
  add_doc("abbbb", iw)
48
48
  add_doc("bbbbb", iw)
49
49
  add_doc("ddddd", iw)
50
+ add_doc("ddddddddddddddddddddd", iw) # test max_distances problem
51
+ add_doc("aaaaaaaaaaaaaaaaaaaaaaa", iw) # test max_distances problem
50
52
  #iw.optimize()
51
53
  iw.close()
52
54
 
@@ -55,6 +57,7 @@ class FuzzyQueryTest < Test::Unit::TestCase
55
57
 
56
58
  fq = FuzzyQuery.new(Term.new("field", "aaaaa"), FuzzyQuery.default_min_similarity, 5)
57
59
 
60
+ do_prefix_test(is, "aaaaaaaaaaaaaaaaaaaaaa", 1, [8])
58
61
  do_prefix_test(is, "aaaaa", 0, [0,1,2])
59
62
  do_prefix_test(is, "aaaaa", 1, [0,1,2])
60
63
  do_prefix_test(is, "aaaaa", 2, [0,1,2])
@@ -139,9 +139,6 @@ class IndexSearcherTest < Test::Unit::TestCase
139
139
  pq << t1 << t2 << t3
140
140
  check_hits(pq, [1])
141
141
 
142
- pq.slop = 4
143
- check_hits(pq, [1,16,17])
144
-
145
142
  pq = PhraseQuery.new()
146
143
  pq << t1
147
144
  pq.add(t3, 2)
@@ -154,6 +151,23 @@ class IndexSearcherTest < Test::Unit::TestCase
154
151
  check_hits(pq, [1,11,14,16,17])
155
152
  end
156
153
 
154
+ def test_multi_phrase_query()
155
+ pq = MultiPhraseQuery.new()
156
+ t1 = Term.new("field", "quick")
157
+ t2 = Term.new("field", "brown")
158
+ t3 = Term.new("field", "fox")
159
+ pq << t1
160
+ pq << t2
161
+ pq << t3
162
+ check_hits(pq, [1])
163
+
164
+ t1b = Term.new("field", "fast")
165
+ pq.add(t1b, 0)
166
+ check_hits(pq, [1, 8])
167
+ end
168
+
169
+
170
+
157
171
  def test_range_query()
158
172
  rq = RangeQuery.new("date", "20051006", "20051010", true, true)
159
173
  check_hits(rq, [6,7,8,9,10])
@@ -18,7 +18,7 @@ class SearchAndSortTest < Test::Unit::TestCase
18
18
 
19
19
  def setup()
20
20
  @dir = RAMDirectory.new()
21
- iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
21
+ iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :min_merge_docs => 3)
22
22
  docs = [ # len mod
23
23
  {"search"=>"findall","string"=>"a","int"=>"6","float"=>"0.01"}, # 4 0
24
24
  {"search"=>"findall","string"=>"c","int"=>"5","float"=>"0.1"}, # 3 3
@@ -56,16 +56,16 @@ class SearchAndSortTest < Test::Unit::TestCase
56
56
  do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], [SortField::FIELD_DOC])
57
57
 
58
58
  ## int
59
- sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT})
59
+ sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER, :reverse => true})
60
60
  do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [sf_int])
61
61
  do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], [sf_int, SortField::FIELD_SCORE])
62
- sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT, :reverse => true})
62
+ sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER})
63
63
  do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], [sf_int])
64
64
 
65
65
  ## float
66
- sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
67
- do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
68
66
  sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT, :reverse => true})
67
+ do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
68
+ sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
69
69
  do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new([sf_float, SortField::FIELD_SCORE]))
70
70
 
71
71
  ## str
@@ -74,11 +74,11 @@ class SearchAndSortTest < Test::Unit::TestCase
74
74
 
75
75
  ## auto
76
76
  do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
77
- do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], Sort.new(["int"]))
78
- do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float"))
79
- do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float", true))
80
- do_test_top_docs(is, q, [0,1,6,9,5,8,4,2,7,3], Sort.new(["int", "string"]))
81
- do_test_top_docs(is, q, [3,7,2,4,8,5,9,6,1,0], Sort.new(["int", "string"], true))
77
+ do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], Sort.new(["int"]))
78
+ do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float"))
79
+ do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float", true))
80
+ do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new(["int", "string"], true))
81
+ do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new(["int", "string"]))
82
82
  end
83
83
 
84
84
  LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
@@ -87,11 +87,12 @@ class SearchAndSortTest < Test::Unit::TestCase
87
87
  def test_special_sorts
88
88
  is = IndexSearcher.new(@dir)
89
89
  q = TermQuery.new(Term.new("search", "findall"))
90
- sf = SortField.new("float", {:sort_type => LENGTH})
90
+ sf = SortField.new("float", {:sort_type => LENGTH, :reverse => true})
91
91
  do_test_top_docs(is, q, [9,6,4,8,2,7,0,5,1,3], [sf])
92
- sf = SortField.new("float", {:sort_type => LENGTH_MODULO})
92
+ sf = SortField.new("float", {:sort_type => LENGTH_MODULO, :reverse => true})
93
93
  do_test_top_docs(is, q, [1,3,6,4,8,2,7,0,5,9], [sf])
94
94
  sf = SortField.new("float", {:sort_type => LENGTH,
95
+ :reverse => true,
95
96
  :comparator => lambda{|i,j| (j%4) <=> (i%4)}})
96
97
  do_test_top_docs(is, q, [0,5,9,2,7,4,8,1,3,6], [sf])
97
98
  end
@@ -32,14 +32,14 @@ class SortTest < Test::Unit::TestCase
32
32
  end
33
33
 
34
34
  def test_multi_fields()
35
- sf1 = SortField.new("field", {:sort_type => SortField::SortType::INT,
35
+ sf1 = SortField.new("field", {:sort_type => SortField::SortType::INTEGER,
36
36
  :reverse => true})
37
37
  sf2 = SortField::FIELD_SCORE
38
38
  sf3 = SortField::FIELD_DOC
39
39
  s = Sort.new([sf1, sf2, sf3])
40
40
 
41
41
  assert_equal(3, s.fields.size)
42
- assert_equal(SortField::SortType::INT, s.fields[0].sort_type)
42
+ assert_equal(SortField::SortType::INTEGER, s.fields[0].sort_type)
43
43
  assert_equal("field", s.fields[0].name)
44
44
  assert(s.fields[0].reverse?)
45
45
  assert_equal(SortField::FIELD_SCORE, s.fields[1])
@@ -21,7 +21,7 @@ class SortFieldTest < Test::Unit::TestCase
21
21
 
22
22
  def test_error_raised()
23
23
  assert_raise(ArgumentError) {
24
- fs = SortField.new(nil, {:sort_type => SortField::SortType::INT})
24
+ fs = SortField.new(nil, {:sort_type => SortField::SortType::INTEGER})
25
25
  }
26
26
  end
27
27
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: ferret
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.3.1
7
- date: 2005-12-08 00:00:00 +09:00
6
+ version: 0.3.2
7
+ date: 2005-12-16 00:00:00 +09:00
8
8
  summary: Ruby indexing library.
9
9
  require_paths:
10
10
  - lib