ferret 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/ferret.rb +1 -1
- data/lib/ferret/search/field_cache.rb +3 -3
- data/lib/ferret/search/field_sorted_hit_queue.rb +1 -1
- data/lib/ferret/search/fuzzy_term_enum.rb +1 -4
- data/lib/ferret/search/multi_phrase_query.rb +8 -4
- data/lib/ferret/search/score_doc_comparator.rb +3 -3
- data/lib/ferret/search/sort.rb +1 -0
- data/lib/ferret/search/sort_comparator.rb +2 -2
- data/lib/ferret/search/sort_field.rb +2 -2
- data/test/unit/index/tc_index.rb +38 -0
- data/test/unit/query_parser/tc_query_parser.rb +1 -0
- data/test/unit/search/tc_fuzzy_query.rb +3 -0
- data/test/unit/search/tc_index_searcher.rb +17 -3
- data/test/unit/search/tc_search_and_sort.rb +13 -12
- data/test/unit/search/tc_sort.rb +2 -2
- data/test/unit/search/tc_sort_field.rb +1 -1
- metadata +2 -2
data/lib/ferret.rb
CHANGED
@@ -85,7 +85,7 @@ module Ferret::Search
|
|
85
85
|
term = term_enum.term
|
86
86
|
break if (term.field != field)
|
87
87
|
termval = parser.call(term.text)
|
88
|
-
term_docs.seek(
|
88
|
+
term_docs.seek(term)
|
89
89
|
while term_docs.next?
|
90
90
|
index[term_docs.doc] = termval
|
91
91
|
end
|
@@ -141,7 +141,7 @@ module Ferret::Search
|
|
141
141
|
end
|
142
142
|
str_map[t] = term.text
|
143
143
|
|
144
|
-
term_docs.seek(
|
144
|
+
term_docs.seek(term)
|
145
145
|
while term_docs.next?
|
146
146
|
str_index[term_docs.doc] = t
|
147
147
|
end
|
@@ -192,7 +192,7 @@ module Ferret::Search
|
|
192
192
|
termtext = term.text.strip
|
193
193
|
|
194
194
|
if (termtext == termtext.to_i.to_s)
|
195
|
-
index = get_index(reader, field, SortField::SortType::
|
195
|
+
index = get_index(reader, field, SortField::SortType::INTEGER)
|
196
196
|
elsif (termtext == termtext.to_f.to_s or termtext == "%f"%termtext.to_f)
|
197
197
|
index = get_index(reader, field, SortField::SortType::FLOAT)
|
198
198
|
else
|
@@ -173,7 +173,7 @@ module Ferret::Search
|
|
173
173
|
if (index.is_a?(FieldCache::StringIndex))
|
174
174
|
return StringFieldComparator.new(index)
|
175
175
|
elsif (index[0].is_a?(Integer))
|
176
|
-
return SimpleFieldComparator.new(index, SortField::SortType::
|
176
|
+
return SimpleFieldComparator.new(index, SortField::SortType::INTEGER)
|
177
177
|
elsif (index[0].is_a?(Float))
|
178
178
|
return SimpleFieldComparator.new(index, SortField::SortType::FLOAT)
|
179
179
|
else
|
@@ -231,10 +231,7 @@ module Ferret::Search
|
|
231
231
|
# m:: the length of the "other value"
|
232
232
|
# returns:: the maximum levenshtein distance that we care about
|
233
233
|
def max_distance(m)
|
234
|
-
|
235
|
-
@max_distances[m] = calculate_max_distance(m)
|
236
|
-
end
|
237
|
-
return @max_distances[m]
|
234
|
+
return @max_distances[m] ||= calculate_max_distance(m)
|
238
235
|
end
|
239
236
|
|
240
237
|
def initialize_max_distances()
|
@@ -47,8 +47,12 @@ module Ferret::Search
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
if i = @positions.index(position)
|
51
|
+
term_arrays[i] += terms
|
52
|
+
else
|
53
|
+
@term_arrays << terms
|
54
|
+
@positions << position
|
55
|
+
end
|
52
56
|
end
|
53
57
|
alias :<< :add
|
54
58
|
|
@@ -167,10 +171,10 @@ module Ferret::Search
|
|
167
171
|
terms = @term_arrays[0]
|
168
172
|
bq = BooleanQuery.new(true)
|
169
173
|
terms.each do |term|
|
170
|
-
bq.
|
174
|
+
bq.add_query(TermQuery.new(term), BooleanClause::Occur::SHOULD)
|
171
175
|
end
|
172
176
|
bq.boost = boost()
|
173
|
-
return
|
177
|
+
return bq
|
174
178
|
else
|
175
179
|
return self
|
176
180
|
end
|
@@ -6,7 +6,7 @@ module Ferret::Search
|
|
6
6
|
RELEVANCE = ScoreDocComparator.new()
|
7
7
|
class <<RELEVANCE
|
8
8
|
def compare(i, j)
|
9
|
-
return
|
9
|
+
return j.score <=> i.score
|
10
10
|
end
|
11
11
|
def sort_value(i)
|
12
12
|
return i.score
|
@@ -74,7 +74,7 @@ module Ferret::Search
|
|
74
74
|
@sort_type = sort_type
|
75
75
|
end
|
76
76
|
|
77
|
-
def compare(
|
77
|
+
def compare(i, j)
|
78
78
|
return @index[i.doc] <=> @index[j.doc]
|
79
79
|
end
|
80
80
|
def sort_value(i)
|
@@ -90,7 +90,7 @@ module Ferret::Search
|
|
90
90
|
super(index, sort_type)
|
91
91
|
@comparator = comparator
|
92
92
|
end
|
93
|
-
def compare(
|
93
|
+
def compare(i, j)
|
94
94
|
return @comparator.call(@index[i.doc], @index[j.doc])
|
95
95
|
end
|
96
96
|
end
|
data/lib/ferret/search/sort.rb
CHANGED
@@ -81,6 +81,7 @@ module Ferret::Search
|
|
81
81
|
reverse = false)
|
82
82
|
fields = [fields] unless fields.is_a?(Array)
|
83
83
|
@fields = fields
|
84
|
+
fields = fields.map {|field| field.is_a?(Symbol) ? field.to_s : field}
|
84
85
|
if fields[0].is_a?(String)
|
85
86
|
@fields = fields.map do |field|
|
86
87
|
SortField.new(field, {:sort_type => SortField::SortType::AUTO,
|
@@ -2,8 +2,8 @@ module Ferret::Search
|
|
2
2
|
# Abstract base class for sorting hits returned by a Query.
|
3
3
|
#
|
4
4
|
# This class should only be used if the other SortField types (SCORE, DOC,
|
5
|
-
# STRING,
|
6
|
-
# internal cache of values which could be quite large. The cache is an
|
5
|
+
# STRING, INTEGER, FLOAT) do not provide adequate sorting. It maintains
|
6
|
+
# an internal cache of values which could be quite large. The cache is an
|
7
7
|
# array of Comparable, one for each document in the index. There is a
|
8
8
|
# distinct Comparable for each unique term in the field - if some documents
|
9
9
|
# have the same term in the field, the cache array will have entries which
|
@@ -37,7 +37,7 @@ module Ferret::Search
|
|
37
37
|
|
38
38
|
# Sort using term values as encoded Integers. Sort values are Integer
|
39
39
|
# and lower values are at the front.
|
40
|
-
|
40
|
+
INTEGER = SortType.new("int", lambda{|str| str.to_i})
|
41
41
|
|
42
42
|
# Sort using term values as encoded Floats. Sort values are Float and
|
43
43
|
# lower values are at the front.
|
@@ -62,7 +62,7 @@ module Ferret::Search
|
|
62
62
|
# comparator:: a proc used to compare two values from the index. You can
|
63
63
|
# also give this value to the SortType object that you pass.
|
64
64
|
def initialize(name = nil, args= {})
|
65
|
-
@name = name
|
65
|
+
@name = name.to_s if name
|
66
66
|
@sort_type = args[:sort_type]||SortType::AUTO
|
67
67
|
@reverse = args[:reverse]||false
|
68
68
|
@comparator = args[:comparator]||@sort_type.comparator
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -3,6 +3,7 @@ require File.dirname(__FILE__) + "/../../test_helper"
|
|
3
3
|
|
4
4
|
class IndexTest < Test::Unit::TestCase
|
5
5
|
include Ferret::Index
|
6
|
+
include Ferret::Search
|
6
7
|
include Ferret::Analysis
|
7
8
|
include Ferret::Store
|
8
9
|
include Ferret::Document
|
@@ -448,6 +449,43 @@ class IndexTest < Test::Unit::TestCase
|
|
448
449
|
index.close
|
449
450
|
end
|
450
451
|
|
452
|
+
def test_sortby_date
|
453
|
+
data = [
|
454
|
+
{:content => "one", :date => "20051023"},
|
455
|
+
{:content => "two", :date => "19530315"},
|
456
|
+
{:content => "three four", :date => "19390912"},
|
457
|
+
{:content => "one", :date => "19770905"},
|
458
|
+
{:content => "two", :date => "19810831"},
|
459
|
+
{:content => "three", :date => "19790531"},
|
460
|
+
{:content => "one", :date => "19770725"},
|
461
|
+
{:content => "two", :date => "19751226"},
|
462
|
+
{:content => "three", :date => "19390912"}
|
463
|
+
]
|
464
|
+
index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
|
465
|
+
data.each { |doc|
|
466
|
+
document = Document.new
|
467
|
+
doc.each_pair do |key, value|
|
468
|
+
document << Field.new(key.to_s, value, Field::Store::YES, Field::Index::TOKENIZED)
|
469
|
+
end
|
470
|
+
index << document
|
471
|
+
}
|
472
|
+
sf_date = SortField.new("date", {:sort_type => SortField::SortType::INTEGER})
|
473
|
+
#top_docs = index.search("one", :sort => [sf_date, SortField::FIELD_SCORE])
|
474
|
+
top_docs = index.search("one", :sort => Sort.new("date"))
|
475
|
+
assert_equal(3, top_docs.size)
|
476
|
+
assert_equal("19770725", index[top_docs.score_docs[0].doc][:date])
|
477
|
+
assert_equal("19770905", index[top_docs.score_docs[1].doc][:date])
|
478
|
+
assert_equal("20051023", index[top_docs.score_docs[2].doc][:date])
|
479
|
+
top_docs = index.search("one two three four",
|
480
|
+
:sort => [sf_date, SortField::FIELD_SCORE])
|
481
|
+
assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
|
482
|
+
assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
|
483
|
+
assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
|
484
|
+
assert_equal("three", index[top_docs.score_docs[1].doc][:content])
|
485
|
+
assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
|
486
|
+
index.close
|
487
|
+
end
|
488
|
+
|
451
489
|
def test_auto_flush
|
452
490
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
453
491
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
@@ -17,6 +17,7 @@ class QueryParserTest < Test::Unit::TestCase
|
|
17
17
|
['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
|
18
18
|
['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
|
19
19
|
['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
|
20
|
+
['contents:"testing|trucks"', 'contents:"testing|trucks"'],
|
20
21
|
['[aaa bbb]', '[aaa bbb]'],
|
21
22
|
['{aaa bbb]', '{aaa bbb]'],
|
22
23
|
['field:[aaa bbb}', 'field:[aaa bbb}'],
|
@@ -47,6 +47,8 @@ class FuzzyQueryTest < Test::Unit::TestCase
|
|
47
47
|
add_doc("abbbb", iw)
|
48
48
|
add_doc("bbbbb", iw)
|
49
49
|
add_doc("ddddd", iw)
|
50
|
+
add_doc("ddddddddddddddddddddd", iw) # test max_distances problem
|
51
|
+
add_doc("aaaaaaaaaaaaaaaaaaaaaaa", iw) # test max_distances problem
|
50
52
|
#iw.optimize()
|
51
53
|
iw.close()
|
52
54
|
|
@@ -55,6 +57,7 @@ class FuzzyQueryTest < Test::Unit::TestCase
|
|
55
57
|
|
56
58
|
fq = FuzzyQuery.new(Term.new("field", "aaaaa"), FuzzyQuery.default_min_similarity, 5)
|
57
59
|
|
60
|
+
do_prefix_test(is, "aaaaaaaaaaaaaaaaaaaaaa", 1, [8])
|
58
61
|
do_prefix_test(is, "aaaaa", 0, [0,1,2])
|
59
62
|
do_prefix_test(is, "aaaaa", 1, [0,1,2])
|
60
63
|
do_prefix_test(is, "aaaaa", 2, [0,1,2])
|
@@ -139,9 +139,6 @@ class IndexSearcherTest < Test::Unit::TestCase
|
|
139
139
|
pq << t1 << t2 << t3
|
140
140
|
check_hits(pq, [1])
|
141
141
|
|
142
|
-
pq.slop = 4
|
143
|
-
check_hits(pq, [1,16,17])
|
144
|
-
|
145
142
|
pq = PhraseQuery.new()
|
146
143
|
pq << t1
|
147
144
|
pq.add(t3, 2)
|
@@ -154,6 +151,23 @@ class IndexSearcherTest < Test::Unit::TestCase
|
|
154
151
|
check_hits(pq, [1,11,14,16,17])
|
155
152
|
end
|
156
153
|
|
154
|
+
def test_multi_phrase_query()
|
155
|
+
pq = MultiPhraseQuery.new()
|
156
|
+
t1 = Term.new("field", "quick")
|
157
|
+
t2 = Term.new("field", "brown")
|
158
|
+
t3 = Term.new("field", "fox")
|
159
|
+
pq << t1
|
160
|
+
pq << t2
|
161
|
+
pq << t3
|
162
|
+
check_hits(pq, [1])
|
163
|
+
|
164
|
+
t1b = Term.new("field", "fast")
|
165
|
+
pq.add(t1b, 0)
|
166
|
+
check_hits(pq, [1, 8])
|
167
|
+
end
|
168
|
+
|
169
|
+
|
170
|
+
|
157
171
|
def test_range_query()
|
158
172
|
rq = RangeQuery.new("date", "20051006", "20051010", true, true)
|
159
173
|
check_hits(rq, [6,7,8,9,10])
|
@@ -18,7 +18,7 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
18
18
|
|
19
19
|
def setup()
|
20
20
|
@dir = RAMDirectory.new()
|
21
|
-
iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
21
|
+
iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true, :min_merge_docs => 3)
|
22
22
|
docs = [ # len mod
|
23
23
|
{"search"=>"findall","string"=>"a","int"=>"6","float"=>"0.01"}, # 4 0
|
24
24
|
{"search"=>"findall","string"=>"c","int"=>"5","float"=>"0.1"}, # 3 3
|
@@ -56,16 +56,16 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
56
56
|
do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], [SortField::FIELD_DOC])
|
57
57
|
|
58
58
|
## int
|
59
|
-
sf_int = SortField.new("int", {:sort_type => SortField::SortType::
|
59
|
+
sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER, :reverse => true})
|
60
60
|
do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [sf_int])
|
61
61
|
do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], [sf_int, SortField::FIELD_SCORE])
|
62
|
-
sf_int = SortField.new("int", {:sort_type => SortField::SortType::
|
62
|
+
sf_int = SortField.new("int", {:sort_type => SortField::SortType::INTEGER})
|
63
63
|
do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], [sf_int])
|
64
64
|
|
65
65
|
## float
|
66
|
-
sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
|
67
|
-
do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
|
68
66
|
sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT, :reverse => true})
|
67
|
+
do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
|
68
|
+
sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
|
69
69
|
do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new([sf_float, SortField::FIELD_SCORE]))
|
70
70
|
|
71
71
|
## str
|
@@ -74,11 +74,11 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
74
74
|
|
75
75
|
## auto
|
76
76
|
do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
|
77
|
-
do_test_top_docs(is, q, [
|
78
|
-
do_test_top_docs(is, q, [
|
79
|
-
do_test_top_docs(is, q, [
|
80
|
-
do_test_top_docs(is, q, [0,1,
|
81
|
-
do_test_top_docs(is, q, [3,7,
|
77
|
+
do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], Sort.new(["int"]))
|
78
|
+
do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float"))
|
79
|
+
do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float", true))
|
80
|
+
do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new(["int", "string"], true))
|
81
|
+
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new(["int", "string"]))
|
82
82
|
end
|
83
83
|
|
84
84
|
LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
|
@@ -87,11 +87,12 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
87
87
|
def test_special_sorts
|
88
88
|
is = IndexSearcher.new(@dir)
|
89
89
|
q = TermQuery.new(Term.new("search", "findall"))
|
90
|
-
sf = SortField.new("float", {:sort_type => LENGTH})
|
90
|
+
sf = SortField.new("float", {:sort_type => LENGTH, :reverse => true})
|
91
91
|
do_test_top_docs(is, q, [9,6,4,8,2,7,0,5,1,3], [sf])
|
92
|
-
sf = SortField.new("float", {:sort_type => LENGTH_MODULO})
|
92
|
+
sf = SortField.new("float", {:sort_type => LENGTH_MODULO, :reverse => true})
|
93
93
|
do_test_top_docs(is, q, [1,3,6,4,8,2,7,0,5,9], [sf])
|
94
94
|
sf = SortField.new("float", {:sort_type => LENGTH,
|
95
|
+
:reverse => true,
|
95
96
|
:comparator => lambda{|i,j| (j%4) <=> (i%4)}})
|
96
97
|
do_test_top_docs(is, q, [0,5,9,2,7,4,8,1,3,6], [sf])
|
97
98
|
end
|
data/test/unit/search/tc_sort.rb
CHANGED
@@ -32,14 +32,14 @@ class SortTest < Test::Unit::TestCase
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def test_multi_fields()
|
35
|
-
sf1 = SortField.new("field", {:sort_type => SortField::SortType::
|
35
|
+
sf1 = SortField.new("field", {:sort_type => SortField::SortType::INTEGER,
|
36
36
|
:reverse => true})
|
37
37
|
sf2 = SortField::FIELD_SCORE
|
38
38
|
sf3 = SortField::FIELD_DOC
|
39
39
|
s = Sort.new([sf1, sf2, sf3])
|
40
40
|
|
41
41
|
assert_equal(3, s.fields.size)
|
42
|
-
assert_equal(SortField::SortType::
|
42
|
+
assert_equal(SortField::SortType::INTEGER, s.fields[0].sort_type)
|
43
43
|
assert_equal("field", s.fields[0].name)
|
44
44
|
assert(s.fields[0].reverse?)
|
45
45
|
assert_equal(SortField::FIELD_SCORE, s.fields[1])
|
@@ -21,7 +21,7 @@ class SortFieldTest < Test::Unit::TestCase
|
|
21
21
|
|
22
22
|
def test_error_raised()
|
23
23
|
assert_raise(ArgumentError) {
|
24
|
-
fs = SortField.new(nil, {:sort_type => SortField::SortType::
|
24
|
+
fs = SortField.new(nil, {:sort_type => SortField::SortType::INTEGER})
|
25
25
|
}
|
26
26
|
end
|
27
27
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.3.
|
7
|
-
date: 2005-12-
|
6
|
+
version: 0.3.2
|
7
|
+
date: 2005-12-16 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|