ferret 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/test/unit/index/tc_index.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require File.dirname(__FILE__) + "/../../test_helper"
|
2
2
|
|
3
|
-
|
4
3
|
class IndexTest < Test::Unit::TestCase
|
5
4
|
include Ferret::Index
|
6
5
|
include Ferret::Search
|
@@ -146,6 +145,7 @@ class IndexTest < Test::Unit::TestCase
|
|
146
145
|
|
147
146
|
def test_fs_index
|
148
147
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
148
|
+
|
149
149
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
150
150
|
assert_raise(StandardError) do
|
151
151
|
Index.new(:path => fs_path,
|
@@ -171,6 +171,7 @@ class IndexTest < Test::Unit::TestCase
|
|
171
171
|
|
172
172
|
def test_fs_index_is_persistant
|
173
173
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
174
|
+
|
174
175
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
175
176
|
data = [
|
176
177
|
{"def_field" => "one two", :id => "me"},
|
@@ -195,6 +196,7 @@ class IndexTest < Test::Unit::TestCase
|
|
195
196
|
|
196
197
|
def test_key_used_for_id_field
|
197
198
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
199
|
+
|
198
200
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
199
201
|
data = [
|
200
202
|
{:my_id => "one two", :id => "me"},
|
@@ -301,6 +303,7 @@ class IndexTest < Test::Unit::TestCase
|
|
301
303
|
index = Index.new(:default_field => "f")
|
302
304
|
data.each {|doc| index << doc }
|
303
305
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
306
|
+
|
304
307
|
index.persist(fs_path, true)
|
305
308
|
assert_equal(3, index.size)
|
306
309
|
assert_equal("zero", index[0]["f"])
|
@@ -585,4 +588,40 @@ class IndexTest < Test::Unit::TestCase
|
|
585
588
|
index1.close
|
586
589
|
index2.close
|
587
590
|
end
|
591
|
+
|
592
|
+
def test_doc_specific_analyzer
|
593
|
+
index = Index.new
|
594
|
+
index.add_document("abc", Ferret::Analysis::Analyzer.new)
|
595
|
+
assert_equal(1, index.size)
|
596
|
+
end
|
597
|
+
|
598
|
+
|
599
|
+
def test_adding_empty_term_vectors
|
600
|
+
index = Index.new()
|
601
|
+
doc = Document.new
|
602
|
+
|
603
|
+
# Note: Adding keywords to either field1 or field2 gets rid of the error
|
604
|
+
|
605
|
+
doc << Field.new('field1', '',
|
606
|
+
Field::Store::NO,
|
607
|
+
Field::Index::TOKENIZED,
|
608
|
+
Field::TermVector::YES)
|
609
|
+
|
610
|
+
doc << Field.new('field2', '',
|
611
|
+
Field::Store::NO,
|
612
|
+
Field::Index::TOKENIZED,
|
613
|
+
Field::TermVector::YES)
|
614
|
+
|
615
|
+
# Note: keywords in this un-term-vector-stored field don't help the situation
|
616
|
+
|
617
|
+
doc << Field.new('field3', 'foo bar baz',
|
618
|
+
Field::Store::YES,
|
619
|
+
Field::Index::TOKENIZED,
|
620
|
+
Field::TermVector::NO)
|
621
|
+
|
622
|
+
index << doc
|
623
|
+
|
624
|
+
index.flush
|
625
|
+
index.close
|
626
|
+
end
|
588
627
|
end
|
data/test/unit/index/tc_term.rb
CHANGED
@@ -17,4 +17,11 @@ class TermTest < Test::Unit::TestCase
|
|
17
17
|
term4.set!("field3", "text3")
|
18
18
|
assert_not_equal(term1, term4)
|
19
19
|
end
|
20
|
+
|
21
|
+
def test_non_strings()
|
22
|
+
t = Term.new(2345, 3)
|
23
|
+
t = Term.new(:symbol, :symbol)
|
24
|
+
t.set!(:symbol, :symbol)
|
25
|
+
t.set!(234, 23462346)
|
26
|
+
end
|
20
27
|
end
|
data/test/unit/index/th_doc.rb
CHANGED
@@ -240,5 +240,13 @@ module IndexTestHelper
|
|
240
240
|
end
|
241
241
|
return docs
|
242
242
|
end
|
243
|
+
|
244
|
+
def IndexTestHelper.explain (query, searcher, field)
|
245
|
+
top_docs = searcher.search(query)
|
246
|
+
top_docs.score_docs.each { |sd|
|
247
|
+
puts "\nDoc #{sd.doc}: #{searcher.doc(sd.doc)[field]}\n#{searcher.explain(query, sd.doc).to_s}\n"
|
248
|
+
}
|
249
|
+
end
|
250
|
+
|
243
251
|
end
|
244
252
|
|
@@ -8,10 +8,10 @@ class QueryParserTest < Test::Unit::TestCase
|
|
8
8
|
['', ''],
|
9
9
|
['word', 'word'],
|
10
10
|
['field:word', 'field:word'],
|
11
|
-
['"word1 word2
|
12
|
-
['"word1
|
11
|
+
['"word1 word2 word#"', '"word1 word2 word"'],
|
12
|
+
['"word1 %%% word3"', '"word1 word3"'],
|
13
13
|
['field:"one two three"', 'field:"one two three"'],
|
14
|
-
['field:"one
|
14
|
+
['field:"one %%% three"', 'field:"one three"'],
|
15
15
|
['field:"one <> three"', 'field:"one <> three"'],
|
16
16
|
['field:"one <> three <>"', 'field:"one <> three"'],
|
17
17
|
['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
|
@@ -104,7 +104,9 @@ class QueryParserTest < Test::Unit::TestCase
|
|
104
104
|
:analyzer => Ferret::Analysis::StandardAnalyzer.new)
|
105
105
|
pairs = [
|
106
106
|
['key:1234', 'key:1234'],
|
107
|
-
['key:(1234)', 'key:1234']
|
107
|
+
['key:(1234 and Dave)', 'key:1234 key:dave'],
|
108
|
+
['key:(1234)', 'key:1234'],
|
109
|
+
['and the but they with', '']
|
108
110
|
]
|
109
111
|
|
110
112
|
pairs.each do |query_str, expected|
|
@@ -4,11 +4,11 @@ class SortFieldTest < Test::Unit::TestCase
|
|
4
4
|
include Ferret::Search
|
5
5
|
|
6
6
|
def test_params()
|
7
|
-
assert_equal("
|
8
|
-
assert_equal("
|
9
|
-
assert_equal("auto",
|
10
|
-
assert_equal("string",
|
11
|
-
assert_equal("
|
12
|
-
assert_equal("float",
|
7
|
+
assert_equal("SCORE", SortField::SortType::SCORE.to_s)
|
8
|
+
assert_equal("DOC", SortField::SortType::DOC.to_s)
|
9
|
+
assert_equal("auto", SortField::SortType::AUTO.to_s)
|
10
|
+
assert_equal("string", SortField::SortType::STRING.to_s)
|
11
|
+
assert_equal("integer", SortField::SortType::INTEGER.to_s)
|
12
|
+
assert_equal("float", SortField::SortType::FLOAT.to_s)
|
13
13
|
end
|
14
14
|
end
|
@@ -59,6 +59,7 @@ class IndexSearcherTest < Test::Unit::TestCase
|
|
59
59
|
assert_equal(18, @is.max_doc)
|
60
60
|
assert_equal("20050930", @is.doc(0).values(:date))
|
61
61
|
assert_equal("cat1/sub2/subsub2", @is.doc(4)[:cat])
|
62
|
+
assert_equal("20051012", @is.doc(12)[:date])
|
62
63
|
end
|
63
64
|
|
64
65
|
def test_term_query
|
@@ -129,6 +130,13 @@ class IndexSearcherTest < Test::Unit::TestCase
|
|
129
130
|
bq.add_query(tq2, BooleanClause::Occur::SHOULD)
|
130
131
|
bq.add_query(tq3, BooleanClause::Occur::SHOULD)
|
131
132
|
check_hits(bq, [1,2,3,4,6,8,11,14])
|
133
|
+
|
134
|
+
bq = BooleanQuery.new()
|
135
|
+
bc1 = BooleanClause.new(tq2, BooleanClause::Occur::SHOULD)
|
136
|
+
bc2 = BooleanClause.new(tq3, BooleanClause::Occur::SHOULD)
|
137
|
+
bq.add_clause(bc1)
|
138
|
+
bq.add_clause(bc2)
|
139
|
+
check_hits(bq, [1,2,3,4,6,8,11,14])
|
132
140
|
end
|
133
141
|
|
134
142
|
def test_phrase_query()
|
@@ -0,0 +1,275 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
require File.join(File.dirname(__FILE__), "tc_index_searcher.rb")
|
3
|
+
|
4
|
+
# make sure a MultiSearcher searching only one index
|
5
|
+
# passes all the IndexSearcher tests
|
6
|
+
class SimpleMultiSearcherTest < IndexSearcherTest
|
7
|
+
alias :old_setup :setup
|
8
|
+
def setup()
|
9
|
+
old_setup
|
10
|
+
@multi = MultiSearcher.new([IndexSearcher.new(@dir)])
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
# checks query results of a multisearcher searching two indexes
|
16
|
+
# against those of a single indexsearcher searching the same
|
17
|
+
# set of documents
|
18
|
+
class MultiSearcherTest < Test::Unit::TestCase
|
19
|
+
include Ferret::Document
|
20
|
+
include Ferret::Search
|
21
|
+
include Ferret::Store
|
22
|
+
include Ferret::Analysis
|
23
|
+
include Ferret::Index
|
24
|
+
|
25
|
+
def prepare_search_docs(data)
|
26
|
+
docs = []
|
27
|
+
data.each_with_index do |fields, i|
|
28
|
+
doc = Document.new()
|
29
|
+
fields.each_pair do |field, text|
|
30
|
+
doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
|
31
|
+
end
|
32
|
+
docs << doc
|
33
|
+
end
|
34
|
+
return docs
|
35
|
+
end
|
36
|
+
|
37
|
+
def prepare_documents
|
38
|
+
@documents = prepare_search_docs([
|
39
|
+
{"date" => "20050930", "field" => "word1",
|
40
|
+
"cat" => "cat1/"},
|
41
|
+
{"date" => "20051001", "field" => "word1 word2 the quick brown fox",
|
42
|
+
"cat" => "cat1/sub1"},
|
43
|
+
{"date" => "20051002", "field" => "word1 word3",
|
44
|
+
"cat" => "cat1/sub1/subsub1"},
|
45
|
+
{"date" => "20051003", "field" => "word1 word3",
|
46
|
+
"cat" => "cat1/sub2"},
|
47
|
+
{"date" => "20051004", "field" => "word1 word2",
|
48
|
+
"cat" => "cat1/sub2/subsub2"},
|
49
|
+
{"date" => "20051005", "field" => "word1",
|
50
|
+
"cat" => "cat2/sub1"},
|
51
|
+
{"date" => "20051006", "field" => "word1 word3",
|
52
|
+
"cat" => "cat2/sub1"},
|
53
|
+
{"date" => "20051007", "field" => "word1",
|
54
|
+
"cat" => "cat2/sub1"},
|
55
|
+
{"date" => "20051008", "field" => "word1 word2 word3 the fast brown fox",
|
56
|
+
"cat" => "cat2/sub1"}
|
57
|
+
])
|
58
|
+
@documents2 = prepare_search_docs([
|
59
|
+
{"date" => "20051009", "field" => "word1",
|
60
|
+
"cat" => "cat3/sub1"},
|
61
|
+
{"date" => "20051010", "field" => "word1",
|
62
|
+
"cat" => "cat3/sub1"},
|
63
|
+
{"date" => "20051011", "field" => "word1 word3 the quick red fox",
|
64
|
+
"cat" => "cat3/sub1"},
|
65
|
+
{"date" => "20051012", "field" => "word1",
|
66
|
+
"cat" => "cat3/sub1"},
|
67
|
+
{"date" => "20051013", "field" => "word1",
|
68
|
+
"cat" => "cat1/sub2"},
|
69
|
+
{"date" => "20051014", "field" => "word1 word3 the quick hairy fox",
|
70
|
+
"cat" => "cat1/sub1"},
|
71
|
+
{"date" => "20051015", "field" => "word1",
|
72
|
+
"cat" => "cat1/sub2/subsub1"},
|
73
|
+
{"date" => "20051016",
|
74
|
+
"field" => "word1 the quick fox is brown and hairy and a little red",
|
75
|
+
"cat" => "cat1/sub1/subsub2"},
|
76
|
+
{"date" => "20051017", "field" => "word1 the brown fox is quick and red",
|
77
|
+
"cat" => "cat1/"}
|
78
|
+
])
|
79
|
+
end
|
80
|
+
|
81
|
+
def setup()
|
82
|
+
prepare_documents
|
83
|
+
# create MultiSearcher from two separate searchers
|
84
|
+
dir1 = RAMDirectory.new()
|
85
|
+
iw1 = IndexWriter.new(dir1, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
86
|
+
@documents.each { |doc| iw1 << doc }
|
87
|
+
iw1.close()
|
88
|
+
|
89
|
+
dir2 = RAMDirectory.new()
|
90
|
+
iw2 = IndexWriter.new(dir2, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
91
|
+
@documents2.each { |doc| iw2 << doc }
|
92
|
+
iw2.close()
|
93
|
+
@multi = Ferret::Search::MultiSearcher.new([IndexSearcher.new(dir1), IndexSearcher.new(dir2)])
|
94
|
+
|
95
|
+
# create single searcher
|
96
|
+
dir = RAMDirectory.new
|
97
|
+
iw = IndexWriter.new(dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
98
|
+
@documents.each { |doc| iw << doc }
|
99
|
+
@documents2.each { |doc| iw << doc }
|
100
|
+
iw.close
|
101
|
+
@single = IndexSearcher.new(dir)
|
102
|
+
|
103
|
+
@query_parser = Ferret::QueryParser.new(['date', 'field', 'cat'], :analyzer => WhiteSpaceAnalyzer.new())
|
104
|
+
end
|
105
|
+
|
106
|
+
def tear_down()
|
107
|
+
@multi.close
|
108
|
+
@single.close
|
109
|
+
end
|
110
|
+
|
111
|
+
def check_hits(query, debug_field=nil)
|
112
|
+
query = @query_parser.parse(query) if (query.is_a? String)
|
113
|
+
multi_docs = @multi.search(query)
|
114
|
+
single_docs = @single.search(query)
|
115
|
+
IndexTestHelper.explain(query, @single, debug_field) if debug_field
|
116
|
+
IndexTestHelper.explain(query, @multi, debug_field) if debug_field
|
117
|
+
assert_equal(single_docs.score_docs.size, multi_docs.score_docs.size, 'hit count')
|
118
|
+
assert_equal(single_docs.total_hits, multi_docs.total_hits, 'hit count')
|
119
|
+
|
120
|
+
multi_docs.score_docs.each_with_index { |sd, id|
|
121
|
+
assert_equal(single_docs.score_docs[id].doc, sd.doc)
|
122
|
+
assert_equal(single_docs.score_docs[id].score, sd.score)
|
123
|
+
}
|
124
|
+
end
|
125
|
+
|
126
|
+
def test_get_doc()
|
127
|
+
assert_equal(18, @multi.max_doc)
|
128
|
+
assert_equal("20050930", @multi.doc(0).values(:date))
|
129
|
+
assert_equal("cat1/sub2/subsub2", @multi.doc(4)[:cat])
|
130
|
+
assert_equal("20051012", @multi.doc(12)[:date])
|
131
|
+
assert_equal(18, @single.max_doc)
|
132
|
+
assert_equal("20050930", @single.doc(0).values(:date))
|
133
|
+
assert_equal("cat1/sub2/subsub2", @single.doc(4)[:cat])
|
134
|
+
assert_equal("20051012", @single.doc(12)[:date])
|
135
|
+
end
|
136
|
+
|
137
|
+
def test_term_query
|
138
|
+
tq = TermQuery.new(Term.new("field", "word2"));
|
139
|
+
tq.boost = 100
|
140
|
+
check_hits(tq)
|
141
|
+
|
142
|
+
tq = TermQuery.new(Term.new("field", "2342"));
|
143
|
+
check_hits(tq)
|
144
|
+
|
145
|
+
tq = TermQuery.new(Term.new("field", ""));
|
146
|
+
check_hits(tq)
|
147
|
+
|
148
|
+
tq = TermQuery.new(Term.new("field", "word1"));
|
149
|
+
check_hits(tq)
|
150
|
+
end
|
151
|
+
|
152
|
+
|
153
|
+
def test_boolean_query
|
154
|
+
bq = BooleanQuery.new()
|
155
|
+
tq1 = TermQuery.new(Term.new("field", "word1"))
|
156
|
+
tq2 = TermQuery.new(Term.new("field", "word3"))
|
157
|
+
bq.add_query(tq1, BooleanClause::Occur::MUST)
|
158
|
+
bq.add_query(tq2, BooleanClause::Occur::MUST)
|
159
|
+
check_hits(bq)
|
160
|
+
|
161
|
+
tq3 = TermQuery.new(Term.new("field", "word2"))
|
162
|
+
bq.add_query(tq3, BooleanClause::Occur::SHOULD)
|
163
|
+
check_hits(bq)
|
164
|
+
|
165
|
+
bq = BooleanQuery.new()
|
166
|
+
bq.add_query(tq2, BooleanClause::Occur::MUST)
|
167
|
+
bq.add_query(tq3, BooleanClause::Occur::MUST_NOT)
|
168
|
+
check_hits(bq)
|
169
|
+
|
170
|
+
bq = BooleanQuery.new()
|
171
|
+
bq.add_query(tq2, BooleanClause::Occur::MUST_NOT)
|
172
|
+
check_hits(bq)
|
173
|
+
|
174
|
+
bq = BooleanQuery.new()
|
175
|
+
bq.add_query(tq2, BooleanClause::Occur::SHOULD)
|
176
|
+
bq.add_query(tq3, BooleanClause::Occur::SHOULD)
|
177
|
+
check_hits(bq)
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_phrase_query()
|
181
|
+
pq = PhraseQuery.new()
|
182
|
+
t1 = Term.new("field", "quick")
|
183
|
+
t2 = Term.new("field", "brown")
|
184
|
+
t3 = Term.new("field", "fox")
|
185
|
+
pq << t1 << t2 << t3
|
186
|
+
check_hits(pq)
|
187
|
+
|
188
|
+
pq = PhraseQuery.new()
|
189
|
+
pq << t1
|
190
|
+
pq.add(t3, 2)
|
191
|
+
check_hits(pq)
|
192
|
+
|
193
|
+
pq.slop = 1
|
194
|
+
check_hits(pq)
|
195
|
+
|
196
|
+
pq.slop = 4
|
197
|
+
check_hits(pq)
|
198
|
+
end
|
199
|
+
|
200
|
+
def test_range_query()
|
201
|
+
rq = RangeQuery.new("date", "20051006", "20051010", true, true)
|
202
|
+
check_hits(rq)
|
203
|
+
|
204
|
+
rq = RangeQuery.new("date", "20051006", "20051010", false, true)
|
205
|
+
check_hits(rq)
|
206
|
+
|
207
|
+
rq = RangeQuery.new("date", "20051006", "20051010", true, false)
|
208
|
+
check_hits(rq)
|
209
|
+
|
210
|
+
rq = RangeQuery.new("date", "20051006", "20051010", false, false)
|
211
|
+
check_hits(rq)
|
212
|
+
|
213
|
+
rq = RangeQuery.new("date", nil, "20051003", false, true)
|
214
|
+
check_hits(rq)
|
215
|
+
|
216
|
+
rq = RangeQuery.new("date", nil, "20051003", false, false)
|
217
|
+
check_hits(rq)
|
218
|
+
|
219
|
+
rq = RangeQuery.new_less("date", "20051003", true)
|
220
|
+
check_hits(rq)
|
221
|
+
|
222
|
+
rq = RangeQuery.new_less("date", "20051003", false)
|
223
|
+
check_hits(rq)
|
224
|
+
|
225
|
+
rq = RangeQuery.new("date", "20051014", nil, true, false)
|
226
|
+
check_hits(rq)
|
227
|
+
|
228
|
+
rq = RangeQuery.new("date", "20051014", nil, false, false)
|
229
|
+
check_hits(rq)
|
230
|
+
|
231
|
+
rq = RangeQuery.new_more("date", "20051014", true)
|
232
|
+
check_hits(rq)
|
233
|
+
|
234
|
+
rq = RangeQuery.new_more("date", "20051014", false)
|
235
|
+
check_hits(rq)
|
236
|
+
end
|
237
|
+
|
238
|
+
def test_prefix_query()
|
239
|
+
t = Term.new("cat", "cat1")
|
240
|
+
pq = PrefixQuery.new(t)
|
241
|
+
check_hits(pq)
|
242
|
+
|
243
|
+
t.text = "cat1/sub2"
|
244
|
+
pq = PrefixQuery.new(t)
|
245
|
+
check_hits(pq)
|
246
|
+
end
|
247
|
+
|
248
|
+
def test_wildcard_query()
|
249
|
+
t = Term.new("cat", "cat1*")
|
250
|
+
wq = WildcardQuery.new(t)
|
251
|
+
check_hits(wq)
|
252
|
+
|
253
|
+
t.text = "cat1*/su??ub2"
|
254
|
+
wq = WildcardQuery.new(t)
|
255
|
+
check_hits(wq)
|
256
|
+
end
|
257
|
+
|
258
|
+
def test_multi_phrase_query()
|
259
|
+
t11 = Term.new("field", "quick")
|
260
|
+
t12 = Term.new("field", "fast")
|
261
|
+
t21 = Term.new("field", "brown")
|
262
|
+
t22 = Term.new("field", "red")
|
263
|
+
t23 = Term.new("field", "hairy")
|
264
|
+
t3 = Term.new("field", "fox")
|
265
|
+
|
266
|
+
mpq = MultiPhraseQuery.new()
|
267
|
+
mpq << [t11, t12]
|
268
|
+
mpq << [t21, t22, t23]
|
269
|
+
mpq << t3
|
270
|
+
check_hits(mpq)
|
271
|
+
|
272
|
+
mpq.slop = 4
|
273
|
+
check_hits(mpq)
|
274
|
+
end
|
275
|
+
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
# Tests the multisearcher by comparing its results
|
4
|
+
# with those returned by an IndexSearcher.
|
5
|
+
# Taken from TestMultiSearcherRanking.java of Lucene
|
6
|
+
class MultiSearcher2Test < Test::Unit::TestCase
|
7
|
+
include Ferret::Document
|
8
|
+
include Ferret::Search
|
9
|
+
include Ferret::Store
|
10
|
+
include Ferret::Analysis
|
11
|
+
include Ferret::Index
|
12
|
+
|
13
|
+
FIELD_NAME = 'body'
|
14
|
+
|
15
|
+
def test_one_Term_query
|
16
|
+
check_query 'three'
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_two_term_query
|
20
|
+
check_query 'three foo'
|
21
|
+
# as of 2006/03/11 these fail in Java Lucene as
|
22
|
+
# well, hits are returned in slightly different order.
|
23
|
+
#check_query '+pizza +blue*', :body
|
24
|
+
#check_query '+pizza blue*', :body
|
25
|
+
#check_query 'pizza blue*', :body
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_prefix_query
|
29
|
+
check_query 'multi*'
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_fuzzy_query
|
33
|
+
check_query 'multiThree~'
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_range_query
|
37
|
+
check_query '{multiA multiP}'
|
38
|
+
end
|
39
|
+
|
40
|
+
# fails (query parse error)
|
41
|
+
#def test_multi_phrase_query
|
42
|
+
# check_query '"blueberry pi*"'
|
43
|
+
#end
|
44
|
+
|
45
|
+
def test_nomatch_query
|
46
|
+
check_query '+three +nomatch'
|
47
|
+
end
|
48
|
+
|
49
|
+
# this yields differing scores, but doesn't work in
|
50
|
+
# Java Lucene either
|
51
|
+
#def test_term_repeated_query
|
52
|
+
# check_query 'multi* multi* foo'
|
53
|
+
#end
|
54
|
+
|
55
|
+
|
56
|
+
def check_query(query_str, debug_field=nil)
|
57
|
+
@parser ||= Ferret::QueryParser.new(FIELD_NAME, :analyzer => @analyzer)
|
58
|
+
query = @parser.parse(query_str)
|
59
|
+
puts "Query: #{query}" if debug_field
|
60
|
+
IndexTestHelper.explain(query, @multi, debug_field) if debug_field
|
61
|
+
IndexTestHelper.explain(query, @single, debug_field) if debug_field
|
62
|
+
multi_hits = @multi.search(query)
|
63
|
+
single_hits = @single.search(query)
|
64
|
+
assert_equal single_hits.size, multi_hits.size, "hit count differs"
|
65
|
+
multi_hits.score_docs.each_with_index { |multi_sd, i|
|
66
|
+
single_sd = single_hits.score_docs[i]
|
67
|
+
doc_multi = @multi.doc(multi_sd.doc)
|
68
|
+
doc_single = @single.doc(single_sd.doc)
|
69
|
+
assert_equal single_sd.score, multi_sd.score, "score differs in result #{i}"
|
70
|
+
assert_equal doc_single[FIELD_NAME], doc_multi[FIELD_NAME], "field values differ in result #{i}"
|
71
|
+
}
|
72
|
+
end
|
73
|
+
|
74
|
+
def setup()
|
75
|
+
@analyzer = WhiteSpaceAnalyzer.new()
|
76
|
+
# create MultiSearcher from two separate searchers
|
77
|
+
d1 = RAMDirectory.new()
|
78
|
+
iw1 = IndexWriter.new(d1, :analyzer => @analyzer, :create => true)
|
79
|
+
add_collection1(iw1)
|
80
|
+
iw1.close()
|
81
|
+
|
82
|
+
d2 = RAMDirectory.new()
|
83
|
+
iw2 = IndexWriter.new(d2, :analyzer => @analyzer, :create => true)
|
84
|
+
add_collection2(iw2)
|
85
|
+
iw2.close()
|
86
|
+
@multi = MultiSearcher.new([IndexSearcher.new(d1), IndexSearcher.new(d2)])
|
87
|
+
|
88
|
+
# create IndexSearcher which contains all documents
|
89
|
+
d = RAMDirectory.new()
|
90
|
+
iw = IndexWriter.new(d, :analyzer => @analyzer, :create => true)
|
91
|
+
add_collection1(iw)
|
92
|
+
add_collection2(iw)
|
93
|
+
iw.close()
|
94
|
+
@single = IndexSearcher.new(d)
|
95
|
+
end
|
96
|
+
|
97
|
+
def tear_down()
|
98
|
+
@multi.close
|
99
|
+
@single.close
|
100
|
+
end
|
101
|
+
|
102
|
+
def add(value, iw)
|
103
|
+
d = Document.new
|
104
|
+
d << Field.new(FIELD_NAME, value, Field::Store::YES, Field::Index::TOKENIZED)
|
105
|
+
iw << d
|
106
|
+
end
|
107
|
+
|
108
|
+
def add_collection1(iw)
|
109
|
+
add("one blah three", iw)
|
110
|
+
add("one foo three multiOne", iw)
|
111
|
+
add("one foobar three multiThree", iw)
|
112
|
+
add("blueberry pie", iw)
|
113
|
+
add("blueberry strudel", iw)
|
114
|
+
add("blueberry pizza", iw)
|
115
|
+
end
|
116
|
+
def add_collection2(iw)
|
117
|
+
add("two blah three", iw)
|
118
|
+
add("two foo xxx multiTwo", iw)
|
119
|
+
add("two foobar xxx multiThreee", iw)
|
120
|
+
add("blueberry chewing gum", iw)
|
121
|
+
add("bluebird pizza", iw)
|
122
|
+
add("bluebird foobar pizza", iw)
|
123
|
+
add("piccadilly circus", iw)
|
124
|
+
end
|
125
|
+
|
126
|
+
end
|