ferret 0.9.1 → 0.9.2
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
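
The most visible addition in this release is Ferret::Search::MultiSearcher (data/lib/ferret/search/multi_searcher.rb is new), exercised by the tc_multi_searcher tests shown further down. The snippet below is only a usage sketch inferred from those tests, not from official documentation; the documents and field names are made up for illustration:

    require 'ferret'
    include Ferret::Document
    include Ferret::Index
    include Ferret::Search
    include Ferret::Store
    include Ferret::Analysis

    # Build two small in-memory indexes, the way the new tests do.
    dir1, dir2 = RAMDirectory.new, RAMDirectory.new
    [dir1, dir2].each_with_index do |dir, i|
      iw = IndexWriter.new(dir, :analyzer => WhiteSpaceAnalyzer.new, :create => true)
      doc = Document.new
      doc << Field.new("field", "word#{i + 1}", Field::Store::YES, Field::Index::TOKENIZED)
      iw << doc
      iw.close
    end

    # A MultiSearcher wraps several IndexSearchers and answers queries across
    # all of them through the same search interface as a single IndexSearcher.
    multi = MultiSearcher.new([IndexSearcher.new(dir1), IndexSearcher.new(dir2)])
    top_docs = multi.search(TermQuery.new(Term.new("field", "word1")))
    top_docs.score_docs.each { |sd| puts multi.doc(sd.doc)["field"] }
    multi.close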
data/test/unit/index/tc_index.rb
CHANGED
@@ -1,6 +1,5 @@
 require File.dirname(__FILE__) + "/../../test_helper"
 
-
 class IndexTest < Test::Unit::TestCase
   include Ferret::Index
   include Ferret::Search
@@ -146,6 +145,7 @@ class IndexTest < Test::Unit::TestCase
 
   def test_fs_index
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
     assert_raise(StandardError) do
       Index.new(:path => fs_path,
@@ -171,6 +171,7 @@ class IndexTest < Test::Unit::TestCase
 
   def test_fs_index_is_persistant
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
     data = [
       {"def_field" => "one two", :id => "me"},
@@ -195,6 +196,7 @@ class IndexTest < Test::Unit::TestCase
 
   def test_key_used_for_id_field
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
     data = [
       {:my_id => "one two", :id => "me"},
@@ -301,6 +303,7 @@ class IndexTest < Test::Unit::TestCase
     index = Index.new(:default_field => "f")
     data.each {|doc| index << doc }
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
     index.persist(fs_path, true)
     assert_equal(3, index.size)
     assert_equal("zero", index[0]["f"])
@@ -585,4 +588,40 @@ class IndexTest < Test::Unit::TestCase
     index1.close
     index2.close
   end
+
+  def test_doc_specific_analyzer
+    index = Index.new
+    index.add_document("abc", Ferret::Analysis::Analyzer.new)
+    assert_equal(1, index.size)
+  end
+
+
+  def test_adding_empty_term_vectors
+    index = Index.new()
+    doc = Document.new
+
+    # Note: Adding keywords to either field1 or field2 gets rid of the error
+
+    doc << Field.new('field1', '',
+                     Field::Store::NO,
+                     Field::Index::TOKENIZED,
+                     Field::TermVector::YES)
+
+    doc << Field.new('field2', '',
+                     Field::Store::NO,
+                     Field::Index::TOKENIZED,
+                     Field::TermVector::YES)
+
+    # Note: keywords in this un-term-vector-stored field don't help the situation
+
+    doc << Field.new('field3', 'foo bar baz',
+                     Field::Store::YES,
+                     Field::Index::TOKENIZED,
+                     Field::TermVector::NO)
+
+    index << doc
+
+    index.flush
+    index.close
+  end
 end
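
The new test_doc_specific_analyzer above passes an analyzer along with the document to Index#add_document. A minimal sketch of that call, assuming the same shape as the test (the choice of analyzer here is purely illustrative):

    require 'ferret'

    index = Ferret::Index::Index.new
    # The second argument overrides the index's default analyzer for this
    # document only; the test above adds the string "abc" with a plain Analyzer.
    index.add_document("abc", Ferret::Analysis::Analyzer.new)
    index.close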
data/test/unit/index/tc_term.rb
CHANGED
@@ -17,4 +17,11 @@ class TermTest < Test::Unit::TestCase
     term4.set!("field3", "text3")
     assert_not_equal(term1, term4)
   end
+
+  def test_non_strings()
+    t = Term.new(2345, 3)
+    t = Term.new(:symbol, :symbol)
+    t.set!(:symbol, :symbol)
+    t.set!(234, 23462346)
+  end
 end
data/test/unit/index/th_doc.rb
CHANGED
@@ -240,5 +240,13 @@ module IndexTestHelper
     end
     return docs
   end
+
+  def IndexTestHelper.explain (query, searcher, field)
+    top_docs = searcher.search(query)
+    top_docs.score_docs.each { |sd|
+      puts "\nDoc #{sd.doc}: #{searcher.doc(sd.doc)[field]}\n#{searcher.explain(query, sd.doc).to_s}\n"
+    }
+  end
+
 end
 
data/test/unit/query_parser/tc_query_parser.rb
CHANGED
@@ -8,10 +8,10 @@ class QueryParserTest < Test::Unit::TestCase
       ['', ''],
       ['word', 'word'],
       ['field:word', 'field:word'],
-      ['"word1 word2
-      ['"word1
+      ['"word1 word2 word#"', '"word1 word2 word"'],
+      ['"word1 %%% word3"', '"word1 word3"'],
       ['field:"one two three"', 'field:"one two three"'],
-      ['field:"one
+      ['field:"one %%% three"', 'field:"one three"'],
       ['field:"one <> three"', 'field:"one <> three"'],
       ['field:"one <> three <>"', 'field:"one <> three"'],
       ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
@@ -104,7 +104,9 @@ class QueryParserTest < Test::Unit::TestCase
                                  :analyzer => Ferret::Analysis::StandardAnalyzer.new)
     pairs = [
       ['key:1234', 'key:1234'],
-      ['key:(1234)', 'key:1234']
+      ['key:(1234 and Dave)', 'key:1234 key:dave'],
+      ['key:(1234)', 'key:1234'],
+      ['and the but they with', '']
     ]
 
     pairs.each do |query_str, expected|
data/test/unit/search/rtc_sort_field.rb
CHANGED
@@ -4,11 +4,11 @@ class SortFieldTest < Test::Unit::TestCase
   include Ferret::Search
 
   def test_params()
-    assert_equal("
-    assert_equal("
-    assert_equal("auto",
-    assert_equal("string",
-    assert_equal("
-    assert_equal("float",
+    assert_equal("SCORE", SortField::SortType::SCORE.to_s)
+    assert_equal("DOC", SortField::SortType::DOC.to_s)
+    assert_equal("auto", SortField::SortType::AUTO.to_s)
+    assert_equal("string", SortField::SortType::STRING.to_s)
+    assert_equal("integer", SortField::SortType::INTEGER.to_s)
+    assert_equal("float", SortField::SortType::FLOAT.to_s)
   end
 end
data/test/unit/search/tc_index_searcher.rb
CHANGED
@@ -59,6 +59,7 @@ class IndexSearcherTest < Test::Unit::TestCase
     assert_equal(18, @is.max_doc)
     assert_equal("20050930", @is.doc(0).values(:date))
     assert_equal("cat1/sub2/subsub2", @is.doc(4)[:cat])
+    assert_equal("20051012", @is.doc(12)[:date])
   end
 
   def test_term_query
@@ -129,6 +130,13 @@ class IndexSearcherTest < Test::Unit::TestCase
     bq.add_query(tq2, BooleanClause::Occur::SHOULD)
     bq.add_query(tq3, BooleanClause::Occur::SHOULD)
     check_hits(bq, [1,2,3,4,6,8,11,14])
+
+    bq = BooleanQuery.new()
+    bc1 = BooleanClause.new(tq2, BooleanClause::Occur::SHOULD)
+    bc2 = BooleanClause.new(tq3, BooleanClause::Occur::SHOULD)
+    bq.add_clause(bc1)
+    bq.add_clause(bc2)
+    check_hits(bq, [1,2,3,4,6,8,11,14])
   end
 
   def test_phrase_query()
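
The lines added to test_boolean_query exercise a second way of assembling a BooleanQuery: wrapping each sub-query in a BooleanClause and passing it to add_clause, which the test expects to produce the same hits as the add_query form. A short sketch of the two forms side by side (the specific terms below are illustrative stand-ins for the queries built earlier in the test):

    include Ferret::Index
    include Ferret::Search

    tq2 = TermQuery.new(Term.new("field", "word3"))
    tq3 = TermQuery.new(Term.new("field", "word2"))

    # Existing form: add the sub-queries directly with an occurrence flag.
    bq_a = BooleanQuery.new
    bq_a.add_query(tq2, BooleanClause::Occur::SHOULD)
    bq_a.add_query(tq3, BooleanClause::Occur::SHOULD)

    # Form exercised by the added test lines: build the clauses explicitly.
    bq_b = BooleanQuery.new
    bq_b.add_clause(BooleanClause.new(tq2, BooleanClause::Occur::SHOULD))
    bq_b.add_clause(BooleanClause.new(tq3, BooleanClause::Occur::SHOULD))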
data/test/unit/search/tc_multi_searcher.rb
ADDED
@@ -0,0 +1,275 @@
+require File.dirname(__FILE__) + "/../../test_helper"
+require File.join(File.dirname(__FILE__), "tc_index_searcher.rb")
+
+# make sure a MultiSearcher searching only one index
+# passes all the IndexSearcher tests
+class SimpleMultiSearcherTest < IndexSearcherTest
+  alias :old_setup :setup
+  def setup()
+    old_setup
+    @multi = MultiSearcher.new([IndexSearcher.new(@dir)])
+  end
+end
+
+
+# checks query results of a multisearcher searching two indexes
+# against those of a single indexsearcher searching the same
+# set of documents
+class MultiSearcherTest < Test::Unit::TestCase
+  include Ferret::Document
+  include Ferret::Search
+  include Ferret::Store
+  include Ferret::Analysis
+  include Ferret::Index
+
+  def prepare_search_docs(data)
+    docs = []
+    data.each_with_index do |fields, i|
+      doc = Document.new()
+      fields.each_pair do |field, text|
+        doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
+      end
+      docs << doc
+    end
+    return docs
+  end
+
+  def prepare_documents
+    @documents = prepare_search_docs([
+      {"date" => "20050930", "field" => "word1",
+       "cat" => "cat1/"},
+      {"date" => "20051001", "field" => "word1 word2 the quick brown fox",
+       "cat" => "cat1/sub1"},
+      {"date" => "20051002", "field" => "word1 word3",
+       "cat" => "cat1/sub1/subsub1"},
+      {"date" => "20051003", "field" => "word1 word3",
+       "cat" => "cat1/sub2"},
+      {"date" => "20051004", "field" => "word1 word2",
+       "cat" => "cat1/sub2/subsub2"},
+      {"date" => "20051005", "field" => "word1",
+       "cat" => "cat2/sub1"},
+      {"date" => "20051006", "field" => "word1 word3",
+       "cat" => "cat2/sub1"},
+      {"date" => "20051007", "field" => "word1",
+       "cat" => "cat2/sub1"},
+      {"date" => "20051008", "field" => "word1 word2 word3 the fast brown fox",
+       "cat" => "cat2/sub1"}
+    ])
+    @documents2 = prepare_search_docs([
+      {"date" => "20051009", "field" => "word1",
+       "cat" => "cat3/sub1"},
+      {"date" => "20051010", "field" => "word1",
+       "cat" => "cat3/sub1"},
+      {"date" => "20051011", "field" => "word1 word3 the quick red fox",
+       "cat" => "cat3/sub1"},
+      {"date" => "20051012", "field" => "word1",
+       "cat" => "cat3/sub1"},
+      {"date" => "20051013", "field" => "word1",
+       "cat" => "cat1/sub2"},
+      {"date" => "20051014", "field" => "word1 word3 the quick hairy fox",
+       "cat" => "cat1/sub1"},
+      {"date" => "20051015", "field" => "word1",
+       "cat" => "cat1/sub2/subsub1"},
+      {"date" => "20051016",
+       "field" => "word1 the quick fox is brown and hairy and a little red",
+       "cat" => "cat1/sub1/subsub2"},
+      {"date" => "20051017", "field" => "word1 the brown fox is quick and red",
+       "cat" => "cat1/"}
+    ])
+  end
+
+  def setup()
+    prepare_documents
+    # create MultiSearcher from two seperate searchers
+    dir1 = RAMDirectory.new()
+    iw1 = IndexWriter.new(dir1, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
+    @documents.each { |doc| iw1 << doc }
+    iw1.close()
+
+    dir2 = RAMDirectory.new()
+    iw2 = IndexWriter.new(dir2, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
+    @documents2.each { |doc| iw2 << doc }
+    iw2.close()
+    @multi = Ferret::Search::MultiSearcher.new([IndexSearcher.new(dir1), IndexSearcher.new(dir2)])
+
+    # create single searcher
+    dir = RAMDirectory.new
+    iw = IndexWriter.new(dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
+    @documents.each { |doc| iw << doc }
+    @documents2.each { |doc| iw << doc }
+    iw.close
+    @single = IndexSearcher.new(dir)
+
+    @query_parser = Ferret::QueryParser.new(['date', 'field', 'cat'], :analyzer => WhiteSpaceAnalyzer.new())
+  end
+
+  def tear_down()
+    @multi.close
+    @single.close
+  end
+
+  def check_hits(query, debug_field=nil)
+    query = @query_parser.parse(query) if (query.is_a? String)
+    multi_docs = @multi.search(query)
+    single_docs = @single.search(query)
+    IndexTestHelper.explain(query, @single, debug_field) if debug_field
+    IndexTestHelper.explain(query, @multi, debug_field) if debug_field
+    assert_equal(single_docs.score_docs.size, multi_docs.score_docs.size, 'hit count')
+    assert_equal(single_docs.total_hits, multi_docs.total_hits, 'hit count')
+
+    multi_docs.score_docs.each_with_index { |sd, id|
+      assert_equal(single_docs.score_docs[id].doc, sd.doc)
+      assert_equal(single_docs.score_docs[id].score, sd.score)
+    }
+  end
+
+  def test_get_doc()
+    assert_equal(18, @multi.max_doc)
+    assert_equal("20050930", @multi.doc(0).values(:date))
+    assert_equal("cat1/sub2/subsub2", @multi.doc(4)[:cat])
+    assert_equal("20051012", @multi.doc(12)[:date])
+    assert_equal(18, @single.max_doc)
+    assert_equal("20050930", @single.doc(0).values(:date))
+    assert_equal("cat1/sub2/subsub2", @single.doc(4)[:cat])
+    assert_equal("20051012", @single.doc(12)[:date])
+  end
+
+  def test_term_query
+    tq = TermQuery.new(Term.new("field", "word2"));
+    tq.boost = 100
+    check_hits(tq)
+
+    tq = TermQuery.new(Term.new("field", "2342"));
+    check_hits(tq)
+
+    tq = TermQuery.new(Term.new("field", ""));
+    check_hits(tq)
+
+    tq = TermQuery.new(Term.new("field", "word1"));
+    check_hits(tq)
+  end
+
+
+  def test_boolean_query
+    bq = BooleanQuery.new()
+    tq1 = TermQuery.new(Term.new("field", "word1"))
+    tq2 = TermQuery.new(Term.new("field", "word3"))
+    bq.add_query(tq1, BooleanClause::Occur::MUST)
+    bq.add_query(tq2, BooleanClause::Occur::MUST)
+    check_hits(bq)
+
+    tq3 = TermQuery.new(Term.new("field", "word2"))
+    bq.add_query(tq3, BooleanClause::Occur::SHOULD)
+    check_hits(bq)
+
+    bq = BooleanQuery.new()
+    bq.add_query(tq2, BooleanClause::Occur::MUST)
+    bq.add_query(tq3, BooleanClause::Occur::MUST_NOT)
+    check_hits(bq)
+
+    bq = BooleanQuery.new()
+    bq.add_query(tq2, BooleanClause::Occur::MUST_NOT)
+    check_hits(bq)
+
+    bq = BooleanQuery.new()
+    bq.add_query(tq2, BooleanClause::Occur::SHOULD)
+    bq.add_query(tq3, BooleanClause::Occur::SHOULD)
+    check_hits(bq)
+  end
+
+  def test_phrase_query()
+    pq = PhraseQuery.new()
+    t1 = Term.new("field", "quick")
+    t2 = Term.new("field", "brown")
+    t3 = Term.new("field", "fox")
+    pq << t1 << t2 << t3
+    check_hits(pq)
+
+    pq = PhraseQuery.new()
+    pq << t1
+    pq.add(t3, 2)
+    check_hits(pq)
+
+    pq.slop = 1
+    check_hits(pq)
+
+    pq.slop = 4
+    check_hits(pq)
+  end
+
+  def test_range_query()
+    rq = RangeQuery.new("date", "20051006", "20051010", true, true)
+    check_hits(rq)
+
+    rq = RangeQuery.new("date", "20051006", "20051010", false, true)
+    check_hits(rq)
+
+    rq = RangeQuery.new("date", "20051006", "20051010", true, false)
+    check_hits(rq)
+
+    rq = RangeQuery.new("date", "20051006", "20051010", false, false)
+    check_hits(rq)
+
+    rq = RangeQuery.new("date", nil, "20051003", false, true)
+    check_hits(rq)
+
+    rq = RangeQuery.new("date", nil, "20051003", false, false)
+    check_hits(rq)
+
+    rq = RangeQuery.new_less("date", "20051003", true)
+    check_hits(rq)
+
+    rq = RangeQuery.new_less("date", "20051003", false)
+    check_hits(rq)
+
+    rq = RangeQuery.new("date", "20051014", nil, true, false)
+    check_hits(rq)
+
+    rq = RangeQuery.new("date", "20051014", nil, false, false)
+    check_hits(rq)
+
+    rq = RangeQuery.new_more("date", "20051014", true)
+    check_hits(rq)
+
+    rq = RangeQuery.new_more("date", "20051014", false)
+    check_hits(rq)
+  end
+
+  def test_prefix_query()
+    t = Term.new("cat", "cat1")
+    pq = PrefixQuery.new(t)
+    check_hits(pq)
+
+    t.text = "cat1/sub2"
+    pq = PrefixQuery.new(t)
+    check_hits(pq)
+  end
+
+  def test_wildcard_query()
+    t = Term.new("cat", "cat1*")
+    wq = WildcardQuery.new(t)
+    check_hits(wq)
+
+    t.text = "cat1*/su??ub2"
+    wq = WildcardQuery.new(t)
+    check_hits(wq)
+  end
+
+  def test_multi_phrase_query()
+    t11 = Term.new("field", "quick")
+    t12 = Term.new("field", "fast")
+    t21 = Term.new("field", "brown")
+    t22 = Term.new("field", "red")
+    t23 = Term.new("field", "hairy")
+    t3 = Term.new("field", "fox")
+
+    mpq = MultiPhraseQuery.new()
+    mpq << [t11, t12]
+    mpq << [t21, t22, t23]
+    mpq << t3
+    check_hits(mpq)
+
+    mpq.slop = 4
+    check_hits(mpq)
+  end
+end
data/test/unit/search/tc_multi_searcher2.rb
ADDED
@@ -0,0 +1,126 @@
+require File.dirname(__FILE__) + "/../../test_helper"
+
+# Tests the multisearcher by comparing it's results
+# with those returned by an IndexSearcher.
+# Taken from TestMultiSearcherRanking.java of Lucene
+class MultiSearcher2Test < Test::Unit::TestCase
+  include Ferret::Document
+  include Ferret::Search
+  include Ferret::Store
+  include Ferret::Analysis
+  include Ferret::Index
+
+  FIELD_NAME = 'body'
+
+  def test_one_Term_query
+    check_query 'three'
+  end
+
+  def test_two_term_query
+    check_query 'three foo'
+    # as of 2006/03/11 these fail in Java Lucene as
+    # well, hits are returned in slightly different order.
+    #check_query '+pizza +blue*', :body
+    #check_query '+pizza blue*', :body
+    #check_query 'pizza blue*', :body
+  end
+
+  def test_prefix_query
+    check_query 'multi*'
+  end
+
+  def test_fuzzy_query
+    check_query 'multiThree~'
+  end
+
+  def test_range_query
+    check_query '{multiA multiP}'
+  end
+
+  # fails (query parse error)
+  #def test_multi_phrase_query
+  #  check_query '"blueberry pi*"'
+  #end
+
+  def test_nomatch_query
+    check_query '+three +nomatch'
+  end
+
+  # this yields differing scores, but doesn't work in
+  # Java Lucene either
+  #def test_term_repeated_query
+  #  check_query 'multi* multi* foo'
+  #end
+
+
+  def check_query(query_str, debug_field=nil)
+    @parser ||= Ferret::QueryParser.new(FIELD_NAME, :analyzer => @analyzer)
+    query = @parser.parse(query_str)
+    puts "Query: #{query}" if debug_field
+    IndexTestHelper.explain(query, @multi, debug_field) if debug_field
+    IndexTestHelper.explain(query, @single, debug_field) if debug_field
+    multi_hits = @multi.search(query)
+    single_hits = @single.search(query)
+    assert_equal single_hits.size, multi_hits.size, "hit count differs"
+    multi_hits.score_docs.each_with_index { |multi_sd, i|
+      single_sd = single_hits.score_docs[i]
+      doc_multi = @multi.doc(multi_sd.doc)
+      doc_single = @single.doc(single_sd.doc)
+      assert_equal single_sd.score, multi_sd.score, "score differs in result #{i}"
+      assert_equal doc_single[FIELD_NAME], doc_multi[FIELD_NAME], "field values differ in result #{i}"
+    }
+  end
+
+  def setup()
+    @analyzer = WhiteSpaceAnalyzer.new()
+    # create MultiSearcher from two seperate searchers
+    d1 = RAMDirectory.new()
+    iw1 = IndexWriter.new(d1, :analyzer => @analyzer, :create => true)
+    add_collection1(iw1)
+    iw1.close()
+
+    d2 = RAMDirectory.new()
+    iw2 = IndexWriter.new(d2, :analyzer => @analyzer, :create => true)
+    add_collection2(iw2)
+    iw2.close()
+    @multi = MultiSearcher.new([IndexSearcher.new(d1), IndexSearcher.new(d2)])
+
+    # create IndexSearcher which contains all documents
+    d = RAMDirectory.new()
+    iw = IndexWriter.new(d, :analyzer => @analyzer, :create => true)
+    add_collection1(iw)
+    add_collection2(iw)
+    iw.close()
+    @single = IndexSearcher.new(d)
+  end
+
+  def tear_down()
+    @multi.close
+    @single.close
+  end
+
+  def add(value, iw)
+    d = Document.new
+    d << Field.new(FIELD_NAME, value, Field::Store::YES, Field::Index::TOKENIZED)
+    iw << d
+  end
+
+  def add_collection1(iw)
+    add("one blah three", iw)
+    add("one foo three multiOne", iw)
+    add("one foobar three multiThree", iw)
+    add("blueberry pie", iw)
+    add("blueberry strudel", iw)
+    add("blueberry pizza", iw)
+  end
+  def add_collection2(iw)
+    add("two blah three", iw)
+    add("two foo xxx multiTwo", iw)
+    add("two foobar xxx multiThreee", iw)
+    add("blueberry chewing gum", iw)
+    add("bluebird pizza", iw)
+    add("bluebird foobar pizza", iw)
+    add("piccadilly circus", iw)
+  end
+
+end