ferret 0.9.1 → 0.9.2

Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138

data/test/unit/index/tc_index.rb
@@ -1,6 +1,5 @@
  require File.dirname(__FILE__) + "/../../test_helper"
 
-
  class IndexTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Search
@@ -146,6 +145,7 @@ class IndexTest < Test::Unit::TestCase
 
  def test_fs_index
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
  assert_raise(StandardError) do
  Index.new(:path => fs_path,
@@ -171,6 +171,7 @@ class IndexTest < Test::Unit::TestCase
 
  def test_fs_index_is_persistant
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
  data = [
  {"def_field" => "one two", :id => "me"},
@@ -195,6 +196,7 @@ class IndexTest < Test::Unit::TestCase
 
  def test_key_used_for_id_field
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
  data = [
  {:my_id => "one two", :id => "me"},
@@ -301,6 +303,7 @@ class IndexTest < Test::Unit::TestCase
  index = Index.new(:default_field => "f")
  data.each {|doc| index << doc }
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
+
  index.persist(fs_path, true)
  assert_equal(3, index.size)
  assert_equal("zero", index[0]["f"])
@@ -585,4 +588,40 @@ class IndexTest < Test::Unit::TestCase
  index1.close
  index2.close
  end
+
+ def test_doc_specific_analyzer
+ index = Index.new
+ index.add_document("abc", Ferret::Analysis::Analyzer.new)
+ assert_equal(1, index.size)
+ end
+
+
+ def test_adding_empty_term_vectors
+ index = Index.new()
+ doc = Document.new
+
+ # Note: Adding keywords to either field1 or field2 gets rid of the error
+
+ doc << Field.new('field1', '',
+ Field::Store::NO,
+ Field::Index::TOKENIZED,
+ Field::TermVector::YES)
+
+ doc << Field.new('field2', '',
+ Field::Store::NO,
+ Field::Index::TOKENIZED,
+ Field::TermVector::YES)
+
+ # Note: keywords in this un-term-vector-stored field don't help the situation
+
+ doc << Field.new('field3', 'foo bar baz',
+ Field::Store::YES,
+ Field::Index::TOKENIZED,
+ Field::TermVector::NO)
+
+ index << doc
+
+ index.flush
+ index.close
+ end
  end

data/test/unit/index/tc_term.rb
@@ -17,4 +17,11 @@ class TermTest < Test::Unit::TestCase
  term4.set!("field3", "text3")
  assert_not_equal(term1, term4)
  end
+
+ def test_non_strings()
+ t = Term.new(2345, 3)
+ t = Term.new(:symbol, :symbol)
+ t.set!(:symbol, :symbol)
+ t.set!(234, 23462346)
+ end
  end

data/test/unit/index/th_doc.rb
@@ -240,5 +240,13 @@ module IndexTestHelper
  end
  return docs
  end
+
+ def IndexTestHelper.explain (query, searcher, field)
+ top_docs = searcher.search(query)
+ top_docs.score_docs.each { |sd|
+ puts "\nDoc #{sd.doc}: #{searcher.doc(sd.doc)[field]}\n#{searcher.explain(query, sd.doc).to_s}\n"
+ }
+ end
+
  end
 

data/test/unit/query_parser/tc_query_parser.rb
@@ -8,10 +8,10 @@ class QueryParserTest < Test::Unit::TestCase
  ['', ''],
  ['word', 'word'],
  ['field:word', 'field:word'],
- ['"word1 word2 word3"', '"word word word"'],
- ['"word1 2342 word3"', '"word word"'],
+ ['"word1 word2 word#"', '"word1 word2 word"'],
+ ['"word1 %%% word3"', '"word1 word3"'],
  ['field:"one two three"', 'field:"one two three"'],
- ['field:"one 222 three"', 'field:"one three"'],
+ ['field:"one %%% three"', 'field:"one three"'],
  ['field:"one <> three"', 'field:"one <> three"'],
  ['field:"one <> three <>"', 'field:"one <> three"'],
  ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
@@ -104,7 +104,9 @@ class QueryParserTest < Test::Unit::TestCase
  :analyzer => Ferret::Analysis::StandardAnalyzer.new)
  pairs = [
  ['key:1234', 'key:1234'],
- ['key:(1234)', 'key:1234']
+ ['key:(1234 and Dave)', 'key:1234 key:dave'],
+ ['key:(1234)', 'key:1234'],
+ ['and the but they with', '']
  ]
 
  pairs.each do |query_str, expected|

data/test/unit/search/rtc_sort_field.rb
@@ -4,11 +4,11 @@ class SortFieldTest < Test::Unit::TestCase
  include Ferret::Search
 
  def test_params()
- assert_equal("score", SortField::SortType::SCORE.to_s)
- assert_equal("doc", SortField::SortType::DOC.to_s)
- assert_equal("auto", SortField::SortType::AUTO.to_s)
- assert_equal("string", SortField::SortType::STRING.to_s)
- assert_equal("int", SortField::SortType::INTEGER.to_s)
- assert_equal("float", SortField::SortType::FLOAT.to_s)
+ assert_equal("SCORE", SortField::SortType::SCORE.to_s)
+ assert_equal("DOC", SortField::SortType::DOC.to_s)
+ assert_equal("auto", SortField::SortType::AUTO.to_s)
+ assert_equal("string", SortField::SortType::STRING.to_s)
+ assert_equal("integer", SortField::SortType::INTEGER.to_s)
+ assert_equal("float", SortField::SortType::FLOAT.to_s)
  end
  end

data/test/unit/search/tc_index_searcher.rb
@@ -59,6 +59,7 @@ class IndexSearcherTest < Test::Unit::TestCase
  assert_equal(18, @is.max_doc)
  assert_equal("20050930", @is.doc(0).values(:date))
  assert_equal("cat1/sub2/subsub2", @is.doc(4)[:cat])
+ assert_equal("20051012", @is.doc(12)[:date])
  end
 
  def test_term_query
@@ -129,6 +130,13 @@ class IndexSearcherTest < Test::Unit::TestCase
  bq.add_query(tq2, BooleanClause::Occur::SHOULD)
  bq.add_query(tq3, BooleanClause::Occur::SHOULD)
  check_hits(bq, [1,2,3,4,6,8,11,14])
+
+ bq = BooleanQuery.new()
+ bc1 = BooleanClause.new(tq2, BooleanClause::Occur::SHOULD)
+ bc2 = BooleanClause.new(tq3, BooleanClause::Occur::SHOULD)
+ bq.add_clause(bc1)
+ bq.add_clause(bc2)
+ check_hits(bq, [1,2,3,4,6,8,11,14])
  end
 
  def test_phrase_query()

data/test/unit/search/tc_multi_searcher.rb
@@ -0,0 +1,275 @@
+ require File.dirname(__FILE__) + "/../../test_helper"
+ require File.join(File.dirname(__FILE__), "tc_index_searcher.rb")
+
+ # make sure a MultiSearcher searching only one index
+ # passes all the IndexSearcher tests
+ class SimpleMultiSearcherTest < IndexSearcherTest
+ alias :old_setup :setup
+ def setup()
+ old_setup
+ @multi = MultiSearcher.new([IndexSearcher.new(@dir)])
+ end
+ end
+
+
+ # checks query results of a multisearcher searching two indexes
+ # against those of a single indexsearcher searching the same
+ # set of documents
+ class MultiSearcherTest < Test::Unit::TestCase
+ include Ferret::Document
+ include Ferret::Search
+ include Ferret::Store
+ include Ferret::Analysis
+ include Ferret::Index
+
+ def prepare_search_docs(data)
+ docs = []
+ data.each_with_index do |fields, i|
+ doc = Document.new()
+ fields.each_pair do |field, text|
+ doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
+ end
+ docs << doc
+ end
+ return docs
+ end
+
+ def prepare_documents
+ @documents = prepare_search_docs([
+ {"date" => "20050930", "field" => "word1",
+ "cat" => "cat1/"},
+ {"date" => "20051001", "field" => "word1 word2 the quick brown fox",
+ "cat" => "cat1/sub1"},
+ {"date" => "20051002", "field" => "word1 word3",
+ "cat" => "cat1/sub1/subsub1"},
+ {"date" => "20051003", "field" => "word1 word3",
+ "cat" => "cat1/sub2"},
+ {"date" => "20051004", "field" => "word1 word2",
+ "cat" => "cat1/sub2/subsub2"},
+ {"date" => "20051005", "field" => "word1",
+ "cat" => "cat2/sub1"},
+ {"date" => "20051006", "field" => "word1 word3",
+ "cat" => "cat2/sub1"},
+ {"date" => "20051007", "field" => "word1",
+ "cat" => "cat2/sub1"},
+ {"date" => "20051008", "field" => "word1 word2 word3 the fast brown fox",
+ "cat" => "cat2/sub1"}
+ ])
+ @documents2 = prepare_search_docs([
+ {"date" => "20051009", "field" => "word1",
+ "cat" => "cat3/sub1"},
+ {"date" => "20051010", "field" => "word1",
+ "cat" => "cat3/sub1"},
+ {"date" => "20051011", "field" => "word1 word3 the quick red fox",
+ "cat" => "cat3/sub1"},
+ {"date" => "20051012", "field" => "word1",
+ "cat" => "cat3/sub1"},
+ {"date" => "20051013", "field" => "word1",
+ "cat" => "cat1/sub2"},
+ {"date" => "20051014", "field" => "word1 word3 the quick hairy fox",
+ "cat" => "cat1/sub1"},
+ {"date" => "20051015", "field" => "word1",
+ "cat" => "cat1/sub2/subsub1"},
+ {"date" => "20051016",
+ "field" => "word1 the quick fox is brown and hairy and a little red",
+ "cat" => "cat1/sub1/subsub2"},
+ {"date" => "20051017", "field" => "word1 the brown fox is quick and red",
+ "cat" => "cat1/"}
+ ])
+ end
+
+ def setup()
+ prepare_documents
+ # create MultiSearcher from two seperate searchers
+ dir1 = RAMDirectory.new()
+ iw1 = IndexWriter.new(dir1, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
+ @documents.each { |doc| iw1 << doc }
+ iw1.close()
+
+ dir2 = RAMDirectory.new()
+ iw2 = IndexWriter.new(dir2, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
+ @documents2.each { |doc| iw2 << doc }
+ iw2.close()
+ @multi = Ferret::Search::MultiSearcher.new([IndexSearcher.new(dir1), IndexSearcher.new(dir2)])
+
+ # create single searcher
+ dir = RAMDirectory.new
+ iw = IndexWriter.new(dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
+ @documents.each { |doc| iw << doc }
+ @documents2.each { |doc| iw << doc }
+ iw.close
+ @single = IndexSearcher.new(dir)
+
+ @query_parser = Ferret::QueryParser.new(['date', 'field', 'cat'], :analyzer => WhiteSpaceAnalyzer.new())
+ end
+
+ def tear_down()
+ @multi.close
+ @single.close
+ end
+
+ def check_hits(query, debug_field=nil)
+ query = @query_parser.parse(query) if (query.is_a? String)
+ multi_docs = @multi.search(query)
+ single_docs = @single.search(query)
+ IndexTestHelper.explain(query, @single, debug_field) if debug_field
+ IndexTestHelper.explain(query, @multi, debug_field) if debug_field
+ assert_equal(single_docs.score_docs.size, multi_docs.score_docs.size, 'hit count')
+ assert_equal(single_docs.total_hits, multi_docs.total_hits, 'hit count')
+
+ multi_docs.score_docs.each_with_index { |sd, id|
+ assert_equal(single_docs.score_docs[id].doc, sd.doc)
+ assert_equal(single_docs.score_docs[id].score, sd.score)
+ }
+ end
+
+ def test_get_doc()
+ assert_equal(18, @multi.max_doc)
+ assert_equal("20050930", @multi.doc(0).values(:date))
+ assert_equal("cat1/sub2/subsub2", @multi.doc(4)[:cat])
+ assert_equal("20051012", @multi.doc(12)[:date])
+ assert_equal(18, @single.max_doc)
+ assert_equal("20050930", @single.doc(0).values(:date))
+ assert_equal("cat1/sub2/subsub2", @single.doc(4)[:cat])
+ assert_equal("20051012", @single.doc(12)[:date])
+ end
+
+ def test_term_query
+ tq = TermQuery.new(Term.new("field", "word2"));
+ tq.boost = 100
+ check_hits(tq)
+
+ tq = TermQuery.new(Term.new("field", "2342"));
+ check_hits(tq)
+
+ tq = TermQuery.new(Term.new("field", ""));
+ check_hits(tq)
+
+ tq = TermQuery.new(Term.new("field", "word1"));
+ check_hits(tq)
+ end
+
+
+ def test_boolean_query
+ bq = BooleanQuery.new()
+ tq1 = TermQuery.new(Term.new("field", "word1"))
+ tq2 = TermQuery.new(Term.new("field", "word3"))
+ bq.add_query(tq1, BooleanClause::Occur::MUST)
+ bq.add_query(tq2, BooleanClause::Occur::MUST)
+ check_hits(bq)
+
+ tq3 = TermQuery.new(Term.new("field", "word2"))
+ bq.add_query(tq3, BooleanClause::Occur::SHOULD)
+ check_hits(bq)
+
+ bq = BooleanQuery.new()
+ bq.add_query(tq2, BooleanClause::Occur::MUST)
+ bq.add_query(tq3, BooleanClause::Occur::MUST_NOT)
+ check_hits(bq)
+
+ bq = BooleanQuery.new()
+ bq.add_query(tq2, BooleanClause::Occur::MUST_NOT)
+ check_hits(bq)
+
+ bq = BooleanQuery.new()
+ bq.add_query(tq2, BooleanClause::Occur::SHOULD)
+ bq.add_query(tq3, BooleanClause::Occur::SHOULD)
+ check_hits(bq)
+ end
+
+ def test_phrase_query()
+ pq = PhraseQuery.new()
+ t1 = Term.new("field", "quick")
+ t2 = Term.new("field", "brown")
+ t3 = Term.new("field", "fox")
+ pq << t1 << t2 << t3
+ check_hits(pq)
+
+ pq = PhraseQuery.new()
+ pq << t1
+ pq.add(t3, 2)
+ check_hits(pq)
+
+ pq.slop = 1
+ check_hits(pq)
+
+ pq.slop = 4
+ check_hits(pq)
+ end
+
+ def test_range_query()
+ rq = RangeQuery.new("date", "20051006", "20051010", true, true)
+ check_hits(rq)
+
+ rq = RangeQuery.new("date", "20051006", "20051010", false, true)
+ check_hits(rq)
+
+ rq = RangeQuery.new("date", "20051006", "20051010", true, false)
+ check_hits(rq)
+
+ rq = RangeQuery.new("date", "20051006", "20051010", false, false)
+ check_hits(rq)
+
+ rq = RangeQuery.new("date", nil, "20051003", false, true)
+ check_hits(rq)
+
+ rq = RangeQuery.new("date", nil, "20051003", false, false)
+ check_hits(rq)
+
+ rq = RangeQuery.new_less("date", "20051003", true)
+ check_hits(rq)
+
+ rq = RangeQuery.new_less("date", "20051003", false)
+ check_hits(rq)
+
+ rq = RangeQuery.new("date", "20051014", nil, true, false)
+ check_hits(rq)
+
+ rq = RangeQuery.new("date", "20051014", nil, false, false)
+ check_hits(rq)
+
+ rq = RangeQuery.new_more("date", "20051014", true)
+ check_hits(rq)
+
+ rq = RangeQuery.new_more("date", "20051014", false)
+ check_hits(rq)
+ end
+
+ def test_prefix_query()
+ t = Term.new("cat", "cat1")
+ pq = PrefixQuery.new(t)
+ check_hits(pq)
+
+ t.text = "cat1/sub2"
+ pq = PrefixQuery.new(t)
+ check_hits(pq)
+ end
+
+ def test_wildcard_query()
+ t = Term.new("cat", "cat1*")
+ wq = WildcardQuery.new(t)
+ check_hits(wq)
+
+ t.text = "cat1*/su??ub2"
+ wq = WildcardQuery.new(t)
+ check_hits(wq)
+ end
+
+ def test_multi_phrase_query()
+ t11 = Term.new("field", "quick")
+ t12 = Term.new("field", "fast")
+ t21 = Term.new("field", "brown")
+ t22 = Term.new("field", "red")
+ t23 = Term.new("field", "hairy")
+ t3 = Term.new("field", "fox")
+
+ mpq = MultiPhraseQuery.new()
+ mpq << [t11, t12]
+ mpq << [t21, t22, t23]
+ mpq << t3
+ check_hits(mpq)
+
+ mpq.slop = 4
+ check_hits(mpq)
+ end
+ end

data/test/unit/search/tc_multi_searcher2.rb
@@ -0,0 +1,126 @@
+ require File.dirname(__FILE__) + "/../../test_helper"
+
+ # Tests the multisearcher by comparing it's results
+ # with those returned by an IndexSearcher.
+ # Taken from TestMultiSearcherRanking.java of Lucene
+ class MultiSearcher2Test < Test::Unit::TestCase
+ include Ferret::Document
+ include Ferret::Search
+ include Ferret::Store
+ include Ferret::Analysis
+ include Ferret::Index
+
+ FIELD_NAME = 'body'
+
+ def test_one_Term_query
+ check_query 'three'
+ end
+
+ def test_two_term_query
+ check_query 'three foo'
+ # as of 2006/03/11 these fail in Java Lucene as
+ # well, hits are returned in slightly different order.
+ #check_query '+pizza +blue*', :body
+ #check_query '+pizza blue*', :body
+ #check_query 'pizza blue*', :body
+ end
+
+ def test_prefix_query
+ check_query 'multi*'
+ end
+
+ def test_fuzzy_query
+ check_query 'multiThree~'
+ end
+
+ def test_range_query
+ check_query '{multiA multiP}'
+ end
+
+ # fails (query parse error)
+ #def test_multi_phrase_query
+ # check_query '"blueberry pi*"'
+ #end
+
+ def test_nomatch_query
+ check_query '+three +nomatch'
+ end
+
+ # this yields differing scores, but doesn't work in
+ # Java Lucene either
+ #def test_term_repeated_query
+ # check_query 'multi* multi* foo'
+ #end
+
+
+ def check_query(query_str, debug_field=nil)
+ @parser ||= Ferret::QueryParser.new(FIELD_NAME, :analyzer => @analyzer)
+ query = @parser.parse(query_str)
+ puts "Query: #{query}" if debug_field
+ IndexTestHelper.explain(query, @multi, debug_field) if debug_field
+ IndexTestHelper.explain(query, @single, debug_field) if debug_field
+ multi_hits = @multi.search(query)
+ single_hits = @single.search(query)
+ assert_equal single_hits.size, multi_hits.size, "hit count differs"
+ multi_hits.score_docs.each_with_index { |multi_sd, i|
+ single_sd = single_hits.score_docs[i]
+ doc_multi = @multi.doc(multi_sd.doc)
+ doc_single = @single.doc(single_sd.doc)
+ assert_equal single_sd.score, multi_sd.score, "score differs in result #{i}"
+ assert_equal doc_single[FIELD_NAME], doc_multi[FIELD_NAME], "field values differ in result #{i}"
+ }
+ end
+
+ def setup()
+ @analyzer = WhiteSpaceAnalyzer.new()
+ # create MultiSearcher from two seperate searchers
+ d1 = RAMDirectory.new()
+ iw1 = IndexWriter.new(d1, :analyzer => @analyzer, :create => true)
+ add_collection1(iw1)
+ iw1.close()
+
+ d2 = RAMDirectory.new()
+ iw2 = IndexWriter.new(d2, :analyzer => @analyzer, :create => true)
+ add_collection2(iw2)
+ iw2.close()
+ @multi = MultiSearcher.new([IndexSearcher.new(d1), IndexSearcher.new(d2)])
+
+ # create IndexSearcher which contains all documents
+ d = RAMDirectory.new()
+ iw = IndexWriter.new(d, :analyzer => @analyzer, :create => true)
+ add_collection1(iw)
+ add_collection2(iw)
+ iw.close()
+ @single = IndexSearcher.new(d)
+ end
+
+ def tear_down()
+ @multi.close
+ @single.close
+ end
+
+ def add(value, iw)
+ d = Document.new
+ d << Field.new(FIELD_NAME, value, Field::Store::YES, Field::Index::TOKENIZED)
+ iw << d
+ end
+
+ def add_collection1(iw)
+ add("one blah three", iw)
+ add("one foo three multiOne", iw)
+ add("one foobar three multiThree", iw)
+ add("blueberry pie", iw)
+ add("blueberry strudel", iw)
+ add("blueberry pizza", iw)
+ end
+ def add_collection2(iw)
+ add("two blah three", iw)
+ add("two foo xxx multiTwo", iw)
+ add("two foobar xxx multiThreee", iw)
+ add("blueberry chewing gum", iw)
+ add("bluebird pizza", iw)
+ add("bluebird foobar pizza", iw)
+ add("piccadilly circus", iw)
+ end
+
+ end
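
The two new test files above exercise the MultiSearcher added in this release (data/lib/ferret/search/multi_searcher.rb, +261 lines). As a quick orientation, here is a minimal usage sketch distilled from that test setup; the field name 'body' and the document contents are illustrative only, not taken from the gem's test data:

  require 'ferret'
  include Ferret::Document
  include Ferret::Search
  include Ferret::Store
  include Ferret::Analysis
  include Ferret::Index

  # Build two small in-memory indexes (illustrative documents, not from the gem's tests).
  dir1 = RAMDirectory.new
  iw1 = IndexWriter.new(dir1, :analyzer => WhiteSpaceAnalyzer.new, :create => true)
  doc = Document.new
  doc << Field.new('body', 'the quick brown fox', Field::Store::YES, Field::Index::TOKENIZED)
  iw1 << doc
  iw1.close

  dir2 = RAMDirectory.new
  iw2 = IndexWriter.new(dir2, :analyzer => WhiteSpaceAnalyzer.new, :create => true)
  doc = Document.new
  doc << Field.new('body', 'the fast red fox', Field::Store::YES, Field::Index::TOKENIZED)
  iw2 << doc
  iw2.close

  # Search both indexes through a single MultiSearcher, exactly as the tests do.
  multi = MultiSearcher.new([IndexSearcher.new(dir1), IndexSearcher.new(dir2)])
  query = Ferret::QueryParser.new('body', :analyzer => WhiteSpaceAnalyzer.new).parse('fox')
  top_docs = multi.search(query)
  top_docs.score_docs.each { |sd| puts "#{sd.doc}: #{multi.doc(sd.doc)['body']}" }
  multi.close

The tests assert that this combined searcher returns the same hit counts, document order, and scores as a single IndexSearcher over the union of the documents.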