ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
@@ -1,6 +1,5 @@
1
1
  require File.dirname(__FILE__) + "/../../test_helper"
2
2
 
3
-
4
3
  class IndexTest < Test::Unit::TestCase
5
4
  include Ferret::Index
6
5
  include Ferret::Search
@@ -146,6 +145,7 @@ class IndexTest < Test::Unit::TestCase
146
145
 
147
146
  def test_fs_index
148
147
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
148
+
149
149
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
150
150
  assert_raise(StandardError) do
151
151
  Index.new(:path => fs_path,
@@ -171,6 +171,7 @@ class IndexTest < Test::Unit::TestCase
171
171
 
172
172
  def test_fs_index_is_persistant
173
173
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
174
+
174
175
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
175
176
  data = [
176
177
  {"def_field" => "one two", :id => "me"},
@@ -195,6 +196,7 @@ class IndexTest < Test::Unit::TestCase
195
196
 
196
197
  def test_key_used_for_id_field
197
198
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
199
+
198
200
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
199
201
  data = [
200
202
  {:my_id => "one two", :id => "me"},
@@ -301,6 +303,7 @@ class IndexTest < Test::Unit::TestCase
301
303
  index = Index.new(:default_field => "f")
302
304
  data.each {|doc| index << doc }
303
305
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
306
+
304
307
  index.persist(fs_path, true)
305
308
  assert_equal(3, index.size)
306
309
  assert_equal("zero", index[0]["f"])
@@ -585,4 +588,40 @@ class IndexTest < Test::Unit::TestCase
585
588
  index1.close
586
589
  index2.close
587
590
  end
591
+
592
+ def test_doc_specific_analyzer
593
+ index = Index.new
594
+ index.add_document("abc", Ferret::Analysis::Analyzer.new)
595
+ assert_equal(1, index.size)
596
+ end
597
+
598
+
599
+ def test_adding_empty_term_vectors
600
+ index = Index.new()
601
+ doc = Document.new
602
+
603
+ # Note: Adding keywords to either field1 or field2 gets rid of the error
604
+
605
+ doc << Field.new('field1', '',
606
+ Field::Store::NO,
607
+ Field::Index::TOKENIZED,
608
+ Field::TermVector::YES)
609
+
610
+ doc << Field.new('field2', '',
611
+ Field::Store::NO,
612
+ Field::Index::TOKENIZED,
613
+ Field::TermVector::YES)
614
+
615
+ # Note: keywords in this un-term-vector-stored field don't help the situation
616
+
617
+ doc << Field.new('field3', 'foo bar baz',
618
+ Field::Store::YES,
619
+ Field::Index::TOKENIZED,
620
+ Field::TermVector::NO)
621
+
622
+ index << doc
623
+
624
+ index.flush
625
+ index.close
626
+ end
588
627
  end
@@ -17,4 +17,11 @@ class TermTest < Test::Unit::TestCase
17
17
  term4.set!("field3", "text3")
18
18
  assert_not_equal(term1, term4)
19
19
  end
20
+
21
+ def test_non_strings()
22
+ t = Term.new(2345, 3)
23
+ t = Term.new(:symbol, :symbol)
24
+ t.set!(:symbol, :symbol)
25
+ t.set!(234, 23462346)
26
+ end
20
27
  end
@@ -240,5 +240,13 @@ module IndexTestHelper
240
240
  end
241
241
  return docs
242
242
  end
243
+
244
+ def IndexTestHelper.explain (query, searcher, field)
245
+ top_docs = searcher.search(query)
246
+ top_docs.score_docs.each { |sd|
247
+ puts "\nDoc #{sd.doc}: #{searcher.doc(sd.doc)[field]}\n#{searcher.explain(query, sd.doc).to_s}\n"
248
+ }
249
+ end
250
+
243
251
  end
244
252
 
@@ -8,10 +8,10 @@ class QueryParserTest < Test::Unit::TestCase
8
8
  ['', ''],
9
9
  ['word', 'word'],
10
10
  ['field:word', 'field:word'],
11
- ['"word1 word2 word3"', '"word word word"'],
12
- ['"word1 2342 word3"', '"word word"'],
11
+ ['"word1 word2 word#"', '"word1 word2 word"'],
12
+ ['"word1 %%% word3"', '"word1 word3"'],
13
13
  ['field:"one two three"', 'field:"one two three"'],
14
- ['field:"one 222 three"', 'field:"one three"'],
14
+ ['field:"one %%% three"', 'field:"one three"'],
15
15
  ['field:"one <> three"', 'field:"one <> three"'],
16
16
  ['field:"one <> three <>"', 'field:"one <> three"'],
17
17
  ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
@@ -104,7 +104,9 @@ class QueryParserTest < Test::Unit::TestCase
104
104
  :analyzer => Ferret::Analysis::StandardAnalyzer.new)
105
105
  pairs = [
106
106
  ['key:1234', 'key:1234'],
107
- ['key:(1234)', 'key:1234']
107
+ ['key:(1234 and Dave)', 'key:1234 key:dave'],
108
+ ['key:(1234)', 'key:1234'],
109
+ ['and the but they with', '']
108
110
  ]
109
111
 
110
112
  pairs.each do |query_str, expected|
@@ -4,11 +4,11 @@ class SortFieldTest < Test::Unit::TestCase
4
4
  include Ferret::Search
5
5
 
6
6
  def test_params()
7
- assert_equal("score", SortField::SortType::SCORE.to_s)
8
- assert_equal("doc", SortField::SortType::DOC.to_s)
9
- assert_equal("auto", SortField::SortType::AUTO.to_s)
10
- assert_equal("string", SortField::SortType::STRING.to_s)
11
- assert_equal("int", SortField::SortType::INTEGER.to_s)
12
- assert_equal("float", SortField::SortType::FLOAT.to_s)
7
+ assert_equal("SCORE", SortField::SortType::SCORE.to_s)
8
+ assert_equal("DOC", SortField::SortType::DOC.to_s)
9
+ assert_equal("auto", SortField::SortType::AUTO.to_s)
10
+ assert_equal("string", SortField::SortType::STRING.to_s)
11
+ assert_equal("integer", SortField::SortType::INTEGER.to_s)
12
+ assert_equal("float", SortField::SortType::FLOAT.to_s)
13
13
  end
14
14
  end
@@ -59,6 +59,7 @@ class IndexSearcherTest < Test::Unit::TestCase
59
59
  assert_equal(18, @is.max_doc)
60
60
  assert_equal("20050930", @is.doc(0).values(:date))
61
61
  assert_equal("cat1/sub2/subsub2", @is.doc(4)[:cat])
62
+ assert_equal("20051012", @is.doc(12)[:date])
62
63
  end
63
64
 
64
65
  def test_term_query
@@ -129,6 +130,13 @@ class IndexSearcherTest < Test::Unit::TestCase
129
130
  bq.add_query(tq2, BooleanClause::Occur::SHOULD)
130
131
  bq.add_query(tq3, BooleanClause::Occur::SHOULD)
131
132
  check_hits(bq, [1,2,3,4,6,8,11,14])
133
+
134
+ bq = BooleanQuery.new()
135
+ bc1 = BooleanClause.new(tq2, BooleanClause::Occur::SHOULD)
136
+ bc2 = BooleanClause.new(tq3, BooleanClause::Occur::SHOULD)
137
+ bq.add_clause(bc1)
138
+ bq.add_clause(bc2)
139
+ check_hits(bq, [1,2,3,4,6,8,11,14])
132
140
  end
133
141
 
134
142
  def test_phrase_query()
@@ -0,0 +1,275 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require File.join(File.dirname(__FILE__), "tc_index_searcher.rb")
3
+
4
+ # make sure a MultiSearcher searching only one index
5
+ # passes all the IndexSearcher tests
6
+ class SimpleMultiSearcherTest < IndexSearcherTest
7
+ alias :old_setup :setup
8
+ def setup()
9
+ old_setup
10
+ @multi = MultiSearcher.new([IndexSearcher.new(@dir)])
11
+ end
12
+ end
13
+
14
+
15
+ # checks query results of a multisearcher searching two indexes
16
+ # against those of a single indexsearcher searching the same
17
+ # set of documents
18
+ class MultiSearcherTest < Test::Unit::TestCase
19
+ include Ferret::Document
20
+ include Ferret::Search
21
+ include Ferret::Store
22
+ include Ferret::Analysis
23
+ include Ferret::Index
24
+
25
+ def prepare_search_docs(data)
26
+ docs = []
27
+ data.each_with_index do |fields, i|
28
+ doc = Document.new()
29
+ fields.each_pair do |field, text|
30
+ doc << Field.new(field, text, Field::Store::YES, Field::Index::TOKENIZED, Field::TermVector::NO, false)
31
+ end
32
+ docs << doc
33
+ end
34
+ return docs
35
+ end
36
+
37
+ def prepare_documents
38
+ @documents = prepare_search_docs([
39
+ {"date" => "20050930", "field" => "word1",
40
+ "cat" => "cat1/"},
41
+ {"date" => "20051001", "field" => "word1 word2 the quick brown fox",
42
+ "cat" => "cat1/sub1"},
43
+ {"date" => "20051002", "field" => "word1 word3",
44
+ "cat" => "cat1/sub1/subsub1"},
45
+ {"date" => "20051003", "field" => "word1 word3",
46
+ "cat" => "cat1/sub2"},
47
+ {"date" => "20051004", "field" => "word1 word2",
48
+ "cat" => "cat1/sub2/subsub2"},
49
+ {"date" => "20051005", "field" => "word1",
50
+ "cat" => "cat2/sub1"},
51
+ {"date" => "20051006", "field" => "word1 word3",
52
+ "cat" => "cat2/sub1"},
53
+ {"date" => "20051007", "field" => "word1",
54
+ "cat" => "cat2/sub1"},
55
+ {"date" => "20051008", "field" => "word1 word2 word3 the fast brown fox",
56
+ "cat" => "cat2/sub1"}
57
+ ])
58
+ @documents2 = prepare_search_docs([
59
+ {"date" => "20051009", "field" => "word1",
60
+ "cat" => "cat3/sub1"},
61
+ {"date" => "20051010", "field" => "word1",
62
+ "cat" => "cat3/sub1"},
63
+ {"date" => "20051011", "field" => "word1 word3 the quick red fox",
64
+ "cat" => "cat3/sub1"},
65
+ {"date" => "20051012", "field" => "word1",
66
+ "cat" => "cat3/sub1"},
67
+ {"date" => "20051013", "field" => "word1",
68
+ "cat" => "cat1/sub2"},
69
+ {"date" => "20051014", "field" => "word1 word3 the quick hairy fox",
70
+ "cat" => "cat1/sub1"},
71
+ {"date" => "20051015", "field" => "word1",
72
+ "cat" => "cat1/sub2/subsub1"},
73
+ {"date" => "20051016",
74
+ "field" => "word1 the quick fox is brown and hairy and a little red",
75
+ "cat" => "cat1/sub1/subsub2"},
76
+ {"date" => "20051017", "field" => "word1 the brown fox is quick and red",
77
+ "cat" => "cat1/"}
78
+ ])
79
+ end
80
+
81
+ def setup()
82
+ prepare_documents
83
+ # create MultiSearcher from two seperate searchers
84
+ dir1 = RAMDirectory.new()
85
+ iw1 = IndexWriter.new(dir1, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
86
+ @documents.each { |doc| iw1 << doc }
87
+ iw1.close()
88
+
89
+ dir2 = RAMDirectory.new()
90
+ iw2 = IndexWriter.new(dir2, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
91
+ @documents2.each { |doc| iw2 << doc }
92
+ iw2.close()
93
+ @multi = Ferret::Search::MultiSearcher.new([IndexSearcher.new(dir1), IndexSearcher.new(dir2)])
94
+
95
+ # create single searcher
96
+ dir = RAMDirectory.new
97
+ iw = IndexWriter.new(dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
98
+ @documents.each { |doc| iw << doc }
99
+ @documents2.each { |doc| iw << doc }
100
+ iw.close
101
+ @single = IndexSearcher.new(dir)
102
+
103
+ @query_parser = Ferret::QueryParser.new(['date', 'field', 'cat'], :analyzer => WhiteSpaceAnalyzer.new())
104
+ end
105
+
106
+ def tear_down()
107
+ @multi.close
108
+ @single.close
109
+ end
110
+
111
+ def check_hits(query, debug_field=nil)
112
+ query = @query_parser.parse(query) if (query.is_a? String)
113
+ multi_docs = @multi.search(query)
114
+ single_docs = @single.search(query)
115
+ IndexTestHelper.explain(query, @single, debug_field) if debug_field
116
+ IndexTestHelper.explain(query, @multi, debug_field) if debug_field
117
+ assert_equal(single_docs.score_docs.size, multi_docs.score_docs.size, 'hit count')
118
+ assert_equal(single_docs.total_hits, multi_docs.total_hits, 'hit count')
119
+
120
+ multi_docs.score_docs.each_with_index { |sd, id|
121
+ assert_equal(single_docs.score_docs[id].doc, sd.doc)
122
+ assert_equal(single_docs.score_docs[id].score, sd.score)
123
+ }
124
+ end
125
+
126
+ def test_get_doc()
127
+ assert_equal(18, @multi.max_doc)
128
+ assert_equal("20050930", @multi.doc(0).values(:date))
129
+ assert_equal("cat1/sub2/subsub2", @multi.doc(4)[:cat])
130
+ assert_equal("20051012", @multi.doc(12)[:date])
131
+ assert_equal(18, @single.max_doc)
132
+ assert_equal("20050930", @single.doc(0).values(:date))
133
+ assert_equal("cat1/sub2/subsub2", @single.doc(4)[:cat])
134
+ assert_equal("20051012", @single.doc(12)[:date])
135
+ end
136
+
137
+ def test_term_query
138
+ tq = TermQuery.new(Term.new("field", "word2"));
139
+ tq.boost = 100
140
+ check_hits(tq)
141
+
142
+ tq = TermQuery.new(Term.new("field", "2342"));
143
+ check_hits(tq)
144
+
145
+ tq = TermQuery.new(Term.new("field", ""));
146
+ check_hits(tq)
147
+
148
+ tq = TermQuery.new(Term.new("field", "word1"));
149
+ check_hits(tq)
150
+ end
151
+
152
+
153
+ def test_boolean_query
154
+ bq = BooleanQuery.new()
155
+ tq1 = TermQuery.new(Term.new("field", "word1"))
156
+ tq2 = TermQuery.new(Term.new("field", "word3"))
157
+ bq.add_query(tq1, BooleanClause::Occur::MUST)
158
+ bq.add_query(tq2, BooleanClause::Occur::MUST)
159
+ check_hits(bq)
160
+
161
+ tq3 = TermQuery.new(Term.new("field", "word2"))
162
+ bq.add_query(tq3, BooleanClause::Occur::SHOULD)
163
+ check_hits(bq)
164
+
165
+ bq = BooleanQuery.new()
166
+ bq.add_query(tq2, BooleanClause::Occur::MUST)
167
+ bq.add_query(tq3, BooleanClause::Occur::MUST_NOT)
168
+ check_hits(bq)
169
+
170
+ bq = BooleanQuery.new()
171
+ bq.add_query(tq2, BooleanClause::Occur::MUST_NOT)
172
+ check_hits(bq)
173
+
174
+ bq = BooleanQuery.new()
175
+ bq.add_query(tq2, BooleanClause::Occur::SHOULD)
176
+ bq.add_query(tq3, BooleanClause::Occur::SHOULD)
177
+ check_hits(bq)
178
+ end
179
+
180
+ def test_phrase_query()
181
+ pq = PhraseQuery.new()
182
+ t1 = Term.new("field", "quick")
183
+ t2 = Term.new("field", "brown")
184
+ t3 = Term.new("field", "fox")
185
+ pq << t1 << t2 << t3
186
+ check_hits(pq)
187
+
188
+ pq = PhraseQuery.new()
189
+ pq << t1
190
+ pq.add(t3, 2)
191
+ check_hits(pq)
192
+
193
+ pq.slop = 1
194
+ check_hits(pq)
195
+
196
+ pq.slop = 4
197
+ check_hits(pq)
198
+ end
199
+
200
+ def test_range_query()
201
+ rq = RangeQuery.new("date", "20051006", "20051010", true, true)
202
+ check_hits(rq)
203
+
204
+ rq = RangeQuery.new("date", "20051006", "20051010", false, true)
205
+ check_hits(rq)
206
+
207
+ rq = RangeQuery.new("date", "20051006", "20051010", true, false)
208
+ check_hits(rq)
209
+
210
+ rq = RangeQuery.new("date", "20051006", "20051010", false, false)
211
+ check_hits(rq)
212
+
213
+ rq = RangeQuery.new("date", nil, "20051003", false, true)
214
+ check_hits(rq)
215
+
216
+ rq = RangeQuery.new("date", nil, "20051003", false, false)
217
+ check_hits(rq)
218
+
219
+ rq = RangeQuery.new_less("date", "20051003", true)
220
+ check_hits(rq)
221
+
222
+ rq = RangeQuery.new_less("date", "20051003", false)
223
+ check_hits(rq)
224
+
225
+ rq = RangeQuery.new("date", "20051014", nil, true, false)
226
+ check_hits(rq)
227
+
228
+ rq = RangeQuery.new("date", "20051014", nil, false, false)
229
+ check_hits(rq)
230
+
231
+ rq = RangeQuery.new_more("date", "20051014", true)
232
+ check_hits(rq)
233
+
234
+ rq = RangeQuery.new_more("date", "20051014", false)
235
+ check_hits(rq)
236
+ end
237
+
238
+ def test_prefix_query()
239
+ t = Term.new("cat", "cat1")
240
+ pq = PrefixQuery.new(t)
241
+ check_hits(pq)
242
+
243
+ t.text = "cat1/sub2"
244
+ pq = PrefixQuery.new(t)
245
+ check_hits(pq)
246
+ end
247
+
248
+ def test_wildcard_query()
249
+ t = Term.new("cat", "cat1*")
250
+ wq = WildcardQuery.new(t)
251
+ check_hits(wq)
252
+
253
+ t.text = "cat1*/su??ub2"
254
+ wq = WildcardQuery.new(t)
255
+ check_hits(wq)
256
+ end
257
+
258
+ def test_multi_phrase_query()
259
+ t11 = Term.new("field", "quick")
260
+ t12 = Term.new("field", "fast")
261
+ t21 = Term.new("field", "brown")
262
+ t22 = Term.new("field", "red")
263
+ t23 = Term.new("field", "hairy")
264
+ t3 = Term.new("field", "fox")
265
+
266
+ mpq = MultiPhraseQuery.new()
267
+ mpq << [t11, t12]
268
+ mpq << [t21, t22, t23]
269
+ mpq << t3
270
+ check_hits(mpq)
271
+
272
+ mpq.slop = 4
273
+ check_hits(mpq)
274
+ end
275
+ end
@@ -0,0 +1,126 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ # Tests the multisearcher by comparing it's results
4
+ # with those returned by an IndexSearcher.
5
+ # Taken from TestMultiSearcherRanking.java of Lucene
6
+ class MultiSearcher2Test < Test::Unit::TestCase
7
+ include Ferret::Document
8
+ include Ferret::Search
9
+ include Ferret::Store
10
+ include Ferret::Analysis
11
+ include Ferret::Index
12
+
13
+ FIELD_NAME = 'body'
14
+
15
+ def test_one_Term_query
16
+ check_query 'three'
17
+ end
18
+
19
+ def test_two_term_query
20
+ check_query 'three foo'
21
+ # as of 2006/03/11 these fail in Java Lucene as
22
+ # well, hits are returned in slightly different order.
23
+ #check_query '+pizza +blue*', :body
24
+ #check_query '+pizza blue*', :body
25
+ #check_query 'pizza blue*', :body
26
+ end
27
+
28
+ def test_prefix_query
29
+ check_query 'multi*'
30
+ end
31
+
32
+ def test_fuzzy_query
33
+ check_query 'multiThree~'
34
+ end
35
+
36
+ def test_range_query
37
+ check_query '{multiA multiP}'
38
+ end
39
+
40
+ # fails (query parse error)
41
+ #def test_multi_phrase_query
42
+ # check_query '"blueberry pi*"'
43
+ #end
44
+
45
+ def test_nomatch_query
46
+ check_query '+three +nomatch'
47
+ end
48
+
49
+ # this yields differing scores, but doesn't work in
50
+ # Java Lucene either
51
+ #def test_term_repeated_query
52
+ # check_query 'multi* multi* foo'
53
+ #end
54
+
55
+
56
+ def check_query(query_str, debug_field=nil)
57
+ @parser ||= Ferret::QueryParser.new(FIELD_NAME, :analyzer => @analyzer)
58
+ query = @parser.parse(query_str)
59
+ puts "Query: #{query}" if debug_field
60
+ IndexTestHelper.explain(query, @multi, debug_field) if debug_field
61
+ IndexTestHelper.explain(query, @single, debug_field) if debug_field
62
+ multi_hits = @multi.search(query)
63
+ single_hits = @single.search(query)
64
+ assert_equal single_hits.size, multi_hits.size, "hit count differs"
65
+ multi_hits.score_docs.each_with_index { |multi_sd, i|
66
+ single_sd = single_hits.score_docs[i]
67
+ doc_multi = @multi.doc(multi_sd.doc)
68
+ doc_single = @single.doc(single_sd.doc)
69
+ assert_equal single_sd.score, multi_sd.score, "score differs in result #{i}"
70
+ assert_equal doc_single[FIELD_NAME], doc_multi[FIELD_NAME], "field values differ in result #{i}"
71
+ }
72
+ end
73
+
74
+ def setup()
75
+ @analyzer = WhiteSpaceAnalyzer.new()
76
+ # create MultiSearcher from two seperate searchers
77
+ d1 = RAMDirectory.new()
78
+ iw1 = IndexWriter.new(d1, :analyzer => @analyzer, :create => true)
79
+ add_collection1(iw1)
80
+ iw1.close()
81
+
82
+ d2 = RAMDirectory.new()
83
+ iw2 = IndexWriter.new(d2, :analyzer => @analyzer, :create => true)
84
+ add_collection2(iw2)
85
+ iw2.close()
86
+ @multi = MultiSearcher.new([IndexSearcher.new(d1), IndexSearcher.new(d2)])
87
+
88
+ # create IndexSearcher which contains all documents
89
+ d = RAMDirectory.new()
90
+ iw = IndexWriter.new(d, :analyzer => @analyzer, :create => true)
91
+ add_collection1(iw)
92
+ add_collection2(iw)
93
+ iw.close()
94
+ @single = IndexSearcher.new(d)
95
+ end
96
+
97
+ def tear_down()
98
+ @multi.close
99
+ @single.close
100
+ end
101
+
102
+ def add(value, iw)
103
+ d = Document.new
104
+ d << Field.new(FIELD_NAME, value, Field::Store::YES, Field::Index::TOKENIZED)
105
+ iw << d
106
+ end
107
+
108
+ def add_collection1(iw)
109
+ add("one blah three", iw)
110
+ add("one foo three multiOne", iw)
111
+ add("one foobar three multiThree", iw)
112
+ add("blueberry pie", iw)
113
+ add("blueberry strudel", iw)
114
+ add("blueberry pizza", iw)
115
+ end
116
+ def add_collection2(iw)
117
+ add("two blah three", iw)
118
+ add("two foo xxx multiTwo", iw)
119
+ add("two foobar xxx multiThreee", iw)
120
+ add("blueberry chewing gum", iw)
121
+ add("bluebird pizza", iw)
122
+ add("bluebird foobar pizza", iw)
123
+ add("piccadilly circus", iw)
124
+ end
125
+
126
+ end