xapian-fu 0.2 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +152 -13
- data/examples/query.rb +34 -6
- data/examples/spider.rb +44 -15
- data/lib/xapian_fu/query_parser.rb +179 -0
- data/lib/xapian_fu/result_set.rb +52 -0
- data/lib/xapian_fu/stopper_factory.rb +40 -0
- data/lib/xapian_fu/stopwords/README +7 -0
- data/lib/xapian_fu/stopwords/danish.txt +102 -0
- data/lib/xapian_fu/stopwords/dutch.txt +113 -0
- data/lib/xapian_fu/stopwords/english.txt +312 -0
- data/lib/xapian_fu/stopwords/finnish.txt +89 -0
- data/lib/xapian_fu/stopwords/french.txt +168 -0
- data/lib/xapian_fu/stopwords/german.txt +286 -0
- data/lib/xapian_fu/stopwords/hungarian.txt +203 -0
- data/lib/xapian_fu/stopwords/italian.txt +295 -0
- data/lib/xapian_fu/stopwords/norwegian.txt +186 -0
- data/lib/xapian_fu/stopwords/portuguese.txt +245 -0
- data/lib/xapian_fu/stopwords/russian.txt +236 -0
- data/lib/xapian_fu/stopwords/spanish.txt +348 -0
- data/lib/xapian_fu/stopwords/swedish.txt +125 -0
- data/lib/xapian_fu/stopwords/update.rb +7 -0
- data/lib/xapian_fu/xapian_db.rb +215 -99
- data/lib/xapian_fu/xapian_doc.rb +229 -47
- data/lib/xapian_fu/xapian_doc_value_accessor.rb +125 -0
- data/lib/xapian_fu/xapian_documents_accessor.rb +82 -0
- data/lib/xapian_fu.rb +1 -0
- data/spec/query_parser_spec.rb +43 -0
- data/spec/stopper_factory_spec.rb +57 -0
- data/spec/xapian_db_spec.rb +458 -215
- data/spec/xapian_doc_spec.rb +180 -0
- data/spec/xapian_doc_value_accessor_spec.rb +92 -0
- metadata +29 -5
data/spec/xapian_db_spec.rb
CHANGED
@@ -2,6 +2,7 @@ require 'xapian'
|
|
2
2
|
require 'lib/xapian_fu.rb'
|
3
3
|
include XapianFu
|
4
4
|
require 'fileutils'
|
5
|
+
require 'date'
|
5
6
|
|
6
7
|
# Will be deleted
|
7
8
|
tmp_dir = '/tmp/xapian_fu_test.db'
|
@@ -10,24 +11,36 @@ describe XapianDb do
|
|
10
11
|
before do
|
11
12
|
FileUtils.rm_rf tmp_dir if File.exists?(tmp_dir)
|
12
13
|
end
|
14
|
+
|
15
|
+
describe "new" do
|
16
|
+
it "should make an in-memory database by default" do
|
17
|
+
xdb = XapianDb.new
|
18
|
+
xdb.ro.should be_a_kind_of(Xapian::Database)
|
19
|
+
xdb.rw.should === xdb.ro
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should make an on-disk database when given a :dir option" do
|
23
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
24
|
+
xdb.rw
|
25
|
+
File.exists?(tmp_dir).should be_true
|
26
|
+
xdb.should respond_to(:dir)
|
27
|
+
xdb.dir.should == tmp_dir
|
28
|
+
xdb.rw.should be_a_kind_of(Xapian::WritableDatabase)
|
29
|
+
xdb.ro.should be_a_kind_of(Xapian::Database)
|
30
|
+
end
|
13
31
|
|
14
|
-
it "should make an in-memory database by default" do
|
15
|
-
xdb = XapianDb.new
|
16
|
-
xdb.ro.should be_a_kind_of(Xapian::Database)
|
17
|
-
xdb.rw.should === xdb.ro
|
18
32
|
end
|
19
|
-
|
20
|
-
it "should
|
33
|
+
|
34
|
+
it "should lazily create the on-disk database when rw is used" do
|
21
35
|
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
22
|
-
File.exists?(tmp_dir).should
|
23
|
-
xdb.
|
24
|
-
|
25
|
-
xdb.rw.should be_a_kind_of(Xapian::WritableDatabase)
|
26
|
-
xdb.ro.should be_a_kind_of(Xapian::Database)
|
36
|
+
File.exists?(tmp_dir).should be_false
|
37
|
+
xdb.rw
|
38
|
+
File.exists?(tmp_dir).should be_true
|
27
39
|
end
|
28
40
|
|
29
41
|
it "should flush documents to the index when flush is called" do
|
30
42
|
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
43
|
+
xdb.flush
|
31
44
|
xdb.size.should == 0
|
32
45
|
xdb << "Once upon a time"
|
33
46
|
xdb.size.should == 0
|
@@ -35,261 +48,491 @@ describe XapianDb do
|
|
35
48
|
xdb.size.should == 1
|
36
49
|
end
|
37
50
|
|
38
|
-
it "should
|
39
|
-
|
40
|
-
|
41
|
-
|
51
|
+
it "should return a nice string when inspect is called" do
|
52
|
+
XapianDb.new.inspect.should =~ /XapianDb/
|
53
|
+
end
|
54
|
+
|
55
|
+
describe "transaction" do
|
56
|
+
it "should commit writes when the block completed successfully" do
|
57
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
42
58
|
xdb << "Once upon a time"
|
43
|
-
xdb.
|
59
|
+
xdb.transaction do
|
60
|
+
xdb << "Once upon a time"
|
61
|
+
xdb.size.should == 1
|
62
|
+
end
|
63
|
+
xdb.flush
|
64
|
+
xdb.size.should == 2
|
44
65
|
end
|
45
|
-
xdb.flush
|
46
|
-
xdb.size.should == 2
|
47
|
-
end
|
48
66
|
|
49
|
-
|
50
|
-
|
51
|
-
|
67
|
+
it "should serialize attempts at concurrent transactions" do
|
68
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
69
|
+
thread = Thread.new do
|
70
|
+
xdb.transaction do
|
71
|
+
sleep 0.1
|
72
|
+
xdb << "Once upon a time"
|
73
|
+
sleep 0.1
|
74
|
+
xdb << "Once upon a time"
|
75
|
+
end
|
76
|
+
end
|
52
77
|
xdb.transaction do
|
53
|
-
sleep 0.1
|
54
78
|
xdb << "Once upon a time"
|
55
79
|
sleep 0.1
|
56
80
|
xdb << "Once upon a time"
|
57
81
|
end
|
82
|
+
thread.join
|
83
|
+
xdb.flush
|
84
|
+
xdb.size.should == 4
|
58
85
|
end
|
59
|
-
|
60
|
-
|
61
|
-
|
86
|
+
|
87
|
+
it "should abort a transaction on an exception" do
|
88
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
62
89
|
xdb << "Once upon a time"
|
90
|
+
begin
|
91
|
+
xdb.transaction do
|
92
|
+
xdb << "Once upon a time"
|
93
|
+
raise StandardError
|
94
|
+
end
|
95
|
+
rescue StandardError
|
96
|
+
end
|
97
|
+
xdb.flush
|
98
|
+
xdb.size.should == 1
|
63
99
|
end
|
64
|
-
thread.join
|
65
|
-
xdb.flush
|
66
|
-
xdb.size.should == 4
|
67
100
|
end
|
68
101
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
xdb.
|
74
|
-
|
75
|
-
|
102
|
+
describe "documents" do
|
103
|
+
|
104
|
+
it "should return a new XapianDoc with the db set on new" do
|
105
|
+
xdb = XapianDb.new
|
106
|
+
doc = xdb.documents.new
|
107
|
+
doc.should be_a_kind_of XapianDoc
|
108
|
+
doc.db.should == xdb
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should raise a XapianFu::DocNotFound error on find if the document doesn't exist" do
|
112
|
+
xdb = XapianDb.new
|
113
|
+
xdb << "once upon a time"
|
114
|
+
xdb.flush
|
115
|
+
lambda { xdb.documents.find(10) }.should raise_error XapianFu::DocNotFound
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should retrieve documents with the find method" do
|
119
|
+
xdb = XapianDb.new
|
120
|
+
xdb << "Once upon a time"
|
121
|
+
xdb.flush
|
122
|
+
xdb.documents.find(1).should be_a_kind_of(XapianDoc)
|
123
|
+
end
|
124
|
+
|
125
|
+
it "should retrieve documents like an array and return a XapianDoc" do
|
126
|
+
xdb = XapianDb.new
|
127
|
+
xdb << "once upon a time"
|
128
|
+
xdb.flush
|
129
|
+
xdb.documents[1].should be_a_kind_of(XapianDoc)
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should provide the id of retrieved documents" do
|
133
|
+
xdb = XapianDb.new
|
134
|
+
xdb << "once upon a time"
|
135
|
+
xdb.documents[1].id.should == 1
|
136
|
+
end
|
137
|
+
|
138
|
+
it "should set the db field for the retrieved XapianDoc" do
|
139
|
+
xdb = XapianDb.new
|
140
|
+
xdb << "once upon a time"
|
141
|
+
xdb.documents[1].db.should == xdb
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should delete docs by id" do
|
145
|
+
xdb = XapianDb.new
|
146
|
+
doc = xdb << XapianDoc.new("Once upon a time")
|
147
|
+
xdb.flush
|
148
|
+
xdb.size.should == 1
|
149
|
+
xdb.documents.delete(doc.id).should == 1
|
150
|
+
xdb.flush
|
151
|
+
xdb.size.should == 0
|
152
|
+
end
|
153
|
+
|
154
|
+
it "should handle being asked to delete docs that don't exist in the db" do
|
155
|
+
xdb = XapianDb.new
|
156
|
+
doc = xdb << XapianDoc.new("Once upon a time")
|
157
|
+
xdb.flush
|
158
|
+
xdb.documents.delete(100000).should == nil
|
159
|
+
end
|
160
|
+
|
161
|
+
describe "max" do
|
162
|
+
it "should return the doc with the highest id" do
|
163
|
+
xdb = XapianDb.new
|
164
|
+
xdb << { :id => 20 }
|
165
|
+
xdb << { :id => 9 }
|
166
|
+
xdb << { :id => 15 }
|
167
|
+
xdb.flush
|
168
|
+
xdb.documents.max.id.should == 20
|
169
|
+
end
|
170
|
+
|
171
|
+
it "should return the doc with the highest specified stored value" do
|
172
|
+
xdb = XapianDb.new(:fields => { :number => { :store => true } })
|
173
|
+
xdb << { :id => 8, :number => "200" }
|
174
|
+
xdb << { :id => 9, :number => "300" }
|
175
|
+
xdb << { :id => 15, :number => "100" }
|
176
|
+
xdb.flush
|
177
|
+
xdb.documents.max(:number).id.should == 9
|
76
178
|
end
|
77
|
-
rescue StandardError
|
78
179
|
end
|
79
|
-
xdb.flush
|
80
|
-
xdb.size.should == 1
|
81
180
|
end
|
82
181
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
182
|
+
describe "when indexing" do
|
183
|
+
it "should index a XapianDoc" do
|
184
|
+
xdb = XapianDb.new
|
185
|
+
xdb << XapianDoc.new({ :text => "once upon a time", :title => "A story" })
|
186
|
+
xdb.flush
|
187
|
+
xdb.size.should == 1
|
188
|
+
end
|
89
189
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
190
|
+
it "should index a Hash" do
|
191
|
+
xdb = XapianDb.new
|
192
|
+
xdb << { :text => "once upon a time", :title => "A story" }
|
193
|
+
xdb.flush
|
194
|
+
xdb.size.should == 1
|
195
|
+
end
|
96
196
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
197
|
+
it "should index a string" do
|
198
|
+
xdb = XapianDb.new
|
199
|
+
xdb << "once upon a time"
|
200
|
+
xdb.size.should == 1
|
201
|
+
xdb << XapianDoc.new("once upon a time")
|
202
|
+
xdb.size.should == 2
|
203
|
+
end
|
204
|
+
|
103
205
|
end
|
104
206
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
207
|
+
describe "search" do
|
208
|
+
it "should return a list of XapianDocs with the weight and match set" do
|
209
|
+
xdb = XapianDb.new
|
210
|
+
xdb << XapianDoc.new(:title => 'once upon a time')
|
211
|
+
xdb << XapianDoc.new(:title => 'three little pings')
|
212
|
+
results = xdb.search("pings")
|
213
|
+
results.should be_a_kind_of Array
|
214
|
+
results.size.should == 1
|
215
|
+
results.first.should be_a_kind_of XapianDoc
|
216
|
+
results.first.match.should be_a_kind_of Xapian::Match
|
217
|
+
results.first.weight.should be_a_kind_of Float
|
218
|
+
end
|
111
219
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
220
|
+
it "should collapse results by the value specified by the :collapse option" do
|
221
|
+
xdb = XapianDb.new(:collapsible => :group)
|
222
|
+
alpha1 = xdb << XapianDoc.new(:words => "cow dog cat", :group => "alpha")
|
223
|
+
alpha2 = xdb << XapianDoc.new(:words => "cow dog", :group => "alpha")
|
224
|
+
beta1 = xdb << XapianDoc.new(:words => "cow", :group => "beta")
|
225
|
+
results = xdb.search("cow dog cat", :collapse => :group, :default_op => :or)
|
226
|
+
results.should == [alpha1, beta1]
|
227
|
+
end
|
118
228
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
229
|
+
it "should do a case-insensitive boolean AND search by default" do
|
230
|
+
xdb = XapianDb.new
|
231
|
+
doc1 = xdb << "cow dog cat"
|
232
|
+
doc2 = xdb << "cow dog"
|
233
|
+
xdb.search("cow dog cat").should == [doc1]
|
234
|
+
end
|
125
235
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
236
|
+
it "should do a case-sensitive boolean search when the :boolean_anycase option is set to false" do
|
237
|
+
pending
|
238
|
+
xdb = XapianDb.new
|
239
|
+
doc1 = xdb << "cow dog"
|
240
|
+
doc2 = xdb << "COW dog"
|
241
|
+
xdb.search("cow", :boolean_anycase => false).should == [doc1]
|
242
|
+
xdb.search("COW", :boolean_anycase => false).should == [doc2]
|
243
|
+
end
|
131
244
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
245
|
+
it "should allow LOVEHATE style queries by default" do
|
246
|
+
xdb = XapianDb.new
|
247
|
+
doc1 = xdb << "cow dog cat moose"
|
248
|
+
doc2 = xdb << "cow dog"
|
249
|
+
doc3 = xdb << "cow dog moose"
|
250
|
+
doc4 = xdb << "cow moose"
|
251
|
+
xdb.search("cow +dog -cat", :default_op => :or).should == [doc2, doc3]
|
252
|
+
end
|
139
253
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
end
|
254
|
+
it "should do a boolean OR search when :default_op option is set to :or" do
|
255
|
+
xdb = XapianDb.new
|
256
|
+
doc1 = xdb << "cow dog cat"
|
257
|
+
doc2 = xdb << "cow dog"
|
258
|
+
xdb.search("cow dog cat", :default_op => :or).should == [doc1, doc2]
|
259
|
+
end
|
147
260
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
xdb.flush
|
156
|
-
xdb.size.should == 1
|
157
|
-
updated_doc.id.should == doc.id
|
158
|
-
end
|
261
|
+
it "should allow a wildcard search by default" do
|
262
|
+
xdb = XapianDb.new
|
263
|
+
doc1 = xdb << "fox"
|
264
|
+
doc2 = xdb << "follow"
|
265
|
+
doc3 = xdb << "fantastic"
|
266
|
+
xdb.search("fo*").should == [doc1, doc2]
|
267
|
+
end
|
159
268
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
xdb.size.should == 0
|
168
|
-
end
|
269
|
+
it "should ignore wildcard searches when the :wildcards option is false" do
|
270
|
+
xdb = XapianDb.new
|
271
|
+
doc1 = xdb << "fox"
|
272
|
+
doc2 = xdb << "follow"
|
273
|
+
doc3 = xdb << "fo"
|
274
|
+
xdb.search("fo*", :wildcards => false).should == [doc3]
|
275
|
+
end
|
169
276
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
277
|
+
it "should provide a corrected spelling string by default" do
|
278
|
+
pending
|
279
|
+
xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true)
|
280
|
+
xdb.rw.add_spelling("house mouse louse")
|
281
|
+
xdb << "there is a mouse in this house"
|
282
|
+
xdb.flush
|
283
|
+
results = xdb.search("moose")
|
284
|
+
results.corrected_query.should == "mouse"
|
285
|
+
end
|
176
286
|
|
177
|
-
|
178
|
-
xdb = XapianDb.new
|
179
|
-
doc = xdb << XapianDoc.new(:id => 0xbeef, :title => "Once upon a time")
|
180
|
-
xdb.flush
|
181
|
-
xdb.documents[0xbeef].id.should == 0xbeef
|
182
|
-
doc.id.should == 0xbeef
|
183
|
-
end
|
287
|
+
it "should do phrase matching by default when then :default_op option is :phrase"
|
184
288
|
|
185
|
-
|
186
|
-
xdb = XapianDb.new
|
187
|
-
doc = xdb << XapianDoc.new("once upon a time")
|
188
|
-
doc.terms.should be_a_kind_of Array
|
189
|
-
doc.terms.last.should be_a_kind_of Xapian::Term
|
190
|
-
doc.terms.last.term.should == "upon"
|
191
|
-
end
|
289
|
+
it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe"
|
192
290
|
|
193
|
-
|
194
|
-
xdb = XapianDb.new
|
195
|
-
doc = xdb << XapianDoc.new(:title => 'once upon a time')
|
196
|
-
doc.terms.should be_a_kind_of Array
|
197
|
-
doc.terms.last.should be_a_kind_of Xapian::Term
|
198
|
-
doc.terms.last.term.should == "upon"
|
199
|
-
end
|
291
|
+
it "should do PURE_NOT matching by default when the :default_op option is :pure_not"
|
200
292
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
293
|
+
it "should page results when given the :page and :per_page options" do
|
294
|
+
xdb = XapianDb.new
|
295
|
+
content = "word"
|
296
|
+
200.times { xdb << XapianDoc.new(content) }
|
297
|
+
xdb.size.should == 200
|
298
|
+
results = xdb.search(content, :page => 1, :per_page => 12)
|
299
|
+
results.first.id.should == 1
|
300
|
+
results.size.should == 12
|
301
|
+
results = xdb.search(content, :page => 5, :per_page => 18)
|
302
|
+
results.first.id.should == 18 * 4 + 1
|
303
|
+
results.size.should == 18
|
304
|
+
results = xdb.search(content, :page => 100, :per_page => 12)
|
305
|
+
results.size.should == 0
|
306
|
+
end
|
212
307
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
results = xdb.search(content, :page => 5, :per_page => 18)
|
222
|
-
results.first.id.should == 18 * 4 + 1
|
223
|
-
results.size.should == 18
|
224
|
-
results = xdb.search(content, :page => 100, :per_page => 12)
|
225
|
-
results.size.should == 0
|
226
|
-
end
|
308
|
+
it "should limit results when the :limit option is given" do
|
309
|
+
xdb = XapianDb.new
|
310
|
+
content = "word"
|
311
|
+
30.times { xdb << XapianDoc.new(content) }
|
312
|
+
xdb.size.should == 30
|
313
|
+
results = xdb.search(content, :limit => 16)
|
314
|
+
results.size.should == 16
|
315
|
+
end
|
227
316
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
317
|
+
it "should return an array that can be used with will_paginate" do
|
318
|
+
xdb = XapianDb.new
|
319
|
+
content = "word"
|
320
|
+
30.times { xdb << XapianDoc.new(content) }
|
321
|
+
xdb.size.should == 30
|
322
|
+
results = xdb.search(content, :page => 1, :per_page => 16)
|
323
|
+
results.should be_a_kind_of XapianFu::ResultSet
|
324
|
+
results.per_page.should == 16
|
325
|
+
results.current_page.should == 1
|
326
|
+
results.total_entries.should == 30
|
327
|
+
results.total_pages.should == 2
|
328
|
+
results.previous_page.should == nil
|
329
|
+
results.next_page.should == 2
|
330
|
+
results.offset.should == 0
|
331
|
+
results = xdb.search(content, :page => 2, :per_page => 16)
|
332
|
+
results.current_page.should == 2
|
333
|
+
results.previous_page.should == 1
|
334
|
+
results.next_page.should == nil
|
335
|
+
results.offset.should == 16
|
336
|
+
end
|
234
337
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
xdb << XapianDoc.new(:created_at => time.to_i.to_s, :author => "Jim Jones")
|
248
|
-
xdb.flush
|
249
|
-
doc = xdb.documents.find(1)
|
250
|
-
doc.get_value(:created_at).should == time.to_i.to_s
|
338
|
+
it "should do searches with and without field names" do
|
339
|
+
xdb = XapianDb.new(:fields => [:name, :partner])
|
340
|
+
john = xdb << { :name => "John", :partner => "Louisa" }
|
341
|
+
katherine = xdb << { :name => "Katherine", :partner => "John" }
|
342
|
+
louisa = xdb << { :name => "Louisa", :partner => "John" }
|
343
|
+
xdb.search("name:john").should == [john]
|
344
|
+
xdb.search("partner:john").should == [katherine, louisa]
|
345
|
+
xdb.search("partner:louisa").should == [john]
|
346
|
+
xdb.search("louisa").should == [john,louisa]
|
347
|
+
xdb.search("john").should == [john,katherine,louisa]
|
348
|
+
xdb.search("john -name:john").should == [katherine,louisa]
|
349
|
+
end
|
251
350
|
end
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
351
|
+
|
352
|
+
describe "add_doc" do
|
353
|
+
it "should return a XapianDoc with an id" do
|
354
|
+
xdb = XapianDb.new
|
355
|
+
doc = XapianDoc.new("once upon a time")
|
356
|
+
doc.id.should == nil
|
357
|
+
new_doc = xdb << doc
|
358
|
+
new_doc.id.should == 1
|
359
|
+
end
|
360
|
+
|
361
|
+
it "should add new docs with the given id" do
|
362
|
+
xdb = XapianDb.new
|
363
|
+
doc = xdb << XapianDoc.new(:id => 0xbeef, :title => "Once upon a time")
|
364
|
+
xdb.documents[0xbeef].id.should == 0xbeef
|
365
|
+
doc.id.should == 0xbeef
|
366
|
+
end
|
367
|
+
|
368
|
+
it "should replace docs that already have an id when adding to the db" do
|
369
|
+
xdb = XapianDb.new
|
370
|
+
doc = xdb << XapianDoc.new("Once upon a time")
|
371
|
+
xdb.size.should == 1
|
372
|
+
doc.id.should == 1
|
373
|
+
updated_doc = xdb << doc
|
374
|
+
xdb.size.should == 1
|
375
|
+
updated_doc.id.should == doc.id
|
376
|
+
end
|
377
|
+
|
378
|
+
it "should store no fields by default" do
|
379
|
+
xdb = XapianDb.new
|
380
|
+
xdb << XapianDoc.new(:title => "Once upon a time")
|
381
|
+
xdb.flush
|
382
|
+
xdb.documents.find(1).values[:title].should be_empty
|
383
|
+
end
|
384
|
+
|
385
|
+
it "should store fields declared to be stored as values" do
|
386
|
+
xdb = XapianDb.new(:fields => { :title => { :store => true } })
|
387
|
+
xdb << XapianDoc.new(:title => "Once upon a time", :author => "Jim Jones")
|
388
|
+
doc = xdb.documents.find(1)
|
389
|
+
doc.values[:title].should == "Once upon a time"
|
390
|
+
doc.values[:author].should be_empty
|
391
|
+
end
|
392
|
+
|
393
|
+
it "should store values declared as to be sortable" do
|
394
|
+
xdb = XapianDb.new(:sortable => :age)
|
395
|
+
xdb << XapianDoc.new(:age => "32", :author => "Jim Jones")
|
396
|
+
doc = xdb.documents.find(1)
|
397
|
+
doc.values.fetch(:age).should == "32"
|
398
|
+
end
|
399
|
+
|
400
|
+
it "should store values declared as to be collapsible" do
|
401
|
+
xdb = XapianDb.new(:collapsible => :group_id)
|
402
|
+
xdb << XapianDoc.new(:group_id => "666", :author => "Jim Jones")
|
403
|
+
doc = xdb.documents.find(1)
|
404
|
+
doc.values.fetch(:group_id).should == "666"
|
405
|
+
end
|
406
|
+
|
407
|
+
it "should store data in the database" do
|
408
|
+
xdb = XapianDb.new
|
409
|
+
xdb << XapianDoc.new({ :text => "once upon a time" }, :data => Marshal::dump({ :thing => 0xdeadbeef }))
|
410
|
+
xdb.size.should == 1
|
411
|
+
doc = xdb.documents[1]
|
412
|
+
Marshal::load(doc.data).should == { :thing => 0xdeadbeef }
|
413
|
+
end
|
259
414
|
end
|
260
415
|
|
261
416
|
describe "search results sort order" do
|
262
417
|
before(:each) do
|
263
418
|
@xdb = XapianDb.new(:sortable => :number)
|
264
419
|
@expected_results = []
|
265
|
-
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog
|
266
|
-
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog", :number => 3))
|
267
|
-
@expected_results << (@xdb << XapianDoc.new(:words => "cow", :number => 2))
|
420
|
+
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog", :number => 3, :relevance => 2))
|
421
|
+
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog cat", :number => 1, :relevance => 3))
|
422
|
+
@expected_results << (@xdb << XapianDoc.new(:words => "cow", :number => 2, :relevance => 1))
|
268
423
|
end
|
269
|
-
|
270
|
-
it "should be by
|
271
|
-
results = @xdb.search("cow dog cat")
|
272
|
-
results.should == @expected_results
|
424
|
+
|
425
|
+
it "should be by search result weight by default" do
|
426
|
+
results = @xdb.search("cow dog cat", :default_op => :or)
|
427
|
+
results.should == @expected_results.sort_by { |r| r.fields[:relevance] }.reverse
|
273
428
|
end
|
274
|
-
|
429
|
+
|
275
430
|
it "should be by the value specified in descending numerical order" do
|
276
|
-
results = @xdb.search("cow dog cat", :order => :number)
|
431
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :number)
|
277
432
|
results.should == @expected_results.sort_by { |r| r.fields[:number] }
|
278
433
|
end
|
279
|
-
|
434
|
+
|
280
435
|
it "should be reversed when the reverse option is set to true" do
|
281
|
-
results = @xdb.search("cow dog cat", :order => :number, :reverse => true)
|
436
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :number, :reverse => true)
|
282
437
|
results.should == @expected_results.sort_by { |r| r.fields[:number] }.reverse
|
283
438
|
end
|
439
|
+
|
440
|
+
it "should be by the id when specified and in ascending numerical order by default" do
|
441
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :id)
|
442
|
+
results.should == @expected_results.sort_by { |r| r.id }
|
443
|
+
end
|
444
|
+
|
445
|
+
it "should be by the id in descending numerical order when specified" do
|
446
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :id, :reverse => true)
|
447
|
+
results.should == @expected_results.sort_by { |r| r.id }.reverse
|
448
|
+
end
|
449
|
+
|
284
450
|
end
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
451
|
+
|
452
|
+
describe "stemmer" do
|
453
|
+
it "should return an english stemmer by default" do
|
454
|
+
xdb = XapianDb.new
|
455
|
+
xdb.stemmer.call("fishing").should == "fish"
|
456
|
+
xdb.stemmer.call("contournait").should == "contournait"
|
457
|
+
end
|
458
|
+
it "should return a stemmer for the database language" do
|
459
|
+
xdb = XapianDb.new(:language => :french)
|
460
|
+
xdb.stemmer.call("contournait").should == "contourn"
|
461
|
+
xdb.stemmer.call("fishing").should == "fishing"
|
462
|
+
end
|
293
463
|
end
|
294
|
-
|
464
|
+
|
465
|
+
describe "stopper" do
|
466
|
+
it "should return an english stopper by default" do
|
467
|
+
xdb = XapianDb.new
|
468
|
+
xdb.stopper.call("and").should == true
|
469
|
+
xdb.stopper.call("avec").should == false
|
470
|
+
end
|
471
|
+
it "should return a stopper for the database language" do
|
472
|
+
xdb = XapianDb.new(:language => :french)
|
473
|
+
xdb.stopper.call("avec").should == true
|
474
|
+
xdb.stopper.call("and").should == false
|
475
|
+
end
|
476
|
+
end
|
477
|
+
|
478
|
+
describe "fields" do
|
479
|
+
it "should return a hash of field names set as an array with the :fields option using String as the default type" do
|
480
|
+
xdb = XapianDb.new(:fields => [:name, :age])
|
481
|
+
xdb.fields[:name].should == String
|
482
|
+
xdb.fields[:age].should == String
|
483
|
+
end
|
484
|
+
|
485
|
+
it "should return a hash of field names set as a hash with the :fields option" do
|
486
|
+
xdb = XapianDb.new(:fields => { :name => String, :gender => String,
|
487
|
+
:age => { :type => Fixnum } })
|
488
|
+
xdb.fields[:name].should == String
|
489
|
+
xdb.fields[:gender].should == String
|
490
|
+
xdb.fields[:age].should == Fixnum
|
491
|
+
end
|
492
|
+
|
493
|
+
it "should return an empty array by default" do
|
494
|
+
XapianDb.new.fields.keys.should be_empty
|
495
|
+
end
|
496
|
+
|
497
|
+
end
|
498
|
+
|
499
|
+
describe "stored_values" do
|
500
|
+
it "should return an array of field names passed in the :store option" do
|
501
|
+
xdb = XapianDb.new(:store => [:name, :title])
|
502
|
+
xdb.store_values.should == [:name, :title]
|
503
|
+
end
|
504
|
+
|
505
|
+
it "should return an array of fields defined as storable in the :fields option" do
|
506
|
+
xdb = XapianDb.new(:fields => {
|
507
|
+
:name => { :store => true },
|
508
|
+
:title => { :store => true } })
|
509
|
+
xdb.store_values.should include :name
|
510
|
+
xdb.store_values.should include :title
|
511
|
+
end
|
512
|
+
|
513
|
+
it "should return an array of fields both passed in the :store option and defined as storable in the :fields option" do
|
514
|
+
xdb = XapianDb.new(:fields => {
|
515
|
+
:name => { :store => true },
|
516
|
+
:title => { :store => true } }, :store => [:name, :gender])
|
517
|
+
xdb.store_values.size == 3
|
518
|
+
[:gender, :title, :name].each { |f| xdb.store_values.should include f }
|
519
|
+
end
|
520
|
+
end
|
521
|
+
|
522
|
+
describe "unindexed_fields" do
|
523
|
+
it "should return an empty array by default" do
|
524
|
+
xdb = XapianDb.new(:fields => { :name => String, :title => String })
|
525
|
+
xdb.unindexed_fields.should == []
|
526
|
+
end
|
527
|
+
|
528
|
+
it "should return fields defined as not indexed in the fields option" do
|
529
|
+
xdb = XapianDb.new(:fields => {
|
530
|
+
:name => { :type => String, :index => false },
|
531
|
+
:title => String })
|
532
|
+
xdb.unindexed_fields.should include :name
|
533
|
+
xdb.unindexed_fields.should_not include :title
|
534
|
+
end
|
535
|
+
end
|
536
|
+
|
295
537
|
end
|
538
|
+
|