xapian-fu 0.2 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +152 -13
- data/examples/query.rb +34 -6
- data/examples/spider.rb +44 -15
- data/lib/xapian_fu/query_parser.rb +179 -0
- data/lib/xapian_fu/result_set.rb +52 -0
- data/lib/xapian_fu/stopper_factory.rb +40 -0
- data/lib/xapian_fu/stopwords/README +7 -0
- data/lib/xapian_fu/stopwords/danish.txt +102 -0
- data/lib/xapian_fu/stopwords/dutch.txt +113 -0
- data/lib/xapian_fu/stopwords/english.txt +312 -0
- data/lib/xapian_fu/stopwords/finnish.txt +89 -0
- data/lib/xapian_fu/stopwords/french.txt +168 -0
- data/lib/xapian_fu/stopwords/german.txt +286 -0
- data/lib/xapian_fu/stopwords/hungarian.txt +203 -0
- data/lib/xapian_fu/stopwords/italian.txt +295 -0
- data/lib/xapian_fu/stopwords/norwegian.txt +186 -0
- data/lib/xapian_fu/stopwords/portuguese.txt +245 -0
- data/lib/xapian_fu/stopwords/russian.txt +236 -0
- data/lib/xapian_fu/stopwords/spanish.txt +348 -0
- data/lib/xapian_fu/stopwords/swedish.txt +125 -0
- data/lib/xapian_fu/stopwords/update.rb +7 -0
- data/lib/xapian_fu/xapian_db.rb +215 -99
- data/lib/xapian_fu/xapian_doc.rb +229 -47
- data/lib/xapian_fu/xapian_doc_value_accessor.rb +125 -0
- data/lib/xapian_fu/xapian_documents_accessor.rb +82 -0
- data/lib/xapian_fu.rb +1 -0
- data/spec/query_parser_spec.rb +43 -0
- data/spec/stopper_factory_spec.rb +57 -0
- data/spec/xapian_db_spec.rb +458 -215
- data/spec/xapian_doc_spec.rb +180 -0
- data/spec/xapian_doc_value_accessor_spec.rb +92 -0
- metadata +29 -5
data/spec/xapian_db_spec.rb
CHANGED
@@ -2,6 +2,7 @@ require 'xapian'
|
|
2
2
|
require 'lib/xapian_fu.rb'
|
3
3
|
include XapianFu
|
4
4
|
require 'fileutils'
|
5
|
+
require 'date'
|
5
6
|
|
6
7
|
# Will be deleted
|
7
8
|
tmp_dir = '/tmp/xapian_fu_test.db'
|
@@ -10,24 +11,36 @@ describe XapianDb do
|
|
10
11
|
before do
|
11
12
|
FileUtils.rm_rf tmp_dir if File.exists?(tmp_dir)
|
12
13
|
end
|
14
|
+
|
15
|
+
describe "new" do
|
16
|
+
it "should make an in-memory database by default" do
|
17
|
+
xdb = XapianDb.new
|
18
|
+
xdb.ro.should be_a_kind_of(Xapian::Database)
|
19
|
+
xdb.rw.should === xdb.ro
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should make an on-disk database when given a :dir option" do
|
23
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
24
|
+
xdb.rw
|
25
|
+
File.exists?(tmp_dir).should be_true
|
26
|
+
xdb.should respond_to(:dir)
|
27
|
+
xdb.dir.should == tmp_dir
|
28
|
+
xdb.rw.should be_a_kind_of(Xapian::WritableDatabase)
|
29
|
+
xdb.ro.should be_a_kind_of(Xapian::Database)
|
30
|
+
end
|
13
31
|
|
14
|
-
it "should make an in-memory database by default" do
|
15
|
-
xdb = XapianDb.new
|
16
|
-
xdb.ro.should be_a_kind_of(Xapian::Database)
|
17
|
-
xdb.rw.should === xdb.ro
|
18
32
|
end
|
19
|
-
|
20
|
-
it "should
|
33
|
+
|
34
|
+
it "should lazily create the on-disk database when rw is used" do
|
21
35
|
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
22
|
-
File.exists?(tmp_dir).should
|
23
|
-
xdb.
|
24
|
-
|
25
|
-
xdb.rw.should be_a_kind_of(Xapian::WritableDatabase)
|
26
|
-
xdb.ro.should be_a_kind_of(Xapian::Database)
|
36
|
+
File.exists?(tmp_dir).should be_false
|
37
|
+
xdb.rw
|
38
|
+
File.exists?(tmp_dir).should be_true
|
27
39
|
end
|
28
40
|
|
29
41
|
it "should flush documents to the index when flush is called" do
|
30
42
|
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
43
|
+
xdb.flush
|
31
44
|
xdb.size.should == 0
|
32
45
|
xdb << "Once upon a time"
|
33
46
|
xdb.size.should == 0
|
@@ -35,261 +48,491 @@ describe XapianDb do
|
|
35
48
|
xdb.size.should == 1
|
36
49
|
end
|
37
50
|
|
38
|
-
it "should
|
39
|
-
|
40
|
-
|
41
|
-
|
51
|
+
it "should return a nice string when inspect is called" do
|
52
|
+
XapianDb.new.inspect.should =~ /XapianDb/
|
53
|
+
end
|
54
|
+
|
55
|
+
describe "transaction" do
|
56
|
+
it "should commit writes when the block completed successfully" do
|
57
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
42
58
|
xdb << "Once upon a time"
|
43
|
-
xdb.
|
59
|
+
xdb.transaction do
|
60
|
+
xdb << "Once upon a time"
|
61
|
+
xdb.size.should == 1
|
62
|
+
end
|
63
|
+
xdb.flush
|
64
|
+
xdb.size.should == 2
|
44
65
|
end
|
45
|
-
xdb.flush
|
46
|
-
xdb.size.should == 2
|
47
|
-
end
|
48
66
|
|
49
|
-
|
50
|
-
|
51
|
-
|
67
|
+
it "should serialize attempts at concurrent transactions" do
|
68
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
69
|
+
thread = Thread.new do
|
70
|
+
xdb.transaction do
|
71
|
+
sleep 0.1
|
72
|
+
xdb << "Once upon a time"
|
73
|
+
sleep 0.1
|
74
|
+
xdb << "Once upon a time"
|
75
|
+
end
|
76
|
+
end
|
52
77
|
xdb.transaction do
|
53
|
-
sleep 0.1
|
54
78
|
xdb << "Once upon a time"
|
55
79
|
sleep 0.1
|
56
80
|
xdb << "Once upon a time"
|
57
81
|
end
|
82
|
+
thread.join
|
83
|
+
xdb.flush
|
84
|
+
xdb.size.should == 4
|
58
85
|
end
|
59
|
-
|
60
|
-
|
61
|
-
|
86
|
+
|
87
|
+
it "should abort a transaction on an exception" do
|
88
|
+
xdb = XapianDb.new(:dir => tmp_dir, :create => true)
|
62
89
|
xdb << "Once upon a time"
|
90
|
+
begin
|
91
|
+
xdb.transaction do
|
92
|
+
xdb << "Once upon a time"
|
93
|
+
raise StandardError
|
94
|
+
end
|
95
|
+
rescue StandardError
|
96
|
+
end
|
97
|
+
xdb.flush
|
98
|
+
xdb.size.should == 1
|
63
99
|
end
|
64
|
-
thread.join
|
65
|
-
xdb.flush
|
66
|
-
xdb.size.should == 4
|
67
100
|
end
|
68
101
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
xdb.
|
74
|
-
|
75
|
-
|
102
|
+
describe "documents" do
|
103
|
+
|
104
|
+
it "should return a new XapianDoc with the db set on new" do
|
105
|
+
xdb = XapianDb.new
|
106
|
+
doc = xdb.documents.new
|
107
|
+
doc.should be_a_kind_of XapianDoc
|
108
|
+
doc.db.should == xdb
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should raise a XapianFu::DocNotFound error on find if the document doesn't exist" do
|
112
|
+
xdb = XapianDb.new
|
113
|
+
xdb << "once upon a time"
|
114
|
+
xdb.flush
|
115
|
+
lambda { xdb.documents.find(10) }.should raise_error XapianFu::DocNotFound
|
116
|
+
end
|
117
|
+
|
118
|
+
it "should retrieve documents with the find method" do
|
119
|
+
xdb = XapianDb.new
|
120
|
+
xdb << "Once upon a time"
|
121
|
+
xdb.flush
|
122
|
+
xdb.documents.find(1).should be_a_kind_of(XapianDoc)
|
123
|
+
end
|
124
|
+
|
125
|
+
it "should retrieve documents like an array and return a XapianDoc" do
|
126
|
+
xdb = XapianDb.new
|
127
|
+
xdb << "once upon a time"
|
128
|
+
xdb.flush
|
129
|
+
xdb.documents[1].should be_a_kind_of(XapianDoc)
|
130
|
+
end
|
131
|
+
|
132
|
+
it "should provide the id of retrieved documents" do
|
133
|
+
xdb = XapianDb.new
|
134
|
+
xdb << "once upon a time"
|
135
|
+
xdb.documents[1].id.should == 1
|
136
|
+
end
|
137
|
+
|
138
|
+
it "should set the db field for the retrieved XapianDoc" do
|
139
|
+
xdb = XapianDb.new
|
140
|
+
xdb << "once upon a time"
|
141
|
+
xdb.documents[1].db.should == xdb
|
142
|
+
end
|
143
|
+
|
144
|
+
it "should delete docs by id" do
|
145
|
+
xdb = XapianDb.new
|
146
|
+
doc = xdb << XapianDoc.new("Once upon a time")
|
147
|
+
xdb.flush
|
148
|
+
xdb.size.should == 1
|
149
|
+
xdb.documents.delete(doc.id).should == 1
|
150
|
+
xdb.flush
|
151
|
+
xdb.size.should == 0
|
152
|
+
end
|
153
|
+
|
154
|
+
it "should handle being asked to delete docs that don't exist in the db" do
|
155
|
+
xdb = XapianDb.new
|
156
|
+
doc = xdb << XapianDoc.new("Once upon a time")
|
157
|
+
xdb.flush
|
158
|
+
xdb.documents.delete(100000).should == nil
|
159
|
+
end
|
160
|
+
|
161
|
+
describe "max" do
|
162
|
+
it "should return the doc with the highest id" do
|
163
|
+
xdb = XapianDb.new
|
164
|
+
xdb << { :id => 20 }
|
165
|
+
xdb << { :id => 9 }
|
166
|
+
xdb << { :id => 15 }
|
167
|
+
xdb.flush
|
168
|
+
xdb.documents.max.id.should == 20
|
169
|
+
end
|
170
|
+
|
171
|
+
it "should return the doc with the highest specified stored value" do
|
172
|
+
xdb = XapianDb.new(:fields => { :number => { :store => true } })
|
173
|
+
xdb << { :id => 8, :number => "200" }
|
174
|
+
xdb << { :id => 9, :number => "300" }
|
175
|
+
xdb << { :id => 15, :number => "100" }
|
176
|
+
xdb.flush
|
177
|
+
xdb.documents.max(:number).id.should == 9
|
76
178
|
end
|
77
|
-
rescue StandardError
|
78
179
|
end
|
79
|
-
xdb.flush
|
80
|
-
xdb.size.should == 1
|
81
180
|
end
|
82
181
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
182
|
+
describe "when indexing" do
|
183
|
+
it "should index a XapianDoc" do
|
184
|
+
xdb = XapianDb.new
|
185
|
+
xdb << XapianDoc.new({ :text => "once upon a time", :title => "A story" })
|
186
|
+
xdb.flush
|
187
|
+
xdb.size.should == 1
|
188
|
+
end
|
89
189
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
190
|
+
it "should index a Hash" do
|
191
|
+
xdb = XapianDb.new
|
192
|
+
xdb << { :text => "once upon a time", :title => "A story" }
|
193
|
+
xdb.flush
|
194
|
+
xdb.size.should == 1
|
195
|
+
end
|
96
196
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
197
|
+
it "should index a string" do
|
198
|
+
xdb = XapianDb.new
|
199
|
+
xdb << "once upon a time"
|
200
|
+
xdb.size.should == 1
|
201
|
+
xdb << XapianDoc.new("once upon a time")
|
202
|
+
xdb.size.should == 2
|
203
|
+
end
|
204
|
+
|
103
205
|
end
|
104
206
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
207
|
+
describe "search" do
|
208
|
+
it "should return a list of XapianDocs with the weight and match set" do
|
209
|
+
xdb = XapianDb.new
|
210
|
+
xdb << XapianDoc.new(:title => 'once upon a time')
|
211
|
+
xdb << XapianDoc.new(:title => 'three little pings')
|
212
|
+
results = xdb.search("pings")
|
213
|
+
results.should be_a_kind_of Array
|
214
|
+
results.size.should == 1
|
215
|
+
results.first.should be_a_kind_of XapianDoc
|
216
|
+
results.first.match.should be_a_kind_of Xapian::Match
|
217
|
+
results.first.weight.should be_a_kind_of Float
|
218
|
+
end
|
111
219
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
220
|
+
it "should collapse results by the value specified by the :collapse option" do
|
221
|
+
xdb = XapianDb.new(:collapsible => :group)
|
222
|
+
alpha1 = xdb << XapianDoc.new(:words => "cow dog cat", :group => "alpha")
|
223
|
+
alpha2 = xdb << XapianDoc.new(:words => "cow dog", :group => "alpha")
|
224
|
+
beta1 = xdb << XapianDoc.new(:words => "cow", :group => "beta")
|
225
|
+
results = xdb.search("cow dog cat", :collapse => :group, :default_op => :or)
|
226
|
+
results.should == [alpha1, beta1]
|
227
|
+
end
|
118
228
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
229
|
+
it "should do a case-insensitive boolean AND search by default" do
|
230
|
+
xdb = XapianDb.new
|
231
|
+
doc1 = xdb << "cow dog cat"
|
232
|
+
doc2 = xdb << "cow dog"
|
233
|
+
xdb.search("cow dog cat").should == [doc1]
|
234
|
+
end
|
125
235
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
236
|
+
it "should do a case-sensitive boolean search when the :boolean_anycase option is set to false" do
|
237
|
+
pending
|
238
|
+
xdb = XapianDb.new
|
239
|
+
doc1 = xdb << "cow dog"
|
240
|
+
doc2 = xdb << "COW dog"
|
241
|
+
xdb.search("cow", :boolean_anycase => false).should == [doc1]
|
242
|
+
xdb.search("COW", :boolean_anycase => false).should == [doc2]
|
243
|
+
end
|
131
244
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
245
|
+
it "should allow LOVEHATE style queries by default" do
|
246
|
+
xdb = XapianDb.new
|
247
|
+
doc1 = xdb << "cow dog cat moose"
|
248
|
+
doc2 = xdb << "cow dog"
|
249
|
+
doc3 = xdb << "cow dog moose"
|
250
|
+
doc4 = xdb << "cow moose"
|
251
|
+
xdb.search("cow +dog -cat", :default_op => :or).should == [doc2, doc3]
|
252
|
+
end
|
139
253
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
end
|
254
|
+
it "should do a boolean OR search when :default_op option is set to :or" do
|
255
|
+
xdb = XapianDb.new
|
256
|
+
doc1 = xdb << "cow dog cat"
|
257
|
+
doc2 = xdb << "cow dog"
|
258
|
+
xdb.search("cow dog cat", :default_op => :or).should == [doc1, doc2]
|
259
|
+
end
|
147
260
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
xdb.flush
|
156
|
-
xdb.size.should == 1
|
157
|
-
updated_doc.id.should == doc.id
|
158
|
-
end
|
261
|
+
it "should allow a wildcard search by default" do
|
262
|
+
xdb = XapianDb.new
|
263
|
+
doc1 = xdb << "fox"
|
264
|
+
doc2 = xdb << "follow"
|
265
|
+
doc3 = xdb << "fantastic"
|
266
|
+
xdb.search("fo*").should == [doc1, doc2]
|
267
|
+
end
|
159
268
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
xdb.size.should == 0
|
168
|
-
end
|
269
|
+
it "should ignore wildcard searches when the :wildcards option is false" do
|
270
|
+
xdb = XapianDb.new
|
271
|
+
doc1 = xdb << "fox"
|
272
|
+
doc2 = xdb << "follow"
|
273
|
+
doc3 = xdb << "fo"
|
274
|
+
xdb.search("fo*", :wildcards => false).should == [doc3]
|
275
|
+
end
|
169
276
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
277
|
+
it "should provide a corrected spelling string by default" do
|
278
|
+
pending
|
279
|
+
xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true)
|
280
|
+
xdb.rw.add_spelling("house mouse louse")
|
281
|
+
xdb << "there is a mouse in this house"
|
282
|
+
xdb.flush
|
283
|
+
results = xdb.search("moose")
|
284
|
+
results.corrected_query.should == "mouse"
|
285
|
+
end
|
176
286
|
|
177
|
-
|
178
|
-
xdb = XapianDb.new
|
179
|
-
doc = xdb << XapianDoc.new(:id => 0xbeef, :title => "Once upon a time")
|
180
|
-
xdb.flush
|
181
|
-
xdb.documents[0xbeef].id.should == 0xbeef
|
182
|
-
doc.id.should == 0xbeef
|
183
|
-
end
|
287
|
+
it "should do phrase matching by default when then :default_op option is :phrase"
|
184
288
|
|
185
|
-
|
186
|
-
xdb = XapianDb.new
|
187
|
-
doc = xdb << XapianDoc.new("once upon a time")
|
188
|
-
doc.terms.should be_a_kind_of Array
|
189
|
-
doc.terms.last.should be_a_kind_of Xapian::Term
|
190
|
-
doc.terms.last.term.should == "upon"
|
191
|
-
end
|
289
|
+
it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe"
|
192
290
|
|
193
|
-
|
194
|
-
xdb = XapianDb.new
|
195
|
-
doc = xdb << XapianDoc.new(:title => 'once upon a time')
|
196
|
-
doc.terms.should be_a_kind_of Array
|
197
|
-
doc.terms.last.should be_a_kind_of Xapian::Term
|
198
|
-
doc.terms.last.term.should == "upon"
|
199
|
-
end
|
291
|
+
it "should do PURE_NOT matching by default when the :default_op option is :pure_not"
|
200
292
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
293
|
+
it "should page results when given the :page and :per_page options" do
|
294
|
+
xdb = XapianDb.new
|
295
|
+
content = "word"
|
296
|
+
200.times { xdb << XapianDoc.new(content) }
|
297
|
+
xdb.size.should == 200
|
298
|
+
results = xdb.search(content, :page => 1, :per_page => 12)
|
299
|
+
results.first.id.should == 1
|
300
|
+
results.size.should == 12
|
301
|
+
results = xdb.search(content, :page => 5, :per_page => 18)
|
302
|
+
results.first.id.should == 18 * 4 + 1
|
303
|
+
results.size.should == 18
|
304
|
+
results = xdb.search(content, :page => 100, :per_page => 12)
|
305
|
+
results.size.should == 0
|
306
|
+
end
|
212
307
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
results = xdb.search(content, :page => 5, :per_page => 18)
|
222
|
-
results.first.id.should == 18 * 4 + 1
|
223
|
-
results.size.should == 18
|
224
|
-
results = xdb.search(content, :page => 100, :per_page => 12)
|
225
|
-
results.size.should == 0
|
226
|
-
end
|
308
|
+
it "should limit results when the :limit option is given" do
|
309
|
+
xdb = XapianDb.new
|
310
|
+
content = "word"
|
311
|
+
30.times { xdb << XapianDoc.new(content) }
|
312
|
+
xdb.size.should == 30
|
313
|
+
results = xdb.search(content, :limit => 16)
|
314
|
+
results.size.should == 16
|
315
|
+
end
|
227
316
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
317
|
+
it "should return an array that can be used with will_paginate" do
|
318
|
+
xdb = XapianDb.new
|
319
|
+
content = "word"
|
320
|
+
30.times { xdb << XapianDoc.new(content) }
|
321
|
+
xdb.size.should == 30
|
322
|
+
results = xdb.search(content, :page => 1, :per_page => 16)
|
323
|
+
results.should be_a_kind_of XapianFu::ResultSet
|
324
|
+
results.per_page.should == 16
|
325
|
+
results.current_page.should == 1
|
326
|
+
results.total_entries.should == 30
|
327
|
+
results.total_pages.should == 2
|
328
|
+
results.previous_page.should == nil
|
329
|
+
results.next_page.should == 2
|
330
|
+
results.offset.should == 0
|
331
|
+
results = xdb.search(content, :page => 2, :per_page => 16)
|
332
|
+
results.current_page.should == 2
|
333
|
+
results.previous_page.should == 1
|
334
|
+
results.next_page.should == nil
|
335
|
+
results.offset.should == 16
|
336
|
+
end
|
234
337
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
xdb << XapianDoc.new(:created_at => time.to_i.to_s, :author => "Jim Jones")
|
248
|
-
xdb.flush
|
249
|
-
doc = xdb.documents.find(1)
|
250
|
-
doc.get_value(:created_at).should == time.to_i.to_s
|
338
|
+
it "should do searches with and without field names" do
|
339
|
+
xdb = XapianDb.new(:fields => [:name, :partner])
|
340
|
+
john = xdb << { :name => "John", :partner => "Louisa" }
|
341
|
+
katherine = xdb << { :name => "Katherine", :partner => "John" }
|
342
|
+
louisa = xdb << { :name => "Louisa", :partner => "John" }
|
343
|
+
xdb.search("name:john").should == [john]
|
344
|
+
xdb.search("partner:john").should == [katherine, louisa]
|
345
|
+
xdb.search("partner:louisa").should == [john]
|
346
|
+
xdb.search("louisa").should == [john,louisa]
|
347
|
+
xdb.search("john").should == [john,katherine,louisa]
|
348
|
+
xdb.search("john -name:john").should == [katherine,louisa]
|
349
|
+
end
|
251
350
|
end
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
351
|
+
|
352
|
+
describe "add_doc" do
|
353
|
+
it "should return a XapianDoc with an id" do
|
354
|
+
xdb = XapianDb.new
|
355
|
+
doc = XapianDoc.new("once upon a time")
|
356
|
+
doc.id.should == nil
|
357
|
+
new_doc = xdb << doc
|
358
|
+
new_doc.id.should == 1
|
359
|
+
end
|
360
|
+
|
361
|
+
it "should add new docs with the given id" do
|
362
|
+
xdb = XapianDb.new
|
363
|
+
doc = xdb << XapianDoc.new(:id => 0xbeef, :title => "Once upon a time")
|
364
|
+
xdb.documents[0xbeef].id.should == 0xbeef
|
365
|
+
doc.id.should == 0xbeef
|
366
|
+
end
|
367
|
+
|
368
|
+
it "should replace docs that already have an id when adding to the db" do
|
369
|
+
xdb = XapianDb.new
|
370
|
+
doc = xdb << XapianDoc.new("Once upon a time")
|
371
|
+
xdb.size.should == 1
|
372
|
+
doc.id.should == 1
|
373
|
+
updated_doc = xdb << doc
|
374
|
+
xdb.size.should == 1
|
375
|
+
updated_doc.id.should == doc.id
|
376
|
+
end
|
377
|
+
|
378
|
+
it "should store no fields by default" do
|
379
|
+
xdb = XapianDb.new
|
380
|
+
xdb << XapianDoc.new(:title => "Once upon a time")
|
381
|
+
xdb.flush
|
382
|
+
xdb.documents.find(1).values[:title].should be_empty
|
383
|
+
end
|
384
|
+
|
385
|
+
it "should store fields declared to be stored as values" do
|
386
|
+
xdb = XapianDb.new(:fields => { :title => { :store => true } })
|
387
|
+
xdb << XapianDoc.new(:title => "Once upon a time", :author => "Jim Jones")
|
388
|
+
doc = xdb.documents.find(1)
|
389
|
+
doc.values[:title].should == "Once upon a time"
|
390
|
+
doc.values[:author].should be_empty
|
391
|
+
end
|
392
|
+
|
393
|
+
it "should store values declared as to be sortable" do
|
394
|
+
xdb = XapianDb.new(:sortable => :age)
|
395
|
+
xdb << XapianDoc.new(:age => "32", :author => "Jim Jones")
|
396
|
+
doc = xdb.documents.find(1)
|
397
|
+
doc.values.fetch(:age).should == "32"
|
398
|
+
end
|
399
|
+
|
400
|
+
it "should store values declared as to be collapsible" do
|
401
|
+
xdb = XapianDb.new(:collapsible => :group_id)
|
402
|
+
xdb << XapianDoc.new(:group_id => "666", :author => "Jim Jones")
|
403
|
+
doc = xdb.documents.find(1)
|
404
|
+
doc.values.fetch(:group_id).should == "666"
|
405
|
+
end
|
406
|
+
|
407
|
+
it "should store data in the database" do
|
408
|
+
xdb = XapianDb.new
|
409
|
+
xdb << XapianDoc.new({ :text => "once upon a time" }, :data => Marshal::dump({ :thing => 0xdeadbeef }))
|
410
|
+
xdb.size.should == 1
|
411
|
+
doc = xdb.documents[1]
|
412
|
+
Marshal::load(doc.data).should == { :thing => 0xdeadbeef }
|
413
|
+
end
|
259
414
|
end
|
260
415
|
|
261
416
|
describe "search results sort order" do
|
262
417
|
before(:each) do
|
263
418
|
@xdb = XapianDb.new(:sortable => :number)
|
264
419
|
@expected_results = []
|
265
|
-
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog
|
266
|
-
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog", :number => 3))
|
267
|
-
@expected_results << (@xdb << XapianDoc.new(:words => "cow", :number => 2))
|
420
|
+
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog", :number => 3, :relevance => 2))
|
421
|
+
@expected_results << (@xdb << XapianDoc.new(:words => "cow dog cat", :number => 1, :relevance => 3))
|
422
|
+
@expected_results << (@xdb << XapianDoc.new(:words => "cow", :number => 2, :relevance => 1))
|
268
423
|
end
|
269
|
-
|
270
|
-
it "should be by
|
271
|
-
results = @xdb.search("cow dog cat")
|
272
|
-
results.should == @expected_results
|
424
|
+
|
425
|
+
it "should be by search result weight by default" do
|
426
|
+
results = @xdb.search("cow dog cat", :default_op => :or)
|
427
|
+
results.should == @expected_results.sort_by { |r| r.fields[:relevance] }.reverse
|
273
428
|
end
|
274
|
-
|
429
|
+
|
275
430
|
it "should be by the value specified in descending numerical order" do
|
276
|
-
results = @xdb.search("cow dog cat", :order => :number)
|
431
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :number)
|
277
432
|
results.should == @expected_results.sort_by { |r| r.fields[:number] }
|
278
433
|
end
|
279
|
-
|
434
|
+
|
280
435
|
it "should be reversed when the reverse option is set to true" do
|
281
|
-
results = @xdb.search("cow dog cat", :order => :number, :reverse => true)
|
436
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :number, :reverse => true)
|
282
437
|
results.should == @expected_results.sort_by { |r| r.fields[:number] }.reverse
|
283
438
|
end
|
439
|
+
|
440
|
+
it "should be by the id when specified and in ascending numerical order by default" do
|
441
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :id)
|
442
|
+
results.should == @expected_results.sort_by { |r| r.id }
|
443
|
+
end
|
444
|
+
|
445
|
+
it "should be by the id in descending numerical order when specified" do
|
446
|
+
results = @xdb.search("cow dog cat", :default_op => :or, :order => :id, :reverse => true)
|
447
|
+
results.should == @expected_results.sort_by { |r| r.id }.reverse
|
448
|
+
end
|
449
|
+
|
284
450
|
end
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
451
|
+
|
452
|
+
describe "stemmer" do
|
453
|
+
it "should return an english stemmer by default" do
|
454
|
+
xdb = XapianDb.new
|
455
|
+
xdb.stemmer.call("fishing").should == "fish"
|
456
|
+
xdb.stemmer.call("contournait").should == "contournait"
|
457
|
+
end
|
458
|
+
it "should return a stemmer for the database language" do
|
459
|
+
xdb = XapianDb.new(:language => :french)
|
460
|
+
xdb.stemmer.call("contournait").should == "contourn"
|
461
|
+
xdb.stemmer.call("fishing").should == "fishing"
|
462
|
+
end
|
293
463
|
end
|
294
|
-
|
464
|
+
|
465
|
+
describe "stopper" do
|
466
|
+
it "should return an english stopper by default" do
|
467
|
+
xdb = XapianDb.new
|
468
|
+
xdb.stopper.call("and").should == true
|
469
|
+
xdb.stopper.call("avec").should == false
|
470
|
+
end
|
471
|
+
it "should return a stopper for the database language" do
|
472
|
+
xdb = XapianDb.new(:language => :french)
|
473
|
+
xdb.stopper.call("avec").should == true
|
474
|
+
xdb.stopper.call("and").should == false
|
475
|
+
end
|
476
|
+
end
|
477
|
+
|
478
|
+
describe "fields" do
|
479
|
+
it "should return a hash of field names set as an array with the :fields option using String as the default type" do
|
480
|
+
xdb = XapianDb.new(:fields => [:name, :age])
|
481
|
+
xdb.fields[:name].should == String
|
482
|
+
xdb.fields[:age].should == String
|
483
|
+
end
|
484
|
+
|
485
|
+
it "should return a hash of field names set as a hash with the :fields option" do
|
486
|
+
xdb = XapianDb.new(:fields => { :name => String, :gender => String,
|
487
|
+
:age => { :type => Fixnum } })
|
488
|
+
xdb.fields[:name].should == String
|
489
|
+
xdb.fields[:gender].should == String
|
490
|
+
xdb.fields[:age].should == Fixnum
|
491
|
+
end
|
492
|
+
|
493
|
+
it "should return an empty array by default" do
|
494
|
+
XapianDb.new.fields.keys.should be_empty
|
495
|
+
end
|
496
|
+
|
497
|
+
end
|
498
|
+
|
499
|
+
describe "stored_values" do
|
500
|
+
it "should return an array of field names passed in the :store option" do
|
501
|
+
xdb = XapianDb.new(:store => [:name, :title])
|
502
|
+
xdb.store_values.should == [:name, :title]
|
503
|
+
end
|
504
|
+
|
505
|
+
it "should return an array of fields defined as storable in the :fields option" do
|
506
|
+
xdb = XapianDb.new(:fields => {
|
507
|
+
:name => { :store => true },
|
508
|
+
:title => { :store => true } })
|
509
|
+
xdb.store_values.should include :name
|
510
|
+
xdb.store_values.should include :title
|
511
|
+
end
|
512
|
+
|
513
|
+
it "should return an array of fields both passed in the :store option and defined as storable in the :fields option" do
|
514
|
+
xdb = XapianDb.new(:fields => {
|
515
|
+
:name => { :store => true },
|
516
|
+
:title => { :store => true } }, :store => [:name, :gender])
|
517
|
+
xdb.store_values.size == 3
|
518
|
+
[:gender, :title, :name].each { |f| xdb.store_values.should include f }
|
519
|
+
end
|
520
|
+
end
|
521
|
+
|
522
|
+
describe "unindexed_fields" do
|
523
|
+
it "should return an empty array by default" do
|
524
|
+
xdb = XapianDb.new(:fields => { :name => String, :title => String })
|
525
|
+
xdb.unindexed_fields.should == []
|
526
|
+
end
|
527
|
+
|
528
|
+
it "should return fields defined as not indexed in the fields option" do
|
529
|
+
xdb = XapianDb.new(:fields => {
|
530
|
+
:name => { :type => String, :index => false },
|
531
|
+
:title => String })
|
532
|
+
xdb.unindexed_fields.should include :name
|
533
|
+
xdb.unindexed_fields.should_not include :title
|
534
|
+
end
|
535
|
+
end
|
536
|
+
|
295
537
|
end
|
538
|
+
|