xapian-fu 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +4 -0
- data/README.rdoc +14 -0
- data/lib/xapian_fu/version.rb +1 -1
- data/lib/xapian_fu/xapian_db.rb +22 -0
- data/lib/xapian_fu/xapian_doc.rb +4 -2
- data/spec/xapian_db_spec.rb +19 -0
- metadata +47 -72
data/CHANGELOG.rdoc
CHANGED
data/README.rdoc
CHANGED
@@ -160,6 +160,20 @@ And any combinations of the above:
|
|
160
160
|
|
161
161
|
db.search("(ruby OR sinatra) -rails xap*")
|
162
162
|
|
163
|
+
== Custom term weights
|
164
|
+
|
165
|
+
Sometimes you may want to increase the weight of a particular term in
|
166
|
+
a document. Xapian supports adding
|
167
|
+
{extra weight}[http://trac.xapian.org/wiki/FAQ/ExtraWeight] to a term
|
168
|
+
at index time by providing an integer "wdf" (default is 1).
|
169
|
+
|
170
|
+
You may set an optional :weights option when initializing a XapianDb.
|
171
|
+
The :weights option accepts a Proc or Lambda that will be called with
|
172
|
+
the key, value and list of document fields as each term is indexed.
|
173
|
+
Your function should return an integer to set the weight to.
|
174
|
+
|
175
|
+
XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
|
176
|
+
|
163
177
|
== Boolean terms
|
164
178
|
|
165
179
|
If you want to implement something like {this}[http://getting-started-with-xapian.readthedocs.org/en/latest/howtos/boolean_filters.html#searching],
|
data/lib/xapian_fu/version.rb
CHANGED
data/lib/xapian_fu/xapian_db.rb
CHANGED
@@ -98,6 +98,26 @@ module XapianFu #:nodoc:
|
|
98
98
|
# and sort them efficiently (without having to resort to storing
|
99
99
|
# leading zeros or anything like that).
|
100
100
|
#
|
101
|
+
# == Term Weights
|
102
|
+
#
|
103
|
+
# The <tt>:weights</tt> option accepts a Proc or Lambda that sets
|
104
|
+
# custom {term weights}[http://trac.xapian.org/wiki/FAQ/ExtraWeight].
|
105
|
+
#
|
106
|
+
# Your function will receive the term key and value and the full list of
|
107
|
+
# fields, and should return an integer weight to be applied for that term
|
108
|
+
# when the document is indexed.
|
109
|
+
#
|
110
|
+
# In this example,
|
111
|
+
#
|
112
|
+
# XapianDb.new(:weights => lambda do |key, value, fields|
|
113
|
+
# return 10 if fields.keys.include?('culturally_important')
|
114
|
+
# return 3 if key == 'title'
|
115
|
+
# 1
|
116
|
+
# end)
|
117
|
+
#
|
118
|
+
# terms in the title will be weighted three times greater than other terms,
|
119
|
+
# and all terms in 'culturally important' items will be weighted 10 times more.
|
120
|
+
#
|
101
121
|
class XapianDb # :nonew:
|
102
122
|
# Path to the on-disk database. Nil if in-memory database
|
103
123
|
attr_reader :dir
|
@@ -117,6 +137,7 @@ module XapianFu #:nodoc:
|
|
117
137
|
# Whether this db will generate a spelling dictionary during indexing
|
118
138
|
attr_reader :spelling
|
119
139
|
attr_reader :sortable_fields
|
140
|
+
attr_accessor :weights_function
|
120
141
|
|
121
142
|
def initialize( options = { } )
|
122
143
|
@options = { :index_positions => true, :spelling => true }.merge(options)
|
@@ -135,6 +156,7 @@ module XapianFu #:nodoc:
|
|
135
156
|
@store_values << @options[:collapsible]
|
136
157
|
@store_values = @store_values.flatten.uniq.compact
|
137
158
|
@spelling = @options[:spelling]
|
159
|
+
@weights_function = @options[:weights]
|
138
160
|
end
|
139
161
|
|
140
162
|
# Return a new stemmer object for this database
|
data/lib/xapian_fu/xapian_doc.rb
CHANGED
@@ -275,10 +275,12 @@ module XapianFu #:nodoc:
|
|
275
275
|
else
|
276
276
|
v = v.to_s
|
277
277
|
end
|
278
|
+
# get the custom term weight if a weights function exists
|
279
|
+
weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : 1
|
278
280
|
# add value with field name
|
279
|
-
tg.send(index_method, v,
|
281
|
+
tg.send(index_method, v, weight, 'X' + k.to_s.upcase)
|
280
282
|
# add value without field name
|
281
|
-
tg.send(index_method, v)
|
283
|
+
tg.send(index_method, v, weight)
|
282
284
|
end
|
283
285
|
|
284
286
|
db.boolean_fields.each do |name|
|
data/spec/xapian_db_spec.rb
CHANGED
@@ -202,6 +202,25 @@ describe XapianDb do
|
|
202
202
|
xdb.size.should == 2
|
203
203
|
end
|
204
204
|
|
205
|
+
it "should assign extra weight according to the weights function" do
|
206
|
+
xdb = XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
|
207
|
+
xdb << { :text => "once upon time", :title => "A story" }
|
208
|
+
|
209
|
+
s = xdb.search("story")
|
210
|
+
terms = s.first.terms
|
211
|
+
terms.select {|t| t.term.match(/story/)}.map(&:wdf).uniq.should == [3]
|
212
|
+
terms.select {|t| t.term.match(/upon/)}.map(&:wdf).uniq.should == [1]
|
213
|
+
end
|
214
|
+
|
215
|
+
it "should use a weight of 1 if no weights function is provided" do
|
216
|
+
xdb = XapianDb.new
|
217
|
+
xdb << { :text => "once upon time", :title => "A story" }
|
218
|
+
|
219
|
+
s = xdb.search("story")
|
220
|
+
terms = s.first.terms
|
221
|
+
terms.map(&:wdf).uniq.should == [1]
|
222
|
+
end
|
223
|
+
|
205
224
|
it "should generate boolean terms for multiple values" do
|
206
225
|
xdb = XapianDb.new(:dir => tmp_dir, :create => true,
|
207
226
|
:fields => {
|
metadata
CHANGED
@@ -1,78 +1,60 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian-fu
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.5.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 4
|
9
|
-
- 0
|
10
|
-
version: 1.4.0
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- John Leach
|
14
9
|
- Damian Janowski
|
15
10
|
autorequire:
|
16
11
|
bindir: bin
|
17
12
|
cert_chain: []
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
- !ruby/object:Gem::Dependency
|
13
|
+
date: 2012-05-23 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
22
16
|
name: rspec
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &12933040 !ruby/object:Gem::Requirement
|
25
18
|
none: false
|
26
|
-
requirements:
|
19
|
+
requirements:
|
27
20
|
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 19
|
30
|
-
segments:
|
31
|
-
- 2
|
32
|
-
- 7
|
33
|
-
- 0
|
21
|
+
- !ruby/object:Gem::Version
|
34
22
|
version: 2.7.0
|
35
23
|
type: :development
|
36
|
-
version_requirements: *id001
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: rake
|
39
24
|
prerelease: false
|
40
|
-
|
25
|
+
version_requirements: *12933040
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rake
|
28
|
+
requirement: &12928460 !ruby/object:Gem::Requirement
|
41
29
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
segments:
|
47
|
-
- 0
|
48
|
-
version: "0"
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
49
34
|
type: :development
|
50
|
-
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: rdoc
|
53
35
|
prerelease: false
|
54
|
-
|
36
|
+
version_requirements: *12928460
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rdoc
|
39
|
+
requirement: &12907260 !ruby/object:Gem::Requirement
|
55
40
|
none: false
|
56
|
-
requirements:
|
57
|
-
- -
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
|
60
|
-
segments:
|
61
|
-
- 0
|
62
|
-
version: "0"
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
63
45
|
type: :development
|
64
|
-
|
65
|
-
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *12907260
|
48
|
+
description: A library to provide a more Ruby-like interface to the Xapian search
|
49
|
+
engine.
|
66
50
|
email: john@johnleach.co.uk
|
67
51
|
executables: []
|
68
|
-
|
69
52
|
extensions: []
|
70
|
-
|
71
|
-
extra_rdoc_files:
|
53
|
+
extra_rdoc_files:
|
72
54
|
- README.rdoc
|
73
55
|
- LICENSE
|
74
56
|
- CHANGELOG.rdoc
|
75
|
-
files:
|
57
|
+
files:
|
76
58
|
- lib/xapian_fu.rb
|
77
59
|
- lib/xapian_fu/xapian_doc_value_accessor.rb
|
78
60
|
- lib/xapian_fu/query_parser.rb
|
@@ -166,42 +148,34 @@ files:
|
|
166
148
|
- spec/query_parser_spec.rb
|
167
149
|
homepage: http://github.com/johnl/xapian-fu
|
168
150
|
licenses: []
|
169
|
-
|
170
151
|
post_install_message:
|
171
|
-
rdoc_options:
|
152
|
+
rdoc_options:
|
172
153
|
- --title
|
173
154
|
- Xapian Fu
|
174
155
|
- --main
|
175
156
|
- README.rdoc
|
176
157
|
- --line-numbers
|
177
|
-
require_paths:
|
158
|
+
require_paths:
|
178
159
|
- lib
|
179
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
160
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
180
161
|
none: false
|
181
|
-
requirements:
|
182
|
-
- -
|
183
|
-
- !ruby/object:Gem::Version
|
184
|
-
|
185
|
-
|
186
|
-
- 0
|
187
|
-
version: "0"
|
188
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ! '>='
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
189
167
|
none: false
|
190
|
-
requirements:
|
191
|
-
- -
|
192
|
-
- !ruby/object:Gem::Version
|
193
|
-
|
194
|
-
segments:
|
195
|
-
- 0
|
196
|
-
version: "0"
|
168
|
+
requirements:
|
169
|
+
- - ! '>='
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: '0'
|
197
172
|
requirements: []
|
198
|
-
|
199
173
|
rubyforge_project: xapian-fu
|
200
|
-
rubygems_version: 1.8.
|
174
|
+
rubygems_version: 1.8.11
|
201
175
|
signing_key:
|
202
176
|
specification_version: 3
|
203
177
|
summary: A Ruby interface to the Xapian search engine
|
204
|
-
test_files:
|
178
|
+
test_files:
|
205
179
|
- spec/xapian_doc_spec.rb
|
206
180
|
- spec/xapian_db_spec.rb
|
207
181
|
- spec/stopper_factory_spec.rb
|
@@ -261,3 +235,4 @@ test_files:
|
|
261
235
|
- spec/xapian_doc_value_accessor_spec.rb
|
262
236
|
- spec/build_db_for_value_testing.rb
|
263
237
|
- spec/query_parser_spec.rb
|
238
|
+
has_rdoc:
|