xapian-fu 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +4 -0
- data/README.rdoc +14 -0
- data/lib/xapian_fu/version.rb +1 -1
- data/lib/xapian_fu/xapian_db.rb +22 -0
- data/lib/xapian_fu/xapian_doc.rb +4 -2
- data/spec/xapian_db_spec.rb +19 -0
- metadata +47 -72
data/CHANGELOG.rdoc
CHANGED
data/README.rdoc
CHANGED
@@ -160,6 +160,20 @@ And any combinations of the above:
|
|
160
160
|
|
161
161
|
db.search("(ruby OR sinatra) -rails xap*")
|
162
162
|
|
163
|
+
== Custom term weights
|
164
|
+
|
165
|
+
Sometimes you may want to increase the weight of a particular term in
|
166
|
+
a document. Xapian supports adding
|
167
|
+
{extra weight}[http://trac.xapian.org/wiki/FAQ/ExtraWeight] to a term
|
168
|
+
at index time by providing an integer "wdf" (default is 1).
|
169
|
+
|
170
|
+
You may set an optional :weights option when initializing a XapianDb.
|
171
|
+
The :weights option accepts a Proc or Lambda that will be called with
|
172
|
+
the key, value and list of document fields as each term is indexed.
|
173
|
+
Your function should return an integer to set the weight to.
|
174
|
+
|
175
|
+
XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
|
176
|
+
|
163
177
|
== Boolean terms
|
164
178
|
|
165
179
|
If you want to implement something like {this}[http://getting-started-with-xapian.readthedocs.org/en/latest/howtos/boolean_filters.html#searching],
|
data/lib/xapian_fu/version.rb
CHANGED
data/lib/xapian_fu/xapian_db.rb
CHANGED
@@ -98,6 +98,26 @@ module XapianFu #:nodoc:
|
|
98
98
|
# and sort them efficiently (without having to resort to storing
|
99
99
|
# leading zeros or anything like that).
|
100
100
|
#
|
101
|
+
# == Term Weights
|
102
|
+
#
|
103
|
+
# The <tt>:weights</tt> option accepts a Proc or Lambda that sets
|
104
|
+
# custom {term weights}[http://trac.xapian.org/wiki/FAQ/ExtraWeight].
|
105
|
+
#
|
106
|
+
# Your function will receive the term key and value and the full list of
|
107
|
+
# fields, and should return an integer weight to be applied for that term
|
108
|
+
# when the document is indexed.
|
109
|
+
#
|
110
|
+
# In this example,
|
111
|
+
#
|
112
|
+
# XapianDb.new(:weights => lambda do |key, value, fields|
|
113
|
+
# return 10 if fields.keys.include?('culturally_important')
|
114
|
+
# return 3 if key == 'title'
|
115
|
+
# 1
|
116
|
+
# end)
|
117
|
+
#
|
118
|
+
# terms in the title will be weighted three times greater than other terms,
|
119
|
+
# and all terms in 'culturally important' items will be weighted 10 times more.
|
120
|
+
#
|
101
121
|
class XapianDb # :nonew:
|
102
122
|
# Path to the on-disk database. Nil if in-memory database
|
103
123
|
attr_reader :dir
|
@@ -117,6 +137,7 @@ module XapianFu #:nodoc:
|
|
117
137
|
# Whether this db will generate a spelling dictionary during indexing
|
118
138
|
attr_reader :spelling
|
119
139
|
attr_reader :sortable_fields
|
140
|
+
attr_accessor :weights_function
|
120
141
|
|
121
142
|
def initialize( options = { } )
|
122
143
|
@options = { :index_positions => true, :spelling => true }.merge(options)
|
@@ -135,6 +156,7 @@ module XapianFu #:nodoc:
|
|
135
156
|
@store_values << @options[:collapsible]
|
136
157
|
@store_values = @store_values.flatten.uniq.compact
|
137
158
|
@spelling = @options[:spelling]
|
159
|
+
@weights_function = @options[:weights]
|
138
160
|
end
|
139
161
|
|
140
162
|
# Return a new stemmer object for this database
|
data/lib/xapian_fu/xapian_doc.rb
CHANGED
@@ -275,10 +275,12 @@ module XapianFu #:nodoc:
|
|
275
275
|
else
|
276
276
|
v = v.to_s
|
277
277
|
end
|
278
|
+
# get the custom term weight if a weights function exists
|
279
|
+
weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : 1
|
278
280
|
# add value with field name
|
279
|
-
tg.send(index_method, v,
|
281
|
+
tg.send(index_method, v, weight, 'X' + k.to_s.upcase)
|
280
282
|
# add value without field name
|
281
|
-
tg.send(index_method, v)
|
283
|
+
tg.send(index_method, v, weight)
|
282
284
|
end
|
283
285
|
|
284
286
|
db.boolean_fields.each do |name|
|
data/spec/xapian_db_spec.rb
CHANGED
@@ -202,6 +202,25 @@ describe XapianDb do
|
|
202
202
|
xdb.size.should == 2
|
203
203
|
end
|
204
204
|
|
205
|
+
it "should assign extra weight according to the weights function" do
|
206
|
+
xdb = XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
|
207
|
+
xdb << { :text => "once upon time", :title => "A story" }
|
208
|
+
|
209
|
+
s = xdb.search("story")
|
210
|
+
terms = s.first.terms
|
211
|
+
terms.select {|t| t.term.match(/story/)}.map(&:wdf).uniq.should == [3]
|
212
|
+
terms.select {|t| t.term.match(/upon/)}.map(&:wdf).uniq.should == [1]
|
213
|
+
end
|
214
|
+
|
215
|
+
it "should use a weight of 1 if no weights function is provided" do
|
216
|
+
xdb = XapianDb.new
|
217
|
+
xdb << { :text => "once upon time", :title => "A story" }
|
218
|
+
|
219
|
+
s = xdb.search("story")
|
220
|
+
terms = s.first.terms
|
221
|
+
terms.map(&:wdf).uniq.should == [1]
|
222
|
+
end
|
223
|
+
|
205
224
|
it "should generate boolean terms for multiple values" do
|
206
225
|
xdb = XapianDb.new(:dir => tmp_dir, :create => true,
|
207
226
|
:fields => {
|
metadata
CHANGED
@@ -1,78 +1,60 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian-fu
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.5.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 4
|
9
|
-
- 0
|
10
|
-
version: 1.4.0
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- John Leach
|
14
9
|
- Damian Janowski
|
15
10
|
autorequire:
|
16
11
|
bindir: bin
|
17
12
|
cert_chain: []
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
- !ruby/object:Gem::Dependency
|
13
|
+
date: 2012-05-23 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
22
16
|
name: rspec
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
17
|
+
requirement: &12933040 !ruby/object:Gem::Requirement
|
25
18
|
none: false
|
26
|
-
requirements:
|
19
|
+
requirements:
|
27
20
|
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
hash: 19
|
30
|
-
segments:
|
31
|
-
- 2
|
32
|
-
- 7
|
33
|
-
- 0
|
21
|
+
- !ruby/object:Gem::Version
|
34
22
|
version: 2.7.0
|
35
23
|
type: :development
|
36
|
-
version_requirements: *id001
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: rake
|
39
24
|
prerelease: false
|
40
|
-
|
25
|
+
version_requirements: *12933040
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: rake
|
28
|
+
requirement: &12928460 !ruby/object:Gem::Requirement
|
41
29
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
segments:
|
47
|
-
- 0
|
48
|
-
version: "0"
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
49
34
|
type: :development
|
50
|
-
version_requirements: *id002
|
51
|
-
- !ruby/object:Gem::Dependency
|
52
|
-
name: rdoc
|
53
35
|
prerelease: false
|
54
|
-
|
36
|
+
version_requirements: *12928460
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rdoc
|
39
|
+
requirement: &12907260 !ruby/object:Gem::Requirement
|
55
40
|
none: false
|
56
|
-
requirements:
|
57
|
-
- -
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
|
60
|
-
segments:
|
61
|
-
- 0
|
62
|
-
version: "0"
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
63
45
|
type: :development
|
64
|
-
|
65
|
-
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *12907260
|
48
|
+
description: A library to provide a more Ruby-like interface to the Xapian search
|
49
|
+
engine.
|
66
50
|
email: john@johnleach.co.uk
|
67
51
|
executables: []
|
68
|
-
|
69
52
|
extensions: []
|
70
|
-
|
71
|
-
extra_rdoc_files:
|
53
|
+
extra_rdoc_files:
|
72
54
|
- README.rdoc
|
73
55
|
- LICENSE
|
74
56
|
- CHANGELOG.rdoc
|
75
|
-
files:
|
57
|
+
files:
|
76
58
|
- lib/xapian_fu.rb
|
77
59
|
- lib/xapian_fu/xapian_doc_value_accessor.rb
|
78
60
|
- lib/xapian_fu/query_parser.rb
|
@@ -166,42 +148,34 @@ files:
|
|
166
148
|
- spec/query_parser_spec.rb
|
167
149
|
homepage: http://github.com/johnl/xapian-fu
|
168
150
|
licenses: []
|
169
|
-
|
170
151
|
post_install_message:
|
171
|
-
rdoc_options:
|
152
|
+
rdoc_options:
|
172
153
|
- --title
|
173
154
|
- Xapian Fu
|
174
155
|
- --main
|
175
156
|
- README.rdoc
|
176
157
|
- --line-numbers
|
177
|
-
require_paths:
|
158
|
+
require_paths:
|
178
159
|
- lib
|
179
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
160
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
180
161
|
none: false
|
181
|
-
requirements:
|
182
|
-
- -
|
183
|
-
- !ruby/object:Gem::Version
|
184
|
-
|
185
|
-
|
186
|
-
- 0
|
187
|
-
version: "0"
|
188
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - ! '>='
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '0'
|
166
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
189
167
|
none: false
|
190
|
-
requirements:
|
191
|
-
- -
|
192
|
-
- !ruby/object:Gem::Version
|
193
|
-
|
194
|
-
segments:
|
195
|
-
- 0
|
196
|
-
version: "0"
|
168
|
+
requirements:
|
169
|
+
- - ! '>='
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: '0'
|
197
172
|
requirements: []
|
198
|
-
|
199
173
|
rubyforge_project: xapian-fu
|
200
|
-
rubygems_version: 1.8.
|
174
|
+
rubygems_version: 1.8.11
|
201
175
|
signing_key:
|
202
176
|
specification_version: 3
|
203
177
|
summary: A Ruby interface to the Xapian search engine
|
204
|
-
test_files:
|
178
|
+
test_files:
|
205
179
|
- spec/xapian_doc_spec.rb
|
206
180
|
- spec/xapian_db_spec.rb
|
207
181
|
- spec/stopper_factory_spec.rb
|
@@ -261,3 +235,4 @@ test_files:
|
|
261
235
|
- spec/xapian_doc_value_accessor_spec.rb
|
262
236
|
- spec/build_db_for_value_testing.rb
|
263
237
|
- spec/query_parser_spec.rb
|
238
|
+
has_rdoc:
|