xapian-fu 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.5.0 (23rd May 2012)
2
+
3
+ * Support for custom term weights when indexing (Mike Ragalie)
4
+
1
5
  === 1.4.0 (13th March 2012)
2
6
 
3
7
  * Support for indexing Arrays properly
data/README.rdoc CHANGED
@@ -160,6 +160,20 @@ And any combinations of the above:
160
160
 
161
161
  db.search("(ruby OR sinatra) -rails xap*")
162
162
 
163
+ == Custom term weights
164
+
165
+ Sometimes you may want to increase the weight of a particular term in
166
+ a document. Xapian supports adding
167
+ {extra weight}[http://trac.xapian.org/wiki/FAQ/ExtraWeight] to a term
168
+ at index time by providing an integer "wdf" (default is 1).
169
+
170
+ You may set an optional :weights option when initializing a XapianDb.
171
+ The :weights option accepts a Proc or Lambda that will be called with
172
+ the key, value and list of document fields as each term is indexed.
173
+ Your function should return an integer to set the weight to.
174
+
175
+ XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
176
+
163
177
  == Boolean terms
164
178
 
165
179
  If you want to implement something like {this}[http://getting-started-with-xapian.readthedocs.org/en/latest/howtos/boolean_filters.html#searching],
@@ -1,3 +1,3 @@
1
1
  module XapianFu #:nodoc:
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.0"
3
3
  end
@@ -98,6 +98,26 @@ module XapianFu #:nodoc:
98
98
  # and sort them efficiently (without having to resort to storing
99
99
  # leading zeros or anything like that).
100
100
  #
101
+ # == Term Weights
102
+ #
103
+ # The <tt>:weights</tt> option accepts a Proc or Lambda that sets
104
+ # custom {term weights}[http://trac.xapian.org/wiki/FAQ/ExtraWeight].
105
+ #
106
+ # Your function will receive the term key and value and the full list of
107
+ # fields, and should return an integer weight to be applied for that term
108
+ # when the document is indexed.
109
+ #
110
+ # In this example,
111
+ #
112
+ # XapianDb.new(:weights => Proc.new do |key, value, fields|
113
+ # next 10 if fields.keys.include?('culturally_important')
114
+ # next 3 if key == 'title'
115
+ # 1
116
+ # end)
117
+ #
118
+ # terms in the title will be weighted three times greater than other terms,
119
+ # and all terms in 'culturally important' items will be weighted 10 times more.
120
+ #
101
121
  class XapianDb # :nonew:
102
122
  # Path to the on-disk database. Nil if in-memory database
103
123
  attr_reader :dir
@@ -117,6 +137,7 @@ module XapianFu #:nodoc:
117
137
  # Whether this db will generate a spelling dictionary during indexing
118
138
  attr_reader :spelling
119
139
  attr_reader :sortable_fields
140
+ attr_accessor :weights_function
120
141
 
121
142
  def initialize( options = { } )
122
143
  @options = { :index_positions => true, :spelling => true }.merge(options)
@@ -135,6 +156,7 @@ module XapianFu #:nodoc:
135
156
  @store_values << @options[:collapsible]
136
157
  @store_values = @store_values.flatten.uniq.compact
137
158
  @spelling = @options[:spelling]
159
+ @weights_function = @options[:weights]
138
160
  end
139
161
 
140
162
  # Return a new stemmer object for this database
@@ -275,10 +275,12 @@ module XapianFu #:nodoc:
275
275
  else
276
276
  v = v.to_s
277
277
  end
278
+ # get the custom term weight if a weights function exists
279
+ weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : 1
278
280
  # add value with field name
279
- tg.send(index_method, v, 1, 'X' + k.to_s.upcase)
281
+ tg.send(index_method, v, weight, 'X' + k.to_s.upcase)
280
282
  # add value without field name
281
- tg.send(index_method, v)
283
+ tg.send(index_method, v, weight)
282
284
  end
283
285
 
284
286
  db.boolean_fields.each do |name|
@@ -202,6 +202,25 @@ describe XapianDb do
202
202
  xdb.size.should == 2
203
203
  end
204
204
 
205
+ it "should assign extra weight according to the weights function" do
206
+ xdb = XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
207
+ xdb << { :text => "once upon time", :title => "A story" }
208
+
209
+ s = xdb.search("story")
210
+ terms = s.first.terms
211
+ terms.select {|t| t.term.match(/story/)}.map(&:wdf).uniq.should == [3]
212
+ terms.select {|t| t.term.match(/upon/)}.map(&:wdf).uniq.should == [1]
213
+ end
214
+
215
+ it "should use a weight of 1 if no weights function is provided" do
216
+ xdb = XapianDb.new
217
+ xdb << { :text => "once upon time", :title => "A story" }
218
+
219
+ s = xdb.search("story")
220
+ terms = s.first.terms
221
+ terms.map(&:wdf).uniq.should == [1]
222
+ end
223
+
205
224
  it "should generate boolean terms for multiple values" do
206
225
  xdb = XapianDb.new(:dir => tmp_dir, :create => true,
207
226
  :fields => {
metadata CHANGED
@@ -1,78 +1,60 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
- version: !ruby/object:Gem::Version
4
- hash: 7
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.0
5
5
  prerelease:
6
- segments:
7
- - 1
8
- - 4
9
- - 0
10
- version: 1.4.0
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - John Leach
14
9
  - Damian Janowski
15
10
  autorequire:
16
11
  bindir: bin
17
12
  cert_chain: []
18
-
19
- date: 2012-03-13 00:00:00 Z
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
13
+ date: 2012-05-23 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
22
16
  name: rspec
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &12933040 !ruby/object:Gem::Requirement
25
18
  none: false
26
- requirements:
19
+ requirements:
27
20
  - - ~>
28
- - !ruby/object:Gem::Version
29
- hash: 19
30
- segments:
31
- - 2
32
- - 7
33
- - 0
21
+ - !ruby/object:Gem::Version
34
22
  version: 2.7.0
35
23
  type: :development
36
- version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- name: rake
39
24
  prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
25
+ version_requirements: *12933040
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake
28
+ requirement: &12928460 !ruby/object:Gem::Requirement
41
29
  none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- hash: 3
46
- segments:
47
- - 0
48
- version: "0"
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
49
34
  type: :development
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- name: rdoc
53
35
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
36
+ version_requirements: *12928460
37
+ - !ruby/object:Gem::Dependency
38
+ name: rdoc
39
+ requirement: &12907260 !ruby/object:Gem::Requirement
55
40
  none: false
56
- requirements:
57
- - - ">="
58
- - !ruby/object:Gem::Version
59
- hash: 3
60
- segments:
61
- - 0
62
- version: "0"
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
63
45
  type: :development
64
- version_requirements: *id003
65
- description: A library to provide a more Ruby-like interface to the Xapian search engine.
46
+ prerelease: false
47
+ version_requirements: *12907260
48
+ description: A library to provide a more Ruby-like interface to the Xapian search
49
+ engine.
66
50
  email: john@johnleach.co.uk
67
51
  executables: []
68
-
69
52
  extensions: []
70
-
71
- extra_rdoc_files:
53
+ extra_rdoc_files:
72
54
  - README.rdoc
73
55
  - LICENSE
74
56
  - CHANGELOG.rdoc
75
- files:
57
+ files:
76
58
  - lib/xapian_fu.rb
77
59
  - lib/xapian_fu/xapian_doc_value_accessor.rb
78
60
  - lib/xapian_fu/query_parser.rb
@@ -166,42 +148,34 @@ files:
166
148
  - spec/query_parser_spec.rb
167
149
  homepage: http://github.com/johnl/xapian-fu
168
150
  licenses: []
169
-
170
151
  post_install_message:
171
- rdoc_options:
152
+ rdoc_options:
172
153
  - --title
173
154
  - Xapian Fu
174
155
  - --main
175
156
  - README.rdoc
176
157
  - --line-numbers
177
- require_paths:
158
+ require_paths:
178
159
  - lib
179
- required_ruby_version: !ruby/object:Gem::Requirement
160
+ required_ruby_version: !ruby/object:Gem::Requirement
180
161
  none: false
181
- requirements:
182
- - - ">="
183
- - !ruby/object:Gem::Version
184
- hash: 3
185
- segments:
186
- - 0
187
- version: "0"
188
- required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ required_rubygems_version: !ruby/object:Gem::Requirement
189
167
  none: false
190
- requirements:
191
- - - ">="
192
- - !ruby/object:Gem::Version
193
- hash: 3
194
- segments:
195
- - 0
196
- version: "0"
168
+ requirements:
169
+ - - ! '>='
170
+ - !ruby/object:Gem::Version
171
+ version: '0'
197
172
  requirements: []
198
-
199
173
  rubyforge_project: xapian-fu
200
- rubygems_version: 1.8.10
174
+ rubygems_version: 1.8.11
201
175
  signing_key:
202
176
  specification_version: 3
203
177
  summary: A Ruby interface to the Xapian search engine
204
- test_files:
178
+ test_files:
205
179
  - spec/xapian_doc_spec.rb
206
180
  - spec/xapian_db_spec.rb
207
181
  - spec/stopper_factory_spec.rb
@@ -261,3 +235,4 @@ test_files:
261
235
  - spec/xapian_doc_value_accessor_spec.rb
262
236
  - spec/build_db_for_value_testing.rb
263
237
  - spec/query_parser_spec.rb
238
+ has_rdoc: