xapian-fu 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.5.0 (23rd May 2012)
2
+
3
+ * Support for custom term weights when indexing (Mike Ragalie)
4
+
1
5
  === 1.4.0 (13th March 2012)
2
6
 
3
7
  * Support for indexing Arrays properly
data/README.rdoc CHANGED
@@ -160,6 +160,20 @@ And any combinations of the above:
160
160
 
161
161
  db.search("(ruby OR sinatra) -rails xap*")
162
162
 
163
+ == Custom term weights
164
+
165
+ Sometimes you may want to increase the weight of a particular term in
166
+ a document. Xapian supports adding
167
+ {extra weight}[http://trac.xapian.org/wiki/FAQ/ExtraWeight] to a term
168
+ at index time by providing an integer "wdf" (default is 1).
169
+
170
+ You may set an optional :weights option when initializing a XapianDb.
171
+ The :weights option accepts a Proc or Lambda that will be called with
172
+ the key, value and list of document fields as each term is indexed.
173
+ Your function should return an integer to set the weight to.
174
+
175
+ XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
176
+
163
177
  == Boolean terms
164
178
 
165
179
  If you want to implement something like {this}[http://getting-started-with-xapian.readthedocs.org/en/latest/howtos/boolean_filters.html#searching],
@@ -1,3 +1,3 @@
1
1
  module XapianFu #:nodoc:
2
- VERSION = "1.4.0"
2
+ VERSION = "1.5.0"
3
3
  end
@@ -98,6 +98,26 @@ module XapianFu #:nodoc:
98
98
  # and sort them efficiently (without having to resort to storing
99
99
  # leading zeros or anything like that).
100
100
  #
101
+ # == Term Weights
102
+ #
103
+ # The <tt>:weights</tt> option accepts a Proc or Lambda that sets
104
+ # custom {term weights}[http://trac.xapian.org/wiki/FAQ/ExtraWeight].
105
+ #
106
+ # Your function will receive the term key and value and the full list of
107
+ # fields, and should return an integer weight to be applied for that term
108
+ # when the document is indexed.
109
+ #
110
+ # In this example,
111
+ #
112
+ # XapianDb.new(:weights => lambda do |key, value, fields|
113
+ # return 10 if fields.keys.include?('culturally_important')
114
+ # return 3 if key == 'title'
115
+ # 1
116
+ # end)
117
+ #
118
+ # terms in the title will be weighted three times greater than other terms,
119
+ # and all terms in 'culturally important' items will be weighted 10 times more.
120
+ #
101
121
  class XapianDb # :nonew:
102
122
  # Path to the on-disk database. Nil if in-memory database
103
123
  attr_reader :dir
@@ -117,6 +137,7 @@ module XapianFu #:nodoc:
117
137
  # Whether this db will generate a spelling dictionary during indexing
118
138
  attr_reader :spelling
119
139
  attr_reader :sortable_fields
140
+ attr_accessor :weights_function
120
141
 
121
142
  def initialize( options = { } )
122
143
  @options = { :index_positions => true, :spelling => true }.merge(options)
@@ -135,6 +156,7 @@ module XapianFu #:nodoc:
135
156
  @store_values << @options[:collapsible]
136
157
  @store_values = @store_values.flatten.uniq.compact
137
158
  @spelling = @options[:spelling]
159
+ @weights_function = @options[:weights]
138
160
  end
139
161
 
140
162
  # Return a new stemmer object for this database
@@ -275,10 +275,12 @@ module XapianFu #:nodoc:
275
275
  else
276
276
  v = v.to_s
277
277
  end
278
+ # get the custom term weight if a weights function exists
279
+ weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : 1
278
280
  # add value with field name
279
- tg.send(index_method, v, 1, 'X' + k.to_s.upcase)
281
+ tg.send(index_method, v, weight, 'X' + k.to_s.upcase)
280
282
  # add value without field name
281
- tg.send(index_method, v)
283
+ tg.send(index_method, v, weight)
282
284
  end
283
285
 
284
286
  db.boolean_fields.each do |name|
@@ -202,6 +202,25 @@ describe XapianDb do
202
202
  xdb.size.should == 2
203
203
  end
204
204
 
205
+ it "should assign extra weight according to the weights function" do
206
+ xdb = XapianDb.new(:weights => lambda {|k, v, f| k == :title ? 3 : 1})
207
+ xdb << { :text => "once upon time", :title => "A story" }
208
+
209
+ s = xdb.search("story")
210
+ terms = s.first.terms
211
+ terms.select {|t| t.term.match(/story/)}.map(&:wdf).uniq.should == [3]
212
+ terms.select {|t| t.term.match(/upon/)}.map(&:wdf).uniq.should == [1]
213
+ end
214
+
215
+ it "should use a weight of 1 if no weights function is provided" do
216
+ xdb = XapianDb.new
217
+ xdb << { :text => "once upon time", :title => "A story" }
218
+
219
+ s = xdb.search("story")
220
+ terms = s.first.terms
221
+ terms.map(&:wdf).uniq.should == [1]
222
+ end
223
+
205
224
  it "should generate boolean terms for multiple values" do
206
225
  xdb = XapianDb.new(:dir => tmp_dir, :create => true,
207
226
  :fields => {
metadata CHANGED
@@ -1,78 +1,60 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
- version: !ruby/object:Gem::Version
4
- hash: 7
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.5.0
5
5
  prerelease:
6
- segments:
7
- - 1
8
- - 4
9
- - 0
10
- version: 1.4.0
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - John Leach
14
9
  - Damian Janowski
15
10
  autorequire:
16
11
  bindir: bin
17
12
  cert_chain: []
18
-
19
- date: 2012-03-13 00:00:00 Z
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
13
+ date: 2012-05-23 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
22
16
  name: rspec
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
17
+ requirement: &12933040 !ruby/object:Gem::Requirement
25
18
  none: false
26
- requirements:
19
+ requirements:
27
20
  - - ~>
28
- - !ruby/object:Gem::Version
29
- hash: 19
30
- segments:
31
- - 2
32
- - 7
33
- - 0
21
+ - !ruby/object:Gem::Version
34
22
  version: 2.7.0
35
23
  type: :development
36
- version_requirements: *id001
37
- - !ruby/object:Gem::Dependency
38
- name: rake
39
24
  prerelease: false
40
- requirement: &id002 !ruby/object:Gem::Requirement
25
+ version_requirements: *12933040
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake
28
+ requirement: &12928460 !ruby/object:Gem::Requirement
41
29
  none: false
42
- requirements:
43
- - - ">="
44
- - !ruby/object:Gem::Version
45
- hash: 3
46
- segments:
47
- - 0
48
- version: "0"
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
49
34
  type: :development
50
- version_requirements: *id002
51
- - !ruby/object:Gem::Dependency
52
- name: rdoc
53
35
  prerelease: false
54
- requirement: &id003 !ruby/object:Gem::Requirement
36
+ version_requirements: *12928460
37
+ - !ruby/object:Gem::Dependency
38
+ name: rdoc
39
+ requirement: &12907260 !ruby/object:Gem::Requirement
55
40
  none: false
56
- requirements:
57
- - - ">="
58
- - !ruby/object:Gem::Version
59
- hash: 3
60
- segments:
61
- - 0
62
- version: "0"
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
63
45
  type: :development
64
- version_requirements: *id003
65
- description: A library to provide a more Ruby-like interface to the Xapian search engine.
46
+ prerelease: false
47
+ version_requirements: *12907260
48
+ description: A library to provide a more Ruby-like interface to the Xapian search
49
+ engine.
66
50
  email: john@johnleach.co.uk
67
51
  executables: []
68
-
69
52
  extensions: []
70
-
71
- extra_rdoc_files:
53
+ extra_rdoc_files:
72
54
  - README.rdoc
73
55
  - LICENSE
74
56
  - CHANGELOG.rdoc
75
- files:
57
+ files:
76
58
  - lib/xapian_fu.rb
77
59
  - lib/xapian_fu/xapian_doc_value_accessor.rb
78
60
  - lib/xapian_fu/query_parser.rb
@@ -166,42 +148,34 @@ files:
166
148
  - spec/query_parser_spec.rb
167
149
  homepage: http://github.com/johnl/xapian-fu
168
150
  licenses: []
169
-
170
151
  post_install_message:
171
- rdoc_options:
152
+ rdoc_options:
172
153
  - --title
173
154
  - Xapian Fu
174
155
  - --main
175
156
  - README.rdoc
176
157
  - --line-numbers
177
- require_paths:
158
+ require_paths:
178
159
  - lib
179
- required_ruby_version: !ruby/object:Gem::Requirement
160
+ required_ruby_version: !ruby/object:Gem::Requirement
180
161
  none: false
181
- requirements:
182
- - - ">="
183
- - !ruby/object:Gem::Version
184
- hash: 3
185
- segments:
186
- - 0
187
- version: "0"
188
- required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ required_rubygems_version: !ruby/object:Gem::Requirement
189
167
  none: false
190
- requirements:
191
- - - ">="
192
- - !ruby/object:Gem::Version
193
- hash: 3
194
- segments:
195
- - 0
196
- version: "0"
168
+ requirements:
169
+ - - ! '>='
170
+ - !ruby/object:Gem::Version
171
+ version: '0'
197
172
  requirements: []
198
-
199
173
  rubyforge_project: xapian-fu
200
- rubygems_version: 1.8.10
174
+ rubygems_version: 1.8.11
201
175
  signing_key:
202
176
  specification_version: 3
203
177
  summary: A Ruby interface to the Xapian search engine
204
- test_files:
178
+ test_files:
205
179
  - spec/xapian_doc_spec.rb
206
180
  - spec/xapian_db_spec.rb
207
181
  - spec/stopper_factory_spec.rb
@@ -261,3 +235,4 @@ test_files:
261
235
  - spec/xapian_doc_value_accessor_spec.rb
262
236
  - spec/build_db_for_value_testing.rb
263
237
  - spec/query_parser_spec.rb
238
+ has_rdoc: