xapian-fu 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +31 -0
- data/README.rdoc +12 -1
- data/lib/xapian_fu/xapian_db.rb +27 -5
- data/lib/xapian_fu/xapian_doc.rb +2 -1
- data/spec/xapian_db_spec.rb +14 -5
- metadata +79 -6
data/CHANGELOG.rdoc
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
=== 1.1.2 (26th August 2010)
|
2
|
+
|
3
|
+
* Spelling correction support
|
4
|
+
|
5
|
+
=== 1.1.1 (21st April 2010)
|
6
|
+
|
7
|
+
* Support disabling stopping (as per documentation!)
|
8
|
+
|
9
|
+
=== 1.1.0 (14th December 2009)
|
10
|
+
|
11
|
+
* 64bit support and Ruby 1.9.1 support. On-disk format changed.
|
12
|
+
|
13
|
+
=== 1.0.1 (9th September 2009)
|
14
|
+
|
15
|
+
* First high quality release. On-disk format changed.
|
16
|
+
|
17
|
+
* Lots more documentation
|
18
|
+
* Ordering
|
19
|
+
* Result size limiting
|
20
|
+
* Will Paginate support
|
21
|
+
* Stemming support
|
22
|
+
* Stopping support
|
23
|
+
* Field storage improvements
|
24
|
+
* Field tokenization and search support
|
25
|
+
* Query parsing refactor
|
26
|
+
* search options improvements
|
27
|
+
|
28
|
+
=== 0.2 (June 2009)
|
29
|
+
|
30
|
+
* Initial release
|
31
|
+
|
data/README.rdoc
CHANGED
@@ -62,6 +62,17 @@ Simple integration with the will_paginate Rails helpers.
|
|
62
62
|
@results = db.search("mountain", :page => 1, :per_page => 5)
|
63
63
|
will_paginate @results
|
64
64
|
|
65
|
+
== Spelling correction
|
66
|
+
|
67
|
+
Spelling suggestions, like Google's "Did you mean..." feature:
|
68
|
+
|
69
|
+
db = XapianDb.new(:dir => 'example.db', :create => true)
|
70
|
+
db << "There is a mouse in this house"
|
71
|
+
@results = db.search "moose house"
|
72
|
+
unless @results.corrected_query.empty?
|
73
|
+
puts "Did you mean '#{@results.corrected_query}'"
|
74
|
+
end
|
75
|
+
|
65
76
|
== Transactions support
|
66
77
|
|
67
78
|
Ensure that a group of documents are either entirely added to the
|
@@ -173,7 +184,7 @@ perhaps by reindexing once in a while.
|
|
173
184
|
= More Info
|
174
185
|
|
175
186
|
Author:: John Leach (mailto:john@johnleach.co.uk)
|
176
|
-
Copyright:: Copyright (c) 2009 John Leach
|
187
|
+
Copyright:: Copyright (c) 2009-2010 John Leach
|
177
188
|
License:: MIT (The Xapian library is GPL)
|
178
189
|
Mailing list:: http://rubyforge.org/mailman/listinfo/xapian-fu-discuss
|
179
190
|
Web page:: http://johnleach.co.uk/documents/xapian-fu
|
data/lib/xapian_fu/xapian_db.rb
CHANGED
@@ -57,6 +57,17 @@ module XapianFu #:nodoc:
|
|
57
57
|
#
|
58
58
|
# db = XapianDb.new(:language => :italian, :stopper => false)
|
59
59
|
#
|
60
|
+
# == Spelling suggestions
|
61
|
+
#
|
62
|
+
# The <tt>:spelling</tt> option controls generation of a spelling
|
63
|
+
# dictionary during indexing and its use during searches. When
|
64
|
+
# enabled, Xapian will build a dictionary of words for the database
|
65
|
+
# whilst indexing documents and will enable spelling suggestion by
|
66
|
+
# default for searches. Building the dictionary will impact
|
67
|
+
# indexing performance and database size. It is enabled by default.
|
68
|
+
# See the search section for information on getting spelling
|
69
|
+
# correction information during searches.
|
70
|
+
#
|
60
71
|
# == Fields and values
|
61
72
|
#
|
62
73
|
# The <tt>:store</tt> option specifies which document fields should
|
@@ -101,9 +112,11 @@ module XapianFu #:nodoc:
|
|
101
112
|
attr_reader :fields
|
102
113
|
# An array of fields that will not be indexed
|
103
114
|
attr_reader :unindexed_fields
|
115
|
+
# Whether this db will generate a spelling dictionary during indexing
|
116
|
+
attr_reader :spelling
|
104
117
|
|
105
118
|
def initialize( options = { } )
|
106
|
-
@options = { :index_positions => true }.merge(options)
|
119
|
+
@options = { :index_positions => true, :spelling => true }.merge(options)
|
107
120
|
@dir = @options[:dir]
|
108
121
|
@index_positions = @options[:index_positions]
|
109
122
|
@db_flag = Xapian::DB_OPEN
|
@@ -118,6 +131,7 @@ module XapianFu #:nodoc:
|
|
118
131
|
@store_values << @options[:sortable]
|
119
132
|
@store_values << @options[:collapsible]
|
120
133
|
@store_values = @store_values.flatten.uniq.compact
|
134
|
+
@spelling = @options[:spelling]
|
121
135
|
end
|
122
136
|
|
123
137
|
# Return a new stemmer object for this database
|
@@ -130,12 +144,12 @@ module XapianFu #:nodoc:
|
|
130
144
|
StopperFactory.stopper_for(@stopper)
|
131
145
|
end
|
132
146
|
|
133
|
-
# The writable Xapian
|
147
|
+
# The writable Xapian::WritableDatabase
|
134
148
|
def rw
|
135
149
|
@rw ||= setup_rw_db
|
136
150
|
end
|
137
151
|
|
138
|
-
# The read-only Xapian
|
152
|
+
# The read-only Xapian::Database
|
139
153
|
def ro
|
140
154
|
@ro ||= setup_ro_db
|
141
155
|
end
|
@@ -157,7 +171,8 @@ module XapianFu #:nodoc:
|
|
157
171
|
alias_method "<<", :add_doc
|
158
172
|
|
159
173
|
# Conduct a search on the Xapian database, returning an array of
|
160
|
-
# XapianDoc objects for the matches
|
174
|
+
# XapianFu::XapianDoc objects for the matches wrapped in a
|
175
|
+
# XapianFu::ResultSet.
|
161
176
|
#
|
162
177
|
# The <tt>:limit</tt> option sets how many results to return. For
|
163
178
|
# compatibility with the <tt>will_paginate</tt> plugin, the
|
@@ -178,13 +193,19 @@ module XapianFu #:nodoc:
|
|
178
193
|
# to collapse (group) the results on. Works a bit like the
|
179
194
|
# SQL <tt>GROUP BY</tt> behaviour
|
180
195
|
#
|
196
|
+
# The <tt>:spelling</tt> option controls whether spelling
|
197
|
+
# suggestions will be made for queries. It defaults to whatever
|
198
|
+
# the database spelling setting is (true by default). When
|
199
|
+
# enabled, spelling suggestions are available using the
|
200
|
+
# XapianFu::ResultSet <tt>corrected_query</tt> method.
|
201
|
+
#
|
181
202
|
# For additional options on how the query is parsed, see
|
182
203
|
# XapianFu::QueryParser
|
183
204
|
|
184
205
|
def search(q, options = {})
|
185
206
|
defaults = { :page => 1, :reverse => false,
|
186
207
|
:boolean => true, :boolean_anycase => true, :wildcards => true,
|
187
|
-
:lovehate => true, :spelling =>
|
208
|
+
:lovehate => true, :spelling => spelling, :pure_not => false }
|
188
209
|
options = defaults.merge(options)
|
189
210
|
page = options[:page].to_i rescue 1
|
190
211
|
page = page > 1 ? page - 1 : 0
|
@@ -247,6 +268,7 @@ module XapianFu #:nodoc:
|
|
247
268
|
@rw
|
248
269
|
else
|
249
270
|
# In memory database
|
271
|
+
@spelling = false # inmemory doesn't support spelling
|
250
272
|
@rw = Xapian::inmemory_open
|
251
273
|
end
|
252
274
|
end
|
data/lib/xapian_fu/xapian_doc.rb
CHANGED
@@ -256,7 +256,8 @@ module XapianFu #:nodoc:
|
|
256
256
|
tg.database = db.rw
|
257
257
|
tg.document = xapian_document
|
258
258
|
tg.stopper = stopper if stopper
|
259
|
-
tg.stemmer = stemmer
|
259
|
+
tg.stemmer = stemmer
|
260
|
+
tg.set_flags Xapian::TermGenerator::FLAG_SPELLING if db.spelling
|
260
261
|
index_method = db.index_positions ? :index_text : :index_text_without_positions
|
261
262
|
fields.each do |k,v|
|
262
263
|
next if unindexed_fields.include?(k)
|
data/spec/xapian_db_spec.rb
CHANGED
@@ -275,15 +275,24 @@ describe XapianDb do
|
|
275
275
|
end
|
276
276
|
|
277
277
|
it "should provide a corrected spelling string by default" do
|
278
|
-
|
279
|
-
|
280
|
-
xdb
|
278
|
+
xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true,
|
279
|
+
:overwrite => true)
|
280
|
+
xdb << "there is a mouse in this house"
|
281
|
+
xdb.flush
|
282
|
+
results = xdb.search("there was a moose at our house")
|
283
|
+
results.corrected_query.should == "there was a mouse at our house"
|
284
|
+
end
|
285
|
+
|
286
|
+
it "should not provide corrected spellings when disabled" do
|
287
|
+
xdb = XapianDb.new(:dir => tmp_dir + 'no_corrected_spelling', :create => true,
|
288
|
+
:overwrite => true, :spelling => false)
|
281
289
|
xdb << "there is a mouse in this house"
|
282
290
|
xdb.flush
|
283
|
-
results = xdb.search("moose")
|
284
|
-
results.corrected_query.should == "
|
291
|
+
results = xdb.search("there was a moose at our house")
|
292
|
+
results.corrected_query.should == ""
|
285
293
|
end
|
286
294
|
|
295
|
+
|
287
296
|
it "should do phrase matching by default when then :default_op option is :phrase"
|
288
297
|
|
289
298
|
it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe"
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian-fu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
- 2
|
10
|
+
version: 1.1.2
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- John Leach
|
@@ -9,7 +15,7 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-
|
18
|
+
date: 2010-08-26 00:00:00 +01:00
|
13
19
|
default_executable:
|
14
20
|
dependencies: []
|
15
21
|
|
@@ -22,6 +28,7 @@ extensions: []
|
|
22
28
|
extra_rdoc_files:
|
23
29
|
- README.rdoc
|
24
30
|
- LICENSE
|
31
|
+
- CHANGELOG.rdoc
|
25
32
|
files:
|
26
33
|
- lib/xapian_fu.rb
|
27
34
|
- lib/xapian_fu/xapian_doc_value_accessor.rb
|
@@ -53,8 +60,68 @@ files:
|
|
53
60
|
- examples/ar_query.rb
|
54
61
|
- README.rdoc
|
55
62
|
- LICENSE
|
63
|
+
- CHANGELOG.rdoc
|
64
|
+
- spec/xapian_doc_spec.rb
|
65
|
+
- spec/xapian_db_spec.rb
|
66
|
+
- spec/stopper_factory_spec.rb
|
67
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
|
68
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
|
69
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
|
70
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
|
71
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
|
72
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
|
73
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
|
74
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
|
75
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
|
76
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
|
77
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
|
78
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
|
79
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
|
80
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
|
81
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
|
82
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
|
83
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
|
84
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
|
85
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/position.baseA
|
86
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
|
87
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/value.DB
|
88
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseA
|
89
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
|
90
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
|
91
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
|
92
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseB
|
93
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
|
94
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/record.baseB
|
95
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/flintlock
|
96
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
|
97
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/record.DB
|
98
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/position.DB
|
99
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
|
100
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/iamflint
|
101
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
|
102
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseA
|
103
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
|
104
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
|
105
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseA
|
106
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
|
107
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
|
108
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
|
109
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseB
|
110
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
|
111
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseB
|
112
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
|
113
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
|
114
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/record.DB
|
115
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
|
116
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
|
117
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
|
118
|
+
- spec/fixtures/film_data.rb
|
119
|
+
- spec/xapian_doc_value_accessor_spec.rb
|
120
|
+
- spec/build_db_for_value_testing.rb
|
121
|
+
- spec/query_parser_spec.rb
|
122
|
+
- spec/spec.opts
|
56
123
|
has_rdoc: true
|
57
|
-
homepage: http://github.com/johnl/xapian-fu
|
124
|
+
homepage: http://github.com/johnl/xapian-fu
|
58
125
|
licenses: []
|
59
126
|
|
60
127
|
post_install_message:
|
@@ -67,21 +134,27 @@ rdoc_options:
|
|
67
134
|
require_paths:
|
68
135
|
- lib
|
69
136
|
required_ruby_version: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
70
138
|
requirements:
|
71
139
|
- - ">="
|
72
140
|
- !ruby/object:Gem::Version
|
141
|
+
hash: 3
|
142
|
+
segments:
|
143
|
+
- 0
|
73
144
|
version: "0"
|
74
|
-
version:
|
75
145
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
|
+
none: false
|
76
147
|
requirements:
|
77
148
|
- - ">="
|
78
149
|
- !ruby/object:Gem::Version
|
150
|
+
hash: 3
|
151
|
+
segments:
|
152
|
+
- 0
|
79
153
|
version: "0"
|
80
|
-
version:
|
81
154
|
requirements: []
|
82
155
|
|
83
156
|
rubyforge_project: xapian-fu
|
84
|
-
rubygems_version: 1.3.
|
157
|
+
rubygems_version: 1.3.7
|
85
158
|
signing_key:
|
86
159
|
specification_version: 3
|
87
160
|
summary: A Ruby interface to the Xapian search engine
|