xapian-fu 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +31 -0
- data/README.rdoc +12 -1
- data/lib/xapian_fu/xapian_db.rb +27 -5
- data/lib/xapian_fu/xapian_doc.rb +2 -1
- data/spec/xapian_db_spec.rb +14 -5
- metadata +79 -6
data/CHANGELOG.rdoc
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
=== 1.1.2 (26th August 2010)
|
2
|
+
|
3
|
+
* Spelling correction support
|
4
|
+
|
5
|
+
=== 1.1.1 (21st April 2010)
|
6
|
+
|
7
|
+
* Support disabling stopping (as per documentation!)
|
8
|
+
|
9
|
+
=== 1.1.0 (14th December 2009)
|
10
|
+
|
11
|
+
* 64bit support and Ruby 1.9.1 support. On-disk format changed.
|
12
|
+
|
13
|
+
=== 1.0.1 (9th September 2009)
|
14
|
+
|
15
|
+
* First high quality release. On-disk format changed.
|
16
|
+
|
17
|
+
* Lots more documentation
|
18
|
+
* Ordering
|
19
|
+
* Result size limiting
|
20
|
+
* Will Paginate support
|
21
|
+
* Stemming support
|
22
|
+
* Stopping support
|
23
|
+
* Field storage improvements
|
24
|
+
* Field tokenization and search support
|
25
|
+
* Query parsing refactor
|
26
|
+
* Search options improvements
|
27
|
+
|
28
|
+
=== 0.2 (June 2009)
|
29
|
+
|
30
|
+
* Initial release
|
31
|
+
|
data/README.rdoc
CHANGED
@@ -62,6 +62,17 @@ Simple integration with the will_paginate Rails helpers.
|
|
62
62
|
@results = db.search("mountain", :page => 1, :per_page => 5)
|
63
63
|
will_paginate @results
|
64
64
|
|
65
|
+
== Spelling correction
|
66
|
+
|
67
|
+
Spelling suggestions, like Google's "Did you mean..." feature:
|
68
|
+
|
69
|
+
db = XapianDb.new(:dir => 'example.db', :create => true)
|
70
|
+
db << "There is a mouse in this house"
|
71
|
+
@results = db.search "moose house"
|
72
|
+
unless @results.corrected_query.empty?
|
73
|
+
puts "Did you mean '#{@results.corrected_query}'"
|
74
|
+
end
|
75
|
+
|
65
76
|
== Transactions support
|
66
77
|
|
67
78
|
Ensure that a group of documents are either entirely added to the
|
@@ -173,7 +184,7 @@ perhaps by reindexing once in a while.
|
|
173
184
|
= More Info
|
174
185
|
|
175
186
|
Author:: John Leach (mailto:john@johnleach.co.uk)
|
176
|
-
Copyright:: Copyright (c) 2009 John Leach
|
187
|
+
Copyright:: Copyright (c) 2009-2010 John Leach
|
177
188
|
License:: MIT (The Xapian library is GPL)
|
178
189
|
Mailing list:: http://rubyforge.org/mailman/listinfo/xapian-fu-discuss
|
179
190
|
Web page:: http://johnleach.co.uk/documents/xapian-fu
|
data/lib/xapian_fu/xapian_db.rb
CHANGED
@@ -57,6 +57,17 @@ module XapianFu #:nodoc:
|
|
57
57
|
#
|
58
58
|
# db = XapianDb.new(:language => :italian, :stopper => false)
|
59
59
|
#
|
60
|
+
# == Spelling suggestions
|
61
|
+
#
|
62
|
+
# The <tt>:spelling</tt> option controls generation of a spelling
|
63
|
+
# dictionary during indexing and its use during searches. When
|
64
|
+
# enabled, Xapian will build a dictionary of words for the database
|
65
|
+
# whilst indexing documents and will enable spelling suggestion by
|
66
|
+
# default for searches. Building the dictionary will impact
|
67
|
+
# indexing performance and database size. It is enabled by default.
|
68
|
+
# See the search section for information on getting spelling
|
69
|
+
# correction information during searches.
|
70
|
+
#
|
60
71
|
# == Fields and values
|
61
72
|
#
|
62
73
|
# The <tt>:store</tt> option specifies which document fields should
|
@@ -101,9 +112,11 @@ module XapianFu #:nodoc:
|
|
101
112
|
attr_reader :fields
|
102
113
|
# An array of fields that will not be indexed
|
103
114
|
attr_reader :unindexed_fields
|
115
|
+
# Whether this db will generate a spelling dictionary during indexing
|
116
|
+
attr_reader :spelling
|
104
117
|
|
105
118
|
def initialize( options = { } )
|
106
|
-
@options = { :index_positions => true }.merge(options)
|
119
|
+
@options = { :index_positions => true, :spelling => true }.merge(options)
|
107
120
|
@dir = @options[:dir]
|
108
121
|
@index_positions = @options[:index_positions]
|
109
122
|
@db_flag = Xapian::DB_OPEN
|
@@ -118,6 +131,7 @@ module XapianFu #:nodoc:
|
|
118
131
|
@store_values << @options[:sortable]
|
119
132
|
@store_values << @options[:collapsible]
|
120
133
|
@store_values = @store_values.flatten.uniq.compact
|
134
|
+
@spelling = @options[:spelling]
|
121
135
|
end
|
122
136
|
|
123
137
|
# Return a new stemmer object for this database
|
@@ -130,12 +144,12 @@ module XapianFu #:nodoc:
|
|
130
144
|
StopperFactory.stopper_for(@stopper)
|
131
145
|
end
|
132
146
|
|
133
|
-
# The writable Xapian
|
147
|
+
# The writable Xapian::WritableDatabase
|
134
148
|
def rw
|
135
149
|
@rw ||= setup_rw_db
|
136
150
|
end
|
137
151
|
|
138
|
-
# The read-only Xapian
|
152
|
+
# The read-only Xapian::Database
|
139
153
|
def ro
|
140
154
|
@ro ||= setup_ro_db
|
141
155
|
end
|
@@ -157,7 +171,8 @@ module XapianFu #:nodoc:
|
|
157
171
|
alias_method "<<", :add_doc
|
158
172
|
|
159
173
|
# Conduct a search on the Xapian database, returning an array of
|
160
|
-
# XapianDoc objects for the matches
|
174
|
+
# XapianFu::XapianDoc objects for the matches wrapped in a
|
175
|
+
# XapianFu::ResultSet.
|
161
176
|
#
|
162
177
|
# The <tt>:limit</tt> option sets how many results to return. For
|
163
178
|
# compatibility with the <tt>will_paginate</tt> plugin, the
|
@@ -178,13 +193,19 @@ module XapianFu #:nodoc:
|
|
178
193
|
# to collapse (group) the results on. Works a bit like the
|
179
194
|
# SQL <tt>GROUP BY</tt> behaviour
|
180
195
|
#
|
196
|
+
# The <tt>:spelling</tt> option controls whether spelling
|
197
|
+
# suggestions will be made for queries. It defaults to whatever
|
198
|
+
# the database spelling setting is (true by default). When
|
199
|
+
# enabled, spelling suggestions are available using the
|
200
|
+
# XapianFu::ResultSet <tt>corrected_query</tt> method.
|
201
|
+
#
|
181
202
|
# For additional options on how the query is parsed, see
|
182
203
|
# XapianFu::QueryParser
|
183
204
|
|
184
205
|
def search(q, options = {})
|
185
206
|
defaults = { :page => 1, :reverse => false,
|
186
207
|
:boolean => true, :boolean_anycase => true, :wildcards => true,
|
187
|
-
:lovehate => true, :spelling =>
|
208
|
+
:lovehate => true, :spelling => spelling, :pure_not => false }
|
188
209
|
options = defaults.merge(options)
|
189
210
|
page = options[:page].to_i rescue 1
|
190
211
|
page = page > 1 ? page - 1 : 0
|
@@ -247,6 +268,7 @@ module XapianFu #:nodoc:
|
|
247
268
|
@rw
|
248
269
|
else
|
249
270
|
# In memory database
|
271
|
+
@spelling = false # inmemory doesn't support spelling
|
250
272
|
@rw = Xapian::inmemory_open
|
251
273
|
end
|
252
274
|
end
|
data/lib/xapian_fu/xapian_doc.rb
CHANGED
@@ -256,7 +256,8 @@ module XapianFu #:nodoc:
|
|
256
256
|
tg.database = db.rw
|
257
257
|
tg.document = xapian_document
|
258
258
|
tg.stopper = stopper if stopper
|
259
|
-
tg.stemmer = stemmer
|
259
|
+
tg.stemmer = stemmer
|
260
|
+
tg.set_flags Xapian::TermGenerator::FLAG_SPELLING if db.spelling
|
260
261
|
index_method = db.index_positions ? :index_text : :index_text_without_positions
|
261
262
|
fields.each do |k,v|
|
262
263
|
next if unindexed_fields.include?(k)
|
data/spec/xapian_db_spec.rb
CHANGED
@@ -275,15 +275,24 @@ describe XapianDb do
|
|
275
275
|
end
|
276
276
|
|
277
277
|
it "should provide a corrected spelling string by default" do
|
278
|
-
|
279
|
-
|
280
|
-
xdb
|
278
|
+
xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true,
|
279
|
+
:overwrite => true)
|
280
|
+
xdb << "there is a mouse in this house"
|
281
|
+
xdb.flush
|
282
|
+
results = xdb.search("there was a moose at our house")
|
283
|
+
results.corrected_query.should == "there was a mouse at our house"
|
284
|
+
end
|
285
|
+
|
286
|
+
it "should not provide corrected spellings when disabled" do
|
287
|
+
xdb = XapianDb.new(:dir => tmp_dir + 'no_corrected_spelling', :create => true,
|
288
|
+
:overwrite => true, :spelling => false)
|
281
289
|
xdb << "there is a mouse in this house"
|
282
290
|
xdb.flush
|
283
|
-
results = xdb.search("moose")
|
284
|
-
results.corrected_query.should == "
|
291
|
+
results = xdb.search("there was a moose at our house")
|
292
|
+
results.corrected_query.should == ""
|
285
293
|
end
|
286
294
|
|
295
|
+
|
287
296
|
it "should do phrase matching by default when then :default_op option is :phrase"
|
288
297
|
|
289
298
|
it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe"
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian-fu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
- 2
|
10
|
+
version: 1.1.2
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- John Leach
|
@@ -9,7 +15,7 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-
|
18
|
+
date: 2010-08-26 00:00:00 +01:00
|
13
19
|
default_executable:
|
14
20
|
dependencies: []
|
15
21
|
|
@@ -22,6 +28,7 @@ extensions: []
|
|
22
28
|
extra_rdoc_files:
|
23
29
|
- README.rdoc
|
24
30
|
- LICENSE
|
31
|
+
- CHANGELOG.rdoc
|
25
32
|
files:
|
26
33
|
- lib/xapian_fu.rb
|
27
34
|
- lib/xapian_fu/xapian_doc_value_accessor.rb
|
@@ -53,8 +60,68 @@ files:
|
|
53
60
|
- examples/ar_query.rb
|
54
61
|
- README.rdoc
|
55
62
|
- LICENSE
|
63
|
+
- CHANGELOG.rdoc
|
64
|
+
- spec/xapian_doc_spec.rb
|
65
|
+
- spec/xapian_db_spec.rb
|
66
|
+
- spec/stopper_factory_spec.rb
|
67
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
|
68
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
|
69
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
|
70
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
|
71
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
|
72
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
|
73
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
|
74
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
|
75
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
|
76
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
|
77
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
|
78
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
|
79
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
|
80
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
|
81
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
|
82
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
|
83
|
+
- spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
|
84
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
|
85
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/position.baseA
|
86
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
|
87
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/value.DB
|
88
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseA
|
89
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
|
90
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
|
91
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
|
92
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseB
|
93
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
|
94
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/record.baseB
|
95
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/flintlock
|
96
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
|
97
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/record.DB
|
98
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/position.DB
|
99
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
|
100
|
+
- spec/fixtures/film_data/i486-linux~1.8.7/iamflint
|
101
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
|
102
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseA
|
103
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
|
104
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
|
105
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseA
|
106
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
|
107
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
|
108
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
|
109
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseB
|
110
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
|
111
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseB
|
112
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
|
113
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
|
114
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/record.DB
|
115
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
|
116
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
|
117
|
+
- spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
|
118
|
+
- spec/fixtures/film_data.rb
|
119
|
+
- spec/xapian_doc_value_accessor_spec.rb
|
120
|
+
- spec/build_db_for_value_testing.rb
|
121
|
+
- spec/query_parser_spec.rb
|
122
|
+
- spec/spec.opts
|
56
123
|
has_rdoc: true
|
57
|
-
homepage: http://github.com/johnl/xapian-fu
|
124
|
+
homepage: http://github.com/johnl/xapian-fu
|
58
125
|
licenses: []
|
59
126
|
|
60
127
|
post_install_message:
|
@@ -67,21 +134,27 @@ rdoc_options:
|
|
67
134
|
require_paths:
|
68
135
|
- lib
|
69
136
|
required_ruby_version: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
70
138
|
requirements:
|
71
139
|
- - ">="
|
72
140
|
- !ruby/object:Gem::Version
|
141
|
+
hash: 3
|
142
|
+
segments:
|
143
|
+
- 0
|
73
144
|
version: "0"
|
74
|
-
version:
|
75
145
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
|
+
none: false
|
76
147
|
requirements:
|
77
148
|
- - ">="
|
78
149
|
- !ruby/object:Gem::Version
|
150
|
+
hash: 3
|
151
|
+
segments:
|
152
|
+
- 0
|
79
153
|
version: "0"
|
80
|
-
version:
|
81
154
|
requirements: []
|
82
155
|
|
83
156
|
rubyforge_project: xapian-fu
|
84
|
-
rubygems_version: 1.3.
|
157
|
+
rubygems_version: 1.3.7
|
85
158
|
signing_key:
|
86
159
|
specification_version: 3
|
87
160
|
summary: A Ruby interface to the Xapian search engine
|