xapian-fu 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.rdoc ADDED
@@ -0,0 +1,31 @@
1
+ === 1.1.2 (26th August 2010)
2
+
3
+ * Spelling correction support
4
+
5
+ === 1.1.1 (21st April 2010)
6
+
7
+ * Support disabling stopping (as per documentation!)
8
+
9
+ === 1.1.0 (14th December 2009)
10
+
11
+ * 64bit support and Ruby 1.9.1 support. On-disk format changed.
12
+
13
+ === 1.0.1 (9th September 2009)
14
+
15
+ * First high quality release. On-disk format changed.
16
+
17
+ * Lots more documentation
18
+ * Ordering
19
+ * Result size limiting
20
+ * Will Paginate support
21
+ * Stemming support
22
+ * Stopping support
23
+ * Field storage improvements
24
+ * Field tokenization and search support
25
+ * Query parsing refactor
26
+ * Search options improvements
27
+
28
+ === 0.2 (June 2009)
29
+
30
+ * Initial release
31
+
data/README.rdoc CHANGED
@@ -62,6 +62,17 @@ Simple integration with the will_paginate Rails helpers.
62
62
  @results = db.search("mountain", :page => 1, :per_page => 5)
63
63
  will_paginate @results
64
64
 
65
+ == Spelling correction
66
+
67
+ Spelling suggestions, like Google's "Did you mean..." feature:
68
+
69
+ db = XapianDb.new(:dir => 'example.db', :create => true)
70
+ db << "There is a mouse in this house"
71
+ @results = db.search "moose house"
72
+ unless @results.corrected_query.empty?
73
+ puts "Did you mean '#{@results.corrected_query}'"
74
+ end
75
+
65
76
  == Transactions support
66
77
 
67
78
  Ensure that a group of documents are either entirely added to the
@@ -173,7 +184,7 @@ perhaps by reindexing once in a while.
173
184
  = More Info
174
185
 
175
186
  Author:: John Leach (mailto:john@johnleach.co.uk)
176
- Copyright:: Copyright (c) 2009 John Leach
187
+ Copyright:: Copyright (c) 2009-2010 John Leach
177
188
  License:: MIT (The Xapian library is GPL)
178
189
  Mailing list:: http://rubyforge.org/mailman/listinfo/xapian-fu-discuss
179
190
  Web page:: http://johnleach.co.uk/documents/xapian-fu
@@ -57,6 +57,17 @@ module XapianFu #:nodoc:
57
57
  #
58
58
  # db = XapianDb.new(:language => :italian, :stopper => false)
59
59
  #
60
+ # == Spelling suggestions
61
+ #
62
+ # The <tt>:spelling</tt> option controls generation of a spelling
63
+ # dictionary during indexing and its use during searches. When
64
+ # enabled, Xapian will build a dictionary of words for the database
65
+ # whilst indexing documents and will enable spelling suggestion by
66
+ # default for searches. Building the dictionary will impact
67
+ # indexing performance and database size. It is enabled by default.
68
+ # See the search section for information on getting spelling
69
+ # correction information during searches.
70
+ #
60
71
  # == Fields and values
61
72
  #
62
73
  # The <tt>:store</tt> option specifies which document fields should
@@ -101,9 +112,11 @@ module XapianFu #:nodoc:
101
112
  attr_reader :fields
102
113
  # An array of fields that will not be indexed
103
114
  attr_reader :unindexed_fields
115
+ # Whether this db will generate a spelling dictionary during indexing
116
+ attr_reader :spelling
104
117
 
105
118
  def initialize( options = { } )
106
- @options = { :index_positions => true }.merge(options)
119
+ @options = { :index_positions => true, :spelling => true }.merge(options)
107
120
  @dir = @options[:dir]
108
121
  @index_positions = @options[:index_positions]
109
122
  @db_flag = Xapian::DB_OPEN
@@ -118,6 +131,7 @@ module XapianFu #:nodoc:
118
131
  @store_values << @options[:sortable]
119
132
  @store_values << @options[:collapsible]
120
133
  @store_values = @store_values.flatten.uniq.compact
134
+ @spelling = @options[:spelling]
121
135
  end
122
136
 
123
137
  # Return a new stemmer object for this database
@@ -130,12 +144,12 @@ module XapianFu #:nodoc:
130
144
  StopperFactory.stopper_for(@stopper)
131
145
  end
132
146
 
133
- # The writable Xapian database
147
+ # The writable Xapian::WritableDatabase
134
148
  def rw
135
149
  @rw ||= setup_rw_db
136
150
  end
137
151
 
138
- # The read-only Xapian database
152
+ # The read-only Xapian::Database
139
153
  def ro
140
154
  @ro ||= setup_ro_db
141
155
  end
@@ -157,7 +171,8 @@ module XapianFu #:nodoc:
157
171
  alias_method "<<", :add_doc
158
172
 
159
173
  # Conduct a search on the Xapian database, returning an array of
160
- # XapianDoc objects for the matches.
174
+ # XapianFu::XapianDoc objects for the matches wrapped in a
175
+ # XapianFu::ResultSet.
161
176
  #
162
177
  # The <tt>:limit</tt> option sets how many results to return. For
163
178
  # compatibility with the <tt>will_paginate</tt> plugin, the
@@ -178,13 +193,19 @@ module XapianFu #:nodoc:
178
193
  # to collapse (group) the results on. Works a bit like the
179
194
  # SQL <tt>GROUP BY</tt> behaviour
180
195
  #
196
+ # The <tt>:spelling</tt> option controls whether spelling
197
+ # suggestions will be made for queries. It defaults to whatever
198
+ # the database spelling setting is (true by default). When
199
+ # enabled, spelling suggestions are available using the
200
+ # XapianFu::ResultSet <tt>corrected_query</tt> method.
201
+ #
181
202
  # For additional options on how the query is parsed, see
182
203
  # XapianFu::QueryParser
183
204
 
184
205
  def search(q, options = {})
185
206
  defaults = { :page => 1, :reverse => false,
186
207
  :boolean => true, :boolean_anycase => true, :wildcards => true,
187
- :lovehate => true, :spelling => true, :pure_not => false }
208
+ :lovehate => true, :spelling => spelling, :pure_not => false }
188
209
  options = defaults.merge(options)
189
210
  page = options[:page].to_i rescue 1
190
211
  page = page > 1 ? page - 1 : 0
@@ -247,6 +268,7 @@ module XapianFu #:nodoc:
247
268
  @rw
248
269
  else
249
270
  # In memory database
271
+ @spelling = false # inmemory doesn't support spelling
250
272
  @rw = Xapian::inmemory_open
251
273
  end
252
274
  end
@@ -256,7 +256,8 @@ module XapianFu #:nodoc:
256
256
  tg.database = db.rw
257
257
  tg.document = xapian_document
258
258
  tg.stopper = stopper if stopper
259
- tg.stemmer = stemmer
259
+ tg.stemmer = stemmer
260
+ tg.set_flags Xapian::TermGenerator::FLAG_SPELLING if db.spelling
260
261
  index_method = db.index_positions ? :index_text : :index_text_without_positions
261
262
  fields.each do |k,v|
262
263
  next if unindexed_fields.include?(k)
@@ -275,15 +275,24 @@ describe XapianDb do
275
275
  end
276
276
 
277
277
  it "should provide a corrected spelling string by default" do
278
- pending
279
- xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true)
280
- xdb.rw.add_spelling("house mouse louse")
278
+ xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true,
279
+ :overwrite => true)
280
+ xdb << "there is a mouse in this house"
281
+ xdb.flush
282
+ results = xdb.search("there was a moose at our house")
283
+ results.corrected_query.should == "there was a mouse at our house"
284
+ end
285
+
286
+ it "should not provide corrected spellings when disabled" do
287
+ xdb = XapianDb.new(:dir => tmp_dir + 'no_corrected_spelling', :create => true,
288
+ :overwrite => true, :spelling => false)
281
289
  xdb << "there is a mouse in this house"
282
290
  xdb.flush
283
- results = xdb.search("moose")
284
- results.corrected_query.should == "mouse"
291
+ results = xdb.search("there was a moose at our house")
292
+ results.corrected_query.should == ""
285
293
  end
286
294
 
295
+
287
296
  it "should do phrase matching by default when then :default_op option is :phrase"
288
297
 
289
298
  it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe"
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ hash: 23
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 1
9
+ - 2
10
+ version: 1.1.2
5
11
  platform: ruby
6
12
  authors:
7
13
  - John Leach
@@ -9,7 +15,7 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-04-21 00:00:00 +01:00
18
+ date: 2010-08-26 00:00:00 +01:00
13
19
  default_executable:
14
20
  dependencies: []
15
21
 
@@ -22,6 +28,7 @@ extensions: []
22
28
  extra_rdoc_files:
23
29
  - README.rdoc
24
30
  - LICENSE
31
+ - CHANGELOG.rdoc
25
32
  files:
26
33
  - lib/xapian_fu.rb
27
34
  - lib/xapian_fu/xapian_doc_value_accessor.rb
@@ -53,8 +60,68 @@ files:
53
60
  - examples/ar_query.rb
54
61
  - README.rdoc
55
62
  - LICENSE
63
+ - CHANGELOG.rdoc
64
+ - spec/xapian_doc_spec.rb
65
+ - spec/xapian_db_spec.rb
66
+ - spec/stopper_factory_spec.rb
67
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
68
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
69
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
70
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
71
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
72
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
73
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
74
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
75
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
76
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
77
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
78
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
79
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
80
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
81
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
82
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
83
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
84
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
85
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.baseA
86
+ - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
87
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
88
+ - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseA
89
+ - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
90
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
91
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
92
+ - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseB
93
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
94
+ - spec/fixtures/film_data/i486-linux~1.8.7/record.baseB
95
+ - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
96
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
97
+ - spec/fixtures/film_data/i486-linux~1.8.7/record.DB
98
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
99
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
100
+ - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
101
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
102
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseA
103
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
104
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
105
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseA
106
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
107
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
108
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
109
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseB
110
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
111
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseB
112
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
113
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
114
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/record.DB
115
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
116
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
117
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
118
+ - spec/fixtures/film_data.rb
119
+ - spec/xapian_doc_value_accessor_spec.rb
120
+ - spec/build_db_for_value_testing.rb
121
+ - spec/query_parser_spec.rb
122
+ - spec/spec.opts
56
123
  has_rdoc: true
57
- homepage: http://github.com/johnl/xapian-fu/tree/master
124
+ homepage: http://github.com/johnl/xapian-fu
58
125
  licenses: []
59
126
 
60
127
  post_install_message:
@@ -67,21 +134,27 @@ rdoc_options:
67
134
  require_paths:
68
135
  - lib
69
136
  required_ruby_version: !ruby/object:Gem::Requirement
137
+ none: false
70
138
  requirements:
71
139
  - - ">="
72
140
  - !ruby/object:Gem::Version
141
+ hash: 3
142
+ segments:
143
+ - 0
73
144
  version: "0"
74
- version:
75
145
  required_rubygems_version: !ruby/object:Gem::Requirement
146
+ none: false
76
147
  requirements:
77
148
  - - ">="
78
149
  - !ruby/object:Gem::Version
150
+ hash: 3
151
+ segments:
152
+ - 0
79
153
  version: "0"
80
- version:
81
154
  requirements: []
82
155
 
83
156
  rubyforge_project: xapian-fu
84
- rubygems_version: 1.3.5
157
+ rubygems_version: 1.3.7
85
158
  signing_key:
86
159
  specification_version: 3
87
160
  summary: A Ruby interface to the Xapian search engine