xapian-fu 1.1.2 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,9 @@
1
+ === 1.2 (5th July 2011)
2
+
3
+ * Support for synonyms (Damian Janowski)
4
+ * Fix some Ruby 1.9.2 issues, particularly reading utf8 stop files
5
+ * Fixed specs to work with both xapian 1.0 and 1.2
6
+
1
7
  === 1.1.2 (26th August 2010)
2
8
 
3
9
  * Spelling correction support
data/examples/spider.rb CHANGED
@@ -8,7 +8,7 @@ require 'rubygems'
8
8
  require 'benchmark'
9
9
  require 'lib/xapian_fu'
10
10
 
11
- db = XapianFu::XapianDb.new(:store => [:filename, :filesize],
11
+ db = XapianFu::XapianDb.new(:dir => 'spider.db', :store => [:filename, :filesize],
12
12
  :overwrite => true)
13
13
 
14
14
  base_path = ARGV[0] || '.'
@@ -31,12 +31,12 @@ while dir = index_queue.shift
31
31
  next
32
32
  end
33
33
  next unless File.file?(filename)
34
- next unless filename =~ /(txt|doc|README|c|cpp|h|pl|sh|rb|py|note|xml)$/i
34
+ next unless filename =~ /(txt|doc|README|c|h|pl|sh|rb|py|note|xml)$/i
35
35
  file_count += 1
36
36
 
37
37
  # Read the first 10k of data
38
38
  text = File.open(filename) { |f| f.read(10 * 1024) }
39
- file_data += text.to_s.size
39
+ file_data += text.size
40
40
  # Index the data, filename and filesize
41
41
  bm = Benchmark.measure do
42
42
  db << {
@@ -120,7 +120,7 @@ module XapianFu #:nodoc:
120
120
  if @flags
121
121
  @flags
122
122
  else
123
- valid_flags = [:boolean, :boolean_anycase, :wildcards, :lovehate, :spelling, :pure_not]
123
+ valid_flags = [:boolean, :boolean_anycase, :wildcards, :lovehate, :spelling, :pure_not, :synonyms]
124
124
  @flags = valid_flags.delete_if { |vf| not @options[vf] }
125
125
  end
126
126
  end
@@ -135,6 +135,7 @@ module XapianFu #:nodoc:
135
135
  qflags |= Xapian::QueryParser::FLAG_LOVEHATE if flags.include?(:lovehate)
136
136
  qflags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION if flags.include?(:spelling)
137
137
  qflags |= Xapian::QueryParser::FLAG_PURE_NOT if flags.include?(:pure_not)
138
+ qflags |= Xapian::QueryParser::FLAG_AUTO_SYNONYMS if flags.include?(:synonyms)
138
139
  qflags
139
140
  end
140
141
 
@@ -32,7 +32,10 @@ module XapianFu
32
32
  def self.stop_words_for(lang)
33
33
  raise UnsupportedStopperLanguage, lang.to_s unless File.exists?(stop_words_filename(lang))
34
34
  words = []
35
- open(stop_words_filename(lang), "r") do |f|
35
+ # Open files with correct encoding in Ruby 1.9
36
+ open_args = [stop_words_filename(lang), "r"]
37
+ open_args << { :encoding => "UTF-8" } if String.new.respond_to? :encoding
38
+ open(*open_args) do |f|
36
39
  while line = f.readline rescue nil
37
40
  words << line.split(" ", 2).first.downcase.strip unless line =~ /^ +|^$|^\|/
38
41
  end
@@ -170,6 +170,19 @@ module XapianFu #:nodoc:
170
170
  end
171
171
  alias_method "<<", :add_doc
172
172
 
173
+ # Add a synonym to the database.
174
+ #
175
+ # If you want to search with synonym support, remember to add
176
+ # the option:
177
+ #
178
+ # db.search("foo", :synonyms => true)
179
+ #
180
+ # Note that in-memory databases don't support synonyms.
181
+ #
182
+ def add_synonym(term, synonym)
183
+ rw.add_synonym(term, synonym)
184
+ end
185
+
173
186
  # Conduct a search on the Xapian database, returning an array of
174
187
  # XapianFu::XapianDoc objects for the matches wrapped in a
175
188
  # XapianFu::ResultSet.
@@ -1,5 +1,5 @@
1
1
  require 'xapian'
2
- require 'lib/xapian_fu.rb'
2
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
3
3
  include XapianFu
4
4
 
5
5
  describe QueryParser do
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'xapian'
3
- require 'lib/xapian_fu.rb'
3
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
4
4
  include XapianFu
5
5
  require 'fileutils'
6
6
 
@@ -33,7 +33,7 @@ describe StopperFactory do
33
33
  words.should include 'and'
34
34
  words.should include "they're"
35
35
  end
36
-
36
+
37
37
  %w(danish dutch english finnish french german hungarian italian norwegian portuguese russian spanish swedish).each do |lang|
38
38
  describe lang do
39
39
  it "should return an array of words" do
@@ -49,7 +49,7 @@ describe StopperFactory do
49
49
  end
50
50
  end
51
51
 
52
-
52
+
53
53
  it "should raise a UnsupportedStopperLanguage error if there is no data for the given language" do
54
54
  Proc.new { StopperFactory.stop_words_for(:no_existy) }.should raise_error UnsupportedStopperLanguage
55
55
  end
@@ -1,5 +1,5 @@
1
1
  require 'xapian'
2
- require 'lib/xapian_fu.rb'
2
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
3
3
  include XapianFu
4
4
  require 'fileutils'
5
5
  require 'date'
@@ -28,14 +28,14 @@ describe XapianDb do
28
28
  xdb.rw.should be_a_kind_of(Xapian::WritableDatabase)
29
29
  xdb.ro.should be_a_kind_of(Xapian::Database)
30
30
  end
31
-
31
+
32
32
  end
33
33
 
34
34
  it "should lazily create the on-disk database when rw is used" do
35
35
  xdb = XapianDb.new(:dir => tmp_dir, :create => true)
36
36
  File.exists?(tmp_dir).should be_false
37
37
  xdb.rw
38
- File.exists?(tmp_dir).should be_true
38
+ File.exists?(tmp_dir).should be_true
39
39
  end
40
40
 
41
41
  it "should flush documents to the index when flush is called" do
@@ -201,7 +201,7 @@ describe XapianDb do
201
201
  xdb << XapianDoc.new("once upon a time")
202
202
  xdb.size.should == 2
203
203
  end
204
-
204
+
205
205
  end
206
206
 
207
207
  describe "search" do
@@ -277,10 +277,10 @@ describe XapianDb do
277
277
  it "should provide a corrected spelling string by default" do
278
278
  xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true,
279
279
  :overwrite => true)
280
- xdb << "there is a mouse in this house"
280
+ xdb << "there is a mouse in this building"
281
281
  xdb.flush
282
- results = xdb.search("there was a moose at our house")
283
- results.corrected_query.should == "there was a mouse at our house"
282
+ results = xdb.search("there was a moose at our building")
283
+ results.corrected_query.should == "there was a mouse at our building"
284
284
  end
285
285
 
286
286
  it "should not provide corrected spellings when disabled" do
@@ -293,11 +293,17 @@ describe XapianDb do
293
293
  end
294
294
 
295
295
 
296
- it "should do phrase matching by default when then :default_op option is :phrase"
296
+ it "should do phrase matching by default when then :default_op option is :phrase" do
297
+ pending
298
+ end
297
299
 
298
- it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe"
300
+ it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe" do
301
+ pending
302
+ end
299
303
 
300
- it "should do PURE_NOT matching by default when the :default_op option is :pure_not"
304
+ it "should do PURE_NOT matching by default when the :default_op option is :pure_not" do
305
+ pending
306
+ end
301
307
 
302
308
  it "should page results when given the :page and :per_page options" do
303
309
  xdb = XapianDb.new
@@ -356,6 +362,22 @@ describe XapianDb do
356
362
  xdb.search("john").should == [john,katherine,louisa]
357
363
  xdb.search("john -name:john").should == [katherine,louisa]
358
364
  end
365
+
366
+ it "should recognize synonyms" do
367
+ xdb = XapianDb.new(:dir => tmp_dir + 'synonyms', :create => true,
368
+ :fields => [:name], :overwrite => true)
369
+
370
+ xdb << {:name => "john"}
371
+ xdb.flush
372
+
373
+ xdb.search("jon", :synonyms => true).should be_empty
374
+
375
+ xdb.add_synonym("jon", "john")
376
+ xdb.flush
377
+
378
+ xdb.search("jon").should be_empty
379
+ xdb.search("jon", :synonyms => true).should_not be_empty
380
+ end
359
381
  end
360
382
 
361
383
  describe "add_doc" do
@@ -533,9 +555,9 @@ describe XapianDb do
533
555
  xdb = XapianDb.new(:fields => { :name => String, :title => String })
534
556
  xdb.unindexed_fields.should == []
535
557
  end
536
-
558
+
537
559
  it "should return fields defined as not indexed in the fields option" do
538
- xdb = XapianDb.new(:fields => {
560
+ xdb = XapianDb.new(:fields => {
539
561
  :name => { :type => String, :index => false },
540
562
  :title => String })
541
563
  xdb.unindexed_fields.should include :name
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'xapian'
3
- require 'lib/xapian_fu.rb'
3
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
4
4
  include XapianFu
5
5
  require 'fileutils'
6
6
 
@@ -9,7 +9,7 @@ describe XapianDoc do
9
9
  it "should be equal to other XapianDoc objects with the same id" do
10
10
  XapianDoc.new(:id => 666).should == XapianDoc.new(:id => 666)
11
11
  end
12
-
12
+
13
13
  it "should not be equal to other XapianDoc objects with different ids" do
14
14
  XapianDoc.new(:id => 666).should_not == XapianDoc.new(:id => 667)
15
15
  end
@@ -44,7 +44,7 @@ describe XapianDoc do
44
44
  xdoc.terms.last.should be_a_kind_of Xapian::Term
45
45
  xdoc.terms.last.term.should == "upon"
46
46
  end
47
-
47
+
48
48
  it "should tokenize the fields of a hash separately" do
49
49
  xdb = XapianDb.new
50
50
  xdoc = xdb.documents.new({ :text => "once upon a time", :title => "A story" }).to_xapian_document
@@ -73,7 +73,7 @@ describe XapianDoc do
73
73
  terms = xdoc.terms.collect { |t| t.term }
74
74
  terms.should include time.utc.strftime("%Y%m%d%H%M%S")
75
75
  end
76
-
76
+
77
77
  it "should convert DateTime instances to a useful format when tokenizing" do
78
78
  datetime = DateTime.now
79
79
  xdb = XapianDb.new
@@ -118,8 +118,8 @@ describe XapianDoc do
118
118
  it "should stem #{lang.to_s.capitalize} words when the :stemmer option is set to :#{lang}" do
119
119
  xdb = XapianDb.new
120
120
  xdoc = xdb.documents.new(word, :stemmer => lang).to_xapian_document
121
- terms = xdoc.terms.collect { |t| t.term }
122
- terms.should include 'Z'+stem
121
+ terms = xdoc.terms.collect { |t| t.term.respond_to?(:force_encoding) ? t.term.force_encoding("UTF-8") : t.term }
122
+ terms.should include 'Z' + stem
123
123
  end
124
124
  end
125
125
  end
@@ -130,7 +130,7 @@ describe XapianDoc do
130
130
  terms = xdoc.terms.collect { |t| t.term if t.term =~ /^Z/ }.compact
131
131
  terms.should be_empty
132
132
  end
133
-
133
+
134
134
  it "should not stem english stop words by default" do
135
135
  xdb = XapianDb.new
136
136
  xdoc = xdb.documents.new("And they made a cake", :stemmer => :english).to_xapian_document
@@ -139,14 +139,14 @@ describe XapianDoc do
139
139
  terms.should_not include 'Za'
140
140
  terms.should include 'Zcake'
141
141
  end
142
-
142
+
143
143
  it "should allow setting the stopper on initialisation" do
144
144
  xdb = XapianDb.new(:stopper => :english)
145
145
  xdoc = xdb.documents.new("And they made a cake", :stopper => :french)
146
146
  xdoc.stopper.call("ayantes").should == true
147
147
  xdoc.stopper.call("and").should == false
148
148
  end
149
-
149
+
150
150
  it "should not stop words when stopper is set to false" do
151
151
  xdb = XapianDb.new
152
152
  xdoc = xdb.documents.new("And they made a cake", :stopper => false).to_xapian_document
@@ -160,7 +160,7 @@ describe XapianDoc do
160
160
  terms = xdoc.terms.collect { |t| t.term }
161
161
  terms.should_not include 'Zи'
162
162
  terms.should_not include 'Zони'
163
- terms.should include 'Zcake'
163
+ terms.should include 'Zcake'
164
164
  end
165
165
  end
166
166
 
@@ -180,7 +180,7 @@ describe XapianDoc do
180
180
  xdoc = xdb.documents.new("stink and bones", :language => :english, :stemmer => :french)
181
181
  xdoc.stemmer.call("contournait").should == "contourn"
182
182
  end
183
-
183
+
184
184
  end
185
185
 
186
186
  describe "stopper" do
@@ -188,7 +188,7 @@ describe XapianDoc do
188
188
  xdb = XapianDb.new(:language => :french)
189
189
  xdoc = xdb.documents.new("stink and bones")
190
190
  xdoc.stopper.call("avec").should == true
191
- end
191
+ end
192
192
  it "should return a stopper for the document language, overriding the db" do
193
193
  xdb = XapianDb.new(:language => :english)
194
194
  xdoc = xdb.documents.new("stink and bones", :language => :french)
@@ -198,7 +198,7 @@ describe XapianDoc do
198
198
  xdb = XapianDb.new(:language => :german)
199
199
  xdoc = xdb.documents.new("stink and bones", :language => :english, :stopper => :french)
200
200
  xdoc.stopper.call("avec").should == true
201
- end
201
+ end
202
202
  end
203
203
 
204
204
  end
@@ -1,5 +1,5 @@
1
1
  require 'xapian'
2
- require 'lib/xapian_fu.rb'
2
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
3
3
  include XapianFu
4
4
  require 'fileutils'
5
5
  require 'fixtures/film_data'
@@ -34,12 +34,12 @@ describe XapianDocValueAccessor do
34
34
  doc.values.fetch(:number, Fixnum).should == number
35
35
  doc.to_xapian_document.values.first.value.should == [number].pack("G")
36
36
  end
37
- end
37
+ end
38
38
 
39
39
  it "should store fields defined as Bignum as packed double-precision float, network byte order" do
40
40
  xdb = XapianDb.new(:fields => { :number => { :type => Bignum, :store => true } })
41
41
  [
42
- (-0x1fffffffffffff..-0x1fffffffffffff + 10).to_a,
42
+ (-0x1fffffffffffff..-0x1fffffffffffff + 10).to_a,
43
43
  (0x1fffffffffffff-10..0x1fffffffffffff).to_a
44
44
  ].flatten.each do |number|
45
45
  doc = xdb.documents.new(:number => number)
@@ -56,7 +56,7 @@ describe XapianDocValueAccessor do
56
56
  lambda { doc.values.store(:number, number, Bignum) }.should raise_error XapianFu::ValueOutOfBounds
57
57
  end
58
58
  end
59
-
59
+
60
60
  it "should store fields defined as Float as packed double-precision float, network byte order" do
61
61
  xdb = XapianDb.new(:fields => { :number => { :type => Float, :store => true } })
62
62
  [-0.303393984588383833, 8.448488388488384, 1.0].each do |number|
@@ -64,9 +64,9 @@ describe XapianDocValueAccessor do
64
64
  doc.values.store(:number, number).should == number
65
65
  doc.values.fetch(:number).should == number
66
66
  doc.to_xapian_document.values.first.value.should == [number].pack("G")
67
- end
67
+ end
68
68
  end
69
-
69
+
70
70
  it "should store fields defined as Time in UTC as packed double-precision float, network byte order" do
71
71
  xdb = XapianDb.new(:fields => { :created_at => { :type => Time, :store => true }})
72
72
  time = Time.now
@@ -110,22 +110,22 @@ describe XapianDocValueAccessor do
110
110
  film_data_path = File.join(File.dirname(__FILE__), "fixtures/film_data")
111
111
  Dir.foreach(film_data_path) do |db_path|
112
112
  next unless db_path =~ /.+~.+/
113
- it "should read stored values from databases created by #{db_path}" do
113
+ it "should read stored values from databases created by #{db_path}" do
114
114
  db = XapianDb.new(:dir => File.join(film_data_path, db_path),
115
- :fields => {
115
+ :fields => {
116
116
  :title => { :type => String, :store => true },
117
- :released_on => { :type => Date, :store => true },
117
+ :released_on => { :type => Date, :store => true },
118
118
  :revenue => { :type => Integer, :store => true }
119
119
  })
120
120
  FILM_DATA.size.times do |i|
121
121
  doc = db.documents[i+1]
122
122
  [:title, :released_on, :revenue].each do |field|
123
- doc.values[field].should === FILM_DATA[i][field]
123
+ doc.values[field].should === FILM_DATA[i][field]
124
124
  end
125
125
  end
126
126
  end
127
127
  end
128
-
128
+
129
129
  end
130
130
 
131
131
 
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
5
- prerelease: false
4
+ hash: 11
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
- - 1
9
8
  - 2
10
- version: 1.1.2
9
+ version: "1.2"
11
10
  platform: ruby
12
11
  authors:
13
12
  - John Leach
@@ -15,10 +14,24 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-08-26 00:00:00 +01:00
19
- default_executable:
20
- dependencies: []
21
-
17
+ date: 2011-07-05 00:00:00 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - "="
26
+ - !ruby/object:Gem::Version
27
+ hash: 27
28
+ segments:
29
+ - 1
30
+ - 3
31
+ - 0
32
+ version: 1.3.0
33
+ type: :development
34
+ version_requirements: *id001
22
35
  description: A library to provide a more Ruby-like interface to the Xapian search engine.
23
36
  email: john@johnleach.co.uk
24
37
  executables: []
@@ -120,7 +133,6 @@ files:
120
133
  - spec/build_db_for_value_testing.rb
121
134
  - spec/query_parser_spec.rb
122
135
  - spec/spec.opts
123
- has_rdoc: true
124
136
  homepage: http://github.com/johnl/xapian-fu
125
137
  licenses: []
126
138
 
@@ -154,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
154
166
  requirements: []
155
167
 
156
168
  rubyforge_project: xapian-fu
157
- rubygems_version: 1.3.7
169
+ rubygems_version: 1.7.2
158
170
  signing_key:
159
171
  specification_version: 3
160
172
  summary: A Ruby interface to the Xapian search engine
@@ -218,3 +230,4 @@ test_files:
218
230
  - spec/build_db_for_value_testing.rb
219
231
  - spec/query_parser_spec.rb
220
232
  - spec/spec.opts
233
+ has_rdoc: