xapian-fu 1.1.2 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,9 @@
1
+ === 1.2 (5th July 2011)
2
+
3
+ * Support for synonyms (Damian Janowski)
4
+ * Fix some Ruby 1.9.2 issues, particularly reading utf8 stop files
5
+ * Fixed specs to work with both xapian 1.0 and 1.2
6
+
1
7
  === 1.1.2 (26th August 2010)
2
8
 
3
9
  * Spelling correction support
data/examples/spider.rb CHANGED
@@ -8,7 +8,7 @@ require 'rubygems'
8
8
  require 'benchmark'
9
9
  require 'lib/xapian_fu'
10
10
 
11
- db = XapianFu::XapianDb.new(:store => [:filename, :filesize],
11
+ db = XapianFu::XapianDb.new(:dir => 'spider.db', :store => [:filename, :filesize],
12
12
  :overwrite => true)
13
13
 
14
14
  base_path = ARGV[0] || '.'
@@ -31,12 +31,12 @@ while dir = index_queue.shift
31
31
  next
32
32
  end
33
33
  next unless File.file?(filename)
34
- next unless filename =~ /(txt|doc|README|c|cpp|h|pl|sh|rb|py|note|xml)$/i
34
+ next unless filename =~ /(txt|doc|README|c|h|pl|sh|rb|py|note|xml)$/i
35
35
  file_count += 1
36
36
 
37
37
  # Read the first 10k of data
38
38
  text = File.open(filename) { |f| f.read(10 * 1024) }
39
- file_data += text.to_s.size
39
+ file_data += text.size
40
40
  # Index the data, filename and filesize
41
41
  bm = Benchmark.measure do
42
42
  db << {
@@ -120,7 +120,7 @@ module XapianFu #:nodoc:
120
120
  if @flags
121
121
  @flags
122
122
  else
123
- valid_flags = [:boolean, :boolean_anycase, :wildcards, :lovehate, :spelling, :pure_not]
123
+ valid_flags = [:boolean, :boolean_anycase, :wildcards, :lovehate, :spelling, :pure_not, :synonyms]
124
124
  @flags = valid_flags.delete_if { |vf| not @options[vf] }
125
125
  end
126
126
  end
@@ -135,6 +135,7 @@ module XapianFu #:nodoc:
135
135
  qflags |= Xapian::QueryParser::FLAG_LOVEHATE if flags.include?(:lovehate)
136
136
  qflags |= Xapian::QueryParser::FLAG_SPELLING_CORRECTION if flags.include?(:spelling)
137
137
  qflags |= Xapian::QueryParser::FLAG_PURE_NOT if flags.include?(:pure_not)
138
+ qflags |= Xapian::QueryParser::FLAG_AUTO_SYNONYMS if flags.include?(:synonyms)
138
139
  qflags
139
140
  end
140
141
 
@@ -32,7 +32,10 @@ module XapianFu
32
32
  def self.stop_words_for(lang)
33
33
  raise UnsupportedStopperLanguage, lang.to_s unless File.exists?(stop_words_filename(lang))
34
34
  words = []
35
- open(stop_words_filename(lang), "r") do |f|
35
+ # Open files with correct encoding in Ruby 1.9
36
+ open_args = [stop_words_filename(lang), "r"]
37
+ open_args << { :encoding => "UTF-8" } if String.new.respond_to? :encoding
38
+ open(*open_args) do |f|
36
39
  while line = f.readline rescue nil
37
40
  words << line.split(" ", 2).first.downcase.strip unless line =~ /^ +|^$|^\|/
38
41
  end
@@ -170,6 +170,19 @@ module XapianFu #:nodoc:
170
170
  end
171
171
  alias_method "<<", :add_doc
172
172
 
173
+ # Add a synonym to the database.
174
+ #
175
+ # If you want to search with synonym support, remember to add
176
+ # the option:
177
+ #
178
+ # db.search("foo", :synonyms => true)
179
+ #
180
+ # Note that in-memory databases don't support synonyms.
181
+ #
182
+ def add_synonym(term, synonym)
183
+ rw.add_synonym(term, synonym)
184
+ end
185
+
173
186
  # Conduct a search on the Xapian database, returning an array of
174
187
  # XapianFu::XapianDoc objects for the matches wrapped in a
175
188
  # XapianFu::ResultSet.
@@ -1,5 +1,5 @@
1
1
  require 'xapian'
2
- require 'lib/xapian_fu.rb'
2
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
3
3
  include XapianFu
4
4
 
5
5
  describe QueryParser do
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'xapian'
3
- require 'lib/xapian_fu.rb'
3
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
4
4
  include XapianFu
5
5
  require 'fileutils'
6
6
 
@@ -33,7 +33,7 @@ describe StopperFactory do
33
33
  words.should include 'and'
34
34
  words.should include "they're"
35
35
  end
36
-
36
+
37
37
  %w(danish dutch english finnish french german hungarian italian norwegian portuguese russian spanish swedish).each do |lang|
38
38
  describe lang do
39
39
  it "should return an array of words" do
@@ -49,7 +49,7 @@ describe StopperFactory do
49
49
  end
50
50
  end
51
51
 
52
-
52
+
53
53
  it "should raise a UnsupportedStopperLanguage error if there is no data for the given language" do
54
54
  Proc.new { StopperFactory.stop_words_for(:no_existy) }.should raise_error UnsupportedStopperLanguage
55
55
  end
@@ -1,5 +1,5 @@
1
1
  require 'xapian'
2
- require 'lib/xapian_fu.rb'
2
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
3
3
  include XapianFu
4
4
  require 'fileutils'
5
5
  require 'date'
@@ -28,14 +28,14 @@ describe XapianDb do
28
28
  xdb.rw.should be_a_kind_of(Xapian::WritableDatabase)
29
29
  xdb.ro.should be_a_kind_of(Xapian::Database)
30
30
  end
31
-
31
+
32
32
  end
33
33
 
34
34
  it "should lazily create the on-disk database when rw is used" do
35
35
  xdb = XapianDb.new(:dir => tmp_dir, :create => true)
36
36
  File.exists?(tmp_dir).should be_false
37
37
  xdb.rw
38
- File.exists?(tmp_dir).should be_true
38
+ File.exists?(tmp_dir).should be_true
39
39
  end
40
40
 
41
41
  it "should flush documents to the index when flush is called" do
@@ -201,7 +201,7 @@ describe XapianDb do
201
201
  xdb << XapianDoc.new("once upon a time")
202
202
  xdb.size.should == 2
203
203
  end
204
-
204
+
205
205
  end
206
206
 
207
207
  describe "search" do
@@ -277,10 +277,10 @@ describe XapianDb do
277
277
  it "should provide a corrected spelling string by default" do
278
278
  xdb = XapianDb.new(:dir => tmp_dir + 'corrected_spelling', :create => true,
279
279
  :overwrite => true)
280
- xdb << "there is a mouse in this house"
280
+ xdb << "there is a mouse in this building"
281
281
  xdb.flush
282
- results = xdb.search("there was a moose at our house")
283
- results.corrected_query.should == "there was a mouse at our house"
282
+ results = xdb.search("there was a moose at our building")
283
+ results.corrected_query.should == "there was a mouse at our building"
284
284
  end
285
285
 
286
286
  it "should not provide corrected spellings when disabled" do
@@ -293,11 +293,17 @@ describe XapianDb do
293
293
  end
294
294
 
295
295
 
296
- it "should do phrase matching by default when then :default_op option is :phrase"
296
+ it "should do phrase matching by default when then :default_op option is :phrase" do
297
+ pending
298
+ end
297
299
 
298
- it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe"
300
+ it "should do AND_MAYBE matching by default when the :default_op option is :and_maybe" do
301
+ pending
302
+ end
299
303
 
300
- it "should do PURE_NOT matching by default when the :default_op option is :pure_not"
304
+ it "should do PURE_NOT matching by default when the :default_op option is :pure_not" do
305
+ pending
306
+ end
301
307
 
302
308
  it "should page results when given the :page and :per_page options" do
303
309
  xdb = XapianDb.new
@@ -356,6 +362,22 @@ describe XapianDb do
356
362
  xdb.search("john").should == [john,katherine,louisa]
357
363
  xdb.search("john -name:john").should == [katherine,louisa]
358
364
  end
365
+
366
+ it "should recognize synonyms" do
367
+ xdb = XapianDb.new(:dir => tmp_dir + 'synonyms', :create => true,
368
+ :fields => [:name], :overwrite => true)
369
+
370
+ xdb << {:name => "john"}
371
+ xdb.flush
372
+
373
+ xdb.search("jon", :synonyms => true).should be_empty
374
+
375
+ xdb.add_synonym("jon", "john")
376
+ xdb.flush
377
+
378
+ xdb.search("jon").should be_empty
379
+ xdb.search("jon", :synonyms => true).should_not be_empty
380
+ end
359
381
  end
360
382
 
361
383
  describe "add_doc" do
@@ -533,9 +555,9 @@ describe XapianDb do
533
555
  xdb = XapianDb.new(:fields => { :name => String, :title => String })
534
556
  xdb.unindexed_fields.should == []
535
557
  end
536
-
558
+
537
559
  it "should return fields defined as not indexed in the fields option" do
538
- xdb = XapianDb.new(:fields => {
560
+ xdb = XapianDb.new(:fields => {
539
561
  :name => { :type => String, :index => false },
540
562
  :title => String })
541
563
  xdb.unindexed_fields.should include :name
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require 'xapian'
3
- require 'lib/xapian_fu.rb'
3
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
4
4
  include XapianFu
5
5
  require 'fileutils'
6
6
 
@@ -9,7 +9,7 @@ describe XapianDoc do
9
9
  it "should be equal to other XapianDoc objects with the same id" do
10
10
  XapianDoc.new(:id => 666).should == XapianDoc.new(:id => 666)
11
11
  end
12
-
12
+
13
13
  it "should not be equal to other XapianDoc objects with different ids" do
14
14
  XapianDoc.new(:id => 666).should_not == XapianDoc.new(:id => 667)
15
15
  end
@@ -44,7 +44,7 @@ describe XapianDoc do
44
44
  xdoc.terms.last.should be_a_kind_of Xapian::Term
45
45
  xdoc.terms.last.term.should == "upon"
46
46
  end
47
-
47
+
48
48
  it "should tokenize the fields of a hash separately" do
49
49
  xdb = XapianDb.new
50
50
  xdoc = xdb.documents.new({ :text => "once upon a time", :title => "A story" }).to_xapian_document
@@ -73,7 +73,7 @@ describe XapianDoc do
73
73
  terms = xdoc.terms.collect { |t| t.term }
74
74
  terms.should include time.utc.strftime("%Y%m%d%H%M%S")
75
75
  end
76
-
76
+
77
77
  it "should convert DateTime instances to a useful format when tokenizing" do
78
78
  datetime = DateTime.now
79
79
  xdb = XapianDb.new
@@ -118,8 +118,8 @@ describe XapianDoc do
118
118
  it "should stem #{lang.to_s.capitalize} words when the :stemmer option is set to :#{lang}" do
119
119
  xdb = XapianDb.new
120
120
  xdoc = xdb.documents.new(word, :stemmer => lang).to_xapian_document
121
- terms = xdoc.terms.collect { |t| t.term }
122
- terms.should include 'Z'+stem
121
+ terms = xdoc.terms.collect { |t| t.term.respond_to?(:force_encoding) ? t.term.force_encoding("UTF-8") : t.term }
122
+ terms.should include 'Z' + stem
123
123
  end
124
124
  end
125
125
  end
@@ -130,7 +130,7 @@ describe XapianDoc do
130
130
  terms = xdoc.terms.collect { |t| t.term if t.term =~ /^Z/ }.compact
131
131
  terms.should be_empty
132
132
  end
133
-
133
+
134
134
  it "should not stem english stop words by default" do
135
135
  xdb = XapianDb.new
136
136
  xdoc = xdb.documents.new("And they made a cake", :stemmer => :english).to_xapian_document
@@ -139,14 +139,14 @@ describe XapianDoc do
139
139
  terms.should_not include 'Za'
140
140
  terms.should include 'Zcake'
141
141
  end
142
-
142
+
143
143
  it "should allow setting the stopper on initialisation" do
144
144
  xdb = XapianDb.new(:stopper => :english)
145
145
  xdoc = xdb.documents.new("And they made a cake", :stopper => :french)
146
146
  xdoc.stopper.call("ayantes").should == true
147
147
  xdoc.stopper.call("and").should == false
148
148
  end
149
-
149
+
150
150
  it "should not stop words when stopper is set to false" do
151
151
  xdb = XapianDb.new
152
152
  xdoc = xdb.documents.new("And they made a cake", :stopper => false).to_xapian_document
@@ -160,7 +160,7 @@ describe XapianDoc do
160
160
  terms = xdoc.terms.collect { |t| t.term }
161
161
  terms.should_not include 'Zи'
162
162
  terms.should_not include 'Zони'
163
- terms.should include 'Zcake'
163
+ terms.should include 'Zcake'
164
164
  end
165
165
  end
166
166
 
@@ -180,7 +180,7 @@ describe XapianDoc do
180
180
  xdoc = xdb.documents.new("stink and bones", :language => :english, :stemmer => :french)
181
181
  xdoc.stemmer.call("contournait").should == "contourn"
182
182
  end
183
-
183
+
184
184
  end
185
185
 
186
186
  describe "stopper" do
@@ -188,7 +188,7 @@ describe XapianDoc do
188
188
  xdb = XapianDb.new(:language => :french)
189
189
  xdoc = xdb.documents.new("stink and bones")
190
190
  xdoc.stopper.call("avec").should == true
191
- end
191
+ end
192
192
  it "should return a stopper for the document language, overriding the db" do
193
193
  xdb = XapianDb.new(:language => :english)
194
194
  xdoc = xdb.documents.new("stink and bones", :language => :french)
@@ -198,7 +198,7 @@ describe XapianDoc do
198
198
  xdb = XapianDb.new(:language => :german)
199
199
  xdoc = xdb.documents.new("stink and bones", :language => :english, :stopper => :french)
200
200
  xdoc.stopper.call("avec").should == true
201
- end
201
+ end
202
202
  end
203
203
 
204
204
  end
@@ -1,5 +1,5 @@
1
1
  require 'xapian'
2
- require 'lib/xapian_fu.rb'
2
+ require File.expand_path('../lib/xapian_fu.rb', File.dirname(__FILE__))
3
3
  include XapianFu
4
4
  require 'fileutils'
5
5
  require 'fixtures/film_data'
@@ -34,12 +34,12 @@ describe XapianDocValueAccessor do
34
34
  doc.values.fetch(:number, Fixnum).should == number
35
35
  doc.to_xapian_document.values.first.value.should == [number].pack("G")
36
36
  end
37
- end
37
+ end
38
38
 
39
39
  it "should store fields defined as Bignum as packed double-precision float, network byte order" do
40
40
  xdb = XapianDb.new(:fields => { :number => { :type => Bignum, :store => true } })
41
41
  [
42
- (-0x1fffffffffffff..-0x1fffffffffffff + 10).to_a,
42
+ (-0x1fffffffffffff..-0x1fffffffffffff + 10).to_a,
43
43
  (0x1fffffffffffff-10..0x1fffffffffffff).to_a
44
44
  ].flatten.each do |number|
45
45
  doc = xdb.documents.new(:number => number)
@@ -56,7 +56,7 @@ describe XapianDocValueAccessor do
56
56
  lambda { doc.values.store(:number, number, Bignum) }.should raise_error XapianFu::ValueOutOfBounds
57
57
  end
58
58
  end
59
-
59
+
60
60
  it "should store fields defined as Float as packed double-precision float, network byte order" do
61
61
  xdb = XapianDb.new(:fields => { :number => { :type => Float, :store => true } })
62
62
  [-0.303393984588383833, 8.448488388488384, 1.0].each do |number|
@@ -64,9 +64,9 @@ describe XapianDocValueAccessor do
64
64
  doc.values.store(:number, number).should == number
65
65
  doc.values.fetch(:number).should == number
66
66
  doc.to_xapian_document.values.first.value.should == [number].pack("G")
67
- end
67
+ end
68
68
  end
69
-
69
+
70
70
  it "should store fields defined as Time in UTC as packed double-precision float, network byte order" do
71
71
  xdb = XapianDb.new(:fields => { :created_at => { :type => Time, :store => true }})
72
72
  time = Time.now
@@ -110,22 +110,22 @@ describe XapianDocValueAccessor do
110
110
  film_data_path = File.join(File.dirname(__FILE__), "fixtures/film_data")
111
111
  Dir.foreach(film_data_path) do |db_path|
112
112
  next unless db_path =~ /.+~.+/
113
- it "should read stored values from databases created by #{db_path}" do
113
+ it "should read stored values from databases created by #{db_path}" do
114
114
  db = XapianDb.new(:dir => File.join(film_data_path, db_path),
115
- :fields => {
115
+ :fields => {
116
116
  :title => { :type => String, :store => true },
117
- :released_on => { :type => Date, :store => true },
117
+ :released_on => { :type => Date, :store => true },
118
118
  :revenue => { :type => Integer, :store => true }
119
119
  })
120
120
  FILM_DATA.size.times do |i|
121
121
  doc = db.documents[i+1]
122
122
  [:title, :released_on, :revenue].each do |field|
123
- doc.values[field].should === FILM_DATA[i][field]
123
+ doc.values[field].should === FILM_DATA[i][field]
124
124
  end
125
125
  end
126
126
  end
127
127
  end
128
-
128
+
129
129
  end
130
130
 
131
131
 
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
5
- prerelease: false
4
+ hash: 11
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
- - 1
9
8
  - 2
10
- version: 1.1.2
9
+ version: "1.2"
11
10
  platform: ruby
12
11
  authors:
13
12
  - John Leach
@@ -15,10 +14,24 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-08-26 00:00:00 +01:00
19
- default_executable:
20
- dependencies: []
21
-
17
+ date: 2011-07-05 00:00:00 Z
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: rspec
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - "="
26
+ - !ruby/object:Gem::Version
27
+ hash: 27
28
+ segments:
29
+ - 1
30
+ - 3
31
+ - 0
32
+ version: 1.3.0
33
+ type: :development
34
+ version_requirements: *id001
22
35
  description: A library to provide a more Ruby-like interface to the Xapian search engine.
23
36
  email: john@johnleach.co.uk
24
37
  executables: []
@@ -120,7 +133,6 @@ files:
120
133
  - spec/build_db_for_value_testing.rb
121
134
  - spec/query_parser_spec.rb
122
135
  - spec/spec.opts
123
- has_rdoc: true
124
136
  homepage: http://github.com/johnl/xapian-fu
125
137
  licenses: []
126
138
 
@@ -154,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
154
166
  requirements: []
155
167
 
156
168
  rubyforge_project: xapian-fu
157
- rubygems_version: 1.3.7
169
+ rubygems_version: 1.7.2
158
170
  signing_key:
159
171
  specification_version: 3
160
172
  summary: A Ruby interface to the Xapian search engine
@@ -218,3 +230,4 @@ test_files:
218
230
  - spec/build_db_for_value_testing.rb
219
231
  - spec/query_parser_spec.rb
220
232
  - spec/spec.opts
233
+ has_rdoc: