xapian-fu 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MzU5N2ExNTgxZjc3NzlhZjkwMjhiY2UzZDcwMzZhNzQwODk0N2M2ZQ==
5
+ data.tar.gz: !binary |-
6
+ YWE3ZDUwYmMyNTdiZjg2NzdhZGNjNTNkYzJhYjg2NzAzNTkyYjg2OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZDVmYmQ3YzA4OWUzYTZhNzEzYWU5NmMyMzU0ZDNmOTE2NDUyMDEyMDQwZTZk
10
+ YTI0MGMwMzNiN2YwMmY0NmNlYzFmOTlhNjIzNTU5ZGZhZWI0MmMwZmNjNGI1
11
+ MWYwYTQxYWZhMDQ3ZWExOWExM2Q5NGMzY2QyYjZkNmViYTJhMWQ=
12
+ data.tar.gz: !binary |-
13
+ NDE1OTM0NmQ2MThiZmVhYWU3YTYyNjEzZTIxNmE3MTRkZjRjYTI0MDhiNjg1
14
+ MGRmZWM5YmNmY2ZmZmQwZDYyNDYwOWU4NjdiMDY2NWRlMDgwOGRjZjdiMGZi
15
+ OGM4ZDgyODQ2ZWQxMzc2OTExOWY2ZjNlNWM1M2QyZDQwNTU1NjM=
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,21 @@
1
+ === 1.5.1 (17th July 2014)
2
+
3
+ * You can now customize the parsed query by passing `:query_builder` to
4
+ `XapianDb#search`.
5
+
6
+ * Add ability to pass a `PostingSource` when searching.
7
+
8
+ * Add ability to specify how many documents to sample when searching.
9
+
10
+ This allows for accurate paging links, facet counts, etc.
11
+
12
+ * Support a `:weight` option when declaring database fields.
13
+
14
+ * `XapianDb#search` can now receive a `:ranges` option to disable range
15
+ parsing.
16
+
17
+ * Facet values are now correctly marked as UTF-8.
18
+
1
19
  === 1.5.0 (23rd May 2012)
2
20
 
3
21
  * Support for custom term weights when indexing (Mike Ragalie)
@@ -0,0 +1,47 @@
1
+ # A generic <tt>Xapian::PostingSource</tt>.
2
+ class XapianFu::PostingSource < Xapian::PostingSource
3
+ def init(db)
4
+ @all = {}
5
+ @ids = db.postlist("").map(&:docid)
6
+
7
+ @ids.each do |id|
8
+ @all[id] = db.document(id)
9
+ end
10
+
11
+ @iter = @ids.each
12
+ @current = @iter.next
13
+ @db = db
14
+ end
15
+
16
+ def next(minweight)
17
+ @current = @iter.next
18
+ rescue StopIteration
19
+ @current = nil
20
+ end
21
+
22
+ def at_end
23
+ @current.nil?
24
+ end
25
+
26
+ def docid
27
+ @current
28
+ end
29
+
30
+ def get_termfreq_min
31
+ 0
32
+ end
33
+
34
+ def get_termfreq_max
35
+ @db.doccount
36
+ end
37
+
38
+ def get_termfreq_est
39
+ @db.doccount
40
+ end
41
+
42
+ def get_maxweight
43
+ 1
44
+ end
45
+ end
46
+
47
+
@@ -97,8 +97,8 @@ module XapianFu #:nodoc:
97
97
  else
98
98
  qp = Xapian::QueryParser.new
99
99
  qp.database = xapian_database if xapian_database
100
- qp.stopper = database.stopper if database
101
- qp.stemmer = database.stemmer if database
100
+ qp.stopper = database.stopper if database && database.stopper
101
+ qp.stemmer = database.stemmer if database && database.stemmer
102
102
  qp.default_op = xapian_default_op
103
103
  qp.stemming_strategy = xapian_stemming_strategy
104
104
 
@@ -127,7 +127,7 @@ module XapianFu #:nodoc:
127
127
  string,
128
128
  prefix
129
129
  ))
130
- end if database
130
+ end if database && @options.fetch(:ranges, true)
131
131
 
132
132
  @query_parser = qp
133
133
  end
@@ -25,7 +25,9 @@ module XapianFu
25
25
 
26
26
  options[:spies].each do |name, spy|
27
27
  @facets[name] = spy.values.map do |value|
28
- [@db.unserialize_value(name, value.term), value.termfreq]
28
+ term = value.term.force_encoding(Encoding::UTF_8)
29
+
30
+ [@db.unserialize_value(name, term), value.termfreq]
29
31
  end
30
32
  end if options[:spies]
31
33
 
@@ -40,7 +42,7 @@ module XapianFu
40
42
 
41
43
  # The estimated total number of pages of results this search could return
42
44
  def total_pages
43
- (total_entries / per_page.to_f).round
45
+ (total_entries / per_page.to_f).ceil
44
46
  end
45
47
 
46
48
  # The previous page number, or nil if there are no previous pages available
@@ -1,3 +1,3 @@
1
1
  module XapianFu #:nodoc:
2
- VERSION = "1.5.0"
2
+ VERSION = "1.5.1"
3
3
  end
@@ -138,6 +138,7 @@ module XapianFu #:nodoc:
138
138
  attr_reader :spelling
139
139
  attr_reader :sortable_fields
140
140
  attr_accessor :weights_function
141
+ attr :field_weights
141
142
 
142
143
  def initialize( options = { } )
143
144
  @options = { :index_positions => true, :spelling => true }.merge(options)
@@ -237,6 +238,16 @@ module XapianFu #:nodoc:
237
238
  # enabled, spelling suggestions are available using the
238
239
  # XapianFu::ResultSet <tt>corrected_query</tt> method.
239
240
  #
241
+ # The <tt>:check_at_least</tt> option controls how many documents
242
+ # will be sampled. This allows for accurate page and facet counts.
243
+ # Specifying the special value of <tt>:all</tt> will make Xapian
244
+ # sample every document in the database. Be aware that this can hurt
245
+ # your query performance.
246
+ #
247
+ # The <tt>:query_builder</tt> option allows you to pass a proc that
248
+ # will return the final query to be run. The proc receives the parsed
249
+ # query as its only argument.
250
+ #
240
251
  # The first parameter can also be <tt>:all</tt> or
241
252
  # <tt>:nothing</tt>, to match all documents or no documents
242
253
  # respectively.
@@ -254,9 +265,18 @@ module XapianFu #:nodoc:
254
265
  per_page = options[:per_page] || options[:limit] || 10
255
266
  per_page = per_page.to_i rescue 10
256
267
  offset = page * per_page
268
+
269
+ check_at_least = options.include?(:check_at_least) ? options[:check_at_least] : 0
270
+ check_at_least = self.size if check_at_least == :all
271
+
257
272
  qp = XapianFu::QueryParser.new({ :database => self }.merge(options))
258
273
  query = qp.parse_query(q.is_a?(Symbol) ? q : q.to_s)
259
274
  query = filter_query(query, options[:filter]) if options[:filter]
275
+
276
+ if options.include?(:query_builder)
277
+ query = options[:query_builder].call(query)
278
+ end
279
+
260
280
  enquiry = Xapian::Enquire.new(ro)
261
281
  setup_ordering(enquiry, options[:order], options[:reverse])
262
282
  if options[:collapse]
@@ -269,9 +289,14 @@ module XapianFu #:nodoc:
269
289
  accum
270
290
  end
271
291
  end
292
+
293
+ if options.include?(:posting_source)
294
+ query = Xapian::Query.new(Xapian::Query::OP_AND_MAYBE, query, Xapian::Query.new(options[:posting_source]))
295
+ end
296
+
272
297
  enquiry.query = query
273
298
 
274
- ResultSet.new(:mset => enquiry.mset(offset, per_page),
299
+ ResultSet.new(:mset => enquiry.mset(offset, per_page, check_at_least),
275
300
  :current_page => page + 1,
276
301
  :per_page => per_page,
277
302
  :corrected_query => qp.corrected_query,
@@ -376,6 +401,7 @@ module XapianFu #:nodoc:
376
401
  @store_values = []
377
402
  @sortable_fields = {}
378
403
  @boolean_fields = []
404
+ @field_weights = Hash.new(1)
379
405
  return nil if field_options.nil?
380
406
  default_opts = {
381
407
  :store => true,
@@ -405,6 +431,7 @@ module XapianFu #:nodoc:
405
431
  @unindexed_fields << name if opts[:index] == false
406
432
  @boolean_fields << name if opts[:boolean]
407
433
  @fields[name] = opts[:type]
434
+ @field_weights[name] = opts[:weight] if opts.include?(:weight)
408
435
  end
409
436
  @fields
410
437
  end
@@ -148,7 +148,7 @@ module XapianFu #:nodoc:
148
148
  # Compare IDs with another XapianDoc
149
149
  def ==(b)
150
150
  if b.is_a?(XapianDoc)
151
- id == b.id
151
+ id == b.id && (db == b.db || db.dir == b.db.dir)
152
152
  else
153
153
  super(b)
154
154
  end
@@ -276,7 +276,7 @@ module XapianFu #:nodoc:
276
276
  v = v.to_s
277
277
  end
278
278
  # get the custom term weight if a weights function exists
279
- weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : 1
279
+ weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : db.field_weights[k]
280
280
  # add value with field name
281
281
  tg.send(index_method, v, weight, 'X' + k.to_s.upcase)
282
282
  # add value without field name
data/spec/facets_spec.rb CHANGED
@@ -5,20 +5,23 @@ tmp_dir = '/tmp/xapian_fu_test.db'
5
5
  describe "Facets support" do
6
6
 
7
7
  before do
8
+ FileUtils.rm_rf(tmp_dir)
9
+
8
10
  @xdb = XapianFu::XapianDb.new(
9
11
  :dir => tmp_dir, :create => true, :overwrite => true,
10
12
  :fields => {
11
13
  :name => { :index => true },
12
14
  :age => { :type => Integer, :sortable => true },
13
- :height => { :type => Float, :sortable => true }
15
+ :height => { :type => Float, :sortable => true },
16
+ :city => { :store => true }
14
17
  }
15
18
  )
16
19
 
17
- @xdb << {:name => "John A", :age => 30, :height => 1.8}
18
- @xdb << {:name => "John B", :age => 35, :height => 1.8}
19
- @xdb << {:name => "John C", :age => 40, :height => 1.7}
20
- @xdb << {:name => "John D", :age => 40, :height => 1.7}
21
- @xdb << {:name => "Markus", :age => 35, :height => 1.7}
20
+ @xdb << {:name => "John A", :age => 30, :height => 1.8, city: "NY"}
21
+ @xdb << {:name => "John B", :age => 35, :height => 1.8, city: "NY"}
22
+ @xdb << {:name => "John C", :age => 40, :height => 1.7, city: "SF"}
23
+ @xdb << {:name => "John D", :age => 40, :height => 1.7, city: "NY"}
24
+ @xdb << {:name => "Markus", :age => 35, :height => 1.7, city: "LA"}
22
25
  @xdb.flush
23
26
  end
24
27
 
@@ -31,4 +34,31 @@ describe "Facets support" do
31
34
  results.facets.keys.map(&:to_s).sort == %w(age height)
32
35
  end
33
36
 
37
+ it "should allow to set the minimum amount of documents to check" do
38
+ 100.times do |i|
39
+ @xdb << {:name => "John A #{i}", :age => 30, :height => 1.8}
40
+ @xdb << {:name => "John B #{i}", :age => 35, :height => 1.8}
41
+ @xdb << {:name => "John C #{i}", :age => 40, :height => 1.7}
42
+ @xdb << {:name => "John D #{i}", :age => 40, :height => 1.7}
43
+ @xdb << {:name => "Markus #{i}", :age => 35, :height => 1.7}
44
+ end
45
+
46
+ @xdb.flush
47
+
48
+ results = @xdb.search("john", :facets => [:age, :height], :check_at_least => :all)
49
+
50
+ results.facets[:age].map(&:last).inject(0) { |t,i| t + i }.should == 404
51
+
52
+ results = @xdb.search(:all, :facets => [:age, :height], :check_at_least => :all)
53
+
54
+ results.facets[:age].map(&:last).inject(0) { |t,i| t + i }.should == 505
55
+ end
56
+
57
+ it "should return facet values in UTF-8" do
58
+ results = @xdb.search("john", {:facets => [:city]})
59
+
60
+ results.facets[:city].should == [["NY", 3], ["SF", 1]]
61
+
62
+ results.facets[:city].first.first.encoding.should == Encoding::UTF_8
63
+ end
34
64
  end
@@ -423,6 +423,8 @@ describe XapianDb do
423
423
  results.previous_page.should == 1
424
424
  results.next_page.should == nil
425
425
  results.offset.should == 16
426
+ results = xdb.search(content, :page => 1, :per_page => 14)
427
+ results.total_pages.should == 3
426
428
  end
427
429
 
428
430
  it "should do searches with and without field names" do
@@ -496,6 +498,28 @@ describe XapianDb do
496
498
  xdb.search("liverpool").should be_empty
497
499
  xdb.search("city:liverpool").map(&:id).should == [2, 3]
498
500
  end
501
+
502
+ it "allows further refining of the parsed query" do
503
+ xdb = XapianDb.new(:dir => tmp_dir, :create => true,
504
+ :fields => {
505
+ :name => { :index => true },
506
+ :age => { :boolean => true },
507
+ }
508
+ )
509
+
510
+ xdb << {:name => "John A", :age => 10}
511
+ xdb << {:name => "John B", :age => 11}
512
+
513
+ xdb.flush
514
+
515
+ xdb.search("john").size.should == 2
516
+
517
+ builder = lambda do |q|
518
+ Xapian::Query.new(Xapian::Query::OP_AND, Xapian::Query.new("XAGE10"), q)
519
+ end
520
+
521
+ xdb.search("john", :query_builder => builder).map(&:id).should == [1]
522
+ end
499
523
  end
500
524
 
501
525
  describe "filtering" do
@@ -621,6 +645,25 @@ describe XapianDb do
621
645
  docs.map { |d| d.id }.should == [1, 2]
622
646
  end
623
647
 
648
+ it "should not parse ranges when no ranges configured" do
649
+ xdb = XapianDb.new(:fields => {
650
+ :price => { :type => Integer, :sortable => true, :range_prefix => "$" },
651
+ :age => { :type => Integer, :sortable => true }
652
+ })
653
+
654
+ xdb << XapianDoc.new(:price => 10, :age => 40)
655
+ xdb << XapianDoc.new(:price => 20, :age => 35)
656
+ xdb << XapianDoc.new(:price => 45, :age => 30)
657
+
658
+ docs = xdb.search("$20..40 OR age:40..50", ranges: false)
659
+
660
+ docs.should be_empty
661
+
662
+ docs = xdb.search("20")
663
+
664
+ docs.map { |d| d.id }.should == [2]
665
+ end
666
+
624
667
  it "should store values declared as to be collapsible" do
625
668
  xdb = XapianDb.new(:collapsible => :group_id)
626
669
  xdb << XapianDoc.new(:group_id => "666", :author => "Jim Jones")
@@ -758,5 +801,59 @@ describe XapianDb do
758
801
  end
759
802
  end
760
803
 
761
- end
804
+ describe "weights per field" do
805
+ it "should honor the :weight option when declaring fields" do
806
+ xdb = XapianDb.new(
807
+ :fields => {
808
+ :title => {:weight => 20},
809
+ :abstract => {:weight => 10},
810
+ :description => {:type => String}
811
+ }
812
+ )
813
+
814
+ xdb << {:id => 1, :title => "Programming Ruby: The Pragmatic Programmer's Guide", :abstract => "The programming language", :description => "This book is a tutorial and reference for the Ruby programming language."}
815
+ xdb << {:id => 2, :title => "The Ruby Programming Language", :abstract => "A great book", :description => "The Matz book."}
816
+ xdb << {:id => 3, :title => "The Rails Way", :abstract => "A good Rails book.", :description => "You have to know the language."}
817
+
818
+ xdb.search("language").map(&:id).should == [2, 1, 3]
819
+ end
820
+ end
821
+
822
+ describe "posting sources" do
823
+ class BoostLatest < Xapian::PostingSource
824
+ attr :docid
825
+
826
+ def init(db)
827
+ @db = db
828
+ @iter = db.postlist("").map(&:docid).each
829
+ @docid = @iter.next
830
+ end
831
+
832
+ def weight
833
+ @docid * 10
834
+ end
835
+
836
+ def next(minweight)
837
+ @docid = @iter.next
838
+ rescue StopIteration
839
+ @docid = nil
840
+ end
762
841
 
842
+ def at_end
843
+ @docid.nil?
844
+ end
845
+ end
846
+
847
+ it "allows to pass a custom posting source to boost results" do
848
+ xdb = XapianDb.new
849
+
850
+ xdb << {:id => 1, :name => "Foo"}
851
+ xdb << {:id => 2, :name => "Foo Bar"}
852
+ xdb << {:id => 3, :name => "Foo Bar Baz"}
853
+
854
+ xdb.search("foo").map(&:id).should == [1, 2, 3]
855
+
856
+ xdb.search("foo", :posting_source => BoostLatest.new).map(&:id).should == [3, 2, 1]
857
+ end
858
+ end
859
+ end
@@ -6,8 +6,16 @@ require 'fileutils'
6
6
 
7
7
  describe XapianDoc do
8
8
 
9
- it "should be equal to other XapianDoc objects with the same id" do
10
- XapianDoc.new(:id => 666).should == XapianDoc.new(:id => 666)
9
+ it "should be equal to other XapianDoc objects with the same id belonging to the same database" do
10
+ xdb1 = XapianDb.new(:dir => "/tmp/foos")
11
+ xdb2 = XapianDb.new(:dir => "/tmp/foos")
12
+
13
+ XapianDoc.new({:id => 666}, :xapian_db => xdb1).should == XapianDoc.new({:id => 666}, :xapian_db => xdb1)
14
+ XapianDoc.new({:id => 666}, :xapian_db => xdb1).should == XapianDoc.new({:id => 666}, :xapian_db => xdb2)
15
+
16
+ xdb3 = XapianDb.new(:dir => "/tmp/bars")
17
+
18
+ XapianDoc.new({:id => 666}, :xapian_db => xdb1).should_not == XapianDoc.new({:id => 666}, :xapian_db => xdb3)
11
19
  end
12
20
 
13
21
  it "should not be equal to other XapianDoc objects with different ids" do
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
5
- prerelease:
4
+ version: 1.5.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - John Leach
@@ -14,37 +13,46 @@ date: 2012-05-23 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: rspec
17
- requirement: &12933040 !ruby/object:Gem::Requirement
18
- none: false
16
+ requirement: !ruby/object:Gem::Requirement
19
17
  requirements:
20
18
  - - ~>
21
19
  - !ruby/object:Gem::Version
22
20
  version: 2.7.0
23
21
  type: :development
24
22
  prerelease: false
25
- version_requirements: *12933040
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ version: 2.7.0
26
28
  - !ruby/object:Gem::Dependency
27
29
  name: rake
28
- requirement: &12928460 !ruby/object:Gem::Requirement
29
- none: false
30
+ requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
32
  - - ! '>='
32
33
  - !ruby/object:Gem::Version
33
34
  version: '0'
34
35
  type: :development
35
36
  prerelease: false
36
- version_requirements: *12928460
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
37
42
  - !ruby/object:Gem::Dependency
38
43
  name: rdoc
39
- requirement: &12907260 !ruby/object:Gem::Requirement
40
- none: false
44
+ requirement: !ruby/object:Gem::Requirement
41
45
  requirements:
42
46
  - - ! '>='
43
47
  - !ruby/object:Gem::Version
44
48
  version: '0'
45
49
  type: :development
46
50
  prerelease: false
47
- version_requirements: *12907260
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
48
56
  description: A library to provide a more Ruby-like interface to the Xapian search
49
57
  engine.
50
58
  email: john@johnleach.co.uk
@@ -55,99 +63,99 @@ extra_rdoc_files:
55
63
  - LICENSE
56
64
  - CHANGELOG.rdoc
57
65
  files:
58
- - lib/xapian_fu.rb
59
- - lib/xapian_fu/xapian_doc_value_accessor.rb
66
+ - lib/xapian-fu.rb
67
+ - lib/xapian_fu/posting_source.rb
60
68
  - lib/xapian_fu/query_parser.rb
61
- - lib/xapian_fu/version.rb
62
- - lib/xapian_fu/xapian_db.rb
63
- - lib/xapian_fu/xapian_documents_accessor.rb
64
- - lib/xapian_fu/xapian_doc.rb
65
- - lib/xapian_fu/stopwords/russian.txt
66
- - lib/xapian_fu/stopwords/spanish.txt
67
- - lib/xapian_fu/stopwords/README
69
+ - lib/xapian_fu/result_set.rb
70
+ - lib/xapian_fu/stopper_factory.rb
68
71
  - lib/xapian_fu/stopwords/danish.txt
69
- - lib/xapian_fu/stopwords/french.txt
70
72
  - lib/xapian_fu/stopwords/dutch.txt
71
73
  - lib/xapian_fu/stopwords/english.txt
74
+ - lib/xapian_fu/stopwords/finnish.txt
75
+ - lib/xapian_fu/stopwords/french.txt
72
76
  - lib/xapian_fu/stopwords/german.txt
77
+ - lib/xapian_fu/stopwords/hungarian.txt
78
+ - lib/xapian_fu/stopwords/italian.txt
73
79
  - lib/xapian_fu/stopwords/norwegian.txt
80
+ - lib/xapian_fu/stopwords/portuguese.txt
81
+ - lib/xapian_fu/stopwords/README
82
+ - lib/xapian_fu/stopwords/russian.txt
83
+ - lib/xapian_fu/stopwords/spanish.txt
74
84
  - lib/xapian_fu/stopwords/swedish.txt
75
85
  - lib/xapian_fu/stopwords/update.rb
76
- - lib/xapian_fu/stopwords/italian.txt
77
- - lib/xapian_fu/stopwords/finnish.txt
78
- - lib/xapian_fu/stopwords/hungarian.txt
79
- - lib/xapian_fu/stopwords/portuguese.txt
80
- - lib/xapian_fu/result_set.rb
81
- - lib/xapian_fu/stopper_factory.rb
82
- - lib/xapian-fu.rb
83
- - examples/spider.rb
84
- - examples/ar_spider.rb
86
+ - lib/xapian_fu/version.rb
87
+ - lib/xapian_fu/xapian_db.rb
88
+ - lib/xapian_fu/xapian_doc.rb
89
+ - lib/xapian_fu/xapian_doc_value_accessor.rb
90
+ - lib/xapian_fu/xapian_documents_accessor.rb
91
+ - lib/xapian_fu.rb
85
92
  - examples/query.rb
86
- - examples/ar_query.rb
93
+ - examples/spider.rb
87
94
  - README.rdoc
88
95
  - LICENSE
89
96
  - CHANGELOG.rdoc
90
- - spec/xapian_doc_spec.rb
91
- - spec/xapian_db_spec.rb
92
- - spec/stopper_factory_spec.rb
97
+ - spec/build_db_for_value_testing.rb
93
98
  - spec/facets_spec.rb
94
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
95
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
96
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
97
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
98
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
99
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
100
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
101
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
102
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
103
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
104
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
105
- - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
106
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
107
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
108
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
109
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
110
- - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
111
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
99
+ - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
100
+ - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
112
101
  - spec/fixtures/film_data/i486-linux~1.8.7/position.baseA
113
- - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
114
- - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
102
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
103
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
115
104
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseA
116
- - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
117
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
118
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
119
105
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseB
120
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
106
+ - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
107
+ - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
121
108
  - spec/fixtures/film_data/i486-linux~1.8.7/record.baseB
122
- - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
123
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
124
109
  - spec/fixtures/film_data/i486-linux~1.8.7/record.DB
125
- - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
126
- - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
127
- - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
128
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
110
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
111
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
112
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
113
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
114
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
115
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
116
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
117
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
118
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
119
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
120
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
121
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
122
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
123
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
124
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
125
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
126
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
127
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
128
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
129
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
130
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
131
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
132
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
133
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
134
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
129
135
  - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseA
130
- - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
131
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
136
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
137
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
132
138
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseA
133
- - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
134
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
135
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
136
139
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseB
137
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
140
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
141
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
138
142
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseB
139
- - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
140
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
141
143
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.DB
142
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
143
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
144
- - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
144
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
145
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
146
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
147
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
148
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
149
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
145
150
  - spec/fixtures/film_data.rb
146
- - spec/xapian_doc_value_accessor_spec.rb
147
- - spec/build_db_for_value_testing.rb
148
151
  - spec/query_parser_spec.rb
152
+ - spec/stopper_factory_spec.rb
153
+ - spec/xapian_db_spec.rb
154
+ - spec/xapian_doc_spec.rb
155
+ - spec/xapian_doc_value_accessor_spec.rb
149
156
  homepage: http://github.com/johnl/xapian-fu
150
157
  licenses: []
158
+ metadata: {}
151
159
  post_install_message:
152
160
  rdoc_options:
153
161
  - --title
@@ -158,81 +166,78 @@ rdoc_options:
158
166
  require_paths:
159
167
  - lib
160
168
  required_ruby_version: !ruby/object:Gem::Requirement
161
- none: false
162
169
  requirements:
163
170
  - - ! '>='
164
171
  - !ruby/object:Gem::Version
165
172
  version: '0'
166
173
  required_rubygems_version: !ruby/object:Gem::Requirement
167
- none: false
168
174
  requirements:
169
175
  - - ! '>='
170
176
  - !ruby/object:Gem::Version
171
177
  version: '0'
172
178
  requirements: []
173
179
  rubyforge_project: xapian-fu
174
- rubygems_version: 1.8.11
180
+ rubygems_version: 2.0.0
175
181
  signing_key:
176
- specification_version: 3
182
+ specification_version: 4
177
183
  summary: A Ruby interface to the Xapian search engine
178
184
  test_files:
179
- - spec/xapian_doc_spec.rb
180
- - spec/xapian_db_spec.rb
181
- - spec/stopper_factory_spec.rb
185
+ - spec/build_db_for_value_testing.rb
182
186
  - spec/facets_spec.rb
183
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
184
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
185
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
186
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
187
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
188
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
189
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
190
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
191
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
192
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
193
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
194
- - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
195
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
196
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
197
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
198
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
199
- - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
200
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
187
+ - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
188
+ - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
201
189
  - spec/fixtures/film_data/i486-linux~1.8.7/position.baseA
202
- - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
203
- - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
190
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
191
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
204
192
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseA
205
- - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
206
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
207
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
208
193
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseB
209
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
194
+ - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
195
+ - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
210
196
  - spec/fixtures/film_data/i486-linux~1.8.7/record.baseB
211
- - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
212
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
213
197
  - spec/fixtures/film_data/i486-linux~1.8.7/record.DB
214
- - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
215
- - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
216
- - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
217
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
198
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
199
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
200
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
201
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
202
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
203
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
204
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
205
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
206
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
207
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
208
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
209
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
210
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
211
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
212
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
213
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
214
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
215
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
216
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
217
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
218
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
219
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
220
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
221
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
222
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
218
223
  - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseA
219
- - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
220
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
224
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
225
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
221
226
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseA
222
- - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
223
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
224
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
225
227
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseB
226
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
228
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
229
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
227
230
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseB
228
- - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
229
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
230
231
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.DB
231
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
232
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
233
- - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
232
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
233
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
234
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
235
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
236
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
237
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
234
238
  - spec/fixtures/film_data.rb
235
- - spec/xapian_doc_value_accessor_spec.rb
236
- - spec/build_db_for_value_testing.rb
237
239
  - spec/query_parser_spec.rb
238
- has_rdoc:
240
+ - spec/stopper_factory_spec.rb
241
+ - spec/xapian_db_spec.rb
242
+ - spec/xapian_doc_spec.rb
243
+ - spec/xapian_doc_value_accessor_spec.rb
data/examples/ar_query.rb DELETED
@@ -1,35 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'rubygems'
4
- require 'benchmark'
5
- require 'lib/xapian_fu'
6
- include XapianFu
7
- require 'active_record'
8
-
9
- ActiveRecord::Base.establish_connection(
10
- :adapter => "mysql",
11
- :host => "localhost",
12
- :username => "john",
13
- :password => "john",
14
- :database => "john_fametastic_dev" )
15
-
16
- class WpPost < ActiveRecord::Base
17
- set_primary_key :ID
18
- end
19
-
20
- #puts WpPost.new.attributes.keys.join(' ')
21
- db = XapianDb.new(:dir => 'ar_spider.db')
22
-
23
- results = nil
24
- bm = Benchmark.measure do
25
- results = db.search(ARGV.join(' '))
26
- end
27
-
28
- posts = WpPost.find(results.collect { |r| r.id })
29
-
30
- puts "Weight\tTitle"
31
- posts.each_with_index do |p,i|
32
- puts "%.3f\t#{p.post_title}" % results[i].weight
33
- end
34
-
35
- puts "Search took %.5f seconds" % bm.total
@@ -1,37 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'rubygems'
4
- require 'benchmark'
5
- require 'lib/xapian_fu'
6
- include XapianFu
7
- require 'active_record'
8
-
9
- ActiveRecord::Base.establish_connection(
10
- :adapter => "mysql",
11
- :host => "localhost",
12
- :username => "john",
13
- :password => "john",
14
- :database => "john_fametastic_dev" )
15
-
16
- class WpPost < ActiveRecord::Base
17
- set_primary_key :ID
18
- end
19
-
20
- #puts WpPost.new.attributes.keys.join(' ')
21
- db = XapianDb.new(:dir => 'ar_spider.db', :overwrite => true)
22
-
23
- count = 0
24
- indexing_time = 0.0
25
- WpPost.find_in_batches do |posts|
26
- db.transaction do
27
- puts "Indexing wp_posts #{count} to #{count += posts.size}"
28
- posts.each do |post|
29
- bm = Benchmark.measure do
30
- db << XapianDoc.new(post.attributes.merge({ :id => post.id }))
31
- end
32
- indexing_time += bm.total
33
- end
34
- end
35
- indexing_time += Benchmark.measure { db.flush }.total
36
- end
37
- puts "%i documents took %.4f seconds. %.2f per second" % [count, indexing_time, count / indexing_time]