xapian-fu 1.5.0 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MzU5N2ExNTgxZjc3NzlhZjkwMjhiY2UzZDcwMzZhNzQwODk0N2M2ZQ==
5
+ data.tar.gz: !binary |-
6
+ YWE3ZDUwYmMyNTdiZjg2NzdhZGNjNTNkYzJhYjg2NzAzNTkyYjg2OA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ ZDVmYmQ3YzA4OWUzYTZhNzEzYWU5NmMyMzU0ZDNmOTE2NDUyMDEyMDQwZTZk
10
+ YTI0MGMwMzNiN2YwMmY0NmNlYzFmOTlhNjIzNTU5ZGZhZWI0MmMwZmNjNGI1
11
+ MWYwYTQxYWZhMDQ3ZWExOWExM2Q5NGMzY2QyYjZkNmViYTJhMWQ=
12
+ data.tar.gz: !binary |-
13
+ NDE1OTM0NmQ2MThiZmVhYWU3YTYyNjEzZTIxNmE3MTRkZjRjYTI0MDhiNjg1
14
+ MGRmZWM5YmNmY2ZmZmQwZDYyNDYwOWU4NjdiMDY2NWRlMDgwOGRjZjdiMGZi
15
+ OGM4ZDgyODQ2ZWQxMzc2OTExOWY2ZjNlNWM1M2QyZDQwNTU1NjM=
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,21 @@
1
+ === 1.5.1 (17th July 2014)
2
+
3
+ * You can now customize the parsed query by passing `:query_builder` to
4
+ `XapianDb#search`.
5
+
6
+ * Add ability to pass a `PostingSource` when searching, via the `:posting_source` option to `XapianDb#search`.
7
+
8
+ * Add ability to specify how many documents to sample when searching, via the `:check_at_least` option.
9
+
10
+ This allows for accurate paging links, facet counts, etc.
11
+
12
+ * Support a `:weight` option when declaring database fields.
13
+
14
+ * `XapianDb#search` can now receive a `:ranges` option to disable range
15
+ parsing.
16
+
17
+ * Facet values are now correctly marked as UTF-8.
18
+
1
19
  === 1.5.0 (23rd May 2012)
2
20
 
3
21
  * Support for custom term weights when indexing (Mike Ragalie)
@@ -0,0 +1,47 @@
1
# A generic <tt>Xapian::PostingSource</tt>.
#
# Walks the document ids taken from the database's posting list for the
# empty term, in the order that list yields them. No +weight+ method is
# defined here, so scoring is left to subclasses (or to whatever
# <tt>Xapian::PostingSource</tt> provides by default).
class XapianFu::PostingSource < Xapian::PostingSource
  # Prepares the source for iterating over +db+.
  #
  # Collects the docids from <tt>db.postlist("")</tt>, caches each document
  # in <tt>@all</tt> keyed by docid, and positions the source on the first
  # docid. When there are no docids the source starts out exhausted
  # (+at_end+ returns true) instead of letting +StopIteration+ escape.
  def init(db)
    @all = {}
    @ids = db.postlist("").map(&:docid)

    # NOTE(review): every document is loaded eagerly here, yet nothing in
    # this class reads @all -- presumably it is kept for subclasses; confirm.
    @ids.each do |id|
      @all[id] = db.document(id)
    end

    @iter = @ids.each
    # Enumerator#next raises StopIteration when there is nothing to yield,
    # so an empty id list must be handled before asking for the first item.
    @current = @ids.empty? ? nil : @iter.next
    @db = db
  end

  # Moves to the following docid; once the list is exhausted the current
  # docid becomes +nil+. +minweight+ is accepted but not used.
  def next(minweight)
    @current = @iter.next
  rescue StopIteration
    @current = nil
  end

  # True once iteration has run past the last docid (or there were none).
  def at_end
    @current.nil?
  end

  # The docid the source is currently positioned on, or +nil+ when at the end.
  def docid
    @current
  end

  # Lower bound on the number of documents this source can match.
  def get_termfreq_min
    0
  end

  # Upper bound on the number of documents this source can match: the
  # database's document count.
  def get_termfreq_max
    @db.doccount
  end

  # Estimated number of matching documents; the same value as the upper bound.
  def get_termfreq_est
    @db.doccount
  end

  # Upper bound on the weight this source reports.
  def get_maxweight
    1
  end
end
46
+
47
+
@@ -97,8 +97,8 @@ module XapianFu #:nodoc:
97
97
  else
98
98
  qp = Xapian::QueryParser.new
99
99
  qp.database = xapian_database if xapian_database
100
- qp.stopper = database.stopper if database
101
- qp.stemmer = database.stemmer if database
100
+ qp.stopper = database.stopper if database && database.stopper
101
+ qp.stemmer = database.stemmer if database && database.stemmer
102
102
  qp.default_op = xapian_default_op
103
103
  qp.stemming_strategy = xapian_stemming_strategy
104
104
 
@@ -127,7 +127,7 @@ module XapianFu #:nodoc:
127
127
  string,
128
128
  prefix
129
129
  ))
130
- end if database
130
+ end if database && @options.fetch(:ranges, true)
131
131
 
132
132
  @query_parser = qp
133
133
  end
@@ -25,7 +25,9 @@ module XapianFu
25
25
 
26
26
  options[:spies].each do |name, spy|
27
27
  @facets[name] = spy.values.map do |value|
28
- [@db.unserialize_value(name, value.term), value.termfreq]
28
+ term = value.term.force_encoding(Encoding::UTF_8)
29
+
30
+ [@db.unserialize_value(name, term), value.termfreq]
29
31
  end
30
32
  end if options[:spies]
31
33
 
@@ -40,7 +42,7 @@ module XapianFu
40
42
 
41
43
  # The estimated total number of pages of results this search could return
42
44
  def total_pages
43
- (total_entries / per_page.to_f).round
45
+ (total_entries / per_page.to_f).ceil
44
46
  end
45
47
 
46
48
  # The previous page number, or nil if there are no previous pages available
@@ -1,3 +1,3 @@
1
1
  module XapianFu #:nodoc:
2
- VERSION = "1.5.0"
2
+ VERSION = "1.5.1"
3
3
  end
@@ -138,6 +138,7 @@ module XapianFu #:nodoc:
138
138
  attr_reader :spelling
139
139
  attr_reader :sortable_fields
140
140
  attr_accessor :weights_function
141
+ attr :field_weights
141
142
 
142
143
  def initialize( options = { } )
143
144
  @options = { :index_positions => true, :spelling => true }.merge(options)
@@ -237,6 +238,16 @@ module XapianFu #:nodoc:
237
238
  # enabled, spelling suggestions are available using the
238
239
  # XapianFu::ResultSet <tt>corrected_query</tt> method.
239
240
  #
241
+ # The <tt>:check_at_least</tt> option controls how many documents
242
+ # will be sampled. This allows for accurate page and facet counts.
243
+ # Specifying the special value of <tt>:all</tt> will make Xapian
244
+ # sample every document in the database. Be aware that this can hurt
245
+ # your query performance.
246
+ #
247
+ # The <tt>:query_builder</tt> option allows you to pass a proc that
248
+ # will return the final query to be run. The proc receives the parsed
249
+ # query as its only argument.
250
+ #
240
251
  # The first parameter can also be <tt>:all</tt> or
241
252
  # <tt>:nothing</tt>, to match all documents or no documents
242
253
  # respectively.
@@ -254,9 +265,18 @@ module XapianFu #:nodoc:
254
265
  per_page = options[:per_page] || options[:limit] || 10
255
266
  per_page = per_page.to_i rescue 10
256
267
  offset = page * per_page
268
+
269
+ check_at_least = options.include?(:check_at_least) ? options[:check_at_least] : 0
270
+ check_at_least = self.size if check_at_least == :all
271
+
257
272
  qp = XapianFu::QueryParser.new({ :database => self }.merge(options))
258
273
  query = qp.parse_query(q.is_a?(Symbol) ? q : q.to_s)
259
274
  query = filter_query(query, options[:filter]) if options[:filter]
275
+
276
+ if options.include?(:query_builder)
277
+ query = options[:query_builder].call(query)
278
+ end
279
+
260
280
  enquiry = Xapian::Enquire.new(ro)
261
281
  setup_ordering(enquiry, options[:order], options[:reverse])
262
282
  if options[:collapse]
@@ -269,9 +289,14 @@ module XapianFu #:nodoc:
269
289
  accum
270
290
  end
271
291
  end
292
+
293
+ if options.include?(:posting_source)
294
+ query = Xapian::Query.new(Xapian::Query::OP_AND_MAYBE, query, Xapian::Query.new(options[:posting_source]))
295
+ end
296
+
272
297
  enquiry.query = query
273
298
 
274
- ResultSet.new(:mset => enquiry.mset(offset, per_page),
299
+ ResultSet.new(:mset => enquiry.mset(offset, per_page, check_at_least),
275
300
  :current_page => page + 1,
276
301
  :per_page => per_page,
277
302
  :corrected_query => qp.corrected_query,
@@ -376,6 +401,7 @@ module XapianFu #:nodoc:
376
401
  @store_values = []
377
402
  @sortable_fields = {}
378
403
  @boolean_fields = []
404
+ @field_weights = Hash.new(1)
379
405
  return nil if field_options.nil?
380
406
  default_opts = {
381
407
  :store => true,
@@ -405,6 +431,7 @@ module XapianFu #:nodoc:
405
431
  @unindexed_fields << name if opts[:index] == false
406
432
  @boolean_fields << name if opts[:boolean]
407
433
  @fields[name] = opts[:type]
434
+ @field_weights[name] = opts[:weight] if opts.include?(:weight)
408
435
  end
409
436
  @fields
410
437
  end
@@ -148,7 +148,7 @@ module XapianFu #:nodoc:
148
148
  # Compare IDs with another XapianDoc
149
149
  def ==(b)
150
150
  if b.is_a?(XapianDoc)
151
- id == b.id
151
+ id == b.id && (db == b.db || db.dir == b.db.dir)
152
152
  else
153
153
  super(b)
154
154
  end
@@ -276,7 +276,7 @@ module XapianFu #:nodoc:
276
276
  v = v.to_s
277
277
  end
278
278
  # get the custom term weight if a weights function exists
279
- weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : 1
279
+ weight = db.weights_function ? db.weights_function.call(k, v, fields).to_i : db.field_weights[k]
280
280
  # add value with field name
281
281
  tg.send(index_method, v, weight, 'X' + k.to_s.upcase)
282
282
  # add value without field name
data/spec/facets_spec.rb CHANGED
@@ -5,20 +5,23 @@ tmp_dir = '/tmp/xapian_fu_test.db'
5
5
  describe "Facets support" do
6
6
 
7
7
  before do
8
+ FileUtils.rm_rf(tmp_dir)
9
+
8
10
  @xdb = XapianFu::XapianDb.new(
9
11
  :dir => tmp_dir, :create => true, :overwrite => true,
10
12
  :fields => {
11
13
  :name => { :index => true },
12
14
  :age => { :type => Integer, :sortable => true },
13
- :height => { :type => Float, :sortable => true }
15
+ :height => { :type => Float, :sortable => true },
16
+ :city => { :store => true }
14
17
  }
15
18
  )
16
19
 
17
- @xdb << {:name => "John A", :age => 30, :height => 1.8}
18
- @xdb << {:name => "John B", :age => 35, :height => 1.8}
19
- @xdb << {:name => "John C", :age => 40, :height => 1.7}
20
- @xdb << {:name => "John D", :age => 40, :height => 1.7}
21
- @xdb << {:name => "Markus", :age => 35, :height => 1.7}
20
+ @xdb << {:name => "John A", :age => 30, :height => 1.8, city: "NY"}
21
+ @xdb << {:name => "John B", :age => 35, :height => 1.8, city: "NY"}
22
+ @xdb << {:name => "John C", :age => 40, :height => 1.7, city: "SF"}
23
+ @xdb << {:name => "John D", :age => 40, :height => 1.7, city: "NY"}
24
+ @xdb << {:name => "Markus", :age => 35, :height => 1.7, city: "LA"}
22
25
  @xdb.flush
23
26
  end
24
27
 
@@ -31,4 +34,31 @@ describe "Facets support" do
31
34
  results.facets.keys.map(&:to_s).sort == %w(age height)
32
35
  end
33
36
 
37
+ it "should allow to set the minimum amount of documents to check" do
38
+ 100.times do |i|
39
+ @xdb << {:name => "John A #{i}", :age => 30, :height => 1.8}
40
+ @xdb << {:name => "John B #{i}", :age => 35, :height => 1.8}
41
+ @xdb << {:name => "John C #{i}", :age => 40, :height => 1.7}
42
+ @xdb << {:name => "John D #{i}", :age => 40, :height => 1.7}
43
+ @xdb << {:name => "Markus #{i}", :age => 35, :height => 1.7}
44
+ end
45
+
46
+ @xdb.flush
47
+
48
+ results = @xdb.search("john", :facets => [:age, :height], :check_at_least => :all)
49
+
50
+ results.facets[:age].map(&:last).inject(0) { |t,i| t + i }.should == 404
51
+
52
+ results = @xdb.search(:all, :facets => [:age, :height], :check_at_least => :all)
53
+
54
+ results.facets[:age].map(&:last).inject(0) { |t,i| t + i }.should == 505
55
+ end
56
+
57
+ it "should return facet values in UTF-8" do
58
+ results = @xdb.search("john", {:facets => [:city]})
59
+
60
+ results.facets[:city].should == [["NY", 3], ["SF", 1]]
61
+
62
+ results.facets[:city].first.first.encoding.should == Encoding::UTF_8
63
+ end
34
64
  end
@@ -423,6 +423,8 @@ describe XapianDb do
423
423
  results.previous_page.should == 1
424
424
  results.next_page.should == nil
425
425
  results.offset.should == 16
426
+ results = xdb.search(content, :page => 1, :per_page => 14)
427
+ results.total_pages.should == 3
426
428
  end
427
429
 
428
430
  it "should do searches with and without field names" do
@@ -496,6 +498,28 @@ describe XapianDb do
496
498
  xdb.search("liverpool").should be_empty
497
499
  xdb.search("city:liverpool").map(&:id).should == [2, 3]
498
500
  end
501
+
502
+ it "allows further refining of the parsed query" do
503
+ xdb = XapianDb.new(:dir => tmp_dir, :create => true,
504
+ :fields => {
505
+ :name => { :index => true },
506
+ :age => { :boolean => true },
507
+ }
508
+ )
509
+
510
+ xdb << {:name => "John A", :age => 10}
511
+ xdb << {:name => "John B", :age => 11}
512
+
513
+ xdb.flush
514
+
515
+ xdb.search("john").size.should == 2
516
+
517
+ builder = lambda do |q|
518
+ Xapian::Query.new(Xapian::Query::OP_AND, Xapian::Query.new("XAGE10"), q)
519
+ end
520
+
521
+ xdb.search("john", :query_builder => builder).map(&:id).should == [1]
522
+ end
499
523
  end
500
524
 
501
525
  describe "filtering" do
@@ -621,6 +645,25 @@ describe XapianDb do
621
645
  docs.map { |d| d.id }.should == [1, 2]
622
646
  end
623
647
 
648
+ it "should not parse ranges when no ranges configured" do
649
+ xdb = XapianDb.new(:fields => {
650
+ :price => { :type => Integer, :sortable => true, :range_prefix => "$" },
651
+ :age => { :type => Integer, :sortable => true }
652
+ })
653
+
654
+ xdb << XapianDoc.new(:price => 10, :age => 40)
655
+ xdb << XapianDoc.new(:price => 20, :age => 35)
656
+ xdb << XapianDoc.new(:price => 45, :age => 30)
657
+
658
+ docs = xdb.search("$20..40 OR age:40..50", ranges: false)
659
+
660
+ docs.should be_empty
661
+
662
+ docs = xdb.search("20")
663
+
664
+ docs.map { |d| d.id }.should == [2]
665
+ end
666
+
624
667
  it "should store values declared as to be collapsible" do
625
668
  xdb = XapianDb.new(:collapsible => :group_id)
626
669
  xdb << XapianDoc.new(:group_id => "666", :author => "Jim Jones")
@@ -758,5 +801,59 @@ describe XapianDb do
758
801
  end
759
802
  end
760
803
 
761
- end
804
+ describe "weights per field" do
805
+ it "should honor the :weight option when declaring fields" do
806
+ xdb = XapianDb.new(
807
+ :fields => {
808
+ :title => {:weight => 20},
809
+ :abstract => {:weight => 10},
810
+ :description => {:type => String}
811
+ }
812
+ )
813
+
814
+ xdb << {:id => 1, :title => "Programming Ruby: The Pragmatic Programmer's Guide", :abstract => "The programming language", :description => "This book is a tutorial and reference for the Ruby programming language."}
815
+ xdb << {:id => 2, :title => "The Ruby Programming Language", :abstract => "A great book", :description => "The Matz book."}
816
+ xdb << {:id => 3, :title => "The Rails Way", :abstract => "A good Rails book.", :description => "You have to know the language."}
817
+
818
+ xdb.search("language").map(&:id).should == [2, 1, 3]
819
+ end
820
+ end
821
+
822
+ describe "posting sources" do
823
+ class BoostLatest < Xapian::PostingSource
824
+ attr :docid
825
+
826
+ def init(db)
827
+ @db = db
828
+ @iter = db.postlist("").map(&:docid).each
829
+ @docid = @iter.next
830
+ end
831
+
832
+ def weight
833
+ @docid * 10
834
+ end
835
+
836
+ def next(minweight)
837
+ @docid = @iter.next
838
+ rescue StopIteration
839
+ @docid = nil
840
+ end
762
841
 
842
+ def at_end
843
+ @docid.nil?
844
+ end
845
+ end
846
+
847
+ it "allows to pass a custom posting source to boost results" do
848
+ xdb = XapianDb.new
849
+
850
+ xdb << {:id => 1, :name => "Foo"}
851
+ xdb << {:id => 2, :name => "Foo Bar"}
852
+ xdb << {:id => 3, :name => "Foo Bar Baz"}
853
+
854
+ xdb.search("foo").map(&:id).should == [1, 2, 3]
855
+
856
+ xdb.search("foo", :posting_source => BoostLatest.new).map(&:id).should == [3, 2, 1]
857
+ end
858
+ end
859
+ end
@@ -6,8 +6,16 @@ require 'fileutils'
6
6
 
7
7
  describe XapianDoc do
8
8
 
9
- it "should be equal to other XapianDoc objects with the same id" do
10
- XapianDoc.new(:id => 666).should == XapianDoc.new(:id => 666)
9
+ it "should be equal to other XapianDoc objects with the same id belonging to the same database" do
10
+ xdb1 = XapianDb.new(:dir => "/tmp/foos")
11
+ xdb2 = XapianDb.new(:dir => "/tmp/foos")
12
+
13
+ XapianDoc.new({:id => 666}, :xapian_db => xdb1).should == XapianDoc.new({:id => 666}, :xapian_db => xdb1)
14
+ XapianDoc.new({:id => 666}, :xapian_db => xdb1).should == XapianDoc.new({:id => 666}, :xapian_db => xdb2)
15
+
16
+ xdb3 = XapianDb.new(:dir => "/tmp/bars")
17
+
18
+ XapianDoc.new({:id => 666}, :xapian_db => xdb1).should_not == XapianDoc.new({:id => 666}, :xapian_db => xdb3)
11
19
  end
12
20
 
13
21
  it "should not be equal to other XapianDoc objects with different ids" do
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian-fu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
5
- prerelease:
4
+ version: 1.5.1
6
5
  platform: ruby
7
6
  authors:
8
7
  - John Leach
@@ -14,37 +13,46 @@ date: 2012-05-23 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: rspec
17
- requirement: &12933040 !ruby/object:Gem::Requirement
18
- none: false
16
+ requirement: !ruby/object:Gem::Requirement
19
17
  requirements:
20
18
  - - ~>
21
19
  - !ruby/object:Gem::Version
22
20
  version: 2.7.0
23
21
  type: :development
24
22
  prerelease: false
25
- version_requirements: *12933040
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ version: 2.7.0
26
28
  - !ruby/object:Gem::Dependency
27
29
  name: rake
28
- requirement: &12928460 !ruby/object:Gem::Requirement
29
- none: false
30
+ requirement: !ruby/object:Gem::Requirement
30
31
  requirements:
31
32
  - - ! '>='
32
33
  - !ruby/object:Gem::Version
33
34
  version: '0'
34
35
  type: :development
35
36
  prerelease: false
36
- version_requirements: *12928460
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
37
42
  - !ruby/object:Gem::Dependency
38
43
  name: rdoc
39
- requirement: &12907260 !ruby/object:Gem::Requirement
40
- none: false
44
+ requirement: !ruby/object:Gem::Requirement
41
45
  requirements:
42
46
  - - ! '>='
43
47
  - !ruby/object:Gem::Version
44
48
  version: '0'
45
49
  type: :development
46
50
  prerelease: false
47
- version_requirements: *12907260
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ! '>='
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
48
56
  description: A library to provide a more Ruby-like interface to the Xapian search
49
57
  engine.
50
58
  email: john@johnleach.co.uk
@@ -55,99 +63,99 @@ extra_rdoc_files:
55
63
  - LICENSE
56
64
  - CHANGELOG.rdoc
57
65
  files:
58
- - lib/xapian_fu.rb
59
- - lib/xapian_fu/xapian_doc_value_accessor.rb
66
+ - lib/xapian-fu.rb
67
+ - lib/xapian_fu/posting_source.rb
60
68
  - lib/xapian_fu/query_parser.rb
61
- - lib/xapian_fu/version.rb
62
- - lib/xapian_fu/xapian_db.rb
63
- - lib/xapian_fu/xapian_documents_accessor.rb
64
- - lib/xapian_fu/xapian_doc.rb
65
- - lib/xapian_fu/stopwords/russian.txt
66
- - lib/xapian_fu/stopwords/spanish.txt
67
- - lib/xapian_fu/stopwords/README
69
+ - lib/xapian_fu/result_set.rb
70
+ - lib/xapian_fu/stopper_factory.rb
68
71
  - lib/xapian_fu/stopwords/danish.txt
69
- - lib/xapian_fu/stopwords/french.txt
70
72
  - lib/xapian_fu/stopwords/dutch.txt
71
73
  - lib/xapian_fu/stopwords/english.txt
74
+ - lib/xapian_fu/stopwords/finnish.txt
75
+ - lib/xapian_fu/stopwords/french.txt
72
76
  - lib/xapian_fu/stopwords/german.txt
77
+ - lib/xapian_fu/stopwords/hungarian.txt
78
+ - lib/xapian_fu/stopwords/italian.txt
73
79
  - lib/xapian_fu/stopwords/norwegian.txt
80
+ - lib/xapian_fu/stopwords/portuguese.txt
81
+ - lib/xapian_fu/stopwords/README
82
+ - lib/xapian_fu/stopwords/russian.txt
83
+ - lib/xapian_fu/stopwords/spanish.txt
74
84
  - lib/xapian_fu/stopwords/swedish.txt
75
85
  - lib/xapian_fu/stopwords/update.rb
76
- - lib/xapian_fu/stopwords/italian.txt
77
- - lib/xapian_fu/stopwords/finnish.txt
78
- - lib/xapian_fu/stopwords/hungarian.txt
79
- - lib/xapian_fu/stopwords/portuguese.txt
80
- - lib/xapian_fu/result_set.rb
81
- - lib/xapian_fu/stopper_factory.rb
82
- - lib/xapian-fu.rb
83
- - examples/spider.rb
84
- - examples/ar_spider.rb
86
+ - lib/xapian_fu/version.rb
87
+ - lib/xapian_fu/xapian_db.rb
88
+ - lib/xapian_fu/xapian_doc.rb
89
+ - lib/xapian_fu/xapian_doc_value_accessor.rb
90
+ - lib/xapian_fu/xapian_documents_accessor.rb
91
+ - lib/xapian_fu.rb
85
92
  - examples/query.rb
86
- - examples/ar_query.rb
93
+ - examples/spider.rb
87
94
  - README.rdoc
88
95
  - LICENSE
89
96
  - CHANGELOG.rdoc
90
- - spec/xapian_doc_spec.rb
91
- - spec/xapian_db_spec.rb
92
- - spec/stopper_factory_spec.rb
97
+ - spec/build_db_for_value_testing.rb
93
98
  - spec/facets_spec.rb
94
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
95
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
96
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
97
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
98
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
99
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
100
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
101
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
102
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
103
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
104
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
105
- - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
106
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
107
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
108
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
109
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
110
- - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
111
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
99
+ - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
100
+ - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
112
101
  - spec/fixtures/film_data/i486-linux~1.8.7/position.baseA
113
- - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
114
- - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
102
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
103
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
115
104
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseA
116
- - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
117
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
118
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
119
105
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseB
120
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
106
+ - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
107
+ - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
121
108
  - spec/fixtures/film_data/i486-linux~1.8.7/record.baseB
122
- - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
123
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
124
109
  - spec/fixtures/film_data/i486-linux~1.8.7/record.DB
125
- - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
126
- - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
127
- - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
128
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
110
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
111
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
112
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
113
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
114
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
115
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
116
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
117
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
118
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
119
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
120
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
121
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
122
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
123
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
124
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
125
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
126
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
127
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
128
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
129
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
130
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
131
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
132
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
133
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
134
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
129
135
  - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseA
130
- - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
131
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
136
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
137
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
132
138
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseA
133
- - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
134
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
135
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
136
139
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseB
137
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
140
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
141
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
138
142
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseB
139
- - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
140
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
141
143
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.DB
142
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
143
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
144
- - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
144
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
145
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
146
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
147
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
148
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
149
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
145
150
  - spec/fixtures/film_data.rb
146
- - spec/xapian_doc_value_accessor_spec.rb
147
- - spec/build_db_for_value_testing.rb
148
151
  - spec/query_parser_spec.rb
152
+ - spec/stopper_factory_spec.rb
153
+ - spec/xapian_db_spec.rb
154
+ - spec/xapian_doc_spec.rb
155
+ - spec/xapian_doc_value_accessor_spec.rb
149
156
  homepage: http://github.com/johnl/xapian-fu
150
157
  licenses: []
158
+ metadata: {}
151
159
  post_install_message:
152
160
  rdoc_options:
153
161
  - --title
@@ -158,81 +166,78 @@ rdoc_options:
158
166
  require_paths:
159
167
  - lib
160
168
  required_ruby_version: !ruby/object:Gem::Requirement
161
- none: false
162
169
  requirements:
163
170
  - - ! '>='
164
171
  - !ruby/object:Gem::Version
165
172
  version: '0'
166
173
  required_rubygems_version: !ruby/object:Gem::Requirement
167
- none: false
168
174
  requirements:
169
175
  - - ! '>='
170
176
  - !ruby/object:Gem::Version
171
177
  version: '0'
172
178
  requirements: []
173
179
  rubyforge_project: xapian-fu
174
- rubygems_version: 1.8.11
180
+ rubygems_version: 2.0.0
175
181
  signing_key:
176
- specification_version: 3
182
+ specification_version: 4
177
183
  summary: A Ruby interface to the Xapian search engine
178
184
  test_files:
179
- - spec/xapian_doc_spec.rb
180
- - spec/xapian_db_spec.rb
181
- - spec/stopper_factory_spec.rb
185
+ - spec/build_db_for_value_testing.rb
182
186
  - spec/facets_spec.rb
183
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
184
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
185
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
186
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
187
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
188
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
189
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
190
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
191
- - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
192
- - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
193
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
194
- - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
195
- - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
196
- - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
197
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
198
- - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
199
- - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
200
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
187
+ - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
188
+ - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
201
189
  - spec/fixtures/film_data/i486-linux~1.8.7/position.baseA
202
- - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
203
- - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
190
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
191
+ - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
204
192
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseA
205
- - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
206
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
207
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
208
193
  - spec/fixtures/film_data/i486-linux~1.8.7/postlist.baseB
209
- - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
194
+ - spec/fixtures/film_data/i486-linux~1.8.7/postlist.DB
195
+ - spec/fixtures/film_data/i486-linux~1.8.7/record.baseA
210
196
  - spec/fixtures/film_data/i486-linux~1.8.7/record.baseB
211
- - spec/fixtures/film_data/i486-linux~1.8.7/flintlock
212
- - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
213
197
  - spec/fixtures/film_data/i486-linux~1.8.7/record.DB
214
- - spec/fixtures/film_data/i486-linux~1.8.7/position.DB
215
- - spec/fixtures/film_data/i486-linux~1.8.7/position.baseB
216
- - spec/fixtures/film_data/i486-linux~1.8.7/iamflint
217
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
198
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseA
199
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.baseB
200
+ - spec/fixtures/film_data/i486-linux~1.8.7/termlist.DB
201
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseA
202
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.baseB
203
+ - spec/fixtures/film_data/i486-linux~1.8.7/value.DB
204
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/flintlock
205
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/iamflint
206
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseA
207
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.baseB
208
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/position.DB
209
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseA
210
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.baseB
211
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/postlist.DB
212
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseA
213
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.baseB
214
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/record.DB
215
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseA
216
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.baseB
217
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/termlist.DB
218
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseA
219
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.baseB
220
+ - spec/fixtures/film_data/x86_64-linux~1.8.7/value.DB
221
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
222
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
218
223
  - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseA
219
- - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
220
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
224
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
225
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
221
226
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseA
222
- - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
223
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
224
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
225
227
  - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.baseB
226
- - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
228
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/postlist.DB
229
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseA
227
230
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.baseB
228
- - spec/fixtures/film_data/x86_64-linux~1.9.1/flintlock
229
- - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
230
231
  - spec/fixtures/film_data/x86_64-linux~1.9.1/record.DB
231
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.DB
232
- - spec/fixtures/film_data/x86_64-linux~1.9.1/position.baseB
233
- - spec/fixtures/film_data/x86_64-linux~1.9.1/iamflint
232
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseA
233
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.baseB
234
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/termlist.DB
235
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseA
236
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.baseB
237
+ - spec/fixtures/film_data/x86_64-linux~1.9.1/value.DB
234
238
  - spec/fixtures/film_data.rb
235
- - spec/xapian_doc_value_accessor_spec.rb
236
- - spec/build_db_for_value_testing.rb
237
239
  - spec/query_parser_spec.rb
238
- has_rdoc:
240
+ - spec/stopper_factory_spec.rb
241
+ - spec/xapian_db_spec.rb
242
+ - spec/xapian_doc_spec.rb
243
+ - spec/xapian_doc_value_accessor_spec.rb
data/examples/ar_query.rb DELETED
@@ -1,35 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'rubygems'
4
- require 'benchmark'
5
- require 'lib/xapian_fu'
6
- include XapianFu
7
- require 'active_record'
8
-
9
- ActiveRecord::Base.establish_connection(
10
- :adapter => "mysql",
11
- :host => "localhost",
12
- :username => "john",
13
- :password => "john",
14
- :database => "john_fametastic_dev" )
15
-
16
- class WpPost < ActiveRecord::Base
17
- set_primary_key :ID
18
- end
19
-
20
- #puts WpPost.new.attributes.keys.join(' ')
21
- db = XapianDb.new(:dir => 'ar_spider.db')
22
-
23
- results = nil
24
- bm = Benchmark.measure do
25
- results = db.search(ARGV.join(' '))
26
- end
27
-
28
- posts = WpPost.find(results.collect { |r| r.id })
29
-
30
- puts "Weight\tTitle"
31
- posts.each_with_index do |p,i|
32
- puts "%.3f\t#{p.post_title}" % results[i].weight
33
- end
34
-
35
- puts "Search took %.5f seconds" % bm.total
@@ -1,37 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- require 'rubygems'
4
- require 'benchmark'
5
- require 'lib/xapian_fu'
6
- include XapianFu
7
- require 'active_record'
8
-
9
- ActiveRecord::Base.establish_connection(
10
- :adapter => "mysql",
11
- :host => "localhost",
12
- :username => "john",
13
- :password => "john",
14
- :database => "john_fametastic_dev" )
15
-
16
- class WpPost < ActiveRecord::Base
17
- set_primary_key :ID
18
- end
19
-
20
- #puts WpPost.new.attributes.keys.join(' ')
21
- db = XapianDb.new(:dir => 'ar_spider.db', :overwrite => true)
22
-
23
- count = 0
24
- indexing_time = 0.0
25
- WpPost.find_in_batches do |posts|
26
- db.transaction do
27
- puts "Indexing wp_posts #{count} to #{count += posts.size}"
28
- posts.each do |post|
29
- bm = Benchmark.measure do
30
- db << XapianDoc.new(post.attributes.merge({ :id => post.id }))
31
- end
32
- indexing_time += bm.total
33
- end
34
- end
35
- indexing_time += Benchmark.measure { db.flush }.total
36
- end
37
- puts "%i documents took %.4f seconds. %.2f per second" % [count, indexing_time, count / indexing_time]