xapian_db 1.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ##1.1.1 (September 9th, 2011)
2
+
3
+ Fixes:
4
+
5
+ - fixed a bug in XapianDb.rebuild_xapian_index that did not index all records of a class; it is highly recommended to run XapianDb.rebuild_xapian_index after installing this version
6
+
7
+ Features:
8
+
9
+ - base query option for blueprints; may speed up reindexing significantly if you index associations
10
+
1
11
  ##1.1 (September 7th, 2011)
2
12
 
3
13
  Fixes:
data/README.rdoc CHANGED
@@ -131,10 +131,17 @@ You can add a type information to an attribute. As of now the special types :str
131
131
  You can override the global adapter configuration in a specific blueprint. Let's say you use ActiveRecord, but you have
132
132
  one more class that is not stored in the database, but you want it to be indexed:
133
133
 
134
- XapianDb::DocumentBlueprint.setup(SpecialClass) do |blueprint|
135
- blueprint.adapter :generic
136
- blueprint.index :some_stuff
137
- end
134
+ XapianDb::DocumentBlueprint.setup(SpecialClass) do |blueprint|
135
+ blueprint.adapter :generic
136
+ blueprint.index :some_stuff
137
+ end
138
+
139
+ If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
140
+
141
+ XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
142
+ blueprint.index :addresses
143
+ blueprint.base_query Person.includes(:addresses)
144
+ end
138
145
 
139
146
  place these configurations either into the corresponding class or - I prefer to have the index configurations outside
140
147
  the models - into the file config/xapian_blueprints.rb.
@@ -20,6 +20,13 @@ module XapianDb
20
20
 
21
21
  class << self
22
22
 
23
+ # return the name of the primary key column of a class
24
+ # @param [Class] klass the class
25
+ # @return [Symbol] the name of the primary key column
26
+ def primary_key_for(klass)
27
+ klass.primary_key
28
+ end
29
+
23
30
  # Implement the class helper methods
24
31
  # @param [Class] klass The class to add the helper methods to
25
32
  def add_class_helper_methods_to(klass)
@@ -19,6 +19,13 @@ module XapianDb
19
19
 
20
20
  class << self
21
21
 
22
+ # return the name of the primary key column of a class
23
+ # @param [Class] klass the class
24
+ # @return [Symbol] the name of the primary key column
25
+ def primary_key_for(klass)
26
+ klass.serial.name
27
+ end
28
+
22
29
  # Implement the class helper methods
23
30
  # @param [Class] klass The class to add the helper methods to
24
31
  def add_class_helper_methods_to(klass)
@@ -205,8 +205,8 @@ module XapianDb
205
205
  # Blueprint DSL methods
206
206
  # ---------------------------------------------------------------------------------
207
207
 
208
- # An optional custom adapter
209
208
  attr_accessor :_adapter
209
+ attr_reader :_base_query
210
210
 
211
211
  # Construct the blueprint
212
212
  def initialize
@@ -306,6 +306,16 @@ module XapianDb
306
306
  @ignore_expression = block
307
307
  end
308
308
 
309
+ # Define a base query to select one or all objects of the indexed class. The reason for a
310
+ # base query is to optimize the query, avoiding the 1+n problem. The base query should only
311
+ # include joins(...) and includes(...) calls.
312
+ # @param [expression] expression a base query expression
313
+ # @example Include the addresses
314
+ # blueprint.base_query Person.includes(:addresses)
315
+ def base_query(expression)
316
+ @_base_query = expression
317
+ end
318
+
309
319
  # Options for an indexed method
310
320
  class IndexOptions
311
321
 
@@ -12,6 +12,7 @@ module XapianDb
12
12
 
13
13
  class DirectWriter
14
14
 
15
+ BATCH_SIZE = 200
15
16
  class << self
16
17
 
17
18
  # Update an object in the index
@@ -37,30 +38,25 @@ module XapianDb
37
38
  # @option options [Boolean] :verbose (false) Should the reindexing give status informations?
38
39
  def reindex_class(klass, options={})
39
40
  opts = {:verbose => false}.merge(options)
40
- # First, delete all docs of this class
41
41
  XapianDb.database.delete_docs_of_class(klass)
42
- blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
43
- indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
42
+ blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
43
+ indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
44
+ base_query = blueprint._base_query || klass
44
45
  show_progressbar = false
45
- obj_count = klass.count
46
+ obj_count = base_query.count
46
47
  if opts[:verbose]
47
- if defined?(ProgressBar)
48
- show_progressbar = true
49
- end
48
+ show_progressbar = defined?(ProgressBar)
50
49
  puts "reindexing #{obj_count} objects of #{klass}..."
51
50
  pbar = ProgressBar.new("Status", obj_count) if show_progressbar
52
51
  end
53
52
 
54
53
  # Process the objects in batches to reduce the memory footprint
55
- nr_of_batches = (obj_count / 1000) + 1
54
+ nr_of_batches = (obj_count / BATCH_SIZE) + 1
55
+ order_expression = "#{klass.name.tableize}.#{options[:primary_key]}"
56
56
  nr_of_batches.times do |batch|
57
- klass.all(:offset => batch * 1000, :limit => 1000, :order => options[:primary_key]).each do |obj|
58
- if blueprint.should_index? obj
59
- doc = indexer.build_document_for(obj)
60
- XapianDb.database.store_doc(doc)
61
- else
62
- XapianDb.database.delete_doc_with_unique_term(obj.xapian_id)
63
- end
57
+ # raise "PK: #{options[:primary_key]}"
58
+ base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
59
+ XapianDb.reindex obj
64
60
  pbar.inc if show_progressbar
65
61
  end
66
62
  end
@@ -69,8 +65,6 @@ module XapianDb
69
65
  end
70
66
 
71
67
  end
72
-
73
68
  end
74
-
75
69
  end
76
70
  end
data/lib/xapian_db.rb CHANGED
@@ -149,7 +149,12 @@ module XapianDb
149
149
  configured_classes = XapianDb::DocumentBlueprint.configured_classes
150
150
  return false unless configured_classes.size > 0
151
151
  configured_classes.each do |klass|
152
- XapianDb::Config.writer.reindex_class(klass, options) if klass.respond_to?(:rebuild_xapian_index)
152
+ if klass.respond_to?(:rebuild_xapian_index)
153
+ blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
154
+ adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
155
+ options[:primary_key] = adapter.primary_key_for(klass)
156
+ XapianDb::Config.writer.reindex_class(klass, options)
157
+ end
153
158
  end
154
159
  true
155
160
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian_db
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.1'
4
+ version: 1.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-07 00:00:00.000000000 +02:00
12
+ date: 2011-09-09 00:00:00.000000000 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: daemons
17
- requirement: &70318028242380 !ruby/object:Gem::Requirement
17
+ requirement: &70154206238860 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.10
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *70318028242380
25
+ version_requirements: *70154206238860
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: xapian-ruby
28
- requirement: &70318028241920 !ruby/object:Gem::Requirement
28
+ requirement: &70154206238400 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.2.6
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *70318028241920
36
+ version_requirements: *70154206238400
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: rspec
39
- requirement: &70318028241460 !ruby/object:Gem::Requirement
39
+ requirement: &70154206237940 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 2.3.1
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *70318028241460
47
+ version_requirements: *70154206237940
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: simplecov
50
- requirement: &70318028241000 !ruby/object:Gem::Requirement
50
+ requirement: &70154206237480 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ! '>='
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 0.3.7
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *70318028241000
58
+ version_requirements: *70154206237480
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: beanstalk-client
61
- requirement: &70318028240520 !ruby/object:Gem::Requirement
61
+ requirement: &70154206237020 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ! '>='
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.1.0
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *70318028240520
69
+ version_requirements: *70154206237020
70
70
  description: XapianDb is a ruby gem that combines features of nosql databases and
71
71
  fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
72
72
  email: gernot.kogler (at) garaio (dot) com