xapian_db 1.1 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ##1.1.1 (September 9th, 2011)
2
+
3
+ Fixes:
4
+
5
+ - fixed a bug in XapianDb.rebuild_xapian_index that did not index all records of a klass; it is highly recommended to run XapianDb.rebuild_xapian_index after installing this version
6
+
7
+ Features:
8
+
9
+ - base query option for blueprints; may speed up reindexing significantly if you index associations
10
+
1
11
  ##1.1 (September 7th, 2011)
2
12
 
3
13
  Fixes:
data/README.rdoc CHANGED
@@ -131,10 +131,17 @@ You can add a type information to an attribute. As of now the special types :str
131
131
  You can override the global adapter configuration in a specific blueprint. Let's say you use ActiveRecord, but you have
132
132
  one more class that is not stored in the database, but you want it to be indexed:
133
133
 
134
- XapianDb::DocumentBlueprint.setup(SpecialClass) do |blueprint|
135
- blueprint.adapter :generic
136
- blueprint.index :some_stuff
137
- end
134
+ XapianDb::DocumentBlueprint.setup(SpecialClass) do |blueprint|
135
+ blueprint.adapter :generic
136
+ blueprint.index :some_stuff
137
+ end
138
+
139
+ If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
140
+
141
+ XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
142
+ blueprint.index :addresses
143
+ blueprint.base_query Person.includes(:addresses)
144
+ end
138
145
 
139
146
  place these configurations either into the corresponding class or - I prefer to have the index configurations outside
140
147
  the models - into the file config/xapian_blueprints.rb.
@@ -20,6 +20,13 @@ module XapianDb
20
20
 
21
21
  class << self
22
22
 
23
+ # return the name of the primary key column of a class
24
+ # @param [Class] klass the class
25
+ # @return [Symbol] the name of the primary key column
26
+ def primary_key_for(klass)
27
+ klass.primary_key
28
+ end
29
+
23
30
  # Implement the class helper methods
24
31
  # @param [Class] klass The class to add the helper methods to
25
32
  def add_class_helper_methods_to(klass)
@@ -19,6 +19,13 @@ module XapianDb
19
19
 
20
20
  class << self
21
21
 
22
+ # return the name of the primary key column of a class
23
+ # @param [Class] klass the class
24
+ # @return [Symbol] the name of the primary key column
25
+ def primary_key_for(klass)
26
+ klass.serial.name
27
+ end
28
+
22
29
  # Implement the class helper methods
23
30
  # @param [Class] klass The class to add the helper methods to
24
31
  def add_class_helper_methods_to(klass)
@@ -205,8 +205,8 @@ module XapianDb
205
205
  # Blueprint DSL methods
206
206
  # ---------------------------------------------------------------------------------
207
207
 
208
- # An optional custom adapter
209
208
  attr_accessor :_adapter
209
+ attr_reader :_base_query
210
210
 
211
211
  # Construct the blueprint
212
212
  def initialize
@@ -306,6 +306,16 @@ module XapianDb
306
306
  @ignore_expression = block
307
307
  end
308
308
 
309
+ # Define a base query to select one or all objects of the indexed class. The reason for a
310
+ # base query is to optimize the query, avoiding the 1+n problem. The base query should only
311
+ # include joins(...) and includes(...) calls.
312
+ # @param [expression] a base query expression
313
+ # @example Include the addresses
314
+ # blueprint.base_query Person.includes(:addresses)
315
+ def base_query(expression)
316
+ @_base_query = expression
317
+ end
318
+
309
319
  # Options for an indexed method
310
320
  class IndexOptions
311
321
 
@@ -12,6 +12,7 @@ module XapianDb
12
12
 
13
13
  class DirectWriter
14
14
 
15
+ BATCH_SIZE = 200
15
16
  class << self
16
17
 
17
18
  # Update an object in the index
@@ -37,30 +38,25 @@ module XapianDb
37
38
  # @option options [Boolean] :verbose (false) Should the reindexing give status information?
38
39
  def reindex_class(klass, options={})
39
40
  opts = {:verbose => false}.merge(options)
40
- # First, delete all docs of this class
41
41
  XapianDb.database.delete_docs_of_class(klass)
42
- blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
43
- indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
42
+ blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
43
+ indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
44
+ base_query = blueprint._base_query || klass
44
45
  show_progressbar = false
45
- obj_count = klass.count
46
+ obj_count = base_query.count
46
47
  if opts[:verbose]
47
- if defined?(ProgressBar)
48
- show_progressbar = true
49
- end
48
+ show_progressbar = defined?(ProgressBar)
50
49
  puts "reindexing #{obj_count} objects of #{klass}..."
51
50
  pbar = ProgressBar.new("Status", obj_count) if show_progressbar
52
51
  end
53
52
 
54
53
  # Process the objects in batches to reduce the memory footprint
55
- nr_of_batches = (obj_count / 1000) + 1
54
+ nr_of_batches = (obj_count / BATCH_SIZE) + 1
55
+ order_expression = "#{klass.name.tableize}.#{options[:primary_key]}"
56
56
  nr_of_batches.times do |batch|
57
- klass.all(:offset => batch * 1000, :limit => 1000, :order => options[:primary_key]).each do |obj|
58
- if blueprint.should_index? obj
59
- doc = indexer.build_document_for(obj)
60
- XapianDb.database.store_doc(doc)
61
- else
62
- XapianDb.database.delete_doc_with_unique_term(obj.xapian_id)
63
- end
57
+ # raise "PK: #{options[:primary_key]}"
58
+ base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
59
+ XapianDb.reindex obj
64
60
  pbar.inc if show_progressbar
65
61
  end
66
62
  end
@@ -69,8 +65,6 @@ module XapianDb
69
65
  end
70
66
 
71
67
  end
72
-
73
68
  end
74
-
75
69
  end
76
70
  end
data/lib/xapian_db.rb CHANGED
@@ -149,7 +149,12 @@ module XapianDb
149
149
  configured_classes = XapianDb::DocumentBlueprint.configured_classes
150
150
  return false unless configured_classes.size > 0
151
151
  configured_classes.each do |klass|
152
- XapianDb::Config.writer.reindex_class(klass, options) if klass.respond_to?(:rebuild_xapian_index)
152
+ if klass.respond_to?(:rebuild_xapian_index)
153
+ blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
154
+ adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
155
+ options[:primary_key] = adapter.primary_key_for(klass)
156
+ XapianDb::Config.writer.reindex_class(klass, options)
157
+ end
153
158
  end
154
159
  true
155
160
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian_db
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.1'
4
+ version: 1.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-07 00:00:00.000000000 +02:00
12
+ date: 2011-09-09 00:00:00.000000000 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: daemons
17
- requirement: &70318028242380 !ruby/object:Gem::Requirement
17
+ requirement: &70154206238860 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.10
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *70318028242380
25
+ version_requirements: *70154206238860
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: xapian-ruby
28
- requirement: &70318028241920 !ruby/object:Gem::Requirement
28
+ requirement: &70154206238400 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.2.6
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *70318028241920
36
+ version_requirements: *70154206238400
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: rspec
39
- requirement: &70318028241460 !ruby/object:Gem::Requirement
39
+ requirement: &70154206237940 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 2.3.1
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *70318028241460
47
+ version_requirements: *70154206237940
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: simplecov
50
- requirement: &70318028241000 !ruby/object:Gem::Requirement
50
+ requirement: &70154206237480 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ! '>='
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 0.3.7
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *70318028241000
58
+ version_requirements: *70154206237480
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: beanstalk-client
61
- requirement: &70318028240520 !ruby/object:Gem::Requirement
61
+ requirement: &70154206237020 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ! '>='
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.1.0
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *70318028240520
69
+ version_requirements: *70154206237020
70
70
  description: XapianDb is a ruby gem that combines features of nosql databases and
71
71
  fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
72
72
  email: gernot.kogler (at) garaio (dot) com