xapian_db 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ##1.1.2 (September 10th, 2011)
2
+
3
+ Fixes:
4
+
5
+ - beanstalk_worker flushes log messages (reinstall the worker script: rails generate xapian_db:install)
6
+
7
+ Features:
8
+
9
+ - massive performance optimization in rebuild_xapian_index
10
+
1
11
  ##1.1.1 (September 9th, 2011)
2
12
 
3
13
  Fixes:
data/README.rdoc CHANGED
@@ -138,7 +138,7 @@ one more class that is not stored in the database, but you want it to be indexed
138
138
 
139
139
  If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
140
140
 
141
- XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
141
+ XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
142
142
  blueprint.index :addresses
143
143
  blueprint.base_query Person.includes(:addresses)
144
144
  end
@@ -16,21 +16,33 @@ module XapianDb
16
16
  def process_requests
17
17
 
18
18
  Rails.logger.info "XapianDb beanstalk worker: initializing..."
19
+ Rails.logger.flush
19
20
 
20
- url = XapianDb::Config.beanstalk_daemon_url
21
- beanstalk = Beanstalk::Pool.new([url])
22
- worker = XapianDb::IndexWriters::BeanstalkWorker.new
23
- Rails.logger.info "XapianDb beanstalk worker: ready"
21
+ begin
22
+ url = XapianDb::Config.beanstalk_daemon_url
23
+ beanstalk = Beanstalk::Pool.new([url])
24
+ worker = XapianDb::IndexWriters::BeanstalkWorker.new
25
+ Rails.logger.info "XapianDb beanstalk worker: ready"
26
+ Rails.logger.flush
27
+ rescue Exception => ex
28
+ Rails.logger.error "cannot connect to beanstalk daemon: (#{ex}), terminating"
29
+ Rails.logger.flush
30
+ return
31
+ end
24
32
 
25
33
  loop do
26
34
  job = beanstalk.reserve
27
35
  begin
28
36
  params = YAML::load job.body
29
37
  Rails.logger.info "XapianDb beanstalk worker: executing task #{params}"
38
+ Rails.logger.flush
30
39
  task = params.delete :task
31
40
  worker.send task, params
41
+ Rails.logger.info "XapianDb beanstalk worker: done"
42
+ Rails.logger.flush
32
43
  rescue Exception => ex
33
44
  Rails.logger.error "XapianDb beanstalk worker: could not process #{job.body} (#{ex})"
45
+ Rails.logger.flush
34
46
  end
35
47
  job.delete
36
48
  end
@@ -17,13 +17,13 @@ module XapianDb
17
17
 
18
18
  # Update an object in the index
19
19
  # @param [Object] obj An instance of a class with a blueprint configuration
20
- def index(obj)
20
+ def index(obj, commit=true)
21
21
  beanstalk.put( {:task => "index_task", :class => obj.class.name, :id => obj.id }.to_yaml )
22
22
  end
23
23
 
24
24
  # Remove an object from the index
25
25
  # @param [String] xapian_id The document id
26
- def delete_doc_with(xapian_id)
26
+ def delete_doc_with(xapian_id, commit=true)
27
27
  beanstalk.put( { :task => "delete_doc_task", :xapian_id => xapian_id }.to_yaml )
28
28
  end
29
29
 
@@ -12,24 +12,36 @@ module XapianDb
12
12
 
13
13
  class DirectWriter
14
14
 
15
- BATCH_SIZE = 200
15
+ BATCH_SIZE = 500
16
+
16
17
  class << self
17
18
 
18
19
  # Update an object in the index
19
20
  # @param [Object] obj An instance of a class with a blueprint configuration
20
- def index(obj)
21
+ def index(obj, commit=true)
21
22
  blueprint = XapianDb::DocumentBlueprint.blueprint_for(obj.class)
22
23
  indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
23
24
  doc = indexer.build_document_for(obj)
24
25
  XapianDb.database.store_doc(doc)
25
- XapianDb.database.commit
26
+ XapianDb.database.commit if commit
26
27
  end
27
28
 
28
29
  # Remove an object from the index
29
30
  # @param [String] xapian_id The document id of an object
30
- def delete_doc_with(xapian_id)
31
+ def delete_doc_with(xapian_id, commit=true)
31
32
  XapianDb.database.delete_doc_with_unique_term xapian_id
32
- XapianDb.database.commit
33
+ XapianDb.database.commit if commit
34
+ end
35
+
36
+ # Update or delete the xapian document belonging to an object, depending on the ignore_if logic (if present)
37
+ # @param [Object] object An instance of a class with a blueprint configuration
38
+ def reindex(object, commit=true)
39
+ blueprint = XapianDb::DocumentBlueprint.blueprint_for object.class
40
+ if blueprint.should_index?(object)
41
+ index object, commit
42
+ else
43
+ delete_doc_with object.xapian_id, commit
44
+ end
33
45
  end
34
46
 
35
47
  # Reindex all objects of a given class
@@ -38,6 +50,9 @@ module XapianDb
38
50
  # @option options [Boolean] :verbose (false) Should the reindexing give status information?
39
51
  def reindex_class(klass, options={})
40
52
  opts = {:verbose => false}.merge(options)
53
+ blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
54
+ adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
55
+ primary_key = adapter.primary_key_for(klass)
41
56
  XapianDb.database.delete_docs_of_class(klass)
42
57
  blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
43
58
  indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
@@ -52,11 +67,11 @@ module XapianDb
52
67
 
53
68
  # Process the objects in batches to reduce the memory footprint
54
69
  nr_of_batches = (obj_count / BATCH_SIZE) + 1
55
- order_expression = "#{klass.name.tableize}.#{options[:primary_key]}"
70
+ order_expression = "#{klass.name.tableize}.#{primary_key}"
71
+ # raise "vor loop"
56
72
  nr_of_batches.times do |batch|
57
- # raise "PK: #{options[:primary_key]}"
58
73
  base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
59
- XapianDb.reindex obj
74
+ reindex obj, false
60
75
  pbar.inc if show_progressbar
61
76
  end
62
77
  end
@@ -10,11 +10,11 @@ module XapianDb
10
10
 
11
11
  # Update an object in the index
12
12
  # @param [Object] obj An instance of a class with a blueprint configuration
13
- def index(obj); end
13
+ def index(obj, commit=true); end
14
14
 
15
15
  # Remove an object from the index
16
16
  # @param [String] xapian_id The document id
17
- def delete_doc_with(xapian_id); end
17
+ def delete_doc_with(xapian_id, commit=true); end
18
18
 
19
19
  # Reindex all objects of a given class
20
20
  # @param [Class] klass The class to reindex
@@ -20,13 +20,13 @@ module XapianDb
20
20
 
21
21
  # Update an object in the index
22
22
  # @param [Object] obj An instance of a class with a blueprint configuration
23
- def index(obj)
23
+ def index(obj, commit=false)
24
24
  @index_requests << obj
25
25
  end
26
26
 
27
27
  # Remove a document from the index
28
28
  # @param [String] xapian_id The document id
29
- def delete_doc_with(xapian_id)
29
+ def delete_doc_with(xapian_id, commit=false)
30
30
  @delete_requests << xapian_id
31
31
  end
32
32
 
@@ -41,8 +41,9 @@ module XapianDb
41
41
  # Commit all pending changes to the database
42
42
  # @param [DirectWriter, BeanstalkWriter] writer The writer to use
43
43
  def commit_using(writer)
44
- @index_requests.each { |obj| writer.index obj }
45
- @delete_requests.each { |xapian_id| writer.delete_doc_with xapian_id }
44
+ @index_requests.each { |obj| writer.index obj, false }
45
+ @delete_requests.each { |xapian_id| writer.delete_doc_with xapian_id, false }
46
+ XapianDb.database.commit
46
47
  end
47
48
 
48
49
  end
data/lib/xapian_db.rb CHANGED
@@ -110,26 +110,27 @@ module XapianDb
110
110
 
111
111
  # Update an object in the index
112
112
  # @param [Object] obj An instance of a class with a blueprint configuration
113
- def self.index(obj)
113
+ def self.index(obj, commit=true)
114
114
  writer = @block_writer || XapianDb::Config.writer
115
- writer.index obj
115
+ writer.index obj, commit
116
116
  end
117
117
 
118
118
  # Remove a document from the index
119
119
  # @param [String] xapian_id The document id
120
- def self.delete_doc_with(xapian_id)
120
+ def self.delete_doc_with(xapian_id, commit=true)
121
121
  writer = @block_writer || XapianDb::Config.writer
122
- writer.delete_doc_with xapian_id
122
+ writer.delete_doc_with xapian_id, commit
123
123
  end
124
124
 
125
125
  # Update or delete the xapian document belonging to an object, depending on the ignore_if logic (if present)
126
126
  # @param [Object] object An instance of a class with a blueprint configuration
127
- def self.reindex(object)
127
+ def self.reindex(object, commit=true)
128
+ writer = @block_writer || XapianDb::Config.writer
128
129
  blueprint = XapianDb::DocumentBlueprint.blueprint_for object.class
129
130
  if blueprint.should_index?(object)
130
- XapianDb.index object
131
+ writer.index object, commit
131
132
  else
132
- XapianDb.delete_doc_with object.xapian_id
133
+ writer.delete_doc_with object.xapian_id, commit
133
134
  end
134
135
  end
135
136
 
@@ -150,9 +151,6 @@ module XapianDb
150
151
  return false unless configured_classes.size > 0
151
152
  configured_classes.each do |klass|
152
153
  if klass.respond_to?(:rebuild_xapian_index)
153
- blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
154
- adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
155
- options[:primary_key] = adapter.primary_key_for(klass)
156
154
  XapianDb::Config.writer.reindex_class(klass, options)
157
155
  end
158
156
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xapian_db
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-09 00:00:00.000000000 +02:00
12
+ date: 2011-09-10 00:00:00.000000000 +02:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: daemons
17
- requirement: &70154206238860 !ruby/object:Gem::Requirement
17
+ requirement: &70182803109440 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: 1.0.10
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *70154206238860
25
+ version_requirements: *70182803109440
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: xapian-ruby
28
- requirement: &70154206238400 !ruby/object:Gem::Requirement
28
+ requirement: &70182803108960 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.2.6
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *70154206238400
36
+ version_requirements: *70182803108960
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: rspec
39
- requirement: &70154206237940 !ruby/object:Gem::Requirement
39
+ requirement: &70182803108480 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 2.3.1
45
45
  type: :development
46
46
  prerelease: false
47
- version_requirements: *70154206237940
47
+ version_requirements: *70182803108480
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: simplecov
50
- requirement: &70154206237480 !ruby/object:Gem::Requirement
50
+ requirement: &70182803108020 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ! '>='
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 0.3.7
56
56
  type: :development
57
57
  prerelease: false
58
- version_requirements: *70154206237480
58
+ version_requirements: *70182803108020
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: beanstalk-client
61
- requirement: &70154206237020 !ruby/object:Gem::Requirement
61
+ requirement: &70182803107560 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ! '>='
@@ -66,7 +66,7 @@ dependencies:
66
66
  version: 1.1.0
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *70154206237020
69
+ version_requirements: *70182803107560
70
70
  description: XapianDb is a ruby gem that combines features of nosql databases and
71
71
  fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
72
72
  email: gernot.kogler (at) garaio (dot) com