xapian_db 1.1.1 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +10 -0
- data/README.rdoc +1 -1
- data/lib/generators/templates/beanstalk_worker +16 -4
- data/lib/xapian_db/index_writers/beanstalk_writer.rb +2 -2
- data/lib/xapian_db/index_writers/direct_writer.rb +23 -8
- data/lib/xapian_db/index_writers/no_op_writer.rb +2 -2
- data/lib/xapian_db/index_writers/transactional_writer.rb +5 -4
- data/lib/xapian_db.rb +8 -10
- metadata +12 -12
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
##1.1.2 (September 10th, 2011)
|
2
|
+
|
3
|
+
Fixes:
|
4
|
+
|
5
|
+
- beanstalk_worker flushes log messages (reinstall the worker script: rails generate xapian_db:install)
|
6
|
+
|
7
|
+
Features:
|
8
|
+
|
9
|
+
- massive performance optimization in rebuild_xapian_index
|
10
|
+
|
1
11
|
##1.1.1 (September 9th, 2011)
|
2
12
|
|
3
13
|
Fixes:
|
data/README.rdoc
CHANGED
@@ -138,7 +138,7 @@ one more class that is not stored in the database, but you want it to be indexed
|
|
138
138
|
|
139
139
|
If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
|
140
140
|
|
141
|
-
XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
|
141
|
+
XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
|
142
142
|
blueprint.index :addresses
|
143
143
|
blueprint.base_query Person.includes(:addresses)
|
144
144
|
end
|
@@ -16,21 +16,33 @@ module XapianDb
|
|
16
16
|
def process_requests
|
17
17
|
|
18
18
|
Rails.logger.info "XapianDb beanstalk worker: initializing..."
|
19
|
+
Rails.logger.flush
|
19
20
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
begin
|
22
|
+
url = XapianDb::Config.beanstalk_daemon_url
|
23
|
+
beanstalk = Beanstalk::Pool.new([url])
|
24
|
+
worker = XapianDb::IndexWriters::BeanstalkWorker.new
|
25
|
+
Rails.logger.info "XapianDb beanstalk worker: ready"
|
26
|
+
Rails.logger.flush
|
27
|
+
rescue Exception => ex
|
28
|
+
Rails.logger.error "cannot connect to beanstalk daemon: (#{ex}), terminating"
|
29
|
+
Rails.logger.flush
|
30
|
+
return
|
31
|
+
end
|
24
32
|
|
25
33
|
loop do
|
26
34
|
job = beanstalk.reserve
|
27
35
|
begin
|
28
36
|
params = YAML::load job.body
|
29
37
|
Rails.logger.info "XapianDb beanstalk worker: executing task #{params}"
|
38
|
+
Rails.logger.flush
|
30
39
|
task = params.delete :task
|
31
40
|
worker.send task, params
|
41
|
+
Rails.logger.info "XapianDb beanstalk worker: done"
|
42
|
+
Rails.logger.flush
|
32
43
|
rescue Exception => ex
|
33
44
|
Rails.logger.error "XapianDb beanstalk worker: could not process #{job.body} (#{ex})"
|
45
|
+
Rails.logger.flush
|
34
46
|
end
|
35
47
|
job.delete
|
36
48
|
end
|
@@ -17,13 +17,13 @@ module XapianDb
|
|
17
17
|
|
18
18
|
# Update an object in the index
|
19
19
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
20
|
-
def index(obj)
|
20
|
+
def index(obj, commit=true)
|
21
21
|
beanstalk.put( {:task => "index_task", :class => obj.class.name, :id => obj.id }.to_yaml )
|
22
22
|
end
|
23
23
|
|
24
24
|
# Remove an object from the index
|
25
25
|
# @param [String] xapian_id The document id
|
26
|
-
def delete_doc_with(xapian_id)
|
26
|
+
def delete_doc_with(xapian_id, commit=true)
|
27
27
|
beanstalk.put( { :task => "delete_doc_task", :xapian_id => xapian_id }.to_yaml )
|
28
28
|
end
|
29
29
|
|
@@ -12,24 +12,36 @@ module XapianDb
|
|
12
12
|
|
13
13
|
class DirectWriter
|
14
14
|
|
15
|
-
BATCH_SIZE =
|
15
|
+
BATCH_SIZE = 500
|
16
|
+
|
16
17
|
class << self
|
17
18
|
|
18
19
|
# Update an object in the index
|
19
20
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
20
|
-
def index(obj)
|
21
|
+
def index(obj, commit=true)
|
21
22
|
blueprint = XapianDb::DocumentBlueprint.blueprint_for(obj.class)
|
22
23
|
indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
|
23
24
|
doc = indexer.build_document_for(obj)
|
24
25
|
XapianDb.database.store_doc(doc)
|
25
|
-
XapianDb.database.commit
|
26
|
+
XapianDb.database.commit if commit
|
26
27
|
end
|
27
28
|
|
28
29
|
# Remove an object from the index
|
29
30
|
# @param [String] xapian_id The document id of an object
|
30
|
-
def delete_doc_with(xapian_id)
|
31
|
+
def delete_doc_with(xapian_id, commit=true)
|
31
32
|
XapianDb.database.delete_doc_with_unique_term xapian_id
|
32
|
-
XapianDb.database.commit
|
33
|
+
XapianDb.database.commit if commit
|
34
|
+
end
|
35
|
+
|
36
|
+
# Update or delete a xapian document belonging to an object depending on the ignore_if logic (if present)
|
37
|
+
# @param [Object] object An instance of a class with a blueprint configuration
|
38
|
+
def reindex(object, commit=true)
|
39
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for object.class
|
40
|
+
if blueprint.should_index?(object)
|
41
|
+
index object, commit
|
42
|
+
else
|
43
|
+
delete_doc_with object.xapian_id, commit
|
44
|
+
end
|
33
45
|
end
|
34
46
|
|
35
47
|
# Reindex all objects of a given class
|
@@ -38,6 +50,9 @@ module XapianDb
|
|
38
50
|
# @option options [Boolean] :verbose (false) Should the reindexing give status information?
|
39
51
|
def reindex_class(klass, options={})
|
40
52
|
opts = {:verbose => false}.merge(options)
|
53
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
|
54
|
+
adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
|
55
|
+
primary_key = adapter.primary_key_for(klass)
|
41
56
|
XapianDb.database.delete_docs_of_class(klass)
|
42
57
|
blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
|
43
58
|
indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
|
@@ -52,11 +67,11 @@ module XapianDb
|
|
52
67
|
|
53
68
|
# Process the objects in batches to reduce the memory footprint
|
54
69
|
nr_of_batches = (obj_count / BATCH_SIZE) + 1
|
55
|
-
order_expression = "#{klass.name.tableize}.#{options[:primary_key]}"
|
70
|
+
order_expression = "#{klass.name.tableize}.#{primary_key}"
|
71
|
+
# raise "vor loop"
|
56
72
|
nr_of_batches.times do |batch|
|
57
|
-
# raise "PK: #{options[:primary_key]}"
|
58
73
|
base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
|
59
|
-
|
74
|
+
reindex obj, false
|
60
75
|
pbar.inc if show_progressbar
|
61
76
|
end
|
62
77
|
end
|
@@ -10,11 +10,11 @@ module XapianDb
|
|
10
10
|
|
11
11
|
# Update an object in the index
|
12
12
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
13
|
-
def index(obj); end
|
13
|
+
def index(obj, commit=true); end
|
14
14
|
|
15
15
|
# Remove an object from the index
|
16
16
|
# @param [String] xapian_id The document id
|
17
|
-
def delete_doc_with(xapian_id); end
|
17
|
+
def delete_doc_with(xapian_id, commit=true); end
|
18
18
|
|
19
19
|
# Reindex all objects of a given class
|
20
20
|
# @param [Class] klass The class to reindex
|
@@ -20,13 +20,13 @@ module XapianDb
|
|
20
20
|
|
21
21
|
# Update an object in the index
|
22
22
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
23
|
-
def index(obj)
|
23
|
+
def index(obj, commit=false)
|
24
24
|
@index_requests << obj
|
25
25
|
end
|
26
26
|
|
27
27
|
# Remove a document from the index
|
28
28
|
# @param [String] xapian_id The document id
|
29
|
-
def delete_doc_with(xapian_id)
|
29
|
+
def delete_doc_with(xapian_id, commit=false)
|
30
30
|
@delete_requests << xapian_id
|
31
31
|
end
|
32
32
|
|
@@ -41,8 +41,9 @@ module XapianDb
|
|
41
41
|
# Commit all pending changes to the database
|
42
42
|
# @param [DirectWriter, BeanstalkWriter] writer The writer to use
|
43
43
|
def commit_using(writer)
|
44
|
-
@index_requests.each { |obj| writer.index obj }
|
45
|
-
@delete_requests.each { |xapian_id| writer.delete_doc_with xapian_id }
|
44
|
+
@index_requests.each { |obj| writer.index obj, false }
|
45
|
+
@delete_requests.each { |xapian_id| writer.delete_doc_with xapian_id, false }
|
46
|
+
XapianDb.database.commit
|
46
47
|
end
|
47
48
|
|
48
49
|
end
|
data/lib/xapian_db.rb
CHANGED
@@ -110,26 +110,27 @@ module XapianDb
|
|
110
110
|
|
111
111
|
# Update an object in the index
|
112
112
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
113
|
-
def self.index(obj)
|
113
|
+
def self.index(obj, commit=true)
|
114
114
|
writer = @block_writer || XapianDb::Config.writer
|
115
|
-
writer.index obj
|
115
|
+
writer.index obj, commit
|
116
116
|
end
|
117
117
|
|
118
118
|
# Remove a document from the index
|
119
119
|
# @param [String] xapian_id The document id
|
120
|
-
def self.delete_doc_with(xapian_id)
|
120
|
+
def self.delete_doc_with(xapian_id, commit=true)
|
121
121
|
writer = @block_writer || XapianDb::Config.writer
|
122
|
-
writer.delete_doc_with xapian_id
|
122
|
+
writer.delete_doc_with xapian_id, commit
|
123
123
|
end
|
124
124
|
|
125
125
|
# Update or delete a xapian document belonging to an object depending on the ignore_if logic (if present)
|
126
126
|
# @param [Object] object An instance of a class with a blueprint configuration
|
127
|
-
def self.reindex(object)
|
127
|
+
def self.reindex(object, commit=true)
|
128
|
+
writer = @block_writer || XapianDb::Config.writer
|
128
129
|
blueprint = XapianDb::DocumentBlueprint.blueprint_for object.class
|
129
130
|
if blueprint.should_index?(object)
|
130
|
-
|
131
|
+
writer.index object, commit
|
131
132
|
else
|
132
|
-
|
133
|
+
writer.delete_doc_with object.xapian_id, commit
|
133
134
|
end
|
134
135
|
end
|
135
136
|
|
@@ -150,9 +151,6 @@ module XapianDb
|
|
150
151
|
return false unless configured_classes.size > 0
|
151
152
|
configured_classes.each do |klass|
|
152
153
|
if klass.respond_to?(:rebuild_xapian_index)
|
153
|
-
blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
|
154
|
-
adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
|
155
|
-
options[:primary_key] = adapter.primary_key_for(klass)
|
156
154
|
XapianDb::Config.writer.reindex_class(klass, options)
|
157
155
|
end
|
158
156
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-09 00:00:00.000000000 +02:00
|
12
|
+
date: 2011-09-10 00:00:00.000000000 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: daemons
|
17
|
-
requirement: &
|
17
|
+
requirement: &70182803109440 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.10
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70182803109440
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: xapian-ruby
|
28
|
-
requirement: &
|
28
|
+
requirement: &70182803108960 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.2.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70182803108960
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rspec
|
39
|
-
requirement: &
|
39
|
+
requirement: &70182803108480 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.3.1
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70182803108480
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: simplecov
|
50
|
-
requirement: &
|
50
|
+
requirement: &70182803108020 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.3.7
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70182803108020
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: beanstalk-client
|
61
|
-
requirement: &
|
61
|
+
requirement: &70182803107560 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.1.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *70182803107560
|
70
70
|
description: XapianDb is a ruby gem that combines features of nosql databases and
|
71
71
|
fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
|
72
72
|
email: gernot.kogler (at) garaio (dot) com
|