xapian_db 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +10 -0
- data/README.rdoc +1 -1
- data/lib/generators/templates/beanstalk_worker +16 -4
- data/lib/xapian_db/index_writers/beanstalk_writer.rb +2 -2
- data/lib/xapian_db/index_writers/direct_writer.rb +23 -8
- data/lib/xapian_db/index_writers/no_op_writer.rb +2 -2
- data/lib/xapian_db/index_writers/transactional_writer.rb +5 -4
- data/lib/xapian_db.rb +8 -10
- metadata +12 -12
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
##1.1.2 (September 10th, 2011)
|
2
|
+
|
3
|
+
Fixes:
|
4
|
+
|
5
|
+
- beanstalk_worker flushes log messages (reinstall the worker script: rails generate xapian_db:install)
|
6
|
+
|
7
|
+
Features:
|
8
|
+
|
9
|
+
- massive performance optimization in rebuild_xapian_index
|
10
|
+
|
1
11
|
##1.1.1 (September 9th, 2011)
|
2
12
|
|
3
13
|
Fixes:
|
data/README.rdoc
CHANGED
@@ -138,7 +138,7 @@ one more class that is not stored in the database, but you want it to be indexed
|
|
138
138
|
|
139
139
|
If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
|
140
140
|
|
141
|
-
XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
|
141
|
+
XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
|
142
142
|
blueprint.index :addresses
|
143
143
|
blueprint.base_query Person.includes(:addresses)
|
144
144
|
end
|
data/lib/generators/templates/beanstalk_worker
CHANGED
@@ -16,21 +16,33 @@ module XapianDb
|
|
16
16
|
def process_requests
|
17
17
|
|
18
18
|
Rails.logger.info "XapianDb beanstalk worker: initializing..."
|
19
|
+
Rails.logger.flush
|
19
20
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
21
|
+
begin
|
22
|
+
url = XapianDb::Config.beanstalk_daemon_url
|
23
|
+
beanstalk = Beanstalk::Pool.new([url])
|
24
|
+
worker = XapianDb::IndexWriters::BeanstalkWorker.new
|
25
|
+
Rails.logger.info "XapianDb beanstalk worker: ready"
|
26
|
+
Rails.logger.flush
|
27
|
+
rescue Exception => ex
|
28
|
+
Rails.logger.error "cannot connect to beanstalk daemon: (#{ex}), terminating"
|
29
|
+
Rails.logger.flush
|
30
|
+
return
|
31
|
+
end
|
24
32
|
|
25
33
|
loop do
|
26
34
|
job = beanstalk.reserve
|
27
35
|
begin
|
28
36
|
params = YAML::load job.body
|
29
37
|
Rails.logger.info "XapianDb beanstalk worker: executing task #{params}"
|
38
|
+
Rails.logger.flush
|
30
39
|
task = params.delete :task
|
31
40
|
worker.send task, params
|
41
|
+
Rails.logger.info "XapianDb beanstalk worker: done"
|
42
|
+
Rails.logger.flush
|
32
43
|
rescue Exception => ex
|
33
44
|
Rails.logger.error "XapianDb beanstalk worker: could not process #{job.body} (#{ex})"
|
45
|
+
Rails.logger.flush
|
34
46
|
end
|
35
47
|
job.delete
|
36
48
|
end
|
data/lib/xapian_db/index_writers/beanstalk_writer.rb
CHANGED
@@ -17,13 +17,13 @@ module XapianDb
|
|
17
17
|
|
18
18
|
# Update an object in the index
|
19
19
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
20
|
-
def index(obj)
|
20
|
+
def index(obj, commit=true)
|
21
21
|
beanstalk.put( {:task => "index_task", :class => obj.class.name, :id => obj.id }.to_yaml )
|
22
22
|
end
|
23
23
|
|
24
24
|
# Remove an object from the index
|
25
25
|
# @param [String] xapian_id The document id
|
26
|
-
def delete_doc_with(xapian_id)
|
26
|
+
def delete_doc_with(xapian_id, commit=true)
|
27
27
|
beanstalk.put( { :task => "delete_doc_task", :xapian_id => xapian_id }.to_yaml )
|
28
28
|
end
|
29
29
|
|
data/lib/xapian_db/index_writers/direct_writer.rb
CHANGED
@@ -12,24 +12,36 @@ module XapianDb
|
|
12
12
|
|
13
13
|
class DirectWriter
|
14
14
|
|
15
|
-
BATCH_SIZE =
|
15
|
+
BATCH_SIZE = 500
|
16
|
+
|
16
17
|
class << self
|
17
18
|
|
18
19
|
# Update an object in the index
|
19
20
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
20
|
-
def index(obj)
|
21
|
+
def index(obj, commit=true)
|
21
22
|
blueprint = XapianDb::DocumentBlueprint.blueprint_for(obj.class)
|
22
23
|
indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
|
23
24
|
doc = indexer.build_document_for(obj)
|
24
25
|
XapianDb.database.store_doc(doc)
|
25
|
-
XapianDb.database.commit
|
26
|
+
XapianDb.database.commit if commit
|
26
27
|
end
|
27
28
|
|
28
29
|
# Remove an object from the index
|
29
30
|
# @param [String] xapian_id The document id of an object
|
30
|
-
def delete_doc_with(xapian_id)
|
31
|
+
def delete_doc_with(xapian_id, commit=true)
|
31
32
|
XapianDb.database.delete_doc_with_unique_term xapian_id
|
32
|
-
XapianDb.database.commit
|
33
|
+
XapianDb.database.commit if commit
|
34
|
+
end
|
35
|
+
|
36
|
+
# Update or delete a xapian document belonging to an object depending on the ignore_if logic(if present)
|
37
|
+
# @param [Object] object An instance of a class with a blueprint configuration
|
38
|
+
def reindex(object, commit=true)
|
39
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for object.class
|
40
|
+
if blueprint.should_index?(object)
|
41
|
+
index object, commit
|
42
|
+
else
|
43
|
+
delete_doc_with object.xapian_id, commit
|
44
|
+
end
|
33
45
|
end
|
34
46
|
|
35
47
|
# Reindex all objects of a given class
|
@@ -38,6 +50,9 @@ module XapianDb
|
|
38
50
|
# @option options [Boolean] :verbose (false) Should the reindexing give status informations?
|
39
51
|
def reindex_class(klass, options={})
|
40
52
|
opts = {:verbose => false}.merge(options)
|
53
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
|
54
|
+
adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
|
55
|
+
primary_key = adapter.primary_key_for(klass)
|
41
56
|
XapianDb.database.delete_docs_of_class(klass)
|
42
57
|
blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
|
43
58
|
indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
|
@@ -52,11 +67,11 @@ module XapianDb
|
|
52
67
|
|
53
68
|
# Process the objects in batches to reduce the memory footprint
|
54
69
|
nr_of_batches = (obj_count / BATCH_SIZE) + 1
|
55
|
-
order_expression = "#{klass.name.tableize}.#{options[:primary_key]}"
|
70
|
+
order_expression = "#{klass.name.tableize}.#{primary_key}"
|
71
|
+
# raise "vor loop"
|
56
72
|
nr_of_batches.times do |batch|
|
57
|
-
# raise "PK: #{options[:primary_key]}"
|
58
73
|
base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
|
59
|
-
|
74
|
+
reindex obj, false
|
60
75
|
pbar.inc if show_progressbar
|
61
76
|
end
|
62
77
|
end
|
data/lib/xapian_db/index_writers/no_op_writer.rb
CHANGED
@@ -10,11 +10,11 @@ module XapianDb
|
|
10
10
|
|
11
11
|
# Update an object in the index
|
12
12
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
13
|
-
def index(obj); end
|
13
|
+
def index(obj, commit=true); end
|
14
14
|
|
15
15
|
# Remove an object from the index
|
16
16
|
# @param [String] xapian_id The document id
|
17
|
-
def delete_doc_with(xapian_id); end
|
17
|
+
def delete_doc_with(xapian_id, commit=true); end
|
18
18
|
|
19
19
|
# Reindex all objects of a given class
|
20
20
|
# @param [Class] klass The class to reindex
|
data/lib/xapian_db/index_writers/transactional_writer.rb
CHANGED
@@ -20,13 +20,13 @@ module XapianDb
|
|
20
20
|
|
21
21
|
# Update an object in the index
|
22
22
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
23
|
-
def index(obj)
|
23
|
+
def index(obj, commit=false)
|
24
24
|
@index_requests << obj
|
25
25
|
end
|
26
26
|
|
27
27
|
# Remove a document from the index
|
28
28
|
# @param [String] xapian_id The document id
|
29
|
-
def delete_doc_with(xapian_id)
|
29
|
+
def delete_doc_with(xapian_id, commit=false)
|
30
30
|
@delete_requests << xapian_id
|
31
31
|
end
|
32
32
|
|
@@ -41,8 +41,9 @@ module XapianDb
|
|
41
41
|
# Commit all pending changes to the database
|
42
42
|
# @param [DirectWriter, BeanstalkWriter] writer The writer to use
|
43
43
|
def commit_using(writer)
|
44
|
-
@index_requests.each { |obj| writer.index obj }
|
45
|
-
@delete_requests.each { |xapian_id| writer.delete_doc_with xapian_id }
|
44
|
+
@index_requests.each { |obj| writer.index obj, false }
|
45
|
+
@delete_requests.each { |xapian_id| writer.delete_doc_with xapian_id, false }
|
46
|
+
XapianDb.database.commit
|
46
47
|
end
|
47
48
|
|
48
49
|
end
|
data/lib/xapian_db.rb
CHANGED
@@ -110,26 +110,27 @@ module XapianDb
|
|
110
110
|
|
111
111
|
# Update an object in the index
|
112
112
|
# @param [Object] obj An instance of a class with a blueprint configuration
|
113
|
-
def self.index(obj)
|
113
|
+
def self.index(obj, commit=true)
|
114
114
|
writer = @block_writer || XapianDb::Config.writer
|
115
|
-
writer.index obj
|
115
|
+
writer.index obj, commit
|
116
116
|
end
|
117
117
|
|
118
118
|
# Remove a document from the index
|
119
119
|
# @param [String] xapian_id The document id
|
120
|
-
def self.delete_doc_with(xapian_id)
|
120
|
+
def self.delete_doc_with(xapian_id, commit=true)
|
121
121
|
writer = @block_writer || XapianDb::Config.writer
|
122
|
-
writer.delete_doc_with xapian_id
|
122
|
+
writer.delete_doc_with xapian_id, commit
|
123
123
|
end
|
124
124
|
|
125
125
|
# Update or delete a xapian document belonging to an object depending on the ignore_if logic(if present)
|
126
126
|
# @param [Object] object An instance of a class with a blueprint configuration
|
127
|
-
def self.reindex(object)
|
127
|
+
def self.reindex(object, commit=true)
|
128
|
+
writer = @block_writer || XapianDb::Config.writer
|
128
129
|
blueprint = XapianDb::DocumentBlueprint.blueprint_for object.class
|
129
130
|
if blueprint.should_index?(object)
|
130
|
-
|
131
|
+
writer.index object, commit
|
131
132
|
else
|
132
|
-
|
133
|
+
writer.delete_doc_with object.xapian_id, commit
|
133
134
|
end
|
134
135
|
end
|
135
136
|
|
@@ -150,9 +151,6 @@ module XapianDb
|
|
150
151
|
return false unless configured_classes.size > 0
|
151
152
|
configured_classes.each do |klass|
|
152
153
|
if klass.respond_to?(:rebuild_xapian_index)
|
153
|
-
blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
|
154
|
-
adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
|
155
|
-
options[:primary_key] = adapter.primary_key_for(klass)
|
156
154
|
XapianDb::Config.writer.reindex_class(klass, options)
|
157
155
|
end
|
158
156
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-
|
12
|
+
date: 2011-09-10 00:00:00.000000000 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: daemons
|
17
|
-
requirement: &
|
17
|
+
requirement: &70182803109440 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.10
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70182803109440
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: xapian-ruby
|
28
|
-
requirement: &
|
28
|
+
requirement: &70182803108960 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.2.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70182803108960
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rspec
|
39
|
-
requirement: &
|
39
|
+
requirement: &70182803108480 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.3.1
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70182803108480
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: simplecov
|
50
|
-
requirement: &
|
50
|
+
requirement: &70182803108020 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.3.7
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70182803108020
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: beanstalk-client
|
61
|
-
requirement: &
|
61
|
+
requirement: &70182803107560 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.1.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *70182803107560
|
70
70
|
description: XapianDb is a ruby gem that combines features of nosql databases and
|
71
71
|
fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
|
72
72
|
email: gernot.kogler (at) garaio (dot) com
|