xapian_db 1.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +10 -0
- data/README.rdoc +11 -4
- data/lib/xapian_db/adapters/active_record_adapter.rb +7 -0
- data/lib/xapian_db/adapters/datamapper_adapter.rb +7 -0
- data/lib/xapian_db/document_blueprint.rb +11 -1
- data/lib/xapian_db/index_writers/direct_writer.rb +11 -17
- data/lib/xapian_db.rb +6 -1
- metadata +12 -12
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
##1.1.1 (September 9th, 2011)
|
2
|
+
|
3
|
+
Fixes:
|
4
|
+
|
5
|
+
- fixed a bug in XapianDb.rebuild_xapian_index that did not index all records of a klass; it is highly recommended to run XapianDb.rebuild_xapian_index after installing this version
|
6
|
+
|
7
|
+
Features:
|
8
|
+
|
9
|
+
- base query option for blueprints; may speed up reindexing significantly if you index associations
|
10
|
+
|
1
11
|
##1.1 (September 7th, 2011)
|
2
12
|
|
3
13
|
Fixes:
|
data/README.rdoc
CHANGED
@@ -131,10 +131,17 @@ You can add a type information to an attribute. As of now the special types :str
|
|
131
131
|
You can override the global adapter configuration in a specific blueprint. Let's say you use ActiveRecord, but you have
|
132
132
|
one more class that is not stored in the database, but you want it to be indexed:
|
133
133
|
|
134
|
-
XapianDb::DocumentBlueprint.setup(SpecialClass) do |blueprint|
|
135
|
-
|
136
|
-
|
137
|
-
end
|
134
|
+
XapianDb::DocumentBlueprint.setup(SpecialClass) do |blueprint|
|
135
|
+
blueprint.adapter :generic
|
136
|
+
blueprint.index :some_stuff
|
137
|
+
end
|
138
|
+
|
139
|
+
If you use associations in your blueprints, it might be a good idea to specify a base query to speed up rebuild_xapian_index calls (avoiding 1+n queries):
|
140
|
+
|
141
|
+
XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
|
142
|
+
blueprint.index :addresses
|
143
|
+
blueprint.base_query Person.includes(:addresses)
|
144
|
+
end
|
138
145
|
|
139
146
|
Place these configurations either into the corresponding class or - I prefer to have the index configurations outside
|
140
147
|
the models - into the file config/xapian_blueprints.rb.
|
@@ -20,6 +20,13 @@ module XapianDb
|
|
20
20
|
|
21
21
|
class << self
|
22
22
|
|
23
|
+
# return the name of the primary key column of a class
|
24
|
+
# @param [Class] klass the class
|
25
|
+
# @return [Symbol] the name of the primary key column
|
26
|
+
def primary_key_for(klass)
|
27
|
+
klass.primary_key
|
28
|
+
end
|
29
|
+
|
23
30
|
# Implement the class helper methods
|
24
31
|
# @param [Class] klass The class to add the helper methods to
|
25
32
|
def add_class_helper_methods_to(klass)
|
@@ -19,6 +19,13 @@ module XapianDb
|
|
19
19
|
|
20
20
|
class << self
|
21
21
|
|
22
|
+
# return the name of the primary key column of a class
|
23
|
+
# @param [Class] klass the class
|
24
|
+
# @return [Symbol] the name of the primary key column
|
25
|
+
def primary_key_for(klass)
|
26
|
+
klass.serial.name
|
27
|
+
end
|
28
|
+
|
22
29
|
# Implement the class helper methods
|
23
30
|
# @param [Class] klass The class to add the helper methods to
|
24
31
|
def add_class_helper_methods_to(klass)
|
@@ -205,8 +205,8 @@ module XapianDb
|
|
205
205
|
# Blueprint DSL methods
|
206
206
|
# ---------------------------------------------------------------------------------
|
207
207
|
|
208
|
-
# An optional custom adapter
|
209
208
|
attr_accessor :_adapter
|
209
|
+
attr_reader :_base_query
|
210
210
|
|
211
211
|
# Construct the blueprint
|
212
212
|
def initialize
|
@@ -306,6 +306,16 @@ module XapianDb
|
|
306
306
|
@ignore_expression = block
|
307
307
|
end
|
308
308
|
|
309
|
+
# Define a base query to select one or all objects of the indexed class. The reason for a
|
310
|
+
# base query is to optimize the query, avoiding the 1+n query problem. The base query should only
|
311
|
+
# include joins(...) and includes(...) calls.
|
312
|
+
# @param [expression] a base query expression
|
313
|
+
# @example Include the addresses
|
314
|
+
# blueprint.base_query Person.includes(:addresses)
|
315
|
+
def base_query(expression)
|
316
|
+
@_base_query = expression
|
317
|
+
end
|
318
|
+
|
309
319
|
# Options for an indexed method
|
310
320
|
class IndexOptions
|
311
321
|
|
@@ -12,6 +12,7 @@ module XapianDb
|
|
12
12
|
|
13
13
|
class DirectWriter
|
14
14
|
|
15
|
+
BATCH_SIZE = 200
|
15
16
|
class << self
|
16
17
|
|
17
18
|
# Update an object in the index
|
@@ -37,30 +38,25 @@ module XapianDb
|
|
37
38
|
# @option options [Boolean] :verbose (false) Should the reindexing give status information?
|
38
39
|
def reindex_class(klass, options={})
|
39
40
|
opts = {:verbose => false}.merge(options)
|
40
|
-
# First, delete all docs of this class
|
41
41
|
XapianDb.database.delete_docs_of_class(klass)
|
42
|
-
blueprint
|
43
|
-
indexer
|
42
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
|
43
|
+
indexer = XapianDb::Indexer.new(XapianDb.database, blueprint)
|
44
|
+
base_query = blueprint._base_query || klass
|
44
45
|
show_progressbar = false
|
45
|
-
obj_count =
|
46
|
+
obj_count = base_query.count
|
46
47
|
if opts[:verbose]
|
47
|
-
|
48
|
-
show_progressbar = true
|
49
|
-
end
|
48
|
+
show_progressbar = defined?(ProgressBar)
|
50
49
|
puts "reindexing #{obj_count} objects of #{klass}..."
|
51
50
|
pbar = ProgressBar.new("Status", obj_count) if show_progressbar
|
52
51
|
end
|
53
52
|
|
54
53
|
# Process the objects in batches to reduce the memory footprint
|
55
|
-
nr_of_batches = (obj_count /
|
54
|
+
nr_of_batches = (obj_count / BATCH_SIZE) + 1
|
55
|
+
order_expression = "#{klass.name.tableize}.#{options[:primary_key]}"
|
56
56
|
nr_of_batches.times do |batch|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
XapianDb.database.store_doc(doc)
|
61
|
-
else
|
62
|
-
XapianDb.database.delete_doc_with_unique_term(obj.xapian_id)
|
63
|
-
end
|
57
|
+
# raise "PK: #{options[:primary_key]}"
|
58
|
+
base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
|
59
|
+
XapianDb.reindex obj
|
64
60
|
pbar.inc if show_progressbar
|
65
61
|
end
|
66
62
|
end
|
@@ -69,8 +65,6 @@ module XapianDb
|
|
69
65
|
end
|
70
66
|
|
71
67
|
end
|
72
|
-
|
73
68
|
end
|
74
|
-
|
75
69
|
end
|
76
70
|
end
|
data/lib/xapian_db.rb
CHANGED
@@ -149,7 +149,12 @@ module XapianDb
|
|
149
149
|
configured_classes = XapianDb::DocumentBlueprint.configured_classes
|
150
150
|
return false unless configured_classes.size > 0
|
151
151
|
configured_classes.each do |klass|
|
152
|
-
|
152
|
+
if klass.respond_to?(:rebuild_xapian_index)
|
153
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for klass
|
154
|
+
adapter = blueprint._adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
|
155
|
+
options[:primary_key] = adapter.primary_key_for(klass)
|
156
|
+
XapianDb::Config.writer.reindex_class(klass, options)
|
157
|
+
end
|
153
158
|
end
|
154
159
|
true
|
155
160
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-
|
12
|
+
date: 2011-09-09 00:00:00.000000000 +02:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: daemons
|
17
|
-
requirement: &
|
17
|
+
requirement: &70154206238860 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.10
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70154206238860
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: xapian-ruby
|
28
|
-
requirement: &
|
28
|
+
requirement: &70154206238400 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.2.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70154206238400
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rspec
|
39
|
-
requirement: &
|
39
|
+
requirement: &70154206237940 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.3.1
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70154206237940
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: simplecov
|
50
|
-
requirement: &
|
50
|
+
requirement: &70154206237480 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.3.7
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70154206237480
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: beanstalk-client
|
61
|
-
requirement: &
|
61
|
+
requirement: &70154206237020 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
version: 1.1.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *70154206237020
|
70
70
|
description: XapianDb is a ruby gem that combines features of nosql databases and
|
71
71
|
fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
|
72
72
|
email: gernot.kogler (at) garaio (dot) com
|