xapian_db 1.1.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +11 -0
- data/README.rdoc +32 -1
- data/Rakefile +1 -0
- data/lib/xapian_db/config.rb +13 -3
- data/lib/xapian_db/database.rb +1 -1
- data/lib/xapian_db/index_writers/direct_writer.rb +2 -2
- data/lib/xapian_db/index_writers/no_op_writer.rb +15 -13
- data/lib/xapian_db/index_writers/resque_worker.rb +40 -0
- data/lib/xapian_db/index_writers/resque_writer.rb +39 -0
- data/lib/xapian_db/railtie.rb +3 -1
- data/lib/xapian_db.rb +1 -1
- metadata +47 -12
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
##1.2.0 (November 8th, 2011)
|
2
|
+
|
3
|
+
Fixes:
|
4
|
+
|
5
|
+
- find_similar_to supports the :limit option
|
6
|
+
|
7
|
+
Features:
|
8
|
+
|
9
|
+
- new index worker / writer for resque (thanks, Michael)
|
10
|
+
- support for namespaced models (thanks, Albert)
|
11
|
+
|
1
12
|
##1.1.4 (October 25th, 2011)
|
2
13
|
|
3
14
|
Fixes:
|
data/README.rdoc
CHANGED
@@ -331,7 +331,9 @@ However, dates need special handling to support date range queries. To support d
|
|
331
331
|
|
332
332
|
Since Xapian allows only one database instance to write to the index, the default setup of XapianDb will not work
|
333
333
|
with multiple app instances trying to write to the same database (you will get lock errors).
|
334
|
-
Therefore, XapianDb provides
|
334
|
+
Therefore, XapianDb provides two solutions based on queueing systems to overcome this. The first solution uses beanstalk and the second one uses resque.
|
335
|
+
|
336
|
+
== Installation with beanstalk
|
335
337
|
|
336
338
|
=== 1. Install beanstalkd
|
337
339
|
|
@@ -389,3 +391,32 @@ If everything is fine, you should find a file named beanstalk_worker.pid in tmp
|
|
389
391
|
goes wrong, you'll find beanstalk_worker.log instead showing the stack trace.
|
390
392
|
|
391
393
|
<b>Important: Do not start multiple instances of this daemon!</b>
|
394
|
+
|
395
|
+
== Installation with Resque
|
396
|
+
|
397
|
+
=== 1. Install and start redis
|
398
|
+
|
399
|
+
Install and start redis as described on the {resque github page}[https://github.com/defunkt/resque].
|
400
|
+
|
401
|
+
=== 2. Add the resque gem to your config
|
402
|
+
|
403
|
+
gem 'resque'
|
404
|
+
bundle install
|
405
|
+
|
406
|
+
=== 3. Configure XapianDb to use resque in production
|
407
|
+
|
408
|
+
production:
|
409
|
+
database: db/xapian_db/production
|
410
|
+
writer: resque
|
411
|
+
resque_queue: my_queue
|
412
|
+
|
413
|
+
If you don't specify a queue name, XapianDb will use 'xapian_db' by default.
|
414
|
+
|
415
|
+
=== 4. Start the resque worker
|
416
|
+
|
417
|
+
RAILS_ENV=production QUEUE=my_queue rake resque:work
|
418
|
+
|
419
|
+
Be sure to specify the correct queue name when starting the worker.
|
420
|
+
|
421
|
+
<b>If you don't provide a queue name, it WON'T take 'xapian_db' by default! Do not start multiple
|
422
|
+
instances of this worker!</b>
|
data/Rakefile
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/lib/xapian_db/config.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
module XapianDb
|
4
4
|
|
@@ -44,13 +44,17 @@ module XapianDb
|
|
44
44
|
return default_url if @config.nil?
|
45
45
|
@config.instance_variable_get("@_beanstalk_daemon_url") || default_url
|
46
46
|
end
|
47
|
+
|
48
|
+
def resque_queue
|
49
|
+
@config.instance_variable_get("@_resque_queue") || 'xapian_db'
|
50
|
+
end
|
47
51
|
end
|
48
52
|
|
49
53
|
# ---------------------------------------------------------------------------------
|
50
54
|
# DSL methods
|
51
55
|
# ---------------------------------------------------------------------------------
|
52
56
|
|
53
|
-
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_stemmer, :_stopper
|
57
|
+
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper
|
54
58
|
|
55
59
|
# Set the global database to use
|
56
60
|
# @param [String] path The path to the database. Either apply a file system path or :memory
|
@@ -100,6 +104,12 @@ module XapianDb
|
|
100
104
|
@_beanstalk_daemon_url = url
|
101
105
|
end
|
102
106
|
|
107
|
+
# Set the name of the resque queue
|
108
|
+
# @param [String] name The name of the resque queue
|
109
|
+
def resque_queue(name)
|
110
|
+
@_resque_queue = name
|
111
|
+
end
|
112
|
+
|
103
113
|
# Set the language.
|
104
114
|
# @param [Symbol] lang The language; apply the two letter ISO639 code for the language
|
105
115
|
# @example
|
@@ -115,4 +125,4 @@ module XapianDb
|
|
115
125
|
|
116
126
|
end
|
117
127
|
|
118
|
-
end
|
128
|
+
end
|
data/lib/xapian_db/database.rb
CHANGED
@@ -116,7 +116,7 @@ module XapianDb
|
|
116
116
|
end
|
117
117
|
enquiry = Xapian::Enquire.new(reader)
|
118
118
|
enquiry.query = final_query
|
119
|
-
Resultset.new(enquiry, :db_size => self.size)
|
119
|
+
Resultset.new(enquiry, :db_size => self.size, :limit => options[:limit])
|
120
120
|
end
|
121
121
|
|
122
122
|
# A very simple implementation of facets using Xapian collapse key.
|
@@ -67,7 +67,7 @@ module XapianDb
|
|
67
67
|
|
68
68
|
# Process the objects in batches to reduce the memory footprint
|
69
69
|
nr_of_batches = (obj_count / BATCH_SIZE) + 1
|
70
|
-
order_expression = "#{klass.name.tableize}.#{primary_key}"
|
70
|
+
order_expression = "#{klass.name.parameterize.tableize}.#{primary_key}"
|
71
71
|
# raise "vor loop"
|
72
72
|
nr_of_batches.times do |batch|
|
73
73
|
base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
|
@@ -82,4 +82,4 @@ module XapianDb
|
|
82
82
|
end
|
83
83
|
end
|
84
84
|
end
|
85
|
-
end
|
85
|
+
end
|
@@ -8,22 +8,24 @@ module XapianDb
|
|
8
8
|
|
9
9
|
class NoOpWriter
|
10
10
|
|
11
|
-
|
12
|
-
# @param [Object] obj An instance of a class with a blueprint configuration
|
13
|
-
def index(obj, commit=true); end
|
11
|
+
class << self
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
# Update an object in the index
|
14
|
+
# @param [Object] obj An instance of a class with a blueprint configuration
|
15
|
+
def index(obj, commit=true); end
|
18
16
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
# @option options [Boolean] :verbose (false) Should the reindexing give status information?
|
23
|
-
def reindex_class(klass, options={})
|
24
|
-
raise "rebuild_xapian_index is not supported inside a block with auto indexing disabled"
|
25
|
-
end
|
17
|
+
# Remove an object from the index
|
18
|
+
# @param [String] xapian_id The document id
|
19
|
+
def delete_doc_with(xapian_id, commit=true); end
|
26
20
|
|
21
|
+
# Reindex all objects of a given class
|
22
|
+
# @param [Class] klass The class to reindex
|
23
|
+
# @param [Hash] options Options for reindexing
|
24
|
+
# @option options [Boolean] :verbose (false) Should the reindexing give status information?
|
25
|
+
def reindex_class(klass, options={})
|
26
|
+
raise "rebuild_xapian_index is not supported inside a block with auto indexing disabled"
|
27
|
+
end
|
28
|
+
end
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module XapianDb
|
3
|
+
module IndexWriters
|
4
|
+
# Worker to update the Xapian index; the worker will be called by resque
|
5
|
+
# and uses the DirectWriter to do the real work
|
6
|
+
# @author Michael Stämpfli
|
7
|
+
class ResqueWorker
|
8
|
+
|
9
|
+
extend XapianDb::Utilities
|
10
|
+
|
11
|
+
APPROVED_TASKS = [:index, :delete_doc, :reindex_class]
|
12
|
+
|
13
|
+
class << self
|
14
|
+
def queue
|
15
|
+
XapianDb::Config.resque_queue
|
16
|
+
end
|
17
|
+
|
18
|
+
def perform(task, options)
|
19
|
+
send(task, options) if APPROVED_TASKS.include?(task.to_sym)
|
20
|
+
end
|
21
|
+
|
22
|
+
def index(options)
|
23
|
+
klass = constantize options['class']
|
24
|
+
obj = klass.respond_to?('get') ? klass.get(options['id']) : klass.find(options['id'])
|
25
|
+
DirectWriter.index obj
|
26
|
+
end
|
27
|
+
|
28
|
+
def delete_doc(options)
|
29
|
+
DirectWriter.delete_doc_with options['xapian_id']
|
30
|
+
end
|
31
|
+
|
32
|
+
def reindex_class(options)
|
33
|
+
klass = constantize options['class']
|
34
|
+
DirectWriter.reindex_class klass, :verbose => false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# This writer uses resque to enqueue index jobs
|
3
|
+
# @author Michael Stämpfli
|
4
|
+
|
5
|
+
require 'resque'
|
6
|
+
|
7
|
+
module XapianDb
|
8
|
+
module IndexWriters
|
9
|
+
class ResqueWriter
|
10
|
+
|
11
|
+
class << self
|
12
|
+
|
13
|
+
# Update an object in the index
|
14
|
+
# @param [Object] obj An instance of a class with a blueprint configuration
|
15
|
+
def index(obj, commit=true)
|
16
|
+
Resque.enqueue worker_class, :index, :class => obj.class.name, :id => obj.id
|
17
|
+
end
|
18
|
+
|
19
|
+
# Remove an object from the index
|
20
|
+
# @param [String] xapian_id The document id
|
21
|
+
def delete_doc_with(xapian_id, commit=true)
|
22
|
+
Resque.enqueue worker_class, :delete_doc, :xapian_id => xapian_id
|
23
|
+
end
|
24
|
+
|
25
|
+
# Reindex all objects of a given class
|
26
|
+
# @param [Class] klass The class to reindex
|
27
|
+
def reindex_class(klass, options = {})
|
28
|
+
Resque.enqueue worker_class, :reindex_class, :class => klass.name
|
29
|
+
end
|
30
|
+
|
31
|
+
def worker_class
|
32
|
+
ResqueWorker
|
33
|
+
end
|
34
|
+
private :worker_class
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/xapian_db/railtie.rb
CHANGED
@@ -37,6 +37,7 @@ module XapianDb
|
|
37
37
|
config.adapter @adapter.try(:to_sym)
|
38
38
|
config.writer @writer.try(:to_sym)
|
39
39
|
config.beanstalk_daemon_url @beanstalk_daemon
|
40
|
+
config.resque_queue @resque_queue
|
40
41
|
config.language @language.try(:to_sym)
|
41
42
|
end
|
42
43
|
|
@@ -57,6 +58,7 @@ module XapianDb
|
|
57
58
|
@adapter = env_config["adapter"] || :active_record
|
58
59
|
@writer = env_config["writer"] || :direct
|
59
60
|
@beanstalk_daemon = env_config["beanstalk_daemon"]
|
61
|
+
@resque_queue = env_config["resque_queue"]
|
60
62
|
@language = env_config["language"]
|
61
63
|
end
|
62
64
|
|
@@ -69,4 +71,4 @@ module XapianDb
|
|
69
71
|
end
|
70
72
|
|
71
73
|
end
|
72
|
-
end
|
74
|
+
end
|
data/lib/xapian_db.rb
CHANGED
@@ -168,7 +168,7 @@ module XapianDb
|
|
168
168
|
|
169
169
|
# Execute a block and do not update the index
|
170
170
|
def self.auto_indexing_disabled(&block)
|
171
|
-
execute_block :writer => XapianDb::IndexWriters::NoOpWriter
|
171
|
+
execute_block :writer => XapianDb::IndexWriters::NoOpWriter do
|
172
172
|
block.call
|
173
173
|
end
|
174
174
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.4
|
4
|
+
version: 1.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-11-07 00:00:00.000000000 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: daemons
|
17
|
-
requirement: &
|
17
|
+
requirement: &70141547189340 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.10
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70141547189340
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: xapian-ruby
|
28
|
-
requirement: &
|
28
|
+
requirement: &70141547188660 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.2.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70141547188660
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rspec
|
39
|
-
requirement: &
|
39
|
+
requirement: &70141547187880 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.3.1
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70141547187880
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: simplecov
|
50
|
-
requirement: &
|
50
|
+
requirement: &70141547187160 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.3.7
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70141547187160
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: beanstalk-client
|
61
|
-
requirement: &
|
61
|
+
requirement: &70141547186400 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,7 +66,40 @@ dependencies:
|
|
66
66
|
version: 1.1.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *70141547186400
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rake
|
72
|
+
requirement: &70141547185540 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
type: :development
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: *70141547185540
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: progressbar
|
83
|
+
requirement: &70141547200900 !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ! '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
type: :development
|
90
|
+
prerelease: false
|
91
|
+
version_requirements: *70141547200900
|
92
|
+
- !ruby/object:Gem::Dependency
|
93
|
+
name: resque
|
94
|
+
requirement: &70141547200060 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 1.19.0
|
100
|
+
type: :development
|
101
|
+
prerelease: false
|
102
|
+
version_requirements: *70141547200060
|
70
103
|
description: XapianDb is a ruby gem that combines features of nosql databases and
|
71
104
|
fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
|
72
105
|
email: gernot.kogler (at) garaio (dot) com
|
@@ -88,6 +121,8 @@ files:
|
|
88
121
|
- lib/xapian_db/index_writers/beanstalk_writer.rb
|
89
122
|
- lib/xapian_db/index_writers/direct_writer.rb
|
90
123
|
- lib/xapian_db/index_writers/no_op_writer.rb
|
124
|
+
- lib/xapian_db/index_writers/resque_worker.rb
|
125
|
+
- lib/xapian_db/index_writers/resque_writer.rb
|
91
126
|
- lib/xapian_db/index_writers/transactional_writer.rb
|
92
127
|
- lib/xapian_db/indexer.rb
|
93
128
|
- lib/xapian_db/query_parser.rb
|