xapian_db 1.1.4 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +11 -0
- data/README.rdoc +32 -1
- data/Rakefile +1 -0
- data/lib/xapian_db/config.rb +13 -3
- data/lib/xapian_db/database.rb +1 -1
- data/lib/xapian_db/index_writers/direct_writer.rb +2 -2
- data/lib/xapian_db/index_writers/no_op_writer.rb +15 -13
- data/lib/xapian_db/index_writers/resque_worker.rb +40 -0
- data/lib/xapian_db/index_writers/resque_writer.rb +39 -0
- data/lib/xapian_db/railtie.rb +3 -1
- data/lib/xapian_db.rb +1 -1
- metadata +47 -12
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
##1.2.0 (November 8th, 2011)
|
2
|
+
|
3
|
+
Fixes:
|
4
|
+
|
5
|
+
- find_similar_to supports the :limit option
|
6
|
+
|
7
|
+
Features:
|
8
|
+
|
9
|
+
- new index worker / writer for resque (thanks, Michael)
|
10
|
+
- support for namespaced models (thanks, Albert)
|
11
|
+
|
1
12
|
##1.1.4 (October 25th, 2011)
|
2
13
|
|
3
14
|
Fixes:
|
data/README.rdoc
CHANGED
@@ -331,7 +331,9 @@ However, dates need special handling to support date range queries. To support d
|
|
331
331
|
|
332
332
|
Since Xapian allows only one database instance to write to the index, the default setup of XapianDb will not work
|
333
333
|
with multiple app instances trying to write to the same database (you will get lock errors).
|
334
|
-
Therefore, XapianDb provides
|
334
|
+
Therefore, XapianDb provides two solutions based on queueing systems to overcome this. The first solution uses beanstalk and the second one uses resque.
|
335
|
+
|
336
|
+
== Installation with beanstalk
|
335
337
|
|
336
338
|
=== 1. Install beanstalkd
|
337
339
|
|
@@ -389,3 +391,32 @@ If everything is fine, you should find a file named beanstalk_worker.pid in tmp
|
|
389
391
|
goes wrong, you'll find beanstalk_worker.log instead showing the stack trace.
|
390
392
|
|
391
393
|
<b>Important: Do not start multiple instances of this daemon!</b>
|
394
|
+
|
395
|
+
== Installation with Resque
|
396
|
+
|
397
|
+
=== 1. Install and start redis
|
398
|
+
|
399
|
+
Install and start redis as described on the {resque github page}[https://github.com/defunkt/resque].
|
400
|
+
|
401
|
+
=== 2. Add the resque gem to your Gemfile
|
402
|
+
|
403
|
+
gem 'resque'
|
404
|
+
bundle install
|
405
|
+
|
406
|
+
=== 3. Configure XapianDb to use resque in production
|
407
|
+
|
408
|
+
production:
|
409
|
+
database: db/xapian_db/production
|
410
|
+
writer: resque
|
411
|
+
resque_queue: my_queue
|
412
|
+
|
413
|
+
If you don't specify a queue name, XapianDb will use 'xapian_db' by default.
|
414
|
+
|
415
|
+
=== 4. Start the resque worker
|
416
|
+
|
417
|
+
RAILS_ENV=production QUEUE=my_queue rake resque:work
|
418
|
+
|
419
|
+
Be sure to specify the correct queue name when starting the worker.
|
420
|
+
|
421
|
+
<b>If you don't provide a queue name, the worker WON'T use 'xapian_db' by default! Do not start multiple
|
422
|
+
instances of this worker!</b>
|
data/Rakefile
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/lib/xapian_db/config.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
module XapianDb
|
4
4
|
|
@@ -44,13 +44,17 @@ module XapianDb
|
|
44
44
|
return default_url if @config.nil?
|
45
45
|
@config.instance_variable_get("@_beanstalk_daemon_url") || default_url
|
46
46
|
end
|
47
|
+
|
48
|
+
def resque_queue
|
49
|
+
@config.instance_variable_get("@_resque_queue") || 'xapian_db'
|
50
|
+
end
|
47
51
|
end
|
48
52
|
|
49
53
|
# ---------------------------------------------------------------------------------
|
50
54
|
# DSL methods
|
51
55
|
# ---------------------------------------------------------------------------------
|
52
56
|
|
53
|
-
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_stemmer, :_stopper
|
57
|
+
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_resque_queue, :_stemmer, :_stopper
|
54
58
|
|
55
59
|
# Set the global database to use
|
56
60
|
# @param [String] path The path to the database. Either apply a file system path or :memory
|
@@ -100,6 +104,12 @@ module XapianDb
|
|
100
104
|
@_beanstalk_daemon_url = url
|
101
105
|
end
|
102
106
|
|
107
|
+
# Set the name of the resque queue
|
108
|
+
# @param [String] name The name of the resque queue
|
109
|
+
def resque_queue(name)
|
110
|
+
@_resque_queue = name
|
111
|
+
end
|
112
|
+
|
103
113
|
# Set the language.
|
104
114
|
# @param [Symbol] lang The language; apply the two letter ISO639 code for the language
|
105
115
|
# @example
|
@@ -115,4 +125,4 @@ module XapianDb
|
|
115
125
|
|
116
126
|
end
|
117
127
|
|
118
|
-
end
|
128
|
+
end
|
data/lib/xapian_db/database.rb
CHANGED
@@ -116,7 +116,7 @@ module XapianDb
|
|
116
116
|
end
|
117
117
|
enquiry = Xapian::Enquire.new(reader)
|
118
118
|
enquiry.query = final_query
|
119
|
-
Resultset.new(enquiry, :db_size => self.size)
|
119
|
+
Resultset.new(enquiry, :db_size => self.size, :limit => options[:limit])
|
120
120
|
end
|
121
121
|
|
122
122
|
# A very simple implementation of facets using Xapian collapse key.
|
@@ -67,7 +67,7 @@ module XapianDb
|
|
67
67
|
|
68
68
|
# Process the objects in batches to reduce the memory footprint
|
69
69
|
nr_of_batches = (obj_count / BATCH_SIZE) + 1
|
70
|
-
order_expression = "#{klass.name.tableize}.#{primary_key}"
|
70
|
+
order_expression = "#{klass.name.parameterize.tableize}.#{primary_key}"
|
71
71
|
# raise "vor loop"
|
72
72
|
nr_of_batches.times do |batch|
|
73
73
|
base_query.all(:offset => batch * BATCH_SIZE, :limit => BATCH_SIZE, :order => order_expression).each do |obj|
|
@@ -82,4 +82,4 @@ module XapianDb
|
|
82
82
|
end
|
83
83
|
end
|
84
84
|
end
|
85
|
-
end
|
85
|
+
end
|
@@ -8,22 +8,24 @@ module XapianDb
|
|
8
8
|
|
9
9
|
class NoOpWriter
|
10
10
|
|
11
|
-
|
12
|
-
# @param [Object] obj An instance of a class with a blueprint configuration
|
13
|
-
def index(obj, commit=true); end
|
11
|
+
class << self
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
# Update an object in the index
|
14
|
+
# @param [Object] obj An instance of a class with a blueprint configuration
|
15
|
+
def index(obj, commit=true); end
|
18
16
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
# @option options [Boolean] :verbose (false) Should the reindexing give status information?
|
23
|
-
def reindex_class(klass, options={})
|
24
|
-
raise "rebuild_xapian_index is not supported inside a block with auto indexing disabled"
|
25
|
-
end
|
17
|
+
# Remove an object from the index
|
18
|
+
# @param [String] xapian_id The document id
|
19
|
+
def delete_doc_with(xapian_id, commit=true); end
|
26
20
|
|
21
|
+
# Reindex all objects of a given class
|
22
|
+
# @param [Class] klass The class to reindex
|
23
|
+
# @param [Hash] options Options for reindexing
|
24
|
+
# @option options [Boolean] :verbose (false) Should the reindexing give status information?
|
25
|
+
def reindex_class(klass, options={})
|
26
|
+
raise "rebuild_xapian_index is not supported inside a block with auto indexing disabled"
|
27
|
+
end
|
28
|
+
end
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module XapianDb
|
3
|
+
module IndexWriters
|
4
|
+
# Worker to update the Xapian index; the worker will be called by resque
|
5
|
+
# and uses the DirectWriter to do the real work
|
6
|
+
# @author Michael Stämpfli
|
7
|
+
class ResqueWorker
|
8
|
+
|
9
|
+
extend XapianDb::Utilities
|
10
|
+
|
11
|
+
APPROVED_TASKS = [:index, :delete_doc, :reindex_class]
|
12
|
+
|
13
|
+
class << self
|
14
|
+
def queue
|
15
|
+
XapianDb::Config.resque_queue
|
16
|
+
end
|
17
|
+
|
18
|
+
def perform(task, options)
|
19
|
+
send(task, options) if APPROVED_TASKS.include?(task.to_sym)
|
20
|
+
end
|
21
|
+
|
22
|
+
def index(options)
|
23
|
+
klass = constantize options['class']
|
24
|
+
obj = klass.respond_to?('get') ? klass.get(options['id']) : klass.find(options['id'])
|
25
|
+
DirectWriter.index obj
|
26
|
+
end
|
27
|
+
|
28
|
+
def delete_doc(options)
|
29
|
+
DirectWriter.delete_doc_with options['xapian_id']
|
30
|
+
end
|
31
|
+
|
32
|
+
def reindex_class(options)
|
33
|
+
klass = constantize options['class']
|
34
|
+
DirectWriter.reindex_class klass, :verbose => false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# This writer uses resque to enqueue index jobs
|
3
|
+
# @author Michael Stämpfli
|
4
|
+
|
5
|
+
require 'resque'
|
6
|
+
|
7
|
+
module XapianDb
|
8
|
+
module IndexWriters
|
9
|
+
class ResqueWriter
|
10
|
+
|
11
|
+
class << self
|
12
|
+
|
13
|
+
# Update an object in the index
|
14
|
+
# @param [Object] obj An instance of a class with a blueprint configuration
|
15
|
+
def index(obj, commit=true)
|
16
|
+
Resque.enqueue worker_class, :index, :class => obj.class.name, :id => obj.id
|
17
|
+
end
|
18
|
+
|
19
|
+
# Remove an object from the index
|
20
|
+
# @param [String] xapian_id The document id
|
21
|
+
def delete_doc_with(xapian_id, commit=true)
|
22
|
+
Resque.enqueue worker_class, :delete_doc, :xapian_id => xapian_id
|
23
|
+
end
|
24
|
+
|
25
|
+
# Reindex all objects of a given class
|
26
|
+
# @param [Class] klass The class to reindex
|
27
|
+
def reindex_class(klass, options = {})
|
28
|
+
Resque.enqueue worker_class, :reindex_class, :class => klass.name
|
29
|
+
end
|
30
|
+
|
31
|
+
def worker_class
|
32
|
+
ResqueWorker
|
33
|
+
end
|
34
|
+
private :worker_class
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/xapian_db/railtie.rb
CHANGED
@@ -37,6 +37,7 @@ module XapianDb
|
|
37
37
|
config.adapter @adapter.try(:to_sym)
|
38
38
|
config.writer @writer.try(:to_sym)
|
39
39
|
config.beanstalk_daemon_url @beanstalk_daemon
|
40
|
+
config.resque_queue @resque_queue
|
40
41
|
config.language @language.try(:to_sym)
|
41
42
|
end
|
42
43
|
|
@@ -57,6 +58,7 @@ module XapianDb
|
|
57
58
|
@adapter = env_config["adapter"] || :active_record
|
58
59
|
@writer = env_config["writer"] || :direct
|
59
60
|
@beanstalk_daemon = env_config["beanstalk_daemon"]
|
61
|
+
@resque_queue = env_config["resque_queue"]
|
60
62
|
@language = env_config["language"]
|
61
63
|
end
|
62
64
|
|
@@ -69,4 +71,4 @@ module XapianDb
|
|
69
71
|
end
|
70
72
|
|
71
73
|
end
|
72
|
-
end
|
74
|
+
end
|
data/lib/xapian_db.rb
CHANGED
@@ -168,7 +168,7 @@ module XapianDb
|
|
168
168
|
|
169
169
|
# Execute a block and do not update the index
|
170
170
|
def self.auto_indexing_disabled(&block)
|
171
|
-
execute_block :writer => XapianDb::IndexWriters::NoOpWriter
|
171
|
+
execute_block :writer => XapianDb::IndexWriters::NoOpWriter do
|
172
172
|
block.call
|
173
173
|
end
|
174
174
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xapian_db
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-11-07 00:00:00.000000000 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: daemons
|
17
|
-
requirement: &
|
17
|
+
requirement: &70141547189340 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: 1.0.10
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *70141547189340
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: xapian-ruby
|
28
|
-
requirement: &
|
28
|
+
requirement: &70141547188660 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,10 +33,10 @@ dependencies:
|
|
33
33
|
version: 1.2.6
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *70141547188660
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
38
|
name: rspec
|
39
|
-
requirement: &
|
39
|
+
requirement: &70141547187880 !ruby/object:Gem::Requirement
|
40
40
|
none: false
|
41
41
|
requirements:
|
42
42
|
- - ! '>='
|
@@ -44,10 +44,10 @@ dependencies:
|
|
44
44
|
version: 2.3.1
|
45
45
|
type: :development
|
46
46
|
prerelease: false
|
47
|
-
version_requirements: *
|
47
|
+
version_requirements: *70141547187880
|
48
48
|
- !ruby/object:Gem::Dependency
|
49
49
|
name: simplecov
|
50
|
-
requirement: &
|
50
|
+
requirement: &70141547187160 !ruby/object:Gem::Requirement
|
51
51
|
none: false
|
52
52
|
requirements:
|
53
53
|
- - ! '>='
|
@@ -55,10 +55,10 @@ dependencies:
|
|
55
55
|
version: 0.3.7
|
56
56
|
type: :development
|
57
57
|
prerelease: false
|
58
|
-
version_requirements: *
|
58
|
+
version_requirements: *70141547187160
|
59
59
|
- !ruby/object:Gem::Dependency
|
60
60
|
name: beanstalk-client
|
61
|
-
requirement: &
|
61
|
+
requirement: &70141547186400 !ruby/object:Gem::Requirement
|
62
62
|
none: false
|
63
63
|
requirements:
|
64
64
|
- - ! '>='
|
@@ -66,7 +66,40 @@ dependencies:
|
|
66
66
|
version: 1.1.0
|
67
67
|
type: :development
|
68
68
|
prerelease: false
|
69
|
-
version_requirements: *
|
69
|
+
version_requirements: *70141547186400
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: rake
|
72
|
+
requirement: &70141547185540 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
type: :development
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: *70141547185540
|
81
|
+
- !ruby/object:Gem::Dependency
|
82
|
+
name: progressbar
|
83
|
+
requirement: &70141547200900 !ruby/object:Gem::Requirement
|
84
|
+
none: false
|
85
|
+
requirements:
|
86
|
+
- - ! '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
type: :development
|
90
|
+
prerelease: false
|
91
|
+
version_requirements: *70141547200900
|
92
|
+
- !ruby/object:Gem::Dependency
|
93
|
+
name: resque
|
94
|
+
requirement: &70141547200060 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 1.19.0
|
100
|
+
type: :development
|
101
|
+
prerelease: false
|
102
|
+
version_requirements: *70141547200060
|
70
103
|
description: XapianDb is a ruby gem that combines features of nosql databases and
|
71
104
|
fulltext indexing. It is based on Xapian, an efficient and powerful indexing library
|
72
105
|
email: gernot.kogler (at) garaio (dot) com
|
@@ -88,6 +121,8 @@ files:
|
|
88
121
|
- lib/xapian_db/index_writers/beanstalk_writer.rb
|
89
122
|
- lib/xapian_db/index_writers/direct_writer.rb
|
90
123
|
- lib/xapian_db/index_writers/no_op_writer.rb
|
124
|
+
- lib/xapian_db/index_writers/resque_worker.rb
|
125
|
+
- lib/xapian_db/index_writers/resque_writer.rb
|
91
126
|
- lib/xapian_db/index_writers/transactional_writer.rb
|
92
127
|
- lib/xapian_db/indexer.rb
|
93
128
|
- lib/xapian_db/query_parser.rb
|