xapian_db 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +7 -0
- data/README.rdoc +59 -4
- data/lib/xapian_db.rb +4 -1
- data/lib/xapian_db/config.rb +15 -2
- data/lib/xapian_db/database.rb +2 -0
- data/lib/xapian_db/index_writers/beanstalk_worker.rb +30 -0
- data/lib/xapian_db/index_writers/beanstalk_writer.rb +47 -0
- data/lib/xapian_db/railtie.rb +14 -5
- data/tasks/beanstalk_worker.rake +35 -0
- metadata +7 -4
data/CHANGELOG.md
CHANGED
data/README.rdoc
CHANGED
@@ -58,8 +58,10 @@ A future release of xapian_db might include the Xapian binaries and make this st
|
|
58
58
|
make
|
59
59
|
sudo make install
|
60
60
|
|
61
|
-
|
62
|
-
|
61
|
+
For a first look, look at the examples in the examples folder. There's the simple ruby script basic.rb that shows the basic
|
62
|
+
usage of XapianDB without Rails. In the basic_rails folder you'll find a very simple Rails app using XapianDb.
|
63
|
+
|
64
|
+
The following steps assume that you are using xapian_db within a Rails app.
|
63
65
|
|
64
66
|
=== Configure your databases
|
65
67
|
|
@@ -220,6 +222,59 @@ If you want to implement a simple drilldown for your searches, you can use a fac
|
|
220
222
|
|
221
223
|
Facet support in XapianDb is very limited. The only available facet is the class of the indexed objects. In many cases that's all that's needed. Therefore, it is very likely that I won't add more options for facets (since I'm not a fan of facets anyway). However, if you desperately need advanced facets, let me know. Or - even better - send me a pull request with a nice implementation ;-)
|
222
224
|
|
223
|
-
==
|
225
|
+
== Production setup
|
226
|
+
|
227
|
+
Since Xapian allows only one database instance to write to the index, the default setup of XapianDb will not work
|
228
|
+
with multiple app instances trying to write to the same database (you will get lock errors).
|
229
|
+
Therefore, XapianDb provides a solution based on beanstalk to overcome this.
|
230
|
+
|
231
|
+
=== 1. Install beanstalkd
|
232
|
+
|
233
|
+
Make sure you have the {beanstalk daemon}[http://kr.github.com/beanstalkd/] installed
|
234
|
+
|
235
|
+
==== OSX
|
236
|
+
|
237
|
+
The easiest way is to use macports or homebrew:
|
238
|
+
|
239
|
+
port install beanstalkd
|
240
|
+
brew install beanstalkd
|
241
|
+
|
242
|
+
==== Debian (Lenny)
|
243
|
+
|
244
|
+
# Add backports to /etc/apt/sources.list:
|
245
|
+
deb http://ftp.de.debian.org/debian-backports lenny-backports main contrib non-free
|
246
|
+
deb-src http://ftp.de.debian.org/debian-backports lenny-backports main contrib non-free
|
247
|
+
|
248
|
+
sudo apt-get update
|
249
|
+
sudo apt-get -t lenny-backports install libevent-1.4-2
|
250
|
+
sudo apt-get -t lenny-backports install libevent-dev
|
251
|
+
cd /tmp
|
252
|
+
curl http://xph.us/dist/beanstalkd/beanstalkd-1.4.6.tar.gz | tar zx
|
253
|
+
cd beanstalkd-1.4.6/
|
254
|
+
./configure
|
255
|
+
make
|
256
|
+
sudo make install
|
257
|
+
|
258
|
+
=== 2. Add the beanstalk-client gem to your config
|
259
|
+
|
260
|
+
gem 'beanstalk-client' # Add this to your Gemfile
|
261
|
+
bundle install
|
262
|
+
|
263
|
+
=== 3. Configure your production environment in config/xapian_db.yml
|
264
|
+
|
265
|
+
production:
|
266
|
+
database: db/xapian_db/production
|
267
|
+
writer: beanstalk
|
268
|
+
beanstalk_daemon: localhost:11300
|
269
|
+
|
270
|
+
=== 4. Start the beanstalk daemon
|
271
|
+
|
272
|
+
beanstalkd -d
|
273
|
+
|
274
|
+
=== 5. Start the beanstalk worker from within your Rails app root directory
|
275
|
+
|
276
|
+
rake RAILS_ENV=production xapian_db:beanstalk_worker
|
277
|
+
|
278
|
+
<b>Important: Do not start multiple instances of this worker task!</b>
|
279
|
+
|
224
280
|
|
225
|
-
* asynchronous index writer based on {resque}[https://github.com/defunkt/resque] for production environments
|
data/lib/xapian_db.rb
CHANGED
@@ -86,7 +86,7 @@ module XapianDb
|
|
86
86
|
|
87
87
|
end
|
88
88
|
|
89
|
-
do_not_require = %w(update_stopwords.rb railtie.rb base_adapter.rb)
|
89
|
+
do_not_require = %w(update_stopwords.rb railtie.rb base_adapter.rb beanstalk_writer.rb)
|
90
90
|
files = Dir.glob("#{File.dirname(__FILE__)}/**/*.rb").reject{|path| do_not_require.include?(File.basename(path))}
|
91
91
|
# Require the base adapter first
|
92
92
|
require "#{File.dirname(__FILE__)}/xapian_db/adapters/base_adapter"
|
@@ -94,3 +94,6 @@ files.each {|file| require file}
|
|
94
94
|
|
95
95
|
# Configure XapianDB if we are in a Rails app
|
96
96
|
require File.dirname(__FILE__) + '/xapian_db/railtie' if defined?(Rails)
|
97
|
+
|
98
|
+
# Require the beanstalk writer if beanstalk-client is installed
|
99
|
+
require File.dirname(__FILE__) + '/xapian_db/index_writers/beanstalk_writer' if Gem.available?('beanstalk-client')
|
data/lib/xapian_db/config.rb
CHANGED
@@ -35,6 +35,13 @@ module XapianDb
|
|
35
35
|
@config.nil? ? nil : @config.instance_variable_get("@_#{attr}")
|
36
36
|
end
|
37
37
|
end
|
38
|
+
|
39
|
+
# The beanstalk daemon url
|
40
|
+
define_method :beanstalk_daemon_url do
|
41
|
+
default_url = "localhost:11300"
|
42
|
+
return default_url if @config.nil?
|
43
|
+
@config.instance_variable_get("@_beanstalk_daemon_url") || default_url
|
44
|
+
end
|
38
45
|
end
|
39
46
|
|
40
47
|
# ---------------------------------------------------------------------------------
|
@@ -42,7 +49,7 @@ module XapianDb
|
|
42
49
|
# ---------------------------------------------------------------------------------
|
43
50
|
|
44
51
|
#
|
45
|
-
attr_reader :_database, :_adapter, :_writer, :_stemmer, :_stopper
|
52
|
+
attr_reader :_database, :_adapter, :_writer, :_beanstalk_daemon, :_stemmer, :_stopper
|
46
53
|
|
47
54
|
# Set the global database to use
|
48
55
|
# @param [String] path The path to the database. Either apply a file system path or :memory
|
@@ -81,12 +88,18 @@ module XapianDb
|
|
81
88
|
# Set the index writer
|
82
89
|
# @param [Symbol] type The writer type; the following writers are available:
|
83
90
|
# - :direct ({XapianDb::IndexWriters::DirectWriter})
|
84
|
-
#
|
91
|
+
# - :beanstalk ({XapianDb::IndexWriters::BeanstalkWriter})
|
85
92
|
def writer(type)
|
86
93
|
# We try to guess the writer name
|
87
94
|
@_writer = XapianDb::IndexWriters.const_get("#{camelize(type.to_s)}Writer")
|
88
95
|
end
|
89
96
|
|
97
|
+
# Set the url and port of the beanstalk daemon
|
98
|
+
# @param [String] url The url (host:port) of the beanstalk daemon; defaults to localhost:11300
|
99
|
+
def beanstalk_daemon_url(url)
|
100
|
+
@_beanstalk_daemon_url = url
|
101
|
+
end
|
102
|
+
|
90
103
|
# Set the language.
|
91
104
|
# @param [Symbol] lang The language; apply the two letter ISO639 code for the language
|
92
105
|
# @example
|
data/lib/xapian_db/database.rb
CHANGED
@@ -65,6 +65,8 @@ module XapianDb
|
|
65
65
|
# If we do not have a valid query we return an empty result set
|
66
66
|
return Resultset.new(nil, opts) unless query
|
67
67
|
|
68
|
+
Rails.logger.info "Executing XapianDb search: #{expression}" if defined?(Rails)
|
69
|
+
|
68
70
|
enquiry = Xapian::Enquire.new(reader)
|
69
71
|
enquiry.query = query
|
70
72
|
if opts[:sort_indices]
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module XapianDb
|
4
|
+
module IndexWriters
|
5
|
+
|
6
|
+
# Worker to update the Xapian index; the worker is used in the beanstalk worker rake task
|
7
|
+
# and uses the DirectWriter to do the real work
|
8
|
+
# @author Gernot Kogler
|
9
|
+
class BeanstalkWorker
|
10
|
+
|
11
|
+
def index_task(options)
|
12
|
+
klass = Kernel.const_get options[:class]
|
13
|
+
obj = klass.respond_to?(:get) ? klass.get(options[:id].to_i) : klass.find(options[:id].to_i)
|
14
|
+
DirectWriter.index obj
|
15
|
+
end
|
16
|
+
|
17
|
+
def unindex_task(options)
|
18
|
+
klass = Kernel.const_get options[:class]
|
19
|
+
obj = klass.respond_to?(:get) ? klass.get(options[:id].to_i) : klass.find(options[:id].to_i)
|
20
|
+
DirectWriter.unindex obj
|
21
|
+
end
|
22
|
+
|
23
|
+
def reindex_class_task(options)
|
24
|
+
klass = Kernel.const_get options[:class]
|
25
|
+
DirectWriter.reindex_class klass, :verbose => false
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This writer puts reindex requests into a beanstalk queue. If you want
|
4
|
+
# to use this writer, you must install beanstalkd and the beanstalk-client gem.
|
5
|
+
# This writer can only be used inside a Rails app.
|
6
|
+
# See https://github.com/adamwiggins/stalker for more info
|
7
|
+
# @author Gernot Kogler
|
8
|
+
|
9
|
+
require 'beanstalk-client'
|
10
|
+
|
11
|
+
module XapianDb
|
12
|
+
module IndexWriters
|
13
|
+
|
14
|
+
class BeanstalkWriter
|
15
|
+
|
16
|
+
class << self
|
17
|
+
|
18
|
+
# Update an object in the index
|
19
|
+
# @param [Object] obj An instance of a class with a blueprint configuration
|
20
|
+
def index(obj)
|
21
|
+
beanstalk.put({:task => "index_task", :class => obj.class.name, :id => obj.id}.to_yaml)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Remove an object from the index
|
25
|
+
# @param [Object] obj An instance of a class with a blueprint configuration
|
26
|
+
def unindex(obj)
|
27
|
+
beanstalk.put({:task => "unindex_task", :class => obj.class.name, :id => obj.id}.to_yaml)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Reindex all objects of a given class
|
31
|
+
# @param [Class] klass The class to reindex
|
32
|
+
def reindex_class(klass, options={})
|
33
|
+
beanstalk.put({:task => "reindex_class_task", :class => klass.name}.to_yaml)
|
34
|
+
end
|
35
|
+
|
36
|
+
private
|
37
|
+
|
38
|
+
def beanstalk
|
39
|
+
@beanstalk ||= Beanstalk::Pool.new([XapianDb::Config.beanstalk_daemon_url])
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
data/lib/xapian_db/railtie.rb
CHANGED
@@ -9,6 +9,11 @@ module XapianDb
|
|
9
9
|
# @author Gernot Kogler
|
10
10
|
class Railtie < ::Rails::Railtie
|
11
11
|
|
12
|
+
# Require our rake tasks
|
13
|
+
rake_tasks do
|
14
|
+
load "#{File.dirname(__FILE__)}/../../tasks/beanstalk_worker.rake"
|
15
|
+
end
|
16
|
+
|
12
17
|
config.before_configuration do
|
13
18
|
|
14
19
|
# Read the database configuration file if there is one
|
@@ -16,14 +21,16 @@ module XapianDb
|
|
16
21
|
if File.exist?(config_file_path)
|
17
22
|
db_config = YAML::load_file config_file_path
|
18
23
|
env_config = db_config[Rails.env]
|
19
|
-
database_path
|
20
|
-
adapter
|
21
|
-
writer
|
24
|
+
database_path = env_config["database"] || ":memory:"
|
25
|
+
adapter = env_config["adapter"] || :active_record
|
26
|
+
writer = env_config["writer"] || :direct
|
27
|
+
beanstalk_daemon = env_config["beanstalk_daemon"]
|
22
28
|
else
|
23
29
|
# No config file, set the defaults
|
24
30
|
Rails.env == "test" ? database_path = ":memory:" : database_path = "db/xapian_db/#{Rails.env}"
|
25
|
-
adapter
|
26
|
-
writer
|
31
|
+
adapter = :active_record
|
32
|
+
writer = :direct
|
33
|
+
beanstalk_daemon = nil
|
27
34
|
end
|
28
35
|
|
29
36
|
# Do the configuration
|
@@ -35,12 +42,14 @@ module XapianDb
|
|
35
42
|
end
|
36
43
|
config.adapter adapter.to_sym
|
37
44
|
config.writer writer.to_sym
|
45
|
+
config.beanstalk_daemon_url beanstalk_daemon
|
38
46
|
config.language(env_config["language"]) if env_config && env_config["language"]
|
39
47
|
end
|
40
48
|
|
41
49
|
end
|
42
50
|
|
43
51
|
config.to_prepare do
|
52
|
+
|
44
53
|
# Load a blueprint config if there is one
|
45
54
|
blueprints_file_path = "#{Rails.root}/config/xapian_blueprints.rb"
|
46
55
|
load blueprints_file_path if File.exist?(blueprints_file_path)
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This rake task is built to run within a Rails application and is the
|
4
|
+
# backend worker to serialize the index updates to a xapian database.
|
5
|
+
# Remember to install beanstalkd and configure the beanstalk-client gem
|
6
|
+
# in your Gemfile
|
7
|
+
|
8
|
+
require "#{Rails.root}/config/environment"
|
9
|
+
require "xapian_db"
|
10
|
+
require 'yaml'
|
11
|
+
|
12
|
+
namespace :xapian_db do
|
13
|
+
|
14
|
+
desc "Run the beanstalk worker process to update the xapian index"
|
15
|
+
task :beanstalk_worker do
|
16
|
+
|
17
|
+
url = XapianDb::Config.beanstalk_daemon_url
|
18
|
+
beanstalk = Beanstalk::Pool.new([url])
|
19
|
+
worker = XapianDb::IndexWriters::BeanstalkWorker.new
|
20
|
+
puts "XapianDb beanstalk worker is serving on #{url}..."
|
21
|
+
loop do
|
22
|
+
job = beanstalk.reserve
|
23
|
+
begin
|
24
|
+
params = YAML::load job.body
|
25
|
+
Rails.logger.info "XapianDb beanstalk worker: executing task #{params}"
|
26
|
+
task = params.delete :task
|
27
|
+
worker.send task, params
|
28
|
+
rescue Exception => ex
|
29
|
+
Rails.logger.error "XapianDb beanstalk worker: could not process #{job.body} (#{ex})"
|
30
|
+
end
|
31
|
+
job.delete
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
7
|
+
- 5
|
8
|
+
- 0
|
9
|
+
version: 0.5.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Gernot Kogler
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-12-
|
17
|
+
date: 2010-12-19 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -63,6 +63,8 @@ files:
|
|
63
63
|
- lib/xapian_db/config.rb
|
64
64
|
- lib/xapian_db/database.rb
|
65
65
|
- lib/xapian_db/document_blueprint.rb
|
66
|
+
- lib/xapian_db/index_writers/beanstalk_worker.rb
|
67
|
+
- lib/xapian_db/index_writers/beanstalk_writer.rb
|
66
68
|
- lib/xapian_db/index_writers/direct_writer.rb
|
67
69
|
- lib/xapian_db/indexer.rb
|
68
70
|
- lib/xapian_db/query_parser.rb
|
@@ -86,6 +88,7 @@ files:
|
|
86
88
|
- lib/xapian_db/stopwords/sv.txt
|
87
89
|
- lib/xapian_db/stopwords/update_stopwords.rb
|
88
90
|
- lib/xapian_db.rb
|
91
|
+
- tasks/beanstalk_worker.rake
|
89
92
|
- LICENSE
|
90
93
|
- README.rdoc
|
91
94
|
- CHANGELOG.md
|