stellr 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +36 -0
- data/README.txt +109 -0
- data/Rakefile +28 -0
- data/bin/stellr +64 -0
- data/bin/stellr-search +50 -0
- data/config/stellr.yml +8 -0
- data/lib/stellr.rb +37 -0
- data/lib/stellr/client.rb +78 -0
- data/lib/stellr/collections.rb +6 -0
- data/lib/stellr/collections/base.rb +79 -0
- data/lib/stellr/collections/multi_collection.rb +32 -0
- data/lib/stellr/collections/rsync.rb +38 -0
- data/lib/stellr/collections/searchable_collection.rb +166 -0
- data/lib/stellr/collections/static.rb +97 -0
- data/lib/stellr/collections/writeable_collection.rb +119 -0
- data/lib/stellr/config.rb +88 -0
- data/lib/stellr/search.rb +2 -0
- data/lib/stellr/search/search_result.rb +21 -0
- data/lib/stellr/search/search_results.rb +50 -0
- data/lib/stellr/server.rb +166 -0
- data/lib/stellr/strategies.rb +4 -0
- data/lib/stellr/strategies/base.rb +16 -0
- data/lib/stellr/strategies/blocking.rb +13 -0
- data/lib/stellr/strategies/queueing.rb +78 -0
- data/lib/stellr/utils.rb +24 -0
- data/lib/stellr/utils/observable.rb +20 -0
- data/lib/stellr/utils/shutdown.rb +30 -0
- data/test/fixtures/movies.yml +4 -0
- data/test/stellr_test.rb +38 -0
- data/test/test_client.rb +27 -0
- data/test/test_collections_base.rb +25 -0
- data/test/test_helper.rb +1 -0
- data/test/test_rsync_collection.rb +72 -0
- data/test/test_server.rb +94 -0
- data/test/test_static_collection.rb +40 -0
- data/test/test_stellr.rb +11 -0
- metadata +110 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
bin/stellr
|
6
|
+
bin/stellr-search
|
7
|
+
config/stellr.yml
|
8
|
+
lib/stellr.rb
|
9
|
+
lib/stellr/client.rb
|
10
|
+
lib/stellr/collections.rb
|
11
|
+
lib/stellr/collections/base.rb
|
12
|
+
lib/stellr/collections/searchable_collection.rb
|
13
|
+
lib/stellr/collections/writeable_collection.rb
|
14
|
+
lib/stellr/collections/rsync.rb
|
15
|
+
lib/stellr/collections/static.rb
|
16
|
+
lib/stellr/collections/multi_collection.rb
|
17
|
+
lib/stellr/config.rb
|
18
|
+
lib/stellr/search.rb
|
19
|
+
lib/stellr/search/search_result.rb
|
20
|
+
lib/stellr/search/search_results.rb
|
21
|
+
lib/stellr/server.rb
|
22
|
+
lib/stellr/strategies.rb
|
23
|
+
lib/stellr/strategies/base.rb
|
24
|
+
lib/stellr/strategies/blocking.rb
|
25
|
+
lib/stellr/strategies/queueing.rb
|
26
|
+
lib/stellr/utils.rb
|
27
|
+
lib/stellr/utils/shutdown.rb
|
28
|
+
lib/stellr/utils/observable.rb
|
29
|
+
test/fixtures/movies.yml
|
30
|
+
test/stellr_test.rb
|
31
|
+
test/test_collections_base.rb
|
32
|
+
test/test_helper.rb
|
33
|
+
test/test_rsync_collection.rb
|
34
|
+
test/test_server.rb
|
35
|
+
test/test_static_collection.rb
|
36
|
+
test/test_stellr.rb
|
data/README.txt
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
stellr
|
2
|
+
by Jens Kraemer and Benjamin Krause
|
3
|
+
http://rubyforge.org/projects/stellr
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Stellr is a Ferret based standalone search server featuring a DRb frontend and (soon to come) an HTTP frontend. It can handle multiple indexes, including multi-index searches. A client library and a simple command line query tool are included with the gem.
|
8
|
+
|
9
|
+
== FEATURES:
|
10
|
+
|
11
|
+
* DRb frontend
|
12
|
+
* easy to use client library (see below)
|
13
|
+
* multi index search
|
14
|
+
* Index rotation
|
15
|
+
Stellr always keeps two versions of your index around - one is used in a
|
16
|
+
multi threaded, read only way to handle incoming search requests, while the
|
17
|
+
other one is written to when you index something.
|
18
|
+
Using the switch function you may decide when to switch over searching from
|
19
|
+
the old index to the new one. Then, changes will be synced, and searches will
|
20
|
+
see the new or updated data from before the switch call.
|
21
|
+
* Index synchronization
|
22
|
+
Two kinds of synchronization methods are supported for now: rsync, using rsync
|
23
|
+
to copy over the changes from one index to the other, and static, which will
|
24
|
+
completely replace the old index with the new one. While the latter is suitable
|
25
|
+
for indexes which you rebuild completely from time to time, the former is good
|
26
|
+
for large indexes that are updated frequently or that are too large for frequent
|
27
|
+
rebuilds.
|
28
|
+
|
29
|
+
== SYNOPSIS:
|
30
|
+
|
31
|
+
* start the server:
|
32
|
+
|
33
|
+
stellr -c /path/to/config.yml start
|
34
|
+
|
35
|
+
* index something
|
36
|
+
|
37
|
+
require 'stellr/client'
|
38
|
+
stellr = Stellr::Client.new('druby://localhost:9010')
|
39
|
+
config = {
|
40
|
+
:collection => :static, # static collections are rebuilt from scratch every time changes occur
|
41
|
+
# :collection => :rsync, # use the rsync collection for indexes that are updated frequently and/or are
|
42
|
+
# too large to be rebuilt from scratch every time they're updated
|
43
|
+
:analyzer => 'My::Analyzer',
|
44
|
+
:fields => {
|
45
|
+
:title => { :boost => 5, :store => :yes },
|
46
|
+
:content => { :store => :no }
|
47
|
+
}
|
48
|
+
}
|
49
|
+
collection = stellr.connect('my_collection', config)
|
50
|
+
collection.add_record(:id => 1, :title => 'Some Title', :content => 'Content')
|
51
|
+
collection.switch # switch searching over to the index that was just written to
|
52
|
+
|
53
|
+
* command line search
|
54
|
+
|
55
|
+
stellr-search my_collection 'query string'
|
56
|
+
|
57
|
+
for now, this will display the first 10 hits only.
|
58
|
+
|
59
|
+
* search via the client library
|
60
|
+
|
61
|
+
require 'stellr/client'
|
62
|
+
stellr = Stellr::Client.new('druby://localhost:9010')
|
63
|
+
collection = stellr.connect 'my_collection',
|
64
|
+
:analyzer => 'My::Analyzer' # the analyzer to use for query parsing
|
65
|
+
results = collection.search 'querystring',
|
66
|
+
:page => 1, :per_page => 100, # built in pagination support
|
67
|
+
:fields => [:title, :content], # the fields to search in
|
68
|
+
:get_fields => [ :title ] # the fields to fetch for result display
|
69
|
+
|
70
|
+
* multi-collection search
|
71
|
+
pass an array of collection names to search multiple
|
72
|
+
collections at once:
|
73
|
+
|
74
|
+
collection = stellr.connect [ 'my_collection', 'another_collection' ],
|
75
|
+
:analyzer => 'My::Analyzer'
|
76
|
+
|
77
|
+
== REQUIREMENTS:
|
78
|
+
|
79
|
+
* Ferret (gem install ferret)
|
80
|
+
* daemons (gem install daemons)
|
81
|
+
|
82
|
+
== INSTALL:
|
83
|
+
|
84
|
+
* sudo gem install stellr
|
85
|
+
|
86
|
+
== LICENSE:
|
87
|
+
|
88
|
+
(The MIT License)
|
89
|
+
|
90
|
+
Copyright (c) 2007 FIX
|
91
|
+
|
92
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
93
|
+
a copy of this software and associated documentation files (the
|
94
|
+
'Software'), to deal in the Software without restriction, including
|
95
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
96
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
97
|
+
permit persons to whom the Software is furnished to do so, subject to
|
98
|
+
the following conditions:
|
99
|
+
|
100
|
+
The above copyright notice and this permission notice shall be
|
101
|
+
included in all copies or substantial portions of the Software.
|
102
|
+
|
103
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
104
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
105
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
106
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
107
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
108
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
109
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hoe'
|
3
|
+
$:.unshift 'lib'
|
4
|
+
require './lib/stellr.rb'
|
5
|
+
|
6
|
+
# Reopens Hoe to replace the +extra_deps+ reader with one that hides any
# dependency on hoe itself.
class Hoe
  # Returns the configured extra dependencies without the entries for 'hoe'.
  #
  # Entries may be plain names or [name, version] arrays; Array(x).first
  # covers both forms. An unset dependency list is treated as empty.
  #
  # Note that +reject+ builds a new array on every call, so appending to the
  # return value (extra_deps << ...) does not change the stored list.
  def extra_deps
    Array(@extra_deps).reject do |x|
      Array(x).first == 'hoe'
    end
  end
end
|
13
|
+
|
14
|
+
# Gem specification for stellr. Description, url and changes are extracted
# from README.txt and History.txt.
Hoe.new('stellr', Stellr::VERSION) do |p|
  p.rubyforge_name = 'stellr'
  p.author = [ 'Benjamin Krause', 'Jens Krämer' ]
  p.email = [ 'bk@benjaminkrause.com', 'jk@jkraemer.net' ]
  p.summary = 'Stellr is a Ferret based standalone search server.'
  p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
  p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
  # One [name, version requirement] pair per dependency. The list is assigned
  # rather than appended with <<, because the extra_deps reader overridden in
  # this Rakefile returns a filtered copy and an append to it would be lost.
  p.extra_deps = [
    [ 'ferret',     '>= 0.11.6' ],
    [ 'daemons',    '>= 1.0.10' ],
    [ 'fastthread', '>= 1.0' ]
  ]
end
|
24
|
+
|
25
|
+
desc "Release and publish documentation"
|
26
|
+
task :repubdoc => [:release, :publish_docs]
|
27
|
+
|
28
|
+
# vim:syntax=ruby
|
data/bin/stellr
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'optparse'
|
5
|
+
require 'daemons'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
begin
  gem 'stellr'
rescue Gem::LoadError
  # stellr is not installed as a gem: fall back to the lib directory next to
  # this script's directory. The two components have to be joined into ONE
  # load path entry; passed as separate arguments they would add the script
  # directory and the bare relative path "../lib" instead.
  $:.unshift(File.join(File.dirname(__FILE__), "../lib"))
end
|
13
|
+
|
14
|
+
require 'stellr'
|
15
|
+
|
16
|
+
include Stellr
|
17
|
+
|
18
|
+
# Daemon control commands; the last command line argument must be one of them.
commands = %w( start stop restart status )

options = OptionParser.new do |opts|

  opts.banner = "Usage: #{File.basename(__FILE__)} [options] #{commands.join('|')}"

  opts.on("-c", "--config CONFIG", "config file location") do |value|
    @config_file = value
  end

  opts.on("-h", "--help", "show this help") do
    @help = true
  end
end

begin
  cmdline = options.parse( ARGV )
rescue OptionParser::ParseError => e
  # unknown option or missing option argument: report it together with the
  # usage text instead of terminating with a backtrace
  puts e.message
  puts options.to_s
  exit 1
end
command = cmdline.last

if @help or not commands.include?( command )
  puts options.to_s
else
  config = Stellr::Config.new @config_file
  FileUtils.mkdir_p config.tmp_dir
  FileUtils.mkdir_p config.log_dir

  # hand the command over to the daemons library; the block is what runs
  # inside the daemonized process
  Daemons.run_proc( "stellr", :dir        => config.tmp_dir,
                              :dir_mode   => :normal,
                              :log_output => true,
                              :ARGV       => [ command ] ) do
    Stellr::start_server config
  end

  if command == "start"
    # give the daemon a moment to come up, then check its pid file to decide
    # on the exit status of this launcher
    sleep 2
    pid = Daemons::PidFile.new( config.tmp_dir, 'stellr' )
    if pid.exist? and Daemons::Pid.running?(pid.pid)
      exit 0
    else
      puts "not started. Watch #{config.tmp_dir}/stellr.output for errors."
      exit 1
    end
  end

  # FIXME how to correctly shut down the running server so indexes are closed
  # and such?
  # look for some hook provided by daemon tools, or trap SIGTERM (SIGKILL
  # cannot be trapped)
end
|
data/bin/stellr-search
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
begin
  gem 'stellr'
rescue Gem::LoadError
  # stellr is not installed as a gem: fall back to the lib directory next to
  # this script's directory. The two components have to be joined into ONE
  # load path entry; passed as separate arguments they would add the script
  # directory and the bare relative path "../lib" instead.
  $:.unshift(File.join(File.dirname(__FILE__), "../lib"))
end
|
11
|
+
|
12
|
+
require 'stellr'
|
13
|
+
require 'stellr/client'
|
14
|
+
|
15
|
+
options = OptionParser.new do |opts|

  opts.banner = "Usage: #{File.basename(__FILE__)} [options] collection query"

  opts.on("-c", "--config CONFIG", "config file location") do |value|
    @config_file = value
  end

  opts.on("-h", "--help", "show this help") do
    @help = true
  end
end

begin
  cmdline = options.parse( ARGV )
rescue OptionParser::ParseError => e
  # unknown option or missing option argument: report it together with the
  # usage text instead of terminating with a backtrace
  puts e.message
  puts options.to_s
  exit 1
end

# The last two positional arguments are the collection name and the query.
# Array#pop returns nil (it does not raise) when an argument is missing.
query = cmdline.pop
collection_name = cmdline.pop

if @help
  puts options.to_s
elsif query.nil? || collection_name.nil?
  # do not contact the server with a missing collection name or query
  puts options.to_s
  exit 1
else
  config = Stellr::Config.new @config_file
  client = Stellr::Client.new config.drb_uri
  collection = client.connect collection_name
  puts "searching collection #{collection_name} for >#{query}< ..."
  results = collection.search query, :limit => 10
  if results.any?
    puts "showing first 10 of #{results.total_hits} results"
    results.each do |r|
      puts r[:id]
    end
  else
    puts "no results for >#{query}<"
  end
end
|
49
|
+
|
50
|
+
|
data/config/stellr.yml
ADDED
data/lib/stellr.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'thread'
|
3
|
+
begin
|
4
|
+
require 'fastthread'
|
5
|
+
rescue LoadError
|
6
|
+
puts "couldn't load fastthread"
|
7
|
+
end
|
8
|
+
require 'drb'
|
9
|
+
require 'monitor'
|
10
|
+
require 'ferret'
|
11
|
+
require 'stellr/utils'
|
12
|
+
require 'stellr/server'
|
13
|
+
require 'stellr/config'
|
14
|
+
require 'stellr/collections'
|
15
|
+
require 'stellr/strategies'
|
16
|
+
require 'stellr/search'
|
17
|
+
|
18
|
+
$SAFE = 1
|
19
|
+
|
20
|
+
module Stellr
  VERSION = '0.1.0'

  # Starts a Stellr server and blocks until the DRb service thread finishes.
  #
  # +config+ has to respond to +script+ and +drb_uri+ and is also handed to
  # Server.new. If +config.script+ is set, that file is loaded before the
  # server is created. Any error raised while loading it is printed together
  # with its backtrace and the process exits with status 1. A deliberate
  # +exit+ inside the script and signals are passed through unchanged.
  def self.start_server( config )
    if config.script
      begin
        load config.script
      rescue SystemExit, SignalException
        # not a load error - keep the script's own exit status / the signal
        raise
      rescue Exception => e
        # Exception (not just StandardError) so that LoadError and SyntaxError
        # from the script are reported as well
        puts "\nerror loading script #{config.script}: #{e}\n#{e.backtrace.join("\n")}"
        exit 1
      end
    end
    stellr = Server.new config
    server = DRb.start_service config.drb_uri, stellr
    server.thread.join
  end

end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'drb'
|
2
|
+
require 'stellr/search'
|
3
|
+
|
4
|
+
module Stellr
|
5
|
+
|
6
|
+
# = Stellr client
|
7
|
+
#
|
8
|
+
# This class acts as a wrapper around the connection to a Stellr server.
|
9
|
+
# Typical usage looks like this:
|
10
|
+
#
|
11
|
+
# stellr = Stellr::Client.new('druby://myserver.com:9000')
|
12
|
+
# collection = stellr.connect('myindex', :fields => { :content => { :store => :yes } })
|
13
|
+
# collection << { :content => 'lorem ipsum' }
|
14
|
+
# collection.switch
|
15
|
+
# results = collection.search('lorem', :page => 1, :per_page => 10)
|
16
|
+
#
|
17
|
+
class Client

  # Creates a client for the Stellr server reachable under +drb_uri+
  # (e.g. 'druby://localhost:9010').
  def initialize( drb_uri )
    @server = DRbObject.new(nil, drb_uri)
  end

  # Connects to a remote collection and returns a stub that can be used to
  # add records to the collection and to search for them.
  #
  # +collection_name+ and +options+ are passed to the server's +register+
  # call as they are (+options+ may be nil).
  #
  # Specify an array of collection names for the first argument to search
  # multiple collections at once. No indexing is possible with such a
  # MultiCollection.
  def connect( collection_name, options = nil )
    if Array === collection_name
      # multi_connect defaults its options to {}; do not override that
      # default with this method's nil default
      multi_connect(collection_name, options || {})
    else
      @server.register collection_name, options
      ClientCollection.new @server, collection_name
    end
  end

  # Connects to multiple remote collections at once in order to run
  # cross-collection searches. Returns a MultiCollection.
  def multi_connect( collection_names, options = {} )
    MultiCollection.new @server, collection_names, options
  end

end
|
45
|
+
|
46
|
+
# Wrapper around a remote collection.
|
47
|
+
#
|
48
|
+
# See the documentation of the collection class you use for more information.
|
49
|
+
class ClientCollection

  # +server+ is the server (stub) all calls are forwarded to, +name+ the
  # collection name that is passed along with every forwarded call.
  def initialize( server, name )
    @name = name
    @server = server
  end

  # Disconnects this collection from the server. Any forwarded call made
  # afterwards raises an error.
  def disconnect
    @server = nil
  end

  # Forwards every unknown method to the server, inserting the collection
  # name as first argument: collection.search('q') becomes
  # server.search(name, 'q'). A block given by the caller is forwarded, too.
  #
  # Raises a RuntimeError when the collection has been disconnected.
  def method_missing( method, *args, &block )
    raise "use of disconnected collection" if @server.nil?
    @server.send method, @name, *args, &block
  end

end

# This client collection class allows searching multiple server side
# collections at once.
class MultiCollection < ClientCollection
  # Registers the collections given in +names+ as a multi collection on the
  # server. Whatever that call returns is used as the name for all further
  # forwarded calls. A nil +options+ is sent as an empty hash.
  def initialize( server, names, options = {} )
    @server = server
    @name = @server.register_multi_collection(names, options || {})
  end
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Stellr
  module Collections

    # Base class for collection implementations
    class Base
      include Ferret::Index
      include Stellr::Utils::Shutdown
      include Stellr::Utils::Observable
      attr_reader :name

      # Builds the collection described by +options+.
      #
      # The collection class is taken from options[:collection] (:static or
      # :rsync) or options[:collection_class] (a class name) and defaults to
      # Stellr::Collections::RSync. If a strategy is configured via
      # options[:strategy] (:queueing) or options[:strategy_class], the
      # collection is wrapped in that strategy and the strategy is returned;
      # otherwise the collection itself is returned.
      #
      # Note: +options+ is modified in place - the :collection / :strategy
      # keys are replaced by :collection_class / :strategy_class.
      def self.create( name, options )
        collection_class = collection_class_for_options options
        collection = collection_class.new( name, options )
        if strategy_class = strategy_class_for_options( options )
          strategy_class.new( collection, options )
        else
          collection
        end
      end

      # Stores +name+ and a copy of +options+. The logger is taken from
      # options[:logger]; without one, a Logger writing to 'stellr.log' is
      # created.
      def initialize( name, options )
        @logger = options[:logger] || (require 'logger'; Logger.new 'stellr.log')
        @name = name
        @options = options.dup
      end

      # called whenever the strategy thinks it's a good time to do something
      # timeconsuming (like switching indexes, optimizing, flushing, ...)
      def batch_finished
      end

      # Shutdown hook: closes the collection. +mode+ is not used here.
      def on_shutdown( mode )
        close
      end

      # close this collection
      def close
      end

      protected

      # Directory of this collection as configured in options[:path].
      def collection_directory
        @options[:path]
      end

      # NOTE: +protected+ above only affects instance methods; the class
      # methods below are public.

      # Resolves the collection class for +options+, translating a :collection
      # key into :collection_class first. Falls back to RSync when no class is
      # configured or the :collection key is unknown.
      def self.collection_class_for_options( options )
        if (c = options.delete(:collection))
          options[:collection_class] = collection_class_for_key c
        end
        constantize(options[:collection_class] || 'Stellr::Collections::RSync')
      end

      # Maps a collection key (:static / :rsync, symbol or string) to a class
      # name. Returns nil for unknown keys.
      def self.collection_class_for_key(key)
        case key.to_s
        when 'static'
          'Stellr::Collections::Static'
        when 'rsync'
          'Stellr::Collections::RSync'
        end
      end

      # Resolves the strategy class for +options+, translating a :strategy key
      # into :strategy_class first. Returns nil when no strategy is configured.
      def self.strategy_class_for_options( options )
        if (c = options.delete(:strategy))
          options[:strategy_class] = strategy_class_for_key c
        end
        constantize(options[:strategy_class]) if options[:strategy_class]
      end

      # Maps a strategy key (:queueing, symbol or string) to a class name.
      # Returns nil for unknown keys.
      def self.strategy_class_for_key(key)
        case key.to_s
        when 'queueing'
          'Stellr::Strategies::Queueing'
        end
      end

      # Looks up the constant named by +class_name+ (with or without a leading
      # '::') starting at the top level. The name is resolved by constant
      # lookup instead of evaluating it as code, because the options it comes
      # from may be supplied by a client. Raises NameError for names that are
      # not a plain constant path or that cannot be found.
      def self.constantize( class_name )
        path = class_name.to_s.sub(/\A::/, '')
        unless path =~ /\A[A-Z]\w*(::[A-Z]\w*)*\z/
          raise NameError, "invalid class name: #{class_name.inspect}"
        end
        path.split('::').inject(Object) { |scope, const| scope.const_get(const) }
      end
    end

  end

end
|