stellr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +36 -0
- data/README.txt +109 -0
- data/Rakefile +28 -0
- data/bin/stellr +64 -0
- data/bin/stellr-search +50 -0
- data/config/stellr.yml +8 -0
- data/lib/stellr.rb +37 -0
- data/lib/stellr/client.rb +78 -0
- data/lib/stellr/collections.rb +6 -0
- data/lib/stellr/collections/base.rb +79 -0
- data/lib/stellr/collections/multi_collection.rb +32 -0
- data/lib/stellr/collections/rsync.rb +38 -0
- data/lib/stellr/collections/searchable_collection.rb +166 -0
- data/lib/stellr/collections/static.rb +97 -0
- data/lib/stellr/collections/writeable_collection.rb +119 -0
- data/lib/stellr/config.rb +88 -0
- data/lib/stellr/search.rb +2 -0
- data/lib/stellr/search/search_result.rb +21 -0
- data/lib/stellr/search/search_results.rb +50 -0
- data/lib/stellr/server.rb +166 -0
- data/lib/stellr/strategies.rb +4 -0
- data/lib/stellr/strategies/base.rb +16 -0
- data/lib/stellr/strategies/blocking.rb +13 -0
- data/lib/stellr/strategies/queueing.rb +78 -0
- data/lib/stellr/utils.rb +24 -0
- data/lib/stellr/utils/observable.rb +20 -0
- data/lib/stellr/utils/shutdown.rb +30 -0
- data/test/fixtures/movies.yml +4 -0
- data/test/stellr_test.rb +38 -0
- data/test/test_client.rb +27 -0
- data/test/test_collections_base.rb +25 -0
- data/test/test_helper.rb +1 -0
- data/test/test_rsync_collection.rb +72 -0
- data/test/test_server.rb +94 -0
- data/test/test_static_collection.rb +40 -0
- data/test/test_stellr.rb +11 -0
- metadata +110 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
bin/stellr
|
6
|
+
bin/stellr-search
|
7
|
+
config/stellr.yml
|
8
|
+
lib/stellr.rb
|
9
|
+
lib/stellr/client.rb
|
10
|
+
lib/stellr/collections.rb
|
11
|
+
lib/stellr/collections/base.rb
|
12
|
+
lib/stellr/collections/searchable_collection.rb
|
13
|
+
lib/stellr/collections/writeable_collection.rb
|
14
|
+
lib/stellr/collections/rsync.rb
|
15
|
+
lib/stellr/collections/static.rb
|
16
|
+
lib/stellr/collections/multi_collection.rb
|
17
|
+
lib/stellr/config.rb
|
18
|
+
lib/stellr/search.rb
|
19
|
+
lib/stellr/search/search_result.rb
|
20
|
+
lib/stellr/search/search_results.rb
|
21
|
+
lib/stellr/server.rb
|
22
|
+
lib/stellr/strategies.rb
|
23
|
+
lib/stellr/strategies/base.rb
|
24
|
+
lib/stellr/strategies/blocking.rb
|
25
|
+
lib/stellr/strategies/queueing.rb
|
26
|
+
lib/stellr/utils.rb
|
27
|
+
lib/stellr/utils/shutdown.rb
|
28
|
+
lib/stellr/utils/observable.rb
|
29
|
+
test/fixtures/movies.yml
|
30
|
+
test/stellr_test.rb
|
31
|
+
test/test_collections_base.rb
|
32
|
+
test/test_helper.rb
|
33
|
+
test/test_rsync_collection.rb
|
34
|
+
test/test_server.rb
|
35
|
+
test/test_static_collection.rb
|
36
|
+
test/test_stellr.rb
|
data/README.txt
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
stellr
|
2
|
+
by Jens Kraemer and Benjamin Krause
|
3
|
+
http://rubyforge.org/projects/stellr
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Stellr is a Ferret based standalone search server featuring a DRb and (soon to come) an HTTP frontend. It can handle multiple indexes, including multi-index searches. A client library and a simple command line query tool are included with the gem.
|
8
|
+
|
9
|
+
== FEATURES:
|
10
|
+
|
11
|
+
* DRb frontend
|
12
|
+
* easy to use client library (see below)
|
13
|
+
* multi index search
|
14
|
+
* Index rotation
|
15
|
+
Stellr always keeps two versions of your index around - one is used in a
|
16
|
+
multi threaded, read only way to handle incoming search requests, while the
|
17
|
+
other one is written to when you index something.
|
18
|
+
Using the switch function you may decide when to switch over searching from
|
19
|
+
the old index to the new one. Then, changes will be synced, and searches will
|
20
|
+
see the new or updated data from before the switch call.
|
21
|
+
* Index synchronization
|
22
|
+
Two kinds of synchronization methods are supported for now: rsync, using rsync
|
23
|
+
to copy over the changes from one index to the other, and static, which will
|
24
|
+
completely replace the old index with the new one. While the latter is suitable
|
25
|
+
for indexes which you rebuild completely from time to time, the former is good
|
26
|
+
for large indexes that are updated frequently or that are too large for frequent
|
27
|
+
rebuilds.
|
28
|
+
|
29
|
+
== SYNOPSIS:
|
30
|
+
|
31
|
+
* start the server:
|
32
|
+
|
33
|
+
stellr -c /path/to/config.yml start
|
34
|
+
|
35
|
+
* index something
|
36
|
+
|
37
|
+
require 'stellr/client'
|
38
|
+
stellr = Stellr::Client.new('druby://localhost:9010')
|
39
|
+
config = {
|
40
|
+
:collection => :static, # static collections are rebuilt from scratch every time changes occur
|
41
|
+
# :collection => :rsync, # use the rsync collection for indexes that are updated frequently and/or are
|
42
|
+
# too large to be rebuilt from scratch every time they're updated
|
43
|
+
:analyzer => 'My::Analyzer',
|
44
|
+
:fields => {
|
45
|
+
:title => { :boost => 5, :store => :yes },
|
46
|
+
:content => { :store => :no }
|
47
|
+
}
|
48
|
+
}
|
49
|
+
collection = stellr.connect('my_collection', config)
|
50
|
+
collection.add_record(:id => 1, :title => 'Some Title', :content => 'Content')
|
51
|
+
collection.switch # switch searching over to the updated index
|
52
|
+
|
53
|
+
* command line search
|
54
|
+
|
55
|
+
stellr-search my_collection 'query string'
|
56
|
+
|
57
|
+
for now, this will display the first 10 hits only.
|
58
|
+
|
59
|
+
* search via the client library
|
60
|
+
|
61
|
+
require 'stellr/client'
|
62
|
+
stellr = Stellr::Client.new('druby://localhost:9010')
|
63
|
+
collection = stellr.connect 'my_collection',
|
64
|
+
:analyzer => 'My::Analyzer' # the analyzer to use for query parsing
|
65
|
+
results = collection.search 'querystring',
|
66
|
+
:page => 1, :per_page => 100, # built in pagination support
|
67
|
+
:fields => [:title, :content], # the fields to search in
|
68
|
+
:get_fields => [ :title ] # the fields to fetch for result display
|
69
|
+
|
70
|
+
* multi-collection search
|
71
|
+
pass an array of collection names to search multiple
|
72
|
+
collections at once:
|
73
|
+
|
74
|
+
collection = stellr.connect [ 'my_collection', 'another_collection' ],
|
75
|
+
:analyzer => 'My::Analyzer'
|
76
|
+
|
77
|
+
== REQUIREMENTS:
|
78
|
+
|
79
|
+
* Ferret (gem install ferret)
|
80
|
+
* daemons (gem install daemons)
|
81
|
+
|
82
|
+
== INSTALL:
|
83
|
+
|
84
|
+
* sudo gem install stellr
|
85
|
+
|
86
|
+
== LICENSE:
|
87
|
+
|
88
|
+
(The MIT License)
|
89
|
+
|
90
|
+
Copyright (c) 2007 FIX
|
91
|
+
|
92
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
93
|
+
a copy of this software and associated documentation files (the
|
94
|
+
'Software'), to deal in the Software without restriction, including
|
95
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
96
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
97
|
+
permit persons to whom the Software is furnished to do so, subject to
|
98
|
+
the following conditions:
|
99
|
+
|
100
|
+
The above copyright notice and this permission notice shall be
|
101
|
+
included in all copies or substantial portions of the Software.
|
102
|
+
|
103
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
104
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
105
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
106
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
107
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
108
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
109
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hoe'
|
3
|
+
$:.unshift 'lib'
|
4
|
+
require './lib/stellr.rb'
|
5
|
+
|
6
|
+
# Reopens Hoe so that the dependency list it reports never names hoe itself.
class Hoe
  # Returns a new array with every entry of @extra_deps whose name (the first
  # element once the entry is coerced with Array()) is not 'hoe'. The stored
  # @extra_deps array itself is left untouched.
  def extra_deps
    @extra_deps.reject { |dependency| Array(dependency).first == 'hoe' }
  end
end
|
13
|
+
|
14
|
+
# Gem specification for stellr, built through Hoe.
Hoe.new('stellr', Stellr::VERSION) do |p|
  p.rubyforge_name = 'stellr'
  p.author = [ 'Benjamin Krause', 'Jens Krämer' ]
  p.email = [ 'bk@benjaminkrause.com', 'jk@jkraemer.net' ]
  p.summary = 'Stellr is a Ferret based standalone search server.'
  p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
  p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")

  # Each dependency is its own [name, version requirement] pair; a single
  # flat six-element array would be read as one dependency with five
  # "requirements".
  #
  # The list is assigned rather than appended with <<, because the
  # extra_deps reader patched into Hoe in this Rakefile returns a filtered
  # copy, so appending to its result would never reach the stored list.
  p.extra_deps += [
    [ 'ferret',     '>= 0.11.6' ],
    [ 'daemons',    '>= 1.0.10' ],
    [ 'fastthread', '>= 1.0' ]
  ]
end

desc "Release and publish documentation"
task :repubdoc => [:release, :publish_docs]
|
27
|
+
|
28
|
+
# vim:syntax=ruby
|
data/bin/stellr
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/usr/bin/ruby

# Control script for the Stellr server: parses the command line, then hands
# one of the daemon commands (start, stop, restart, status) to Daemons.

require 'rubygems'
require 'optparse'
require 'daemons'
require 'fileutils'

begin
  gem 'stellr'
rescue Gem::LoadError
  # Not installed as a gem: fall back to the lib directory next to bin/.
  # The two parts have to be joined into one path; handing them to unshift
  # as separate arguments would add the bin directory itself plus a
  # "../lib" entry relative to the current working directory.
  $:.unshift(File.join(File.dirname(__FILE__), "../lib"))
end

require 'stellr'

include Stellr

commands = %w( start stop restart status )

options = OptionParser.new do |opts|

  opts.banner = "Usage: #{File.basename(__FILE__)} [options] #{commands.join('|')}"

  opts.on("-c", "--config CONFIG", "config file location") do |value|
    @config_file = value
  end

  opts.on("-h", "--help", "show this help") do
    @help = true
  end
end

# whatever is left after option parsing; the last word is the command
cmdline = options.parse( ARGV )
command = cmdline.last

if @help || !commands.include?( command )
  puts options.to_s
else
  config = Stellr::Config.new @config_file
  FileUtils.mkdir_p config.tmp_dir
  FileUtils.mkdir_p config.log_dir

  Daemons.run_proc( "stellr", :dir => config.tmp_dir,
                              :dir_mode => :normal,
                              :log_output => true,
                              :ARGV => [ command ] ) do
    Stellr::start_server config
  end

  if command == "start"
    # give the daemon a moment to come up, then report through the exit
    # status whether its pid file points at a running process
    sleep 2
    pid = Daemons::PidFile.new( config.tmp_dir, 'stellr' )
    if pid.exist? && Daemons::Pid.running?(pid.pid)
      exit 0
    else
      puts "not started. Watch #{config.tmp_dir}/stellr.output for errors."
      exit 1
    end
  end

  # FIXME how to correctly shut down the running server so indexes are closed
  # and such?
  # look for some hook provided by daemons, or trap SIGTERM (SIGKILL cannot
  # be trapped)
end
|
data/bin/stellr-search
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line query tool: connects to a Stellr server and prints the ids of
# the first hits for a query in the given collection.

require 'rubygems'
require 'optparse'

begin
  gem 'stellr'
rescue Gem::LoadError
  # Not installed as a gem: fall back to the lib directory next to bin/.
  # The two parts have to be joined into one path; handing them to unshift
  # as separate arguments would add the bin directory itself plus a
  # "../lib" entry relative to the current working directory.
  $:.unshift(File.join(File.dirname(__FILE__), "../lib"))
end

require 'stellr'
require 'stellr/client'

options = OptionParser.new do |opts|

  opts.banner = "Usage: #{File.basename(__FILE__)} [options] collection query"

  opts.on("-c", "--config CONFIG", "config file location") do |value|
    @config_file = value
  end

  opts.on("-h", "--help", "show this help") do
    @help = true
  end
end

# The last two remaining words are the collection name and the query.
# Array#pop simply returns nil when nothing is left, so no rescue is needed.
cmdline = options.parse( ARGV )
query = cmdline.pop
collection_name = cmdline.pop

if @help || collection_name.nil? || query.nil?
  # also covers missing arguments, which would otherwise be sent to the
  # server as nil
  puts options.to_s
else
  config = Stellr::Config.new @config_file
  client = Stellr::Client.new config.drb_uri
  collection = client.connect collection_name
  puts "searching collection #{collection_name} for >#{query}< ..."
  results = collection.search query, :limit => 10
  if results.any?
    puts "showing first 10 of #{results.total_hits} results"
    results.each do |r|
      puts r[:id]
    end
  else
    puts "no results for >#{query}<"
  end
end
|
49
|
+
|
50
|
+
|
data/config/stellr.yml
ADDED
data/lib/stellr.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'thread'
|
3
|
+
begin
|
4
|
+
require 'fastthread'
|
5
|
+
rescue LoadError
|
6
|
+
puts "couldn't load fastthread"
|
7
|
+
end
|
8
|
+
require 'drb'
|
9
|
+
require 'monitor'
|
10
|
+
require 'ferret'
|
11
|
+
require 'stellr/utils'
|
12
|
+
require 'stellr/server'
|
13
|
+
require 'stellr/config'
|
14
|
+
require 'stellr/collections'
|
15
|
+
require 'stellr/strategies'
|
16
|
+
require 'stellr/search'
|
17
|
+
|
18
|
+
$SAFE = 1
|
19
|
+
|
20
|
+
module Stellr
  VERSION = '0.1.0'

  # Starts a Stellr server for +config+ and blocks until the DRb service
  # thread terminates.
  #
  # If config.script is set, that file is loaded first. When loading it
  # fails, the error and its backtrace are printed and the process exits
  # with status 1.
  def self.start_server( config )
    if config.script
      begin
        load config.script
      # ScriptError covers LoadError and SyntaxError, which are not
      # StandardErrors. Exception is deliberately not rescued, so an exit
      # call inside the script or a signal is not misreported as a
      # loading error.
      rescue StandardError, ScriptError => e
        puts "\nerror loading script #{config.script}: #{e}\n#{e.backtrace.join("\n")}"
        exit 1
      end
    end
    stellr = Server.new config
    server = DRb.start_service config.drb_uri, stellr
    server.thread.join
  end

end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'drb'
|
2
|
+
require 'stellr/search'
|
3
|
+
|
4
|
+
module Stellr
|
5
|
+
|
6
|
+
# = Stellr client
|
7
|
+
#
|
8
|
+
# This class acts as a wrapper around the connection to a Stellr server.
|
9
|
+
# Typical usage looks like this:
|
10
|
+
#
|
11
|
+
# stellr = Stellr::Client.new('druby://myserver.com:9000')
|
12
|
+
# collection = stellr.connect('myindex', :fields => { :content => { :store => :yes } })
|
13
|
+
# collection << { :content => 'lorem ipsum' }
|
14
|
+
# collection.switch
|
15
|
+
# results = collection.search('lorem', :page => 1, :per_page => 10)
|
16
|
+
#
|
17
|
+
class Client

  # Creates a client talking to the Stellr server at +drb_uri+
  # (e.g. 'druby://localhost:9010').
  def initialize( drb_uri )
    @server = DRbObject.new(nil, drb_uri)
  end

  # Connects to a remote collection and returns a stub that can be used to
  # add records to the collection and to search for them.
  #
  # Specify an array of collection names for the first argument to search
  # multiple collections at once. No indexing is possible with such a
  # MultiCollection.
  def connect( collection_name, options = nil )
    if Array === collection_name
      # options defaults to nil here; passing that through would override
      # the empty-hash default multi_connect declares for itself
      multi_connect collection_name, options || {}
    else
      @server.register collection_name, options
      ClientCollection.new @server, collection_name
    end
  end

  # Connects to multiple remote collections at once in order to run
  # cross-collection searches.
  def multi_connect( collection_names, options = {} )
    MultiCollection.new @server, collection_names, options
  end

end
|
45
|
+
|
46
|
+
# Wrapper around a remote collection.
|
47
|
+
#
|
48
|
+
# See the documentation of the collection class you use for more information.
|
49
|
+
class ClientCollection

  # Remembers the server proxy and the name of the collection this stub
  # stands for.
  def initialize( server, name )
    @server = server
    @name = name
  end

  # Drops the reference to the server; any later call on this stub raises.
  def disconnect
    @server = nil
  end

  # Forwards every unknown call to the server, inserting the collection
  # name as the first argument. Blocks are not forwarded.
  def method_missing( method, *args )
    remote = @server
    raise "use of disconnected collection" if remote.nil?
    remote.send( method, @name, *args )
  end

end
|
67
|
+
|
68
|
+
# This client collection class allows to search multiple server side
|
69
|
+
# collections at once.
|
70
|
+
class MultiCollection < ClientCollection
  # Registers +names+ as one multi collection on the server and keeps
  # whatever the server returns as the name under which later calls are
  # forwarded.
  def initialize( server, names, options = {} )
    @server = server
    @name = @server.register_multi_collection( names, options )
  end
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Stellr
|
2
|
+
module Collections
|
3
|
+
|
4
|
+
# Base class for collection implementations
|
5
|
+
class Base
  include Ferret::Index
  include Stellr::Utils::Shutdown
  include Stellr::Utils::Observable

  attr_reader :name

  # Instantiates the collection class selected by +options+. When the
  # options also select a strategy, the collection is wrapped in an
  # instance of that strategy and the strategy is returned; otherwise the
  # bare collection is returned.
  def self.create( name, options )
    collection = collection_class_for_options( options ).new( name, options )
    strategy_class = strategy_class_for_options( options )
    return collection unless strategy_class
    strategy_class.new( collection, options )
  end

  # Stores the name and a copy of the options. Logs to options[:logger] if
  # given, otherwise to a Logger writing to 'stellr.log'.
  def initialize( name, options )
    @logger = options[:logger]
    unless @logger
      require 'logger'
      @logger = Logger.new 'stellr.log'
    end
    @name = name
    @options = options.dup
  end

  # called whenever the strategy thinks it's a good time to do something
  # timeconsuming (like switching indexes, optimizing, flushing, ...)
  def batch_finished
  end

  # Shutdown hook: closes the collection regardless of +mode+.
  def on_shutdown( mode )
    close
  end

  # close this collection
  def close
  end

  protected

  # Directory of this collection, taken from the :path option.
  def collection_directory
    @options[:path]
  end

  # NOTE: `protected` has no effect on the `def self.` methods below;
  # they remain public class methods.

  # Resolves the collection class for +options+. A :collection key is
  # removed from the hash and replaced by the matching :collection_class
  # name; without any class name the RSync collection is used.
  #
  # NOTE(review): the class name is interpolated into a string that is
  # evaluated. If options can originate from remote clients this evaluates
  # client-supplied text - confirm, and consider a constant lookup instead.
  def self.collection_class_for_options( options )
    key = options.delete(:collection)
    options[:collection_class] = collection_class_for_key( key ) if key
    class_name = options[:collection_class] || 'Stellr::Collections::RSync'
    Object.module_eval("::#{class_name}", __FILE__, __LINE__)
  end

  # Maps the short collection keys to class names; nil for unknown keys.
  def self.collection_class_for_key(key)
    {
      :static => 'Stellr::Collections::Static',
      :rsync  => 'Stellr::Collections::RSync'
    }[key]
  end

  # Resolves the strategy class for +options+, or returns nil when no
  # strategy is configured. A :strategy key is removed from the hash and
  # replaced by the matching :strategy_class name.
  #
  # NOTE(review): same string evaluation as in collection_class_for_options.
  def self.strategy_class_for_options( options )
    key = options.delete(:strategy)
    options[:strategy_class] = strategy_class_for_key( key ) if key
    class_name = options[:strategy_class]
    Object.module_eval("::#{class_name}", __FILE__, __LINE__) if class_name
  end

  # Maps the short strategy keys to class names; nil for unknown keys.
  def self.strategy_class_for_key(key)
    { :queueing => 'Stellr::Strategies::Queueing' }[key]
  end
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|