stellr 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +36 -0
- data/README.txt +109 -0
- data/Rakefile +28 -0
- data/bin/stellr +64 -0
- data/bin/stellr-search +50 -0
- data/config/stellr.yml +8 -0
- data/lib/stellr.rb +37 -0
- data/lib/stellr/client.rb +78 -0
- data/lib/stellr/collections.rb +6 -0
- data/lib/stellr/collections/base.rb +79 -0
- data/lib/stellr/collections/multi_collection.rb +32 -0
- data/lib/stellr/collections/rsync.rb +38 -0
- data/lib/stellr/collections/searchable_collection.rb +166 -0
- data/lib/stellr/collections/static.rb +97 -0
- data/lib/stellr/collections/writeable_collection.rb +119 -0
- data/lib/stellr/config.rb +88 -0
- data/lib/stellr/search.rb +2 -0
- data/lib/stellr/search/search_result.rb +21 -0
- data/lib/stellr/search/search_results.rb +50 -0
- data/lib/stellr/server.rb +166 -0
- data/lib/stellr/strategies.rb +4 -0
- data/lib/stellr/strategies/base.rb +16 -0
- data/lib/stellr/strategies/blocking.rb +13 -0
- data/lib/stellr/strategies/queueing.rb +78 -0
- data/lib/stellr/utils.rb +24 -0
- data/lib/stellr/utils/observable.rb +20 -0
- data/lib/stellr/utils/shutdown.rb +30 -0
- data/test/fixtures/movies.yml +4 -0
- data/test/stellr_test.rb +38 -0
- data/test/test_client.rb +27 -0
- data/test/test_collections_base.rb +25 -0
- data/test/test_helper.rb +1 -0
- data/test/test_rsync_collection.rb +72 -0
- data/test/test_server.rb +94 -0
- data/test/test_static_collection.rb +40 -0
- data/test/test_stellr.rb +11 -0
- metadata +110 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'erb'
|
3
|
+
|
4
|
+
module Stellr
  # = Config
  #
  # The +stellr+ and +stellr-search+ commands both rely on a YAML file
  # to provide some basic configuration:
  #
  # +:port+::      Port the server should listen to
  # +:host+::      Hostname or IP of the server
  # +:script+::    Optional ruby file to load during startup. This is the place
  #                to load custom code like self made Analyzers you intend to use.
  # +:log_level+:: Log level, default is +:warn+
  # +:base_dir+::  Base directory where the server will store index data, log
  #                files and configuration data.
  # +:data_dir+::  Index directory, relative to +base_dir+. Defaults to +data+
  # +:log_dir+::   Log file directory, relative to +base_dir+. Defaults to +log+
  # +:conf_dir+::  Configuration directory, relative to +base_dir+. Defaults to
  #                +conf+. Here stellr will keep the configuration of registered
  #                collections, one YAML file per collection.
  # +:tmp_dir+::   Temp directory, relative to +base_dir+. Defaults to +tmp+
  #
  # Every configuration key is readable as a method on the Config instance
  # (e.g. +config.port+); unknown keys raise NameError.
  #
  class Config
    # Values used for every key the configuration file does not set.
    DEFAULTS = { :port      => 9010,
                 :host      => 'localhost',
                 :base_dir  => '/var/stellr',
                 :data_dir  => 'data',
                 :log_dir   => 'log',
                 :tmp_dir   => 'tmp',
                 :conf_dir  => 'conf',
                 :log_level => :warn }.freeze

    # Loads the configuration and applies +extra_options+ on top of it.
    #
    # Configfile search order:
    # - argument
    # - /etc/stellr.yml
    # - +gem_directory+/config/stellr.yml
    #
    # +config_file+::   path of the YAML file to read (may be nil)
    # +extra_options+:: hash of settings overriding the file contents
    def initialize( config_file = nil, extra_options = {} )
      load_config config_file
      @config.update extra_options
      # We trust our config file. Object#untaint only exists on Rubies older
      # than 3.2, so it is called only where it is still available.
      @config.each_value { |value| value.untaint if value.respond_to?( :untaint ) }
    end

    # Absolute index directory (+base_dir+ joined with +:data_dir+).
    def data_dir
      resolve_directory_name( :data_dir )
    end

    # Absolute log directory (+base_dir+ joined with +:log_dir+).
    def log_dir
      resolve_directory_name( :log_dir )
    end

    # Absolute temp directory (+base_dir+ joined with +:tmp_dir+).
    def tmp_dir
      resolve_directory_name( :tmp_dir )
    end

    # Absolute configuration directory (+base_dir+ joined with +:conf_dir+).
    def conf_dir
      resolve_directory_name( :conf_dir )
    end

    # NOTE(review): empty stub in the original source; it returns nil.
    def collection_dir
    end

    # DRb URI built from the configured host and port.
    def drb_uri
      "druby://#{host}:#{port}"
    end

    protected

    # Exposes every configuration key as a reader method.
    # Raises NameError for keys that are not configured.
    def method_missing( method_name, *args )
      return @config[method_name] if @config.has_key?( method_name )
      raise NameError, "unknown configuration key: #{method_name}"
    end

    # Keeps respond_to? in sync with the readers served by method_missing.
    def respond_to_missing?( method_name, include_private = false )
      ( !@config.nil? && @config.has_key?( method_name ) ) || super
    end

    # Joins +base_dir+ with the directory configured under +sub_dir+.
    # Raises NameError when +sub_dir+ is not a configured key.
    def resolve_directory_name( sub_dir )
      raise NameError, "unknown directory setting: #{sub_dir}" unless @config.has_key?( sub_dir )
      File.join( base_dir, @config[sub_dir] )
    end

    # Reads +config_file+ (run through ERB, then parsed as YAML) and merges it
    # over DEFAULTS. Falls back to /etc/stellr.yml and then to the file shipped
    # with the gem when the requested file does not exist.
    def load_config( config_file )
      config_file ||= "/etc/stellr.yml"
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      config_file = File.join( File.dirname(__FILE__), "../../config/stellr.yml" ) unless File.exist?( config_file )

      # YAML.load yields nil/false for an empty document; treat that as "no overrides".
      loaded = YAML.load( ERB.new( IO.read(config_file) ).result ) || {}
      @config = DEFAULTS.merge( loaded )
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Stellr
  module Search

    # One hit of a search: the document id, its score and the stored
    # field data that was retrieved for it.
    class SearchResult
      attr_reader :score
      attr_reader :doc_id

      # +id+::         id of the matching document
      # +score+::      score of the hit
      # +field_data+:: object answering to +[]+ with the field contents
      def initialize(id, score, field_data)
        @doc_id, @score, @field_data = id, score, field_data
      end

      # Returns the contents of the field called +name+.
      # Also available as +result[name]+.
      def field(name)
        @field_data[name]
      end
      alias_method :[], :field
    end

  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module Stellr
  module Search

    # Thin wrapper around an array of search results.
    #
    # Every public Array method (+<<+, +each+, +size+, +map+, ...) is forwarded
    # to the wrapped array; on top of that the object carries the paging
    # information of the search (+total_hits+, +current_page+, +per_page+).
    class SearchResults
      attr_accessor :total_hits, :current_page, :per_page
      alias total_entries total_hits

      def initialize
        @results = []
      end

      # Forwards everything the wrapped array publicly responds to.
      # Anything else ends in the regular NoMethodError.
      def method_missing(symbol, *args, &block)
        return super unless @results.respond_to?(symbol)
        @results.public_send(symbol, *args, &block)
      end

      # Makes respond_to? (in both its one and two argument form) report the
      # methods served by method_missing.
      def respond_to_missing?(name, include_private = false)
        @results.respond_to?(name) || super
      end


      # code from here on derived from will_paginate's collection.rb

      # The total number of pages. 0 when +per_page+ is unset or not positive.
      #
      # Computed on every call: +total_hits+ and +per_page+ are writable, so a
      # cached value would go stale as soon as one of them is reassigned.
      def total_pages
        per_page.to_i > 0 ? (total_hits.to_i / per_page.to_f).ceil : 0
      end
      alias page_count total_pages

      # Current offset of the paginated collection. If we're on the first page,
      # it is always 0. If we're on the 2nd page and there are 30 entries per page,
      # the offset is 30. This property is useful if you want to render ordinals
      # besides your records: simply start with offset + 1.
      #
      def offset
        (current_page - 1) * per_page
      end

      # current_page - 1 or nil if there is no previous page
      def previous_page
        current_page > 1 ? (current_page - 1) : nil
      end

      # current_page + 1 or nil if there is no next page
      def next_page
        current_page < page_count ? (current_page + 1) : nil
      end
    end

  end
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module Stellr
  # Front object of the stellr server. It keeps a registry of collections
  # (guarded by a monitor) and passes index and search calls on to them.
  #
  # Everything below +protected+ is deliberately kept non-public; only the
  # methods above it form the server's public interface.
  class Server
    include Utils::Shutdown
    protected :shutdown

    attr_accessor :config
    attr_reader :collections

    # +config+:: configuration object providing +log_dir+, +tmp_dir+,
    #            +conf_dir+, +data_dir+ and +log_level+
    def initialize( config )
      @config = config
      create_directories
      @logger = Logger.new File.join(config.log_dir, 'stellr.log')
      @logger.level = Logger.const_get config.log_level.to_s.upcase
      @collections = {}
      @collections.extend MonitorMixin
    end

    # Adds +record+ (with optional +boost+) to the named collection.
    def add_record( collection_name, record, boost = nil )
      collection( collection_name ).add_record record, boost
    end
    alias :<< :add_record

    # Removes +record+ from the named collection.
    def delete_record( collection_name, record )
      collection( collection_name ).delete_record record
    end

    # Size of the named collection, as reported by the collection itself.
    def size( collection_name )
      collection( collection_name ).size
    end

    # Initialize a collection.
    #
    # Before anything can be done with a collection, it has to be registered
    # through this method. When called for a non-existing collection, this will
    # also create the empty physical index. The given options are saved to a yml
    # file so they can be loaded again later on.
    #
    # Calling register for an existing collection will update the saved index
    # configuration from the options given, unless the options hash is nil.
    # An already existing physical index won't be touched and used as is.
    # Remember that changing Ferret options like analyzers or field
    # configuration requires an index rebuild to be effective for existing
    # content. Stellr doesn't take care of this.
    #
    # NOTE(review): a collection that is already registered in this server
    # instance is returned as is (+||=+); its saved configuration is not
    # rewritten in that case.
    #
    # If you access the server through the Stellr::Client class you don't need
    # to call +register+ explicitly as the client will do it when connecting.
    #
    # Name is the name of the collection to create.
    #
    # Options is a hash, consisting of:
    # [+collection+]     The collection implementation to use, may be one of :static or :rsync (default)
    # [+strategy+]       The strategy implementation to use (optional, atm there is only :queueing)
    # [+fields+]         +hash+ (see http://ferret.davebalmain.com/api/classes/Ferret/Index/FieldInfo.html)
    # [+recreate+]       Recreate the index (defaults to +false+). A true value will lead to the deletion of any already indexed data.
    # [+analyzer+]       The class name (String) of the Analyzer to use for this collection. By default, Ferret's StandardAnalyzer will be used.
    # [+field_defaults+] Default setting for unconfigured fields
    #
    # Example
    #   register 'standard_index', { :recreate => false,
    #                                :fields => { :author  => { :index       => :untokenized,
    #                                                           :store       => :no,
    #                                                           :term_vector => :with_offsets,
    #                                                           :boost       => 2.0 },
    #                                             :content => { :index => :tokenized } }
    #                              }
    #
    def register( name, options = {} )
      @collections.synchronize do
        @collections[name] ||= create_collection( name, options )
      end
    end

    # Initializes a read-only virtual collection that may be used to search
    # across multiple physical collections.
    #
    # Returns the name of the collection to be used with further calls.
    def register_multi_collection( names, options = {} )
      # '.' is not allowed for regular collection names, so we are safe from
      # name collisions with those.
      # NOTE(review): names may themselves contain '_', so different name
      # lists (['a_b', 'c'] and ['a', 'b_c']) map to the same key.
      key = ".multi_#{names.join '_'}"
      @collections.synchronize do
        @collections[key] ||= create_multi_collection( key, names.map{ |name| collection(name) }, options )
      end
      return key
    end

    # Returns the collection registered as +name+. A collection that is not
    # registered yet is initialized from its stored configuration;
    # create_collection raises if the name is invalid or nothing was stored.
    def collection( name )
      @collections.synchronize do
        unless @collections.has_key?( name )
          @logger.info "trying to initialize collection #{name} from stored configuration..."
          @collections[name] = create_collection( name, nil )
        end
        @collections[name]
      end
    end

    protected

    # pass through commands to collection: the first argument names the
    # collection, the remaining arguments (and a block, if given) are handed
    # to the collection's method of the same name.
    def method_missing( method, *args, &block )
      return super if args.empty?
      collection_name, *rest = args
      collection( collection_name ).send( method, *rest, &block )
    end

    def create_multi_collection( name, collections, options = {} )
      Stellr::Collections::MultiCollection.new name, collections, { :logger => @logger }.merge(options)
    end

    # initializes a new collection object
    #
    # if nil is given for options, the method tries to locate a previously
    # saved collection configuration and restore from it.
    #
    # Raises RuntimeError for invalid names and when neither options nor a
    # stored configuration are available.
    def create_collection( name, options )
      # \A and \z anchor the whole string. ^ and $ only anchor single lines,
      # which would let a multi-line name slip into the file system paths
      # built from it below.
      raise "invalid collection name >#{name}<, may only contain a-zA-Z0-9_-" unless name =~ /\A[a-zA-Z0-9_-]+\z/
      # Object#untaint only exists on Rubies older than 3.2.
      name.untaint if name.respond_to?( :untaint )
      save_config = true
      if options.nil?
        options = load_collection_config name
        save_config = false
      end
      raise "No options given for collection #{name} and no stored configuration found." if options.nil?

      options = options.dup # leave the caller's hash untouched
      options[:path] = File.join( @config.data_dir, name )
      save_collection_config name, options if save_config
      return Collections::Base.create( name, {:logger => @logger}.merge(options) )
    end

    # Writes +options+ as YAML to the collection's configuration file.
    def save_collection_config( name, options )
      path = collection_config_path name
      # block form closes the file even if dumping or writing fails
      File.open( path, 'w' ) { |file| file << YAML.dump(options) }
      @logger.info "wrote collection config to #{path}"
    end

    # Reads the stored configuration of collection +name+.
    # Returns nil when there is no readable, non-empty, parseable file;
    # parse and read errors are logged instead of being silently dropped.
    def load_collection_config( name )
      path = collection_config_path name
      return nil unless File.readable?( path )
      conf = YAML.load( File.read(path) )
      return nil unless conf # empty file
      @logger.info "loaded collection config from #{path}"
      conf
    rescue StandardError => e
      @logger.warn "could not load collection config from #{path}: #{e.message}"
      nil
    end

    def collection_config_path( name )
      File.join @config.conf_dir, "#{name}.yml"
    end

    # called by shutdown
    def on_shutdown( mode )
      @collections.synchronize do
        @collections.values.each { |coll| coll.shutdown mode }
      end
    end

    def create_directories
      FileUtils.mkdir_p config.log_dir
      FileUtils.mkdir_p config.tmp_dir
      FileUtils.mkdir_p config.conf_dir
      FileUtils.mkdir_p config.data_dir
    end

  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Stellr
  module Strategies
    # Base class of the indexing strategies. A strategy wraps a collection
    # and hands every call it does not implement itself on to that collection.
    class Base
      include Stellr::Utils::Shutdown

      # +collection+:: the collection object this strategy wraps
      # +options+::    options hash; a copy is kept, so later changes made by
      #                the caller (or by the strategy) do not affect each
      #                other. nil is treated as an empty hash.
      def initialize( collection, options )
        @collection = collection
        @options = ( options || {} ).dup
      end

      # Delegates unknown calls, including a given block, to the collection.
      #
      # NOTE(review): respond_to_missing? is intentionally not defined here;
      # whether Utils::Shutdown probes respond_to? cannot be seen from this
      # file, so respond_to? behaviour is left unchanged.
      def method_missing( name, *args, &block )
        @collection.send name, *args, &block
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Stellr

  module Strategies

    # Queueing strategy. Any index modifying methods return immediately, actions
    # are queued and executed asynchronously in order of arrival.
    #
    # Options:
    # [+max_batch_size+] maximum number of queued actions that are processed
    #                    before +batch_finished+ is called on the collection
    #                    (defaults to 200)
    #
    class Queueing < Base

      # Queue item pushed by on_shutdown to wake up the indexing thread.
      # It is compared by identity and never handed to process_record.
      SHUTDOWN_SIGNAL = Object.new.freeze

      def initialize( collection, options )
        super collection, options
        @options[:max_batch_size] ||= 200
        @queue = Queue.new
        @thread = spawn_indexing_thread
      end

      # Queues +record+ for addition. Returns false if the strategy is
      # shutting down and the record was not queued, true otherwise.
      def add_record( record, boost = nil )
        enqueue :add, [record, boost]
      end

      # Queues +record+ for deletion. Same return value as add_record.
      def delete_record( record )
        enqueue :delete, record
      end

      protected

      def on_shutdown( mode )
        # last queue item, so that process_queue does not hang in @queue.deq
        @queue << SHUTDOWN_SIGNAL
        @thread.join
        # save_queue :TODO:
        @collection.shutdown mode
      end

      # called by the indexer thread as long as the server runs
      #
      # Processes at most +max_batch_size+ queued actions, then tells the
      # collection that the batch is finished. Stops early when the queue runs
      # empty, when the shutdown signal is dequeued or when an :abort shutdown
      # is in progress. Errors are reported on stderr and end the current
      # batch without calling +batch_finished+.
      def process_queue
        processed = 0
        max_batch_size = @options[:max_batch_size]
        begin
          while record = @queue.deq
            break if record.equal?( SHUTDOWN_SIGNAL ) || shutting_down?( :abort )
            process_record( *record )
            processed += 1
            break if processed >= max_batch_size || @queue.empty?
          end
          @collection.batch_finished
        rescue StandardError => e
          # StandardError only: signals and exit requests must not be swallowed
          warn "OH NO! #{e}\n#{e.backtrace.join "\n"}"
        end
      end

      # process a single task from the queue
      #
      # +action+:: :add or :delete
      # +record+:: for :add the [record, boost] pair, for :delete the record
      def process_record( action, record )
        case action
        when :add
          @collection.add_record( *record )
        when :delete
          @collection.delete_record record
        else
          raise "UnknownAction"
        end
      end

      # Spawns the thread executing the main loop
      def spawn_indexing_thread
        Thread.new do
          process_queue while !shutting_down?
        end
      end

      # add a task to the queue
      def enqueue( action, record )
        return false if shutting_down?
        @queue << [ action, record ]
        true
      end

    end

  end
end
|