stellr 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +36 -0
- data/README.txt +109 -0
- data/Rakefile +28 -0
- data/bin/stellr +64 -0
- data/bin/stellr-search +50 -0
- data/config/stellr.yml +8 -0
- data/lib/stellr.rb +37 -0
- data/lib/stellr/client.rb +78 -0
- data/lib/stellr/collections.rb +6 -0
- data/lib/stellr/collections/base.rb +79 -0
- data/lib/stellr/collections/multi_collection.rb +32 -0
- data/lib/stellr/collections/rsync.rb +38 -0
- data/lib/stellr/collections/searchable_collection.rb +166 -0
- data/lib/stellr/collections/static.rb +97 -0
- data/lib/stellr/collections/writeable_collection.rb +119 -0
- data/lib/stellr/config.rb +88 -0
- data/lib/stellr/search.rb +2 -0
- data/lib/stellr/search/search_result.rb +21 -0
- data/lib/stellr/search/search_results.rb +50 -0
- data/lib/stellr/server.rb +166 -0
- data/lib/stellr/strategies.rb +4 -0
- data/lib/stellr/strategies/base.rb +16 -0
- data/lib/stellr/strategies/blocking.rb +13 -0
- data/lib/stellr/strategies/queueing.rb +78 -0
- data/lib/stellr/utils.rb +24 -0
- data/lib/stellr/utils/observable.rb +20 -0
- data/lib/stellr/utils/shutdown.rb +30 -0
- data/test/fixtures/movies.yml +4 -0
- data/test/stellr_test.rb +38 -0
- data/test/test_client.rb +27 -0
- data/test/test_collections_base.rb +25 -0
- data/test/test_helper.rb +1 -0
- data/test/test_rsync_collection.rb +72 -0
- data/test/test_server.rb +94 -0
- data/test/test_static_collection.rb +40 -0
- data/test/test_stellr.rb +11 -0
- metadata +110 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
module Stellr
  module Collections

    # Read-only collection that searches across several other collections
    # at once by building one combined reader over their individual
    # index readers.
    class MultiCollection < SearchableCollection

      # name        - name of this virtual collection
      # collections - collections to aggregate; each must respond to
      #               +name+, +reader+ and +add_listener+
      # options     - passed through to SearchableCollection
      def initialize( name, collections, options = {} )
        super name, options
        @collections = collections.each_with_object({}) do |coll, registry|
          registry[coll.name] = coll
          # track each sub-collection so we can react when its reader closes
          coll.add_listener { |event| handle_event(coll.name, event) }
        end
      end

      protected

      # Opens a multi-reader spanning the readers of all aggregated
      # collections.
      def open_reader
        IndexReader.new @collections.values.map { |coll| coll.reader }
      end

      # Invalidates our combined reader whenever one of the underlying
      # collections is about to close its own reader.
      def handle_event(collection_name, event)
        @logger.debug "handle_event: #{event.inspect}"
        close_reader if event == :closing_reader
      end

    end

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Stellr
  module Collections

    # RSync collection implementation.
    #
    # Keeps two indexes around - one for searching and one for indexing. The
    # difference when compared with the Static collection is that every
    # now and then the changes are synced from the latter to the former using RSync,
    # so no complete rebuild is necessary after a switch.
    class RSync < Static

      # Called by the strategy when a batch of updates is done; publishes
      # the changes by switching indexes, but only if anything was written.
      def batch_finished
        switch if dirty?
      end

      # we want the indexes to be in sync after close, so do a last switch
      alias :close :switch

      protected

      # Unlike Static, only recreate the index from scratch when explicitly
      # requested via the :recreate option - otherwise keep syncing
      # incrementally.
      def writer_options
        super.merge :create => @options[:recreate]
      end

      # overridden to sync the indexes after re-linking
      def relink_indexes
        super
        sync_indexes
      end

      # Copies the now-searching index over the indexing one so both are
      # identical again. Uses the argument-vector form of Kernel#system to
      # keep the directory paths out of shell interpolation, and logs a
      # failure instead of silently ignoring rsync's exit status.
      def sync_indexes
        unless system('rsync', '-r', '--delete', "#{searching_directory}/", indexing_directory)
          @logger.error "rsync failed with status #{$?.inspect}"
        end
      end

    end

  end
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
module Stellr
  module Collections

    # Base class for searchable collection implementations.
    #
    # Provides thread-safe, lazily initialized access to a Ferret index
    # reader, searcher and query parser. Subclasses must implement
    # +open_reader+.
    class SearchableCollection < Base

      def initialize( name, options )
        super name, options
        # reader and searcher share one monitor; the query parser has its
        # own so query parsing doesn't serialize with reader setup
        @reader_monitor = Monitor.new
        @query_parser_monitor = Monitor.new
        @reader = @searcher = @query_parser = nil
      end


      # Search this collection.
      # Options is a hash taking the usual Ferret::Search::Searcher options,
      # plus:
      # [+page+] Page of results to show, starting with 1
      # [+per_page+] Number of records per page, default 10.
      # [+fields+] Array of fields to search in
      # [+get_fields+] Array of fields to retrieve in addition to the
      #               :id field
      #
      # The page and per_page options take precedence over any given limit and
      # offset values.
      def search(query, options = {})
        results = Stellr::Search::SearchResults.new

        if options[:page]
          results.current_page = options.delete(:page).to_i
          options[:limit] = results.per_page = options.delete(:per_page) || 10
          # page numbers below 1 are clamped to the first page
          options[:offset] = (p = results.current_page - 1) <= 0 ? 0 : p * results.per_page
        end

        get_fields = options.delete :get_fields
        # TODO replace synchronization with some kind of shared read/exclusive
        # write locking mechanism allowing parallel searches but guarding
        # against the reader instance being shut down while we're inside
        # retrieve_field_data
        @reader_monitor.synchronize do
          q = process_query query, options
          @logger.debug "options: #{options.inspect}"
          results.total_hits = searcher.search_each q, options do |id, score|
            field_data = retrieve_field_data(id, get_fields)
            results << Stellr::Search::SearchResult.new( id, score, field_data )
          end
          @logger.info "query #{query} : #{results.total_hits} results"
        end
        return results
      end

      # Returns a highlighted excerpt for +doc_id+ matching +query+, or an
      # empty string if highlighting fails for any reason.
      def highlight( doc_id, query, options = {})
        return searcher.highlight(process_query(query, options), doc_id, options[:field], options)
      rescue
        @logger.error "error in highlight: #{$!}. Document #{doc_id}, Query: #{query}, options: #{options.inspect}"
        ''
      end

      # Number of documents in the index.
      def size
        reader.num_docs
      end

      # Shutdown hook: release the reader resources.
      def on_shutdown( mode )
        close
      end

      # close this collection
      def close
        close_reader
      end

      protected

      # should open a reader and return it
      def open_reader
        raise 'not implemented'
      end

      # Lazily opened, memoized reader; guarded by @reader_monitor.
      def reader
        @reader_monitor.synchronize do
          @reader ||= open_reader
        end
      end

      # Lazily created searcher on top of +reader+.
      def searcher
        @reader_monitor.synchronize do
          @searcher ||= Ferret::Search::Searcher.new reader
        end
      end

      # Lazily created, shared query parser instance.
      def query_parser
        @query_parser_monitor.synchronize do
          @query_parser ||= create_query_parser
        end
      end

      def create_query_parser(options = {})
        Ferret::QueryParser.new( { :analyzer => create_analyzer, :or_default => false }.merge( options ) )
      end

      # reads field data for +:id+ and any other given fields from
      # the document given by +id+
      # unsynchronized reader access occurs, so only use from within blocks
      # synchronizing on @reader_monitor.
      def retrieve_field_data(id, fields = nil)
        doc = reader[id]
        field_data = { :id => doc[:id] }
        fields.each do |f|
          field_data[f] = doc[f]
        end if fields
        return field_data
      end

      # Turn a query string into a Ferret Query object.
      # unsynchronized reader access occurs, so only use from
      # within blocks synchronizing on @reader_monitor.
      # Returns nil when the query cannot be processed.
      def process_query(query, options)
        @logger.debug "process_query: #{query.inspect}"
        q = query.dup
        if String === q
          @query_parser_monitor.synchronize do
            qp = query_parser
            tokenized_fields = reader.tokenized_fields
            qp.fields = options[:fields] || tokenized_fields # reader.fields
            qp.tokenized_fields = tokenized_fields
            @logger.debug "tokenized_fields: #{tokenized_fields}"
            q = qp.parse q
          end
        end
        @logger.debug "processed query: #{q.inspect}"
        return q
      rescue
        @logger.error "error processing query: #{$!}"
        # return nil explicitly - previously this returned whatever the
        # logger call returned (typically true)
        nil
      end

      def collection_directory
        @options[:path]
      end

      # Tears down reader, searcher and query parser, notifying listeners
      # first so dependent collections (e.g. MultiCollection) can react.
      def close_reader
        @reader_monitor.synchronize do
          notify_listeners( :closing_reader )
          return unless @reader
          @query_parser = nil
          @searcher.close if @searcher
          @searcher = nil
          @reader.close
          @reader = nil
        end
      end

      # TODO allow declarative analyzer specification in options
      def create_analyzer
        if class_name = @options[:analyzer]
          @logger.debug "instantiating analyzer #{class_name}"
          return class_name.constantize.new
        end
        return Ferret::Analysis::StandardAnalyzer.new
      end

    end

  end

end
|
166
|
+
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module Stellr
  module Collections

    # Static collection implementation.
    #
    # This kind of collection is for situations where your index usually doesn't
    # change but instead is rebuilt from scratch once in a while.
    #
    # This collection keeps two indexes, one for searching, and one where all index
    # modifications take place. Once you are finished building the new index,
    # call the switch method to put it live. The old index is then dropped and
    # the new one is used for searching from now on.
    class Static < WriteableCollection

      def initialize( name, options )
        super( name, options )
        # eagerly open both ends so setup problems surface at startup
        reader
        writer
      end

      # Puts the freshly built index live: flushes and optimizes the
      # indexing side, closes both reader and writer, swaps the symlinks
      # and resets the dirty counter.
      def switch
        @logger.info "switching indexes"
        @writer_monitor.synchronize do
          flush
          optimize
          close_writer
          @reader_monitor.synchronize do
            close_reader
            relink_indexes
            clear!
          end
        end
      end

      protected

      def open_writer
        # File.exist? - File.exists? was deprecated and removed in Ruby 3.2
        create_directories unless File.exist? indexing_directory # and File.exist? searching_directory
        IndexWriter.new writer_options
      end

      def writer_options
        {
          :path => indexing_directory,
          :create => true,
          :field_infos => create_field_infos,
          :analyzer => create_analyzer
        }
      end

      # Opens a reader on the searching symlink, creating/switching the
      # index layout first if necessary. Retries once after a switch when
      # the searching index doesn't contain a readable index yet.
      def open_reader
        already_retried = false
        begin
          switch unless File.symlink? searching_directory
          IndexReader.new searching_directory
        rescue Ferret::FileNotFoundError
          switch
          unless already_retried
            already_retried = true
            retry
          end
        end
      end

      # Creates the two index storage directories and the indexing/searching
      # symlinks pointing at them.
      def create_directories
        FileUtils.mkdir_p index_storage_directory( '0' )
        FileUtils.mkdir_p index_storage_directory( '1' )
        FileUtils.ln_s index_storage_directory( '0' ), indexing_directory, :force => true
        FileUtils.ln_s index_storage_directory( '1' ), searching_directory, :force => true
      end

      def indexing_directory
        File.join( collection_directory, "indexing" )
      end

      def searching_directory
        File.join( collection_directory, "searching" )
      end

      def index_storage_directory( suffix )
        File.join( collection_directory, suffix )
      end

      # Swaps the two symlinks so the index previously used for searching
      # is now written to and vice versa.
      def relink_indexes
        # NOTE: the original called String#untaint on the readlink results;
        # tainting has been a no-op since Ruby 2.7 and the method was
        # removed in 3.2, so it is dropped here.
        searching = File.readlink( searching_directory )
        indexing = File.readlink( indexing_directory )
        @logger.info "relink_indexes: #{searching} will now be used for indexing"
        File.delete indexing_directory
        File.delete searching_directory
        FileUtils.ln_s indexing, searching_directory, :force => true
        FileUtils.ln_s searching, indexing_directory, :force => true
      end

    end

  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module Stellr
  module Collections

    # Base class for collection implementations that allow index updates.
    #
    # Adds a lazily opened, monitor-guarded Ferret writer on top of
    # SearchableCollection, plus dirty-state tracking so strategies know
    # when there is something to publish. Subclasses must implement
    # +open_writer+.
    class WriteableCollection < SearchableCollection

      def initialize( name, options )
        super
        @writer_monitor = Monitor.new
        # counts adds/deletes since the last clear! - see #dirty?
        @processed_records = 0
        @writer = nil
      end

      # Adds the given record to the index.
      #
      # Record may be a hash, or a Ferret::Document instance
      def add_record( record, boost = nil )
        # idiomatic raise: class plus message instead of ArgumentError.new
        raise ArgumentError, "record must contain :id field" if record[:id].nil?
        if boost
          if Ferret::Document === record
            record.boost = boost
          else
            # wrap plain hashes in a Document so the boost can be applied
            hash, record = record, Ferret::Document.new( boost )
            hash.each_pair do |k,v|
              record[k] = v
            end
          end
        end
        @writer_monitor.synchronize do
          @processed_records += 1
          w = writer
          w.delete :id, record[:id].to_s # ensure uniqueness by :id field
          w << record
        end
        true
      end
      alias :<< :add_record

      # Removes the record with the same :id from the index.
      def delete_record( record )
        raise ArgumentError, "record must contain :id field" if record[:id].nil?
        @writer_monitor.synchronize do
          @processed_records += 1
          writer.delete :id, record[:id].to_s
        end
        true
      end

      # true if records have been processed since the last call to clear!
      def dirty?
        @processed_records > 0
      end

      # Resets the dirty-state counter.
      def clear!
        @processed_records = 0
      end

      # called whenever the strategy thinks it's a good time do do something
      # timeconsuming (like switching indexes, optimizing, flushing, ...)
      def batch_finished
      end

      # close this collection
      def close
        close_writer
        super
      end

      # flush any unwritten changes to the index
      def flush
        @writer_monitor.synchronize do
          writer.commit
        end
      end

      # optimize the index
      def optimize
        @writer_monitor.synchronize do
          writer.optimize
        end
      end

      protected

      # should open a writer and return it
      def open_writer
        raise 'not implemented'
      end

      # Lazily opened, memoized writer; guarded by @writer_monitor.
      def writer
        @writer_monitor.synchronize do
          @writer ||= open_writer
        end
      end


      # Closes the writer (if open), notifying listeners beforehand.
      def close_writer
        @writer_monitor.synchronize do
          notify_listeners( :closing_writer )
          return unless @writer
          @writer.close
          @writer = nil
        end
      end

      # Builds the Ferret field infos from the :field_defaults and :fields
      # options, guaranteeing a stored, untokenized :id field.
      def create_field_infos
        field_infos = FieldInfos.new @options[:field_defaults] || {}
        @options[:fields].each do |name, definition|
          field_infos.add_field( name, definition )
        end if @options[:fields]
        # provide default settings for :id field
        field_infos.add_field :id, :store => :yes, :index => :untokenized unless field_infos[:id]
        field_infos
      end

    end

  end

end
|