stellr 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,88 @@
1
+ require 'yaml'
2
+ require 'erb'
3
+
4
module Stellr
  # = Config
  #
  # The +stellr+ and +stellr-search+ commands both rely on a YAML file
  # to provide some basic configuration:
  #
  # +:port+::      Port the server should listen to. Defaults to +9010+.
  # +:host+::      Hostname or IP of the server. Defaults to +localhost+.
  # +:script+::    Optional ruby file to load during startup. This is the
  #                place to load custom code like self made Analyzers you
  #                intend to use.
  # +:log_level+:: Log level, default is +:warn+.
  # +:base_dir+::  Base directory where the server will store index data,
  #                log files and configuration data. Defaults to
  #                +/var/stellr+.
  # +:data_dir+::  Index directory, relative to +base_dir+. Defaults to +data+.
  # +:log_dir+::   Log file directory, relative to +base_dir+. Defaults to +log+.
  # +:conf_dir+::  Configuration directory, relative to +base_dir+. Defaults
  #                to +conf+. Here stellr will keep the configuration of
  #                registered collections, one YAML file per collection.
  # +:tmp_dir+::   Temp directory, relative to +base_dir+. Defaults to +tmp+.
  #
  # The file is run through ERB before it is parsed as YAML. Every key of
  # the resulting hash can be read through a method of the same name, e.g.
  # +config.port+.
  class Config
    DEFAULTS = { :port      => 9010,
                 :host      => 'localhost',
                 :base_dir  => '/var/stellr',
                 :data_dir  => 'data',
                 :log_dir   => 'log',
                 :tmp_dir   => 'tmp',
                 :conf_dir  => 'conf',
                 :log_level => :warn }

    # Loads the configuration and layers +extra_options+ on top of it.
    #
    # Config file lookup:
    # - +config_file+ if given, otherwise /etc/stellr.yml
    # - +gem_directory+/config/stellr.yml if the file chosen above does
    #   not exist
    #
    # Precedence of values: DEFAULTS < config file < +extra_options+.
    # Raises Errno::ENOENT if no config file can be read at all.
    def initialize( config_file = nil, extra_options = {} )
      load_config config_file
      @config.update( extra_options || {} )
      # We trust our config file. Object#untaint is gone in Ruby 3.2+, so it
      # is only called where it still exists.
      @config.each_value { |value| value.untaint if value.respond_to?( :untaint ) }
    end

    # Absolute index directory (+base_dir+ joined with +:data_dir+).
    def data_dir
      resolve_directory_name( :data_dir )
    end

    # Absolute log directory (+base_dir+ joined with +:log_dir+).
    def log_dir
      resolve_directory_name( :log_dir )
    end

    # Absolute temp directory (+base_dir+ joined with +:tmp_dir+).
    def tmp_dir
      resolve_directory_name( :tmp_dir )
    end

    # Absolute configuration directory (+base_dir+ joined with +:conf_dir+).
    def conf_dir
      resolve_directory_name( :conf_dir )
    end

    # Placeholder without implementation; always returns +nil+.
    def collection_dir
    end

    # The DRb URI built from the configured host and port.
    def drb_uri
      "druby://#{host}:#{port}"
    end

    protected

    # Exposes every configuration key as a reader method.
    # Raises NameError for names that are not configuration keys.
    def method_missing( method_name, *args )
      return @config[method_name] if @config.has_key?( method_name )
      raise NameError, "unknown configuration key: #{method_name}"
    end

    # Keeps +respond_to?+ in sync with the readers served by method_missing.
    # @config may not be set yet (e.g. on a freshly allocated instance).
    def respond_to_missing?( method_name, include_private = false )
      ( !@config.nil? && @config.has_key?( method_name ) ) || super
    end

    # Joins +base_dir+ with the directory configured under +sub_dir+.
    # Raises NameError if +sub_dir+ is not a configuration key.
    def resolve_directory_name( sub_dir )
      raise NameError, "unknown directory key: #{sub_dir}" unless @config.has_key?( sub_dir )
      File.join( base_dir, @config[sub_dir] )
    end

    # Reads the config file (see #initialize for the lookup rules), evaluates
    # it with ERB, parses the result as YAML and merges it over DEFAULTS.
    def load_config( config_file )
      config_file ||= "/etc/stellr.yml"
      unless File.exist?( config_file )
        config_file = File.join( File.dirname(__FILE__), "../../config/stellr.yml" )
      end

      # An empty file parses to a falsy value; treat it as "no overrides".
      loaded = YAML.load( ERB.new( File.read(config_file) ).result )
      @config = DEFAULTS.merge( loaded || {} )
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'stellr/search/search_result'
2
+ require 'stellr/search/search_results'
@@ -0,0 +1,21 @@
1
module Stellr
  module Search

    # One hit of a search: the document id, its score and the field data
    # that was handed to the constructor.
    class SearchResult
      attr_reader :doc_id
      attr_reader :score

      # Stores +id+, +score+ and +field_data+ as given.
      def initialize(id, score, field_data)
        @doc_id, @score, @field_data = id, score, field_data
      end

      # Looks up +name+ in the field data; also available as +[]+.
      def field(name)
        @field_data[name]
      end
      alias_method :[], :field
    end
  end
end
@@ -0,0 +1,50 @@
1
module Stellr
  module Search

    # Thin wrapper around an array of search results.
    #
    # Any public Array method that is not defined here is forwarded to the
    # wrapped array, so an instance can be filled and iterated like an Array.
    # +total_hits+, +current_page+ and +per_page+ are plain accessors that
    # the pagination helpers below read.
    class SearchResults
      attr_accessor :total_hits, :current_page, :per_page
      alias total_entries total_hits

      def initialize
        @results = []
      end

      # Forwards unknown calls to the wrapped array. Only public Array
      # methods are forwarded: public_send keeps private Kernel methods
      # (+system+, +format+, ...) from being reachable through the wrapper.
      def method_missing(symbol, *args, &block)
        if @results.respond_to?(symbol)
          @results.public_send(symbol, *args, &block)
        else
          super
        end
      end

      # Reports the forwarded Array methods through +respond_to?+. Using
      # this hook instead of overriding +respond_to?+ keeps the standard
      # two-argument form +respond_to?(name, include_all)+ working.
      def respond_to_missing?(name, include_private = false)
        @results.respond_to?(name) || super
      end


      # code from here on derived from will_paginate's collection.rb

      # The total number of pages; 0 when +per_page+ is not positive.
      # The value is memoized on first call.
      def total_pages
        @total_pages ||= per_page > 0 ? (total_hits / per_page.to_f).ceil : 0
      end
      alias page_count total_pages

      # Current offset of the paginated collection. If we're on the first page,
      # it is always 0. If we're on the 2nd page and there are 30 entries per page,
      # the offset is 30. This property is useful if you want to render ordinals
      # besides your records: simply start with offset + 1.
      #
      def offset
        (current_page - 1) * per_page
      end

      # current_page - 1 or nil if there is no previous page
      def previous_page
        current_page > 1 ? (current_page - 1) : nil
      end

      # current_page + 1 or nil if there is no next page
      def next_page
        current_page < page_count ? (current_page + 1) : nil
      end
    end
  end
end
@@ -0,0 +1,166 @@
1
+ require 'logger'
2
+
3
module Stellr
  # Keeps the registry of collections, keyed by name, and forwards record
  # and search operations to them. The registry hash is extended with
  # MonitorMixin and every access to it happens inside +synchronize+.
  class Server
    include Utils::Shutdown
    protected :shutdown

    attr_accessor :config
    attr_reader :collections

    # Creates the configured directories, opens the log file +stellr.log+
    # in the log directory and starts with an empty collection registry.
    def initialize( config )
      @config = config
      create_directories
      @logger = Logger.new File.join(config.log_dir, 'stellr.log')
      @logger.level = Logger.const_get config.log_level.to_s.upcase
      @collections = {}
      @collections.extend MonitorMixin
    end

    # Adds +record+ (with optional +boost+) to the named collection.
    def add_record( collection_name, record, boost = nil )
      collection( collection_name ).add_record record, boost
    end
    alias :<< :add_record

    # Deletes +record+ from the named collection.
    def delete_record( collection_name, record )
      collection( collection_name ).delete_record record
    end

    # Size of the named collection, as reported by the collection itself.
    def size( collection_name )
      collection( collection_name ).size
    end

    # Initialize a collection.
    #
    # Before anything can be done with a collection, it has to be registered
    # through this method. When called for a non-existing collection, this will
    # also create the empty physical index. The given options are saved to a yml
    # file so they can be loaded again later on.
    #
    # If the collection is already registered with this server instance, the
    # existing collection object is returned and +options+ is ignored.
    # Otherwise the saved configuration is overwritten with the options
    # given, unless the options hash is nil, in which case the stored
    # configuration is loaded instead.
    # An already existing physical index won't be touched and used as is.
    # Remember that changing Ferret options like analyzers or field
    # configuration requires an index rebuild to be effective for existing
    # content. Stellr doesn't take care of this.
    #
    # If you access the server through the Stellr::Client class you don't need
    # to call +register+ explicitly as the client will do it when connecting.
    #
    # Name is the name of the collection to create. It may only contain
    # a-zA-Z0-9_-.
    #
    # Options is a hash, consisting of:
    # [+collection+] The collection implementation to use, may be one of :static or :rsync (default)
    # [+strategy+] The strategy implementation to use (optional, atm there is only :queueing)
    # [+fields+] +hash+ (see http://ferret.davebalmain.com/api/classes/Ferret/Index/FieldInfo.html)
    # [+recreate+] Recreate the index (defaults to +false+). A true value will lead to the deletion of any already indexed data.
    # [+analyzer+] The class name (String) of the Analyzer to use for this collection. By default, Ferret's StandardAnalyzer will be used.
    # [+field_defaults+] Default setting for unconfigured fields
    #
    # Example
    #   register 'standard_index', { :recreate => false,
    #                                :fields => { :author  => { :index => :untokenized,
    #                                                           :store => :no,
    #                                                           :term_vector => :with_offsets,
    #                                                           :boost => 2.0 },
    #                                             :content => { :index => :tokenized } }
    #                              }
    #
    def register( name, options = {} )
      @collections.synchronize do
        @collections[name] ||= create_collection( name, options )
      end
    end

    # Initializes a read-only virtual collection that may be used to search
    # across multiple physical collections.
    #
    # Returns the name of the collection to be used with further calls.
    def register_multi_collection( names, options = {} )
      # '.' is not allowed for regular collection names, so we are safe from name collisions
      key = ".multi_#{names.join '_'}"
      @collections.synchronize do
        @collections[key] ||= create_multi_collection( key, names.map { |name| collection(name) }, options )
      end
      key
    end

    # Returns the collection registered under +name+. A collection that is
    # not registered yet is initialized from its stored configuration;
    # create_collection raises if there is none.
    def collection( name )
      @collections.synchronize do
        unless @collections.has_key?( name )
          @logger.info "trying to initialize collection #{name} from stored configuration..."
          @collections[name] = create_collection( name, nil )
        end
        @collections[name]
      end
    end

    protected

    # Pass through commands to a collection: the first argument is taken as
    # the collection name, the remaining arguments (and the block, if any)
    # are sent to that collection.
    def method_missing(method, *args, &block)
      if args.size >= 1
        collection_name = args.shift
        return collection( collection_name ).send( method, *args, &block )
      end
      super
    end

    # Builds a MultiCollection over +collections+, handing it the server's
    # logger unless +options+ brings its own.
    def create_multi_collection( name, collections, options = {} )
      Stellr::Collections::MultiCollection.new name, collections, { :logger => @logger }.merge(options)
    end

    # initializes a new collection object
    #
    # if nil is given for options, the method tries to locate a previously
    # saved collection configuration and restore from it.
    #
    # Raises if the name is invalid, or if no options were given and no
    # stored configuration could be loaded.
    def create_collection( name, options )
      # \A and \z anchor the whole string. With ^ and $ a multi-line name
      # such as "ok\n../../x" would pass and end up in a file system path.
      raise "invalid collection name >#{name}<, may only contain a-zA-Z0-9_-" unless name =~ /\A[a-zA-Z0-9_-]+\z/
      # Object#untaint is gone in Ruby 3.2+; only call it where it exists.
      name.untaint if name.respond_to?( :untaint )

      save_config = !options.nil?
      options = load_collection_config( name ) if options.nil?
      raise "No options given for collection #{name} and no stored configuration found." if options.nil?

      # merge instead of []= so the caller's hash is left untouched
      options = options.merge( :path => File.join( @config.data_dir, name ) )
      save_collection_config name, options if save_config
      Collections::Base.create( name, {:logger => @logger}.merge(options) )
    end

    # Writes +options+ as YAML to the collection's configuration file.
    def save_collection_config( name, options )
      path = collection_config_path name
      # block form closes the file even if dumping or writing raises
      File.open( path, 'w' ) { |file| file << YAML.dump( options ) }
      @logger.info "wrote collection config to #{path}"
    end

    # Loads the stored configuration of the collection. Returns nil if the
    # file is not readable or cannot be loaded; a load failure is logged.
    def load_collection_config( name )
      path = collection_config_path name
      return nil unless File.readable?( path )

      conf = begin
        YAML.load( File.read(path) )
      rescue StandardError => e
        @logger.warn "could not load collection config from #{path}: #{e.message}"
        nil
      end
      @logger.info "loaded collection config from #{path}" unless conf.nil?
      conf
    end

    # Path of the YAML file holding the configuration of collection +name+.
    def collection_config_path( name )
      File.join @config.conf_dir, "#{name}.yml"
    end

    # called by shutdown
    def on_shutdown( mode )
      @collections.synchronize do
        @collections.each_value { |coll| coll.shutdown mode }
      end
    end

    # Makes sure the log, tmp, conf and data directories exist.
    def create_directories
      FileUtils.mkdir_p config.log_dir
      FileUtils.mkdir_p config.tmp_dir
      FileUtils.mkdir_p config.conf_dir
      FileUtils.mkdir_p config.data_dir
    end

  end
end
@@ -0,0 +1,4 @@
1
+ require 'stellr/strategies/base'
2
+ require 'stellr/strategies/blocking'
3
+ require 'stellr/strategies/queueing'
4
+
@@ -0,0 +1,16 @@
1
module Stellr
  module Strategies
    # Common base of the indexing strategies. A strategy wraps a collection
    # and forwards every call it does not define itself to that collection.
    class Base
      include Stellr::Utils::Shutdown

      # Keeps a reference to +collection+ and a shallow copy of +options+,
      # so changes a strategy makes to its options do not touch the caller's
      # hash.
      def initialize( collection, options )
        @collection = collection
        @options = options.dup
      end

      # Forwards unknown calls, including a given block, to the collection.
      def method_missing(name, *args, &block)
        @collection.send name, *args, &block
      end

      # Keeps +respond_to?+ in sync with the calls method_missing forwards.
      def respond_to_missing?(name, include_private = false)
        @collection.respond_to?(name, include_private) || super
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Stellr
  module Strategies

    # Blocking strategy. It adds nothing on top of Base, so every call is
    # forwarded straight to the collection and an index modifying method
    # only returns once the change has been done.
    class Blocking < Base; end

  end
end
13
+
@@ -0,0 +1,78 @@
1
module Stellr

  module Strategies

    # Queueing strategy. Any index modifying methods return immediately, actions
    # are queued and executed asynchronously in order of arrival.
    #
    # Options:
    # [+max_batch_size+] Maximum number of queued actions processed before
    #                    +batch_finished+ is called on the collection
    #                    (defaults to 200).
    class Queueing < Base

      # Sentinel pushed onto the queue on shutdown so that the indexing
      # thread wakes up from a blocking +deq+. Compared by identity.
      SHUTDOWN_MARKER = 'shutting down'.freeze

      # Sets up the queue and starts the indexing thread.
      def initialize( collection, options )
        super collection, options
        @options[:max_batch_size] ||= 200
        @queue = Queue.new
        @thread = spawn_indexing_thread
      end

      # Queues +record+ for addition. Returns +false+ if the strategy is
      # shutting down and the record was not queued, +true+ otherwise.
      def add_record( record, boost = nil )
        enqueue :add, [record, boost]
      end

      # Queues +record+ for deletion. Returns +false+ if the strategy is
      # shutting down and the record was not queued, +true+ otherwise.
      def delete_record( record )
        enqueue :delete, record
      end

      protected

      # Wakes up the indexing thread, waits for it to finish and then shuts
      # down the collection.
      def on_shutdown( mode )
        @queue << SHUTDOWN_MARKER # final queue item so that process_queue does not hang
        @thread.join
        # save_queue :TODO:
        @collection.shutdown mode
      end

      # called by the indexer thread as long as the server runs
      #
      # Processes one batch: blocks until an action arrives, then keeps
      # going until the queue is empty, +max_batch_size+ actions have been
      # processed, the shutdown marker shows up or an aborting shutdown is
      # detected. Errors are reported and do not end the indexing thread.
      def process_queue
        counter = 0
        max_batch_size = @options[:max_batch_size]
        begin
          while (record = @queue.deq) && !shutting_down?( :abort )
            # The marker is not a task; feeding it to process_record would
            # raise and skip batch_finished for the records before it.
            break if SHUTDOWN_MARKER.equal?( record )
            process_record( *record )
            # >= so that a batch holds at most max_batch_size actions
            break if ((counter += 1) >= max_batch_size) || @queue.empty?
          end
          @collection.batch_finished
        rescue StandardError, ScriptError => e
          puts "OH NO! #{e}\n#{e.backtrace.join "\n"}"
        end
      end

      # process a single task from the queue
      def process_record( action, record )
        case action
        when :add
          @collection.add_record( *record )
        when :delete
          @collection.delete_record record
        else
          raise "UnknownAction"
        end
      end

      # Spawns the thread executing the main loop
      def spawn_indexing_thread
        Thread.new do
          process_queue until shutting_down?
        end
      end

      # add a task to the queue
      def enqueue( action, record )
        return false if shutting_down?
        @queue << [ action, record ]
        true
      end

    end

  end
end