stellr 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ require 'yaml'
2
+ require 'erb'
3
+
4
+ module Stellr
5
+ # = Config
6
+ #
7
+ # The +stellr+ and +stellr-search+ commands both rely on a YAML file
8
+ # to provide some basic configuration:
9
+ #
10
+ # +:port+ Port the server should listen to
11
+ # +:host+ Hostname or IP of the server
12
+ # +:script+ Optional ruby file to load during startup. This is the place to load custom code like self made Analyzers you intend to use.
13
+ # +:log_level+ Log level, default is +:warn+
14
+ # +:base_dir+ Base directory where the server will store index data, log
15
+ # files and configuration data.
16
+ # +:data_dir+ Index directory, relative to +base_dir+. Defaults to +data+
17
+ # +:log_dir+ Log file directory, relative to +base_dir+. Defaults to +log+
18
+ # +:conf_dir+ Configuration directory, relative to +base_dir+. Defaults to +conf+. Here stellr will keep the configuration of registered collections, one YAML file per collection.
19
+ # +:tmp_dir+ Temp directory, relative to +base_dir+. Defaults to +tmp+
20
+ #
21
+ #
22
# Loads the stellr configuration and exposes it to the rest of the program.
#
# The settings come from a YAML file that is first run through ERB. They are
# layered on top of DEFAULTS and finally overridden by the extra options given
# to the constructor. Every configuration key can be read as a method, e.g.
# +config.port+ or +config.log_level+.
class Config
  DEFAULTS = { :port      => 9010,
               :host      => 'localhost',
               :base_dir  => '/var/stellr',
               :data_dir  => 'data',
               :log_dir   => 'log',
               :tmp_dir   => 'tmp',
               :conf_dir  => 'conf',
               :log_level => :warn }

  # Configfile search order:
  # - argument
  # - /etc/stellr.yml
  # - +gem_directory+/config/stellr.yml
  #
  # +extra_options+ take precedence over anything read from the file.
  def initialize( config_file = nil, extra_options = {} )
    load_config config_file
    @config.update( extra_options || {} )
    # We trust our config file. Object#untaint only exists on older Rubies
    # (it was removed in Ruby 3.2), so it is called only where available.
    @config.each { |_key, value| value.untaint if value.respond_to?( :untaint ) }
  end

  # Index directory, resolved against +base_dir+.
  def data_dir
    resolve_directory_name( :data_dir )
  end

  # Log file directory, resolved against +base_dir+.
  def log_dir
    resolve_directory_name( :log_dir )
  end

  # Temp directory, resolved against +base_dir+.
  def tmp_dir
    resolve_directory_name( :tmp_dir )
  end

  # Configuration directory, resolved against +base_dir+.
  def conf_dir
    resolve_directory_name( :conf_dir )
  end

  # NOTE(review): intentionally left as in the original - this method has no
  # body and therefore always returns nil.
  def collection_dir
  end

  # URI built from the configured +host+ and +port+.
  def drb_uri
    "druby://#{host}:#{port}"
  end

  protected

  # Gives read access to every configuration key as a method.
  # Raises NameError for names that are not configuration keys.
  def method_missing( method_name, *args )
    return @config[method_name] if @config && @config.has_key?( method_name )
    raise NameError, "unknown configuration key: #{method_name}"
  end

  # Keeps +respond_to?+ in line with the keys served by +method_missing+.
  def respond_to_missing?( method_name, include_private = false )
    ( !@config.nil? && @config.has_key?( method_name ) ) || super
  end

  # Joins +base_dir+ and the value stored under +sub_dir+.
  # Raises NameError if +sub_dir+ is not a configuration key.
  def resolve_directory_name( sub_dir )
    raise NameError, "unknown configuration key: #{sub_dir}" unless @config.has_key?( sub_dir )
    File.join( base_dir, @config[sub_dir] )
  end

  # Reads the configuration file and merges it over DEFAULTS.
  # An empty file yields plain DEFAULTS instead of an error.
  def load_config( config_file )
    path   = locate_config_file( config_file )
    loaded = YAML.load( ERB.new( File.read( path ) ).result )
    @config = DEFAULTS.merge( loaded || {} )
  end

  # Returns the first existing file of the documented search order. If none
  # of the candidates exists the bundled default path is returned, so that
  # reading it reports the missing file.
  def locate_config_file( config_file )
    bundled    = File.join( File.dirname(__FILE__), "../../config/stellr.yml" )
    candidates = [ config_file, "/etc/stellr.yml" ].compact
    candidates.find { |candidate| File.exist?( candidate ) } || bundled
  end
end
88
+ end
@@ -0,0 +1,2 @@
1
+ require 'stellr/search/search_result'
2
+ require 'stellr/search/search_results'
@@ -0,0 +1,21 @@
1
+ module Stellr
2
+ module Search
3
+
4
+ # A single search result
5
# One hit of a search: the document id, its score and the field values
# handed in together with it.
class SearchResult
  attr_reader :score
  attr_reader :doc_id

  # +id+ becomes +doc_id+, +score+ becomes +score+ and +field_data+ is the
  # object that +field+ later indexes by field name.
  def initialize(id, score, field_data)
    @doc_id, @score, @field_data = id, score, field_data
  end

  # Returns whatever the field data holds under +name+.
  def field(name)
    @field_data[name]
  end
  alias_method :[], :field
end
20
+ end
21
+ end
@@ -0,0 +1,50 @@
1
+ module Stellr
2
+ module Search
3
+
4
+ # Thin wrapper around an array of search results
5
# Thin wrapper around an array of search results.
#
# Every method that is not defined here is forwarded to the wrapped array,
# so an instance can be used like an Array of results that additionally
# carries pagination data (+total_hits+, +current_page+, +per_page+).
class SearchResults
  attr_accessor :total_hits, :current_page, :per_page
  alias total_entries total_hits

  def initialize
    @results = []
  end

  # Forwards unknown calls to the wrapped array. Only methods the array
  # publicly responds to are forwarded, so private methods such as
  # Kernel#system cannot be reached through this object.
  def method_missing(symbol, *args, &block)
    return super unless @results.respond_to?(symbol)
    @results.send(symbol, *args, &block)
  end

  # Makes +respond_to?+ (with one or two arguments, Symbol or String names)
  # report the methods that +method_missing+ forwards.
  def respond_to_missing?(name, include_private = false)
    @results.respond_to?(name, include_private) || super
  end


  # code from here on derived from will_paginate's collection.rb

  # The total number of pages. Computed on every call so the value follows
  # later changes of +per_page+ or +total_hits+; 0 when +per_page+ is unset
  # or not positive.
  def total_pages
    return 0 unless per_page && per_page > 0
    ((total_hits || 0) / per_page.to_f).ceil
  end
  alias page_count total_pages

  # Current offset of the paginated collection. If we're on the first page,
  # it is always 0. If we're on the 2nd page and there are 30 entries per page,
  # the offset is 30. This property is useful if you want to render ordinals
  # besides your records: simply start with offset + 1.
  #
  def offset
    (current_page - 1) * per_page
  end

  # current_page - 1 or nil if there is no previous page
  def previous_page
    current_page > 1 ? (current_page - 1) : nil
  end

  # current_page + 1 or nil if there is no next page
  def next_page
    current_page < page_count ? (current_page + 1) : nil
  end
end
49
+ end
50
+ end
@@ -0,0 +1,166 @@
1
+ require 'logger'
2
+
3
+ module Stellr
4
# The stellr server object.
#
# Keeps a registry of collections (guarded by a monitor) and passes record
# and search operations through to the collection named in each call.
# Helper methods are +protected+ so they are not part of the public API.
class Server
  include Utils::Shutdown
  protected :shutdown

  attr_accessor :config
  attr_reader :collections

  # Creates the configured directories, opens the log file in +log_dir+ and
  # sets up the (initially empty) collection registry.
  def initialize( config )
    @config = config
    create_directories
    @logger = Logger.new File.join(config.log_dir, 'stellr.log')
    @logger.level = Logger.const_get config.log_level.to_s.upcase
    @collections = {}
    @collections.extend MonitorMixin
  end

  # Adds +record+ to the named collection.
  def add_record( collection_name, record, boost = nil )
    collection( collection_name ).add_record record, boost
  end
  alias :<< :add_record

  # Deletes +record+ from the named collection.
  def delete_record( collection_name, record )
    collection( collection_name ).delete_record record
  end

  # Size of the named collection, as reported by the collection itself.
  def size( collection_name )
    collection( collection_name ).size
  end

  # Initialize a collection.
  #
  # Before anything can be done with a collection, it has to be registered
  # through this method. When called for a non-existing collection, this will
  # also create the empty physical index. The given options are saved to a yml
  # file so they can be loaded again later on.
  #
  # Calling register for an existing collection will update the saved index
  # configuration from the options given, unless the options hash is nil.
  # An already existing physical index won't be touched and used as is.
  # Remember that changing Ferret options like analyzers or field
  # configuration requires an index rebuild to be effective for existing
  # content. Stellr doesn't take care of this.
  #
  # A collection that is already loaded in this server instance is returned
  # as is; the options are not applied again in that case.
  #
  # If you access the server through the Stellr::Client class you don't need
  # to call +register+ explicitly as the client will do it when connecting.
  #
  # Name is the name of the collection to create.
  #
  # Options is a hash, consisting of:
  # [+collection+] The collection implementation to use, may be one of :static or :rsync (default)
  # [+strategy+] The strategy implementation to use (optional, atm there is only :queueing)
  # [+fields+] +hash+ (see http://ferret.davebalmain.com/api/classes/Ferret/Index/FieldInfo.html)
  # [+recreate+] Recreate the index (defaults to +false+). A true value will lead to the deletion of any already indexed data.
  # [+analyzer+] The class name (String) of the Analyzer to use for this collection. By default, Ferret's StandardAnalyzer will be used.
  # [+field_defaults+] Default setting for unconfigured fields
  #
  # Example
  #   register 'standard_index', { :recreate => false,
  #                                :fields => { :author  => { :index => :untokenized,
  #                                                           :store => :no,
  #                                                           :term_vector => :with_offsets,
  #                                                           :boost => 2.0 },
  #                                             :content => { :index => :tokenized } }
  #                              }
  #
  #
  def register( name, options = {} )
    @collections.synchronize do
      @collections[name] ||= create_collection( name, options )
    end
  end

  # Initializes a read-only virtual collection that may be used to search
  # across multiple physical collections.
  #
  # Returns the name of the collection to be used with further calls.
  def register_multi_collection( names, options = {} )
    key = ".multi_#{names.join '_'}" # '.' is not allowed for regular collection names, so we are safe from name collisions
    @collections.synchronize do
      @collections[key] ||= create_multi_collection( key, names.map{ |name| collection(name) }, options )
    end
    return key
  end

  # Returns the collection registered under +name+. A collection that is not
  # loaded yet is initialized from its stored configuration; if there is
  # none, +create_collection+ raises.
  def collection( name )
    @collections.synchronize do
      unless @collections.has_key?( name )
        @logger.info "trying to initialize collection #{name} from stored configuration..."
        @collections[name] = create_collection( name, nil )
      end
      @collections[name]
    end
  end

  protected

  # pass through commands to collection
  #
  # The first argument is taken as the collection name, the remaining
  # arguments (and a block, if given) are handed to the collection.
  def method_missing(method, *args, &block)
    if args.size >= 1
      collection_name = args.shift
      return collection( collection_name ).send( method, *args, &block )
    end
    super
  end

  def create_multi_collection( name, collections, options = {} )
    Stellr::Collections::MultiCollection.new name, collections, { :logger => @logger }.merge(options)
  end

  # initializes a new collection object
  #
  # if nil is given for options, the method tries to locate a previously
  # saved collection configuration and restore from it.
  #
  # Raises if the name is invalid or if no options are available.
  def create_collection( name, options )
    # \A and \z anchor the whole string; ^ and $ would accept a multi-line
    # name whose first line is valid, and the name ends up in file paths.
    unless name.is_a?( String ) && name =~ /\A[a-zA-Z0-9_-]+\z/
      raise "invalid collection name >#{name}<, may only contain a-zA-Z0-9_-"
    end
    name.untaint if name.respond_to?( :untaint ) # not available on Ruby >= 3.2

    save_config = true
    if options.nil?
      options = load_collection_config name
      save_config = false
    end
    raise "No options given for collection #{name} and no stored configuration found." if options.nil?

    # merge instead of assigning so the caller's hash is left untouched
    options = options.merge( :path => File.join( @config.data_dir, name ) )
    save_collection_config name, options if save_config
    return Collections::Base.create( name, {:logger => @logger}.merge(options) )
  end

  # Writes +options+ as YAML to the collection's configuration file.
  def save_collection_config( name, options )
    path = collection_config_path name
    # block form closes the file even if writing fails
    File.open( path, 'w' ) { |file| file << YAML.dump(options) }
    @logger.info "wrote collection config to #{path}"
  end

  # Reads the stored configuration of a collection.
  #
  # Returns nil if the file is not readable, is empty or cannot be parsed;
  # a parse or read failure is logged instead of being silently dropped.
  def load_collection_config( name )
    path = collection_config_path name
    return nil unless File.readable?( path )
    conf = YAML.load( File.read(path) )
    return nil unless conf
    @logger.info "loaded collection config from #{path}"
    conf
  rescue StandardError => e
    @logger.warn "could not load collection config from #{path}: #{e.message}"
    nil
  end

  def collection_config_path( name )
    File.join @config.conf_dir, "#{name}.yml"
  end

  # called by shutdown
  def on_shutdown( mode )
    @collections.synchronize do
      @collections.values.each { |coll| coll.shutdown mode }
    end
  end

  def create_directories
    FileUtils.mkdir_p config.log_dir
    FileUtils.mkdir_p config.tmp_dir
    FileUtils.mkdir_p config.conf_dir
    FileUtils.mkdir_p config.data_dir
  end

end
166
+ end
@@ -0,0 +1,4 @@
1
+ require 'stellr/strategies/base'
2
+ require 'stellr/strategies/blocking'
3
+ require 'stellr/strategies/queueing'
4
+
@@ -0,0 +1,16 @@
1
+ module Stellr
2
+ module Strategies
3
# Common base of the indexing strategies.
#
# A strategy wraps a collection: every call the strategy does not define
# itself is handed on to that collection.
class Base
  include Stellr::Utils::Shutdown

  # +collection+ is the wrapped collection, +options+ a hash of settings.
  # The options are copied so later changes made by the strategy do not
  # affect the caller's hash; nil is treated as "no options".
  def initialize( collection, options )
    @collection = collection
    @options = ( options || {} ).dup
  end

  # Hands unknown calls, including a block if one is given, to the
  # collection.
  # NOTE(review): respond_to? is deliberately left untouched here, because
  # the Shutdown mixin is not visible from this file and may rely on it.
  def method_missing(name, *args, &block)
    @collection.send name, *args, &block
  end
end
15
+ end
16
+ end
@@ -0,0 +1,13 @@
1
+ module Stellr
2
+
3
+ module Strategies
4
+
5
+ # Blocking strategy. Any index modifying methods will block until the change has
6
+ # been done.
7
# Defines no methods of its own; all behaviour is inherited from Base.
class Blocking < Base; end
10
+
11
+ end
12
+ end
13
+
@@ -0,0 +1,78 @@
1
+ module Stellr
2
+
3
+ module Strategies
4
+
5
+ # Queueing strategy. Any index modifying methods return immediately, actions
6
+ # are queued and executed asynchronously in order of arrival.
7
+ #
8
# Strategy that puts add/delete requests into a queue and lets a background
# thread apply them to the collection in batches.
#
# Options:
# [+max_batch_size+] maximum number of queued tasks handled before
#                    +batch_finished+ is called on the collection
#                    (defaults to 200)
class Queueing < Base

  # Queue item that wakes up the indexing thread on shutdown. Regular items
  # are always [action, record] arrays, so it cannot be mistaken for a task.
  SHUTDOWN_MARKER = :shutting_down

  def initialize( collection, options )
    super collection, options
    @options[:max_batch_size] ||= 200
    @queue = Queue.new
    @thread = spawn_indexing_thread
  end

  # Queues +record+ for addition. Returns false if the task was rejected
  # because a shutdown is in progress, true otherwise.
  def add_record( record, boost = nil )
    enqueue :add, [record, boost]
  end

  # Queues +record+ for deletion. Returns false if the task was rejected
  # because a shutdown is in progress, true otherwise.
  def delete_record( record )
    enqueue :delete, record
  end

  protected

  def on_shutdown( mode )
    @queue << SHUTDOWN_MARKER # last queue item, so that process_queue does not hang
    @thread.join
    # save_queue :TODO:
    @collection.shutdown mode
  end

  # called by the indexer thread as long as the server runs
  #
  # Blocks until a task arrives, then handles at most +max_batch_size+
  # tasks (fewer if the queue runs empty) and calls +batch_finished+ on the
  # collection. The shutdown marker ends the batch without being treated as
  # a task. Errors are reported and do not end the indexing thread.
  def process_queue
    processed = 0
    max_batch_size = @options[:max_batch_size]
    begin
      loop do
        task = @queue.deq
        break if task == SHUTDOWN_MARKER || shutting_down?( :abort )
        process_record( *task )
        processed += 1
        break if processed >= max_batch_size || @queue.empty?
      end
      @collection.batch_finished
    rescue StandardError => e
      report_error e
    end
  end

  # process a single task from the queue
  def process_record( action, record )
    case action
    when :add
      @collection.add_record( *record )
    when :delete
      @collection.delete_record record
    else
      raise "UnknownAction"
    end
  end

  # Spawns the thread executing the main loop
  def spawn_indexing_thread
    Thread.new do
      process_queue while !shutting_down?
    end
  end

  # add a task to the queue
  def enqueue( action, record )
    return false if shutting_down?
    @queue << [ action, record ]
    true
  end

  # Reports an error raised while processing the queue.
  # NOTE(review): uses a logger if the options carry one under :logger -
  # confirm that strategies receive it; otherwise the message goes to
  # standard output as before.
  def report_error( error )
    message = "OH NO! #{error}\n#{( error.backtrace || [] ).join "\n"}"
    logger = @options[:logger]
    if logger.respond_to?( :error )
      logger.error message
    else
      puts message
    end
  end

end
76
+
77
+ end
78
+ end