stellr 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ == 0.1.0 / 2008-08-05
2
+
3
+ * Initial release
4
+
data/Manifest.txt ADDED
@@ -0,0 +1,36 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/stellr
6
+ bin/stellr-search
7
+ config/stellr.yml
8
+ lib/stellr.rb
9
+ lib/stellr/client.rb
10
+ lib/stellr/collections.rb
11
+ lib/stellr/collections/base.rb
12
+ lib/stellr/collections/searchable_collection.rb
13
+ lib/stellr/collections/writeable_collection.rb
14
+ lib/stellr/collections/rsync.rb
15
+ lib/stellr/collections/static.rb
16
+ lib/stellr/collections/multi_collection.rb
17
+ lib/stellr/config.rb
18
+ lib/stellr/search.rb
19
+ lib/stellr/search/search_result.rb
20
+ lib/stellr/search/search_results.rb
21
+ lib/stellr/server.rb
22
+ lib/stellr/strategies.rb
23
+ lib/stellr/strategies/base.rb
24
+ lib/stellr/strategies/blocking.rb
25
+ lib/stellr/strategies/queueing.rb
26
+ lib/stellr/utils.rb
27
+ lib/stellr/utils/shutdown.rb
28
+ lib/stellr/utils/observable.rb
29
+ test/fixtures/movies.yml
30
+ test/stellr_test.rb
31
+ test/test_collections_base.rb
32
+ test/test_helper.rb
33
+ test/test_rsync_collection.rb
34
+ test/test_server.rb
35
+ test/test_static_collection.rb
36
+ test/test_stellr.rb
data/README.txt ADDED
@@ -0,0 +1,109 @@
1
+ stellr
2
+ by Jens Kraemer and Benjamin Krause
3
+ http://rubyforge.org/projects/stellr
4
+
5
+ == DESCRIPTION:
6
+
7
+ Stellr is a Ferret based standalone search server featuring a DRb frontend and (soon to come) an HTTP frontend. It can handle multiple indexes, including multi-index searches. A client library and a simple command line query tool are included with the gem.
8
+
9
+ == FEATURES:
10
+
11
+ * DRb frontend
12
+ * easy to use client library (see below)
13
+ * multi index search
14
+ * Index rotation
15
+ Stellr always keeps two versions of your index around - one is used in a
16
+ multi threaded, read only way to handle incoming search requests, while the
17
+ other one is written to when you index something.
18
+ Using the switch function you may decide when to switch over searching from
19
+ the old index to the new one. Then, changes will be synced, and searches will
20
+ see the new or updated data from before the switch call.
21
+ * Index synchronization
22
+ Two kinds of synchronization methods are supported for now: rsync, using rsync
23
+ to copy over the changes from one index to the other, and static, which will
24
+ completely replace the old index with the new one. While the latter is suitable
25
+ for indexes which you rebuild completely from time to time, the former is good
26
+ for large indexes that are updated frequently or that are too large for frequent
27
+ rebuilds.
28
+
29
+ == SYNOPSIS:
30
+
31
+ * start the server:
32
+
33
+ stellr -c /path/to/config.yml start
34
+
35
+ * index something
36
+
37
+ require 'stellr/client'
38
+ stellr = Stellr::Client.new('druby://localhost:9010')
39
+ config = {
40
+ :collection => :static, # static collections are rebuilt from scratch every time changes occur
41
+ # :collection => :rsync, # use the rsync collection for indexes that are updated frequently and/or are
42
+ # too large to be rebuilt from scratch every time they're updated
43
+ :analyzer => 'My::Analyzer',
44
+ :fields => {
45
+ :title => { :boost => 5, :store => :yes },
46
+ :content => { :store => :no }
47
+ }
48
+ }
49
+ collection = stellr.connect('my_collection', config)
50
+ collection.add_record(:id => 1, :title => 'Some Title', :content => 'Content')
51
+ collection.switch # switch searching over to the index that was just written to
52
+
53
+ * command line search
54
+
55
+ stellr-search my_collection 'query string'
56
+
57
+ for now, this will display the first 10 hits only.
58
+
59
+ * search via the client library
60
+
61
+ require 'stellr/client'
62
+ stellr = Stellr::Client.new('druby://localhost:9010')
63
+ collection = stellr.connect 'my_collection',
64
+ :analyzer => 'My::Analyzer' # the analyzer to use for query parsing
65
+ results = collection.search 'querystring',
66
+ :page => 1, :per_page => 100, # built in pagination support
67
+ :fields => [:title, :content], # the fields to search in
68
+ :get_fields => [ :title ] # the fields to fetch for result display
69
+
70
+ * multi-collection search
71
+ pass an array of collection names to search multiple
72
+ collections at once:
73
+
74
+ collection = stellr.connect [ 'my_collection', 'another_collection' ],
75
+ :analyzer => 'My::Analyzer'
76
+
77
+ == REQUIREMENTS:
78
+
79
+ * Ferret (gem install ferret)
80
+ * daemons (gem install daemons)
81
+
82
+ == INSTALL:
83
+
84
+ * sudo gem install stellr
85
+
86
+ == LICENSE:
87
+
88
+ (The MIT License)
89
+
90
+ Copyright (c) 2007 FIX
91
+
92
+ Permission is hereby granted, free of charge, to any person obtaining
93
+ a copy of this software and associated documentation files (the
94
+ 'Software'), to deal in the Software without restriction, including
95
+ without limitation the rights to use, copy, modify, merge, publish,
96
+ distribute, sublicense, and/or sell copies of the Software, and to
97
+ permit persons to whom the Software is furnished to do so, subject to
98
+ the following conditions:
99
+
100
+ The above copyright notice and this permission notice shall be
101
+ included in all copies or substantial portions of the Software.
102
+
103
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
104
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
105
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
106
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
107
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
108
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
109
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+ $:.unshift 'lib'
4
+ require './lib/stellr.rb'
5
+
6
+ class Hoe
7
+ def extra_deps
8
+ @extra_deps.reject do |x|
9
+ Array(x).first == 'hoe'
10
+ end
11
+ end
12
+ end
13
+
14
+ Hoe.new('stellr', Stellr::VERSION) do |p|
15
+ p.rubyforge_name = 'stellr'
16
+ p.author = [ 'Benjamin Krause', 'Jens Krämer' ]
17
+ p.email = [ 'bk@benjaminkrause.com', 'jk@jkraemer.net' ]
18
+ p.summary = 'Stellr is a Ferret based standalone search server.'
19
+ p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
20
+ p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
21
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
22
+ p.extra_deps << [ 'ferret', '>= 0.11.6', 'daemons', '>= 1.0.10', 'fastthread', '>= 1.0' ]
23
+ end
24
+
25
+ desc "Release and publish documentation"
26
+ task :repubdoc => [:release, :publish_docs]
27
+
28
+ # vim:syntax=ruby
data/bin/stellr ADDED
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'optparse'
5
+ require 'daemons'
6
+ require 'fileutils'
7
+
8
+ begin
9
+ gem 'stellr'
10
+ rescue Gem::LoadError
11
+ $:.unshift(File.dirname(__FILE__), "../lib")
12
+ end
13
+
14
+ require 'stellr'
15
+
16
+ include Stellr
17
+
18
+ commands = %w( start stop restart status )
19
+
20
+ options = OptionParser.new do |opts|
21
+
22
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options] #{commands.join('|')}"
23
+
24
+ opts.on("-c", "--config CONFIG", "config file location") do |value|
25
+ @config_file = value
26
+ end
27
+
28
+ opts.on("-h", "--help", "show this help") do
29
+ @help = true
30
+ end
31
+ end
32
+
33
+ cmdline = options.parse( ARGV )
34
+ command = cmdline.last
35
+
36
+ if @help or not commands.include?( command )
37
+ puts options.to_s
38
+ else
39
+ config = Stellr::Config.new @config_file
40
+ FileUtils.mkdir_p config.tmp_dir
41
+ FileUtils.mkdir_p config.log_dir
42
+
43
+ Daemons.run_proc( "stellr", :dir => config.tmp_dir,
44
+ :dir_mode => :normal,
45
+ :log_output => true,
46
+ :ARGV => [ command ] ) do
47
+ Stellr::start_server config
48
+ end
49
+
50
+ if command == "start"
51
+ sleep 2
52
+ pid = Daemons::PidFile.new( config.tmp_dir, 'stellr' )
53
+ if pid.exist? and Daemons::Pid.running?(pid.pid)
54
+ exit 0
55
+ else
56
+ puts "not started. Watch #{config.tmp_dir}/stellr.output for errors."
57
+ exit 1
58
+ end
59
+ end
60
+
61
+ # FIXME how to correctly shut down the running server so indexes are closed
62
+ # and such?
63
+ # look for some hook provided by daemon tools, or trap SIGTERM (SIGKILL cannot be trapped)
64
+ end
data/bin/stellr-search ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'optparse'
5
+
6
+ begin
7
+ gem 'stellr'
8
+ rescue Gem::LoadError
9
+ $:.unshift(File.dirname(__FILE__), "../lib")
10
+ end
11
+
12
+ require 'stellr'
13
+ require 'stellr/client'
14
+
15
+ options = OptionParser.new do |opts|
16
+
17
+ opts.banner = "Usage: #{File.basename(__FILE__)} [options] collection query"
18
+
19
+ opts.on("-c", "--config CONFIG", "config file location") do |value|
20
+ @config_file = value
21
+ end
22
+
23
+ opts.on("-h", "--help", "show this help") do
24
+ @help = true
25
+ end
26
+ end
27
+
28
+ cmdline = options.parse( ARGV )
29
+ query = cmdline.pop rescue nil
30
+ collection_name = cmdline.pop
31
+
32
+ if @help
33
+ puts options.to_s
34
+ else
35
+ config = Stellr::Config.new @config_file
36
+ client = Stellr::Client.new config.drb_uri
37
+ collection = client.connect collection_name
38
+ puts "searching collection #{collection_name} for >#{query}< ..."
39
+ results = collection.search query, :limit => 10
40
+ if results.any?
41
+ puts "showing first 10 of #{results.total_hits} results"
42
+ results.each do |r|
43
+ puts r[:id]
44
+ end
45
+ else
46
+ puts "no results for >#{query}<"
47
+ end
48
+ end
49
+
50
+
data/config/stellr.yml ADDED
@@ -0,0 +1,8 @@
1
+ :port: 9010
2
+ :host: 'localhost'
3
+ :base_dir: '/var/stellr'
4
+ :data_dir: 'data'
5
+ :log_dir: 'log'
6
+ :tmp_dir: 'tmp'
7
+ :conf_dir: 'conf'
8
+ :log_level: :warn
data/lib/stellr.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'rubygems'
2
+ require 'thread'
3
+ begin
4
+ require 'fastthread'
5
+ rescue LoadError
6
+ puts "couldn't load fastthread"
7
+ end
8
+ require 'drb'
9
+ require 'monitor'
10
+ require 'ferret'
11
+ require 'stellr/utils'
12
+ require 'stellr/server'
13
+ require 'stellr/config'
14
+ require 'stellr/collections'
15
+ require 'stellr/strategies'
16
+ require 'stellr/search'
17
+
18
+ $SAFE = 1
19
+
20
+ module Stellr
21
+ VERSION = '0.1.0'
22
+
23
+ def self.start_server( config )
24
+ if config.script
25
+ begin
26
+ load config.script
27
+ rescue Exception => e
28
+ puts "\nerror loading script #{config.script}: #{e}\n#{e.backtrace.join("\n")}"
29
+ exit 1
30
+ end
31
+ end
32
+ stellr = Server.new config
33
+ server = DRb.start_service config.drb_uri, stellr
34
+ server.thread.join
35
+ end
36
+
37
+ end
@@ -0,0 +1,78 @@
1
+ require 'drb'
2
+ require 'stellr/search'
3
+
4
+ module Stellr
5
+
6
+ # = Stellr client
7
+ #
8
+ # This class acts as a wrapper around the connection to a Stellr server.
9
+ # Typical usage looks like this:
10
+ #
11
+ # stellr = Stellr::Client.new('druby://myserver.com:9000')
12
+ # collection = stellr.connect('myindex', :fields => { :content => { :store => :yes } })
13
+ # collection << { :content => 'lorem ipsum' }
14
+ # collection.switch
15
+ # results = collection.search('lorem', :page => 1, :per_page => 10)
16
+ #
17
+ class Client
18
+
19
+ def initialize( drb_uri )
20
+ @server = DRbObject.new(nil, drb_uri)
21
+ end
22
+
23
+ # connects to a remote collection and returns a stub that can be used to
24
+ # add records to the collection and to search for them.
25
+ #
26
+ # specify an array of collection names for the first argument to search
27
+ # multiple collections at once. No indexing is possible with such a
28
+ # MultiCollection.
29
+ def connect( collection_name, options = nil )
30
+ if Array === collection_name
31
+ multi_connect collection_name, options
32
+ else
33
+ @server.register collection_name, options
34
+ ClientCollection.new @server, collection_name
35
+ end
36
+ end
37
+
38
+ # Connects to multiple remote collections at once in order to run
39
+ # cross-collection searches.
40
+ def multi_connect( collection_names, options = {} )
41
+ MultiCollection.new @server, collection_names, options
42
+ end
43
+
44
+ end
45
+
46
+ # Wrapper around a remote collection.
47
+ #
48
+ # See the documentation of the collection class you use for more information.
49
+ class ClientCollection
50
+
51
+ def initialize( server, name )
52
+ @name = name
53
+ @server = server
54
+ end
55
+
56
+ # Disconnects this collection from the server.
57
+ def disconnect
58
+ @server = nil
59
+ end
60
+
61
+ def method_missing( method, *args )
62
+ raise "use of disconnected collection" if @server.nil?
63
+ @server.send method, @name, *args
64
+ end
65
+
66
+ end
67
+
68
+ # This client collection class allows searching multiple server-side
69
+ # collections at once.
70
+ class MultiCollection < ClientCollection
71
+ def initialize( server, names, options = {} )
72
+ @server = server
73
+ @name = @server.register_multi_collection names, options
74
+ end
75
+ end
76
+
77
+ end
78
+
@@ -0,0 +1,6 @@
1
+ require 'stellr/collections/base'
2
+ require 'stellr/collections/searchable_collection'
3
+ require 'stellr/collections/writeable_collection'
4
+ require 'stellr/collections/static'
5
+ require 'stellr/collections/rsync'
6
+ require 'stellr/collections/multi_collection'
@@ -0,0 +1,79 @@
1
+ module Stellr
2
+ module Collections
3
+
4
+ # Base class for collection implementations
5
+ class Base
6
+ include Ferret::Index
7
+ include Stellr::Utils::Shutdown
8
+ include Stellr::Utils::Observable
9
+ attr_reader :name
10
+
11
+ def self.create( name, options )
12
+ collection_class = collection_class_for_options options
13
+ collection = collection_class.new( name, options )
14
+ if strategy_class = strategy_class_for_options( options )
15
+ strategy_class.new( collection, options )
16
+ else
17
+ collection
18
+ end
19
+ end
20
+
21
+ def initialize( name, options )
22
+ @logger = options[:logger] || (require 'logger'; Logger.new 'stellr.log')
23
+ @name = name
24
+ @options = options.dup
25
+ end
26
+
27
+ # called whenever the strategy thinks it's a good time to do something
28
+ # time-consuming (like switching indexes, optimizing, flushing, ...)
29
+ def batch_finished
30
+ end
31
+
32
+ def on_shutdown( mode )
33
+ close
34
+ end
35
+
36
+ # close this collection
37
+ def close
38
+ end
39
+
40
+ protected
41
+
42
+
43
+ def collection_directory
44
+ @options[:path]
45
+ end
46
+
47
+
48
+ def self.collection_class_for_options( options )
49
+ if (c = options.delete(:collection))
50
+ options[:collection_class] = collection_class_for_key c
51
+ end
52
+ Object.module_eval("::#{options[:collection_class] || 'Stellr::Collections::RSync'}", __FILE__, __LINE__)
53
+ end
54
+ def self.collection_class_for_key(key)
55
+ case key
56
+ when :static
57
+ 'Stellr::Collections::Static'
58
+ when :rsync
59
+ 'Stellr::Collections::RSync'
60
+ end
61
+ end
62
+
63
+ def self.strategy_class_for_options( options )
64
+ if (c = options.delete(:strategy))
65
+ options[:strategy_class] = strategy_class_for_key c
66
+ end
67
+ Object.module_eval("::#{options[:strategy_class]}", __FILE__, __LINE__) if options[:strategy_class]
68
+ end
69
+ def self.strategy_class_for_key(key)
70
+ case key
71
+ when :queueing
72
+ 'Stellr::Strategies::Queueing'
73
+ end
74
+ end
75
+ end
76
+
77
+ end
78
+
79
+ end