stellr 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ == 0.1.0 / 2008-08-05
2
+
3
+ * Initial release
4
+
data/Manifest.txt ADDED
@@ -0,0 +1,36 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/stellr
6
+ bin/stellr-search
7
+ config/stellr.yml
8
+ lib/stellr.rb
9
+ lib/stellr/client.rb
10
+ lib/stellr/collections.rb
11
+ lib/stellr/collections/base.rb
12
+ lib/stellr/collections/searchable_collection.rb
13
+ lib/stellr/collections/writeable_collection.rb
14
+ lib/stellr/collections/rsync.rb
15
+ lib/stellr/collections/static.rb
16
+ lib/stellr/collections/multi_collection.rb
17
+ lib/stellr/config.rb
18
+ lib/stellr/search.rb
19
+ lib/stellr/search/search_result.rb
20
+ lib/stellr/search/search_results.rb
21
+ lib/stellr/server.rb
22
+ lib/stellr/strategies.rb
23
+ lib/stellr/strategies/base.rb
24
+ lib/stellr/strategies/blocking.rb
25
+ lib/stellr/strategies/queueing.rb
26
+ lib/stellr/utils.rb
27
+ lib/stellr/utils/shutdown.rb
28
+ lib/stellr/utils/observable.rb
29
+ test/fixtures/movies.yml
30
+ test/stellr_test.rb
31
+ test/test_collections_base.rb
32
+ test/test_helper.rb
33
+ test/test_rsync_collection.rb
34
+ test/test_server.rb
35
+ test/test_static_collection.rb
36
+ test/test_stellr.rb
data/README.txt ADDED
@@ -0,0 +1,109 @@
1
+ stellr
2
+ by Jens Kraemer and Benjamin Krause
3
+ http://rubyforge.org/projects/stellr
4
+
5
+ == DESCRIPTION:
6
+
7
+ Stellr is a Ferret based standalone search server featuring a DRb frontend and (soon to come) an HTTP frontend. It can handle multiple indexes, including multi-index searches. A client library and a simple command line query tool are included with the gem.
8
+
9
+ == FEATURES:
10
+
11
+ * DRb frontend
12
+ * easy to use client library (see below)
13
+ * multi index search
14
+ * Index rotation
15
+ Stellr always keeps two versions of your index around - one is used in a
16
+ multi threaded, read only way to handle incoming search requests, while the
17
+ other one is written to when you index something.
18
+ Using the switch function you may decide when to switch over searching from
19
+ the old index to the new one. Then, changes will be synced, and searches will
20
+ see the new or updated data from before the switch call.
21
+ * Index synchronization
22
+ Two kinds of synchronization methods are supported for now: rsync, using rsync
23
+ to copy over the changes from one index to the other, and static, which will
24
+ completely replace the old index with the new one. While the latter is suitable
25
+ for indexes which you rebuild completely from time to time, the former is good
26
+ for large indexes that are updated frequently or that are too large for frequent
27
+ rebuilds.
28
+
29
+ == SYNOPSIS:
30
+
31
+ * start the server:
32
+
33
+ stellr -c /path/to/config.yml start
34
+
35
+ * index something
36
+
37
+ require 'stellr/client'
38
+ stellr = Stellr::Client.new('druby://localhost:9010')
39
+ config = {
40
+ :collection => :static, # static collections are rebuilt from scratch every time changes occur
41
+ # :collection => :rsync, # use the rsync collection for indexes that are updated frequently and/or are
42
+ # too large to be rebuilt from scratch every time they're updated
43
+ :analyzer => 'My::Analyzer',
44
+ :fields => {
45
+ :title => { :boost => 5, :store => :yes },
46
+ :content => { :store => :no }
47
+ }
48
+ }
49
+ collection = stellr.connect('my_collection', config)
50
+ collection.add_record(:id => 1, :title => 'Some Title', :content => 'Content')
51
+ collection.switch #
52
+
53
+ * command line search
54
+
55
+ stellr-search my_collection 'query string'
56
+
57
+ for now, this will display the first 10 hits only.
58
+
59
+ * search via the client library
60
+
61
+ require 'stellr/client'
62
+ stellr = Stellr::Client.new('druby://localhost:9010')
63
+ collection = stellr.connect 'my_collection',
64
+ :analyzer => 'My::Analyzer' # the analyzer to use for query parsing
65
+ results = collection.search 'querystring',
66
+ :page => 1, :per_page => 100, # built in pagination support
67
+ :fields => [:title, :content], # the fields to search in
68
+ :get_fields => [ :title ] # the fields to fetch for result display
69
+
70
+ * multi-collection search
71
+ pass an array of collection names to search multiple
72
+ collections at once:
73
+
74
+ collection = stellr.connect [ 'my_collection', 'another_collection' ],
75
+ :analyzer => 'My::Analyzer'
76
+
77
+ == REQUIREMENTS:
78
+
79
+ * Ferret (gem install ferret)
80
+ * daemons (gem install daemons)
81
+
82
+ == INSTALL:
83
+
84
+ * sudo gem install stellr
85
+
86
+ == LICENSE:
87
+
88
+ (The MIT License)
89
+
90
+ Copyright (c) 2007 FIX
91
+
92
+ Permission is hereby granted, free of charge, to any person obtaining
93
+ a copy of this software and associated documentation files (the
94
+ 'Software'), to deal in the Software without restriction, including
95
+ without limitation the rights to use, copy, modify, merge, publish,
96
+ distribute, sublicense, and/or sell copies of the Software, and to
97
+ permit persons to whom the Software is furnished to do so, subject to
98
+ the following conditions:
99
+
100
+ The above copyright notice and this permission notice shall be
101
+ included in all copies or substantial portions of the Software.
102
+
103
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
104
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
105
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
106
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
107
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
108
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
109
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+ $:.unshift 'lib'
4
+ require './lib/stellr.rb'
5
+
6
# Reopens Hoe so that its dependency reader never reports 'hoe' itself
# (presumably to keep hoe out of the released gem's dependency list).
class Hoe
  # Returns a filtered copy of @extra_deps without any entry named 'hoe'.
  # Because this is a copy, appending to the returned array does not change
  # the stored dependency list.
  def extra_deps
    @extra_deps.reject { |dep| Array(dep).first == 'hoe' }
  end
end

# Gem specification for stellr.
Hoe.new('stellr', Stellr::VERSION) do |p|
  p.rubyforge_name = 'stellr'
  p.author = [ 'Benjamin Krause', 'Jens Krämer' ]
  p.email = [ 'bk@benjaminkrause.com', 'jk@jkraemer.net' ]
  p.summary = 'Stellr is a Ferret based standalone search server.'
  p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
  p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
  p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
  # Every dependency is its own [ name, version requirement ] pair. The list
  # is assigned back through the writer because the extra_deps reader defined
  # above returns a filtered copy, so an in-place << on its result is lost.
  p.extra_deps += [
    [ 'ferret',     '>= 0.11.6' ],
    [ 'daemons',    '>= 1.0.10' ],
    [ 'fastthread', '>= 1.0' ]
  ]
end

desc "Release and publish documentation"
task :repubdoc => [:release, :publish_docs]
27
+
28
+ # vim:syntax=ruby
data/bin/stellr ADDED
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'optparse'
5
+ require 'daemons'
6
+ require 'fileutils'
7
+
8
# Prefer the installed gem; when it is not installed (e.g. running from a
# source checkout) fall back to the lib directory that sits next to bin/.
begin
  gem 'stellr'
rescue Gem::LoadError
  # Resolve ../lib relative to this script, not to the current working
  # directory, and add it as a single load path entry.
  $:.unshift File.expand_path('../lib', File.dirname(__FILE__))
end

require 'stellr'

include Stellr

# Daemon control commands accepted as the last command line argument.
commands = %w( start stop restart status )

options = OptionParser.new do |opts|

  opts.banner = "Usage: #{File.basename(__FILE__)} [options] #{commands.join('|')}"

  opts.on("-c", "--config CONFIG", "config file location") do |value|
    @config_file = value
  end

  opts.on("-h", "--help", "show this help") do
    @help = true
  end
end

cmdline = options.parse( ARGV )
command = cmdline.last

# Show usage when help was requested or no known command was given
# (an empty command line yields nil here, which is not a known command).
if @help || !commands.include?( command )
  puts options.to_s
else
  config = Stellr::Config.new @config_file
  FileUtils.mkdir_p config.tmp_dir
  FileUtils.mkdir_p config.log_dir

  # Hand control to the daemons library; the block is the daemonized
  # server process body.
  Daemons.run_proc( "stellr", :dir        => config.tmp_dir,
                              :dir_mode   => :normal,
                              :log_output => true,
                              :ARGV       => [ command ] ) do
    Stellr::start_server config
  end

  if command == "start"
    # Give the daemon a moment to come up, then verify through its pid file
    # that it is really running.
    sleep 2
    pid = Daemons::PidFile.new( config.tmp_dir, 'stellr' )
    if pid.exist? && Daemons::Pid.running?(pid.pid)
      exit 0
    else
      puts "not started. Watch #{config.tmp_dir}/stellr.output for errors."
      exit 1
    end
  end

  # FIXME how to correctly shut down the running server so indexes are closed
  # and such?
  # look for some hook provided by daemon tools, or trap a termination signal
  # (note: SIGKILL itself cannot be trapped; SIGTERM / SIGINT can)
end
data/bin/stellr-search ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'optparse'
5
+
6
# Prefer the installed gem; when it is not installed (e.g. running from a
# source checkout) fall back to the lib directory that sits next to bin/.
begin
  gem 'stellr'
rescue Gem::LoadError
  # Resolve ../lib relative to this script, not to the current working
  # directory, and add it as a single load path entry.
  $:.unshift File.expand_path('../lib', File.dirname(__FILE__))
end

require 'stellr'
require 'stellr/client'

options = OptionParser.new do |opts|

  opts.banner = "Usage: #{File.basename(__FILE__)} [options] collection query"

  opts.on("-c", "--config CONFIG", "config file location") do |value|
    @config_file = value
  end

  opts.on("-h", "--help", "show this help") do
    @help = true
  end
end

# The two trailing positional arguments are the collection name and the
# query, in that order. Array#pop returns nil when nothing is left.
cmdline = options.parse( ARGV )
query = cmdline.pop
collection_name = cmdline.pop

# Print usage instead of sending nil to the server when an argument is missing.
if @help || query.nil? || collection_name.nil?
  puts options.to_s
else
  config = Stellr::Config.new @config_file
  client = Stellr::Client.new config.drb_uri
  collection = client.connect collection_name
  puts "searching collection #{collection_name} for >#{query}< ..."
  results = collection.search query, :limit => 10
  if results.any?
    puts "showing first 10 of #{results.total_hits} results"
    results.each do |r|
      puts r[:id]
    end
  else
    puts "no results for >#{query}<"
  end
end
49
+
50
+
data/config/stellr.yml ADDED
@@ -0,0 +1,8 @@
1
+ :port: 9010
2
+ :host: 'localhost'
3
+ :base_dir: '/var/stellr'
4
+ :data_dir: 'data'
5
+ :log_dir: 'log'
6
+ :tmp_dir: 'tmp'
7
+ :conf_dir: 'conf'
8
+ :log_level: :warn
data/lib/stellr.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'rubygems'
2
+ require 'thread'
3
+ begin
4
+ require 'fastthread'
5
+ rescue LoadError
6
+ puts "couldn't load fastthread"
7
+ end
8
+ require 'drb'
9
+ require 'monitor'
10
+ require 'ferret'
11
+ require 'stellr/utils'
12
+ require 'stellr/server'
13
+ require 'stellr/config'
14
+ require 'stellr/collections'
15
+ require 'stellr/strategies'
16
+ require 'stellr/search'
17
+
18
+ $SAFE = 1
19
+
20
module Stellr
  VERSION = '0.1.0'

  # Starts a Stellr server and blocks until its DRb service thread finishes.
  #
  # +config+ must respond to +script+ (path of an optional Ruby file that is
  # loaded before the server is created, or nil) and +drb_uri+ (the URI the
  # DRb service is started on); it is also handed to Server.new.
  #
  # If loading the script fails, the error and its backtrace are printed and
  # the process exits with status 1.
  def self.start_server( config )
    if config.script
      begin
        load config.script
      # ScriptError covers LoadError and SyntaxError, which are not
      # StandardErrors. SystemExit and signals raised while loading are
      # deliberately not intercepted.
      rescue StandardError, ScriptError => e
        puts "\nerror loading script #{config.script}: #{e}\n#{e.backtrace.join("\n")}"
        exit 1
      end
    end
    stellr = Server.new config
    server = DRb.start_service config.drb_uri, stellr
    server.thread.join
  end

end
@@ -0,0 +1,78 @@
1
+ require 'drb'
2
+ require 'stellr/search'
3
+
4
module Stellr

  # = Stellr client
  #
  # This class acts as a wrapper around the connection to a Stellr server.
  # Typical usage looks like this:
  #
  #   stellr = Stellr::Client.new('druby://myserver.com:9000')
  #   collection = stellr.connect('myindex', :fields => { :content => { :store => :yes } })
  #   collection << { :content => 'lorem ipsum' }
  #   collection.switch
  #   results = collection.search('lorem', :page => 1, :per_page => 10)
  #
  class Client

    # Creates a DRb stub for the server listening at +drb_uri+.
    def initialize( drb_uri )
      @server = DRbObject.new(nil, drb_uri)
    end

    # Connects to a remote collection and returns a stub that can be used to
    # add records to the collection and to search for them.
    #
    # Specify an array of collection names for the first argument to search
    # multiple collections at once. No indexing is possible with such a
    # MultiCollection.
    #
    # +options+ is handed to the server when the collection is registered.
    def connect( collection_name, options = nil )
      if Array === collection_name
        # Fall back to an empty hash so the multi collection is registered
        # with the same default multi_connect itself uses, never with nil.
        multi_connect collection_name, (options || {})
      else
        @server.register collection_name, options
        ClientCollection.new @server, collection_name
      end
    end

    # Connects to multiple remote collections at once in order to run
    # cross-collection searches.
    def multi_connect( collection_names, options = {} )
      MultiCollection.new @server, collection_names, options
    end

  end

  # Wrapper around a remote collection.
  #
  # Every method not defined here is forwarded to the server, with the
  # collection name inserted as the first argument.
  #
  # See the documentation of the collection class you use for more information.
  class ClientCollection

    def initialize( server, name )
      @name = name
      @server = server
    end

    # Disconnects this collection from the server. Any forwarded call made
    # afterwards raises a RuntimeError.
    def disconnect
      @server = nil
    end

    # Forwards +method+ to the server as method(name, *args), passing along
    # a block if one was given.
    def method_missing( method, *args, &block )
      raise "use of disconnected collection" if @server.nil?
      @server.send method, @name, *args, &block
    end

  end

  # This client collection class allows to search multiple server side
  # collections at once.
  class MultiCollection < ClientCollection
    # Registers the given collection names as one multi collection on the
    # server; the value returned by the server is used as this collection's
    # name in all forwarded calls.
    def initialize( server, names, options = {} )
      @server = server
      @name = @server.register_multi_collection names, options
    end
  end

end
78
+
@@ -0,0 +1,6 @@
1
+ require 'stellr/collections/base'
2
+ require 'stellr/collections/searchable_collection'
3
+ require 'stellr/collections/writeable_collection'
4
+ require 'stellr/collections/static'
5
+ require 'stellr/collections/rsync'
6
+ require 'stellr/collections/multi_collection'
@@ -0,0 +1,79 @@
1
module Stellr
  module Collections

    # Base class for collection implementations
    class Base
      include Ferret::Index
      include Stellr::Utils::Shutdown
      include Stellr::Utils::Observable
      attr_reader :name

      # Builds the collection described by +options+.
      #
      # The collection class is chosen via options[:collection] (:static or
      # :rsync) or options[:collection_class] (a class name); the default is
      # Stellr::Collections::RSync. If a strategy is configured through
      # options[:strategy] (:queueing) or options[:strategy_class], an
      # instance of that strategy wrapping the collection is returned instead
      # of the bare collection.
      #
      # Note that the :collection and :strategy keys are removed from the
      # given hash and replaced by their *_class counterparts.
      def self.create( name, options )
        collection_class = collection_class_for_options options
        collection = collection_class.new( name, options )
        if strategy_class = strategy_class_for_options( options )
          strategy_class.new( collection, options )
        else
          collection
        end
      end

      # +options+ may carry a :logger; without one, a Logger writing to
      # 'stellr.log' is created. The options hash is copied for later use.
      def initialize( name, options )
        @logger = options[:logger] || (require 'logger'; Logger.new 'stellr.log')
        @name = name
        @options = options.dup
      end

      # called whenever the strategy thinks it's a good time to do something
      # timeconsuming (like switching indexes, optimizing, flushing, ...)
      def batch_finished
      end

      # Closes the collection; +mode+ is not used here.
      def on_shutdown( mode )
        close
      end

      # close this collection
      def close
      end

      protected

      # Directory configured for this collection via options[:path].
      def collection_directory
        @options[:path]
      end

      # NOTE: `protected` above only affects instance methods; the class
      # methods below remain publicly callable.

      # Determines the collection class to instantiate. A :collection key is
      # translated into :collection_class (and removed from +options+).
      def self.collection_class_for_options( options )
        if (c = options.delete(:collection))
          options[:collection_class] = collection_class_for_key c
        end
        resolve_class( options[:collection_class] || 'Stellr::Collections::RSync' )
      end

      # Maps a short collection key to a class name; nil for unknown keys.
      def self.collection_class_for_key(key)
        case key
        when :static
          'Stellr::Collections::Static'
        when :rsync
          'Stellr::Collections::RSync'
        end
      end

      # Determines the strategy class to use, or nil if none is configured.
      # A :strategy key is translated into :strategy_class (and removed from
      # +options+).
      def self.strategy_class_for_options( options )
        if (c = options.delete(:strategy))
          options[:strategy_class] = strategy_class_for_key c
        end
        resolve_class( options[:strategy_class] ) if options[:strategy_class]
      end

      # Maps a short strategy key to a class name; nil for unknown keys.
      def self.strategy_class_for_key(key)
        case key
        when :queueing
          'Stellr::Strategies::Queueing'
        end
      end

      # Looks up the constant named by +class_name+ (e.g. 'Foo::Bar', with or
      # without a leading '::') starting at the top level.
      #
      # The name is resolved segment by segment with const_get instead of
      # being evaluated as Ruby source, so a value arriving in the options
      # hash can only ever name a constant and cannot execute arbitrary code.
      # Raises NameError for empty or invalid names.
      def self.resolve_class( class_name )
        segments = class_name.to_s.split('::').reject { |segment| segment.empty? }
        raise NameError, "invalid class name: #{class_name.inspect}" if segments.empty?
        segments.inject(Object) { |scope, segment| scope.const_get segment }
      end
    end

  end

end