refinery 0.12.2 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/refinery.rb +1 -99
- data/refinery.gemspec +16 -117
- metadata +39 -118
- data/.gitignore +0 -6
- data/CHANGELOG +0 -2
- data/LICENSE +0 -21
- data/README.rdoc +0 -58
- data/README.textile +0 -58
- data/Rakefile +0 -43
- data/VERSION +0 -1
- data/bin/epub +0 -64
- data/bin/monitor +0 -47
- data/bin/pubnow +0 -61
- data/bin/refinery +0 -64
- data/config/config.example.yml +0 -21
- data/lib/refinery/beanstalk_queue.rb +0 -36
- data/lib/refinery/beanstalk_queue_provider.rb +0 -18
- data/lib/refinery/config.rb +0 -48
- data/lib/refinery/configurable.rb +0 -15
- data/lib/refinery/daemon.rb +0 -148
- data/lib/refinery/event_publisher.rb +0 -131
- data/lib/refinery/heartbeat.rb +0 -33
- data/lib/refinery/loggable.rb +0 -9
- data/lib/refinery/monitor.rb +0 -113
- data/lib/refinery/processor.rb +0 -55
- data/lib/refinery/publisher.rb +0 -42
- data/lib/refinery/queueable.rb +0 -48
- data/lib/refinery/server.rb +0 -88
- data/lib/refinery/statistics.rb +0 -61
- data/lib/refinery/stats_server.rb +0 -135
- data/lib/refinery/utilities.rb +0 -33
- data/lib/refinery/validations.rb +0 -48
- data/lib/refinery/worker.rb +0 -65
- data/logs/README +0 -1
- data/publishers/error.rb +0 -6
- data/publishers/sample.rb +0 -6
- data/publishers/sleep.rb +0 -5
- data/test/config.yml +0 -10
- data/test/test_helper.rb +0 -21
- data/test/unit/config_test.rb +0 -42
- data/test/unit/configurable_test.rb +0 -13
- data/test/unit/daemon_test.rb +0 -63
- data/test/unit/event_publisher_test.rb +0 -12
- data/test/unit/heartbeat_test.rb +0 -25
- data/test/unit/loggable_test.rb +0 -12
- data/test/unit/processor_test.rb +0 -34
- data/test/unit/publisher_test.rb +0 -13
- data/test/unit/queueable_test.rb +0 -26
- data/test/unit/server_test.rb +0 -34
- data/test/unit/statistics_test.rb +0 -44
- data/test/unit/utilities_test.rb +0 -25
- data/test/unit/validations_test.rb +0 -37
- data/test/unit/worker_test.rb +0 -44
- data/workers/error.rb +0 -8
- data/workers/sample.rb +0 -8
- data/workers/sleep.rb +0 -7
data/README.rdoc
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
= Refinery
|
2
|
-
|
3
|
-
Refinery is a distributed processing framework written in the Ruby
|
4
|
-
programming language. It is designed to work with Amazon's Web
|
5
|
-
Services such as SQS and S3 to distribute image and data processing
|
6
|
-
across multiple servers to alleviate the need for heavy-duty data and
|
7
|
-
image processing on web application servers.
|
8
|
-
|
9
|
-
Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
|
10
|
-
such as Amazon's S3.
|
11
|
-
|
12
|
-
== Required Libraries
|
13
|
-
|
14
|
-
* RightScale AWS
|
15
|
-
* JSON
|
16
|
-
* Moneta
|
17
|
-
|
18
|
-
== Optional Libraries
|
19
|
-
|
20
|
-
If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
|
21
|
-
|
22
|
-
If you want to run the stats server to view operational statistics
|
23
|
-
about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
|
24
|
-
|
25
|
-
== Executing
|
26
|
-
|
27
|
-
Refinery has several executable scripts that are used to launch the various components:
|
28
|
-
|
29
|
-
=== bin/refinery
|
30
|
-
|
31
|
-
This command executes the refinery execution server. This is where jobs
|
32
|
-
are run.
|
33
|
-
|
34
|
-
Example: bin/refinery -c config/config.yml
|
35
|
-
|
36
|
-
=== bin/epub
|
37
|
-
|
38
|
-
This command executes all publishers that fire on a regular basis.
|
39
|
-
|
40
|
-
Example: bin/epub -c config/config.yml
|
41
|
-
|
42
|
-
=== bin/pubnow
|
43
|
-
|
44
|
-
This command executes a single publisher once.
|
45
|
-
|
46
|
-
Example: bin/pubnow -c config/config.yml sample
|
47
|
-
|
48
|
-
== Tests
|
49
|
-
|
50
|
-
To run the tests, do one of the following:
|
51
|
-
|
52
|
-
* Use autotest
|
53
|
-
* Use the command `rake`
|
54
|
-
* Run by hand with `ruby -Ilib -Itest unit/test_to_run.rb`
|
55
|
-
|
56
|
-
=== bin/monitor
|
57
|
-
|
58
|
-
This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
|
data/README.textile
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
h1. Refinery
|
2
|
-
|
3
|
-
Refinery is a distributed processing framework written in the Ruby
|
4
|
-
programming language. It is designed to work with Amazon's Web
|
5
|
-
Services such as SQS and S3 to distribute image and data processing
|
6
|
-
across multiple servers to alleviate the need for heavy-duty data and
|
7
|
-
image processing on web application servers.
|
8
|
-
|
9
|
-
Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
|
10
|
-
such as Amazon's S3.
|
11
|
-
|
12
|
-
h2. Required Libraries
|
13
|
-
|
14
|
-
* RightScale AWS
|
15
|
-
* JSON
|
16
|
-
* Moneta
|
17
|
-
|
18
|
-
h2. Optional Libraries
|
19
|
-
|
20
|
-
If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
|
21
|
-
|
22
|
-
If you want to run the stats server to view operational statistics
|
23
|
-
about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
|
24
|
-
|
25
|
-
h2. Executing
|
26
|
-
|
27
|
-
Refinery has several executable scripts that are used to launch the various components:
|
28
|
-
|
29
|
-
h3. bin/refinery
|
30
|
-
|
31
|
-
This command executes the refinery execution server. This is where jobs
|
32
|
-
are run.
|
33
|
-
|
34
|
-
Example: bin/refinery -c config/config.yml
|
35
|
-
|
36
|
-
h3. bin/epub
|
37
|
-
|
38
|
-
This command executes all publishers that fire on a regular basis.
|
39
|
-
|
40
|
-
Example: bin/epub -c config/config.yml
|
41
|
-
|
42
|
-
h3. bin/pubnow
|
43
|
-
|
44
|
-
This command executes a single publisher once.
|
45
|
-
|
46
|
-
Example: bin/pubnow -c config/config.yml sample
|
47
|
-
|
48
|
-
h3. bin/monitor
|
49
|
-
|
50
|
-
This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
|
51
|
-
|
52
|
-
h2. Tests
|
53
|
-
|
54
|
-
To run the tests, do one of the following:
|
55
|
-
|
56
|
-
* Use autotest
|
57
|
-
* Use the command `rake`
|
58
|
-
* Run by hand with `ruby -Ilib -Itest unit/test_to_run.rb`
|
data/Rakefile
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
require 'rake'
|
2
|
-
require 'rake/testtask'
|
3
|
-
require 'rake/rdoctask'
|
4
|
-
|
5
|
-
desc 'Default: run tests.'
|
6
|
-
task :default => [:test]
|
7
|
-
|
8
|
-
desc 'Run tests.'
|
9
|
-
Rake::TestTask.new(:test) do |t|
|
10
|
-
t.libs << 'lib' << 'test'
|
11
|
-
t.pattern = 'test/**/*_test.rb'
|
12
|
-
t.verbose = true
|
13
|
-
end
|
14
|
-
|
15
|
-
desc 'Generate documentation.'
|
16
|
-
Rake::RDocTask.new(:rdoc) do |rdoc|
|
17
|
-
rdoc.rdoc_dir = 'rdoc'
|
18
|
-
rdoc.title = 'Refinery'
|
19
|
-
rdoc.options << '--line-numbers' << '--inline-source'
|
20
|
-
rdoc.rdoc_files.include('README.rdoc')
|
21
|
-
rdoc.rdoc_files.include('lib/*.rb')
|
22
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
23
|
-
end
|
24
|
-
|
25
|
-
begin
|
26
|
-
require 'jeweler'
|
27
|
-
Jeweler::Tasks.new do |gemspec|
|
28
|
-
gemspec.name = "refinery"
|
29
|
-
gemspec.summary = "Refinery processes data in a distributed environment."
|
30
|
-
gemspec.email = "anthonyeden@gmail.com"
|
31
|
-
gemspec.homepage = "http://github.com/aeden/refinery"
|
32
|
-
gemspec.description = "Process data in a distributed fashion."
|
33
|
-
gemspec.authors = ["Anthony Eden"]
|
34
|
-
gemspec.files.exclude 'docs/**/*'
|
35
|
-
gemspec.files.exclude '.autotest'
|
36
|
-
gemspec.rubyforge_project = 'refinery'
|
37
|
-
gemspec.add_dependency "right_aws"
|
38
|
-
gemspec.add_dependency "moneta"
|
39
|
-
gemspec.add_dependency "addressable"
|
40
|
-
end
|
41
|
-
rescue LoadError
|
42
|
-
puts "Jeweler not available. Install it with: sudo gem install jeweler"
|
43
|
-
end
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.12.2
|
data/bin/epub
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# epub: run the event publisher
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# epub [OPTION]
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -p, --publishers directory
|
21
|
-
# specify the directory for finding publishers
|
22
|
-
#
|
23
|
-
# -v, --verbose
|
24
|
-
# print info to the standard output
|
25
|
-
|
26
|
-
require 'getoptlong'
|
27
|
-
require 'rdoc/usage'
|
28
|
-
require 'refinery'
|
29
|
-
|
30
|
-
pidfile = 'epub.pid'
|
31
|
-
|
32
|
-
options = {}
|
33
|
-
opts = GetoptLong.new(
|
34
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
35
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
36
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
37
|
-
[ '--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT],
|
38
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT],
|
39
|
-
[ '--pidfile', GetoptLong::REQUIRED_ARGUMENT]
|
40
|
-
)
|
41
|
-
opts.each do |opt, arg|
|
42
|
-
case opt
|
43
|
-
when '--help'
|
44
|
-
RDoc::usage
|
45
|
-
when '--debug'
|
46
|
-
options[:debug] = true
|
47
|
-
when '--config'
|
48
|
-
options[:config] = arg
|
49
|
-
when '--publishers'
|
50
|
-
options[:publishers] = arg
|
51
|
-
when '--verbose'
|
52
|
-
options[:verbose] = true
|
53
|
-
when '--pidfile'
|
54
|
-
pidfile = arg
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
open(pidfile, 'w') do |f|
|
59
|
-
f << $$
|
60
|
-
end
|
61
|
-
|
62
|
-
Refinery::EventPublisher.new(options).run
|
63
|
-
|
64
|
-
File.delete(pidfile)
|
data/bin/monitor
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# monitor: monitor the refinery system
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# monitor [OPTION]
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -v, --verbose
|
21
|
-
# print info to the standard output
|
22
|
-
|
23
|
-
require 'getoptlong'
|
24
|
-
require 'rdoc/usage'
|
25
|
-
require 'refinery'
|
26
|
-
|
27
|
-
options = {}
|
28
|
-
opts = GetoptLong.new(
|
29
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
30
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
31
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
32
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT]
|
33
|
-
)
|
34
|
-
opts.each do |opt, arg|
|
35
|
-
case opt
|
36
|
-
when '--help'
|
37
|
-
RDoc::usage
|
38
|
-
when '--debug'
|
39
|
-
options[:debug] = true
|
40
|
-
when '--config'
|
41
|
-
options[:config] = arg
|
42
|
-
when '--verbose'
|
43
|
-
options[:verbose] = true
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
Refinery::Monitor.new(options).run
|
data/bin/pubnow
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# pubnow: run a single publisher once
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# pubnow [OPTION] KEY
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -p, --publishers directory
|
21
|
-
# specify the directory for finding publishers
|
22
|
-
#
|
23
|
-
# -v, --verbose
|
24
|
-
# print info to the standard output
|
25
|
-
#
|
26
|
-
# KEY: the name of the publisher to run. For example
|
27
|
-
# to run the sample publisher use 'sample'
|
28
|
-
|
29
|
-
require 'getoptlong'
|
30
|
-
require 'rdoc/usage'
|
31
|
-
require 'refinery'
|
32
|
-
|
33
|
-
options = {}
|
34
|
-
opts = GetoptLong.new(
|
35
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
36
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
37
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
38
|
-
[ '--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT],
|
39
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT ]
|
40
|
-
)
|
41
|
-
opts.each do |opt, arg|
|
42
|
-
case opt
|
43
|
-
when '--help'
|
44
|
-
RDoc::usage
|
45
|
-
when '--debug'
|
46
|
-
options[:debug] = true
|
47
|
-
when '--config'
|
48
|
-
options[:config] = arg
|
49
|
-
when '--publishers'
|
50
|
-
options[:publishers] = arg
|
51
|
-
when '--verbose'
|
52
|
-
options[:verbose] = true
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
if ARGV.length != 1
|
57
|
-
puts "Missing key argument (try --help)"
|
58
|
-
exit 0
|
59
|
-
end
|
60
|
-
|
61
|
-
Refinery::EventPublisher.new(options).run_once(ARGV.shift)
|
data/bin/refinery
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# refinery: run the Refinery server
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# refinery [OPTION]
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -w, --workers directory
|
21
|
-
# specify the directory for finding workers
|
22
|
-
#
|
23
|
-
# -v, --verbose
|
24
|
-
# print info to the standard output
|
25
|
-
|
26
|
-
require 'getoptlong'
|
27
|
-
require 'rdoc/usage'
|
28
|
-
require 'refinery'
|
29
|
-
|
30
|
-
pidfile = 'refinery.pid'
|
31
|
-
|
32
|
-
options = {}
|
33
|
-
opts = GetoptLong.new(
|
34
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
35
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
36
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
37
|
-
[ '--workers', '-w', GetoptLong::REQUIRED_ARGUMENT],
|
38
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT],
|
39
|
-
[ '--pidfile', GetoptLong::REQUIRED_ARGUMENT]
|
40
|
-
)
|
41
|
-
opts.each do |opt, arg|
|
42
|
-
case opt
|
43
|
-
when '--help'
|
44
|
-
RDoc::usage
|
45
|
-
when '--debug'
|
46
|
-
options[:debug] = true
|
47
|
-
when '--config'
|
48
|
-
options[:config] = arg
|
49
|
-
when '--workers'
|
50
|
-
options[:workers] = arg
|
51
|
-
when '--verbose'
|
52
|
-
options[:verbose] = true
|
53
|
-
when '--pidfile'
|
54
|
-
pidfile = arg
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
open(pidfile, 'w') do |f|
|
59
|
-
f << $$
|
60
|
-
end
|
61
|
-
|
62
|
-
Refinery::Server.new(options).run
|
63
|
-
|
64
|
-
File.delete(pidfile)
|
data/config/config.example.yml
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
aws:
|
2
|
-
credentials:
|
3
|
-
access_key_id: "access_key_id"
|
4
|
-
secret_access_key: "secret_access_key"
|
5
|
-
# queue_engine:
|
6
|
-
# provider: 'beanstalk'
|
7
|
-
prefix: 'example_'
|
8
|
-
processors:
|
9
|
-
sample:
|
10
|
-
queue: 'sample' # can be omitted
|
11
|
-
publishers:
|
12
|
-
delay: 30
|
13
|
-
workers:
|
14
|
-
initial: 3
|
15
|
-
data_store:
|
16
|
-
class: s3
|
17
|
-
error:
|
18
|
-
publishers:
|
19
|
-
delay: 30
|
20
|
-
workers:
|
21
|
-
initial: 1
|
@@ -1,36 +0,0 @@
|
|
1
|
-
module Refinery #:nodoc:
|
2
|
-
# An interface to beanstalk using SQS-compatible methods.
|
3
|
-
class BeanstalkQueue
|
4
|
-
include Refinery::Loggable
|
5
|
-
attr_reader :name
|
6
|
-
|
7
|
-
# Construct a BeanstalkQueue instance.
|
8
|
-
#
|
9
|
-
# *<tt>name</tt>: if specified then that "tube" will be used.
|
10
|
-
# *<tt>hosts</tt>: if specified then those host:port combos will be used.
|
11
|
-
def initialize(name=nil, hosts=nil)
|
12
|
-
@name = name.gsub(/_/, '-') # beanstalk does not like underscores in tube names
|
13
|
-
@hosts = hosts || ['localhost:11300']
|
14
|
-
end
|
15
|
-
|
16
|
-
# Get the next message from the queue
|
17
|
-
def receive(visibility=nil)
|
18
|
-
beanstalk.reserve(visibility)
|
19
|
-
end
|
20
|
-
|
21
|
-
# Get the approximate queue size
|
22
|
-
def size
|
23
|
-
beanstalk.stats_tube(name)['current-jobs-ready']
|
24
|
-
end
|
25
|
-
|
26
|
-
# Send a message
|
27
|
-
def send_message(message)
|
28
|
-
beanstalk.put(message)
|
29
|
-
end
|
30
|
-
|
31
|
-
protected
|
32
|
-
def beanstalk
|
33
|
-
@beanstalk ||= Beanstalk::Pool.new(@hosts, name)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|