refinery 0.12.2 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/refinery.rb +1 -99
- data/refinery.gemspec +16 -117
- metadata +39 -118
- data/.gitignore +0 -6
- data/CHANGELOG +0 -2
- data/LICENSE +0 -21
- data/README.rdoc +0 -58
- data/README.textile +0 -58
- data/Rakefile +0 -43
- data/VERSION +0 -1
- data/bin/epub +0 -64
- data/bin/monitor +0 -47
- data/bin/pubnow +0 -61
- data/bin/refinery +0 -64
- data/config/config.example.yml +0 -21
- data/lib/refinery/beanstalk_queue.rb +0 -36
- data/lib/refinery/beanstalk_queue_provider.rb +0 -18
- data/lib/refinery/config.rb +0 -48
- data/lib/refinery/configurable.rb +0 -15
- data/lib/refinery/daemon.rb +0 -148
- data/lib/refinery/event_publisher.rb +0 -131
- data/lib/refinery/heartbeat.rb +0 -33
- data/lib/refinery/loggable.rb +0 -9
- data/lib/refinery/monitor.rb +0 -113
- data/lib/refinery/processor.rb +0 -55
- data/lib/refinery/publisher.rb +0 -42
- data/lib/refinery/queueable.rb +0 -48
- data/lib/refinery/server.rb +0 -88
- data/lib/refinery/statistics.rb +0 -61
- data/lib/refinery/stats_server.rb +0 -135
- data/lib/refinery/utilities.rb +0 -33
- data/lib/refinery/validations.rb +0 -48
- data/lib/refinery/worker.rb +0 -65
- data/logs/README +0 -1
- data/publishers/error.rb +0 -6
- data/publishers/sample.rb +0 -6
- data/publishers/sleep.rb +0 -5
- data/test/config.yml +0 -10
- data/test/test_helper.rb +0 -21
- data/test/unit/config_test.rb +0 -42
- data/test/unit/configurable_test.rb +0 -13
- data/test/unit/daemon_test.rb +0 -63
- data/test/unit/event_publisher_test.rb +0 -12
- data/test/unit/heartbeat_test.rb +0 -25
- data/test/unit/loggable_test.rb +0 -12
- data/test/unit/processor_test.rb +0 -34
- data/test/unit/publisher_test.rb +0 -13
- data/test/unit/queueable_test.rb +0 -26
- data/test/unit/server_test.rb +0 -34
- data/test/unit/statistics_test.rb +0 -44
- data/test/unit/utilities_test.rb +0 -25
- data/test/unit/validations_test.rb +0 -37
- data/test/unit/worker_test.rb +0 -44
- data/workers/error.rb +0 -8
- data/workers/sample.rb +0 -8
- data/workers/sleep.rb +0 -7
data/README.rdoc
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
= Refinery
|
2
|
-
|
3
|
-
Refinery is a distributed processing framework written in the Ruby
|
4
|
-
programming language. It is designed to work with Amazon's Web
|
5
|
-
Services such as SQS and S3 to distribute image and data processing
|
6
|
-
across multiple servers to alleviate the need for heavy-duty data and
|
7
|
-
image processing on web application servers.
|
8
|
-
|
9
|
-
Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
|
10
|
-
such as Amazon's S3.
|
11
|
-
|
12
|
-
== Required Libraries
|
13
|
-
|
14
|
-
* RightScale AWS
|
15
|
-
* JSON
|
16
|
-
* Moneta
|
17
|
-
|
18
|
-
== Optional Libraries
|
19
|
-
|
20
|
-
If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
|
21
|
-
|
22
|
-
If you want to run the stats server to view operational statistics
|
23
|
-
about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
|
24
|
-
|
25
|
-
== Executing
|
26
|
-
|
27
|
-
Refinery has several executable scripts that are used to launch the various components:
|
28
|
-
|
29
|
-
=== bin/refinery
|
30
|
-
|
31
|
-
This command executes the refinery execution server. This is where jobs
|
32
|
-
are run.
|
33
|
-
|
34
|
-
Example: bin/refinery -c config/config.yml
|
35
|
-
|
36
|
-
=== bin/epub
|
37
|
-
|
38
|
-
This command executes all publishers that fire on a regular basis.
|
39
|
-
|
40
|
-
Example: bin/epub -c config/config.yml
|
41
|
-
|
42
|
-
=== bin/pubnow
|
43
|
-
|
44
|
-
This command executes a single publisher once.
|
45
|
-
|
46
|
-
Example: bin/pubnow -c config/config.yml sample
|
47
|
-
|
48
|
-
== Tests
|
49
|
-
|
50
|
-
To run the tests, do one of the following:
|
51
|
-
|
52
|
-
* Use autotest
|
53
|
-
* Use the command `rake`
|
54
|
-
* Run by hand with `ruby -Ilib -Itest unit/test_to_run.rb`
|
55
|
-
|
56
|
-
=== bin/monitor
|
57
|
-
|
58
|
-
This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
|
data/README.textile
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
h1. Refinery
|
2
|
-
|
3
|
-
Refinery is a distributed processing framework written in the Ruby
|
4
|
-
programming language. It is designed to work with Amazon's Web
|
5
|
-
Services such as SQS and S3 to distribute image and data processing
|
6
|
-
across multiple servers to alleviate the need for heavy-duty data and
|
7
|
-
image processing on web application servers.
|
8
|
-
|
9
|
-
Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
|
10
|
-
such as Amazon's S3.
|
11
|
-
|
12
|
-
h2. Required Libraries
|
13
|
-
|
14
|
-
* RightScale AWS
|
15
|
-
* JSON
|
16
|
-
* Moneta
|
17
|
-
|
18
|
-
h2. Optional Libraries
|
19
|
-
|
20
|
-
If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
|
21
|
-
|
22
|
-
If you want to run the stats server to view operational statistics
|
23
|
-
about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
|
24
|
-
|
25
|
-
h2. Executing
|
26
|
-
|
27
|
-
Refinery has several executable scripts that are used to launch the various components:
|
28
|
-
|
29
|
-
h3. bin/refinery
|
30
|
-
|
31
|
-
This command executes the refinery execution server. This is where jobs
|
32
|
-
are run.
|
33
|
-
|
34
|
-
Example: bin/refinery -c config/config.yml
|
35
|
-
|
36
|
-
h3. bin/epub
|
37
|
-
|
38
|
-
This command executes all publishers that fire on a regular basis.
|
39
|
-
|
40
|
-
Example: bin/epub -c config/config.yml
|
41
|
-
|
42
|
-
h3. bin/pubnow
|
43
|
-
|
44
|
-
This command executes a single publisher once.
|
45
|
-
|
46
|
-
Example: bin/pubnow -c config/config.yml sample
|
47
|
-
|
48
|
-
h3. bin/monitor
|
49
|
-
|
50
|
-
This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
|
51
|
-
|
52
|
-
h2. Tests
|
53
|
-
|
54
|
-
To run the tests, do one of the following:
|
55
|
-
|
56
|
-
* Use autotest
|
57
|
-
* Use the command `rake`
|
58
|
-
* Run by hand with `ruby -Ilib -Itest unit/test_to_run.rb`
|
data/Rakefile
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
require 'rake'
|
2
|
-
require 'rake/testtask'
|
3
|
-
require 'rake/rdoctask'
|
4
|
-
|
5
|
-
desc 'Default: run tests.'
|
6
|
-
task :default => [:test]
|
7
|
-
|
8
|
-
desc 'Run tests.'
|
9
|
-
Rake::TestTask.new(:test) do |t|
|
10
|
-
t.libs << 'lib' << 'test'
|
11
|
-
t.pattern = 'test/**/*_test.rb'
|
12
|
-
t.verbose = true
|
13
|
-
end
|
14
|
-
|
15
|
-
desc 'Generate documentation.'
|
16
|
-
Rake::RDocTask.new(:rdoc) do |rdoc|
|
17
|
-
rdoc.rdoc_dir = 'rdoc'
|
18
|
-
rdoc.title = 'Refinery'
|
19
|
-
rdoc.options << '--line-numbers' << '--inline-source'
|
20
|
-
rdoc.rdoc_files.include('README.rdoc')
|
21
|
-
rdoc.rdoc_files.include('lib/*.rb')
|
22
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
23
|
-
end
|
24
|
-
|
25
|
-
begin
|
26
|
-
require 'jeweler'
|
27
|
-
Jeweler::Tasks.new do |gemspec|
|
28
|
-
gemspec.name = "refinery"
|
29
|
-
gemspec.summary = "Refinery processes data in a distributed environment."
|
30
|
-
gemspec.email = "anthonyeden@gmail.com"
|
31
|
-
gemspec.homepage = "http://github.com/aeden/refinery"
|
32
|
-
gemspec.description = "Process data in a distributed fashion."
|
33
|
-
gemspec.authors = ["Anthony Eden"]
|
34
|
-
gemspec.files.exclude 'docs/**/*'
|
35
|
-
gemspec.files.exclude '.autotest'
|
36
|
-
gemspec.rubyforge_project = 'refinery'
|
37
|
-
gemspec.add_dependency "right_aws"
|
38
|
-
gemspec.add_dependency "moneta"
|
39
|
-
gemspec.add_dependency "addressable"
|
40
|
-
end
|
41
|
-
rescue LoadError
|
42
|
-
puts "Jeweler not available. Install it with: sudo gem install jeweler"
|
43
|
-
end
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.12.2
|
data/bin/epub
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# epub: run the event publisher
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# epub [OPTION]
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -p, --publishers directory
|
21
|
-
# specify the directory for finding publishers
|
22
|
-
#
|
23
|
-
# -v, --verbose
|
24
|
-
# print info to the standard output
|
25
|
-
|
26
|
-
require 'getoptlong'
|
27
|
-
require 'rdoc/usage'
|
28
|
-
require 'refinery'
|
29
|
-
|
30
|
-
pidfile = 'epub.pid'
|
31
|
-
|
32
|
-
options = {}
|
33
|
-
opts = GetoptLong.new(
|
34
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
35
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
36
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
37
|
-
[ '--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT],
|
38
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT],
|
39
|
-
[ '--pidfile', GetoptLong::REQUIRED_ARGUMENT]
|
40
|
-
)
|
41
|
-
opts.each do |opt, arg|
|
42
|
-
case opt
|
43
|
-
when '--help'
|
44
|
-
RDoc::usage
|
45
|
-
when '--debug'
|
46
|
-
options[:debug] = true
|
47
|
-
when '--config'
|
48
|
-
options[:config] = arg
|
49
|
-
when '--publishers'
|
50
|
-
options[:publishers] = arg
|
51
|
-
when '--verbose'
|
52
|
-
options[:verbose] = true
|
53
|
-
when '--pidfile'
|
54
|
-
pidfile = arg
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
open(pidfile, 'w') do |f|
|
59
|
-
f << $$
|
60
|
-
end
|
61
|
-
|
62
|
-
Refinery::EventPublisher.new(options).run
|
63
|
-
|
64
|
-
File.delete(pidfile)
|
data/bin/monitor
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# monitor: monitor the refinery system
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# monitor [OPTION]
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -v, --verbose
|
21
|
-
# print info to the standard output
|
22
|
-
|
23
|
-
require 'getoptlong'
|
24
|
-
require 'rdoc/usage'
|
25
|
-
require 'refinery'
|
26
|
-
|
27
|
-
options = {}
|
28
|
-
opts = GetoptLong.new(
|
29
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
30
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
31
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
32
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT]
|
33
|
-
)
|
34
|
-
opts.each do |opt, arg|
|
35
|
-
case opt
|
36
|
-
when '--help'
|
37
|
-
RDoc::usage
|
38
|
-
when '--debug'
|
39
|
-
options[:debug] = true
|
40
|
-
when '--config'
|
41
|
-
options[:config] = arg
|
42
|
-
when '--verbose'
|
43
|
-
options[:verbose] = true
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
Refinery::Monitor.new(options).run
|
data/bin/pubnow
DELETED
@@ -1,61 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# pubnow: run the event publisher
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# pubnow [OPTION] KEY
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -p, --publishers directory
|
21
|
-
# specify the directory for finding publishers
|
22
|
-
#
|
23
|
-
# -v, --verbose
|
24
|
-
# print info to the standard output
|
25
|
-
#
|
26
|
-
# KEY: the name of the publisher to run. For example
|
27
|
-
# to run the sample publisher use 'sample'
|
28
|
-
|
29
|
-
require 'getoptlong'
|
30
|
-
require 'rdoc/usage'
|
31
|
-
require 'refinery'
|
32
|
-
|
33
|
-
options = {}
|
34
|
-
opts = GetoptLong.new(
|
35
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
36
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
37
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
38
|
-
[ '--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT],
|
39
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT ]
|
40
|
-
)
|
41
|
-
opts.each do |opt, arg|
|
42
|
-
case opt
|
43
|
-
when '--help'
|
44
|
-
RDoc::usage
|
45
|
-
when '--debug'
|
46
|
-
options[:debug] = true
|
47
|
-
when '--config'
|
48
|
-
options[:config] = arg
|
49
|
-
when '--publishers'
|
50
|
-
options[:publishers] = arg
|
51
|
-
when '--verbose'
|
52
|
-
options[:verbose] = true
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
if ARGV.length != 1
|
57
|
-
puts "Missing key argument (try --help)"
|
58
|
-
exit 0
|
59
|
-
end
|
60
|
-
|
61
|
-
Refinery::EventPublisher.new(options).run_once(ARGV.shift)
|
data/bin/refinery
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# == Synopsis
|
4
|
-
#
|
5
|
-
# refinery: run the Refinery server
|
6
|
-
#
|
7
|
-
# == Usage
|
8
|
-
#
|
9
|
-
# refinery [OPTION]
|
10
|
-
#
|
11
|
-
# -h, --help:
|
12
|
-
# show help
|
13
|
-
#
|
14
|
-
# -d, --debug:
|
15
|
-
# turn on debug logging
|
16
|
-
#
|
17
|
-
# -c, --config filename
|
18
|
-
# specify a configuration file
|
19
|
-
#
|
20
|
-
# -w, --workers directory
|
21
|
-
# specify the directory for finding workers
|
22
|
-
#
|
23
|
-
# -v, --verbose
|
24
|
-
# print info to the standard output
|
25
|
-
|
26
|
-
require 'getoptlong'
|
27
|
-
require 'rdoc/usage'
|
28
|
-
require 'refinery'
|
29
|
-
|
30
|
-
pidfile = 'refinery.pid'
|
31
|
-
|
32
|
-
options = {}
|
33
|
-
opts = GetoptLong.new(
|
34
|
-
[ '--help', '-h', GetoptLong::NO_ARGUMENT ],
|
35
|
-
[ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
|
36
|
-
[ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
|
37
|
-
[ '--workers', '-w', GetoptLong::REQUIRED_ARGUMENT],
|
38
|
-
[ '--verbose', '-v', GetoptLong::NO_ARGUMENT],
|
39
|
-
[ '--pidfile', GetoptLong::REQUIRED_ARGUMENT]
|
40
|
-
)
|
41
|
-
opts.each do |opt, arg|
|
42
|
-
case opt
|
43
|
-
when '--help'
|
44
|
-
RDoc::usage
|
45
|
-
when '--debug'
|
46
|
-
options[:debug] = true
|
47
|
-
when '--config'
|
48
|
-
options[:config] = arg
|
49
|
-
when '--workers'
|
50
|
-
options[:workers] = arg
|
51
|
-
when '--verbose'
|
52
|
-
options[:verbose] = true
|
53
|
-
when '--pidfile'
|
54
|
-
pidfile = arg
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
open(pidfile, 'w') do |f|
|
59
|
-
f << $$
|
60
|
-
end
|
61
|
-
|
62
|
-
Refinery::Server.new(options).run
|
63
|
-
|
64
|
-
File.delete(pidfile)
|
data/config/config.example.yml
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
aws:
|
2
|
-
credentials:
|
3
|
-
access_key_id: "access_key_id"
|
4
|
-
secret_access_key: "secret_access_key"
|
5
|
-
# queue_engine:
|
6
|
-
# provider: 'beanstalk'
|
7
|
-
prefix: 'example_'
|
8
|
-
processors:
|
9
|
-
sample:
|
10
|
-
queue: 'sample' # can be omitted
|
11
|
-
publishers:
|
12
|
-
delay: 30
|
13
|
-
workers:
|
14
|
-
initial: 3
|
15
|
-
data_store:
|
16
|
-
class: s3
|
17
|
-
error:
|
18
|
-
publishers:
|
19
|
-
delay: 30
|
20
|
-
workers:
|
21
|
-
initial: 1
|
data/lib/refinery/beanstalk_queue.rb
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
module Refinery #:nodoc:
|
2
|
-
# An interface to beanstalk using SQS-compatible methods.
|
3
|
-
class BeanstalkQueue
|
4
|
-
include Refinery::Loggable
|
5
|
-
attr_reader :name
|
6
|
-
|
7
|
-
# Construct a BeanstalkQueue instance.
|
8
|
-
#
|
9
|
-
# *<tt>name</tt>: if specified then that "tube" will be used.
|
10
|
-
# *<tt>hosts</tt>: if specified then those host:port combos will be used.
|
11
|
-
def initialize(name=nil, hosts=nil)
|
12
|
-
@name = name.gsub(/_/, '-') # beanstalk does not like underscores in tube names
|
13
|
-
@hosts = hosts || ['localhost:11300']
|
14
|
-
end
|
15
|
-
|
16
|
-
# Get the next message from the queue
|
17
|
-
def receive(visibility=nil)
|
18
|
-
beanstalk.reserve(visibility)
|
19
|
-
end
|
20
|
-
|
21
|
-
# Get the approximate queue size
|
22
|
-
def size
|
23
|
-
beanstalk.stats_tube(name)['current-jobs-ready']
|
24
|
-
end
|
25
|
-
|
26
|
-
# Send a message
|
27
|
-
def send_message(message)
|
28
|
-
beanstalk.put(message)
|
29
|
-
end
|
30
|
-
|
31
|
-
protected
|
32
|
-
def beanstalk
|
33
|
-
@beanstalk ||= Beanstalk::Pool.new(@hosts, name)
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|