refinery 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/.autotest +10 -0
  2. data/.gitignore +5 -0
  3. data/CHANGELOG +1 -0
  4. data/LICENSE +21 -0
  5. data/README.rdoc +50 -0
  6. data/README.textile +50 -0
  7. data/Rakefile +39 -0
  8. data/VERSION +1 -0
  9. data/bin/epub +53 -0
  10. data/bin/monitor +47 -0
  11. data/bin/pubnow +61 -0
  12. data/bin/refinery +53 -0
  13. data/config/config.example.yml +18 -0
  14. data/lib/refinery/config.rb +48 -0
  15. data/lib/refinery/configurable.rb +15 -0
  16. data/lib/refinery/daemon.rb +124 -0
  17. data/lib/refinery/event_publisher.rb +120 -0
  18. data/lib/refinery/heartbeat.rb +30 -0
  19. data/lib/refinery/loggable.rb +9 -0
  20. data/lib/refinery/monitor.rb +116 -0
  21. data/lib/refinery/publisher.rb +24 -0
  22. data/lib/refinery/queueable.rb +20 -0
  23. data/lib/refinery/server.rb +86 -0
  24. data/lib/refinery/statistics.rb +61 -0
  25. data/lib/refinery/stats_server.rb +134 -0
  26. data/lib/refinery/utilities.rb +33 -0
  27. data/lib/refinery/validations.rb +48 -0
  28. data/lib/refinery/worker.rb +65 -0
  29. data/lib/refinery.rb +74 -0
  30. data/logs/README +1 -0
  31. data/publishers/error.rb +8 -0
  32. data/publishers/sample.rb +8 -0
  33. data/publishers/sleep.rb +7 -0
  34. data/refinery.gemspec +105 -0
  35. data/test/config.yml +10 -0
  36. data/test/test_helper.rb +21 -0
  37. data/test/unit/config_test.rb +42 -0
  38. data/test/unit/configurable_test.rb +11 -0
  39. data/test/unit/daemon_test.rb +37 -0
  40. data/test/unit/event_publisher_test.rb +11 -0
  41. data/test/unit/heartbeat_test.rb +22 -0
  42. data/test/unit/loggable_test.rb +11 -0
  43. data/test/unit/publisher_test.rb +13 -0
  44. data/test/unit/queueable_test.rb +24 -0
  45. data/test/unit/server_test.rb +39 -0
  46. data/test/unit/statistics_test.rb +41 -0
  47. data/test/unit/utilities_test.rb +25 -0
  48. data/test/unit/validations_test.rb +37 -0
  49. data/test/unit/worker_test.rb +44 -0
  50. data/workers/error.rb +8 -0
  51. data/workers/sample.rb +8 -0
  52. data/workers/sleep.rb +7 -0
  53. metadata +122 -0
data/.autotest ADDED
@@ -0,0 +1,10 @@
1
# Autotest configuration: any change under test/ or lib/ re-runs the
# complete test suite.
module Autotest::CustomTestMatch
  Autotest.add_hook :initialize do |at|
    # Both mappings answer with the same file set: every *_test.rb file.
    every_test_file = proc { |_changed_file, _match| at.files_matching(/_test\.rb$/) }

    at.add_mapping(/test/, &every_test_file)
    at.add_mapping(/lib\/.*/, &every_test_file)
  end
end
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ logs/*.log
2
+ config/config.yml
3
+ rdoc/*
4
+ refinery-*.gem
5
+ stats.db
data/CHANGELOG ADDED
@@ -0,0 +1 @@
1
+ v0.9.0. Initial release.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 Anthony Eden
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,50 @@
1
+ = Refinery
2
+
3
+ Refinery is a distributed processing framework written in the Ruby
4
+ programming language. It is designed to work with Amazon's Web
5
+ Services such as SQS and S3 to distribute image and data processing
6
+ across multiple servers to alleviate the need for heavy-duty data and
7
+ image processing on web application servers.
8
+
9
+ Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
10
+ such as Amazon's S3.
11
+
12
+ == Required Libraries
13
+
14
+ * RightScale AWS
15
+ * JSON
16
+ * Moneta
17
+
18
+ == Optional Libraries
19
+
20
+ If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
21
+
22
+ If you want to run the stats server to view operational statistics
23
+ about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
24
+
25
+ == Executing
26
+
27
+ Refinery has several executable scripts that are used to launch the various components:
28
+
29
+ === bin/refinery
30
+
31
+ This command executes the refinery execution server. This is where jobs
32
+ are run.
33
+
34
+ Example: bin/refinery -c config/config.yml
35
+
36
+ === bin/epub
37
+
38
+ This command executes all publishers that fire on a regular basis.
39
+
40
+ Example: bin/epub -c config/config.yml
41
+
42
+ === bin/pubnow
43
+
44
+ This command executes a single publisher once.
45
+
46
+ Example: bin/pubnow -c config/config.yml sample
47
+
48
+ === bin/monitor
49
+
50
+ This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
data/README.textile ADDED
@@ -0,0 +1,50 @@
1
+ h1. Refinery
2
+
3
+ Refinery is a distributed processing framework written in the Ruby
4
+ programming language. It is designed to work with Amazon's Web
5
+ Services such as SQS and S3 to distribute image and data processing
6
+ across multiple servers to alleviate the need for heavy-duty data and
7
+ image processing on web application servers.
8
+
9
+ Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
10
+ such as Amazon's S3.
11
+
12
+ h2. Required Libraries
13
+
14
+ * RightScale AWS
15
+ * JSON
16
+ * Moneta
17
+
18
+ h2. Optional Libraries
19
+
20
+ If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
21
+
22
+ If you want to run the stats server to view operational statistics
23
+ about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
24
+
25
+ h2. Executing
26
+
27
+ Refinery has several executable scripts that are used to launch the various components:
28
+
29
+ h3. bin/refinery
30
+
31
+ This command executes the refinery execution server. This is where jobs
32
+ are run.
33
+
34
+ Example: bin/refinery -c config/config.yml
35
+
36
+ h3. bin/epub
37
+
38
+ This command executes all publishers that fire on a regular basis.
39
+
40
+ Example: bin/epub -c config/config.yml
41
+
42
+ h3. bin/pubnow
43
+
44
+ This command executes a single publisher once.
45
+
46
+ Example: bin/pubnow -c config/config.yml sample
47
+
48
+ h3. bin/monitor
49
+
50
+ This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
data/Rakefile ADDED
@@ -0,0 +1,39 @@
1
require 'rake'
require 'rake/testtask'

# Newer Rake releases no longer ship rake/rdoctask; the task now lives in
# RDoc itself. Prefer the RDoc-provided task and fall back to the legacy
# one so that old installations keep working.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

desc 'Default: run tests.'
task :default => [:test]

desc 'Run tests.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

desc 'Generate documentation.'
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'Refinery'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/*.rb')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Gem packaging tasks are optional: they are only defined when Jeweler is
# installed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "refinery"
    gemspec.summary = "Refinery processes data in a distributed environment."
    gemspec.email = "anthonyeden@gmail.com"
    gemspec.homepage = "http://github.com/aeden/refinery"
    gemspec.description = "Process data in a distributed fashion."
    gemspec.authors = ["Anthony Eden"]
    gemspec.files.exclude 'docs/**/*'
    gemspec.rubyforge_project = 'refinery'
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.0
data/bin/epub ADDED
@@ -0,0 +1,53 @@
1
#!/usr/bin/env ruby

# == Synopsis
#
# epub: run the event publisher
#
# == Usage
#
# epub [OPTION]
#
# -h, --help:
# show help
#
# -d, --debug:
# turn on debug logging
#
# -c, --config filename
# specify a configuration file
#
# -p, --publishers directory
# specify the directory for finding publishers
#
# -v, --verbose
# print info to the standard output

require 'getoptlong'
require 'rdoc/usage'
require File.dirname(__FILE__) + '/../lib/refinery'

# Command line flags grouped by how they populate the options hash:
# switches are stored as +true+, settings store the argument they were given.
switches = { '--debug' => :debug, '--verbose' => :verbose }
settings = { '--config' => :config, '--publishers' => :publishers }

options = {}
GetoptLong.new(
  ['--help',       '-h', GetoptLong::NO_ARGUMENT],
  ['--debug',      '-d', GetoptLong::NO_ARGUMENT],
  ['--config',     '-c', GetoptLong::REQUIRED_ARGUMENT],
  ['--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT],
  ['--verbose',    '-v', GetoptLong::NO_ARGUMENT]
).each do |flag, value|
  if flag == '--help'
    # Prints the comment block at the top of this file.
    RDoc::usage
  elsif switches.key?(flag)
    options[switches[flag]] = true
  elsif settings.key?(flag)
    options[settings[flag]] = value
  end
end

Refinery::EventPublisher.new(options).run
data/bin/monitor ADDED
@@ -0,0 +1,47 @@
1
#!/usr/bin/env ruby

# == Synopsis
#
# monitor: monitor the refinery system
#
# == Usage
#
# monitor [OPTION]
#
# -h, --help:
# show help
#
# -d, --debug:
# turn on debug logging
#
# -c, --config filename
# specify a configuration file
#
# -v, --verbose
# print info to the standard output

require 'getoptlong'
require 'rdoc/usage'
require File.dirname(__FILE__) + '/../lib/refinery'

# Command line flags grouped by how they populate the options hash:
# switches are stored as +true+, settings store the argument they were given.
switches = { '--debug' => :debug, '--verbose' => :verbose }
settings = { '--config' => :config }

options = {}
GetoptLong.new(
  ['--help',    '-h', GetoptLong::NO_ARGUMENT],
  ['--debug',   '-d', GetoptLong::NO_ARGUMENT],
  ['--config',  '-c', GetoptLong::REQUIRED_ARGUMENT],
  ['--verbose', '-v', GetoptLong::NO_ARGUMENT]
).each do |flag, value|
  if flag == '--help'
    # Prints the comment block at the top of this file.
    RDoc::usage
  elsif switches.key?(flag)
    options[switches[flag]] = true
  elsif settings.key?(flag)
    options[settings[flag]] = value
  end
end

Refinery::Monitor.new(options).run
data/bin/pubnow ADDED
@@ -0,0 +1,61 @@
1
#!/usr/bin/env ruby

# == Synopsis
#
# pubnow: run a single publisher once
#
# == Usage
#
# pubnow [OPTION] KEY
#
# -h, --help:
# show help
#
# -d, --debug:
# turn on debug logging
#
# -c, --config filename
# specify a configuration file
#
# -p, --publishers directory
# specify the directory for finding publishers
#
# -v, --verbose
# print info to the standard output
#
# KEY: the name of the publisher to run. For example
# to run the sample publisher use 'sample'

require 'getoptlong'
require 'rdoc/usage'
require File.dirname(__FILE__) + '/../lib/refinery'

options = {}
opts = GetoptLong.new(
  [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
  [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
  [ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT ],
  [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ]
)
opts.each do |opt, arg|
  case opt
  when '--help'
    # Prints the comment block at the top of this file.
    RDoc::usage
  when '--debug'
    options[:debug] = true
  when '--config'
    options[:config] = arg
  when '--publishers'
    options[:publishers] = arg
  when '--verbose'
    options[:verbose] = true
  end
end

# GetoptLong has removed the recognised options from ARGV by now, so exactly
# one element (the publisher key) must remain. A usage error is reported on
# stderr with a non-zero exit status so calling scripts can detect it.
if ARGV.length != 1
  warn "Missing key argument (try --help)"
  exit 1
end

Refinery::EventPublisher.new(options).run_once(ARGV.shift)
data/bin/refinery ADDED
@@ -0,0 +1,53 @@
1
#!/usr/bin/env ruby

# == Synopsis
#
# refinery: run the Refinery server
#
# == Usage
#
# refinery [OPTION]
#
# -h, --help:
# show help
#
# -d, --debug:
# turn on debug logging
#
# -c, --config filename
# specify a configuration file
#
# -w, --workers directory
# specify the directory for finding workers
#
# -v, --verbose
# print info to the standard output

require 'getoptlong'
require 'rdoc/usage'
require File.dirname(__FILE__) + '/../lib/refinery'

# Command line flags grouped by how they populate the options hash:
# switches are stored as +true+, settings store the argument they were given.
switches = { '--debug' => :debug, '--verbose' => :verbose }
settings = { '--config' => :config, '--workers' => :workers }

options = {}
GetoptLong.new(
  ['--help',    '-h', GetoptLong::NO_ARGUMENT],
  ['--debug',   '-d', GetoptLong::NO_ARGUMENT],
  ['--config',  '-c', GetoptLong::REQUIRED_ARGUMENT],
  ['--workers', '-w', GetoptLong::REQUIRED_ARGUMENT],
  ['--verbose', '-v', GetoptLong::NO_ARGUMENT]
).each do |flag, value|
  if flag == '--help'
    # Prints the comment block at the top of this file.
    RDoc::usage
  elsif switches.key?(flag)
    options[switches[flag]] = true
  elsif settings.key?(flag)
    options[settings[flag]] = value
  end
end

Refinery::Server.new(options).run
@@ -0,0 +1,18 @@
1
+ aws:
2
+ credentials:
3
+ access_key_id: "access_key_id"
4
+ secret_access_key: "secret_access_key"
5
+ processors:
6
+ sample:
7
+ queue: 'sample' # can be omitted
8
+ publishers:
9
+ delay: 30
10
+ workers:
11
+ initial: 3
12
+ data_store:
13
+ class: s3
14
+ error:
15
+ publishers:
16
+ delay: 30
17
+ workers:
18
+ initial: 1
@@ -0,0 +1,48 @@
1
module Refinery #:nodoc:
  # Configuration class.
  #
  # Wraps a hash of settings. Keys are converted with +to_s+ on every read
  # and write, so symbols and strings address the same entry. The settings
  # can optionally be backed by a YAML file that is re-read when its
  # modification time changes.
  class Config
    # Get a shared configuration. The instance is created on first use with
    # empty 'aws' credentials and no 'processors', and memoized afterwards.
    def self.default
      @default ||= new({
        'aws' => {
          'credentials' => {}
        },
        'processors' => {}
      })
    end

    # Initialize the config with the given data
    #
    # * <tt>data</tt>: The hash of configuration values
    def initialize(data={})
      @data = data
    end

    # Get the configuration value stored under <tt>key.to_s</tt>.
    def [](key)
      data[key.to_s]
    end

    # Set the configuration value stored under <tt>key.to_s</tt>.
    def []=(key, value)
      data[key.to_s] = value
    end

    # Load configuration from a YAML file, replacing the current data.
    # The file is remembered so that #refresh can re-read it later.
    #
    # Returns the modification time recorded for the file.
    def load_file(file)
      @file = file
      reload
    end

    # Refresh the configuration from the YAML file if necessary.
    #
    # Does nothing when no file has been loaded yet or when the file's
    # modification time still matches the one recorded at the last load.
    def refresh
      return if @file.nil?
      reload if File.mtime(@file) != @last_load
    end

    private
    def data
      @data ||= {}
    end

    # Read the backing file and record its modification time. The time is
    # sampled before parsing so that a write happening during the parse is
    # picked up by the next #refresh.
    def reload
      modified_at = File.mtime(@file)
      @data = YAML::load_file(@file)
      @last_load = modified_at
    end
  end
end
@@ -0,0 +1,15 @@
1
module Refinery #:nodoc:
  # Mixin giving the including object a +config+ accessor that falls back
  # to the shared default configuration.
  module Configurable
    # Assign the configuration to use.
    attr_writer :config

    # The configuration in use. When none has been assigned (or it is nil or
    # false) the shared Refinery::Config.default is stored and returned.
    def config
      @config = Refinery::Config.default unless @config
      @config
    end
  end
end
@@ -0,0 +1,124 @@
1
module Refinery #:nodoc:
  # A daemon provides a thread to run workers in.
  #
  # The thread is started by the constructor. While the daemon is running it
  # repeatedly drains the waiting queue, hands every message to a freshly
  # instantiated worker, and posts the outcome to the done or error queue.
  class Daemon
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Utilities

    RUNNING = 'running'
    STOPPED = 'stopped'

    # The daemon's thread
    attr_reader :thread
    # The name of the daemon
    attr_reader :name
    # The queue for incoming messages to process
    attr_reader :waiting_queue
    # The queue for outgoing messages once they've been processed
    attr_reader :done_queue
    # The queue for error messages
    attr_reader :error_queue

    # Stop the daemon. The thread exits once the message currently being
    # processed (if any) is finished.
    def stop
      self.state = STOPPED
    end

    # Return the daemon state
    def state
      @state ||= RUNNING
    end

    # Set the daemon state.
    def state=(state)
      @state = state
    end
    # NOTE(review): this protects the reader, not the writer defined just
    # above; left unchanged to keep the visible interface as it was.
    protected :state

    # Return true if the daemon state is running.
    def running?
      state == RUNNING
    end

    # Initialize the daemon and start its processing thread.
    #
    # * <tt>server</tt>: The server instance
    # * <tt>name</tt>: The processor name
    # * <tt>waiting_queue</tt>: The waiting queue that provides messages to be processed
    # * <tt>error_queue</tt>: The queue where errors are posted.
    # * <tt>done_queue</tt>: The queue for messages that have been processed.
    def initialize(server, name, waiting_queue, error_queue, done_queue)
      Refinery::Server.logger.debug "Starting daemon"

      @server = server
      @name = name
      @waiting_queue = waiting_queue
      @error_queue = error_queue
      @done_queue = done_queue

      @thread = Thread.new { run_loop }
    end

    # A hash of worker classes
    # (maps a worker name to the modification time of the source file that
    # was last loaded for it).
    def workers
      @workers ||= {}
    end

    private
    # Body of the daemon thread: drain the waiting queue, pause for a second,
    # repeat until the daemon is stopped.
    def run_loop
      logger.debug "Running daemon thread"
      while running?
        begin
          drain_waiting_queue
        rescue Exception => e
          # Deliberately broad: any failure while receiving or loading the
          # worker is logged and the thread keeps running.
          logger.error "An error occurred while receiving from the waiting queue: #{e.message}"
        end
        # Pause after every pass, including failed ones, so a broken queue
        # does not turn this into a busy loop.
        sleep(1)
      end
      logger.debug "Exiting daemon thread"
    end

    # Process messages until the waiting queue has none left or the daemon
    # has been stopped. Errors raised while receiving a message or while
    # loading/instantiating the worker propagate to the caller.
    def drain_waiting_queue
      while running? && (message = waiting_queue.receive)
        process(message, load_worker_class(name).new(self))
      end
    end

    # Run the worker on a single message.
    #
    # When the worker reports a truthy result a 'done' message is sent and
    # the original message is deleted; on a falsy result the message is left
    # alone. When the worker raises, an error message is sent to the error
    # queue and the original message is deleted.
    def process(message, worker)
      result, run_time = worker.run(decode_message(message.body))
      return unless result

      done_message = {
        'host_info' => host_info,
        'original' => message.body,
        'run_time' => run_time
      }
      logger.debug "Sending 'done' message to #{done_queue.name}"
      done_queue.send_message(encode_message(done_message))

      logger.debug "Deleting message from queue"
      message.delete()
    rescue Exception => e
      # Deliberately broad: whatever the worker raises is reported through
      # the error queue instead of terminating the thread.
      error_message = {
        'error' => {
          'message' => e.message,
          'class' => e.class.name
        },
        'host_info' => host_info,
        'original' => message.body
      }
      error_queue.send_message(encode_message(error_message))
      message.delete()
    end

    # Load the appropriate worker class
    #
    # The source file <tt>name.rb</tt> in the server's workers directory is
    # (re)loaded whenever its modification time differs from the one recorded
    # in #workers. Raises SourceFileNotFound when the file does not exist.
    def load_worker_class(name)
      source_file = "#{@server.workers_directory}/#{name}.rb"
      if File.exist?(source_file)
        modified_at = File.mtime(source_file)
        if workers[name] != modified_at
          logger.debug "Loading #{source_file}"
          load(source_file)
          workers[name] = modified_at
        end
      else
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      Object.const_get(camelize(name))
    end
  end
end