refinery 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/.autotest +10 -0
  2. data/.gitignore +5 -0
  3. data/CHANGELOG +1 -0
  4. data/LICENSE +21 -0
  5. data/README.rdoc +50 -0
  6. data/README.textile +50 -0
  7. data/Rakefile +39 -0
  8. data/VERSION +1 -0
  9. data/bin/epub +53 -0
  10. data/bin/monitor +47 -0
  11. data/bin/pubnow +61 -0
  12. data/bin/refinery +53 -0
  13. data/config/config.example.yml +18 -0
  14. data/lib/refinery/config.rb +48 -0
  15. data/lib/refinery/configurable.rb +15 -0
  16. data/lib/refinery/daemon.rb +124 -0
  17. data/lib/refinery/event_publisher.rb +120 -0
  18. data/lib/refinery/heartbeat.rb +30 -0
  19. data/lib/refinery/loggable.rb +9 -0
  20. data/lib/refinery/monitor.rb +116 -0
  21. data/lib/refinery/publisher.rb +24 -0
  22. data/lib/refinery/queueable.rb +20 -0
  23. data/lib/refinery/server.rb +86 -0
  24. data/lib/refinery/statistics.rb +61 -0
  25. data/lib/refinery/stats_server.rb +134 -0
  26. data/lib/refinery/utilities.rb +33 -0
  27. data/lib/refinery/validations.rb +48 -0
  28. data/lib/refinery/worker.rb +65 -0
  29. data/lib/refinery.rb +74 -0
  30. data/logs/README +1 -0
  31. data/publishers/error.rb +8 -0
  32. data/publishers/sample.rb +8 -0
  33. data/publishers/sleep.rb +7 -0
  34. data/refinery.gemspec +105 -0
  35. data/test/config.yml +10 -0
  36. data/test/test_helper.rb +21 -0
  37. data/test/unit/config_test.rb +42 -0
  38. data/test/unit/configurable_test.rb +11 -0
  39. data/test/unit/daemon_test.rb +37 -0
  40. data/test/unit/event_publisher_test.rb +11 -0
  41. data/test/unit/heartbeat_test.rb +22 -0
  42. data/test/unit/loggable_test.rb +11 -0
  43. data/test/unit/publisher_test.rb +13 -0
  44. data/test/unit/queueable_test.rb +24 -0
  45. data/test/unit/server_test.rb +39 -0
  46. data/test/unit/statistics_test.rb +41 -0
  47. data/test/unit/utilities_test.rb +25 -0
  48. data/test/unit/validations_test.rb +37 -0
  49. data/test/unit/worker_test.rb +44 -0
  50. data/workers/error.rb +8 -0
  51. data/workers/sample.rb +8 -0
  52. data/workers/sleep.rb +7 -0
  53. metadata +122 -0
data/.autotest ADDED
@@ -0,0 +1,10 @@
1
# Autotest customisation: registers an :initialize hook that maps any path
# matching "test" or "lib/..." to every file whose name ends in _test.rb.
module Autotest::CustomTestMatch
  Autotest.add_hook(:initialize) do |autotest|
    # Paths containing "test" map to all *_test.rb files.
    autotest.add_mapping(/test/) { |_path, _match| autotest.files_matching(/_test\.rb$/) }

    # Paths under lib/ map to the same set of test files.
    autotest.add_mapping(/lib\/.*/) { |_path, _match| autotest.files_matching(/_test\.rb$/) }
  end
end
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ logs/*.log
2
+ config/config.yml
3
+ rdoc/*
4
+ refinery-*.gem
5
+ stats.db
data/CHANGELOG ADDED
@@ -0,0 +1 @@
1
+ v0.9.0. Initial release.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 Anthony Eden
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,50 @@
1
+ = Refinery
2
+
3
+ Refinery is a distributed processing framework written in the Ruby
4
+ programming language. It is designed to work with Amazon's Web
5
+ Services such as SQS and S3 to distribute image and data processing
6
+ across multiple servers to alleviate the need for heavy-duty data and
7
+ image processing on web application servers.
8
+
9
+ Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
10
+ such as Amazon's S3.
11
+
12
+ == Required Libraries
13
+
14
+ * RightScale AWS
15
+ * JSON
16
+ * Moneta
17
+
18
+ == Optional Libraries
19
+
20
+ If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
21
+
22
+ If you want to run the stats server to view operational statistics
23
+ about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
24
+
25
+ == Executing
26
+
27
+ Refinery has several executable scripts that are used to launch the various components:
28
+
29
+ === bin/refinery
30
+
31
+ This command executes the refinery execution server. This is where jobs
32
+ are run.
33
+
34
+ Example: bin/refinery -c config/config.yml
35
+
36
+ === bin/epub
37
+
38
+ This command executes all publishers that fire on a regular basis.
39
+
40
+ Example: bin/epub -c config/config.yml
41
+
42
+ === bin/pubnow
43
+
44
+ This command executes a single publisher once.
45
+
46
+ Example: bin/pubnow -c config/config.yml sample
47
+
48
+ === bin/monitor
49
+
50
+ This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
data/README.textile ADDED
@@ -0,0 +1,50 @@
1
+ h1. Refinery
2
+
3
+ Refinery is a distributed processing framework written in the Ruby
4
+ programming language. It is designed to work with Amazon's Web
5
+ Services such as SQS and S3 to distribute image and data processing
6
+ across multiple servers to alleviate the need for heavy-duty data and
7
+ image processing on web application servers.
8
+
9
+ Interprocess messaging is accomplished through a distributed queue system such as Amazon SQS and data storage is accomplished through a distributed data store
10
+ such as Amazon's S3.
11
+
12
+ h2. Required Libraries
13
+
14
+ * RightScale AWS
15
+ * JSON
16
+ * Moneta
17
+
18
+ h2. Optional Libraries
19
+
20
+ If you want to run the monitor then you'll need to install Sequel and have SQLite3 installed.
21
+
22
+ If you want to run the stats server to view operational statistics
23
+ about Refinery then you'll need to install Sequel, SQLite3 and Ramaze.
24
+
25
+ h2. Executing
26
+
27
+ Refinery has several executable scripts that are used to launch the various components:
28
+
29
+ h3. bin/refinery
30
+
31
+ This command executes the refinery execution server. This is where jobs
32
+ are run.
33
+
34
+ Example: bin/refinery -c config/config.yml
35
+
36
+ h3. bin/epub
37
+
38
+ This command executes all publishers that fire on a regular basis.
39
+
40
+ Example: bin/epub -c config/config.yml
41
+
42
+ h3. bin/pubnow
43
+
44
+ This command executes a single publisher once.
45
+
46
+ Example: bin/pubnow -c config/config.yml sample
47
+
48
+ h3. bin/monitor
49
+
50
+ This command is used to run the monitor system. The monitor system tracks the health of the refinery. The monitor should only be run on a single host.
data/Rakefile ADDED
@@ -0,0 +1,39 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rake/rdoctask'
4
+
5
desc 'Default: run tests.'
task :default => :test

desc 'Run tests.'
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('lib')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = true
end

desc 'Generate documentation.'
Rake::RDocTask.new(:rdoc) do |doc_task|
  doc_task.rdoc_dir = 'rdoc'
  doc_task.title = 'Refinery'
  doc_task.options.push('--line-numbers', '--inline-source')
  doc_task.rdoc_files.include('README.rdoc', 'lib/*.rb', 'lib/**/*.rb')
end

# The gem packaging tasks are only defined when Jeweler can be loaded;
# otherwise a hint on how to install it is printed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |spec|
    spec.name = 'refinery'
    spec.summary = 'Refinery processes data in a distributed environment.'
    spec.email = 'anthonyeden@gmail.com'
    spec.homepage = 'http://github.com/aeden/refinery'
    spec.description = 'Process data in a distributed fashion.'
    spec.authors = ['Anthony Eden']
    spec.files.exclude('docs/**/*')
    spec.rubyforge_project = 'refinery'
  end
rescue LoadError
  puts 'Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com'
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.0
data/bin/epub ADDED
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # == Synopsis
4
+ #
5
+ # epub: run the event publisher
6
+ #
7
+ # == Usage
8
+ #
9
+ # epub [OPTION]
10
+ #
11
+ # -h, --help:
12
+ # show help
13
+ #
14
+ # -d, --debug:
15
+ # turn on debug logging
16
+ #
17
+ # -c, --config filename
18
+ # specify a configuration file
19
+ #
20
+ # -p, --publishers directory
21
+ # specify the directory for finding publishers
22
+ #
23
+ # -v, --verbose
24
+ # print info to the standard output
25
+
26
+ require 'getoptlong'
27
+ require 'rdoc/usage'
28
+ require File.dirname(__FILE__) + '/../lib/refinery'
29
+
30
+ options = {}
31
+ opts = GetoptLong.new(
32
+ [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
33
+ [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
34
+ [ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
35
+ [ '--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT],
36
+ [ '--verbose', '-v', GetoptLong::NO_ARGUMENT]
37
+ )
38
+ opts.each do |opt, arg|
39
+ case opt
40
+ when '--help'
41
+ RDoc::usage
42
+ when '--debug'
43
+ options[:debug] = true
44
+ when '--config'
45
+ options[:config] = arg
46
+ when '--publishers'
47
+ options[:publishers] = arg
48
+ when '--verbose'
49
+ options[:verbose] = true
50
+ end
51
+ end
52
+
53
+ Refinery::EventPublisher.new(options).run
data/bin/monitor ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # == Synopsis
4
+ #
5
+ # monitor: monitor the refinery system
6
+ #
7
+ # == Usage
8
+ #
9
+ # monitor [OPTION]
10
+ #
11
+ # -h, --help:
12
+ # show help
13
+ #
14
+ # -d, --debug:
15
+ # turn on debug logging
16
+ #
17
+ # -c, --config filename
18
+ # specify a configuration file
19
+ #
20
+ # -v, --verbose
21
+ # print info to the standard output
22
+
23
+ require 'getoptlong'
24
+ require 'rdoc/usage'
25
+ require File.dirname(__FILE__) + '/../lib/refinery'
26
+
27
+ options = {}
28
+ opts = GetoptLong.new(
29
+ [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
30
+ [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
31
+ [ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
32
+ [ '--verbose', '-v', GetoptLong::NO_ARGUMENT]
33
+ )
34
+ opts.each do |opt, arg|
35
+ case opt
36
+ when '--help'
37
+ RDoc::usage
38
+ when '--debug'
39
+ options[:debug] = true
40
+ when '--config'
41
+ options[:config] = arg
42
+ when '--verbose'
43
+ options[:verbose] = true
44
+ end
45
+ end
46
+
47
+ Refinery::Monitor.new(options).run
data/bin/pubnow ADDED
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # == Synopsis
4
+ #
5
+ # pubnow: run a single publisher once
6
+ #
7
+ # == Usage
8
+ #
9
+ # pubnow [OPTION] KEY
10
+ #
11
+ # -h, --help:
12
+ # show help
13
+ #
14
+ # -d, --debug:
15
+ # turn on debug logging
16
+ #
17
+ # -c, --config filename
18
+ # specify a configuration file
19
+ #
20
+ # -p, --publishers directory
21
+ # specify the directory for finding publishers
22
+ #
23
+ # -v, --verbose
24
+ # print info to the standard output
25
+ #
26
+ # KEY: the name of the publisher to run. For example
27
+ # to run the sample publisher use 'sample'
28
+
29
+ require 'getoptlong'
30
+ require 'rdoc/usage'
31
+ require File.dirname(__FILE__) + '/../lib/refinery'
32
+
33
+ options = {}
34
+ opts = GetoptLong.new(
35
+ [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
36
+ [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
37
+ [ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
38
+ [ '--publishers', '-p', GetoptLong::REQUIRED_ARGUMENT],
39
+ [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ]
40
+ )
41
+ opts.each do |opt, arg|
42
+ case opt
43
+ when '--help'
44
+ RDoc::usage
45
+ when '--debug'
46
+ options[:debug] = true
47
+ when '--config'
48
+ options[:config] = arg
49
+ when '--publishers'
50
+ options[:publishers] = arg
51
+ when '--verbose'
52
+ options[:verbose] = true
53
+ end
54
+ end
55
+
56
+ if ARGV.length != 1
57
+ puts "Missing key argument (try --help)"
58
+ exit 0
59
+ end
60
+
61
+ Refinery::EventPublisher.new(options).run_once(ARGV.shift)
data/bin/refinery ADDED
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # == Synopsis
4
+ #
5
+ # refinery: run the Refinery server
6
+ #
7
+ # == Usage
8
+ #
9
+ # refinery [OPTION]
10
+ #
11
+ # -h, --help:
12
+ # show help
13
+ #
14
+ # -d, --debug:
15
+ # turn on debug logging
16
+ #
17
+ # -c, --config filename
18
+ # specify a configuration file
19
+ #
20
+ # -w, --workers directory
21
+ # specify the directory for finding workers
22
+ #
23
+ # -v, --verbose
24
+ # print info to the standard output
25
+
26
+ require 'getoptlong'
27
+ require 'rdoc/usage'
28
+ require File.dirname(__FILE__) + '/../lib/refinery'
29
+
30
+ options = {}
31
+ opts = GetoptLong.new(
32
+ [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
33
+ [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
34
+ [ '--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
35
+ [ '--workers', '-w', GetoptLong::REQUIRED_ARGUMENT],
36
+ [ '--verbose', '-v', GetoptLong::NO_ARGUMENT]
37
+ )
38
+ opts.each do |opt, arg|
39
+ case opt
40
+ when '--help'
41
+ RDoc::usage
42
+ when '--debug'
43
+ options[:debug] = true
44
+ when '--config'
45
+ options[:config] = arg
46
+ when '--workers'
47
+ options[:workers] = arg
48
+ when '--verbose'
49
+ options[:verbose] = true
50
+ end
51
+ end
52
+
53
+ Refinery::Server.new(options).run
@@ -0,0 +1,18 @@
1
+ aws:
2
+ credentials:
3
+ access_key_id: "access_key_id"
4
+ secret_access_key: "secret_access_key"
5
+ processors:
6
+ sample:
7
+ queue: 'sample' # can be omitted
8
+ publishers:
9
+ delay: 30
10
+ workers:
11
+ initial: 3
12
+ data_store:
13
+ class: s3
14
+ error:
15
+ publishers:
16
+ delay: 30
17
+ workers:
18
+ initial: 1
@@ -0,0 +1,48 @@
1
module Refinery #:nodoc:
  # Configuration class.
  #
  # Wraps a hash of settings. Keys are converted with +to_s+ on every read
  # and write, so <tt>config[:aws]</tt> and <tt>config['aws']</tt> address
  # the same entry. The settings can optionally be backed by a YAML file
  # that is re-read when its modification time changes.
  class Config
    # Get a shared configuration. The instance is created on first use with
    # empty 'aws' => 'credentials' and 'processors' sections.
    def self.default
      @default ||= new({
        'aws' => {
          'credentials' => {}
        },
        'processors' => {}
      })
    end

    # Initialize the config with the given data
    def initialize(data={})
      @data = data
    end

    # Get the configuration value
    def [](key)
      data[key.to_s]
    end

    # Set the configuration value
    def []=(key, value)
      data[key.to_s] = value
    end

    # Load configuration from a YAML file, replacing the current data, and
    # remember the file's modification time for later calls to +refresh+.
    def load_file(file)
      @file = file
      @data = YAML::load_file(@file)
      @last_load = File.mtime(@file)
    end

    # Refresh the configuration from the YAML file if necessary.
    #
    # Does nothing when no file has been loaded (for example on the default
    # shared configuration) or when the file's modification time is
    # unchanged since it was last read.
    def refresh
      return unless @file

      modified_at = File.mtime(@file)
      return if modified_at == @last_load

      # Record the new timestamp; otherwise every later call would re-read
      # the file and discard values set in memory since the reload.
      @last_load = modified_at
      @data = YAML::load_file(@file)
    end

    private
    # The underlying settings hash (never nil).
    def data
      @data ||= {}
    end
  end
end
@@ -0,0 +1,15 @@
1
module Refinery #:nodoc:
  # Mixin that gives the including object a +config+ reader and writer.
  module Configurable
    # Assign the configuration to use.
    attr_writer :config

    # Return the configuration. When none has been assigned yet, the shared
    # default configuration is stored and returned.
    def config
      return @config if @config

      @config = Refinery::Config.default
    end
  end
end
@@ -0,0 +1,124 @@
1
module Refinery #:nodoc:
  # A daemon provides a thread to run workers in.
  #
  # The thread repeatedly receives messages from the waiting queue, runs a
  # newly created worker on each one, and posts the outcome to the done
  # queue (on a truthy result) or to the error queue (when the worker
  # raises).
  class Daemon
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Utilities

    RUNNING = 'running'
    STOPPED = 'stopped'

    # The daemon's thread
    attr_reader :thread
    # The name of the daemon
    attr_reader :name
    # The queue for incoming messages to process
    attr_reader :waiting_queue
    # The queue for outgoing messages once they've been processed
    attr_reader :done_queue
    # The queue for error messages
    attr_reader :error_queue

    # Stop the daemon. The thread's loop exits the next time it checks
    # +running?+.
    def stop
      self.state = STOPPED
    end

    # Return the daemon state (RUNNING until changed).
    def state
      @state ||= RUNNING
    end

    # Set the daemon state.
    def state=(state)
      @state = state
    end
    # NOTE(review): this protects the reader, not the writer defined just
    # above - confirm which one was intended.
    protected :state

    # Return true if the daemon state is running.
    def running?
      state == RUNNING
    end

    # Initialize the daemon and start its processing thread.
    #
    # * <tt>server</tt>: The server instance
    # * <tt>name</tt>: The processor name
    # * <tt>waiting_queue</tt>: The waiting queue that provides messages to be processed
    # * <tt>error_queue</tt>: The queue where errors are posted.
    # * <tt>done_queue</tt>: The queue for messages that have been processed.
    def initialize(server, name, waiting_queue, error_queue, done_queue)
      Refinery::Server.logger.debug "Starting daemon"

      @server = server
      @name = name
      @waiting_queue = waiting_queue
      @error_queue = error_queue
      @done_queue = done_queue

      @thread = Thread.new(self) do |daemon|
        logger.debug "Running daemon thread"
        while running?
          begin
            # Drain the waiting queue until receive returns nothing.
            while (message = waiting_queue.receive)
              worker = load_worker_class(name).new(self)
              begin
                result, run_time = worker.run(decode_message(message.body))
                if result
                  done_message = {
                    'host_info' => host_info,
                    'original' => message.body,
                    'run_time' => run_time
                  }
                  logger.debug "Sending 'done' message to #{done_queue.name}"
                  done_queue.send_message(encode_message(done_message))

                  logger.debug "Deleting message from queue"
                  message.delete
                end
              rescue Exception => e
                # Anything raised while running the worker is reported on
                # the error queue, then the message is deleted.
                error_message = {
                  'error' => {
                    'message' => e.message,
                    'class' => e.class.name
                  },
                  'host_info' => host_info,
                  'original' => message.body
                }
                error_queue.send_message(encode_message(error_message))
                message.delete
              end
            end
            sleep(1)
          rescue Exception => e
            logger.error "An error occurred while receiving from the waiting queue: #{e.message}"
            # Back off before polling again; without this pause a queue that
            # keeps failing would make the loop spin and flood the log.
            sleep(1)
          end
        end
        logger.debug "Exiting daemon thread"
      end
    end

    # A hash of worker classes. Maps a worker name to the modification time
    # of its source file at the moment it was last loaded.
    def workers
      @workers ||= {}
    end

    private
    # Load the appropriate worker class.
    #
    # Loads <tt>name.rb</tt> from the server's workers directory the first
    # time it is needed and again whenever the file's modification time
    # differs from the recorded one, then returns the constant named by
    # <tt>camelize(name)</tt>. Raises SourceFileNotFound when the file does
    # not exist.
    def load_worker_class(name)
      source_file = "#{@server.workers_directory}/#{name}.rb"
      unless File.exist?(source_file)
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      modified_at = File.mtime(source_file)
      if workers[name] != modified_at
        logger.debug "Loading #{source_file}"
        load(source_file)
        workers[name] = modified_at
      end

      Object.const_get(camelize(name))
    end
  end
end