aeden-refinery 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +10 -0
- data/.gitignore +5 -0
- data/Rakefile +17 -1
- data/VERSION +1 -0
- data/config/config.example.yml +18 -0
- data/lib/refinery.rb +74 -0
- data/lib/refinery/config.rb +48 -0
- data/lib/refinery/configurable.rb +15 -0
- data/lib/refinery/daemon.rb +124 -0
- data/lib/refinery/event_publisher.rb +120 -0
- data/lib/refinery/heartbeat.rb +30 -0
- data/lib/refinery/loggable.rb +9 -0
- data/lib/refinery/monitor.rb +116 -0
- data/lib/refinery/publisher.rb +24 -0
- data/lib/refinery/queueable.rb +20 -0
- data/lib/refinery/server.rb +86 -0
- data/lib/refinery/statistics.rb +61 -0
- data/lib/refinery/stats_server.rb +134 -0
- data/lib/refinery/utilities.rb +33 -0
- data/lib/refinery/validations.rb +48 -0
- data/lib/refinery/worker.rb +65 -0
- data/logs/README +1 -0
- data/publishers/error.rb +8 -0
- data/publishers/sample.rb +8 -0
- data/publishers/sleep.rb +7 -0
- data/refinery.gemspec +105 -0
- data/test/config.yml +10 -0
- data/test/test_helper.rb +21 -0
- data/test/unit/config_test.rb +42 -0
- data/test/unit/configurable_test.rb +11 -0
- data/test/unit/daemon_test.rb +37 -0
- data/test/unit/event_publisher_test.rb +11 -0
- data/test/unit/heartbeat_test.rb +22 -0
- data/test/unit/loggable_test.rb +11 -0
- data/test/unit/publisher_test.rb +13 -0
- data/test/unit/queueable_test.rb +24 -0
- data/test/unit/server_test.rb +39 -0
- data/test/unit/statistics_test.rb +41 -0
- data/test/unit/utilities_test.rb +25 -0
- data/test/unit/validations_test.rb +37 -0
- data/test/unit/worker_test.rb +44 -0
- data/workers/error.rb +8 -0
- data/workers/sample.rb +8 -0
- data/workers/sleep.rb +7 -0
- metadata +74 -16
data/.autotest
ADDED
data/.gitignore
ADDED
data/Rakefile
CHANGED
|
@@ -20,4 +20,20 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
|
|
|
20
20
|
rdoc.rdoc_files.include('README.rdoc')
|
|
21
21
|
rdoc.rdoc_files.include('lib/*.rb')
|
|
22
22
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
23
|
-
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Define the gem packaging tasks when Jeweler can be loaded. When it cannot,
# print installation instructions and continue without those tasks.
begin
  require 'jeweler'

  Jeweler::Tasks.new do |spec|
    spec.name = "refinery"
    spec.summary = "Refinery processes data in a distributed environment."
    spec.email = "anthonyeden@gmail.com"
    spec.homepage = "http://github.com/aeden/refinery"
    spec.description = "Process data in a distributed fashion."
    spec.authors = ["Anthony Eden"]
    # Keep generated documentation out of the packaged gem.
    spec.files.exclude 'docs/**/*'
    spec.rubyforge_project = 'refinery'
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end
|
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.9.1
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
aws:
|
|
2
|
+
credentials:
|
|
3
|
+
access_key_id: "access_key_id"
|
|
4
|
+
secret_access_key: "secret_access_key"
|
|
5
|
+
processors:
|
|
6
|
+
sample:
|
|
7
|
+
queue: 'sample' # can be omitted
|
|
8
|
+
publishers:
|
|
9
|
+
delay: 30
|
|
10
|
+
workers:
|
|
11
|
+
initial: 3
|
|
12
|
+
data_store:
|
|
13
|
+
class: s3
|
|
14
|
+
error:
|
|
15
|
+
publishers:
|
|
16
|
+
delay: 30
|
|
17
|
+
workers:
|
|
18
|
+
initial: 1
|
data/lib/refinery.rb
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
|
2
|
+
|
|
3
|
+
require 'logger'
|
|
4
|
+
require 'socket'
|
|
5
|
+
require 'benchmark'
|
|
6
|
+
|
|
7
|
+
# The Refinery module contains all of the classes for the refinery system.
module Refinery
  # Raised if a source file cannot be loaded
  class SourceFileNotFound < RuntimeError
  end

  # Require the specified library.
  #
  # The short name is the require path and the display_name will be shown
  # if the library cannot be loaded.
  #
  # Returns the result of +require+ when the library loads. When it cannot
  # be loaded, a message is printed and the process exits with status 1 so
  # that the calling shell or supervisor sees the failure (a bare +exit+
  # would report success).
  def self.require_library(short_name, display_name)
    require short_name
  rescue LoadError
    puts "#{display_name} is required, please install it"
    exit 1
  end

  # Require all of the dependencies.
  def self.require_libraries
    [
      ['rubygems',  'Rubygems'],
      ['right_aws', 'RightScale AWS gem'],
      ['json',      'JSON gem'],
      ['moneta',    'Moneta gem'],
      ['moneta/s3', 'Moneta S3 implementation']
    ].each do |short_name, display_name|
      require_library(short_name, display_name)
    end
  end

  # Require a library that the system can run without.
  #
  # Returns the result of +require+, or nil when the library is not
  # available. display_name is accepted for symmetry with require_library
  # but is not used: a missing optional library is silently ignored.
  def self.require_optional_library(short_name, display_name)
    require short_name
  rescue LoadError
    nil
  end

  # Require all of the optional dependencies.
  def self.require_optional_libraries
    require_optional_library('sequel', 'Sequel gem')
    require_optional_library('ramaze', 'Ramaze')
  end

  # Require internal code files
  #
  # The files are required in the order listed.
  def self.require_internals
    %w[
      loggable
      configurable
      queueable
      utilities
      validations
      config
      heartbeat
      server
      daemon
      worker
      event_publisher
      publisher
      monitor
      statistics
      stats_server
    ].each { |file| require "refinery/#{file}" }
  end
end
|
|
71
|
+
|
|
72
|
+
# Load everything when this file is required: the hard dependencies first,
# then the optional extras, then the library's own source files.
Refinery.require_libraries
Refinery.require_optional_libraries
Refinery.require_internals
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
require 'yaml'

module Refinery #:nodoc:
  # Configuration class.
  #
  # Wraps a hash of settings. Keys are converted with +to_s+ on both read
  # and write, so symbols and strings address the same entry. The settings
  # can be loaded from a YAML file and reloaded when that file changes.
  class Config
    # Get a shared configuration
    #
    # The instance is created on first use with empty 'aws' credentials and
    # an empty 'processors' section, and the same object is returned after.
    def self.default
      @default ||= new({
        'aws' => {
          'credentials' => {}
        },
        'processors' => {}
      })
    end

    # Initialize the config with the given data
    def initialize(data={})
      @data = data
    end

    # Get the configuration value
    def [](key)
      data[key.to_s]
    end

    # Set the configuration value
    def []=(key, value)
      data[key.to_s] = value
    end

    # Load configuration from a YAML file
    #
    # Replaces the current data with the file's contents and remembers the
    # file and its modification time for later calls to refresh.
    def load_file(file)
      @file = file
      # Read the timestamp before parsing so that a write landing between
      # the two steps is picked up by the next refresh instead of missed.
      modified_at = File.mtime(@file)
      @data = YAML::load_file(@file)
      @last_load = modified_at
    end

    # Refresh the configuration from the YAML file if necessary.
    #
    # Does nothing when no file has been loaded. Otherwise the file is
    # re-read only when its modification time differs from the one recorded
    # at the last load; the recorded time is updated on each reload so an
    # unchanged file is not parsed again.
    def refresh
      return unless @file
      load_file(@file) if File.mtime(@file) != @last_load
    end

    private
    # The underlying settings hash (never nil).
    def data
      @data ||= {}
    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module Refinery #:nodoc:
  # Include this module to get access to a shared configuration
  module Configurable
    # Set the configuration.
    attr_writer :config

    # Get the configuration. If the config is nil then this will use
    # the default shared configuration (and remember it for later calls).
    def config
      @config ||= Refinery::Config.default
    end
  end
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
module Refinery #:nodoc:
  # A daemon provides a thread to run workers in.
  #
  # The thread is started by the constructor. It repeatedly drains the
  # waiting queue, running a newly instantiated worker for every message,
  # and pauses for one second between polls until the daemon is stopped.
  class Daemon
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Utilities

    RUNNING = 'running'
    STOPPED = 'stopped'

    # The daemon's thread
    attr_reader :thread
    # The name of the daemon
    attr_reader :name
    # The queue for incoming messages to process
    attr_reader :waiting_queue
    # The queue for outgoing messages once they've been processed
    attr_reader :done_queue
    # The queue for error messages
    attr_reader :error_queue

    # Stop the daemon
    #
    # Only flips the state; the thread notices it before receiving the next
    # message or at its next poll and then exits on its own.
    def stop
      self.state = STOPPED
    end

    # Return the daemon state
    def state
      @state ||= RUNNING
    end

    # Set the daemon state.
    def state=(state)
      @state = state
    end
    protected :state

    # Return true if the daemon state is running.
    def running?
      state == RUNNING
    end

    # Initialize the daemon.
    #
    # * <tt>server</tt>: The server instance
    # * <tt>name</tt>: The processor name
    # * <tt>waiting_queue</tt>: The waiting queue that provides messages to be processed
    # * <tt>error_queue</tt>: The queue where errors are posted.
    # * <tt>done_queue</tt>: The queue for messages that have been processed.
    def initialize(server, name, waiting_queue, error_queue, done_queue)
      Refinery::Server.logger.debug "Starting daemon"

      @server = server
      @name = name
      @waiting_queue = waiting_queue
      @error_queue = error_queue
      @done_queue = done_queue

      @thread = Thread.new { run_daemon_loop }
    end

    # A hash of worker classes
    #
    # Maps each worker name to the modification time of its source file at
    # the moment it was last loaded.
    def workers
      @workers ||= {}
    end

    private
    # Body of the daemon thread: poll the waiting queue until stopped.
    def run_daemon_loop
      logger.debug "Running daemon thread"
      while running?
        begin
          drain_waiting_queue
        rescue Exception => e
          # Exception (not just StandardError) is rescued so that nothing
          # raised while receiving or loading a worker ends the thread.
          logger.error "An error occurred while receiving from the waiting queue: #{e.message}"
        end
        # Pause after failures as well as after an empty poll; otherwise a
        # persistent error would make this loop spin without any delay.
        sleep(1)
      end
      logger.debug "Exiting daemon thread"
    end

    # Process messages until the queue has none left or the daemon stops.
    def drain_waiting_queue
      # running? is checked before each receive so that stop takes effect
      # even while messages keep arriving, and no message is received only
      # to be abandoned.
      while running? && (message = waiting_queue.receive)
        process_message(message)
      end
    end

    # Run a worker on one message and report the outcome.
    #
    # A truthy result posts a 'done' message and deletes the original. A
    # falsy result leaves the message undeleted. Anything raised by the
    # worker is posted to the error queue and the message is deleted.
    def process_message(message)
      worker = load_worker_class(name).new(self)
      begin
        result, run_time = worker.run(decode_message(message.body))
        if result
          done_message = {
            'host_info' => host_info,
            'original' => message.body,
            'run_time' => run_time
          }
          logger.debug "Sending 'done' message to #{done_queue.name}"
          done_queue.send_message(encode_message(done_message))

          logger.debug "Deleting message from queue"
          message.delete()
        end
      rescue Exception => e
        error_message = {
          'error' => {
            'message' => e.message,
            'class' => e.class.name
          },
          'host_info' => host_info,
          'original' => message.body
        }
        error_queue.send_message(encode_message(error_message))
        message.delete()
      end
    end

    # Load the appropriate worker class
    #
    # The source file is looked up in the server's workers directory and is
    # (re)loaded whenever its modification time differs from the recorded
    # one. Raises SourceFileNotFound when the file does not exist.
    def load_worker_class(name)
      source_file = "#{@server.workers_directory}/#{name}.rb"
      unless File.exist?(source_file)
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      modified_at = File.mtime(source_file)
      if workers[name] != modified_at
        logger.debug "Loading #{source_file}"
        load(source_file)
        workers[name] = modified_at
      end

      Object.const_get(camelize(name))
    end
  end
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
module Refinery #:nodoc:
  # Publish events.
  #
  # For every configured processor a thread is started that repeatedly
  # loads the processor's publisher class, executes it against the
  # processor's waiting queue and then sleeps for the configured delay.
  class EventPublisher
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Queueable
    include Refinery::Utilities

    STARTING = 'starting' #:nodoc:
    RUNNING = 'running' #:nodoc:
    STOPPED = 'stopped' #:nodoc:

    # Set the directory where publishers are found.
    attr_writer :publishers_directory

    # Initialize the event publisher
    #
    # Options:
    # * <tt>:verbose</tt>: Set to true to enable info logging
    # * <tt>:debug</tt>: Set to true to enable debug logging
    # * <tt>:config</tt>: Provide a file path to load that config
    # * <tt>:publishers</tt>: The directory where publishers are found
    def initialize(options={})
      logger.level = Logger::INFO if options[:verbose]
      logger.level = Logger::DEBUG if options[:debug]
      config.load_file(options[:config]) if options[:config]
      self.publishers_directory = options[:publishers] if options[:publishers]
    end

    # Get the event publisher state
    def state
      @state ||= STARTING
    end

    # Return true if the event publisher is running
    def running?
      state == RUNNING
    end

    # The directory where publishers are found. Defaults to ./publishers
    def publishers_directory
      @publishers_directory ||= './publishers'
    end

    # A hash of all publisher classes mapped to last modified timestamps.
    def publishers
      @publishers ||= {}
    end

    # Run the specified publisher once and return
    #
    # Raises RuntimeError when no processor is configured under the key.
    def run_once(key)
      settings = config['processors'][key]
      raise RuntimeError, "No processor configuration found for #{key}" unless settings
      waiting_queue = waiting_queue_for(key, settings)
      load_publisher_class(key).new(waiting_queue).execute
    end

    # Run the event publisher
    #
    # Starts one thread per configured processor and blocks until all of
    # them have finished or the process is interrupted.
    def run
      @state = RUNNING
      logger.info "Starting event publisher"
      config['processors'].each do |key, settings|
        run_publisher(key, settings)
      end

      begin
        threads.each { |thread| thread.join }
      rescue Interrupt
        # An interrupt ends the wait; fall through to the exit message.
      end

      logger.info "Exiting event publisher"
    end

    private
    # An array of threads, one for each publisher instance
    def threads
      @threads ||= []
    end

    # Look up the waiting queue for a processor. The queue name comes from
    # the processor's 'queue' setting, falling back to the processor key.
    def waiting_queue_for(key, settings)
      queue_name = settings['queue'] || key
      logger.debug "Using queue #{queue_name}_waiting"
      queue("#{queue_name}_waiting")
    end

    # Seconds to sleep between publisher runs. Defaults to 60 when the
    # 'publishers' section or its 'delay' entry is missing.
    def publisher_delay(settings)
      (settings['publishers'] || {})['delay'] || 60
    end

    # Run the publisher for the given key
    #
    # The publisher runs in its own thread for as long as the event
    # publisher is running. An exception raised by a publisher is logged
    # and re-raised, which ends that thread.
    def run_publisher(key, settings)
      logger.info "Creating publisher for #{key}"
      waiting_queue = waiting_queue_for(key, settings)

      threads << Thread.new(waiting_queue, settings) do |thread_queue, thread_settings|
        while running?
          begin
            load_publisher_class(key).new(thread_queue).execute
          rescue Exception => e
            logger.error e
            raise
          end

          delay = publisher_delay(thread_settings)
          logger.debug "Sleeping #{delay} seconds"
          sleep delay
        end
      end
    end

    # Load the publisher class for the given key
    #
    # The source file is looked up in publishers_directory and is
    # (re)loaded whenever its modification time differs from the recorded
    # one. Raises SourceFileNotFound when the file does not exist.
    def load_publisher_class(key)
      source_file = "#{publishers_directory}/#{key}.rb"
      unless File.exist?(source_file)
        raise SourceFileNotFound, "Source file not found: #{source_file}"
      end

      modified_at = File.mtime(source_file)
      if publishers[key] != modified_at
        logger.debug "Loading #{source_file}"
        load(source_file)
        publishers[key] = modified_at
      end

      Object.const_get(camelize(key))
    end
  end
end
|