aeden-refinery 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +10 -0
- data/.gitignore +5 -0
- data/Rakefile +17 -1
- data/VERSION +1 -0
- data/config/config.example.yml +18 -0
- data/lib/refinery.rb +74 -0
- data/lib/refinery/config.rb +48 -0
- data/lib/refinery/configurable.rb +15 -0
- data/lib/refinery/daemon.rb +124 -0
- data/lib/refinery/event_publisher.rb +120 -0
- data/lib/refinery/heartbeat.rb +30 -0
- data/lib/refinery/loggable.rb +9 -0
- data/lib/refinery/monitor.rb +116 -0
- data/lib/refinery/publisher.rb +24 -0
- data/lib/refinery/queueable.rb +20 -0
- data/lib/refinery/server.rb +86 -0
- data/lib/refinery/statistics.rb +61 -0
- data/lib/refinery/stats_server.rb +134 -0
- data/lib/refinery/utilities.rb +33 -0
- data/lib/refinery/validations.rb +48 -0
- data/lib/refinery/worker.rb +65 -0
- data/logs/README +1 -0
- data/publishers/error.rb +8 -0
- data/publishers/sample.rb +8 -0
- data/publishers/sleep.rb +7 -0
- data/refinery.gemspec +105 -0
- data/test/config.yml +10 -0
- data/test/test_helper.rb +21 -0
- data/test/unit/config_test.rb +42 -0
- data/test/unit/configurable_test.rb +11 -0
- data/test/unit/daemon_test.rb +37 -0
- data/test/unit/event_publisher_test.rb +11 -0
- data/test/unit/heartbeat_test.rb +22 -0
- data/test/unit/loggable_test.rb +11 -0
- data/test/unit/publisher_test.rb +13 -0
- data/test/unit/queueable_test.rb +24 -0
- data/test/unit/server_test.rb +39 -0
- data/test/unit/statistics_test.rb +41 -0
- data/test/unit/utilities_test.rb +25 -0
- data/test/unit/validations_test.rb +37 -0
- data/test/unit/worker_test.rb +44 -0
- data/workers/error.rb +8 -0
- data/workers/sample.rb +8 -0
- data/workers/sleep.rb +7 -0
- metadata +74 -16
@@ -0,0 +1,30 @@
|
|
1
|
+
module Refinery #:nodoc:
  # A heartbeat publisher that indicates a server is alive.
  #
  # Creating an instance starts a background thread that sends a message to
  # the 'heartbeat' queue and then sleeps for 60 seconds, forever.
  class Heartbeat
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Queueable
    include Refinery::Utilities

    # Initialize the heartbeat for the given server.
    #
    # server - the object whose +daemons+ collection is counted in every
    #          heartbeat message.
    #
    # The message is a hash with the keys 'host_info', 'timestamp' (UTC) and
    # 'running_daemons'; it is converted to JSON and Base64 encoded before
    # being sent.
    def initialize(server)
      @server = server
      @thread = Thread.new(queue('heartbeat')) do |heartbeat_queue|
        loop do
          begin
            logger.debug "Send heartbeat"
            message = {
              # host_info is not defined in this class; presumably it comes
              # from Refinery::Utilities -- TODO confirm.
              'host_info' => host_info,
              'timestamp' => Time.now.utc,
              'running_daemons' => @server.daemons.length
            }
            heartbeat_queue.send_message(Base64.encode64(message.to_json))
          rescue StandardError => e
            # Keep the thread alive on ordinary failures (e.g. the queue
            # being unreachable) but do not swallow non-error exceptions.
            logger.error "Error sending heartbeat: #{e.message}"
          end

          # Sleep outside the begin/rescue so a failed send still waits a
          # full interval instead of retrying in a tight loop.
          sleep(60)
        end
      end
    end
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
module Refinery #:nodoc:
  # The monitor is responsible for monitoring the health of the various
  # components of refinery.
  class Monitor
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Queueable
    include Refinery::Utilities

    # Initialize the monitor.
    #
    # Options:
    # * <tt>:verbose</tt>: Enable INFO level logging
    # * <tt>:debug</tt>: Enable DEBUG level logging
    # * <tt>:config</tt>: The config file
    def initialize(options)
      logger.level = Logger::INFO if options[:verbose]
      logger.level = Logger::DEBUG if options[:debug]
      config.load_file(options[:config]) if options[:config]
    end

    # Execute the monitor. The monitor will start one heartbeat
    # monitor thread and one thread for each done queue and error
    # queue as specified in the configuration. It then runs the stats
    # server and joins the monitor threads until interrupted.
    def run
      logger.info "Starting up monitor"
      heartbeat_monitor_thread = run_heartbeat_monitor
      done_monitor_threads = run_done_monitors
      error_monitor_threads = run_error_monitors

      logger.info "Monitor running"

      Refinery::StatsServer.new.run

      begin
        heartbeat_monitor_thread.join
        done_monitor_threads.each(&:join)
        error_monitor_threads.each(&:join)
      rescue Interrupt
        # An interrupt simply ends the wait; fall through to the exit log.
      end

      logger.info "Monitor is exiting"
    end

    private

    # The shared statistics recorder, created on first use.
    def statistics
      @statistics ||= Refinery::Statistics.new
    end

    # Start the thread that drains the 'heartbeat' queue, logging each
    # decoded message at debug level. Returns the thread.
    def run_heartbeat_monitor
      logger.info "Starting heartbeat monitor"
      poll_queue(queue('heartbeat')) do |message|
        logger.debug decode_message(message.body).inspect
        message.delete
      end
    end

    # Start one thread per configured processor that drains its
    # "<queue>_done" queue and records each message as a completed job.
    # Returns the array of threads.
    def run_done_monitors
      config['processors'].collect do |key, settings|
        queue_name = settings['queue'] || key
        done_queue_name = "#{queue_name}_done"
        logger.debug "Starting monitor for queue #{done_queue_name}"
        poll_queue(queue(done_queue_name)) do |message|
          done_message = decode_message(message.body)
          processed = decode_message(done_message['original'])
          logger.info "Done: #{processed.inspect}"
          message.delete
          statistics.record_done(done_message)
        end
      end
    end

    # Start one thread per configured processor that drains its
    # "<queue>_error" queue and records each message as an error.
    # Returns the array of threads.
    def run_error_monitors
      config['processors'].collect do |key, settings|
        queue_name = settings['queue'] || key
        error_queue_name = "#{queue_name}_error"
        logger.info "Starting error monitor for queue #{error_queue_name}"
        poll_queue(queue(error_queue_name)) do |message|
          error_message = decode_message(message.body)
          processed = decode_message(error_message['original'])
          logger.info "Error: #{processed.inspect}"
          message.delete
          statistics.record_error(error_message)
        end
      end
    end

    # Spawn a thread that repeatedly drains +source_queue+, passing every
    # received message to +handler+, then sleeps two seconds before polling
    # again. Ordinary errors raised while receiving or handling are logged
    # and polling continues. Returns the thread.
    def poll_queue(source_queue, &handler)
      Thread.new(source_queue) do |polled_queue|
        loop do
          begin
            while (message = polled_queue.receive)
              handler.call(message)
            end
          rescue StandardError => e
            logger.error e
          end
          sleep(2)
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Refinery #:nodoc:
  # Base class for publishers to be implemented by subclasses.
  class Publisher
    include Refinery::Loggable

    # Initialize the publisher with the queue to publish messages to.
    def initialize(waiting_queue)
      @waiting_queue = waiting_queue
    end

    protected

    # Get the publish queue
    attr_reader :waiting_queue

    # Publish the message. The message will be converted to JSON and pushed
    # into the queue associated with the publisher, Base64 encoded.
    #
    # The JSON is generated once so the payload written to the debug log is
    # exactly the payload that is sent.
    def publish(message)
      payload = message.to_json
      logger.debug "Message: #{payload}"
      waiting_queue.send_message(Base64.encode64(payload))
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Refinery #:nodoc:
  # Mixin for classes that need access to named queues. The including
  # class must respond to +config+.
  module Queueable
    # Look up the queue called +name+ through the queue provider.
    def queue(name)
      provider = queue_provider
      provider.queue(name)
    end

    protected

    # The queue provider, built on first use and reused afterwards.
    # Defaults to a RightAws::SqsGen2 instance created with the AWS
    # credentials from the configuration and the :multi_thread option
    # enabled.
    def queue_provider
      return @queue_provider if @queue_provider

      credentials = config['aws']['credentials']
      access_key = credentials["access_key_id"]
      secret_key = credentials["secret_access_key"]
      @queue_provider = RightAws::SqsGen2.new(access_key, secret_key, {:multi_thread => true})
    end
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Refinery #:nodoc:
  # The server instance provides a runtime environment for daemons.
  # To start the server create a Refinery::Server instance and invoke run.
  class Server
    include Refinery::Loggable
    include Refinery::Configurable
    include Refinery::Queueable
    include Refinery::Utilities

    # Set the directory where worker source files are stored. Only the
    # writer is generated here; the reader is defined explicitly below so
    # that it can supply the default without redefining a generated method.
    attr_writer :workers_directory

    # Get a server-wide logger. It writes to STDOUT at WARN level and is
    # created on first use.
    def self.logger
      @logger ||= begin
        logger = Logger.new(STDOUT)
        logger.level = Logger::WARN
        logger
      end
    end

    # Initialize the server.
    #
    # Options:
    # * <tt>:config</tt>: Provide a file path to load that config
    # * <tt>:debug</tt>: Set to true to enable debug logging
    # * <tt>:verbose</tt>: Set to true to enable info logging
    # * <tt>:workers</tt>: The workers directory
    def initialize(options={})
      logger.level = Logger::INFO if options[:verbose]
      logger.level = Logger::DEBUG if options[:debug]
      config.load_file(options[:config]) if options[:config]
      self.workers_directory = options[:workers] if options[:workers]
    end

    # The directory where workers are found. Defaults to ./workers
    def workers_directory
      @workers_directory ||= "./workers"
    end

    # Stop the server by stopping every daemon.
    def stop
      logger.info "Stopping Refinery Server"
      daemons.each { |daemon| daemon.stop }
    end

    # An array of all daemons
    def daemons
      @daemons ||= []
    end

    # Run the server. Returns once the daemons have finished or the wait
    # has been interrupted.
    def run
      logger.info "Starting Refinery server"
      execute_daemons
      logger.info "Server is exiting"
    end

    private

    # Create the configured number of daemons for every processor, start
    # the heartbeat and wait for all daemon threads.
    def execute_daemons
      config['processors'].each do |key, settings|
        logger.debug "Creating daemons for #{key}"

        # A processor may name its queue explicitly; otherwise its key is
        # used as the queue name.
        queue_name = settings['queue'] || key
        logger.debug "Using queue #{queue_name}"
        waiting_queue = queue("#{queue_name}_waiting")
        error_queue = queue("#{queue_name}_error")
        done_queue = queue("#{queue_name}_done")

        1.upto(settings['workers']['initial']) do
          daemons << Refinery::Daemon.new(self, key, waiting_queue, error_queue, done_queue)
        end

        logger.debug "Running #{daemons.length} daemons"
      end

      Heartbeat.new(self)

      begin
        daemons.each { |daemon| daemon.thread.join }
      rescue Interrupt
        # An interrupt ends the wait; the caller logs the shutdown.
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Refinery #:nodoc:
  # Records runtime statistics about completed jobs and errors. The
  # records are written through Sequel to the SQLite database stats.db.
  class Statistics
    include Refinery::Loggable

    # Record a done message as a row in the completed_jobs table.
    #
    # message - hash with the keys 'host_info' (containing 'hostname' and
    #           'pid'), 'run_time' and 'original'.
    def record_done(message)
      completed_jobs = db[:completed_jobs]
      row = {
        :host => message['host_info']['hostname'],
        :pid => message['host_info']['pid'],
        :run_time => message['run_time'],
        :original_message => message['original'],
        :when => Time.now
      }
      completed_jobs << row
    end

    # Record an error message as a row in the errors table.
    #
    # message - hash with the keys 'host_info' (containing 'hostname' and
    #           'pid'), 'error' (containing 'class' and 'message') and
    #           'original'.
    def record_error(message)
      errors = db[:errors]
      row = {
        :host => message['host_info']['hostname'],
        :pid => message['host_info']['pid'],
        :error_class => message['error']['class'],
        :error_message => message['error']['message'],
        :original_message => message['original'],
        :when => Time.now
      }
      errors << row
    end

    private

    # Get a Sequel connection to the stats database. The connection is
    # opened once, and any missing table is created at that point.
    def db
      @db ||= open_stats_database
    end

    # Connect to stats.db and make sure both tables exist.
    def open_stats_database
      connection = Sequel.connect('sqlite://stats.db')
      create_completed_jobs_table(connection) unless connection.table_exists?(:completed_jobs)
      create_errors_table(connection) unless connection.table_exists?(:errors)
      connection
    end

    # Create the table that holds one row per completed job.
    def create_completed_jobs_table(connection)
      connection.create_table :completed_jobs do
        primary_key :id
        column :host, :text
        column :pid, :integer
        column :run_time, :float
        column :original_message, :text
        column :when, :time
      end
    end

    # Create the table that holds one row per reported error.
    def create_errors_table(connection)
      connection.create_table :errors do
        primary_key :id
        column :host, :text
        column :pid, :integer
        column :error_class, :text
        column :error_message, :text
        column :original_message, :text
        column :when, :time
      end
    end
  end
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'cgi' # CGI.escapeHTML is used when rendering database values

module Refinery #:nodoc:
  # The StatsServer class provides a built-in web server that provides
  # a view into the refinery statistics.
  class StatsServer
    include Refinery::Loggable

    # Run the stats server. When Ramaze is not available the resulting
    # NameError is caught and a warning is logged instead.
    def run
      begin
        Ramaze::Log.loggers.clear # suppress all Ramaze logging
        Ramaze.start # start the Ramaze server (original note: port 7000)
      rescue NameError
        self.logger.warn "Install Ramaze to enable the stats server"
      end
    end

    # The controller is only defined when Ramaze has been loaded.
    if const_defined?(:Ramaze)
      class MainController < ::Ramaze::Controller #:nodoc:
        map '/'

        # Render the statistics page: run time averages, the most recent
        # errors and completed jobs, and overall counts.
        def index
          %(
            <html>
              <head>
                <title>Refinery Stats</title>
                <style>
                  .widget { border: 1px solid #777; margin-bottom: 10px; padding: 4px; }
                  .widget h2 { font-size: 14pt; margin-top: 2px; margin-bottom: 2px; }
                  #left-column { float: left; width: 600px; }
                  #right-column { margin-left: 610px; width: 300px; }
                  table { background-color: #ddd; width: 100%; }
                  table td { background-color: #eee; }
                  table th { background-color: #ccc; }
                </style>
              </head>
              <body>
                <h1>Refinery Stats</h1>
                <div id="left-column">
                  <div class="run_time widget">
                    <h2>Runtime Averages</h2>
                    #{avg_run_time}
                  </div>
                  <div class="errors widget">
                    <h2>Last 5 Errors</h2>
                    #{errors_table}
                  </div>
                  <div class="completed widget">
                    <h2>Last 5 Completed Jobs</h2>
                    #{completed_jobs_table}
                  </div>
                </div>
                <div id="right-column">
                  <div class="overview widget">
                    <h2>Overview</h2>
                    <div>#{db[:completed_jobs].count} jobs completed</div>
                    <div>#{db[:errors].count} errors</div>
                  </div>

                </div>
              </body>
            </html>
          )
        end

        private

        # Connection to the stats database. Memoized so that rendering one
        # page opens a single connection instead of one per query.
        def db
          @db ||= Sequel.connect("sqlite://stats.db")
        end

        # Escape a database value for safe inclusion in the HTML output.
        # Hosts and error texts come from job messages, so they must not be
        # emitted as raw markup.
        def escape_html(value)
          CGI.escapeHTML(value.to_s)
        end

        # Table of the average run time per host and pid.
        def avg_run_time
          rows = db[:completed_jobs].group(:host, :pid).select(:host, :pid, :AVG.sql_function(:run_time)).map do |record|
            %(<tr>
              <td>#{escape_html(record[:host])}</td>
              <td>#{escape_html(record[:pid])}</td>
              <td>#{sprintf("%.6f", record[:"AVG(`run_time`)"])}</td>
            </tr>)
          end.join
          %(
            <table>
              <tr>
                <th>Host</th>
                <th>PID</th>
                <th>Avg Run Time</th>
              </tr>
              #{rows}
            </table>
          )
        end

        # Table of the five most recently recorded completed jobs. Rows are
        # ordered by descending id so that the newest entries are shown.
        def completed_jobs_table
          jobs_list = db[:completed_jobs].reverse_order(:id).limit(5).map do |record|
            %Q(<tr>
              <td>#{escape_html(record[:host])}</td>
              <td>#{escape_html(record[:pid])}</td>
              <td>#{escape_html(record[:run_time])}</td>
            </tr>
            )
          end
          %Q(<table>
            <tr>
              <th>Host</th>
              <th>PID</th>
              <th>Run Time</th>
            </tr>
            #{jobs_list.join}
          </table>
          )
        end

        # Table of the five most recently recorded errors. Rows are ordered
        # by descending id so that the newest entries are shown.
        def errors_table
          errors = db[:errors].reverse_order(:id).limit(5).map do |record|
            %(<tr>
              <td>#{escape_html(record[:host])}</td>
              <td>#{escape_html(record[:pid])}</td>
              <td>#{escape_html(record[:error_class])}</td>
              <td>#{escape_html(record[:error_message])}</td>
            </tr>
            )
          end
          %(<table>
            <tr>
              <th>Host</th>
              <th>PID</th>
              <th>Error Class</th>
              <th>Error Message</th>
            </tr>
            #{errors.join}
          </table>
          )
        end
      end
    end
  end
end
|