symphony 0.3.0.pre20140327204419
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/.simplecov +9 -0
- data/ChangeLog +508 -0
- data/History.rdoc +15 -0
- data/Manifest.txt +30 -0
- data/README.rdoc +89 -0
- data/Rakefile +77 -0
- data/TODO.md +5 -0
- data/USAGE.rdoc +381 -0
- data/bin/symphony +8 -0
- data/bin/symphony-task +10 -0
- data/etc/config.yml.example +9 -0
- data/lib/symphony/daemon.rb +372 -0
- data/lib/symphony/metrics.rb +84 -0
- data/lib/symphony/mixins.rb +75 -0
- data/lib/symphony/queue.rb +313 -0
- data/lib/symphony/routing.rb +98 -0
- data/lib/symphony/signal_handling.rb +107 -0
- data/lib/symphony/task.rb +407 -0
- data/lib/symphony/tasks/auditor.rb +51 -0
- data/lib/symphony/tasks/failure_logger.rb +106 -0
- data/lib/symphony/tasks/pinger.rb +64 -0
- data/lib/symphony/tasks/simulator.rb +57 -0
- data/lib/symphony/tasks/ssh.rb +126 -0
- data/lib/symphony/tasks/sshscript.rb +168 -0
- data/lib/symphony.rb +56 -0
- data/spec/helpers.rb +36 -0
- data/spec/symphony/mixins_spec.rb +78 -0
- data/spec/symphony/queue_spec.rb +368 -0
- data/spec/symphony/task_spec.rb +147 -0
- data/spec/symphony_spec.rb +14 -0
- data.tar.gz.sig +0 -0
- metadata +332 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,372 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
require 'configurability'
|
5
|
+
require 'loggability'
|
6
|
+
require 'fcntl'
|
7
|
+
require 'trollop'
|
8
|
+
|
9
|
+
require 'symphony' unless defined?( Symphony )
|
10
|
+
require 'symphony/worker'
|
11
|
+
require 'symphony/task'
|
12
|
+
|
13
|
+
|
14
|
+
# The Symphony worker daemon. Watches a Symphony job queue, and runs the tasks
|
15
|
+
# contained in the jobs it fetches.
|
16
|
+
class Symphony::Daemon
|
17
|
+
extend Loggability,
|
18
|
+
Configurability
|
19
|
+
|
20
|
+
include Symphony::SignalHandling
|
21
|
+
|
22
|
+
|
23
|
+
# Loggability API -- log to the symphony logger
|
24
|
+
log_to :symphony
|
25
|
+
|
26
|
+
# Configurability API -- use the 'worker_daemon' section of the config
|
27
|
+
config_key :worker_daemon
|
28
|
+
|
29
|
+
|
30
|
+
# Signals we understand
|
31
|
+
QUEUE_SIGS = [
  :QUIT, :INT, :TERM, :HUP,
  # :TODO: :WINCH, :USR1, :USR2, :TTIN, :TTOU
].freeze  # frozen so the shared signal list can't be mutated at runtime

# The maximum throttle value caused by failing workers
THROTTLE_MAX = 16

# The factor which controls how much incrementing the throttle factor
# affects the pause between workers being started.
THROTTLE_FACTOR = 2
|
42
|
+
|
43
|
+
|
44
|
+
#
|
45
|
+
# Class methods
|
46
|
+
#
|
47
|
+
|
48
|
+
### Get the daemon's version as a String.
|
49
|
+
def self::version_string( include_buildnum=false )
  # Base form is "<class name> <library version>".
  vstring = "%s %s" % [ self.name, Symphony::VERSION ]
  return vstring unless include_buildnum

  # The build number is the run of hex digits following ": " in the
  # revision string; fall back to '0' when the pattern doesn't match.
  build = Symphony::REVISION[ /: ([[:xdigit:]]+)/, 1 ] || '0'
  vstring << " (build %s)" % [ build ]

  return vstring
end
|
57
|
+
|
58
|
+
|
59
|
+
### Start the daemon.
|
60
|
+
def self::run( argv )
  # Parse the command line in +argv+, load the configuration, then create a
  # daemon instance from the parsed options and run it.
  Loggability.format_with( :color ) if $stdout.tty?

  progname = File.basename( $0 )

  # Trollop evaluates the options block in the context of its parser object,
  # so `self` inside the block is not this class. Anything needed from the
  # class has to be captured in a local first.
  version_str = self.version_string( true )

  # Parse the arguments the caller handed in rather than silently falling
  # back to the global ARGV.
  opts = Trollop.options( argv ) do
    banner "Usage: #{progname} OPTIONS"
    version version_str

    opt :config, "The config file to load instead of the default",
      :type => :string
    # NOTE(review): DEFAULT_CREW_SIZE is not defined in this class as far as
    # this file shows -- confirm it is provided elsewhere.
    opt :crew_size, "Number of workers to maintain.", :default => DEFAULT_CREW_SIZE
    opt :queue, "The name of the queue to monitor.", :default => '_default_'

    opt :debug, "Turn on debugging output."
  end

  # Turn on debugging if it's enabled
  if opts.debug
    $DEBUG = true
    Loggability.level = :debug
  end

  # Now load the config file
  Symphony.load_config( opts.config )

  # Re-enable debug-level logging if the config reset it
  Loggability.level = :debug if opts.debug

  # And start the daemon
  self.new( opts ).run
end
|
91
|
+
|
92
|
+
|
93
|
+
#
|
94
|
+
# Instance methods
|
95
|
+
#
|
96
|
+
|
97
|
+
### Create a new Daemon instance.
|
98
|
+
def initialize( options )
  # Keep the parsed options around and open the queue they name.
  @options = options
  @queue   = Symphony::Queue.new( options.queue )

  # Supervisor state: not running, not stopping, and not throttled yet.
  @running       = false
  @shutting_down = false
  @throttle      = 0

  # Work crew bookkeeping
  @crew_size          = options.crew_size
  @crew_workers       = []
  @last_child_started = Time.now

  self.set_up_signal_handling
end
|
112
|
+
|
113
|
+
|
114
|
+
######
|
115
|
+
public
|
116
|
+
######
|
117
|
+
|
118
|
+
# The Array of PIDs of currently-running workers
|
119
|
+
attr_reader :crew_workers
|
120
|
+
|
121
|
+
# The maximum number of children to have running at any given time
|
122
|
+
attr_reader :crew_size
|
123
|
+
|
124
|
+
# A self-pipe for deferred signal-handling
|
125
|
+
attr_reader :selfpipe
|
126
|
+
|
127
|
+
# The Symphony::Queue that jobs will be fetched from
|
128
|
+
attr_reader :queue
|
129
|
+
|
130
|
+
# The Configurability::Config object for the current configuration.
|
131
|
+
attr_reader :config
|
132
|
+
|
133
|
+
|
134
|
+
### Returns +true+ if the daemon is still running.
|
135
|
+
def running?
  # Set by #start_handling_jobs; cleared when that loop ends or when an
  # INT/TERM signal is handled.
  @running
end
|
138
|
+
|
139
|
+
|
140
|
+
### Returns +true+ if the daemon is shutting down.
|
141
|
+
def shutting_down?
  # Raised by #stop and never lowered again.
  @shutting_down
end
|
144
|
+
|
145
|
+
|
146
|
+
### Set up the daemon and start running.
|
147
|
+
def run
  self.log.info "Starting worker supervisor"

  # Only try to become a session leader when the effective UID is 0.
  if Process.euid.zero?
    session_id = Process.setsid
    self.log.debug " became session leader of new session: %d" % [ session_id ]
  end

  # Install the signal traps, run the supervisor loop until it returns, then
  # put the default handlers back before exiting the process.
  self.set_signal_traps( *QUEUE_SIGS )
  self.start_handling_jobs
  self.reset_signal_traps( *QUEUE_SIGS )

  exit
end
|
167
|
+
|
168
|
+
|
169
|
+
### The main loop of the daemon -- wait for signals, children dying, or jobs, and
|
170
|
+
### take appropriate action.
|
171
|
+
def start_handling_jobs
  # Supervisor loop: top up the work crew, wait for a signal (or for the
  # throttle period to pass), then reap any workers that have exited. Runs
  # until #running? turns false; #stop is always called on the way out.
  @running = true

  self.log.debug "Starting supervisor loop..."
  while self.running?
    self.start_missing_children unless self.shutting_down?

    # While worker startup is throttled, wait at most for the throttle period
    # so the missing workers are started without needing a signal to wake
    # the loop up. A zero throttle is passed as nil.
    # NOTE(review): assumes #wait_for_signals accepts an optional timeout and
    # treats nil as "no timeout" -- confirm against Symphony::SignalHandling.
    timeout = self.throttle_seconds
    timeout = nil if timeout.zero?

    self.wait_for_signals( timeout )
    self.reap_children
  end
  self.log.info "Supervisor job loop done."

rescue => err
  # Any StandardError in the loop is logged and ends the loop.
  self.log.fatal "%p in job-handler loop: %s" % [ err.class, err.message ]
  self.log.debug { ' ' + err.backtrace.join("\n ") }

ensure
  @running = false
  self.stop
end
|
194
|
+
|
195
|
+
|
196
|
+
### Shut the daemon down gracefully.
|
197
|
+
def stop
  self.log.warn "Stopping."
  @shutting_down = true

  self.ignore_signals( *QUEUE_SIGS )

  self.log.warn "Stopping children."
  unless self.crew_workers.empty?
    # Up to three rounds of: reap, signal whoever is left, and check again.
    3.times do
      self.reap_children( *self.crew_workers )
      sleep( 1 )
      self.kill_children
      sleep( 1 )
      break if self.crew_workers.empty?
      sleep( 1 )
    end
  end

  # Give up on our remaining children.
  Signal.trap( :CHLD, :IGNORE )
  return if self.crew_workers.empty?

  self.log.warn " %d workers remain: sending KILL" % [ self.crew_workers.length ]
  self.kill_children( :KILL )
end
|
220
|
+
|
221
|
+
|
222
|
+
### Reload the configuration.
|
223
|
+
def reload_config
  # Reload the current configuration object, if there is one. Returns the
  # result of the config's #reload, or nil when no config is set.
  current = self.config

  # Nothing in this class assigns @config, so the reader can return nil.
  # Without this guard a HUP raises NoMethodError, which the supervisor
  # loop treats as fatal.
  unless current
    self.log.warn "No config object set; nothing to reload."
    return nil
  end

  self.log.warn "Reloading config %p" % [ current ]
  current.reload
end
|
227
|
+
|
228
|
+
|
229
|
+
#########
|
230
|
+
protected
|
231
|
+
#########
|
232
|
+
|
233
|
+
### Handle signals.
|
234
|
+
def handle_signal( sig )
  # Dispatch on the signal name (a Symbol such as :INT or :HUP).
  self.log.debug "Handling signal %s" % [ sig ]
  case sig
  when :INT, :TERM
    if @running
      # First INT/TERM: clear the running flag so the supervisor loop ends.
      self.log.warn "%s signal: immediate shutdown" % [ sig ]
      @running = false
    else
      # INT/TERM while no longer running: kill the workers and bail out
      # without running exit handlers. Pass the signal list explicitly,
      # matching #stop; the original call named no signals at all.
      self.ignore_signals( *QUEUE_SIGS )
      self.log.warn "%s signal: forceful shutdown" % [ sig ]
      self.kill_children( :KILL )
      exit!( 255 )
    end

  when :HUP
    self.log.warn "Hangup signal."
    self.reload_config

  when :CHLD
    # Just need to wake up, nothing else necessary

  else
    self.log.warn "Unhandled signal %s" % [ sig ]
  end

end
|
260
|
+
|
261
|
+
|
262
|
+
### Fill out the work crew with new children if necessary
|
263
|
+
def start_missing_children
  # How many workers short of a full crew are we?
  vacancies = self.crew_size - self.crew_workers.length
  return unless vacancies > 0

  # Hold off until more than the throttle period has passed since workers
  # were last started.
  pause = self.throttle_seconds
  if pause >= ( Time.now - @last_child_started )
    self.log.info "Not starting children: throttled for %0.2f seconds" % [ pause ]
    return
  end

  self.log.debug "Starting %d workers for a crew of %d" % [ vacancies, self.crew_size ]
  vacancies.times do
    worker_pid = self.start_worker
    self.log.debug " started worker %d" % [ worker_pid ]
    self.crew_workers << worker_pid
  end

  @last_child_started = Time.now
end
|
283
|
+
|
284
|
+
|
285
|
+
### Return the number of seconds between child startup times.
|
286
|
+
def throttle_seconds
  # No throttle, no pause.
  return 0 if @throttle.zero?

  # The pause grows with the natural log of the throttle value, so a
  # throttle of 1 still yields a pause of 0.0.
  Math.log( @throttle ) * THROTTLE_FACTOR
end
|
290
|
+
|
291
|
+
|
292
|
+
### Add +adjustment+ to the throttle value, ensuring that it doesn't go
|
293
|
+
### below zero.
|
294
|
+
def adjust_throttle( adjustment=1 )
  self.log.debug "Adjusting worker throttle by %d" % [ adjustment ]

  # Apply the adjustment with a floor of zero, then cap it at THROTTLE_MAX.
  @throttle = [ @throttle + adjustment, 0 ].max
  @throttle = THROTTLE_MAX if @throttle > THROTTLE_MAX
end
|
300
|
+
|
301
|
+
|
302
|
+
### Kill all current children with the specified +signal+. Returns +true+ if the signal was
|
303
|
+
### sent to one or more children.
|
304
|
+
def kill_children( signal=:TERM )
  # Send +signal+ to every PID in the work crew. Returns true if at least
  # one worker was actually signalled, false otherwise (including when the
  # crew is empty).
  return false if self.crew_workers.empty?

  self.log.info "Sending %s signal to %d workers: %p." %
    [ signal, self.crew_workers.length, self.crew_workers ]

  # Signal each worker separately. A single Process.kill over all the PIDs
  # raises ESRCH at the first one that is already gone, which would leave
  # every PID after it unsignalled.
  delivered = self.crew_workers.map do |pid|
    begin
      Process.kill( signal, pid )
      true
    rescue Errno::ESRCH
      self.log.debug "Ignoring signals to unreaped children."
      false
    end
  end

  return delivered.any?
end
|
315
|
+
|
316
|
+
|
317
|
+
### Start a new Symphony::Worker and return its PID.
|
318
|
+
def start_worker
  # Don't start anything new once shutdown has begun; nil is returned then.
  unless self.shutting_down?
    self.log.debug "Starting a worker."
    Symphony::Worker.start( self.queue )
  end
end
|
323
|
+
|
324
|
+
|
325
|
+
### Clean up after any children that have died.
|
326
|
+
def reap_children( *pids )
  self.log.debug "Reaping children."

  # With no explicit PIDs, collect whichever children have exited.
  return self.reap_any_child if pids.empty?

  self.log.debug " waiting on pids: %p" % [ pids ]
  pids.each {|child_pid| self.reap_specific_child(child_pid) }
rescue Errno::ECHILD
  # Raised by the reap helpers; stops any remaining iteration.
  self.log.debug "No more children to reap."
end
|
340
|
+
|
341
|
+
|
342
|
+
### Reap any children that have died within the caller's process group
|
343
|
+
### and remove them from the work crew.
|
344
|
+
def reap_any_child
  self.log.debug " no pids; waiting on any child in this process group"

  # Poll without blocking; waitpid2 returns nil once no exited child is
  # waiting to be collected.
  while (reaped = Process.waitpid2( -1, Process::WNOHANG ))
    child_pid, exit_status = reaped

    # A clean exit relaxes the throttle by one; a failure tightens it by one.
    self.adjust_throttle( exit_status.success? ? -1 : 1 )
    self.log.debug "Child %d exited: %p." % [ child_pid, exit_status ]
    self.crew_workers.delete( child_pid )
  end
end
|
356
|
+
|
357
|
+
|
358
|
+
### Wait on the child associated with the given +pid+, deleting it from the
|
359
|
+
### crew workers if successful.
|
360
|
+
def reap_specific_child( pid )
  # Check, without blocking, whether the child with the given +pid+ has
  # exited. If it has, drop it from the crew and adjust the throttle by its
  # exit status; if not, just log that and return.
  #
  # WNOHANG matters here: a blocking wait never returns nil, so the else
  # branch could not run, and #stop calls this before it has signalled the
  # workers, which would stall shutdown until a worker exits on its own.
  spid, status = Process.waitpid2( pid, Process::WNOHANG )
  if spid
    self.log.debug "Child %d exited: %p." % [ spid, status ]
    self.crew_workers.delete( spid )
    # A clean exit relaxes the throttle by one; a failure tightens it by one.
    self.adjust_throttle( status.success? ? -1 : 1 )
  else
    self.log.debug "Child %d no reapy." % [ pid ]
  end
end
|
370
|
+
|
371
|
+
|
372
|
+
end # class Symphony::Daemon
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
require 'rusage'
|
5
|
+
require 'metriks'
|
6
|
+
require 'metriks/reporter/logger'
|
7
|
+
require 'metriks/reporter/proc_title'
|
8
|
+
|
9
|
+
require 'symphony' unless defined?( Symphony )
|
10
|
+
|
11
|
+
|
12
|
+
# Metrics for Symphony Tasks.
|
13
|
+
module Symphony::Metrics

  #
  # Instance methods
  #

  ### Create the metrics registry, the job metrics, and the two reporters,
  ### after passing all arguments up to the including class's initializer.
  def initialize( * )
    super

    @metriks_registry = Metriks::Registry.new
    @job_timer        = @metriks_registry.timer( 'job.duration' )
    @job_counter      = @metriks_registry.meter( 'job.count' )

    @rusage_gauge = @metriks_registry.gauge( 'job.rusage' ) { Process.rusage.to_h }

    # One reporter writes to the Symphony logger; the other writes to the
    # process title and logs any error it hits.
    @log_reporter = Metriks::Reporter::Logger.new(
      logger: Loggability[ Symphony ],
      registry: @metriks_registry
    )
    @proc_reporter = Metriks::Reporter::ProcTitle.new(
      prefix: self.class.name,
      registry: @metriks_registry,
      on_error: lambda {|ex| self.log.error(ex) }
    )

    # Process-title fields: the total job count and the one-minute job rate.
    @proc_reporter.add( 'jobs' ) { @job_counter.count }
    @proc_reporter.add( 'jobs', '/sec' ) { @job_counter.one_minute_rate }
  end


  ##
  # The Metriks::Registry that tracks all metrics for this job
  attr_reader :metriks_registry

  ##
  # The job timer metric
  attr_reader :job_timer

  ##
  # The job counter metric
  attr_reader :job_counter


  ### Start both reporters, then continue with the normal startup.
  def start
    @log_reporter.start
    @proc_reporter.start

    super
  end


  ### Clear the metrics registry, then continue with the normal restart.
  def restart
    self.metriks_registry.clear
    super
  end


  ### Mark the job counter and time the superclass's work on +payload+ and
  ### +metadata+ with the job timer.
  def work( payload, metadata )
    self.job_counter.mark
    self.job_timer.time { super }
  end

end # module Symphony::Metrics
|
84
|
+
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
|
5
|
+
module Symphony

  # Helper macros for declaring other methods. Extend a class with this
  # module to use them in the class body.
  #
  #   class MyClass
  #       extend Symphony::MethodUtilities
  #
  #       singleton_attr_accessor :types
  #       singleton_method_alias :kinds, :types
  #   end
  #
  #   MyClass.types = [ :pheno, :proto, :stereo ]
  #   MyClass.kinds # => [:pheno, :proto, :stereo]
  #
  module MethodUtilities

    ### Declare a reader on the singleton class of the declaring object for
    ### each of the given +symbols+ (class-level readers when used in a
    ### Class body).
    def singleton_attr_reader( *symbols )
      symbols.each {|attr_name| singleton_class.__send__(:attr_reader, attr_name) }
    end

    ### Declare a writer on the singleton class of the declaring object for
    ### each of the given +symbols+.
    def singleton_attr_writer( *symbols )
      symbols.each {|attr_name| singleton_class.__send__(:attr_writer, attr_name) }
    end

    ### Declare both a reader and a writer on the singleton class of the
    ### declaring object for each of the given +symbols+.
    def singleton_attr_accessor( *symbols )
      symbols.each {|attr_name| singleton_class.__send__(:attr_accessor, attr_name) }
    end

    ### Make +newname+ an alias of the singleton method +original+.
    def singleton_method_alias( newname, original )
      singleton_class.__send__( :alias_method, newname, original )
    end


    ### Define a predicate method (<tt>attrname?</tt>) that returns +true+ or
    ### +false+ according to the truthiness of the matching instance
    ### variable. A trailing '?' on +attrname+ is ignored.
    def attr_predicate( attrname )
      base_name = attrname.to_s.chomp( '?' )
      ivar_name = "@#{base_name}"

      define_method( "#{base_name}?" ) do
        !!instance_variable_get( ivar_name )
      end
    end


    ### Define a predicate reader for +attrname+ (as with #attr_predicate)
    ### plus a regular writer method.
    def attr_predicate_accessor( attrname )
      base_name = attrname.to_s.chomp( '?' )
      attr_writer( base_name )
      attr_predicate( base_name )
    end

  end # module MethodUtilities

end # module Symphony
|