symphony 0.3.0.pre20140327204419
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/.simplecov +9 -0
- data/ChangeLog +508 -0
- data/History.rdoc +15 -0
- data/Manifest.txt +30 -0
- data/README.rdoc +89 -0
- data/Rakefile +77 -0
- data/TODO.md +5 -0
- data/USAGE.rdoc +381 -0
- data/bin/symphony +8 -0
- data/bin/symphony-task +10 -0
- data/etc/config.yml.example +9 -0
- data/lib/symphony/daemon.rb +372 -0
- data/lib/symphony/metrics.rb +84 -0
- data/lib/symphony/mixins.rb +75 -0
- data/lib/symphony/queue.rb +313 -0
- data/lib/symphony/routing.rb +98 -0
- data/lib/symphony/signal_handling.rb +107 -0
- data/lib/symphony/task.rb +407 -0
- data/lib/symphony/tasks/auditor.rb +51 -0
- data/lib/symphony/tasks/failure_logger.rb +106 -0
- data/lib/symphony/tasks/pinger.rb +64 -0
- data/lib/symphony/tasks/simulator.rb +57 -0
- data/lib/symphony/tasks/ssh.rb +126 -0
- data/lib/symphony/tasks/sshscript.rb +168 -0
- data/lib/symphony.rb +56 -0
- data/spec/helpers.rb +36 -0
- data/spec/symphony/mixins_spec.rb +78 -0
- data/spec/symphony/queue_spec.rb +368 -0
- data/spec/symphony/task_spec.rb +147 -0
- data/spec/symphony_spec.rb +14 -0
- data.tar.gz.sig +0 -0
- metadata +332 -0
- metadata.gz.sig +0 -0
@@ -0,0 +1,372 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
require 'configurability'
|
5
|
+
require 'loggability'
|
6
|
+
require 'fcntl'
|
7
|
+
require 'trollop'
|
8
|
+
|
9
|
+
require 'symphony' unless defined?( Symphony )
|
10
|
+
require 'symphony/worker'
|
11
|
+
require 'symphony/task'
|
12
|
+
|
13
|
+
|
14
|
+
# The Symphony worker daemon. Watches a Symphony job queue, and runs the tasks
|
15
|
+
# contained in the jobs it fetches.
|
16
|
+
class Symphony::Daemon
|
17
|
+
extend Loggability,
|
18
|
+
Configurability
|
19
|
+
|
20
|
+
include Symphony::SignalHandling
|
21
|
+
|
22
|
+
|
23
|
+
# Loggability API -- log to the symphony logger
|
24
|
+
log_to :symphony
|
25
|
+
|
26
|
+
# Configurability API -- use the 'worker_daemon' section of the config
|
27
|
+
config_key :worker_daemon
|
28
|
+
|
29
|
+
|
30
|
+
# Signals we understand. Frozen so the shared list can't be modified
# accidentally by code that receives it.
QUEUE_SIGS = [
  :QUIT, :INT, :TERM, :HUP,
  # :TODO: :WINCH, :USR1, :USR2, :TTIN, :TTOU
].freeze

# The maximum throttle value caused by failing workers
THROTTLE_MAX = 16

# The factor which controls how much incrementing the throttle factor
# affects the pause between workers being started.
THROTTLE_FACTOR = 2
|
42
|
+
|
43
|
+
|
44
|
+
#
|
45
|
+
# Class methods
|
46
|
+
#
|
47
|
+
|
48
|
+
### Return a String made up of the receiver's name and Symphony::VERSION.
### If +include_buildnum+ is true, the hex digits found in
### Symphony::REVISION (or '0' if there are none) are appended as the build.
def self::version_string( include_buildnum=false )
  description = "%s %s" % [ self.name, Symphony::VERSION ]
  return description unless include_buildnum

  build = Symphony::REVISION[ /: ([[:xdigit:]]+)/, 1 ] || '0'
  return description << " (build %s)" % [ build ]
end
|
57
|
+
|
58
|
+
|
59
|
+
### Start the daemon: parse the command-line options in +argv+, load the
### configuration, then create a new instance and run it.
def self::run( argv )
  Loggability.format_with( :color ) if $stdout.tty?

  progname = File.basename( $0 )

  # The option block below calls #banner, #version and #opt without a receiver,
  # so inside it +self+ is the option parser rather than this class. Build the
  # version string out here, where +self+ is still the Daemon class.
  version_text = self.version_string( true )

  # NOTE(review): DEFAULT_CREW_SIZE is not defined in the visible part of this
  # class -- confirm it is declared somewhere reachable from here.
  opts = Trollop.options( argv ) do
    banner "Usage: #{progname} OPTIONS"
    version version_text

    opt :config, "The config file to load instead of the default",
      :type => :string
    opt :crew_size, "Number of workers to maintain.", :default => DEFAULT_CREW_SIZE
    opt :queue, "The name of the queue to monitor.", :default => '_default_'

    opt :debug, "Turn on debugging output."
  end

  # Turn on debugging if it's enabled
  if opts.debug
    $DEBUG = true
    Loggability.level = :debug
  end

  # Now load the config file
  Symphony.load_config( opts.config )

  # Re-enable debug-level logging if the config reset it
  Loggability.level = :debug if opts.debug

  # And start the daemon
  self.new( opts ).run
end
|
91
|
+
|
92
|
+
|
93
|
+
#
|
94
|
+
# Instance methods
|
95
|
+
#
|
96
|
+
|
97
|
+
### Create a new Daemon instance that will fetch jobs from the queue named by
### +options.queue+ and maintain +options.crew_size+ workers.
def initialize( options )
  @options = options
  @queue   = Symphony::Queue.new( options.queue )

  # Process control: the desired crew size and the PIDs of the current crew
  @crew_size    = options.crew_size
  @crew_workers = []

  # Lifecycle flags
  @running       = false
  @shutting_down = false

  # Worker-startup throttling
  @throttle           = 0
  @last_child_started = Time.now

  self.set_up_signal_handling
end
|
112
|
+
|
113
|
+
|
114
|
+
######
|
115
|
+
public
|
116
|
+
######
|
117
|
+
|
118
|
+
# The Array of PIDs of currently-running workers
|
119
|
+
attr_reader :crew_workers
|
120
|
+
|
121
|
+
# The maximum number of children to have running at any given time
|
122
|
+
attr_reader :crew_size
|
123
|
+
|
124
|
+
# A self-pipe for deferred signal-handling
|
125
|
+
attr_reader :selfpipe
|
126
|
+
|
127
|
+
# The Symphony::Queue that jobs will be fetched from
|
128
|
+
attr_reader :queue
|
129
|
+
|
130
|
+
# The Configurability::Config object for the current configuration.
|
131
|
+
attr_reader :config
|
132
|
+
|
133
|
+
|
134
|
+
### Returns the value of the daemon's running flag, which is +true+ while the
### supervisor loop should keep going.
def running?
  @running
end
|
138
|
+
|
139
|
+
|
140
|
+
### Returns the value of the daemon's shutdown flag, which is set to +true+
### once #stop has been called.
def shutting_down?
  @shutting_down
end
|
144
|
+
|
145
|
+
|
146
|
+
### Run the supervisor: optionally start a new session, install the signal
### traps, run the job-handling loop until it finishes, restore the traps,
### and exit the process.
def run
  self.log.info "Starting worker supervisor"

  # Only attempt to become a session leader when running with an effective
  # UID of 0.
  if Process.euid == 0
    self.log.debug " became session leader of new session: %d" % [ Process.setsid ]
  end

  # Trap the signals we understand for the duration of the job loop, then put
  # the default handlers back.
  self.set_signal_traps( *QUEUE_SIGS )
  self.start_handling_jobs
  self.reset_signal_traps( *QUEUE_SIGS )

  exit
end
|
167
|
+
|
168
|
+
|
169
|
+
### The main loop of the daemon -- wait for signals, children dying, or jobs, and
### take appropriate action. Any StandardError raised inside the loop is logged,
### and the daemon is always stopped when the loop ends.
def start_handling_jobs
  @running = true

  self.log.debug "Starting supervisor loop..."
  while self.running?
    self.start_missing_children unless self.shutting_down?

    # While worker startup is throttled, only wait for the throttle period so
    # the loop comes back around to start the missing workers; otherwise wait
    # indefinitely (nil).
    # NOTE(review): assumes #wait_for_signals accepts an optional timeout in
    # seconds -- confirm against Symphony::SignalHandling.
    timeout = self.throttle_seconds
    timeout = nil if timeout.zero?

    self.wait_for_signals( timeout )
    self.reap_children
  end
  self.log.info "Supervisor job loop done."

rescue => err
  self.log.fatal "%p in job-handler loop: %s" % [ err.class, err.message ]
  self.log.debug { ' ' + err.backtrace.join("\n ") }

ensure
  @running = false
  self.stop
end
|
194
|
+
|
195
|
+
|
196
|
+
### Shut the daemon down: flag the shutdown, ignore the signals in QUEUE_SIGS,
### then make up to three attempts to reap and signal the remaining workers.
### Any workers still in the crew after that are sent a KILL signal.
def stop
  self.log.warn "Stopping."
  @shutting_down = true

  self.ignore_signals( *QUEUE_SIGS )

  self.log.warn "Stopping children."
  unless self.crew_workers.empty?
    3.times do
      self.reap_children( *self.crew_workers )
      sleep( 1 )
      self.kill_children
      sleep( 1 )
      break if self.crew_workers.empty?
      sleep( 1 )
    end
  end

  # Give up on our remaining children.
  Signal.trap( :CHLD, :IGNORE )
  return if self.crew_workers.empty?

  self.log.warn " %d workers remain: sending KILL" % [ self.crew_workers.length ]
  self.kill_children( :KILL )
end
|
220
|
+
|
221
|
+
|
222
|
+
### Reload the configuration. If the daemon has no config object there is
### nothing to reload, so a warning is logged and +nil+ is returned instead of
### failing with a NoMethodError on +nil+.
def reload_config
  current = self.config

  # NOTE(review): nothing in the visible part of this class assigns @config --
  # confirm where the config object is expected to come from.
  unless current
    self.log.warn "No config to reload."
    return nil
  end

  self.log.warn "Reloading config %p" % [ current ]
  current.reload
end
|
227
|
+
|
228
|
+
|
229
|
+
#########
|
230
|
+
protected
|
231
|
+
#########
|
232
|
+
|
233
|
+
### Handle the signal +sig+:
###
### CHLD::       nothing to do; the signal only needs to wake the daemon up.
### HUP::        reload the configuration.
### INT, TERM::  clear the running flag if the daemon is running; otherwise
###              KILL the workers and exit immediately with status 255.
###
### Any other signal is logged as unhandled.
def handle_signal( sig )
  self.log.debug "Handling signal %s" % [ sig ]

  case sig
  when :CHLD
    # Just need to wake up, nothing else necessary

  when :HUP
    self.log.warn "Hangup signal."
    self.reload_config

  when :INT, :TERM
    unless @running
      # A second INT/TERM once the loop has already been told to stop
      self.ignore_signals
      self.log.warn "%s signal: forceful shutdown" % [ sig ]
      self.kill_children( :KILL )
      exit!( 255 )
    end

    self.log.warn "%s signal: immediate shutdown" % [ sig ]
    @running = false

  else
    self.log.warn "Unhandled signal %s" % [ sig ]
  end
end
|
260
|
+
|
261
|
+
|
262
|
+
### Start as many workers as are needed to bring the crew up to #crew_size,
### unless the throttle period since the last worker start hasn't elapsed yet.
def start_missing_children
  shortfall = self.crew_size - self.crew_workers.length
  return if shortfall <= 0

  # Still inside the throttle period: log it and try again later
  if self.throttle_seconds >= ( Time.now - @last_child_started )
    self.log.info "Not starting children: throttled for %0.2f seconds" %
      [ self.throttle_seconds ]
    return
  end

  self.log.debug "Starting %d workers for a crew of %d" % [ shortfall, self.crew_size ]
  1.upto( shortfall ) do
    worker_pid = self.start_worker
    self.log.debug " started worker %d" % [ worker_pid ]
    self.crew_workers.push( worker_pid )
  end

  @last_child_started = Time.now
end
|
283
|
+
|
284
|
+
|
285
|
+
### Return the number of seconds to leave between child startups: 0 when
### the throttle is zero, otherwise the natural logarithm of the throttle
### multiplied by THROTTLE_FACTOR.
def throttle_seconds
  return 0 if @throttle.zero?
  Math.log( @throttle ) * THROTTLE_FACTOR
end
|
290
|
+
|
291
|
+
|
292
|
+
### Add +adjustment+ to the throttle value, keeping the result between zero
### and THROTTLE_MAX.
def adjust_throttle( adjustment=1 )
  self.log.debug "Adjusting worker throttle by %d" % [ adjustment ]

  adjusted  = @throttle + adjustment
  @throttle = adjusted < 0 ? 0 : adjusted
  @throttle = THROTTLE_MAX if @throttle > THROTTLE_MAX
end
|
300
|
+
|
301
|
+
|
302
|
+
### Kill all current children with the specified +signal+. Returns +true+ if the signal was
### sent to one or more children, and +false+ otherwise.
def kill_children( signal=:TERM )
  return false if self.crew_workers.empty?

  self.log.info "Sending %s signal to %d workers: %p." %
    [ signal, self.crew_workers.length, self.crew_workers ]

  # Signal each worker separately: a single Process.kill call with several
  # pids raises on the first pid that no longer exists, which would leave the
  # pids after it unsignaled.
  signaled = 0
  self.crew_workers.each do |pid|
    begin
      Process.kill( signal, pid )
      signaled += 1
    rescue Errno::ESRCH
      self.log.debug "Ignoring signals to unreaped children."
    end
  end

  return signaled > 0
end
|
315
|
+
|
316
|
+
|
317
|
+
### Start a new Symphony::Worker for the daemon's queue and return the result
### of Symphony::Worker.start (used by callers as the worker's PID). Returns
### +nil+ without starting anything if the daemon is shutting down.
def start_worker
  return nil if self.shutting_down?

  self.log.debug "Starting a worker."
  Symphony::Worker.start( self.queue )
end
|
323
|
+
|
324
|
+
|
325
|
+
### Clean up after children that have died. With no arguments, reap whichever
### children have exited; otherwise wait on each of the given +pids+ in turn.
### An Errno::ECHILD raised while reaping is logged and swallowed.
def reap_children( *pids )
  self.log.debug "Reaping children."
  return self.reap_any_child if pids.empty?

  self.log.debug " waiting on pids: %p" % [ pids ]
  pids.each {|child_pid| self.reap_specific_child(child_pid) }
rescue Errno::ECHILD
  self.log.debug "No more children to reap."
end
|
340
|
+
|
341
|
+
|
342
|
+
### Reap every child process that has already exited, without blocking, and
### remove each from the work crew. The throttle is lowered by one for each
### child that exited successfully and raised by one for each that didn't.
def reap_any_child
  self.log.debug " no pids; waiting on any child in this process group"

  loop do
    child_pid, exit_status = Process.waitpid2( -1, Process::WNOHANG )
    break unless child_pid

    self.adjust_throttle( exit_status.success? ? -1 : 1 )
    self.log.debug "Child %d exited: %p." % [ child_pid, exit_status ]
    self.crew_workers.delete( child_pid )
  end
end
|
356
|
+
|
357
|
+
|
358
|
+
### Check on the child associated with the given +pid+ without blocking. If it
### has exited, delete it from the crew workers and adjust the throttle
### according to its exit status. If +pid+ isn't a child of this process it is
### dropped from the crew workers so it won't be waited on or signaled again.
def reap_specific_child( pid )
  # Don't block: if the child is still running, waitpid2 returns nil and the
  # caller gets the chance to signal it.
  spid, status = Process.waitpid2( pid, Process::WNOHANG )

  if spid
    self.log.debug "Child %d exited: %p." % [ spid, status ]
    self.crew_workers.delete( spid )
    self.adjust_throttle( status.success? ? -1 : 1 )
  else
    self.log.debug "Child %d no reapy." % [ pid ]
  end
rescue Errno::ECHILD
  # There's no exit status to be had, so leave the throttle alone.
  self.log.debug "Child %d is not a child of this process; forgetting it." % [ pid ]
  self.crew_workers.delete( pid )
end
|
370
|
+
|
371
|
+
|
372
|
+
end # class Symphony::Daemon
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
require 'rusage'
|
5
|
+
require 'metriks'
|
6
|
+
require 'metriks/reporter/logger'
|
7
|
+
require 'metriks/reporter/proc_title'
|
8
|
+
|
9
|
+
require 'symphony' unless defined?( Symphony )
|
10
|
+
|
11
|
+
|
12
|
+
# Metrics for Symphony Tasks. Wraps the including class's #initialize, #start,
# #restart and #work methods to record a job counter, a job timer, and an
# rusage gauge in a Metriks registry, and to report them via the Symphony
# logger and the process title.
module Symphony::Metrics

  #
  # Instance methods
  #

  ### Set up metrics and reporters on creation.
  def initialize( * )
    super

    @metriks_registry = Metriks::Registry.new
    self.register_job_metrics

    @log_reporter = Metriks::Reporter::Logger.new(
      logger: Loggability[ Symphony ],
      registry: @metriks_registry )
    @proc_reporter = Metriks::Reporter::ProcTitle.new(
      prefix: self.class.name,
      registry: @metriks_registry,
      on_error: lambda {|ex| self.log.error(ex) } )

    # These blocks read the instance variable each time they're called, so they
    # pick up the counter that's current at that point.
    @proc_reporter.add( 'jobs' ) do
      @job_counter.count
    end
    @proc_reporter.add( 'jobs', '/sec' ) do
      @job_counter.one_minute_rate
    end
  end


  ##
  # The Metriks::Registry that tracks all metrics for this job
  attr_reader :metriks_registry

  ##
  # The job timer metric
  attr_reader :job_timer

  ##
  # The job counter metric
  attr_reader :job_counter


  ### Start the metrics reporters, then continue with the regular startup.
  def start
    @log_reporter.start
    @proc_reporter.start

    super
  end


  ### Reset metrics on restart.
  def restart
    self.metriks_registry.clear

    # NOTE(review): presumably clearing the registry drops the metrics that
    # were registered with it -- confirm against Metriks::Registry#clear.
    # Register fresh ones so jobs run after the restart are still tracked by
    # the registry the reporters read from.
    self.register_job_metrics

    super
  end


  ### Add metrics to the task's work block: mark the job counter and time the
  ### call to the including class's #work.
  def work( payload, metadata )
    self.job_counter.mark
    self.job_timer.time do
      super
    end
  end


  #########
  protected
  #########

  ### Register the job timer, job counter, and rusage gauge with the registry,
  ### and remember them in instance variables.
  def register_job_metrics
    @job_timer = @metriks_registry.timer( 'job.duration' )
    @job_counter = @metriks_registry.meter( 'job.count' )

    @rusage_gauge = @metriks_registry.gauge('job.rusage') { Process.rusage.to_h }
  end

end # module Symphony::Metrics
|
84
|
+
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#encoding: utf-8
|
3
|
+
|
4
|
+
|
5
|
+
module Symphony

  # Declarative helpers for defining accessors and aliases on an object's
  # singleton class, and predicate-style readers on its instances.
  #
  #   class MyClass
  #       extend Symphony::MethodUtilities
  #
  #       singleton_attr_accessor :types
  #       singleton_method_alias :kinds, :types
  #   end
  #
  #   MyClass.types = [ :pheno, :proto, :stereo ]
  #   MyClass.kinds # => [:pheno, :proto, :stereo]
  #
  module MethodUtilities

    ### Declare a reader on the singleton class of the declaring object for
    ### each of the given +symbols+ (e.g., class-level readers backed by class
    ### instance variables when used in a Class).
    def singleton_attr_reader( *symbols )
      metaclass = singleton_class
      symbols.each {|name| metaclass.__send__(:attr_reader, name) }
    end

    ### Declare a writer on the singleton class of the declaring object for
    ### each of the given +symbols+.
    def singleton_attr_writer( *symbols )
      metaclass = singleton_class
      symbols.each {|name| metaclass.__send__(:attr_writer, name) }
    end

    ### Declare both a reader and a writer on the singleton class of the
    ### declaring object for each of the given +symbols+.
    def singleton_attr_accessor( *symbols )
      metaclass = singleton_class
      symbols.each {|name| metaclass.__send__(:attr_accessor, name) }
    end

    ### Make +newname+ an alias of the singleton method +original+.
    def singleton_method_alias( newname, original )
      singleton_class.__send__( :alias_method, newname, original )
    end


    ### Define an instance method named after +attrname+ with a trailing '?'
    ### that returns +true+ if the instance variable of the same name is
    ### truthy, and +false+ otherwise. A '?' at the end of +attrname+ is
    ### ignored.
    def attr_predicate( attrname )
      basename = attrname.to_s.chomp( '?' )
      define_method( "#{basename}?" ) do
        instance_variable_get( "@#{basename}" ) ? true : false
      end
    end


    ### Define a predicate reader for +attrname+ (as with #attr_predicate)
    ### along with a regular writer method.
    def attr_predicate_accessor( attrname )
      basename = attrname.to_s.chomp( '?' )
      attr_writer( basename )
      attr_predicate( basename )
    end

  end # module MethodUtilities

end # module Symphony
|