bfire 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +0 -0
- data/README.md +90 -0
- data/bin/bfire +120 -0
- data/examples/benchmark.rb +18 -0
- data/examples/dag.rb +26 -0
- data/examples/elasticity.rb +105 -0
- data/examples/ibbt.rb +125 -0
- data/examples/mine.rb +40 -0
- data/examples/modules/apache2/manifests/init.pp +44 -0
- data/examples/modules/app/files/app/app.rb +29 -0
- data/examples/modules/app/files/app/config.ru +2 -0
- data/examples/modules/app/files/app.phtml +4 -0
- data/examples/modules/app/manifests/init.pp +19 -0
- data/examples/modules/common/manifests/init.pp +8 -0
- data/examples/modules/haproxy/files/default +4 -0
- data/examples/modules/haproxy/files/haproxy.rsyslog.conf +2 -0
- data/examples/modules/haproxy/manifests/init.pp +21 -0
- data/examples/modules/mysql/manifests/init.pp +40 -0
- data/examples/modules/rsyslog/files/rsyslog.conf +116 -0
- data/examples/modules/rsyslog/manifests/init.pp +15 -0
- data/examples/modules/sinatra/manifests/init.pp +9 -0
- data/examples/modules/web/files/monitor/app.rb +55 -0
- data/examples/modules/web/files/monitor/config.ru +2 -0
- data/examples/modules/web/files/monitor/haproxy.cfg.erb +50 -0
- data/examples/modules/web/manifests/init.pp +26 -0
- data/examples/simple.rb +58 -0
- data/lib/bfire/aggregator/zabbix.rb +55 -0
- data/lib/bfire/engine.rb +546 -0
- data/lib/bfire/group.rb +241 -0
- data/lib/bfire/metric.rb +36 -0
- data/lib/bfire/provider/puppet.rb +58 -0
- data/lib/bfire/pub_sub/publisher.rb +40 -0
- data/lib/bfire/rule.rb +110 -0
- data/lib/bfire/template.rb +142 -0
- data/lib/bfire/version.rb +3 -0
- data/lib/bfire.rb +10 -0
- metadata +241 -0
data/lib/bfire/engine.rb
ADDED
@@ -0,0 +1,546 @@
|
|
1
|
+
require 'restfully'
|
2
|
+
require 'restfully/media_type/application_vnd_bonfire_xml'
|
3
|
+
require 'thread'
|
4
|
+
require 'thwait'
|
5
|
+
|
6
|
+
require 'net/ssh'
|
7
|
+
require 'net/scp'
|
8
|
+
require 'net/sftp'
|
9
|
+
require 'net/ssh/gateway'
|
10
|
+
require 'net/ssh/multi'
|
11
|
+
|
12
|
+
# Ruby Graph Library
|
13
|
+
require 'rgl/adjacency'
|
14
|
+
require 'rgl/topsort'
|
15
|
+
|
16
|
+
require 'bfire/group'
|
17
|
+
require 'bfire/aggregator/zabbix'
|
18
|
+
require 'bfire/metric'
|
19
|
+
|
20
|
+
module Bfire
|
21
|
+
class Engine
|
22
|
+
include PubSub::Publisher
|
23
|
+
|
24
|
+
DEBUG = Logger::DEBUG
|
25
|
+
INFO = Logger::INFO
|
26
|
+
WARN = Logger::WARN
|
27
|
+
ERROR = Logger::ERROR
|
28
|
+
UNKNOWN = Logger::UNKNOWN
|
29
|
+
|
30
|
+
# Engine configuration hash:
|
31
|
+
attr_reader :properties
|
32
|
+
# A Restfully::Session object:
|
33
|
+
attr_reader :session
|
34
|
+
|
35
|
+
# Builds a new engine with empty registries and default configuration.
#
# @param [Hash] opts construction options. Only <tt>:root</tt> is read here:
#   the base directory used by #path_to (defaults to the current directory).
def initialize(opts = {})
  @root = opts[:root] || Dir.pwd

  # Registries, each starting out as its own empty Hash.
  @properties, @vmgroups, @networks, @storages, @locations = {}, {}, {}, {}, {}

  @mutex      = Mutex.new
  @experiment = nil

  # Master threads on one side, per-group threads on the other.
  @tg_master = ThreadGroup.new
  @tg_groups = ThreadGroup.new

  # Fill in the default configuration values.
  reset
end
|
52
|
+
|
53
|
+
# Expands +path+ relative to the engine root directory.
# An absolute +path+ is returned (normalized) as is.
def path_to(path)
  base_dir = @root
  File.expand_path(path, base_dir)
end
|
56
|
+
|
57
|
+
# Fills in every configuration property that is still unset (nil or false)
# with its default value. Properties that already hold a value are kept.
# The SSH key pair defaults come from #keychain.
def reset
  # Defaults are wrapped in lambdas so that they are only evaluated when the
  # corresponding property is actually missing.
  lazy_defaults = [
    [:name,             lambda { "Bfire experiment" }],
    [:description,      lambda { "Anonymous description" }],
    [:walltime,         lambda { 3600 }],
    [:logger,           lambda { Logger.new(STDOUT) }],
    [:logging,          lambda { INFO }],
    [:user,             lambda { ENV['USER'] }],
    [:ssh_max_attempts, lambda { 3 }]
  ]
  lazy_defaults.each { |key, default| conf[key] ||= default.call }

  # keychain returns nil when no key pair is found: both names are then nil.
  public_key, private_key = keychain
  conf[:key] ||= private_key
  conf[:authorized_keys] ||= public_key
end
|
69
|
+
|
70
|
+
# Looks in ~/.ssh for the first "*.pub" file that has a matching private key
# (same path without the ".pub" suffix) on disk.
#
# Returns [public_key_path, private_key_path], or nil when no pair exists.
def keychain
  Dir[File.expand_path("~/.ssh/*.pub")].each do |candidate|
    counterpart = candidate.gsub(/\.pub$/,"")
    return [candidate, counterpart] if File.exist?(counterpart)
  end
  nil
end
|
82
|
+
|
83
|
+
# Builds the dependency graph of the given group names.
#
# Every name in +nodes+ becomes a vertex; for each dependency declared by a
# group, an edge goes from the dependency to the dependent group.
#
# Returns the RGL::DirectedAdjacencyGraph.
# Raises Error if the resulting graph contains a cycle.
def dag(nodes)
  graph = RGL::DirectedAdjacencyGraph.new

  nodes.each do |node|
    graph.add_vertex(node)
    group(node).dependencies.each do |dependency, _block|
      graph.add_vertex(dependency)
      graph.add_edge(dependency, node)
    end
  end

  raise Error, "Your dependency graph is not acyclic!" unless graph.acyclic?
  graph
end
|
98
|
+
|
99
|
+
# Launch procedure.
#
# Starts the experiment monitor in a master thread, then either reloads an
# existing experiment (dev mode, when a running experiment with the
# configured name is found) or deploys the groups. When that succeeded the
# group monitors are launched, otherwise the experiment is cleaned up.
# Finally waits for all master threads.
#
# Any exception is logged and converted into an :error event.
def run!
  # call #session to initiate Restfully::Session object outside of threads
  logger.info "#{banner}Using bonfire-api/#{session.root['version']}"

  # Both a failure and a normal termination end with the cleanup procedure.
  [:error, :terminated].each { |event| on(event) { cleanup! } }

  monitor_thread = Thread.new do
    Thread.current.abort_on_exception = true
    monitor
  end
  @tg_master.add(monitor_thread)

  resume = dev? && experiment(conf[:name])
  initialized = resume ? resuscitate! : deploy!

  experiment.update(:status => "running")

  initialized ? launch! : cleanup!

  ThreadsWait.all_waits(*@tg_master.list)
rescue Exception => e
  # NOTE(review): rescuing Exception (not StandardError) also catches
  # interrupts and exits; presumably intentional so that they trigger the
  # cleanup too -- confirm before narrowing.
  logger.error "#{banner}#{e.class.name}: #{e.message}"
  logger.debug e.backtrace.join("; ")
  trigger :error
end
|
133
|
+
|
134
|
+
# Computes the topological order of all declared groups, logs it, and
# launches the groups in that order.
#
# Returns the result of #launch_waiting_groups.
def deploy!
  iterator = dag(@vmgroups.keys).topsort_iterator
  # Work on a clone for logging so the real iterator is not consumed.
  order = iterator.clone.to_a
  logger.info "#{banner}Launching groups in the following topological order: #{order.inspect}."

  launch_waiting_groups(iterator)
end
|
141
|
+
|
142
|
+
# Launches the groups in topological order, one batch of "waiting" vertices
# at a time, and waits for the end of each group's initialization procedure.
#
# Each group is started in its own thread which is joined right away, so the
# groups of a batch are initialized one after the other.
#
# Returns true once the iterator is exhausted, false as soon as the engine
# is in error.
def launch_waiting_groups(topsort_iterator)
  until topsort_iterator.at_end?
    return false if error?

    # ugly, but the library does not expose the list of waiting vertices...
    waiting = topsort_iterator.instance_variable_get("@waiting")
    logger.info "#{banner}Launching #{waiting.inspect}"

    # The iterator must not be advanced while walking its waiting list,
    # otherwise you can get side-effects.
    waiting.each do |group_name|
      vmgroup = group(group_name)
      # in case that group was error'ed by the engine...
      next if vmgroup.error?
      launcher = Thread.new do
        Thread.current.abort_on_exception = true
        vmgroup.launch_initial_resources
      end
      launcher.join
    end

    # Consume as many vertices as were waiting at the start of this batch.
    waiting.length.times { topsort_iterator.forward }
  end
  true
end
|
165
|
+
|
166
|
+
|
167
|
+
# Starts one monitoring thread per group, polls (every 5 seconds) until every
# group has triggered its :ready event, then triggers the engine's own :ready
# event and waits for the group threads to finish.
#
# A group thread that terminated with an exception, is aborting, or carries
# the :ko thread-local flag triggers the :error event.
def launch!
  @vmgroups.each_value do |vmgroup|
    watcher = Thread.new do
      Thread.current.abort_on_exception = true
      vmgroup.monitor
    end
    @tg_groups.add(watcher)
  end

  sleep 5 until @vmgroups.all? { |(_name, g)| g.triggered_events.include?(:ready) }

  logger.info "#{banner}All groups are now READY: #{groups.inspect}."

  trigger :ready
  ThreadsWait.all_waits(*@tg_groups.list) do |finished|
    # http://apidock.com/ruby/Thread/status
    state = finished.status
    trigger :error if state.nil? || state == "aborting" || finished[:ko]
  end
end
|
191
|
+
|
192
|
+
# Reloads the networks, storages and computes attached to the current
# experiment into the engine registries.
#
# Compute names are expected to look like "<group>--<template>--<guid>";
# each compute is appended to the instances of the matching group template.
# Every declared group then merges its templates and is checked.
#
# Returns true. Raises Error when a compute refers to an undeclared group.
def resuscitate!
  experiment.networks.each { |net| @networks[net['name']] = net }
  experiment.storages.each { |sto| @storages[sto['name']] = sto }

  experiment.computes.each do |compute|
    group_name, template_name, _guid = compute['name'].split("--")
    vmgroup = group(group_name)
    raise Error, "Group #{group_name} is not declared in the DSL." if vmgroup.nil?
    vmgroup.template(template_name).instances.push(compute)
  end

  @vmgroups.each_value do |vmgroup|
    vmgroup.merge_templates!
    vmgroup.check!
  end

  true
end
|
215
|
+
|
216
|
+
# Group accessor and DSL entry point.
#
# Without a block, returns the group registered under <tt>name</tt> (or nil).
# With a block, creates the group if it does not exist yet (passing the
# symbolized <tt>options</tt>) and evaluates the block in the context of the
# group; the value of the block is returned.
def group(name, options = {}, &block)
  key = name.to_sym
  return @vmgroups[key] unless block

  @vmgroups[key] ||= Group.new(self, key, options.symbolize_keys)
  @vmgroups[key].instance_eval(&block)
end
|
230
|
+
|
231
|
+
# =================================================
|
232
|
+
# = Resource declaration/finding/creation methods =
|
233
|
+
# =================================================
|
234
|
+
|
235
|
+
# Returns the Restfully::Session used to talk to the API.
# The session is created on first use, from the <tt>:restfully_config</tt>
# property and the engine logger, and reused afterwards.
def session
  return @session if @session

  @session = Restfully::Session.new(
    :configuration_file => conf[:restfully_config],
    :logger => logger
  )
end
|
242
|
+
|
243
|
+
# Declares a network. A network is location dependent: the given block is
# stored under the network name and resolved per location by #fetch_network.
# <tt>options</tt> is accepted but not used here.
def network(name, options = {}, &block)
  @networks.store(name.to_s, block)
end
|
247
|
+
|
248
|
+
# Resolves the network called <tt>name</tt> at the given <tt>location</tt>,
# caching the result under the key "<location name>.<name>".
#
# Resolution of the per-location entry:
# * a Restfully::Resource is returned as is;
# * a Proc (see #network) is called with (name, location, experiment) and
#   its result is stored;
# * anything else falls back to searching <tt>location.networks</tt> for a
#   network whose name equals <tt>name</tt> (or matches it, for a Regexp).
#
# The generic declaration only seeds the per-location entry when that entry
# is still empty. Overwriting it on every call would discard the resolved
# resource and run the declaration block again on each lookup.
#
# Returns the resolved network, or nil if none was found.
def fetch_network(name, location)
  sname = name.to_s
  key = [location['name'], sname].join(".")
  logger.debug "#{banner}Looking for network #{name.inspect} at #{location['name'].inspect}. key=#{key.inspect}"
  # Fetched before taking the lock: #experiment synchronizes on the same mutex.
  exp = experiment
  synchronize {
    # Duplicate general networks if present (first lookup only)
    generic = @networks[sname]
    @networks[key] ||= generic.clone if generic

    @networks[key] = case @networks[key]
    when Restfully::Resource
      @networks[key]
    when Proc
      @networks[key].call(name, location, exp)
    else
      location.networks.find{|n|
        if name.kind_of?(Regexp)
          n['name'] =~ name
        else
          n['name'] == sname
        end
      }
    end
  }
  @networks[key]
end
|
274
|
+
|
275
|
+
# Declares a storage. A storage is location dependent: the given block is
# stored under the storage name and resolved per location by #fetch_storage.
# <tt>options</tt> is accepted but not used here.
def storage(name, options = {}, &block)
  @storages.store(name.to_s, block)
end
|
279
|
+
|
280
|
+
# Resolves the storage called <tt>name</tt> at the given <tt>location</tt>,
# caching the result under the key "<location name>.<name>".
#
# Resolution of the per-location entry:
# * a Restfully::Resource is returned as is;
# * a Proc (see #storage) is called with (name, location, experiment) and
#   its result is stored;
# * anything else falls back to searching <tt>location.storages</tt> for a
#   storage whose name equals <tt>name</tt> (or matches it, for a Regexp).
#
# The generic declaration only seeds the per-location entry when that entry
# is still empty. Overwriting it on every call would discard the resolved
# resource and run the declaration block again on each lookup.
#
# Returns the resolved storage, or nil if none was found.
def fetch_storage(name, location)
  sname = name.to_s
  key = [location['name'], sname].join(".")
  logger.debug "#{banner}Looking for storage #{name.inspect} at #{location['name'].inspect}. key=#{key.inspect}"
  # Fetched before taking the lock: #experiment synchronizes on the same mutex.
  exp = experiment
  synchronize {
    # Duplicate general storages if present (first lookup only)
    generic = @storages[sname]
    @storages[key] ||= generic.clone if generic

    @storages[key] = case @storages[key]
    when Restfully::Resource
      @storages[key]
    when Proc
      @storages[key].call(name, location, exp)
    else
      location.storages.find{|n|
        if name.kind_of?(Regexp)
          n['name'] =~ name
        else
          n['name'] == sname
        end
      }
    end
  }
  @storages[key]
end
|
306
|
+
|
307
|
+
# Finds a location by name and remembers it in the local location cache.
#
# With <tt>:any</tt>, a location is picked at random among those exposed by
# the API; nil is returned when the API exposes none. Otherwise the local
# cache is consulted first, then the API.
#
# Raises Error when the named location cannot be found.
def fetch_location(name)
  name = name.to_sym

  if name == :any
    candidates = session.root.locations
    return nil if candidates.length == 0
    location = candidates[rand(candidates.length)]
  else
    location = @locations[name] || session.root.locations[name]
  end

  raise Error, "#{banner}Can't find #{name.inspect} location" if location.nil?

  synchronize do
    @locations[location['name'].to_sym] ||= location
  end
  location
end
|
322
|
+
|
323
|
+
# Launches <tt>count</tt> compute resources based on the given
# <tt>template</tt>: the template hash is submitted once per requested
# compute to the computes collection of the experiment.
#
# Returns the array of submission results, in submission order.
def launch_compute(template, count = 1)
  payload = template.to_h
  (0...count).map do |index|
    logger.debug "#{banner}#{index+1}/#{count} - Launching compute with the following data: #{payload.inspect}"
    experiment.computes.submit(payload)
  end
end
|
332
|
+
|
333
|
+
# Returns the experiment of this engine, resolving it on first use.
#
# When no experiment is known yet:
# * with a <tt>name</tt>, looks for an existing experiment in the 'running'
#   status with that name (the result may be nil);
# * with no name (or nil), submits a new experiment built from the
#   configured name, description and walltime, in the "waiting" status.
#
# Once an experiment is known it is returned as is, whatever the argument.
#
# Returns a Restfully::Resource object, or nil.
def experiment(name = nil)
  # Resolved before taking the lock.
  connection = session
  synchronize do
    @experiment ||= begin
      experiments = connection.root.experiments
      if name.nil?
        experiments.submit(
          :name => conf[:name],
          :description => conf[:description],
          :walltime => conf[:walltime],
          :status => "waiting"
        )
      else
        experiments.find { |exp| exp['status'] == 'running' && exp['name'] == name }
      end
    end
  end
end
|
355
|
+
|
356
|
+
# Fetches the last hour of history for the metric <tt>name</tt> from the
# Zabbix aggregator and wraps it in a Metric.
#
# @param [#to_s] name the Zabbix item key.
# @param [Hash] options <tt>:hosts</tt> is a compute (or list of computes)
#   whose 'name' and 'id' build the Zabbix host name "<name>-<id>"; only the
#   first host is used in the item filter. The remaining options are handed
#   to Metric.new.
#
# The caller's hash is left untouched: <tt>:hosts</tt> is removed from a
# private copy only.
def metric(name, options = {})
  options = options.dup
  hosts = [options.delete(:hosts) || []].flatten.map{|h|
    [h['name'], h['id']].join("-")
  }
  @zabbix ||= Aggregator::Zabbix.new(session, experiment)

  items = @zabbix.request("item.get", {
    :filter => {
      "host" => hosts[0],
      "key_" => name.to_s
    },
    "output" => "extend"
  }).map{|i| i['itemid']}

  # Most recent last
  now = Time.now.to_i
  results = @zabbix.request("history.get", {
    # At most the first two matching items are queried.
    "itemids" => items[0..1],
    # FIX once we can correctly specify metric type
    "history" => 1, # STRING
    "output" => "extend",
    "time_from" => now-3600,
    "time_till" => now
  })

  Metric.new(name, results, options)
end
|
383
|
+
|
384
|
+
# =========================
|
385
|
+
# = Configuration methods =
|
386
|
+
# =========================
|
387
|
+
|
388
|
+
# Stores <tt>value</tt> under the symbolized <tt>property</tt> name in the
# configuration hash, and returns the value.
def set(property, value)
  @properties.store(property.to_sym, value)
end
|
392
|
+
|
393
|
+
# Configuration accessor: returns the properties Hash itself (not a copy),
# so changes made through it are visible to the engine.
def conf
  return @properties
end
|
397
|
+
|
398
|
+
# =====================
|
399
|
+
# = Cleanup procedure =
|
400
|
+
# =====================
|
401
|
+
|
402
|
+
# Cleanup procedure: kills every group thread, then deletes the experiment
# when #cleanup? allows it and an experiment exists. A warning is logged and
# a 5 second grace period is observed before the deletion.
def cleanup!
  if @tg_groups.list.any?
    synchronize do
      @tg_groups.list.each { |thread| thread.kill }
    end
  end

  if !cleanup? || @experiment.nil?
    logger.warn "#{banner}Not cleaning up experiment."
  else
    logger.warn "#{banner}Cleaning up in 5 seconds. Hit CTRL-C now to keep your experiment running."
    sleep 5
    @experiment.delete
  end
end
|
416
|
+
|
417
|
+
# Tells whether the experiment should be deleted by the cleanup procedure.
#
# Never in dev mode or when <tt>:no_cancel</tt> is set. When
# <tt>:no_cleanup</tt> is set, only if the engine is in error.
def cleanup?
  if dev? || conf[:no_cancel]
    false
  elsif conf[:no_cleanup] && !error?
    false
  else
    true
  end
end
|
422
|
+
|
423
|
+
# ===================
|
424
|
+
# = Helpers methods =
|
425
|
+
# ===================
|
426
|
+
|
427
|
+
# Returns the engine itself.
def engine
  return self
end
|
430
|
+
|
431
|
+
# Returns the Hash of declared groups, keyed by group name (the registry
# itself, not a copy).
def groups
  return @vmgroups
end
|
434
|
+
|
435
|
+
# Prefix put in front of every log message emitted by the engine.
def banner
  '[BFIRE] '
end
|
438
|
+
|
439
|
+
# True when the <tt>:dev</tt> property is set to a truthy value, false
# otherwise (always a strict boolean).
def dev?
  conf[:dev] ? true : false
end
|
442
|
+
|
443
|
+
# Returns the engine logger.
# On first use the configured <tt>:logger</tt> gets its level set to the
# configured <tt>:logging</tt> value; the same object is returned afterwards.
def logger
  return @logger if @logger

  configured = conf[:logger]
  configured.level = conf[:logging]
  @logger = configured
end
|
451
|
+
|
452
|
+
# Runs the given block while holding the engine mutex and returns the value
# of the block.
def synchronize(&block)
  @mutex.synchronize do
    block.call
  end
end
|
456
|
+
|
457
|
+
# ===============
|
458
|
+
# = SSH methods =
|
459
|
+
# ===============
|
460
|
+
|
461
|
+
# Setup an SSH connection as <tt>username</tt> to <tt>fqdn</tt>.
# @param [String] fqdn the fully qualified domain name of the host to connect to (a list of hosts when <tt>:multi</tt> is set).
# @param [String] username the login to use to connect to the host.
# @param [Hash] options a hash of additional options to pass.
# @yield [Net::SSH::Connection::Session] ssh a SSH handler.
#
# By default, the SSH connection will be retried at most <tt>ssh_max_attempts</tt> (engine property) times if the host is unreachable. You can overwrite that default locally by passing a <tt>:max_attempts</tt> option.
# <tt>:timeout</tt> defaults to 10 and <tt>:keys</tt> to the configured key, unless a <tt>:password</tt> is given.
#
# If option <tt>:multi</tt> is given and true, then an instance of Net::SSH::Multi::Session is yielded. See <http://net-ssh.github.com/multi/v1/api/index.html> for more information.
#
# If option <tt>:log</tt> is given and true, connection attempts are logged.
#
# The given <tt>options</tt> hash is not modified. The bfire-specific keys
# (<tt>:log</tt>, <tt>:max_attempts</tt>, <tt>:multi</tt>,
# <tt>:concurrent_connections</tt>) are removed from a private copy before
# the remaining options are handed to Net::SSH, which rejects option keys it
# does not know.
#
# @raise [ArgumentError] if no block is given.
# @raise [Errno::EHOSTUNREACH] if the host is still unreachable after the retries.
def ssh(fqdn, username, options = {}, &block)
  raise ArgumentError, "You MUST provide a block when calling #ssh" if block.nil?

  options = options.dup
  log = !!options.delete(:log)
  max_attempts = options.delete(:max_attempts) || conf[:ssh_max_attempts]
  multi = options.delete(:multi)
  concurrent_connections = options.delete(:concurrent_connections) || 10

  options[:timeout] ||= 10
  if options.has_key?(:password)
    options[:auth_methods] ||= ['keyboard-interactive']
  else
    options[:keys] ||= [conf[:key]].compact
  end

  logger.info "#{banner}SSHing to #{username}@#{fqdn.inspect}..." if log
  attempts = 0
  begin
    attempts += 1
    if multi
      Net::SSH::Multi.start(
        :concurrent_connections => concurrent_connections
      ) do |s|
        s.via conf[:gateway], conf[:user] unless conf[:gateway].nil?
        fqdn.each {|h| s.use "#{username}@#{h}"}
        block.call(s)
      end
    elsif conf[:gateway]
      gw_handler = Net::SSH::Gateway.new(conf[:gateway], conf[:user], :forward_agent => true)
      begin
        gw_handler.ssh(fqdn, username, options, &block)
      ensure
        # Always release the gateway, even when the connection or the block
        # raises (a retry opens a fresh gateway).
        gw_handler.shutdown!
      end
    else
      Net::SSH.start(fqdn, username, options, &block)
    end
  rescue Errno::EHOSTUNREACH => e
    # The first attempt is not a retry: up to max_attempts retries follow it.
    if attempts <= max_attempts
      logger.info "#{banner}No route to host #{fqdn}. Retrying in 5 secs..." if log
      sleep 5
      retry
    else
      logger.info "#{banner}No route to host #{fqdn}. Won't retry." if log
      raise e
    end
  end
end
|
515
|
+
|
516
|
+
protected
|
517
|
+
# Polls the experiment status until it reaches a final state, triggering an
# event on each status change:
# * 'running'                  -> :running    (next poll in 30 seconds)
# * 'terminating', 'canceling' -> :stopped    (next poll in 10 seconds)
# * 'terminated', 'canceled'   -> :terminated (polling stops)
# While the experiment is 'waiting' nothing is triggered and the next poll
# happens 10 seconds later. Any other status stops the polling.
#
# Implemented as a loop rather than by calling itself after each sleep, so
# that the call stack does not grow with the duration of the experiment.
def monitor
  @experiment_state ||= nil
  loop do
    sleep_time = nil
    logger.info "#{banner}Monitoring experiment..."
    experiment.reload
    status = experiment['status']
    has_changed = (@experiment_state != status)
    case status
    when 'waiting'
      logger.info "#{banner}Experiment is waiting. Nothing to do..."
      sleep_time = 10
    when 'running'
      logger.info "#{banner}Experiment is running."
      trigger :running if has_changed
      sleep_time = 30
    when 'terminating', 'canceling'
      trigger :stopped if has_changed
      sleep_time = 10
    when 'terminated', 'canceled'
      trigger :terminated if has_changed
    end
    @experiment_state = status

    # No sleep time means a final (or unknown) status: stop polling.
    break if sleep_time.nil?
    sleep sleep_time
  end
end
|
544
|
+
|
545
|
+
end
|
546
|
+
end
|