vmpooler 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,14 @@
1
module Vmpooler
  # Sinatra application that serves the vmpooler dashboard page.
  class Dashboard < Sinatra::Base
    # Returns the configuration exposed by Vmpooler.config.
    def config
      Vmpooler.config
    end

    # Renders the :dashboard template. The site name comes from the SITE_NAME
    # environment variable, then the 'site_name' entry of the :config section,
    # and finally falls back to a bold "vmpooler" label.
    get '/dashboard/?' do
      title = ENV['SITE_NAME'] || config[:config]['site_name'] || '<b>vmpooler</b>'
      erb :dashboard, locals: { site_name: title }
    end
  end
end
@@ -0,0 +1,20 @@
1
module Vmpooler
  # Metrics sink that accepts the same calls as the real metrics clients but
  # records nothing. Every reporting method simply returns true.
  class DummyStatsd
    # Readers exist for interface parity; nothing ever assigns them, so they
    # always return nil.
    attr_reader :server, :port, :prefix

    # Accepts and ignores any arguments.
    def initialize(*); end

    # increment / gauge / timing: swallow any arguments and report success.
    [:increment, :gauge, :timing].each do |reporter|
      define_method(reporter) { |*| true }
    end
  end
end
@@ -0,0 +1,53 @@
1
+ require 'connection_pool'
2
+
3
module Vmpooler
  class PoolManager
    # ConnectionPool subclass that reports how long a checkout waited and how
    # many connections are available to an optional metrics object.
    # https://github.com/mperham/connection_pool/blob/master/lib/connection_pool.rb
    class GenericConnectionPool < ConnectionPool
      # options - passed through to ConnectionPool; additionally reads
      #           :metrics (object responding to gauge/timing, may be nil) and
      #           :metric_prefix (defaults to 'connectionpool' when nil or '').
      def initialize(options = {}, &block)
        super(options, &block)
        @metrics = options[:metrics]
        requested_prefix = options[:metric_prefix]
        @metric_prefix = (requested_prefix.nil? || requested_prefix == '') ? 'connectionpool' : requested_prefix
      end

      if Thread.respond_to?(:handle_interrupt)
        # MRI
        # Checks out a connection, yields it, and always checks it back in.
        # Interrupts are deferred around checkout/checkin and only delivered
        # while the caller's block is running.
        def with_metrics(options = {})
          Thread.handle_interrupt(Exception => :never) do
            conn = checkout_with_metrics(options)
            begin
              Thread.handle_interrupt(Exception => :immediate) do
                yield conn
              end
            ensure
              checkin
              record_available_metric
            end
          end
        end
      else
        # jruby 1.7.x
        # Same contract as above, without interrupt masking.
        def with_metrics(options = {})
          conn = checkout_with_metrics(options)
          begin
            yield conn
          ensure
            checkin
            record_available_metric
          end
        end
      end

      private

      # Checks out a connection and reports the availability gauge followed by
      # the time spent waiting, in whole milliseconds.
      def checkout_with_metrics(options)
        started_at = Time.now
        conn = checkout(options)
        waited_ms = ((Time.now - started_at) * 1000).to_i
        record_available_metric
        @metrics.timing(@metric_prefix + '.waited', waited_ms) unless @metrics.nil?
        conn
      end

      # Reports the current number of available connections, if metrics are enabled.
      def record_available_metric
        @metrics.gauge(@metric_prefix + '.available', @available.length) unless @metrics.nil?
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
require 'rubygems' unless defined?(Gem)
require 'socket'

module Vmpooler
  # Minimal Graphite client: each metric is written as one plaintext line
  # ("<prefix>.<path> <value> <unix time>") over a short-lived TCP connection
  # opened on a background thread.
  class Graphite
    attr_reader :server, :port, :prefix

    # params - Hash with string keys:
    #          'server' (required, non-empty), 'port' (default 2003),
    #          'prefix' (default 'vmpooler').
    # Raises ArgumentError when no server is given.
    def initialize(params = {})
      if params['server'].nil? || params['server'].empty?
        raise ArgumentError, "Graphite server is required. Config: #{params.inspect}"
      end

      @server = params['server']
      @port = params['port'] || 2003
      @prefix = params['prefix'] || 'vmpooler'
    end

    # Records a single occurrence of +label+.
    def increment(label)
      log label, 1
    end

    # Records +value+ for +label+.
    def gauge(label, value)
      log label, value
    end

    # Records +duration+ for +label+.
    def timing(label, duration)
      log label, duration
    end

    # Sends one metric line on a new thread and returns that thread.
    # Connection and write errors happen inside the thread, so they are
    # rescued there; the outer rescue only covers failure to start the thread.
    def log(path, value)
      Thread.new do
        begin
          socket = TCPSocket.new(server, port)
          begin
            socket.puts "#{prefix}.#{path} #{value} #{Time.now.to_i}"
          ensure
            socket.close
          end
        rescue => err
          report_failure(path, err)
        end
      end
    rescue => err
      report_failure(path, err)
    end

    private

    # Writes the failure notice to stderr; metrics are best-effort.
    def report_failure(path, err)
      $stderr.puts "Failure logging #{path} to graphite server [#{server}:#{port}]: #{err}"
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'rubygems' unless defined?(Gem)
2
+
3
module Vmpooler
  # Appends timestamped messages to a log file.
  class Logger
    # f - path of the file that receives log lines.
    def initialize(
      f = '/var/log/vmpooler.log'
    )
      @file = f
    end

    # Appends "[YYYY-MM-DD HH:MM:SS] <string>" to the log file and echoes the
    # same line to stdout when the VMPOOLER_DEBUG environment variable is set.
    # The level argument is accepted but not used.
    def log(_level, string)
      stamp = Time.new.strftime('%Y-%m-%d %H:%M:%S')
      line = "[#{stamp}] #{string}"

      puts line if ENV['VMPOOLER_DEBUG']

      # File.open never interprets the path; Kernel#open would run a command
      # for a path that begins with "|".
      File.open(@file, 'a') do |f|
        f.puts line
      end
    end
  end
end
@@ -0,0 +1,1029 @@
1
+ module Vmpooler
2
+ class PoolManager
3
# Defaults for the delay between two checks of the same pool (see check_pool).
# Shortest delay, in seconds; also the value the delay resets to after activity.
CHECK_LOOP_DELAY_MIN_DEFAULT = 5
# Longest delay, in seconds, that the back-off may reach.
CHECK_LOOP_DELAY_MAX_DEFAULT = 60
# Factor the delay is multiplied by after a check that found nothing to do.
CHECK_LOOP_DELAY_DECAY_DEFAULT = 2.0
6
+
7
# Stores the collaborators in process-wide globals, which is where the other
# methods of this class read them from, and resets the bookkeeping tables.
#
# config  - configuration hash
# logger  - object responding to log(level, message)
# redis   - redis client
# metrics - metrics client
def initialize(config, logger, redis, metrics)
  # Shared handles
  $config  = config
  $logger  = logger
  $metrics = metrics
  $redis   = redis

  # Registries filled in later: provider objects and worker threads, by name
  $providers = {}
  $threads   = {}

  # Mutex tables, populated on demand by pool_mutex and vm_mutex
  @reconfigure_pool = {}
  @vm_mutex = {}
end
30
+
31
# Returns the configuration stored in the $config global.
def config
  $config
end
34
+
35
# Place pool configuration in redis so an API instance can discover running pool configuration.
#
# For every configured pool the name is added to the 'vmpooler__pools' set and
# its settings are written to the "vmpooler__pool__<name>" hash. An 'alias'
# entry (single value or list) is stored as one comma-separated string.
# Pools found in redis but absent from the configuration are removed.
# Always returns nil.
def load_pools_to_redis
  previously_configured_pools = $redis.smembers('vmpooler__pools')
  currently_configured_pools = []

  config[:pools].each do |pool|
    pool_name = pool['name']
    currently_configured_pools << pool_name
    $redis.sadd('vmpooler__pools', pool_name)

    to_set = {}
    pool.each do |key, value|
      to_set[key] = value unless key == 'alias'
    end
    # Decide from the pool itself: 'alias' was deliberately left out of to_set above.
    to_set['alias'] = Array(pool['alias']).join(',') if pool.key?('alias')

    $redis.hmset("vmpooler__pool__#{pool_name}", to_set.to_a.flatten) unless to_set.empty?
  end

  previously_configured_pools.each do |stale_pool|
    next if currently_configured_pools.include?(stale_pool)

    $redis.srem('vmpooler__pools', stale_pool)
    $redis.del("vmpooler__pool__#{stale_pool}")
  end

  nil
end
59
+
60
# Check the state of a pending VM on a background thread.
# If the check raises, the error is logged, the VM is passed to
# fail_pending_vm, and the error is re-raised inside the thread.
# Returns the Thread.
def check_pending_vm(vm, pool, timeout, provider)
  Thread.new do
    begin
      _check_pending_vm(vm, pool, timeout, provider)
    rescue => e
      $logger.log('s', "[!] [#{pool}] '#{vm}' #{timeout} #{provider} errored while checking a pending vm : #{e}")
      fail_pending_vm(vm, pool, timeout)
      raise
    end
  end
end
72
+
73
# Promotes a pending VM to the ready queue when the provider reports it
# ready; otherwise hands it to fail_pending_vm, which applies the timeout.
# Does nothing (returns nil) if the VM's mutex is already held.
def _check_pending_vm(vm, pool, timeout, provider)
  lock = vm_mutex(vm)
  return if lock.locked?

  lock.synchronize do
    next move_pending_vm_to_ready(vm, pool) if provider.vm_ready?(pool, vm)

    fail_pending_vm(vm, pool, timeout)
  end
end
84
+
85
# Drops a VM from the pool's pending set and logs that it no longer exists.
def remove_nonexistent_vm(vm, pool)
  pending_queue = "vmpooler__pending__#{pool}"
  $redis.srem(pending_queue, vm)
  $logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.")
end
89
+
90
# Retires a pending VM once it has been pending for more than +timeout+
# minutes, measured from its 'clone' timestamp in redis.
#
# exists - true: move the VM to the completed queue;
#          false: just remove it from pending (remove_nonexistent_vm).
#
# Returns true when nothing went wrong (including "no clone timestamp" and
# "not timed out yet"), false when any error was raised and logged.
def fail_pending_vm(vm, pool, timeout, exists = true)
  cloned_at = $redis.hget("vmpooler__vm__#{vm}", 'clone')
  return true unless cloned_at

  minutes_pending = (Time.now - Time.parse(cloned_at)) / 60
  return true unless minutes_pending > timeout

  if exists
    $redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm)
    $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")
  else
    remove_nonexistent_vm(vm, pool)
  end
  true
rescue => e
  $logger.log('d', "Fail pending VM failed with an error: #{e}")
  false
end
108
+
109
# Moves a VM from the pending queue to the ready queue and records when it
# became ready.
#
# The time from clone to ready is stored in the daily boot hash and reported
# as a timing metric only when the VM has a 'clone' timestamp; without one
# there is no duration to report, so neither is written.
def move_pending_vm_to_ready(vm, pool)
  now = Time.now
  clone_time = $redis.hget("vmpooler__vm__#{vm}", 'clone')
  finish = format('%.2f', now - Time.parse(clone_time)) if clone_time

  $redis.smove("vmpooler__pending__#{pool}", "vmpooler__ready__#{pool}", vm)
  # maybe remove as this is never used by vmpooler itself?
  $redis.hset("vmpooler__boot__#{Date.today}", "#{pool}:#{vm}", finish) if finish
  $redis.hset("vmpooler__vm__#{vm}", 'ready', now)

  # last boot time is displayed in API, and used by alarming script
  $redis.hset('vmpooler__lastboot', pool, now)

  $metrics.timing("time_to_ready_state.#{pool}", finish) if finish
  $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
end
123
+
124
# Returns true when the provider still reports the VM as ready. If it does
# not, or the probe itself raises, the VM is moved from 'ready' to
# 'completed' and the result of move_vm_queue is returned.
def vm_still_ready?(pool_name, vm_name, provider)
  begin
    return true if provider.vm_ready?(pool_name, vm_name)
  rescue
    # A failing probe is treated the same as "not ready".
  end

  move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
end
131
+
132
# Runs _check_ready_vm on a background thread and returns the Thread.
# +pool+ is the pool's configuration hash. Errors are logged and re-raised
# inside the thread.
def check_ready_vm(vm, pool, ttl, provider)
  Thread.new do
    begin
      _check_ready_vm(vm, pool, ttl, provider)
    rescue => e
      $logger.log('s', "[!] [#{pool['name']}] '#{vm}' failed while checking a ready vm : #{e}")
      raise
    end
  end
end
142
+
143
# Periodically check that a ready VM is still usable.
#
# vm       - VM name
# pool     - pool configuration hash (reads 'name')
# ttl      - minutes a VM may sit in the ready queue; 0 disables the check
# provider - provider object, passed on to the hostname and readiness checks
#
# Skips the VM when its mutex is held or when it was checked less than
# 'vm_checktime' minutes ago. A VM older than the TTL is moved to the
# completed queue. The age is taken from the 'ready' timestamp stored in
# redis by move_pending_vm_to_ready; a VM without that timestamp is treated
# as expired.
def _check_ready_vm(vm, pool, ttl, provider)
  mutex = vm_mutex(vm)
  return if mutex.locked?

  mutex.synchronize do
    vm_key = "vmpooler__vm__#{vm}"
    check_stamp = $redis.hget(vm_key, 'check')
    return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])

    $redis.hset(vm_key, 'check', Time.now)

    # Check if the VM's TTL has expired
    if ttl > 0
      ready_stamp = $redis.hget(vm_key, 'ready')
      # No timestamp: fall back to the epoch so the VM counts as expired
      ready_since = ready_stamp ? Time.parse(ready_stamp) : Time.at(0)
      if ((Time.now - ready_since) / 60) > ttl
        $redis.smove("vmpooler__ready__#{pool['name']}", "vmpooler__completed__#{pool['name']}", vm)

        $logger.log('d', "[!] [#{pool['name']}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
        return
      end
    end

    return if has_mismatched_hostname?(vm, pool, provider)

    vm_still_ready?(pool['name'], vm, provider)
  end
end
168
+
169
# Detects a ready VM whose reported hostname differs from its name and, if
# so, moves it to the completed queue.
#
# The check can be disabled per pool ('check_hostname_for_mismatch') or
# globally ('check_ready_vm_hostname_for_mismatch'); only an explicit false
# disables it.
#
# Returns true when a mismatch was found and the VM was retired, nil
# otherwise (check disabled, VM ready for a minute or less, VM unknown to
# the provider, hostname missing/empty, or hostname matches).
def has_mismatched_hostname?(vm, pool, provider)
  check_hostname = pool['check_hostname_for_mismatch']
  check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil?
  return if check_hostname == false

  # Wait one minute before checking a VM for hostname mismatch
  # When checking as soon as the VM passes the ready test the instance
  # often doesn't report its hostname yet causing the VM to be removed immediately
  vm_ready_time = $redis.hget("vmpooler__vm__#{vm}", 'ready')
  if vm_ready_time
    wait_before_checking = 60
    time_since_ready = (Time.now - Time.parse(vm_ready_time)).to_i
    return unless time_since_ready > wait_before_checking
  end

  # Check if the hostname has magically changed from underneath Pooler
  vm_hash = provider.get_vm(pool['name'], vm)
  # The provider may not know the VM at all; nothing to compare in that case
  return if vm_hash.nil?

  hostname = vm_hash['hostname']
  return if hostname.nil? || hostname.empty?
  return if hostname == vm

  $redis.smove("vmpooler__ready__#{pool['name']}", "vmpooler__completed__#{pool['name']}", vm)
  $logger.log('d', "[!] [#{pool['name']}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
  true
end
194
+
195
# Runs _check_running_vm on a background thread and returns the Thread.
# Errors are logged and re-raised inside the thread.
def check_running_vm(vm, pool, ttl, provider)
  Thread.new do
    begin
      _check_running_vm(vm, pool, ttl, provider)
    rescue => e
      $logger.log('s', "[!] [#{pool}] '#{vm}' failed while checking VM with an error: #{e}")
      raise
    end
  end
end
205
+
206
# Retires a checked-out VM that has outlived its lifetime or has vanished
# from the provider.
#
# ttl is in hours and 0 disables the lifetime check. The checkout time is
# read from the pool's 'active' hash. Does nothing when the VM's mutex is
# already held.
def _check_running_vm(vm, pool, ttl, provider)
  lock = vm_mutex(vm)
  return if lock.locked?

  lock.synchronize do
    # Check that VM is within defined lifetime
    checked_out_at = $redis.hget('vmpooler__active__' + pool, vm)
    if checked_out_at
      hours_running = (Time.now - Time.parse(checked_out_at)) / 60 / 60
      lifetime_exceeded = (ttl.to_i > 0) && (hours_running.to_i >= ttl.to_i)

      if lifetime_exceeded
        move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
        return
      end
    end

    # A VM that passes the readiness probe, or that the provider still
    # knows about, is left in the running queue.
    return if provider.vm_ready?(pool, vm)
    return if provider.get_vm(pool, vm)

    move_vm_queue(pool, vm, 'running', 'completed', "is no longer in inventory, removing from running")
  end
end
234
+
235
# Moves a VM between two of a pool's redis queues (sets named
# "vmpooler__<queue>__<pool>") and, when +msg+ is given, logs it.
def move_vm_queue(pool, vm, queue_from, queue_to, msg = nil)
  source = "vmpooler__#{queue_from}__#{pool}"
  destination = "vmpooler__#{queue_to}__#{pool}"
  $redis.smove(source, destination, vm)
  $logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}") if msg
end
239
+
240
# Clone a VM on a background thread and return the Thread.
# +pool+ is the pool's configuration hash. Errors are logged and re-raised
# inside the thread.
def clone_vm(pool, provider)
  Thread.new do
    begin
      _clone_vm(pool, provider)
    rescue => e
      $logger.log('s', "[!] [#{pool['name']}] failed while cloning VM with an error: #{e}")
      raise
    end
  end
end
251
+
252
# Creates a new VM for a pool.
#
# pool     - pool configuration hash (reads 'name')
# provider - object responding to create_vm(pool_name, vm_name)
#
# The VM is registered in the pending queue before the provider is called.
# On success the clone duration is stored in redis and reported as a metric.
# On failure the VM is removed from pending, its metadata key is set to
# expire after 'data_ttl' hours, and the error is re-raised. The
# 'vmpooler__tasks__clone' counter is decremented in both cases.
def _clone_vm(pool, provider)
  pool_name = pool['name']

  # Generate a randomized hostname: the configured prefix, one letter (so the
  # random part never starts with a digit), then 14 letters or digits
  letters = ('a'..'z').to_a
  alphanumerics = letters + ('0'..'9').to_a
  new_vmname = $config[:config]['prefix'] + letters.sample + Array.new(14) { alphanumerics.sample }.join

  # Add VM to Redis inventory ('pending' pool)
  $redis.sadd('vmpooler__pending__' + pool_name, new_vmname)
  $redis.hset('vmpooler__vm__' + new_vmname, 'clone', Time.now)
  $redis.hset('vmpooler__vm__' + new_vmname, 'template', pool_name)

  begin
    $logger.log('d', "[ ] [#{pool_name}] Starting to clone '#{new_vmname}'")
    start = Time.now
    provider.create_vm(pool_name, new_vmname)
    finish = format('%.2f', Time.now - start)

    $redis.hset('vmpooler__clone__' + Date.today.to_s, pool_name + ':' + new_vmname, finish)
    $redis.hset('vmpooler__vm__' + new_vmname, 'clone_time', finish)
    $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")

    $metrics.timing("clone.#{pool_name}", finish)
  rescue
    $redis.srem("vmpooler__pending__#{pool_name}", new_vmname)
    expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
    $redis.expire("vmpooler__vm__#{new_vmname}", expiration_ttl)
    raise
  ensure
    $redis.decr('vmpooler__tasks__clone')
  end
end
284
+
285
# Destroy a VM on a background thread and return the Thread.
# Errors are logged and re-raised inside the thread.
def destroy_vm(vm, pool, provider)
  Thread.new do
    begin
      _destroy_vm(vm, pool, provider)
    rescue => e
      $logger.log('d', "[!] [#{pool}] '#{vm}' failed while destroying the VM with an error: #{e}")
      raise
    end
  end
end
296
+
297
# Removes a VM from redis bookkeeping and asks the provider to destroy it.
# The VM's metadata key is kept, stamped with a 'destroy' time, and set to
# expire after 'data_ttl' hours. The destroy duration is logged and reported
# as a metric. Does nothing when the VM's mutex is already held.
def _destroy_vm(vm, pool, provider)
  lock = vm_mutex(vm)
  return if lock.locked?

  lock.synchronize do
    $redis.srem('vmpooler__completed__' + pool, vm)
    $redis.hdel('vmpooler__active__' + pool, vm)
    $redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)

    # Auto-expire metadata key
    metadata_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
    $redis.expire('vmpooler__vm__' + vm, metadata_ttl)

    started_at = Time.now
    provider.destroy_vm(pool, vm)
    elapsed = format('%.2f', Time.now - started_at)

    $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{elapsed} seconds")
    $metrics.timing("destroy.#{pool}", elapsed)
  end
end
317
+
318
# Runs _create_vm_disk on a background thread and returns the Thread.
# Errors are logged and re-raised inside the thread.
def create_vm_disk(pool_name, vm, disk_size, provider)
  Thread.new do
    begin
      _create_vm_disk(pool_name, vm, disk_size, provider)
    rescue => e
      $logger.log('d', "[!] [#{pool_name}] '#{vm}' failed while creating disk: #{e}")
      raise
    end
  end
end
328
+
329
# Attaches an additional disk to a VM through the provider.
#
# disk_size - size in gigabytes as a String; must parse to a positive integer.
#
# On success the size is appended to the VM's colon-separated 'disk' field in
# redis. Returns whatever the provider's create_disk returned.
# Raises RuntimeError for a nil, empty, or non-positive disk size.
def _create_vm_disk(pool_name, vm_name, disk_size, provider)
  size_invalid = disk_size.nil? || disk_size.empty? || disk_size.to_i <= 0
  raise("Invalid disk size of '#{disk_size}' passed") if size_invalid

  $logger.log('s', "[ ] [disk_manager] '#{vm_name}' is attaching a #{disk_size}gb disk")

  started_at = Time.now
  result = provider.create_disk(pool_name, vm_name, disk_size.to_i)
  elapsed = format('%.2f', Time.now - started_at)

  unless result
    $logger.log('s', "[+] [disk_manager] '#{vm_name}' failed to attach disk")
    return result
  end

  recorded = $redis.hget('vmpooler__vm__' + vm_name, 'disk')
  disks = recorded ? recorded.split(':') : []
  disks.push("+#{disk_size}gb")
  $redis.hset('vmpooler__vm__' + vm_name, 'disk', disks.join(':'))

  $logger.log('s', "[+] [disk_manager] '#{vm_name}' attached #{disk_size}gb disk in #{elapsed} seconds")

  result
end
353
+
354
# Runs _create_vm_snapshot on a background thread and returns the Thread.
# Errors are logged and re-raised inside the thread.
def create_vm_snapshot(pool_name, vm, snapshot_name, provider)
  Thread.new do
    begin
      _create_vm_snapshot(pool_name, vm, snapshot_name, provider)
    rescue => e
      $logger.log('d', "[!] [#{pool_name}] '#{vm}' failed while creating snapshot: #{e}")
      raise
    end
  end
end
364
+
365
# Asks the provider to snapshot a VM. On success the snapshot time is stored
# in the VM's redis hash under "snapshot:<name>". Returns the provider's
# result.
def _create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
  $logger.log('s', "[ ] [snapshot_manager] 'Attempting to snapshot #{vm_name} in pool #{pool_name}")

  started_at = Time.now
  result = provider.create_snapshot(pool_name, vm_name, snapshot_name)
  elapsed = format('%.2f', Time.now - started_at)

  unless result
    $logger.log('s', "[+] [snapshot_manager] Failed to snapshot '#{vm_name}'")
    return result
  end

  $redis.hset('vmpooler__vm__' + vm_name, 'snapshot:' + snapshot_name, Time.now.to_s)
  $logger.log('s', "[+] [snapshot_manager] '#{vm_name}' snapshot created in #{elapsed} seconds")

  result
end
382
+
383
# Runs _revert_vm_snapshot on a background thread and returns the Thread.
# Errors are logged and re-raised inside the thread.
def revert_vm_snapshot(pool_name, vm, snapshot_name, provider)
  Thread.new do
    begin
      _revert_vm_snapshot(pool_name, vm, snapshot_name, provider)
    rescue => e
      $logger.log('d', "[!] [#{pool_name}] '#{vm}' failed while reverting snapshot: #{e}")
      raise
    end
  end
end
393
+
394
# Asks the provider to revert a VM to a named snapshot and logs the outcome.
# Returns the provider's result.
def _revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
  $logger.log('s', "[ ] [snapshot_manager] 'Attempting to revert #{vm_name}' in pool #{pool_name} to snapshot '#{snapshot_name}'")

  started_at = Time.now
  result = provider.revert_snapshot(pool_name, vm_name, snapshot_name)
  elapsed = format('%.2f', Time.now - started_at)

  outcome_message =
    if result
      "[+] [snapshot_manager] '#{vm_name}' reverted to snapshot '#{snapshot_name}' in #{elapsed} seconds"
    else
      "[+] [snapshot_manager] Failed to revert #{vm_name}' in pool #{pool_name} to snapshot '#{snapshot_name}'"
    end
  $logger.log('s', outcome_message)

  result
end
410
+
411
# Looks up which pool a VM belongs to.
# The pool name is stored in the VM's redis hash under the 'template' field
# (a historical name; it really holds the pool name).
def get_pool_name_for_vm(vm_name)
  vm_key = 'vmpooler__vm__' + vm_name
  $redis.hget(vm_key, 'template')
end
415
+
416
# Returns the provider object registered for a pool, or nil when the pool is
# not configured or names no provider. If several configured pools share the
# name, the last one decides.
def get_provider_for_pool(pool_name)
  matching_pools = $config[:pools].select { |pool| pool['name'] == pool_name }
  provider_name = matching_pools.empty? ? nil : matching_pools.last['provider']
  return nil if provider_name.nil?

  $providers[provider_name]
end
426
+
427
# Starts the disk manager worker thread, stores it in $threads under
# 'disk_manager', and returns it.
#
# maxloop    - number of passes before the thread stops; 0 means run forever
# loop_delay - seconds to sleep after each pass
def check_disk_queue(maxloop = 0, loop_delay = 5)
  $logger.log('d', '[*] [disk_manager] starting worker thread')

  $threads['disk_manager'] = Thread.new do
    completed_passes = 0
    loop do
      _check_disk_queue
      sleep(loop_delay)

      completed_passes += 1
      break if !maxloop.zero? && completed_passes >= maxloop
    end
  end
end
443
+
444
# Takes one "<vm name>:<disk size>" task from the 'vmpooler__tasks__disk' set
# and starts the disk creation for it. Any failure while resolving the pool or
# provider is logged rather than raised. Returns nil when the set is empty.
def _check_disk_queue
  task_detail = $redis.spop('vmpooler__tasks__disk')
  return if task_detail.nil?

  begin
    vm_name, disk_size = task_detail.split(':')

    pool_name = get_pool_name_for_vm(vm_name)
    raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?

    provider = get_provider_for_pool(pool_name)
    raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?

    create_vm_disk(pool_name, vm_name, disk_size, provider)
  rescue => e
    $logger.log('s', "[!] [disk_manager] disk creation appears to have failed: #{e}")
  end
end
461
+
462
# Starts the snapshot manager worker thread, stores it in $threads under
# 'snapshot_manager', and returns it.
#
# maxloop    - number of passes before the thread stops; 0 means run forever
# loop_delay - seconds to sleep after each pass
def check_snapshot_queue(maxloop = 0, loop_delay = 5)
  $logger.log('d', '[*] [snapshot_manager] starting worker thread')

  $threads['snapshot_manager'] = Thread.new do
    completed_passes = 0
    loop do
      _check_snapshot_queue
      sleep(loop_delay)

      completed_passes += 1
      break if !maxloop.zero? && completed_passes >= maxloop
    end
  end
end
478
+
479
# Takes at most one task from the snapshot queue and one from the
# snapshot-revert queue (both hold "<vm name>:<snapshot name>" entries) and
# starts the matching operation. Failures while resolving the pool or
# provider are logged rather than raised.
def _check_snapshot_queue
  # Pops one task from +queue+ and dispatches it to +handler+.
  process_task = lambda do |queue, handler, failure_message|
    task_detail = $redis.spop(queue)
    next if task_detail.nil?

    begin
      vm_name, snapshot_name = task_detail.split(':')

      pool_name = get_pool_name_for_vm(vm_name)
      raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?

      provider = get_provider_for_pool(pool_name)
      raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?

      send(handler, pool_name, vm_name, snapshot_name, provider)
    rescue => e
      $logger.log('s', "[!] [snapshot_manager] #{failure_message}: #{e}")
    end
  end

  process_task.call('vmpooler__tasks__snapshot', :create_vm_snapshot, 'snapshot create appears to have failed')
  process_task.call('vmpooler__tasks__snapshot-revert', :revert_vm_snapshot, 'snapshot revert appears to have failed')
end
514
+
515
# Migrates a VM on a background thread and returns the Thread.
# While holding the VM's mutex, the VM is removed from the pool's 'migrating'
# set and the provider is asked to migrate it. Errors are logged and not
# re-raised.
def migrate_vm(vm_name, pool_name, provider)
  Thread.new do
    begin
      vm_mutex(vm_name).synchronize do
        $redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
        provider.migrate_vm(pool_name, vm_name)
      end
    rescue => e
      $logger.log('s', "[x] [#{pool_name}] '#{vm_name}' migration failed with an error: #{e}")
    end
  end
end
528
+
529
# Helper method mainly used for unit testing.
# Returns true once the current time is later than +time+. The event label
# is not used by the comparison.
def time_passed?(_event, time)
  time < Time.now
end
533
+
534
# Sleeps for up to +loop_delay+ seconds, in one-second steps, returning early
# when a watched event fires. Events are only evaluated every
# +wakeup_period+ seconds.
#
# Possible wakeup events
# :pool_size_change
#   - Fires when the number of ready VMs changes due to being consumed.
#   - Additional options
#     :poolname
# :pool_template_change
#   - Fires when a template configuration update is requested, i.e. the
#     configured template differs from the template that was prepared when
#     the sleep started.
#   - Additional options
#     :poolname
#
def sleep_with_wakeup_events(loop_delay, wakeup_period = 5, options = {})
  exit_by = Time.now + loop_delay
  wakeup_by = Time.now + wakeup_period
  return if time_passed?(:exit_by, exit_by)

  watch_size = options[:pool_size_change]
  watch_template = options[:pool_template_change]

  # Baselines the later readings are compared against
  initial_ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}") if watch_size
  initial_template = $redis.hget('vmpooler__template__prepared', options[:poolname]) if watch_template

  loop do
    sleep(1)
    break if time_passed?(:exit_by, exit_by)

    # Check for wakeup events
    if time_passed?(:wakeup_by, wakeup_by)
      wakeup_by = Time.now + wakeup_period

      # Wakeup if the number of ready VMs has changed
      if watch_size
        current_ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}")
        break unless current_ready_size == initial_ready_size
      end

      # Wakeup if a template other than the prepared one has been configured
      if watch_template
        configured_template = $redis.hget('vmpooler__config__template', options[:poolname])
        break if configured_template && !(initial_template == configured_template)
      end
    end

    break if time_passed?(:exit_by, exit_by)
  end
end
583
+
584
# Starts the worker thread for one pool, stores it in $threads under the pool
# name, and returns it.
#
# pool             - pool configuration hash; 'check_loop_delay_min',
#                    'check_loop_delay_max' and 'check_loop_delay_decay'
#                    override the corresponding arguments when present
# maxloop          - number of passes before the thread stops; 0 = forever
# loop_delay_min   - delay (seconds) used after a pass that did work
# loop_delay_max   - upper bound for the delay
# loop_delay_decay - factor applied to the delay after an idle pass; values
#                    of 1.0 or less are replaced by 2.0
#
# A pass counts as "did work" when it cloned VMs, checked pending VMs, or
# discovered VMs. Errors are logged and re-raised inside the thread.
def check_pool(pool,
               maxloop = 0,
               loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
               loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
               loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
  $logger.log('d', "[*] [#{pool['name']}] starting worker thread")

  # Use the pool settings if they exist
  min_override = pool['check_loop_delay_min']
  loop_delay_min = min_override unless min_override.nil?
  max_override = pool['check_loop_delay_max']
  loop_delay_max = max_override unless max_override.nil?
  decay_override = pool['check_loop_delay_decay']
  loop_delay_decay = decay_override unless decay_override.nil?

  # Sanitize: the delay must grow, and the maximum may not undercut the minimum
  loop_delay_decay = 2.0 if loop_delay_decay <= 1.0
  loop_delay_max = loop_delay_min if loop_delay_max.nil? || loop_delay_max < loop_delay_min

  $threads[pool['name']] = Thread.new do
    begin
      loop_count = 1
      loop_delay = loop_delay_min
      provider = get_provider_for_pool(pool['name'])
      raise("Could not find provider '#{pool['provider']}") if provider.nil?
      sync_pool_template(pool)

      loop do
        result = _check_pool(pool, provider)
        did_work = result[:cloned_vms] > 0 || result[:checked_pending_vms] > 0 || result[:discovered_vms] > 0

        if did_work
          loop_delay = loop_delay_min
        else
          # Back off, but never beyond the configured maximum
          loop_delay = (loop_delay * loop_delay_decay).to_i
          loop_delay = loop_delay_max if loop_delay > loop_delay_max
        end
        sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true)

        unless maxloop.zero?
          break if loop_count >= maxloop
          loop_count += 1
        end
      end
    rescue => e
      $logger.log('s', "[!] [#{pool['name']}] Error while checking the pool: #{e}")
      raise
    end
  end
end
628
+
629
# Returns the mutex for a pool, creating and remembering it on first use.
def pool_mutex(poolname)
  @reconfigure_pool[poolname] ||= Mutex.new
end
632
+
633
# Returns the mutex for a VM, creating and remembering it on first use.
def vm_mutex(vmname)
  @vm_mutex[vmname] ||= Mutex.new
end
636
+
637
# Overwrites the pool's 'template' entry with the template configured in
# redis ('vmpooler__config__template'), when one is set and differs.
def sync_pool_template(pool)
  configured = $redis.hget('vmpooler__config__template', pool['name'])
  return unless configured

  pool['template'] = configured unless pool['template'] == configured
end
645
+
646
# Prepares a pool's template for use: optionally asks the provider to create
# delta disks (when 'create_template_delta_disks' is enabled), then records
# the template as prepared in redis.
def prepare_template(pool, provider)
  delta_disks_enabled = $config[:config]['create_template_delta_disks']
  provider.create_template_delta_disks(pool) if delta_disks_enabled

  $redis.hset('vmpooler__template__prepared', pool['name'], pool['template'])
end
650
+
651
# Makes sure the pool's template has been prepared and switches the pool to a
# newly configured template when one is requested.
#
# The template is (re)prepared when none has been prepared yet, or when the
# prepared one differs from the pool's template and no override is configured
# in redis. When an override is configured and differs from the prepared
# template, update_pool_template is run. All of this is skipped if the pool's
# mutex is already held.
def evaluate_template(pool, provider)
  mutex = pool_mutex(pool['name'])
  prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
  configured_template = $redis.hget('vmpooler__config__template', pool['name'])
  return if mutex.locked?

  never_prepared = prepared_template.nil?
  stale_without_override = prepared_template != pool['template'] && configured_template.nil?
  if never_prepared || stale_without_override
    mutex.synchronize do
      prepare_template(pool, provider)
      prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
    end
  end

  return if configured_template.nil?
  return if configured_template == prepared_template

  mutex.synchronize do
    update_pool_template(pool, provider, configured_template, prepared_template)
  end
end
675
+
676
# Clear a pool of ready and pending instances by moving every VM in those
# queues to the completed queue. Empty queues are skipped without logging.
def drain_pool(poolname)
  drain_queue = lambda do |queue|
    queue_key = "vmpooler__#{queue}__#{poolname}"
    if $redis.scard(queue_key) > 0
      $logger.log('s', "[*] [#{poolname}] removing #{queue} instances")
      $redis.smembers(queue_key).each do |vm|
        move_vm_queue(poolname, vm, queue, 'completed')
      end
    end
  end

  drain_queue.call('ready')
  drain_queue.call('pending')
end
691
+
692
# Switches a pool to a newly configured template: updates the pool hash,
# drains its ready and pending VMs so replacements are built from the new
# template, and prepares the new template. Progress is logged at each step.
def update_pool_template(pool, provider, configured_template, prepared_template)
  pool['template'] = configured_template
  pool_label = "[*] [#{pool['name']}]"
  $logger.log('s', "#{pool_label} template updated from #{prepared_template} to #{configured_template}")

  # Remove all ready and pending VMs so new instances are created from the new template
  drain_pool(pool['name'])

  # Prepare template for deployment
  $logger.log('s', "#{pool_label} preparing pool template for deployment")
  prepare_template(pool, provider)
  $logger.log('s', "#{pool_label} is ready for use")
end
702
+
703
# Shrinks a pool that holds more ready VMs than its configured size.
#
# pool     - pool configuration hash (reads 'name' and 'size')
# provider - not used by this method
# ready    - current number of ready VMs
# total    - current number of ready plus pending VMs; nil or 0 means there
#            is nothing to remove
#
# The surplus ready VMs are placed in the completed queue. If VMs are also
# pending (total > ready), all of them are moved to completed too. Does
# nothing when the pool's mutex is already held.
def remove_excess_vms(pool, provider, ready, total)
  return if total.nil?
  return if total == 0

  pool_name = pool['name']
  mutex = pool_mutex(pool_name)
  return if mutex.locked?
  return unless ready > pool['size']

  mutex.synchronize do
    ready_queue = "vmpooler__ready__#{pool_name}"
    completed_queue = "vmpooler__completed__#{pool_name}"

    (ready - pool['size']).times do
      # spop already removes the VM from the ready set, so it has to be added
      # to the completed set directly; a move from 'ready' would find nothing.
      surplus_vm = $redis.spop(ready_queue)
      break if surplus_vm.nil? # ready set drained in the meantime

      $redis.sadd(completed_queue, surplus_vm)
    end

    if total > ready
      $redis.smembers("vmpooler__pending__#{pool_name}").each do |vm|
        move_vm_queue(pool_name, vm, 'pending', 'completed')
      end
    end
  end
end
722
+
723
# Applies a pool size override stored in redis ('vmpooler__config__poolsize')
# to the pool hash. Skipped when the pool's mutex is held, no override is
# set, or the size is unchanged. Integer() raises ArgumentError for a
# non-numeric override.
def update_pool_size(pool)
  mutex = pool_mutex(pool['name'])
  return if mutex.locked?

  override = $redis.hget('vmpooler__config__poolsize', pool['name'])
  return if override.nil?

  requested_size = Integer(override)
  return if requested_size == pool['size']

  mutex.synchronize { pool['size'] = requested_size }
end
734
+
735
+ def _check_pool(pool, provider)
736
+ pool_check_response = {
737
+ discovered_vms: 0,
738
+ checked_running_vms: 0,
739
+ checked_ready_vms: 0,
740
+ checked_pending_vms: 0,
741
+ destroyed_vms: 0,
742
+ migrated_vms: 0,
743
+ cloned_vms: 0
744
+ }
745
+ # INVENTORY
746
+ inventory = {}
747
+ begin
748
+ mutex = pool_mutex(pool['name'])
749
+ mutex.synchronize do
750
+ provider.vms_in_pool(pool['name']).each do |vm|
751
+ if !$redis.sismember('vmpooler__running__' + pool['name'], vm['name']) &&
752
+ !$redis.sismember('vmpooler__ready__' + pool['name'], vm['name']) &&
753
+ !$redis.sismember('vmpooler__pending__' + pool['name'], vm['name']) &&
754
+ !$redis.sismember('vmpooler__completed__' + pool['name'], vm['name']) &&
755
+ !$redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']) &&
756
+ !$redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])
757
+
758
+ pool_check_response[:discovered_vms] += 1
759
+ $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])
760
+
761
+ $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
762
+ end
763
+
764
+ inventory[vm['name']] = 1
765
+ end
766
+ end
767
+ rescue => err
768
+ $logger.log('s', "[!] [#{pool['name']}] _check_pool failed with an error while inspecting inventory: #{err}")
769
+ return pool_check_response
770
+ end
771
+
772
+ # RUNNING
773
+ $redis.smembers("vmpooler__running__#{pool['name']}").each do |vm|
774
+ if inventory[vm]
775
+ begin
776
+ vm_lifetime = $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12
777
+ pool_check_response[:checked_running_vms] += 1
778
+ check_running_vm(vm, pool['name'], vm_lifetime, provider)
779
+ rescue => err
780
+ $logger.log('d', "[!] [#{pool['name']}] _check_pool with an error while evaluating running VMs: #{err}")
781
+ end
782
+ else
783
+ move_vm_queue(pool['name'], vm, 'running', 'completed', 'is a running VM but is missing from inventory. Marking as completed.')
784
+ end
785
+ end
786
+
787
+ # READY
788
+ $redis.smembers("vmpooler__ready__#{pool['name']}").each do |vm|
789
+ if inventory[vm]
790
+ begin
791
+ pool_check_response[:checked_ready_vms] += 1
792
+ check_ready_vm(vm, pool, pool['ready_ttl'] || 0, provider)
793
+ rescue => err
794
+ $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating ready VMs: #{err}")
795
+ end
796
+ else
797
+ move_vm_queue(pool['name'], vm, 'ready', 'completed', 'is a ready VM but is missing from inventory. Marking as completed.')
798
+ end
799
+ end
800
+
801
+ # PENDING
802
+ $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
803
+ pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15
804
+ if inventory[vm]
805
+ begin
806
+ pool_check_response[:checked_pending_vms] += 1
807
+ check_pending_vm(vm, pool['name'], pool_timeout, provider)
808
+ rescue => err
809
+ $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating pending VMs: #{err}")
810
+ end
811
+ else
812
+ fail_pending_vm(vm, pool['name'], pool_timeout, false)
813
+ end
814
+ end
815
+
816
+ # COMPLETED
817
+ $redis.smembers("vmpooler__completed__#{pool['name']}").each do |vm|
818
+ if inventory[vm]
819
+ begin
820
+ pool_check_response[:destroyed_vms] += 1
821
+ destroy_vm(vm, pool['name'], provider)
822
+ rescue => err
823
+ $redis.srem("vmpooler__completed__#{pool['name']}", vm)
824
+ $redis.hdel("vmpooler__active__#{pool['name']}", vm)
825
+ $redis.del("vmpooler__vm__#{vm}")
826
+ $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating completed VMs: #{err}")
827
+ end
828
+ else
829
+ $logger.log('s', "[!] [#{pool['name']}] '#{vm}' not found in inventory, removed from 'completed' queue")
830
+ $redis.srem("vmpooler__completed__#{pool['name']}", vm)
831
+ $redis.hdel("vmpooler__active__#{pool['name']}", vm)
832
+ $redis.del("vmpooler__vm__#{vm}")
833
+ end
834
+ end
835
+
836
+ # DISCOVERED
837
+ begin
838
+ $redis.smembers("vmpooler__discovered__#{pool['name']}").each do |vm|
839
+ %w[pending ready running completed].each do |queue|
840
+ if $redis.sismember("vmpooler__#{queue}__#{pool['name']}", vm)
841
+ $logger.log('d', "[!] [#{pool['name']}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
842
+ $redis.srem("vmpooler__discovered__#{pool['name']}", vm)
843
+ end
844
+ end
845
+
846
+ if $redis.sismember("vmpooler__discovered__#{pool['name']}", vm)
847
+ $redis.smove("vmpooler__discovered__#{pool['name']}", "vmpooler__completed__#{pool['name']}", vm)
848
+ end
849
+ end
850
+ rescue => err
851
+ $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating discovered VMs: #{err}")
852
+ end
853
+
854
+ # MIGRATIONS
855
+ $redis.smembers("vmpooler__migrating__#{pool['name']}").each do |vm|
856
+ if inventory[vm]
857
+ begin
858
+ pool_check_response[:migrated_vms] += 1
859
+ migrate_vm(vm, pool['name'], provider)
860
+ rescue => err
861
+ $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}")
862
+ end
863
+ end
864
+ end
865
+
866
+ # UPDATE TEMPLATE
867
+ # Evaluates a pool template to ensure templates are prepared adequately for the configured provider
868
+ # If a pool template configuration change is detected then template preparation is repeated for the new template
869
+ # Additionally, a pool will drain ready and pending instances
870
+ evaluate_template(pool, provider)
871
+
872
+ # REPOPULATE
873
+ # Do not attempt to repopulate a pool while a template is updating
874
+ unless pool_mutex(pool['name']).locked?
875
+ ready = $redis.scard("vmpooler__ready__#{pool['name']}")
876
+ total = $redis.scard("vmpooler__pending__#{pool['name']}") + ready
877
+
878
+ $metrics.gauge("ready.#{pool['name']}", $redis.scard("vmpooler__ready__#{pool['name']}"))
879
+ $metrics.gauge("running.#{pool['name']}", $redis.scard("vmpooler__running__#{pool['name']}"))
880
+
881
+ if $redis.get("vmpooler__empty__#{pool['name']}")
882
+ $redis.del("vmpooler__empty__#{pool['name']}") unless ready.zero?
883
+ elsif ready.zero?
884
+ $redis.set("vmpooler__empty__#{pool['name']}", 'true')
885
+ $logger.log('s', "[!] [#{pool['name']}] is empty")
886
+ end
887
+
888
+ # Check to see if a pool size change has been made via the configuration API
889
+ # Since check_pool runs in a loop it does not
890
+ # otherwise identify this change when running
891
+ update_pool_size(pool)
892
+
893
+ if total < pool['size']
894
+ (1..(pool['size'] - total)).each do |_i|
895
+ if $redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
896
+ begin
897
+ $redis.incr('vmpooler__tasks__clone')
898
+ pool_check_response[:cloned_vms] += 1
899
+ clone_vm(pool, provider)
900
+ rescue => err
901
+ $logger.log('s', "[!] [#{pool['name']}] clone failed during check_pool with an error: #{err}")
902
+ $redis.decr('vmpooler__tasks__clone')
903
+ raise
904
+ end
905
+ end
906
+ end
907
+ end
908
+ end
909
+
910
+ # Remove VMs in excess of the configured pool size
911
+ remove_excess_vms(pool, provider, ready, total)
912
+
913
+ pool_check_response
914
+ end
915
+
916
# Create a provider object, usually based on the providers/*.rb class, that implements providers/base.rb
#
# config, logger, metrics - handed unchanged to the provider's constructor
# provider_class - Needs to match (case-insensitively) the name of a class in the
#                  Vmpooler::PoolManager::Provider namespace. This is either as a gem in the
#                  LOADPATH or in providers/*.rb ie Vmpooler::PoolManager::Provider::X
# provider_name  - Should be a unique provider name
# options        - handed unchanged to the provider's constructor
#
# returns an object Vmpooler::PoolManager::Provider::*
# or raises a RuntimeError if no class in the namespace matches provider_class
def create_provider_object(config, logger, metrics, provider_class, provider_name, options)
  provider_klass = Vmpooler::PoolManager::Provider
  wanted = provider_class.to_s
  classname = provider_klass.constants.find { |candidate| candidate.to_s.casecmp(wanted) == 0 }

  # The guard must depend only on the lookup result: no match means the provider is unknown.
  # (Testing an unrelated name here would mask the intended error message.)
  raise("Provider '#{provider_class}' is unknown for pool with provider name '#{provider_name}'") if classname.nil?

  provider_klass.const_get(classname).new(config, logger, metrics, provider_name, options)
end
931
+
932
# Main entry point of the pool manager: resets startup state in redis, normalises
# the configuration, creates one provider object per distinct provider name and
# then runs the supervision loop for the worker threads.
#
# maxloop    - number of supervision passes to run; 0 means never stop on its own
# loop_delay - argument given to sleep at the end of every supervision pass
def execute!(maxloop = 0, loop_delay = 1)
  $logger.log('d', 'starting vmpooler')

  # Nothing is known about in-flight clone tasks at startup, so reset the counter
  $redis.set('vmpooler__tasks__clone', 0)
  # Drop 'vmpooler__migration' since stale entries may be left after a restart
  $redis.del('vmpooler__migration')

  # Older configuration files keep the vSphere settings under the top-level ':vsphere:' key;
  # copy them to ':providers:/:vsphere:' when nothing is defined there yet
  unless $config[:vsphere].nil?
    if $config[:providers].nil? || $config[:providers][:vsphere].nil?
      $logger.log('d', "[!] Detected an older configuration file. Copying the settings from ':vsphere:' to ':providers:/:vsphere:'")
      $config[:providers] ||= {}
      $config[:providers][:vsphere] = $config[:vsphere]
    end
  end

  # Pools without an explicit provider fall back to 'vsphere'
  $config[:pools].each do |pool|
    next unless pool['provider'].nil?

    $logger.log('d', "[!] Setting provider for pool '#{pool['name']}' to 'vsphere' as default")
    pool['provider'] = 'vsphere'
  end

  # Load running pool configuration into redis so API server can retrieve it
  load_pools_to_redis

  # Pool loop settings, each falling back to its CHECK_LOOP_DELAY_*_DEFAULT constant
  $config[:config] = {} if $config[:config].nil?
  loop_settings = $config[:config]
  check_loop_delay_min = loop_settings['check_loop_delay_min'] || CHECK_LOOP_DELAY_MIN_DEFAULT
  check_loop_delay_max = loop_settings['check_loop_delay_max'] || CHECK_LOOP_DELAY_MAX_DEFAULT
  check_loop_delay_decay = loop_settings['check_loop_delay_decay'] || CHECK_LOOP_DELAY_DECAY_DEFAULT

  # Create the providers
  #
  # The provider_class parameter can be defined in the provider's data eg
  #   :providers:
  #     :vsphere:
  #       provider_class: 'vsphere'
  #     :another-vsphere:
  #       provider_class: 'vsphere'
  # which creates two providers/vsphere.rb class objects named 'vsphere' and 'another-vsphere';
  # each pool then names the provider definition it uses.
  #
  # When provider_class is not defined the provider name itself is used as the class, which keeps
  # configurations with a single listed provider working, eg
  #   :providers:
  #     :dummy:
  #       filename: 'db.txs'
  # creates an object based on the class providers/dummy.rb
  $config[:pools].each do |pool|
    provider_name = pool['provider']

    provider_settings = $config[:providers].nil? ? nil : $config[:providers][provider_name.to_sym]
    configured_class = provider_settings.nil? ? nil : provider_settings['provider_class']
    provider_class = configured_class.nil? ? provider_name : configured_class

    begin
      # Providers are shared between pools: only build one when the name has none yet
      if $providers[provider_name].nil?
        $providers[provider_name] = create_provider_object($config, $logger, $metrics, provider_class, provider_name, {})
      end
    rescue StandardError => err
      $logger.log('s', "Error while creating provider for pool #{pool['name']}: #{err}")
      raise
    end
  end

  loop_count = 1
  loop do
    # Disk manager: call check_disk_queue when its thread is missing or dead
    disk_worker = $threads['disk_manager']
    disk_worker_died = disk_worker ? !disk_worker.alive? : false
    $logger.log('d', '[!] [disk_manager] worker thread died, restarting') if disk_worker_died
    check_disk_queue if disk_worker_died || !disk_worker

    # Snapshot manager: same treatment through check_snapshot_queue
    snapshot_worker = $threads['snapshot_manager']
    snapshot_worker_died = snapshot_worker ? !snapshot_worker.alive? : false
    $logger.log('d', '[!] [snapshot_manager] worker thread died, restarting') if snapshot_worker_died
    check_snapshot_queue if snapshot_worker_died || !snapshot_worker

    # One worker per pool, registered in $threads under the pool name
    $config[:pools].each do |pool|
      pool_worker = $threads[pool['name']]

      unless pool_worker
        check_pool(pool)
        next
      end
      next if pool_worker.alive?

      $logger.log('d', "[!] [#{pool['name']}] worker thread died, restarting")
      # NOTE(review): the loop delay settings are only handed to check_pool on a restart, not on
      # the first start above - confirm against check_pool's parameter list that this is intended
      check_pool(pool, check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay)
    end

    sleep(loop_delay)

    # maxloop == 0 disables the pass counter entirely
    next if maxloop.zero?
    break if loop_count >= maxloop

    loop_count += 1
  end
end
1028
+ end
1029
+ end