vmpooler 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/vmpooler +54 -0
- data/lib/vmpooler.rb +161 -0
- data/lib/vmpooler/api.rb +53 -0
- data/lib/vmpooler/api/dashboard.rb +143 -0
- data/lib/vmpooler/api/helpers.rb +431 -0
- data/lib/vmpooler/api/reroute.rb +71 -0
- data/lib/vmpooler/api/v1.rb +938 -0
- data/lib/vmpooler/dashboard.rb +14 -0
- data/lib/vmpooler/dummy_statsd.rb +20 -0
- data/lib/vmpooler/generic_connection_pool.rb +53 -0
- data/lib/vmpooler/graphite.rb +42 -0
- data/lib/vmpooler/logger.rb +22 -0
- data/lib/vmpooler/pool_manager.rb +1029 -0
- data/lib/vmpooler/providers.rb +7 -0
- data/lib/vmpooler/providers/base.rb +231 -0
- data/lib/vmpooler/providers/dummy.rb +402 -0
- data/lib/vmpooler/providers/vsphere.rb +929 -0
- data/lib/vmpooler/public/bootstrap.min.css +5 -0
- data/lib/vmpooler/public/img/bg.png +0 -0
- data/lib/vmpooler/public/img/logo.gif +0 -0
- data/lib/vmpooler/public/img/spinner.svg +38 -0
- data/lib/vmpooler/public/img/subtle_dots.png +0 -0
- data/lib/vmpooler/public/img/textured_paper.png +0 -0
- data/lib/vmpooler/public/lib/bootstrap.min.js +7 -0
- data/lib/vmpooler/public/lib/d3.min.js +5 -0
- data/lib/vmpooler/public/lib/dashboard.js +738 -0
- data/lib/vmpooler/public/lib/jquery.min.js +4 -0
- data/lib/vmpooler/public/vmpooler.css +125 -0
- data/lib/vmpooler/statsd.rb +37 -0
- data/lib/vmpooler/version.rb +4 -0
- data/lib/vmpooler/views/dashboard.erb +63 -0
- data/lib/vmpooler/views/layout.erb +48 -0
- metadata +218 -0
--- /dev/null
+++ data/lib/vmpooler/generic_connection_pool.rb
@@ -0,0 +1,53 @@
+require 'connection_pool'
+
+module Vmpooler
+  class PoolManager
+    class GenericConnectionPool < ConnectionPool
+      # Extend the ConnectionPool class with instrumentation
+      # https://github.com/mperham/connection_pool/blob/master/lib/connection_pool.rb
+
+      def initialize(options = {}, &block)
+        super(options, &block)
+        @metrics = options[:metrics]
+        @metric_prefix = options[:metric_prefix]
+        @metric_prefix = 'connectionpool' if @metric_prefix.nil? || @metric_prefix == ''
+      end
+
+      if Thread.respond_to?(:handle_interrupt)
+        # MRI
+        def with_metrics(options = {})
+          Thread.handle_interrupt(Exception => :never) do
+            start = Time.now
+            conn = checkout(options)
+            timespan_ms = ((Time.now - start) * 1000).to_i
+            @metrics.gauge(@metric_prefix + '.available', @available.length) unless @metrics.nil?
+            @metrics.timing(@metric_prefix + '.waited', timespan_ms) unless @metrics.nil?
+            begin
+              Thread.handle_interrupt(Exception => :immediate) do
+                yield conn
+              end
+            ensure
+              checkin
+              @metrics.gauge(@metric_prefix + '.available', @available.length) unless @metrics.nil?
+            end
+          end
+        end
+      else
+        # jruby 1.7.x
+        def with_metrics(options = {})
+          start = Time.now
+          conn = checkout(options)
+          timespan_ms = ((Time.now - start) * 1000).to_i
+          @metrics.gauge(@metric_prefix + '.available', @available.length) unless @metrics.nil?
+          @metrics.timing(@metric_prefix + '.waited', timespan_ms) unless @metrics.nil?
+          begin
+            yield conn
+          ensure
+            checkin
+            @metrics.gauge(@metric_prefix + '.available', @available.length) unless @metrics.nil?
+          end
+        end
+      end
+    end
+  end
+end
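For context on how the instrumented pool above is meant to be consumed, here is a minimal usage sketch (not part of the diff). It assumes a metrics object responding to `#gauge` and `#timing` — the contract the `@metrics` calls above rely on — and a hypothetical `VsphereHelper` connection class standing in for whatever the block yields.

```ruby
require 'vmpooler/generic_connection_pool'
require 'vmpooler/dummy_statsd'

# Any object responding to #gauge and #timing satisfies the @metrics contract.
metrics = Vmpooler::DummyStatsd.new

# Standard connection_pool options (size, timeout) pass through to super;
# :metrics and :metric_prefix are the extensions added by this subclass.
conn_pool = Vmpooler::PoolManager::GenericConnectionPool.new(
  metrics: metrics,
  metric_prefix: 'vsphere_pool',
  size: 5,
  timeout: 10
) { VsphereHelper.new } # VsphereHelper is hypothetical; the block builds one connection

# with_metrics wraps checkout/checkin: it gauges '<prefix>.available' after
# checkout and again after check-in, and times '<prefix>.waited' for the wait.
conn_pool.with_metrics do |conn|
  conn.do_work # hypothetical method on the pooled connection
end
```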
--- /dev/null
+++ data/lib/vmpooler/graphite.rb
@@ -0,0 +1,42 @@
+require 'rubygems' unless defined?(Gem)
+
+module Vmpooler
+  class Graphite
+    attr_reader :server, :port, :prefix
+
+    def initialize(params = {})
+      if params['server'].nil? || params['server'].empty?
+        raise ArgumentError, "Graphite server is required. Config: #{params.inspect}"
+      end
+
+      @server = params['server']
+      @port = params['port'] || 2003
+      @prefix = params['prefix'] || 'vmpooler'
+    end
+
+    def increment(label)
+      log label, 1
+    end
+
+    def gauge(label, value)
+      log label, value
+    end
+
+    def timing(label, duration)
+      log label, duration
+    end
+
+    def log(path, value)
+      Thread.new do
+        socket = TCPSocket.new(server, port)
+        begin
+          socket.puts "#{prefix}.#{path} #{value} #{Time.now.to_i}"
+        ensure
+          socket.close
+        end
+      end
+    rescue => err
+      $stderr.puts "Failure logging #{path} to graphite server [#{server}:#{port}]: #{err}"
+    end
+  end
+end
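The Graphite class above emits Carbon's plaintext protocol, one metric per TCP connection, each on a short-lived background thread. Note that the file itself never requires 'socket', and the def-level `rescue` only catches failures raised while spawning the thread; an error inside the thread (a refused connection, say) dies with that thread. A sketch of what each helper writes, with an example hostname:

```ruby
require 'socket' # TCPSocket is used by Graphite#log but not required by the file itself
require 'vmpooler/graphite'

graphite = Vmpooler::Graphite.new(
  'server' => 'graphite.example.com', # example host; 'server' is the only required key
  'port'   => 2003,                   # defaults shown explicitly
  'prefix' => 'vmpooler'
)

graphite.increment('checkout.pool1') # writes "vmpooler.checkout.pool1 1 <epoch>"
graphite.gauge('ready.pool1', 7)     # writes "vmpooler.ready.pool1 7 <epoch>"
graphite.timing('clone.pool1', 42.5) # writes "vmpooler.clone.pool1 42.5 <epoch>"
```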
--- /dev/null
+++ data/lib/vmpooler/logger.rb
@@ -0,0 +1,22 @@
+require 'rubygems' unless defined?(Gem)
+
+module Vmpooler
+  class Logger
+    def initialize(
+      f = '/var/log/vmpooler.log'
+    )
+      @file = f
+    end
+
+    def log(_level, string)
+      time = Time.new
+      stamp = time.strftime('%Y-%m-%d %H:%M:%S')
+
+      puts "[#{stamp}] #{string}" if ENV['VMPOOLER_DEBUG']
+
+      open(@file, 'a') do |f|
+        f.puts "[#{stamp}] #{string}"
+      end
+    end
+  end
+end
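The logger is deliberately minimal: the severity argument is accepted but ignored (`_level`), every call re-opens the file in append mode, and setting `VMPOOLER_DEBUG` mirrors output to stdout. A usage sketch, with an example path:

```ruby
require 'vmpooler/logger'

logger = Vmpooler::Logger.new('/tmp/vmpooler.log') # example path; default is /var/log/vmpooler.log

ENV['VMPOOLER_DEBUG'] = '1' # also echo each message to stdout
logger.log('d', '[*] worker thread started')
# appends a line like: [2016-01-01 12:00:00] [*] worker thread started
```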
--- /dev/null
+++ data/lib/vmpooler/pool_manager.rb
@@ -0,0 +1,1029 @@
+module Vmpooler
+  class PoolManager
+    CHECK_LOOP_DELAY_MIN_DEFAULT = 5
+    CHECK_LOOP_DELAY_MAX_DEFAULT = 60
+    CHECK_LOOP_DELAY_DECAY_DEFAULT = 2.0
+
+    def initialize(config, logger, redis, metrics)
+      $config = config
+
+      # Load logger library
+      $logger = logger
+
+      # metrics logging handle
+      $metrics = metrics
+
+      # Connect to Redis
+      $redis = redis
+
+      # VM Provider objects
+      $providers = {}
+
+      # Our thread-tracker object
+      $threads = {}
+
+      # Pool mutex
+      @reconfigure_pool = {}
+
+      @vm_mutex = {}
+    end
+
+    def config
+      $config
+    end
+
+    # Place pool configuration in redis so an API instance can discover running pool configuration
+    def load_pools_to_redis
+      previously_configured_pools = $redis.smembers('vmpooler__pools')
+      currently_configured_pools = []
+      config[:pools].each do |pool|
+        currently_configured_pools << pool['name']
+        $redis.sadd('vmpooler__pools', pool['name'])
+        pool_keys = pool.keys
+        pool_keys.delete('alias')
+        to_set = {}
+        pool_keys.each do |k|
+          to_set[k] = pool[k]
+        end
+        to_set['alias'] = pool['alias'].join(',') if to_set.has_key?('alias')
+        $redis.hmset("vmpooler__pool__#{pool['name']}", to_set.to_a.flatten) unless to_set.empty?
+      end
+      previously_configured_pools.each do |pool|
+        unless currently_configured_pools.include? pool
+          $redis.srem('vmpooler__pools', pool)
+          $redis.del("vmpooler__pool__#{pool}")
+        end
+      end
+      return
+    end
+
+    # Check the state of a VM
+    def check_pending_vm(vm, pool, timeout, provider)
+      Thread.new do
+        begin
+          _check_pending_vm(vm, pool, timeout, provider)
+        rescue => err
+          $logger.log('s', "[!] [#{pool}] '#{vm}' #{timeout} #{provider} errored while checking a pending vm : #{err}")
+          fail_pending_vm(vm, pool, timeout)
+          raise
+        end
+      end
+    end
+
+    def _check_pending_vm(vm, pool, timeout, provider)
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        if provider.vm_ready?(pool, vm)
+          move_pending_vm_to_ready(vm, pool)
+        else
+          fail_pending_vm(vm, pool, timeout)
+        end
+      end
+    end
+
+    def remove_nonexistent_vm(vm, pool)
+      $redis.srem("vmpooler__pending__#{pool}", vm)
+      $logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.")
+    end
+
+    def fail_pending_vm(vm, pool, timeout, exists = true)
+      clone_stamp = $redis.hget("vmpooler__vm__#{vm}", 'clone')
+      return true unless clone_stamp
+
+      time_since_clone = (Time.now - Time.parse(clone_stamp)) / 60
+      if time_since_clone > timeout
+        if exists
+          $redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm)
+          $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")
+        else
+          remove_nonexistent_vm(vm, pool)
+        end
+      end
+      true
+    rescue => err
+      $logger.log('d', "Fail pending VM failed with an error: #{err}")
+      false
+    end
+
+    def move_pending_vm_to_ready(vm, pool)
+      clone_time = $redis.hget('vmpooler__vm__' + vm, 'clone')
+      finish = format('%.2f', Time.now - Time.parse(clone_time)) if clone_time
+
+      $redis.smove('vmpooler__pending__' + pool, 'vmpooler__ready__' + pool, vm)
+      $redis.hset('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm, finish) # maybe remove as this is never used by vmpooler itself?
+      $redis.hset("vmpooler__vm__#{vm}", 'ready', Time.now)
+
+      # last boot time is displayed in API, and used by alarming script
+      $redis.hset('vmpooler__lastboot', pool, Time.now)
+
+      $metrics.timing("time_to_ready_state.#{pool}", finish)
+      $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
+    end
+
+    def vm_still_ready?(pool_name, vm_name, provider)
+      # Check if the VM is still ready/available
+      return true if provider.vm_ready?(pool_name, vm_name)
+      raise("VM #{vm_name} is not ready")
+    rescue
+      move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
+    end
+
+    def check_ready_vm(vm, pool, ttl, provider)
+      Thread.new do
+        begin
+          _check_ready_vm(vm, pool, ttl, provider)
+        rescue => err
+          $logger.log('s', "[!] [#{pool['name']}] '#{vm}' failed while checking a ready vm : #{err}")
+          raise
+        end
+      end
+    end
+
+    def _check_ready_vm(vm, pool, ttl, provider)
+      # Periodically check that the VM is available
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')
+        return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])
+
+        $redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
+        # Check if the hosts TTL has expired
+        if ttl > 0
+          # host['boottime'] may be nil if host is not powered on
+          if ((Time.now - host['boottime']) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl
+            $redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm)
+
+            $logger.log('d', "[!] [#{pool['name']}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
+            return
+          end
+        end
+
+        return if has_mismatched_hostname?(vm, pool, provider)
+
+        vm_still_ready?(pool['name'], vm, provider)
+      end
+    end
+
+    def has_mismatched_hostname?(vm, pool, provider)
+      check_hostname = pool['check_hostname_for_mismatch']
+      check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil?
+      return if check_hostname == false
+
+      # Wait one minute before checking a VM for hostname mismatch
+      # When checking as soon as the VM passes the ready test the instance
+      # often doesn't report its hostname yet causing the VM to be removed immediately
+      vm_ready_time = $redis.hget("vmpooler__vm__#{vm}", 'ready')
+      if vm_ready_time
+        wait_before_checking = 60
+        time_since_ready = (Time.now - Time.parse(vm_ready_time)).to_i
+        return unless time_since_ready > wait_before_checking
+      end
+
+      # Check if the hostname has magically changed from underneath Pooler
+      vm_hash = provider.get_vm(pool['name'], vm)
+      hostname = vm_hash['hostname']
+
+      return if hostname.empty?
+      return if hostname == vm
+      $redis.smove('vmpooler__ready__' + pool['name'], 'vmpooler__completed__' + pool['name'], vm)
+      $logger.log('d', "[!] [#{pool['name']}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
+      return true
+    end
+
+    def check_running_vm(vm, pool, ttl, provider)
+      Thread.new do
+        begin
+          _check_running_vm(vm, pool, ttl, provider)
+        rescue => err
+          $logger.log('s', "[!] [#{pool}] '#{vm}' failed while checking VM with an error: #{err}")
+          raise
+        end
+      end
+    end
+
+    def _check_running_vm(vm, pool, ttl, provider)
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        # Check that VM is within defined lifetime
+        checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
+        if checkouttime
+          running = (Time.now - Time.parse(checkouttime)) / 60 / 60
+
+          if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
+            move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
+            return
+          end
+        end
+
+        if provider.vm_ready?(pool, vm)
+          return
+        else
+          host = provider.get_vm(pool, vm)
+
+          if host
+            return
+          else
+            move_vm_queue(pool, vm, 'running', 'completed', "is no longer in inventory, removing from running")
+          end
+        end
+      end
+    end
+
+    def move_vm_queue(pool, vm, queue_from, queue_to, msg = nil)
+      $redis.smove("vmpooler__#{queue_from}__#{pool}", "vmpooler__#{queue_to}__#{pool}", vm)
+      $logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}") if msg
+    end
+
+    # Clone a VM
+    def clone_vm(pool, provider)
+      Thread.new do
+        begin
+          _clone_vm(pool, provider)
+        rescue => err
+          $logger.log('s', "[!] [#{pool['name']}] failed while cloning VM with an error: #{err}")
+          raise
+        end
+      end
+    end
+
+    def _clone_vm(pool, provider)
+      pool_name = pool['name']
+
+      # Generate a randomized hostname
+      o = [('a'..'z'), ('0'..'9')].map(&:to_a).flatten
+      new_vmname = $config[:config]['prefix'] + o[rand(25)] + (0...14).map { o[rand(o.length)] }.join
+
+      # Add VM to Redis inventory ('pending' pool)
+      $redis.sadd('vmpooler__pending__' + pool_name, new_vmname)
+      $redis.hset('vmpooler__vm__' + new_vmname, 'clone', Time.now)
+      $redis.hset('vmpooler__vm__' + new_vmname, 'template', pool_name)
+
+      begin
+        $logger.log('d', "[ ] [#{pool_name}] Starting to clone '#{new_vmname}'")
+        start = Time.now
+        provider.create_vm(pool_name, new_vmname)
+        finish = format('%.2f', Time.now - start)
+
+        $redis.hset('vmpooler__clone__' + Date.today.to_s, pool_name + ':' + new_vmname, finish)
+        $redis.hset('vmpooler__vm__' + new_vmname, 'clone_time', finish)
+        $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")
+
+        $metrics.timing("clone.#{pool_name}", finish)
+      rescue => _err
+        $redis.srem("vmpooler__pending__#{pool_name}", new_vmname)
+        expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
+        $redis.expire("vmpooler__vm__#{new_vmname}", expiration_ttl)
+        raise _err
+      ensure
+        $redis.decr('vmpooler__tasks__clone')
+      end
+    end
+
+    # Destroy a VM
+    def destroy_vm(vm, pool, provider)
+      Thread.new do
+        begin
+          _destroy_vm(vm, pool, provider)
+        rescue => err
+          $logger.log('d', "[!] [#{pool}] '#{vm}' failed while destroying the VM with an error: #{err}")
+          raise
+        end
+      end
+    end
+
+    def _destroy_vm(vm, pool, provider)
+      mutex = vm_mutex(vm)
+      return if mutex.locked?
+      mutex.synchronize do
+        $redis.srem('vmpooler__completed__' + pool, vm)
+        $redis.hdel('vmpooler__active__' + pool, vm)
+        $redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
+
+        # Auto-expire metadata key
+        $redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
+
+        start = Time.now
+
+        provider.destroy_vm(pool, vm)
+
+        finish = format('%.2f', Time.now - start)
+        $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
+        $metrics.timing("destroy.#{pool}", finish)
+      end
+    end
+
+    def create_vm_disk(pool_name, vm, disk_size, provider)
+      Thread.new do
+        begin
+          _create_vm_disk(pool_name, vm, disk_size, provider)
+        rescue => err
+          $logger.log('d', "[!] [#{pool_name}] '#{vm}' failed while creating disk: #{err}")
+          raise
+        end
+      end
+    end
+
+    def _create_vm_disk(pool_name, vm_name, disk_size, provider)
+      raise("Invalid disk size of '#{disk_size}' passed") if disk_size.nil? || disk_size.empty? || disk_size.to_i <= 0
+
+      $logger.log('s', "[ ] [disk_manager] '#{vm_name}' is attaching a #{disk_size}gb disk")
+
+      start = Time.now
+
+      result = provider.create_disk(pool_name, vm_name, disk_size.to_i)
+
+      finish = format('%.2f', Time.now - start)
+
+      if result
+        rdisks = $redis.hget('vmpooler__vm__' + vm_name, 'disk')
+        disks = rdisks ? rdisks.split(':') : []
+        disks.push("+#{disk_size}gb")
+        $redis.hset('vmpooler__vm__' + vm_name, 'disk', disks.join(':'))
+
+        $logger.log('s', "[+] [disk_manager] '#{vm_name}' attached #{disk_size}gb disk in #{finish} seconds")
+      else
+        $logger.log('s', "[+] [disk_manager] '#{vm_name}' failed to attach disk")
+      end
+
+      result
+    end
+
+    def create_vm_snapshot(pool_name, vm, snapshot_name, provider)
+      Thread.new do
+        begin
+          _create_vm_snapshot(pool_name, vm, snapshot_name, provider)
+        rescue => err
+          $logger.log('d', "[!] [#{pool_name}] '#{vm}' failed while creating snapshot: #{err}")
+          raise
+        end
+      end
+    end
+
+    def _create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
+      $logger.log('s', "[ ] [snapshot_manager] 'Attempting to snapshot #{vm_name} in pool #{pool_name}")
+      start = Time.now
+
+      result = provider.create_snapshot(pool_name, vm_name, snapshot_name)
+
+      finish = format('%.2f', Time.now - start)
+
+      if result
+        $redis.hset('vmpooler__vm__' + vm_name, 'snapshot:' + snapshot_name, Time.now.to_s)
+        $logger.log('s', "[+] [snapshot_manager] '#{vm_name}' snapshot created in #{finish} seconds")
+      else
+        $logger.log('s', "[+] [snapshot_manager] Failed to snapshot '#{vm_name}'")
+      end
+
+      result
+    end
+
+    def revert_vm_snapshot(pool_name, vm, snapshot_name, provider)
+      Thread.new do
+        begin
+          _revert_vm_snapshot(pool_name, vm, snapshot_name, provider)
+        rescue => err
+          $logger.log('d', "[!] [#{pool_name}] '#{vm}' failed while reverting snapshot: #{err}")
+          raise
+        end
+      end
+    end
+
+    def _revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
+      $logger.log('s', "[ ] [snapshot_manager] 'Attempting to revert #{vm_name}' in pool #{pool_name} to snapshot '#{snapshot_name}'")
+      start = Time.now
+
+      result = provider.revert_snapshot(pool_name, vm_name, snapshot_name)
+
+      finish = format('%.2f', Time.now - start)
+
+      if result
+        $logger.log('s', "[+] [snapshot_manager] '#{vm_name}' reverted to snapshot '#{snapshot_name}' in #{finish} seconds")
+      else
+        $logger.log('s', "[+] [snapshot_manager] Failed to revert #{vm_name}' in pool #{pool_name} to snapshot '#{snapshot_name}'")
+      end
+
+      result
+    end
+
+    def get_pool_name_for_vm(vm_name)
+      # the 'template' is a bad name. Should really be 'poolname'
+      $redis.hget('vmpooler__vm__' + vm_name, 'template')
+    end
+
+    def get_provider_for_pool(pool_name)
+      provider_name = nil
+      $config[:pools].each do |pool|
+        next unless pool['name'] == pool_name
+        provider_name = pool['provider']
+      end
+      return nil if provider_name.nil?
+
+      $providers[provider_name]
+    end
+
+    def check_disk_queue(maxloop = 0, loop_delay = 5)
+      $logger.log('d', '[*] [disk_manager] starting worker thread')
+
+      $threads['disk_manager'] = Thread.new do
+        loop_count = 1
+        loop do
+          _check_disk_queue
+          sleep(loop_delay)
+
+          unless maxloop.zero?
+            break if loop_count >= maxloop
+            loop_count += 1
+          end
+        end
+      end
+    end
+
+    def _check_disk_queue
+      task_detail = $redis.spop('vmpooler__tasks__disk')
+      unless task_detail.nil?
+        begin
+          vm_name, disk_size = task_detail.split(':')
+          pool_name = get_pool_name_for_vm(vm_name)
+          raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
+
+          provider = get_provider_for_pool(pool_name)
+          raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
+
+          create_vm_disk(pool_name, vm_name, disk_size, provider)
+        rescue => err
+          $logger.log('s', "[!] [disk_manager] disk creation appears to have failed: #{err}")
+        end
+      end
+    end
+
+    def check_snapshot_queue(maxloop = 0, loop_delay = 5)
+      $logger.log('d', '[*] [snapshot_manager] starting worker thread')
+
+      $threads['snapshot_manager'] = Thread.new do
+        loop_count = 1
+        loop do
+          _check_snapshot_queue
+          sleep(loop_delay)
+
+          unless maxloop.zero?
+            break if loop_count >= maxloop
+            loop_count += 1
+          end
+        end
+      end
+    end
+
+    def _check_snapshot_queue
+      task_detail = $redis.spop('vmpooler__tasks__snapshot')
+
+      unless task_detail.nil?
+        begin
+          vm_name, snapshot_name = task_detail.split(':')
+          pool_name = get_pool_name_for_vm(vm_name)
+          raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
+
+          provider = get_provider_for_pool(pool_name)
+          raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
+
+          create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
+        rescue => err
+          $logger.log('s', "[!] [snapshot_manager] snapshot create appears to have failed: #{err}")
+        end
+      end
+
+      task_detail = $redis.spop('vmpooler__tasks__snapshot-revert')
+
+      unless task_detail.nil?
+        begin
+          vm_name, snapshot_name = task_detail.split(':')
+          pool_name = get_pool_name_for_vm(vm_name)
+          raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
+
+          provider = get_provider_for_pool(pool_name)
+          raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
+
+          revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
+        rescue => err
+          $logger.log('s', "[!] [snapshot_manager] snapshot revert appears to have failed: #{err}")
+        end
+      end
+    end
+
+    def migrate_vm(vm_name, pool_name, provider)
+      Thread.new do
+        begin
+          mutex = vm_mutex(vm_name)
+          mutex.synchronize do
+            $redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
+            provider.migrate_vm(pool_name, vm_name)
+          end
+        rescue => err
+          $logger.log('s', "[x] [#{pool_name}] '#{vm_name}' migration failed with an error: #{err}")
+        end
+      end
+    end
+
+    # Helper method mainly used for unit testing
+    def time_passed?(_event, time)
+      Time.now > time
+    end
+
+    # Possible wakeup events
+    # :pool_size_change
+    #   - Fires when the number of ready VMs changes due to being consumed.
+    #   - Additional options
+    #       :poolname
+    # :pool_template_change
+    #   - Fires when a template configuration update is requested
+    #   - Additional options
+    #       :poolname
+    #
+    def sleep_with_wakeup_events(loop_delay, wakeup_period = 5, options = {})
+      exit_by = Time.now + loop_delay
+      wakeup_by = Time.now + wakeup_period
+      return if time_passed?(:exit_by, exit_by)
+
+      if options[:pool_size_change]
+        initial_ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}")
+      end
+
+      if options[:pool_template_change]
+        initial_template = $redis.hget('vmpooler__template__prepared', options[:poolname])
+      end
+
+      loop do
+        sleep(1)
+        break if time_passed?(:exit_by, exit_by)
+
+        # Check for wakeup events
+        if time_passed?(:wakeup_by, wakeup_by)
+          wakeup_by = Time.now + wakeup_period
+
+          # Wakeup if the number of ready VMs has changed
+          if options[:pool_size_change]
+            ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}")
+            break unless ready_size == initial_ready_size
+          end
+
+          if options[:pool_template_change]
+            configured_template = $redis.hget('vmpooler__config__template', options[:poolname])
+            if configured_template
+              break unless initial_template == configured_template
+            end
+          end
+
+        end
+
+        break if time_passed?(:exit_by, exit_by)
+      end
+    end
+
+    def check_pool(pool,
+                   maxloop = 0,
+                   loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
+                   loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
+                   loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
+      $logger.log('d', "[*] [#{pool['name']}] starting worker thread")
+
+      # Use the pool setings if they exist
+      loop_delay_min = pool['check_loop_delay_min'] unless pool['check_loop_delay_min'].nil?
+      loop_delay_max = pool['check_loop_delay_max'] unless pool['check_loop_delay_max'].nil?
+      loop_delay_decay = pool['check_loop_delay_decay'] unless pool['check_loop_delay_decay'].nil?
+
+      loop_delay_decay = 2.0 if loop_delay_decay <= 1.0
+      loop_delay_max = loop_delay_min if loop_delay_max.nil? || loop_delay_max < loop_delay_min
+
+      $threads[pool['name']] = Thread.new do
+        begin
+          loop_count = 1
+          loop_delay = loop_delay_min
+          provider = get_provider_for_pool(pool['name'])
+          raise("Could not find provider '#{pool['provider']}") if provider.nil?
+          sync_pool_template(pool)
+          loop do
+            result = _check_pool(pool, provider)
+
+            if result[:cloned_vms] > 0 || result[:checked_pending_vms] > 0 || result[:discovered_vms] > 0
+              loop_delay = loop_delay_min
+            else
+              loop_delay = (loop_delay * loop_delay_decay).to_i
+              loop_delay = loop_delay_max if loop_delay > loop_delay_max
+            end
+            sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true)
+
+            unless maxloop.zero?
+              break if loop_count >= maxloop
+              loop_count += 1
+            end
+          end
+        rescue => err
+          $logger.log('s', "[!] [#{pool['name']}] Error while checking the pool: #{err}")
+          raise
+        end
+      end
+    end
+
+    def pool_mutex(poolname)
+      @reconfigure_pool[poolname] || @reconfigure_pool[poolname] = Mutex.new
+    end
+
+    def vm_mutex(vmname)
+      @vm_mutex[vmname] || @vm_mutex[vmname] = Mutex.new
+    end
+
+    def sync_pool_template(pool)
+      pool_template = $redis.hget('vmpooler__config__template', pool['name'])
+      if pool_template
+        unless pool['template'] == pool_template
+          pool['template'] = pool_template
+        end
+      end
+    end
+
+    def prepare_template(pool, provider)
+      provider.create_template_delta_disks(pool) if $config[:config]['create_template_delta_disks']
+      $redis.hset('vmpooler__template__prepared', pool['name'], pool['template'])
+    end
+
+    def evaluate_template(pool, provider)
+      mutex = pool_mutex(pool['name'])
+      prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
+      configured_template = $redis.hget('vmpooler__config__template', pool['name'])
+      return if mutex.locked?
+      if prepared_template.nil?
+        mutex.synchronize do
+          prepare_template(pool, provider)
+          prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
+        end
+      elsif prepared_template != pool['template']
+        if configured_template.nil?
+          mutex.synchronize do
+            prepare_template(pool, provider)
+            prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
+          end
+        end
+      end
+      return if configured_template.nil?
+      return if configured_template == prepared_template
+      mutex.synchronize do
+        update_pool_template(pool, provider, configured_template, prepared_template)
+      end
+    end
+
+    def drain_pool(poolname)
+      # Clear a pool of ready and pending instances
+      if $redis.scard("vmpooler__ready__#{poolname}") > 0
+        $logger.log('s', "[*] [#{poolname}] removing ready instances")
+        $redis.smembers("vmpooler__ready__#{poolname}").each do |vm|
+          move_vm_queue(poolname, vm, 'ready', 'completed')
+        end
+      end
+      if $redis.scard("vmpooler__pending__#{poolname}") > 0
+        $logger.log('s', "[*] [#{poolname}] removing pending instances")
+        $redis.smembers("vmpooler__pending__#{poolname}").each do |vm|
+          move_vm_queue(poolname, vm, 'pending', 'completed')
+        end
+      end
+    end
+
+    def update_pool_template(pool, provider, configured_template, prepared_template)
+      pool['template'] = configured_template
+      $logger.log('s', "[*] [#{pool['name']}] template updated from #{prepared_template} to #{configured_template}")
+      # Remove all ready and pending VMs so new instances are created from the new template
+      drain_pool(pool['name'])
+      # Prepare template for deployment
+      $logger.log('s', "[*] [#{pool['name']}] preparing pool template for deployment")
+      prepare_template(pool, provider)
+      $logger.log('s', "[*] [#{pool['name']}] is ready for use")
+    end
+
+    def remove_excess_vms(pool, provider, ready, total)
+      return if total.nil?
+      return if total == 0
+      mutex = pool_mutex(pool['name'])
+      return if mutex.locked?
+      return unless ready > pool['size']
+      mutex.synchronize do
+        difference = ready - pool['size']
+        difference.times do
+          next_vm = $redis.spop("vmpooler__ready__#{pool['name']}")
+          move_vm_queue(pool['name'], next_vm, 'ready', 'completed')
+        end
+        if total > ready
+          $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
+            move_vm_queue(pool['name'], vm, 'pending', 'completed')
+          end
+        end
+      end
+    end
+
+    def update_pool_size(pool)
+      mutex = pool_mutex(pool['name'])
+      return if mutex.locked?
+      poolsize = $redis.hget('vmpooler__config__poolsize', pool['name'])
+      return if poolsize.nil?
+      poolsize = Integer(poolsize)
+      return if poolsize == pool['size']
+      mutex.synchronize do
+        pool['size'] = poolsize
+      end
+    end
+
+    def _check_pool(pool, provider)
+      pool_check_response = {
+        discovered_vms: 0,
+        checked_running_vms: 0,
+        checked_ready_vms: 0,
+        checked_pending_vms: 0,
+        destroyed_vms: 0,
+        migrated_vms: 0,
+        cloned_vms: 0
+      }
+      # INVENTORY
+      inventory = {}
+      begin
+        mutex = pool_mutex(pool['name'])
+        mutex.synchronize do
+          provider.vms_in_pool(pool['name']).each do |vm|
+            if !$redis.sismember('vmpooler__running__' + pool['name'], vm['name']) &&
+               !$redis.sismember('vmpooler__ready__' + pool['name'], vm['name']) &&
+               !$redis.sismember('vmpooler__pending__' + pool['name'], vm['name']) &&
+               !$redis.sismember('vmpooler__completed__' + pool['name'], vm['name']) &&
+               !$redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']) &&
+               !$redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])
+
+              pool_check_response[:discovered_vms] += 1
+              $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])
+
+              $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
+            end
+
+            inventory[vm['name']] = 1
+          end
+        end
+      rescue => err
+        $logger.log('s', "[!] [#{pool['name']}] _check_pool failed with an error while inspecting inventory: #{err}")
+        return pool_check_response
+      end
+
+      # RUNNING
+      $redis.smembers("vmpooler__running__#{pool['name']}").each do |vm|
+        if inventory[vm]
+          begin
+            vm_lifetime = $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12
+            pool_check_response[:checked_running_vms] += 1
+            check_running_vm(vm, pool['name'], vm_lifetime, provider)
+          rescue => err
+            $logger.log('d', "[!] [#{pool['name']}] _check_pool with an error while evaluating running VMs: #{err}")
+          end
+        else
+          move_vm_queue(pool['name'], vm, 'running', 'completed', 'is a running VM but is missing from inventory. Marking as completed.')
+        end
+      end
+
+      # READY
+      $redis.smembers("vmpooler__ready__#{pool['name']}").each do |vm|
+        if inventory[vm]
+          begin
+            pool_check_response[:checked_ready_vms] += 1
+            check_ready_vm(vm, pool, pool['ready_ttl'] || 0, provider)
+          rescue => err
+            $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating ready VMs: #{err}")
+          end
+        else
+          move_vm_queue(pool['name'], vm, 'ready', 'completed', 'is a ready VM but is missing from inventory. Marking as completed.')
+        end
+      end
+
+      # PENDING
+      $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
+        pool_timeout = pool['timeout'] || $config[:config]['timeout'] || 15
+        if inventory[vm]
+          begin
+            pool_check_response[:checked_pending_vms] += 1
+            check_pending_vm(vm, pool['name'], pool_timeout, provider)
+          rescue => err
+            $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating pending VMs: #{err}")
+          end
+        else
+          fail_pending_vm(vm, pool['name'], pool_timeout, false)
+        end
+      end
+
+      # COMPLETED
+      $redis.smembers("vmpooler__completed__#{pool['name']}").each do |vm|
+        if inventory[vm]
+          begin
+            pool_check_response[:destroyed_vms] += 1
+            destroy_vm(vm, pool['name'], provider)
+          rescue => err
+            $redis.srem("vmpooler__completed__#{pool['name']}", vm)
+            $redis.hdel("vmpooler__active__#{pool['name']}", vm)
+            $redis.del("vmpooler__vm__#{vm}")
+            $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating completed VMs: #{err}")
+          end
+        else
+          $logger.log('s', "[!] [#{pool['name']}] '#{vm}' not found in inventory, removed from 'completed' queue")
+          $redis.srem("vmpooler__completed__#{pool['name']}", vm)
+          $redis.hdel("vmpooler__active__#{pool['name']}", vm)
+          $redis.del("vmpooler__vm__#{vm}")
+        end
+      end
+
+      # DISCOVERED
+      begin
+        $redis.smembers("vmpooler__discovered__#{pool['name']}").each do |vm|
+          %w[pending ready running completed].each do |queue|
+            if $redis.sismember("vmpooler__#{queue}__#{pool['name']}", vm)
+              $logger.log('d', "[!] [#{pool['name']}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
+              $redis.srem("vmpooler__discovered__#{pool['name']}", vm)
+            end
+          end
+
+          if $redis.sismember("vmpooler__discovered__#{pool['name']}", vm)
+            $redis.smove("vmpooler__discovered__#{pool['name']}", "vmpooler__completed__#{pool['name']}", vm)
+          end
+        end
+      rescue => err
+        $logger.log('d', "[!] [#{pool['name']}] _check_pool failed with an error while evaluating discovered VMs: #{err}")
+      end
+
+      # MIGRATIONS
+      $redis.smembers("vmpooler__migrating__#{pool['name']}").each do |vm|
+        if inventory[vm]
+          begin
+            pool_check_response[:migrated_vms] += 1
+            migrate_vm(vm, pool['name'], provider)
+          rescue => err
+            $logger.log('s', "[x] [#{pool['name']}] '#{vm}' failed to migrate: #{err}")
+          end
+        end
+      end
+
+      # UPDATE TEMPLATE
+      # Evaluates a pool template to ensure templates are prepared adequately for the configured provider
+      # If a pool template configuration change is detected then template preparation is repeated for the new template
+      # Additionally, a pool will drain ready and pending instances
+      evaluate_template(pool, provider)
+
+      # REPOPULATE
+      # Do not attempt to repopulate a pool while a template is updating
+      unless pool_mutex(pool['name']).locked?
+        ready = $redis.scard("vmpooler__ready__#{pool['name']}")
+        total = $redis.scard("vmpooler__pending__#{pool['name']}") + ready
+
+        $metrics.gauge("ready.#{pool['name']}", $redis.scard("vmpooler__ready__#{pool['name']}"))
+        $metrics.gauge("running.#{pool['name']}", $redis.scard("vmpooler__running__#{pool['name']}"))
+
+        if $redis.get("vmpooler__empty__#{pool['name']}")
+          $redis.del("vmpooler__empty__#{pool['name']}") unless ready.zero?
+        elsif ready.zero?
+          $redis.set("vmpooler__empty__#{pool['name']}", 'true')
+          $logger.log('s', "[!] [#{pool['name']}] is empty")
+        end
+
+        # Check to see if a pool size change has been made via the configuration API
+        # Since check_pool runs in a loop it does not
+        # otherwise identify this change when running
+        update_pool_size(pool)
+
+        if total < pool['size']
+          (1..(pool['size'] - total)).each do |_i|
+            if $redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
+              begin
+                $redis.incr('vmpooler__tasks__clone')
+                pool_check_response[:cloned_vms] += 1
+                clone_vm(pool, provider)
+              rescue => err
+                $logger.log('s', "[!] [#{pool['name']}] clone failed during check_pool with an error: #{err}")
+                $redis.decr('vmpooler__tasks__clone')
+                raise
+              end
+            end
+          end
+        end
+      end
+
+      # Remove VMs in excess of the configured pool size
+      remove_excess_vms(pool, provider, ready, total)
+
+      pool_check_response
+    end
+
+    # Create a provider object, usually based on the providers/*.rb class, that implements providers/base.rb
+    # provider_class: Needs to match a class in the Vmpooler::PoolManager::Provider namespace. This is
+    #                 either as a gem in the LOADPATH or in providers/*.rb ie Vmpooler::PoolManager::Provider::X
+    # provider_name: Should be a unique provider name
+    #
+    # returns an object Vmpooler::PoolManager::Provider::*
+    # or raises an error if the class does not exist
+    def create_provider_object(config, logger, metrics, provider_class, provider_name, options)
+      provider_klass = Vmpooler::PoolManager::Provider
+      provider_klass.constants.each do |classname|
+        next unless classname.to_s.casecmp(provider_class) == 0
+        return provider_klass.const_get(classname).new(config, logger, metrics, provider_name, options)
+      end
+      raise("Provider '#{provider_class}' is unknown for pool with provider name '#{provider_name}'") if provider.nil?
+    end
+
+    def execute!(maxloop = 0, loop_delay = 1)
+      $logger.log('d', 'starting vmpooler')
+
+      # Clear out the tasks manager, as we don't know about any tasks at this point
+      $redis.set('vmpooler__tasks__clone', 0)
+      # Clear out vmpooler__migrations since stale entries may be left after a restart
+      $redis.del('vmpooler__migration')
+
+      # Copy vSphere settings to correct location. This happens with older configuration files
+      if !$config[:vsphere].nil? && ($config[:providers].nil? || $config[:providers][:vsphere].nil?)
+        $logger.log('d', "[!] Detected an older configuration file. Copying the settings from ':vsphere:' to ':providers:/:vsphere:'")
+        $config[:providers] = {} if $config[:providers].nil?
+        $config[:providers][:vsphere] = $config[:vsphere]
+      end
+
+      # Set default provider for all pools that do not have one defined
+      $config[:pools].each do |pool|
+        if pool['provider'].nil?
+          $logger.log('d', "[!] Setting provider for pool '#{pool['name']}' to 'vsphere' as default")
+          pool['provider'] = 'vsphere'
+        end
+      end
+
+      # Load running pool configuration into redis so API server can retrieve it
+      load_pools_to_redis
+
+      # Get pool loop settings
+      $config[:config] = {} if $config[:config].nil?
+      check_loop_delay_min = $config[:config]['check_loop_delay_min'] || CHECK_LOOP_DELAY_MIN_DEFAULT
+      check_loop_delay_max = $config[:config]['check_loop_delay_max'] || CHECK_LOOP_DELAY_MAX_DEFAULT
+      check_loop_delay_decay = $config[:config]['check_loop_delay_decay'] || CHECK_LOOP_DELAY_DECAY_DEFAULT
+
+      # Create the providers
+      $config[:pools].each do |pool|
+        provider_name = pool['provider']
+        # The provider_class parameter can be defined in the provider's data eg
+        # :providers:
+        #   :vsphere:
+        #     provider_class: 'vsphere'
+        #   :another-vsphere:
+        #     provider_class: 'vsphere'
+        # the above would create two providers/vsphere.rb class objects named 'vsphere' and 'another-vsphere'
+        # each pools would then define which provider definition to use: vsphere or another-vsphere
+        #
+        # if provider_class is not defined it will try to use the provider_name as the class, this is to be
+        # backwards compatible for example when there is only one provider listed
+        # :providers:
+        #   :dummy:
+        #     filename: 'db.txs'
+        # the above example would create an object based on the class providers/dummy.rb
+        if $config[:providers].nil? || $config[:providers][provider_name.to_sym].nil? || $config[:providers][provider_name.to_sym]['provider_class'].nil?
+          provider_class = provider_name
+        else
+          provider_class = $config[:providers][provider_name.to_sym]['provider_class']
+        end
+        begin
+          $providers[provider_name] = create_provider_object($config, $logger, $metrics, provider_class, provider_name, {}) if $providers[provider_name].nil?
+        rescue => err
+          $logger.log('s', "Error while creating provider for pool #{pool['name']}: #{err}")
+          raise
+        end
+      end
+
+      loop_count = 1
+      loop do
+        if !$threads['disk_manager']
+          check_disk_queue
+        elsif !$threads['disk_manager'].alive?
+          $logger.log('d', '[!] [disk_manager] worker thread died, restarting')
+          check_disk_queue
+        end
+
+        if !$threads['snapshot_manager']
+          check_snapshot_queue
+        elsif !$threads['snapshot_manager'].alive?
+          $logger.log('d', '[!] [snapshot_manager] worker thread died, restarting')
+          check_snapshot_queue
+        end
+
+        $config[:pools].each do |pool|
+          if !$threads[pool['name']]
+            check_pool(pool)
+          elsif !$threads[pool['name']].alive?
+            $logger.log('d', "[!] [#{pool['name']}] worker thread died, restarting")
+            check_pool(pool, check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay)
+          end
+        end
+
+        sleep(loop_delay)
+
+        unless maxloop.zero?
+          break if loop_count >= maxloop
+          loop_count += 1
+        end
+      end
+    end
+  end
+end
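Reading the pool_manager hunk end to end: `initialize` stores its collaborators in globals ($config, $logger, $redis, $metrics), and `execute!` wires up providers, then supervises one worker thread per pool plus the disk and snapshot managers, restarting any that die. Two details stand out in this release: `_check_ready_vm` reads `host['boottime']` without assigning `host` anywhere in that method (the TTL branch would raise and be caught by `check_ready_vm`'s rescue), and the final `if provider.nil?` guard in `create_provider_object` references a `provider` variable that is never set in that method. Below is a hedged sketch of how the class is driven, based only on the signatures shown above; the gem's bin/vmpooler performs the real bootstrapping, which is not part of this excerpt, so the config shape and require paths are best-effort assumptions.

```ruby
require 'redis'
require 'vmpooler/logger'
require 'vmpooler/dummy_statsd'
require 'vmpooler/pool_manager'
require 'vmpooler/providers/dummy'

# Assumed config shape, inferred from the accesses above: symbol keys at the
# top level (:config, :redis, :pools, :providers), string keys within each pool.
config = {
  config: { 'prefix' => 'pooler-', 'task_limit' => 10, 'vm_checktime' => 15, 'timeout' => 15 },
  redis:  { 'data_ttl' => 168 },
  pools:  [{ 'name' => 'debian-8-x86_64', 'template' => 'Templates/debian-8', 'size' => 2, 'provider' => 'dummy' }],
  providers: { dummy: {} }
}

logger  = Vmpooler::Logger.new('/tmp/vmpooler.log') # example path
metrics = Vmpooler::DummyStatsd.new                 # no-op metrics sink
redis   = Redis.new                                 # localhost:6379 by default

manager = Vmpooler::PoolManager.new(config, logger, redis, metrics)
manager.execute!(1) # maxloop = 1 runs a single supervision pass; 0 loops forever
```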