vmpooler 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/vmpooler +18 -11
- data/lib/vmpooler.rb +26 -16
- data/lib/vmpooler/api.rb +41 -34
- data/lib/vmpooler/api/helpers.rb +2 -2
- data/lib/vmpooler/api/request_logger.rb +20 -0
- data/lib/vmpooler/api/v1.rb +302 -21
- data/lib/vmpooler/generic_connection_pool.rb +12 -28
- data/lib/vmpooler/metrics.rb +24 -0
- data/lib/vmpooler/metrics/dummy_statsd.rb +24 -0
- data/lib/vmpooler/metrics/graphite.rb +47 -0
- data/lib/vmpooler/metrics/promstats.rb +380 -0
- data/lib/vmpooler/metrics/promstats/collector_middleware.rb +121 -0
- data/lib/vmpooler/metrics/statsd.rb +40 -0
- data/lib/vmpooler/pool_manager.rb +763 -409
- data/lib/vmpooler/providers/base.rb +2 -1
- data/lib/vmpooler/providers/dummy.rb +4 -3
- data/lib/vmpooler/providers/vsphere.rb +137 -54
- data/lib/vmpooler/util/parsing.rb +16 -0
- data/lib/vmpooler/version.rb +1 -1
- metadata +39 -6
- data/lib/vmpooler/dummy_statsd.rb +0 -22
- data/lib/vmpooler/graphite.rb +0 -42
- data/lib/vmpooler/statsd.rb +0 -37
data/lib/vmpooler/metrics/promstats/collector_middleware.rb (new file)
@@ -0,0 +1,121 @@
+# frozen_string_literal: true
+
+# This is an adapted Collector module for vmpooler based on the sample implementation
+# available in the prometheus client_ruby library
+# https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/middleware/collector.rb
+#
+# The code was also failing Rubocop on PR check, so have addressed all the offenses.
+#
+# The method strip_hostnames_from_path (originally strip_ids_from_path) has been adapted
+# to add a match for hostnames in paths # to replace with a single ":hostname" string to
+# avoid # proliferation of stat lines for # each new vm hostname deleted, modified or
+# otherwise queried.
+
+require 'benchmark'
+require 'prometheus/client'
+require 'vmpooler/logger'
+
+module Vmpooler
+  class Metrics
+    class Promstats
+      # CollectorMiddleware is an implementation of Rack Middleware customised
+      # for vmpooler use.
+      #
+      # By default metrics are registered on the global registry. Set the
+      # `:registry` option to use a custom registry.
+      #
+      # By default metrics all have the prefix "http_server". Set to something
+      # else if you like.
+      #
+      # The request counter metric is broken down by code, method and path by
+      # default. Set the `:counter_label_builder` option to use a custom label
+      # builder.
+      #
+      # The request duration metric is broken down by method and path by default.
+      # Set the `:duration_label_builder` option to use a custom label builder.
+      #
+      # Label Builder functions will receive a Rack env and a status code, and must
+      # return a hash with the labels for that request. They must also accept an empty
+      # env, and return a hash with the correct keys. This is necessary to initialize
+      # the metrics with the correct set of labels.
+      class CollectorMiddleware
+        attr_reader :app, :registry
+
+        def initialize(app, options = {})
+          @app = app
+          @registry = options[:registry] || Prometheus::Client.registry
+          @metrics_prefix = options[:metrics_prefix] || 'http_server'
+
+          init_request_metrics
+          init_exception_metrics
+        end
+
+        def call(env) # :nodoc:
+          trace(env) { @app.call(env) }
+        end
+
+        protected
+
+        def init_request_metrics
+          @requests = @registry.counter(
+            :"#{@metrics_prefix}_requests_total",
+            docstring:
+              'The total number of HTTP requests handled by the Rack application.',
+            labels: %i[code method path]
+          )
+          @durations = @registry.histogram(
+            :"#{@metrics_prefix}_request_duration_seconds",
+            docstring: 'The HTTP response duration of the Rack application.',
+            labels: %i[method path]
+          )
+        end
+
+        def init_exception_metrics
+          @exceptions = @registry.counter(
+            :"#{@metrics_prefix}_exceptions_total",
+            docstring: 'The total number of exceptions raised by the Rack application.',
+            labels: [:exception]
+          )
+        end
+
+        def trace(env)
+          response = nil
+          duration = Benchmark.realtime { response = yield }
+          record(env, response.first.to_s, duration)
+          response
+        rescue StandardError => e
+          @exceptions.increment(labels: { exception: e.class.name })
+          raise
+        end
+
+        def record(env, code, duration)
+          counter_labels = {
+            code: code,
+            method: env['REQUEST_METHOD'].downcase,
+            path: strip_hostnames_from_path(env['PATH_INFO'])
+          }
+
+          duration_labels = {
+            method: env['REQUEST_METHOD'].downcase,
+            path: strip_hostnames_from_path(env['PATH_INFO'])
+          }
+
+          @requests.increment(labels: counter_labels)
+          @durations.observe(duration, labels: duration_labels)
+        rescue # rubocop:disable Style/RescueStandardError
+          nil
+        end
+
+        def strip_hostnames_from_path(path)
+          # Custom for /vm path - so we just collect aggrate stats for all usage along this one
+          # path. Custom counters are then added more specific endpoints in v1.rb
+          # Since we aren't parsing UID/GIDs as in the original example, these are removed.
+          # Similarly, request IDs are also stripped from the /ondemand path.
+          path
+            .gsub(%r{/vm/.+$}, '/vm')
+            .gsub(%r{/ondemand/.+$}, '/ondemand')
+        end
+      end
+    end
+  end
+end
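The class above is standard Rack middleware, so it can sit in front of the API with `use`. A minimal sketch of the wiring, assuming a plain `config.ru` and the prometheus-client exporter middleware; the actual hookup inside vmpooler's api.rb is not shown in this diff:

    # config.ru -- hypothetical wiring, not part of this diff
    require 'rack'
    require 'prometheus/middleware/exporter'
    require 'vmpooler/metrics/promstats/collector_middleware'

    # Count and time every request before it reaches the app.
    use Vmpooler::Metrics::Promstats::CollectorMiddleware, metrics_prefix: 'vmpooler_http'
    # Expose the collected metrics for Prometheus to scrape at /metrics.
    use Prometheus::Middleware::Exporter

    run ->(_env) { [200, { 'Content-Type' => 'text/plain' }, ['ok']] }

With this in place the request counter and duration histogram accumulate per request, with `/vm/<hostname>` and `/ondemand/<id>` collapsed to `/vm` and `/ondemand` by strip_hostnames_from_path so individual hostnames do not explode the label set.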
data/lib/vmpooler/metrics/statsd.rb (new file)
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+require 'rubygems' unless defined?(Gem)
+require 'statsd'
+
+module Vmpooler
+  class Metrics
+    class Statsd < Metrics
+      attr_reader :server, :port, :prefix
+
+      def initialize(logger, params = {})
+        raise ArgumentError, "Statsd server is required. Config: #{params.inspect}" if params['server'].nil? || params['server'].empty?
+
+        host = params['server']
+        @port = params['port'] || 8125
+        @prefix = params['prefix'] || 'vmpooler'
+        @server = ::Statsd.new(host, @port)
+        @logger = logger
+      end
+
+      def increment(label)
+        server.increment(prefix + '.' + label)
+      rescue StandardError => e
+        @logger.log('s', "[!] Failure incrementing #{prefix}.#{label} on statsd server [#{server}:#{port}]: #{e}")
+      end
+
+      def gauge(label, value)
+        server.gauge(prefix + '.' + label, value)
+      rescue StandardError => e
+        @logger.log('s', "[!] Failure updating gauge #{prefix}.#{label} on statsd server [#{server}:#{port}]: #{e}")
+      end
+
+      def timing(label, duration)
+        server.timing(prefix + '.' + label, duration)
+      rescue StandardError => e
+        @logger.log('s', "[!] Failure updating timing #{prefix}.#{label} on statsd server [#{server}:#{port}]: #{e}")
+      end
+    end
+  end
+end
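Construction follows the params hash above: 'server' is required (ArgumentError otherwise), while 'port' and 'prefix' default to 8125 and 'vmpooler'. A minimal usage sketch; the logger stub and metric labels are illustrative only:

    require 'vmpooler/metrics/statsd'

    # Stand-in logger; vmpooler passes its own Vmpooler::Logger here.
    logger = Object.new
    def logger.log(_level, message)
      puts message
    end

    metrics = Vmpooler::Metrics::Statsd.new(logger, 'server' => 'statsd.example.com')
    metrics.increment('checkout.requested')  # emits vmpooler.checkout.requested
    metrics.gauge('ready.pool-a', 5)         # emits vmpooler.ready.pool-a
    metrics.timing('clone.pool-a', 42.17)

Note that each method rescues StandardError and logs instead of raising, so an unreachable statsd server degrades metrics rather than taking down the caller.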
data/lib/vmpooler/pool_manager.rb
@@ -1,7 +1,9 @@
 # frozen_string_literal: true
 
 require 'vmpooler/providers'
+require 'vmpooler/util/parsing'
 require 'spicy-proton'
+require 'resolv' # ruby standard lib
 
 module Vmpooler
   class PoolManager
@@ -9,7 +11,7 @@ module Vmpooler
     CHECK_LOOP_DELAY_MAX_DEFAULT = 60
     CHECK_LOOP_DELAY_DECAY_DEFAULT = 2.0
 
-    def initialize(config, logger,
+    def initialize(config, logger, redis_connection_pool, metrics)
       $config = config
 
       # Load logger library
@@ -18,19 +20,19 @@ module Vmpooler
       # metrics logging handle
      $metrics = metrics
 
-      #
-      [removed line not captured]
+      # Redis connection pool
+      @redis = redis_connection_pool
 
       # VM Provider objects
-      $providers =
+      $providers = Concurrent::Hash.new
 
       # Our thread-tracker object
-      $threads =
+      $threads = Concurrent::Hash.new
 
       # Pool mutex
-      @reconfigure_pool =
+      @reconfigure_pool = Concurrent::Hash.new
 
-      @vm_mutex =
+      @vm_mutex = Concurrent::Hash.new
 
       # Name generator for generating host names
       @name_generator = Spicy::Proton.new
@@ -45,24 +47,26 @@ module Vmpooler
 
     # Place pool configuration in redis so an API instance can discover running pool configuration
     def load_pools_to_redis
-      [10 removed lines not captured]
+      @redis.with_metrics do |redis|
+        previously_configured_pools = redis.smembers('vmpooler__pools')
+        currently_configured_pools = []
+        config[:pools].each do |pool|
+          currently_configured_pools << pool['name']
+          redis.sadd('vmpooler__pools', pool['name'])
+          pool_keys = pool.keys
+          pool_keys.delete('alias')
+          to_set = {}
+          pool_keys.each do |k|
+            to_set[k] = pool[k]
+          end
+          to_set['alias'] = pool['alias'].join(',') if to_set.key?('alias')
+          redis.hmset("vmpooler__pool__#{pool['name']}", to_set.to_a.flatten) unless to_set.empty?
         end
-      [5 removed lines not captured]
-          $redis.srem('vmpooler__pools', pool)
-          $redis.del("vmpooler__pool__#{pool}")
+        previously_configured_pools.each do |pool|
+          unless currently_configured_pools.include? pool
+            redis.srem('vmpooler__pools', pool)
+            redis.del("vmpooler__pool__#{pool}")
+          end
        end
      end
      nil
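From here on, every direct `$redis` call is replaced by checking a connection out of a shared pool via `@redis.with_metrics`. That helper itself is not shown in this extract (it belongs to the reworked generic_connection_pool.rb); plausibly it is a thin wrapper over ConnectionPool#with that also reports how long the caller waited for a free connection. A sketch under that assumption, with all names hypothetical:

    require 'connection_pool'

    # Hypothetical stand-in for the pooled wrapper bound to @redis above;
    # the real class lives in data/lib/vmpooler/generic_connection_pool.rb.
    class MetricsConnPool
      def initialize(metrics:, size:, &connection_builder)
        @metrics = metrics
        @pool = ConnectionPool.new(size: size, &connection_builder)
      end

      # Yields a checked-out connection and records the checkout wait time.
      def with_metrics
        wait_start = Time.now
        @pool.with do |conn|
          @metrics.timing('connpool.waited', Time.now - wait_start)
          yield conn
        end
      end
    end

Passing the checked-out `redis` handle down into helpers, rather than touching a global, is what most of the signature changes in the hunks below are doing.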
@@ -75,7 +79,9 @@
         _check_pending_vm(vm, pool, timeout, provider)
       rescue StandardError => e
         $logger.log('s', "[!] [#{pool}] '#{vm}' #{timeout} #{provider} errored while checking a pending vm : #{e}")
-        [removed line not captured]
+        @redis.with_metrics do |redis|
+          fail_pending_vm(vm, pool, timeout, redis)
+        end
        raise
      end
    end
@@ -86,31 +92,38 @@
       return if mutex.locked?
 
       mutex.synchronize do
-        [4 removed lines not captured]
+        @redis.with_metrics do |redis|
+          request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
+          if provider.vm_ready?(pool, vm)
+            move_pending_vm_to_ready(vm, pool, redis, request_id)
+          else
+            fail_pending_vm(vm, pool, timeout, redis)
+          end
        end
      end
    end
 
-    def remove_nonexistent_vm(vm, pool)
-      [removed line not captured]
+    def remove_nonexistent_vm(vm, pool, redis)
+      redis.srem("vmpooler__pending__#{pool}", vm)
       $logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.")
     end
 
-    def fail_pending_vm(vm, pool, timeout, exists = true)
-      clone_stamp =
-      return true unless clone_stamp
+    def fail_pending_vm(vm, pool, timeout, redis, exists = true)
+      clone_stamp = redis.hget("vmpooler__vm__#{vm}", 'clone')
 
       time_since_clone = (Time.now - Time.parse(clone_stamp)) / 60
       if time_since_clone > timeout
         if exists
-          [removed line not captured]
+          request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
+          pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias') if request_id
+          redis.multi
+          redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm)
+          redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool}:1:#{request_id}") if request_id
+          redis.exec
           $metrics.increment("errors.markedasfailed.#{pool}")
           $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")
         else
-          remove_nonexistent_vm(vm, pool)
+          remove_nonexistent_vm(vm, pool, redis)
        end
      end
      true
@@ -119,28 +132,54 @@
       false
     end
 
-    def move_pending_vm_to_ready(vm, pool)
-      clone_time =
-      finish = format('%<time>.2f', time: Time.now - Time.parse(clone_time))
+    def move_pending_vm_to_ready(vm, pool, redis, request_id = nil)
+      clone_time = redis.hget('vmpooler__vm__' + vm, 'clone')
+      finish = format('%<time>.2f', time: Time.now - Time.parse(clone_time))
 
-      [3 removed lines not captured]
+      if request_id
+        ondemandrequest_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
+        if ondemandrequest_hash['status'] == 'failed'
+          move_vm_queue(pool, vm, 'pending', 'completed', redis, "moved to completed queue. '#{request_id}' could not be filled in time")
+          return nil
+        elsif ondemandrequest_hash['status'] == 'deleted'
+          move_vm_queue(pool, vm, 'pending', 'completed', redis, "moved to completed queue. '#{request_id}' has been deleted")
+          return nil
+        end
+        pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias')
+
+        redis.pipelined do
+          redis.hset("vmpooler__active__#{pool}", vm, Time.now)
+          redis.hset("vmpooler__vm__#{vm}", 'checkout', Time.now)
+          redis.hset("vmpooler__vm__#{vm}", 'token:token', ondemandrequest_hash['token:token']) if ondemandrequest_hash['token:token']
+          redis.hset("vmpooler__vm__#{vm}", 'token:user', ondemandrequest_hash['token:user']) if ondemandrequest_hash['token:user']
+          redis.sadd("vmpooler__#{request_id}__#{pool_alias}__#{pool}", vm)
+        end
+        move_vm_queue(pool, vm, 'pending', 'running', redis)
+        check_ondemand_request_ready(request_id, redis)
+      else
+        redis.smove('vmpooler__pending__' + pool, 'vmpooler__ready__' + pool, vm)
+      end
 
-      [2 removed lines not captured]
+      redis.pipelined do
+        redis.hset('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm, finish) # maybe remove as this is never used by vmpooler itself?
+        redis.hset("vmpooler__vm__#{vm}", 'ready', Time.now)
+
+        # last boot time is displayed in API, and used by alarming script
+        redis.hset('vmpooler__lastboot', pool, Time.now)
+      end
 
       $metrics.timing("time_to_ready_state.#{pool}", finish)
-      $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
+      $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue") unless request_id
+      $logger.log('s', "[>] [#{pool}] '#{vm}' is 'ready' for request '#{request_id}'") if request_id
     end
 
-    def vm_still_ready?(pool_name, vm_name, provider)
+    def vm_still_ready?(pool_name, vm_name, provider, redis)
       # Check if the VM is still ready/available
       return true if provider.vm_ready?(pool_name, vm_name)
 
       raise("VM #{vm_name} is not ready")
     rescue StandardError
-      move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
+      move_vm_queue(pool_name, vm_name, 'ready', 'completed', redis, "is unreachable, removed from 'ready' queue")
     end
 
     def check_ready_vm(vm, pool_name, ttl, provider)
@@ -160,34 +199,35 @@
       return if mutex.locked?
 
       mutex.synchronize do
-        [2 removed lines not captured]
+        @redis.with_metrics do |redis|
+          check_stamp = redis.hget('vmpooler__vm__' + vm, 'check')
+          last_checked_too_soon = ((Time.now - Time.parse(check_stamp)).to_i < $config[:config]['vm_checktime'] * 60) if check_stamp
+          break if check_stamp && last_checked_too_soon
 
-        [2 removed lines not captured]
-        if ttl > 0
+          redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
+          # Check if the hosts TTL has expired
           # if 'boottime' is nil, set bootime to beginning of unix epoch, forces TTL to be assumed expired
-          boottime =
+          boottime = redis.hget("vmpooler__vm__#{vm}", 'ready')
          if boottime
            boottime = Time.parse(boottime)
          else
            boottime = Time.at(0)
          end
-          if (
-          [removed line not captured]
+          if (Time.now - boottime).to_i > ttl * 60
+            redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
 
             $logger.log('d', "[!] [#{pool_name}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
-            return
+            return nil
          end
-        end
 
-          [removed line not captured]
+          break if mismatched_hostname?(vm, pool_name, provider, redis)
 
-          [removed line not captured]
+          vm_still_ready?(pool_name, vm, provider, redis)
+        end
      end
    end
 
-    def mismatched_hostname?(vm, pool_name, provider)
+    def mismatched_hostname?(vm, pool_name, provider, redis)
       pool_config = $config[:pools][$config[:pool_index][pool_name]]
       check_hostname = pool_config['check_hostname_for_mismatch']
       check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil?
@@ -196,7 +236,7 @@
       # Wait one minute before checking a VM for hostname mismatch
       # When checking as soon as the VM passes the ready test the instance
       # often doesn't report its hostname yet causing the VM to be removed immediately
-      vm_ready_time =
+      vm_ready_time = redis.hget("vmpooler__vm__#{vm}", 'ready')
       if vm_ready_time
         wait_before_checking = 60
         time_since_ready = (Time.now - Time.parse(vm_ready_time)).to_i
@@ -213,7 +253,7 @@
       return if hostname.empty?
       return if hostname == vm
 
-      [removed line not captured]
+      redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
       $logger.log('d', "[!] [#{pool_name}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
       true
     end
@@ -234,49 +274,61 @@
       return if mutex.locked?
 
       mutex.synchronize do
-        [10 removed lines not captured]
+        catch :stop_checking do
+          @redis.with_metrics do |redis|
+            # Check that VM is within defined lifetime
+            checkouttime = redis.hget('vmpooler__active__' + pool, vm)
+            if checkouttime
+              time_since_checkout = Time.now - Time.parse(checkouttime)
+              running = time_since_checkout / 60 / 60
+
+              if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
+                move_vm_queue(pool, vm, 'running', 'completed', redis, "reached end of TTL after #{ttl} hours")
+                throw :stop_checking
+              end
+            end
 
-        [3 removed lines not captured]
+            if provider.vm_ready?(pool, vm)
+              throw :stop_checking
+            else
+              host = provider.get_vm(pool, vm)
 
-        [3 removed lines not captured]
+              if host
+                throw :stop_checking
+              else
+                move_vm_queue(pool, vm, 'running', 'completed', redis, 'is no longer in inventory, removing from running')
+              end
+            end
          end
        end
      end
    end
 
-    def move_vm_queue(pool, vm, queue_from, queue_to, msg = nil)
-      [removed line not captured]
+    def move_vm_queue(pool, vm, queue_from, queue_to, redis, msg = nil)
+      redis.smove("vmpooler__#{queue_from}__#{pool}", "vmpooler__#{queue_to}__#{pool}", vm)
       $logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}") if msg
     end
 
     # Clone a VM
-    def clone_vm(pool_name, provider)
+    def clone_vm(pool_name, provider, request_id = nil, pool_alias = nil)
       Thread.new do
         begin
-          _clone_vm(pool_name, provider)
+          _clone_vm(pool_name, provider, request_id, pool_alias)
         rescue StandardError => e
-          [removed line not captured]
+          if request_id
+            $logger.log('s', "[!] [#{pool_name}] failed while cloning VM for request #{request_id} with an error: #{e}")
+            @redis.with_metrics do |redis|
+              redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool_name}:1:#{request_id}")
+            end
+          else
+            $logger.log('s', "[!] [#{pool_name}] failed while cloning VM with an error: #{e}")
+          end
          raise
        end
      end
    end
 
-    def generate_and_check_hostname
+    def generate_and_check_hostname
       # Generate a randomized hostname. The total name must no longer than 15
       # character including the hyphen. The shortest adjective in the corpus is
       # three characters long. Therefore, we can technically select a noun up to 11
@@ -285,58 +337,104 @@
       # letter adjectives, we actually limit the noun to 10 letters to avoid
       # inviting more conflicts. We favor selecting a longer noun rather than a
       # longer adjective because longer adjectives tend to be less fun.
-      [7 removed lines not captured]
+      @redis.with do |redis|
+        noun = @name_generator.noun(max: 10)
+        adjective = @name_generator.adjective(max: 14 - noun.length)
+        random_name = [adjective, noun].join('-')
+        hostname = $config[:config]['prefix'] + random_name
+        available = redis.hlen('vmpooler__vm__' + hostname) == 0
+
+        [hostname, available]
+      end
     end
 
     def find_unique_hostname(pool_name)
+      # generate hostname that is not already in use in vmpooler
+      # also check that no dns record already exists
       hostname_retries = 0
       max_hostname_retries = 3
       while hostname_retries < max_hostname_retries
-        hostname,
-        [removed line not captured]
+        hostname, hostname_available = generate_and_check_hostname
+        domain = $config[:config]['domain']
+        dns_ip, dns_available = check_dns_available(hostname, domain)
+        break if hostname_available && dns_available
 
         hostname_retries += 1
-        [2 removed lines not captured]
+
+        if !hostname_available
+          $metrics.increment("errors.duplicatehostname.#{pool_name}")
+          $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})")
+        elsif !dns_available
+          $metrics.increment("errors.staledns.#{hostname}")
+          $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} already exists in DNS records (#{dns_ip}), stale DNS")
+        end
      end
 
-      raise "Unable to generate a unique hostname after #{hostname_retries} attempts. The last hostname checked was #{hostname}" unless
+      raise "Unable to generate a unique hostname after #{hostname_retries} attempts. The last hostname checked was #{hostname}" unless hostname_available && dns_available
 
       hostname
     end
 
-    def
-    [3 removed lines not captured]
-      $redis.sadd('vmpooler__pending__' + pool_name, new_vmname)
-      $redis.hset('vmpooler__vm__' + new_vmname, 'clone', Time.now)
-      $redis.hset('vmpooler__vm__' + new_vmname, 'template', pool_name)
-    [removed line not captured]
+    def check_dns_available(vm_name, domain = nil)
+      # Query the DNS for the name we want to create and if it already exists, mark it unavailable
+      # This protects against stale DNS records
+      vm_name = "#{vm_name}.#{domain}" if domain
       begin
-        [4 removed lines not captured]
+        dns_ip = Resolv.getaddress(vm_name)
+      rescue Resolv::ResolvError
+        # this is the expected case, swallow the error
+        # eg "no address for blah-daisy"
+        return ['', true]
+      end
+      [dns_ip, false]
+    end
 
-      [3 removed lines not captured]
+    def _clone_vm(pool_name, provider, request_id = nil, pool_alias = nil)
+      new_vmname = find_unique_hostname(pool_name)
+      mutex = vm_mutex(new_vmname)
+      mutex.synchronize do
+        @redis.with_metrics do |redis|
+          # Add VM to Redis inventory ('pending' pool)
+          redis.multi
+          redis.sadd('vmpooler__pending__' + pool_name, new_vmname)
+          redis.hset('vmpooler__vm__' + new_vmname, 'clone', Time.now)
+          redis.hset('vmpooler__vm__' + new_vmname, 'template', pool_name) # This value is used to represent the pool.
+          redis.hset('vmpooler__vm__' + new_vmname, 'pool', pool_name)
+          redis.hset('vmpooler__vm__' + new_vmname, 'request_id', request_id) if request_id
+          redis.hset('vmpooler__vm__' + new_vmname, 'pool_alias', pool_alias) if pool_alias
+          redis.exec
+        end
 
-        [8 removed lines not captured]
+        begin
+          $logger.log('d', "[ ] [#{pool_name}] Starting to clone '#{new_vmname}'")
+          start = Time.now
+          provider.create_vm(pool_name, new_vmname)
+          finish = format('%<time>.2f', time: Time.now - start)
+
+          @redis.with_metrics do |redis|
+            redis.pipelined do
+              redis.hset('vmpooler__clone__' + Date.today.to_s, pool_name + ':' + new_vmname, finish)
+              redis.hset('vmpooler__vm__' + new_vmname, 'clone_time', finish)
+            end
+          end
+          $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")
+
+          $metrics.timing("clone.#{pool_name}", finish)
+        rescue StandardError
+          @redis.with_metrics do |redis|
+            redis.pipelined do
+              redis.srem("vmpooler__pending__#{pool_name}", new_vmname)
+              expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
+              redis.expire("vmpooler__vm__#{new_vmname}", expiration_ttl)
+            end
+          end
+          raise
+        ensure
+          @redis.with_metrics do |redis|
+            redis.decr('vmpooler__tasks__ondemandclone') if request_id
+            redis.decr('vmpooler__tasks__clone') unless request_id
+          end
+        end
      end
    end
 
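check_dns_available above treats a resolver miss as the good case: `Resolv.getaddress` raising `Resolv::ResolvError` means no record exists and the candidate hostname is safe to use, which is the stale-DNS guard find_unique_hostname relies on. A standalone illustration of the same [ip, available] return contract (hostname is made up):

    require 'resolv'

    # Mirrors check_dns_available: ['', true] when the name is free,
    # [resolved_ip, false] when a (possibly stale) record already exists.
    def lookup_dns(fqdn)
      ip = Resolv.getaddress(fqdn)
      [ip, false]
    rescue Resolv::ResolvError
      ['', true]
    end

    p lookup_dns('pooler-vigorous-oryx.example.com')  # => ["", true] when unused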
@@ -357,45 +455,57 @@
       return if mutex.locked?
 
       mutex.synchronize do
-        [2 removed lines not captured]
+        @redis.with_metrics do |redis|
+          redis.pipelined do
+            redis.hdel('vmpooler__active__' + pool, vm)
+            redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
 
-        [2 removed lines not captured]
+            # Auto-expire metadata key
+            redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
+          end
 
-        [removed line not captured]
+          start = Time.now
 
-        [removed line not captured]
+          provider.destroy_vm(pool, vm)
 
-        [removed line not captured]
+          redis.srem('vmpooler__completed__' + pool, vm)
 
-        [4 removed lines not captured]
+          finish = format('%<time>.2f', time: Time.now - start)
+          $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
+          $metrics.timing("destroy.#{pool}", finish)
+          get_vm_usage_labels(vm, redis)
+        end
      end
      dereference_mutex(vm)
    end
 
-    def get_vm_usage_labels(vm)
+    def get_vm_usage_labels(vm, redis)
       return unless $config[:config]['usage_stats']
 
-      [removed line not captured]
+      redis.multi
+      redis.hget("vmpooler__vm__#{vm}", 'checkout')
+      redis.hget("vmpooler__vm__#{vm}", 'tag:jenkins_build_url')
+      redis.hget("vmpooler__vm__#{vm}", 'token:user')
+      redis.hget("vmpooler__vm__#{vm}", 'template')
+      checkout, jenkins_build_url, user, poolname = redis.exec
       return if checkout.nil?
 
-      [removed line not captured]
-      user =
-      [removed line not captured]
+      user ||= 'unauthenticated'
+      user = user.gsub('.', '_')
+      $metrics.increment("user.#{user}.#{poolname}")
+
+      return unless jenkins_build_url
 
-      [2 removed lines not captured]
-      $metrics.increment("
+      if jenkins_build_url.include? 'litmus'
+        # Very simple filter for Litmus jobs - just count them coming through for the moment.
+        $metrics.increment("usage_litmus.#{user}.#{poolname}")
        return
      end
 
       url_parts = jenkins_build_url.split('/')[2..-1]
-      [removed line not captured]
+      jenkins_instance = url_parts[0].gsub('.', '_')
       value_stream_parts = url_parts[2].split('_')
+      value_stream_parts = value_stream_parts.map { |s| s.gsub('.', '_') }
      value_stream = value_stream_parts.shift
      branch = value_stream_parts.pop
      project = value_stream_parts.shift
@@ -403,24 +513,12 @@
       build_metadata_parts = url_parts[3]
       component_to_test = component_to_test('RMM_COMPONENT_TO_TEST_NAME', build_metadata_parts)
 
-      [3 removed lines not captured]
-        instance,
-        value_stream,
-        branch,
-        project,
-        job_name,
-        component_to_test,
-        poolname
-      ]
-      [removed line not captured]
-      metric_parts = metric_parts.reject(&:nil?)
-      metric_parts = metric_parts.map { |s| s.gsub('.', '_') }
-      [removed line not captured]
-      $metrics.increment(metric_parts.join('.'))
+      $metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}")
+      $metrics.increment("usage_branch_project.#{branch}.#{project}.#{poolname}")
+      $metrics.increment("usage_job_component.#{job_name}.#{component_to_test}.#{poolname}")
     rescue StandardError => e
-      logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
+      $logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
+      raise
     end
 
     def component_to_test(match, labels_string)
@@ -432,7 +530,7 @@
         next if value.nil?
         return value if key == match
       end
-      [removed line not captured]
+      'none'
     end
 
     def purge_unused_vms_and_folders
@@ -444,7 +542,7 @@
       if provider_purge
         Thread.new do
           begin
-            purge_vms_and_folders(provider.to_s)
+            purge_vms_and_folders($providers[provider.to_s])
          rescue StandardError => e
            $logger.log('s', "[!] failed while purging provider #{provider} VMs and folders with an error: #{e}")
          end
@@ -455,13 +553,14 @@
     end
 
     # Return a list of pool folders
-    def pool_folders(
+    def pool_folders(provider)
+      provider_name = provider.name
       folders = {}
       $config[:pools].each do |pool|
         next unless pool['provider'] == provider_name
 
         folder_parts = pool['folder'].split('/')
-        datacenter =
+        datacenter = provider.get_target_datacenter_from_config(pool['name'])
         folders[folder_parts.pop] = "#{datacenter}/vm/#{folder_parts.join('/')}"
       end
       folders
@@ -478,8 +577,8 @@
     def purge_vms_and_folders(provider)
       configured_folders = pool_folders(provider)
       base_folders = get_base_folders(configured_folders)
-      whitelist =
-      [removed line not captured]
+      whitelist = provider.provider_config['folder_whitelist']
+      provider.purge_unconfigured_folders(base_folders, configured_folders, whitelist)
     end
 
     def create_vm_disk(pool_name, vm, disk_size, provider)
@@ -505,10 +604,12 @@
       finish = format('%<time>.2f', time: Time.now - start)
 
       if result
-        [4 removed lines not captured]
+        @redis.with_metrics do |redis|
+          rdisks = redis.hget('vmpooler__vm__' + vm_name, 'disk')
+          disks = rdisks ? rdisks.split(':') : []
+          disks.push("+#{disk_size}gb")
+          redis.hset('vmpooler__vm__' + vm_name, 'disk', disks.join(':'))
+        end
 
         $logger.log('s', "[+] [disk_manager] '#{vm_name}' attached #{disk_size}gb disk in #{finish} seconds")
       else
@@ -538,7 +639,9 @@
       finish = format('%<time>.2f', time: Time.now - start)
 
       if result
-        [removed line not captured]
+        @redis.with_metrics do |redis|
+          redis.hset('vmpooler__vm__' + vm_name, 'snapshot:' + snapshot_name, Time.now.to_s)
+        end
         $logger.log('s', "[+] [snapshot_manager] '#{vm_name}' snapshot created in #{finish} seconds")
       else
         $logger.log('s', "[+] [snapshot_manager] Failed to snapshot '#{vm_name}'")
@@ -594,9 +697,9 @@
       @default_providers ||= %w[vsphere dummy]
     end
 
-    def get_pool_name_for_vm(vm_name)
+    def get_pool_name_for_vm(vm_name, redis)
       # the 'template' is a bad name. Should really be 'poolname'
-      [removed line not captured]
+      redis.hget('vmpooler__vm__' + vm_name, 'template')
     end
 
     # @param pool_name [String] - the name of the pool
@@ -628,19 +731,21 @@
     end
 
     def _check_disk_queue
-      [6 removed lines not captured]
+      @redis.with_metrics do |redis|
+        task_detail = redis.spop('vmpooler__tasks__disk')
+        unless task_detail.nil?
+          begin
+            vm_name, disk_size = task_detail.split(':')
+            pool_name = get_pool_name_for_vm(vm_name, redis)
+            raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
 
-      [2 removed lines not captured]
+            provider = get_provider_for_pool(pool_name)
+            raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
 
-      [3 removed lines not captured]
+            create_vm_disk(pool_name, vm_name, disk_size, provider)
+          rescue StandardError => e
+            $logger.log('s', "[!] [disk_manager] disk creation appears to have failed: #{e}")
+          end
        end
      end
    end
@@ -664,37 +769,39 @@
     end
 
     def _check_snapshot_queue
-      [removed line not captured]
+      @redis.with_metrics do |redis|
+        task_detail = redis.spop('vmpooler__tasks__snapshot')
 
-      [5 removed lines not captured]
+        unless task_detail.nil?
+          begin
+            vm_name, snapshot_name = task_detail.split(':')
+            pool_name = get_pool_name_for_vm(vm_name, redis)
+            raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
 
-      [2 removed lines not captured]
+            provider = get_provider_for_pool(pool_name)
+            raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
 
-      [3 removed lines not captured]
+            create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
+          rescue StandardError => e
+            $logger.log('s', "[!] [snapshot_manager] snapshot create appears to have failed: #{e}")
+          end
        end
-      end
 
-      [removed line not captured]
+        task_detail = redis.spop('vmpooler__tasks__snapshot-revert')
 
-      [5 removed lines not captured]
+        unless task_detail.nil?
+          begin
+            vm_name, snapshot_name = task_detail.split(':')
+            pool_name = get_pool_name_for_vm(vm_name, redis)
+            raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
 
-      [2 removed lines not captured]
+            provider = get_provider_for_pool(pool_name)
+            raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
 
-      [3 removed lines not captured]
+            revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
+          rescue StandardError => e
+            $logger.log('s', "[!] [snapshot_manager] snapshot revert appears to have failed: #{e}")
+          end
        end
      end
    end
@@ -704,7 +811,9 @@
       begin
         mutex = vm_mutex(vm_name)
         mutex.synchronize do
-          [removed line not captured]
+          @redis.with_metrics do |redis|
+            redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
+          end
          provider.migrate_vm(pool_name, vm_name)
        end
      rescue StandardError => e
@@ -737,47 +846,65 @@
       wakeup_by = Time.now + wakeup_period
       return if time_passed?(:exit_by, exit_by)
 
-      [removed line not captured]
+      @redis.with_metrics do |redis|
+        initial_ready_size = redis.scard("vmpooler__ready__#{options[:poolname]}") if options[:pool_size_change]
 
-      [removed line not captured]
+        initial_clone_target = redis.hget("vmpooler__pool__#{options[:poolname]}", options[:clone_target]) if options[:clone_target_change]
 
-      [removed line not captured]
+        initial_template = redis.hget('vmpooler__template__prepared', options[:poolname]) if options[:pool_template_change]
 
-      [3 removed lines not captured]
+        loop do
+          sleep(1)
+          break if time_passed?(:exit_by, exit_by)
 
-      [3 removed lines not captured]
+          # Check for wakeup events
+          if time_passed?(:wakeup_by, wakeup_by)
+            wakeup_by = Time.now + wakeup_period
 
-      [5 removed lines not captured]
+            # Wakeup if the number of ready VMs has changed
+            if options[:pool_size_change]
+              ready_size = redis.scard("vmpooler__ready__#{options[:poolname]}")
+              break unless ready_size == initial_ready_size
+            end
 
-      [4 removed lines not captured]
+            if options[:clone_target_change]
+              clone_target = redis.hget('vmpooler__config__clone_target}', options[:poolname])
+              if clone_target
+                break unless clone_target == initial_clone_target
+              end
            end
-        end
 
-      [4 removed lines not captured]
+            if options[:pool_template_change]
+              configured_template = redis.hget('vmpooler__config__template', options[:poolname])
+              if configured_template
+                break unless initial_template == configured_template
+              end
+            end
+
+            if options[:pool_reset]
+              pending = redis.sismember('vmpooler__poolreset', options[:poolname])
+              break if pending
            end
-        end
 
-      [2 removed lines not captured]
+            if options[:pending_vm]
+              pending_vm_count = redis.scard("vmpooler__pending__#{options[:poolname]}")
+              break unless pending_vm_count == 0
+            end
+
+            if options[:ondemand_request]
+              redis.multi
+              redis.zcard('vmpooler__provisioning__request')
+              redis.zcard('vmpooler__provisioning__processing')
+              redis.zcard('vmpooler__odcreate__task')
+              od_request, od_processing, od_createtask = redis.exec
+              break unless od_request == 0
+              break unless od_processing == 0
+              break unless od_createtask == 0
+            end
          end
 
+          break if time_passed?(:exit_by, exit_by)
        end
-      [removed line not captured]
-      break if time_passed?(:exit_by, exit_by)
      end
    end
 
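sleep_with_wakeup_events polls once per second and breaks out of the delay early when any watched condition changes (ready-set size, clone target, template, pool reset, pending VMs, or outstanding on-demand work). The `time_passed?` helper it leans on is not part of this hunk; presumably it is a plain clock comparison kept as a named method so tests can stub individual deadlines, along the lines of:

    # Assumed shape of the helper used above; the label argument only
    # identifies which deadline is being checked.
    def time_passed?(_label, deadline)
      Time.now > deadline
    end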
@@ -813,7 +940,7 @@ module Vmpooler
|
|
813
940
|
loop_delay = (loop_delay * loop_delay_decay).to_i
|
814
941
|
loop_delay = loop_delay_max if loop_delay > loop_delay_max
|
815
942
|
end
|
816
|
-
sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true, clone_target_change: true, pool_reset: true)
|
943
|
+
sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true, clone_target_change: true, pending_vm: true, pool_reset: true)
|
817
944
|
|
818
945
|
unless maxloop == 0
|
819
946
|
break if loop_count >= maxloop
|
@@ -843,77 +970,84 @@ module Vmpooler
|
|
843
970
|
end
|
844
971
|
|
845
972
|
def sync_pool_template(pool)
|
846
|
-
|
847
|
-
|
848
|
-
|
973
|
+
@redis.with_metrics do |redis|
|
974
|
+
pool_template = redis.hget('vmpooler__config__template', pool['name'])
|
975
|
+
if pool_template
|
976
|
+
pool['template'] = pool_template unless pool['template'] == pool_template
|
977
|
+
end
|
849
978
|
end
|
850
979
|
end
|
851
980
|
|
852
|
-
def prepare_template(pool, provider)
|
981
|
+
def prepare_template(pool, provider, redis)
|
853
982
|
if $config[:config]['create_template_delta_disks']
|
854
|
-
unless
|
983
|
+
unless redis.sismember('vmpooler__template__deltas', pool['template'])
|
855
984
|
begin
|
856
985
|
provider.create_template_delta_disks(pool)
|
857
|
-
|
986
|
+
redis.sadd('vmpooler__template__deltas', pool['template'])
|
858
987
|
rescue StandardError => e
|
859
988
|
$logger.log('s', "[!] [#{pool['name']}] failed while preparing a template with an error. As a result vmpooler could not create the template delta disks. Either a template delta disk already exists, or the template delta disk creation failed. The error is: #{e}")
|
860
989
|
end
|
861
990
|
end
|
862
991
|
end
|
863
|
-
|
992
|
+
redis.hset('vmpooler__template__prepared', pool['name'], pool['template'])
|
864
993
|
end
|
865
994
|
|
866
995
|
def evaluate_template(pool, provider)
|
867
996
|
mutex = pool_mutex(pool['name'])
|
868
|
-
prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
|
869
|
-
configured_template = $redis.hget('vmpooler__config__template', pool['name'])
|
870
997
|
return if mutex.locked?
|
871
998
|
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
999
|
+
catch :update_not_needed do
|
1000
|
+
@redis.with_metrics do |redis|
|
1001
|
+
prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
|
1002
|
+
configured_template = redis.hget('vmpooler__config__template', pool['name'])
|
1003
|
+
|
1004
|
+
if prepared_template.nil?
|
1005
|
+
mutex.synchronize do
|
1006
|
+
prepare_template(pool, provider, redis)
|
1007
|
+
prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
|
1008
|
+
end
|
1009
|
+
elsif prepared_template != pool['template']
|
1010
|
+
if configured_template.nil?
|
1011
|
+
mutex.synchronize do
|
1012
|
+
prepare_template(pool, provider, redis)
|
1013
|
+
prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
|
1014
|
+
end
|
1015
|
+
end
|
1016
|
+
end
|
1017
|
+
throw :update_not_needed if configured_template.nil?
|
1018
|
+
throw :update_not_needed if configured_template == prepared_template
|
1019
|
+
|
879
1020
|
mutex.synchronize do
|
880
|
-
|
881
|
-
prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
|
1021
|
+
update_pool_template(pool, provider, configured_template, prepared_template, redis)
|
882
1022
|
end
|
883
1023
|
end
|
884
1024
|
end
|
885
|
-
return if configured_template.nil?
|
886
|
-
return if configured_template == prepared_template
|
887
|
-
|
888
|
-
mutex.synchronize do
|
889
|
-
update_pool_template(pool, provider, configured_template, prepared_template)
|
890
|
-
end
|
891
1025
|
end
|
892
1026
|
|
893
|
-
def drain_pool(poolname)
|
1027
|
+
def drain_pool(poolname, redis)
|
894
1028
|
# Clear a pool of ready and pending instances
|
895
|
-
if
|
1029
|
+
if redis.scard("vmpooler__ready__#{poolname}") > 0
|
896
1030
|
$logger.log('s', "[*] [#{poolname}] removing ready instances")
|
897
|
-
|
898
|
-
move_vm_queue(poolname, vm, 'ready', 'completed')
|
1031
|
+
redis.smembers("vmpooler__ready__#{poolname}").each do |vm|
|
1032
|
+
move_vm_queue(poolname, vm, 'ready', 'completed', redis)
|
899
1033
|
end
|
900
1034
|
end
|
901
|
-
if
|
1035
|
+
if redis.scard("vmpooler__pending__#{poolname}") > 0
|
902
1036
|
$logger.log('s', "[*] [#{poolname}] removing pending instances")
|
903
|
-
|
904
|
-
move_vm_queue(poolname, vm, 'pending', 'completed')
|
1037
|
+
redis.smembers("vmpooler__pending__#{poolname}").each do |vm|
|
1038
|
+
move_vm_queue(poolname, vm, 'pending', 'completed', redis)
|
905
1039
|
end
|
906
1040
|
end
|
907
1041
|
end
|
908
1042
|
|
909
|
-
def update_pool_template(pool, provider, configured_template, prepared_template)
|
1043
|
+
def update_pool_template(pool, provider, configured_template, prepared_template, redis)
|
910
1044
|
pool['template'] = configured_template
|
911
1045
|
$logger.log('s', "[*] [#{pool['name']}] template updated from #{prepared_template} to #{configured_template}")
|
912
1046
|
# Remove all ready and pending VMs so new instances are created from the new template
|
913
|
-
drain_pool(pool['name'])
|
1047
|
+
drain_pool(pool['name'], redis)
|
914
1048
|
# Prepare template for deployment
|
915
1049
|
$logger.log('s', "[*] [#{pool['name']}] preparing pool template for deployment")
|
916
|
-
prepare_template(pool, provider)
|
1050
|
+
prepare_template(pool, provider, redis)
|
917
1051
|
$logger.log('s', "[*] [#{pool['name']}] is ready for use")
|
918
1052
|
end
|
919
1053
|
|
@@ -921,38 +1055,45 @@ module Vmpooler
|
|
921
1055
|
mutex = pool_mutex(pool['name'])
|
922
1056
|
return if mutex.locked?
|
923
1057
|
|
924
|
-
|
925
|
-
|
926
|
-
|
1058
|
+
@redis.with_metrics do |redis|
|
1059
|
+
clone_target = redis.hget('vmpooler__config__clone_target', pool['name'])
|
1060
|
+
break if clone_target.nil?
|
1061
|
+
break if clone_target == pool['clone_target']
|
927
1062
|
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
1063
|
+
$logger.log('s', "[*] [#{pool['name']}] clone updated from #{pool['clone_target']} to #{clone_target}")
|
1064
|
+
mutex.synchronize do
|
1065
|
+
pool['clone_target'] = clone_target
|
1066
|
+
# Remove all ready and pending VMs so new instances are created for the new clone_target
|
1067
|
+
drain_pool(pool['name'], redis)
|
1068
|
+
end
|
1069
|
+
$logger.log('s', "[*] [#{pool['name']}] is ready for use")
|
933
1070
|
end
|
934
|
-
$logger.log('s', "[*] [#{pool['name']}] is ready for use")
|
935
1071
|
end
|
936
1072
|
|
937
1073
|
def remove_excess_vms(pool)
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
1074
|
+
@redis.with_metrics do |redis|
|
1075
|
+
redis.multi
|
1076
|
+
redis.scard("vmpooler__ready__#{pool['name']}")
|
1077
|
+
redis.scard("vmpooler__pending__#{pool['name']}")
|
1078
|
+
ready, pending = redis.exec
|
1079
|
+
total = pending.to_i + ready.to_i
|
1080
|
+
break if total.nil?
|
1081
|
+
break if total == 0
|
942
1082
|
|
943
|
-
|
944
|
-
|
945
|
-
|
1083
|
+
mutex = pool_mutex(pool['name'])
|
1084
|
+
break if mutex.locked?
|
1085
|
+
break unless ready.to_i > pool['size']
|
946
1086
|
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
1087
|
+
mutex.synchronize do
|
1088
|
+
difference = ready.to_i - pool['size']
|
1089
|
+
difference.times do
|
1090
|
+
next_vm = redis.spop("vmpooler__ready__#{pool['name']}")
|
1091
|
+
move_vm_queue(pool['name'], next_vm, 'ready', 'completed', redis)
|
1092
|
+
end
|
1093
|
+
if total > ready
|
1094
|
+
redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
|
1095
|
+
move_vm_queue(pool['name'], vm, 'pending', 'completed', redis)
|
1096
|
+
end
|
956
1097
|
end
|
957
1098
|
end
|
958
1099
|
end
|
@@ -962,26 +1103,30 @@ module Vmpooler
|
|
962
1103
|
mutex = pool_mutex(pool['name'])
|
963
1104
|
return if mutex.locked?
|
964
1105
|
|
965
|
-
|
966
|
-
|
1106
|
+
@redis.with_metrics do |redis|
|
1107
|
+
poolsize = redis.hget('vmpooler__config__poolsize', pool['name'])
|
1108
|
+
break if poolsize.nil?
|
967
1109
|
|
968
|
-
|
969
|
-
|
1110
|
+
poolsize = Integer(poolsize)
|
1111
|
+
break if poolsize == pool['size']
|
970
1112
|
|
971
|
-
|
972
|
-
|
1113
|
+
mutex.synchronize do
|
1114
|
+
pool['size'] = poolsize
|
1115
|
+
end
|
973
1116
|
end
|
974
1117
|
end
|
975
1118
|
|
976
1119
|
def reset_pool(pool)
|
977
1120
|
poolname = pool['name']
|
978
|
-
|
1121
|
+
@redis.with_metrics do |redis|
|
1122
|
+
break unless redis.sismember('vmpooler__poolreset', poolname)
|
979
1123
|
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
1124
|
+
redis.srem('vmpooler__poolreset', poolname)
|
1125
|
+
mutex = pool_mutex(poolname)
|
1126
|
+
mutex.synchronize do
|
1127
|
+
drain_pool(poolname, redis)
|
1128
|
+
$logger.log('s', "[*] [#{poolname}] reset has cleared ready and pending instances")
|
1129
|
+
end
|
985
1130
|
end
|
986
1131
|
end
|
987
1132
|
|
@@ -990,21 +1135,23 @@ module Vmpooler
|
|
990
1135
|
begin
|
991
1136
|
mutex = pool_mutex(pool['name'])
|
992
1137
|
mutex.synchronize do
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1004
|
-
|
1138
|
+
@redis.with_metrics do |redis|
|
1139
|
+
provider.vms_in_pool(pool['name']).each do |vm|
|
1140
|
+
if !redis.sismember('vmpooler__running__' + pool['name'], vm['name']) &&
|
1141
|
+
!redis.sismember('vmpooler__ready__' + pool['name'], vm['name']) &&
|
1142
|
+
!redis.sismember('vmpooler__pending__' + pool['name'], vm['name']) &&
|
1143
|
+
!redis.sismember('vmpooler__completed__' + pool['name'], vm['name']) &&
|
1144
|
+
!redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']) &&
|
1145
|
+
!redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])
|
1146
|
+
|
1147
|
+
pool_check_response[:discovered_vms] += 1
|
1148
|
+
redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])
|
1149
|
+
|
1150
|
+
$logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
|
1151
|
+
end
|
1152
|
+
|
1153
|
+
inventory[vm['name']] = 1
|
1005
1154
|
end
|
1006
|
-
|
1007
|
-
inventory[vm['name']] = 1
|
1008
1155
|
end
|
1009
1156
|
end
|
1010
1157
|
rescue StandardError => e
|
@@ -1015,96 +1162,112 @@ module Vmpooler
|
|
1015
1162
|
end
|
1016
1163
|
|
1017
1164
|
def check_running_pool_vms(pool_name, provider, pool_check_response, inventory)
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1165
|
+
@redis.with_metrics do |redis|
|
1166
|
+
redis.smembers("vmpooler__running__#{pool_name}").each do |vm|
|
1167
|
+
if inventory[vm]
|
1168
|
+
begin
|
1169
|
+
vm_lifetime = redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12
|
1170
|
+
pool_check_response[:checked_running_vms] += 1
|
1171
|
+
check_running_vm(vm, pool_name, vm_lifetime, provider)
|
1172
|
+
rescue StandardError => e
|
1173
|
+
$logger.log('d', "[!] [#{pool_name}] _check_pool with an error while evaluating running VMs: #{e}")
|
1174
|
+
end
|
1175
|
+
else
|
1176
|
+
move_vm_queue(pool_name, vm, 'running', 'completed', redis, 'is a running VM but is missing from inventory. Marking as completed.')
|
1026
1177
|
end
|
1027
|
-
else
|
1028
|
-
move_vm_queue(pool_name, vm, 'running', 'completed', 'is a running VM but is missing from inventory. Marking as completed.')
|
1029
1178
|
end
|
1030
1179
|
end
|
1031
1180
|
end
|
1032
1181
|
|
1033
|
-
def check_ready_pool_vms(pool_name, provider, pool_check_response, inventory, pool_ttl
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1182
|
+
def check_ready_pool_vms(pool_name, provider, pool_check_response, inventory, pool_ttl)
|
1183
|
+
@redis.with_metrics do |redis|
|
1184
|
+
redis.smembers("vmpooler__ready__#{pool_name}").each do |vm|
|
1185
|
+
if inventory[vm]
|
1186
|
+
begin
|
1187
|
+
pool_check_response[:checked_ready_vms] += 1
|
1188
|
+
check_ready_vm(vm, pool_name, pool_ttl, provider)
|
1189
|
+
rescue StandardError => e
|
1190
|
+
$logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating ready VMs: #{e}")
|
1191
|
+
end
|
1192
|
+
else
|
1193
|
+
move_vm_queue(pool_name, vm, 'ready', 'completed', redis, 'is a ready VM but is missing from inventory. Marking as completed.')
|
1041
1194
|
end
|
1042
|
-
else
|
1043
|
-
move_vm_queue(pool_name, vm, 'ready', 'completed', 'is a ready VM but is missing from inventory. Marking as completed.')
|
1044
1195
|
end
|
1045
1196
|
end
|
1046
1197
|
end
|
1047
1198
|
|
1048
|
-
def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout
|
1199
|
+
def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout)
|
1049
1200
|
pool_timeout ||= $config[:config]['timeout'] || 15
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1201
|
+
@redis.with_metrics do |redis|
|
1202
|
+
redis.smembers("vmpooler__pending__#{pool_name}").reverse.each do |vm|
|
1203
|
+
if inventory[vm]
|
1204
|
+
begin
|
1205
|
+
pool_check_response[:checked_pending_vms] += 1
|
1206
|
+
check_pending_vm(vm, pool_name, pool_timeout, provider)
|
1207
|
+
rescue StandardError => e
|
1208
|
+
$logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating pending VMs: #{e}")
|
1209
|
+
end
|
1210
|
+
else
|
1211
|
+
fail_pending_vm(vm, pool_name, pool_timeout, redis, false)
|
1057
1212
|
end
|
1058
|
-
else
|
1059
|
-
fail_pending_vm(vm, pool_name, pool_timeout, false)
|
1060
1213
|
end
|
1061
1214
|
end
|
1062
1215
|
end
|
1063
1216
|
|
1064
1217
|
def check_completed_pool_vms(pool_name, provider, pool_check_response, inventory)
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1218
|
+
@redis.with_metrics do |redis|
|
1219
|
+
redis.smembers("vmpooler__completed__#{pool_name}").each do |vm|
|
1220
|
+
if inventory[vm]
|
1221
|
+
begin
|
1222
|
+
pool_check_response[:destroyed_vms] += 1
|
1223
|
+
destroy_vm(vm, pool_name, provider)
|
1224
|
+
rescue StandardError => e
|
1225
|
+
redis.pipelined do
|
1226
|
+
redis.srem("vmpooler__completed__#{pool_name}", vm)
|
1227
|
+
redis.hdel("vmpooler__active__#{pool_name}", vm)
|
1228
|
+
redis.del("vmpooler__vm__#{vm}")
|
1229
|
+
end
|
1230
|
+
$logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating completed VMs: #{e}")
|
1231
|
+
end
|
1232
|
+
else
|
1233
|
+
$logger.log('s', "[!] [#{pool_name}] '#{vm}' not found in inventory, removed from 'completed' queue")
|
1234
|
+
redis.pipelined do
|
1235
|
+
redis.srem("vmpooler__completed__#{pool_name}", vm)
|
1236
|
+
redis.hdel("vmpooler__active__#{pool_name}", vm)
|
1237
|
+
redis.del("vmpooler__vm__#{vm}")
|
1238
|
+
end
|
1075
1239
|
end
|
1076
|
-
else
|
1077
|
-
$logger.log('s', "[!] [#{pool_name}] '#{vm}' not found in inventory, removed from 'completed' queue")
|
1078
|
-
$redis.srem("vmpooler__completed__#{pool_name}", vm)
|
1079
|
-
$redis.hdel("vmpooler__active__#{pool_name}", vm)
|
1080
|
-
$redis.del("vmpooler__vm__#{vm}")
|
1081
1240
|
end
|
1082
1241
|
end
|
1083
1242
|
end
|
1084
1243
|
|
1085
1244
|
def check_discovered_pool_vms(pool_name)
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1245
|
+
@redis.with_metrics do |redis|
|
1246
|
+
redis.smembers("vmpooler__discovered__#{pool_name}").reverse.each do |vm|
|
1247
|
+
%w[pending ready running completed].each do |queue|
|
1248
|
+
if redis.sismember("vmpooler__#{queue}__#{pool_name}", vm)
|
1249
|
+
$logger.log('d', "[!] [#{pool_name}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
|
1250
|
+
redis.srem("vmpooler__discovered__#{pool_name}", vm)
|
1251
|
+
end
|
1091
1252
|
end
|
1092
|
-
end
|
1093
1253
|
|
1094
|
-
|
1254
|
+
redis.smove("vmpooler__discovered__#{pool_name}", "vmpooler__completed__#{pool_name}", vm) if redis.sismember("vmpooler__discovered__#{pool_name}", vm)
|
1255
|
+
end
|
1095
1256
|
end
|
1096
1257
|
rescue StandardError => e
|
1097
1258
|
$logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating discovered VMs: #{e}")
|
1098
1259
|
end
|
1099
1260
|
|
     def check_migrating_pool_vms(pool_name, provider, pool_check_response, inventory)
-      [7 removed lines; content not captured in this extract]
+      @redis.with_metrics do |redis|
+        redis.smembers("vmpooler__migrating__#{pool_name}").reverse.each do |vm|
+          if inventory[vm]
+            begin
+              pool_check_response[:migrated_vms] += 1
+              migrate_vm(vm, pool_name, provider)
+            rescue StandardError => e
+              $logger.log('s', "[x] [#{pool_name}] '#{vm}' failed to migrate: #{e}")
+            end
           end
         end
       end
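All four walkers above follow a single naming scheme for the per-pool state sets, visible throughout this diff. A small illustrative helper capturing it (the constant and method are not part of vmpooler):

    # Queue states observed in this diff; each key is a Redis set of VM hostnames.
    QUEUES = %w[pending ready running completed discovered migrating].freeze

    def queue_key(queue, pool_name)
      "vmpooler__#{queue}__#{pool_name}"
    end

    queue_key('discovered', 'debian-10-x86_64')
    # => "vmpooler__discovered__debian-10-x86_64"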
@@ -1113,29 +1276,37 @@ module Vmpooler

     def repopulate_pool_vms(pool_name, provider, pool_check_response, pool_size)
       return if pool_mutex(pool_name).locked?

-      [12 removed lines; content not captured in this extract]
+      @redis.with_metrics do |redis|
+        redis.multi
+        redis.scard("vmpooler__ready__#{pool_name}")
+        redis.scard("vmpooler__pending__#{pool_name}")
+        redis.scard("vmpooler__running__#{pool_name}")
+        ready, pending, running = redis.exec
+        total = pending.to_i + ready.to_i
+
+        $metrics.gauge("ready.#{pool_name}", ready)
+        $metrics.gauge("running.#{pool_name}", running)
+
+        unless pool_size == 0
+          if redis.get("vmpooler__empty__#{pool_name}")
+            redis.del("vmpooler__empty__#{pool_name}") unless ready == 0
+          elsif ready == 0
+            redis.set("vmpooler__empty__#{pool_name}", 'true')
+            $logger.log('s', "[!] [#{pool_name}] is empty")
+          end
+        end

-      [10 removed lines; content not captured in this extract]
+        (pool_size - total.to_i).times do
+          if redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
+            begin
+              redis.incr('vmpooler__tasks__clone')
+              pool_check_response[:cloned_vms] += 1
+              clone_vm(pool_name, provider)
+            rescue StandardError => e
+              $logger.log('s', "[!] [#{pool_name}] clone failed during check_pool with an error: #{e}")
+              redis.decr('vmpooler__tasks__clone')
+              raise
+            end
           end
         end
       end
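`repopulate_pool_vms` now reads the three counters inside one MULTI/EXEC so the snapshot is consistent, publishes the `ready` and `running` gauges, and then tops the pool up by `pool_size - (ready + pending)`, taking one `vmpooler__tasks__clone` slot per clone and deferring to the global `task_limit`. Worked through with assumed numbers:

    pool_size  = 10    # configured pool size
    ready      = 3     # SCARD vmpooler__ready__<pool>, from the MULTI/EXEC snapshot
    pending    = 2     # SCARD vmpooler__pending__<pool>
    task_limit = 4     # $config[:config]['task_limit']
    in_flight  = 3     # current vmpooler__tasks__clone counter

    deficit   = pool_size - (ready + pending)   # => 5 clone attempts this pass
    available = task_limit - in_flight          # => only 1 clone actually starts now
    # The remaining deficit is simply picked up again on the next _check_pool pass.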
@@ -1160,7 +1331,7 @@ module Vmpooler

         check_running_pool_vms(pool['name'], provider, pool_check_response, inventory)

-        check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'])
+        check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'] || $config[:config]['ready_ttl'])

         check_pending_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['timeout'])

@@ -1203,23 +1374,199 @@ module Vmpooler
     #
     # returns an object Vmpooler::PoolManager::Provider::*
     # or raises an error if the class does not exist
-    def create_provider_object(config, logger, metrics, provider_class, provider_name, options)
+    def create_provider_object(config, logger, metrics, redis_connection_pool, provider_class, provider_name, options)
       provider_klass = Vmpooler::PoolManager::Provider
       provider_klass.constants.each do |classname|
         next unless classname.to_s.casecmp(provider_class) == 0

-        return provider_klass.const_get(classname).new(config, logger, metrics, provider_name, options)
+        return provider_klass.const_get(classname).new(config, logger, metrics, redis_connection_pool, provider_name, options)
       end
       raise("Provider '#{provider_class}' is unknown for pool with provider name '#{provider_name}'") if provider.nil?
     end

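Provider construction now threads the Redis connection pool down to the provider, hence the extra `redis_connection_pool` parameter in both the signature and the `new` call (and the matching change to `lib/vmpooler/providers/base.rb` in this release). Incidentally, the `raise ... if provider.nil?` guard appears to reference a `provider` local this method never assigns; that is how the released code reads, not an artifact of the diff. A hedged sketch of what a provider subclass now receives; the `Example` class is hypothetical and the base-class arity is inferred from the call above:

    require 'vmpooler/providers/base'

    module Vmpooler
      class PoolManager
        class Provider
          # Hypothetical provider showing the new constructor shape.
          class Example < Vmpooler::PoolManager::Provider::Base
            def initialize(config, logger, metrics, redis_connection_pool, name, options)
              super
              @redis = redis_connection_pool # providers can now reach Redis directly
            end
          end
        end
      end
    end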
+    def check_ondemand_requests(maxloop = 0,
+                                loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
+                                loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
+                                loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
+
+      $logger.log('d', '[*] [ondemand_provisioner] starting worker thread')
+
+      $threads['ondemand_provisioner'] = Thread.new do
+        _check_ondemand_requests(maxloop, loop_delay_min, loop_delay_max, loop_delay_decay)
+      end
+    end
+
+    def _check_ondemand_requests(maxloop = 0,
+                                 loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
+                                 loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
+                                 loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
+
+      loop_delay_min = $config[:config]['check_loop_delay_min'] unless $config[:config]['check_loop_delay_min'].nil?
+      loop_delay_max = $config[:config]['check_loop_delay_max'] unless $config[:config]['check_loop_delay_max'].nil?
+      loop_delay_decay = $config[:config]['check_loop_delay_decay'] unless $config[:config]['check_loop_delay_decay'].nil?
+
+      loop_delay_decay = 2.0 if loop_delay_decay <= 1.0
+      loop_delay_max = loop_delay_min if loop_delay_max.nil? || loop_delay_max < loop_delay_min
+
+      loop_count = 1
+      loop_delay = loop_delay_min
+
+      loop do
+        result = process_ondemand_requests
+
+        loop_delay = (loop_delay * loop_delay_decay).to_i
+        loop_delay = loop_delay_min if result > 0
+        loop_delay = loop_delay_max if loop_delay > loop_delay_max
+        sleep_with_wakeup_events(loop_delay, loop_delay_min, ondemand_request: true)
+
+        unless maxloop == 0
+          break if loop_count >= maxloop
+
+          loop_count += 1
+        end
+      end
+    end
+
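The provisioner loop reuses the decay strategy of the pool-check loops: the delay multiplies by `loop_delay_decay` while `process_ondemand_requests` reports nothing to do, snaps back to `loop_delay_min` as soon as it reports activity, and never exceeds `loop_delay_max`. The `CHECK_LOOP_DELAY_*_DEFAULT` constants are defined elsewhere in pool_manager.rb; the values below are assumptions chosen to show the shape of the idle backoff:

    loop_delay_min   = 5     # assumed seconds
    loop_delay_max   = 60
    loop_delay_decay = 2.0

    delays = []
    loop_delay = loop_delay_min
    6.times do
      loop_delay = (loop_delay * loop_delay_decay).to_i
      loop_delay = loop_delay_max if loop_delay > loop_delay_max
      delays << loop_delay
    end
    delays # => [10, 20, 40, 60, 60, 60] while idle; any processed work resets it to 5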
+    def process_ondemand_requests
+      @redis.with_metrics do |redis|
+        requests = redis.zrange('vmpooler__provisioning__request', 0, -1)
+        requests&.map { |request_id| create_ondemand_vms(request_id, redis) }
+        provisioning_tasks = process_ondemand_vms(redis)
+        requests_ready = check_ondemand_requests_ready(redis)
+        requests.length + provisioning_tasks + requests_ready
+      end
+    end
+
+    def create_ondemand_vms(request_id, redis)
+      requested = redis.hget("vmpooler__odrequest__#{request_id}", 'requested')
+      unless requested
+        $logger.log('s', "Failed to find odrequest for request_id '#{request_id}'")
+        redis.zrem('vmpooler__provisioning__request', request_id)
+        return
+      end
+      score = redis.zscore('vmpooler__provisioning__request', request_id)
+      requested = requested.split(',')
+
+      redis.pipelined do
+        requested.each do |request|
+          redis.zadd('vmpooler__odcreate__task', Time.now.to_i, "#{request}:#{request_id}")
+        end
+        redis.zrem('vmpooler__provisioning__request', request_id)
+        redis.zadd('vmpooler__provisioning__processing', score, request_id)
+      end
+    end
+
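Between them, these two methods define the Redis layout of the on-demand pipeline; every key below appears in this diff, while the example values are hypothetical:

    # vmpooler__provisioning__request          zset: request ids awaiting expansion, scored by time
    # vmpooler__odrequest__<request_id>        hash: 'requested' => "alias:pool:count,...", plus 'status'
    # vmpooler__odcreate__task                 zset: "alias:pool:count:request_id" clone work items
    # vmpooler__provisioning__processing       zset: requests being filled, keeping their original score
    # vmpooler__<request_id>__<alias>__<pool>  set: hostnames cloned so far for that tuple

    entry = 'centos-7:centos-7-x86_64:2:8592e250' # hypothetical work item
    pool_alias, pool, count, request_id = entry.split(':')
    # => ["centos-7", "centos-7-x86_64", "2", "8592e250"]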
+    def process_ondemand_vms(redis)
+      queue_key = 'vmpooler__odcreate__task'
+      queue = redis.zrange(queue_key, 0, -1, with_scores: true)
+      ondemand_clone_limit = $config[:config]['ondemand_clone_limit']
+      queue.each do |request, score|
+        clone_count = redis.get('vmpooler__tasks__ondemandclone').to_i
+        break unless clone_count < ondemand_clone_limit
+
+        pool_alias, pool, count, request_id = request.split(':')
+        count = count.to_i
+        provider = get_provider_for_pool(pool)
+        slots = ondemand_clone_limit - clone_count
+        break if slots == 0
+
+        if slots >= count
+          count.times do
+            redis.incr('vmpooler__tasks__ondemandclone')
+            clone_vm(pool, provider, request_id, pool_alias)
+          end
+          redis.zrem(queue_key, request)
+        else
+          remaining_count = count - slots
+          slots.times do
+            redis.incr('vmpooler__tasks__ondemandclone')
+            clone_vm(pool, provider, request_id, pool_alias)
+          end
+          redis.pipelined do
+            redis.zrem(queue_key, request)
+            redis.zadd(queue_key, score, "#{pool_alias}:#{pool}:#{remaining_count}:#{request_id}")
+          end
+        end
+      end
+      queue.length
+    end
+
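`process_ondemand_vms` throttles cloning with the `vmpooler__tasks__ondemandclone` counter against `ondemand_clone_limit`, and a work item that does not fit is split: clone what fits now, re-queue the remainder under the original score so the request keeps its place in the zset. With assumed numbers:

    ondemand_clone_limit = 10
    clone_count          = 7                      # clones already in flight
    slots = ondemand_clone_limit - clone_count    # => 3

    count = 5   # from a hypothetical item "centos-7:centos-7-x86_64:5:8592e250"
    if slots >= count
      # all 5 clones start and the work item is removed
    else
      remaining_count = count - slots             # => 2
      # 3 clones start now; "centos-7:centos-7-x86_64:2:8592e250" is re-added
      # with the original score, so it is retried first on the next pass
    end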
+    def vms_ready?(request_id, redis)
+      catch :request_not_ready do
+        request_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
+        Parsing.get_platform_pool_count(request_hash['requested']) do |platform_alias, pool, count|
+          pools_filled = redis.scard("vmpooler__#{request_id}__#{platform_alias}__#{pool}")
+          throw :request_not_ready unless pools_filled.to_i == count.to_i
+        end
+        return true
+      end
+      false
+    end
+
+    def check_ondemand_requests_ready(redis)
+      in_progress_requests = redis.zrange('vmpooler__provisioning__processing', 0, -1, with_scores: true)
+      in_progress_requests&.each do |request_id, score|
+        check_ondemand_request_ready(request_id, redis, score)
+      end
+      in_progress_requests.length
+    end
+
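`vms_ready?` pairs Ruby's `catch`/`throw` (an early exit, not exception handling) with `Vmpooler::Parsing.get_platform_pool_count`, a helper new to this release in `lib/vmpooler/util/parsing.rb`. A sketch consistent with how it is called here and in `remove_vms_for_failed_request` below; treat the body as an assumption rather than the shipped source:

    module Vmpooler
      module Parsing
        # Yields platform_alias, pool and count for each requested tuple, e.g.
        # "centos-7:centos-7-x86_64:2,win-2019:win-2019-x86_64:1"
        def self.get_platform_pool_count(requested)
          requested.split(',').each do |platform|
            platform_alias, pool, count = platform.split(':')
            yield platform_alias, pool, count
          end
        end
      end
    end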
+    def check_ondemand_request_ready(request_id, redis, score = nil)
+      # default expiration is one month to ensure the data does not stay in redis forever
+      default_expiration = 259_200_0
+      processing_key = 'vmpooler__provisioning__processing'
+      ondemand_hash_key = "vmpooler__odrequest__#{request_id}"
+      score ||= redis.zscore(processing_key, request_id)
+      return if request_expired?(request_id, score, redis)
+
+      return unless vms_ready?(request_id, redis)
+
+      redis.multi
+      redis.hset(ondemand_hash_key, 'status', 'ready')
+      redis.expire(ondemand_hash_key, default_expiration)
+      redis.zrem(processing_key, request_id)
+      redis.exec
+    end
+
+    def request_expired?(request_id, score, redis)
+      delta = Time.now.to_i - score.to_i
+      ondemand_request_ttl = $config[:config]['ondemand_request_ttl']
+      return false unless delta > ondemand_request_ttl * 60
+
+      $logger.log('s', "Ondemand request for '#{request_id}' failed to provision all instances within the configured ttl '#{ondemand_request_ttl}'")
+      expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
+      redis.pipelined do
+        redis.zrem('vmpooler__provisioning__processing', request_id)
+        redis.hset("vmpooler__odrequest__#{request_id}", 'status', 'failed')
+        redis.expire("vmpooler__odrequest__#{request_id}", expiration_ttl)
+      end
+      remove_vms_for_failed_request(request_id, expiration_ttl, redis)
+      true
+    end
+
+    def remove_vms_for_failed_request(request_id, expiration_ttl, redis)
+      request_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
+      Parsing.get_platform_pool_count(request_hash['requested']) do |platform_alias, pool, _count|
+        pools_filled = redis.smembers("vmpooler__#{request_id}__#{platform_alias}__#{pool}")
+        redis.pipelined do
+          pools_filled&.each do |vm|
+            move_vm_queue(pool, vm, 'running', 'completed', redis, "moved to completed queue. '#{request_id}' could not be filled in time")
+          end
+          redis.expire("vmpooler__#{request_id}__#{platform_alias}__#{pool}", expiration_ttl)
+        end
+      end
+    end
+
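Two clocks govern the failure path: `ondemand_request_ttl` (minutes) bounds how long provisioning may take before the request is failed, while the Redis `data_ttl` (hours) decides how long the failed bookkeeping stays around for inspection. Worked through with assumed config values:

    ondemand_request_ttl = 5                       # minutes, from $config[:config]
    data_ttl             = 24                      # hours, from $config[:redis]

    score = Time.now.to_i - 400                    # entered processing 400s ago
    delta = Time.now.to_i - score                  # => 400
    expired = delta > ondemand_request_ttl * 60    # 400 > 300, so the request fails

    expiration_ttl = data_ttl * 60 * 60            # => 86_400s before the keys are purged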
     def execute!(maxloop = 0, loop_delay = 1)
       $logger.log('d', 'starting vmpooler')

-      [4 removed lines; content not captured in this extract]
+      @redis.with_metrics do |redis|
+        # Clear out the tasks manager, as we don't know about any tasks at this point
+        redis.set('vmpooler__tasks__clone', 0)
+        redis.set('vmpooler__tasks__ondemandclone', 0)
+        # Clear out vmpooler__migrations since stale entries may be left after a restart
+        redis.del('vmpooler__migration')
+      end

       # Copy vSphere settings to correct location. This happens with older configuration files
       if !$config[:vsphere].nil? && ($config[:providers].nil? || $config[:providers][:vsphere].nil?)
@@ -1269,7 +1616,7 @@ module Vmpooler
           provider_class = $config[:providers][provider_name.to_sym]['provider_class']
         end
         begin
-          $providers[provider_name] = create_provider_object($config, $logger, $metrics, provider_class, provider_name, {}) if $providers[provider_name].nil?
+          $providers[provider_name] = create_provider_object($config, $logger, $metrics, @redis, provider_class, provider_name, {}) if $providers[provider_name].nil?
         rescue StandardError => e
           $logger.log('s', "Error while creating provider for pool #{pool['name']}: #{e}")
           raise
@@ -1303,6 +1650,13 @@ module Vmpooler
         end
       end

+      if !$threads['ondemand_provisioner']
+        check_ondemand_requests
+      elsif !$threads['ondemand_provisioner'].alive?
+        $logger.log('d', '[!] [ondemand_provisioner] worker thread died, restarting')
+        check_ondemand_requests(check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay)
+      end
+
       sleep(loop_delay)

       unless maxloop == 0
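`execute!` now supervises the on-demand worker the same way it supervises the per-pool threads: start it when absent, restart it when dead. (Note that the restart call passes the three delay-tuning values positionally, while the first parameter of `check_ondemand_requests` is `maxloop`.) The generic shape of the pattern, illustrative only:

    $threads = {}

    # Illustrative supervision helper; vmpooler inlines this logic in execute!.
    def supervise_thread(name, logger, &work)
      if $threads[name].nil?
        $threads[name] = Thread.new(&work)
      elsif !$threads[name].alive?
        logger.log('d', "[!] [#{name}] worker thread died, restarting")
        $threads[name] = Thread.new(&work)
      end
    end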