vmpooler 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This is an adapted Collector module for vmpooler based on the sample implementation
4
+ # available in the prometheus client_ruby library
5
+ # https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/middleware/collector.rb
6
+ #
7
+ # The code was also failing Rubocop on PR check, so have addressed all the offenses.
8
+ #
9
+ # The method strip_hostnames_from_path (originally strip_ids_from_path) has been adapted
10
+ # to match hostnames in paths and strip them, collapsing the path to a fixed prefix
11
+ # (e.g. "/vm"), to avoid a proliferation of stat lines for each new vm hostname
12
+ # deleted, modified or otherwise queried.
13
+
14
+ require 'benchmark'
15
+ require 'prometheus/client'
16
+ require 'vmpooler/logger'
17
+
18
+ module Vmpooler
19
+ class Metrics
20
+ class Promstats
21
+ # CollectorMiddleware is an implementation of Rack Middleware customised
22
+ # for vmpooler use.
23
+ #
24
+ # By default metrics are registered on the global registry. Set the
25
+ # `:registry` option to use a custom registry.
26
+ #
27
+ # By default metrics all have the prefix "http_server". Set the
28
+ # `:metrics_prefix` option to use a different prefix.
29
+ #
30
+ # The request counter metric is broken down by code, method and path by
31
+ # default. Set the `:counter_label_builder` option to use a custom label
32
+ # builder.
33
+ #
34
+ # The request duration metric is broken down by method and path by default.
35
+ # Set the `:duration_label_builder` option to use a custom label builder.
36
+ #
37
+ # Label Builder functions will receive a Rack env and a status code, and must
38
+ # return a hash with the labels for that request. They must also accept an empty
39
+ # env, and return a hash with the correct keys. This is necessary to initialize
40
+ # the metrics with the correct set of labels.
41
+ class CollectorMiddleware
42
+ attr_reader :app, :registry
43
+
44
+ def initialize(app, options = {})
45
+ @app = app
46
+ @registry = options[:registry] || Prometheus::Client.registry
47
+ @metrics_prefix = options[:metrics_prefix] || 'http_server'
48
+
49
+ init_request_metrics
50
+ init_exception_metrics
51
+ end
52
+
53
+ def call(env) # :nodoc:
54
+ trace(env) { @app.call(env) }
55
+ end
56
+
57
+ protected
58
+
59
+ def init_request_metrics
60
+ @requests = @registry.counter(
61
+ :"#{@metrics_prefix}_requests_total",
62
+ docstring:
63
+ 'The total number of HTTP requests handled by the Rack application.',
64
+ labels: %i[code method path]
65
+ )
66
+ @durations = @registry.histogram(
67
+ :"#{@metrics_prefix}_request_duration_seconds",
68
+ docstring: 'The HTTP response duration of the Rack application.',
69
+ labels: %i[method path]
70
+ )
71
+ end
72
+
73
+ def init_exception_metrics
74
+ @exceptions = @registry.counter(
75
+ :"#{@metrics_prefix}_exceptions_total",
76
+ docstring: 'The total number of exceptions raised by the Rack application.',
77
+ labels: [:exception]
78
+ )
79
+ end
80
+
81
+ def trace(env)
82
+ response = nil
83
+ duration = Benchmark.realtime { response = yield }
84
+ record(env, response.first.to_s, duration)
85
+ response
86
+ rescue StandardError => e
87
+ @exceptions.increment(labels: { exception: e.class.name })
88
+ raise
89
+ end
90
+
91
+ def record(env, code, duration)
92
+ counter_labels = {
93
+ code: code,
94
+ method: env['REQUEST_METHOD'].downcase,
95
+ path: strip_hostnames_from_path(env['PATH_INFO'])
96
+ }
97
+
98
+ duration_labels = {
99
+ method: env['REQUEST_METHOD'].downcase,
100
+ path: strip_hostnames_from_path(env['PATH_INFO'])
101
+ }
102
+
103
+ @requests.increment(labels: counter_labels)
104
+ @durations.observe(duration, labels: duration_labels)
105
+ rescue # rubocop:disable Style/RescueStandardError
106
+ nil
107
+ end
108
+
109
+ def strip_hostnames_from_path(path)
110
+ # Custom for /vm path - so we just collect aggregate stats for all usage along this one
111
+ # path. Custom counters are then added for more specific endpoints in v1.rb
112
+ # Since we aren't parsing UID/GIDs as in the original example, these are removed.
113
+ # Similarly, request IDs are also stripped from the /ondemand path.
114
+ path
115
+ .gsub(%r{/vm/.+$}, '/vm')
116
+ .gsub(%r{/ondemand/.+$}, '/ondemand')
117
+ end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems' unless defined?(Gem)
4
+ require 'statsd'
5
+
6
+ module Vmpooler
7
+ class Metrics
8
+ class Statsd < Metrics
9
+ attr_reader :server, :port, :prefix
10
+
11
+ def initialize(logger, params = {})
12
+ raise ArgumentError, "Statsd server is required. Config: #{params.inspect}" if params['server'].nil? || params['server'].empty?
13
+
14
+ host = params['server']
15
+ @port = params['port'] || 8125
16
+ @prefix = params['prefix'] || 'vmpooler'
17
+ @server = ::Statsd.new(host, @port)
18
+ @logger = logger
19
+ end
20
+
21
+ def increment(label)
22
+ server.increment(prefix + '.' + label)
23
+ rescue StandardError => e
24
+ @logger.log('s', "[!] Failure incrementing #{prefix}.#{label} on statsd server [#{server}:#{port}]: #{e}")
25
+ end
26
+
27
+ def gauge(label, value)
28
+ server.gauge(prefix + '.' + label, value)
29
+ rescue StandardError => e
30
+ @logger.log('s', "[!] Failure updating gauge #{prefix}.#{label} on statsd server [#{server}:#{port}]: #{e}")
31
+ end
32
+
33
+ def timing(label, duration)
34
+ server.timing(prefix + '.' + label, duration)
35
+ rescue StandardError => e
36
+ @logger.log('s', "[!] Failure updating timing #{prefix}.#{label} on statsd server [#{server}:#{port}]: #{e}")
37
+ end
38
+ end
39
+ end
40
+ end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'vmpooler/providers'
4
+ require 'vmpooler/util/parsing'
4
5
  require 'spicy-proton'
6
+ require 'resolv' # ruby standard lib
5
7
 
6
8
  module Vmpooler
7
9
  class PoolManager
@@ -9,7 +11,7 @@ module Vmpooler
9
11
  CHECK_LOOP_DELAY_MAX_DEFAULT = 60
10
12
  CHECK_LOOP_DELAY_DECAY_DEFAULT = 2.0
11
13
 
12
- def initialize(config, logger, redis, metrics)
14
+ def initialize(config, logger, redis_connection_pool, metrics)
13
15
  $config = config
14
16
 
15
17
  # Load logger library
@@ -18,19 +20,19 @@ module Vmpooler
18
20
  # metrics logging handle
19
21
  $metrics = metrics
20
22
 
21
- # Connect to Redis
22
- $redis = redis
23
+ # Redis connection pool
24
+ @redis = redis_connection_pool
23
25
 
24
26
  # VM Provider objects
25
- $providers = {}
27
+ $providers = Concurrent::Hash.new
26
28
 
27
29
  # Our thread-tracker object
28
- $threads = {}
30
+ $threads = Concurrent::Hash.new
29
31
 
30
32
  # Pool mutex
31
- @reconfigure_pool = {}
33
+ @reconfigure_pool = Concurrent::Hash.new
32
34
 
33
- @vm_mutex = {}
35
+ @vm_mutex = Concurrent::Hash.new
34
36
 
35
37
  # Name generator for generating host names
36
38
  @name_generator = Spicy::Proton.new
@@ -45,24 +47,26 @@ module Vmpooler
45
47
 
46
48
  # Place pool configuration in redis so an API instance can discover running pool configuration
47
49
  def load_pools_to_redis
48
- previously_configured_pools = $redis.smembers('vmpooler__pools')
49
- currently_configured_pools = []
50
- config[:pools].each do |pool|
51
- currently_configured_pools << pool['name']
52
- $redis.sadd('vmpooler__pools', pool['name'])
53
- pool_keys = pool.keys
54
- pool_keys.delete('alias')
55
- to_set = {}
56
- pool_keys.each do |k|
57
- to_set[k] = pool[k]
50
+ @redis.with_metrics do |redis|
51
+ previously_configured_pools = redis.smembers('vmpooler__pools')
52
+ currently_configured_pools = []
53
+ config[:pools].each do |pool|
54
+ currently_configured_pools << pool['name']
55
+ redis.sadd('vmpooler__pools', pool['name'])
56
+ pool_keys = pool.keys
57
+ pool_keys.delete('alias')
58
+ to_set = {}
59
+ pool_keys.each do |k|
60
+ to_set[k] = pool[k]
61
+ end
62
+ to_set['alias'] = pool['alias'].join(',') if to_set.key?('alias')
63
+ redis.hmset("vmpooler__pool__#{pool['name']}", to_set.to_a.flatten) unless to_set.empty?
58
64
  end
59
- to_set['alias'] = pool['alias'].join(',') if to_set.key?('alias')
60
- $redis.hmset("vmpooler__pool__#{pool['name']}", to_set.to_a.flatten) unless to_set.empty?
61
- end
62
- previously_configured_pools.each do |pool|
63
- unless currently_configured_pools.include? pool
64
- $redis.srem('vmpooler__pools', pool)
65
- $redis.del("vmpooler__pool__#{pool}")
65
+ previously_configured_pools.each do |pool|
66
+ unless currently_configured_pools.include? pool
67
+ redis.srem('vmpooler__pools', pool)
68
+ redis.del("vmpooler__pool__#{pool}")
69
+ end
66
70
  end
67
71
  end
68
72
  nil
@@ -75,7 +79,9 @@ module Vmpooler
75
79
  _check_pending_vm(vm, pool, timeout, provider)
76
80
  rescue StandardError => e
77
81
  $logger.log('s', "[!] [#{pool}] '#{vm}' #{timeout} #{provider} errored while checking a pending vm : #{e}")
78
- fail_pending_vm(vm, pool, timeout)
82
+ @redis.with_metrics do |redis|
83
+ fail_pending_vm(vm, pool, timeout, redis)
84
+ end
79
85
  raise
80
86
  end
81
87
  end
@@ -86,31 +92,38 @@ module Vmpooler
86
92
  return if mutex.locked?
87
93
 
88
94
  mutex.synchronize do
89
- if provider.vm_ready?(pool, vm)
90
- move_pending_vm_to_ready(vm, pool)
91
- else
92
- fail_pending_vm(vm, pool, timeout)
95
+ @redis.with_metrics do |redis|
96
+ request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
97
+ if provider.vm_ready?(pool, vm)
98
+ move_pending_vm_to_ready(vm, pool, redis, request_id)
99
+ else
100
+ fail_pending_vm(vm, pool, timeout, redis)
101
+ end
93
102
  end
94
103
  end
95
104
  end
96
105
 
97
- def remove_nonexistent_vm(vm, pool)
98
- $redis.srem("vmpooler__pending__#{pool}", vm)
106
+ def remove_nonexistent_vm(vm, pool, redis)
107
+ redis.srem("vmpooler__pending__#{pool}", vm)
99
108
  $logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.")
100
109
  end
101
110
 
102
- def fail_pending_vm(vm, pool, timeout, exists = true)
103
- clone_stamp = $redis.hget("vmpooler__vm__#{vm}", 'clone')
104
- return true unless clone_stamp
111
+ def fail_pending_vm(vm, pool, timeout, redis, exists = true)
112
+ clone_stamp = redis.hget("vmpooler__vm__#{vm}", 'clone')
105
113
 
106
114
  time_since_clone = (Time.now - Time.parse(clone_stamp)) / 60
107
115
  if time_since_clone > timeout
108
116
  if exists
109
- $redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm)
117
+ request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
118
+ pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias') if request_id
119
+ redis.multi
120
+ redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm)
121
+ redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool}:1:#{request_id}") if request_id
122
+ redis.exec
110
123
  $metrics.increment("errors.markedasfailed.#{pool}")
111
124
  $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")
112
125
  else
113
- remove_nonexistent_vm(vm, pool)
126
+ remove_nonexistent_vm(vm, pool, redis)
114
127
  end
115
128
  end
116
129
  true
@@ -119,28 +132,54 @@ module Vmpooler
119
132
  false
120
133
  end
121
134
 
122
- def move_pending_vm_to_ready(vm, pool)
123
- clone_time = $redis.hget('vmpooler__vm__' + vm, 'clone')
124
- finish = format('%<time>.2f', time: Time.now - Time.parse(clone_time)) if clone_time
135
+ def move_pending_vm_to_ready(vm, pool, redis, request_id = nil)
136
+ clone_time = redis.hget('vmpooler__vm__' + vm, 'clone')
137
+ finish = format('%<time>.2f', time: Time.now - Time.parse(clone_time))
125
138
 
126
- $redis.smove('vmpooler__pending__' + pool, 'vmpooler__ready__' + pool, vm)
127
- $redis.hset('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm, finish) # maybe remove as this is never used by vmpooler itself?
128
- $redis.hset("vmpooler__vm__#{vm}", 'ready', Time.now)
139
+ if request_id
140
+ ondemandrequest_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
141
+ if ondemandrequest_hash['status'] == 'failed'
142
+ move_vm_queue(pool, vm, 'pending', 'completed', redis, "moved to completed queue. '#{request_id}' could not be filled in time")
143
+ return nil
144
+ elsif ondemandrequest_hash['status'] == 'deleted'
145
+ move_vm_queue(pool, vm, 'pending', 'completed', redis, "moved to completed queue. '#{request_id}' has been deleted")
146
+ return nil
147
+ end
148
+ pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias')
149
+
150
+ redis.pipelined do
151
+ redis.hset("vmpooler__active__#{pool}", vm, Time.now)
152
+ redis.hset("vmpooler__vm__#{vm}", 'checkout', Time.now)
153
+ redis.hset("vmpooler__vm__#{vm}", 'token:token', ondemandrequest_hash['token:token']) if ondemandrequest_hash['token:token']
154
+ redis.hset("vmpooler__vm__#{vm}", 'token:user', ondemandrequest_hash['token:user']) if ondemandrequest_hash['token:user']
155
+ redis.sadd("vmpooler__#{request_id}__#{pool_alias}__#{pool}", vm)
156
+ end
157
+ move_vm_queue(pool, vm, 'pending', 'running', redis)
158
+ check_ondemand_request_ready(request_id, redis)
159
+ else
160
+ redis.smove('vmpooler__pending__' + pool, 'vmpooler__ready__' + pool, vm)
161
+ end
129
162
 
130
- # last boot time is displayed in API, and used by alarming script
131
- $redis.hset('vmpooler__lastboot', pool, Time.now)
163
+ redis.pipelined do
164
+ redis.hset('vmpooler__boot__' + Date.today.to_s, pool + ':' + vm, finish) # maybe remove as this is never used by vmpooler itself?
165
+ redis.hset("vmpooler__vm__#{vm}", 'ready', Time.now)
166
+
167
+ # last boot time is displayed in API, and used by alarming script
168
+ redis.hset('vmpooler__lastboot', pool, Time.now)
169
+ end
132
170
 
133
171
  $metrics.timing("time_to_ready_state.#{pool}", finish)
134
- $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue")
172
+ $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue") unless request_id
173
+ $logger.log('s', "[>] [#{pool}] '#{vm}' is 'ready' for request '#{request_id}'") if request_id
135
174
  end
136
175
 
137
- def vm_still_ready?(pool_name, vm_name, provider)
176
+ def vm_still_ready?(pool_name, vm_name, provider, redis)
138
177
  # Check if the VM is still ready/available
139
178
  return true if provider.vm_ready?(pool_name, vm_name)
140
179
 
141
180
  raise("VM #{vm_name} is not ready")
142
181
  rescue StandardError
143
- move_vm_queue(pool_name, vm_name, 'ready', 'completed', "is unreachable, removed from 'ready' queue")
182
+ move_vm_queue(pool_name, vm_name, 'ready', 'completed', redis, "is unreachable, removed from 'ready' queue")
144
183
  end
145
184
 
146
185
  def check_ready_vm(vm, pool_name, ttl, provider)
@@ -160,34 +199,35 @@ module Vmpooler
160
199
  return if mutex.locked?
161
200
 
162
201
  mutex.synchronize do
163
- check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')
164
- return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])
202
+ @redis.with_metrics do |redis|
203
+ check_stamp = redis.hget('vmpooler__vm__' + vm, 'check')
204
+ last_checked_too_soon = ((Time.now - Time.parse(check_stamp)).to_i < $config[:config]['vm_checktime'] * 60) if check_stamp
205
+ break if check_stamp && last_checked_too_soon
165
206
 
166
- $redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
167
- # Check if the hosts TTL has expired
168
- if ttl > 0
207
+ redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
208
+ # Check if the hosts TTL has expired
169
209
  # if 'boottime' is nil, set boottime to beginning of unix epoch, forces TTL to be assumed expired
170
- boottime = $redis.hget("vmpooler__vm__#{vm}", 'ready')
210
+ boottime = redis.hget("vmpooler__vm__#{vm}", 'ready')
171
211
  if boottime
172
212
  boottime = Time.parse(boottime)
173
213
  else
174
214
  boottime = Time.at(0)
175
215
  end
176
- if ((Time.now - boottime) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl
177
- $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
216
+ if (Time.now - boottime).to_i > ttl * 60
217
+ redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
178
218
 
179
219
  $logger.log('d', "[!] [#{pool_name}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
180
- return
220
+ return nil
181
221
  end
182
- end
183
222
 
184
- return if mismatched_hostname?(vm, pool_name, provider)
223
+ break if mismatched_hostname?(vm, pool_name, provider, redis)
185
224
 
186
- vm_still_ready?(pool_name, vm, provider)
225
+ vm_still_ready?(pool_name, vm, provider, redis)
226
+ end
187
227
  end
188
228
  end
189
229
 
190
- def mismatched_hostname?(vm, pool_name, provider)
230
+ def mismatched_hostname?(vm, pool_name, provider, redis)
191
231
  pool_config = $config[:pools][$config[:pool_index][pool_name]]
192
232
  check_hostname = pool_config['check_hostname_for_mismatch']
193
233
  check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil?
@@ -196,7 +236,7 @@ module Vmpooler
196
236
  # Wait one minute before checking a VM for hostname mismatch
197
237
  # When checking as soon as the VM passes the ready test the instance
198
238
  # often doesn't report its hostname yet causing the VM to be removed immediately
199
- vm_ready_time = $redis.hget("vmpooler__vm__#{vm}", 'ready')
239
+ vm_ready_time = redis.hget("vmpooler__vm__#{vm}", 'ready')
200
240
  if vm_ready_time
201
241
  wait_before_checking = 60
202
242
  time_since_ready = (Time.now - Time.parse(vm_ready_time)).to_i
@@ -213,7 +253,7 @@ module Vmpooler
213
253
  return if hostname.empty?
214
254
  return if hostname == vm
215
255
 
216
- $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
256
+ redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
217
257
  $logger.log('d', "[!] [#{pool_name}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
218
258
  true
219
259
  end
@@ -234,49 +274,61 @@ module Vmpooler
234
274
  return if mutex.locked?
235
275
 
236
276
  mutex.synchronize do
237
- # Check that VM is within defined lifetime
238
- checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
239
- if checkouttime
240
- running = (Time.now - Time.parse(checkouttime)) / 60 / 60
241
-
242
- if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
243
- move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
244
- return
245
- end
246
- end
277
+ catch :stop_checking do
278
+ @redis.with_metrics do |redis|
279
+ # Check that VM is within defined lifetime
280
+ checkouttime = redis.hget('vmpooler__active__' + pool, vm)
281
+ if checkouttime
282
+ time_since_checkout = Time.now - Time.parse(checkouttime)
283
+ running = time_since_checkout / 60 / 60
284
+
285
+ if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
286
+ move_vm_queue(pool, vm, 'running', 'completed', redis, "reached end of TTL after #{ttl} hours")
287
+ throw :stop_checking
288
+ end
289
+ end
247
290
 
248
- if provider.vm_ready?(pool, vm)
249
- return
250
- else
251
- host = provider.get_vm(pool, vm)
291
+ if provider.vm_ready?(pool, vm)
292
+ throw :stop_checking
293
+ else
294
+ host = provider.get_vm(pool, vm)
252
295
 
253
- if host
254
- return
255
- else
256
- move_vm_queue(pool, vm, 'running', 'completed', 'is no longer in inventory, removing from running')
296
+ if host
297
+ throw :stop_checking
298
+ else
299
+ move_vm_queue(pool, vm, 'running', 'completed', redis, 'is no longer in inventory, removing from running')
300
+ end
301
+ end
257
302
  end
258
303
  end
259
304
  end
260
305
  end
261
306
 
262
- def move_vm_queue(pool, vm, queue_from, queue_to, msg = nil)
263
- $redis.smove("vmpooler__#{queue_from}__#{pool}", "vmpooler__#{queue_to}__#{pool}", vm)
307
+ def move_vm_queue(pool, vm, queue_from, queue_to, redis, msg = nil)
308
+ redis.smove("vmpooler__#{queue_from}__#{pool}", "vmpooler__#{queue_to}__#{pool}", vm)
264
309
  $logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}") if msg
265
310
  end
266
311
 
267
312
  # Clone a VM
268
- def clone_vm(pool_name, provider)
313
+ def clone_vm(pool_name, provider, request_id = nil, pool_alias = nil)
269
314
  Thread.new do
270
315
  begin
271
- _clone_vm(pool_name, provider)
316
+ _clone_vm(pool_name, provider, request_id, pool_alias)
272
317
  rescue StandardError => e
273
- $logger.log('s', "[!] [#{pool_name}] failed while cloning VM with an error: #{e}")
318
+ if request_id
319
+ $logger.log('s', "[!] [#{pool_name}] failed while cloning VM for request #{request_id} with an error: #{e}")
320
+ @redis.with_metrics do |redis|
321
+ redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool_name}:1:#{request_id}")
322
+ end
323
+ else
324
+ $logger.log('s', "[!] [#{pool_name}] failed while cloning VM with an error: #{e}")
325
+ end
274
326
  raise
275
327
  end
276
328
  end
277
329
  end
278
330
 
279
- def generate_and_check_hostname(_pool_name)
331
+ def generate_and_check_hostname
280
332
  # Generate a randomized hostname. The total name must be no longer than 15
281
333
  # characters including the hyphen. The shortest adjective in the corpus is
282
334
  # three characters long. Therefore, we can technically select a noun up to 11
@@ -285,58 +337,104 @@ module Vmpooler
285
337
  # letter adjectives, we actually limit the noun to 10 letters to avoid
286
338
  # inviting more conflicts. We favor selecting a longer noun rather than a
287
339
  # longer adjective because longer adjectives tend to be less fun.
288
- noun = @name_generator.noun(max: 10)
289
- adjective = @name_generator.adjective(max: 14 - noun.length)
290
- random_name = [adjective, noun].join('-')
291
- hostname = $config[:config]['prefix'] + random_name
292
- available = $redis.hlen('vmpooler__vm__' + hostname) == 0
293
-
294
- [hostname, available]
340
+ @redis.with do |redis|
341
+ noun = @name_generator.noun(max: 10)
342
+ adjective = @name_generator.adjective(max: 14 - noun.length)
343
+ random_name = [adjective, noun].join('-')
344
+ hostname = $config[:config]['prefix'] + random_name
345
+ available = redis.hlen('vmpooler__vm__' + hostname) == 0
346
+
347
+ [hostname, available]
348
+ end
295
349
  end
296
350
 
297
351
  def find_unique_hostname(pool_name)
352
+ # generate hostname that is not already in use in vmpooler
353
+ # also check that no dns record already exists
298
354
  hostname_retries = 0
299
355
  max_hostname_retries = 3
300
356
  while hostname_retries < max_hostname_retries
301
- hostname, available = generate_and_check_hostname(pool_name)
302
- break if available
357
+ hostname, hostname_available = generate_and_check_hostname
358
+ domain = $config[:config]['domain']
359
+ dns_ip, dns_available = check_dns_available(hostname, domain)
360
+ break if hostname_available && dns_available
303
361
 
304
362
  hostname_retries += 1
305
- $metrics.increment("errors.duplicatehostname.#{pool_name}")
306
- $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})")
363
+
364
+ if !hostname_available
365
+ $metrics.increment("errors.duplicatehostname.#{pool_name}")
366
+ $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})")
367
+ elsif !dns_available
368
+ $metrics.increment("errors.staledns.#{hostname}")
369
+ $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} already exists in DNS records (#{dns_ip}), stale DNS")
370
+ end
307
371
  end
308
372
 
309
- raise "Unable to generate a unique hostname after #{hostname_retries} attempts. The last hostname checked was #{hostname}" unless available
373
+ raise "Unable to generate a unique hostname after #{hostname_retries} attempts. The last hostname checked was #{hostname}" unless hostname_available && dns_available
310
374
 
311
375
  hostname
312
376
  end
313
377
 
314
- def _clone_vm(pool_name, provider)
315
- new_vmname = find_unique_hostname(pool_name)
316
-
317
- # Add VM to Redis inventory ('pending' pool)
318
- $redis.sadd('vmpooler__pending__' + pool_name, new_vmname)
319
- $redis.hset('vmpooler__vm__' + new_vmname, 'clone', Time.now)
320
- $redis.hset('vmpooler__vm__' + new_vmname, 'template', pool_name)
321
-
378
+ def check_dns_available(vm_name, domain = nil)
379
+ # Query the DNS for the name we want to create and if it already exists, mark it unavailable
380
+ # This protects against stale DNS records
381
+ vm_name = "#{vm_name}.#{domain}" if domain
322
382
  begin
323
- $logger.log('d', "[ ] [#{pool_name}] Starting to clone '#{new_vmname}'")
324
- start = Time.now
325
- provider.create_vm(pool_name, new_vmname)
326
- finish = format('%<time>.2f', time: Time.now - start)
383
+ dns_ip = Resolv.getaddress(vm_name)
384
+ rescue Resolv::ResolvError
385
+ # this is the expected case, swallow the error
386
+ # eg "no address for blah-daisy"
387
+ return ['', true]
388
+ end
389
+ [dns_ip, false]
390
+ end
327
391
 
328
- $redis.hset('vmpooler__clone__' + Date.today.to_s, pool_name + ':' + new_vmname, finish)
329
- $redis.hset('vmpooler__vm__' + new_vmname, 'clone_time', finish)
330
- $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")
392
+ def _clone_vm(pool_name, provider, request_id = nil, pool_alias = nil)
393
+ new_vmname = find_unique_hostname(pool_name)
394
+ mutex = vm_mutex(new_vmname)
395
+ mutex.synchronize do
396
+ @redis.with_metrics do |redis|
397
+ # Add VM to Redis inventory ('pending' pool)
398
+ redis.multi
399
+ redis.sadd('vmpooler__pending__' + pool_name, new_vmname)
400
+ redis.hset('vmpooler__vm__' + new_vmname, 'clone', Time.now)
401
+ redis.hset('vmpooler__vm__' + new_vmname, 'template', pool_name) # This value is used to represent the pool.
402
+ redis.hset('vmpooler__vm__' + new_vmname, 'pool', pool_name)
403
+ redis.hset('vmpooler__vm__' + new_vmname, 'request_id', request_id) if request_id
404
+ redis.hset('vmpooler__vm__' + new_vmname, 'pool_alias', pool_alias) if pool_alias
405
+ redis.exec
406
+ end
331
407
 
332
- $metrics.timing("clone.#{pool_name}", finish)
333
- rescue StandardError
334
- $redis.srem("vmpooler__pending__#{pool_name}", new_vmname)
335
- expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
336
- $redis.expire("vmpooler__vm__#{new_vmname}", expiration_ttl)
337
- raise
338
- ensure
339
- $redis.decr('vmpooler__tasks__clone')
408
+ begin
409
+ $logger.log('d', "[ ] [#{pool_name}] Starting to clone '#{new_vmname}'")
410
+ start = Time.now
411
+ provider.create_vm(pool_name, new_vmname)
412
+ finish = format('%<time>.2f', time: Time.now - start)
413
+
414
+ @redis.with_metrics do |redis|
415
+ redis.pipelined do
416
+ redis.hset('vmpooler__clone__' + Date.today.to_s, pool_name + ':' + new_vmname, finish)
417
+ redis.hset('vmpooler__vm__' + new_vmname, 'clone_time', finish)
418
+ end
419
+ end
420
+ $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")
421
+
422
+ $metrics.timing("clone.#{pool_name}", finish)
423
+ rescue StandardError
424
+ @redis.with_metrics do |redis|
425
+ redis.pipelined do
426
+ redis.srem("vmpooler__pending__#{pool_name}", new_vmname)
427
+ expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
428
+ redis.expire("vmpooler__vm__#{new_vmname}", expiration_ttl)
429
+ end
430
+ end
431
+ raise
432
+ ensure
433
+ @redis.with_metrics do |redis|
434
+ redis.decr('vmpooler__tasks__ondemandclone') if request_id
435
+ redis.decr('vmpooler__tasks__clone') unless request_id
436
+ end
437
+ end
340
438
  end
341
439
  end
342
440
 
@@ -357,45 +455,57 @@ module Vmpooler
357
455
  return if mutex.locked?
358
456
 
359
457
  mutex.synchronize do
360
- $redis.hdel('vmpooler__active__' + pool, vm)
361
- $redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
458
+ @redis.with_metrics do |redis|
459
+ redis.pipelined do
460
+ redis.hdel('vmpooler__active__' + pool, vm)
461
+ redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
362
462
 
363
- # Auto-expire metadata key
364
- $redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
463
+ # Auto-expire metadata key
464
+ redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
465
+ end
365
466
 
366
- start = Time.now
467
+ start = Time.now
367
468
 
368
- provider.destroy_vm(pool, vm)
469
+ provider.destroy_vm(pool, vm)
369
470
 
370
- $redis.srem('vmpooler__completed__' + pool, vm)
471
+ redis.srem('vmpooler__completed__' + pool, vm)
371
472
 
372
- finish = format('%<time>.2f', time: Time.now - start)
373
- $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
374
- $metrics.timing("destroy.#{pool}", finish)
375
- get_vm_usage_labels(vm)
473
+ finish = format('%<time>.2f', time: Time.now - start)
474
+ $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
475
+ $metrics.timing("destroy.#{pool}", finish)
476
+ get_vm_usage_labels(vm, redis)
477
+ end
376
478
  end
377
479
  dereference_mutex(vm)
378
480
  end
379
481
 
380
- def get_vm_usage_labels(vm)
482
+ def get_vm_usage_labels(vm, redis)
381
483
  return unless $config[:config]['usage_stats']
382
484
 
383
- checkout = $redis.hget("vmpooler__vm__#{vm}", 'checkout')
485
+ redis.multi
486
+ redis.hget("vmpooler__vm__#{vm}", 'checkout')
487
+ redis.hget("vmpooler__vm__#{vm}", 'tag:jenkins_build_url')
488
+ redis.hget("vmpooler__vm__#{vm}", 'token:user')
489
+ redis.hget("vmpooler__vm__#{vm}", 'template')
490
+ checkout, jenkins_build_url, user, poolname = redis.exec
384
491
  return if checkout.nil?
385
492
 
386
- jenkins_build_url = $redis.hget("vmpooler__vm__#{vm}", 'tag:jenkins_build_url')
387
- user = $redis.hget("vmpooler__vm__#{vm}", 'token:user') || 'unauthenticated'
388
- poolname = $redis.hget("vmpooler__vm__#{vm}", 'template')
493
+ user ||= 'unauthenticated'
494
+ user = user.gsub('.', '_')
495
+ $metrics.increment("user.#{user}.#{poolname}")
496
+
497
+ return unless jenkins_build_url
389
498
 
390
- unless jenkins_build_url
391
- user = user.gsub('.', '_')
392
- $metrics.increment("usage.#{user}.#{poolname}")
499
+ if jenkins_build_url.include? 'litmus'
500
+ # Very simple filter for Litmus jobs - just count them coming through for the moment.
501
+ $metrics.increment("usage_litmus.#{user}.#{poolname}")
393
502
  return
394
503
  end
395
504
 
396
505
  url_parts = jenkins_build_url.split('/')[2..-1]
397
- instance = url_parts[0]
506
+ jenkins_instance = url_parts[0].gsub('.', '_')
398
507
  value_stream_parts = url_parts[2].split('_')
508
+ value_stream_parts = value_stream_parts.map { |s| s.gsub('.', '_') }
399
509
  value_stream = value_stream_parts.shift
400
510
  branch = value_stream_parts.pop
401
511
  project = value_stream_parts.shift
@@ -403,24 +513,12 @@ module Vmpooler
403
513
  build_metadata_parts = url_parts[3]
404
514
  component_to_test = component_to_test('RMM_COMPONENT_TO_TEST_NAME', build_metadata_parts)
405
515
 
406
- metric_parts = [
407
- 'usage',
408
- user,
409
- instance,
410
- value_stream,
411
- branch,
412
- project,
413
- job_name,
414
- component_to_test,
415
- poolname
416
- ]
417
-
418
- metric_parts = metric_parts.reject(&:nil?)
419
- metric_parts = metric_parts.map { |s| s.gsub('.', '_') }
420
-
421
- $metrics.increment(metric_parts.join('.'))
516
+ $metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}")
517
+ $metrics.increment("usage_branch_project.#{branch}.#{project}.#{poolname}")
518
+ $metrics.increment("usage_job_component.#{job_name}.#{component_to_test}.#{poolname}")
422
519
  rescue StandardError => e
423
- logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
520
+ $logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
521
+ raise
424
522
  end
425
523
 
426
524
  def component_to_test(match, labels_string)
@@ -432,7 +530,7 @@ module Vmpooler
432
530
  next if value.nil?
433
531
  return value if key == match
434
532
  end
435
- nil
533
+ 'none'
436
534
  end
437
535
 
438
536
  def purge_unused_vms_and_folders
@@ -444,7 +542,7 @@ module Vmpooler
444
542
  if provider_purge
445
543
  Thread.new do
446
544
  begin
447
- purge_vms_and_folders(provider.to_s)
545
+ purge_vms_and_folders($providers[provider.to_s])
448
546
  rescue StandardError => e
449
547
  $logger.log('s', "[!] failed while purging provider #{provider} VMs and folders with an error: #{e}")
450
548
  end
@@ -455,13 +553,14 @@ module Vmpooler
455
553
  end
456
554
 
457
555
  # Return a list of pool folders
458
- def pool_folders(provider_name)
556
+ def pool_folders(provider)
557
+ provider_name = provider.name
459
558
  folders = {}
460
559
  $config[:pools].each do |pool|
461
560
  next unless pool['provider'] == provider_name
462
561
 
463
562
  folder_parts = pool['folder'].split('/')
464
- datacenter = $providers[provider_name].get_target_datacenter_from_config(pool['name'])
563
+ datacenter = provider.get_target_datacenter_from_config(pool['name'])
465
564
  folders[folder_parts.pop] = "#{datacenter}/vm/#{folder_parts.join('/')}"
466
565
  end
467
566
  folders
@@ -478,8 +577,8 @@ module Vmpooler
478
577
  def purge_vms_and_folders(provider)
479
578
  configured_folders = pool_folders(provider)
480
579
  base_folders = get_base_folders(configured_folders)
481
- whitelist = $providers[provider].provider_config['folder_whitelist']
482
- $providers[provider].purge_unconfigured_folders(base_folders, configured_folders, whitelist)
580
+ whitelist = provider.provider_config['folder_whitelist']
581
+ provider.purge_unconfigured_folders(base_folders, configured_folders, whitelist)
483
582
  end
484
583
 
485
584
  def create_vm_disk(pool_name, vm, disk_size, provider)
@@ -505,10 +604,12 @@ module Vmpooler
505
604
  finish = format('%<time>.2f', time: Time.now - start)
506
605
 
507
606
  if result
508
- rdisks = $redis.hget('vmpooler__vm__' + vm_name, 'disk')
509
- disks = rdisks ? rdisks.split(':') : []
510
- disks.push("+#{disk_size}gb")
511
- $redis.hset('vmpooler__vm__' + vm_name, 'disk', disks.join(':'))
607
+ @redis.with_metrics do |redis|
608
+ rdisks = redis.hget('vmpooler__vm__' + vm_name, 'disk')
609
+ disks = rdisks ? rdisks.split(':') : []
610
+ disks.push("+#{disk_size}gb")
611
+ redis.hset('vmpooler__vm__' + vm_name, 'disk', disks.join(':'))
612
+ end
512
613
 
513
614
  $logger.log('s', "[+] [disk_manager] '#{vm_name}' attached #{disk_size}gb disk in #{finish} seconds")
514
615
  else
@@ -538,7 +639,9 @@ module Vmpooler
538
639
  finish = format('%<time>.2f', time: Time.now - start)
539
640
 
540
641
  if result
541
- $redis.hset('vmpooler__vm__' + vm_name, 'snapshot:' + snapshot_name, Time.now.to_s)
642
+ @redis.with_metrics do |redis|
643
+ redis.hset('vmpooler__vm__' + vm_name, 'snapshot:' + snapshot_name, Time.now.to_s)
644
+ end
542
645
  $logger.log('s', "[+] [snapshot_manager] '#{vm_name}' snapshot created in #{finish} seconds")
543
646
  else
544
647
  $logger.log('s', "[+] [snapshot_manager] Failed to snapshot '#{vm_name}'")
@@ -594,9 +697,9 @@ module Vmpooler
594
697
  @default_providers ||= %w[vsphere dummy]
595
698
  end
596
699
 
597
- def get_pool_name_for_vm(vm_name)
700
+ def get_pool_name_for_vm(vm_name, redis)
598
701
  # the 'template' is a bad name. Should really be 'poolname'
599
- $redis.hget('vmpooler__vm__' + vm_name, 'template')
702
+ redis.hget('vmpooler__vm__' + vm_name, 'template')
600
703
  end
601
704
 
602
705
  # @param pool_name [String] - the name of the pool
@@ -628,19 +731,21 @@ module Vmpooler
628
731
  end
629
732
 
630
733
  def _check_disk_queue
631
- task_detail = $redis.spop('vmpooler__tasks__disk')
632
- unless task_detail.nil?
633
- begin
634
- vm_name, disk_size = task_detail.split(':')
635
- pool_name = get_pool_name_for_vm(vm_name)
636
- raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
734
+ @redis.with_metrics do |redis|
735
+ task_detail = redis.spop('vmpooler__tasks__disk')
736
+ unless task_detail.nil?
737
+ begin
738
+ vm_name, disk_size = task_detail.split(':')
739
+ pool_name = get_pool_name_for_vm(vm_name, redis)
740
+ raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
637
741
 
638
- provider = get_provider_for_pool(pool_name)
639
- raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
742
+ provider = get_provider_for_pool(pool_name)
743
+ raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
640
744
 
641
- create_vm_disk(pool_name, vm_name, disk_size, provider)
642
- rescue StandardError => e
643
- $logger.log('s', "[!] [disk_manager] disk creation appears to have failed: #{e}")
745
+ create_vm_disk(pool_name, vm_name, disk_size, provider)
746
+ rescue StandardError => e
747
+ $logger.log('s', "[!] [disk_manager] disk creation appears to have failed: #{e}")
748
+ end
644
749
  end
645
750
  end
646
751
  end
@@ -664,37 +769,39 @@ module Vmpooler
664
769
  end
665
770
 
666
771
  def _check_snapshot_queue
667
- task_detail = $redis.spop('vmpooler__tasks__snapshot')
772
+ @redis.with_metrics do |redis|
773
+ task_detail = redis.spop('vmpooler__tasks__snapshot')
668
774
 
669
- unless task_detail.nil?
670
- begin
671
- vm_name, snapshot_name = task_detail.split(':')
672
- pool_name = get_pool_name_for_vm(vm_name)
673
- raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
775
+ unless task_detail.nil?
776
+ begin
777
+ vm_name, snapshot_name = task_detail.split(':')
778
+ pool_name = get_pool_name_for_vm(vm_name, redis)
779
+ raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
674
780
 
675
- provider = get_provider_for_pool(pool_name)
676
- raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
781
+ provider = get_provider_for_pool(pool_name)
782
+ raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
677
783
 
678
- create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
679
- rescue StandardError => e
680
- $logger.log('s', "[!] [snapshot_manager] snapshot create appears to have failed: #{e}")
784
+ create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
785
+ rescue StandardError => e
786
+ $logger.log('s', "[!] [snapshot_manager] snapshot create appears to have failed: #{e}")
787
+ end
681
788
  end
682
- end
683
789
 
684
- task_detail = $redis.spop('vmpooler__tasks__snapshot-revert')
790
+ task_detail = redis.spop('vmpooler__tasks__snapshot-revert')
685
791
 
686
- unless task_detail.nil?
687
- begin
688
- vm_name, snapshot_name = task_detail.split(':')
689
- pool_name = get_pool_name_for_vm(vm_name)
690
- raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
792
+ unless task_detail.nil?
793
+ begin
794
+ vm_name, snapshot_name = task_detail.split(':')
795
+ pool_name = get_pool_name_for_vm(vm_name, redis)
796
+ raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
691
797
 
692
- provider = get_provider_for_pool(pool_name)
693
- raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
798
+ provider = get_provider_for_pool(pool_name)
799
+ raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
694
800
 
695
- revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
696
- rescue StandardError => e
697
- $logger.log('s', "[!] [snapshot_manager] snapshot revert appears to have failed: #{e}")
801
+ revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
802
+ rescue StandardError => e
803
+ $logger.log('s', "[!] [snapshot_manager] snapshot revert appears to have failed: #{e}")
804
+ end
698
805
  end
699
806
  end
700
807
  end
@@ -704,7 +811,9 @@ module Vmpooler
704
811
  begin
705
812
  mutex = vm_mutex(vm_name)
706
813
  mutex.synchronize do
707
- $redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
814
+ @redis.with_metrics do |redis|
815
+ redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
816
+ end
708
817
  provider.migrate_vm(pool_name, vm_name)
709
818
  end
710
819
  rescue StandardError => e
@@ -737,47 +846,65 @@ module Vmpooler
737
846
  wakeup_by = Time.now + wakeup_period
738
847
  return if time_passed?(:exit_by, exit_by)
739
848
 
740
- initial_ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}") if options[:pool_size_change]
849
+ @redis.with_metrics do |redis|
850
+ initial_ready_size = redis.scard("vmpooler__ready__#{options[:poolname]}") if options[:pool_size_change]
741
851
 
742
- initial_clone_target = $redis.hget("vmpooler__pool__#{options[:poolname]}", options[:clone_target]) if options[:clone_target_change]
852
+ initial_clone_target = redis.hget("vmpooler__pool__#{options[:poolname]}", options[:clone_target]) if options[:clone_target_change]
743
853
 
744
- initial_template = $redis.hget('vmpooler__template__prepared', options[:poolname]) if options[:pool_template_change]
854
+ initial_template = redis.hget('vmpooler__template__prepared', options[:poolname]) if options[:pool_template_change]
745
855
 
746
- loop do
747
- sleep(1)
748
- break if time_passed?(:exit_by, exit_by)
856
+ loop do
857
+ sleep(1)
858
+ break if time_passed?(:exit_by, exit_by)
749
859
 
750
- # Check for wakeup events
751
- if time_passed?(:wakeup_by, wakeup_by)
752
- wakeup_by = Time.now + wakeup_period
860
+ # Check for wakeup events
861
+ if time_passed?(:wakeup_by, wakeup_by)
862
+ wakeup_by = Time.now + wakeup_period
753
863
 
754
- # Wakeup if the number of ready VMs has changed
755
- if options[:pool_size_change]
756
- ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}")
757
- break unless ready_size == initial_ready_size
758
- end
864
+ # Wakeup if the number of ready VMs has changed
865
+ if options[:pool_size_change]
866
+ ready_size = redis.scard("vmpooler__ready__#{options[:poolname]}")
867
+ break unless ready_size == initial_ready_size
868
+ end
759
869
 
760
- if options[:clone_target_change]
761
- clone_target = $redis.hget('vmpooler__config__clone_target}', options[:poolname])
762
- if clone_target
763
- break unless clone_target == initial_clone_target
870
+ if options[:clone_target_change]
871
+ clone_target = redis.hget('vmpooler__config__clone_target}', options[:poolname])
872
+ if clone_target
873
+ break unless clone_target == initial_clone_target
874
+ end
764
875
  end
765
- end
766
876
 
767
- if options[:pool_template_change]
768
- configured_template = $redis.hget('vmpooler__config__template', options[:poolname])
769
- if configured_template
770
- break unless initial_template == configured_template
877
+ if options[:pool_template_change]
878
+ configured_template = redis.hget('vmpooler__config__template', options[:poolname])
879
+ if configured_template
880
+ break unless initial_template == configured_template
881
+ end
882
+ end
883
+
884
+ if options[:pool_reset]
885
+ pending = redis.sismember('vmpooler__poolreset', options[:poolname])
886
+ break if pending
771
887
  end
772
- end
773
888
 
774
- if options[:pool_reset]
775
- break if $redis.sismember('vmpooler__poolreset', options[:poolname])
889
+ if options[:pending_vm]
890
+ pending_vm_count = redis.scard("vmpooler__pending__#{options[:poolname]}")
891
+ break unless pending_vm_count == 0
892
+ end
893
+
894
+ if options[:ondemand_request]
895
+ redis.multi
896
+ redis.zcard('vmpooler__provisioning__request')
897
+ redis.zcard('vmpooler__provisioning__processing')
898
+ redis.zcard('vmpooler__odcreate__task')
899
+ od_request, od_processing, od_createtask = redis.exec
900
+ break unless od_request == 0
901
+ break unless od_processing == 0
902
+ break unless od_createtask == 0
903
+ end
776
904
  end
777
905
 
906
+ break if time_passed?(:exit_by, exit_by)
778
907
  end
779
-
780
- break if time_passed?(:exit_by, exit_by)
781
908
  end
782
909
  end
783
910
 
@@ -813,7 +940,7 @@ module Vmpooler
813
940
  loop_delay = (loop_delay * loop_delay_decay).to_i
814
941
  loop_delay = loop_delay_max if loop_delay > loop_delay_max
815
942
  end
816
- sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true, clone_target_change: true, pool_reset: true)
943
+ sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true, clone_target_change: true, pending_vm: true, pool_reset: true)
817
944
 
818
945
  unless maxloop == 0
819
946
  break if loop_count >= maxloop
@@ -843,77 +970,84 @@ module Vmpooler
843
970
  end
844
971
 
845
972
  def sync_pool_template(pool)
846
- pool_template = $redis.hget('vmpooler__config__template', pool['name'])
847
- if pool_template
848
- pool['template'] = pool_template unless pool['template'] == pool_template
973
+ @redis.with_metrics do |redis|
974
+ pool_template = redis.hget('vmpooler__config__template', pool['name'])
975
+ if pool_template
976
+ pool['template'] = pool_template unless pool['template'] == pool_template
977
+ end
849
978
  end
850
979
  end
851
980
 
852
- def prepare_template(pool, provider)
981
+ def prepare_template(pool, provider, redis)
853
982
  if $config[:config]['create_template_delta_disks']
854
- unless $redis.sismember('vmpooler__template__deltas', pool['template'])
983
+ unless redis.sismember('vmpooler__template__deltas', pool['template'])
855
984
  begin
856
985
  provider.create_template_delta_disks(pool)
857
- $redis.sadd('vmpooler__template__deltas', pool['template'])
986
+ redis.sadd('vmpooler__template__deltas', pool['template'])
858
987
  rescue StandardError => e
859
988
  $logger.log('s', "[!] [#{pool['name']}] failed while preparing a template with an error. As a result vmpooler could not create the template delta disks. Either a template delta disk already exists, or the template delta disk creation failed. The error is: #{e}")
860
989
  end
861
990
  end
862
991
  end
863
- $redis.hset('vmpooler__template__prepared', pool['name'], pool['template'])
992
+ redis.hset('vmpooler__template__prepared', pool['name'], pool['template'])
864
993
  end
865
994
 
866
995
  def evaluate_template(pool, provider)
867
996
  mutex = pool_mutex(pool['name'])
868
- prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
869
- configured_template = $redis.hget('vmpooler__config__template', pool['name'])
870
997
  return if mutex.locked?
871
998
 
872
- if prepared_template.nil?
873
- mutex.synchronize do
874
- prepare_template(pool, provider)
875
- prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
876
- end
877
- elsif prepared_template != pool['template']
878
- if configured_template.nil?
999
+ catch :update_not_needed do
1000
+ @redis.with_metrics do |redis|
1001
+ prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
1002
+ configured_template = redis.hget('vmpooler__config__template', pool['name'])
1003
+
1004
+ if prepared_template.nil?
1005
+ mutex.synchronize do
1006
+ prepare_template(pool, provider, redis)
1007
+ prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
1008
+ end
1009
+ elsif prepared_template != pool['template']
1010
+ if configured_template.nil?
1011
+ mutex.synchronize do
1012
+ prepare_template(pool, provider, redis)
1013
+ prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
1014
+ end
1015
+ end
1016
+ end
1017
+ throw :update_not_needed if configured_template.nil?
1018
+ throw :update_not_needed if configured_template == prepared_template
1019
+
879
1020
  mutex.synchronize do
880
- prepare_template(pool, provider)
881
- prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
1021
+ update_pool_template(pool, provider, configured_template, prepared_template, redis)
882
1022
  end
883
1023
  end
884
1024
  end
885
- return if configured_template.nil?
886
- return if configured_template == prepared_template
887
-
888
- mutex.synchronize do
889
- update_pool_template(pool, provider, configured_template, prepared_template)
890
- end
891
1025
  end
892
1026
 
893
- def drain_pool(poolname)
1027
+ def drain_pool(poolname, redis)
894
1028
  # Clear a pool of ready and pending instances
895
- if $redis.scard("vmpooler__ready__#{poolname}") > 0
1029
+ if redis.scard("vmpooler__ready__#{poolname}") > 0
896
1030
  $logger.log('s', "[*] [#{poolname}] removing ready instances")
897
- $redis.smembers("vmpooler__ready__#{poolname}").each do |vm|
898
- move_vm_queue(poolname, vm, 'ready', 'completed')
1031
+ redis.smembers("vmpooler__ready__#{poolname}").each do |vm|
1032
+ move_vm_queue(poolname, vm, 'ready', 'completed', redis)
899
1033
  end
900
1034
  end
901
- if $redis.scard("vmpooler__pending__#{poolname}") > 0
1035
+ if redis.scard("vmpooler__pending__#{poolname}") > 0
902
1036
  $logger.log('s', "[*] [#{poolname}] removing pending instances")
903
- $redis.smembers("vmpooler__pending__#{poolname}").each do |vm|
904
- move_vm_queue(poolname, vm, 'pending', 'completed')
1037
+ redis.smembers("vmpooler__pending__#{poolname}").each do |vm|
1038
+ move_vm_queue(poolname, vm, 'pending', 'completed', redis)
905
1039
  end
906
1040
  end
907
1041
  end
908
1042
 
909
- def update_pool_template(pool, provider, configured_template, prepared_template)
1043
+ def update_pool_template(pool, provider, configured_template, prepared_template, redis)
910
1044
  pool['template'] = configured_template
911
1045
  $logger.log('s', "[*] [#{pool['name']}] template updated from #{prepared_template} to #{configured_template}")
912
1046
  # Remove all ready and pending VMs so new instances are created from the new template
913
- drain_pool(pool['name'])
1047
+ drain_pool(pool['name'], redis)
914
1048
  # Prepare template for deployment
915
1049
  $logger.log('s', "[*] [#{pool['name']}] preparing pool template for deployment")
916
- prepare_template(pool, provider)
1050
+ prepare_template(pool, provider, redis)
917
1051
  $logger.log('s', "[*] [#{pool['name']}] is ready for use")
918
1052
  end
919
1053
 
@@ -921,38 +1055,45 @@ module Vmpooler
921
1055
  mutex = pool_mutex(pool['name'])
922
1056
  return if mutex.locked?
923
1057
 
924
- clone_target = $redis.hget('vmpooler__config__clone_target', pool['name'])
925
- return if clone_target.nil?
926
- return if clone_target == pool['clone_target']
1058
+ @redis.with_metrics do |redis|
1059
+ clone_target = redis.hget('vmpooler__config__clone_target', pool['name'])
1060
+ break if clone_target.nil?
1061
+ break if clone_target == pool['clone_target']
927
1062
 
928
- $logger.log('s', "[*] [#{pool['name']}] clone updated from #{pool['clone_target']} to #{clone_target}")
929
- mutex.synchronize do
930
- pool['clone_target'] = clone_target
931
- # Remove all ready and pending VMs so new instances are created for the new clone_target
932
- drain_pool(pool['name'])
1063
+ $logger.log('s', "[*] [#{pool['name']}] clone updated from #{pool['clone_target']} to #{clone_target}")
1064
+ mutex.synchronize do
1065
+ pool['clone_target'] = clone_target
1066
+ # Remove all ready and pending VMs so new instances are created for the new clone_target
1067
+ drain_pool(pool['name'], redis)
1068
+ end
1069
+ $logger.log('s', "[*] [#{pool['name']}] is ready for use")
933
1070
  end
934
- $logger.log('s', "[*] [#{pool['name']}] is ready for use")
935
1071
  end
936
1072
 
937
1073
  def remove_excess_vms(pool)
938
- ready = $redis.scard("vmpooler__ready__#{pool['name']}")
939
- total = $redis.scard("vmpooler__pending__#{pool['name']}") + ready
940
- return if total.nil?
941
- return if total == 0
1074
+ @redis.with_metrics do |redis|
1075
+ redis.multi
1076
+ redis.scard("vmpooler__ready__#{pool['name']}")
1077
+ redis.scard("vmpooler__pending__#{pool['name']}")
1078
+ ready, pending = redis.exec
1079
+ total = pending.to_i + ready.to_i
1080
+ break if total.nil?
1081
+ break if total == 0
942
1082
 
943
- mutex = pool_mutex(pool['name'])
944
- return if mutex.locked?
945
- return unless ready > pool['size']
1083
+ mutex = pool_mutex(pool['name'])
1084
+ break if mutex.locked?
1085
+ break unless ready.to_i > pool['size']
946
1086
 
947
- mutex.synchronize do
948
- difference = ready - pool['size']
949
- difference.times do
950
- next_vm = $redis.spop("vmpooler__ready__#{pool['name']}")
951
- move_vm_queue(pool['name'], next_vm, 'ready', 'completed')
952
- end
953
- if total > ready
954
- $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
955
- move_vm_queue(pool['name'], vm, 'pending', 'completed')
1087
+ mutex.synchronize do
1088
+ difference = ready.to_i - pool['size']
1089
+ difference.times do
1090
+ next_vm = redis.spop("vmpooler__ready__#{pool['name']}")
1091
+ move_vm_queue(pool['name'], next_vm, 'ready', 'completed', redis)
1092
+ end
1093
+ if total > ready
1094
+ redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
1095
+ move_vm_queue(pool['name'], vm, 'pending', 'completed', redis)
1096
+ end
956
1097
  end
957
1098
  end
958
1099
  end
@@ -962,26 +1103,30 @@ module Vmpooler
962
1103
  mutex = pool_mutex(pool['name'])
963
1104
  return if mutex.locked?
964
1105
 
965
- poolsize = $redis.hget('vmpooler__config__poolsize', pool['name'])
966
- return if poolsize.nil?
1106
+ @redis.with_metrics do |redis|
1107
+ poolsize = redis.hget('vmpooler__config__poolsize', pool['name'])
1108
+ break if poolsize.nil?
967
1109
 
968
- poolsize = Integer(poolsize)
969
- return if poolsize == pool['size']
1110
+ poolsize = Integer(poolsize)
1111
+ break if poolsize == pool['size']
970
1112
 
971
- mutex.synchronize do
972
- pool['size'] = poolsize
1113
+ mutex.synchronize do
1114
+ pool['size'] = poolsize
1115
+ end
973
1116
  end
974
1117
  end
975
1118
 
976
1119
  def reset_pool(pool)
977
1120
  poolname = pool['name']
978
- return unless $redis.sismember('vmpooler__poolreset', poolname)
1121
+ @redis.with_metrics do |redis|
1122
+ break unless redis.sismember('vmpooler__poolreset', poolname)
979
1123
 
980
- $redis.srem('vmpooler__poolreset', poolname)
981
- mutex = pool_mutex(poolname)
982
- mutex.synchronize do
983
- drain_pool(poolname)
984
- $logger.log('s', "[*] [#{poolname}] reset has cleared ready and pending instances")
1124
+ redis.srem('vmpooler__poolreset', poolname)
1125
+ mutex = pool_mutex(poolname)
1126
+ mutex.synchronize do
1127
+ drain_pool(poolname, redis)
1128
+ $logger.log('s', "[*] [#{poolname}] reset has cleared ready and pending instances")
1129
+ end
985
1130
  end
986
1131
  end
987
1132
 
@@ -990,21 +1135,23 @@ module Vmpooler
990
1135
  begin
991
1136
  mutex = pool_mutex(pool['name'])
992
1137
  mutex.synchronize do
993
- provider.vms_in_pool(pool['name']).each do |vm|
994
- if !$redis.sismember('vmpooler__running__' + pool['name'], vm['name']) &&
995
- !$redis.sismember('vmpooler__ready__' + pool['name'], vm['name']) &&
996
- !$redis.sismember('vmpooler__pending__' + pool['name'], vm['name']) &&
997
- !$redis.sismember('vmpooler__completed__' + pool['name'], vm['name']) &&
998
- !$redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']) &&
999
- !$redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])
1000
-
1001
- pool_check_response[:discovered_vms] += 1
1002
- $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])
1003
-
1004
- $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
1138
+ @redis.with_metrics do |redis|
1139
+ provider.vms_in_pool(pool['name']).each do |vm|
1140
+ if !redis.sismember('vmpooler__running__' + pool['name'], vm['name']) &&
1141
+ !redis.sismember('vmpooler__ready__' + pool['name'], vm['name']) &&
1142
+ !redis.sismember('vmpooler__pending__' + pool['name'], vm['name']) &&
1143
+ !redis.sismember('vmpooler__completed__' + pool['name'], vm['name']) &&
1144
+ !redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']) &&
1145
+ !redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])
1146
+
1147
+ pool_check_response[:discovered_vms] += 1
1148
+ redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])
1149
+
1150
+ $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
1151
+ end
1152
+
1153
+ inventory[vm['name']] = 1
1005
1154
  end
1006
-
1007
- inventory[vm['name']] = 1
1008
1155
  end
1009
1156
  end
1010
1157
  rescue StandardError => e
@@ -1015,96 +1162,112 @@ module Vmpooler
1015
1162
  end
1016
1163
 
1017
1164
  def check_running_pool_vms(pool_name, provider, pool_check_response, inventory)
1018
- $redis.smembers("vmpooler__running__#{pool_name}").each do |vm|
1019
- if inventory[vm]
1020
- begin
1021
- vm_lifetime = $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12
1022
- pool_check_response[:checked_running_vms] += 1
1023
- check_running_vm(vm, pool_name, vm_lifetime, provider)
1024
- rescue StandardError => e
1025
- $logger.log('d', "[!] [#{pool_name}] _check_pool with an error while evaluating running VMs: #{e}")
1165
+ @redis.with_metrics do |redis|
1166
+ redis.smembers("vmpooler__running__#{pool_name}").each do |vm|
1167
+ if inventory[vm]
1168
+ begin
1169
+ vm_lifetime = redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12
1170
+ pool_check_response[:checked_running_vms] += 1
1171
+ check_running_vm(vm, pool_name, vm_lifetime, provider)
1172
+ rescue StandardError => e
1173
+ $logger.log('d', "[!] [#{pool_name}] _check_pool with an error while evaluating running VMs: #{e}")
1174
+ end
1175
+ else
1176
+ move_vm_queue(pool_name, vm, 'running', 'completed', redis, 'is a running VM but is missing from inventory. Marking as completed.')
1026
1177
  end
1027
- else
1028
- move_vm_queue(pool_name, vm, 'running', 'completed', 'is a running VM but is missing from inventory. Marking as completed.')
1029
1178
  end
1030
1179
  end
1031
1180
  end
1032
1181
 
1033
- def check_ready_pool_vms(pool_name, provider, pool_check_response, inventory, pool_ttl = 0)
1034
- $redis.smembers("vmpooler__ready__#{pool_name}").each do |vm|
1035
- if inventory[vm]
1036
- begin
1037
- pool_check_response[:checked_ready_vms] += 1
1038
- check_ready_vm(vm, pool_name, pool_ttl || 0, provider)
1039
- rescue StandardError => e
1040
- $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating ready VMs: #{e}")
1182
+ def check_ready_pool_vms(pool_name, provider, pool_check_response, inventory, pool_ttl)
1183
+ @redis.with_metrics do |redis|
1184
+ redis.smembers("vmpooler__ready__#{pool_name}").each do |vm|
1185
+ if inventory[vm]
1186
+ begin
1187
+ pool_check_response[:checked_ready_vms] += 1
1188
+ check_ready_vm(vm, pool_name, pool_ttl, provider)
1189
+ rescue StandardError => e
1190
+ $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating ready VMs: #{e}")
1191
+ end
1192
+ else
1193
+ move_vm_queue(pool_name, vm, 'ready', 'completed', redis, 'is a ready VM but is missing from inventory. Marking as completed.')
1041
1194
  end
1042
- else
1043
- move_vm_queue(pool_name, vm, 'ready', 'completed', 'is a ready VM but is missing from inventory. Marking as completed.')
1044
1195
  end
1045
1196
  end
1046
1197
  end
1047
1198
 
1048
- def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout = nil)
1199
+ def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout)
1049
1200
  pool_timeout ||= $config[:config]['timeout'] || 15
1050
- $redis.smembers("vmpooler__pending__#{pool_name}").reverse.each do |vm|
1051
- if inventory[vm]
1052
- begin
1053
- pool_check_response[:checked_pending_vms] += 1
1054
- check_pending_vm(vm, pool_name, pool_timeout, provider)
1055
- rescue StandardError => e
1056
- $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating pending VMs: #{e}")
1201
+ @redis.with_metrics do |redis|
1202
+ redis.smembers("vmpooler__pending__#{pool_name}").reverse.each do |vm|
1203
+ if inventory[vm]
1204
+ begin
1205
+ pool_check_response[:checked_pending_vms] += 1
1206
+ check_pending_vm(vm, pool_name, pool_timeout, provider)
1207
+ rescue StandardError => e
1208
+ $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating pending VMs: #{e}")
1209
+ end
1210
+ else
1211
+ fail_pending_vm(vm, pool_name, pool_timeout, redis, false)
1057
1212
  end
1058
- else
1059
- fail_pending_vm(vm, pool_name, pool_timeout, false)
1060
1213
  end
1061
1214
  end
1062
1215
  end
1063
1216
 
1064
1217
  def check_completed_pool_vms(pool_name, provider, pool_check_response, inventory)
1065
- $redis.smembers("vmpooler__completed__#{pool_name}").each do |vm|
1066
- if inventory[vm]
1067
- begin
1068
- pool_check_response[:destroyed_vms] += 1
1069
- destroy_vm(vm, pool_name, provider)
1070
- rescue StandardError => e
1071
- $redis.srem("vmpooler__completed__#{pool_name}", vm)
1072
- $redis.hdel("vmpooler__active__#{pool_name}", vm)
1073
- $redis.del("vmpooler__vm__#{vm}")
1074
- $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating completed VMs: #{e}")
1218
+ @redis.with_metrics do |redis|
1219
+ redis.smembers("vmpooler__completed__#{pool_name}").each do |vm|
1220
+ if inventory[vm]
1221
+ begin
1222
+ pool_check_response[:destroyed_vms] += 1
1223
+ destroy_vm(vm, pool_name, provider)
1224
+ rescue StandardError => e
1225
+ redis.pipelined do
1226
+ redis.srem("vmpooler__completed__#{pool_name}", vm)
1227
+ redis.hdel("vmpooler__active__#{pool_name}", vm)
1228
+ redis.del("vmpooler__vm__#{vm}")
1229
+ end
1230
+ $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating completed VMs: #{e}")
1231
+ end
1232
+ else
1233
+ $logger.log('s', "[!] [#{pool_name}] '#{vm}' not found in inventory, removed from 'completed' queue")
1234
+ redis.pipelined do
1235
+ redis.srem("vmpooler__completed__#{pool_name}", vm)
1236
+ redis.hdel("vmpooler__active__#{pool_name}", vm)
1237
+ redis.del("vmpooler__vm__#{vm}")
1238
+ end
1075
1239
  end
1076
- else
1077
- $logger.log('s', "[!] [#{pool_name}] '#{vm}' not found in inventory, removed from 'completed' queue")
1078
- $redis.srem("vmpooler__completed__#{pool_name}", vm)
1079
- $redis.hdel("vmpooler__active__#{pool_name}", vm)
1080
- $redis.del("vmpooler__vm__#{vm}")
1081
1240
  end
1082
1241
  end
1083
1242
  end
1084
1243
 
1085
1244
  def check_discovered_pool_vms(pool_name)
1086
- $redis.smembers("vmpooler__discovered__#{pool_name}").reverse.each do |vm|
1087
- %w[pending ready running completed].each do |queue|
1088
- if $redis.sismember("vmpooler__#{queue}__#{pool_name}", vm)
1089
- $logger.log('d', "[!] [#{pool_name}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
1090
- $redis.srem("vmpooler__discovered__#{pool_name}", vm)
1245
+ @redis.with_metrics do |redis|
1246
+ redis.smembers("vmpooler__discovered__#{pool_name}").reverse.each do |vm|
1247
+ %w[pending ready running completed].each do |queue|
1248
+ if redis.sismember("vmpooler__#{queue}__#{pool_name}", vm)
1249
+ $logger.log('d', "[!] [#{pool_name}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
1250
+ redis.srem("vmpooler__discovered__#{pool_name}", vm)
1251
+ end
1091
1252
  end
1092
- end
1093
1253
 
1094
- $redis.smove("vmpooler__discovered__#{pool_name}", "vmpooler__completed__#{pool_name}", vm) if $redis.sismember("vmpooler__discovered__#{pool_name}", vm)
1254
+ redis.smove("vmpooler__discovered__#{pool_name}", "vmpooler__completed__#{pool_name}", vm) if redis.sismember("vmpooler__discovered__#{pool_name}", vm)
1255
+ end
1095
1256
  end
1096
1257
  rescue StandardError => e
1097
1258
  $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating discovered VMs: #{e}")
1098
1259
  end
1099
1260
 
1100
1261
  def check_migrating_pool_vms(pool_name, provider, pool_check_response, inventory)
1101
- $redis.smembers("vmpooler__migrating__#{pool_name}").reverse.each do |vm|
1102
- if inventory[vm]
1103
- begin
1104
- pool_check_response[:migrated_vms] += 1
1105
- migrate_vm(vm, pool_name, provider)
1106
- rescue StandardError => e
1107
- $logger.log('s', "[x] [#{pool_name}] '#{vm}' failed to migrate: #{e}")
1262
+ @redis.with_metrics do |redis|
1263
+ redis.smembers("vmpooler__migrating__#{pool_name}").reverse.each do |vm|
1264
+ if inventory[vm]
1265
+ begin
1266
+ pool_check_response[:migrated_vms] += 1
1267
+ migrate_vm(vm, pool_name, provider)
1268
+ rescue StandardError => e
1269
+ $logger.log('s', "[x] [#{pool_name}] '#{vm}' failed to migrate: #{e}")
1270
+ end
1108
1271
  end
1109
1272
  end
1110
1273
  end
@@ -1113,29 +1276,37 @@ module Vmpooler
1113
1276
  def repopulate_pool_vms(pool_name, provider, pool_check_response, pool_size)
1114
1277
  return if pool_mutex(pool_name).locked?
1115
1278
 
1116
- ready = $redis.scard("vmpooler__ready__#{pool_name}")
1117
- total = $redis.scard("vmpooler__pending__#{pool_name}") + ready
1118
-
1119
- $metrics.gauge("ready.#{pool_name}", $redis.scard("vmpooler__ready__#{pool_name}"))
1120
- $metrics.gauge("running.#{pool_name}", $redis.scard("vmpooler__running__#{pool_name}"))
1121
-
1122
- if $redis.get("vmpooler__empty__#{pool_name}")
1123
- $redis.del("vmpooler__empty__#{pool_name}") unless ready == 0
1124
- elsif ready == 0
1125
- $redis.set("vmpooler__empty__#{pool_name}", 'true')
1126
- $logger.log('s', "[!] [#{pool_name}] is empty")
1127
- end
1279
+ @redis.with_metrics do |redis|
1280
+ redis.multi
1281
+ redis.scard("vmpooler__ready__#{pool_name}")
1282
+ redis.scard("vmpooler__pending__#{pool_name}")
1283
+ redis.scard("vmpooler__running__#{pool_name}")
1284
+ ready, pending, running = redis.exec
1285
+ total = pending.to_i + ready.to_i
1286
+
1287
+ $metrics.gauge("ready.#{pool_name}", ready)
1288
+ $metrics.gauge("running.#{pool_name}", running)
1289
+
1290
+ unless pool_size == 0
1291
+ if redis.get("vmpooler__empty__#{pool_name}")
1292
+ redis.del("vmpooler__empty__#{pool_name}") unless ready == 0
1293
+ elsif ready == 0
1294
+ redis.set("vmpooler__empty__#{pool_name}", 'true')
1295
+ $logger.log('s', "[!] [#{pool_name}] is empty")
1296
+ end
1297
+ end
1128
1298
 
1129
- (pool_size - total).times do
1130
- if $redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
1131
- begin
1132
- $redis.incr('vmpooler__tasks__clone')
1133
- pool_check_response[:cloned_vms] += 1
1134
- clone_vm(pool_name, provider)
1135
- rescue StandardError => e
1136
- $logger.log('s', "[!] [#{pool_name}] clone failed during check_pool with an error: #{e}")
1137
- $redis.decr('vmpooler__tasks__clone')
1138
- raise
1299
+ (pool_size - total.to_i).times do
1300
+ if redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
1301
+ begin
1302
+ redis.incr('vmpooler__tasks__clone')
1303
+ pool_check_response[:cloned_vms] += 1
1304
+ clone_vm(pool_name, provider)
1305
+ rescue StandardError => e
1306
+ $logger.log('s', "[!] [#{pool_name}] clone failed during check_pool with an error: #{e}")
1307
+ redis.decr('vmpooler__tasks__clone')
1308
+ raise
1309
+ end
1139
1310
  end
1140
1311
  end
1141
1312
  end
@@ -1160,7 +1331,7 @@ module Vmpooler
1160
1331
 
1161
1332
  check_running_pool_vms(pool['name'], provider, pool_check_response, inventory)
1162
1333
 
1163
- check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'])
1334
+ check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'] || $config[:config]['ready_ttl'])
1164
1335
 
1165
1336
  check_pending_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['timeout'])
1166
1337
 
@@ -1203,23 +1374,199 @@ module Vmpooler
1203
1374
  #
1204
1375
  # returns an object Vmpooler::PoolManager::Provider::*
1205
1376
  # or raises an error if the class does not exist
1206
- def create_provider_object(config, logger, metrics, provider_class, provider_name, options)
1377
# Instantiates the provider class whose constant name under
# Vmpooler::PoolManager::Provider matches provider_class, ignoring case.
#
# config, logger, metrics, redis_connection_pool, provider_name and options
# are passed straight through to the provider's constructor.
#
# Returns the new provider instance.
# Raises RuntimeError when no constant in the Provider namespace matches.
def create_provider_object(config, logger, metrics, redis_connection_pool, provider_class, provider_name, options)
  provider_klass = Vmpooler::PoolManager::Provider
  matching_constant = provider_klass.constants.find do |classname|
    classname.to_s.casecmp(provider_class) == 0
  end

  # No match means the provider is unknown. The previous guard tested an
  # unassigned `provider` local, so the intended error was never raised.
  raise("Provider '#{provider_class}' is unknown for pool with provider name '#{provider_name}'") if matching_constant.nil?

  provider_klass.const_get(matching_constant).new(config, logger, metrics, redis_connection_pool, provider_name, options)
end
1215
1386
 
1387
# Starts the on-demand provisioning worker in its own thread and records that
# thread in $threads under the 'ondemand_provisioner' key.
#
# All four arguments are handed unchanged to _check_ondemand_requests.
#
# Returns the Thread that was created.
def check_ondemand_requests(maxloop = 0,
                            loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
                            loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
                            loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
  $logger.log('d', '[*] [ondemand_provisioner] starting worker thread')

  worker = Thread.new { _check_ondemand_requests(maxloop, loop_delay_min, loop_delay_max, loop_delay_decay) }
  $threads['ondemand_provisioner'] = worker
end
1398
+
1399
# Body of the on-demand provisioning worker: repeatedly processes on-demand
# requests and waits between passes with a growing delay.
#
# maxloop          - number of passes to run; 0 means loop without limit.
# loop_delay_min   - delay used right after a pass that reported work.
# loop_delay_max   - upper bound for the delay.
# loop_delay_decay - factor the delay is multiplied by after each pass.
#
# Non-nil 'check_loop_delay_min' / 'check_loop_delay_max' /
# 'check_loop_delay_decay' entries in $config[:config] override the arguments.
def _check_ondemand_requests(maxloop = 0,
                             loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
                             loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
                             loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
  settings = $config[:config]
  loop_delay_min = settings['check_loop_delay_min'] unless settings['check_loop_delay_min'].nil?
  loop_delay_max = settings['check_loop_delay_max'] unless settings['check_loop_delay_max'].nil?
  loop_delay_decay = settings['check_loop_delay_decay'] unless settings['check_loop_delay_decay'].nil?

  # A decay of 1.0 or less would never lengthen the delay, so fall back to 2.0.
  loop_delay_decay = 2.0 if loop_delay_decay <= 1.0
  # The maximum may not be missing or sit below the minimum.
  loop_delay_max = loop_delay_min if loop_delay_max.nil? || loop_delay_max < loop_delay_min

  pass = 1
  current_delay = loop_delay_min

  loop do
    handled = process_ondemand_requests

    # Back off first, then snap back to the minimum if this pass did any work.
    current_delay = (current_delay * loop_delay_decay).to_i
    current_delay = loop_delay_min if handled > 0
    current_delay = loop_delay_max if current_delay > loop_delay_max
    sleep_with_wakeup_events(current_delay, loop_delay_min, ondemand_request: true)

    next if maxloop == 0
    break if pass >= maxloop

    pass += 1
  end
end
1429
+
1430
# Runs one pass of on-demand request handling inside a single
# @redis.with_metrics block:
#   1. calls create_ondemand_vms for every id in 'vmpooler__provisioning__request',
#   2. calls process_ondemand_vms,
#   3. calls check_ondemand_requests_ready.
#
# The block evaluates to the number of requests read plus the counts returned
# by steps 2 and 3; the method returns whatever with_metrics returns for it.
def process_ondemand_requests
  @redis.with_metrics do |redis|
    # Treat a nil reply as "no requests" so the iteration and the length
    # below follow the same nil policy.
    requests = redis.zrange('vmpooler__provisioning__request', 0, -1) || []
    requests.each { |request_id| create_ondemand_vms(request_id, redis) }
    provisioning_tasks = process_ondemand_vms(redis)
    requests_ready = check_ondemand_requests_ready(redis)
    requests.length + provisioning_tasks + requests_ready
  end
end
1439
+
1440
# Turns one queued on-demand request into per-platform creation tasks.
#
# Reads the 'requested' field of "vmpooler__odrequest__<request_id>". When the
# field is missing, the request is logged, dropped from
# 'vmpooler__provisioning__request', and nil is returned. Otherwise each
# comma-separated entry is added to 'vmpooler__odcreate__task' as
# "<entry>:<request_id>", and the request id is moved from the request queue
# to 'vmpooler__provisioning__processing', keeping its original score.
def create_ondemand_vms(request_id, redis)
  request_queue = 'vmpooler__provisioning__request'
  requested = redis.hget("vmpooler__odrequest__#{request_id}", 'requested')
  unless requested
    $logger.log('s', "Failed to find odrequest for request_id '#{request_id}'")
    redis.zrem(request_queue, request_id)
    return
  end

  score = redis.zscore(request_queue, request_id)
  platform_requests = requested.split(',')

  redis.pipelined do
    platform_requests.each do |platform_request|
      redis.zadd('vmpooler__odcreate__task', Time.now.to_i, "#{platform_request}:#{request_id}")
    end
    redis.zrem(request_queue, request_id)
    redis.zadd('vmpooler__provisioning__processing', score, request_id)
  end
end
1458
+
1459
# Works through the 'vmpooler__odcreate__task' queue, starting clones for each
# "<alias>:<pool>:<count>:<request_id>" entry while the
# 'vmpooler__tasks__ondemandclone' counter is below
# $config[:config]['ondemand_clone_limit'].
#
# An entry that fits in the free slots is removed from the queue. An entry that
# only partly fits is replaced, at the same score, by one carrying the
# remaining count. Processing stops at the first entry with no free slot.
#
# Returns the number of entries that were in the queue when it was read.
def process_ondemand_vms(redis)
  queue_key = 'vmpooler__odcreate__task'
  queue = redis.zrange(queue_key, 0, -1, with_scores: true)
  ondemand_clone_limit = $config[:config]['ondemand_clone_limit']
  queue.each do |request, score|
    clone_count = redis.get('vmpooler__tasks__ondemandclone').to_i
    slots = ondemand_clone_limit - clone_count
    # A single check covers both "limit reached" and "no slots left".
    break unless slots > 0

    pool_alias, pool, count, request_id = request.split(':')
    count = count.to_i
    provider = get_provider_for_pool(pool)

    # Start as many clones as the request wants, capped by the free slots.
    clones_to_start = [count, slots].min
    clones_to_start.times do
      redis.incr('vmpooler__tasks__ondemandclone')
      clone_vm(pool, provider, request_id, pool_alias)
    end

    if clones_to_start == count
      redis.zrem(queue_key, request)
    else
      # Partially served: requeue what is left under the original score.
      remaining_count = count - clones_to_start
      redis.pipelined do
        redis.zrem(queue_key, request)
        redis.zadd(queue_key, score, "#{pool_alias}:#{pool}:#{remaining_count}:#{request_id}")
      end
    end
  end
  queue.length
end
1493
+
1494
# Reports whether every platform/pool in an on-demand request has been filled.
#
# For each (platform_alias, pool, count) yielded by
# Parsing.get_platform_pool_count for the request's 'requested' field, the
# cardinality of "vmpooler__<request_id>__<platform_alias>__<pool>" must equal
# count.
#
# Returns true when all entries match, false at the first one that does not.
def vms_ready?(request_id, redis)
  request_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
  Parsing.get_platform_pool_count(request_hash['requested']) do |platform_alias, pool, count|
    filled = redis.scard("vmpooler__#{request_id}__#{platform_alias}__#{pool}")
    # Leave the method as soon as one pool is short of its requested count.
    return false unless filled.to_i == count.to_i
  end
  true
end
1505
+
1506
# Calls check_ondemand_request_ready for every (request_id, score) pair in
# 'vmpooler__provisioning__processing'.
#
# Returns the number of in-progress requests that were examined.
def check_ondemand_requests_ready(redis)
  # Treat a nil reply as an empty list so the iteration and the count below
  # follow the same nil policy.
  in_progress_requests = redis.zrange('vmpooler__provisioning__processing', 0, -1, with_scores: true) || []
  in_progress_requests.each do |request_id, score|
    check_ondemand_request_ready(request_id, redis, score)
  end
  in_progress_requests.length
end
1513
+
1514
# Marks an in-progress on-demand request as ready once all of its VMs exist.
#
# score defaults to the request's score in 'vmpooler__provisioning__processing'.
# Returns early (nil) when request_expired? reports the request as expired or
# when vms_ready? is false. Otherwise, in one MULTI/EXEC transaction, sets the
# request hash status to 'ready', puts an expiry on the hash, and removes the
# request from the processing set.
def check_ondemand_request_ready(request_id, redis, score = nil)
  processing_key = 'vmpooler__provisioning__processing'
  score ||= redis.zscore(processing_key, request_id)
  return if request_expired?(request_id, score, redis) || !vms_ready?(request_id, redis)

  # default expiration is one month (30 days, in seconds) to ensure the data
  # does not stay in redis forever
  default_expiration = 2_592_000
  ondemand_hash_key = "vmpooler__odrequest__#{request_id}"

  redis.multi
  redis.hset(ondemand_hash_key, 'status', 'ready')
  redis.expire(ondemand_hash_key, default_expiration)
  redis.zrem(processing_key, request_id)
  redis.exec
end
1530
+
1531
# Checks whether an on-demand request has outlived
# $config[:config]['ondemand_request_ttl'] (multiplied by 60 before comparing
# with the seconds elapsed since score).
#
# When it has, the request is logged, removed from
# 'vmpooler__provisioning__processing', its hash is marked 'failed' and given
# an expiry of $config[:redis]['data_ttl'] * 3600, and
# remove_vms_for_failed_request is called for it.
#
# Returns true when the request expired, false otherwise.
def request_expired?(request_id, score, redis)
  ondemand_request_ttl = $config[:config]['ondemand_request_ttl']
  age = Time.now.to_i - score.to_i
  return false if age <= ondemand_request_ttl * 60

  $logger.log('s', "Ondemand request for '#{request_id}' failed to provision all instances within the configured ttl '#{ondemand_request_ttl}'")
  request_key = "vmpooler__odrequest__#{request_id}"
  expiration_ttl = $config[:redis]['data_ttl'].to_i * 3600
  redis.pipelined do
    redis.zrem('vmpooler__provisioning__processing', request_id)
    redis.hset(request_key, 'status', 'failed')
    redis.expire(request_key, expiration_ttl)
  end
  remove_vms_for_failed_request(request_id, expiration_ttl, redis)
  true
end
1546
+
1547
# Releases the VMs already allocated to an on-demand request that failed.
#
# For each (platform_alias, pool) yielded by Parsing.get_platform_pool_count
# for the request's 'requested' field, every member of
# "vmpooler__<request_id>__<platform_alias>__<pool>" is passed to
# move_vm_queue ('running' -> 'completed'), and the set itself is given
# expiration_ttl as its expiry.
def remove_vms_for_failed_request(request_id, expiration_ttl, redis)
  requested = redis.hgetall("vmpooler__odrequest__#{request_id}")['requested']
  Parsing.get_platform_pool_count(requested) do |platform_alias, pool, _count|
    allocation_key = "vmpooler__#{request_id}__#{platform_alias}__#{pool}"
    allocated_vms = redis.smembers(allocation_key)
    redis.pipelined do
      allocated_vms&.each do |vm|
        move_vm_queue(pool, vm, 'running', 'completed', redis, "moved to completed queue. '#{request_id}' could not be filled in time")
      end
      redis.expire(allocation_key, expiration_ttl)
    end
  end
end
1559
+
1216
1560
  def execute!(maxloop = 0, loop_delay = 1)
1217
1561
  $logger.log('d', 'starting vmpooler')
1218
1562
 
1219
- # Clear out the tasks manager, as we don't know about any tasks at this point
1220
- $redis.set('vmpooler__tasks__clone', 0)
1221
- # Clear out vmpooler__migrations since stale entries may be left after a restart
1222
- $redis.del('vmpooler__migration')
1563
+ @redis.with_metrics do |redis|
1564
+ # Clear out the tasks manager, as we don't know about any tasks at this point
1565
+ redis.set('vmpooler__tasks__clone', 0)
1566
+ redis.set('vmpooler__tasks__ondemandclone', 0)
1567
+ # Clear out vmpooler__migrations since stale entries may be left after a restart
1568
+ redis.del('vmpooler__migration')
1569
+ end
1223
1570
 
1224
1571
  # Copy vSphere settings to correct location. This happens with older configuration files
1225
1572
  if !$config[:vsphere].nil? && ($config[:providers].nil? || $config[:providers][:vsphere].nil?)
@@ -1269,7 +1616,7 @@ module Vmpooler
1269
1616
  provider_class = $config[:providers][provider_name.to_sym]['provider_class']
1270
1617
  end
1271
1618
  begin
1272
- $providers[provider_name] = create_provider_object($config, $logger, $metrics, provider_class, provider_name, {}) if $providers[provider_name].nil?
1619
+ $providers[provider_name] = create_provider_object($config, $logger, $metrics, @redis, provider_class, provider_name, {}) if $providers[provider_name].nil?
1273
1620
  rescue StandardError => e
1274
1621
  $logger.log('s', "Error while creating provider for pool #{pool['name']}: #{e}")
1275
1622
  raise
@@ -1303,6 +1650,13 @@ module Vmpooler
1303
1650
  end
1304
1651
  end
1305
1652
 
1653
+ if !$threads['ondemand_provisioner']
1654
+ check_ondemand_requests
1655
+ elsif !$threads['ondemand_provisioner'].alive?
1656
+ $logger.log('d', '[!] [ondemand_provisioner] worker thread died, restarting')
1657
+ check_ondemand_requests(check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay)
1658
+ end
1659
+
1306
1660
  sleep(loop_delay)
1307
1661
 
1308
1662
  unless maxloop == 0