vmpooler 0.12.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This is an adapted Collector module for vmpooler based on the sample implementation
4
+ # available in the prometheus client_ruby library
5
+ # https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/middleware/collector.rb
6
+ #
7
+ # The code was also failing Rubocop on PR check, so have addressed all the offenses.
8
+ #
9
+ # The method strip_hostnames_from_path (originally strip_ids_from_path) has been adapted
10
+ # to add a match for hostnames in paths, replacing them with a single ":hostname" string to
11
+ # avoid a proliferation of stat lines for each new vm hostname deleted, modified or
12
+ # otherwise queried.
13
+
14
+ require 'benchmark'
15
+ require 'prometheus/client'
16
+ require 'vmpooler/logger'
17
+
18
module Vmpooler
  class Metrics
    class Promstats
      # CollectorMiddleware is a Rack middleware, customised for vmpooler, that
      # records Prometheus metrics for every request passing through it.
      #
      # Metrics are registered on the global Prometheus registry by default. Set
      # the `:registry` option to use a custom registry.
      #
      # Metric names are prefixed with "http_server" by default. Set the
      # `:metrics_prefix` option to use something else.
      #
      # Three metrics are registered:
      # * <prefix>_requests_total            - counter, labels: code, method, path
      # * <prefix>_request_duration_seconds  - histogram, labels: method, path
      # * <prefix>_exceptions_total          - counter, label: exception
      #
      # The label sets are fixed; this class reads no label builder options.
      class CollectorMiddleware
        attr_reader :app, :registry

        # app     - the downstream Rack application (anything responding to #call).
        # options - Hash; only :registry and :metrics_prefix are read.
        def initialize(app, options = {})
          @app = app
          @registry = options[:registry] || Prometheus::Client.registry
          @metrics_prefix = options[:metrics_prefix] || 'http_server'

          init_request_metrics
          init_exception_metrics
        end

        def call(env) # :nodoc:
          trace(env) { @app.call(env) }
        end

        protected

        # Registers the request counter and the request duration histogram.
        def init_request_metrics
          @requests = @registry.counter(
            :"#{@metrics_prefix}_requests_total",
            docstring:
              'The total number of HTTP requests handled by the Rack application.',
            labels: %i[code method path]
          )
          @durations = @registry.histogram(
            :"#{@metrics_prefix}_request_duration_seconds",
            docstring: 'The HTTP response duration of the Rack application.',
            labels: %i[method path]
          )
        end

        # Registers the counter of exceptions escaping the wrapped application.
        def init_exception_metrics
          @exceptions = @registry.counter(
            :"#{@metrics_prefix}_exceptions_total",
            docstring: 'The total number of exceptions raised by the Rack application.',
            labels: [:exception]
          )
        end

        # Times the given block, records the request metrics and returns the
        # block's response untouched. Any StandardError raised by the block is
        # counted under its class name and then re-raised.
        def trace(env)
          response = nil
          duration = Benchmark.realtime { response = yield }
          # The first element of the response is used as the status code label.
          record(env, response.first.to_s, duration)
          response
        rescue StandardError => e
          @exceptions.increment(labels: { exception: e.class.name })
          raise
        end

        # Updates the request counter and duration histogram for one request.
        # Recording is best effort: any StandardError raised while building or
        # submitting the labels is swallowed (nil is returned) so that metrics
        # can never break request handling.
        def record(env, code, duration)
          # Method and path are shared by both metrics, so build them once.
          duration_labels = {
            method: env['REQUEST_METHOD'].downcase,
            path: strip_hostnames_from_path(env['PATH_INFO'])
          }
          counter_labels = { code: code }.merge(duration_labels)

          @requests.increment(labels: counter_labels)
          @durations.observe(duration, labels: duration_labels)
        rescue StandardError
          nil
        end

        # Collapses everything after a "/vm/" or "/ondemand/" path segment so
        # that hostnames and request IDs do not create a new label value per
        # request.
        def strip_hostnames_from_path(path)
          # Custom for /vm path - so we just collect aggregate stats for all usage along this one
          # path. Custom counters are then added for more specific endpoints in v1.rb
          # Since we aren't parsing UID/GIDs as in the original example, these are removed.
          # Similarly, request IDs are also stripped from the /ondemand path.
          # NOTE(review): segments that merely start with these names (e.g.
          # "/ondemandvm/<id>") are not matched - confirm whether any route needs that.
          path
            .gsub(%r{/vm/.+$}, '/vm')
            .gsub(%r{/ondemand/.+$}, '/ondemand')
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems' unless defined?(Gem)
4
+ require 'statsd'
5
+
6
module Vmpooler
  class Metrics
    # Metrics backend that forwards counters, gauges and timings to a statsd
    # server. Every metric name is prefixed with `prefix` and a dot.
    #
    # Submission is best effort: a StandardError raised while sending is logged
    # through the supplied logger and not propagated.
    class Statsd < Metrics
      attr_reader :server, :port, :prefix

      # logger - object responding to #log(level, message).
      # params - Hash with string keys:
      #          'server' (required) statsd host name,
      #          'port'   (default 8125),
      #          'prefix' (default 'vmpooler').
      #
      # Raises ArgumentError when 'server' is nil or empty.
      def initialize(logger, params = {})
        host = params['server']
        raise ArgumentError, "Statsd server is required. Config: #{params.inspect}" if host.nil? || host.empty?

        # Kept for log messages; `server` is the client object, not the host name.
        @host = host
        @port = params['port'] || 8125
        @prefix = params['prefix'] || 'vmpooler'
        @server = ::Statsd.new(host, @port)
        @logger = logger
      end

      # Increments the counter "<prefix>.<label>".
      def increment(label)
        server.increment("#{prefix}.#{label}")
      rescue StandardError => e
        log_failure('incrementing', label, e)
      end

      # Sets the gauge "<prefix>.<label>" to value.
      def gauge(label, value)
        server.gauge("#{prefix}.#{label}", value)
      rescue StandardError => e
        log_failure('updating gauge', label, e)
      end

      # Reports duration for the timer "<prefix>.<label>".
      def timing(label, duration)
        server.timing("#{prefix}.#{label}", duration)
      rescue StandardError => e
        log_failure('updating timing', label, e)
      end

      private

      # Logs a failed submission, naming the statsd host and port it was sent to.
      def log_failure(action, label, error)
        @logger.log('s', "[!] Failure #{action} #{prefix}.#{label} on statsd server [#{@host}:#{port}]: #{error}")
      end
    end
  end
end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'vmpooler/providers'
4
+ require 'vmpooler/util/parsing'
4
5
  require 'spicy-proton'
6
+ require 'resolv' # ruby standard lib
5
7
 
6
8
  module Vmpooler
7
9
  class PoolManager
@@ -9,7 +11,7 @@ module Vmpooler
9
11
  CHECK_LOOP_DELAY_MAX_DEFAULT = 60
10
12
  CHECK_LOOP_DELAY_DECAY_DEFAULT = 2.0
11
13
 
12
- def initialize(config, logger, redis, metrics)
14
+ def initialize(config, logger, redis_connection_pool, metrics)
13
15
  $config = config
14
16
 
15
17
  # Load logger library
@@ -18,19 +20,19 @@ module Vmpooler
18
20
  # metrics logging handle
19
21
  $metrics = metrics
20
22
 
21
- # Connect to Redis
22
- $redis = redis
23
+ # Redis connection pool
24
+ @redis = redis_connection_pool
23
25
 
24
26
  # VM Provider objects
25
- $providers = {}
27
+ $providers = Concurrent::Hash.new
26
28
 
27
29
  # Our thread-tracker object
28
- $threads = {}
30
+ $threads = Concurrent::Hash.new
29
31
 
30
32
  # Pool mutex
31
- @reconfigure_pool = {}
33
+ @reconfigure_pool = Concurrent::Hash.new
32
34
 
33
- @vm_mutex = {}
35
+ @vm_mutex = Concurrent::Hash.new
34
36
 
35
37
  # Name generator for generating host names
36
38
  @name_generator = Spicy::Proton.new
@@ -45,24 +47,26 @@ module Vmpooler
45
47
 
46
48
  # Place pool configuration in redis so an API instance can discover running pool configuration
47
49
  def load_pools_to_redis
48
- previously_configured_pools = $redis.smembers('vmpooler__pools')
49
- currently_configured_pools = []
50
- config[:pools].each do |pool|
51
- currently_configured_pools << pool['name']
52
- $redis.sadd('vmpooler__pools', pool['name'])
53
- pool_keys = pool.keys
54
- pool_keys.delete('alias')
55
- to_set = {}
56
- pool_keys.each do |k|
57
- to_set[k] = pool[k]
50
+ @redis.with_metrics do |redis|
51
+ previously_configured_pools = redis.smembers('vmpooler__pools')
52
+ currently_configured_pools = []
53
+ config[:pools].each do |pool|
54
+ currently_configured_pools << pool['name']
55
+ redis.sadd('vmpooler__pools', pool['name'])
56
+ pool_keys = pool.keys
57
+ pool_keys.delete('alias')
58
+ to_set = {}
59
+ pool_keys.each do |k|
60
+ to_set[k] = pool[k]
61
+ end
62
+ to_set['alias'] = pool['alias'].join(',') if to_set.key?('alias')
63
+ redis.hmset("vmpooler__pool__#{pool['name']}", to_set.to_a.flatten) unless to_set.empty?
58
64
  end
59
- to_set['alias'] = pool['alias'].join(',') if to_set.key?('alias')
60
- $redis.hmset("vmpooler__pool__#{pool['name']}", to_set.to_a.flatten) unless to_set.empty?
61
- end
62
- previously_configured_pools.each do |pool|
63
- unless currently_configured_pools.include? pool
64
- $redis.srem('vmpooler__pools', pool)
65
- $redis.del("vmpooler__pool__#{pool}")
65
+ previously_configured_pools.each do |pool|
66
+ unless currently_configured_pools.include? pool
67
+ redis.srem('vmpooler__pools', pool)
68
+ redis.del("vmpooler__pool__#{pool}")
69
+ end
66
70
  end
67
71
  end
68
72
  nil
@@ -75,7 +79,9 @@ module Vmpooler
75
79
  _check_pending_vm(vm, pool, timeout, provider)
76
80
  rescue StandardError => e
77
81
  $logger.log('s', "[!] [#{pool}] '#{vm}' #{timeout} #{provider} errored while checking a pending vm : #{e}")
78
- fail_pending_vm(vm, pool, timeout)
82
+ @redis.with_metrics do |redis|
83
+ fail_pending_vm(vm, pool, timeout, redis)
84
+ end
79
85
  raise
80
86
  end
81
87
  end
@@ -86,31 +92,38 @@ module Vmpooler
86
92
  return if mutex.locked?
87
93
 
88
94
  mutex.synchronize do
89
- if provider.vm_ready?(pool, vm)
90
- move_pending_vm_to_ready(vm, pool)
91
- else
92
- fail_pending_vm(vm, pool, timeout)
95
+ @redis.with_metrics do |redis|
96
+ request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
97
+ if provider.vm_ready?(pool, vm)
98
+ move_pending_vm_to_ready(vm, pool, redis, request_id)
99
+ else
100
+ fail_pending_vm(vm, pool, timeout, redis)
101
+ end
93
102
  end
94
103
  end
95
104
  end
96
105
 
97
# Removes a VM from the pool's pending set and logs that it no longer exists.
#
# vm    - VM name.
# pool  - pool name; used to build the "vmpooler__pending__<pool>" set key.
# redis - redis connection to issue the command on.
def remove_nonexistent_vm(vm, pool, redis)
  redis.srem("vmpooler__pending__#{pool}", vm)
  $logger.log('d', "[!] [#{pool}] '#{vm}' no longer exists. Removing from pending.")
end
101
110
 
102
- def fail_pending_vm(vm, pool, timeout, exists = true)
103
- clone_stamp = $redis.hget("vmpooler__vm__#{vm}", 'clone')
104
- return true unless clone_stamp
111
+ def fail_pending_vm(vm, pool, timeout, redis, exists = true)
112
+ clone_stamp = redis.hget("vmpooler__vm__#{vm}", 'clone')
105
113
 
106
114
  time_since_clone = (Time.now - Time.parse(clone_stamp)) / 60
107
115
  if time_since_clone > timeout
108
116
  if exists
109
- $redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm)
117
+ request_id = redis.hget("vmpooler__vm__#{vm}", 'request_id')
118
+ pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias') if request_id
119
+ redis.multi
120
+ redis.smove('vmpooler__pending__' + pool, 'vmpooler__completed__' + pool, vm)
121
+ redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool}:1:#{request_id}") if request_id
122
+ redis.exec
110
123
  $metrics.increment("errors.markedasfailed.#{pool}")
111
124
  $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")
112
125
  else
113
- remove_nonexistent_vm(vm, pool)
126
+ remove_nonexistent_vm(vm, pool, redis)
114
127
  end
115
128
  end
116
129
  true
@@ -119,28 +132,54 @@ module Vmpooler
119
132
  false
120
133
  end
121
134
 
122
# Promotes a VM out of the pending queue once it is ready.
#
# Without a request_id the VM moves from the pool's pending set to its ready
# set. With a request_id the VM is handed straight to that on-demand request:
# it is recorded as active/checked out, tagged with the request's token
# fields when present, added to the request's per-pool set and moved to the
# running queue. If the request's status is 'failed' or 'deleted' the VM is
# moved to the completed queue instead and nil is returned early.
#
# In all non-early-return cases the boot duration (time since the VM's
# 'clone' stamp), the 'ready' stamp and the pool's last boot time are stored,
# and the duration is reported as the "time_to_ready_state.<pool>" timing.
#
# vm         - VM name.
# pool       - pool name.
# redis      - redis connection to issue commands on.
# request_id - on-demand request id, or nil for a regular pool VM.
def move_pending_vm_to_ready(vm, pool, redis, request_id = nil)
  vm_key = "vmpooler__vm__#{vm}"
  clone_time = redis.hget(vm_key, 'clone')
  # NOTE(review): a VM without a 'clone' stamp makes Time.parse raise here
  # before anything is moved - confirm callers rely on that.
  finish = format('%<time>.2f', time: Time.now - Time.parse(clone_time))

  if request_id
    ondemandrequest_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
    case ondemandrequest_hash['status']
    when 'failed'
      move_vm_queue(pool, vm, 'pending', 'completed', redis, "moved to completed queue. '#{request_id}' could not be filled in time")
      return nil
    when 'deleted'
      move_vm_queue(pool, vm, 'pending', 'completed', redis, "moved to completed queue. '#{request_id}' has been deleted")
      return nil
    end
    pool_alias = redis.hget(vm_key, 'pool_alias')

    redis.pipelined do
      redis.hset("vmpooler__active__#{pool}", vm, Time.now)
      redis.hset(vm_key, 'checkout', Time.now)
      redis.hset(vm_key, 'token:token', ondemandrequest_hash['token:token']) if ondemandrequest_hash['token:token']
      redis.hset(vm_key, 'token:user', ondemandrequest_hash['token:user']) if ondemandrequest_hash['token:user']
      redis.sadd("vmpooler__#{request_id}__#{pool_alias}__#{pool}", vm)
    end
    move_vm_queue(pool, vm, 'pending', 'running', redis)
    check_ondemand_request_ready(request_id, redis)
  else
    redis.smove("vmpooler__pending__#{pool}", "vmpooler__ready__#{pool}", vm)
  end

  redis.pipelined do
    redis.hset("vmpooler__boot__#{Date.today}", "#{pool}:#{vm}", finish) # maybe remove as this is never used by vmpooler itself?
    redis.hset(vm_key, 'ready', Time.now)

    # last boot time is displayed in API, and used by alarming script
    redis.hset('vmpooler__lastboot', pool, Time.now)
  end

  $metrics.timing("time_to_ready_state.#{pool}", finish)
  $logger.log('s', "[>] [#{pool}] '#{vm}' moved from 'pending' to 'ready' queue") unless request_id
  $logger.log('s', "[>] [#{pool}] '#{vm}' is 'ready' for request '#{request_id}'") if request_id
end
136
175
 
137
# Checks whether a VM in the ready queue is still reachable.
#
# Returns true when the provider reports the VM as ready. When the provider
# reports it as not ready, or raises a StandardError while checking, the VM
# is moved from the 'ready' queue to the 'completed' queue and the result of
# move_vm_queue is returned.
#
# pool_name - pool the VM belongs to.
# vm_name   - VM name.
# provider  - provider object responding to #vm_ready?(pool_name, vm_name).
# redis     - redis connection passed through to move_vm_queue.
def vm_still_ready?(pool_name, vm_name, provider, redis)
  # A provider error is treated the same as "not ready".
  ready = begin
    provider.vm_ready?(pool_name, vm_name)
  rescue StandardError
    false
  end
  return true if ready

  move_vm_queue(pool_name, vm_name, 'ready', 'completed', redis, "is unreachable, removed from 'ready' queue")
end
145
184
 
146
185
  def check_ready_vm(vm, pool_name, ttl, provider)
@@ -160,34 +199,35 @@ module Vmpooler
160
199
  return if mutex.locked?
161
200
 
162
201
  mutex.synchronize do
163
- check_stamp = $redis.hget('vmpooler__vm__' + vm, 'check')
164
- return if check_stamp && (((Time.now - Time.parse(check_stamp)) / 60) <= $config[:config]['vm_checktime'])
202
+ @redis.with_metrics do |redis|
203
+ check_stamp = redis.hget('vmpooler__vm__' + vm, 'check')
204
+ last_checked_too_soon = ((Time.now - Time.parse(check_stamp)).to_i < $config[:config]['vm_checktime'] * 60) if check_stamp
205
+ break if check_stamp && last_checked_too_soon
165
206
 
166
- $redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
167
- # Check if the hosts TTL has expired
168
- if ttl > 0
207
+ redis.hset('vmpooler__vm__' + vm, 'check', Time.now)
208
+ # Check if the hosts TTL has expired
169
209
  # if 'boottime' is nil, set bootime to beginning of unix epoch, forces TTL to be assumed expired
170
- boottime = $redis.hget("vmpooler__vm__#{vm}", 'ready')
210
+ boottime = redis.hget("vmpooler__vm__#{vm}", 'ready')
171
211
  if boottime
172
212
  boottime = Time.parse(boottime)
173
213
  else
174
214
  boottime = Time.at(0)
175
215
  end
176
- if ((Time.now - boottime) / 60).to_s[/^\d+\.\d{1}/].to_f > ttl
177
- $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
216
+ if (Time.now - boottime).to_i > ttl * 60
217
+ redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
178
218
 
179
219
  $logger.log('d', "[!] [#{pool_name}] '#{vm}' reached end of TTL after #{ttl} minutes, removed from 'ready' queue")
180
- return
220
+ return nil
181
221
  end
182
- end
183
222
 
184
- return if mismatched_hostname?(vm, pool_name, provider)
223
+ break if mismatched_hostname?(vm, pool_name, provider, redis)
185
224
 
186
- vm_still_ready?(pool_name, vm, provider)
225
+ vm_still_ready?(pool_name, vm, provider, redis)
226
+ end
187
227
  end
188
228
  end
189
229
 
190
- def mismatched_hostname?(vm, pool_name, provider)
230
+ def mismatched_hostname?(vm, pool_name, provider, redis)
191
231
  pool_config = $config[:pools][$config[:pool_index][pool_name]]
192
232
  check_hostname = pool_config['check_hostname_for_mismatch']
193
233
  check_hostname = $config[:config]['check_ready_vm_hostname_for_mismatch'] if check_hostname.nil?
@@ -196,7 +236,7 @@ module Vmpooler
196
236
  # Wait one minute before checking a VM for hostname mismatch
197
237
  # When checking as soon as the VM passes the ready test the instance
198
238
  # often doesn't report its hostname yet causing the VM to be removed immediately
199
- vm_ready_time = $redis.hget("vmpooler__vm__#{vm}", 'ready')
239
+ vm_ready_time = redis.hget("vmpooler__vm__#{vm}", 'ready')
200
240
  if vm_ready_time
201
241
  wait_before_checking = 60
202
242
  time_since_ready = (Time.now - Time.parse(vm_ready_time)).to_i
@@ -213,7 +253,7 @@ module Vmpooler
213
253
  return if hostname.empty?
214
254
  return if hostname == vm
215
255
 
216
- $redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
256
+ redis.smove('vmpooler__ready__' + pool_name, 'vmpooler__completed__' + pool_name, vm)
217
257
  $logger.log('d', "[!] [#{pool_name}] '#{vm}' has mismatched hostname #{hostname}, removed from 'ready' queue")
218
258
  true
219
259
  end
@@ -234,49 +274,61 @@ module Vmpooler
234
274
  return if mutex.locked?
235
275
 
236
276
  mutex.synchronize do
237
- # Check that VM is within defined lifetime
238
- checkouttime = $redis.hget('vmpooler__active__' + pool, vm)
239
- if checkouttime
240
- running = (Time.now - Time.parse(checkouttime)) / 60 / 60
241
-
242
- if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
243
- move_vm_queue(pool, vm, 'running', 'completed', "reached end of TTL after #{ttl} hours")
244
- return
245
- end
246
- end
277
+ catch :stop_checking do
278
+ @redis.with_metrics do |redis|
279
+ # Check that VM is within defined lifetime
280
+ checkouttime = redis.hget('vmpooler__active__' + pool, vm)
281
+ if checkouttime
282
+ time_since_checkout = Time.now - Time.parse(checkouttime)
283
+ running = time_since_checkout / 60 / 60
284
+
285
+ if (ttl.to_i > 0) && (running.to_i >= ttl.to_i)
286
+ move_vm_queue(pool, vm, 'running', 'completed', redis, "reached end of TTL after #{ttl} hours")
287
+ throw :stop_checking
288
+ end
289
+ end
247
290
 
248
- if provider.vm_ready?(pool, vm)
249
- return
250
- else
251
- host = provider.get_vm(pool, vm)
291
+ if provider.vm_ready?(pool, vm)
292
+ throw :stop_checking
293
+ else
294
+ host = provider.get_vm(pool, vm)
252
295
 
253
- if host
254
- return
255
- else
256
- move_vm_queue(pool, vm, 'running', 'completed', 'is no longer in inventory, removing from running')
296
+ if host
297
+ throw :stop_checking
298
+ else
299
+ move_vm_queue(pool, vm, 'running', 'completed', redis, 'is no longer in inventory, removing from running')
300
+ end
301
+ end
257
302
  end
258
303
  end
259
304
  end
260
305
  end
261
306
 
262
# Moves a VM between two of a pool's queue sets.
#
# pool       - pool name.
# vm         - VM name.
# queue_from - source queue name; key is "vmpooler__<queue_from>__<pool>".
# queue_to   - destination queue name; key is "vmpooler__<queue_to>__<pool>".
# redis      - redis connection to issue the command on.
# msg        - optional text appended to a debug log line; nothing is logged
#              when it is nil.
def move_vm_queue(pool, vm, queue_from, queue_to, redis, msg = nil)
  redis.smove("vmpooler__#{queue_from}__#{pool}", "vmpooler__#{queue_to}__#{pool}", vm)
  $logger.log('d', "[!] [#{pool}] '#{vm}' #{msg}") if msg
end
266
311
 
267
312
  # Clone a VM
268
# Clones a VM for the pool on a new thread and returns that Thread.
#
# When the clone raises a StandardError the failure is logged and, for
# on-demand requests (request_id given), a create task for one VM is put back
# on the 'vmpooler__odcreate__task' sorted set so the request can be retried.
# The error is then re-raised inside the thread.
#
# pool_name  - pool to clone into.
# provider   - provider object, passed through to _clone_vm.
# request_id - on-demand request id, or nil for a regular pool clone.
# pool_alias - alias the request used for the pool, or nil.
def clone_vm(pool_name, provider, request_id = nil, pool_alias = nil)
  Thread.new do
    begin
      _clone_vm(pool_name, provider, request_id, pool_alias)
    rescue StandardError => e
      request_note = request_id ? " for request #{request_id}" : ''
      $logger.log('s', "[!] [#{pool_name}] failed while cloning VM#{request_note} with an error: #{e}")
      if request_id
        @redis.with_metrics do |redis|
          redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool_name}:1:#{request_id}")
        end
      end
      raise
    end
  end
end
278
330
 
279
- def generate_and_check_hostname(_pool_name)
331
+ def generate_and_check_hostname
280
332
  # Generate a randomized hostname. The total name must no longer than 15
281
333
  # character including the hyphen. The shortest adjective in the corpus is
282
334
  # three characters long. Therefore, we can technically select a noun up to 11
@@ -285,58 +337,104 @@ module Vmpooler
285
337
  # letter adjectives, we actually limit the noun to 10 letters to avoid
286
338
  # inviting more conflicts. We favor selecting a longer noun rather than a
287
339
  # longer adjective because longer adjectives tend to be less fun.
288
- noun = @name_generator.noun(max: 10)
289
- adjective = @name_generator.adjective(max: 14 - noun.length)
290
- random_name = [adjective, noun].join('-')
291
- hostname = $config[:config]['prefix'] + random_name
292
- available = $redis.hlen('vmpooler__vm__' + hostname) == 0
293
-
294
- [hostname, available]
340
+ @redis.with do |redis|
341
+ noun = @name_generator.noun(max: 10)
342
+ adjective = @name_generator.adjective(max: 14 - noun.length)
343
+ random_name = [adjective, noun].join('-')
344
+ hostname = $config[:config]['prefix'] + random_name
345
+ available = redis.hlen('vmpooler__vm__' + hostname) == 0
346
+
347
+ [hostname, available]
348
+ end
295
349
  end
296
350
 
297
351
  def find_unique_hostname(pool_name)
352
+ # generate hostname that is not already in use in vmpooler
353
+ # also check that no dns record already exists
298
354
  hostname_retries = 0
299
355
  max_hostname_retries = 3
300
356
  while hostname_retries < max_hostname_retries
301
- hostname, available = generate_and_check_hostname(pool_name)
302
- break if available
357
+ hostname, hostname_available = generate_and_check_hostname
358
+ domain = $config[:config]['domain']
359
+ dns_ip, dns_available = check_dns_available(hostname, domain)
360
+ break if hostname_available && dns_available
303
361
 
304
362
  hostname_retries += 1
305
- $metrics.increment("errors.duplicatehostname.#{pool_name}")
306
- $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})")
363
+
364
+ if !hostname_available
365
+ $metrics.increment("errors.duplicatehostname.#{pool_name}")
366
+ $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} was not unique (attempt \##{hostname_retries} of #{max_hostname_retries})")
367
+ elsif !dns_available
368
+ $metrics.increment("errors.staledns.#{hostname}")
369
+ $logger.log('s', "[!] [#{pool_name}] Generated hostname #{hostname} already exists in DNS records (#{dns_ip}), stale DNS")
370
+ end
307
371
  end
308
372
 
309
- raise "Unable to generate a unique hostname after #{hostname_retries} attempts. The last hostname checked was #{hostname}" unless available
373
+ raise "Unable to generate a unique hostname after #{hostname_retries} attempts. The last hostname checked was #{hostname}" unless hostname_available && dns_available
310
374
 
311
375
  hostname
312
376
  end
313
377
 
314
- def _clone_vm(pool_name, provider)
315
- new_vmname = find_unique_hostname(pool_name)
316
-
317
- # Add VM to Redis inventory ('pending' pool)
318
- $redis.sadd('vmpooler__pending__' + pool_name, new_vmname)
319
- $redis.hset('vmpooler__vm__' + new_vmname, 'clone', Time.now)
320
- $redis.hset('vmpooler__vm__' + new_vmname, 'template', pool_name)
321
-
378
# Queries DNS for the name we want to create; a name that already resolves is
# reported as unavailable. This protects against stale DNS records.
#
# vm_name - host name to look up.
# domain  - optional domain; when given, "<vm_name>.<domain>" is looked up.
#
# Returns a two-element Array: [resolved address, false] when the name
# resolves, or ['', true] when the lookup raises Resolv::ResolvError.
def check_dns_available(vm_name, domain = nil)
  fqdn = domain ? "#{vm_name}.#{domain}" : vm_name
  [Resolv.getaddress(fqdn), false]
rescue Resolv::ResolvError
  # this is the expected case, swallow the error
  # eg "no address for blah-daisy"
  ['', true]
end
327
391
 
328
- $redis.hset('vmpooler__clone__' + Date.today.to_s, pool_name + ':' + new_vmname, finish)
329
- $redis.hset('vmpooler__vm__' + new_vmname, 'clone_time', finish)
330
- $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")
392
# Clones one VM for a pool and records its lifecycle in redis.
#
# Steps, all performed while holding the new VM's mutex:
# 1. Register the VM as pending and write its metadata hash ('clone' stamp,
#    'template', 'pool', and 'request_id' / 'pool_alias' when given) between
#    MULTI and EXEC.
# 2. Ask the provider to create the VM and time the call.
# 3. On success store the clone duration and report the "clone.<pool>" timing.
#    On StandardError remove the VM from the pending set, set an expiry of
#    data_ttl hours on its metadata hash, and re-raise.
# 4. Always decrement the matching clone task counter
#    ('vmpooler__tasks__ondemandclone' for on-demand requests, otherwise
#    'vmpooler__tasks__clone').
#
# pool_name  - pool to clone into.
# provider   - provider object responding to #create_vm(pool_name, vm_name).
# request_id - on-demand request id, or nil.
# pool_alias - alias the request used for the pool, or nil.
def _clone_vm(pool_name, provider, request_id = nil, pool_alias = nil)
  new_vmname = find_unique_hostname(pool_name)
  vm_key = "vmpooler__vm__#{new_vmname}"
  mutex = vm_mutex(new_vmname)
  mutex.synchronize do
    @redis.with_metrics do |redis|
      # Add VM to Redis inventory ('pending' pool)
      redis.multi
      redis.sadd("vmpooler__pending__#{pool_name}", new_vmname)
      redis.hset(vm_key, 'clone', Time.now)
      redis.hset(vm_key, 'template', pool_name) # This value is used to represent the pool.
      redis.hset(vm_key, 'pool', pool_name)
      redis.hset(vm_key, 'request_id', request_id) if request_id
      redis.hset(vm_key, 'pool_alias', pool_alias) if pool_alias
      redis.exec
    end

    begin
      $logger.log('d', "[ ] [#{pool_name}] Starting to clone '#{new_vmname}'")
      start = Time.now
      provider.create_vm(pool_name, new_vmname)
      finish = format('%<time>.2f', time: Time.now - start)

      @redis.with_metrics do |redis|
        redis.pipelined do
          redis.hset("vmpooler__clone__#{Date.today}", "#{pool_name}:#{new_vmname}", finish)
          redis.hset(vm_key, 'clone_time', finish)
        end
      end
      $logger.log('s', "[+] [#{pool_name}] '#{new_vmname}' cloned in #{finish} seconds")

      $metrics.timing("clone.#{pool_name}", finish)
    rescue StandardError
      # Undo the pending registration and let the metadata hash expire.
      @redis.with_metrics do |redis|
        redis.pipelined do
          redis.srem("vmpooler__pending__#{pool_name}", new_vmname)
          expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
          redis.expire(vm_key, expiration_ttl)
        end
      end
      raise
    ensure
      @redis.with_metrics do |redis|
        redis.decr(request_id ? 'vmpooler__tasks__ondemandclone' : 'vmpooler__tasks__clone')
      end
    end
  end
end
342
440
 
@@ -357,45 +455,57 @@ module Vmpooler
357
455
  return if mutex.locked?
358
456
 
359
457
  mutex.synchronize do
360
- $redis.hdel('vmpooler__active__' + pool, vm)
361
- $redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
458
+ @redis.with_metrics do |redis|
459
+ redis.pipelined do
460
+ redis.hdel('vmpooler__active__' + pool, vm)
461
+ redis.hset('vmpooler__vm__' + vm, 'destroy', Time.now)
362
462
 
363
- # Auto-expire metadata key
364
- $redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
463
+ # Auto-expire metadata key
464
+ redis.expire('vmpooler__vm__' + vm, ($config[:redis]['data_ttl'].to_i * 60 * 60))
465
+ end
365
466
 
366
- start = Time.now
467
+ start = Time.now
367
468
 
368
- provider.destroy_vm(pool, vm)
469
+ provider.destroy_vm(pool, vm)
369
470
 
370
- $redis.srem('vmpooler__completed__' + pool, vm)
471
+ redis.srem('vmpooler__completed__' + pool, vm)
371
472
 
372
- finish = format('%<time>.2f', time: Time.now - start)
373
- $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
374
- $metrics.timing("destroy.#{pool}", finish)
375
- get_vm_usage_labels(vm)
473
+ finish = format('%<time>.2f', time: Time.now - start)
474
+ $logger.log('s', "[-] [#{pool}] '#{vm}' destroyed in #{finish} seconds")
475
+ $metrics.timing("destroy.#{pool}", finish)
476
+ get_vm_usage_labels(vm, redis)
477
+ end
376
478
  end
377
479
  dereference_mutex(vm)
378
480
  end
379
481
 
380
- def get_vm_usage_labels(vm)
482
+ def get_vm_usage_labels(vm, redis)
381
483
  return unless $config[:config]['usage_stats']
382
484
 
383
- checkout = $redis.hget("vmpooler__vm__#{vm}", 'checkout')
485
+ redis.multi
486
+ redis.hget("vmpooler__vm__#{vm}", 'checkout')
487
+ redis.hget("vmpooler__vm__#{vm}", 'tag:jenkins_build_url')
488
+ redis.hget("vmpooler__vm__#{vm}", 'token:user')
489
+ redis.hget("vmpooler__vm__#{vm}", 'template')
490
+ checkout, jenkins_build_url, user, poolname = redis.exec
384
491
  return if checkout.nil?
385
492
 
386
- jenkins_build_url = $redis.hget("vmpooler__vm__#{vm}", 'tag:jenkins_build_url')
387
- user = $redis.hget("vmpooler__vm__#{vm}", 'token:user') || 'unauthenticated'
388
- poolname = $redis.hget("vmpooler__vm__#{vm}", 'template')
493
+ user ||= 'unauthenticated'
494
+ user = user.gsub('.', '_')
495
+ $metrics.increment("user.#{user}.#{poolname}")
496
+
497
+ return unless jenkins_build_url
389
498
 
390
- unless jenkins_build_url
391
- user = user.gsub('.', '_')
392
- $metrics.increment("usage.#{user}.#{poolname}")
499
+ if jenkins_build_url.include? 'litmus'
500
+ # Very simple filter for Litmus jobs - just count them coming through for the moment.
501
+ $metrics.increment("usage_litmus.#{user}.#{poolname}")
393
502
  return
394
503
  end
395
504
 
396
505
  url_parts = jenkins_build_url.split('/')[2..-1]
397
- instance = url_parts[0]
506
+ jenkins_instance = url_parts[0].gsub('.', '_')
398
507
  value_stream_parts = url_parts[2].split('_')
508
+ value_stream_parts = value_stream_parts.map { |s| s.gsub('.', '_') }
399
509
  value_stream = value_stream_parts.shift
400
510
  branch = value_stream_parts.pop
401
511
  project = value_stream_parts.shift
@@ -403,24 +513,12 @@ module Vmpooler
403
513
  build_metadata_parts = url_parts[3]
404
514
  component_to_test = component_to_test('RMM_COMPONENT_TO_TEST_NAME', build_metadata_parts)
405
515
 
406
- metric_parts = [
407
- 'usage',
408
- user,
409
- instance,
410
- value_stream,
411
- branch,
412
- project,
413
- job_name,
414
- component_to_test,
415
- poolname
416
- ]
417
-
418
- metric_parts = metric_parts.reject(&:nil?)
419
- metric_parts = metric_parts.map { |s| s.gsub('.', '_') }
420
-
421
- $metrics.increment(metric_parts.join('.'))
516
+ $metrics.increment("usage_jenkins_instance.#{jenkins_instance}.#{value_stream}.#{poolname}")
517
+ $metrics.increment("usage_branch_project.#{branch}.#{project}.#{poolname}")
518
+ $metrics.increment("usage_job_component.#{job_name}.#{component_to_test}.#{poolname}")
422
519
  rescue StandardError => e
423
- logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
520
+ $logger.log('d', "[!] [#{poolname}] failed while evaluating usage labels on '#{vm}' with an error: #{e}")
521
+ raise
424
522
  end
425
523
 
426
524
  def component_to_test(match, labels_string)
@@ -432,7 +530,7 @@ module Vmpooler
432
530
  next if value.nil?
433
531
  return value if key == match
434
532
  end
435
- nil
533
+ 'none'
436
534
  end
437
535
 
438
536
  def purge_unused_vms_and_folders
@@ -444,7 +542,7 @@ module Vmpooler
444
542
  if provider_purge
445
543
  Thread.new do
446
544
  begin
447
- purge_vms_and_folders(provider.to_s)
545
+ purge_vms_and_folders($providers[provider.to_s])
448
546
  rescue StandardError => e
449
547
  $logger.log('s', "[!] failed while purging provider #{provider} VMs and folders with an error: #{e}")
450
548
  end
@@ -455,13 +553,14 @@ module Vmpooler
455
553
  end
456
554
 
457
555
  # Return a list of pool folders
458
- def pool_folders(provider_name)
556
+ def pool_folders(provider)
557
+ provider_name = provider.name
459
558
  folders = {}
460
559
  $config[:pools].each do |pool|
461
560
  next unless pool['provider'] == provider_name
462
561
 
463
562
  folder_parts = pool['folder'].split('/')
464
- datacenter = $providers[provider_name].get_target_datacenter_from_config(pool['name'])
563
+ datacenter = provider.get_target_datacenter_from_config(pool['name'])
465
564
  folders[folder_parts.pop] = "#{datacenter}/vm/#{folder_parts.join('/')}"
466
565
  end
467
566
  folders
@@ -478,8 +577,8 @@ module Vmpooler
478
577
  def purge_vms_and_folders(provider)
479
578
  configured_folders = pool_folders(provider)
480
579
  base_folders = get_base_folders(configured_folders)
481
- whitelist = $providers[provider].provider_config['folder_whitelist']
482
- $providers[provider].purge_unconfigured_folders(base_folders, configured_folders, whitelist)
580
+ whitelist = provider.provider_config['folder_whitelist']
581
+ provider.purge_unconfigured_folders(base_folders, configured_folders, whitelist)
483
582
  end
484
583
 
485
584
  def create_vm_disk(pool_name, vm, disk_size, provider)
@@ -505,10 +604,12 @@ module Vmpooler
505
604
  finish = format('%<time>.2f', time: Time.now - start)
506
605
 
507
606
  if result
508
- rdisks = $redis.hget('vmpooler__vm__' + vm_name, 'disk')
509
- disks = rdisks ? rdisks.split(':') : []
510
- disks.push("+#{disk_size}gb")
511
- $redis.hset('vmpooler__vm__' + vm_name, 'disk', disks.join(':'))
607
+ @redis.with_metrics do |redis|
608
+ rdisks = redis.hget('vmpooler__vm__' + vm_name, 'disk')
609
+ disks = rdisks ? rdisks.split(':') : []
610
+ disks.push("+#{disk_size}gb")
611
+ redis.hset('vmpooler__vm__' + vm_name, 'disk', disks.join(':'))
612
+ end
512
613
 
513
614
  $logger.log('s', "[+] [disk_manager] '#{vm_name}' attached #{disk_size}gb disk in #{finish} seconds")
514
615
  else
@@ -538,7 +639,9 @@ module Vmpooler
538
639
  finish = format('%<time>.2f', time: Time.now - start)
539
640
 
540
641
  if result
541
- $redis.hset('vmpooler__vm__' + vm_name, 'snapshot:' + snapshot_name, Time.now.to_s)
642
+ @redis.with_metrics do |redis|
643
+ redis.hset('vmpooler__vm__' + vm_name, 'snapshot:' + snapshot_name, Time.now.to_s)
644
+ end
542
645
  $logger.log('s', "[+] [snapshot_manager] '#{vm_name}' snapshot created in #{finish} seconds")
543
646
  else
544
647
  $logger.log('s', "[+] [snapshot_manager] Failed to snapshot '#{vm_name}'")
@@ -594,9 +697,9 @@ module Vmpooler
594
697
  @default_providers ||= %w[vsphere dummy]
595
698
  end
596
699
 
597
- def get_pool_name_for_vm(vm_name)
700
+ def get_pool_name_for_vm(vm_name, redis)
598
701
  # the 'template' is a bad name. Should really be 'poolname'
599
- $redis.hget('vmpooler__vm__' + vm_name, 'template')
702
+ redis.hget('vmpooler__vm__' + vm_name, 'template')
600
703
  end
601
704
 
602
705
  # @param pool_name [String] - the name of the pool
@@ -628,19 +731,21 @@ module Vmpooler
628
731
  end
629
732
 
630
733
  def _check_disk_queue
631
- task_detail = $redis.spop('vmpooler__tasks__disk')
632
- unless task_detail.nil?
633
- begin
634
- vm_name, disk_size = task_detail.split(':')
635
- pool_name = get_pool_name_for_vm(vm_name)
636
- raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
734
+ @redis.with_metrics do |redis|
735
+ task_detail = redis.spop('vmpooler__tasks__disk')
736
+ unless task_detail.nil?
737
+ begin
738
+ vm_name, disk_size = task_detail.split(':')
739
+ pool_name = get_pool_name_for_vm(vm_name, redis)
740
+ raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
637
741
 
638
- provider = get_provider_for_pool(pool_name)
639
- raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
742
+ provider = get_provider_for_pool(pool_name)
743
+ raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
640
744
 
641
- create_vm_disk(pool_name, vm_name, disk_size, provider)
642
- rescue StandardError => e
643
- $logger.log('s', "[!] [disk_manager] disk creation appears to have failed: #{e}")
745
+ create_vm_disk(pool_name, vm_name, disk_size, provider)
746
+ rescue StandardError => e
747
+ $logger.log('s', "[!] [disk_manager] disk creation appears to have failed: #{e}")
748
+ end
644
749
  end
645
750
  end
646
751
  end
@@ -664,37 +769,39 @@ module Vmpooler
664
769
  end
665
770
 
666
771
  def _check_snapshot_queue
667
- task_detail = $redis.spop('vmpooler__tasks__snapshot')
772
+ @redis.with_metrics do |redis|
773
+ task_detail = redis.spop('vmpooler__tasks__snapshot')
668
774
 
669
- unless task_detail.nil?
670
- begin
671
- vm_name, snapshot_name = task_detail.split(':')
672
- pool_name = get_pool_name_for_vm(vm_name)
673
- raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
775
+ unless task_detail.nil?
776
+ begin
777
+ vm_name, snapshot_name = task_detail.split(':')
778
+ pool_name = get_pool_name_for_vm(vm_name, redis)
779
+ raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
674
780
 
675
- provider = get_provider_for_pool(pool_name)
676
- raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
781
+ provider = get_provider_for_pool(pool_name)
782
+ raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
677
783
 
678
- create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
679
- rescue StandardError => e
680
- $logger.log('s', "[!] [snapshot_manager] snapshot create appears to have failed: #{e}")
784
+ create_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
785
+ rescue StandardError => e
786
+ $logger.log('s', "[!] [snapshot_manager] snapshot create appears to have failed: #{e}")
787
+ end
681
788
  end
682
- end
683
789
 
684
- task_detail = $redis.spop('vmpooler__tasks__snapshot-revert')
790
+ task_detail = redis.spop('vmpooler__tasks__snapshot-revert')
685
791
 
686
- unless task_detail.nil?
687
- begin
688
- vm_name, snapshot_name = task_detail.split(':')
689
- pool_name = get_pool_name_for_vm(vm_name)
690
- raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
792
+ unless task_detail.nil?
793
+ begin
794
+ vm_name, snapshot_name = task_detail.split(':')
795
+ pool_name = get_pool_name_for_vm(vm_name, redis)
796
+ raise("Unable to determine which pool #{vm_name} is a member of") if pool_name.nil?
691
797
 
692
- provider = get_provider_for_pool(pool_name)
693
- raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
798
+ provider = get_provider_for_pool(pool_name)
799
+ raise("Missing Provider for vm #{vm_name} in pool #{pool_name}") if provider.nil?
694
800
 
695
- revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
696
- rescue StandardError => e
697
- $logger.log('s', "[!] [snapshot_manager] snapshot revert appears to have failed: #{e}")
801
+ revert_vm_snapshot(pool_name, vm_name, snapshot_name, provider)
802
+ rescue StandardError => e
803
+ $logger.log('s', "[!] [snapshot_manager] snapshot revert appears to have failed: #{e}")
804
+ end
698
805
  end
699
806
  end
700
807
  end
@@ -704,7 +811,9 @@ module Vmpooler
704
811
  begin
705
812
  mutex = vm_mutex(vm_name)
706
813
  mutex.synchronize do
707
- $redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
814
+ @redis.with_metrics do |redis|
815
+ redis.srem("vmpooler__migrating__#{pool_name}", vm_name)
816
+ end
708
817
  provider.migrate_vm(pool_name, vm_name)
709
818
  end
710
819
  rescue StandardError => e
@@ -737,47 +846,65 @@ module Vmpooler
737
846
  wakeup_by = Time.now + wakeup_period
738
847
  return if time_passed?(:exit_by, exit_by)
739
848
 
740
- initial_ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}") if options[:pool_size_change]
849
+ @redis.with_metrics do |redis|
850
+ initial_ready_size = redis.scard("vmpooler__ready__#{options[:poolname]}") if options[:pool_size_change]
741
851
 
742
- initial_clone_target = $redis.hget("vmpooler__pool__#{options[:poolname]}", options[:clone_target]) if options[:clone_target_change]
852
+ initial_clone_target = redis.hget("vmpooler__pool__#{options[:poolname]}", options[:clone_target]) if options[:clone_target_change]
743
853
 
744
- initial_template = $redis.hget('vmpooler__template__prepared', options[:poolname]) if options[:pool_template_change]
854
+ initial_template = redis.hget('vmpooler__template__prepared', options[:poolname]) if options[:pool_template_change]
745
855
 
746
- loop do
747
- sleep(1)
748
- break if time_passed?(:exit_by, exit_by)
856
+ loop do
857
+ sleep(1)
858
+ break if time_passed?(:exit_by, exit_by)
749
859
 
750
- # Check for wakeup events
751
- if time_passed?(:wakeup_by, wakeup_by)
752
- wakeup_by = Time.now + wakeup_period
860
+ # Check for wakeup events
861
+ if time_passed?(:wakeup_by, wakeup_by)
862
+ wakeup_by = Time.now + wakeup_period
753
863
 
754
- # Wakeup if the number of ready VMs has changed
755
- if options[:pool_size_change]
756
- ready_size = $redis.scard("vmpooler__ready__#{options[:poolname]}")
757
- break unless ready_size == initial_ready_size
758
- end
864
+ # Wakeup if the number of ready VMs has changed
865
+ if options[:pool_size_change]
866
+ ready_size = redis.scard("vmpooler__ready__#{options[:poolname]}")
867
+ break unless ready_size == initial_ready_size
868
+ end
759
869
 
760
- if options[:clone_target_change]
761
- clone_target = $redis.hget('vmpooler__config__clone_target}', options[:poolname])
762
- if clone_target
763
- break unless clone_target == initial_clone_target
870
+ if options[:clone_target_change]
871
+ clone_target = redis.hget('vmpooler__config__clone_target}', options[:poolname])
872
+ if clone_target
873
+ break unless clone_target == initial_clone_target
874
+ end
764
875
  end
765
- end
766
876
 
767
- if options[:pool_template_change]
768
- configured_template = $redis.hget('vmpooler__config__template', options[:poolname])
769
- if configured_template
770
- break unless initial_template == configured_template
877
+ if options[:pool_template_change]
878
+ configured_template = redis.hget('vmpooler__config__template', options[:poolname])
879
+ if configured_template
880
+ break unless initial_template == configured_template
881
+ end
882
+ end
883
+
884
+ if options[:pool_reset]
885
+ pending = redis.sismember('vmpooler__poolreset', options[:poolname])
886
+ break if pending
771
887
  end
772
- end
773
888
 
774
- if options[:pool_reset]
775
- break if $redis.sismember('vmpooler__poolreset', options[:poolname])
889
+ if options[:pending_vm]
890
+ pending_vm_count = redis.scard("vmpooler__pending__#{options[:poolname]}")
891
+ break unless pending_vm_count == 0
892
+ end
893
+
894
+ if options[:ondemand_request]
895
+ redis.multi
896
+ redis.zcard('vmpooler__provisioning__request')
897
+ redis.zcard('vmpooler__provisioning__processing')
898
+ redis.zcard('vmpooler__odcreate__task')
899
+ od_request, od_processing, od_createtask = redis.exec
900
+ break unless od_request == 0
901
+ break unless od_processing == 0
902
+ break unless od_createtask == 0
903
+ end
776
904
  end
777
905
 
906
+ break if time_passed?(:exit_by, exit_by)
778
907
  end
779
-
780
- break if time_passed?(:exit_by, exit_by)
781
908
  end
782
909
  end
783
910
 
@@ -813,7 +940,7 @@ module Vmpooler
813
940
  loop_delay = (loop_delay * loop_delay_decay).to_i
814
941
  loop_delay = loop_delay_max if loop_delay > loop_delay_max
815
942
  end
816
- sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true, clone_target_change: true, pool_reset: true)
943
+ sleep_with_wakeup_events(loop_delay, loop_delay_min, pool_size_change: true, poolname: pool['name'], pool_template_change: true, clone_target_change: true, pending_vm: true, pool_reset: true)
817
944
 
818
945
  unless maxloop == 0
819
946
  break if loop_count >= maxloop
@@ -843,77 +970,84 @@ module Vmpooler
843
970
  end
844
971
 
845
972
  def sync_pool_template(pool)
846
- pool_template = $redis.hget('vmpooler__config__template', pool['name'])
847
- if pool_template
848
- pool['template'] = pool_template unless pool['template'] == pool_template
973
+ @redis.with_metrics do |redis|
974
+ pool_template = redis.hget('vmpooler__config__template', pool['name'])
975
+ if pool_template
976
+ pool['template'] = pool_template unless pool['template'] == pool_template
977
+ end
849
978
  end
850
979
  end
851
980
 
852
- def prepare_template(pool, provider)
981
+ def prepare_template(pool, provider, redis)
853
982
  if $config[:config]['create_template_delta_disks']
854
- unless $redis.sismember('vmpooler__template__deltas', pool['template'])
983
+ unless redis.sismember('vmpooler__template__deltas', pool['template'])
855
984
  begin
856
985
  provider.create_template_delta_disks(pool)
857
- $redis.sadd('vmpooler__template__deltas', pool['template'])
986
+ redis.sadd('vmpooler__template__deltas', pool['template'])
858
987
  rescue StandardError => e
859
988
  $logger.log('s', "[!] [#{pool['name']}] failed while preparing a template with an error. As a result vmpooler could not create the template delta disks. Either a template delta disk already exists, or the template delta disk creation failed. The error is: #{e}")
860
989
  end
861
990
  end
862
991
  end
863
- $redis.hset('vmpooler__template__prepared', pool['name'], pool['template'])
992
+ redis.hset('vmpooler__template__prepared', pool['name'], pool['template'])
864
993
  end
865
994
 
866
995
  def evaluate_template(pool, provider)
867
996
  mutex = pool_mutex(pool['name'])
868
- prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
869
- configured_template = $redis.hget('vmpooler__config__template', pool['name'])
870
997
  return if mutex.locked?
871
998
 
872
- if prepared_template.nil?
873
- mutex.synchronize do
874
- prepare_template(pool, provider)
875
- prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
876
- end
877
- elsif prepared_template != pool['template']
878
- if configured_template.nil?
999
+ catch :update_not_needed do
1000
+ @redis.with_metrics do |redis|
1001
+ prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
1002
+ configured_template = redis.hget('vmpooler__config__template', pool['name'])
1003
+
1004
+ if prepared_template.nil?
1005
+ mutex.synchronize do
1006
+ prepare_template(pool, provider, redis)
1007
+ prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
1008
+ end
1009
+ elsif prepared_template != pool['template']
1010
+ if configured_template.nil?
1011
+ mutex.synchronize do
1012
+ prepare_template(pool, provider, redis)
1013
+ prepared_template = redis.hget('vmpooler__template__prepared', pool['name'])
1014
+ end
1015
+ end
1016
+ end
1017
+ throw :update_not_needed if configured_template.nil?
1018
+ throw :update_not_needed if configured_template == prepared_template
1019
+
879
1020
  mutex.synchronize do
880
- prepare_template(pool, provider)
881
- prepared_template = $redis.hget('vmpooler__template__prepared', pool['name'])
1021
+ update_pool_template(pool, provider, configured_template, prepared_template, redis)
882
1022
  end
883
1023
  end
884
1024
  end
885
- return if configured_template.nil?
886
- return if configured_template == prepared_template
887
-
888
- mutex.synchronize do
889
- update_pool_template(pool, provider, configured_template, prepared_template)
890
- end
891
1025
  end
892
1026
 
893
- def drain_pool(poolname)
1027
+ def drain_pool(poolname, redis)
894
1028
  # Clear a pool of ready and pending instances
895
- if $redis.scard("vmpooler__ready__#{poolname}") > 0
1029
+ if redis.scard("vmpooler__ready__#{poolname}") > 0
896
1030
  $logger.log('s', "[*] [#{poolname}] removing ready instances")
897
- $redis.smembers("vmpooler__ready__#{poolname}").each do |vm|
898
- move_vm_queue(poolname, vm, 'ready', 'completed')
1031
+ redis.smembers("vmpooler__ready__#{poolname}").each do |vm|
1032
+ move_vm_queue(poolname, vm, 'ready', 'completed', redis)
899
1033
  end
900
1034
  end
901
- if $redis.scard("vmpooler__pending__#{poolname}") > 0
1035
+ if redis.scard("vmpooler__pending__#{poolname}") > 0
902
1036
  $logger.log('s', "[*] [#{poolname}] removing pending instances")
903
- $redis.smembers("vmpooler__pending__#{poolname}").each do |vm|
904
- move_vm_queue(poolname, vm, 'pending', 'completed')
1037
+ redis.smembers("vmpooler__pending__#{poolname}").each do |vm|
1038
+ move_vm_queue(poolname, vm, 'pending', 'completed', redis)
905
1039
  end
906
1040
  end
907
1041
  end
908
1042
 
909
- def update_pool_template(pool, provider, configured_template, prepared_template)
1043
+ def update_pool_template(pool, provider, configured_template, prepared_template, redis)
910
1044
  pool['template'] = configured_template
911
1045
  $logger.log('s', "[*] [#{pool['name']}] template updated from #{prepared_template} to #{configured_template}")
912
1046
  # Remove all ready and pending VMs so new instances are created from the new template
913
- drain_pool(pool['name'])
1047
+ drain_pool(pool['name'], redis)
914
1048
  # Prepare template for deployment
915
1049
  $logger.log('s', "[*] [#{pool['name']}] preparing pool template for deployment")
916
- prepare_template(pool, provider)
1050
+ prepare_template(pool, provider, redis)
917
1051
  $logger.log('s', "[*] [#{pool['name']}] is ready for use")
918
1052
  end
919
1053
 
@@ -921,38 +1055,45 @@ module Vmpooler
921
1055
  mutex = pool_mutex(pool['name'])
922
1056
  return if mutex.locked?
923
1057
 
924
- clone_target = $redis.hget('vmpooler__config__clone_target', pool['name'])
925
- return if clone_target.nil?
926
- return if clone_target == pool['clone_target']
1058
+ @redis.with_metrics do |redis|
1059
+ clone_target = redis.hget('vmpooler__config__clone_target', pool['name'])
1060
+ break if clone_target.nil?
1061
+ break if clone_target == pool['clone_target']
927
1062
 
928
- $logger.log('s', "[*] [#{pool['name']}] clone updated from #{pool['clone_target']} to #{clone_target}")
929
- mutex.synchronize do
930
- pool['clone_target'] = clone_target
931
- # Remove all ready and pending VMs so new instances are created for the new clone_target
932
- drain_pool(pool['name'])
1063
+ $logger.log('s', "[*] [#{pool['name']}] clone updated from #{pool['clone_target']} to #{clone_target}")
1064
+ mutex.synchronize do
1065
+ pool['clone_target'] = clone_target
1066
+ # Remove all ready and pending VMs so new instances are created for the new clone_target
1067
+ drain_pool(pool['name'], redis)
1068
+ end
1069
+ $logger.log('s', "[*] [#{pool['name']}] is ready for use")
933
1070
  end
934
- $logger.log('s', "[*] [#{pool['name']}] is ready for use")
935
1071
  end
936
1072
 
937
1073
  def remove_excess_vms(pool)
938
- ready = $redis.scard("vmpooler__ready__#{pool['name']}")
939
- total = $redis.scard("vmpooler__pending__#{pool['name']}") + ready
940
- return if total.nil?
941
- return if total == 0
1074
+ @redis.with_metrics do |redis|
1075
+ redis.multi
1076
+ redis.scard("vmpooler__ready__#{pool['name']}")
1077
+ redis.scard("vmpooler__pending__#{pool['name']}")
1078
+ ready, pending = redis.exec
1079
+ total = pending.to_i + ready.to_i
1080
+ break if total.nil?
1081
+ break if total == 0
942
1082
 
943
- mutex = pool_mutex(pool['name'])
944
- return if mutex.locked?
945
- return unless ready > pool['size']
1083
+ mutex = pool_mutex(pool['name'])
1084
+ break if mutex.locked?
1085
+ break unless ready.to_i > pool['size']
946
1086
 
947
- mutex.synchronize do
948
- difference = ready - pool['size']
949
- difference.times do
950
- next_vm = $redis.spop("vmpooler__ready__#{pool['name']}")
951
- move_vm_queue(pool['name'], next_vm, 'ready', 'completed')
952
- end
953
- if total > ready
954
- $redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
955
- move_vm_queue(pool['name'], vm, 'pending', 'completed')
1087
+ mutex.synchronize do
1088
+ difference = ready.to_i - pool['size']
1089
+ difference.times do
1090
+ next_vm = redis.spop("vmpooler__ready__#{pool['name']}")
1091
+ move_vm_queue(pool['name'], next_vm, 'ready', 'completed', redis)
1092
+ end
1093
+ if total > ready
1094
+ redis.smembers("vmpooler__pending__#{pool['name']}").each do |vm|
1095
+ move_vm_queue(pool['name'], vm, 'pending', 'completed', redis)
1096
+ end
956
1097
  end
957
1098
  end
958
1099
  end
@@ -962,26 +1103,30 @@ module Vmpooler
962
1103
  mutex = pool_mutex(pool['name'])
963
1104
  return if mutex.locked?
964
1105
 
965
- poolsize = $redis.hget('vmpooler__config__poolsize', pool['name'])
966
- return if poolsize.nil?
1106
+ @redis.with_metrics do |redis|
1107
+ poolsize = redis.hget('vmpooler__config__poolsize', pool['name'])
1108
+ break if poolsize.nil?
967
1109
 
968
- poolsize = Integer(poolsize)
969
- return if poolsize == pool['size']
1110
+ poolsize = Integer(poolsize)
1111
+ break if poolsize == pool['size']
970
1112
 
971
- mutex.synchronize do
972
- pool['size'] = poolsize
1113
+ mutex.synchronize do
1114
+ pool['size'] = poolsize
1115
+ end
973
1116
  end
974
1117
  end
975
1118
 
976
1119
  def reset_pool(pool)
977
1120
  poolname = pool['name']
978
- return unless $redis.sismember('vmpooler__poolreset', poolname)
1121
+ @redis.with_metrics do |redis|
1122
+ break unless redis.sismember('vmpooler__poolreset', poolname)
979
1123
 
980
- $redis.srem('vmpooler__poolreset', poolname)
981
- mutex = pool_mutex(poolname)
982
- mutex.synchronize do
983
- drain_pool(poolname)
984
- $logger.log('s', "[*] [#{poolname}] reset has cleared ready and pending instances")
1124
+ redis.srem('vmpooler__poolreset', poolname)
1125
+ mutex = pool_mutex(poolname)
1126
+ mutex.synchronize do
1127
+ drain_pool(poolname, redis)
1128
+ $logger.log('s', "[*] [#{poolname}] reset has cleared ready and pending instances")
1129
+ end
985
1130
  end
986
1131
  end
987
1132
 
@@ -990,21 +1135,23 @@ module Vmpooler
990
1135
  begin
991
1136
  mutex = pool_mutex(pool['name'])
992
1137
  mutex.synchronize do
993
- provider.vms_in_pool(pool['name']).each do |vm|
994
- if !$redis.sismember('vmpooler__running__' + pool['name'], vm['name']) &&
995
- !$redis.sismember('vmpooler__ready__' + pool['name'], vm['name']) &&
996
- !$redis.sismember('vmpooler__pending__' + pool['name'], vm['name']) &&
997
- !$redis.sismember('vmpooler__completed__' + pool['name'], vm['name']) &&
998
- !$redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']) &&
999
- !$redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])
1000
-
1001
- pool_check_response[:discovered_vms] += 1
1002
- $redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])
1003
-
1004
- $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
1138
+ @redis.with_metrics do |redis|
1139
+ provider.vms_in_pool(pool['name']).each do |vm|
1140
+ if !redis.sismember('vmpooler__running__' + pool['name'], vm['name']) &&
1141
+ !redis.sismember('vmpooler__ready__' + pool['name'], vm['name']) &&
1142
+ !redis.sismember('vmpooler__pending__' + pool['name'], vm['name']) &&
1143
+ !redis.sismember('vmpooler__completed__' + pool['name'], vm['name']) &&
1144
+ !redis.sismember('vmpooler__discovered__' + pool['name'], vm['name']) &&
1145
+ !redis.sismember('vmpooler__migrating__' + pool['name'], vm['name'])
1146
+
1147
+ pool_check_response[:discovered_vms] += 1
1148
+ redis.sadd('vmpooler__discovered__' + pool['name'], vm['name'])
1149
+
1150
+ $logger.log('s', "[?] [#{pool['name']}] '#{vm['name']}' added to 'discovered' queue")
1151
+ end
1152
+
1153
+ inventory[vm['name']] = 1
1005
1154
  end
1006
-
1007
- inventory[vm['name']] = 1
1008
1155
  end
1009
1156
  end
1010
1157
  rescue StandardError => e
@@ -1015,96 +1162,112 @@ module Vmpooler
1015
1162
  end
1016
1163
 
1017
1164
  def check_running_pool_vms(pool_name, provider, pool_check_response, inventory)
1018
- $redis.smembers("vmpooler__running__#{pool_name}").each do |vm|
1019
- if inventory[vm]
1020
- begin
1021
- vm_lifetime = $redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12
1022
- pool_check_response[:checked_running_vms] += 1
1023
- check_running_vm(vm, pool_name, vm_lifetime, provider)
1024
- rescue StandardError => e
1025
- $logger.log('d', "[!] [#{pool_name}] _check_pool with an error while evaluating running VMs: #{e}")
1165
+ @redis.with_metrics do |redis|
1166
+ redis.smembers("vmpooler__running__#{pool_name}").each do |vm|
1167
+ if inventory[vm]
1168
+ begin
1169
+ vm_lifetime = redis.hget('vmpooler__vm__' + vm, 'lifetime') || $config[:config]['vm_lifetime'] || 12
1170
+ pool_check_response[:checked_running_vms] += 1
1171
+ check_running_vm(vm, pool_name, vm_lifetime, provider)
1172
+ rescue StandardError => e
1173
+ $logger.log('d', "[!] [#{pool_name}] _check_pool with an error while evaluating running VMs: #{e}")
1174
+ end
1175
+ else
1176
+ move_vm_queue(pool_name, vm, 'running', 'completed', redis, 'is a running VM but is missing from inventory. Marking as completed.')
1026
1177
  end
1027
- else
1028
- move_vm_queue(pool_name, vm, 'running', 'completed', 'is a running VM but is missing from inventory. Marking as completed.')
1029
1178
  end
1030
1179
  end
1031
1180
  end
1032
1181
 
1033
- def check_ready_pool_vms(pool_name, provider, pool_check_response, inventory, pool_ttl = 0)
1034
- $redis.smembers("vmpooler__ready__#{pool_name}").each do |vm|
1035
- if inventory[vm]
1036
- begin
1037
- pool_check_response[:checked_ready_vms] += 1
1038
- check_ready_vm(vm, pool_name, pool_ttl || 0, provider)
1039
- rescue StandardError => e
1040
- $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating ready VMs: #{e}")
1182
+ def check_ready_pool_vms(pool_name, provider, pool_check_response, inventory, pool_ttl)
1183
+ @redis.with_metrics do |redis|
1184
+ redis.smembers("vmpooler__ready__#{pool_name}").each do |vm|
1185
+ if inventory[vm]
1186
+ begin
1187
+ pool_check_response[:checked_ready_vms] += 1
1188
+ check_ready_vm(vm, pool_name, pool_ttl, provider)
1189
+ rescue StandardError => e
1190
+ $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating ready VMs: #{e}")
1191
+ end
1192
+ else
1193
+ move_vm_queue(pool_name, vm, 'ready', 'completed', redis, 'is a ready VM but is missing from inventory. Marking as completed.')
1041
1194
  end
1042
- else
1043
- move_vm_queue(pool_name, vm, 'ready', 'completed', 'is a ready VM but is missing from inventory. Marking as completed.')
1044
1195
  end
1045
1196
  end
1046
1197
  end
1047
1198
 
1048
- def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout = nil)
1199
+ def check_pending_pool_vms(pool_name, provider, pool_check_response, inventory, pool_timeout)
1049
1200
  pool_timeout ||= $config[:config]['timeout'] || 15
1050
- $redis.smembers("vmpooler__pending__#{pool_name}").reverse.each do |vm|
1051
- if inventory[vm]
1052
- begin
1053
- pool_check_response[:checked_pending_vms] += 1
1054
- check_pending_vm(vm, pool_name, pool_timeout, provider)
1055
- rescue StandardError => e
1056
- $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating pending VMs: #{e}")
1201
+ @redis.with_metrics do |redis|
1202
+ redis.smembers("vmpooler__pending__#{pool_name}").reverse.each do |vm|
1203
+ if inventory[vm]
1204
+ begin
1205
+ pool_check_response[:checked_pending_vms] += 1
1206
+ check_pending_vm(vm, pool_name, pool_timeout, provider)
1207
+ rescue StandardError => e
1208
+ $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating pending VMs: #{e}")
1209
+ end
1210
+ else
1211
+ fail_pending_vm(vm, pool_name, pool_timeout, redis, false)
1057
1212
  end
1058
- else
1059
- fail_pending_vm(vm, pool_name, pool_timeout, false)
1060
1213
  end
1061
1214
  end
1062
1215
  end
1063
1216
 
1064
1217
  def check_completed_pool_vms(pool_name, provider, pool_check_response, inventory)
1065
- $redis.smembers("vmpooler__completed__#{pool_name}").each do |vm|
1066
- if inventory[vm]
1067
- begin
1068
- pool_check_response[:destroyed_vms] += 1
1069
- destroy_vm(vm, pool_name, provider)
1070
- rescue StandardError => e
1071
- $redis.srem("vmpooler__completed__#{pool_name}", vm)
1072
- $redis.hdel("vmpooler__active__#{pool_name}", vm)
1073
- $redis.del("vmpooler__vm__#{vm}")
1074
- $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating completed VMs: #{e}")
1218
+ @redis.with_metrics do |redis|
1219
+ redis.smembers("vmpooler__completed__#{pool_name}").each do |vm|
1220
+ if inventory[vm]
1221
+ begin
1222
+ pool_check_response[:destroyed_vms] += 1
1223
+ destroy_vm(vm, pool_name, provider)
1224
+ rescue StandardError => e
1225
+ redis.pipelined do
1226
+ redis.srem("vmpooler__completed__#{pool_name}", vm)
1227
+ redis.hdel("vmpooler__active__#{pool_name}", vm)
1228
+ redis.del("vmpooler__vm__#{vm}")
1229
+ end
1230
+ $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating completed VMs: #{e}")
1231
+ end
1232
+ else
1233
+ $logger.log('s', "[!] [#{pool_name}] '#{vm}' not found in inventory, removed from 'completed' queue")
1234
+ redis.pipelined do
1235
+ redis.srem("vmpooler__completed__#{pool_name}", vm)
1236
+ redis.hdel("vmpooler__active__#{pool_name}", vm)
1237
+ redis.del("vmpooler__vm__#{vm}")
1238
+ end
1075
1239
  end
1076
- else
1077
- $logger.log('s', "[!] [#{pool_name}] '#{vm}' not found in inventory, removed from 'completed' queue")
1078
- $redis.srem("vmpooler__completed__#{pool_name}", vm)
1079
- $redis.hdel("vmpooler__active__#{pool_name}", vm)
1080
- $redis.del("vmpooler__vm__#{vm}")
1081
1240
  end
1082
1241
  end
1083
1242
  end
1084
1243
 
1085
1244
  def check_discovered_pool_vms(pool_name)
1086
- $redis.smembers("vmpooler__discovered__#{pool_name}").reverse.each do |vm|
1087
- %w[pending ready running completed].each do |queue|
1088
- if $redis.sismember("vmpooler__#{queue}__#{pool_name}", vm)
1089
- $logger.log('d', "[!] [#{pool_name}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
1090
- $redis.srem("vmpooler__discovered__#{pool_name}", vm)
1245
+ @redis.with_metrics do |redis|
1246
+ redis.smembers("vmpooler__discovered__#{pool_name}").reverse.each do |vm|
1247
+ %w[pending ready running completed].each do |queue|
1248
+ if redis.sismember("vmpooler__#{queue}__#{pool_name}", vm)
1249
+ $logger.log('d', "[!] [#{pool_name}] '#{vm}' found in '#{queue}', removed from 'discovered' queue")
1250
+ redis.srem("vmpooler__discovered__#{pool_name}", vm)
1251
+ end
1091
1252
  end
1092
- end
1093
1253
 
1094
- $redis.smove("vmpooler__discovered__#{pool_name}", "vmpooler__completed__#{pool_name}", vm) if $redis.sismember("vmpooler__discovered__#{pool_name}", vm)
1254
+ redis.smove("vmpooler__discovered__#{pool_name}", "vmpooler__completed__#{pool_name}", vm) if redis.sismember("vmpooler__discovered__#{pool_name}", vm)
1255
+ end
1095
1256
  end
1096
1257
  rescue StandardError => e
1097
1258
  $logger.log('d', "[!] [#{pool_name}] _check_pool failed with an error while evaluating discovered VMs: #{e}")
1098
1259
  end
1099
1260
 
1100
1261
  def check_migrating_pool_vms(pool_name, provider, pool_check_response, inventory)
1101
- $redis.smembers("vmpooler__migrating__#{pool_name}").reverse.each do |vm|
1102
- if inventory[vm]
1103
- begin
1104
- pool_check_response[:migrated_vms] += 1
1105
- migrate_vm(vm, pool_name, provider)
1106
- rescue StandardError => e
1107
- $logger.log('s', "[x] [#{pool_name}] '#{vm}' failed to migrate: #{e}")
1262
+ @redis.with_metrics do |redis|
1263
+ redis.smembers("vmpooler__migrating__#{pool_name}").reverse.each do |vm|
1264
+ if inventory[vm]
1265
+ begin
1266
+ pool_check_response[:migrated_vms] += 1
1267
+ migrate_vm(vm, pool_name, provider)
1268
+ rescue StandardError => e
1269
+ $logger.log('s', "[x] [#{pool_name}] '#{vm}' failed to migrate: #{e}")
1270
+ end
1108
1271
  end
1109
1272
  end
1110
1273
  end
@@ -1113,29 +1276,37 @@ module Vmpooler
1113
1276
  def repopulate_pool_vms(pool_name, provider, pool_check_response, pool_size)
1114
1277
  return if pool_mutex(pool_name).locked?
1115
1278
 
1116
- ready = $redis.scard("vmpooler__ready__#{pool_name}")
1117
- total = $redis.scard("vmpooler__pending__#{pool_name}") + ready
1118
-
1119
- $metrics.gauge("ready.#{pool_name}", $redis.scard("vmpooler__ready__#{pool_name}"))
1120
- $metrics.gauge("running.#{pool_name}", $redis.scard("vmpooler__running__#{pool_name}"))
1121
-
1122
- if $redis.get("vmpooler__empty__#{pool_name}")
1123
- $redis.del("vmpooler__empty__#{pool_name}") unless ready == 0
1124
- elsif ready == 0
1125
- $redis.set("vmpooler__empty__#{pool_name}", 'true')
1126
- $logger.log('s', "[!] [#{pool_name}] is empty")
1127
- end
1279
+ @redis.with_metrics do |redis|
1280
+ redis.multi
1281
+ redis.scard("vmpooler__ready__#{pool_name}")
1282
+ redis.scard("vmpooler__pending__#{pool_name}")
1283
+ redis.scard("vmpooler__running__#{pool_name}")
1284
+ ready, pending, running = redis.exec
1285
+ total = pending.to_i + ready.to_i
1286
+
1287
+ $metrics.gauge("ready.#{pool_name}", ready)
1288
+ $metrics.gauge("running.#{pool_name}", running)
1289
+
1290
+ unless pool_size == 0
1291
+ if redis.get("vmpooler__empty__#{pool_name}")
1292
+ redis.del("vmpooler__empty__#{pool_name}") unless ready == 0
1293
+ elsif ready == 0
1294
+ redis.set("vmpooler__empty__#{pool_name}", 'true')
1295
+ $logger.log('s', "[!] [#{pool_name}] is empty")
1296
+ end
1297
+ end
1128
1298
 
1129
- (pool_size - total).times do
1130
- if $redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
1131
- begin
1132
- $redis.incr('vmpooler__tasks__clone')
1133
- pool_check_response[:cloned_vms] += 1
1134
- clone_vm(pool_name, provider)
1135
- rescue StandardError => e
1136
- $logger.log('s', "[!] [#{pool_name}] clone failed during check_pool with an error: #{e}")
1137
- $redis.decr('vmpooler__tasks__clone')
1138
- raise
1299
+ (pool_size - total.to_i).times do
1300
+ if redis.get('vmpooler__tasks__clone').to_i < $config[:config]['task_limit'].to_i
1301
+ begin
1302
+ redis.incr('vmpooler__tasks__clone')
1303
+ pool_check_response[:cloned_vms] += 1
1304
+ clone_vm(pool_name, provider)
1305
+ rescue StandardError => e
1306
+ $logger.log('s', "[!] [#{pool_name}] clone failed during check_pool with an error: #{e}")
1307
+ redis.decr('vmpooler__tasks__clone')
1308
+ raise
1309
+ end
1139
1310
  end
1140
1311
  end
1141
1312
  end
@@ -1160,7 +1331,7 @@ module Vmpooler
1160
1331
 
1161
1332
  check_running_pool_vms(pool['name'], provider, pool_check_response, inventory)
1162
1333
 
1163
- check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'])
1334
+ check_ready_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['ready_ttl'] || $config[:config]['ready_ttl'])
1164
1335
 
1165
1336
  check_pending_pool_vms(pool['name'], provider, pool_check_response, inventory, pool['timeout'])
1166
1337
 
@@ -1203,23 +1374,199 @@ module Vmpooler
1203
1374
  #
1204
1375
  # returns an object Vmpooler::PoolManager::Provider::*
1205
1376
  # or raises an error if the class does not exist
1206
- def create_provider_object(config, logger, metrics, provider_class, provider_name, options)
1377
+ def create_provider_object(config, logger, metrics, redis_connection_pool, provider_class, provider_name, options)
1207
1378
  provider_klass = Vmpooler::PoolManager::Provider
1208
1379
  provider_klass.constants.each do |classname|
1209
1380
  next unless classname.to_s.casecmp(provider_class) == 0
1210
1381
 
1211
- return provider_klass.const_get(classname).new(config, logger, metrics, provider_name, options)
1382
+ return provider_klass.const_get(classname).new(config, logger, metrics, redis_connection_pool, provider_name, options)
1212
1383
  end
1213
1384
  raise("Provider '#{provider_class}' is unknown for pool with provider name '#{provider_name}'") if provider.nil?
1214
1385
  end
1215
1386
 
1387
+ def check_ondemand_requests(maxloop = 0,
1388
+ loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
1389
+ loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
1390
+ loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
1391
+
1392
+ $logger.log('d', '[*] [ondemand_provisioner] starting worker thread')
1393
+
1394
+ $threads['ondemand_provisioner'] = Thread.new do
1395
+ _check_ondemand_requests(maxloop, loop_delay_min, loop_delay_max, loop_delay_decay)
1396
+ end
1397
+ end
1398
+
1399
+ def _check_ondemand_requests(maxloop = 0,
1400
+ loop_delay_min = CHECK_LOOP_DELAY_MIN_DEFAULT,
1401
+ loop_delay_max = CHECK_LOOP_DELAY_MAX_DEFAULT,
1402
+ loop_delay_decay = CHECK_LOOP_DELAY_DECAY_DEFAULT)
1403
+
1404
+ loop_delay_min = $config[:config]['check_loop_delay_min'] unless $config[:config]['check_loop_delay_min'].nil?
1405
+ loop_delay_max = $config[:config]['check_loop_delay_max'] unless $config[:config]['check_loop_delay_max'].nil?
1406
+ loop_delay_decay = $config[:config]['check_loop_delay_decay'] unless $config[:config]['check_loop_delay_decay'].nil?
1407
+
1408
+ loop_delay_decay = 2.0 if loop_delay_decay <= 1.0
1409
+ loop_delay_max = loop_delay_min if loop_delay_max.nil? || loop_delay_max < loop_delay_min
1410
+
1411
+ loop_count = 1
1412
+ loop_delay = loop_delay_min
1413
+
1414
+ loop do
1415
+ result = process_ondemand_requests
1416
+
1417
+ loop_delay = (loop_delay * loop_delay_decay).to_i
1418
+ loop_delay = loop_delay_min if result > 0
1419
+ loop_delay = loop_delay_max if loop_delay > loop_delay_max
1420
+ sleep_with_wakeup_events(loop_delay, loop_delay_min, ondemand_request: true)
1421
+
1422
+ unless maxloop == 0
1423
+ break if loop_count >= maxloop
1424
+
1425
+ loop_count += 1
1426
+ end
1427
+ end
1428
+ end
1429
+
1430
+ def process_ondemand_requests
1431
+ @redis.with_metrics do |redis|
1432
+ requests = redis.zrange('vmpooler__provisioning__request', 0, -1)
1433
+ requests&.map { |request_id| create_ondemand_vms(request_id, redis) }
1434
+ provisioning_tasks = process_ondemand_vms(redis)
1435
+ requests_ready = check_ondemand_requests_ready(redis)
1436
+ requests.length + provisioning_tasks + requests_ready
1437
+ end
1438
+ end
1439
+
1440
+ def create_ondemand_vms(request_id, redis)
1441
+ requested = redis.hget("vmpooler__odrequest__#{request_id}", 'requested')
1442
+ unless requested
1443
+ $logger.log('s', "Failed to find odrequest for request_id '#{request_id}'")
1444
+ redis.zrem('vmpooler__provisioning__request', request_id)
1445
+ return
1446
+ end
1447
+ score = redis.zscore('vmpooler__provisioning__request', request_id)
1448
+ requested = requested.split(',')
1449
+
1450
+ redis.pipelined do
1451
+ requested.each do |request|
1452
+ redis.zadd('vmpooler__odcreate__task', Time.now.to_i, "#{request}:#{request_id}")
1453
+ end
1454
+ redis.zrem('vmpooler__provisioning__request', request_id)
1455
+ redis.zadd('vmpooler__provisioning__processing', score, request_id)
1456
+ end
1457
+ end
1458
+
1459
+ def process_ondemand_vms(redis)
1460
+ queue_key = 'vmpooler__odcreate__task'
1461
+ queue = redis.zrange(queue_key, 0, -1, with_scores: true)
1462
+ ondemand_clone_limit = $config[:config]['ondemand_clone_limit']
1463
+ queue.each do |request, score|
1464
+ clone_count = redis.get('vmpooler__tasks__ondemandclone').to_i
1465
+ break unless clone_count < ondemand_clone_limit
1466
+
1467
+ pool_alias, pool, count, request_id = request.split(':')
1468
+ count = count.to_i
1469
+ provider = get_provider_for_pool(pool)
1470
+ slots = ondemand_clone_limit - clone_count
1471
+ break if slots == 0
1472
+
1473
+ if slots >= count
1474
+ count.times do
1475
+ redis.incr('vmpooler__tasks__ondemandclone')
1476
+ clone_vm(pool, provider, request_id, pool_alias)
1477
+ end
1478
+ redis.zrem(queue_key, request)
1479
+ else
1480
+ remaining_count = count - slots
1481
+ slots.times do
1482
+ redis.incr('vmpooler__tasks__ondemandclone')
1483
+ clone_vm(pool, provider, request_id, pool_alias)
1484
+ end
1485
+ redis.pipelined do
1486
+ redis.zrem(queue_key, request)
1487
+ redis.zadd(queue_key, score, "#{pool_alias}:#{pool}:#{remaining_count}:#{request_id}")
1488
+ end
1489
+ end
1490
+ end
1491
+ queue.length
1492
+ end
1493
+
1494
+ def vms_ready?(request_id, redis)
1495
+ catch :request_not_ready do
1496
+ request_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
1497
+ Parsing.get_platform_pool_count(request_hash['requested']) do |platform_alias, pool, count|
1498
+ pools_filled = redis.scard("vmpooler__#{request_id}__#{platform_alias}__#{pool}")
1499
+ throw :request_not_ready unless pools_filled.to_i == count.to_i
1500
+ end
1501
+ return true
1502
+ end
1503
+ false
1504
+ end
1505
+
1506
+ def check_ondemand_requests_ready(redis)
1507
+ in_progress_requests = redis.zrange('vmpooler__provisioning__processing', 0, -1, with_scores: true)
1508
+ in_progress_requests&.each do |request_id, score|
1509
+ check_ondemand_request_ready(request_id, redis, score)
1510
+ end
1511
+ in_progress_requests.length
1512
+ end
1513
+
1514
+ def check_ondemand_request_ready(request_id, redis, score = nil)
1515
+ # default expiration is one month to ensure the data does not stay in redis forever
1516
+ default_expiration = 259_200_0
1517
+ processing_key = 'vmpooler__provisioning__processing'
1518
+ ondemand_hash_key = "vmpooler__odrequest__#{request_id}"
1519
+ score ||= redis.zscore(processing_key, request_id)
1520
+ return if request_expired?(request_id, score, redis)
1521
+
1522
+ return unless vms_ready?(request_id, redis)
1523
+
1524
+ redis.multi
1525
+ redis.hset(ondemand_hash_key, 'status', 'ready')
1526
+ redis.expire(ondemand_hash_key, default_expiration)
1527
+ redis.zrem(processing_key, request_id)
1528
+ redis.exec
1529
+ end
1530
+
1531
+ def request_expired?(request_id, score, redis)
1532
+ delta = Time.now.to_i - score.to_i
1533
+ ondemand_request_ttl = $config[:config]['ondemand_request_ttl']
1534
+ return false unless delta > ondemand_request_ttl * 60
1535
+
1536
+ $logger.log('s', "Ondemand request for '#{request_id}' failed to provision all instances within the configured ttl '#{ondemand_request_ttl}'")
1537
+ expiration_ttl = $config[:redis]['data_ttl'].to_i * 60 * 60
1538
+ redis.pipelined do
1539
+ redis.zrem('vmpooler__provisioning__processing', request_id)
1540
+ redis.hset("vmpooler__odrequest__#{request_id}", 'status', 'failed')
1541
+ redis.expire("vmpooler__odrequest__#{request_id}", expiration_ttl)
1542
+ end
1543
+ remove_vms_for_failed_request(request_id, expiration_ttl, redis)
1544
+ true
1545
+ end
1546
+
1547
+ def remove_vms_for_failed_request(request_id, expiration_ttl, redis)
1548
+ request_hash = redis.hgetall("vmpooler__odrequest__#{request_id}")
1549
+ Parsing.get_platform_pool_count(request_hash['requested']) do |platform_alias, pool, _count|
1550
+ pools_filled = redis.smembers("vmpooler__#{request_id}__#{platform_alias}__#{pool}")
1551
+ redis.pipelined do
1552
+ pools_filled&.each do |vm|
1553
+ move_vm_queue(pool, vm, 'running', 'completed', redis, "moved to completed queue. '#{request_id}' could not be filled in time")
1554
+ end
1555
+ redis.expire("vmpooler__#{request_id}__#{platform_alias}__#{pool}", expiration_ttl)
1556
+ end
1557
+ end
1558
+ end
1559
+
1216
1560
  def execute!(maxloop = 0, loop_delay = 1)
1217
1561
  $logger.log('d', 'starting vmpooler')
1218
1562
 
1219
- # Clear out the tasks manager, as we don't know about any tasks at this point
1220
- $redis.set('vmpooler__tasks__clone', 0)
1221
- # Clear out vmpooler__migrations since stale entries may be left after a restart
1222
- $redis.del('vmpooler__migration')
1563
+ @redis.with_metrics do |redis|
1564
+ # Clear out the tasks manager, as we don't know about any tasks at this point
1565
+ redis.set('vmpooler__tasks__clone', 0)
1566
+ redis.set('vmpooler__tasks__ondemandclone', 0)
1567
+ # Clear out vmpooler__migrations since stale entries may be left after a restart
1568
+ redis.del('vmpooler__migration')
1569
+ end
1223
1570
 
1224
1571
  # Copy vSphere settings to correct location. This happens with older configuration files
1225
1572
  if !$config[:vsphere].nil? && ($config[:providers].nil? || $config[:providers][:vsphere].nil?)
@@ -1269,7 +1616,7 @@ module Vmpooler
1269
1616
  provider_class = $config[:providers][provider_name.to_sym]['provider_class']
1270
1617
  end
1271
1618
  begin
1272
- $providers[provider_name] = create_provider_object($config, $logger, $metrics, provider_class, provider_name, {}) if $providers[provider_name].nil?
1619
+ $providers[provider_name] = create_provider_object($config, $logger, $metrics, @redis, provider_class, provider_name, {}) if $providers[provider_name].nil?
1273
1620
  rescue StandardError => e
1274
1621
  $logger.log('s', "Error while creating provider for pool #{pool['name']}: #{e}")
1275
1622
  raise
@@ -1303,6 +1650,13 @@ module Vmpooler
1303
1650
  end
1304
1651
  end
1305
1652
 
1653
+ if !$threads['ondemand_provisioner']
1654
+ check_ondemand_requests
1655
+ elsif !$threads['ondemand_provisioner'].alive?
1656
+ $logger.log('d', '[!] [ondemand_provisioner] worker thread died, restarting')
1657
+ check_ondemand_requests(check_loop_delay_min, check_loop_delay_max, check_loop_delay_decay)
1658
+ end
1659
+
1306
1660
  sleep(loop_delay)
1307
1661
 
1308
1662
  unless maxloop == 0