vmpooler 0.13.1 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,8 +11,10 @@ module Vmpooler
11
11
  def initialize(options = {}, &block)
12
12
  super(options, &block)
13
13
  @metrics = options[:metrics]
14
- @metric_prefix = options[:metric_prefix]
15
- @metric_prefix = 'connectionpool' if @metric_prefix.nil? || @metric_prefix == ''
14
+ @connpool_type = options[:connpool_type]
15
+ @connpool_type = 'connectionpool' if @connpool_type.nil? || @connpool_type == ''
16
+ @connpool_provider = options[:connpool_provider]
17
+ @connpool_provider = 'unknown' if @connpool_provider.nil? || @connpool_provider == ''
16
18
  end
17
19
 
18
20
  def with_metrics(options = {})
@@ -20,15 +22,15 @@ module Vmpooler
20
22
  start = Time.now
21
23
  conn = checkout(options)
22
24
  timespan_ms = ((Time.now - start) * 1000).to_i
23
- @metrics&.gauge(@metric_prefix + '.available', @available.length)
24
- @metrics&.timing(@metric_prefix + '.waited', timespan_ms)
25
+ @metrics&.gauge("connection_available.#{@connpool_type}.#{@connpool_provider}", @available.length)
26
+ @metrics&.timing("connection_waited.#{@connpool_type}.#{@connpool_provider}", timespan_ms)
25
27
  begin
26
28
  Thread.handle_interrupt(Exception => :immediate) do
27
29
  yield conn
28
30
  end
29
31
  ensure
30
32
  checkin
31
- @metrics&.gauge(@metric_prefix + '.available', @available.length)
33
+ @metrics&.gauge("connection_available.#{@connpool_type}.#{@connpool_provider}", @available.length)
32
34
  end
33
35
  end
34
36
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'vmpooler/metrics/statsd'
4
+ require 'vmpooler/metrics/graphite'
5
+ require 'vmpooler/metrics/promstats'
6
+ require 'vmpooler/metrics/dummy_statsd'
7
+
8
module Vmpooler
  # Namespace and factory for the metrics backends.
  class Metrics
    # Instantiates the metrics backend selected by the configuration.
    #
    # The first truthy key wins, checked in this order: :statsd, :graphite,
    # :prometheus. When none is set a DummyStatsd is returned.
    #
    # @param logger passed as the first constructor argument of the chosen backend
    # @param params [Hash] configuration; the value stored under the matching
    #   key is passed as the backend's second constructor argument
    # @return the backend instance
    def self.init(logger, params)
      return Vmpooler::Metrics::Statsd.new(logger, params[:statsd]) if params[:statsd]
      return Vmpooler::Metrics::Graphite.new(logger, params[:graphite]) if params[:graphite]
      return Vmpooler::Metrics::Promstats.new(logger, params[:prometheus]) if params[:prometheus]

      Vmpooler::Metrics::DummyStatsd.new
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module Vmpooler
  class Metrics
    # Metrics backend that discards everything it is given.
    class DummyStatsd < Metrics
      # Nothing in this class assigns these, so the readers return nil.
      attr_reader :server, :port, :prefix

      # Accepts and ignores any arguments.
      def initialize(*); end

      # increment, gauge and timing all accept any arguments, record nothing
      # and return true.
      %i[increment gauge timing].each do |recorder|
        define_method(recorder) { |*| true }
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems' unless defined?(Gem)
4
+
5
module Vmpooler
  class Metrics
    # Metrics backend that writes one "<prefix>.<path> <value> <epoch>" line
    # per metric to a Graphite server over TCP.
    class Graphite < Metrics
      attr_reader :server, :port, :prefix

      # @param logger object responding to log(level, message); used to report
      #   delivery failures
      # @param params [Hash] string-keyed settings: 'server' (required),
      #   'port' (default 2003) and 'prefix' (default 'vmpooler')
      # @raise [ArgumentError] when 'server' is nil or empty
      def initialize(logger, params = {})
        raise ArgumentError, "Graphite server is required. Config: #{params.inspect}" if params['server'].nil? || params['server'].empty?

        @server = params['server']
        @port = params['port'] || 2003
        @prefix = params['prefix'] || 'vmpooler'
        @logger = logger
      end

      # Sends the value 1 for +label+.
      def increment(label)
        log label, 1
      end

      # Sends +value+ for +label+.
      def gauge(label, value)
        log label, value
      end

      # Sends +duration+ for +label+.
      def timing(label, duration)
        log label, duration
      end

      # Sends a single metric line on a background thread so the caller never
      # waits on the network.
      #
      # Failures are handled inside the thread: an exception raised there can
      # never reach a rescue attached to this method, which has already
      # returned by the time the socket is opened.
      #
      # @return [Thread] the thread performing the send
      def log(path, value)
        Thread.new { transmit(path, value) }
      rescue StandardError => e
        # Only reachable when the thread itself could not be started.
        @logger.log('s', "[!] Failure logging #{path} to graphite server [#{server}:#{port}]: #{e}")
      end

      private

      # Opens a connection, writes one metric line and closes the socket.
      # Every StandardError is reported rather than raised, so a metrics
      # outage cannot kill the sending thread with an unhandled exception.
      def transmit(path, value)
        socket = TCPSocket.new(server, port)
        begin
          socket.puts "#{prefix}.#{path} #{value} #{Time.now.to_i}"
        ensure
          socket.close
        end
      rescue Errno::EADDRNOTAVAIL => e
        warn "Could not assign address to graphite server #{server}: #{e}"
      rescue StandardError => e
        @logger.log('s', "[!] Failure logging #{path} to graphite server [#{server}:#{port}]: #{e}")
      end
    end
  end
end
@@ -0,0 +1,380 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'prometheus/client'
4
+
5
module Vmpooler
  class Metrics
    # Metrics backend that translates dotted vmpooler metric labels
    # (e.g. "checkout.success.<poolname>") into labelled Prometheus metrics.
    class Promstats < Metrics
      attr_reader :prefix, :endpoint, :metrics_prefix

      # Constants for Metric Types
      M_COUNTER = 1
      M_GAUGE = 2
      M_SUMMARY = 3
      M_HISTOGRAM = 4

      # Customised Bucket set to use for the Pooler clone times set to more appropriate intervals.
      POOLER_CLONE_TIME_BUCKETS = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 120.0, 180.0, 240.0, 300.0, 600.0].freeze
      POOLER_READY_TIME_BUCKETS = [30.0, 60.0, 120.0, 180.0, 240.0, 300.0, 500.0, 800.0, 1200.0, 1600.0].freeze
      # Bucket set used for the connection wait-time histogram.
      REDIS_CONNECT_BUCKETS = [1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 18.0, 23.0].freeze

      # @param logger object responding to log(level, message)
      # @param params [Hash] string-keyed settings: 'prefix' (value of the
      #   vmpooler_instance label, default 'vmpooler'), 'metrics_prefix'
      #   (prefix of every metric name, default 'vmpooler') and 'endpoint'
      #   (default '/prometheus')
      def initialize(logger, params = {})
        @prefix = params['prefix'] || 'vmpooler'
        @metrics_prefix = params['metrics_prefix'] || 'vmpooler'
        @endpoint = params['endpoint'] || '/prometheus'
        @logger = logger

        # Must be set per instance: an assignment in the class body would
        # create a variable on the class, leaving the instance's table nil
        # until setup_prometheus_metrics runs.
        @p_metrics = {}

        # Set up prometheus registry and data structures
        @prometheus = Prometheus::Client.registry
      end

      # Metrics structure used to register the metrics and also translate/interpret the incoming metrics.
      #
      # Per entry:
      #   mtype              - one of the M_* constants
      #   torun              - services (:api / :manager) that register the metric
      #   docstring          - help text; suffix descriptions are appended to it
      #   prom_metric_prefix - prometheus metric name (before any suffix)
      #   metric_suffixes    - optional; one metric is registered per suffix
      #   param_labels       - label names filled from the trailing label parts
      #   buckets            - optional histogram buckets
      def vmpooler_metrics_table
        {
          errors: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'Count of errors for pool',
            prom_metric_prefix: "#{@metrics_prefix}_errors",
            metric_suffixes: {
              markedasfailed: 'timeout waiting for instance to initialise',
              duplicatehostname: 'unable to create instance due to duplicate hostname',
              staledns: 'unable to create instance due to duplicate DNS record'
            },
            param_labels: %i[template_name]
          },
          user: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'Number of pool instances this user created created',
            prom_metric_prefix: "#{@metrics_prefix}_user",
            param_labels: %i[user poolname]
          },
          usage_litmus: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'Pools by Litmus job usage',
            prom_metric_prefix: "#{@metrics_prefix}_usage_litmus",
            param_labels: %i[user poolname]
          },
          usage_jenkins_instance: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'Pools by Jenkins instance usage',
            prom_metric_prefix: "#{@metrics_prefix}_usage_jenkins_instance",
            param_labels: %i[jenkins_instance value_stream poolname]
          },
          usage_branch_project: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'Pools by branch/project usage',
            prom_metric_prefix: "#{@metrics_prefix}_usage_branch_project",
            param_labels: %i[branch project poolname]
          },
          usage_job_component: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'Pools by job/component usage',
            prom_metric_prefix: "#{@metrics_prefix}_usage_job_component",
            param_labels: %i[job_name component_to_test poolname]
          },
          checkout: {
            mtype: M_COUNTER,
            torun: %i[api],
            docstring: 'Pool checkout counts',
            prom_metric_prefix: "#{@metrics_prefix}_checkout",
            metric_suffixes: {
              nonresponsive: 'checkout failed - non responsive machine',
              empty: 'checkout failed - no machine',
              success: 'successful checkout',
              invalid: 'checkout failed - invalid template'
            },
            param_labels: %i[poolname]
          },
          delete: {
            mtype: M_COUNTER,
            torun: %i[api],
            docstring: 'Delete machine',
            prom_metric_prefix: "#{@metrics_prefix}_delete",
            metric_suffixes: {
              success: 'succeeded',
              failed: 'failed'
            },
            param_labels: []
          },
          ondemandrequest_generate: {
            mtype: M_COUNTER,
            torun: %i[api],
            docstring: 'Ondemand request',
            prom_metric_prefix: "#{@metrics_prefix}_ondemandrequest_generate",
            metric_suffixes: {
              duplicaterequests: 'failed duplicate request',
              success: 'succeeded'
            },
            param_labels: []
          },
          ondemandrequest_fail: {
            mtype: M_COUNTER,
            torun: %i[api],
            docstring: 'Ondemand request failure',
            prom_metric_prefix: "#{@metrics_prefix}_ondemandrequest_fail",
            metric_suffixes: {
              toomanyrequests: 'too many requests',
              invalid: 'invalid poolname'
            },
            param_labels: %i[poolname]
          },
          config: {
            mtype: M_COUNTER,
            torun: %i[api],
            docstring: 'vmpooler pool configuration request',
            prom_metric_prefix: "#{@metrics_prefix}_config",
            metric_suffixes: { invalid: 'Invalid' },
            param_labels: %i[poolname]
          },
          poolreset: {
            mtype: M_COUNTER,
            torun: %i[api],
            docstring: 'Pool reset counter',
            prom_metric_prefix: "#{@metrics_prefix}_poolreset",
            metric_suffixes: { invalid: 'Invalid Pool' },
            param_labels: %i[poolname]
          },
          connect: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'vmpooler connect (to vSphere)',
            prom_metric_prefix: "#{@metrics_prefix}_connect",
            metric_suffixes: {
              open: 'Connect Succeeded',
              fail: 'Connect Failed'
            },
            param_labels: []
          },
          migrate_from: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'vmpooler machine migrated from',
            prom_metric_prefix: "#{@metrics_prefix}_migrate_from",
            param_labels: %i[host_name]
          },
          migrate_to: {
            mtype: M_COUNTER,
            torun: %i[manager],
            docstring: 'vmpooler machine migrated to',
            prom_metric_prefix: "#{@metrics_prefix}_migrate_to",
            param_labels: %i[host_name]
          },
          api_vm: {
            mtype: M_COUNTER,
            torun: %i[api],
            docstring: 'Total number of HTTP request/sub-operations handled by the Rack application under the /vm endpoint',
            prom_metric_prefix: "#{@metrics_prefix}_http_requests_vm_total",
            param_labels: %i[method subpath operation]
          },
          ready: {
            mtype: M_GAUGE,
            torun: %i[manager],
            docstring: 'vmpooler number of machines in ready State',
            prom_metric_prefix: "#{@metrics_prefix}_ready",
            param_labels: %i[poolname]
          },
          running: {
            mtype: M_GAUGE,
            torun: %i[manager],
            docstring: 'vmpooler number of machines running',
            prom_metric_prefix: "#{@metrics_prefix}_running",
            param_labels: %i[poolname]
          },
          connection_available: {
            mtype: M_GAUGE,
            torun: %i[manager],
            docstring: 'vmpooler redis connections available',
            prom_metric_prefix: "#{@metrics_prefix}_connection_available",
            param_labels: %i[type provider]
          },
          time_to_ready_state: {
            mtype: M_HISTOGRAM,
            torun: %i[manager],
            buckets: POOLER_READY_TIME_BUCKETS,
            docstring: 'Time taken for machine to read ready state for pool',
            prom_metric_prefix: "#{@metrics_prefix}_time_to_ready_state",
            param_labels: %i[poolname]
          },
          migrate: {
            mtype: M_HISTOGRAM,
            torun: %i[manager],
            buckets: POOLER_CLONE_TIME_BUCKETS,
            docstring: 'vmpooler time taken to migrate machine for pool',
            prom_metric_prefix: "#{@metrics_prefix}_migrate",
            param_labels: %i[poolname]
          },
          clone: {
            mtype: M_HISTOGRAM,
            torun: %i[manager],
            buckets: POOLER_CLONE_TIME_BUCKETS,
            docstring: 'vmpooler time taken to clone machine',
            prom_metric_prefix: "#{@metrics_prefix}_clone",
            param_labels: %i[poolname]
          },
          destroy: {
            mtype: M_HISTOGRAM,
            torun: %i[manager],
            buckets: POOLER_CLONE_TIME_BUCKETS,
            docstring: 'vmpooler time taken to destroy machine',
            prom_metric_prefix: "#{@metrics_prefix}_destroy",
            param_labels: %i[poolname]
          },
          connection_waited: {
            mtype: M_HISTOGRAM,
            torun: %i[manager],
            buckets: REDIS_CONNECT_BUCKETS,
            docstring: 'vmpooler redis connection wait time',
            prom_metric_prefix: "#{@metrics_prefix}_connection_waited",
            param_labels: %i[type provider]
          }
        }
      end

      # Helper to add individual prom metric.
      # Allow Histograms to specify the bucket size.
      #
      # @param metric_spec [Hash] one entry of vmpooler_metrics_table
      # @param name [String] full prometheus metric name
      # @param docstring [String] help text for the metric
      # @raise [RuntimeError] when metric_spec[:mtype] is not an M_* constant
      def add_prometheus_metric(metric_spec, name, docstring)
        metric_class =
          case metric_spec[:mtype]
          when M_COUNTER then Prometheus::Client::Counter
          when M_GAUGE then Prometheus::Client::Gauge
          when M_SUMMARY then Prometheus::Client::Summary
          when M_HISTOGRAM then Prometheus::Client::Histogram
          else
            raise("Unable to register metric #{name} with metric type #{metric_spec[:mtype]}")
          end

        # Every metric carries a vmpooler_instance label preset to @prefix.
        options = {
          docstring: docstring,
          labels: metric_spec[:param_labels] + [:vmpooler_instance],
          preset_labels: { vmpooler_instance: @prefix }
        }
        # Only histograms take buckets, and only when the spec provides them.
        options[:buckets] = metric_spec[:buckets] if metric_spec[:mtype] == M_HISTOGRAM && metric_spec.key?(:buckets)

        @prometheus.register(metric_class.new(name.to_sym, **options))
      end

      # Top level method to register all the prometheus metrics.
      #
      # @param torun [Array<Symbol>] services being started; a metric is
      #   registered only when its :torun list shares an element with this
      def setup_prometheus_metrics(torun)
        @p_metrics = vmpooler_metrics_table
        @p_metrics.each_value do |metric_spec|
          # Only register metrics appropriate to api or manager
          next if (torun & metric_spec[:torun]).empty?

          if metric_spec.key? :metric_suffixes
            # Register one metric per suffix, named <prefix>_<suffix>.
            metric_spec[:metric_suffixes].each do |suffix, description|
              add_prometheus_metric(
                metric_spec,
                "#{metric_spec[:prom_metric_prefix]}_#{suffix}",
                "#{metric_spec[:docstring]} #{description}"
              )
            end
          else
            # No additional suffixes, so register this as a single metric.
            add_prometheus_metric(metric_spec, metric_spec[:prom_metric_prefix], metric_spec[:docstring])
          end
        end
      end

      # Locate a metric and check/interpret the sub-fields of a dotted label.
      #
      # The first part selects the table entry; when the entry has
      # :metric_suffixes the second part selects the suffix; the remaining
      # parts fill the entry's :param_labels.
      #
      # @param label [String] e.g. "checkout.success.pool1"
      # @return [Hash] shallow copy of the table entry with :metric_name and
      #   :labels added
      # @raise [RuntimeError] when the metric or its suffix is unknown or missing
      def find_metric(label)
        sublabels = label.split('.')
        # An empty label yields no parts; &. keeps that on the "Invalid Metric" path.
        metric_key = sublabels.shift&.to_sym
        raise("Invalid Metric #{metric_key} for #{label}") unless @p_metrics.key? metric_key

        metric = @p_metrics[metric_key].clone

        if metric.key? :metric_suffixes
          metric_subkey = sublabels.shift
          # A label that stops before the suffix is reported like an unknown one.
          unless metric_subkey && metric[:metric_suffixes].key?(metric_subkey.to_sym)
            raise("Invalid Metric #{metric_key}_#{metric_subkey} for #{label}")
          end

          metric[:metric_name] = "#{metric[:prom_metric_prefix]}_#{metric_subkey}"
        else
          metric[:metric_name] = metric[:prom_metric_prefix]
        end

        # Check if we are looking for a parameter value at last element.
        if metric.key? :param_labels
          metric[:labels] = {}
          # Special case processing here - if there is only one parameter label then make sure
          # we append all of the remaining contents of the metric with "." separators to ensure
          # we get full nodenames (e.g. for Migration to node operations)
          if metric[:param_labels].length == 1
            metric[:labels][metric[:param_labels].first] = sublabels.join('.')
          else
            # Fill from the right so the last label part maps to the last label name.
            metric[:param_labels].reverse_each do |param_label|
              metric[:labels][param_label] = sublabels.pop
            end
          end
        end
        metric
      end

      # Resolves a label to its interpreted spec and the registered prometheus
      # metric object (nil when that metric was not registered).
      #
      # @return [Array] [spec, prometheus_metric]
      def get(label)
        metric = find_metric(label)
        [metric, @prometheus.get(metric[:metric_name])]
      end

      # Note - Catch and log metrics failures so they can be noted, but don't interrupt vmpooler operation.
      def increment(label)
        counter_metric, counter = get(label)
        counter.increment(labels: counter_metric[:labels])
      rescue StandardError => e
        @logger.log('s', "[!] prometheus error logging metric #{label} increment : #{e}")
      end

      # Sets the gauge for +label+ to value.to_i; a nil value is ignored.
      # Failures are logged, never raised.
      def gauge(label, value)
        return if value.nil?

        gauge_metric, prom_gauge = get(label)
        prom_gauge.set(value.to_i, labels: gauge_metric[:labels])
      rescue StandardError => e
        @logger.log('s', "[!] prometheus error logging gauge #{label}, value #{value}: #{e}")
      end

      # Records duration.to_f as an observation for +label+; a nil duration is
      # ignored. Failures are logged, never raised.
      # https://prometheus.io/docs/practices/histograms/
      def timing(label, duration)
        return if duration.nil?

        histogram_metric, histogram = get(label)
        histogram.observe(duration.to_f, labels: histogram_metric[:labels])
      rescue StandardError => e
        @logger.log('s', "[!] prometheus error logging timing event label #{label}, duration #{duration}: #{e}")
      end
    end
  end
end