vmpooler 0.13.2 → 0.14.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/vmpooler +15 -10
- data/lib/vmpooler.rb +4 -16
- data/lib/vmpooler/api.rb +41 -34
- data/lib/vmpooler/api/helpers.rb +1 -1
- data/lib/vmpooler/api/request_logger.rb +20 -0
- data/lib/vmpooler/api/v1.rb +58 -35
- data/lib/vmpooler/generic_connection_pool.rb +7 -5
- data/lib/vmpooler/metrics.rb +24 -0
- data/lib/vmpooler/metrics/dummy_statsd.rb +24 -0
- data/lib/vmpooler/metrics/graphite.rb +47 -0
- data/lib/vmpooler/metrics/promstats.rb +470 -0
- data/lib/vmpooler/metrics/promstats/collector_middleware.rb +121 -0
- data/lib/vmpooler/metrics/statsd.rb +40 -0
- data/lib/vmpooler/pool_manager.rb +32 -40
- data/lib/vmpooler/providers/dummy.rb +2 -1
- data/lib/vmpooler/providers/vsphere.rb +88 -25
- data/lib/vmpooler/util/parsing.rb +16 -0
- data/lib/vmpooler/version.rb +1 -1
- metadata +25 -6
- data/lib/vmpooler/dummy_statsd.rb +0 -22
- data/lib/vmpooler/graphite.rb +0 -44
- data/lib/vmpooler/statsd.rb +0 -37
@@ -11,8 +11,10 @@ module Vmpooler
|
|
11
11
|
def initialize(options = {}, &block)
|
12
12
|
super(options, &block)
|
13
13
|
@metrics = options[:metrics]
|
14
|
-
@
|
15
|
-
@
|
14
|
+
@connpool_type = options[:connpool_type]
|
15
|
+
@connpool_type = 'connectionpool' if @connpool_type.nil? || @connpool_type == ''
|
16
|
+
@connpool_provider = options[:connpool_provider]
|
17
|
+
@connpool_provider = 'unknown' if @connpool_provider.nil? || @connpool_provider == ''
|
16
18
|
end
|
17
19
|
|
18
20
|
def with_metrics(options = {})
|
@@ -20,15 +22,15 @@ module Vmpooler
|
|
20
22
|
start = Time.now
|
21
23
|
conn = checkout(options)
|
22
24
|
timespan_ms = ((Time.now - start) * 1000).to_i
|
23
|
-
@metrics&.gauge(@
|
24
|
-
@metrics&.timing(@
|
25
|
+
@metrics&.gauge("connection_available.#{@connpool_type}.#{@connpool_provider}", @available.length)
|
26
|
+
@metrics&.timing("connection_waited.#{@connpool_type}.#{@connpool_provider}", timespan_ms)
|
25
27
|
begin
|
26
28
|
Thread.handle_interrupt(Exception => :immediate) do
|
27
29
|
yield conn
|
28
30
|
end
|
29
31
|
ensure
|
30
32
|
checkin
|
31
|
-
@metrics&.gauge(@
|
33
|
+
@metrics&.gauge("connection_available.#{@connpool_type}.#{@connpool_provider}", @available.length)
|
32
34
|
end
|
33
35
|
end
|
34
36
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'vmpooler/metrics/statsd'
|
4
|
+
require 'vmpooler/metrics/graphite'
|
5
|
+
require 'vmpooler/metrics/promstats'
|
6
|
+
require 'vmpooler/metrics/dummy_statsd'
|
7
|
+
|
8
|
+
module Vmpooler
  # Namespace for the metrics back ends and the factory that picks one of them.
  class Metrics
    # Instantiate the metrics object selected by the configuration.
    #
    # logger - passed through unchanged to the chosen back end.
    # params - Hash; the first truthy entry among :statsd, :graphite and
    #          :prometheus (checked in that order) selects the back end and is
    #          handed to its constructor.
    #
    # Returns a Statsd, Graphite or Promstats instance, or a DummyStatsd when
    # none of the three keys is set.
    def self.init(logger, params)
      return Vmpooler::Metrics::Statsd.new(logger, params[:statsd]) if params[:statsd]
      return Vmpooler::Metrics::Graphite.new(logger, params[:graphite]) if params[:graphite]
      return Vmpooler::Metrics::Promstats.new(logger, params[:prometheus]) if params[:prometheus]

      Vmpooler::Metrics::DummyStatsd.new
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Vmpooler
  class Metrics
    # No-op metrics back end: it accepts the same calls as the other back ends
    # and discards whatever it is given.
    class DummyStatsd < Metrics
      # Readers exist for interface parity; nothing in this class assigns them.
      attr_reader :server, :port, :prefix

      # Accepts and ignores any constructor arguments.
      def initialize(*); end

      # increment, gauge and timing each ignore their arguments and return true.
      %i[increment gauge timing].each do |recorder|
        define_method(recorder) { |*| true }
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rubygems' unless defined?(Gem)
|
4
|
+
|
5
|
+
module Vmpooler
  class Metrics
    # Metrics back end that writes each sample to a Graphite server as a
    # "<prefix>.<path> <value> <unix-timestamp>" line over a fresh TCP connection.
    class Graphite < Metrics
      attr_reader :server, :port, :prefix

      # logger - object responding to log(level, message); used to report send failures.
      # params - Hash with string keys: 'server' (required), 'port' (defaults to 2003)
      #          and 'prefix' (defaults to 'vmpooler').
      #
      # Raises ArgumentError when 'server' is nil or empty.
      def initialize(logger, params = {})
        raise ArgumentError, "Graphite server is required. Config: #{params.inspect}" if params['server'].nil? || params['server'].empty?

        @server = params['server']
        @port = params['port'] || 2003
        @prefix = params['prefix'] || 'vmpooler'
        @logger = logger
      end

      # Record a count of 1 against label.
      def increment(label)
        log label, 1
      end

      # Record value against label.
      def gauge(label, value)
        log label, value
      end

      # Record duration against label.
      def timing(label, duration)
        log label, duration
      end

      # Send one sample to the graphite server from a background thread.
      #
      # The rescue clauses live inside the thread: the connection and the write
      # both happen there, so a rescue attached to this method would never see
      # their exceptions.
      #
      # Returns the Thread doing the send.
      def log(path, value)
        Thread.new do
          begin
            socket = TCPSocket.new(server, port)
            begin
              socket.puts "#{prefix}.#{path} #{value} #{Time.now.to_i}"
            ensure
              socket.close
            end
          rescue Errno::EADDRNOTAVAIL => e
            warn "Could not assign address to graphite server #{server}: #{e}"
          rescue StandardError => e
            @logger.log('s', "[!] Failure logging #{path} to graphite server [#{server}:#{port}]: #{e}")
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,470 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'prometheus/client'
|
4
|
+
|
5
|
+
module Vmpooler
|
6
|
+
class Metrics
|
7
|
+
class Promstats < Metrics
|
8
|
+
attr_reader :prefix, :prometheus_endpoint, :prometheus_prefix
|
9
|
+
|
10
|
+
# Constants for Metric Types
|
11
|
+
M_COUNTER = 1
|
12
|
+
M_GAUGE = 2
|
13
|
+
M_SUMMARY = 3
|
14
|
+
M_HISTOGRAM = 4
|
15
|
+
|
16
|
+
# Customised Bucket set to use for the Pooler clone times set to more appropriate intervals.
|
17
|
+
POOLER_CLONE_TIME_BUCKETS = [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 120.0, 180.0, 240.0, 300.0, 600.0].freeze
|
18
|
+
POOLER_READY_TIME_BUCKETS = [30.0, 60.0, 120.0, 180.0, 240.0, 300.0, 500.0, 800.0, 1200.0, 1600.0].freeze
|
19
|
+
# Same for redis connection times - this is the same as the current Prometheus Default.
|
20
|
+
# https://github.com/prometheus/client_ruby/blob/master/lib/prometheus/client/histogram.rb#L14
|
21
|
+
REDIS_CONNECT_BUCKETS = [1.0, 2.0, 3.0, 5.0, 8.0, 13.0, 18.0, 23.0].freeze
|
22
|
+
|
23
|
+
@p_metrics = {}
|
24
|
+
|
25
|
+
# logger - object responding to log(level, message); used to report metric failures.
# params - Hash with string keys: 'prefix', 'prometheus_prefix' (both default to
#          'vmpooler') and 'prometheus_endpoint' (defaults to '/prometheus').
def initialize(logger, params = {})
  @prefix = params['prefix'] || 'vmpooler'
  @prometheus_prefix = params['prometheus_prefix'] || 'vmpooler'
  @prometheus_endpoint = params['prometheus_endpoint'] || '/prometheus'
  @logger = logger

  # Start with an empty per-instance table so metric lookups made before
  # setup_prometheus_metrics has run see a Hash instead of nil.
  @p_metrics = {}

  # Set up prometheus registry and data structures
  @prometheus = Prometheus::Client.registry
end
|
34
|
+
|
35
|
+
=begin # rubocop:disable Style/BlockComments
|
36
|
+
The Metrics table is used to register metrics and translate/interpret the incoming metrics.
|
37
|
+
|
38
|
+
This table describes all of the prometheus metrics that are recognised by the application.
|
39
|
+
The background documentation for defining metrics is at: https://prometheus.io/docs/introduction/
|
40
|
+
In particular, the naming practices should be adhered to: https://prometheus.io/docs/practices/naming/
|
41
|
+
The Ruby Client docs are also useful: https://github.com/prometheus/client_ruby
|
42
|
+
|
43
|
+
The table here allows the currently used stats definitions to be translated correctly for Prometheus.
|
44
|
+
The current format is of the form A.B.C, where the final fields may be actual values (e.g. poolname).
|
45
|
+
Prometheus metrics cannot use the '.' as a character, so this is either translated into '_' or
|
46
|
+
variable parameters are expressed as labels accompanying the metric.
|
47
|
+
|
48
|
+
Sample statistics are:
|
49
|
+
# Example showing hostnames (FQDN)
|
50
|
+
migrate_from.pix-jj26-chassis1-2.ops.puppetlabs.net
|
51
|
+
migrate_to.pix-jj26-chassis1-8.ops.puppetlabs.net
|
52
|
+
|
53
|
+
# Example showing poolname as a parameter
|
54
|
+
poolreset.invalid.centos-8-x86_64
|
55
|
+
|
56
|
+
# Examples showing similar sub-typed checkout stats
|
57
|
+
checkout.empty.centos-8-x86_64
|
58
|
+
checkout.invalid.centos-8-x86_64
|
59
|
+
checkout.invalid.unknown
|
60
|
+
checkout.success.centos-8-x86_64
|
61
|
+
|
62
|
+
# Stats without any final parameter.
|
63
|
+
connect.fail
|
64
|
+
connect.open
|
65
|
+
delete.failed
|
66
|
+
delete.success
|
67
|
+
|
68
|
+
# Stats with multiple param_labels
|
69
|
+
user.john.debian-8-x86_64-pixa4
|
70
|
+
|
71
|
+
The metrics implementation here preserves the existing framework which will continue to support
|
72
|
+
graphite and statsd (since vmpooler is used outside of puppet). Some rationalisation and renaming
|
73
|
+
of the actual metrics was done to get a more usable model to fit within the prometheus framework.
|
74
|
+
This particularly applies to the user stats collected once individual machines are terminated as
|
75
|
+
this would have challenged prometheus' ability due to multiple (8) parameters being collected
|
76
|
+
in a single measure (which has a very high cardinality).
|
77
|
+
|
78
|
+
Prometheus requires all metrics to be pre-registered (which is the primary reason for this
|
79
|
+
table) and also uses labels to differentiate the characteristics of the measurement. This
|
80
|
+
is used throughout to capture information such as poolnames. So for example, this is a sample
|
81
|
+
of the prometheus metrics generated for the "vmpooler_ready" measurement:
|
82
|
+
|
83
|
+
# TYPE vmpooler_ready gauge
|
84
|
+
# HELP vmpooler_ready vmpooler number of machines in ready State
|
85
|
+
vmpooler_ready{vmpooler_instance="vmpooler",poolname="win-10-ent-x86_64-pixa4"} 2.0
|
86
|
+
vmpooler_ready{vmpooler_instance="vmpooler",poolname="debian-8-x86_64-pixa4"} 2.0
|
87
|
+
vmpooler_ready{vmpooler_instance="vmpooler",poolname="centos-8-x86_64-pixa4"} 2.0
|
88
|
+
|
89
|
+
Prometheus supports the following metric types:
|
90
|
+
(see https://prometheus.io/docs/concepts/metric_types/)
|
91
|
+
|
92
|
+
Counter (increment):
|
93
|
+
A counter is a cumulative metric that represents a single monotonically increasing counter whose
|
94
|
+
value can only increase or be reset to zero on restart
|
95
|
+
|
96
|
+
Gauge:
|
97
|
+
A gauge is a metric that represents a single numerical value that can arbitrarily go up and down.
|
98
|
+
|
99
|
+
Histogram:
|
100
|
+
A histogram samples observations (usually things like request durations or response sizes) and
|
101
|
+
counts them in configurable buckets. It also provides a sum of all observed values.
|
102
|
+
This replaces the timer metric supported by statsd
|
103
|
+
|
104
|
+
Summary :
|
105
|
+
Summary provides a total count of observations and a sum of all observed values, it calculates
|
106
|
+
configurable quantiles over a sliding time window.
|
107
|
+
(Summary is not used in vmpooler)
|
108
|
+
|
109
|
+
vmpooler_metrics_table is a table of hashes, where the hash key represents the first part of the
|
110
|
+
metric name, e.g. for the metric 'delete.*' (see above) the key would be 'delete:'. "Sub-metrics",
|
111
|
+
are supported, again for the 'delete.*' example, this can be subbed into '.failed' and '.success'
|
112
|
+
|
113
|
+
The entries within the hash are as follows:
|
114
|
+
|
115
|
+
mtype:
|
116
|
+
Metric type, which is one of the following constants:
|
117
|
+
M_COUNTER = 1
|
118
|
+
M_GAUGE = 2
|
119
|
+
M_SUMMARY = 3
|
120
|
+
M_HISTOGRAM = 4
|
121
|
+
|
122
|
+
torun:
|
123
|
+
Indicates which process the metric is for - within vmpooler this is either ':api' or ':manager'
|
124
|
+
(there is a suggestion that we change this to two separate tables).
|
125
|
+
|
126
|
+
docstring:
|
127
|
+
Documentation string for the metric - this is displayed as HELP text by the endpoint.
|
128
|
+
|
129
|
+
metric_suffixes:
|
130
|
+
Array of sub-metrics of the form 'sub-metric: "doc-string for sub-metric"'. This supports
|
131
|
+
the generation of individual sub-metrics for all elements in the array.
|
132
|
+
|
133
|
+
param_labels:
|
134
|
+
This is an optional array of symbols for the final labels in a metric. It should not be
|
135
|
+
specified if there are no additional parameters.
|
136
|
+
|
137
|
+
If it is specified, it can either be a single symbol, or two or more symbols. The treatment
|
138
|
+
differs if there is only one symbol given as all of the remainder of the metric string
|
139
|
+
supplied is collected into a label with the symbol name. This allows the handling of
|
140
|
+
node names (FQDN).
|
141
|
+
|
142
|
+
To illustrate:
|
143
|
+
1. In the 'connect.*' or 'delete.*' example above, it should not be specified.
|
144
|
+
2. For the 'migrate_from.*' example above, the remainder of the measure is collected
|
145
|
+
as the 'host_name' label.
|
146
|
+
3. For the 'user' metric (param_labels: user, poolname), the first parameter is treated
|
147
|
+
as the user name, and the second as the pool name.
|
148
|
+
|
149
|
+
=end
|
150
|
+
# Table of every prometheus metric known to vmpooler, keyed by the first field
# of the incoming metric label.
#
# Each entry holds :mtype (one of the M_* constants), :torun (which process
# registers it, :api and/or :manager), :docstring (HELP text), and optionally
# :metric_suffixes (sub-metric name => extra HELP text), :param_labels (label
# names taken from the remaining fields) and :buckets (histogram boundaries).
#
# Returns a freshly built Hash on every call.
def vmpooler_metrics_table
  {
    errors: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'Count of errors for pool',
      metric_suffixes: {
        markedasfailed: 'timeout waiting for instance to initialise',
        duplicatehostname: 'unable to create instance due to duplicate hostname',
        staledns: 'unable to create instance due to duplicate DNS record'
      },
      param_labels: %i[template_name]
    },
    user: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'Number of pool instances this user created',
      param_labels: %i[user poolname]
    },
    usage_litmus: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'Pools by Litmus job usage',
      param_labels: %i[user poolname]
    },
    usage_jenkins_instance: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'Pools by Jenkins instance usage',
      param_labels: %i[jenkins_instance value_stream poolname]
    },
    usage_branch_project: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'Pools by branch/project usage',
      param_labels: %i[branch project poolname]
    },
    usage_job_component: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'Pools by job/component usage',
      param_labels: %i[job_name component_to_test poolname]
    },
    checkout: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Pool checkout counts',
      metric_suffixes: {
        nonresponsive: 'checkout failed - non responsive machine',
        empty: 'checkout failed - no machine',
        success: 'successful checkout',
        invalid: 'checkout failed - invalid template'
      },
      param_labels: %i[poolname]
    },
    delete: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Delete machine',
      metric_suffixes: {
        success: 'succeeded',
        failed: 'failed'
      },
      param_labels: []
    },
    ondemandrequest_generate: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Ondemand request',
      metric_suffixes: {
        duplicaterequests: 'failed duplicate request',
        success: 'succeeded'
      },
      param_labels: []
    },
    ondemandrequest_fail: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Ondemand request failure',
      metric_suffixes: {
        toomanyrequests: 'too many requests',
        invalid: 'invalid poolname'
      },
      param_labels: %i[poolname]
    },
    config: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'vmpooler pool configuration request',
      metric_suffixes: { invalid: 'Invalid' },
      param_labels: %i[poolname]
    },
    poolreset: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Pool reset counter',
      metric_suffixes: { invalid: 'Invalid Pool' },
      param_labels: %i[poolname]
    },
    connect: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'vmpooler connect (to vSphere)',
      metric_suffixes: {
        open: 'Connect Succeeded',
        fail: 'Connect Failed'
      },
      param_labels: []
    },
    migrate_from: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'vmpooler machine migrated from',
      param_labels: %i[host_name]
    },
    migrate_to: {
      mtype: M_COUNTER,
      torun: %i[manager],
      docstring: 'vmpooler machine migrated to',
      param_labels: %i[host_name]
    },
    http_requests_vm_total: {
      mtype: M_COUNTER,
      torun: %i[api],
      docstring: 'Total number of HTTP request/sub-operations handled by the Rack application under the /vm endpoint',
      param_labels: %i[method subpath operation]
    },
    ready: {
      mtype: M_GAUGE,
      torun: %i[manager],
      docstring: 'vmpooler number of machines in ready State',
      param_labels: %i[poolname]
    },
    running: {
      mtype: M_GAUGE,
      torun: %i[manager],
      docstring: 'vmpooler number of machines running',
      param_labels: %i[poolname]
    },
    connection_available: {
      mtype: M_GAUGE,
      torun: %i[manager],
      docstring: 'vmpooler redis connections available',
      param_labels: %i[type provider]
    },
    time_to_ready_state: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_READY_TIME_BUCKETS,
      docstring: 'Time taken for machine to reach ready state for pool',
      param_labels: %i[poolname]
    },
    migrate: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_CLONE_TIME_BUCKETS,
      docstring: 'vmpooler time taken to migrate machine for pool',
      param_labels: %i[poolname]
    },
    clone: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_CLONE_TIME_BUCKETS,
      docstring: 'vmpooler time taken to clone machine',
      param_labels: %i[poolname]
    },
    destroy: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: POOLER_CLONE_TIME_BUCKETS,
      docstring: 'vmpooler time taken to destroy machine',
      param_labels: %i[poolname]
    },
    connection_waited: {
      mtype: M_HISTOGRAM,
      torun: %i[manager],
      buckets: REDIS_CONNECT_BUCKETS,
      docstring: 'vmpooler redis connection wait time',
      param_labels: %i[type provider]
    }
  }
end
|
332
|
+
|
333
|
+
# Helper to add individual prom metric.
|
334
|
+
# Allow Histograms to specify the bucket size.
|
335
|
+
# Build one prometheus metric from a metrics-table entry and register it.
#
# metric_spec - Hash entry from the metrics table. :mtype selects the metric
#               class, :param_labels (optional) lists the label names, and
#               :buckets (optional, only honoured for histograms) sets the
#               bucket boundaries.
# name        - full metric name; converted to a Symbol.
# docstring   - HELP text for the metric.
#
# Every metric also gets a :vmpooler_instance label preset to @prefix.
#
# Returns the result of @prometheus.register.
# Raises RuntimeError when :mtype is not one of the M_* constants.
def add_prometheus_metric(metric_spec, name, docstring)
  metric_class =
    case metric_spec[:mtype]
    when M_COUNTER then Prometheus::Client::Counter
    when M_GAUGE then Prometheus::Client::Gauge
    when M_SUMMARY then Prometheus::Client::Summary
    when M_HISTOGRAM then Prometheus::Client::Histogram
    else
      raise("Unable to register metric #{name} with metric type #{metric_spec[:mtype]}")
    end

  options = {
    docstring: docstring,
    # :param_labels may be left out of a table entry; treat that as "no extra labels".
    labels: Array(metric_spec[:param_labels]) + [:vmpooler_instance],
    preset_labels: { vmpooler_instance: @prefix }
  }
  # Only histograms take custom buckets, and only when the entry supplies them.
  options[:buckets] = metric_spec[:buckets] if metric_spec[:mtype] == M_HISTOGRAM && metric_spec.key?(:buckets)

  @prometheus.register(metric_class.new(name.to_sym, **options))
end
|
367
|
+
|
368
|
+
# Top level method to register all the prometheus metrics.
|
369
|
+
|
370
|
+
# Load the metrics table into @p_metrics and register every entry that applies
# to this process.
#
# torun - Array of process symbols; an entry is registered when its :torun
#         list shares at least one symbol with it.
#
# An entry with :metric_suffixes yields one metric per suffix, named
# "<prometheus_prefix>_<name>_<suffix>" with the suffix text appended to the
# docstring. Any other entry yields a single "<prometheus_prefix>_<name>".
def setup_prometheus_metrics(torun)
  @p_metrics = vmpooler_metrics_table
  @p_metrics.each do |name, metric_spec|
    # Skip metrics that belong to the other process.
    next if (torun & metric_spec[:torun]).empty?

    unless metric_spec.key?(:metric_suffixes)
      add_prometheus_metric(metric_spec, "#{@prometheus_prefix}_#{name}", metric_spec[:docstring])
      next
    end

    metric_spec[:metric_suffixes].each do |suffix, suffix_doc|
      add_prometheus_metric(
        metric_spec,
        "#{@prometheus_prefix}_#{name}_#{suffix}",
        "#{metric_spec[:docstring]} #{suffix_doc}"
      )
    end
  end
end
|
395
|
+
|
396
|
+
# Locate a metric and check/interpret the sub-fields.
|
397
|
+
# Translate a dotted metric label into its table entry plus the prometheus
# metric name and label values.
#
# label - String of the form "key[.suffix][.param ...]", e.g.
#         "checkout.success.centos-8-x86_64" or "migrate_from.host.example.net".
#
# Returns a shallow copy of the table entry with :metric_name added and, when
# the entry has :param_labels, a :labels Hash of label name => value.
# Raises RuntimeError ("Invalid Metric ...") when the key is not in the table,
# or when the entry requires a suffix that is missing or unknown.
def find_metric(label)
  sublabels = label.split('.')
  # An empty label has no first field, and @p_metrics may not be loaded yet;
  # both are reported as an invalid metric.
  metric_key = sublabels.shift&.to_sym
  raise("Invalid Metric #{metric_key} for #{label}") unless @p_metrics&.key?(metric_key)

  # Copy so that :metric_name and :labels are not written into the shared table entry.
  metric = @p_metrics[metric_key].clone

  if metric.key? :metric_suffixes
    metric_subkey = sublabels.shift&.to_sym
    # A missing suffix (e.g. plain "delete") is as invalid as an unknown one.
    raise("Invalid Metric #{metric_key}_#{metric_subkey} for #{label}") unless metric_subkey && metric[:metric_suffixes].key?(metric_subkey)

    metric[:metric_name] = "#{@prometheus_prefix}_#{metric_key}_#{metric_subkey}"
  else
    metric[:metric_name] = "#{@prometheus_prefix}_#{metric_key}"
  end

  # Check if we are looking for a parameter value at last element.
  if metric.key? :param_labels
    metric[:labels] = {}
    if metric[:param_labels].length == 1
      # A single label takes everything that is left, re-joined with '.', so
      # that values containing dots (e.g. full node names) stay intact.
      metric[:labels][metric[:param_labels].first] = sublabels.join('.')
    else
      # Several labels are filled from the end of the label backwards.
      metric[:param_labels].reverse_each do |param_label|
        metric[:labels][param_label] = sublabels.pop
      end
    end
  end
  metric
end
|
429
|
+
|
430
|
+
# Helper to get the metric spec and the registered prometheus metric for a label.
|
431
|
+
# Resolve label through find_metric and fetch the metric registered under the
# resulting name.
#
# Returns a two-element Array: [spec from find_metric, @prometheus.get result].
def get(label)
  spec = find_metric(label)
  registered = @prometheus.get(spec[:metric_name])
  [spec, registered]
end
|
435
|
+
|
436
|
+
# Note - Catch and log metrics failures so they can be noted, but don't interrupt vmpooler operation.
|
437
|
+
# Increment the counter addressed by label, using the label values parsed from it.
#
# Any StandardError is logged through @logger at level 's' and swallowed, so a
# metrics failure does not interrupt the caller.
def increment(label)
  spec, counter = get(label)
  counter.increment(labels: spec[:labels])
rescue StandardError => e
  @logger.log('s', "[!] prometheus error logging metric #{label} increment : #{e}")
end
|
445
|
+
|
446
|
+
# Set the gauge addressed by label to value.to_i; a nil value is ignored.
#
# Any StandardError is logged through @logger at level 's' and swallowed.
def gauge(label, value)
  return if value.nil?

  spec, registered = get(label)
  registered.set(value.to_i, labels: spec[:labels])
rescue StandardError => e
  @logger.log('s', "[!] prometheus error logging gauge #{label}, value #{value}: #{e}")
end
|
456
|
+
|
457
|
+
# Record duration.to_f as an observation on the histogram addressed by label;
# a nil duration is ignored.
# See https://prometheus.io/docs/practices/histograms/
#
# Any StandardError is logged through @logger at level 's' and swallowed.
def timing(label, duration)
  return if duration.nil?

  spec, histogram = get(label)
  histogram.observe(duration.to_f, labels: spec[:labels])
rescue StandardError => e
  @logger.log('s', "[!] prometheus error logging timing event label #{label}, duration #{duration}: #{e}")
end
|
468
|
+
end
|
469
|
+
end
|
470
|
+
end
|