vmpooler 3.7.0 → 3.9.0
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- checksums.yaml +4 -4
- data/lib/vmpooler/adaptive_timeout.rb +130 -0
- data/lib/vmpooler/api/helpers.rb +31 -10
- data/lib/vmpooler/api/input_validator.rb +159 -0
- data/lib/vmpooler/api/rate_limiter.rb +116 -0
- data/lib/vmpooler/api/v3.rb +133 -16
- data/lib/vmpooler/circuit_breaker.rb +189 -0
- data/lib/vmpooler/generic_connection_pool.rb +28 -0
- data/lib/vmpooler/metrics/promstats.rb +60 -0
- data/lib/vmpooler/pool_manager.rb +850 -11
- data/lib/vmpooler/providers/base.rb +89 -0
- data/lib/vmpooler/version.rb +1 -1
- metadata +7 -7
data/lib/vmpooler/api/v3.rb
CHANGED
|
@@ -9,6 +9,20 @@ module Vmpooler
|
|
|
9
9
|
api_version = '3'
|
|
10
10
|
api_prefix = "/api/v#{api_version}"
|
|
11
11
|
|
|
12
|
+
# Simple in-memory cache for status endpoint
|
|
13
|
+
# rubocop:disable Style/ClassVars
|
|
14
|
+
@@status_cache = {}
|
|
15
|
+
@@status_cache_mutex = Mutex.new
|
|
16
|
+
# rubocop:enable Style/ClassVars
|
|
17
|
+
STATUS_CACHE_TTL = 30 # seconds
|
|
18
|
+
|
|
19
|
+
# Clear cache (useful for testing)
|
|
20
|
+
def self.clear_status_cache
|
|
21
|
+
@@status_cache_mutex.synchronize do
|
|
22
|
+
@@status_cache.clear
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
12
26
|
helpers do
|
|
13
27
|
include Vmpooler::API::Helpers
|
|
14
28
|
end
|
|
@@ -464,6 +478,32 @@ module Vmpooler
|
|
|
464
478
|
end
|
|
465
479
|
end
|
|
466
480
|
|
|
481
|
+
# Cache helper methods for status endpoint
|
|
482
|
+
def get_cached_status(cache_key)
|
|
483
|
+
@@status_cache_mutex.synchronize do
|
|
484
|
+
cached = @@status_cache[cache_key]
|
|
485
|
+
if cached && (Time.now - cached[:timestamp]) < STATUS_CACHE_TTL
|
|
486
|
+
return cached[:data]
|
|
487
|
+
end
|
|
488
|
+
|
|
489
|
+
nil
|
|
490
|
+
end
|
|
491
|
+
end
|
|
492
|
+
|
|
493
|
+
def set_cached_status(cache_key, data)
|
|
494
|
+
@@status_cache_mutex.synchronize do
|
|
495
|
+
@@status_cache[cache_key] = {
|
|
496
|
+
data: data,
|
|
497
|
+
timestamp: Time.now
|
|
498
|
+
}
|
|
499
|
+
# Clean up old cache entries: once more than 10 are cached, evict the oldest one (keeps at most 10 unique view combinations)
|
|
500
|
+
if @@status_cache.size > 10
|
|
501
|
+
oldest = @@status_cache.min_by { |_k, v| v[:timestamp] }
|
|
502
|
+
@@status_cache.delete(oldest[0])
|
|
503
|
+
end
|
|
504
|
+
end
|
|
505
|
+
end
|
|
506
|
+
|
|
467
507
|
def sync_pool_templates
|
|
468
508
|
tracer.in_span("Vmpooler::API::V3.#{__method__}") do
|
|
469
509
|
pool_index = pool_index(pools)
|
|
@@ -646,6 +686,13 @@ module Vmpooler
|
|
|
646
686
|
get "#{api_prefix}/status/?" do
|
|
647
687
|
content_type :json
|
|
648
688
|
|
|
689
|
+
# Create cache key based on view parameters
|
|
690
|
+
cache_key = params[:view] ? "status_#{params[:view]}" : "status_all"
|
|
691
|
+
|
|
692
|
+
# Try to get cached response
|
|
693
|
+
cached_response = get_cached_status(cache_key)
|
|
694
|
+
return cached_response if cached_response
|
|
695
|
+
|
|
649
696
|
if params[:view]
|
|
650
697
|
views = params[:view].split(",")
|
|
651
698
|
end
|
|
@@ -706,7 +753,12 @@ module Vmpooler
|
|
|
706
753
|
|
|
707
754
|
result[:status][:uptime] = (Time.now - Vmpooler::API.settings.config[:uptime]).round(1) if Vmpooler::API.settings.config[:uptime]
|
|
708
755
|
|
|
709
|
-
JSON.pretty_generate(Hash[result.sort_by { |k, _v| k }])
|
|
756
|
+
response = JSON.pretty_generate(Hash[result.sort_by { |k, _v| k }])
|
|
757
|
+
|
|
758
|
+
# Cache the response
|
|
759
|
+
set_cached_status(cache_key, response)
|
|
760
|
+
|
|
761
|
+
response
|
|
710
762
|
end
|
|
711
763
|
|
|
712
764
|
# request statistics for specific pools by passing parameter 'pool'
|
|
@@ -1085,9 +1137,29 @@ module Vmpooler
|
|
|
1085
1137
|
result = { 'ok' => false }
|
|
1086
1138
|
metrics.increment('http_requests_vm_total.post.vm.checkout')
|
|
1087
1139
|
|
|
1088
|
-
|
|
1140
|
+
# Validate and sanitize JSON body
|
|
1141
|
+
payload = sanitize_json_body(request.body.read)
|
|
1142
|
+
if validation_error?(payload)
|
|
1143
|
+
status 400
|
|
1144
|
+
return JSON.pretty_generate(payload)
|
|
1145
|
+
end
|
|
1089
1146
|
|
|
1090
|
-
|
|
1147
|
+
# Validate each template and count
|
|
1148
|
+
payload.each do |template, count|
|
|
1149
|
+
validation = validate_pool_name(template)
|
|
1150
|
+
if validation_error?(validation)
|
|
1151
|
+
status 400
|
|
1152
|
+
return JSON.pretty_generate(validation)
|
|
1153
|
+
end
|
|
1154
|
+
|
|
1155
|
+
validated_count = validate_vm_count(count)
|
|
1156
|
+
if validation_error?(validated_count)
|
|
1157
|
+
status 400
|
|
1158
|
+
return JSON.pretty_generate(validated_count)
|
|
1159
|
+
end
|
|
1160
|
+
end
|
|
1161
|
+
|
|
1162
|
+
if payload && !payload.empty?
|
|
1091
1163
|
invalid = invalid_templates(payload)
|
|
1092
1164
|
if invalid.empty?
|
|
1093
1165
|
result = atomically_allocate_vms(payload)
|
|
@@ -1206,6 +1278,7 @@ module Vmpooler
|
|
|
1206
1278
|
result = { 'ok' => false }
|
|
1207
1279
|
metrics.increment('http_requests_vm_total.get.vm.template')
|
|
1208
1280
|
|
|
1281
|
+
# Template can contain multiple pools separated by +, so validate after parsing
|
|
1209
1282
|
payload = extract_templates_from_query_params(params[:template])
|
|
1210
1283
|
|
|
1211
1284
|
if payload
|
|
@@ -1235,6 +1308,13 @@ module Vmpooler
|
|
|
1235
1308
|
status 404
|
|
1236
1309
|
result['ok'] = false
|
|
1237
1310
|
|
|
1311
|
+
# Validate hostname
|
|
1312
|
+
validation = validate_hostname(params[:hostname])
|
|
1313
|
+
if validation_error?(validation)
|
|
1314
|
+
status 400
|
|
1315
|
+
return JSON.pretty_generate(validation)
|
|
1316
|
+
end
|
|
1317
|
+
|
|
1238
1318
|
params[:hostname] = hostname_shorten(params[:hostname])
|
|
1239
1319
|
|
|
1240
1320
|
rdata = backend.hgetall("vmpooler__vm__#{params[:hostname]}")
|
|
@@ -1373,6 +1453,13 @@ module Vmpooler
|
|
|
1373
1453
|
status 404
|
|
1374
1454
|
result['ok'] = false
|
|
1375
1455
|
|
|
1456
|
+
# Validate hostname
|
|
1457
|
+
validation = validate_hostname(params[:hostname])
|
|
1458
|
+
if validation_error?(validation)
|
|
1459
|
+
status 400
|
|
1460
|
+
return JSON.pretty_generate(validation)
|
|
1461
|
+
end
|
|
1462
|
+
|
|
1376
1463
|
params[:hostname] = hostname_shorten(params[:hostname])
|
|
1377
1464
|
|
|
1378
1465
|
rdata = backend.hgetall("vmpooler__vm__#{params[:hostname]}")
|
|
@@ -1403,16 +1490,21 @@ module Vmpooler
|
|
|
1403
1490
|
|
|
1404
1491
|
failure = []
|
|
1405
1492
|
|
|
1493
|
+
# Validate hostname
|
|
1494
|
+
validation = validate_hostname(params[:hostname])
|
|
1495
|
+
if validation_error?(validation)
|
|
1496
|
+
status 400
|
|
1497
|
+
return JSON.pretty_generate(validation)
|
|
1498
|
+
end
|
|
1499
|
+
|
|
1406
1500
|
params[:hostname] = hostname_shorten(params[:hostname])
|
|
1407
1501
|
|
|
1408
1502
|
if backend.exists?("vmpooler__vm__#{params[:hostname]}")
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
span.status = OpenTelemetry::Trace::Status.error(e.to_s)
|
|
1415
|
-
halt 400, JSON.pretty_generate(result)
|
|
1503
|
+
# Validate and sanitize JSON body
|
|
1504
|
+
jdata = sanitize_json_body(request.body.read)
|
|
1505
|
+
if validation_error?(jdata)
|
|
1506
|
+
status 400
|
|
1507
|
+
return JSON.pretty_generate(jdata)
|
|
1416
1508
|
end
|
|
1417
1509
|
|
|
1418
1510
|
# Validate data payload
|
|
@@ -1421,6 +1513,13 @@ module Vmpooler
|
|
|
1421
1513
|
when 'lifetime'
|
|
1422
1514
|
need_token! if Vmpooler::API.settings.config[:auth]
|
|
1423
1515
|
|
|
1516
|
+
# Validate lifetime is a positive integer
|
|
1517
|
+
lifetime_int = arg.to_i
|
|
1518
|
+
if lifetime_int <= 0
|
|
1519
|
+
failure.push("Lifetime must be a positive integer (got #{arg})")
|
|
1520
|
+
next
|
|
1521
|
+
end
|
|
1522
|
+
|
|
1424
1523
|
# in hours, defaults to one week
|
|
1425
1524
|
max_lifetime_upper_limit = config['max_lifetime_upper_limit']
|
|
1426
1525
|
if max_lifetime_upper_limit
|
|
@@ -1430,13 +1529,17 @@ module Vmpooler
|
|
|
1430
1529
|
end
|
|
1431
1530
|
end
|
|
1432
1531
|
|
|
1433
|
-
# validate lifetime is within boundaries
|
|
1434
|
-
unless arg.to_i > 0
|
|
1435
|
-
failure.push("You provided a lifetime (#{arg}) but you must provide a positive number.")
|
|
1436
|
-
end
|
|
1437
|
-
|
|
1438
1532
|
when 'tags'
|
|
1439
1533
|
failure.push("You provided tags (#{arg}) as something other than a hash.") unless arg.is_a?(Hash)
|
|
1534
|
+
|
|
1535
|
+
# Validate each tag key and value
|
|
1536
|
+
arg.each do |key, value|
|
|
1537
|
+
tag_validation = validate_tag(key, value)
|
|
1538
|
+
if validation_error?(tag_validation)
|
|
1539
|
+
failure.push(tag_validation['error'])
|
|
1540
|
+
end
|
|
1541
|
+
end
|
|
1542
|
+
|
|
1440
1543
|
failure.push("You provided unsuppored tags (#{arg}).") if config['allowed_tags'] && !(arg.keys - config['allowed_tags']).empty?
|
|
1441
1544
|
else
|
|
1442
1545
|
failure.push("Unknown argument #{arg}.")
|
|
@@ -1478,9 +1581,23 @@ module Vmpooler
|
|
|
1478
1581
|
status 404
|
|
1479
1582
|
result = { 'ok' => false }
|
|
1480
1583
|
|
|
1584
|
+
# Validate hostname
|
|
1585
|
+
validation = validate_hostname(params[:hostname])
|
|
1586
|
+
if validation_error?(validation)
|
|
1587
|
+
status 400
|
|
1588
|
+
return JSON.pretty_generate(validation)
|
|
1589
|
+
end
|
|
1590
|
+
|
|
1591
|
+
# Validate disk size
|
|
1592
|
+
validated_size = validate_disk_size(params[:size])
|
|
1593
|
+
if validation_error?(validated_size)
|
|
1594
|
+
status 400
|
|
1595
|
+
return JSON.pretty_generate(validated_size)
|
|
1596
|
+
end
|
|
1597
|
+
|
|
1481
1598
|
params[:hostname] = hostname_shorten(params[:hostname])
|
|
1482
1599
|
|
|
1483
|
-
if
|
|
1600
|
+
if backend.exists?("vmpooler__vm__#{params[:hostname]}")
|
|
1484
1601
|
result[params[:hostname]] = {}
|
|
1485
1602
|
result[params[:hostname]]['disk'] = "+#{params[:size]}gb"
|
|
1486
1603
|
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Vmpooler
|
|
4
|
+
# Circuit breaker pattern implementation to prevent cascading failures
|
|
5
|
+
# when a provider becomes unresponsive or experiences repeated failures.
|
|
6
|
+
#
|
|
7
|
+
# States:
|
|
8
|
+
# - CLOSED: Normal operation, requests flow through
|
|
9
|
+
# - OPEN: Provider is failing, reject requests immediately (fail fast)
|
|
10
|
+
# - HALF_OPEN: Testing if provider has recovered with limited requests
|
|
11
|
+
class CircuitBreaker
|
|
12
|
+
STATES = %i[closed open half_open].freeze
|
|
13
|
+
|
|
14
|
+
class CircuitOpenError < StandardError; end
|
|
15
|
+
|
|
16
|
+
attr_reader :state, :failure_count, :success_count
|
|
17
|
+
|
|
18
|
+
# Initialize a new circuit breaker
|
|
19
|
+
#
|
|
20
|
+
# @param name [String] Name for logging/metrics (e.g., "vsphere_provider")
|
|
21
|
+
# @param logger [Object] Logger instance
|
|
22
|
+
# @param metrics [Object] Metrics instance
|
|
23
|
+
# @param failure_threshold [Integer] Number of failures before opening circuit
|
|
24
|
+
# @param timeout [Integer] Seconds to wait in open state before testing (half-open)
|
|
25
|
+
# @param half_open_attempts [Integer] Number of successful test requests needed to close
|
|
26
|
+
def initialize(name:, logger:, metrics:, failure_threshold: 5, timeout: 30, half_open_attempts: 3)
|
|
27
|
+
@name = name
|
|
28
|
+
@logger = logger
|
|
29
|
+
@metrics = metrics
|
|
30
|
+
@failure_threshold = failure_threshold
|
|
31
|
+
@timeout = timeout
|
|
32
|
+
@half_open_attempts = half_open_attempts
|
|
33
|
+
|
|
34
|
+
@state = :closed
|
|
35
|
+
@failure_count = 0
|
|
36
|
+
@success_count = 0
|
|
37
|
+
@last_failure_time = nil
|
|
38
|
+
@mutex = Mutex.new
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Execute a block with circuit breaker protection
|
|
42
|
+
#
|
|
43
|
+
# @yield Block to execute if circuit allows
|
|
44
|
+
# @return Result of the block
|
|
45
|
+
# @raise [CircuitOpenError] if the circuit is open and the timeout hasn't elapsed; any StandardError raised by the block is recorded as a failure and re-raised
|
|
46
|
+
def call
|
|
47
|
+
check_state
|
|
48
|
+
|
|
49
|
+
begin
|
|
50
|
+
result = yield
|
|
51
|
+
on_success
|
|
52
|
+
result
|
|
53
|
+
rescue StandardError => e
|
|
54
|
+
on_failure(e)
|
|
55
|
+
raise
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# Check if circuit allows requests
|
|
60
|
+
# @return [Boolean] true if the circuit is closed or half-open, or if it is open and the retry timeout has elapsed; false otherwise
|
|
61
|
+
def allow_request?
|
|
62
|
+
@mutex.synchronize do
|
|
63
|
+
case @state
|
|
64
|
+
when :closed
|
|
65
|
+
true
|
|
66
|
+
when :half_open
|
|
67
|
+
true
|
|
68
|
+
when :open
|
|
69
|
+
if should_attempt_reset?
|
|
70
|
+
true
|
|
71
|
+
else
|
|
72
|
+
false
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Get current circuit breaker status
|
|
79
|
+
# @return [Hash] Status information
|
|
80
|
+
def status
|
|
81
|
+
@mutex.synchronize do
|
|
82
|
+
{
|
|
83
|
+
name: @name,
|
|
84
|
+
state: @state,
|
|
85
|
+
failure_count: @failure_count,
|
|
86
|
+
success_count: @success_count,
|
|
87
|
+
last_failure_time: @last_failure_time,
|
|
88
|
+
next_retry_time: next_retry_time
|
|
89
|
+
}
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
private
|
|
94
|
+
|
|
95
|
+
def check_state
|
|
96
|
+
@mutex.synchronize do
|
|
97
|
+
case @state
|
|
98
|
+
when :open
|
|
99
|
+
if should_attempt_reset?
|
|
100
|
+
transition_to_half_open
|
|
101
|
+
else
|
|
102
|
+
time_remaining = (@timeout - (Time.now - @last_failure_time)).round(1)
|
|
103
|
+
raise CircuitOpenError, "Circuit breaker '#{@name}' is open (#{@failure_count} failures, retry in #{time_remaining}s)"
|
|
104
|
+
end
|
|
105
|
+
when :half_open
|
|
106
|
+
# Let requests through to test the provider (no per-request limit is enforced in this branch)
|
|
107
|
+
when :closed
|
|
108
|
+
# Normal operation
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def should_attempt_reset?
|
|
114
|
+
return false unless @last_failure_time
|
|
115
|
+
|
|
116
|
+
Time.now - @last_failure_time >= @timeout
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def next_retry_time
|
|
120
|
+
return nil unless @last_failure_time && @state == :open
|
|
121
|
+
|
|
122
|
+
@last_failure_time + @timeout
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def on_success
|
|
126
|
+
@mutex.synchronize do
|
|
127
|
+
case @state
|
|
128
|
+
when :closed
|
|
129
|
+
# Reset failure count on success in closed state
|
|
130
|
+
@failure_count = 0 if @failure_count > 0
|
|
131
|
+
when :half_open
|
|
132
|
+
@success_count += 1
|
|
133
|
+
@failure_count = 0
|
|
134
|
+
@logger.log('d', "[+] [circuit_breaker] '#{@name}' successful test request (#{@success_count}/#{@half_open_attempts})")
|
|
135
|
+
|
|
136
|
+
transition_to_closed if @success_count >= @half_open_attempts
|
|
137
|
+
when :open
|
|
138
|
+
# Should not happen, but reset if we somehow get a success
|
|
139
|
+
transition_to_closed
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def on_failure(error)
|
|
145
|
+
@mutex.synchronize do
|
|
146
|
+
@failure_count += 1
|
|
147
|
+
@last_failure_time = Time.now
|
|
148
|
+
|
|
149
|
+
case @state
|
|
150
|
+
when :closed
|
|
151
|
+
@logger.log('d', "[!] [circuit_breaker] '#{@name}' failure #{@failure_count}/#{@failure_threshold}: #{error.class}")
|
|
152
|
+
transition_to_open if @failure_count >= @failure_threshold
|
|
153
|
+
when :half_open
|
|
154
|
+
@logger.log('d', "[!] [circuit_breaker] '#{@name}' failed during half-open test")
|
|
155
|
+
transition_to_open
|
|
156
|
+
when :open
|
|
157
|
+
# Already open, just log
|
|
158
|
+
@logger.log('d', "[!] [circuit_breaker] '#{@name}' additional failure while open")
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def transition_to_open
|
|
164
|
+
@state = :open
|
|
165
|
+
@success_count = 0
|
|
166
|
+
@logger.log('s', "[!] [circuit_breaker] '#{@name}' OPENED after #{@failure_count} failures (will retry in #{@timeout}s)")
|
|
167
|
+
@metrics.increment("circuit_breaker.opened.#{@name}")
|
|
168
|
+
@metrics.gauge("circuit_breaker.state.#{@name}", 1) # 1 = open
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
def transition_to_half_open
|
|
172
|
+
@state = :half_open
|
|
173
|
+
@success_count = 0
|
|
174
|
+
@failure_count = 0
|
|
175
|
+
@logger.log('s', "[*] [circuit_breaker] '#{@name}' HALF-OPEN, testing provider health")
|
|
176
|
+
@metrics.increment("circuit_breaker.half_open.#{@name}")
|
|
177
|
+
@metrics.gauge("circuit_breaker.state.#{@name}", 0.5) # 0.5 = half-open
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def transition_to_closed
|
|
181
|
+
@state = :closed
|
|
182
|
+
@failure_count = 0
|
|
183
|
+
@success_count = 0
|
|
184
|
+
@logger.log('s', "[+] [circuit_breaker] '#{@name}' CLOSED, provider recovered")
|
|
185
|
+
@metrics.increment("circuit_breaker.closed.#{@name}")
|
|
186
|
+
@metrics.gauge("circuit_breaker.state.#{@name}", 0) # 0 = closed
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
@@ -34,6 +34,34 @@ module Vmpooler
|
|
|
34
34
|
end
|
|
35
35
|
end
|
|
36
36
|
end
|
|
37
|
+
|
|
38
|
+
# Get connection pool health status
|
|
39
|
+
# @return [Hash] Health status including utilization and queue depth
|
|
40
|
+
def health_status
|
|
41
|
+
{
|
|
42
|
+
size: @size,
|
|
43
|
+
available: @available.length,
|
|
44
|
+
in_use: @size - @available.length,
|
|
45
|
+
utilization: ((@size - @available.length).to_f / @size * 100).round(2),
|
|
46
|
+
waiting_threads: (@queue.respond_to?(:length) ? @queue.length : 0),
|
|
47
|
+
state: determine_health_state
|
|
48
|
+
}
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
def determine_health_state
|
|
54
|
+
utilization = ((@size - @available.length).to_f / @size * 100)
|
|
55
|
+
waiting = @queue.respond_to?(:length) ? @queue.length : 0
|
|
56
|
+
|
|
57
|
+
if utilization >= 90 || waiting > 5
|
|
58
|
+
:critical # Pool exhausted or many waiting threads
|
|
59
|
+
elsif utilization >= 70 || waiting > 2
|
|
60
|
+
:warning # Pool under stress
|
|
61
|
+
else
|
|
62
|
+
:healthy # Normal operation
|
|
63
|
+
end
|
|
64
|
+
end
|
|
37
65
|
end
|
|
38
66
|
end
|
|
39
67
|
end
|
|
@@ -329,6 +329,66 @@ module Vmpooler
|
|
|
329
329
|
buckets: REDIS_CONNECT_BUCKETS,
|
|
330
330
|
docstring: 'vmpooler redis connection wait time',
|
|
331
331
|
param_labels: %i[type provider]
|
|
332
|
+
},
|
|
333
|
+
vmpooler_health: {
|
|
334
|
+
mtype: M_GAUGE,
|
|
335
|
+
torun: %i[manager],
|
|
336
|
+
docstring: 'vmpooler health check metrics',
|
|
337
|
+
param_labels: %i[metric_path]
|
|
338
|
+
},
|
|
339
|
+
vmpooler_purge: {
|
|
340
|
+
mtype: M_GAUGE,
|
|
341
|
+
torun: %i[manager],
|
|
342
|
+
docstring: 'vmpooler purge metrics',
|
|
343
|
+
param_labels: %i[metric_path]
|
|
344
|
+
},
|
|
345
|
+
vmpooler_destroy: {
|
|
346
|
+
mtype: M_GAUGE,
|
|
347
|
+
torun: %i[manager],
|
|
348
|
+
docstring: 'vmpooler destroy metrics',
|
|
349
|
+
param_labels: %i[poolname]
|
|
350
|
+
},
|
|
351
|
+
vmpooler_clone: {
|
|
352
|
+
mtype: M_GAUGE,
|
|
353
|
+
torun: %i[manager],
|
|
354
|
+
docstring: 'vmpooler clone metrics',
|
|
355
|
+
param_labels: %i[poolname]
|
|
356
|
+
},
|
|
357
|
+
circuit_breaker: {
|
|
358
|
+
mtype: M_GAUGE,
|
|
359
|
+
torun: %i[manager],
|
|
360
|
+
docstring: 'Circuit breaker state and failure tracking',
|
|
361
|
+
param_labels: %i[metric_path]
|
|
362
|
+
},
|
|
363
|
+
connection_pool: {
|
|
364
|
+
mtype: M_GAUGE,
|
|
365
|
+
torun: %i[manager],
|
|
366
|
+
docstring: 'Connection pool health metrics',
|
|
367
|
+
param_labels: %i[metric_path]
|
|
368
|
+
},
|
|
369
|
+
adaptive_timeout: {
|
|
370
|
+
mtype: M_GAUGE,
|
|
371
|
+
torun: %i[manager],
|
|
372
|
+
docstring: 'Adaptive timeout statistics',
|
|
373
|
+
param_labels: %i[metric_path]
|
|
374
|
+
},
|
|
375
|
+
vmpooler_performance: {
|
|
376
|
+
mtype: M_GAUGE,
|
|
377
|
+
torun: %i[manager],
|
|
378
|
+
docstring: 'vmpooler performance metrics for pool operations',
|
|
379
|
+
param_labels: %i[metric_path]
|
|
380
|
+
},
|
|
381
|
+
vmpooler_dlq: {
|
|
382
|
+
mtype: M_COUNTER,
|
|
383
|
+
torun: %i[manager],
|
|
384
|
+
docstring: 'vmpooler dead letter queue metrics',
|
|
385
|
+
param_labels: %i[metric_path]
|
|
386
|
+
},
|
|
387
|
+
vmpooler_errors: {
|
|
388
|
+
mtype: M_COUNTER,
|
|
389
|
+
torun: %i[manager],
|
|
390
|
+
docstring: 'vmpooler error counters including permanent failures',
|
|
391
|
+
param_labels: %i[metric_path]
|
|
332
392
|
}
|
|
333
393
|
}
|
|
334
394
|
end
|