sensu-plugins-edgelab 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/check-nomad-jobs.rb +59 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c31c6c21f0a5babe6c6b5dd37c1cee1fb8e50dbf
|
4
|
+
data.tar.gz: 115871cd9dcdcc7a08c699b7c0b603cc0ceca937
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 18f0ff0025f8ef24f349550021492d4d7cb9ec71cf7ed99fc1c3fdb0f31d4aafb6350c291cff405fdc3cf9cf9dbeeaa19f848c3fb3603b4f02443aaf8c582678
|
7
|
+
data.tar.gz: 432ffee07129e3a7f6bbfb44dfb901942b1439459427ebeac15c3666916ab08126a94af27e615b9f2f5bbbd351bf2bf7d26084c7a5a4773c0c59546a670d91b3
|
data/bin/check-nomad-jobs.rb
CHANGED
@@ -38,6 +38,62 @@ class CheckNomadAllocations < Sensu::Plugin::Check::CLI
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
+
# Build human-readable explanations for the placement failures of an evaluation.
#
# @param failed_eval [Hash] evaluation whose 'FailedTGAllocs' entry maps each
#   task group to a hash of allocation metrics
# @return [Array<String>] one message per failure reason, in the order
#   class filtered, constraint filtered, nodes exhausted, class exhausted,
#   dimension exhausted for each task group; empty when there is nothing to report
def placement_failures_reasons(failed_eval)
  # Look a metric up by its string key (the key style used for every other
  # access in this method), falling back to the symbol key. Hash#fetch only
  # applies its default when the key is missing, so a key that is present with
  # a nil value must be normalised here before calling `.each` on it.
  entries = lambda do |metrics, key|
    metrics[key] || metrics[key.to_sym] || {}
  end

  (failed_eval['FailedTGAllocs'] || {}).each_with_object([]) do |(_, metrics), reasons|
    next if metrics.nil?

    entries.call(metrics, 'ClassFiltered').each do |class_, count|
      reasons << "Class #{class_} filtered #{count} nodes"
    end

    entries.call(metrics, 'ConstraintFiltered').each do |constraint, count|
      reasons << "Constraint #{constraint} filtered #{count} nodes"
    end

    # nil.to_i is 0, so an absent or nil counter is simply not reported.
    nodes_exhausted = metrics['NodesExhausted'].to_i
    reasons << "Resources exhausted on #{nodes_exhausted} nodes" if nodes_exhausted > 0

    entries.call(metrics, 'ClassExhausted').each do |class_, count|
      reasons << "Class #{class_} exhausted on #{count} nodes"
    end

    entries.call(metrics, 'DimensionExhausted').each do |dimension, count|
      reasons << "#{dimension} on #{count} nodes"
    end
  end
end
|
68
|
+
|
69
|
+
# Check that there are no failed evaluations.
#
# Fetches the job's evaluations through `api_call`. When at least one
# evaluation has status 'blocked' and at least one carries 'FailedTGAllocs',
# the placement failure reasons of the failed evaluation with the highest
# 'CreateIndex' are appended to `failed` as a single message.
#
# @param job [Hash] job description; only job['ID'] is read
# @param failed [Array<String>] accumulator that the failure message is appended to
def check_evaluations(job, failed)
  evaluations = api_call "/v1/job/#{job['ID']}/evaluations"

  # Nothing is reported unless at least one evaluation is blocked.
  return unless evaluations.any? { |evaluation| evaluation['Status'] == 'blocked' }

  # Most recent evaluation that recorded failed task group allocations.
  last_failed = evaluations
                .reject { |evaluation| evaluation['FailedTGAllocs'].nil? }
                .max_by { |evaluation| evaluation['CreateIndex'] }
  return if last_failed.nil?

  failure_reasons = placement_failures_reasons last_failed
  return if failure_reasons.empty?

  failed << "#{job['ID']}: Placement failure [#{failure_reasons.join(' / ')}]"
end
|
96
|
+
|
41
97
|
# Check that allocations are in the desired status
|
42
98
|
def check_allocations(job, failed)
|
43
99
|
allocations = api_call "/v1/job/#{job['ID']}/allocations"
|
@@ -45,11 +101,11 @@ class CheckNomadAllocations < Sensu::Plugin::Check::CLI
|
|
45
101
|
allocations.each do |alloc|
|
46
102
|
if alloc['DesiredStatus'] == 'run'
|
47
103
|
# Batch stay in run DesiredStatus even if task completed successfully.
|
48
|
-
next if job['Type'] == 'batch'
|
104
|
+
next if job['Type'] == 'batch' && alloc['ClientStatus'] == 'complete'
|
49
105
|
|
50
106
|
alloc['TaskStates'].each do |_, state|
|
51
107
|
if state['State'] == 'dead'
|
52
|
-
|
108
|
+
failed << "Alloc #{alloc['Name']} is dead but desired status is 'run'"
|
53
109
|
|
54
110
|
# Check that pending alloc are not too old
|
55
111
|
elsif state['State'] == 'pending'
|
@@ -63,7 +119,6 @@ class CheckNomadAllocations < Sensu::Plugin::Check::CLI
|
|
63
119
|
# No need to check other task in the same task group.
|
64
120
|
break
|
65
121
|
end
|
66
|
-
|
67
122
|
end
|
68
123
|
end
|
69
124
|
end
|
@@ -79,6 +134,7 @@ class CheckNomadAllocations < Sensu::Plugin::Check::CLI
|
|
79
134
|
failed = []
|
80
135
|
|
81
136
|
jobs.each do |job|
|
137
|
+
check_evaluations job, failed
|
82
138
|
check_allocations job, failed
|
83
139
|
end
|
84
140
|
|