kubernetes_health_checker 0.0.0.16 → 0.0.0.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/kubernetes_health_checker +38 -31
- data/bin/kubernetes_health_checkerd +2 -2
- data/bin/mock_output.txt +4 -4
- data/bin/mock_output_two.txt +3 -3
- metadata +10 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e2ec47753f96855648ebd5d399f48104d2d8fff7f5dc1144fa5f80e047839336
|
4
|
+
data.tar.gz: 4ed66fba4f249d0d81026cc683911a70e37120d716b72a11b82659f38be4965a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14680daea6b04d3b5e7beca49306694aa66cb984d55a0d41d4a0c76bac3baa62b96c91867ee4bf3649f7dece8946c3e4023d870b6b8e5b793de56f667adb2989
|
7
|
+
data.tar.gz: cdce024d7e5a2ae4266ea6962a60227bbba056086324a024f86693570462000d13f3b863180abbbc6f6df83a8ae53758cef7b50d287aecc764c8f2952d759e7a
|
data/bin/kubernetes_health_checker
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'thor'
|
5
5
|
require 'json'
|
6
|
+
require 'syslog/logger'
|
6
7
|
|
7
8
|
module KubernetesHealthChecker
|
8
9
|
class Runner < Thor
|
@@ -15,47 +16,51 @@ module KubernetesHealthChecker
|
|
15
16
|
method_option :url, aliases: '-u', type: :string, desc: 'Specify the slack url you would like the alert POSTed to'
|
16
17
|
method_option :test, aliases: '-t', type: :boolean, desc: 'Specify if running the gem in test mode'
|
17
18
|
|
18
|
-
ALERT_STATUSES = [
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
'createcontainerconfigerror',
|
24
|
-
].freeze
|
19
|
+
ALERT_STATUSES = %w[crashloopbackoff
|
20
|
+
error
|
21
|
+
runcontainererror
|
22
|
+
createcontainerconfigerror
|
23
|
+
oomkilled].freeze
|
25
24
|
|
26
25
|
# time is in seconds
|
27
26
|
TIMED_ALERT_STATUSES = {
|
28
27
|
'pending' => 120,
|
29
|
-
'
|
30
|
-
|
28
|
+
'errimagepull' => 120,
|
29
|
+
'containercreating' => 120
|
30
|
+
}.freeze
|
31
31
|
|
32
32
|
RUN_INTERVAL = 10
|
33
33
|
|
34
34
|
def start
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
35
|
+
log = Syslog::Logger.new 'kubernetes_health_checker'
|
36
|
+
|
37
|
+
begin
|
38
|
+
set_defaults(options)
|
39
|
+
run_validations if !@test
|
40
|
+
while true
|
41
|
+
puts 'starting...'
|
42
|
+
output = get_cli_output
|
43
|
+
message = construct_message(output)
|
44
|
+
send_alert(message) if !message.empty?
|
45
|
+
puts 'sleeping...'
|
46
|
+
sleep RUN_INTERVAL
|
47
|
+
end
|
48
|
+
rescue => e
|
49
|
+
log.error(e)
|
44
50
|
end
|
45
51
|
end
|
46
52
|
|
47
53
|
desc 'version', 'Prints version'
|
48
54
|
def version
|
49
|
-
say
|
55
|
+
say 'KubernetesHealthChecker 0.0.0'
|
50
56
|
end
|
51
57
|
|
52
58
|
no_commands do
|
53
59
|
def set_defaults(options)
|
54
|
-
opts = options.inject({}){|memo,(k,v)| memo[k.to_sym] = v; memo}
|
60
|
+
opts = options.inject({}) { |memo, (k, v)| memo[k.to_sym] = v; memo }
|
55
61
|
@alert_threshold = opts[:alert_threshold] || 5
|
56
62
|
@slack_channel = opts[:channel] || '@rohan'
|
57
|
-
|
58
|
-
@namespace = opts[:namespace].split(',') || ['default']
|
63
|
+
@namespace = opts[:namespace]&.split(',') || ['default']
|
59
64
|
@test = opts[:test] || false
|
60
65
|
@url = opts[:url]
|
61
66
|
@pods_data = {}
|
@@ -69,7 +74,7 @@ module KubernetesHealthChecker
|
|
69
74
|
if @test
|
70
75
|
# so hacky, but pulls from second mock output if
|
71
76
|
# it's the second time running to test state changes
|
72
|
-
file_name = @pods_data.empty? ? '../mock_output.txt' : '../
|
77
|
+
file_name = @pods_data.empty? ? '../mock_output.txt' : '../mock_output_two.txt'
|
73
78
|
path = File.expand_path(file_name, __FILE__)
|
74
79
|
output = File.read(path)
|
75
80
|
else
|
@@ -90,7 +95,7 @@ module KubernetesHealthChecker
|
|
90
95
|
channel: @slack_channel,
|
91
96
|
username: 'kubernetes_health_check_bot',
|
92
97
|
text: message,
|
93
|
-
icon_emoji:
|
98
|
+
icon_emoji: ':face_with_thermometer:'
|
94
99
|
}
|
95
100
|
|
96
101
|
`curl -X POST --data-urlencode 'payload=#{JSON.generate(payload)}' #{@url}`
|
@@ -99,7 +104,7 @@ module KubernetesHealthChecker
|
|
99
104
|
def get_pod_message(pod_name:, new_status:, new_restarts:, age:)
|
100
105
|
old_restarts = @pods_data[pod_name][:restarts]
|
101
106
|
old_status = @pods_data[pod_name][:status]
|
102
|
-
text =
|
107
|
+
text = ''
|
103
108
|
|
104
109
|
if !@pods_data[pod_name].empty? && old_restarts == new_restarts && old_status == new_status
|
105
110
|
# we've already alerted from this state, skip
|
@@ -107,10 +112,10 @@ module KubernetesHealthChecker
|
|
107
112
|
text = "Pod *#{pod_name}* is in status: *#{new_status}*.\n"
|
108
113
|
elsif TIMED_ALERT_STATUSES.keys.include?(new_status.downcase)
|
109
114
|
alert_threshold = TIMED_ALERT_STATUSES[new_status.downcase]
|
110
|
-
if age > alert_threshold
|
115
|
+
if age.nil? || age > alert_threshold
|
111
116
|
text = "Pod #{pod_name} has been in status: *#{new_status}* for #{age} seconds :grimacing:.\n"
|
112
117
|
end
|
113
|
-
elsif new_restarts > @alert_threshold
|
118
|
+
elsif !new_restarts.nil? && new_restarts > @alert_threshold
|
114
119
|
text = "Pod *#{pod_name}* has restarted *#{new_restarts}* times and has status: *#{new_status}*.\n"
|
115
120
|
end
|
116
121
|
|
@@ -119,20 +124,22 @@ module KubernetesHealthChecker
|
|
119
124
|
|
120
125
|
def construct_message(output)
|
121
126
|
message = ''
|
122
|
-
new_pod_store = {}
|
123
127
|
|
124
128
|
rows = output.split("\n")
|
125
129
|
rows.each_with_index do |row, index|
|
126
130
|
next if index == 0
|
127
131
|
|
128
|
-
row = row.split(
|
132
|
+
row = row.split(' ')
|
129
133
|
pod_name = row[0]
|
130
134
|
status = row[2]
|
131
135
|
restarts = row[3].to_i
|
132
136
|
age = get_age_in_seconds(row[4])
|
133
137
|
|
134
138
|
@pods_data[pod_name] ||= {}
|
135
|
-
message += get_pod_message(pod_name: pod_name,
|
139
|
+
message += get_pod_message(pod_name: pod_name,
|
140
|
+
new_status: status,
|
141
|
+
new_restarts: restarts,
|
142
|
+
age: age)
|
136
143
|
|
137
144
|
# update our data store for the pods
|
138
145
|
@pods_data[pod_name][:status] = status
|
@@ -163,4 +170,4 @@ module KubernetesHealthChecker
|
|
163
170
|
end
|
164
171
|
end
|
165
172
|
|
166
|
-
KubernetesHealthChecker::Runner.start
|
173
|
+
KubernetesHealthChecker::Runner.start
|
data/bin/mock_output.txt
CHANGED
@@ -6,7 +6,7 @@ affinity-external-api-dep-1624144290-zqsfb 0/2 Evicted 0
|
|
6
6
|
affinity-external-api-dep-3283253972-ttgft 2/2 Running 0 1d
|
7
7
|
automate-affinity-pipeline-rc-l6zb0 1/1 Running 0 1d
|
8
8
|
compute-relationship-strengths-rc-x8mv1 1/1 Running 0 1d
|
9
|
-
convert-html-to-text-consumer-rc-lx0s5 1/1
|
9
|
+
convert-html-to-text-consumer-rc-lx0s5 1/1 Pending 0 1d
|
10
10
|
create-contacts-consumer-rc-h5hj7 1/1 Running 0 1d
|
11
11
|
create-deal-emails-consumer-rc-qd7n4 1/1 Running 10 22h
|
12
12
|
create-smart-alert-notifications-rc-d87p2 1/1 Running 0 1d
|
@@ -19,8 +19,8 @@ ditto2-rc-ffj0s 1/1 Running 3
|
|
19
19
|
ditto3-rc-ntdz9 1/1 Running 1 22h
|
20
20
|
dump-new-persons-for-labeling-rc-p043f 1/1 Running 0 1d
|
21
21
|
extract-introductions-consumer-rc-32d29 1/1 Running 1 1d
|
22
|
-
extract-signatures-consumer-rc-4n0wl 1/1
|
23
|
-
fetch-public-metadata-rc-g94b0 1/1
|
22
|
+
extract-signatures-consumer-rc-4n0wl 1/1 ErrImagePull 0 60s
|
23
|
+
fetch-public-metadata-rc-g94b0 1/1 ErrImagePull 0 130s
|
24
24
|
health-check-rc-wkh2t 1/1 Running 0 22h
|
25
25
|
helper-rc-17pmz 1/1 Running 0 22h
|
26
26
|
import-companies-rc-grkkm 1/1 Running 0 1d
|
@@ -104,4 +104,4 @@ weave-net-h29cn 2/2 R
|
|
104
104
|
weave-net-mqjcw 2/2 Running 4 68d
|
105
105
|
weave-net-r85qh 2/2 Running 0 2h
|
106
106
|
weave-net-wgsml 2/2 Running 12 68d
|
107
|
-
weave-net-zdz7j 2/2 Running 4 19d
|
107
|
+
weave-net-zdz7j 2/2 Running 4 19d
|
data/bin/mock_output_two.txt
CHANGED
@@ -25,7 +25,7 @@ health-check-rc-wkh2t 1/1 Running 0
|
|
25
25
|
helper-rc-17pmz 1/1 Running 0 22h
|
26
26
|
import-companies-rc-grkkm 1/1 Running 0 1d
|
27
27
|
import-company-metadata-rc-n094g 1/1 Running 0 22h
|
28
|
-
import-crunchbase-metadata-rc-94433 1/1
|
28
|
+
import-crunchbase-metadata-rc-94433 1/1 OOMKilled 0 1d
|
29
29
|
import-person-metadata-rc-wkvrg 1/1 Running 0 1d
|
30
30
|
match-email-to-persons-consumer-rc-l6cgr 1/1 Running 0 22h
|
31
31
|
match-event-to-persons-consumer-rc-w8jn1 1/1 Running 2 1d
|
@@ -54,7 +54,7 @@ send-inactive-user-emails-rc-92pf0 1/1 CrashLoopBackO
|
|
54
54
|
send-weekly-summary-email-rc-vmr8s 1/1 Running 0 1d
|
55
55
|
shoryuken-ews-syncer-rc-rr512 1/1 Running 1 1d
|
56
56
|
shoryuken-ews-syncer-rc-wq76w 1/1 Running 1 1d
|
57
|
-
shoryuken-insert-emails-rc-65ct6 1/1
|
57
|
+
shoryuken-insert-emails-rc-65ct6 1/1 Pending 2 1d
|
58
58
|
sidekiq-rc-kz4tc 1/1 Running 0 1d
|
59
59
|
sidekiq-rc-wl7pm 1/1 Running 0 1d
|
60
60
|
sidekiq-web-dep-1155096845-kfs0h 2/2 Running 0 1d
|
@@ -67,4 +67,4 @@ update-dropped-email-alerts-consumer-rc-n3069 1/1 Running 1
|
|
67
67
|
update-email-count-rc-s29lk 1/1 Running 0 1d
|
68
68
|
update-primary-emails-rc-6sd8b 1/1 Running 0 1d
|
69
69
|
update-sources-consumer-rc-zrf2f 1/1 Running 0 22h
|
70
|
-
vacuum-analyze-db-rc-jr14r 1/1 Running 0 1d
|
70
|
+
vacuum-analyze-db-rc-jr14r 1/1 Running 0 1d
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kubernetes_health_checker
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0.16
|
4
|
+
version: 0.0.0.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rohan Sahai
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: daemons
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: thor
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
@@ -65,11 +65,11 @@ files:
|
|
65
65
|
- bin/kubernetes_health_checkerd
|
66
66
|
- bin/mock_output.txt
|
67
67
|
- bin/mock_output_two.txt
|
68
|
-
homepage:
|
68
|
+
homepage: https://rubygems.org/gems/kubernetes_health_checker
|
69
69
|
licenses:
|
70
70
|
- MIT
|
71
71
|
metadata: {}
|
72
|
-
post_install_message:
|
72
|
+
post_install_message:
|
73
73
|
rdoc_options: []
|
74
74
|
require_paths:
|
75
75
|
- lib
|
@@ -77,16 +77,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
77
77
|
requirements:
|
78
78
|
- - ">="
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version: '
|
80
|
+
version: '2.4'
|
81
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
82
|
requirements:
|
83
83
|
- - ">="
|
84
84
|
- !ruby/object:Gem::Version
|
85
85
|
version: '0'
|
86
86
|
requirements: []
|
87
|
-
|
88
|
-
|
89
|
-
signing_key:
|
87
|
+
rubygems_version: 3.2.3
|
88
|
+
signing_key:
|
90
89
|
specification_version: 4
|
91
90
|
summary: Get notifications for unhealthy kubernetes pods
|
92
91
|
test_files: []
|