deadman_check 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Dockerfile +29 -0
- data/README.md +160 -15
- data/Rakefile +15 -0
- data/bin/deadman-check +14 -9
- data/deadman_check.gemspec +2 -1
- data/lib/deadman_check/version.rb +1 -1
- data/lib/deadman_check_switch.rb +22 -13
- metadata +26 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d86166c15a2c87baa6772b460cab729a6a7c341
|
4
|
+
data.tar.gz: 14e8ad5b0a9d9742f75aefe8eb01facadcd3d402
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4aae7f90a0915a393a7aa8e6113af0862debe39a8498a1587869071636050f03b6eb0f09d5de2f287178f14547a1aa4efd153f90a77b0304b3fe6658f9140968
|
7
|
+
data.tar.gz: cff5f26ced3b63b1a2b4013953d0628d48e22346d051ffd9d0b5f214664fe63be6b5f39e7f0c8a98b017bc3dc601f29b4a905f50198e3bb2813a4244069233b8
|
data/.gitignore
CHANGED
data/Dockerfile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
FROM ruby:2.4
|
2
|
+
MAINTAINER Zane Williamson <zane.williamson@gmail.com>
|
3
|
+
|
4
|
+
# Install apt packages
|
5
|
+
ARG DEBIAN_FRONTEND=noninteractive
|
6
|
+
RUN apt-get update -qq && \
|
7
|
+
apt-get install -y -qq \
|
8
|
+
less \
|
9
|
+
locales && \
|
10
|
+
apt-get clean && \
|
11
|
+
rm -rf /var/lib/apt/lists/*
|
12
|
+
|
13
|
+
# Configure locale
|
14
|
+
ARG LOCALE="C.UTF-8"
|
15
|
+
RUN locale-gen "$LOCALE" && \
|
16
|
+
dpkg-reconfigure locales
|
17
|
+
ENV LANG="$LOCALE" LC_ALL="$LOCALE"
|
18
|
+
|
19
|
+
ADD . /app/
|
20
|
+
ADD lib /app/lib
|
21
|
+
|
22
|
+
VOLUME /app
|
23
|
+
WORKDIR /app
|
24
|
+
|
25
|
+
RUN gem install bundler && \
|
26
|
+
bundle install && \
|
27
|
+
rake install
|
28
|
+
|
29
|
+
ENTRYPOINT ["deadman-check"]
|
data/README.md
CHANGED
@@ -1,21 +1,164 @@
|
|
1
1
|
# DeadmanCheck
|
2
2
|
|
3
|
-
[](https://travis-ci.org/sepulworld/deadman_check)
|
4
|
+
[](http://badge.fury.io/rb/deadman_check)
|
5
|
+
|
6
|
+
A monitoring companion for Nomad periodic jobs that alerts if periodic jobs are
|
7
|
+
not processing as expected. The deadman-check has 2 modes, one to run with the
|
8
|
+
Nomad periodic job as an additional [task](https://www.nomadproject.io/docs/job-specification/task.html) to update a key in Redis with the current EPOCH time. The other mode of deadman-check
|
9
|
+
is intended to run as a separate process that will monitor the Redis key's EPOCH
|
10
|
+
time value and alert if that value fails to meet a time 'freshness' threshold that
|
11
|
+
is expected for that job.
|
12
|
+
|
13
|
+
* requires a Redis instance
|
14
|
+
* alerting requires a SLACK_API_TOKEN environment variable
|
15
|
+
|
16
|
+
## Example Usage
|
17
|
+
|
18
|
+
Let's say I have a Nomad periodic job that is set to run every 10 minutes. The Nomad configuration looks like this:
|
19
|
+
|
20
|
+
```hcl
|
21
|
+
job "SilverBulletPeriodic" {
|
22
|
+
type = "batch"
|
23
|
+
|
24
|
+
periodic {
|
25
|
+
cron = "*/10 * * * * *"
|
26
|
+
prohibit_overlap = true
|
27
|
+
}
|
28
|
+
|
29
|
+
group "utility" {
|
30
|
+
task "SilverBulletPeriodicProcess" {
|
31
|
+
driver = "docker"
|
32
|
+
config {
|
33
|
+
image = "silverbullet:build_1"
|
34
|
+
work_dir = "/utility/silverbullet"
|
35
|
+
command = "blaster"
|
36
|
+
}
|
37
|
+
resources {
|
38
|
+
cpu = 100
|
39
|
+
memory = 500
|
40
|
+
}
|
41
|
+
}
|
42
|
+
}
|
43
|
+
}
|
44
|
+
```
|
5
45
|
|
6
|
-
|
46
|
+
To monitor the SilverBulletPeriodicProcess task, let's add a deadman-check task to
|
47
|
+
post updates to a Redis endpoint (10.0.0.1 for this example):
|
48
|
+
|
49
|
+
```hcl
|
50
|
+
job "SilverBulletPeriodic" {
|
51
|
+
type = "batch"
|
52
|
+
|
53
|
+
periodic {
|
54
|
+
cron = "*/10 * * * * *"
|
55
|
+
prohibit_overlap = true
|
56
|
+
}
|
57
|
+
|
58
|
+
group "silverbullet" {
|
59
|
+
task "SilverBulletPeriodicProcess" {
|
60
|
+
driver = "docker"
|
61
|
+
config {
|
62
|
+
image = "silverbullet:build_1"
|
63
|
+
work_dir = "/utility/silverbullet"
|
64
|
+
command = "blaster"
|
65
|
+
}
|
66
|
+
resources {
|
67
|
+
cpu = 100
|
68
|
+
memory = 500
|
69
|
+
}
|
70
|
+
}
|
71
|
+
task "DeadmanSetSilverBulletPeriodicProcess" {
|
72
|
+
driver = "docker"
|
73
|
+
config {
|
74
|
+
image = "sepulworld/deadman-check"
|
75
|
+
command = "key_set"
|
76
|
+
args = [
|
77
|
+
"--host",
|
78
|
+
"10.0.0.1",
|
79
|
+
"--port",
|
80
|
+
"6379",
|
81
|
+
"--key",
|
82
|
+
"deadman/SilverBulletPeriodicProcess"]
|
83
|
+
}
|
84
|
+
resources {
|
85
|
+
cpu = 100
|
86
|
+
memory = 256
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
}
|
91
|
+
```
|
7
92
|
|
8
|
-
|
93
|
+
Now the key, deadman/SilverBulletPeriodicProcess, in the Redis instance at 10.0.0.1 will be updated with
|
94
|
+
the EPOCH time for each SilverBulletPeriodic job run. If the job hangs or fails to run
|
95
|
+
we will know via the EPOCH time entry going stale.
|
96
|
+
|
97
|
+
Next we need a job that will run to monitor this key.
|
98
|
+
|
99
|
+
```hcl
|
100
|
+
job "DeadmanMonitoring" {
|
101
|
+
type = "service"
|
102
|
+
|
103
|
+
group "monitor" {
|
104
|
+
task "DeadmanMonitorSilverBulletPeriodicProcess" {
|
105
|
+
driver = "docker"
|
106
|
+
config {
|
107
|
+
image = "sepulworld/deadman-check"
|
108
|
+
command = "switch_monitor"
|
109
|
+
args = [
|
110
|
+
"--host",
|
111
|
+
"10.0.0.1",
|
112
|
+
"--port",
|
113
|
+
"6379",
|
114
|
+
"--key",
|
115
|
+
"deadman/SilverBulletPeriodicProcess",
|
116
|
+
"--freshness",
|
117
|
+
"800",
|
118
|
+
"--alert-to",
|
119
|
+
"#slackroom",
|
120
|
+
"--daemon",
|
121
|
+
"--daemon-sleep",
|
122
|
+
"900"]
|
123
|
+
}
|
124
|
+
resources {
|
125
|
+
cpu = 100
|
126
|
+
memory = 256
|
127
|
+
}
|
128
|
+
env {
|
129
|
+
SLACK_API_TOKEN = "YourSlackApiToken"
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
}
|
134
|
+
```
|
9
135
|
|
10
|
-
|
136
|
+
Monitor a Redis key that contains an EPOCH time entry. Send a Slack message if EPOCH age hits given threshold
|
11
137
|
|
12
|
-
|
138
|
+
## Local system installation
|
13
139
|
|
14
|
-
|
140
|
+
execute:
|
15
141
|
|
142
|
+
$ bundle install
|
16
143
|
$ gem install deadman_check
|
17
144
|
|
18
|
-
##
|
145
|
+
## Install and run deadman-check from Docker
|
146
|
+
|
147
|
+
```
|
148
|
+
# Optional: If you don't pull explicitly, `docker run` will do it for you
|
149
|
+
$ docker pull sepulworld/deadman-check
|
150
|
+
|
151
|
+
$ alias deadman-check='\
|
152
|
+
docker run \
|
153
|
+
-it --rm --name=deadman-check \
|
154
|
+
sepulworld/deadman-check'
|
155
|
+
```
|
156
|
+
|
157
|
+
(Depending on how your system is set up, you might have to add sudo in front of the above docker commands or add your user to the docker group).
|
158
|
+
|
159
|
+
If you don't do the docker pull, the first time you run deadman-check, the docker run command will automatically pull the sepulworld/deadman-check image from Docker Hub. Subsequent runs will use a locally cached copy of the image and will not have to download anything.
|
160
|
+
|
161
|
+
## Usage via Local System Install
|
19
162
|
|
20
163
|
```bash
|
21
164
|
$ deadman-check -h
|
@@ -26,7 +169,7 @@ $ deadman-check -h
|
|
26
169
|
DESCRIPTION:
|
27
170
|
|
28
171
|
Monitor a Redis key that contains an EPOCH time entry.
|
29
|
-
Send
|
172
|
+
Send a Slack message if EPOCH age hits given threshold
|
30
173
|
|
31
174
|
COMMANDS:
|
32
175
|
|
@@ -106,8 +249,7 @@ $ deadman-check switch_monitor -h
|
|
106
249
|
--port 6379 \
|
107
250
|
--key deadman/myservice \
|
108
251
|
--freshness 500 \
|
109
|
-
--alert-to
|
110
|
-
--alert-from ops-no-reply-email@mycomany.tld
|
252
|
+
--alert-to #slackroom
|
111
253
|
|
112
254
|
OPTIONS:
|
113
255
|
|
@@ -124,11 +266,14 @@ $ deadman-check switch_monitor -h
|
|
124
266
|
The value in seconds to alert on when the recorded
|
125
267
|
EPOCH value exceeds current EPOCH
|
126
268
|
|
127
|
-
--alert-to
|
128
|
-
|
269
|
+
--alert-to SLACKROOM
|
270
|
+
Slackroom to alert to
|
271
|
+
|
272
|
+
--daemon
|
273
|
+
Run as a daemon, otherwise will run check just once
|
129
274
|
|
130
|
-
--
|
131
|
-
|
275
|
+
--daemon-sleep SECONDS
|
276
|
+
Set the number of seconds to sleep in between switch checks, default 300
|
132
277
|
```
|
133
278
|
|
134
279
|
## Development
|
data/Rakefile
CHANGED
@@ -8,3 +8,18 @@ Rake::TestTask.new(:test) do |t|
|
|
8
8
|
end
|
9
9
|
|
10
10
|
task :default => :test
|
11
|
+
|
12
|
+
desc "Docker build image"
|
13
|
+
task :docker_build do
|
14
|
+
sh %{docker build -t sepulworld/deadman-check .}
|
15
|
+
end
|
16
|
+
|
17
|
+
desc "Push Docker image to Docker Hub"
|
18
|
+
task :docker_push do
|
19
|
+
sh %{docker push sepulworld/deadman-check}
|
20
|
+
end
|
21
|
+
|
22
|
+
desc "Pull Docker image to Docker Hub"
|
23
|
+
task :docker_pull do
|
24
|
+
sh %{docker pull sepulworld/deadman-check}
|
25
|
+
end
|
data/bin/deadman-check
CHANGED
@@ -3,30 +3,35 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'commander/import'
|
5
5
|
require 'deadman_check'
|
6
|
+
require 'daemons'
|
6
7
|
|
7
8
|
program :name, 'deadman-check'
|
8
9
|
program :version, DeadmanCheck::VERSION
|
9
|
-
program :description, %q{Monitor a Redis key that contains an EPOCH time entry.
|
10
|
-
Send email if EPOCH age hits given threshold}
|
10
|
+
program :description, %q{Monitor a Redis key that contains an EPOCH time entry. Send email if EPOCH age hits given threshold}
|
11
11
|
|
12
12
|
command :switch_monitor do |c|
|
13
13
|
c.syntax = 'deadman-check switch_monitor [options]'
|
14
14
|
c.summary = 'Target a Redis key to monitor'
|
15
15
|
c.description = ''
|
16
|
-
c.example %q{Target a Redis key deadman/myservice, and this key has an EPOCH
|
17
|
-
value to check looking to alert on 500 second or greater freshness},
|
16
|
+
c.example %q{Target a Redis key deadman/myservice, and this key has an EPOCH value to check looking to alert on 500 second or greater freshness},
|
18
17
|
%q{deadman-check switch_monitor --host 127.0.0.1 --port 6379 --key deadman/myservice --freshness 500 --alert-to ops@mycomany.tld --alert-from ops-no-reply-email@mycomany.tld}
|
19
18
|
c.option '--host HOST', String, 'IP address or hostname of Redis system'
|
20
19
|
c.option '--port PORT', String, 'port Redis is listening on'
|
21
20
|
c.option '--key KEY', String, 'Redis key to monitor'
|
22
21
|
c.option '--freshness SECONDS', String, %q{The value in seconds to alert on when the recorded
|
23
22
|
EPOCH value exceeds current EPOCH}
|
24
|
-
c.option '--alert-to
|
25
|
-
c.option '--
|
23
|
+
c.option '--alert-to SLACKROOM', String, 'Slackroom to alert to'
|
24
|
+
c.option '--daemon', 'Run as a daemon, otherwise will run check just once'
|
25
|
+
c.option '--daemon-sleep SECONDS', String, 'Set the number of seconds to sleep in between switch checks, default 300'
|
26
26
|
c.action do |args, options|
|
27
|
+
options.default :daemon_sleep => 300
|
27
28
|
switch_monitor = DeadmanCheck::SwitchMonitor.new(options.host, options.port,
|
28
|
-
options.key, options.freshness, options.alert_to, options.
|
29
|
-
|
29
|
+
options.key, options.freshness, options.alert_to, options.daemon_sleep)
|
30
|
+
if options.daemon
|
31
|
+
Daemons.run(switch_monitor.run_check_daemon)
|
32
|
+
else
|
33
|
+
switch_monitor.run_check_once
|
34
|
+
end
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
@@ -38,7 +43,7 @@ command :key_set do |c|
|
|
38
43
|
%q{deadman-check key_set --host 127.0.0.1 --port 6379 --key deadman/myservice}
|
39
44
|
c.option '--host HOST', String, 'IP address or hostname of Redis system'
|
40
45
|
c.option '--port PORT', String, 'port Redis is listening on'
|
41
|
-
c.option '--key KEY', String, 'Redis key to
|
46
|
+
c.option '--key KEY', String, 'Redis key to report EPOCH time to'
|
42
47
|
c.action do |args, options|
|
43
48
|
key_set = DeadmanCheck::KeySet.new(options.host, options.port,
|
44
49
|
options.key)
|
data/deadman_check.gemspec
CHANGED
@@ -36,5 +36,6 @@ Gem::Specification.new do |spec|
|
|
36
36
|
|
37
37
|
spec.add_dependency 'commander', '~> 4.4', '>= 4.4.3'
|
38
38
|
spec.add_dependency 'redis', '~> 3.3', '>= 3.3.3'
|
39
|
-
spec.add_dependency '
|
39
|
+
spec.add_dependency 'slack-ruby-client', '~> 0.8.0'
|
40
|
+
spec.add_dependency 'daemons', '~> 1.2.4', '>=1.2.4'
|
40
41
|
end
|
data/lib/deadman_check_switch.rb
CHANGED
@@ -1,20 +1,24 @@
|
|
1
1
|
require 'deadman_check/version'
|
2
2
|
require 'deadman_check_global'
|
3
3
|
require 'redis'
|
4
|
-
require '
|
4
|
+
require 'slack-ruby-client'
|
5
5
|
|
6
6
|
module DeadmanCheck
|
7
7
|
# Switch class
|
8
8
|
class SwitchMonitor
|
9
|
-
attr_accessor :host, :port, :key, :freshness, :alert_to, :
|
9
|
+
attr_accessor :host, :port, :key, :freshness, :alert_to, :daemon_sleep
|
10
10
|
|
11
|
-
def initialize(host, port, key, freshness, alert_to,
|
11
|
+
def initialize(host, port, key, freshness, alert_to, daemon_sleep)
|
12
12
|
@host = host
|
13
13
|
@port = port
|
14
14
|
@key = key
|
15
15
|
@freshness = freshness.to_i
|
16
16
|
@alert_to = alert_to
|
17
|
-
@
|
17
|
+
@daemon_sleep = daemon_sleep.to_i
|
18
|
+
end
|
19
|
+
|
20
|
+
Slack.configure do |config|
|
21
|
+
config.token = ENV['SLACK_API_TOKEN']
|
18
22
|
end
|
19
23
|
|
20
24
|
def _diff_epoc(current_epoch, recorded_epoch)
|
@@ -28,21 +32,26 @@ module DeadmanCheck
|
|
28
32
|
return recorded_epoch
|
29
33
|
end
|
30
34
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
:
|
36
|
-
#{epoch_diff} seconds since last run")
|
35
|
+
def slack_alert(alert_to, key, epoch_diff)
|
36
|
+
client = Slack::Web::Client.new
|
37
|
+
client.chat_postMessage(channel: alert_to, text: "Alert: Deadman Switch
|
38
|
+
Triggered for #{key}, with #{epoch_diff} seconds since last run",
|
39
|
+
username: 'deadman')
|
37
40
|
end
|
38
41
|
|
39
|
-
def
|
42
|
+
def run_check_once
|
40
43
|
recorded_epoch = _get_recorded_epoch(@host, @port, @key).to_i
|
41
44
|
current_epoch = DeadmanCheck::DeadmanCheckGlobal.new.get_epoch_time.to_i
|
42
45
|
epoch_diff = _diff_epoc(current_epoch, recorded_epoch)
|
43
46
|
if epoch_diff > @freshness
|
44
|
-
|
45
|
-
|
47
|
+
slack_alert(@alert_to, @key, epoch_diff)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def run_check_daemon
|
52
|
+
loop do
|
53
|
+
run_check_once
|
54
|
+
sleep(@daemon_sleep)
|
46
55
|
end
|
47
56
|
end
|
48
57
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadman_check
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- zane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -93,19 +93,39 @@ dependencies:
|
|
93
93
|
- !ruby/object:Gem::Version
|
94
94
|
version: 3.3.3
|
95
95
|
- !ruby/object:Gem::Dependency
|
96
|
-
name:
|
96
|
+
name: slack-ruby-client
|
97
97
|
requirement: !ruby/object:Gem::Requirement
|
98
98
|
requirements:
|
99
99
|
- - "~>"
|
100
100
|
- !ruby/object:Gem::Version
|
101
|
-
version:
|
101
|
+
version: 0.8.0
|
102
102
|
type: :runtime
|
103
103
|
prerelease: false
|
104
104
|
version_requirements: !ruby/object:Gem::Requirement
|
105
105
|
requirements:
|
106
106
|
- - "~>"
|
107
107
|
- !ruby/object:Gem::Version
|
108
|
-
version:
|
108
|
+
version: 0.8.0
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: daemons
|
111
|
+
requirement: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - "~>"
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: 1.2.4
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: 1.2.4
|
119
|
+
type: :runtime
|
120
|
+
prerelease: false
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - "~>"
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.2.4
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: 1.2.4
|
109
129
|
description: |-
|
110
130
|
A script to check a given Redis key EPOCH for
|
111
131
|
freshness. Good for monitoring cron jobs or batch jobs. Have the last step
|
@@ -121,6 +141,7 @@ files:
|
|
121
141
|
- ".gitignore"
|
122
142
|
- ".travis.yml"
|
123
143
|
- CODE_OF_CONDUCT.md
|
144
|
+
- Dockerfile
|
124
145
|
- Gemfile
|
125
146
|
- LICENSE.txt
|
126
147
|
- README.md
|