deadman_check 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Dockerfile +29 -0
- data/README.md +160 -15
- data/Rakefile +15 -0
- data/bin/deadman-check +14 -9
- data/deadman_check.gemspec +2 -1
- data/lib/deadman_check/version.rb +1 -1
- data/lib/deadman_check_switch.rb +22 -13
- metadata +26 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d86166c15a2c87baa6772b460cab729a6a7c341
|
4
|
+
data.tar.gz: 14e8ad5b0a9d9742f75aefe8eb01facadcd3d402
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4aae7f90a0915a393a7aa8e6113af0862debe39a8498a1587869071636050f03b6eb0f09d5de2f287178f14547a1aa4efd153f90a77b0304b3fe6658f9140968
|
7
|
+
data.tar.gz: cff5f26ced3b63b1a2b4013953d0628d48e22346d051ffd9d0b5f214664fe63be6b5f39e7f0c8a98b017bc3dc601f29b4a905f50198e3bb2813a4244069233b8
|
data/.gitignore
CHANGED
data/Dockerfile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
FROM ruby:2.4
|
2
|
+
MAINTAINER Zane Williamson <zane.williamson@gmail.com>
|
3
|
+
|
4
|
+
# Install apt packages
|
5
|
+
ARG DEBIAN_FRONTEND=noninteractive
|
6
|
+
RUN apt-get update -qq && \
|
7
|
+
apt-get install -y -qq \
|
8
|
+
less \
|
9
|
+
locales && \
|
10
|
+
apt-get clean && \
|
11
|
+
rm -rf /var/lib/apt/lists/*
|
12
|
+
|
13
|
+
# Configure locale
|
14
|
+
ARG LOCALE="C.UTF-8"
|
15
|
+
RUN locale-gen "$LOCALE" && \
|
16
|
+
dpkg-reconfigure locales
|
17
|
+
ENV LANG="$LOCALE" LC_ALL="$LOCALE"
|
18
|
+
|
19
|
+
ADD . /app/
|
20
|
+
ADD lib /app/lib
|
21
|
+
|
22
|
+
VOLUME /app
|
23
|
+
WORKDIR /app
|
24
|
+
|
25
|
+
RUN gem install bundler && \
|
26
|
+
bundle install && \
|
27
|
+
rake install
|
28
|
+
|
29
|
+
ENTRYPOINT ["deadman-check"]
|
data/README.md
CHANGED
@@ -1,21 +1,164 @@
|
|
1
1
|
# DeadmanCheck
|
2
2
|
|
3
|
-
[![Build Status](https://travis-ci.org/sepulworld/
|
4
|
-
[![Gem Version](https://badge.fury.io/rb/
|
3
|
+
[![Build Status](https://travis-ci.org/sepulworld/deadman-check.svg)](https://travis-ci.org/sepulworld/deadman_check)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/deadman-check.svg)](http://badge.fury.io/rb/deadman_check)
|
5
|
+
|
6
|
+
A monitoring companion for Nomad periodic jobs that alerts if periodic jobs are
|
7
|
+
not processing as expected. The deadman-check has 2 modes, one to run with the
|
8
|
+
Nomad periodic job as an additional [task](https://www.nomadproject.io/docs/job-specification/task.html) to update a key in Redis with the current EPOCH time. The other mode of deadman-check
|
9
|
+
is intended to run as a separate process that will monitor the Redis key's EPOCH
|
10
|
+
time value and alert if that value fails to meet a time 'freshness' threshold that
|
11
|
+
is expected for that job.
|
12
|
+
|
13
|
+
* requires a Redis instance
|
14
|
+
* alerting requires a SLACK_API_TOKEN environment variable
|
15
|
+
|
16
|
+
## Example Usage
|
17
|
+
|
18
|
+
Let's say I have a Nomad periodic job that is set to run every 10 minutes. The Nomad configuration looks like this:
|
19
|
+
|
20
|
+
```hcl
|
21
|
+
job "SilverBulletPeriodic" {
|
22
|
+
type = "batch"
|
23
|
+
|
24
|
+
periodic {
|
25
|
+
cron = "*/10 * * * * *"
|
26
|
+
prohibit_overlap = true
|
27
|
+
}
|
28
|
+
|
29
|
+
group "utility" {
|
30
|
+
task "SilverBulletPeriodicProcess" {
|
31
|
+
driver = "docker"
|
32
|
+
config {
|
33
|
+
image = "silverbullet:build_1"
|
34
|
+
work_dir = "/utility/silverbullet"
|
35
|
+
command = "blaster"
|
36
|
+
}
|
37
|
+
resources {
|
38
|
+
cpu = 100
|
39
|
+
memory = 500
|
40
|
+
}
|
41
|
+
}
|
42
|
+
}
|
43
|
+
}
|
44
|
+
```
|
5
45
|
|
6
|
-
|
46
|
+
To monitor the SilverBulletPeriodicProcess task let's add a deadman-check task to
|
47
|
+
post updates to a Redis endpoint (10.0.0.1 for this example)
|
48
|
+
|
49
|
+
```hcl
|
50
|
+
job "SilverBulletPeriodic" {
|
51
|
+
type = "batch"
|
52
|
+
|
53
|
+
periodic {
|
54
|
+
cron = "*/10 * * * * *"
|
55
|
+
prohibit_overlap = true
|
56
|
+
}
|
57
|
+
|
58
|
+
group "silverbullet" {
|
59
|
+
task "SilverBulletPeriodicProcess" {
|
60
|
+
driver = "docker"
|
61
|
+
config {
|
62
|
+
image = "silverbullet:build_1"
|
63
|
+
work_dir = "/utility/silverbullet"
|
64
|
+
command = "blaster"
|
65
|
+
}
|
66
|
+
resources {
|
67
|
+
cpu = 100
|
68
|
+
memory = 500
|
69
|
+
}
|
70
|
+
}
|
71
|
+
task "DeadmanSetSilverBulletPeriodicProcess" {
|
72
|
+
driver = "docker"
|
73
|
+
config {
|
74
|
+
image = "sepulworld/deadman-check"
|
75
|
+
command = "key_set"
|
76
|
+
args = [
|
77
|
+
"--host",
|
78
|
+
"10.0.0.1",
|
79
|
+
"--port",
|
80
|
+
"6379",
|
81
|
+
"--key",
|
82
|
+
"deadman/SilverBulletPeriodicProcess"]
|
83
|
+
}
|
84
|
+
resources {
|
85
|
+
cpu = 100
|
86
|
+
memory = 256
|
87
|
+
}
|
88
|
+
}
|
89
|
+
}
|
90
|
+
}
|
91
|
+
```
|
7
92
|
|
8
|
-
|
93
|
+
Now the key, deadman/SilverBulletPeriodicProcess, in the Redis instance at 10.0.0.1 will be updated with
|
94
|
+
the EPOCH time for each SilverBulletPeriodic job run. If the job hangs or fails to run
|
95
|
+
we will know via the EPOCH time entry going stale.
|
96
|
+
|
97
|
+
Next we need a job that will run to monitor this key.
|
98
|
+
|
99
|
+
```hcl
|
100
|
+
job "DeadmanMonitoring" {
|
101
|
+
type = "service"
|
102
|
+
|
103
|
+
group "monitor" {
|
104
|
+
task "DeadmanMonitorSilverBulletPeriodicProcess" {
|
105
|
+
driver = "docker"
|
106
|
+
config {
|
107
|
+
image = "sepulworld/deadman-check"
|
108
|
+
command = "switch_monitor"
|
109
|
+
args = [
|
110
|
+
"--host",
|
111
|
+
"10.0.0.1",
|
112
|
+
"--port",
|
113
|
+
"6379",
|
114
|
+
"--key",
|
115
|
+
"deadman/SilverBulletPeriodicProcess",
|
116
|
+
"--freshness",
|
117
|
+
"800",
|
118
|
+
"--alert-to",
|
119
|
+
"#slackroom",
|
120
|
+
"--daemon",
|
121
|
+
"--daemon-sleep",
|
122
|
+
"900"]
|
123
|
+
}
|
124
|
+
resources {
|
125
|
+
cpu = 100
|
126
|
+
memory = 256
|
127
|
+
}
|
128
|
+
env {
|
129
|
+
SLACK_API_TOKEN = "YourSlackApiToken"
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
}
|
134
|
+
```
|
9
135
|
|
10
|
-
|
136
|
+
Monitor a Redis key that contains an EPOCH time entry. Send a Slack message if EPOCH age hits given threshold
|
11
137
|
|
12
|
-
|
138
|
+
## Local system installation
|
13
139
|
|
14
|
-
|
140
|
+
execute:
|
15
141
|
|
142
|
+
$ bundle install
|
16
143
|
$ gem install deadman_check
|
17
144
|
|
18
|
-
##
|
145
|
+
## Install and run deadman-check from Docker
|
146
|
+
|
147
|
+
```
|
148
|
+
# Optional: If you don't pull explicitly, `docker run` will do it for you
|
149
|
+
$ docker pull sepulworld/deadman-check
|
150
|
+
|
151
|
+
$ alias deadman-check='\
|
152
|
+
docker run \
|
153
|
+
-it --rm --name=deadman-check \
|
154
|
+
sepulworld/deadman-check'
|
155
|
+
```
|
156
|
+
|
157
|
+
(Depending on how your system is set up, you might have to add sudo in front of the above docker commands or add your user to the docker group).
|
158
|
+
|
159
|
+
If you don't do the docker pull, the first time you run deadman-check, the docker run command will automatically pull the sepulworld/deadman-check image from Docker Hub. Subsequent runs will use a locally cached copy of the image and will not have to download anything.
|
160
|
+
|
161
|
+
## Usage via Local System Install
|
19
162
|
|
20
163
|
```bash
|
21
164
|
$ deadman-check -h
|
@@ -26,7 +169,7 @@ $ deadman-check -h
|
|
26
169
|
DESCRIPTION:
|
27
170
|
|
28
171
|
Monitor a Redis key that contains an EPOCH time entry.
|
29
|
-
Send
|
172
|
+
Send a Slack message if EPOCH age hits given threshold
|
30
173
|
|
31
174
|
COMMANDS:
|
32
175
|
|
@@ -106,8 +249,7 @@ $ deadman-check switch_monitor -h
|
|
106
249
|
--port 6379 \
|
107
250
|
--key deadman/myservice \
|
108
251
|
--freshness 500 \
|
109
|
-
--alert-to
|
110
|
-
--alert-from ops-no-reply-email@mycomany.tld
|
252
|
+
--alert-to #slackroom
|
111
253
|
|
112
254
|
OPTIONS:
|
113
255
|
|
@@ -124,11 +266,14 @@ $ deadman-check switch_monitor -h
|
|
124
266
|
The value in seconds to alert on when the recorded
|
125
267
|
EPOCH value exceeds current EPOCH
|
126
268
|
|
127
|
-
--alert-to
|
128
|
-
|
269
|
+
--alert-to SLACKROOM
|
270
|
+
Slackroom to alert to
|
271
|
+
|
272
|
+
--daemon
|
273
|
+
Run as a daemon, otherwise will run check just once
|
129
274
|
|
130
|
-
--
|
131
|
-
|
275
|
+
--daemon-sleep SECONDS
|
276
|
+
Set the number of seconds to sleep in between switch checks, default 300
|
132
277
|
```
|
133
278
|
|
134
279
|
## Development
|
data/Rakefile
CHANGED
@@ -8,3 +8,18 @@ Rake::TestTask.new(:test) do |t|
|
|
8
8
|
end
|
9
9
|
|
10
10
|
task :default => :test
|
11
|
+
|
12
|
+
desc "Docker build image"
|
13
|
+
task :docker_build do
|
14
|
+
sh %{docker build -t sepulworld/deadman-check .}
|
15
|
+
end
|
16
|
+
|
17
|
+
desc "Push Docker image to Docker Hub"
|
18
|
+
task :docker_push do
|
19
|
+
sh %{docker push sepulworld/deadman-check}
|
20
|
+
end
|
21
|
+
|
22
|
+
desc "Pull Docker image to Docker Hub"
|
23
|
+
task :docker_pull do
|
24
|
+
sh %{docker pull sepulworld/deadman-check}
|
25
|
+
end
|
data/bin/deadman-check
CHANGED
@@ -3,30 +3,35 @@
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'commander/import'
|
5
5
|
require 'deadman_check'
|
6
|
+
require 'daemons'
|
6
7
|
|
7
8
|
program :name, 'deadman-check'
|
8
9
|
program :version, DeadmanCheck::VERSION
|
9
|
-
program :description, %q{Monitor a Redis key that contains an EPOCH time entry.
|
10
|
-
Send email if EPOCH age hits given threshold}
|
10
|
+
program :description, %q{Monitor a Redis key that contains an EPOCH time entry. Send email if EPOCH age hits given threshold}
|
11
11
|
|
12
12
|
command :switch_monitor do |c|
|
13
13
|
c.syntax = 'deadman-check switch_monitor [options]'
|
14
14
|
c.summary = 'Target a Redis key to monitor'
|
15
15
|
c.description = ''
|
16
|
-
c.example %q{Target a Redis key deadman/myservice, and this key has an EPOCH
|
17
|
-
value to check looking to alert on 500 second or greater freshness},
|
16
|
+
c.example %q{Target a Redis key deadman/myservice, and this key has an EPOCH value to check looking to alert on 500 second or greater freshness},
|
18
17
|
%q{deadman-check switch_monitor --host 127.0.0.1 --port 6379 --key deadman/myservice --freshness 500 --alert-to ops@mycomany.tld --alert-from ops-no-reply-email@mycomany.tld}
|
19
18
|
c.option '--host HOST', String, 'IP address or hostname of Redis system'
|
20
19
|
c.option '--port PORT', String, 'port Redis is listening on'
|
21
20
|
c.option '--key KEY', String, 'Redis key to monitor'
|
22
21
|
c.option '--freshness SECONDS', String, %q{The value in seconds to alert on when the recorded
|
23
22
|
EPOCH value exceeds current EPOCH}
|
24
|
-
c.option '--alert-to
|
25
|
-
c.option '--
|
23
|
+
c.option '--alert-to SLACKROOM', String, 'Slackroom to alert to'
|
24
|
+
c.option '--daemon', 'Run as a daemon, otherwise will run check just once'
|
25
|
+
c.option '--daemon-sleep SECONDS', String, 'Set the number of seconds to sleep in between switch checks, default 300'
|
26
26
|
c.action do |args, options|
|
27
|
+
options.default :daemon_sleep => 300
|
27
28
|
switch_monitor = DeadmanCheck::SwitchMonitor.new(options.host, options.port,
|
28
|
-
options.key, options.freshness, options.alert_to, options.
|
29
|
-
|
29
|
+
options.key, options.freshness, options.alert_to, options.daemon_sleep)
|
30
|
+
if options.daemon
|
31
|
+
Daemons.run(switch_monitor.run_check_daemon)
|
32
|
+
else
|
33
|
+
switch_monitor.run_check_once
|
34
|
+
end
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
@@ -38,7 +43,7 @@ command :key_set do |c|
|
|
38
43
|
%q{deadman-check key_set --host 127.0.0.1 --port 6379 --key deadman/myservice}
|
39
44
|
c.option '--host HOST', String, 'IP address or hostname of Redis system'
|
40
45
|
c.option '--port PORT', String, 'port Redis is listening on'
|
41
|
-
c.option '--key KEY', String, 'Redis key to
|
46
|
+
c.option '--key KEY', String, 'Redis key to report EPOCH time to'
|
42
47
|
c.action do |args, options|
|
43
48
|
key_set = DeadmanCheck::KeySet.new(options.host, options.port,
|
44
49
|
options.key)
|
data/deadman_check.gemspec
CHANGED
@@ -36,5 +36,6 @@ Gem::Specification.new do |spec|
|
|
36
36
|
|
37
37
|
spec.add_dependency 'commander', '~> 4.4', '>= 4.4.3'
|
38
38
|
spec.add_dependency 'redis', '~> 3.3', '>= 3.3.3'
|
39
|
-
spec.add_dependency '
|
39
|
+
spec.add_dependency 'slack-ruby-client', '~> 0.8.0'
|
40
|
+
spec.add_dependency 'daemons', '~> 1.2.4', '>=1.2.4'
|
40
41
|
end
|
data/lib/deadman_check_switch.rb
CHANGED
@@ -1,20 +1,24 @@
|
|
1
1
|
require 'deadman_check/version'
|
2
2
|
require 'deadman_check_global'
|
3
3
|
require 'redis'
|
4
|
-
require '
|
4
|
+
require 'slack-ruby-client'
|
5
5
|
|
6
6
|
module DeadmanCheck
|
7
7
|
# Switch class
|
8
8
|
class SwitchMonitor
|
9
|
-
attr_accessor :host, :port, :key, :freshness, :alert_to, :
|
9
|
+
attr_accessor :host, :port, :key, :freshness, :alert_to, :daemon_sleep
|
10
10
|
|
11
|
-
def initialize(host, port, key, freshness, alert_to,
|
11
|
+
def initialize(host, port, key, freshness, alert_to, daemon_sleep)
|
12
12
|
@host = host
|
13
13
|
@port = port
|
14
14
|
@key = key
|
15
15
|
@freshness = freshness.to_i
|
16
16
|
@alert_to = alert_to
|
17
|
-
@
|
17
|
+
@daemon_sleep = daemon_sleep.to_i
|
18
|
+
end
|
19
|
+
|
20
|
+
Slack.configure do |config|
|
21
|
+
config.token = ENV['SLACK_API_TOKEN']
|
18
22
|
end
|
19
23
|
|
20
24
|
def _diff_epoc(current_epoch, recorded_epoch)
|
@@ -28,21 +32,26 @@ module DeadmanCheck
|
|
28
32
|
return recorded_epoch
|
29
33
|
end
|
30
34
|
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
:
|
36
|
-
#{epoch_diff} seconds since last run")
|
35
|
+
def slack_alert(alert_to, key, epoch_diff)
|
36
|
+
client = Slack::Web::Client.new
|
37
|
+
client.chat_postMessage(channel: alert_to, text: "Alert: Deadman Switch
|
38
|
+
Triggered for #{key}, with #{epoch_diff} seconds since last run",
|
39
|
+
username: 'deadman')
|
37
40
|
end
|
38
41
|
|
39
|
-
def
|
42
|
+
def run_check_once
|
40
43
|
recorded_epoch = _get_recorded_epoch(@host, @port, @key).to_i
|
41
44
|
current_epoch = DeadmanCheck::DeadmanCheckGlobal.new.get_epoch_time.to_i
|
42
45
|
epoch_diff = _diff_epoc(current_epoch, recorded_epoch)
|
43
46
|
if epoch_diff > @freshness
|
44
|
-
|
45
|
-
|
47
|
+
slack_alert(@alert_to, @key, epoch_diff)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def run_check_daemon
|
52
|
+
loop do
|
53
|
+
run_check_once
|
54
|
+
sleep(@daemon_sleep)
|
46
55
|
end
|
47
56
|
end
|
48
57
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: deadman_check
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- zane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -93,19 +93,39 @@ dependencies:
|
|
93
93
|
- !ruby/object:Gem::Version
|
94
94
|
version: 3.3.3
|
95
95
|
- !ruby/object:Gem::Dependency
|
96
|
-
name:
|
96
|
+
name: slack-ruby-client
|
97
97
|
requirement: !ruby/object:Gem::Requirement
|
98
98
|
requirements:
|
99
99
|
- - "~>"
|
100
100
|
- !ruby/object:Gem::Version
|
101
|
-
version:
|
101
|
+
version: 0.8.0
|
102
102
|
type: :runtime
|
103
103
|
prerelease: false
|
104
104
|
version_requirements: !ruby/object:Gem::Requirement
|
105
105
|
requirements:
|
106
106
|
- - "~>"
|
107
107
|
- !ruby/object:Gem::Version
|
108
|
-
version:
|
108
|
+
version: 0.8.0
|
109
|
+
- !ruby/object:Gem::Dependency
|
110
|
+
name: daemons
|
111
|
+
requirement: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - "~>"
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: 1.2.4
|
116
|
+
- - ">="
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: 1.2.4
|
119
|
+
type: :runtime
|
120
|
+
prerelease: false
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - "~>"
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.2.4
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: 1.2.4
|
109
129
|
description: |-
|
110
130
|
A script to check a given Redis key EPOCH for
|
111
131
|
freshness. Good for monitoring cron jobs or batch jobs. Have the last step
|
@@ -121,6 +141,7 @@ files:
|
|
121
141
|
- ".gitignore"
|
122
142
|
- ".travis.yml"
|
123
143
|
- CODE_OF_CONDUCT.md
|
144
|
+
- Dockerfile
|
124
145
|
- Gemfile
|
125
146
|
- LICENSE.txt
|
126
147
|
- README.md
|