flapjack 0.5.5 → 0.6.23
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.rspec +10 -0
- data/Gemfile +18 -0
- data/Guardfile +14 -0
- data/README.md +152 -173
- data/Rakefile +53 -150
- data/bin/flapjack +72 -0
- data/bin/flapjack-nagios-receiver +111 -0
- data/bin/flapjack-nagios-receiver-control +15 -0
- data/bin/flapjack-netsaint-parser +0 -2
- data/bin/flapjack-populator +133 -16
- data/bin/install-flapjack-systemwide +2 -2
- data/config.ru +11 -0
- data/dist/etc/init.d/flapjack +46 -0
- data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
- data/doc/GLOSSARY.md +19 -0
- data/etc/flapjack_config.yaml.example +90 -0
- data/features/events.feature +132 -0
- data/features/notifications.feature +57 -0
- data/features/packaging-lintian.feature +5 -3
- data/features/steps/events_steps.rb +164 -0
- data/features/steps/flapjack-importer_steps.rb +2 -5
- data/features/steps/flapjack-worker_steps.rb +13 -6
- data/features/steps/notifications_steps.rb +178 -0
- data/features/steps/packaging-lintian_steps.rb +14 -0
- data/features/steps/time_travel_steps.rb +34 -0
- data/features/support/env.rb +63 -36
- data/flapjack.gemspec +35 -186
- data/lib/flapjack.rb +2 -0
- data/lib/flapjack/api.rb +274 -0
- data/lib/flapjack/api/entity_check_presenter.rb +184 -0
- data/lib/flapjack/api/entity_presenter.rb +66 -0
- data/lib/flapjack/cli/worker_manager.rb +1 -2
- data/lib/flapjack/configuration.rb +11 -0
- data/lib/flapjack/coordinator.rb +288 -0
- data/lib/flapjack/daemonizing.rb +186 -0
- data/lib/flapjack/data/contact.rb +45 -0
- data/lib/flapjack/data/entity.rb +89 -0
- data/lib/flapjack/data/entity_check.rb +396 -0
- data/lib/flapjack/data/event.rb +144 -0
- data/lib/flapjack/data/notification.rb +13 -0
- data/lib/flapjack/executive.rb +289 -0
- data/lib/flapjack/filters/acknowledgement.rb +39 -0
- data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
- data/lib/flapjack/filters/delays.rb +53 -0
- data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
- data/lib/flapjack/filters/ok.rb +25 -5
- data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
- data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
- data/lib/flapjack/jabber.rb +294 -0
- data/lib/flapjack/notification/common.rb +23 -0
- data/lib/flapjack/notification/email.rb +107 -0
- data/lib/flapjack/notification/email/alert.html.haml +48 -0
- data/lib/flapjack/notification/email/alert.text.erb +14 -0
- data/lib/flapjack/notification/sms.rb +42 -0
- data/lib/flapjack/notification/sms/messagenet.rb +49 -0
- data/lib/flapjack/notifier_engine.rb +4 -4
- data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
- data/lib/flapjack/pagerduty.rb +230 -0
- data/lib/flapjack/patches.rb +108 -19
- data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
- data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
- data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
- data/lib/flapjack/pikelet.rb +56 -0
- data/lib/flapjack/transports/beanstalkd.rb +1 -1
- data/lib/flapjack/transports/result.rb +6 -6
- data/lib/flapjack/utility.rb +46 -0
- data/lib/flapjack/version.rb +5 -0
- data/lib/flapjack/web.rb +198 -0
- data/lib/flapjack/web/views/acknowledge.haml +55 -0
- data/lib/flapjack/web/views/check.haml +162 -0
- data/lib/flapjack/web/views/index.haml +92 -0
- data/lib/flapjack/web/views/self_stats.haml +56 -0
- data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
- data/lib/flapjack/worker/cli.rb +49 -0
- data/{spec → spec.old}/check_sandbox/echo +0 -0
- data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
- data/{spec → spec.old}/configs/recipients.ini +0 -0
- data/{spec → spec.old}/helpers.rb +0 -0
- data/{spec → spec.old}/inifile_spec.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
- data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
- data/{spec → spec.old}/notifier_application_spec.rb +0 -0
- data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_spec.rb +0 -0
- data/{spec → spec.old}/notifier_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
- data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
- data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
- data/{spec → spec.old}/simple.ini +0 -0
- data/{spec → spec.old}/spec.opts +0 -0
- data/{spec → spec.old}/test-filters/blocker.rb +0 -0
- data/{spec → spec.old}/test-filters/mock.rb +0 -0
- data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
- data/{spec → spec.old}/transports/mock_transport.rb +0 -0
- data/{spec → spec.old}/worker_application_spec.rb +0 -0
- data/{spec → spec.old}/worker_options_spec.rb +0 -0
- data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
- data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
- data/spec/lib/flapjack/api_spec.rb +170 -0
- data/spec/lib/flapjack/coordinator_spec.rb +16 -0
- data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
- data/spec/lib/flapjack/data/entity_spec.rb +71 -0
- data/spec/lib/flapjack/data/event_spec.rb +6 -0
- data/spec/lib/flapjack/executive_spec.rb +59 -0
- data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
- data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
- data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
- data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/jabber_spec.rb +150 -0
- data/spec/lib/flapjack/notification/email_spec.rb +6 -0
- data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
- data/spec/lib/flapjack/pikelet_spec.rb +28 -0
- data/spec/lib/flapjack/web_spec.rb +188 -0
- data/spec/spec_helper.rb +44 -0
- data/spec/support/profile_all_formatter.rb +44 -0
- data/spec/support/uncolored_doc_formatter.rb +9 -0
- data/tasks/events.rake +85 -0
- data/tmp/acknowledge.rb +14 -0
- data/tmp/create_config_yaml.rb +16 -0
- data/tmp/create_events_failure.rb +33 -0
- data/tmp/create_events_ok.rb +33 -0
- data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
- data/tmp/create_events_ok_failure.rb +40 -0
- data/tmp/create_events_ok_failure_ack.rb +54 -0
- data/tmp/dummy_entities.json +1 -0
- data/tmp/generate_nagios_test_hosts.rb +16 -0
- data/tmp/parse_config_yaml.rb +7 -0
- data/tmp/redis_delete_all_keys.rb +11 -0
- data/tmp/test_entities.json +1 -0
- metadata +482 -221
- data/TODO.md +0 -36
- data/VERSION +0 -1
- data/bin/flapjack-benchmark +0 -50
- data/bin/flapjack-notifier +0 -21
- data/bin/flapjack-notifier-manager +0 -43
- data/bin/flapjack-stats +0 -27
- data/bin/flapjack-worker +0 -13
- data/bin/flapjack-worker-manager +0 -35
- data/dist/etc/init.d/flapjack-notifier +0 -47
- data/dist/etc/init.d/flapjack-workers +0 -44
- data/features/flapjack-notifier-manager.feature +0 -19
- data/features/flapjack-worker-manager.feature +0 -27
- data/features/flapjack-worker.feature +0 -27
- data/features/netsaint-config-converter.feature +0 -126
- data/features/persistence/couch.feature +0 -105
- data/features/persistence/sqlite3.feature +0 -105
- data/features/persistence/steps/couch_steps.rb +0 -25
- data/features/persistence/steps/generic_steps.rb +0 -102
- data/features/persistence/steps/sqlite3_steps.rb +0 -13
- data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
- data/features/steps/flapjack-worker-manager_steps.rb +0 -48
- data/lib/flapjack/applications/notifier.rb +0 -222
- data/lib/flapjack/cli/notifier.rb +0 -108
- data/lib/flapjack/cli/notifier_manager.rb +0 -86
- data/lib/flapjack/cli/worker.rb +0 -51
@@ -44,14 +44,14 @@ system("cp -aiv #{etc_path}/* /etc")
|
|
44
44
|
|
45
45
|
# set sequence number to 50 so beanstalkd has a chance to boot
|
46
46
|
system("update-rc.d flapjack-workers defaults 50")
|
47
|
-
system("update-rc.d flapjack-notifier defaults 50")
|
47
|
+
system("update-rc.d flapjack-executive defaults 50")
|
48
48
|
|
49
49
|
puts
|
50
50
|
puts "Setup complete!"
|
51
51
|
puts
|
52
52
|
puts "You will want to customise:"
|
53
53
|
puts " * /etc/flapjack/recipients.yaml"
|
54
|
-
puts " * /etc/flapjack/flapjack-notifier.yaml"
|
54
|
+
puts " * /etc/flapjack/flapjack-executive.yaml"
|
55
55
|
puts
|
56
56
|
puts ".examples of these files exist in /etc/flapjack/'
|
57
57
|
|
data/config.ru
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
|
4
|
+
#
|
5
|
+
# flapjack
|
6
|
+
# Boots flapjack (coordinator, flapjack-executive, notification workers, ...)
|
7
|
+
#
|
8
|
+
|
9
|
+
PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin
|
10
|
+
|
11
|
+
# load global rbenv environment if present
|
12
|
+
if [ -x /etc/profile.d/rbenv.sh ] ; then
|
13
|
+
source /etc/profile.d/rbenv.sh
|
14
|
+
fi
|
15
|
+
|
16
|
+
# Default return value
|
17
|
+
RETVAL=0
|
18
|
+
|
19
|
+
export FLAPJACK_ENV="production"
|
20
|
+
|
21
|
+
if [ ! $(which flapjack) ]; then
|
22
|
+
echo "Error: flapjack isn't in PATH."
|
23
|
+
echo "Refusing to do anything!"
|
24
|
+
exit 1
|
25
|
+
fi
|
26
|
+
|
27
|
+
# Evaluate command
|
28
|
+
case "$1" in
|
29
|
+
start)
|
30
|
+
flapjack --config /etc/flapjack/flapjack-config.yaml --daemonize
|
31
|
+
RETVAL=$?
|
32
|
+
;;
|
33
|
+
stop)
|
34
|
+
kill `cat /var/run/flapjack/flapjack.pid`
|
35
|
+
# FIXME: make this more robust, preferably use daemons foo
|
36
|
+
# or at least wait until the flapjack process has exited before exiting
|
37
|
+
RETVAL=$?
|
38
|
+
;;
|
39
|
+
*)
|
40
|
+
echo "Usage: flapjack {start|stop}"
|
41
|
+
RETVAL=1
|
42
|
+
;;
|
43
|
+
esac
|
44
|
+
|
45
|
+
exit $RETVAL
|
46
|
+
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
|
4
|
+
#
|
5
|
+
# flapjack-nagios-receiver
|
6
|
+
# reads from a nagios perfdata named-pipe and submits each event to the events queue in redis
|
7
|
+
#
|
8
|
+
|
9
|
+
PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin
|
10
|
+
|
11
|
+
# load global rbenv environment if present
|
12
|
+
if [ -x /etc/profile.d/rbenv.sh ] ; then
|
13
|
+
source /etc/profile.d/rbenv.sh
|
14
|
+
fi
|
15
|
+
|
16
|
+
# Default return value
|
17
|
+
RETVAL=0
|
18
|
+
|
19
|
+
export FLAPJACK_ENV="production"
|
20
|
+
|
21
|
+
NAGIOS_PERFDATA_FIFO="/var/cache/nagios3/event_stream.fifo"
|
22
|
+
|
23
|
+
if [ ! $(which flapjack-nagios-receiver-control) ]; then
|
24
|
+
echo "Error: flapjack-nagios-receiver-control isn't in PATH."
|
25
|
+
echo "Refusing to do anything!"
|
26
|
+
exit 1
|
27
|
+
fi
|
28
|
+
|
29
|
+
# Evaluate command
|
30
|
+
|
31
|
+
#/bin/bash -c "source /etc/profile.d/rbenv.sh && rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control $1 -- --config /etc/flapjack/flapjack-config.yaml ${NAGIOS_PERFDATA_FIFO}"
|
32
|
+
rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control $1 -- --config /etc/flapjack/flapjack-config.yaml ${NAGIOS_PERFDATA_FIFO}
|
33
|
+
RETVAL=$?
|
34
|
+
|
35
|
+
exit $RETVAL
|
36
|
+
|
data/doc/GLOSSARY.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
|
2
|
+
As implemented for the reporting API functions, these are placeholder terms only and open for discussion.
|
3
|
+
|
4
|
+
<dl>
|
5
|
+
<dt>Entity</dt>
|
6
|
+
<dd>TODO</dd>
|
7
|
+
<dt>Check</dt>
|
8
|
+
<dd>TODO</dd>
|
9
|
+
|
10
|
+
<dt>Outage</dt>
|
11
|
+
<dd>Period of time from when a check goes into the 'CRITICAL'
|
12
|
+
state to when it comes out of that state again</dd>
|
13
|
+
<dt>Scheduled maintenance</dt>
|
14
|
+
<dd>Periods of time explicitly created by external actors to denote that maintenance is scheduled to occur at these times.</dd>
|
15
|
+
<dt>Unscheduled maintenance</dt>
|
16
|
+
<dd>Periods of time explicitly created by external actors to denote that maintenance is happening or has happened at these times</dd>
|
17
|
+
<dt>Downtime</dt>
|
18
|
+
<dd>Outages minus scheduled maintenances across any given time period</dd>
|
19
|
+
</dl>
|
@@ -0,0 +1,90 @@
|
|
1
|
+
---
|
2
|
+
|
3
|
+
development:
|
4
|
+
pid_file: tmp/pids/flapjack.pid
|
5
|
+
log_file: log/flapjack.log
|
6
|
+
daemonize: no
|
7
|
+
redis:
|
8
|
+
host: 127.0.0.1
|
9
|
+
port: 6379
|
10
|
+
db: 13
|
11
|
+
executive:
|
12
|
+
enabled: yes
|
13
|
+
email_queue: email_notifications
|
14
|
+
sms_queue: sms_notifications
|
15
|
+
jabber_queue: jabber_notifications
|
16
|
+
notification_log_file: log/flapjack-notification.log
|
17
|
+
email_notifier:
|
18
|
+
enabled: yes
|
19
|
+
queue: email_notifications
|
20
|
+
smtp_config:
|
21
|
+
port: 2525
|
22
|
+
# address: "localhost"
|
23
|
+
# port: 25
|
24
|
+
# domain: 'localhost.localdomain'
|
25
|
+
# user_name: nil
|
26
|
+
# password: nil
|
27
|
+
# authentication: nil
|
28
|
+
# enable_starttls_auto: true
|
29
|
+
sms_notifier:
|
30
|
+
enabled: yes
|
31
|
+
queue: sms_notifications
|
32
|
+
username: "ermahgerd"
|
33
|
+
password: "xxxx"
|
34
|
+
jabber_gateway:
|
35
|
+
enabled: yes
|
36
|
+
queue: jabber_notifications
|
37
|
+
server: "jabber.domain.tld"
|
38
|
+
port: 5222
|
39
|
+
jabberid: "flapjack@jabber.domain.tld"
|
40
|
+
password: "good-password"
|
41
|
+
alias: "flapjack"
|
42
|
+
rooms:
|
43
|
+
- "gimp@conference.jabber.domain.tld"
|
44
|
+
- "log@conference.jabber.domain.tld"
|
45
|
+
pagerduty_gateway:
|
46
|
+
enabled: yes
|
47
|
+
queue: pagerduty_notifications
|
48
|
+
web:
|
49
|
+
enabled: yes
|
50
|
+
port: 5080
|
51
|
+
api:
|
52
|
+
enabled: yes
|
53
|
+
port: 5081
|
54
|
+
|
55
|
+
test:
|
56
|
+
redis:
|
57
|
+
database: 14
|
58
|
+
email_notifier:
|
59
|
+
instances: 1
|
60
|
+
sms_notifier:
|
61
|
+
instances: 1
|
62
|
+
username: "ermahgerd"
|
63
|
+
password: "xxxx"
|
64
|
+
web:
|
65
|
+
instances: 1
|
66
|
+
|
67
|
+
|
68
|
+
staging:
|
69
|
+
redis:
|
70
|
+
database: 15
|
71
|
+
email_notifier:
|
72
|
+
instances: 1
|
73
|
+
sms_notifier:
|
74
|
+
instances: 1
|
75
|
+
username: "ermahgerd"
|
76
|
+
password: "xxxx"
|
77
|
+
web:
|
78
|
+
instances: 1
|
79
|
+
|
80
|
+
production:
|
81
|
+
redis:
|
82
|
+
database: 12
|
83
|
+
email_notifier:
|
84
|
+
instances: 1
|
85
|
+
sms_notifier:
|
86
|
+
instances: 1
|
87
|
+
username: "ermahgerd"
|
88
|
+
password: "xxxx"
|
89
|
+
web:
|
90
|
+
instances: 1
|
@@ -0,0 +1,132 @@
|
|
1
|
+
@events
|
2
|
+
Feature: events
|
3
|
+
So people can be notified when things break and recover
|
4
|
+
flapjack-executive must process events correctly
|
5
|
+
|
6
|
+
# TODO make entity and check implicit, so the test reads more cleanly
|
7
|
+
Background:
|
8
|
+
Given an entity 'def' exists
|
9
|
+
|
10
|
+
Scenario: Check ok to ok
|
11
|
+
Given check 'abc' for entity 'def' is in an ok state
|
12
|
+
When an ok event is received for check 'abc' on entity 'def'
|
13
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
14
|
+
# And show me the output
|
15
|
+
|
16
|
+
Scenario: Check ok to failed
|
17
|
+
Given check 'abc' for entity 'def' is in an ok state
|
18
|
+
When a failure event is received for check 'abc' on entity 'def'
|
19
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
20
|
+
|
21
|
+
@time
|
22
|
+
Scenario: Check failed to failed after 10 seconds
|
23
|
+
Given check 'abc' for entity 'def' is in an ok state
|
24
|
+
When a failure event is received for check 'abc' on entity 'def'
|
25
|
+
And 10 seconds passes
|
26
|
+
And a failure event is received for check 'abc' on entity 'def'
|
27
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
28
|
+
|
29
|
+
@time
|
30
|
+
Scenario: Check ok to failed for 1 minute
|
31
|
+
Given check 'abc' for entity 'def' is in an ok state
|
32
|
+
When a failure event is received for check 'abc' on entity 'def'
|
33
|
+
And 1 minute passes
|
34
|
+
And a failure event is received for check 'abc' on entity 'def'
|
35
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
36
|
+
|
37
|
+
@time
|
38
|
+
Scenario: Check failed and alerted to failed for 1 minute
|
39
|
+
Given check 'abc' for entity 'def' is in an ok state
|
40
|
+
When a failure event is received for check 'abc' on entity 'def'
|
41
|
+
And 1 minute passes
|
42
|
+
And a failure event is received for check 'abc' on entity 'def'
|
43
|
+
And show me the notifications
|
44
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
45
|
+
When 1 minute passes
|
46
|
+
And a failure event is received for check 'abc' on entity 'def'
|
47
|
+
And show me the notifications
|
48
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
49
|
+
|
50
|
+
@time
|
51
|
+
Scenario: Check failed and alerted to failed for 6 minutes
|
52
|
+
Given check 'abc' for entity 'def' is in an ok state
|
53
|
+
When a failure event is received for check 'abc' on entity 'def'
|
54
|
+
And 1 minute passes
|
55
|
+
And a failure event is received for check 'abc' on entity 'def'
|
56
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
57
|
+
When 6 minutes passes
|
58
|
+
And a failure event is received for check 'abc' on entity 'def'
|
59
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
60
|
+
|
61
|
+
@time
|
62
|
+
Scenario: Check ok to failed for 1 minute when in scheduled maintenance
|
63
|
+
Given check 'abc' for entity 'def' is in an ok state
|
64
|
+
And check 'abc' for entity 'def' is in scheduled maintenance
|
65
|
+
When a failure event is received for check 'abc' on entity 'def'
|
66
|
+
And 1 minute passes
|
67
|
+
And a failure event is received for check 'abc' on entity 'def'
|
68
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
69
|
+
|
70
|
+
@time
|
71
|
+
Scenario: Check ok to failed for 1 minute when in unscheduled maintenance
|
72
|
+
Given check 'abc' for entity 'def' is in an ok state
|
73
|
+
And check 'abc' for entity 'def' is in unscheduled maintenance
|
74
|
+
When a failure event is received for check 'abc' on entity 'def'
|
75
|
+
And 1 minute passes
|
76
|
+
And a failure event is received for check 'abc' on entity 'def'
|
77
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
78
|
+
|
79
|
+
@time
|
80
|
+
Scenario: Check ok to failed for 1 minute, acknowledged, and failed for 6 minutes
|
81
|
+
Given check 'abc' for entity 'def' is in an ok state
|
82
|
+
When a failure event is received for check 'abc' on entity 'def'
|
83
|
+
And 1 minute passes
|
84
|
+
And a failure event is received for check 'abc' on entity 'def'
|
85
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
86
|
+
When an acknowledgement is received for check 'abc' on entity 'def'
|
87
|
+
And 6 minute passes
|
88
|
+
And a failure event is received for check 'abc' on entity 'def'
|
89
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
90
|
+
|
91
|
+
Scenario: Check failed to ok
|
92
|
+
Given check 'abc' for entity 'def' is in a failure state
|
93
|
+
And 5 minutes passes
|
94
|
+
And a failure event is received for check 'abc' on entity 'def'
|
95
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
96
|
+
When 5 minutes passes
|
97
|
+
And an ok event is received for check 'abc' on entity 'def'
|
98
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
99
|
+
|
100
|
+
@time
|
101
|
+
Scenario: Check failed to ok when acknowledged
|
102
|
+
Given check 'abc' for entity 'def' is in a failure state
|
103
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
104
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
105
|
+
When 1 minute passes
|
106
|
+
And an ok event is received for check 'abc' on entity 'def'
|
107
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
108
|
+
|
109
|
+
@time
|
110
|
+
Scenario: Check failed to ok when acknowledged, and fails after 6 minutes
|
111
|
+
Given check 'abc' for entity 'def' is in a failure state
|
112
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
113
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
114
|
+
When 1 minute passes
|
115
|
+
And an ok event is received for check 'abc' on entity 'def'
|
116
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
117
|
+
When 6 minutes passes
|
118
|
+
And a failure event is received for check 'abc' on entity 'def'
|
119
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
120
|
+
When 6 minutes passes
|
121
|
+
And a failure event is received for check 'abc' on entity 'def'
|
122
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
123
|
+
|
124
|
+
Scenario: Acknowledgement when ok
|
125
|
+
Given check 'abc' for entity 'def' is in an ok state
|
126
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
127
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
128
|
+
|
129
|
+
Scenario: Acknowledgement when failed
|
130
|
+
Given check 'abc' for entity 'def' is in a failure state
|
131
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
132
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
@@ -0,0 +1,57 @@
|
|
1
|
+
@notifications
|
2
|
+
Feature: notifications
|
3
|
+
So people can be notified when things break and recover
|
4
|
+
flapjack-notifier must send notifications correctly
|
5
|
+
|
6
|
+
# TODO test across multiple contacts
|
7
|
+
|
8
|
+
@resque
|
9
|
+
Scenario: Queue an SMS notification
|
10
|
+
Given the user wants to receive SMS notifications for entity 'example.com'
|
11
|
+
When an event notification is generated for entity 'example.com'
|
12
|
+
Then an SMS notification for entity 'example.com' should be queued for the user
|
13
|
+
And an email notification for entity 'example.com' should not be queued for the user
|
14
|
+
|
15
|
+
@resque
|
16
|
+
Scenario: Queue an email notification
|
17
|
+
Given the user wants to receive email notifications for entity 'example.com'
|
18
|
+
When an event notification is generated for entity 'example.com'
|
19
|
+
Then an email notification for entity 'example.com' should be queued for the user
|
20
|
+
And an SMS notification for entity 'example.com' should not be queued for the user
|
21
|
+
|
22
|
+
@resque
|
23
|
+
Scenario: Queue SMS and email notifications
|
24
|
+
Given the user wants to receive SMS notifications for entity 'example.com' and email notifications for entity 'example2.com'
|
25
|
+
When an event notification is generated for entity 'example.com'
|
26
|
+
And an event notification is generated for entity 'example2.com'
|
27
|
+
Then an SMS notification for entity 'example.com' should be queued for the user
|
28
|
+
And an SMS notification for entity 'example2.com' should not be queued for the user
|
29
|
+
Then an email notification for entity 'example.com' should not be queued for the user
|
30
|
+
And an email notification for entity 'example2.com' should be queued for the user
|
31
|
+
|
32
|
+
# NB: Scenarios below here are those that cover code run by the Resque workers
|
33
|
+
# We could maybe test resque integration as well, see
|
34
|
+
# http://corner.squareup.com/2010/08/cucumber-and-resque.html
|
35
|
+
# http://gist.github.com/532100
|
36
|
+
|
37
|
+
Scenario: Send a queued SMS notification
|
38
|
+
Given a user SMS notification has been queued for entity 'example.com'
|
39
|
+
When the SMS notification handler runs successfully
|
40
|
+
Then the user should receive an SMS notification
|
41
|
+
|
42
|
+
Scenario: Handle a failure to send a queued SMS notification
|
43
|
+
Given a user SMS notification has been queued for entity 'example.com'
|
44
|
+
When the SMS notification handler fails to send an SMS
|
45
|
+
Then the user should not receive an SMS notification
|
46
|
+
|
47
|
+
@email
|
48
|
+
Scenario: Send a queued email notification
|
49
|
+
Given a user email notification has been queued for entity 'example.com'
|
50
|
+
When the email notification handler runs successfully
|
51
|
+
Then the user should receive an email notification
|
52
|
+
|
53
|
+
@email
|
54
|
+
Scenario: Handle a failure to send a queued email notification
|
55
|
+
Given a user email notification has been queued for entity 'example.com'
|
56
|
+
When the email notification handler fails to send an email
|
57
|
+
Then the user should not receive an email notification
|
@@ -1,15 +1,17 @@
|
|
1
|
-
Feature: Packagability
|
1
|
+
Feature: Packagability
|
2
2
|
To make Flapjack usable to the masses
|
3
3
|
It must be easily packagable
|
4
4
|
|
5
5
|
Scenario: No rubygems references
|
6
6
|
Given I am at the project root
|
7
7
|
When I run "grep require lib/* bin/* -R |grep rubygems"
|
8
|
-
Then I should see 0 lines of output
|
8
|
+
Then the exit status should be 1
|
9
|
+
And I should see 0 lines of output
|
9
10
|
|
10
11
|
Scenario: A shebang that works everywhere
|
11
12
|
Given I am at the project root
|
12
13
|
When I run "find lib/ -type 'f' -name '*.rb'"
|
13
|
-
Then every file in the output should start with "#!/usr/bin/env ruby"
|
14
|
+
Then the exit status should be 0
|
15
|
+
And every file in the output should start with "#!/usr/bin/env ruby"
|
14
16
|
|
15
17
|
|
@@ -0,0 +1,164 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/data/entity_check'
|
4
|
+
require 'flapjack/data/event'
|
5
|
+
|
6
|
+
def drain_events
|
7
|
+
loop do
|
8
|
+
event = Flapjack::Data::Event.next(:block => false, :persistence => @redis)
|
9
|
+
break unless event
|
10
|
+
@app.send(:process_event, event)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def submit_event(event)
|
15
|
+
@redis.rpush 'events', event.to_json
|
16
|
+
end
|
17
|
+
|
18
|
+
def set_scheduled_maintenance(entity, check, duration = 60*60*2)
|
19
|
+
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
20
|
+
t = Time.now.to_i
|
21
|
+
entity_check.create_scheduled_maintenance(:start_time => t, :duration => duration, :summary => "upgrading everything")
|
22
|
+
@redis.setex("#{entity}:#{check}:scheduled_maintenance", duration, t)
|
23
|
+
end
|
24
|
+
|
25
|
+
def remove_scheduled_maintenance(entity, check)
|
26
|
+
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
27
|
+
sm = entity_check.maintenances(nil, nil, :scheduled => true)
|
28
|
+
sm.each do |m|
|
29
|
+
entity_check.delete_scheduled_maintenance(:start_time => m[:start_time])
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def remove_unscheduled_maintenance(entity, check)
|
34
|
+
# end any unscheduled downtime
|
35
|
+
event_id = entity + ":" + check
|
36
|
+
if (um_start = @redis.get("#{event_id}:unscheduled_maintenance"))
|
37
|
+
@redis.del("#{event_id}:unscheduled_maintenance")
|
38
|
+
duration = Time.now.to_i - um_start.to_i
|
39
|
+
@redis.zadd("#{event_id}:unscheduled_maintenances", duration, um_start)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def remove_notifications(entity, check)
|
44
|
+
event_id = entity + ":" + check
|
45
|
+
@redis.del("#{event_id}:last_problem_notification")
|
46
|
+
@redis.del("#{event_id}:last_recovery_notification")
|
47
|
+
@redis.del("#{event_id}:last_acknowledgement_notification")
|
48
|
+
end
|
49
|
+
|
50
|
+
def set_ok_state(entity, check)
|
51
|
+
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
52
|
+
entity_check.update_state(Flapjack::Data::EntityCheck::STATE_OK,
|
53
|
+
:timestamp => (Time.now.to_i - (60*60*24)))
|
54
|
+
end
|
55
|
+
|
56
|
+
def set_failure_state(entity, check)
|
57
|
+
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
58
|
+
entity_check.update_state(Flapjack::Data::EntityCheck::STATE_CRITICAL,
|
59
|
+
:timestamp => (Time.now.to_i - (60*60*24)))
|
60
|
+
end
|
61
|
+
|
62
|
+
def submit_ok(entity, check)
|
63
|
+
event = {
|
64
|
+
'type' => 'service',
|
65
|
+
'state' => 'ok',
|
66
|
+
'summary' => '0% packet loss',
|
67
|
+
'entity' => entity,
|
68
|
+
'check' => check,
|
69
|
+
'client' => 'clientx'
|
70
|
+
}
|
71
|
+
submit_event(event)
|
72
|
+
end
|
73
|
+
|
74
|
+
def submit_critical(entity, check)
|
75
|
+
event = {
|
76
|
+
'type' => 'service',
|
77
|
+
'state' => 'critical',
|
78
|
+
'summary' => '100% packet loss',
|
79
|
+
'entity' => entity,
|
80
|
+
'check' => check,
|
81
|
+
'client' => 'clientx'
|
82
|
+
}
|
83
|
+
submit_event(event)
|
84
|
+
end
|
85
|
+
|
86
|
+
def submit_acknowledgement(entity, check)
|
87
|
+
event = {
|
88
|
+
'type' => 'action',
|
89
|
+
'state' => 'acknowledgement',
|
90
|
+
'summary' => "I'll have this fixed in a jiffy, saw the same thing yesterday",
|
91
|
+
'entity' => entity,
|
92
|
+
'check' => check,
|
93
|
+
'client' => 'clientx',
|
94
|
+
# 'acknowledgement_id' =>
|
95
|
+
}
|
96
|
+
submit_event(event)
|
97
|
+
end
|
98
|
+
|
99
|
+
Given /^an entity '([\w\.\-]+)' exists$/ do |entity|
|
100
|
+
Flapjack::Data::Entity.add({'id' => '5000',
|
101
|
+
'name' => entity},
|
102
|
+
:redis => @redis )
|
103
|
+
end
|
104
|
+
|
105
|
+
Given /^^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in an ok state$/ do |check, entity|
|
106
|
+
remove_unscheduled_maintenance(entity, check)
|
107
|
+
remove_scheduled_maintenance(entity, check)
|
108
|
+
remove_notifications(entity, check)
|
109
|
+
set_ok_state(entity, check)
|
110
|
+
end
|
111
|
+
|
112
|
+
Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in a failure state$/ do |check, entity|
|
113
|
+
remove_unscheduled_maintenance(entity, check)
|
114
|
+
remove_scheduled_maintenance(entity, check)
|
115
|
+
remove_notifications(entity, check)
|
116
|
+
set_failure_state(entity, check)
|
117
|
+
end
|
118
|
+
|
119
|
+
Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in scheduled maintenance$/ do |check, entity|
|
120
|
+
remove_unscheduled_maintenance(entity, check)
|
121
|
+
set_scheduled_maintenance(entity, check)
|
122
|
+
end
|
123
|
+
|
124
|
+
# TODO set the state directly rather than submit & drain
|
125
|
+
Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in unscheduled maintenance$/ do |check, entity|
|
126
|
+
remove_scheduled_maintenance(entity, check)
|
127
|
+
set_failure_state(entity, check)
|
128
|
+
submit_acknowledgement(entity, check)
|
129
|
+
drain_events # TODO these should only be in When clauses
|
130
|
+
end
|
131
|
+
|
132
|
+
When /^an ok event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
|
133
|
+
submit_ok(entity, check)
|
134
|
+
drain_events
|
135
|
+
end
|
136
|
+
|
137
|
+
When /^a failure event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
|
138
|
+
submit_critical(entity, check)
|
139
|
+
drain_events
|
140
|
+
end
|
141
|
+
|
142
|
+
When /^an acknowledgement .*is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
|
143
|
+
submit_acknowledgement(entity, check)
|
144
|
+
drain_events
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
# TODO logging is a side-effect, should test for notification generation itself
|
149
|
+
Then /^a notification should not be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
|
150
|
+
message = @app.logger.messages.find_all {|m| m =~ /ending notifications for event #{entity}:#{check}/ }.last
|
151
|
+
message ? happy = message.match(/Not sending notifications/) : happy = false
|
152
|
+
happy.should be_true
|
153
|
+
end
|
154
|
+
|
155
|
+
Then /^a notification should be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
|
156
|
+
message = @app.logger.messages.find_all {|m| m =~ /ending notifications for event #{entity}:#{check}/ }.last
|
157
|
+
message ? happy = message.match(/Sending notifications/) : happy = false
|
158
|
+
happy.should be_true
|
159
|
+
end
|
160
|
+
|
161
|
+
Then /^show me the notifications?$/ do
|
162
|
+
puts @app.logger.messages.join("\n")
|
163
|
+
end
|
164
|
+
|