flapjack 0.5.5 → 0.6.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.rspec +10 -0
- data/Gemfile +18 -0
- data/Guardfile +14 -0
- data/README.md +152 -173
- data/Rakefile +53 -150
- data/bin/flapjack +72 -0
- data/bin/flapjack-nagios-receiver +111 -0
- data/bin/flapjack-nagios-receiver-control +15 -0
- data/bin/flapjack-netsaint-parser +0 -2
- data/bin/flapjack-populator +133 -16
- data/bin/install-flapjack-systemwide +2 -2
- data/config.ru +11 -0
- data/dist/etc/init.d/flapjack +46 -0
- data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
- data/doc/GLOSSARY.md +19 -0
- data/etc/flapjack_config.yaml.example +90 -0
- data/features/events.feature +132 -0
- data/features/notifications.feature +57 -0
- data/features/packaging-lintian.feature +5 -3
- data/features/steps/events_steps.rb +164 -0
- data/features/steps/flapjack-importer_steps.rb +2 -5
- data/features/steps/flapjack-worker_steps.rb +13 -6
- data/features/steps/notifications_steps.rb +178 -0
- data/features/steps/packaging-lintian_steps.rb +14 -0
- data/features/steps/time_travel_steps.rb +34 -0
- data/features/support/env.rb +63 -36
- data/flapjack.gemspec +35 -186
- data/lib/flapjack.rb +2 -0
- data/lib/flapjack/api.rb +274 -0
- data/lib/flapjack/api/entity_check_presenter.rb +184 -0
- data/lib/flapjack/api/entity_presenter.rb +66 -0
- data/lib/flapjack/cli/worker_manager.rb +1 -2
- data/lib/flapjack/configuration.rb +11 -0
- data/lib/flapjack/coordinator.rb +288 -0
- data/lib/flapjack/daemonizing.rb +186 -0
- data/lib/flapjack/data/contact.rb +45 -0
- data/lib/flapjack/data/entity.rb +89 -0
- data/lib/flapjack/data/entity_check.rb +396 -0
- data/lib/flapjack/data/event.rb +144 -0
- data/lib/flapjack/data/notification.rb +13 -0
- data/lib/flapjack/executive.rb +289 -0
- data/lib/flapjack/filters/acknowledgement.rb +39 -0
- data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
- data/lib/flapjack/filters/delays.rb +53 -0
- data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
- data/lib/flapjack/filters/ok.rb +25 -5
- data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
- data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
- data/lib/flapjack/jabber.rb +294 -0
- data/lib/flapjack/notification/common.rb +23 -0
- data/lib/flapjack/notification/email.rb +107 -0
- data/lib/flapjack/notification/email/alert.html.haml +48 -0
- data/lib/flapjack/notification/email/alert.text.erb +14 -0
- data/lib/flapjack/notification/sms.rb +42 -0
- data/lib/flapjack/notification/sms/messagenet.rb +49 -0
- data/lib/flapjack/notifier_engine.rb +4 -4
- data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
- data/lib/flapjack/pagerduty.rb +230 -0
- data/lib/flapjack/patches.rb +108 -19
- data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
- data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
- data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
- data/lib/flapjack/pikelet.rb +56 -0
- data/lib/flapjack/transports/beanstalkd.rb +1 -1
- data/lib/flapjack/transports/result.rb +6 -6
- data/lib/flapjack/utility.rb +46 -0
- data/lib/flapjack/version.rb +5 -0
- data/lib/flapjack/web.rb +198 -0
- data/lib/flapjack/web/views/acknowledge.haml +55 -0
- data/lib/flapjack/web/views/check.haml +162 -0
- data/lib/flapjack/web/views/index.haml +92 -0
- data/lib/flapjack/web/views/self_stats.haml +56 -0
- data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
- data/lib/flapjack/worker/cli.rb +49 -0
- data/{spec → spec.old}/check_sandbox/echo +0 -0
- data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
- data/{spec → spec.old}/configs/recipients.ini +0 -0
- data/{spec → spec.old}/helpers.rb +0 -0
- data/{spec → spec.old}/inifile_spec.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
- data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
- data/{spec → spec.old}/notifier_application_spec.rb +0 -0
- data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_spec.rb +0 -0
- data/{spec → spec.old}/notifier_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
- data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
- data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
- data/{spec → spec.old}/simple.ini +0 -0
- data/{spec → spec.old}/spec.opts +0 -0
- data/{spec → spec.old}/test-filters/blocker.rb +0 -0
- data/{spec → spec.old}/test-filters/mock.rb +0 -0
- data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
- data/{spec → spec.old}/transports/mock_transport.rb +0 -0
- data/{spec → spec.old}/worker_application_spec.rb +0 -0
- data/{spec → spec.old}/worker_options_spec.rb +0 -0
- data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
- data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
- data/spec/lib/flapjack/api_spec.rb +170 -0
- data/spec/lib/flapjack/coordinator_spec.rb +16 -0
- data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
- data/spec/lib/flapjack/data/entity_spec.rb +71 -0
- data/spec/lib/flapjack/data/event_spec.rb +6 -0
- data/spec/lib/flapjack/executive_spec.rb +59 -0
- data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
- data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
- data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
- data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/jabber_spec.rb +150 -0
- data/spec/lib/flapjack/notification/email_spec.rb +6 -0
- data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
- data/spec/lib/flapjack/pikelet_spec.rb +28 -0
- data/spec/lib/flapjack/web_spec.rb +188 -0
- data/spec/spec_helper.rb +44 -0
- data/spec/support/profile_all_formatter.rb +44 -0
- data/spec/support/uncolored_doc_formatter.rb +9 -0
- data/tasks/events.rake +85 -0
- data/tmp/acknowledge.rb +14 -0
- data/tmp/create_config_yaml.rb +16 -0
- data/tmp/create_events_failure.rb +33 -0
- data/tmp/create_events_ok.rb +33 -0
- data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
- data/tmp/create_events_ok_failure.rb +40 -0
- data/tmp/create_events_ok_failure_ack.rb +54 -0
- data/tmp/dummy_entities.json +1 -0
- data/tmp/generate_nagios_test_hosts.rb +16 -0
- data/tmp/parse_config_yaml.rb +7 -0
- data/tmp/redis_delete_all_keys.rb +11 -0
- data/tmp/test_entities.json +1 -0
- metadata +482 -221
- data/TODO.md +0 -36
- data/VERSION +0 -1
- data/bin/flapjack-benchmark +0 -50
- data/bin/flapjack-notifier +0 -21
- data/bin/flapjack-notifier-manager +0 -43
- data/bin/flapjack-stats +0 -27
- data/bin/flapjack-worker +0 -13
- data/bin/flapjack-worker-manager +0 -35
- data/dist/etc/init.d/flapjack-notifier +0 -47
- data/dist/etc/init.d/flapjack-workers +0 -44
- data/features/flapjack-notifier-manager.feature +0 -19
- data/features/flapjack-worker-manager.feature +0 -27
- data/features/flapjack-worker.feature +0 -27
- data/features/netsaint-config-converter.feature +0 -126
- data/features/persistence/couch.feature +0 -105
- data/features/persistence/sqlite3.feature +0 -105
- data/features/persistence/steps/couch_steps.rb +0 -25
- data/features/persistence/steps/generic_steps.rb +0 -102
- data/features/persistence/steps/sqlite3_steps.rb +0 -13
- data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
- data/features/steps/flapjack-worker-manager_steps.rb +0 -48
- data/lib/flapjack/applications/notifier.rb +0 -222
- data/lib/flapjack/cli/notifier.rb +0 -108
- data/lib/flapjack/cli/notifier_manager.rb +0 -86
- data/lib/flapjack/cli/worker.rb +0 -51
|
@@ -44,14 +44,14 @@ system("cp -aiv #{etc_path}/* /etc")
|
|
|
44
44
|
|
|
45
45
|
# set sequence number to 50 so beanstalkd has a chance to boot
|
|
46
46
|
system("update-rc.d flapjack-workers defaults 50")
|
|
47
|
-
system("update-rc.d flapjack-
|
|
47
|
+
system("update-rc.d flapjack-executive defaults 50")
|
|
48
48
|
|
|
49
49
|
puts
|
|
50
50
|
puts "Setup complete!"
|
|
51
51
|
puts
|
|
52
52
|
puts "You will want to customise:"
|
|
53
53
|
puts " * /etc/flapjack/recipients.yaml"
|
|
54
|
-
puts " * /etc/flapjack/flapjack-
|
|
54
|
+
puts " * /etc/flapjack/flapjack-executive.yaml"
|
|
55
55
|
puts
|
|
56
56
|
# Closing quote fixed: the literal opened with a double quote but closed with a
# single quote, leaving the string unterminated.
puts ".examples of these files exist in /etc/flapjack/"
|
|
57
57
|
|
data/config.ru
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/bin/bash
#
# Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
#
# flapjack
# Boots flapjack (coordinator, flapjack-executive, notification workers, ...)
#
# Usage: flapjack {start|stop}
# Exits with the status of the underlying command, or 1 on usage errors.
#

PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin

# load global rbenv environment if present
if [ -x /etc/profile.d/rbenv.sh ] ; then
  source /etc/profile.d/rbenv.sh
fi

# Default return value
RETVAL=0

export FLAPJACK_ENV="production"

PIDFILE="/var/run/flapjack/flapjack.pid"

# `command -v` avoids the unquoted $(which ...) test, which breaks inside [ ]
# when the resolved path contains whitespace.
if ! command -v flapjack >/dev/null 2>&1; then
  echo "Error: flapjack isn't in PATH."
  echo "Refusing to do anything!"
  exit 1
fi

# Evaluate command
case "$1" in
  start)
    flapjack --config /etc/flapjack/flapjack-config.yaml --daemonize
    RETVAL=$?
    ;;
  stop)
    # Guard against a missing pid file: without it `kill` would be invoked
    # with no arguments and only print its own usage message.
    if [ -r "$PIDFILE" ]; then
      kill "$(cat "$PIDFILE")"
      RETVAL=$?
    else
      echo "Error: pid file $PIDFILE not found."
      RETVAL=1
    fi
    # FIXME: make this more robust, preferably use daemons foo
    # or at least wait until the flapjack process has exited before exiting
    ;;
  *)
    echo "Usage: flapjack {start|stop}"
    RETVAL=1
    ;;
esac

exit $RETVAL
|
|
46
|
+
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#!/bin/bash
#
# Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
#
# flapjack-nagios-receiver
# reads from a nagios perfdata named-pipe and submits each event to the events queue in redis
#
# The first argument is passed straight through to
# flapjack-nagios-receiver-control; the script exits with that command's status.
#

PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin

# load global rbenv environment if present
if [ -x /etc/profile.d/rbenv.sh ] ; then
  source /etc/profile.d/rbenv.sh
fi

# Default return value
RETVAL=0

export FLAPJACK_ENV="production"

NAGIOS_PERFDATA_FIFO="/var/cache/nagios3/event_stream.fifo"

# `command -v` avoids the unquoted $(which ...) test, which breaks inside [ ]
# when the resolved path contains whitespace.
if ! command -v flapjack-nagios-receiver-control >/dev/null 2>&1; then
  echo "Error: flapjack-nagios-receiver-control isn't in PATH."
  echo "Refusing to do anything!"
  exit 1
fi

# Evaluate command

#/bin/bash -c "source /etc/profile.d/rbenv.sh && rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control $1 -- --config /etc/flapjack/flapjack-config.yaml ${NAGIOS_PERFDATA_FIFO}"
# Arguments are quoted so they are not word-split or glob-expanded.
rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control "$1" -- --config /etc/flapjack/flapjack-config.yaml "${NAGIOS_PERFDATA_FIFO}"
RETVAL=$?

exit $RETVAL
|
|
36
|
+
|
data/doc/GLOSSARY.md
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
|
|
2
|
+
As implemented for the reporting API functions, these are placeholder terms only and open for discussion.
|
|
3
|
+
|
|
4
|
+
<dl>
|
|
5
|
+
<dt>Entity</dt>
|
|
6
|
+
<dd>TODO</dd>
|
|
7
|
+
<dt>Check</dt>
|
|
8
|
+
<dd>TODO</dd>
|
|
9
|
+
|
|
10
|
+
<dt>Outage</dt>
|
|
11
|
+
<dd>Period of time from when a check goes into the 'CRITICAL'
|
|
12
|
+
state to when it comes out of that state again</dd>
|
|
13
|
+
<dt>Scheduled maintenance</dt>
|
|
14
|
+
<dd>Periods of time explicitly created by external actors to denote that maintenance is scheduled to occur at these times.</dd>
|
|
15
|
+
<dt>Unscheduled maintenance</dt>
|
|
16
|
+
<dd>Periods of time explicitly created by external actors to denote that maintenance is happening or has happened at these times</dd>
|
|
17
|
+
<dt>Downtime</dt>
|
|
18
|
+
<dd>Outages minus scheduled maintenances across any given time period</dd>
|
|
19
|
+
</dl>
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
|
|
3
|
+
development:
|
|
4
|
+
pid_file: tmp/pids/flapjack.pid
|
|
5
|
+
log_file: log/flapjack.log
|
|
6
|
+
daemonize: no
|
|
7
|
+
redis:
|
|
8
|
+
host: 127.0.0.1
|
|
9
|
+
port: 6379
|
|
10
|
+
db: 13
|
|
11
|
+
executive:
|
|
12
|
+
enabled: yes
|
|
13
|
+
email_queue: email_notifications
|
|
14
|
+
sms_queue: sms_notifications
|
|
15
|
+
jabber_queue: jabber_notifications
|
|
16
|
+
notification_log_file: log/flapjack-notification.log
|
|
17
|
+
email_notifier:
|
|
18
|
+
enabled: yes
|
|
19
|
+
queue: email_notifications
|
|
20
|
+
smtp_config:
|
|
21
|
+
port: 2525
|
|
22
|
+
# address: "localhost"
|
|
23
|
+
# port: 25
|
|
24
|
+
# domain: 'localhost.localdomain'
|
|
25
|
+
# user_name: nil
|
|
26
|
+
# password: nil
|
|
27
|
+
# authentication: nil
|
|
28
|
+
# enable_starttls_auto: true
|
|
29
|
+
sms_notifier:
|
|
30
|
+
enabled: yes
|
|
31
|
+
queue: sms_notifications
|
|
32
|
+
username: "ermahgerd"
|
|
33
|
+
password: "xxxx"
|
|
34
|
+
jabber_gateway:
|
|
35
|
+
enabled: yes
|
|
36
|
+
queue: jabber_notifications
|
|
37
|
+
server: "jabber.domain.tld"
|
|
38
|
+
port: 5222
|
|
39
|
+
jabberid: "flapjack@jabber.domain.tld"
|
|
40
|
+
password: "good-password"
|
|
41
|
+
alias: "flapjack"
|
|
42
|
+
rooms:
|
|
43
|
+
- "gimp@conference.jabber.domain.tld"
|
|
44
|
+
- "log@conference.jabber.domain.tld"
|
|
45
|
+
pagerduty_gateway:
|
|
46
|
+
enabled: yes
|
|
47
|
+
queue: pagerduty_notifications
|
|
48
|
+
web:
|
|
49
|
+
enabled: yes
|
|
50
|
+
port: 5080
|
|
51
|
+
api:
|
|
52
|
+
enabled: yes
|
|
53
|
+
port: 5081
|
|
54
|
+
|
|
55
|
+
test:
|
|
56
|
+
redis:
|
|
57
|
+
database: 14
|
|
58
|
+
email_notifier:
|
|
59
|
+
instances: 1
|
|
60
|
+
sms_notifier:
|
|
61
|
+
instances: 1
|
|
62
|
+
username: "ermahgerd"
|
|
63
|
+
password: "xxxx"
|
|
64
|
+
web:
|
|
65
|
+
instances: 1
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
staging:
|
|
69
|
+
redis:
|
|
70
|
+
database: 15
|
|
71
|
+
email_notifier:
|
|
72
|
+
instances: 1
|
|
73
|
+
sms_notifier:
|
|
74
|
+
instances: 1
|
|
75
|
+
username: "ermahgerd"
|
|
76
|
+
password: "xxxx"
|
|
77
|
+
web:
|
|
78
|
+
instances: 1
|
|
79
|
+
|
|
80
|
+
production:
|
|
81
|
+
redis:
|
|
82
|
+
database: 12
|
|
83
|
+
email_notifier:
|
|
84
|
+
instances: 1
|
|
85
|
+
sms_notifier:
|
|
86
|
+
instances: 1
|
|
87
|
+
username: "ermahgerd"
|
|
88
|
+
password: "xxxx"
|
|
89
|
+
web:
|
|
90
|
+
instances: 1
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
@events
|
|
2
|
+
Feature: events
|
|
3
|
+
So people can be notified when things break and recover
|
|
4
|
+
flapjack-executive must process events correctly
|
|
5
|
+
|
|
6
|
+
# TODO make entity and check implicit, so the test reads more cleanly
|
|
7
|
+
Background:
|
|
8
|
+
Given an entity 'def' exists
|
|
9
|
+
|
|
10
|
+
Scenario: Check ok to ok
|
|
11
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
12
|
+
When an ok event is received for check 'abc' on entity 'def'
|
|
13
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
14
|
+
# And show me the output
|
|
15
|
+
|
|
16
|
+
Scenario: Check ok to failed
|
|
17
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
18
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
19
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
20
|
+
|
|
21
|
+
@time
|
|
22
|
+
Scenario: Check failed to failed after 10 seconds
|
|
23
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
24
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
25
|
+
And 10 seconds passes
|
|
26
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
27
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
28
|
+
|
|
29
|
+
@time
|
|
30
|
+
Scenario: Check ok to failed for 1 minute
|
|
31
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
32
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
33
|
+
And 1 minute passes
|
|
34
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
35
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
36
|
+
|
|
37
|
+
@time
|
|
38
|
+
Scenario: Check failed and alerted to failed for 1 minute
|
|
39
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
40
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
41
|
+
And 1 minute passes
|
|
42
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
43
|
+
And show me the notifications
|
|
44
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
45
|
+
When 1 minute passes
|
|
46
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
47
|
+
And show me the notifications
|
|
48
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
49
|
+
|
|
50
|
+
@time
|
|
51
|
+
Scenario: Check failed and alerted to failed for 6 minutes
|
|
52
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
53
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
54
|
+
And 1 minute passes
|
|
55
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
56
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
57
|
+
When 6 minutes passes
|
|
58
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
59
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
60
|
+
|
|
61
|
+
@time
|
|
62
|
+
Scenario: Check ok to failed for 1 minute when in scheduled maintenance
|
|
63
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
64
|
+
And check 'abc' for entity 'def' is in scheduled maintenance
|
|
65
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
66
|
+
And 1 minute passes
|
|
67
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
68
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
69
|
+
|
|
70
|
+
@time
|
|
71
|
+
Scenario: Check ok to failed for 1 minute when in unscheduled maintenance
|
|
72
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
73
|
+
And check 'abc' for entity 'def' is in unscheduled maintenance
|
|
74
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
75
|
+
And 1 minute passes
|
|
76
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
77
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
78
|
+
|
|
79
|
+
@time
|
|
80
|
+
Scenario: Check ok to failed for 1 minute, acknowledged, and failed for 6 minutes
|
|
81
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
82
|
+
When a failure event is received for check 'abc' on entity 'def'
|
|
83
|
+
And 1 minute passes
|
|
84
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
85
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
86
|
+
When an acknowledgement is received for check 'abc' on entity 'def'
|
|
87
|
+
And 6 minutes passes
|
|
88
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
89
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
90
|
+
|
|
91
|
+
Scenario: Check failed to ok
|
|
92
|
+
Given check 'abc' for entity 'def' is in a failure state
|
|
93
|
+
And 5 minutes passes
|
|
94
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
95
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
96
|
+
When 5 minutes passes
|
|
97
|
+
And an ok event is received for check 'abc' on entity 'def'
|
|
98
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
99
|
+
|
|
100
|
+
@time
|
|
101
|
+
Scenario: Check failed to ok when acknowledged
|
|
102
|
+
Given check 'abc' for entity 'def' is in a failure state
|
|
103
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
|
104
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
105
|
+
When 1 minute passes
|
|
106
|
+
And an ok event is received for check 'abc' on entity 'def'
|
|
107
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
108
|
+
|
|
109
|
+
@time
|
|
110
|
+
Scenario: Check failed to ok when acknowledged, and fails after 6 minutes
|
|
111
|
+
Given check 'abc' for entity 'def' is in a failure state
|
|
112
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
|
113
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
114
|
+
When 1 minute passes
|
|
115
|
+
And an ok event is received for check 'abc' on entity 'def'
|
|
116
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
117
|
+
When 6 minutes passes
|
|
118
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
119
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
120
|
+
When 6 minutes passes
|
|
121
|
+
And a failure event is received for check 'abc' on entity 'def'
|
|
122
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
123
|
+
|
|
124
|
+
Scenario: Acknowledgement when ok
|
|
125
|
+
Given check 'abc' for entity 'def' is in an ok state
|
|
126
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
|
127
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
|
128
|
+
|
|
129
|
+
Scenario: Acknowledgement when failed
|
|
130
|
+
Given check 'abc' for entity 'def' is in a failure state
|
|
131
|
+
When an acknowledgement event is received for check 'abc' on entity 'def'
|
|
132
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
@notifications
|
|
2
|
+
Feature: notifications
|
|
3
|
+
So people can be notified when things break and recover
|
|
4
|
+
flapjack-notifier must send notifications correctly
|
|
5
|
+
|
|
6
|
+
# TODO test across multiple contacts
|
|
7
|
+
|
|
8
|
+
@resque
|
|
9
|
+
Scenario: Queue an SMS notification
|
|
10
|
+
Given the user wants to receive SMS notifications for entity 'example.com'
|
|
11
|
+
When an event notification is generated for entity 'example.com'
|
|
12
|
+
Then an SMS notification for entity 'example.com' should be queued for the user
|
|
13
|
+
And an email notification for entity 'example.com' should not be queued for the user
|
|
14
|
+
|
|
15
|
+
@resque
|
|
16
|
+
Scenario: Queue an email notification
|
|
17
|
+
Given the user wants to receive email notifications for entity 'example.com'
|
|
18
|
+
When an event notification is generated for entity 'example.com'
|
|
19
|
+
Then an email notification for entity 'example.com' should be queued for the user
|
|
20
|
+
And an SMS notification for entity 'example.com' should not be queued for the user
|
|
21
|
+
|
|
22
|
+
@resque
|
|
23
|
+
Scenario: Queue SMS and email notifications
|
|
24
|
+
Given the user wants to receive SMS notifications for entity 'example.com' and email notifications for entity 'example2.com'
|
|
25
|
+
When an event notification is generated for entity 'example.com'
|
|
26
|
+
And an event notification is generated for entity 'example2.com'
|
|
27
|
+
Then an SMS notification for entity 'example.com' should be queued for the user
|
|
28
|
+
And an SMS notification for entity 'example2.com' should not be queued for the user
|
|
29
|
+
Then an email notification for entity 'example.com' should not be queued for the user
|
|
30
|
+
And an email notification for entity 'example2.com' should be queued for the user
|
|
31
|
+
|
|
32
|
+
# NB: Scenarios below here are those that cover code run by the Resque workers
|
|
33
|
+
# We could maybe test resque integration as well, see
|
|
34
|
+
# http://corner.squareup.com/2010/08/cucumber-and-resque.html
|
|
35
|
+
# http://gist.github.com/532100
|
|
36
|
+
|
|
37
|
+
Scenario: Send a queued SMS notification
|
|
38
|
+
Given a user SMS notification has been queued for entity 'example.com'
|
|
39
|
+
When the SMS notification handler runs successfully
|
|
40
|
+
Then the user should receive an SMS notification
|
|
41
|
+
|
|
42
|
+
Scenario: Handle a failure to send a queued SMS notification
|
|
43
|
+
Given a user SMS notification has been queued for entity 'example.com'
|
|
44
|
+
When the SMS notification handler fails to send an SMS
|
|
45
|
+
Then the user should not receive an SMS notification
|
|
46
|
+
|
|
47
|
+
@email
|
|
48
|
+
Scenario: Send a queued email notification
|
|
49
|
+
Given a user email notification has been queued for entity 'example.com'
|
|
50
|
+
When the email notification handler runs successfully
|
|
51
|
+
Then the user should receive an email notification
|
|
52
|
+
|
|
53
|
+
@email
|
|
54
|
+
Scenario: Handle a failure to send a queued email notification
|
|
55
|
+
Given a user email notification has been queued for entity 'example.com'
|
|
56
|
+
When the email notification handler fails to send an email
|
|
57
|
+
Then the user should not receive an email notification
|
|
@@ -1,15 +1,17 @@
|
|
|
1
|
-
Feature: Packagability
|
|
1
|
+
Feature: Packagability
|
|
2
2
|
To make Flapjack usable to the masses
|
|
3
3
|
It must be easily packagable
|
|
4
4
|
|
|
5
5
|
Scenario: No rubygems references
|
|
6
6
|
Given I am at the project root
|
|
7
7
|
When I run "grep require lib/* bin/* -R |grep rubygems"
|
|
8
|
-
Then
|
|
8
|
+
Then the exit status should be 1
|
|
9
|
+
And I should see 0 lines of output
|
|
9
10
|
|
|
10
11
|
Scenario: A shebang that works everywhere
|
|
11
12
|
Given I am at the project root
|
|
12
13
|
When I run "find lib/ -type 'f' -name '*.rb'"
|
|
13
|
-
Then
|
|
14
|
+
Then the exit status should be 0
|
|
15
|
+
And every file in the output should start with "#!/usr/bin/env ruby"
|
|
14
16
|
|
|
15
17
|
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'flapjack/data/entity_check'
|
|
4
|
+
require 'flapjack/data/event'
|
|
5
|
+
|
|
6
|
+
# Fetches events one at a time (non-blocking) via Flapjack::Data::Event.next and
# hands each to @app's process_event, stopping as soon as no event comes back.
def drain_events
  loop do
    pending = Flapjack::Data::Event.next(:block => false, :persistence => @redis)
    if pending
      @app.send(:process_event, pending)
    else
      break
    end
  end
end
|
|
13
|
+
|
|
14
|
+
# Serialises +event+ with to_json and pushes it onto the 'events' key
# through @redis.rpush.
def submit_event(event)
  payload = event.to_json
  @redis.rpush('events', payload)
end
|
|
17
|
+
|
|
18
|
+
# Creates a scheduled maintenance starting now for the given entity/check and
# sets the "<entity>:<check>:scheduled_maintenance" key to expire after
# +duration+ seconds (two hours by default).
def set_scheduled_maintenance(entity, check, duration = 60*60*2)
  target = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
  started_at = Time.now.to_i
  target.create_scheduled_maintenance(:start_time => started_at,
                                      :duration   => duration,
                                      :summary    => "upgrading everything")
  @redis.setex("#{entity}:#{check}:scheduled_maintenance", duration, started_at)
end
|
|
24
|
+
|
|
25
|
+
# Deletes every scheduled maintenance reported by the entity check's
# maintenances(nil, nil, :scheduled => true), keyed by its :start_time.
def remove_scheduled_maintenance(entity, check)
  target = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
  target.maintenances(nil, nil, :scheduled => true).each do |window|
    target.delete_scheduled_maintenance(:start_time => window[:start_time])
  end
end
|
|
32
|
+
|
|
33
|
+
# Ends any current unscheduled maintenance for the entity/check: removes the
# "<id>:unscheduled_maintenance" key and records the elapsed duration against
# its start time in the "<id>:unscheduled_maintenances" sorted set.
# Does nothing when the key is not set.
def remove_unscheduled_maintenance(entity, check)
  event_id    = entity + ":" + check
  current_key = "#{event_id}:unscheduled_maintenance"

  um_start = @redis.get(current_key)
  return unless um_start

  @redis.del(current_key)
  elapsed = Time.now.to_i - um_start.to_i
  @redis.zadd("#{event_id}:unscheduled_maintenances", elapsed, um_start)
end
|
|
42
|
+
|
|
43
|
+
# Deletes the last problem, recovery and acknowledgement notification keys
# (in that order) for the entity/check.
def remove_notifications(entity, check)
  event_id = entity + ":" + check
  # map + last keeps the return value equal to the result of the final del call
  %w(problem recovery acknowledgement).map { |kind|
    @redis.del("#{event_id}:last_#{kind}_notification")
  }.last
end
|
|
49
|
+
|
|
50
|
+
# Puts the entity check into STATE_OK with a timestamp 24 hours in the past.
def set_ok_state(entity, check)
  target = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
  a_day_ago = Time.now.to_i - (60*60*24)
  target.update_state(Flapjack::Data::EntityCheck::STATE_OK, :timestamp => a_day_ago)
end
|
|
55
|
+
|
|
56
|
+
# Puts the entity check into STATE_CRITICAL with a timestamp 24 hours in the past.
def set_failure_state(entity, check)
  target = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
  a_day_ago = Time.now.to_i - (60*60*24)
  target.update_state(Flapjack::Data::EntityCheck::STATE_CRITICAL, :timestamp => a_day_ago)
end
|
|
61
|
+
|
|
62
|
+
# Queues a service event in the 'ok' state for the given entity/check.
def submit_ok(entity, check)
  submit_event('type'    => 'service',
               'state'   => 'ok',
               'summary' => '0% packet loss',
               'entity'  => entity,
               'check'   => check,
               'client'  => 'clientx')
end
|
|
73
|
+
|
|
74
|
+
# Queues a service event in the 'critical' state for the given entity/check.
def submit_critical(entity, check)
  submit_event('type'    => 'service',
               'state'   => 'critical',
               'summary' => '100% packet loss',
               'entity'  => entity,
               'check'   => check,
               'client'  => 'clientx')
end
|
|
85
|
+
|
|
86
|
+
# Queues an 'acknowledgement' action event for the given entity/check.
# No 'acknowledgement_id' is supplied (the original left that key commented out).
def submit_acknowledgement(entity, check)
  submit_event('type'    => 'action',
               'state'   => 'acknowledgement',
               'summary' => "I'll have this fixed in a jiffy, saw the same thing yesterday",
               'entity'  => entity,
               'check'   => check,
               'client'  => 'clientx')
end
|
|
98
|
+
|
|
99
|
+
# Registers an entity with the captured name under the fixed id '5000'.
Given /^an entity '([\w\.\-]+)' exists$/ do |entity_name|
  attributes = {'id' => '5000', 'name' => entity_name}
  Flapjack::Data::Entity.add(attributes, :redis => @redis)
end
|
|
104
|
+
|
|
105
|
+
# Resets the check to a clean OK baseline: clears any maintenance periods and
# notification markers, then records an OK state.
# (The pattern previously began with a duplicated '^' anchor.)
Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in an ok state$/ do |check, entity|
  remove_unscheduled_maintenance(entity, check)
  remove_scheduled_maintenance(entity, check)
  remove_notifications(entity, check)
  set_ok_state(entity, check)
end
|
|
111
|
+
|
|
112
|
+
# Resets the check to a clean failing baseline: clears any maintenance periods
# and notification markers, then records a critical state.
Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in a failure state$/ do |check_name, entity_name|
  remove_unscheduled_maintenance(entity_name, check_name)
  remove_scheduled_maintenance(entity_name, check_name)
  remove_notifications(entity_name, check_name)
  set_failure_state(entity_name, check_name)
end
|
|
118
|
+
|
|
119
|
+
# Ends any unscheduled maintenance, then starts a scheduled maintenance period
# using set_scheduled_maintenance's default duration.
Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in scheduled maintenance$/ do |check_name, entity_name|
  remove_unscheduled_maintenance(entity_name, check_name)
  set_scheduled_maintenance(entity_name, check_name)
end
|
|
123
|
+
|
|
124
|
+
# TODO set the state directly rather than submit & drain
|
|
125
|
+
# Clears scheduled maintenance, marks the check as failing, then submits an
# acknowledgement and processes the queue.
Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in unscheduled maintenance$/ do |check_name, entity_name|
  remove_scheduled_maintenance(entity_name, check_name)
  set_failure_state(entity_name, check_name)
  submit_acknowledgement(entity_name, check_name)
  drain_events # TODO these should only be in When clauses
end
|
|
131
|
+
|
|
132
|
+
# Queues an OK event for the check and processes the event queue.
When /^an ok event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check_name, entity_name|
  submit_ok(entity_name, check_name)
  drain_events
end
|
|
136
|
+
|
|
137
|
+
# Queues a critical event for the check and processes the event queue.
When /^a failure event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check_name, entity_name|
  submit_critical(entity_name, check_name)
  drain_events
end
|
|
141
|
+
|
|
142
|
+
# Queues an acknowledgement for the check and processes the event queue.
# The '.*' lets both "an acknowledgement is received" and
# "an acknowledgement event is received" match this step.
When /^an acknowledgement .*is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check_name, entity_name|
  submit_acknowledgement(entity_name, check_name)
  drain_events
end
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# TODO logging is a side-effect, should test for notification generation itself
|
|
149
|
+
# Passes when the most recent "...ending notifications for event <entity>:<check>"
# log line says "Not sending notifications"; fails if there is no such line.
# Entity and check are escaped so characters such as '.' (e.g. in 'example.com')
# are matched literally rather than as regex metacharacters.
Then /^a notification should not be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
  event_pattern = /ending notifications for event #{Regexp.escape(entity)}:#{Regexp.escape(check)}/
  message = @app.logger.messages.find_all {|m| m =~ event_pattern }.last
  happy = message ? message.match(/Not sending notifications/) : false
  happy.should be_true
end
|
|
154
|
+
|
|
155
|
+
# Passes when the most recent "...ending notifications for event <entity>:<check>"
# log line says "Sending notifications"; fails if there is no such line.
# Entity and check are escaped so characters such as '.' (e.g. in 'example.com')
# are matched literally rather than as regex metacharacters.
Then /^a notification should be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
  event_pattern = /ending notifications for event #{Regexp.escape(entity)}:#{Regexp.escape(check)}/
  message = @app.logger.messages.find_all {|m| m =~ event_pattern }.last
  happy = message ? message.match(/Sending notifications/) : false
  happy.should be_true
end
|
|
160
|
+
|
|
161
|
+
# Debugging aid: prints every message captured by the app's logger, one per line.
Then /^show me the notifications?$/ do
  captured = @app.logger.messages
  puts captured.join("\n")
end
|
|
164
|
+
|