flapjack 0.5.5 → 0.6.23

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. data/.gitignore +10 -0
  2. data/.rbenv-version +1 -0
  3. data/.rspec +10 -0
  4. data/Gemfile +18 -0
  5. data/Guardfile +14 -0
  6. data/README.md +152 -173
  7. data/Rakefile +53 -150
  8. data/bin/flapjack +72 -0
  9. data/bin/flapjack-nagios-receiver +111 -0
  10. data/bin/flapjack-nagios-receiver-control +15 -0
  11. data/bin/flapjack-netsaint-parser +0 -2
  12. data/bin/flapjack-populator +133 -16
  13. data/bin/install-flapjack-systemwide +2 -2
  14. data/config.ru +11 -0
  15. data/dist/etc/init.d/flapjack +46 -0
  16. data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
  17. data/doc/GLOSSARY.md +19 -0
  18. data/etc/flapjack_config.yaml.example +90 -0
  19. data/features/events.feature +132 -0
  20. data/features/notifications.feature +57 -0
  21. data/features/packaging-lintian.feature +5 -3
  22. data/features/steps/events_steps.rb +164 -0
  23. data/features/steps/flapjack-importer_steps.rb +2 -5
  24. data/features/steps/flapjack-worker_steps.rb +13 -6
  25. data/features/steps/notifications_steps.rb +178 -0
  26. data/features/steps/packaging-lintian_steps.rb +14 -0
  27. data/features/steps/time_travel_steps.rb +34 -0
  28. data/features/support/env.rb +63 -36
  29. data/flapjack.gemspec +35 -186
  30. data/lib/flapjack.rb +2 -0
  31. data/lib/flapjack/api.rb +274 -0
  32. data/lib/flapjack/api/entity_check_presenter.rb +184 -0
  33. data/lib/flapjack/api/entity_presenter.rb +66 -0
  34. data/lib/flapjack/cli/worker_manager.rb +1 -2
  35. data/lib/flapjack/configuration.rb +11 -0
  36. data/lib/flapjack/coordinator.rb +288 -0
  37. data/lib/flapjack/daemonizing.rb +186 -0
  38. data/lib/flapjack/data/contact.rb +45 -0
  39. data/lib/flapjack/data/entity.rb +89 -0
  40. data/lib/flapjack/data/entity_check.rb +396 -0
  41. data/lib/flapjack/data/event.rb +144 -0
  42. data/lib/flapjack/data/notification.rb +13 -0
  43. data/lib/flapjack/executive.rb +289 -0
  44. data/lib/flapjack/filters/acknowledgement.rb +39 -0
  45. data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
  46. data/lib/flapjack/filters/delays.rb +53 -0
  47. data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
  48. data/lib/flapjack/filters/ok.rb +25 -5
  49. data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
  50. data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
  51. data/lib/flapjack/jabber.rb +294 -0
  52. data/lib/flapjack/notification/common.rb +23 -0
  53. data/lib/flapjack/notification/email.rb +107 -0
  54. data/lib/flapjack/notification/email/alert.html.haml +48 -0
  55. data/lib/flapjack/notification/email/alert.text.erb +14 -0
  56. data/lib/flapjack/notification/sms.rb +42 -0
  57. data/lib/flapjack/notification/sms/messagenet.rb +49 -0
  58. data/lib/flapjack/notifier_engine.rb +4 -4
  59. data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
  60. data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
  61. data/lib/flapjack/pagerduty.rb +230 -0
  62. data/lib/flapjack/patches.rb +108 -19
  63. data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
  64. data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
  65. data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
  66. data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
  67. data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
  68. data/lib/flapjack/pikelet.rb +56 -0
  69. data/lib/flapjack/transports/beanstalkd.rb +1 -1
  70. data/lib/flapjack/transports/result.rb +6 -6
  71. data/lib/flapjack/utility.rb +46 -0
  72. data/lib/flapjack/version.rb +5 -0
  73. data/lib/flapjack/web.rb +198 -0
  74. data/lib/flapjack/web/views/acknowledge.haml +55 -0
  75. data/lib/flapjack/web/views/check.haml +162 -0
  76. data/lib/flapjack/web/views/index.haml +92 -0
  77. data/lib/flapjack/web/views/self_stats.haml +56 -0
  78. data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
  79. data/lib/flapjack/worker/cli.rb +49 -0
  80. data/{spec → spec.old}/check_sandbox/echo +0 -0
  81. data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
  82. data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
  83. data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
  84. data/{spec → spec.old}/configs/recipients.ini +0 -0
  85. data/{spec → spec.old}/helpers.rb +0 -0
  86. data/{spec → spec.old}/inifile_spec.rb +0 -0
  87. data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
  88. data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
  89. data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
  90. data/{spec → spec.old}/notifier_application_spec.rb +0 -0
  91. data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
  92. data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
  93. data/{spec → spec.old}/notifier_options_spec.rb +0 -0
  94. data/{spec → spec.old}/notifier_spec.rb +0 -0
  95. data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
  96. data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
  97. data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
  98. data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
  99. data/{spec → spec.old}/simple.ini +0 -0
  100. data/{spec → spec.old}/spec.opts +0 -0
  101. data/{spec → spec.old}/test-filters/blocker.rb +0 -0
  102. data/{spec → spec.old}/test-filters/mock.rb +0 -0
  103. data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
  104. data/{spec → spec.old}/transports/mock_transport.rb +0 -0
  105. data/{spec → spec.old}/worker_application_spec.rb +0 -0
  106. data/{spec → spec.old}/worker_options_spec.rb +0 -0
  107. data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
  108. data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
  109. data/spec/lib/flapjack/api_spec.rb +170 -0
  110. data/spec/lib/flapjack/coordinator_spec.rb +16 -0
  111. data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
  112. data/spec/lib/flapjack/data/entity_spec.rb +71 -0
  113. data/spec/lib/flapjack/data/event_spec.rb +6 -0
  114. data/spec/lib/flapjack/executive_spec.rb +59 -0
  115. data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
  116. data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
  117. data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
  118. data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
  119. data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
  120. data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
  121. data/spec/lib/flapjack/jabber_spec.rb +150 -0
  122. data/spec/lib/flapjack/notification/email_spec.rb +6 -0
  123. data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
  124. data/spec/lib/flapjack/pikelet_spec.rb +28 -0
  125. data/spec/lib/flapjack/web_spec.rb +188 -0
  126. data/spec/spec_helper.rb +44 -0
  127. data/spec/support/profile_all_formatter.rb +44 -0
  128. data/spec/support/uncolored_doc_formatter.rb +9 -0
  129. data/tasks/events.rake +85 -0
  130. data/tmp/acknowledge.rb +14 -0
  131. data/tmp/create_config_yaml.rb +16 -0
  132. data/tmp/create_events_failure.rb +33 -0
  133. data/tmp/create_events_ok.rb +33 -0
  134. data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
  135. data/tmp/create_events_ok_failure.rb +40 -0
  136. data/tmp/create_events_ok_failure_ack.rb +54 -0
  137. data/tmp/dummy_entities.json +1 -0
  138. data/tmp/generate_nagios_test_hosts.rb +16 -0
  139. data/tmp/parse_config_yaml.rb +7 -0
  140. data/tmp/redis_delete_all_keys.rb +11 -0
  141. data/tmp/test_entities.json +1 -0
  142. metadata +482 -221
  143. data/TODO.md +0 -36
  144. data/VERSION +0 -1
  145. data/bin/flapjack-benchmark +0 -50
  146. data/bin/flapjack-notifier +0 -21
  147. data/bin/flapjack-notifier-manager +0 -43
  148. data/bin/flapjack-stats +0 -27
  149. data/bin/flapjack-worker +0 -13
  150. data/bin/flapjack-worker-manager +0 -35
  151. data/dist/etc/init.d/flapjack-notifier +0 -47
  152. data/dist/etc/init.d/flapjack-workers +0 -44
  153. data/features/flapjack-notifier-manager.feature +0 -19
  154. data/features/flapjack-worker-manager.feature +0 -27
  155. data/features/flapjack-worker.feature +0 -27
  156. data/features/netsaint-config-converter.feature +0 -126
  157. data/features/persistence/couch.feature +0 -105
  158. data/features/persistence/sqlite3.feature +0 -105
  159. data/features/persistence/steps/couch_steps.rb +0 -25
  160. data/features/persistence/steps/generic_steps.rb +0 -102
  161. data/features/persistence/steps/sqlite3_steps.rb +0 -13
  162. data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
  163. data/features/steps/flapjack-worker-manager_steps.rb +0 -48
  164. data/lib/flapjack/applications/notifier.rb +0 -222
  165. data/lib/flapjack/cli/notifier.rb +0 -108
  166. data/lib/flapjack/cli/notifier_manager.rb +0 -86
  167. data/lib/flapjack/cli/worker.rb +0 -51
@@ -44,14 +44,14 @@ system("cp -aiv #{etc_path}/* /etc")
44
44
 
45
45
  # set sequence number to 50 so beanstalkd has a chance to boot
46
46
  system("update-rc.d flapjack-workers defaults 50")
47
- system("update-rc.d flapjack-notifier defaults 50")
47
+ system("update-rc.d flapjack-executive defaults 50")
48
48
 
49
49
  puts
50
50
  puts "Setup complete!"
51
51
  puts
52
52
  puts "You will want to customise:"
53
53
  puts " * /etc/flapjack/recipients.yaml"
54
- puts " * /etc/flapjack/flapjack-notifier.yaml"
54
+ puts " * /etc/flapjack/flapjack-executive.yaml"
55
55
  puts
56
56
  puts ".examples of these files exist in /etc/flapjack/"
57
57
 
data/config.ru ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ Bundler.require(:default)
6
+
7
+ $: << File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
8
+ require 'flapjack/web'
9
+
10
+ use Flapjack::Web
11
+ run Sinatra::Application
@@ -0,0 +1,46 @@
1
+ #!/bin/bash
2
+ #
3
+ # Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
4
+ #
5
+ # flapjack
6
+ # Boots flapjack (coordinator, flapjack-executive, notification workers, ...)
7
+ #
8
+
9
+ PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin
10
+
11
+ # load global rbenv environment if present
12
+ if [ -x /etc/profile.d/rbenv.sh ] ; then
13
+ source /etc/profile.d/rbenv.sh
14
+ fi
15
+
16
+ # Default return value
17
+ RETVAL=0
18
+
19
+ export FLAPJACK_ENV="production"
20
+
21
+ if [ ! $(which flapjack) ]; then
22
+ echo "Error: flapjack isn't in PATH."
23
+ echo "Refusing to do anything!"
24
+ exit 1
25
+ fi
26
+
27
+ # Evaluate command
28
+ case "$1" in
29
+ start)
30
+ flapjack --config /etc/flapjack/flapjack-config.yaml --daemonize
31
+ RETVAL=$?
32
+ ;;
33
+ stop)
34
+ kill `cat /var/run/flapjack/flapjack.pid`
35
+ # FIXME: make this more robust, preferably use daemons foo
36
+ # or at least wait until the flapjack process has exited before exiting
37
+ RETVAL=$?
38
+ ;;
39
+ *)
40
+ echo "Usage: flapjack {start|stop}"
41
+ RETVAL=1
42
+ ;;
43
+ esac
44
+
45
+ exit $RETVAL
46
+
@@ -0,0 +1,36 @@
1
+ #!/bin/bash
2
+ #
3
+ # Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
4
+ #
5
+ # flapjack-nagios-receiver
6
+ # reads from a nagios perfdata named-pipe and submits each event to the events queue in redis
7
+ #
8
+
9
+ PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin
10
+
11
+ # load global rbenv environment if present
12
+ if [ -x /etc/profile.d/rbenv.sh ] ; then
13
+ source /etc/profile.d/rbenv.sh
14
+ fi
15
+
16
+ # Default return value
17
+ RETVAL=0
18
+
19
+ export FLAPJACK_ENV="production"
20
+
21
+ NAGIOS_PERFDATA_FIFO="/var/cache/nagios3/event_stream.fifo"
22
+
23
+ if [ ! $(which flapjack-nagios-receiver-control) ]; then
24
+ echo "Error: flapjack-nagios-receiver-control isn't in PATH."
25
+ echo "Refusing to do anything!"
26
+ exit 1
27
+ fi
28
+
29
+ # Evaluate command
30
+
31
+ #/bin/bash -c "source /etc/profile.d/rbenv.sh && rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control $1 -- --config /etc/flapjack/flapjack-config.yaml ${NAGIOS_PERFDATA_FIFO}"
32
+ rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control $1 -- --config /etc/flapjack/flapjack-config.yaml ${NAGIOS_PERFDATA_FIFO}
33
+ RETVAL=$?
34
+
35
+ exit $RETVAL
36
+
data/doc/GLOSSARY.md ADDED
@@ -0,0 +1,19 @@
1
+
2
+ As implemented for the reporting API functions, these are placeholder terms only and open for discussion.
3
+
4
+ <dl>
5
+ <dt>Entity</dt>
6
+ <dd>TODO</dd>
7
+ <dt>Check</dt>
8
+ <dd>TODO</dd>
9
+
10
+ <dt>Outage</dt>
11
+ <dd>Period of time from when a check goes into the 'CRITICAL'
12
+ state to when it comes out of that state again</dd>
13
+ <dt>Scheduled maintenance</dt>
14
+ <dd>Periods of time explicitly created by external actors to denote that maintenance is scheduled to occur at these times.</dd>
15
+ <dt>Unscheduled maintenance</dt>
16
+ <dd>Periods of time explicitly created by external actors to denote that maintenance is happening or has happened at these times</dd>
17
+ <dt>Downtime</dt>
18
+ <dd>Outages minus scheduled maintenances across any given time period</dd>
19
+ </dl>
@@ -0,0 +1,90 @@
1
+ ---
2
+
3
+ development:
4
+ pid_file: tmp/pids/flapjack.pid
5
+ log_file: log/flapjack.log
6
+ daemonize: no
7
+ redis:
8
+ host: 127.0.0.1
9
+ port: 6379
10
+ db: 13
11
+ executive:
12
+ enabled: yes
13
+ email_queue: email_notifications
14
+ sms_queue: sms_notifications
15
+ jabber_queue: jabber_notifications
16
+ notification_log_file: log/flapjack-notification.log
17
+ email_notifier:
18
+ enabled: yes
19
+ queue: email_notifications
20
+ smtp_config:
21
+ port: 2525
22
+ # address: "localhost"
23
+ # port: 25
24
+ # domain: 'localhost.localdomain'
25
+ # user_name: nil
26
+ # password: nil
27
+ # authentication: nil
28
+ # enable_starttls_auto: true
29
+ sms_notifier:
30
+ enabled: yes
31
+ queue: sms_notifications
32
+ username: "ermahgerd"
33
+ password: "xxxx"
34
+ jabber_gateway:
35
+ enabled: yes
36
+ queue: jabber_notifications
37
+ server: "jabber.domain.tld"
38
+ port: 5222
39
+ jabberid: "flapjack@jabber.domain.tld"
40
+ password: "good-password"
41
+ alias: "flapjack"
42
+ rooms:
43
+ - "gimp@conference.jabber.domain.tld"
44
+ - "log@conference.jabber.domain.tld"
45
+ pagerduty_gateway:
46
+ enabled: yes
47
+ queue: pagerduty_notifications
48
+ web:
49
+ enabled: yes
50
+ port: 5080
51
+ api:
52
+ enabled: yes
53
+ port: 5081
54
+
55
+ test:
56
+ redis:
57
+ database: 14
58
+ email_notifier:
59
+ instances: 1
60
+ sms_notifier:
61
+ instances: 1
62
+ username: "ermahgerd"
63
+ password: "xxxx"
64
+ web:
65
+ instances: 1
66
+
67
+
68
+ staging:
69
+ redis:
70
+ database: 15
71
+ email_notifier:
72
+ instances: 1
73
+ sms_notifier:
74
+ instances: 1
75
+ username: "ermahgerd"
76
+ password: "xxxx"
77
+ web:
78
+ instances: 1
79
+
80
+ production:
81
+ redis:
82
+ database: 12
83
+ email_notifier:
84
+ instances: 1
85
+ sms_notifier:
86
+ instances: 1
87
+ username: "ermahgerd"
88
+ password: "xxxx"
89
+ web:
90
+ instances: 1
@@ -0,0 +1,132 @@
1
+ @events
2
+ Feature: events
3
+ So people can be notified when things break and recover
4
+ flapjack-executive must process events correctly
5
+
6
+ # TODO make entity and check implicit, so the test reads more cleanly
7
+ Background:
8
+ Given an entity 'def' exists
9
+
10
+ Scenario: Check ok to ok
11
+ Given check 'abc' for entity 'def' is in an ok state
12
+ When an ok event is received for check 'abc' on entity 'def'
13
+ Then a notification should not be generated for check 'abc' on entity 'def'
14
+ # And show me the output
15
+
16
+ Scenario: Check ok to failed
17
+ Given check 'abc' for entity 'def' is in an ok state
18
+ When a failure event is received for check 'abc' on entity 'def'
19
+ Then a notification should not be generated for check 'abc' on entity 'def'
20
+
21
+ @time
22
+ Scenario: Check failed to failed after 10 seconds
23
+ Given check 'abc' for entity 'def' is in an ok state
24
+ When a failure event is received for check 'abc' on entity 'def'
25
+ And 10 seconds passes
26
+ And a failure event is received for check 'abc' on entity 'def'
27
+ Then a notification should not be generated for check 'abc' on entity 'def'
28
+
29
+ @time
30
+ Scenario: Check ok to failed for 1 minute
31
+ Given check 'abc' for entity 'def' is in an ok state
32
+ When a failure event is received for check 'abc' on entity 'def'
33
+ And 1 minute passes
34
+ And a failure event is received for check 'abc' on entity 'def'
35
+ Then a notification should be generated for check 'abc' on entity 'def'
36
+
37
+ @time
38
+ Scenario: Check failed and alerted to failed for 1 minute
39
+ Given check 'abc' for entity 'def' is in an ok state
40
+ When a failure event is received for check 'abc' on entity 'def'
41
+ And 1 minute passes
42
+ And a failure event is received for check 'abc' on entity 'def'
43
+ And show me the notifications
44
+ Then a notification should be generated for check 'abc' on entity 'def'
45
+ When 1 minute passes
46
+ And a failure event is received for check 'abc' on entity 'def'
47
+ And show me the notifications
48
+ Then a notification should not be generated for check 'abc' on entity 'def'
49
+
50
+ @time
51
+ Scenario: Check failed and alerted to failed for 6 minutes
52
+ Given check 'abc' for entity 'def' is in an ok state
53
+ When a failure event is received for check 'abc' on entity 'def'
54
+ And 1 minute passes
55
+ And a failure event is received for check 'abc' on entity 'def'
56
+ Then a notification should be generated for check 'abc' on entity 'def'
57
+ When 6 minutes passes
58
+ And a failure event is received for check 'abc' on entity 'def'
59
+ Then a notification should be generated for check 'abc' on entity 'def'
60
+
61
+ @time
62
+ Scenario: Check ok to failed for 1 minute when in scheduled maintenance
63
+ Given check 'abc' for entity 'def' is in an ok state
64
+ And check 'abc' for entity 'def' is in scheduled maintenance
65
+ When a failure event is received for check 'abc' on entity 'def'
66
+ And 1 minute passes
67
+ And a failure event is received for check 'abc' on entity 'def'
68
+ Then a notification should not be generated for check 'abc' on entity 'def'
69
+
70
+ @time
71
+ Scenario: Check ok to failed for 1 minute when in unscheduled maintenance
72
+ Given check 'abc' for entity 'def' is in an ok state
73
+ And check 'abc' for entity 'def' is in unscheduled maintenance
74
+ When a failure event is received for check 'abc' on entity 'def'
75
+ And 1 minute passes
76
+ And a failure event is received for check 'abc' on entity 'def'
77
+ Then a notification should not be generated for check 'abc' on entity 'def'
78
+
79
+ @time
80
+ Scenario: Check ok to failed for 1 minute, acknowledged, and failed for 6 minutes
81
+ Given check 'abc' for entity 'def' is in an ok state
82
+ When a failure event is received for check 'abc' on entity 'def'
83
+ And 1 minute passes
84
+ And a failure event is received for check 'abc' on entity 'def'
85
+ Then a notification should be generated for check 'abc' on entity 'def'
86
+ When an acknowledgement is received for check 'abc' on entity 'def'
87
+ And 6 minutes passes
88
+ And a failure event is received for check 'abc' on entity 'def'
89
+ Then a notification should not be generated for check 'abc' on entity 'def'
90
+
91
+ Scenario: Check failed to ok
92
+ Given check 'abc' for entity 'def' is in a failure state
93
+ And 5 minutes passes
94
+ And a failure event is received for check 'abc' on entity 'def'
95
+ Then a notification should be generated for check 'abc' on entity 'def'
96
+ When 5 minutes passes
97
+ And an ok event is received for check 'abc' on entity 'def'
98
+ Then a notification should be generated for check 'abc' on entity 'def'
99
+
100
+ @time
101
+ Scenario: Check failed to ok when acknowledged
102
+ Given check 'abc' for entity 'def' is in a failure state
103
+ When an acknowledgement event is received for check 'abc' on entity 'def'
104
+ Then a notification should be generated for check 'abc' on entity 'def'
105
+ When 1 minute passes
106
+ And an ok event is received for check 'abc' on entity 'def'
107
+ Then a notification should be generated for check 'abc' on entity 'def'
108
+
109
+ @time
110
+ Scenario: Check failed to ok when acknowledged, and fails after 6 minutes
111
+ Given check 'abc' for entity 'def' is in a failure state
112
+ When an acknowledgement event is received for check 'abc' on entity 'def'
113
+ Then a notification should be generated for check 'abc' on entity 'def'
114
+ When 1 minute passes
115
+ And an ok event is received for check 'abc' on entity 'def'
116
+ Then a notification should be generated for check 'abc' on entity 'def'
117
+ When 6 minutes passes
118
+ And a failure event is received for check 'abc' on entity 'def'
119
+ Then a notification should not be generated for check 'abc' on entity 'def'
120
+ When 6 minutes passes
121
+ And a failure event is received for check 'abc' on entity 'def'
122
+ Then a notification should be generated for check 'abc' on entity 'def'
123
+
124
+ Scenario: Acknowledgement when ok
125
+ Given check 'abc' for entity 'def' is in an ok state
126
+ When an acknowledgement event is received for check 'abc' on entity 'def'
127
+ Then a notification should not be generated for check 'abc' on entity 'def'
128
+
129
+ Scenario: Acknowledgement when failed
130
+ Given check 'abc' for entity 'def' is in a failure state
131
+ When an acknowledgement event is received for check 'abc' on entity 'def'
132
+ Then a notification should be generated for check 'abc' on entity 'def'
@@ -0,0 +1,57 @@
1
+ @notifications
2
+ Feature: notifications
3
+ So people can be notified when things break and recover
4
+ flapjack-notifier must send notifications correctly
5
+
6
+ # TODO test across multiple contacts
7
+
8
+ @resque
9
+ Scenario: Queue an SMS notification
10
+ Given the user wants to receive SMS notifications for entity 'example.com'
11
+ When an event notification is generated for entity 'example.com'
12
+ Then an SMS notification for entity 'example.com' should be queued for the user
13
+ And an email notification for entity 'example.com' should not be queued for the user
14
+
15
+ @resque
16
+ Scenario: Queue an email notification
17
+ Given the user wants to receive email notifications for entity 'example.com'
18
+ When an event notification is generated for entity 'example.com'
19
+ Then an email notification for entity 'example.com' should be queued for the user
20
+ And an SMS notification for entity 'example.com' should not be queued for the user
21
+
22
+ @resque
23
+ Scenario: Queue SMS and email notifications
24
+ Given the user wants to receive SMS notifications for entity 'example.com' and email notifications for entity 'example2.com'
25
+ When an event notification is generated for entity 'example.com'
26
+ And an event notification is generated for entity 'example2.com'
27
+ Then an SMS notification for entity 'example.com' should be queued for the user
28
+ And an SMS notification for entity 'example2.com' should not be queued for the user
29
+ Then an email notification for entity 'example.com' should not be queued for the user
30
+ And an email notification for entity 'example2.com' should be queued for the user
31
+
32
+ # NB: Scenarios below here are those that cover code run by the Resque workers
33
+ # We could maybe test resque integration as well, see
34
+ # http://corner.squareup.com/2010/08/cucumber-and-resque.html
35
+ # http://gist.github.com/532100
36
+
37
+ Scenario: Send a queued SMS notification
38
+ Given a user SMS notification has been queued for entity 'example.com'
39
+ When the SMS notification handler runs successfully
40
+ Then the user should receive an SMS notification
41
+
42
+ Scenario: Handle a failure to send a queued SMS notification
43
+ Given a user SMS notification has been queued for entity 'example.com'
44
+ When the SMS notification handler fails to send an SMS
45
+ Then the user should not receive an SMS notification
46
+
47
+ @email
48
+ Scenario: Send a queued email notification
49
+ Given a user email notification has been queued for entity 'example.com'
50
+ When the email notification handler runs successfully
51
+ Then the user should receive an email notification
52
+
53
+ @email
54
+ Scenario: Handle a failure to send a queued email notification
55
+ Given a user email notification has been queued for entity 'example.com'
56
+ When the email notification handler fails to send an email
57
+ Then the user should not receive an email notification
@@ -1,15 +1,17 @@
1
- Feature: Packagability
1
+ Feature: Packagability
2
2
  To make Flapjack usable to the masses
3
3
  It must be easily packagable
4
4
 
5
5
  Scenario: No rubygems references
6
6
  Given I am at the project root
7
7
  When I run "grep require lib/* bin/* -R |grep rubygems"
8
- Then I should see 0 lines of output
8
+ Then the exit status should be 1
9
+ And I should see 0 lines of output
9
10
 
10
11
  Scenario: A shebang that works everywhere
11
12
  Given I am at the project root
12
13
  When I run "find lib/ -type 'f' -name '*.rb'"
13
- Then every file in the output should start with "#!/usr/bin/env ruby"
14
+ Then the exit status should be 0
15
+ And every file in the output should start with "#!/usr/bin/env ruby"
14
16
 
15
17
 
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'flapjack/data/entity_check'
4
+ require 'flapjack/data/event'
5
+
6
+ def drain_events
7
+ loop do
8
+ event = Flapjack::Data::Event.next(:block => false, :persistence => @redis)
9
+ break unless event
10
+ @app.send(:process_event, event)
11
+ end
12
+ end
13
+
14
+ def submit_event(event)
15
+ @redis.rpush 'events', event.to_json
16
+ end
17
+
18
+ def set_scheduled_maintenance(entity, check, duration = 60*60*2)
19
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
20
+ t = Time.now.to_i
21
+ entity_check.create_scheduled_maintenance(:start_time => t, :duration => duration, :summary => "upgrading everything")
22
+ @redis.setex("#{entity}:#{check}:scheduled_maintenance", duration, t)
23
+ end
24
+
25
+ def remove_scheduled_maintenance(entity, check)
26
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
27
+ sm = entity_check.maintenances(nil, nil, :scheduled => true)
28
+ sm.each do |m|
29
+ entity_check.delete_scheduled_maintenance(:start_time => m[:start_time])
30
+ end
31
+ end
32
+
33
+ def remove_unscheduled_maintenance(entity, check)
34
+ # end any unscheduled downtime
35
+ event_id = entity + ":" + check
36
+ if (um_start = @redis.get("#{event_id}:unscheduled_maintenance"))
37
+ @redis.del("#{event_id}:unscheduled_maintenance")
38
+ duration = Time.now.to_i - um_start.to_i
39
+ @redis.zadd("#{event_id}:unscheduled_maintenances", duration, um_start)
40
+ end
41
+ end
42
+
43
+ def remove_notifications(entity, check)
44
+ event_id = entity + ":" + check
45
+ @redis.del("#{event_id}:last_problem_notification")
46
+ @redis.del("#{event_id}:last_recovery_notification")
47
+ @redis.del("#{event_id}:last_acknowledgement_notification")
48
+ end
49
+
50
+ def set_ok_state(entity, check)
51
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
52
+ entity_check.update_state(Flapjack::Data::EntityCheck::STATE_OK,
53
+ :timestamp => (Time.now.to_i - (60*60*24)))
54
+ end
55
+
56
+ def set_failure_state(entity, check)
57
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
58
+ entity_check.update_state(Flapjack::Data::EntityCheck::STATE_CRITICAL,
59
+ :timestamp => (Time.now.to_i - (60*60*24)))
60
+ end
61
+
62
+ def submit_ok(entity, check)
63
+ event = {
64
+ 'type' => 'service',
65
+ 'state' => 'ok',
66
+ 'summary' => '0% packet loss',
67
+ 'entity' => entity,
68
+ 'check' => check,
69
+ 'client' => 'clientx'
70
+ }
71
+ submit_event(event)
72
+ end
73
+
74
+ def submit_critical(entity, check)
75
+ event = {
76
+ 'type' => 'service',
77
+ 'state' => 'critical',
78
+ 'summary' => '100% packet loss',
79
+ 'entity' => entity,
80
+ 'check' => check,
81
+ 'client' => 'clientx'
82
+ }
83
+ submit_event(event)
84
+ end
85
+
86
+ def submit_acknowledgement(entity, check)
87
+ event = {
88
+ 'type' => 'action',
89
+ 'state' => 'acknowledgement',
90
+ 'summary' => "I'll have this fixed in a jiffy, saw the same thing yesterday",
91
+ 'entity' => entity,
92
+ 'check' => check,
93
+ 'client' => 'clientx',
94
+ # 'acknowledgement_id' =>
95
+ }
96
+ submit_event(event)
97
+ end
98
+
99
+ Given /^an entity '([\w\.\-]+)' exists$/ do |entity|
100
+ Flapjack::Data::Entity.add({'id' => '5000',
101
+ 'name' => entity},
102
+ :redis => @redis )
103
+ end
104
+
105
+ Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in an ok state$/ do |check, entity|
106
+ remove_unscheduled_maintenance(entity, check)
107
+ remove_scheduled_maintenance(entity, check)
108
+ remove_notifications(entity, check)
109
+ set_ok_state(entity, check)
110
+ end
111
+
112
+ Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in a failure state$/ do |check, entity|
113
+ remove_unscheduled_maintenance(entity, check)
114
+ remove_scheduled_maintenance(entity, check)
115
+ remove_notifications(entity, check)
116
+ set_failure_state(entity, check)
117
+ end
118
+
119
+ Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in scheduled maintenance$/ do |check, entity|
120
+ remove_unscheduled_maintenance(entity, check)
121
+ set_scheduled_maintenance(entity, check)
122
+ end
123
+
124
+ # TODO set the state directly rather than submit & drain
125
+ Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in unscheduled maintenance$/ do |check, entity|
126
+ remove_scheduled_maintenance(entity, check)
127
+ set_failure_state(entity, check)
128
+ submit_acknowledgement(entity, check)
129
+ drain_events # TODO these should only be in When clauses
130
+ end
131
+
132
+ When /^an ok event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
133
+ submit_ok(entity, check)
134
+ drain_events
135
+ end
136
+
137
+ When /^a failure event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
138
+ submit_critical(entity, check)
139
+ drain_events
140
+ end
141
+
142
+ When /^an acknowledgement .*is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
143
+ submit_acknowledgement(entity, check)
144
+ drain_events
145
+ end
146
+
147
+
148
+ # TODO logging is a side-effect, should test for notification generation itself
149
+ Then /^a notification should not be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
150
+ message = @app.logger.messages.find_all {|m| m =~ /ending notifications for event #{entity}:#{check}/ }.last
151
+ message ? happy = message.match(/Not sending notifications/) : happy = false
152
+ happy.should be_true
153
+ end
154
+
155
+ Then /^a notification should be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
156
+ message = @app.logger.messages.find_all {|m| m =~ /ending notifications for event #{entity}:#{check}/ }.last
157
+ message ? happy = message.match(/Sending notifications/) : happy = false
158
+ happy.should be_true
159
+ end
160
+
161
+ Then /^show me the notifications?$/ do
162
+ puts @app.logger.messages.join("\n")
163
+ end
164
+