flapjack 0.5.5 → 0.6.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. data/.gitignore +10 -0
  2. data/.rbenv-version +1 -0
  3. data/.rspec +10 -0
  4. data/Gemfile +18 -0
  5. data/Guardfile +14 -0
  6. data/README.md +152 -173
  7. data/Rakefile +53 -150
  8. data/bin/flapjack +72 -0
  9. data/bin/flapjack-nagios-receiver +111 -0
  10. data/bin/flapjack-nagios-receiver-control +15 -0
  11. data/bin/flapjack-netsaint-parser +0 -2
  12. data/bin/flapjack-populator +133 -16
  13. data/bin/install-flapjack-systemwide +2 -2
  14. data/config.ru +11 -0
  15. data/dist/etc/init.d/flapjack +46 -0
  16. data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
  17. data/doc/GLOSSARY.md +19 -0
  18. data/etc/flapjack_config.yaml.example +90 -0
  19. data/features/events.feature +132 -0
  20. data/features/notifications.feature +57 -0
  21. data/features/packaging-lintian.feature +5 -3
  22. data/features/steps/events_steps.rb +164 -0
  23. data/features/steps/flapjack-importer_steps.rb +2 -5
  24. data/features/steps/flapjack-worker_steps.rb +13 -6
  25. data/features/steps/notifications_steps.rb +178 -0
  26. data/features/steps/packaging-lintian_steps.rb +14 -0
  27. data/features/steps/time_travel_steps.rb +34 -0
  28. data/features/support/env.rb +63 -36
  29. data/flapjack.gemspec +35 -186
  30. data/lib/flapjack.rb +2 -0
  31. data/lib/flapjack/api.rb +274 -0
  32. data/lib/flapjack/api/entity_check_presenter.rb +184 -0
  33. data/lib/flapjack/api/entity_presenter.rb +66 -0
  34. data/lib/flapjack/cli/worker_manager.rb +1 -2
  35. data/lib/flapjack/configuration.rb +11 -0
  36. data/lib/flapjack/coordinator.rb +288 -0
  37. data/lib/flapjack/daemonizing.rb +186 -0
  38. data/lib/flapjack/data/contact.rb +45 -0
  39. data/lib/flapjack/data/entity.rb +89 -0
  40. data/lib/flapjack/data/entity_check.rb +396 -0
  41. data/lib/flapjack/data/event.rb +144 -0
  42. data/lib/flapjack/data/notification.rb +13 -0
  43. data/lib/flapjack/executive.rb +289 -0
  44. data/lib/flapjack/filters/acknowledgement.rb +39 -0
  45. data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
  46. data/lib/flapjack/filters/delays.rb +53 -0
  47. data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
  48. data/lib/flapjack/filters/ok.rb +25 -5
  49. data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
  50. data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
  51. data/lib/flapjack/jabber.rb +294 -0
  52. data/lib/flapjack/notification/common.rb +23 -0
  53. data/lib/flapjack/notification/email.rb +107 -0
  54. data/lib/flapjack/notification/email/alert.html.haml +48 -0
  55. data/lib/flapjack/notification/email/alert.text.erb +14 -0
  56. data/lib/flapjack/notification/sms.rb +42 -0
  57. data/lib/flapjack/notification/sms/messagenet.rb +49 -0
  58. data/lib/flapjack/notifier_engine.rb +4 -4
  59. data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
  60. data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
  61. data/lib/flapjack/pagerduty.rb +230 -0
  62. data/lib/flapjack/patches.rb +108 -19
  63. data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
  64. data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
  65. data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
  66. data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
  67. data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
  68. data/lib/flapjack/pikelet.rb +56 -0
  69. data/lib/flapjack/transports/beanstalkd.rb +1 -1
  70. data/lib/flapjack/transports/result.rb +6 -6
  71. data/lib/flapjack/utility.rb +46 -0
  72. data/lib/flapjack/version.rb +5 -0
  73. data/lib/flapjack/web.rb +198 -0
  74. data/lib/flapjack/web/views/acknowledge.haml +55 -0
  75. data/lib/flapjack/web/views/check.haml +162 -0
  76. data/lib/flapjack/web/views/index.haml +92 -0
  77. data/lib/flapjack/web/views/self_stats.haml +56 -0
  78. data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
  79. data/lib/flapjack/worker/cli.rb +49 -0
  80. data/{spec → spec.old}/check_sandbox/echo +0 -0
  81. data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
  82. data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
  83. data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
  84. data/{spec → spec.old}/configs/recipients.ini +0 -0
  85. data/{spec → spec.old}/helpers.rb +0 -0
  86. data/{spec → spec.old}/inifile_spec.rb +0 -0
  87. data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
  88. data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
  89. data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
  90. data/{spec → spec.old}/notifier_application_spec.rb +0 -0
  91. data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
  92. data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
  93. data/{spec → spec.old}/notifier_options_spec.rb +0 -0
  94. data/{spec → spec.old}/notifier_spec.rb +0 -0
  95. data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
  96. data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
  97. data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
  98. data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
  99. data/{spec → spec.old}/simple.ini +0 -0
  100. data/{spec → spec.old}/spec.opts +0 -0
  101. data/{spec → spec.old}/test-filters/blocker.rb +0 -0
  102. data/{spec → spec.old}/test-filters/mock.rb +0 -0
  103. data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
  104. data/{spec → spec.old}/transports/mock_transport.rb +0 -0
  105. data/{spec → spec.old}/worker_application_spec.rb +0 -0
  106. data/{spec → spec.old}/worker_options_spec.rb +0 -0
  107. data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
  108. data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
  109. data/spec/lib/flapjack/api_spec.rb +170 -0
  110. data/spec/lib/flapjack/coordinator_spec.rb +16 -0
  111. data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
  112. data/spec/lib/flapjack/data/entity_spec.rb +71 -0
  113. data/spec/lib/flapjack/data/event_spec.rb +6 -0
  114. data/spec/lib/flapjack/executive_spec.rb +59 -0
  115. data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
  116. data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
  117. data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
  118. data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
  119. data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
  120. data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
  121. data/spec/lib/flapjack/jabber_spec.rb +150 -0
  122. data/spec/lib/flapjack/notification/email_spec.rb +6 -0
  123. data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
  124. data/spec/lib/flapjack/pikelet_spec.rb +28 -0
  125. data/spec/lib/flapjack/web_spec.rb +188 -0
  126. data/spec/spec_helper.rb +44 -0
  127. data/spec/support/profile_all_formatter.rb +44 -0
  128. data/spec/support/uncolored_doc_formatter.rb +9 -0
  129. data/tasks/events.rake +85 -0
  130. data/tmp/acknowledge.rb +14 -0
  131. data/tmp/create_config_yaml.rb +16 -0
  132. data/tmp/create_events_failure.rb +33 -0
  133. data/tmp/create_events_ok.rb +33 -0
  134. data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
  135. data/tmp/create_events_ok_failure.rb +40 -0
  136. data/tmp/create_events_ok_failure_ack.rb +54 -0
  137. data/tmp/dummy_entities.json +1 -0
  138. data/tmp/generate_nagios_test_hosts.rb +16 -0
  139. data/tmp/parse_config_yaml.rb +7 -0
  140. data/tmp/redis_delete_all_keys.rb +11 -0
  141. data/tmp/test_entities.json +1 -0
  142. metadata +482 -221
  143. data/TODO.md +0 -36
  144. data/VERSION +0 -1
  145. data/bin/flapjack-benchmark +0 -50
  146. data/bin/flapjack-notifier +0 -21
  147. data/bin/flapjack-notifier-manager +0 -43
  148. data/bin/flapjack-stats +0 -27
  149. data/bin/flapjack-worker +0 -13
  150. data/bin/flapjack-worker-manager +0 -35
  151. data/dist/etc/init.d/flapjack-notifier +0 -47
  152. data/dist/etc/init.d/flapjack-workers +0 -44
  153. data/features/flapjack-notifier-manager.feature +0 -19
  154. data/features/flapjack-worker-manager.feature +0 -27
  155. data/features/flapjack-worker.feature +0 -27
  156. data/features/netsaint-config-converter.feature +0 -126
  157. data/features/persistence/couch.feature +0 -105
  158. data/features/persistence/sqlite3.feature +0 -105
  159. data/features/persistence/steps/couch_steps.rb +0 -25
  160. data/features/persistence/steps/generic_steps.rb +0 -102
  161. data/features/persistence/steps/sqlite3_steps.rb +0 -13
  162. data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
  163. data/features/steps/flapjack-worker-manager_steps.rb +0 -48
  164. data/lib/flapjack/applications/notifier.rb +0 -222
  165. data/lib/flapjack/cli/notifier.rb +0 -108
  166. data/lib/flapjack/cli/notifier_manager.rb +0 -86
  167. data/lib/flapjack/cli/worker.rb +0 -51
@@ -44,14 +44,14 @@ system("cp -aiv #{etc_path}/* /etc")
44
44
 
45
45
  # set sequence number to 50 so beanstalkd has a chance to boot
46
46
  system("update-rc.d flapjack-workers defaults 50")
47
- system("update-rc.d flapjack-notifier defaults 50")
47
+ system("update-rc.d flapjack-executive defaults 50")
48
48
 
49
49
  puts
50
50
  puts "Setup complete!"
51
51
  puts
52
52
  puts "You will want to customise:"
53
53
  puts " * /etc/flapjack/recipients.yaml"
54
- puts " * /etc/flapjack/flapjack-notifier.yaml"
54
+ puts " * /etc/flapjack/flapjack-executive.yaml"
55
55
  puts
56
56
  puts ".examples of these files exist in /etc/flapjack/'
57
57
 
data/config.ru ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ Bundler.require(:default)
6
+
7
+ $: << File.expand_path(File.join(File.dirname(__FILE__), 'lib'))
8
+ require 'flapjack/web'
9
+
10
+ use Flapjack::Web
11
+ run Sinatra::Application
@@ -0,0 +1,46 @@
1
+ #!/bin/bash
2
+ #
3
+ # Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
4
+ #
5
+ # flapjack
6
+ # Boots flapjack (coordinator, flapjack-executive, notification workers, ...)
7
+ #
8
+
9
+ PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin
10
+
11
+ # load global rbenv environment if present
12
+ if [ -r /etc/profile.d/rbenv.sh ] ; then  # sourcing only needs read permission, not the execute bit
13
+ source /etc/profile.d/rbenv.sh
14
+ fi
15
+
16
+ # Default return value
17
+ RETVAL=0
18
+
19
+ export FLAPJACK_ENV="production"
20
+
21
+ if ! command -v flapjack >/dev/null 2>&1; then  # rely on the exit status; output is discarded
22
+ echo "Error: flapjack isn't in PATH."
23
+ echo "Refusing to do anything!"
24
+ exit 1
25
+ fi
26
+
27
+ # Evaluate command
28
+ case "$1" in
29
+ start)
30
+ flapjack --config /etc/flapjack/flapjack-config.yaml --daemonize
31
+ RETVAL=$?
32
+ ;;
33
+ stop)
34
+ kill `cat /var/run/flapjack/flapjack.pid`
35
+ # FIXME: make this more robust, preferably use daemons foo
36
+ # or at least wait until the flapjack process has exited before exiting
37
+ RETVAL=$?
38
+ ;;
39
+ *)
40
+ echo "Usage: flapjack {start|stop}"
41
+ RETVAL=1
42
+ ;;
43
+ esac
44
+
45
+ exit $RETVAL
46
+
@@ -0,0 +1,36 @@
1
+ #!/bin/bash
2
+ #
3
+ # Copyright (c) 2009-2012 Lindsay Holmwood <lindsay@holmwood.id.au>
4
+ #
5
+ # flapjack-nagios-receiver
6
+ # reads from a nagios perfdata named-pipe and submits each event to the events queue in redis
7
+ #
8
+
9
+ PATH=$PATH:/sbin:/bin:/usr/sbin:/usr/bin:/usr/local:/usr/local/sbin:/usr/local/bin:/usr/lib/flapjack/bin
10
+
11
+ # load global rbenv environment if present
12
+ if [ -r /etc/profile.d/rbenv.sh ] ; then  # sourcing only needs read permission, not the execute bit
13
+ source /etc/profile.d/rbenv.sh
14
+ fi
15
+
16
+ # Default return value
17
+ RETVAL=0
18
+
19
+ export FLAPJACK_ENV="production"
20
+
21
+ NAGIOS_PERFDATA_FIFO="/var/cache/nagios3/event_stream.fifo"
22
+
23
+ if ! command -v flapjack-nagios-receiver-control >/dev/null 2>&1; then  # rely on the exit status; output is discarded
24
+ echo "Error: flapjack-nagios-receiver-control isn't in PATH."
25
+ echo "Refusing to do anything!"
26
+ exit 1
27
+ fi
28
+
29
+ # Evaluate command
30
+
31
+ #/bin/bash -c "source /etc/profile.d/rbenv.sh && rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control $1 -- --config /etc/flapjack/flapjack-config.yaml ${NAGIOS_PERFDATA_FIFO}"
32
+ rbenv shell 1.9.3-p125 && flapjack-nagios-receiver-control $1 -- --config /etc/flapjack/flapjack-config.yaml ${NAGIOS_PERFDATA_FIFO}
33
+ RETVAL=$?
34
+
35
+ exit $RETVAL
36
+
data/doc/GLOSSARY.md ADDED
@@ -0,0 +1,19 @@
1
+
2
+ As implemented for the reporting API functions, these are placeholder terms only and open for discussion.
3
+
4
+ <dl>
5
+ <dt>Entity</dt>
6
+ <dd>TODO</dd>
7
+ <dt>Check</dt>
8
+ <dd>TODO</dd>
9
+
10
+ <dt>Outage</dt>
11
+ <dd>Period of time from when a check goes into the 'CRITICAL'
12
+ state to when it comes out of that state again.</dd>
13
+ <dt>Scheduled maintenance</dt>
14
+ <dd>Periods of time explicitly created by external actors to denote that maintenance is scheduled to occur at these times.</dd>
15
+ <dt>Unscheduled maintenance</dt>
16
+ <dd>Periods of time explicitly created by external actors to denote that maintenance is happening or has happened at these times.</dd>
17
+ <dt>Downtime</dt>
18
+ <dd>Outages minus scheduled maintenances across any given time period.</dd>
19
+ </dl>
@@ -0,0 +1,90 @@
1
+ ---
2
+
3
+ development:
4
+ pid_file: tmp/pids/flapjack.pid
5
+ log_file: log/flapjack.log
6
+ daemonize: no
7
+ redis:
8
+ host: 127.0.0.1
9
+ port: 6379
10
+ db: 13  # NOTE(review): the test/staging/production sections use 'database' instead of 'db' - confirm which key the config loader reads
11
+ executive:
12
+ enabled: yes
13
+ email_queue: email_notifications
14
+ sms_queue: sms_notifications
15
+ jabber_queue: jabber_notifications
16
+ notification_log_file: log/flapjack-notification.log
17
+ email_notifier:
18
+ enabled: yes
19
+ queue: email_notifications
20
+ smtp_config:
21
+ port: 2525
22
+ # address: "localhost"
23
+ # port: 25
24
+ # domain: 'localhost.localdomain'
25
+ # user_name: nil
26
+ # password: nil
27
+ # authentication: nil
28
+ # enable_starttls_auto: true
29
+ sms_notifier:
30
+ enabled: yes
31
+ queue: sms_notifications
32
+ username: "ermahgerd"
33
+ password: "xxxx"
34
+ jabber_gateway:
35
+ enabled: yes
36
+ queue: jabber_notifications
37
+ server: "jabber.domain.tld"
38
+ port: 5222
39
+ jabberid: "flapjack@jabber.domain.tld"
40
+ password: "good-password"
41
+ alias: "flapjack"
42
+ rooms:
43
+ - "gimp@conference.jabber.domain.tld"
44
+ - "log@conference.jabber.domain.tld"
45
+ pagerduty_gateway:
46
+ enabled: yes
47
+ queue: pagerduty_notifications
48
+ web:
49
+ enabled: yes
50
+ port: 5080
51
+ api:
52
+ enabled: yes
53
+ port: 5081
54
+
55
+ test:
56
+ redis:
57
+ database: 14
58
+ email_notifier:
59
+ instances: 1
60
+ sms_notifier:
61
+ instances: 1
62
+ username: "ermahgerd"
63
+ password: "xxxx"
64
+ web:
65
+ instances: 1
66
+
67
+
68
+ staging:
69
+ redis:
70
+ database: 15
71
+ email_notifier:
72
+ instances: 1
73
+ sms_notifier:
74
+ instances: 1
75
+ username: "ermahgerd"
76
+ password: "xxxx"
77
+ web:
78
+ instances: 1
79
+
80
+ production:
81
+ redis:
82
+ database: 12
83
+ email_notifier:
84
+ instances: 1
85
+ sms_notifier:
86
+ instances: 1
87
+ username: "ermahgerd"
88
+ password: "xxxx"
89
+ web:
90
+ instances: 1
@@ -0,0 +1,132 @@
1
+ @events
2
+ Feature: events
3
+ So people can be notified when things break and recover
4
+ flapjack-executive must process events correctly
5
+
6
+ # TODO make entity and check implicit, so the test reads more cleanly
7
+ Background:
8
+ Given an entity 'def' exists
9
+
10
+ Scenario: Check ok to ok
11
+ Given check 'abc' for entity 'def' is in an ok state
12
+ When an ok event is received for check 'abc' on entity 'def'
13
+ Then a notification should not be generated for check 'abc' on entity 'def'
14
+ # And show me the output
15
+
16
+ Scenario: Check ok to failed
17
+ Given check 'abc' for entity 'def' is in an ok state
18
+ When a failure event is received for check 'abc' on entity 'def'
19
+ Then a notification should not be generated for check 'abc' on entity 'def'
20
+
21
+ @time
22
+ Scenario: Check failed to failed after 10 seconds
23
+ Given check 'abc' for entity 'def' is in an ok state
24
+ When a failure event is received for check 'abc' on entity 'def'
25
+ And 10 seconds passes
26
+ And a failure event is received for check 'abc' on entity 'def'
27
+ Then a notification should not be generated for check 'abc' on entity 'def'
28
+
29
+ @time
30
+ Scenario: Check ok to failed for 1 minute
31
+ Given check 'abc' for entity 'def' is in an ok state
32
+ When a failure event is received for check 'abc' on entity 'def'
33
+ And 1 minute passes
34
+ And a failure event is received for check 'abc' on entity 'def'
35
+ Then a notification should be generated for check 'abc' on entity 'def'
36
+
37
+ @time
38
+ Scenario: Check failed and alerted to failed for 1 minute
39
+ Given check 'abc' for entity 'def' is in an ok state
40
+ When a failure event is received for check 'abc' on entity 'def'
41
+ And 1 minute passes
42
+ And a failure event is received for check 'abc' on entity 'def'
43
+ And show me the notifications
44
+ Then a notification should be generated for check 'abc' on entity 'def'
45
+ When 1 minute passes
46
+ And a failure event is received for check 'abc' on entity 'def'
47
+ And show me the notifications
48
+ Then a notification should not be generated for check 'abc' on entity 'def'
49
+
50
+ @time
51
+ Scenario: Check failed and alerted to failed for 6 minutes
52
+ Given check 'abc' for entity 'def' is in an ok state
53
+ When a failure event is received for check 'abc' on entity 'def'
54
+ And 1 minute passes
55
+ And a failure event is received for check 'abc' on entity 'def'
56
+ Then a notification should be generated for check 'abc' on entity 'def'
57
+ When 6 minutes passes
58
+ And a failure event is received for check 'abc' on entity 'def'
59
+ Then a notification should be generated for check 'abc' on entity 'def'
60
+
61
+ @time
62
+ Scenario: Check ok to failed for 1 minute when in scheduled maintenance
63
+ Given check 'abc' for entity 'def' is in an ok state
64
+ And check 'abc' for entity 'def' is in scheduled maintenance
65
+ When a failure event is received for check 'abc' on entity 'def'
66
+ And 1 minute passes
67
+ And a failure event is received for check 'abc' on entity 'def'
68
+ Then a notification should not be generated for check 'abc' on entity 'def'
69
+
70
+ @time
71
+ Scenario: Check ok to failed for 1 minute when in unscheduled maintenance
72
+ Given check 'abc' for entity 'def' is in an ok state
73
+ And check 'abc' for entity 'def' is in unscheduled maintenance
74
+ When a failure event is received for check 'abc' on entity 'def'
75
+ And 1 minute passes
76
+ And a failure event is received for check 'abc' on entity 'def'
77
+ Then a notification should not be generated for check 'abc' on entity 'def'
78
+
79
+ @time
80
+ Scenario: Check ok to failed for 1 minute, acknowledged, and failed for 6 minutes
81
+ Given check 'abc' for entity 'def' is in an ok state
82
+ When a failure event is received for check 'abc' on entity 'def'
83
+ And 1 minute passes
84
+ And a failure event is received for check 'abc' on entity 'def'
85
+ Then a notification should be generated for check 'abc' on entity 'def'
86
+ When an acknowledgement is received for check 'abc' on entity 'def'
87
+ And 6 minutes passes
88
+ And a failure event is received for check 'abc' on entity 'def'
89
+ Then a notification should not be generated for check 'abc' on entity 'def'
90
+
91
+ Scenario: Check failed to ok
92
+ Given check 'abc' for entity 'def' is in a failure state
93
+ And 5 minutes passes
94
+ And a failure event is received for check 'abc' on entity 'def'
95
+ Then a notification should be generated for check 'abc' on entity 'def'
96
+ When 5 minutes passes
97
+ And an ok event is received for check 'abc' on entity 'def'
98
+ Then a notification should be generated for check 'abc' on entity 'def'
99
+
100
+ @time
101
+ Scenario: Check failed to ok when acknowledged
102
+ Given check 'abc' for entity 'def' is in a failure state
103
+ When an acknowledgement event is received for check 'abc' on entity 'def'
104
+ Then a notification should be generated for check 'abc' on entity 'def'
105
+ When 1 minute passes
106
+ And an ok event is received for check 'abc' on entity 'def'
107
+ Then a notification should be generated for check 'abc' on entity 'def'
108
+
109
+ @time
110
+ Scenario: Check failed to ok when acknowledged, and fails after 6 minutes
111
+ Given check 'abc' for entity 'def' is in a failure state
112
+ When an acknowledgement event is received for check 'abc' on entity 'def'
113
+ Then a notification should be generated for check 'abc' on entity 'def'
114
+ When 1 minute passes
115
+ And an ok event is received for check 'abc' on entity 'def'
116
+ Then a notification should be generated for check 'abc' on entity 'def'
117
+ When 6 minutes passes
118
+ And a failure event is received for check 'abc' on entity 'def'
119
+ Then a notification should not be generated for check 'abc' on entity 'def'
120
+ When 6 minutes passes
121
+ And a failure event is received for check 'abc' on entity 'def'
122
+ Then a notification should be generated for check 'abc' on entity 'def'
123
+
124
+ Scenario: Acknowledgement when ok
125
+ Given check 'abc' for entity 'def' is in an ok state
126
+ When an acknowledgement event is received for check 'abc' on entity 'def'
127
+ Then a notification should not be generated for check 'abc' on entity 'def'
128
+
129
+ Scenario: Acknowledgement when failed
130
+ Given check 'abc' for entity 'def' is in a failure state
131
+ When an acknowledgement event is received for check 'abc' on entity 'def'
132
+ Then a notification should be generated for check 'abc' on entity 'def'
@@ -0,0 +1,57 @@
1
+ @notifications
2
+ Feature: notifications
3
+ So people can be notified when things break and recover
4
+ flapjack-notifier must send notifications correctly
5
+
6
+ # TODO test across multiple contacts
7
+
8
+ @resque
9
+ Scenario: Queue an SMS notification
10
+ Given the user wants to receive SMS notifications for entity 'example.com'
11
+ When an event notification is generated for entity 'example.com'
12
+ Then an SMS notification for entity 'example.com' should be queued for the user
13
+ And an email notification for entity 'example.com' should not be queued for the user
14
+
15
+ @resque
16
+ Scenario: Queue an email notification
17
+ Given the user wants to receive email notifications for entity 'example.com'
18
+ When an event notification is generated for entity 'example.com'
19
+ Then an email notification for entity 'example.com' should be queued for the user
20
+ And an SMS notification for entity 'example.com' should not be queued for the user
21
+
22
+ @resque
23
+ Scenario: Queue SMS and email notifications
24
+ Given the user wants to receive SMS notifications for entity 'example.com' and email notifications for entity 'example2.com'
25
+ When an event notification is generated for entity 'example.com'
26
+ And an event notification is generated for entity 'example2.com'
27
+ Then an SMS notification for entity 'example.com' should be queued for the user
28
+ And an SMS notification for entity 'example2.com' should not be queued for the user
29
+ Then an email notification for entity 'example.com' should not be queued for the user
30
+ And an email notification for entity 'example2.com' should be queued for the user
31
+
32
+ # NB: Scenarios below here are those that cover code run by the Resque workers
33
+ # We could maybe test resque integration as well, see
34
+ # http://corner.squareup.com/2010/08/cucumber-and-resque.html
35
+ # http://gist.github.com/532100
36
+
37
+ Scenario: Send a queued SMS notification
38
+ Given a user SMS notification has been queued for entity 'example.com'
39
+ When the SMS notification handler runs successfully
40
+ Then the user should receive an SMS notification
41
+
42
+ Scenario: Handle a failure to send a queued SMS notification
43
+ Given a user SMS notification has been queued for entity 'example.com'
44
+ When the SMS notification handler fails to send an SMS
45
+ Then the user should not receive an SMS notification
46
+
47
+ @email
48
+ Scenario: Send a queued email notification
49
+ Given a user email notification has been queued for entity 'example.com'
50
+ When the email notification handler runs successfully
51
+ Then the user should receive an email notification
52
+
53
+ @email
54
+ Scenario: Handle a failure to send a queued email notification
55
+ Given a user email notification has been queued for entity 'example.com'
56
+ When the email notification handler fails to send an email
57
+ Then the user should not receive an email notification
@@ -1,15 +1,17 @@
1
- Feature: Packagability
1
+ Feature: Packagability
2
2
  To make Flapjack usable to the masses
3
3
  It must be easily packagable
4
4
 
5
5
  Scenario: No rubygems references
6
6
  Given I am at the project root
7
7
  When I run "grep require lib/* bin/* -R |grep rubygems"
8
- Then I should see 0 lines of output
8
+ Then the exit status should be 1
9
+ And I should see 0 lines of output
9
10
 
10
11
  Scenario: A shebang that works everywhere
11
12
  Given I am at the project root
12
13
  When I run "find lib/ -type 'f' -name '*.rb'"
13
- Then every file in the output should start with "#!/usr/bin/env ruby"
14
+ Then the exit status should be 0
15
+ And every file in the output should start with "#!/usr/bin/env ruby"
14
16
 
15
17
 
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'flapjack/data/entity_check'
4
+ require 'flapjack/data/event'
5
+
6
+ def drain_events
7
+ loop do
8
+ event = Flapjack::Data::Event.next(:block => false, :persistence => @redis)
9
+ break unless event
10
+ @app.send(:process_event, event)
11
+ end
12
+ end
13
+
14
+ def submit_event(event)
15
+ @redis.rpush 'events', event.to_json
16
+ end
17
+
18
+ def set_scheduled_maintenance(entity, check, duration = 60*60*2)
19
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
20
+ t = Time.now.to_i
21
+ entity_check.create_scheduled_maintenance(:start_time => t, :duration => duration, :summary => "upgrading everything")
22
+ @redis.setex("#{entity}:#{check}:scheduled_maintenance", duration, t)
23
+ end
24
+
25
+ def remove_scheduled_maintenance(entity, check)
26
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
27
+ sm = entity_check.maintenances(nil, nil, :scheduled => true)
28
+ sm.each do |m|
29
+ entity_check.delete_scheduled_maintenance(:start_time => m[:start_time])
30
+ end
31
+ end
32
+
33
+ def remove_unscheduled_maintenance(entity, check)
34
+ # end any unscheduled downtime
35
+ event_id = entity + ":" + check
36
+ if (um_start = @redis.get("#{event_id}:unscheduled_maintenance"))
37
+ @redis.del("#{event_id}:unscheduled_maintenance")
38
+ duration = Time.now.to_i - um_start.to_i
39
+ @redis.zadd("#{event_id}:unscheduled_maintenances", duration, um_start)
40
+ end
41
+ end
42
+
43
+ def remove_notifications(entity, check)
44
+ event_id = entity + ":" + check
45
+ @redis.del("#{event_id}:last_problem_notification")
46
+ @redis.del("#{event_id}:last_recovery_notification")
47
+ @redis.del("#{event_id}:last_acknowledgement_notification")
48
+ end
49
+
50
+ def set_ok_state(entity, check)
51
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
52
+ entity_check.update_state(Flapjack::Data::EntityCheck::STATE_OK,
53
+ :timestamp => (Time.now.to_i - (60*60*24)))
54
+ end
55
+
56
+ def set_failure_state(entity, check)
57
+ entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
58
+ entity_check.update_state(Flapjack::Data::EntityCheck::STATE_CRITICAL,
59
+ :timestamp => (Time.now.to_i - (60*60*24)))
60
+ end
61
+
62
+ def submit_ok(entity, check)
63
+ event = {
64
+ 'type' => 'service',
65
+ 'state' => 'ok',
66
+ 'summary' => '0% packet loss',
67
+ 'entity' => entity,
68
+ 'check' => check,
69
+ 'client' => 'clientx'
70
+ }
71
+ submit_event(event)
72
+ end
73
+
74
+ def submit_critical(entity, check)
75
+ event = {
76
+ 'type' => 'service',
77
+ 'state' => 'critical',
78
+ 'summary' => '100% packet loss',
79
+ 'entity' => entity,
80
+ 'check' => check,
81
+ 'client' => 'clientx'
82
+ }
83
+ submit_event(event)
84
+ end
85
+
86
+ def submit_acknowledgement(entity, check)
87
+ event = {
88
+ 'type' => 'action',
89
+ 'state' => 'acknowledgement',
90
+ 'summary' => "I'll have this fixed in a jiffy, saw the same thing yesterday",
91
+ 'entity' => entity,
92
+ 'check' => check,
93
+ 'client' => 'clientx',
94
+ # 'acknowledgement_id' =>
95
+ }
96
+ submit_event(event)
97
+ end
98
+
99
+ Given /^an entity '([\w\.\-]+)' exists$/ do |entity|
100
+ Flapjack::Data::Entity.add({'id' => '5000',
101
+ 'name' => entity},
102
+ :redis => @redis )
103
+ end
104
+
105
+ Given /^^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in an ok state$/ do |check, entity|
106
+ remove_unscheduled_maintenance(entity, check)
107
+ remove_scheduled_maintenance(entity, check)
108
+ remove_notifications(entity, check)
109
+ set_ok_state(entity, check)
110
+ end
111
+
112
+ Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in a failure state$/ do |check, entity|
113
+ remove_unscheduled_maintenance(entity, check)
114
+ remove_scheduled_maintenance(entity, check)
115
+ remove_notifications(entity, check)
116
+ set_failure_state(entity, check)
117
+ end
118
+
119
+ Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in scheduled maintenance$/ do |check, entity|
120
+ remove_unscheduled_maintenance(entity, check)
121
+ set_scheduled_maintenance(entity, check)
122
+ end
123
+
124
+ # TODO set the state directly rather than submit & drain
125
+ Given /^check '([\w\.\-]+)' for entity '([\w\.\-]+)' is in unscheduled maintenance$/ do |check, entity|
126
+ remove_scheduled_maintenance(entity, check)
127
+ set_failure_state(entity, check)
128
+ submit_acknowledgement(entity, check)
129
+ drain_events # TODO these should only be in When clauses
130
+ end
131
+
132
+ When /^an ok event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
133
+ submit_ok(entity, check)
134
+ drain_events
135
+ end
136
+
137
+ When /^a failure event is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
138
+ submit_critical(entity, check)
139
+ drain_events
140
+ end
141
+
142
+ When /^an acknowledgement .*is received for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
143
+ submit_acknowledgement(entity, check)
144
+ drain_events
145
+ end
146
+
147
+
148
+ # TODO logging is a side-effect, should test for notification generation itself
149
+ Then /^a notification should not be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
150
+ message = @app.logger.messages.find_all {|m| m =~ /ending notifications for event #{entity}:#{check}/ }.last
151
+ message ? happy = message.match(/Not sending notifications/) : happy = false
152
+ happy.should be_true
153
+ end
154
+
155
+ Then /^a notification should be generated for check '([\w\.\-]+)' on entity '([\w\.\-]+)'$/ do |check, entity|
156
+ message = @app.logger.messages.find_all {|m| m =~ /ending notifications for event #{entity}:#{check}/ }.last
157
+ message ? happy = message.match(/Sending notifications/) : happy = false
158
+ happy.should be_true
159
+ end
160
+
161
+ Then /^show me the notifications?$/ do
162
+ puts @app.logger.messages.join("\n")
163
+ end
164
+