flapjack 0.5.5 → 0.6.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. data/.gitignore +10 -0
  2. data/.rbenv-version +1 -0
  3. data/.rspec +10 -0
  4. data/Gemfile +18 -0
  5. data/Guardfile +14 -0
  6. data/README.md +152 -173
  7. data/Rakefile +53 -150
  8. data/bin/flapjack +72 -0
  9. data/bin/flapjack-nagios-receiver +111 -0
  10. data/bin/flapjack-nagios-receiver-control +15 -0
  11. data/bin/flapjack-netsaint-parser +0 -2
  12. data/bin/flapjack-populator +133 -16
  13. data/bin/install-flapjack-systemwide +2 -2
  14. data/config.ru +11 -0
  15. data/dist/etc/init.d/flapjack +46 -0
  16. data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
  17. data/doc/GLOSSARY.md +19 -0
  18. data/etc/flapjack_config.yaml.example +90 -0
  19. data/features/events.feature +132 -0
  20. data/features/notifications.feature +57 -0
  21. data/features/packaging-lintian.feature +5 -3
  22. data/features/steps/events_steps.rb +164 -0
  23. data/features/steps/flapjack-importer_steps.rb +2 -5
  24. data/features/steps/flapjack-worker_steps.rb +13 -6
  25. data/features/steps/notifications_steps.rb +178 -0
  26. data/features/steps/packaging-lintian_steps.rb +14 -0
  27. data/features/steps/time_travel_steps.rb +34 -0
  28. data/features/support/env.rb +63 -36
  29. data/flapjack.gemspec +35 -186
  30. data/lib/flapjack.rb +2 -0
  31. data/lib/flapjack/api.rb +274 -0
  32. data/lib/flapjack/api/entity_check_presenter.rb +184 -0
  33. data/lib/flapjack/api/entity_presenter.rb +66 -0
  34. data/lib/flapjack/cli/worker_manager.rb +1 -2
  35. data/lib/flapjack/configuration.rb +11 -0
  36. data/lib/flapjack/coordinator.rb +288 -0
  37. data/lib/flapjack/daemonizing.rb +186 -0
  38. data/lib/flapjack/data/contact.rb +45 -0
  39. data/lib/flapjack/data/entity.rb +89 -0
  40. data/lib/flapjack/data/entity_check.rb +396 -0
  41. data/lib/flapjack/data/event.rb +144 -0
  42. data/lib/flapjack/data/notification.rb +13 -0
  43. data/lib/flapjack/executive.rb +289 -0
  44. data/lib/flapjack/filters/acknowledgement.rb +39 -0
  45. data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
  46. data/lib/flapjack/filters/delays.rb +53 -0
  47. data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
  48. data/lib/flapjack/filters/ok.rb +25 -5
  49. data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
  50. data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
  51. data/lib/flapjack/jabber.rb +294 -0
  52. data/lib/flapjack/notification/common.rb +23 -0
  53. data/lib/flapjack/notification/email.rb +107 -0
  54. data/lib/flapjack/notification/email/alert.html.haml +48 -0
  55. data/lib/flapjack/notification/email/alert.text.erb +14 -0
  56. data/lib/flapjack/notification/sms.rb +42 -0
  57. data/lib/flapjack/notification/sms/messagenet.rb +49 -0
  58. data/lib/flapjack/notifier_engine.rb +4 -4
  59. data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
  60. data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
  61. data/lib/flapjack/pagerduty.rb +230 -0
  62. data/lib/flapjack/patches.rb +108 -19
  63. data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
  64. data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
  65. data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
  66. data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
  67. data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
  68. data/lib/flapjack/pikelet.rb +56 -0
  69. data/lib/flapjack/transports/beanstalkd.rb +1 -1
  70. data/lib/flapjack/transports/result.rb +6 -6
  71. data/lib/flapjack/utility.rb +46 -0
  72. data/lib/flapjack/version.rb +5 -0
  73. data/lib/flapjack/web.rb +198 -0
  74. data/lib/flapjack/web/views/acknowledge.haml +55 -0
  75. data/lib/flapjack/web/views/check.haml +162 -0
  76. data/lib/flapjack/web/views/index.haml +92 -0
  77. data/lib/flapjack/web/views/self_stats.haml +56 -0
  78. data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
  79. data/lib/flapjack/worker/cli.rb +49 -0
  80. data/{spec → spec.old}/check_sandbox/echo +0 -0
  81. data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
  82. data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
  83. data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
  84. data/{spec → spec.old}/configs/recipients.ini +0 -0
  85. data/{spec → spec.old}/helpers.rb +0 -0
  86. data/{spec → spec.old}/inifile_spec.rb +0 -0
  87. data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
  88. data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
  89. data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
  90. data/{spec → spec.old}/notifier_application_spec.rb +0 -0
  91. data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
  92. data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
  93. data/{spec → spec.old}/notifier_options_spec.rb +0 -0
  94. data/{spec → spec.old}/notifier_spec.rb +0 -0
  95. data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
  96. data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
  97. data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
  98. data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
  99. data/{spec → spec.old}/simple.ini +0 -0
  100. data/{spec → spec.old}/spec.opts +0 -0
  101. data/{spec → spec.old}/test-filters/blocker.rb +0 -0
  102. data/{spec → spec.old}/test-filters/mock.rb +0 -0
  103. data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
  104. data/{spec → spec.old}/transports/mock_transport.rb +0 -0
  105. data/{spec → spec.old}/worker_application_spec.rb +0 -0
  106. data/{spec → spec.old}/worker_options_spec.rb +0 -0
  107. data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
  108. data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
  109. data/spec/lib/flapjack/api_spec.rb +170 -0
  110. data/spec/lib/flapjack/coordinator_spec.rb +16 -0
  111. data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
  112. data/spec/lib/flapjack/data/entity_spec.rb +71 -0
  113. data/spec/lib/flapjack/data/event_spec.rb +6 -0
  114. data/spec/lib/flapjack/executive_spec.rb +59 -0
  115. data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
  116. data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
  117. data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
  118. data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
  119. data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
  120. data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
  121. data/spec/lib/flapjack/jabber_spec.rb +150 -0
  122. data/spec/lib/flapjack/notification/email_spec.rb +6 -0
  123. data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
  124. data/spec/lib/flapjack/pikelet_spec.rb +28 -0
  125. data/spec/lib/flapjack/web_spec.rb +188 -0
  126. data/spec/spec_helper.rb +44 -0
  127. data/spec/support/profile_all_formatter.rb +44 -0
  128. data/spec/support/uncolored_doc_formatter.rb +9 -0
  129. data/tasks/events.rake +85 -0
  130. data/tmp/acknowledge.rb +14 -0
  131. data/tmp/create_config_yaml.rb +16 -0
  132. data/tmp/create_events_failure.rb +33 -0
  133. data/tmp/create_events_ok.rb +33 -0
  134. data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
  135. data/tmp/create_events_ok_failure.rb +40 -0
  136. data/tmp/create_events_ok_failure_ack.rb +54 -0
  137. data/tmp/dummy_entities.json +1 -0
  138. data/tmp/generate_nagios_test_hosts.rb +16 -0
  139. data/tmp/parse_config_yaml.rb +7 -0
  140. data/tmp/redis_delete_all_keys.rb +11 -0
  141. data/tmp/test_entities.json +1 -0
  142. metadata +482 -221
  143. data/TODO.md +0 -36
  144. data/VERSION +0 -1
  145. data/bin/flapjack-benchmark +0 -50
  146. data/bin/flapjack-notifier +0 -21
  147. data/bin/flapjack-notifier-manager +0 -43
  148. data/bin/flapjack-stats +0 -27
  149. data/bin/flapjack-worker +0 -13
  150. data/bin/flapjack-worker-manager +0 -35
  151. data/dist/etc/init.d/flapjack-notifier +0 -47
  152. data/dist/etc/init.d/flapjack-workers +0 -44
  153. data/features/flapjack-notifier-manager.feature +0 -19
  154. data/features/flapjack-worker-manager.feature +0 -27
  155. data/features/flapjack-worker.feature +0 -27
  156. data/features/netsaint-config-converter.feature +0 -126
  157. data/features/persistence/couch.feature +0 -105
  158. data/features/persistence/sqlite3.feature +0 -105
  159. data/features/persistence/steps/couch_steps.rb +0 -25
  160. data/features/persistence/steps/generic_steps.rb +0 -102
  161. data/features/persistence/steps/sqlite3_steps.rb +0 -13
  162. data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
  163. data/features/steps/flapjack-worker-manager_steps.rb +0 -48
  164. data/lib/flapjack/applications/notifier.rb +0 -222
  165. data/lib/flapjack/cli/notifier.rb +0 -108
  166. data/lib/flapjack/cli/notifier_manager.rb +0 -86
  167. data/lib/flapjack/cli/worker.rb +0 -51
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'flapjack/pikelet'
4
+
5
+ module Flapjack
6
+ module Notification
7
+
8
+ module Common
9
+
10
+ include Flapjack::Pikelet
11
+
12
+ # TODO to make this testable, work out a supported way to make the passed redis connection
13
+ # use the test db
14
+ def perform(notification)
15
+ bootstrap
16
+ @logger.debug "Woo, got a notification to send out: #{notification.inspect}"
17
+ dispatch(notification, :logger => @logger, :redis => ::Redis.new)
18
+ end
19
+
20
+ end
21
+ end
22
+ end
23
+
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'mail'
4
+ require 'erb'
5
+ require 'haml'
6
+ require 'socket'
7
+
8
+ require 'flapjack/data/entity_check'
9
+ require 'flapjack/notification/common'
10
+
11
+ module Flapjack
12
+ module Notification
13
+
14
+ class Email
15
+ extend Flapjack::Notification::Common
16
+
17
+ def self.dispatch(notification, opts = {})
18
+ notification_type = notification['notification_type']
19
+ contact_first_name = notification['contact_first_name']
20
+ contact_last_name = notification['contact_last_name']
21
+ state = notification['state']
22
+ summary = notification['summary']
23
+ time = notification['time']
24
+ entity, check = notification['event_id'].split(':')
25
+
26
+ entity_check = Flapjack::Data::EntityCheck.for_event_id(notification['event_id'],
27
+ :redis => opts[:redis])
28
+
29
+ headline_map = {'problem' => 'Problem: ',
30
+ 'recovery' => 'Recovery: ',
31
+ 'acknowledgement' => 'Acknowledgement: ',
32
+ 'unknown' => ''
33
+ }
34
+
35
+ headline = headline_map[notification_type] || ''
36
+
37
+ subject = "#{headline}'#{check}' on #{entity}"
38
+ subject += " is #{state.upcase}" unless notification_type == 'acknowledgement'
39
+
40
+ notification['subject'] = subject
41
+ opts[:logger].debug "Flapjack::Notification::Email#dispatch is calling Flapjack::Notification::Mailer.sender, notification_id: #{notification['id']}"
42
+ sender_opts = {:logger => opts[:logger],
43
+ :in_scheduled_maintenance => entity_check.in_scheduled_maintenance?,
44
+ :in_unscheduled_maintenance => entity_check.in_unscheduled_maintenance?
45
+ }
46
+
47
+ mail = prepare_email(notification, sender_opts)
48
+ mail.deliver!
49
+ end
50
+
51
+ private
52
+
53
+ def self.prepare_email(notification, opts)
54
+
55
+ logger = opts[:logger]
56
+
57
+ # not using socket and gethostname, as that doesn't give you an FQDN.
58
+ # see the facter issue: https://projects.puppetlabs.com/issues/3898
59
+ fqdn = `/bin/hostname -f`.chomp
60
+ m_from = "flapjack@#{fqdn}"
61
+ logger.debug("flapjack_mailer: set from to #{m_from}")
62
+ m_reply_to = m_from
63
+
64
+ m_to = notification['address']
65
+ m_subject = notification['subject']
66
+
67
+ logger.debug("Flapjack::Notification::Mailer #{notification['id']} to: #{m_to} subject: #{m_subject}")
68
+
69
+ @notification_type = notification['notification_type']
70
+ @contact_first_name = notification['contact_first_name']
71
+ @contact_last_name = notification['contact_last_name']
72
+ @state = notification['state']
73
+ @summary = notification['summary']
74
+ @time = notification['time']
75
+ @entity, @check = notification['event_id'].split(':')
76
+ @in_unscheduled_maintenance = opts[:in_unscheduled_maintenance]
77
+ @in_scheduled_maintenance = opts[:in_scheduled_maintenance]
78
+
79
+ text_template = ERB.new(File.read(File.dirname(__FILE__) +
80
+ '/email/alert.text.erb'))
81
+
82
+ haml_engine = Haml::Engine.new(File.read(File.dirname(__FILE__) +
83
+ '/email/alert.html.haml'))
84
+
85
+ mail_scope = self
86
+
87
+ mail = Mail.new do
88
+ from m_from
89
+ to m_to
90
+ subject m_subject
91
+ reply_to m_reply_to
92
+
93
+ text_part do
94
+ body text_template.result(binding)
95
+ end
96
+
97
+ html_part do
98
+ content_type 'text/html; charset=UTF-8'
99
+ body haml_engine.render(mail_scope)
100
+ end
101
+ end
102
+ end
103
+
104
+ end
105
+ end
106
+ end
107
+
@@ -0,0 +1,48 @@
1
+ %style{:type => "text/css", :media => "screen"}
2
+ :plain
3
+ #container {
4
+ text-transform: uppercase;
5
+ }
6
+ table {
7
+ border-collapse: collapse;
8
+ }
9
+ table, th, td {
10
+ border: 1px solid #666;
11
+ padding: 4px;
12
+ }
13
+
14
+ %p= "Hi #{@contact_first_name},"
15
+
16
+ %p Monitoring has detected the following:
17
+
18
+ %table
19
+ %tbody
20
+ %tr
21
+ %td
22
+ %strong Entity
23
+ %td= @entity
24
+ %tr
25
+ %td
26
+ %strong Check
27
+ %td= @check
28
+ %tr
29
+ %td
30
+ %strong State
31
+ %td= @state.upcase
32
+ %tr
33
+ %td
34
+ %strong Summary
35
+ %td= @summary
36
+ %tr
37
+ %td
38
+ %strong Time
39
+ %td= Time.at(@time.to_i).to_s
40
+ %tr
41
+ %td
42
+ %strong Flapjack
43
+ %td
44
+ %a(href="http://127.0.0.1:9292/check?entity=#{@entity}&check=#{@check}") http://127.0.0.1:9292/check?entity=#{@entity}&check=#{@check}
45
+
46
+ %p Cheers,
47
+ %p Flapjack
48
+
@@ -0,0 +1,14 @@
1
+ Hi <%= @contact_first_name %>,
2
+
3
+ Monitoring has detected the following:
4
+
5
+ Entity: <%= @entity %>
6
+ Check: <%= @check %>
7
+ State: <%= @state %>
8
+ Summary: <%= @summary %>
9
+ Time: <%= Time.at(@time.to_i).to_s %>
10
+ Flapjack: http://127.0.0.1:9292/check?entity=<%= @entity %>&check=<%= @check %>
11
+
12
+ Cheers,
13
+ Flapjack
14
+
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'flapjack/notification/sms/messagenet'
4
+
5
+ module Flapjack
6
+ module Notification
7
+
8
+ class Sms
9
+ extend Flapjack::Notification::Common
10
+
11
+ def self.dispatch(notification, opts = {})
12
+ notification_type = notification['notification_type']
13
+ contact_first_name = notification['contact_first_name']
14
+ contact_last_name = notification['contact_last_name']
15
+ state = notification['state']
16
+ summary = notification['summary']
17
+ time = notification['time']
18
+ entity, check = notification['event_id'].split(':')
19
+
20
+ headline_map = {'problem' => 'PROBLEM: ',
21
+ 'recovery' => 'RECOVERY: ',
22
+ 'acknowledgement' => 'ACK: ',
23
+ 'unknown' => '',
24
+ '' => '',
25
+ }
26
+
27
+ headline = headline_map[notification_type] || ''
28
+
29
+ message = "#{headline}'#{check}' on #{entity}"
30
+ message += " is #{state.upcase}" unless notification_type == 'acknowledgement'
31
+ message += " at #{Time.at(time).strftime('%-d %b %H:%M')}, #{summary}"
32
+
33
+ notification['message'] = message
34
+ Flapjack::Notification::Sms::Messagenet.sender(notification,
35
+ :logger => opts[:logger],
36
+ :config => Flapjack::Notification::Sms.class_variable_get('@@config'))
37
+ end
38
+
39
+ end
40
+ end
41
+ end
42
+
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'net/http'
4
+ require 'uri'
5
+
6
+ module Flapjack
7
+ module Notification
8
+ class Sms
9
+ class Messagenet
10
+
11
+ def self.sender(notification, options = {})
12
+ logger = options[:logger]
13
+ config = options[:config]
14
+
15
+ unless config && (username = config["username"])
16
+ raise RuntimeError.new('sms_messagenet: username is missing')
17
+ end
18
+ unless config && (password = config["password"])
19
+ raise RuntimeError.new('sms_messagenet: password is missing')
20
+ end
21
+
22
+ raise RuntimeError.new('address is missing') unless address = notification['address']
23
+ raise RuntimeError.new('message is missing') unless message = notification['message']
24
+ raise RuntimeError.new('id is missing') unless notification_id = notification['id']
25
+
26
+ params = { 'Username' => username,
27
+ 'Pwd' => password,
28
+ 'PhoneNumber' => address,
29
+ 'PhoneMessage' => message }
30
+
31
+ uri = URI('https://www.messagenet.com.au/dotnet/Lodge.asmx/LodgeSMSMessage')
32
+ uri.query = URI.encode_www_form(params)
33
+ logger.debug("request_uri: #{uri.request_uri.inspect}")
34
+
35
+ Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
36
+ request = Net::HTTP::Get.new uri.request_uri
37
+ response = http.request request
38
+ http_success = ( response.is_a?(Net::HTTPSuccess) == true )
39
+ logger.debug("Flapjack::Notification::SMSMessagenet: response from server: #{response.body}")
40
+ raise RuntimeError.new "Failed to send SMS via messagenet, http response is a #{response.class}, notification_id: #{notification_id}" unless http_success
41
+ end
42
+
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+
49
+
@@ -4,13 +4,13 @@ require 'ostruct'
4
4
 
5
5
  module Flapjack
6
6
  class NotifierEngine
7
-
7
+
8
8
  attr_reader :log, :notifiers
9
-
9
+
10
10
  def initialize(opts={})
11
11
  @log = opts[:log]
12
12
  raise "you have to specify a logger" unless @log
13
-
13
+
14
14
  @notifiers = []
15
15
  if opts[:notifiers]
16
16
  opts[:notifiers].each do |n|
@@ -21,7 +21,7 @@ module Flapjack
21
21
  @log.warning("There are no notifiers! flapjack-notifier won't be useful.")
22
22
  end
23
23
  end
24
-
24
+
25
25
  def notify!(options={})
26
26
  result = options[:result]
27
27
  event = options[:event]
@@ -1,6 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'rubygems'
4
3
  require 'net/smtp'
5
4
  require 'tmail'
6
5
 
@@ -15,7 +14,7 @@ module Flapjack
15
14
  @log = opts[:log]
16
15
  @from_address = opts[:from_address]
17
16
  @website_uri = opts[:website_uri]
18
-
17
+
19
18
  raise ArgumentError, "from address must be provided" unless @from_address
20
19
  end
21
20
 
@@ -30,15 +29,15 @@ module Flapjack
30
29
  mail.subject = "Check: #{opts[:result].check_id}, Status: #{opts[:result].status}"
31
30
  mail.body = <<-DESC
32
31
  Check #{opts[:result].check_id} returned the status "#{opts[:result].status}".
33
-
34
- Here was the output:
32
+
33
+ Here was the output:
35
34
  #{opts[:result].output}
36
-
35
+
37
36
  You can respond to this issue at:
38
37
  #{@website_uri}/issue/#{opts[:result].check_id}
39
38
  DESC
40
39
 
41
- begin
40
+ begin
42
41
  Net::SMTP.start('localhost') do |smtp|
43
42
  return smtp.sendmail(mail.to_s, mail.from, mail.to)
44
43
  end
@@ -46,7 +45,7 @@ module Flapjack
46
45
  @log.error("Couldn't establish connection to mail server!")
47
46
  end
48
47
  end
49
-
48
+
50
49
  end
51
50
  end
52
51
  end
@@ -5,41 +5,41 @@ require 'xmpp4r'
5
5
  module Flapjack
6
6
  module Notifiers
7
7
  class Xmpp
8
-
8
+
9
9
  def initialize(opts={})
10
-
10
+
11
11
  @jid = opts[:jid]
12
12
  @password = opts[:password]
13
13
  @log = opts[:logger]
14
- unless @jid && @password
14
+ unless @jid && @password
15
15
  raise ArgumentError, "You have to provide a username and password"
16
16
  end
17
17
 
18
- begin
18
+ begin
19
19
  @xmpp = Jabber::Client.new(@jid)
20
20
  @xmpp.connect
21
21
  @xmpp.auth(@password)
22
22
  rescue SocketError => e
23
23
  @log.error("XMPP: #{e.message}")
24
24
  end
25
-
25
+
26
26
  end
27
-
27
+
28
28
  def notify(opts={})
29
-
30
- raise ArgumentError, "a recipient was not specified" unless opts[:who]
29
+
30
+ raise ArgumentError, "a recipient was not specified" unless opts[:who]
31
31
  raise ArgumentError, "a result was not specified" unless opts[:result]
32
-
32
+
33
33
  text = <<-DESC
34
34
  Check #{opts[:result].check_id} returned the status "#{opts[:result].status}".
35
35
  http://localhost:4000/checks/#{opts[:result].check_id}
36
36
  DESC
37
-
37
+
38
38
  message = Jabber::Message.new(opts[:who].jid, text)
39
39
  @xmpp.send(message)
40
-
40
+
41
41
  end
42
-
42
+
43
43
  end
44
44
  end
45
45
  end
@@ -0,0 +1,230 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'eventmachine'
4
+ # the redis/synchrony gems need to be required in this particular order, see
5
+ # the redis-rb README for details
6
+ require 'hiredis'
7
+ require 'em-synchrony'
8
+ require 'em-synchrony/em-http'
9
+ require 'redis/connection/synchrony'
10
+ require 'redis'
11
+
12
+ require 'yajl/json_gem'
13
+
14
+ require 'flapjack/data/entity_check'
15
+ require 'flapjack/pikelet'
16
+
17
+ module Flapjack
18
+
19
+ class Pagerduty
20
+
21
+ include Flapjack::Pikelet
22
+
23
+ def setup
24
+ @redis = build_redis_connection_pool
25
+ logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
26
+
27
+ @pagerduty_events_api_url = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
28
+ @pagerduty_acks_started = nil
29
+ @sem_pagerduty_acks_running = 'sem_pagerduty_acks_running'
30
+ end
31
+
32
+ def send_pagerduty_event(event)
33
+ options = { :body => Yajl::Encoder.encode(event) }
34
+ http = EM::HttpRequest.new(@pagerduty_events_api_url).post(options)
35
+ response = Yajl::Parser.parse(http.response)
36
+ status = http.response_header.status
37
+ logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
38
+ [status, response]
39
+ end
40
+
41
+ def test_pagerduty_connection
42
+ noop = { "service_key" => "11111111111111111111111111111111",
43
+ "incident_key" => "Flapjack is running a NOOP",
44
+ "event_type" => "nop",
45
+ "description" => "I love APIs with noops." }
46
+ code, results = send_pagerduty_event(noop)
47
+ return true if code == 200 && results['status'] =~ /success/i
48
+ logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
49
+ false
50
+ end
51
+
52
+ # this should be moved to a checks data model perhaps
53
+ def unacknowledged_failing_checks
54
+ failing_checks = @redis_timer.zrange('failed_checks', '0', '-1')
55
+ unless failing_checks.is_a?(Array)
56
+ @logger.error("redis.zrange returned something other than an array! Here it is: " + failing_checks.inspect)
57
+ end
58
+ ufc = failing_checks.reject {|check|
59
+ @redis_timer.exists(check + ':unscheduled_maintenance')
60
+ }
61
+ @logger.debug "found unacknowledged failing checks as follows: " + ufc.join(', ')
62
+ ufc
63
+ end
64
+
65
+ def pagerduty_acknowledged?(opts)
66
+ subdomain = opts['subdomain']
67
+ username = opts['username']
68
+ password = opts['password']
69
+ check = opts['check']
70
+
71
+ url = 'https://' + subdomain + '.pagerduty.com/api/v1/incidents'
72
+ query = { 'fields' => 'incident_number,status,last_status_change_by',
73
+ 'since' => (Time.new.utc - (60*60*24*7)).iso8601,
74
+ 'until' => (Time.new.utc + (60*60*24)).iso8601,
75
+ 'incident_key' => check,
76
+ 'status' => 'acknowledged' }
77
+
78
+ options = { :head => { 'authorization' => [username, password] },
79
+ :query => query }
80
+
81
+ http = EM::HttpRequest.new(url).get(options)
82
+ # DEBUG flapjack-pagerduty: pagerduty_acknowledged?: decoded response as:
83
+ # {"incidents"=>[{"incident_number"=>40, "status"=>"acknowledged",
84
+ # "last_status_change_by"=>{"id"=>"PO1NWPS", "name"=>"Jesse Reynolds",
85
+ # "email"=>"jesse@bulletproof.net",
86
+ # "html_url"=>"http://bltprf.pagerduty.com/users/PO1NWPS"}}], "limit"=>100, "offset"=>0,
87
+ # "total"=>1}
88
+ begin
89
+ response = Yajl::Parser.parse(http.response)
90
+ rescue Yajl::ParseError
91
+ @logger.error("failed to parse json from a post to #{url} ... response headers and body follows...")
92
+ @logger.error(http.response_header.inspect)
93
+ @logger.error(http.response)
94
+ return nil, nil
95
+ end
96
+ status = http.response_header.status
97
+
98
+ @logger.debug("pagerduty_acknowledged?: decoded response as: #{response.inspect}")
99
+ if response.nil?
100
+ @logger.error('no valid response received from pagerduty!')
101
+ return nil, nil
102
+ end
103
+
104
+ if response['incidents'].nil?
105
+ @logger.error('no incidents found in response')
106
+ return nil, nil
107
+ end
108
+
109
+ if response['incidents'].length > 0
110
+ pg_acknowledged_by = response['incidents'].first['last_status_change_by']
111
+ return true, :pg_acknowledged_by => pg_acknowledged_by
112
+ else
113
+ return false, nil
114
+ end
115
+ end
116
+
117
+ def catch_pagerduty_acks
118
+
119
+ # ensure we're the only instance of the pagerduty acknowledgement check running (with a naive
120
+ # timeout of five minutes to guard against stale locks caused by crashing code) either in this
121
+ # process or in other processes
122
+ if (@pagerduty_acks_started and @pagerduty_acks_started > (Time.now.to_i - 300)) or
123
+ @redis_timer.get(@sem_pagerduty_acks_running) == 'true'
124
+ logger.debug("skipping looking for acks in pagerduty as this is already happening")
125
+ return
126
+ end
127
+
128
+ @pagerduty_acks_started = Time.now.to_i
129
+ @redis_timer.set(@sem_pagerduty_acks_running, 'true')
130
+ @redis_timer.expire(@sem_pagerduty_acks_running, 300)
131
+
132
+ logger.debug("looking for acks in pagerduty for unack'd problems")
133
+
134
+ # ok lets do it
135
+ unacknowledged_failing_checks.each {|check|
136
+ entity_check = Flapjack::Data::EntityCheck.for_event_id(check, { :redis => @redis_timer, :logger => @logger } )
137
+ pagerduty_credentials = entity_check.pagerduty_credentials( { :redis => @redis_timer, :logger => @logger } )
138
+
139
+ if pagerduty_credentials.length == 0
140
+ @logger.debug("Found no pagerduty creditials for #{entity_check.entity_name}:#{entity_check.check}, moving on")
141
+ next
142
+ end
143
+
144
+ # FIXME: try each set of credentials until one works (may have stale contacts turning up)
145
+ options = pagerduty_credentials.first.merge('check' => check)
146
+
147
+ pagerduty_acknowledged, result_hash = pagerduty_acknowledged?(options)
148
+ if pagerduty_acknowledged
149
+ pg_acknowledged_by = result_hash[:pg_acknowledged_by] unless result_hash.nil?
150
+ @logger.debug "#{check} is acknowledged in pagerduty, creating flapjack acknowledgement ... "
151
+ who_text = ""
152
+ if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
153
+ who_text = " by #{pg_acknowledged_by['name']}"
154
+ end
155
+ entity_check.create_acknowledgement('summary' => "Acknowledged on PagerDuty" + who_text)
156
+ else
157
+ @logger.debug "#{check} is not acknowledged in pagerduty, moving on"
158
+ end
159
+ }
160
+ @redis_timer.del(@sem_pagerduty_acks_running)
161
+ @pagerduty_acks_started = nil
162
+ end
163
+
164
+ def add_shutdown_event(opts = {})
165
+ return unless redis = opts[:redis]
166
+ redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
167
+ end
168
+
169
+ def main
170
+ setup
171
+
172
+ logger.debug("pagerduty gateway - commencing main method")
173
+ raise "Can't connect to the pagerduty API" unless test_pagerduty_connection
174
+
175
+ # TODO: only clear this if there isn't another pagerduty gateway instance running
176
+ # or better, include an instance ID in the semaphore key name
177
+ @redis.del(@sem_pagerduty_acks_running)
178
+
179
+ acknowledgement_timer = EM::Synchrony.add_periodic_timer(10) do
180
+ @redis_timer ||= build_redis_connection_pool
181
+ catch_pagerduty_acks
182
+ end
183
+
184
+ queue = @config['queue']
185
+ events = {}
186
+
187
+ until should_quit?
188
+ logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
189
+ events[queue] = @redis.blpop(queue)
190
+ event = Yajl::Parser.parse(events[queue][1])
191
+ type = event['notification_type']
192
+ logger.debug("pagerduty notification event popped off the queue: " + event.inspect)
193
+ if 'shutdown'.eql?(type)
194
+ # do anything in particular?
195
+ else
196
+ event_id = event['event_id']
197
+ entity, check = event_id.split(':')
198
+ state = event['state']
199
+ summary = event['summary']
200
+ address = event['address']
201
+
202
+ case type.downcase
203
+ when 'acknowledgement'
204
+ maint_str = "has been acknowledged"
205
+ pagerduty_type = 'acknowledge'
206
+ when 'problem'
207
+ maint_str = "is #{state.upcase}"
208
+ pagerduty_type = "trigger"
209
+ when 'recovery'
210
+ maint_str = "is #{state.upcase}"
211
+ pagerduty_type = "resolve"
212
+ end
213
+
214
+ message = "#{type.upcase} - \"#{check}\" on #{entity} #{maint_str} - #{summary}"
215
+
216
+ pagerduty_event = { :service_key => address,
217
+ :incident_key => event_id,
218
+ :event_type => pagerduty_type,
219
+ :description => message }
220
+
221
+ send_pagerduty_event(pagerduty_event)
222
+ end
223
+ end
224
+
225
+ acknowledgement_timer.cancel
226
+ end
227
+
228
+ end
229
+ end
230
+