flapjack 0.6.43 → 0.6.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/flapjack +4 -2
- data/bin/flapjack-nagios-receiver +4 -1
- data/bin/flapjack-populator +4 -1
- data/etc/flapjack_config.yaml.example +111 -106
- data/features/steps/notifications_steps.rb +6 -6
- data/lib/flapjack/configuration.rb +76 -24
- data/lib/flapjack/coordinator.rb +27 -44
- data/lib/flapjack/data/entity.rb +28 -7
- data/lib/flapjack/data/entity_check.rb +18 -20
- data/lib/flapjack/executive.rb +5 -4
- data/lib/flapjack/gateways/api.rb +391 -0
- data/lib/flapjack/gateways/api/entity_check_presenter.rb +185 -0
- data/lib/flapjack/gateways/api/entity_presenter.rb +70 -0
- data/lib/flapjack/gateways/base.rb +38 -0
- data/lib/flapjack/{notification → gateways}/email.rb +4 -5
- data/lib/flapjack/{notification → gateways}/email/alert.html.haml +0 -0
- data/lib/flapjack/{notification → gateways}/email/alert.text.erb +0 -0
- data/lib/flapjack/gateways/jabber.rb +387 -0
- data/lib/flapjack/gateways/oobetet.rb +241 -0
- data/lib/flapjack/gateways/pagerduty.rb +247 -0
- data/lib/flapjack/{notification → gateways}/sms.rb +5 -6
- data/lib/flapjack/{notification → gateways}/sms/messagenet.rb +1 -1
- data/lib/flapjack/gateways/web.rb +293 -0
- data/lib/flapjack/{web → gateways/web}/views/_css.haml +0 -0
- data/lib/flapjack/{web → gateways/web}/views/_nav.haml +0 -0
- data/lib/flapjack/{web → gateways/web}/views/check.haml +0 -0
- data/lib/flapjack/{web → gateways/web}/views/contact.haml +0 -0
- data/lib/flapjack/{web → gateways/web}/views/contacts.haml +0 -0
- data/lib/flapjack/{web → gateways/web}/views/index.haml +0 -0
- data/lib/flapjack/{web → gateways/web}/views/self_stats.haml +0 -0
- data/lib/flapjack/pikelet.rb +0 -23
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/coordinator_spec.rb +56 -36
- data/spec/lib/flapjack/data/entity_spec.rb +53 -4
- data/spec/lib/flapjack/{api → gateways/api}/entity_check_presenter_spec.rb +10 -13
- data/spec/lib/flapjack/{api → gateways/api}/entity_presenter_spec.rb +10 -10
- data/spec/lib/flapjack/{api_spec.rb → gateways/api_spec.rb} +14 -14
- data/spec/lib/flapjack/gateways/email_spec.rb +6 -0
- data/spec/lib/flapjack/{jabber_spec.rb → gateways/jabber_spec.rb} +9 -9
- data/spec/lib/flapjack/{oobetet_spec.rb → gateways/oobetet_spec.rb} +10 -10
- data/spec/lib/flapjack/{pagerduty_spec.rb → gateways/pagerduty_spec.rb} +11 -11
- data/spec/lib/flapjack/gateways/sms_spec.rb +6 -0
- data/spec/lib/flapjack/{web_spec.rb → gateways/web_spec.rb} +4 -4
- metadata +46 -79
- data/bin/install-flapjack-systemwide +0 -58
- data/features/steps/flapjack-importer_steps.rb +0 -109
- data/features/steps/flapjack-worker_steps.rb +0 -68
- data/lib/flapjack/api.rb +0 -388
- data/lib/flapjack/api/entity_check_presenter.rb +0 -181
- data/lib/flapjack/api/entity_presenter.rb +0 -66
- data/lib/flapjack/cli/worker_manager.rb +0 -46
- data/lib/flapjack/inifile.rb +0 -44
- data/lib/flapjack/jabber.rb +0 -383
- data/lib/flapjack/notifier_engine.rb +0 -40
- data/lib/flapjack/notifiers/mailer/init.rb +0 -3
- data/lib/flapjack/notifiers/mailer/mailer.rb +0 -51
- data/lib/flapjack/notifiers/xmpp/init.rb +0 -3
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +0 -46
- data/lib/flapjack/oobetet.rb +0 -240
- data/lib/flapjack/pagerduty.rb +0 -242
- data/lib/flapjack/web.rb +0 -286
- data/spec.old/check_sandbox/echo +0 -3
- data/spec.old/check_sandbox/sandboxed_check +0 -5
- data/spec.old/configs/flapjack-notifier-couchdb.ini +0 -25
- data/spec.old/configs/flapjack-notifier.ini +0 -39
- data/spec.old/configs/recipients.ini +0 -14
- data/spec.old/helpers.rb +0 -15
- data/spec.old/inifile_spec.rb +0 -66
- data/spec.old/mock-notifiers/mock/init.rb +0 -3
- data/spec.old/mock-notifiers/mock/mock.rb +0 -19
- data/spec.old/notifier-directories/spoons/testmailer/init.rb +0 -20
- data/spec.old/notifier_application_spec.rb +0 -222
- data/spec.old/notifier_filters_spec.rb +0 -52
- data/spec.old/notifier_options_multiplexer_spec.rb +0 -71
- data/spec.old/notifier_options_spec.rb +0 -115
- data/spec.old/notifier_spec.rb +0 -57
- data/spec.old/notifiers/mailer_spec.rb +0 -36
- data/spec.old/notifiers/xmpp_spec.rb +0 -36
- data/spec.old/persistence/datamapper_spec.rb +0 -74
- data/spec.old/persistence/mock_persistence_backend.rb +0 -26
- data/spec.old/simple.ini +0 -6
- data/spec.old/spec.opts +0 -4
- data/spec.old/test-filters/blocker.rb +0 -13
- data/spec.old/test-filters/mock.rb +0 -13
- data/spec.old/transports/beanstalkd_spec.rb +0 -44
- data/spec.old/transports/mock_transport.rb +0 -58
- data/spec.old/worker_application_spec.rb +0 -62
- data/spec.old/worker_options_spec.rb +0 -83
- data/spec/lib/flapjack/notification/email_spec.rb +0 -6
- data/spec/lib/flapjack/notification/sms_spec.rb +0 -6
@@ -1,40 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'ostruct'
|
4
|
-
|
5
|
-
module Flapjack
|
6
|
-
# Fans a single check result out to every configured notifier, once per
# recipient.
#
# A notifier is any object responding to +notify+; it is called with a hash
# holding :result, :who and :event.
class NotifierEngine

  attr_reader :log, :notifiers

  # opts[:log]       - logger (required). Must respond to +info+ and to
  #                    either +warning+ or +warn+.
  # opts[:notifiers] - optional list of notifier objects.
  #
  # Raises RuntimeError when no logger is supplied.
  def initialize(opts={})
    @log = opts[:log]
    raise "you have to specify a logger" unless @log

    @notifiers = []
    if opts[:notifiers]
      opts[:notifiers].each do |n|
        @notifiers << n
        @log.info("using the #{n.class.to_s.split("::").last} notifier")
      end
    else
      log_warning("There are no notifiers! flapjack-notifier won't be useful.")
    end
  end

  # Sends +options[:result]+ / +options[:event]+ to every notifier for every
  # entry in +options[:recipients]+. A missing recipient list is treated as
  # empty, so nothing is sent rather than failing on nil.
  #
  # Raises ArgumentError when the result or the event is missing.
  # Returns the list of notifiers.
  def notify!(options={})
    result = options[:result]
    event = options[:event]
    recipients = options[:recipients] || []

    raise ArgumentError, "A result + event were not passed!" unless result && event

    @notifiers.each do |n|
      recipients.each do |recipient|
        @log.info("Notifying #{recipient.name} via #{n.class} about check #{result.check_id}")
        n.notify(:result => result, :who => recipient, :event => event)
      end
    end
  end

  private

  # Loggers differ on the name of the warning-level method: the standard
  # library Logger exposes +warn+ and has no +warning+. Keep using +warning+
  # when the supplied logger offers it, otherwise fall back to +warn+.
  def log_warning(message)
    if @log.respond_to?(:warning)
      @log.warning(message)
    else
      @log.warn(message)
    end
  end

end
|
40
|
-
end
|
@@ -1,51 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'net/smtp'
|
4
|
-
require 'tmail'
|
5
|
-
|
6
|
-
module Flapjack
|
7
|
-
module Notifiers
|
8
|
-
|
9
|
-
# Notifier that e-mails a check result to a recipient through an SMTP server.
class Mailer

  attr_accessor :log, :from_address

  # opts[:from_address] - sender address (required).
  # opts[:log]          - logger used to report delivery problems; the
  #                       :logger key is accepted as well, matching the key
  #                       the Xmpp notifier reads.
  # opts[:website_uri]  - base URI used to build the "respond" link.
  # opts[:smtp_host]    - SMTP server to deliver through (default 'localhost').
  #
  # Raises ArgumentError when no from address is given.
  def initialize(opts={})
    @log = opts[:log] || opts[:logger]
    @from_address = opts[:from_address]
    @website_uri = opts[:website_uri]
    @smtp_host = opts[:smtp_host] || 'localhost'

    raise ArgumentError, "from address must be provided" unless @from_address
  end

  # Builds and sends the notification mail.
  #
  # opts[:who]    - recipient; must respond to +email+.
  # opts[:result] - check result; must respond to +check_id+, +status+ and
  #                 +output+.
  #
  # Returns whatever +sendmail+ returned. When the SMTP server refuses the
  # connection the failure is logged (if a logger is configured) and the
  # logger's return value, or nil, is returned instead of raising.
  # Raises ArgumentError when the recipient or the result is missing.
  def notify(opts={})
    raise ArgumentError, "a recipient was not specified" unless opts[:who]
    raise ArgumentError, "a result was not specified" unless opts[:result]

    result = opts[:result]

    # potential FIXME: refactor TMail out entirely?
    mail = TMail::Mail.new
    mail.to = opts[:who].email
    mail.from = @from_address
    mail.subject = "Check: #{result.check_id}, Status: #{result.status}"
    mail.body = <<-DESC
Check #{result.check_id} returned the status "#{result.status}".

Here was the output:
#{result.output}

You can respond to this issue at:
#{@website_uri}/issue/#{result.check_id}
    DESC

    begin
      Net::SMTP.start(@smtp_host) do |smtp|
        return smtp.sendmail(mail.to_s, mail.from, mail.to)
      end
    rescue Errno::ECONNREFUSED
      # the logger is optional, so never let error reporting itself blow up
      @log.error("Couldn't establish connection to mail server!") if @log
    end
  end

end
|
50
|
-
end
|
51
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'xmpp4r'
|
4
|
-
|
5
|
-
module Flapjack
|
6
|
-
module Notifiers
|
7
|
-
class Xmpp
|
8
|
-
|
9
|
-
def initialize(opts={})
|
10
|
-
|
11
|
-
@jid = opts[:jid]
|
12
|
-
@password = opts[:password]
|
13
|
-
@log = opts[:logger]
|
14
|
-
unless @jid && @password
|
15
|
-
raise ArgumentError, "You have to provide a username and password"
|
16
|
-
end
|
17
|
-
|
18
|
-
begin
|
19
|
-
@xmpp = Jabber::Client.new(@jid)
|
20
|
-
@xmpp.connect
|
21
|
-
@xmpp.auth(@password)
|
22
|
-
rescue SocketError => e
|
23
|
-
@log.error("XMPP: #{e.message}")
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
def notify(opts={})
|
29
|
-
|
30
|
-
raise ArgumentError, "a recipient was not specified" unless opts[:who]
|
31
|
-
raise ArgumentError, "a result was not specified" unless opts[:result]
|
32
|
-
|
33
|
-
text = <<-DESC
|
34
|
-
Check #{opts[:result].check_id} returned the status "#{opts[:result].status}".
|
35
|
-
http://localhost:4000/checks/#{opts[:result].check_id}
|
36
|
-
DESC
|
37
|
-
|
38
|
-
message = Jabber::Message.new(opts[:who].jid, text)
|
39
|
-
@xmpp.send(message)
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
data/lib/flapjack/oobetet.rb
DELETED
@@ -1,240 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'socket'
|
4
|
-
|
5
|
-
require 'eventmachine'
|
6
|
-
require 'em-synchrony'
|
7
|
-
|
8
|
-
require 'blather/client/client'
|
9
|
-
require 'em-synchrony/fiber_iterator'
|
10
|
-
require 'yajl/json_gem'
|
11
|
-
|
12
|
-
require 'flapjack/pikelet'
|
13
|
-
require 'flapjack/utility'
|
14
|
-
|
15
|
-
module Flapjack
|
16
|
-
|
17
|
-
# Out-of-band end-to-end test ("oobetet") pikelet.
#
# Joins the configured jabber rooms and watches the group chat for problem,
# recovery and acknowledgement messages about one watched check. When no
# problem or no recovery message has been seen within the allowed latency it
# raises an alert in the rooms and, if configured, through PagerDuty.
class Oobetet < Blather::Client

  include Flapjack::GenericPikelet
  include Flapjack::Utility

  log = Logger.new(STDOUT)
  # log.level = Logger::DEBUG
  log.level = Logger::INFO
  Blather.logger = log

  # Reads the configuration, sets up the jabber client and initialises the
  # timestamps used by check_timers.
  #
  # Raises RuntimeError when watched_check or watched_entity is not configured.
  def setup
    @hostname = Socket.gethostname
    @flapjacktest_jid = Blather::JID.new((@config['jabberid'] || 'flapjacktest') + "/#{@hostname}:#{Process.pid}")

    super(@flapjacktest_jid, @config['password'], @config['server'], @config['port'].to_i)

    # never write the credential itself to the log, only whether one is set
    logger.debug("Building jabber connection with jabberid: " +
      @flapjacktest_jid.to_s + ", port: " + @config['port'].to_s +
      ", server: " + @config['server'].to_s + ", password: " +
      (@config['password'] ? '[REDACTED]' : '[not set]'))

    @pagerduty_events_api_url = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'

    if !@config['watched_check'] or !@config['watched_entity']
      raise RuntimeError, 'Flapjack::Oobetet: watched_check and watched_entity must be defined in the config'
    end

    @check_matcher = '"' + @config['watched_check'] + '" on ' + @config['watched_entity']
    @max_latency = @config['max_latency'] || 300
    @flapjack_ok = true

    # start every clock at "now" so a fresh start is not reported as a breach
    t = Time.now.to_i
    @times = { :last_problem  => t,
               :last_recovery => t,
               :last_ack      => t,
               :last_ack_sent => t }

    @last_alert = nil
  end

  # split out to ease testing
  def register_handlers
    register_handler :ready do |stanza|
      EventMachine::Synchrony.next_tick do
        on_ready(stanza)
      end
    end

    register_handler :message, :groupchat? do |stanza|
      EventMachine::Synchrony.next_tick do
        on_groupchat(stanza)
      end
    end

    register_handler :disconnected do |stanza|
      # on_disconnect runs on a later tick, so this block answers true
      # before that call has had a chance to reassign ret
      ret = true
      EventMachine::Synchrony.next_tick do
        ret = on_disconnect(stanza)
      end
      ret
    end
  end

  # Join the MUC Chat room after connecting.
  def on_ready(stanza)
    return if should_quit?
    @connected_at = Time.now.to_i
    logger.info("Jabber Connected")
    if @config['rooms'] && @config['rooms'].length > 0
      @config['rooms'].each do |room|
        logger.info("Joining room #{room}")
        presence = Blather::Stanza::Presence.new
        presence.from = @flapjacktest_jid
        presence.to = Blather::JID.new("#{room}/#{@config['alias']}")
        presence << "<x xmlns='http://jabber.org/protocol/muc'/>"
        write presence
        say(room, "flapjack self monitoring (oobetet) started at #{Time.now}, g'day!", :groupchat)
      end
    end
  end

  # returning true to prevent the reactor loop from stopping
  def on_disconnect(stanza)
    return true if should_quit?
    logger.warn("jabbers disconnected! reconnecting in 1 second ...")
    EventMachine::Timer.new(1) do
      connect # Blather::Client.connect
    end
    true
  end

  # Records when a problem, recovery or acknowledgement message about the
  # watched check was last seen in a group chat.
  def on_groupchat(stanza)
    return if should_quit?

    stanza_body = stanza.body

    # interpolate rather than concatenate: a stanza without a body must not
    # raise here
    logger.debug("groupchat stanza body: #{stanza_body}")
    logger.debug("groupchat message received: #{stanza.inspect}")

    if stanza_body &&
      (stanza_body =~ /^(?:problem|recovery|acknowledgement)/i) &&
      (stanza_body =~ /^(\w+).*#{Regexp.escape(@check_matcher)}/)

      # got something interesting
      status = $1.downcase
      t = Time.now.to_i
      logger.debug("groupchat found the following state for #{@check_matcher}: #{status}")

      case status
      when 'problem'
        logger.debug("updating @times last_problem")
        @times[:last_problem] = t
      when 'recovery'
        logger.debug("updating @times last_recovery")
        @times[:last_recovery] = t
      when 'acknowledgement'
        logger.debug("updating @times last_ack")
        @times[:last_ack] = t
      end
    end
    logger.debug("@times: #{@times.inspect}")
  end

  # Compares the last-seen timestamps against the allowed latency and sends
  # an alert on a breach, or an all-clear once a previous breach has gone.
  def check_timers
    t = Time.now.to_i
    breach = nil
    @logger.debug("check_timers: inspecting @times #{@times.inspect}")
    case
    when @times[:last_problem] < (t - @max_latency)
      breach = "haven't seen a test problem notification in the last #{@max_latency} seconds"
    when @times[:last_recovery] < (t - @max_latency)
      breach = "haven't seen a test recovery notification in the last #{@max_latency} seconds"
    end

    # previously failing and now clean: announce the recovery
    unless @flapjack_ok || breach
      emit_jabber("Flapjack Self Monitoring is OK")
      emit_pagerduty("Flapjack Self Monitoring is OK", 'resolve')
    end

    @flapjack_ok = !breach

    return unless breach
    @logger.error("Self monitoring has detected the following breach: #{breach}")
    summary  = "Flapjack Self Monitoring is Critical: #{breach} for #{@check_matcher}, "
    summary += "from #{@hostname} at #{Time.now}"

    if !@last_alert or @last_alert < (t - 55)

      emit_jabber(summary)
      emit_pagerduty(summary, 'trigger')

      # Deliberately the same condition again: both emit_* methods refresh
      # @last_alert when they deliver, so it still holding here means neither
      # channel got the alert out.
      if !@last_alert or @last_alert < (t - 55)
        msg  = "NOTICE: Self monitoring has detected a failure and is unable to tell "
        msg += "anyone about it. DON'T PANIC."
        @logger.error msg
      end

    end
  end

  # Says +summary+ in every configured room and records the alert time.
  # Does nothing when no rooms are configured.
  def emit_jabber(summary)
    if @config['rooms'] && @config['rooms'].length > 0
      @config['rooms'].each do |room|
        say(room, summary, :groupchat)
      end
      @last_alert = Time.now.to_i
    end
  end

  # Sends +summary+ to PagerDuty as an event of +event_type+ and records the
  # alert time when PagerDuty answers with status 200. Does nothing when no
  # pagerduty_contact is configured.
  def emit_pagerduty(summary, event_type = 'trigger')
    if @config['pagerduty_contact']
      pagerduty_event = { :service_key  => @config['pagerduty_contact'],
                          :incident_key => "Flapjack Self Monitoring from #{@hostname}",
                          :event_type   => event_type,
                          :description  => summary }
      status, response = send_pagerduty_event(pagerduty_event)
      if status == 200
        @logger.debug("successfully sent pagerduty event")
        @last_alert = Time.now.to_i
      else
        @logger.error("pagerduty returned #{status} #{response.inspect}")
      end
    end
  end

  # Writes a jabber message stanza to +to+; +using+ is the message type.
  def say(to, msg, using = :chat)
    @logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
    write Blather::Stanza::Message.new(to, msg, using)
  end

  # Posts +event+ as JSON to the PagerDuty events API.
  #
  # Returns [status, response] where response is the decoded body, or nil
  # when the body is not valid JSON.
  def send_pagerduty_event(event)
    options  = { :body => Yajl::Encoder.encode(event) }
    http     = EM::HttpRequest.new(@pagerduty_events_api_url).post(options)
    status   = http.response_header.status
    response = begin
      Yajl::Parser.parse(http.response)
    rescue Yajl::ParseError
      # an undecodable body must not take the monitoring loop down
      logger.error("failed to parse json from a post to #{@pagerduty_events_api_url}")
      nil
    end
    logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
    [status, response]
  end

  # Pikelet entry point: connects, then checks the timers every ten seconds
  # until asked to quit.
  def main
    logger.debug("New oobetet pikelet with the following options: #{@config.inspect}")

    keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
      logger.debug("calling keepalive on the jabber connection")
      write(' ') if connected?
    end

    setup
    register_handlers
    connect # Blather::Client.connect

    until should_quit?
      EM::Synchrony.sleep(10)
      check_timers
    end

    keepalive_timer.cancel
  end

end
|
237
|
-
end
|
238
|
-
|
239
|
-
|
240
|
-
|
data/lib/flapjack/pagerduty.rb
DELETED
@@ -1,242 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'em-synchrony'
|
4
|
-
require 'em-synchrony/em-http'
|
5
|
-
|
6
|
-
require 'yajl/json_gem'
|
7
|
-
|
8
|
-
require 'flapjack/data/entity_check'
|
9
|
-
require 'flapjack/data/global'
|
10
|
-
require 'flapjack/pikelet'
|
11
|
-
require 'flapjack/redis_pool'
|
12
|
-
|
13
|
-
module Flapjack
|
14
|
-
|
15
|
-
# PagerDuty gateway pikelet.
#
# Pops notification events off a redis queue and forwards them to the
# PagerDuty events API. A periodic timer also asks PagerDuty whether any
# unacknowledged failing check has been acknowledged there and, if so,
# records the acknowledgement in flapjack.
class Pagerduty

  include Flapjack::GenericPikelet

  PAGERDUTY_EVENTS_API_URL   = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
  SEM_PAGERDUTY_ACKS_RUNNING = 'sem_pagerduty_acks_running'

  alias_method :generic_bootstrap, :bootstrap
  alias_method :generic_cleanup,   :cleanup

  # Runs the generic bootstrap, then opens this pikelet's redis pool.
  #
  # opts[:redis_config] - configuration handed to Flapjack::RedisPool.
  def bootstrap(opts = {})
    generic_bootstrap(opts)

    @redis_config = opts[:redis_config]
    @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)

    logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")

    @pagerduty_acks_started = nil
  end

  # Empties both redis pools (when present), then runs the generic cleanup.
  def cleanup
    @redis.empty! if @redis
    @redis_timer.empty! if @redis_timer
    generic_cleanup
  end

  # Pushes a shutdown marker onto the notification queue so the blocking pop
  # in main returns. Does nothing without opts[:redis].
  def add_shutdown_event(opts = {})
    return unless redis = opts[:redis]
    redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
  end

  # Pikelet entry point: forwards queued notification events to PagerDuty
  # until asked to quit.
  #
  # Raises RuntimeError when the PagerDuty API cannot be reached at start-up.
  def main
    logger.debug("pagerduty gateway - commencing main method")
    raise "Can't connect to the pagerduty API" unless test_pagerduty_connection

    # TODO: only clear this if there isn't another pagerduty gateway instance running
    # or better, include an instance ID in the semaphore key name
    @redis.del(SEM_PAGERDUTY_ACKS_RUNNING)

    acknowledgement_timer = EM::Synchrony.add_periodic_timer(10) do
      @redis_timer ||= Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
      find_pagerduty_acknowledgements_if_safe
    end

    queue = @config['queue']
    events = {}

    until should_quit?
      logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
      events[queue] = @redis.blpop(queue, 0)
      event = Yajl::Parser.parse(events[queue][1])
      type = event['notification_type']
      logger.debug("pagerduty notification event popped off the queue: " + event.inspect)

      # a shutdown marker only exists to wake the blocking pop up
      next if 'shutdown'.eql?(type)

      event_id = event['event_id']
      # split on the first colon only, so a check name containing a colon
      # stays intact
      entity, check = event_id.split(':', 2)
      state = event['state']
      summary = event['summary']
      address = event['address']

      headline = type.upcase

      case type.downcase
      when 'acknowledgement'
        maint_str = "has been acknowledged"
        pagerduty_type = 'acknowledge'
      when 'problem'
        maint_str = "is #{state.upcase}"
        pagerduty_type = "trigger"
      when 'recovery'
        maint_str = "is #{state.upcase}"
        pagerduty_type = "resolve"
      when 'test'
        maint_str = ""
        pagerduty_type = "trigger"
        headline = "TEST NOTIFICATION"
      else
        # there is no PagerDuty event type to map this onto
        logger.warn("pagerduty gateway ignoring notification of unknown type: #{type}")
        next
      end

      # headline equals type.upcase for every type except 'test'
      message = "#{headline} - \"#{check}\" on #{entity} #{maint_str} - #{summary}"

      pagerduty_event = { :service_key  => address,
                          :incident_key => event_id,
                          :event_type   => pagerduty_type,
                          :description  => message }

      send_pagerduty_event(pagerduty_event)
    end

    acknowledgement_timer.cancel
  end

  # considering this as part of the public API -- exposes it for testing.
  def find_pagerduty_acknowledgements_if_safe

    # ensure we're the only instance of the pagerduty acknowledgement check running (with a naive
    # timeout of five minutes to guard against stale locks caused by crashing code) either in this
    # process or in other processes
    if (@pagerduty_acks_started and @pagerduty_acks_started > (Time.now.to_i - 300)) or
        @redis_timer.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
      logger.debug("skipping looking for acks in pagerduty as this is already happening")
      return
    end

    @pagerduty_acks_started = Time.now.to_i
    @redis_timer.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
    @redis_timer.expire(SEM_PAGERDUTY_ACKS_RUNNING, 300)

    begin
      find_pagerduty_acknowledgements
    ensure
      # release the lock even when the scan raises, instead of leaving it
      # held until the five minute timeout
      @redis_timer.del(SEM_PAGERDUTY_ACKS_RUNNING)
      @pagerduty_acks_started = nil
    end
  end

  private

  # Sends a no-op event to PagerDuty.
  #
  # Returns true when the API answers 200 with a success status, false
  # (after logging) otherwise.
  def test_pagerduty_connection
    noop = { "service_key"  => "11111111111111111111111111111111",
             "incident_key" => "Flapjack is running a NOOP",
             "event_type"   => "nop",
             "description"  => "I love APIs with noops." }
    code, results = send_pagerduty_event(noop)
    # results is nil when the body could not be decoded
    return true if code == 200 && results.is_a?(Hash) && results['status'] =~ /success/i
    logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
    false
  end

  # Posts +event+ as JSON to the PagerDuty events API.
  #
  # Returns [status, response] where response is the decoded body, or nil
  # when the body is not valid JSON.
  def send_pagerduty_event(event)
    options  = { :body => Yajl::Encoder.encode(event) }
    http     = EM::HttpRequest.new(PAGERDUTY_EVENTS_API_URL).post(options)
    status   = http.response_header.status
    response = begin
      Yajl::Parser.parse(http.response)
    rescue Yajl::ParseError
      logger.error("failed to parse json from a post to #{PAGERDUTY_EVENTS_API_URL}")
      nil
    end
    logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
    [status, response]
  end

  # Creates a flapjack acknowledgement for every unacknowledged failing check
  # that PagerDuty reports as acknowledged.
  def find_pagerduty_acknowledgements

    logger.debug("looking for acks in pagerduty for unack'd problems")

    unacknowledged_failing_checks = Flapjack::Data::Global.unacknowledged_failing_checks(:redis => @redis_timer)

    @logger.debug "found unacknowledged failing checks as follows: " + unacknowledged_failing_checks.join(', ')

    unacknowledged_failing_checks.each do |entity_check|
      pagerduty_credentials = entity_check.pagerduty_credentials(:redis => @redis_timer)
      check = entity_check.check

      if pagerduty_credentials.empty?
        @logger.debug("No pagerduty credentials found for #{entity_check.entity_name}:#{check}, skipping")
        next
      end

      # main creates incidents keyed on the full "entity:check" event id, so
      # the lookup has to use that same key to find them again
      incident_key = "#{entity_check.entity_name}:#{check}"

      # FIXME: try each set of credentials until one works (may have stale contacts turning up)
      options = pagerduty_credentials.first.merge('check' => incident_key)

      acknowledged = pagerduty_acknowledged?(options)
      if acknowledged.nil?
        @logger.debug "#{check} is not acknowledged in pagerduty, skipping"
        next
      end

      pg_acknowledged_by = acknowledged[:pg_acknowledged_by]
      @logger.debug "#{check} is acknowledged in pagerduty, creating flapjack acknowledgement... "
      who_text = ""
      if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
        who_text = " by #{pg_acknowledged_by['name']}"
      end
      entity_check.create_acknowledgement('summary' => "Acknowledged on PagerDuty" + who_text)
    end

  end

  # Asks the PagerDuty incidents API for an acknowledged incident whose
  # incident key is opts['check'].
  #
  # opts - hash with 'subdomain', 'username', 'password' and 'check'.
  #
  # Returns { :pg_acknowledged_by => <last_status_change_by of the first
  # incident> }, or nil when there is no such incident or the response is
  # unusable.
  def pagerduty_acknowledged?(opts)
    subdomain   = opts['subdomain']
    username    = opts['username']
    password    = opts['password']
    check       = opts['check']

    t = Time.now.utc

    # NOTE(review): Time#iso8601 comes from the 'time' standard library
    # extension; this file does not require it itself -- confirm it is
    # loaded elsewhere.
    url = 'https://' + subdomain + '.pagerduty.com/api/v1/incidents'
    query = { 'fields'       => 'incident_number,status,last_status_change_by',
              'since'        => (t - (60*60*24*7)).iso8601, # the last week
              'until'        => (t + (60*60*24)).iso8601,   # 1 day in the future
              'incident_key' => check,
              'status'       => 'acknowledged' }

    options = { :head  => { 'authorization' => [username, password] },
                :query => query }

    http = EM::HttpRequest.new(url).get(options)
    # DEBUG flapjack-pagerduty: pagerduty_acknowledged?: decoded response as:
    # {"incidents"=>[{"incident_number"=>40, "status"=>"acknowledged",
    # "last_status_change_by"=>{"id"=>"PO1NWPS", "name"=>"Jesse Reynolds",
    # "email"=>"jesse@bulletproof.net",
    # "html_url"=>"http://bltprf.pagerduty.com/users/PO1NWPS"}}], "limit"=>100, "offset"=>0,
    # "total"=>1}
    begin
      response = Yajl::Parser.parse(http.response)
    rescue Yajl::ParseError
      @logger.error("failed to parse json from a get to #{url} ... response headers and body follows...")
      @logger.error(http.response_header.inspect)
      @logger.error(http.response)
      return nil
    end

    @logger.debug("pagerduty_acknowledged?: decoded response as: #{response.inspect}")
    if response.nil?
      @logger.error('no valid response received from pagerduty!')
      return nil
    end

    if response['incidents'].nil?
      @logger.error('no incidents found in response')
      return nil
    end

    return nil if response['incidents'].empty?

    {:pg_acknowledged_by => response['incidents'].first['last_status_change_by']}
  end

end
|
241
|
-
end
|
242
|
-
|