flapjack 1.5.0 → 1.6.0rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/features/rollup.feature +46 -0
- data/features/steps/events_steps.rb +10 -0
- data/lib/flapjack/data/contact.rb +18 -10
- data/lib/flapjack/data/entity.rb +1 -1
- data/lib/flapjack/data/entity_check.rb +0 -1
- data/lib/flapjack/data/migration.rb +14 -0
- data/lib/flapjack/data/notification.rb +3 -3
- data/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods.rb +4 -1
- data/lib/flapjack/gateways/pagerduty.rb +31 -16
- data/lib/flapjack/gateways/web.rb +8 -2
- data/lib/flapjack/gateways/web/views/check.html.erb +26 -0
- data/lib/flapjack/gateways/web/views/contact.html.erb +3 -1
- data/lib/flapjack/redis_pool.rb +2 -0
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/data/contact_spec.rb +13 -9
- data/spec/lib/flapjack/data/migration_spec.rb +41 -0
- data/spec/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods_spec.rb +3 -2
- data/spec/lib/flapjack/gateways/pagerduty_spec.rb +50 -10
- data/spec/lib/flapjack/gateways/web/views/check.html.erb_spec.rb +5 -0
- data/spec/lib/flapjack/gateways/web_spec.rb +2 -1
- data/spec/lib/flapjack/redis_pool_spec.rb +1 -0
- data/spec/service_consumers/pacts/flapjack-diner_v1.0.json +12 -18
- data/spec/service_consumers/provider_states_for_flapjack-diner.rb +3 -6
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a5aa06a9caf7b8c5b81d7b5b5245f8b623cda1b
|
4
|
+
data.tar.gz: 5d10f49f09e6ab9e87da21537d80add6409708a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4d57fdb2ee84f2a56118808d390fcea5087ca7b68c36c4588d8a352d490d98cccba536509606189524515800fd0b559a6b30dfeb2ab7ccc552a035dad5cd9a5
|
7
|
+
data.tar.gz: fe92aeefa1f66b21d690d7902f775b175a1d95fc09ae601107bd8aad5fbcfe28d784be65f561387b7c5192c87f45b4d9b52e8457cd935b226606c14f37b73dcf
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
## Flapjack Changelog
|
2
2
|
|
3
|
+
# 1.6.0 - 2015-05-13
|
4
|
+
- Feature: use token authentication for pagerduty gateway #831 (@alperkokmen)
|
5
|
+
- Feature: expose failure delays in web UI #849 (@jessereynolds)
|
6
|
+
- Bug: performance improvement - fix usage of KEYS command for entity check names c57d3a5 (@ali-graham)
|
7
|
+
- Bug: remove disabled checks from rollup calculations #843 (@ali-graham)
|
8
|
+
- Bug: fall back to basic auth for pagerduty incidents api #853 (@jessereynolds)
|
9
|
+
- Chore: pagerduty ack retrieval improvements #858 (@jessereynolds)
|
10
|
+
|
3
11
|
# 1.5.0 - 2015-03-31
|
4
12
|
- No changes
|
5
13
|
|
data/features/rollup.feature
CHANGED
@@ -175,6 +175,52 @@ Feature: Rollup on a per contact, per media basis
|
|
175
175
|
Then 2 sms alerts should be queued for +61400000001
|
176
176
|
And 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
177
177
|
|
178
|
+
@time
|
179
|
+
Scenario: Disabling a failing check suppresses rollup
|
180
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
181
|
+
And check 'ping' for entity 'baz' is in an ok state
|
182
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
183
|
+
And 1 minute passes
|
184
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
185
|
+
Then 1 sms alert should be queued for +61400000001
|
186
|
+
Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
187
|
+
When 5 minutes passes
|
188
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
189
|
+
And 1 minute passes
|
190
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
191
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
192
|
+
And 2 sms alerts should be queued for +61400000001
|
193
|
+
When check 'ping' on entity 'foo' is disabled
|
194
|
+
And 30 minutes passes
|
195
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
196
|
+
Then 1 sms alert of rollup recovery should be queued for +61400000001
|
197
|
+
|
198
|
+
@time
|
199
|
+
Scenario: Enabling a failing check promotes rollup
|
200
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
201
|
+
And check 'ping' for entity 'baz' is in an ok state
|
202
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
203
|
+
And 1 minute passes
|
204
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
205
|
+
Then 1 sms alert should be queued for +61400000001
|
206
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
207
|
+
When check 'ping' for entity 'foo' is disabled
|
208
|
+
And 5 minutes passes
|
209
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
210
|
+
And 1 minute passes
|
211
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
212
|
+
Then 2 sms alerts should be queued for +61400000001
|
213
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
214
|
+
Then 1 sms alert of type problem and rollup recovery should be queued for +61400000001
|
215
|
+
When 1 hour passes
|
216
|
+
And check 'ping' on entity 'foo' is enabled
|
217
|
+
And 5 minutes passes
|
218
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
219
|
+
Then 3 sms alerts should be queued for +61400000001
|
220
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
221
|
+
Then 1 sms alert of type problem and rollup recovery should be queued for +61400000001
|
222
|
+
And 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
223
|
+
|
178
224
|
@time
|
179
225
|
Scenario: Contact ceases to be a contact on an entity that they were being alerted for
|
180
226
|
Given check 'ping' for entity 'foo' is in an ok state
|
@@ -279,6 +279,16 @@ When /^the unscheduled maintenance is ended(?: for check '([\w\.\-]+)' on entity
|
|
279
279
|
end_unscheduled_maintenance(entity, check)
|
280
280
|
end
|
281
281
|
|
282
|
+
When /^check '([\w\.\-]+)' (?:for|on) entity '([\w\.\-]+)' is (dis|en)abled$/ do |check, entity, dis_en|
|
283
|
+
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
284
|
+
case dis_en
|
285
|
+
when 'dis'
|
286
|
+
entity_check.disable!
|
287
|
+
when 'en'
|
288
|
+
entity_check.enable!
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
282
292
|
# TODO logging is a side-effect, should test for notification generation itself
|
283
293
|
Then /^a notification should not be generated(?: for check '([\w\.\-]+)' on entity '([\w\.\-]+)')?$/ do |check, entity|
|
284
294
|
check ||= @check
|
@@ -160,7 +160,7 @@ module Flapjack
|
|
160
160
|
def set_pagerduty_credentials(details)
|
161
161
|
@redis.hset("contact_media:#{self.id}", 'pagerduty', details['service_key'])
|
162
162
|
@redis.hmset("contact_pagerduty:#{self.id}",
|
163
|
-
*['subdomain', 'username', 'password'].collect {|f| [f, details[f]]})
|
163
|
+
*['subdomain', 'token', 'username', 'password'].collect {|f| [f, details[f]]})
|
164
164
|
end
|
165
165
|
|
166
166
|
def delete_pagerduty_credentials
|
@@ -387,25 +387,29 @@ module Flapjack
|
|
387
387
|
end
|
388
388
|
end
|
389
389
|
|
390
|
-
def add_alerting_check_for_media(media,
|
391
|
-
@redis.zadd("contact_alerting_checks:#{self.id}:media:#{media}", Time.now.to_i,
|
390
|
+
def add_alerting_check_for_media(media, event_id)
|
391
|
+
@redis.zadd("contact_alerting_checks:#{self.id}:media:#{media}", Time.now.to_i, event_id)
|
392
392
|
end
|
393
393
|
|
394
394
|
def remove_alerting_check_for_media(media, check)
|
395
395
|
@redis.zrem("contact_alerting_checks:#{self.id}:media:#{media}", check)
|
396
396
|
end
|
397
397
|
|
398
|
-
# removes any checks that are in ok, scheduled or unscheduled maintenance
|
399
|
-
# from the alerting checks set for the given media
|
400
|
-
# returns the
|
398
|
+
# removes any checks that are in ok, scheduled or unscheduled maintenance,
|
399
|
+
# or are disabled from the alerting checks set for the given media;
|
400
|
+
# returns whether this cleaning moved the medium from rollup to recovery
|
401
401
|
def clean_alerting_checks_for_media(media)
|
402
|
-
key = "contact_alerting_checks:#{self.id}:media:#{media}"
|
403
402
|
cleaned = 0
|
404
|
-
|
403
|
+
|
404
|
+
alerting_checks = alerting_checks_for_media(media)
|
405
|
+
rollup_threshold = rollup_threshold_for_media(media)
|
406
|
+
|
407
|
+
alerting_checks.each do |check|
|
405
408
|
entity_check = Flapjack::Data::EntityCheck.for_event_id(check, :redis => @redis)
|
406
409
|
next unless Flapjack::Data::EntityCheck.state_for_event_id?(check, :redis => @redis) == 'ok' ||
|
407
410
|
Flapjack::Data::EntityCheck.in_unscheduled_maintenance_for_event_id?(check, :redis => @redis) ||
|
408
411
|
Flapjack::Data::EntityCheck.in_scheduled_maintenance_for_event_id?(check, :redis => @redis) ||
|
412
|
+
!entity_check.enabled? ||
|
409
413
|
!entity_check.contacts.map {|c| c.id}.include?(self.id)
|
410
414
|
|
411
415
|
# FIXME: why can't i get this logging when called from notifier (notification.rb)?
|
@@ -413,7 +417,11 @@ module Flapjack
|
|
413
417
|
remove_alerting_check_for_media(media, check)
|
414
418
|
cleaned += 1
|
415
419
|
end
|
416
|
-
|
420
|
+
|
421
|
+
return false if rollup_threshold.nil? || (rollup_threshold <= 0) ||
|
422
|
+
(alerting_checks.size < rollup_threshold)
|
423
|
+
|
424
|
+
return(cleaned > (alerting_checks.size - rollup_threshold))
|
417
425
|
end
|
418
426
|
|
419
427
|
def alerting_checks_for_media(media)
|
@@ -498,7 +506,7 @@ module Flapjack
|
|
498
506
|
when 'pagerduty'
|
499
507
|
redis.hset("contact_media:#{contact_id}", medium, details['service_key'])
|
500
508
|
redis.hmset("contact_pagerduty:#{contact_id}",
|
501
|
-
*['subdomain', 'username', 'password'].collect {|f| [f, details[f]]})
|
509
|
+
*['subdomain', 'token', 'username', 'password'].collect {|f| [f, details[f]]})
|
502
510
|
else
|
503
511
|
redis.hset("contact_media:#{contact_id}", medium, details['address'])
|
504
512
|
redis.hset("contact_media_intervals:#{contact_id}", medium, details['interval']) if details['interval']
|
data/lib/flapjack/data/entity.rb
CHANGED
@@ -605,7 +605,7 @@ module Flapjack
|
|
605
605
|
entity_name = redis.hget('all_entity_names_by_id', entity_id)
|
606
606
|
next memo if entity_name.nil? || entity_name.empty?
|
607
607
|
en = Regexp.escape(entity_name)
|
608
|
-
check_names = redis.
|
608
|
+
check_names = redis.zrange("all_checks:#{entity_name}", 0, -1) |
|
609
609
|
Flapjack::Data::EntityCheck.find_current_names_for_entity_name(entity_name, :redis => redis)
|
610
610
|
memo[entity_id] = check_names.map {|cn| "#{entity_name}:#{cn}"}
|
611
611
|
memo
|
@@ -681,7 +681,6 @@ module Flapjack
|
|
681
681
|
@redis.zadd("all_checks:#{entity_name}", timestamp, check)
|
682
682
|
@redis.zrem("current_checks:#{entity_name}", check)
|
683
683
|
if @redis.zcount("current_checks:#{entity_name}", '-inf', '+inf') == 0
|
684
|
-
@redis.zrem("current_checks:#{entity_name}", check)
|
685
684
|
@redis.zrem("current_entities", entity.name)
|
686
685
|
end
|
687
686
|
end
|
@@ -215,6 +215,20 @@ module Flapjack
|
|
215
215
|
redis.set('validated_scheduled_maintenance_periods', 'true')
|
216
216
|
end
|
217
217
|
|
218
|
+
def self.correct_rollup_including_disabled_checks(options = {})
|
219
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
220
|
+
logger = options[:logger]
|
221
|
+
return if redis.exists('corrected_rollup_including_disabled_checks')
|
222
|
+
|
223
|
+
Flapjack::Data::Contact.all(:redis => redis).each do |contact|
|
224
|
+
contact.media_list.each do |medium|
|
225
|
+
contact.clean_alerting_checks_for_media(medium)
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
logger.warn "Corrected rollup to no longer include disabled checks" unless logger.nil?
|
230
|
+
redis.set('corrected_rollup_including_disabled_checks', 'true')
|
231
|
+
end
|
218
232
|
end
|
219
233
|
end
|
220
234
|
end
|
@@ -221,8 +221,8 @@ module Flapjack
|
|
221
221
|
contact.add_alerting_check_for_media(media, @event_id) unless ok? || acknowledgement? || test?
|
222
222
|
|
223
223
|
# expunge checks in (un)scheduled maintenance from the alerting set
|
224
|
-
|
225
|
-
logger.debug("cleaned alerting checks for #{media}: #{
|
224
|
+
recovered = contact.clean_alerting_checks_for_media(media)
|
225
|
+
logger.debug("cleaned alerting checks for #{media}: recovered? #{recovered}")
|
226
226
|
|
227
227
|
# pagerduty is an example of a medium which should never be rolled up
|
228
228
|
unless ['pagerduty'].include?(media)
|
@@ -236,7 +236,7 @@ module Flapjack
|
|
236
236
|
next ret if contact.drop_rollup_notifications_for_media?(media)
|
237
237
|
contact.update_sent_rollup_alert_keys_for_media(media, :delete => ok?)
|
238
238
|
rollup_type = 'problem'
|
239
|
-
when
|
239
|
+
when recovered
|
240
240
|
# alerting checks was just cleaned such that it is now below the rollup threshold
|
241
241
|
contact.update_sent_rollup_alert_keys_for_media(media, :delete => true)
|
242
242
|
rollup_type = 'recovery'
|
@@ -30,7 +30,7 @@ module Flapjack
|
|
30
30
|
halt err(422, "No valid pagerduty credentials were submitted")
|
31
31
|
end
|
32
32
|
|
33
|
-
fields = ['service_key', 'subdomain', 'username', 'password']
|
33
|
+
fields = ['service_key', 'subdomain', 'token', 'username', 'password']
|
34
34
|
|
35
35
|
pagerduty_credential = pagerduty_credentials_data.last
|
36
36
|
|
@@ -102,6 +102,9 @@ module Flapjack
|
|
102
102
|
when 'subdomain'
|
103
103
|
pd_data['subdomain'] = value
|
104
104
|
contact.set_pagerduty_credentials(pd_data)
|
105
|
+
when 'token'
|
106
|
+
pd_data['token'] = value
|
107
|
+
contact.set_pagerduty_credentials(pd_data)
|
105
108
|
when 'username'
|
106
109
|
pd_data['username'] = value
|
107
110
|
contact.set_pagerduty_credentials(pd_data)
|
@@ -16,6 +16,7 @@ module Flapjack
|
|
16
16
|
class Pagerduty
|
17
17
|
PAGERDUTY_EVENTS_API_URL = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
|
18
18
|
SEM_PAGERDUTY_ACKS_RUNNING = 'sem_pagerduty_acks_running'
|
19
|
+
SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT = 3600
|
19
20
|
|
20
21
|
include Flapjack::Utility
|
21
22
|
|
@@ -129,7 +130,8 @@ module Flapjack
|
|
129
130
|
# ensure we're the only instance of the pagerduty acknowledgement check running (with a naive
|
130
131
|
# timeout of five minutes to guard against stale locks caused by crashing code) either in this
|
131
132
|
# process or in other processes
|
132
|
-
if (@pagerduty_acks_started and @pagerduty_acks_started >
|
133
|
+
if (@pagerduty_acks_started and @pagerduty_acks_started >
|
134
|
+
(Time.now.to_i - SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT)) or
|
133
135
|
@redis.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
|
134
136
|
@logger.debug("skipping looking for acks in pagerduty as this is already happening")
|
135
137
|
return
|
@@ -137,7 +139,7 @@ module Flapjack
|
|
137
139
|
|
138
140
|
@pagerduty_acks_started = Time.now.to_i
|
139
141
|
@redis.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
|
140
|
-
@redis.expire(SEM_PAGERDUTY_ACKS_RUNNING,
|
142
|
+
@redis.expire(SEM_PAGERDUTY_ACKS_RUNNING, SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT)
|
141
143
|
|
142
144
|
find_pagerduty_acknowledgements
|
143
145
|
|
@@ -200,19 +202,25 @@ module Flapjack
|
|
200
202
|
# FIXME: try each set of credentials until one works (may have stale contacts turning up)
|
201
203
|
options = ec_credentials.first.merge('check' => "#{entity_name}:#{check}")
|
202
204
|
|
203
|
-
|
204
|
-
if
|
205
|
-
|
205
|
+
# check again that the check is still unacknowledged
|
206
|
+
if entity_check.in_unscheduled_maintenance?
|
207
|
+
# skip this one
|
208
|
+
@logger.warn "#{entity_name}:#{check} seems to have been acknowledged by " +
|
209
|
+
"some other process while I've been running. Cancelling acknowledgement creation"
|
206
210
|
next
|
207
211
|
end
|
208
212
|
|
209
|
-
# check again that the check is still
|
210
|
-
unless
|
211
|
-
"#{ec.entity_name}:#{ec.check}"
|
212
|
-
}.include?("#{entity_name}:#{check}")
|
213
|
+
# check again that the check is still failing
|
214
|
+
unless entity_check.failed?
|
213
215
|
# skip this one
|
214
|
-
@logger.warn "#{entity_name}:#{check} seems to have
|
215
|
-
"
|
216
|
+
@logger.warn "#{entity_name}:#{check} seems to have recovered " +
|
217
|
+
"while I've been running. Cancelling acknowledgement creation"
|
218
|
+
next
|
219
|
+
end
|
220
|
+
|
221
|
+
acknowledged = pagerduty_acknowledged?(options)
|
222
|
+
if acknowledged.nil?
|
223
|
+
@logger.debug "#{entity_name}:#{check} is not acknowledged in pagerduty, skipping"
|
216
224
|
next
|
217
225
|
end
|
218
226
|
|
@@ -237,14 +245,16 @@ module Flapjack
|
|
237
245
|
|
238
246
|
def pagerduty_acknowledged?(opts)
|
239
247
|
subdomain = opts['subdomain']
|
248
|
+
token = opts['token']
|
240
249
|
username = opts['username']
|
241
250
|
password = opts['password']
|
242
251
|
check = opts['check']
|
243
252
|
|
244
|
-
unless subdomain && username && password && check
|
253
|
+
unless subdomain && (token || (username && password)) && check
|
245
254
|
@logger.warn("pagerduty_acknowledged?: Unable to look for acknowledgements on pagerduty" +
|
246
|
-
" as
|
247
|
-
" subdomain (#{subdomain}),
|
255
|
+
" as the following options are required:" +
|
256
|
+
" subdomain (#{subdomain}), token (#{token}) or" +
|
257
|
+
" username (#{username}) and password (#{password}), check (#{check})")
|
248
258
|
return nil
|
249
259
|
end
|
250
260
|
|
@@ -257,7 +267,13 @@ module Flapjack
|
|
257
267
|
'incident_key' => check,
|
258
268
|
'status' => 'acknowledged' }
|
259
269
|
|
260
|
-
|
270
|
+
auth_header = if token && token.length > 0
|
271
|
+
"Token token=#{token}"
|
272
|
+
else
|
273
|
+
[username, password]
|
274
|
+
end
|
275
|
+
|
276
|
+
options = { :head => { 'authorization' => auth_header },
|
261
277
|
:query => query }
|
262
278
|
|
263
279
|
@logger.debug("pagerduty_acknowledged?: request to #{url}")
|
@@ -295,4 +311,3 @@ module Flapjack
|
|
295
311
|
end
|
296
312
|
|
297
313
|
end
|
298
|
-
|
@@ -289,8 +289,6 @@ module Flapjack
|
|
289
289
|
entity_check = get_entity_check(@entity, @check)
|
290
290
|
halt(404, "Could not find check '#{@entity}:#{@check}'") if entity_check.nil?
|
291
291
|
|
292
|
-
check_stats
|
293
|
-
|
294
292
|
last_change = entity_check.last_change
|
295
293
|
|
296
294
|
@check_state = entity_check.state
|
@@ -301,6 +299,14 @@ module Flapjack
|
|
301
299
|
@check_details = entity_check.details
|
302
300
|
@check_perfdata = entity_check.perfdata
|
303
301
|
|
302
|
+
@check_initial_failure_delay = entity_check.initial_failure_delay ||
|
303
|
+
Flapjack::DEFAULT_INITIAL_FAILURE_DELAY
|
304
|
+
@check_repeat_failure_delay = entity_check.repeat_failure_delay ||
|
305
|
+
Flapjack::DEFAULT_REPEAT_FAILURE_DELAY
|
306
|
+
|
307
|
+
@check_initial_failure_delay_is_default = entity_check.initial_failure_delay ? false : true
|
308
|
+
@check_repeat_failure_delay_is_default = entity_check.repeat_failure_delay ? false : true
|
309
|
+
|
304
310
|
@last_notifications = last_notification_data(entity_check)
|
305
311
|
|
306
312
|
@scheduled_maintenances = entity_check.maintenances(nil, nil, :scheduled => true)
|
@@ -278,6 +278,32 @@
|
|
278
278
|
</div><!-- col-md-6 -->
|
279
279
|
</div><!-- row -->
|
280
280
|
|
281
|
+
<div class="row">
|
282
|
+
<div id="failure-delays" class="col-md-6">
|
283
|
+
<a name="failure_delays"></a>
|
284
|
+
<h3>Failure Delays</h3>
|
285
|
+
<table class="table table-hover table-condensed">
|
286
|
+
|
287
|
+
<tr>
|
288
|
+
<td>Initial failure delay:</td>
|
289
|
+
<td><%= h(ChronicDuration.output(@check_initial_failure_delay, :keep_zero => true)) %>
|
290
|
+
<% if @check_initial_failure_delay_is_default %>
|
291
|
+
(default)
|
292
|
+
<% end %>
|
293
|
+
</td>
|
294
|
+
</tr>
|
295
|
+
<tr>
|
296
|
+
<td>Repeat failure delay:</td>
|
297
|
+
<td><%= h(ChronicDuration.output(@check_repeat_failure_delay, :keep_zero => true)) %>
|
298
|
+
<% if @check_repeat_failure_delay_is_default %>
|
299
|
+
(default)
|
300
|
+
<% end %>
|
301
|
+
</td>
|
302
|
+
</tr>
|
303
|
+
</table>
|
304
|
+
</div>
|
305
|
+
</div>
|
306
|
+
|
281
307
|
<div class="row">
|
282
308
|
<div id="scheduled-maintenance-periods" class="col-md-12">
|
283
309
|
<a name="scheduled_maintenance_periods"></a>
|
@@ -24,7 +24,9 @@
|
|
24
24
|
<td>PagerDuty</td>
|
25
25
|
<td>
|
26
26
|
<% @pagerduty_credentials.each_pair do |pk, pv| %>
|
27
|
-
|
27
|
+
<% unless pv.empty? %>
|
28
|
+
<p><%= 'password'.eql?(pk) ? h("#{pk}: ...") : h("#{pk}: #{pv}") %></p>
|
29
|
+
<% end %>
|
28
30
|
<% end %>
|
29
31
|
</td>
|
30
32
|
<td></td>
|
data/lib/flapjack/redis_pool.rb
CHANGED
@@ -32,6 +32,8 @@ module Flapjack
|
|
32
32
|
Flapjack::Data::Migration.refresh_archive_index(:redis => redis)
|
33
33
|
Flapjack::Data::Migration.validate_scheduled_maintenance_periods(:redis => redis,
|
34
34
|
:logger => logger)
|
35
|
+
Flapjack::Data::Migration.correct_rollup_including_disabled_checks(:redis => redis,
|
36
|
+
:logger => logger)
|
35
37
|
redis
|
36
38
|
}
|
37
39
|
end
|
data/lib/flapjack/version.rb
CHANGED
@@ -39,8 +39,9 @@ describe Flapjack::Data::Contact, :redis => true do
|
|
39
39
|
'pagerduty' => {
|
40
40
|
'service_key' => '123456789012345678901234',
|
41
41
|
'subdomain' => 'flpjck',
|
42
|
-
'
|
43
|
-
'
|
42
|
+
'token' => 'token123',
|
43
|
+
'username' => nil,
|
44
|
+
'password' => nil
|
44
45
|
},
|
45
46
|
},
|
46
47
|
},
|
@@ -276,23 +277,26 @@ describe Flapjack::Data::Contact, :redis => true do
|
|
276
277
|
expect(credentials).not_to be_nil
|
277
278
|
expect(credentials).to be_a(Hash)
|
278
279
|
expect(credentials).to eq({'service_key' => '123456789012345678901234',
|
279
|
-
|
280
|
-
|
281
|
-
|
280
|
+
'subdomain' => 'flpjck',
|
281
|
+
'token' => 'token123',
|
282
|
+
'username' => '',
|
283
|
+
'password' => ''})
|
282
284
|
end
|
283
285
|
|
284
286
|
it "sets pagerduty credentials for a contact" do
|
285
287
|
contact = Flapjack::Data::Contact.find_by_id('c362', :redis => @redis)
|
286
288
|
contact.set_pagerduty_credentials('service_key' => '567890123456789012345678',
|
287
289
|
'subdomain' => 'eggs',
|
288
|
-
'
|
289
|
-
'
|
290
|
+
'token' => 'token123',
|
291
|
+
'username' => 'mary',
|
292
|
+
'password' => 'mary_password')
|
290
293
|
|
291
294
|
expect(@redis.hget('contact_media:c362', 'pagerduty')).to eq('567890123456789012345678')
|
292
295
|
expect(@redis.hgetall('contact_pagerduty:c362')).to eq({
|
293
296
|
'subdomain' => 'eggs',
|
294
|
-
'
|
295
|
-
'
|
297
|
+
'token' => 'token123',
|
298
|
+
'username' => 'mary',
|
299
|
+
'password' => 'mary_password'
|
296
300
|
})
|
297
301
|
end
|
298
302
|
|
@@ -60,4 +60,45 @@ describe Flapjack::Data::Migration, :redis => true do
|
|
60
60
|
expect(rule.contact_id).to eq(contact.id)
|
61
61
|
end
|
62
62
|
|
63
|
+
it "removes a disabled check from a medium's alerting checks" do
|
64
|
+
contact = Flapjack::Data::Contact.add( {
|
65
|
+
'id' => 'c363_a-f@42%*',
|
66
|
+
'first_name' => 'Jane',
|
67
|
+
'last_name' => 'Janeley',
|
68
|
+
'email' => 'janej@example.com',
|
69
|
+
'media' => {
|
70
|
+
'email' => {
|
71
|
+
'address' => 'janej@example.com',
|
72
|
+
'interval' => 60,
|
73
|
+
'rollup_threshold' => 5,
|
74
|
+
},
|
75
|
+
},
|
76
|
+
},
|
77
|
+
:redis => @redis)
|
78
|
+
|
79
|
+
entity = Flapjack::Data::Entity.add({ 'id' => '5000',
|
80
|
+
'name' => 'abc-123',
|
81
|
+
'contacts' => ['c363_a-f@42%*'] },
|
82
|
+
:redis => @redis)
|
83
|
+
|
84
|
+
entity_check_ping = Flapjack::Data::EntityCheck.for_entity_name('abc-123', 'ping', :redis => @redis)
|
85
|
+
entity_check_ping.update_state('critical')
|
86
|
+
|
87
|
+
entity_check_ssh = Flapjack::Data::EntityCheck.for_entity_name('abc-123', 'ssh', :redis => @redis)
|
88
|
+
entity_check_ssh.update_state('critical')
|
89
|
+
|
90
|
+
contact.add_alerting_check_for_media('email', 'abc-123:ping')
|
91
|
+
contact.add_alerting_check_for_media('email', 'abc-123:ssh')
|
92
|
+
|
93
|
+
expect(contact.alerting_checks_for_media('email')).to eq(['abc-123:ping', 'abc-123:ssh'])
|
94
|
+
|
95
|
+
entity_check_ssh.disable!
|
96
|
+
|
97
|
+
expect(contact.alerting_checks_for_media('email')).to eq(['abc-123:ping', 'abc-123:ssh'])
|
98
|
+
|
99
|
+
Flapjack::Data::Migration.correct_rollup_including_disabled_checks(:redis => @redis)
|
100
|
+
|
101
|
+
expect(contact.alerting_checks_for_media('email')).to eq(['abc-123:ping'])
|
102
|
+
end
|
103
|
+
|
63
104
|
end
|
@@ -10,8 +10,9 @@ describe 'Flapjack::Gateways::JSONAPI::PagerdutyCredentialMethods', :sinatra =>
|
|
10
10
|
let(:pagerduty_credentials) {
|
11
11
|
{'service_key' => 'abc',
|
12
12
|
'subdomain' => 'def',
|
13
|
-
'
|
14
|
-
'
|
13
|
+
'token' => 'ghi',
|
14
|
+
'username' => 'mongoose',
|
15
|
+
'password' => 'mongoose_password'
|
15
16
|
}
|
16
17
|
}
|
17
18
|
|
@@ -32,7 +32,7 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
32
32
|
it "looks for acknowledgements if the search is not already running" do
|
33
33
|
expect(redis).to receive(:get).with('sem_pagerduty_acks_running').and_return(nil)
|
34
34
|
expect(redis).to receive(:set).with('sem_pagerduty_acks_running', 'true')
|
35
|
-
expect(redis).to receive(:expire).with('sem_pagerduty_acks_running',
|
35
|
+
expect(redis).to receive(:expire).with('sem_pagerduty_acks_running', 3600)
|
36
36
|
|
37
37
|
expect(redis).to receive(:del).with('sem_pagerduty_acks_running')
|
38
38
|
|
@@ -69,16 +69,56 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
69
69
|
stub_request(:get, "https://flpjck.pagerduty.com/api/v1/incidents?" +
|
70
70
|
"fields=incident_number,status,last_status_change_by&incident_key=#{check}&" +
|
71
71
|
"since=#{since}&status=acknowledged&until=#{unt}").
|
72
|
-
with(:headers => {'Authorization'=>
|
72
|
+
with(:headers => {'Authorization'=>'Token token=token123'}).
|
73
73
|
to_return(:status => 200, :body => response.to_json, :headers => {})
|
74
74
|
|
75
75
|
expect(Flapjack::RedisPool).to receive(:new).and_return(redis)
|
76
76
|
fp = Flapjack::Gateways::Pagerduty.new(:config => config, :logger => @logger)
|
77
77
|
|
78
|
+
EM.synchrony do
|
79
|
+
result = fp.send(:pagerduty_acknowledged?, 'subdomain' => 'flpjck', 'token' => 'token123',
|
80
|
+
'check' => check)
|
81
|
+
|
82
|
+
expect(result).to be_a(Hash)
|
83
|
+
expect(result).to have_key(:pg_acknowledged_by)
|
84
|
+
expect(result[:pg_acknowledged_by]).to be_a(Hash)
|
85
|
+
expect(result[:pg_acknowledged_by]).to have_key('id')
|
86
|
+
expect(result[:pg_acknowledged_by]['id']).to eq('ABCDEFG')
|
87
|
+
EM.stop
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
it "looks for acknowledgements via the PagerDuty API with basic auth" do
|
93
|
+
check = 'PING'
|
94
|
+
expect(Time).to receive(:now).and_return(time)
|
95
|
+
since = (time.utc - (60*60*24*7)).iso8601 # the last week
|
96
|
+
unt = (time.utc + (60*60*24)).iso8601 # 1 day in the future
|
97
|
+
|
98
|
+
response = {"incidents" =>
|
99
|
+
[{"incident_number" => 12,
|
100
|
+
"status" => "acknowledged",
|
101
|
+
"last_status_change_by" => {"id"=>"ABCDEFG", "name"=>"John Smith",
|
102
|
+
"email"=>"johns@example.com",
|
103
|
+
"html_url"=>"http://flpjck.pagerduty.com/users/ABCDEFG"}
|
104
|
+
}
|
105
|
+
],
|
106
|
+
"limit"=>100,
|
107
|
+
"offset"=>0,
|
108
|
+
"total"=>1}
|
109
|
+
|
110
|
+
stub_request(:get, "https://flpjck.pagerduty.com/api/v1/incidents?" +
|
111
|
+
"fields=incident_number,status,last_status_change_by&incident_key=#{check}&" +
|
112
|
+
"since=#{since}&status=acknowledged&until=#{unt}").
|
113
|
+
with(:headers => {'Authorization'=>['flapjack', 'password123']}).
|
114
|
+
to_return(:status => 200, :body => response.to_json, :headers => {})
|
115
|
+
|
116
|
+
expect(Flapjack::RedisPool).to receive(:new).and_return(redis)
|
117
|
+
fp = Flapjack::Gateways::Pagerduty.new(:config => config, :logger => @logger)
|
78
118
|
|
79
119
|
EM.synchrony do
|
80
|
-
result = fp.send(:pagerduty_acknowledged?, 'subdomain' => 'flpjck',
|
81
|
-
|
120
|
+
result = fp.send(:pagerduty_acknowledged?, 'subdomain' => 'flpjck',
|
121
|
+
'username' => 'flapjack', 'password' => 'password123', 'check' => check)
|
82
122
|
|
83
123
|
expect(result).to be_a(Hash)
|
84
124
|
expect(result).to have_key(:pg_acknowledged_by)
|
@@ -98,18 +138,19 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
98
138
|
expect(contact).to receive(:pagerduty_credentials).and_return({
|
99
139
|
'service_key' => '12345678',
|
100
140
|
'subdomain"' => 'flpjck',
|
101
|
-
'
|
102
|
-
'password' => 'password123'
|
141
|
+
'token' => 'token123'
|
103
142
|
})
|
104
143
|
|
105
144
|
entity_check = double('entity_check')
|
106
|
-
expect(entity_check).to receive(:check).exactly(
|
145
|
+
expect(entity_check).to receive(:check).exactly(1).times.and_return('PING')
|
107
146
|
expect(entity_check).to receive(:contacts).and_return([contact])
|
108
|
-
expect(entity_check).to receive(:entity_name).exactly(
|
147
|
+
expect(entity_check).to receive(:entity_name).exactly(1).times.and_return('foo-app-01.bar.net')
|
148
|
+
expect(entity_check).to receive(:in_unscheduled_maintenance?).exactly(1).times.and_return(false)
|
149
|
+
expect(entity_check).to receive(:failed?).exactly(1).times.and_return(true)
|
109
150
|
expect(Flapjack::Data::Event).to receive(:create_acknowledgement).with('foo-app-01.bar.net', 'PING',
|
110
151
|
:summary => 'Acknowledged on PagerDuty', :duration => 14400, :redis => redis)
|
111
152
|
|
112
|
-
expect(Flapjack::Data::EntityCheck).to receive(:unacknowledged_failing).exactly(
|
153
|
+
expect(Flapjack::Data::EntityCheck).to receive(:unacknowledged_failing).exactly(1).times.and_return([entity_check])
|
113
154
|
|
114
155
|
expect(fp).to receive(:pagerduty_acknowledged?).and_return({})
|
115
156
|
|
@@ -192,7 +233,6 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
192
233
|
|
193
234
|
it "does not look for acknowledgements if all required credentials are not present" do
|
194
235
|
creds = {'subdomain' => 'example',
|
195
|
-
'username' => 'sausage',
|
196
236
|
'check' => 'PING'}
|
197
237
|
|
198
238
|
expect(Flapjack::RedisPool).to receive(:new).and_return(redis)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'chronic_duration'
|
2
3
|
|
3
4
|
describe 'web/views/check.html.erb', :erb_view => true do
|
4
5
|
|
@@ -8,6 +9,10 @@ describe 'web/views/check.html.erb', :erb_view => true do
|
|
8
9
|
@entity = 'abc-xyz-01'
|
9
10
|
@check = 'Disk / Utilisation'
|
10
11
|
@last_notifications = {}
|
12
|
+
@check_initial_failure_delay = 30
|
13
|
+
@check_repeat_failure_delay = 60
|
14
|
+
@check_initial_failure_delay_is_default = true
|
15
|
+
@check_repeat_failure_delay_is_default = true
|
11
16
|
|
12
17
|
page = render_erb('check.html.erb', binding)
|
13
18
|
expect(page).to match(%r{/abc-xyz-01/Disk%20%2F%20Utilisation})
|
@@ -175,7 +175,6 @@ describe Flapjack::Gateways::Web, :sinatra => true, :logger => true do
|
|
175
175
|
:recovery => {:timestamp => time.to_i - (3 * 60 * 60), :summary => nil},
|
176
176
|
:acknowledgement => {:timestamp => nil, :summary => nil} }
|
177
177
|
|
178
|
-
expect_check_stats
|
179
178
|
expect(entity_check).to receive(:state).and_return('ok')
|
180
179
|
expect(entity_check).to receive(:last_update).and_return(time.to_i - (3 * 60 * 60))
|
181
180
|
expect(entity_check).to receive(:last_change).and_return(time.to_i - (3 * 60 * 60))
|
@@ -191,6 +190,8 @@ describe Flapjack::Gateways::Web, :sinatra => true, :logger => true do
|
|
191
190
|
expect(entity_check).to receive(:historical_states).
|
192
191
|
with(nil, time.to_i, :order => 'desc', :limit => 20).and_return([])
|
193
192
|
expect(entity_check).to receive(:enabled?).and_return(true)
|
193
|
+
expect(entity_check).to receive(:initial_failure_delay).exactly(2).times.and_return(30)
|
194
|
+
expect(entity_check).to receive(:repeat_failure_delay).exactly(2).times.and_return(60)
|
194
195
|
|
195
196
|
expect(Flapjack::Data::Entity).to receive(:find_by_name).
|
196
197
|
with(entity_name, :redis => redis).and_return(entity)
|
@@ -18,6 +18,7 @@ describe Flapjack::RedisPool do
|
|
18
18
|
expect(Flapjack::Data::Migration).to receive(:clear_orphaned_entity_ids).exactly(redis_count).times
|
19
19
|
expect(Flapjack::Data::Migration).to receive(:refresh_archive_index).exactly(redis_count).times
|
20
20
|
expect(Flapjack::Data::Migration).to receive(:validate_scheduled_maintenance_periods).exactly(redis_count).times
|
21
|
+
expect(Flapjack::Data::Migration).to receive(:correct_rollup_including_disabled_checks).exactly(redis_count).times
|
21
22
|
|
22
23
|
frp = Flapjack::RedisPool.new(:size => redis_count)
|
23
24
|
|
@@ -18,8 +18,8 @@
|
|
18
18
|
"body": [
|
19
19
|
{
|
20
20
|
"op": "replace",
|
21
|
-
"path": "/pagerduty_credentials/0/
|
22
|
-
"value": "
|
21
|
+
"path": "/pagerduty_credentials/0/token",
|
22
|
+
"value": "token123"
|
23
23
|
}
|
24
24
|
]
|
25
25
|
},
|
@@ -47,8 +47,8 @@
|
|
47
47
|
"body": [
|
48
48
|
{
|
49
49
|
"op": "replace",
|
50
|
-
"path": "/pagerduty_credentials/0/
|
51
|
-
"value": "
|
50
|
+
"path": "/pagerduty_credentials/0/token",
|
51
|
+
"value": "token123"
|
52
52
|
}
|
53
53
|
]
|
54
54
|
},
|
@@ -71,8 +71,8 @@
|
|
71
71
|
"body": [
|
72
72
|
{
|
73
73
|
"op": "replace",
|
74
|
-
"path": "/pagerduty_credentials/0/
|
75
|
-
"value": "
|
74
|
+
"path": "/pagerduty_credentials/0/token",
|
75
|
+
"value": "token123"
|
76
76
|
}
|
77
77
|
]
|
78
78
|
},
|
@@ -147,8 +147,7 @@
|
|
147
147
|
{
|
148
148
|
"service_key": "abc",
|
149
149
|
"subdomain": "def",
|
150
|
-
"
|
151
|
-
"password": "jkl"
|
150
|
+
"token": "ghi"
|
152
151
|
}
|
153
152
|
]
|
154
153
|
}
|
@@ -177,8 +176,7 @@
|
|
177
176
|
{
|
178
177
|
"service_key": "abc",
|
179
178
|
"subdomain": "def",
|
180
|
-
"
|
181
|
-
"password": "jkl"
|
179
|
+
"token": "ghi"
|
182
180
|
}
|
183
181
|
]
|
184
182
|
}
|
@@ -212,14 +210,12 @@
|
|
212
210
|
{
|
213
211
|
"service_key": "abc",
|
214
212
|
"subdomain": "def",
|
215
|
-
"
|
216
|
-
"password": "jkl"
|
213
|
+
"token": "ghi"
|
217
214
|
},
|
218
215
|
{
|
219
216
|
"service_key": "mno",
|
220
217
|
"subdomain": "pqr",
|
221
|
-
"
|
222
|
-
"password": "vwx"
|
218
|
+
"token": "stu"
|
223
219
|
}
|
224
220
|
]
|
225
221
|
}
|
@@ -261,8 +257,7 @@
|
|
261
257
|
{
|
262
258
|
"service_key": "abc",
|
263
259
|
"subdomain": "def",
|
264
|
-
"
|
265
|
-
"password": "jkl"
|
260
|
+
"token": "ghi"
|
266
261
|
}
|
267
262
|
]
|
268
263
|
}
|
@@ -285,8 +280,7 @@
|
|
285
280
|
{
|
286
281
|
"service_key": "abc",
|
287
282
|
"subdomain": "def",
|
288
|
-
"
|
289
|
-
"password": "jkl"
|
283
|
+
"token": "ghi"
|
290
284
|
}
|
291
285
|
]
|
292
286
|
}
|
@@ -301,8 +301,7 @@ Pact.provider_states_for "flapjack-diner" do
|
|
301
301
|
pdc_data = {
|
302
302
|
'service_key' => 'abc',
|
303
303
|
'subdomain' => 'def',
|
304
|
-
'
|
305
|
-
'password' => 'jkl',
|
304
|
+
'token' => 'ghi',
|
306
305
|
}
|
307
306
|
contact.set_pagerduty_credentials(pdc_data)
|
308
307
|
end
|
@@ -333,15 +332,13 @@ Pact.provider_states_for "flapjack-diner" do
|
|
333
332
|
pdc_data = {
|
334
333
|
'service_key' => 'abc',
|
335
334
|
'subdomain' => 'def',
|
336
|
-
'
|
337
|
-
'password' => 'jkl',
|
335
|
+
'token' => 'ghi',
|
338
336
|
}
|
339
337
|
contact.set_pagerduty_credentials(pdc_data)
|
340
338
|
pdc_data_2 = {
|
341
339
|
'service_key' => 'mno',
|
342
340
|
'subdomain' => 'pqr',
|
343
|
-
'
|
344
|
-
'password' => 'vwx',
|
341
|
+
'token' => 'stu',
|
345
342
|
}
|
346
343
|
contact_2.set_pagerduty_credentials(pdc_data_2)
|
347
344
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flapjack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lindsay Holmwood
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2015-
|
14
|
+
date: 2015-05-13 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: dante
|
@@ -635,9 +635,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
635
635
|
version: '0'
|
636
636
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
637
637
|
requirements:
|
638
|
-
- - "
|
638
|
+
- - ">"
|
639
639
|
- !ruby/object:Gem::Version
|
640
|
-
version:
|
640
|
+
version: 1.3.1
|
641
641
|
requirements: []
|
642
642
|
rubyforge_project:
|
643
643
|
rubygems_version: 2.4.5
|