flapjack 1.5.0 → 1.6.0rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/features/rollup.feature +46 -0
- data/features/steps/events_steps.rb +10 -0
- data/lib/flapjack/data/contact.rb +18 -10
- data/lib/flapjack/data/entity.rb +1 -1
- data/lib/flapjack/data/entity_check.rb +0 -1
- data/lib/flapjack/data/migration.rb +14 -0
- data/lib/flapjack/data/notification.rb +3 -3
- data/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods.rb +4 -1
- data/lib/flapjack/gateways/pagerduty.rb +31 -16
- data/lib/flapjack/gateways/web.rb +8 -2
- data/lib/flapjack/gateways/web/views/check.html.erb +26 -0
- data/lib/flapjack/gateways/web/views/contact.html.erb +3 -1
- data/lib/flapjack/redis_pool.rb +2 -0
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/data/contact_spec.rb +13 -9
- data/spec/lib/flapjack/data/migration_spec.rb +41 -0
- data/spec/lib/flapjack/gateways/jsonapi/pagerduty_credential_methods_spec.rb +3 -2
- data/spec/lib/flapjack/gateways/pagerduty_spec.rb +50 -10
- data/spec/lib/flapjack/gateways/web/views/check.html.erb_spec.rb +5 -0
- data/spec/lib/flapjack/gateways/web_spec.rb +2 -1
- data/spec/lib/flapjack/redis_pool_spec.rb +1 -0
- data/spec/service_consumers/pacts/flapjack-diner_v1.0.json +12 -18
- data/spec/service_consumers/provider_states_for_flapjack-diner.rb +3 -6
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a5aa06a9caf7b8c5b81d7b5b5245f8b623cda1b
|
4
|
+
data.tar.gz: 5d10f49f09e6ab9e87da21537d80add6409708a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4d57fdb2ee84f2a56118808d390fcea5087ca7b68c36c4588d8a352d490d98cccba536509606189524515800fd0b559a6b30dfeb2ab7ccc552a035dad5cd9a5
|
7
|
+
data.tar.gz: fe92aeefa1f66b21d690d7902f775b175a1d95fc09ae601107bd8aad5fbcfe28d784be65f561387b7c5192c87f45b4d9b52e8457cd935b226606c14f37b73dcf
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
## Flapjack Changelog
|
2
2
|
|
3
|
+
# 1.6.0 - 2015-05-13
|
4
|
+
- Feature: use token authentication for pagerduty gateway #831 (@alperkokmen)
|
5
|
+
- Feature: expose failure delays in web UI #849 (@jessereynolds)
|
6
|
+
- Bug: performance improvement - fix usage of KEYS command for entity check names c57d3a5 (@ali-graham)
|
7
|
+
- Bug: remove disabled checks from rollup calculations #843 (@ali-graham)
|
8
|
+
- Bug: fall back to basic auth for pagerduty incidents api #853 (@jessereynolds)
|
9
|
+
- Chore: pagerduty ack retrieval improvements #858 (@jessereynolds)
|
10
|
+
|
3
11
|
# 1.5.0 - 2015-03-31
|
4
12
|
- No changes
|
5
13
|
|
data/features/rollup.feature
CHANGED
@@ -175,6 +175,52 @@ Feature: Rollup on a per contact, per media basis
|
|
175
175
|
Then 2 sms alerts should be queued for +61400000001
|
176
176
|
And 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
177
177
|
|
178
|
+
@time
|
179
|
+
Scenario: Disabling a failing check suppresses rollup
|
180
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
181
|
+
And check 'ping' for entity 'baz' is in an ok state
|
182
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
183
|
+
And 1 minute passes
|
184
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
185
|
+
Then 1 sms alert should be queued for +61400000001
|
186
|
+
Then 1 sms alerts of type problem and rollup none should be queued for +61400000001
|
187
|
+
When 5 minutes passes
|
188
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
189
|
+
And 1 minute passes
|
190
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
191
|
+
Then 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
192
|
+
And 2 sms alerts should be queued for +61400000001
|
193
|
+
When check 'ping' on entity 'foo' is disabled
|
194
|
+
And 30 minutes passes
|
195
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
196
|
+
Then 1 sms alert of rollup recovery should be queued for +61400000001
|
197
|
+
|
198
|
+
@time
|
199
|
+
Scenario: Enabling a failing check promotes rollup
|
200
|
+
Given check 'ping' for entity 'foo' is in an ok state
|
201
|
+
And check 'ping' for entity 'baz' is in an ok state
|
202
|
+
When a critical event is received for check 'ping' on entity 'foo'
|
203
|
+
And 1 minute passes
|
204
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
205
|
+
Then 1 sms alert should be queued for +61400000001
|
206
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
207
|
+
When check 'ping' for entity 'foo' is disabled
|
208
|
+
And 5 minutes passes
|
209
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
210
|
+
And 1 minute passes
|
211
|
+
And a critical event is received for check 'ping' on entity 'baz'
|
212
|
+
Then 2 sms alerts should be queued for +61400000001
|
213
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
214
|
+
Then 1 sms alert of type problem and rollup recovery should be queued for +61400000001
|
215
|
+
When 1 hour passes
|
216
|
+
And check 'ping' on entity 'foo' is enabled
|
217
|
+
And 5 minutes passes
|
218
|
+
And a critical event is received for check 'ping' on entity 'foo'
|
219
|
+
Then 3 sms alerts should be queued for +61400000001
|
220
|
+
Then 1 sms alert of type problem and rollup none should be queued for +61400000001
|
221
|
+
Then 1 sms alert of type problem and rollup recovery should be queued for +61400000001
|
222
|
+
And 1 sms alert of type problem and rollup problem should be queued for +61400000001
|
223
|
+
|
178
224
|
@time
|
179
225
|
Scenario: Contact ceases to be a contact on an entity that they were being alerted for
|
180
226
|
Given check 'ping' for entity 'foo' is in an ok state
|
@@ -279,6 +279,16 @@ When /^the unscheduled maintenance is ended(?: for check '([\w\.\-]+)' on entity
|
|
279
279
|
end_unscheduled_maintenance(entity, check)
|
280
280
|
end
|
281
281
|
|
282
|
+
When /^check '([\w\.\-]+)' (?:for|on) entity '([\w\.\-]+)' is (dis|en)abled$/ do |check, entity, dis_en|
|
283
|
+
entity_check = Flapjack::Data::EntityCheck.for_entity_name(entity, check, :redis => @redis)
|
284
|
+
case dis_en
|
285
|
+
when 'dis'
|
286
|
+
entity_check.disable!
|
287
|
+
when 'en'
|
288
|
+
entity_check.enable!
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
282
292
|
# TODO logging is a side-effect, should test for notification generation itself
|
283
293
|
Then /^a notification should not be generated(?: for check '([\w\.\-]+)' on entity '([\w\.\-]+)')?$/ do |check, entity|
|
284
294
|
check ||= @check
|
@@ -160,7 +160,7 @@ module Flapjack
|
|
160
160
|
def set_pagerduty_credentials(details)
|
161
161
|
@redis.hset("contact_media:#{self.id}", 'pagerduty', details['service_key'])
|
162
162
|
@redis.hmset("contact_pagerduty:#{self.id}",
|
163
|
-
*['subdomain', 'username', 'password'].collect {|f| [f, details[f]]})
|
163
|
+
*['subdomain', 'token', 'username', 'password'].collect {|f| [f, details[f]]})
|
164
164
|
end
|
165
165
|
|
166
166
|
def delete_pagerduty_credentials
|
@@ -387,25 +387,29 @@ module Flapjack
|
|
387
387
|
end
|
388
388
|
end
|
389
389
|
|
390
|
-
def add_alerting_check_for_media(media,
|
391
|
-
@redis.zadd("contact_alerting_checks:#{self.id}:media:#{media}", Time.now.to_i,
|
390
|
+
def add_alerting_check_for_media(media, event_id)
|
391
|
+
@redis.zadd("contact_alerting_checks:#{self.id}:media:#{media}", Time.now.to_i, event_id)
|
392
392
|
end
|
393
393
|
|
394
394
|
def remove_alerting_check_for_media(media, check)
|
395
395
|
@redis.zrem("contact_alerting_checks:#{self.id}:media:#{media}", check)
|
396
396
|
end
|
397
397
|
|
398
|
-
# removes any checks that are in ok, scheduled or unscheduled maintenance
|
399
|
-
# from the alerting checks set for the given media
|
400
|
-
# returns the
|
398
|
+
# removes any checks that are in ok, scheduled or unscheduled maintenance,
|
399
|
+
# or are disabled from the alerting checks set for the given media;
|
400
|
+
# returns whether this cleaning moved the medium from rollup to recovery
|
401
401
|
def clean_alerting_checks_for_media(media)
|
402
|
-
key = "contact_alerting_checks:#{self.id}:media:#{media}"
|
403
402
|
cleaned = 0
|
404
|
-
|
403
|
+
|
404
|
+
alerting_checks = alerting_checks_for_media(media)
|
405
|
+
rollup_threshold = rollup_threshold_for_media(media)
|
406
|
+
|
407
|
+
alerting_checks.each do |check|
|
405
408
|
entity_check = Flapjack::Data::EntityCheck.for_event_id(check, :redis => @redis)
|
406
409
|
next unless Flapjack::Data::EntityCheck.state_for_event_id?(check, :redis => @redis) == 'ok' ||
|
407
410
|
Flapjack::Data::EntityCheck.in_unscheduled_maintenance_for_event_id?(check, :redis => @redis) ||
|
408
411
|
Flapjack::Data::EntityCheck.in_scheduled_maintenance_for_event_id?(check, :redis => @redis) ||
|
412
|
+
!entity_check.enabled? ||
|
409
413
|
!entity_check.contacts.map {|c| c.id}.include?(self.id)
|
410
414
|
|
411
415
|
# FIXME: why can't i get this logging when called from notifier (notification.rb)?
|
@@ -413,7 +417,11 @@ module Flapjack
|
|
413
417
|
remove_alerting_check_for_media(media, check)
|
414
418
|
cleaned += 1
|
415
419
|
end
|
416
|
-
|
420
|
+
|
421
|
+
return false if rollup_threshold.nil? || (rollup_threshold <= 0) ||
|
422
|
+
(alerting_checks.size < rollup_threshold)
|
423
|
+
|
424
|
+
return(cleaned > (alerting_checks.size - rollup_threshold))
|
417
425
|
end
|
418
426
|
|
419
427
|
def alerting_checks_for_media(media)
|
@@ -498,7 +506,7 @@ module Flapjack
|
|
498
506
|
when 'pagerduty'
|
499
507
|
redis.hset("contact_media:#{contact_id}", medium, details['service_key'])
|
500
508
|
redis.hmset("contact_pagerduty:#{contact_id}",
|
501
|
-
*['subdomain', 'username', 'password'].collect {|f| [f, details[f]]})
|
509
|
+
*['subdomain', 'token', 'username', 'password'].collect {|f| [f, details[f]]})
|
502
510
|
else
|
503
511
|
redis.hset("contact_media:#{contact_id}", medium, details['address'])
|
504
512
|
redis.hset("contact_media_intervals:#{contact_id}", medium, details['interval']) if details['interval']
|
data/lib/flapjack/data/entity.rb
CHANGED
@@ -605,7 +605,7 @@ module Flapjack
|
|
605
605
|
entity_name = redis.hget('all_entity_names_by_id', entity_id)
|
606
606
|
next memo if entity_name.nil? || entity_name.empty?
|
607
607
|
en = Regexp.escape(entity_name)
|
608
|
-
check_names = redis.
|
608
|
+
check_names = redis.zrange("all_checks:#{entity_name}", 0, -1) |
|
609
609
|
Flapjack::Data::EntityCheck.find_current_names_for_entity_name(entity_name, :redis => redis)
|
610
610
|
memo[entity_id] = check_names.map {|cn| "#{entity_name}:#{cn}"}
|
611
611
|
memo
|
@@ -681,7 +681,6 @@ module Flapjack
|
|
681
681
|
@redis.zadd("all_checks:#{entity_name}", timestamp, check)
|
682
682
|
@redis.zrem("current_checks:#{entity_name}", check)
|
683
683
|
if @redis.zcount("current_checks:#{entity_name}", '-inf', '+inf') == 0
|
684
|
-
@redis.zrem("current_checks:#{entity_name}", check)
|
685
684
|
@redis.zrem("current_entities", entity.name)
|
686
685
|
end
|
687
686
|
end
|
@@ -215,6 +215,20 @@ module Flapjack
|
|
215
215
|
redis.set('validated_scheduled_maintenance_periods', 'true')
|
216
216
|
end
|
217
217
|
|
218
|
+
def self.correct_rollup_including_disabled_checks(options = {})
|
219
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
220
|
+
logger = options[:logger]
|
221
|
+
return if redis.exists('corrected_rollup_including_disabled_checks')
|
222
|
+
|
223
|
+
Flapjack::Data::Contact.all(:redis => redis).each do |contact|
|
224
|
+
contact.media_list.each do |medium|
|
225
|
+
contact.clean_alerting_checks_for_media(medium)
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
logger.warn "Corrected rollup to no longer include disabled checks" unless logger.nil?
|
230
|
+
redis.set('corrected_rollup_including_disabled_checks', 'true')
|
231
|
+
end
|
218
232
|
end
|
219
233
|
end
|
220
234
|
end
|
@@ -221,8 +221,8 @@ module Flapjack
|
|
221
221
|
contact.add_alerting_check_for_media(media, @event_id) unless ok? || acknowledgement? || test?
|
222
222
|
|
223
223
|
# expunge checks that are ok, disabled or in (un)scheduled maintenance from the alerting set
|
224
|
-
|
225
|
-
logger.debug("cleaned alerting checks for #{media}: #{
|
224
|
+
recovered = contact.clean_alerting_checks_for_media(media)
|
225
|
+
logger.debug("cleaned alerting checks for #{media}: recovered? #{recovered}")
|
226
226
|
|
227
227
|
# pagerduty is an example of a medium which should never be rolled up
|
228
228
|
unless ['pagerduty'].include?(media)
|
@@ -236,7 +236,7 @@ module Flapjack
|
|
236
236
|
next ret if contact.drop_rollup_notifications_for_media?(media)
|
237
237
|
contact.update_sent_rollup_alert_keys_for_media(media, :delete => ok?)
|
238
238
|
rollup_type = 'problem'
|
239
|
-
when
|
239
|
+
when recovered
|
240
240
|
# alerting checks was just cleaned such that it is now below the rollup threshold
|
241
241
|
contact.update_sent_rollup_alert_keys_for_media(media, :delete => true)
|
242
242
|
rollup_type = 'recovery'
|
@@ -30,7 +30,7 @@ module Flapjack
|
|
30
30
|
halt err(422, "No valid pagerduty credentials were submitted")
|
31
31
|
end
|
32
32
|
|
33
|
-
fields = ['service_key', 'subdomain', 'username', 'password']
|
33
|
+
fields = ['service_key', 'subdomain', 'token', 'username', 'password']
|
34
34
|
|
35
35
|
pagerduty_credential = pagerduty_credentials_data.last
|
36
36
|
|
@@ -102,6 +102,9 @@ module Flapjack
|
|
102
102
|
when 'subdomain'
|
103
103
|
pd_data['subdomain'] = value
|
104
104
|
contact.set_pagerduty_credentials(pd_data)
|
105
|
+
when 'token'
|
106
|
+
pd_data['token'] = value
|
107
|
+
contact.set_pagerduty_credentials(pd_data)
|
105
108
|
when 'username'
|
106
109
|
pd_data['username'] = value
|
107
110
|
contact.set_pagerduty_credentials(pd_data)
|
@@ -16,6 +16,7 @@ module Flapjack
|
|
16
16
|
class Pagerduty
|
17
17
|
PAGERDUTY_EVENTS_API_URL = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
|
18
18
|
SEM_PAGERDUTY_ACKS_RUNNING = 'sem_pagerduty_acks_running'
|
19
|
+
SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT = 3600
|
19
20
|
|
20
21
|
include Flapjack::Utility
|
21
22
|
|
@@ -129,7 +130,8 @@ module Flapjack
|
|
129
130
|
# ensure we're the only instance of the pagerduty acknowledgement check running (with a naive
|
130
131
|
# timeout of one hour, SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT, to guard against stale locks caused by crashing code) either in this
|
131
132
|
# process or in other processes
|
132
|
-
if (@pagerduty_acks_started and @pagerduty_acks_started >
|
133
|
+
if (@pagerduty_acks_started and @pagerduty_acks_started >
|
134
|
+
(Time.now.to_i - SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT)) or
|
133
135
|
@redis.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
|
134
136
|
@logger.debug("skipping looking for acks in pagerduty as this is already happening")
|
135
137
|
return
|
@@ -137,7 +139,7 @@ module Flapjack
|
|
137
139
|
|
138
140
|
@pagerduty_acks_started = Time.now.to_i
|
139
141
|
@redis.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
|
140
|
-
@redis.expire(SEM_PAGERDUTY_ACKS_RUNNING,
|
142
|
+
@redis.expire(SEM_PAGERDUTY_ACKS_RUNNING, SEM_PAGERDUTY_ACKS_RUNNING_TIMEOUT)
|
141
143
|
|
142
144
|
find_pagerduty_acknowledgements
|
143
145
|
|
@@ -200,19 +202,25 @@ module Flapjack
|
|
200
202
|
# FIXME: try each set of credentials until one works (may have stale contacts turning up)
|
201
203
|
options = ec_credentials.first.merge('check' => "#{entity_name}:#{check}")
|
202
204
|
|
203
|
-
|
204
|
-
if
|
205
|
-
|
205
|
+
# check again that the check is still unacknowledged
|
206
|
+
if entity_check.in_unscheduled_maintenance?
|
207
|
+
# skip this one
|
208
|
+
@logger.warn "#{entity_name}:#{check} seems to have been acknowledged by " +
|
209
|
+
"some other process while I've been running. Cancelling acknowledgement creation"
|
206
210
|
next
|
207
211
|
end
|
208
212
|
|
209
|
-
# check again that the check is still
|
210
|
-
unless
|
211
|
-
"#{ec.entity_name}:#{ec.check}"
|
212
|
-
}.include?("#{entity_name}:#{check}")
|
213
|
+
# check again that the check is still failing
|
214
|
+
unless entity_check.failed?
|
213
215
|
# skip this one
|
214
|
-
@logger.warn "#{entity_name}:#{check} seems to have
|
215
|
-
"
|
216
|
+
@logger.warn "#{entity_name}:#{check} seems to have recovered " +
|
217
|
+
"while I've been running. Cancelling acknowledgement creation"
|
218
|
+
next
|
219
|
+
end
|
220
|
+
|
221
|
+
acknowledged = pagerduty_acknowledged?(options)
|
222
|
+
if acknowledged.nil?
|
223
|
+
@logger.debug "#{entity_name}:#{check} is not acknowledged in pagerduty, skipping"
|
216
224
|
next
|
217
225
|
end
|
218
226
|
|
@@ -237,14 +245,16 @@ module Flapjack
|
|
237
245
|
|
238
246
|
def pagerduty_acknowledged?(opts)
|
239
247
|
subdomain = opts['subdomain']
|
248
|
+
token = opts['token']
|
240
249
|
username = opts['username']
|
241
250
|
password = opts['password']
|
242
251
|
check = opts['check']
|
243
252
|
|
244
|
-
unless subdomain && username && password && check
|
253
|
+
unless subdomain && (token || (username && password)) && check
|
245
254
|
@logger.warn("pagerduty_acknowledged?: Unable to look for acknowledgements on pagerduty" +
|
246
|
-
" as
|
247
|
-
" subdomain (#{subdomain}),
|
255
|
+
" as the following options are required:" +
|
256
|
+
" subdomain (#{subdomain}), token (#{token}) or" +
|
257
|
+
" username (#{username}) and password (#{password}), check (#{check})")
|
248
258
|
return nil
|
249
259
|
end
|
250
260
|
|
@@ -257,7 +267,13 @@ module Flapjack
|
|
257
267
|
'incident_key' => check,
|
258
268
|
'status' => 'acknowledged' }
|
259
269
|
|
260
|
-
|
270
|
+
auth_header = if token && token.length > 0
|
271
|
+
"Token token=#{token}"
|
272
|
+
else
|
273
|
+
[username, password]
|
274
|
+
end
|
275
|
+
|
276
|
+
options = { :head => { 'authorization' => auth_header },
|
261
277
|
:query => query }
|
262
278
|
|
263
279
|
@logger.debug("pagerduty_acknowledged?: request to #{url}")
|
@@ -295,4 +311,3 @@ module Flapjack
|
|
295
311
|
end
|
296
312
|
|
297
313
|
end
|
298
|
-
|
@@ -289,8 +289,6 @@ module Flapjack
|
|
289
289
|
entity_check = get_entity_check(@entity, @check)
|
290
290
|
halt(404, "Could not find check '#{@entity}:#{@check}'") if entity_check.nil?
|
291
291
|
|
292
|
-
check_stats
|
293
|
-
|
294
292
|
last_change = entity_check.last_change
|
295
293
|
|
296
294
|
@check_state = entity_check.state
|
@@ -301,6 +299,14 @@ module Flapjack
|
|
301
299
|
@check_details = entity_check.details
|
302
300
|
@check_perfdata = entity_check.perfdata
|
303
301
|
|
302
|
+
@check_initial_failure_delay = entity_check.initial_failure_delay ||
|
303
|
+
Flapjack::DEFAULT_INITIAL_FAILURE_DELAY
|
304
|
+
@check_repeat_failure_delay = entity_check.repeat_failure_delay ||
|
305
|
+
Flapjack::DEFAULT_REPEAT_FAILURE_DELAY
|
306
|
+
|
307
|
+
@check_initial_failure_delay_is_default = entity_check.initial_failure_delay ? false : true
|
308
|
+
@check_repeat_failure_delay_is_default = entity_check.repeat_failure_delay ? false : true
|
309
|
+
|
304
310
|
@last_notifications = last_notification_data(entity_check)
|
305
311
|
|
306
312
|
@scheduled_maintenances = entity_check.maintenances(nil, nil, :scheduled => true)
|
@@ -278,6 +278,32 @@
|
|
278
278
|
</div><!-- col-md-6 -->
|
279
279
|
</div><!-- row -->
|
280
280
|
|
281
|
+
<div class="row">
|
282
|
+
<div id="failure-delays" class="col-md-6">
|
283
|
+
<a name="failure_delays"></a>
|
284
|
+
<h3>Failure Delays</h3>
|
285
|
+
<table class="table table-hover table-condensed">
|
286
|
+
|
287
|
+
<tr>
|
288
|
+
<td>Initial failure delay:</td>
|
289
|
+
<td><%= h(ChronicDuration.output(@check_initial_failure_delay, :keep_zero => true)) %>
|
290
|
+
<% if @check_initial_failure_delay_is_default %>
|
291
|
+
(default)
|
292
|
+
<% end %>
|
293
|
+
</td>
|
294
|
+
</tr>
|
295
|
+
<tr>
|
296
|
+
<td>Repeat failure delay:</td>
|
297
|
+
<td><%= h(ChronicDuration.output(@check_repeat_failure_delay, :keep_zero => true)) %>
|
298
|
+
<% if @check_repeat_failure_delay_is_default %>
|
299
|
+
(default)
|
300
|
+
<% end %>
|
301
|
+
</td>
|
302
|
+
</tr>
|
303
|
+
</table>
|
304
|
+
</div>
|
305
|
+
</div>
|
306
|
+
|
281
307
|
<div class="row">
|
282
308
|
<div id="scheduled-maintenance-periods" class="col-md-12">
|
283
309
|
<a name="scheduled_maintenance_periods"></a>
|
@@ -24,7 +24,9 @@
|
|
24
24
|
<td>PagerDuty</td>
|
25
25
|
<td>
|
26
26
|
<% @pagerduty_credentials.each_pair do |pk, pv| %>
|
27
|
-
|
27
|
+
<% unless pv.empty? %>
|
28
|
+
<p><%= 'password'.eql?(pk) ? h("#{pk}: ...") : h("#{pk}: #{pv}") %></p>
|
29
|
+
<% end %>
|
28
30
|
<% end %>
|
29
31
|
</td>
|
30
32
|
<td></td>
|
data/lib/flapjack/redis_pool.rb
CHANGED
@@ -32,6 +32,8 @@ module Flapjack
|
|
32
32
|
Flapjack::Data::Migration.refresh_archive_index(:redis => redis)
|
33
33
|
Flapjack::Data::Migration.validate_scheduled_maintenance_periods(:redis => redis,
|
34
34
|
:logger => logger)
|
35
|
+
Flapjack::Data::Migration.correct_rollup_including_disabled_checks(:redis => redis,
|
36
|
+
:logger => logger)
|
35
37
|
redis
|
36
38
|
}
|
37
39
|
end
|
data/lib/flapjack/version.rb
CHANGED
@@ -39,8 +39,9 @@ describe Flapjack::Data::Contact, :redis => true do
|
|
39
39
|
'pagerduty' => {
|
40
40
|
'service_key' => '123456789012345678901234',
|
41
41
|
'subdomain' => 'flpjck',
|
42
|
-
'
|
43
|
-
'
|
42
|
+
'token' => 'token123',
|
43
|
+
'username' => nil,
|
44
|
+
'password' => nil
|
44
45
|
},
|
45
46
|
},
|
46
47
|
},
|
@@ -276,23 +277,26 @@ describe Flapjack::Data::Contact, :redis => true do
|
|
276
277
|
expect(credentials).not_to be_nil
|
277
278
|
expect(credentials).to be_a(Hash)
|
278
279
|
expect(credentials).to eq({'service_key' => '123456789012345678901234',
|
279
|
-
|
280
|
-
|
281
|
-
|
280
|
+
'subdomain' => 'flpjck',
|
281
|
+
'token' => 'token123',
|
282
|
+
'username' => '',
|
283
|
+
'password' => ''})
|
282
284
|
end
|
283
285
|
|
284
286
|
it "sets pagerduty credentials for a contact" do
|
285
287
|
contact = Flapjack::Data::Contact.find_by_id('c362', :redis => @redis)
|
286
288
|
contact.set_pagerduty_credentials('service_key' => '567890123456789012345678',
|
287
289
|
'subdomain' => 'eggs',
|
288
|
-
'
|
289
|
-
'
|
290
|
+
'token' => 'token123',
|
291
|
+
'username' => 'mary',
|
292
|
+
'password' => 'mary_password')
|
290
293
|
|
291
294
|
expect(@redis.hget('contact_media:c362', 'pagerduty')).to eq('567890123456789012345678')
|
292
295
|
expect(@redis.hgetall('contact_pagerduty:c362')).to eq({
|
293
296
|
'subdomain' => 'eggs',
|
294
|
-
'
|
295
|
-
'
|
297
|
+
'token' => 'token123',
|
298
|
+
'username' => 'mary',
|
299
|
+
'password' => 'mary_password'
|
296
300
|
})
|
297
301
|
end
|
298
302
|
|
@@ -60,4 +60,45 @@ describe Flapjack::Data::Migration, :redis => true do
|
|
60
60
|
expect(rule.contact_id).to eq(contact.id)
|
61
61
|
end
|
62
62
|
|
63
|
+
it "removes a disabled check from a medium's alerting checks" do
|
64
|
+
contact = Flapjack::Data::Contact.add( {
|
65
|
+
'id' => 'c363_a-f@42%*',
|
66
|
+
'first_name' => 'Jane',
|
67
|
+
'last_name' => 'Janeley',
|
68
|
+
'email' => 'janej@example.com',
|
69
|
+
'media' => {
|
70
|
+
'email' => {
|
71
|
+
'address' => 'janej@example.com',
|
72
|
+
'interval' => 60,
|
73
|
+
'rollup_threshold' => 5,
|
74
|
+
},
|
75
|
+
},
|
76
|
+
},
|
77
|
+
:redis => @redis)
|
78
|
+
|
79
|
+
entity = Flapjack::Data::Entity.add({ 'id' => '5000',
|
80
|
+
'name' => 'abc-123',
|
81
|
+
'contacts' => ['c363_a-f@42%*'] },
|
82
|
+
:redis => @redis)
|
83
|
+
|
84
|
+
entity_check_ping = Flapjack::Data::EntityCheck.for_entity_name('abc-123', 'ping', :redis => @redis)
|
85
|
+
entity_check_ping.update_state('critical')
|
86
|
+
|
87
|
+
entity_check_ssh = Flapjack::Data::EntityCheck.for_entity_name('abc-123', 'ssh', :redis => @redis)
|
88
|
+
entity_check_ssh.update_state('critical')
|
89
|
+
|
90
|
+
contact.add_alerting_check_for_media('email', 'abc-123:ping')
|
91
|
+
contact.add_alerting_check_for_media('email', 'abc-123:ssh')
|
92
|
+
|
93
|
+
expect(contact.alerting_checks_for_media('email')).to eq(['abc-123:ping', 'abc-123:ssh'])
|
94
|
+
|
95
|
+
entity_check_ssh.disable!
|
96
|
+
|
97
|
+
expect(contact.alerting_checks_for_media('email')).to eq(['abc-123:ping', 'abc-123:ssh'])
|
98
|
+
|
99
|
+
Flapjack::Data::Migration.correct_rollup_including_disabled_checks(:redis => @redis)
|
100
|
+
|
101
|
+
expect(contact.alerting_checks_for_media('email')).to eq(['abc-123:ping'])
|
102
|
+
end
|
103
|
+
|
63
104
|
end
|
@@ -10,8 +10,9 @@ describe 'Flapjack::Gateways::JSONAPI::PagerdutyCredentialMethods', :sinatra =>
|
|
10
10
|
let(:pagerduty_credentials) {
|
11
11
|
{'service_key' => 'abc',
|
12
12
|
'subdomain' => 'def',
|
13
|
-
'
|
14
|
-
'
|
13
|
+
'token' => 'ghi',
|
14
|
+
'username' => 'mongoose',
|
15
|
+
'password' => 'mongoose_password'
|
15
16
|
}
|
16
17
|
}
|
17
18
|
|
@@ -32,7 +32,7 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
32
32
|
it "looks for acknowledgements if the search is not already running" do
|
33
33
|
expect(redis).to receive(:get).with('sem_pagerduty_acks_running').and_return(nil)
|
34
34
|
expect(redis).to receive(:set).with('sem_pagerduty_acks_running', 'true')
|
35
|
-
expect(redis).to receive(:expire).with('sem_pagerduty_acks_running',
|
35
|
+
expect(redis).to receive(:expire).with('sem_pagerduty_acks_running', 3600)
|
36
36
|
|
37
37
|
expect(redis).to receive(:del).with('sem_pagerduty_acks_running')
|
38
38
|
|
@@ -69,16 +69,56 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
69
69
|
stub_request(:get, "https://flpjck.pagerduty.com/api/v1/incidents?" +
|
70
70
|
"fields=incident_number,status,last_status_change_by&incident_key=#{check}&" +
|
71
71
|
"since=#{since}&status=acknowledged&until=#{unt}").
|
72
|
-
with(:headers => {'Authorization'=>
|
72
|
+
with(:headers => {'Authorization'=>'Token token=token123'}).
|
73
73
|
to_return(:status => 200, :body => response.to_json, :headers => {})
|
74
74
|
|
75
75
|
expect(Flapjack::RedisPool).to receive(:new).and_return(redis)
|
76
76
|
fp = Flapjack::Gateways::Pagerduty.new(:config => config, :logger => @logger)
|
77
77
|
|
78
|
+
EM.synchrony do
|
79
|
+
result = fp.send(:pagerduty_acknowledged?, 'subdomain' => 'flpjck', 'token' => 'token123',
|
80
|
+
'check' => check)
|
81
|
+
|
82
|
+
expect(result).to be_a(Hash)
|
83
|
+
expect(result).to have_key(:pg_acknowledged_by)
|
84
|
+
expect(result[:pg_acknowledged_by]).to be_a(Hash)
|
85
|
+
expect(result[:pg_acknowledged_by]).to have_key('id')
|
86
|
+
expect(result[:pg_acknowledged_by]['id']).to eq('ABCDEFG')
|
87
|
+
EM.stop
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
it "looks for acknowledgements via the PagerDuty API with basic auth" do
|
93
|
+
check = 'PING'
|
94
|
+
expect(Time).to receive(:now).and_return(time)
|
95
|
+
since = (time.utc - (60*60*24*7)).iso8601 # the last week
|
96
|
+
unt = (time.utc + (60*60*24)).iso8601 # 1 day in the future
|
97
|
+
|
98
|
+
response = {"incidents" =>
|
99
|
+
[{"incident_number" => 12,
|
100
|
+
"status" => "acknowledged",
|
101
|
+
"last_status_change_by" => {"id"=>"ABCDEFG", "name"=>"John Smith",
|
102
|
+
"email"=>"johns@example.com",
|
103
|
+
"html_url"=>"http://flpjck.pagerduty.com/users/ABCDEFG"}
|
104
|
+
}
|
105
|
+
],
|
106
|
+
"limit"=>100,
|
107
|
+
"offset"=>0,
|
108
|
+
"total"=>1}
|
109
|
+
|
110
|
+
stub_request(:get, "https://flpjck.pagerduty.com/api/v1/incidents?" +
|
111
|
+
"fields=incident_number,status,last_status_change_by&incident_key=#{check}&" +
|
112
|
+
"since=#{since}&status=acknowledged&until=#{unt}").
|
113
|
+
with(:headers => {'Authorization'=>['flapjack', 'password123']}).
|
114
|
+
to_return(:status => 200, :body => response.to_json, :headers => {})
|
115
|
+
|
116
|
+
expect(Flapjack::RedisPool).to receive(:new).and_return(redis)
|
117
|
+
fp = Flapjack::Gateways::Pagerduty.new(:config => config, :logger => @logger)
|
78
118
|
|
79
119
|
EM.synchrony do
|
80
|
-
result = fp.send(:pagerduty_acknowledged?, 'subdomain' => 'flpjck',
|
81
|
-
|
120
|
+
result = fp.send(:pagerduty_acknowledged?, 'subdomain' => 'flpjck',
|
121
|
+
'username' => 'flapjack', 'password' => 'password123', 'check' => check)
|
82
122
|
|
83
123
|
expect(result).to be_a(Hash)
|
84
124
|
expect(result).to have_key(:pg_acknowledged_by)
|
@@ -98,18 +138,19 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
98
138
|
expect(contact).to receive(:pagerduty_credentials).and_return({
|
99
139
|
'service_key' => '12345678',
|
100
140
|
'subdomain"' => 'flpjck',
|
101
|
-
'
|
102
|
-
'password' => 'password123'
|
141
|
+
'token' => 'token123'
|
103
142
|
})
|
104
143
|
|
105
144
|
entity_check = double('entity_check')
|
106
|
-
expect(entity_check).to receive(:check).exactly(
|
145
|
+
expect(entity_check).to receive(:check).exactly(1).times.and_return('PING')
|
107
146
|
expect(entity_check).to receive(:contacts).and_return([contact])
|
108
|
-
expect(entity_check).to receive(:entity_name).exactly(
|
147
|
+
expect(entity_check).to receive(:entity_name).exactly(1).times.and_return('foo-app-01.bar.net')
|
148
|
+
expect(entity_check).to receive(:in_unscheduled_maintenance?).exactly(1).times.and_return(false)
|
149
|
+
expect(entity_check).to receive(:failed?).exactly(1).times.and_return(true)
|
109
150
|
expect(Flapjack::Data::Event).to receive(:create_acknowledgement).with('foo-app-01.bar.net', 'PING',
|
110
151
|
:summary => 'Acknowledged on PagerDuty', :duration => 14400, :redis => redis)
|
111
152
|
|
112
|
-
expect(Flapjack::Data::EntityCheck).to receive(:unacknowledged_failing).exactly(
|
153
|
+
expect(Flapjack::Data::EntityCheck).to receive(:unacknowledged_failing).exactly(1).times.and_return([entity_check])
|
113
154
|
|
114
155
|
expect(fp).to receive(:pagerduty_acknowledged?).and_return({})
|
115
156
|
|
@@ -192,7 +233,6 @@ describe Flapjack::Gateways::Pagerduty, :logger => true do
|
|
192
233
|
|
193
234
|
it "does not look for acknowledgements if all required credentials are not present" do
|
194
235
|
creds = {'subdomain' => 'example',
|
195
|
-
'username' => 'sausage',
|
196
236
|
'check' => 'PING'}
|
197
237
|
|
198
238
|
expect(Flapjack::RedisPool).to receive(:new).and_return(redis)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
+
require 'chronic_duration'
|
2
3
|
|
3
4
|
describe 'web/views/check.html.erb', :erb_view => true do
|
4
5
|
|
@@ -8,6 +9,10 @@ describe 'web/views/check.html.erb', :erb_view => true do
|
|
8
9
|
@entity = 'abc-xyz-01'
|
9
10
|
@check = 'Disk / Utilisation'
|
10
11
|
@last_notifications = {}
|
12
|
+
@check_initial_failure_delay = 30
|
13
|
+
@check_repeat_failure_delay = 60
|
14
|
+
@check_initial_failure_delay_is_default = true
|
15
|
+
@check_repeat_failure_delay_is_default = true
|
11
16
|
|
12
17
|
page = render_erb('check.html.erb', binding)
|
13
18
|
expect(page).to match(%r{/abc-xyz-01/Disk%20%2F%20Utilisation})
|
@@ -175,7 +175,6 @@ describe Flapjack::Gateways::Web, :sinatra => true, :logger => true do
|
|
175
175
|
:recovery => {:timestamp => time.to_i - (3 * 60 * 60), :summary => nil},
|
176
176
|
:acknowledgement => {:timestamp => nil, :summary => nil} }
|
177
177
|
|
178
|
-
expect_check_stats
|
179
178
|
expect(entity_check).to receive(:state).and_return('ok')
|
180
179
|
expect(entity_check).to receive(:last_update).and_return(time.to_i - (3 * 60 * 60))
|
181
180
|
expect(entity_check).to receive(:last_change).and_return(time.to_i - (3 * 60 * 60))
|
@@ -191,6 +190,8 @@ describe Flapjack::Gateways::Web, :sinatra => true, :logger => true do
|
|
191
190
|
expect(entity_check).to receive(:historical_states).
|
192
191
|
with(nil, time.to_i, :order => 'desc', :limit => 20).and_return([])
|
193
192
|
expect(entity_check).to receive(:enabled?).and_return(true)
|
193
|
+
expect(entity_check).to receive(:initial_failure_delay).exactly(2).times.and_return(30)
|
194
|
+
expect(entity_check).to receive(:repeat_failure_delay).exactly(2).times.and_return(60)
|
194
195
|
|
195
196
|
expect(Flapjack::Data::Entity).to receive(:find_by_name).
|
196
197
|
with(entity_name, :redis => redis).and_return(entity)
|
@@ -18,6 +18,7 @@ describe Flapjack::RedisPool do
|
|
18
18
|
expect(Flapjack::Data::Migration).to receive(:clear_orphaned_entity_ids).exactly(redis_count).times
|
19
19
|
expect(Flapjack::Data::Migration).to receive(:refresh_archive_index).exactly(redis_count).times
|
20
20
|
expect(Flapjack::Data::Migration).to receive(:validate_scheduled_maintenance_periods).exactly(redis_count).times
|
21
|
+
expect(Flapjack::Data::Migration).to receive(:correct_rollup_including_disabled_checks).exactly(redis_count).times
|
21
22
|
|
22
23
|
frp = Flapjack::RedisPool.new(:size => redis_count)
|
23
24
|
|
@@ -18,8 +18,8 @@
|
|
18
18
|
"body": [
|
19
19
|
{
|
20
20
|
"op": "replace",
|
21
|
-
"path": "/pagerduty_credentials/0/
|
22
|
-
"value": "
|
21
|
+
"path": "/pagerduty_credentials/0/token",
|
22
|
+
"value": "token123"
|
23
23
|
}
|
24
24
|
]
|
25
25
|
},
|
@@ -47,8 +47,8 @@
|
|
47
47
|
"body": [
|
48
48
|
{
|
49
49
|
"op": "replace",
|
50
|
-
"path": "/pagerduty_credentials/0/
|
51
|
-
"value": "
|
50
|
+
"path": "/pagerduty_credentials/0/token",
|
51
|
+
"value": "token123"
|
52
52
|
}
|
53
53
|
]
|
54
54
|
},
|
@@ -71,8 +71,8 @@
|
|
71
71
|
"body": [
|
72
72
|
{
|
73
73
|
"op": "replace",
|
74
|
-
"path": "/pagerduty_credentials/0/
|
75
|
-
"value": "
|
74
|
+
"path": "/pagerduty_credentials/0/token",
|
75
|
+
"value": "token123"
|
76
76
|
}
|
77
77
|
]
|
78
78
|
},
|
@@ -147,8 +147,7 @@
|
|
147
147
|
{
|
148
148
|
"service_key": "abc",
|
149
149
|
"subdomain": "def",
|
150
|
-
"
|
151
|
-
"password": "jkl"
|
150
|
+
"token": "ghi"
|
152
151
|
}
|
153
152
|
]
|
154
153
|
}
|
@@ -177,8 +176,7 @@
|
|
177
176
|
{
|
178
177
|
"service_key": "abc",
|
179
178
|
"subdomain": "def",
|
180
|
-
"
|
181
|
-
"password": "jkl"
|
179
|
+
"token": "ghi"
|
182
180
|
}
|
183
181
|
]
|
184
182
|
}
|
@@ -212,14 +210,12 @@
|
|
212
210
|
{
|
213
211
|
"service_key": "abc",
|
214
212
|
"subdomain": "def",
|
215
|
-
"
|
216
|
-
"password": "jkl"
|
213
|
+
"token": "ghi"
|
217
214
|
},
|
218
215
|
{
|
219
216
|
"service_key": "mno",
|
220
217
|
"subdomain": "pqr",
|
221
|
-
"
|
222
|
-
"password": "vwx"
|
218
|
+
"token": "stu"
|
223
219
|
}
|
224
220
|
]
|
225
221
|
}
|
@@ -261,8 +257,7 @@
|
|
261
257
|
{
|
262
258
|
"service_key": "abc",
|
263
259
|
"subdomain": "def",
|
264
|
-
"
|
265
|
-
"password": "jkl"
|
260
|
+
"token": "ghi"
|
266
261
|
}
|
267
262
|
]
|
268
263
|
}
|
@@ -285,8 +280,7 @@
|
|
285
280
|
{
|
286
281
|
"service_key": "abc",
|
287
282
|
"subdomain": "def",
|
288
|
-
"
|
289
|
-
"password": "jkl"
|
283
|
+
"token": "ghi"
|
290
284
|
}
|
291
285
|
]
|
292
286
|
}
|
@@ -301,8 +301,7 @@ Pact.provider_states_for "flapjack-diner" do
|
|
301
301
|
pdc_data = {
|
302
302
|
'service_key' => 'abc',
|
303
303
|
'subdomain' => 'def',
|
304
|
-
'
|
305
|
-
'password' => 'jkl',
|
304
|
+
'token' => 'ghi',
|
306
305
|
}
|
307
306
|
contact.set_pagerduty_credentials(pdc_data)
|
308
307
|
end
|
@@ -333,15 +332,13 @@ Pact.provider_states_for "flapjack-diner" do
|
|
333
332
|
pdc_data = {
|
334
333
|
'service_key' => 'abc',
|
335
334
|
'subdomain' => 'def',
|
336
|
-
'
|
337
|
-
'password' => 'jkl',
|
335
|
+
'token' => 'ghi',
|
338
336
|
}
|
339
337
|
contact.set_pagerduty_credentials(pdc_data)
|
340
338
|
pdc_data_2 = {
|
341
339
|
'service_key' => 'mno',
|
342
340
|
'subdomain' => 'pqr',
|
343
|
-
'
|
344
|
-
'password' => 'vwx',
|
341
|
+
'token' => 'stu',
|
345
342
|
}
|
346
343
|
contact_2.set_pagerduty_credentials(pdc_data_2)
|
347
344
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flapjack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0rc1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lindsay Holmwood
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2015-
|
14
|
+
date: 2015-05-13 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: dante
|
@@ -635,9 +635,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
635
635
|
version: '0'
|
636
636
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
637
637
|
requirements:
|
638
|
-
- - "
|
638
|
+
- - ">"
|
639
639
|
- !ruby/object:Gem::Version
|
640
|
-
version:
|
640
|
+
version: 1.3.1
|
641
641
|
requirements: []
|
642
642
|
rubyforge_project:
|
643
643
|
rubygems_version: 2.4.5
|