flapjack 0.7.1 → 0.7.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +29 -0
- data/README.md +1 -4
- data/bin/flapjack +24 -2
- data/bin/flapjack-nagios-receiver +4 -2
- data/bin/receive-events +191 -0
- data/bin/simulate-failed-check +144 -0
- data/features/notification_rules.feature +63 -2
- data/features/steps/events_steps.rb +15 -3
- data/flapjack.gemspec +1 -1
- data/lib/flapjack/data/contact.rb +15 -9
- data/lib/flapjack/data/entity.rb +19 -1
- data/lib/flapjack/data/entity_check.rb +12 -0
- data/lib/flapjack/data/event.rb +10 -2
- data/lib/flapjack/data/notification.rb +12 -8
- data/lib/flapjack/data/notification_rule.rb +3 -1
- data/lib/flapjack/executive.rb +71 -17
- data/lib/flapjack/gateways/api.rb +5 -2
- data/lib/flapjack/gateways/jabber.rb +26 -17
- data/lib/flapjack/gateways/web.rb +54 -9
- data/lib/flapjack/gateways/web/public/css/bootstrap-responsive.min.css +9 -0
- data/lib/flapjack/gateways/web/public/css/bootstrap.min.css +9 -0
- data/lib/flapjack/gateways/web/public/css/flapjack.css +51 -0
- data/lib/flapjack/gateways/web/public/img/flapjack_white_bg_400_353.jpeg +0 -0
- data/lib/flapjack/gateways/web/public/img/glyphicons-halflings-white.png +0 -0
- data/lib/flapjack/gateways/web/public/img/glyphicons-halflings.png +0 -0
- data/lib/flapjack/gateways/web/public/js/bootstrap.min.js +6 -0
- data/lib/flapjack/gateways/web/views/_foot.haml +8 -0
- data/lib/flapjack/gateways/web/views/_head.haml +10 -0
- data/lib/flapjack/gateways/web/views/_nav.haml +9 -3
- data/lib/flapjack/gateways/web/views/check.haml +140 -138
- data/lib/flapjack/gateways/web/views/checks.haml +49 -0
- data/lib/flapjack/gateways/web/views/contact.haml +78 -37
- data/lib/flapjack/gateways/web/views/contacts.haml +23 -17
- data/lib/flapjack/gateways/web/views/entities.haml +28 -0
- data/lib/flapjack/gateways/web/views/entity.haml +44 -0
- data/lib/flapjack/gateways/web/views/index.haml +27 -44
- data/lib/flapjack/gateways/web/views/self_stats.haml +65 -22
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/executive_spec.rb +6 -2
- data/spec/lib/flapjack/gateways/api_spec.rb +15 -0
- data/spec/lib/flapjack/gateways/web/views/contact.haml_spec.rb +2 -1
- data/spec/lib/flapjack/gateways/web/views/index.haml_spec.rb +3 -2
- data/spec/lib/flapjack/gateways/web_spec.rb +23 -9
- data/tmp/create_events_failure.rb +6 -4
- metadata +23 -12
@@ -11,6 +11,7 @@ Feature: Notification rules on a per contact basis
|
|
11
11
|
| id | name | contacts |
|
12
12
|
| 1 | foo | 1 |
|
13
13
|
| 2 | bar | 1,2 |
|
14
|
+
| 3 | baz | 1 |
|
14
15
|
|
15
16
|
And user 1 has the following notification intervals:
|
16
17
|
| email | sms |
|
@@ -20,6 +21,7 @@ Feature: Notification rules on a per contact basis
|
|
20
21
|
| id | entities | entity_tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
|
21
22
|
| 1 | foo | | email | sms,email | | | 8-18 weekdays |
|
22
23
|
| 2 | bar | | | sms,email | true | | |
|
24
|
+
| 3 | baz | | email | sms,email | | | |
|
23
25
|
|
24
26
|
@time_restrictions @time
|
25
27
|
Scenario: Alerts only during specified time restrictions
|
@@ -60,7 +62,45 @@ Feature: Notification rules on a per contact basis
|
|
60
62
|
Then no email alerts should be queued for malak@example.com
|
61
63
|
|
62
64
|
@severity @time
|
63
|
-
Scenario:
|
65
|
+
Scenario: Recoveries are not affected by notification rules
|
66
|
+
Given the check is check 'ping' on entity 'baz'
|
67
|
+
And the check is in an ok state
|
68
|
+
When a critical event is received
|
69
|
+
And 5 minutes passes
|
70
|
+
And a critical event is received
|
71
|
+
Then 1 email alert should be queued for malak@example.com
|
72
|
+
When 1 minute passes
|
73
|
+
And an ok event is received
|
74
|
+
Then 2 email alerts should be queued for malak@example.com
|
75
|
+
|
76
|
+
@severity @time
|
77
|
+
Scenario: Alerts are sent to media of highest severity reached since last ok
|
78
|
+
Given the check is check 'ping' on entity 'baz'
|
79
|
+
And the check is in an ok state
|
80
|
+
When a warning event is received
|
81
|
+
And 1 minute passes
|
82
|
+
And a warning event is received
|
83
|
+
Then 1 email alert should be queued for malak@example.com
|
84
|
+
And 0 sms alerts should be queued for +61400000001
|
85
|
+
When 70 minutes passes
|
86
|
+
And a critical event is received
|
87
|
+
And 1 minute passes
|
88
|
+
And a critical event is received
|
89
|
+
Then 2 email alerts should be queued for malak@example.com
|
90
|
+
And 1 sms alert should be queued for +61400000001
|
91
|
+
When 70 minutes passes
|
92
|
+
And a warning event is received
|
93
|
+
And 1 minute passes
|
94
|
+
And a warning event is received
|
95
|
+
Then 3 email alerts should be queued for malak@example.com
|
96
|
+
And 2 sms alerts should be queued for +61400000001
|
97
|
+
When 70 minutes passes
|
98
|
+
And an ok event is received
|
99
|
+
Then 4 email alerts should be queued for malak@example.com
|
100
|
+
And 3 sms alerts should be queued for +61400000001
|
101
|
+
|
102
|
+
@severity @time
|
103
|
+
Scenario: Alerts only when media,severity matches any matching rule's severity's media with ok->warning->critical->ok
|
64
104
|
Given the check is check 'ping' on entity 'bar'
|
65
105
|
And the check is in an ok state
|
66
106
|
When a warning event is received
|
@@ -68,9 +108,12 @@ Feature: Notification rules on a per contact basis
|
|
68
108
|
And a warning event is received
|
69
109
|
Then no email alerts should be queued for malak@example.com
|
70
110
|
When a critical event is received
|
71
|
-
And 5
|
111
|
+
And 5 minutes passes
|
72
112
|
And a critical event is received
|
73
113
|
Then 1 email alert should be queued for malak@example.com
|
114
|
+
When 1 minute passes
|
115
|
+
And an ok event is received
|
116
|
+
Then 2 email alert should be queued for malak@example.com
|
74
117
|
|
75
118
|
@blackhole
|
76
119
|
Scenario: Drop alerts matching a blackhole rule
|
@@ -91,3 +134,21 @@ Feature: Notification rules on a per contact basis
|
|
91
134
|
And a critical event is received
|
92
135
|
Then 2 email alerts should be queued for malak@example.com
|
93
136
|
|
137
|
+
@intervals @time
|
138
|
+
Scenario: Problem directly after Recovery should alert despite notification intervals
|
139
|
+
Given the check is check 'ping' on entity 'baz'
|
140
|
+
And the check is in an ok state
|
141
|
+
When a critical event is received
|
142
|
+
And 1 minute passes
|
143
|
+
And a critical event is received
|
144
|
+
Then 1 email alert should be queued for malak@example.com
|
145
|
+
And 1 sms alert should be queued for +61400000001
|
146
|
+
When an ok event is received
|
147
|
+
Then 2 email alerts should be queued for malak@example.com
|
148
|
+
And 2 sms alerts should be queued for +61400000001
|
149
|
+
When 1 minute passes
|
150
|
+
And a critical event is received
|
151
|
+
And 1 minute passes
|
152
|
+
And a critical event is received
|
153
|
+
Then 3 email alerts should be queued for malak@example.com
|
154
|
+
And 3 sms alerts should be queued for +61400000001
|
@@ -209,7 +209,8 @@ Then /^a notification should be generated(?: for check '([\w\.\-]+)' on entity '
|
|
209
209
|
found.should be_true
|
210
210
|
end
|
211
211
|
|
212
|
-
Then /^show me the log$/ do
|
212
|
+
Then /^show me the (\w+ )*log$/ do |adjective|
|
213
|
+
puts "the #{adjective}log:"
|
213
214
|
puts @logger.messages.join("\n")
|
214
215
|
end
|
215
216
|
|
@@ -232,8 +233,8 @@ end
|
|
232
233
|
Given /^the following users exist:$/ do |contacts|
|
233
234
|
contacts.hashes.each do |contact|
|
234
235
|
media = {}
|
235
|
-
media['email'] = contact['email']
|
236
|
-
media['sms'] = contact['sms']
|
236
|
+
media['email'] = { 'address' => contact['email'] }
|
237
|
+
media['sms'] = { 'address' => contact['sms'] }
|
237
238
|
Flapjack::Data::Contact.add({'id' => contact['id'],
|
238
239
|
'first_name' => contact['first_name'],
|
239
240
|
'last_name' => contact['last_name'],
|
@@ -302,3 +303,14 @@ Then /^(.*) email alert(?:s)? should be queued for (.*)$/ do |num_queued, addres
|
|
302
303
|
queue = Resque.peek('email_notifications', 0, 30)
|
303
304
|
queue.find_all {|n| n['args'].first['address'] == address }.length.should == num_queued.to_i
|
304
305
|
end
|
306
|
+
|
307
|
+
Then /^(.*) sms alert(?:s)? should be queued for (.*)$/ do |num_queued, address|
|
308
|
+
check = check ? check : @check
|
309
|
+
entity = entity ? entity : @entity
|
310
|
+
case num_queued
|
311
|
+
when 'no'
|
312
|
+
num_queued = 0
|
313
|
+
end
|
314
|
+
queue = Resque.peek('sms_notifications', 0, 30)
|
315
|
+
queue.find_all {|n| n['args'].first['address'] == address }.length.should == num_queued.to_i
|
316
|
+
end
|
data/flapjack.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |gem|
|
|
32
32
|
gem.add_dependency 'haml'
|
33
33
|
gem.add_dependency 'thin'
|
34
34
|
gem.add_dependency 'mail'
|
35
|
-
gem.add_dependency 'blather', '0.8.
|
35
|
+
gem.add_dependency 'blather', '~> 0.8.3'
|
36
36
|
gem.add_dependency 'chronic'
|
37
37
|
gem.add_dependency 'chronic_duration'
|
38
38
|
gem.add_dependency 'activesupport'
|
@@ -229,12 +229,17 @@ module Flapjack
|
|
229
229
|
end
|
230
230
|
|
231
231
|
def update_sent_alert_keys(opts)
|
232
|
-
media
|
233
|
-
check
|
234
|
-
state
|
232
|
+
media = opts[:media]
|
233
|
+
check = opts[:check]
|
234
|
+
state = opts[:state]
|
235
|
+
delete = !! opts[:delete]
|
235
236
|
key = "drop_alerts_for_contact:#{self.id}:#{media}:#{check}:#{state}"
|
236
|
-
|
237
|
-
|
237
|
+
if delete
|
238
|
+
@redis.del(key)
|
239
|
+
else
|
240
|
+
@redis.set(key, 'd')
|
241
|
+
@redis.expire(key, self.interval_for_media(media))
|
242
|
+
end
|
238
243
|
end
|
239
244
|
|
240
245
|
# FIXME
|
@@ -331,14 +336,15 @@ module Flapjack
|
|
331
336
|
*['first_name', 'last_name', 'email'].collect {|f| [f, contact_data[f]]})
|
332
337
|
|
333
338
|
unless contact_data['media'].nil?
|
334
|
-
contact_data['media'].each_pair {|medium, address|
|
339
|
+
contact_data['media'].each_pair {|medium, details|
|
335
340
|
case medium
|
336
341
|
when 'pagerduty'
|
337
|
-
redis.hset("contact_media:#{contact_id}", medium, address['service_key'])
|
342
|
+
redis.hset("contact_media:#{contact_id}", medium, details['service_key'])
|
338
343
|
redis.hmset("contact_pagerduty:#{contact_id}",
|
339
|
-
*['subdomain', 'username', 'password'].collect {|f| [f, address[f]]})
|
344
|
+
*['subdomain', 'username', 'password'].collect {|f| [f, details[f]]})
|
340
345
|
else
|
341
|
-
redis.hset("contact_media:#{contact_id}", medium, address)
|
346
|
+
redis.hset("contact_media:#{contact_id}", medium, details['address'])
|
347
|
+
redis.hset("contact_media_intervals:#{contact_id}", medium, details['interval']) if details['interval']
|
342
348
|
end
|
343
349
|
}
|
344
350
|
end
|
data/lib/flapjack/data/entity.rb
CHANGED
@@ -78,15 +78,33 @@ module Flapjack
|
|
78
78
|
# time
|
79
79
|
def self.find_all_name_matching(pattern, options = {})
|
80
80
|
raise "Redis connection not set" unless redis = options[:redis]
|
81
|
+
begin
|
82
|
+
regex = /#{pattern}/
|
83
|
+
rescue => e
|
84
|
+
if @logger
|
85
|
+
@logger.info("Jabber#self.find_all_name_matching - unable to use /#{pattern}/ as a regex pattern: #{e}")
|
86
|
+
end
|
87
|
+
return nil
|
88
|
+
end
|
81
89
|
redis.keys('entity_id:*').inject([]) {|memo, check|
|
82
90
|
a, entity_name = check.split(':')
|
83
|
-
if (entity_name =~ /#{pattern}/) && !memo.include?(entity_name)
|
91
|
+
if (entity_name =~ regex) && !memo.include?(entity_name)
|
84
92
|
memo << entity_name
|
85
93
|
end
|
86
94
|
memo
|
87
95
|
}.sort
|
88
96
|
end
|
89
97
|
|
98
|
+
def self.find_all_with_checks(options)
|
99
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
100
|
+
redis.keys("check:*").map {|s| s.match(/.*:(.*):.*/)[1] }.to_set
|
101
|
+
end
|
102
|
+
|
103
|
+
def self.find_all_with_failing_checks(options)
|
104
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
105
|
+
redis.zrange("failed_checks", 0, -1).map {|s| s.match(/(.*):.*/)[1] }.to_set
|
106
|
+
end
|
107
|
+
|
90
108
|
def contacts
|
91
109
|
contact_ids = @redis.smembers("contacts_for:#{id}")
|
92
110
|
|
@@ -304,6 +304,18 @@ module Flapjack
|
|
304
304
|
ln
|
305
305
|
end
|
306
306
|
|
307
|
+
def max_notified_severity_of_current_failure
|
308
|
+
last_recovery = last_recovery_notification || 0
|
309
|
+
|
310
|
+
last_critical = last_critical_notification
|
311
|
+
return STATE_CRITICAL if last_critical && (last_critical > last_recovery)
|
312
|
+
|
313
|
+
last_warning = last_warning_notification
|
314
|
+
return STATE_WARNING if last_warning && (last_warning > last_recovery)
|
315
|
+
|
316
|
+
nil
|
317
|
+
end
|
318
|
+
|
307
319
|
# unpredictable results if there are multiple notifications of different
|
308
320
|
# types sent at the same time
|
309
321
|
def last_notification
|
data/lib/flapjack/data/event.rb
CHANGED
@@ -25,6 +25,9 @@ module Flapjack
|
|
25
25
|
:archive_events => false,
|
26
26
|
:events_archive_maxage => (3 * 60 * 60) }
|
27
27
|
options = defaults.merge(opts)
|
28
|
+
if options[:logger]
|
29
|
+
logger = options[:logger]
|
30
|
+
end
|
28
31
|
|
29
32
|
if options[:archive_events]
|
30
33
|
dest = "events_archive:#{Time.now.utc.strftime "%Y%m%d%H"}"
|
@@ -43,8 +46,13 @@ module Flapjack
|
|
43
46
|
return unless raw
|
44
47
|
end
|
45
48
|
end
|
46
|
-
|
47
|
-
|
49
|
+
begin
|
50
|
+
parsed = ::JSON.parse( raw )
|
51
|
+
rescue => e
|
52
|
+
logger.warn("Error deserialising event json: #{e}, raw json: #{raw.inspect}")
|
53
|
+
return nil
|
54
|
+
end
|
55
|
+
return self.new( parsed )
|
48
56
|
end
|
49
57
|
|
50
58
|
# creates, or modifies, an event object and adds it to the events list in redis
|
@@ -6,10 +6,12 @@ module Flapjack
|
|
6
6
|
module Data
|
7
7
|
class Notification
|
8
8
|
|
9
|
-
attr_accessor :event, :type
|
9
|
+
attr_accessor :event, :type, :max_notified_severity
|
10
10
|
|
11
11
|
def self.for_event(event, opts = {})
|
12
|
-
self.new(:event => event, :type => opts[:type])
|
12
|
+
self.new(:event => event,
|
13
|
+
:type => opts[:type],
|
14
|
+
:max_notified_severity => opts[:max_notified_severity])
|
13
15
|
end
|
14
16
|
|
15
17
|
def messages(opts = {})
|
@@ -28,12 +30,13 @@ module Flapjack
|
|
28
30
|
end
|
29
31
|
|
30
32
|
def contents
|
31
|
-
@contents ||= {'event_id'
|
32
|
-
'state'
|
33
|
-
'summary'
|
34
|
-
'time'
|
35
|
-
'duration'
|
36
|
-
'notification_type'
|
33
|
+
@contents ||= {'event_id' => event.id,
|
34
|
+
'state' => event.state,
|
35
|
+
'summary' => event.summary,
|
36
|
+
'time' => event.time,
|
37
|
+
'duration' => event.duration || nil,
|
38
|
+
'notification_type' => type,
|
39
|
+
'max_notified_severity' => max_notified_severity }
|
37
40
|
end
|
38
41
|
|
39
42
|
private
|
@@ -42,6 +45,7 @@ module Flapjack
|
|
42
45
|
raise "Event not passed" unless event = opts[:event]
|
43
46
|
@event = event
|
44
47
|
@type = opts[:type]
|
48
|
+
@max_notified_severity = opts[:max_notified_severity]
|
45
49
|
end
|
46
50
|
|
47
51
|
end
|
@@ -105,7 +105,7 @@ module Flapjack
|
|
105
105
|
end
|
106
106
|
|
107
107
|
def update(rule_data)
|
108
|
-
self.class.add_or_update(rule_data, :redis => @redis)
|
108
|
+
self.class.add_or_update(rule_data.merge(:id => @id), :redis => @redis)
|
109
109
|
self.refresh
|
110
110
|
end
|
111
111
|
|
@@ -154,6 +154,8 @@ module Flapjack
|
|
154
154
|
end
|
155
155
|
|
156
156
|
def self.add_or_update(rule_data, options = {})
|
157
|
+
raise ":id is a required key in rule_data" unless rule_data[:id]
|
158
|
+
|
157
159
|
redis = options[:redis]
|
158
160
|
|
159
161
|
rule_data[:entities] = Yajl::Encoder.encode(rule_data[:entities])
|
data/lib/flapjack/executive.rb
CHANGED
@@ -80,9 +80,6 @@ module Flapjack
|
|
80
80
|
# we could generate a fuid and save it to disk, and prepend it from that
|
81
81
|
# point on...
|
82
82
|
|
83
|
-
# TODO unset on exit?
|
84
|
-
@redis.set('boot_time', @boot_time.to_i)
|
85
|
-
|
86
83
|
# FIXME: add an administrative function to reset all event counters
|
87
84
|
if @redis.hget('event_counters', 'all').nil?
|
88
85
|
@redis.hset('event_counters', 'all', 0)
|
@@ -91,11 +88,27 @@ module Flapjack
|
|
91
88
|
@redis.hset('event_counters', 'action', 0)
|
92
89
|
end
|
93
90
|
|
94
|
-
|
91
|
+
#@redis.zadd('executive_instances', @boot_time.to_i, @instance_id)
|
92
|
+
@redis.hset("executive_instance:#{@instance_id}", 'boot_time', @boot_time.to_i)
|
95
93
|
@redis.hset("event_counters:#{@instance_id}", 'all', 0)
|
96
94
|
@redis.hset("event_counters:#{@instance_id}", 'ok', 0)
|
97
95
|
@redis.hset("event_counters:#{@instance_id}", 'failure', 0)
|
98
96
|
@redis.hset("event_counters:#{@instance_id}", 'action', 0)
|
97
|
+
touch_keys
|
98
|
+
end
|
99
|
+
|
100
|
+
# expire instance keys after one week
|
101
|
+
# TODO: set up a separate EM timer to reset key expiry every minute
|
102
|
+
# and reduce the expiry to, say, five minutes
|
103
|
+
# TODO: remove these keys on process exit
|
104
|
+
def touch_keys
|
105
|
+
[ "executive_instance:#{@instance_id}",
|
106
|
+
"event_counters:#{@instance_id}",
|
107
|
+
"event_counters:#{@instance_id}",
|
108
|
+
"event_counters:#{@instance_id}",
|
109
|
+
"event_counters:#{@instance_id}" ].each {|key|
|
110
|
+
@redis.expire(key, 1036800)
|
111
|
+
}
|
99
112
|
end
|
100
113
|
|
101
114
|
def start
|
@@ -105,7 +118,8 @@ module Flapjack
|
|
105
118
|
@logger.debug("Waiting for event...")
|
106
119
|
event = Flapjack::Data::Event.next(:redis => @redis,
|
107
120
|
:archive_events => @archive_events,
|
108
|
-
:events_archive_maxage => @events_archive_maxage)
|
121
|
+
:events_archive_maxage => @events_archive_maxage,
|
122
|
+
:logger => @logger)
|
109
123
|
process_event(event) unless event.nil?
|
110
124
|
end
|
111
125
|
|
@@ -157,6 +171,10 @@ module Flapjack
|
|
157
171
|
end
|
158
172
|
|
159
173
|
def update_keys(event, entity_check)
|
174
|
+
|
175
|
+
# TODO: run touch_keys from a separate EM timer for efficiency
|
176
|
+
touch_keys
|
177
|
+
|
160
178
|
result = { :skip_filters => false }
|
161
179
|
timestamp = Time.now.to_i
|
162
180
|
@event_count = @redis.hincrby('event_counters', 'all', 1)
|
@@ -243,6 +261,9 @@ module Flapjack
|
|
243
261
|
notification_type = 'test'
|
244
262
|
end
|
245
263
|
end
|
264
|
+
|
265
|
+
max_notified_severity = entity_check.max_notified_severity_of_current_failure
|
266
|
+
|
246
267
|
@redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
|
247
268
|
@redis.set("#{event.id}:last_#{event.state}_notification", timestamp) if event.failure?
|
248
269
|
@redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
|
@@ -252,13 +273,17 @@ module Flapjack
|
|
252
273
|
contacts = entity_check.contacts
|
253
274
|
|
254
275
|
if contacts.empty?
|
276
|
+
@logger.debug("No contacts for #{event.id}")
|
255
277
|
@notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | NO CONTACTS")
|
256
278
|
return
|
257
279
|
end
|
258
280
|
|
259
|
-
notification = Flapjack::Data::Notification.for_event(event, :type => notification_type)
|
281
|
+
notification = Flapjack::Data::Notification.for_event(
|
282
|
+
event, :type => notification_type, :max_notified_severity => max_notified_severity)
|
260
283
|
|
261
|
-
|
284
|
+
messages = notification.messages(:contacts => contacts)
|
285
|
+
messages = apply_notification_rules(messages)
|
286
|
+
enqueue_messages(messages)
|
262
287
|
|
263
288
|
end
|
264
289
|
|
@@ -291,8 +316,7 @@ module Flapjack
|
|
291
316
|
# don't consider notification rules if the contact has none
|
292
317
|
|
293
318
|
tuple = messages.map do |message|
|
294
|
-
@logger.debug "considering message: #{message.medium} #{message.notification.event.id} #{message.notification.event.state}"
|
295
|
-
@logger.debug "contact_id: #{message.contact.id}"
|
319
|
+
@logger.debug "considering message for contact: #{message.contact.id} #{message.medium} #{message.notification.event.id} #{message.notification.event.state}"
|
296
320
|
rules = message.contact.notification_rules
|
297
321
|
@logger.debug "found #{rules.length} rules for this message's contact"
|
298
322
|
event_id = message.notification.event.id
|
@@ -337,15 +361,32 @@ module Flapjack
|
|
337
361
|
|
338
362
|
# delete any media that doesn't meet severity<->media constraints
|
339
363
|
tuple = tuple.find_all do |message, matchers, options|
|
340
|
-
|
364
|
+
state = message.notification.event.state
|
365
|
+
max_notified_severity = message.notification.max_notified_severity
|
366
|
+
|
367
|
+
# use EntityCheck#max_notified_severity_of_current_failure
|
368
|
+
# as calculated prior to updating the last_notification* keys
|
369
|
+
# if it's a higher severity than the current state
|
370
|
+
severity = 'ok'
|
371
|
+
case
|
372
|
+
when ([state, max_notified_severity] & ['critical', 'unknown']).any?
|
373
|
+
severity = 'critical'
|
374
|
+
when [state, max_notified_severity].include?('warning')
|
375
|
+
severity = 'warning'
|
376
|
+
end
|
341
377
|
options[:no_rules_for_contact] ||
|
342
378
|
matchers.any? {|matcher|
|
343
|
-
matcher.media_for_severity(severity)
|
344
|
-
|
379
|
+
mms = matcher.media_for_severity(severity)
|
380
|
+
unless mms
|
381
|
+
answer = false
|
382
|
+
else
|
383
|
+
answer = mms.include?(message.medium)
|
384
|
+
end
|
385
|
+
answer
|
345
386
|
}
|
346
387
|
end
|
347
388
|
|
348
|
-
@logger.debug "apply_notification_rules: num messages after severity-media constraints: #{tuple.size}"
|
389
|
+
@logger.debug "apply_notification_rules: num messages after pruning for severity-media constraints: #{tuple.size}"
|
349
390
|
|
350
391
|
# delete media based on notification interval
|
351
392
|
tuple = tuple.find_all do |message, matchers, options|
|
@@ -378,10 +419,23 @@ module Flapjack
|
|
378
419
|
|
379
420
|
@logger.info("Enqueueing #{media_type} alert for #{event_id} to #{message.address}")
|
380
421
|
|
381
|
-
message.
|
382
|
-
|
383
|
-
|
384
|
-
|
422
|
+
if message.notification.event.state == 'ok'
|
423
|
+
message.contact.update_sent_alert_keys(
|
424
|
+
:media => message.medium,
|
425
|
+
:check => message.notification.event.id,
|
426
|
+
:state => 'warning',
|
427
|
+
:delete => true)
|
428
|
+
message.contact.update_sent_alert_keys(
|
429
|
+
:media => message.medium,
|
430
|
+
:check => message.notification.event.id,
|
431
|
+
:state => 'critical',
|
432
|
+
:delete => true)
|
433
|
+
else
|
434
|
+
message.contact.update_sent_alert_keys(
|
435
|
+
:media => message.medium,
|
436
|
+
:check => message.notification.event.id,
|
437
|
+
:state => message.notification.event.state)
|
438
|
+
end
|
385
439
|
|
386
440
|
# TODO consider changing Resque jobs to use raw blpop like the others
|
387
441
|
case media_type.to_sym
|