flapjack 0.6.61 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -1
- data/README.md +8 -4
- data/features/events.feature +269 -146
- data/features/notification_rules.feature +93 -0
- data/features/steps/events_steps.rb +162 -21
- data/features/steps/notifications_steps.rb +1 -1
- data/features/steps/time_travel_steps.rb +30 -19
- data/features/support/env.rb +71 -1
- data/flapjack.gemspec +3 -0
- data/lib/flapjack/data/contact.rb +256 -57
- data/lib/flapjack/data/entity.rb +2 -1
- data/lib/flapjack/data/entity_check.rb +22 -7
- data/lib/flapjack/data/global.rb +1 -0
- data/lib/flapjack/data/message.rb +2 -0
- data/lib/flapjack/data/notification_rule.rb +172 -0
- data/lib/flapjack/data/tag.rb +7 -2
- data/lib/flapjack/data/tag_set.rb +16 -0
- data/lib/flapjack/executive.rb +147 -13
- data/lib/flapjack/filters/delays.rb +21 -9
- data/lib/flapjack/gateways/api.rb +407 -27
- data/lib/flapjack/gateways/pagerduty.rb +1 -1
- data/lib/flapjack/gateways/web.rb +50 -22
- data/lib/flapjack/gateways/web/views/self_stats.haml +2 -0
- data/lib/flapjack/utility.rb +10 -0
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/data/contact_spec.rb +103 -6
- data/spec/lib/flapjack/data/global_spec.rb +2 -0
- data/spec/lib/flapjack/data/message_spec.rb +6 -0
- data/spec/lib/flapjack/data/notification_rule_spec.rb +22 -0
- data/spec/lib/flapjack/data/notification_spec.rb +6 -0
- data/spec/lib/flapjack/gateways/api_spec.rb +727 -4
- data/spec/lib/flapjack/gateways/jabber_spec.rb +1 -0
- data/spec/lib/flapjack/gateways/web_spec.rb +11 -1
- data/spec/spec_helper.rb +10 -0
- data/tmp/notification_rules.rb +73 -0
- data/tmp/test_json_post.rb +16 -0
- data/tmp/test_notification_rules_api.rb +170 -0
- metadata +59 -2
data/lib/flapjack/data/notification_rule.rb
CHANGED
@@ -0,0 +1,172 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'yajl/json_gem'
|
4
|
+
require 'active_support/time'
|
5
|
+
require 'ice_cube'
|
6
|
+
require 'flapjack/utility'
|
7
|
+
|
8
|
+
module Flapjack
|
9
|
+
module Data
|
10
|
+
class NotificationRule
|
11
|
+
|
12
|
+
extend Flapjack::Utility
|
13
|
+
|
14
|
+
attr_accessor :id, :contact_id, :entities, :entity_tags, :time_restrictions,
|
15
|
+
:warning_media, :critical_media, :warning_blackhole, :critical_blackhole
|
16
|
+
|
17
|
+
def self.exists_with_id?(rule_id, options = {})
|
18
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
19
|
+
raise "No id value passed" unless not (rule_id.nil? || rule_id == '')
|
20
|
+
logger = options[:logger]
|
21
|
+
redis.exists("notification_rule:#{rule_id}")
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.find_by_id(rule_id, options = {})
|
25
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
26
|
+
raise "No id value passed" unless not (rule_id.nil? || rule_id == '')
|
27
|
+
logger = options[:logger]
|
28
|
+
|
29
|
+
# sanity check
|
30
|
+
return unless redis.exists("notification_rule:#{rule_id}")
|
31
|
+
|
32
|
+
rule = self.new({:id => rule_id}, {:redis => redis})
|
33
|
+
rule.refresh
|
34
|
+
rule
|
35
|
+
end
|
36
|
+
|
37
|
+
# replacing save! etc
|
38
|
+
def self.add(rule_data, options)
|
39
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
40
|
+
|
41
|
+
rule_id = SecureRandom.uuid
|
42
|
+
self.add_or_update(rule_data.merge(:id => rule_id), :redis => redis)
|
43
|
+
self.find_by_id(rule_id, :redis => redis)
|
44
|
+
end
|
45
|
+
|
46
|
+
# add user's timezone string to the hash, deserialise
|
47
|
+
# time in the user's timezone also
|
48
|
+
def self.time_restriction_to_ice_cube_hash(tr, time_zone)
|
49
|
+
tr = symbolize(tr)
|
50
|
+
|
51
|
+
tr[:start_date] = tr[:start_time].dup
|
52
|
+
tr.delete(:start_time)
|
53
|
+
|
54
|
+
if tr[:start_date].is_a?(String)
|
55
|
+
tr[:start_date] = { :time => tr[:start_date] }
|
56
|
+
end
|
57
|
+
if tr[:start_date].is_a?(Hash)
|
58
|
+
tr[:start_date][:time] = time_zone.parse(tr[:start_date][:time])
|
59
|
+
tr[:start_date][:zone] = time_zone.name
|
60
|
+
end
|
61
|
+
|
62
|
+
if tr[:end_time].is_a?(String)
|
63
|
+
tr[:end_time] = { :time => tr[:end_time] }
|
64
|
+
end
|
65
|
+
if tr[:end_time].is_a?(Hash)
|
66
|
+
tr[:end_time][:time] = time_zone.parse(tr[:end_time][:time])
|
67
|
+
tr[:end_time][:zone] = time_zone.name
|
68
|
+
end
|
69
|
+
|
70
|
+
# rewrite Weekly to IceCube::WeeklyRule, etc
|
71
|
+
tr[:rrules].each {|rrule|
|
72
|
+
rrule[:rule_type] = "IceCube::#{rrule[:rule_type]}Rule"
|
73
|
+
}
|
74
|
+
|
75
|
+
tr
|
76
|
+
end
|
77
|
+
|
78
|
+
def self.time_restriction_from_ice_cube_hash(tr, time_zone)
|
79
|
+
tr[:start_date] = time_zone.utc_to_local(tr[:start_date][:time]).strftime "%Y-%m-%d %H:%M:%S"
|
80
|
+
tr[:end_time] = time_zone.utc_to_local(tr[:end_time][:time]).strftime "%Y-%m-%d %H:%M:%S"
|
81
|
+
|
82
|
+
# rewrite IceCube::WeeklyRule to Weekly, etc
|
83
|
+
tr[:rrules].each {|rrule|
|
84
|
+
rrule[:rule_type] = /^.*\:\:(.*)Rule$/.match(rrule[:rule_type])[1]
|
85
|
+
}
|
86
|
+
|
87
|
+
tr[:start_time] = tr[:start_date].dup
|
88
|
+
tr.delete(:start_date)
|
89
|
+
|
90
|
+
tr
|
91
|
+
end
|
92
|
+
|
93
|
+
def refresh
|
94
|
+
rule_data = @redis.hgetall("notification_rule:#{@id}")
|
95
|
+
|
96
|
+
@contact_id = rule_data['contact_id']
|
97
|
+
@entity_tags = Yajl::Parser.parse(rule_data['entity_tags'] || '')
|
98
|
+
@entities = Yajl::Parser.parse(rule_data['entities'] || '')
|
99
|
+
@time_restrictions = Yajl::Parser.parse(rule_data['time_restrictions'] || '')
|
100
|
+
@warning_media = Yajl::Parser.parse(rule_data['warning_media'] || '')
|
101
|
+
@critical_media = Yajl::Parser.parse(rule_data['critical_media'] || '')
|
102
|
+
@warning_blackhole = ((rule_data['warning_blackhole'] || 'false').downcase == 'true')
|
103
|
+
@critical_blackhole = ((rule_data['critical_blackhole'] || 'false').downcase == 'true')
|
104
|
+
|
105
|
+
end
|
106
|
+
|
107
|
+
def update(rule_data)
|
108
|
+
self.class.add_or_update(rule_data, :redis => @redis)
|
109
|
+
self.refresh
|
110
|
+
end
|
111
|
+
|
112
|
+
def to_json(*args)
|
113
|
+
hash = (Hash[ *([:id, :contact_id, :entity_tags, :entities,
|
114
|
+
:time_restrictions, :warning_media, :critical_media,
|
115
|
+
:warning_blackhole, :critical_blackhole].collect {|k|
|
116
|
+
[k, self.send(k)]
|
117
|
+
}).flatten(1) ])
|
118
|
+
hash.to_json
|
119
|
+
end
|
120
|
+
|
121
|
+
# tags or entity names match?
|
122
|
+
# nil @entity_tags and nil @entities matches
|
123
|
+
def match_entity?(event)
|
124
|
+
return true if (@entity_tags.nil? or @entity_tags.empty?) and
|
125
|
+
(@entities.nil? or @entities.empty?)
|
126
|
+
return true if @entities.include?(event.split(':').first)
|
127
|
+
# TODO: return true if event's entity tags match entity tag list on the rule
|
128
|
+
return false
|
129
|
+
end
|
130
|
+
|
131
|
+
def blackhole?(severity)
|
132
|
+
return true if 'warning'.eql?(severity.downcase) and @warning_blackhole
|
133
|
+
return true if 'critical'.eql?(severity.downcase) and @critical_blackhole
|
134
|
+
return false
|
135
|
+
end
|
136
|
+
|
137
|
+
def media_for_severity(severity)
|
138
|
+
case severity
|
139
|
+
when 'warning'
|
140
|
+
media_list = @warning_media
|
141
|
+
when 'critical'
|
142
|
+
media_list = @critical_media
|
143
|
+
end
|
144
|
+
media_list
|
145
|
+
end
|
146
|
+
|
147
|
+
private
|
148
|
+
|
149
|
+
def initialize(rule_data, opts = {})
|
150
|
+
@redis ||= opts[:redis]
|
151
|
+
@logger = opts[:logger]
|
152
|
+
raise "a redis connection must be supplied" unless @redis
|
153
|
+
@id = rule_data[:id]
|
154
|
+
end
|
155
|
+
|
156
|
+
def self.add_or_update(rule_data, options = {})
|
157
|
+
redis = options[:redis]
|
158
|
+
|
159
|
+
rule_data[:entities] = Yajl::Encoder.encode(rule_data[:entities])
|
160
|
+
rule_data[:entity_tags] = Yajl::Encoder.encode(rule_data[:entity_tags])
|
161
|
+
rule_data[:time_restrictions] = Yajl::Encoder.encode(rule_data[:time_restrictions])
|
162
|
+
rule_data[:warning_media] = Yajl::Encoder.encode(rule_data[:warning_media])
|
163
|
+
rule_data[:critical_media] = Yajl::Encoder.encode(rule_data[:critical_media])
|
164
|
+
|
165
|
+
redis.sadd("contact_notification_rules:#{rule_data[:contact_id]}", rule_data[:id])
|
166
|
+
redis.hmset("notification_rule:#{rule_data[:id]}", *rule_data.flatten)
|
167
|
+
end
|
168
|
+
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
data/lib/flapjack/data/tag.rb
CHANGED
@@ -10,9 +10,10 @@ module Flapjack
|
|
10
10
|
|
11
11
|
attr_accessor :name
|
12
12
|
|
13
|
-
def initialize(opts)
|
13
|
+
def initialize(opts = {})
|
14
|
+
raise "Redis connection not set" unless @redis = opts[:redis]
|
15
|
+
|
14
16
|
@name = opts[:name]
|
15
|
-
@redis = opts[:redis]
|
16
17
|
preset = @redis.smembers(@name)
|
17
18
|
enum = opts[:create] || []
|
18
19
|
@redis.sadd(@name, enum) unless enum.empty?
|
@@ -45,6 +46,10 @@ module Flapjack
|
|
45
46
|
super(o)
|
46
47
|
end
|
47
48
|
|
49
|
+
def to_json(*a)
|
50
|
+
self.to_a.to_json(*a)
|
51
|
+
end
|
52
|
+
|
48
53
|
end
|
49
54
|
end
|
50
55
|
end
|
data/lib/flapjack/executive.rb
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
require 'log4r'
|
4
4
|
require 'log4r/outputter/fileoutputter'
|
5
|
+
require 'tzinfo'
|
6
|
+
require 'active_support/time'
|
5
7
|
|
6
8
|
require 'flapjack/filters/acknowledgement'
|
7
9
|
require 'flapjack/filters/ok'
|
@@ -14,6 +16,7 @@ require 'flapjack/data/entity_check'
|
|
14
16
|
require 'flapjack/data/notification'
|
15
17
|
require 'flapjack/data/event'
|
16
18
|
require 'flapjack/redis_pool'
|
19
|
+
require 'flapjack/utility'
|
17
20
|
|
18
21
|
require 'flapjack/gateways/email'
|
19
22
|
require 'flapjack/gateways/sms_messagenet'
|
@@ -22,6 +25,8 @@ module Flapjack
|
|
22
25
|
|
23
26
|
class Executive
|
24
27
|
|
28
|
+
include Flapjack::Utility
|
29
|
+
|
25
30
|
def initialize(opts = {})
|
26
31
|
@config = opts[:config]
|
27
32
|
@redis_config = opts[:redis_config]
|
@@ -34,9 +39,24 @@ module Flapjack
|
|
34
39
|
:pagerduty => @config['pagerduty_queue']}
|
35
40
|
|
36
41
|
notifylog = @config['notification_log_file'] || 'log/notify.log'
|
42
|
+
if not File.directory?(File.dirname(notifylog))
|
43
|
+
puts "Parent directory for log file #{notifylog} doesn't exist"
|
44
|
+
puts "Exiting!"
|
45
|
+
exit
|
46
|
+
end
|
37
47
|
@notifylog = Log4r::Logger.new("executive")
|
38
48
|
@notifylog.add(Log4r::FileOutputter.new("notifylog", :filename => notifylog))
|
39
49
|
|
50
|
+
tz = nil
|
51
|
+
tz_string = @config['default_contact_timezone'] || ENV['TZ'] || 'UTC'
|
52
|
+
begin
|
53
|
+
tz = ActiveSupport::TimeZone.new(tz_string)
|
54
|
+
rescue ArgumentError
|
55
|
+
logger.error("Invalid timezone string specified in default_contact_timezone or TZ (#{tz_string})")
|
56
|
+
exit 1
|
57
|
+
end
|
58
|
+
@default_contact_timezone = tz
|
59
|
+
|
40
60
|
# FIXME: Put loading filters into separate method
|
41
61
|
# FIXME: should we make the filters more configurable by the end user?
|
42
62
|
options = { :log => opts[:logger], :persistence => @redis }
|
@@ -128,7 +148,7 @@ module Flapjack
|
|
128
148
|
end
|
129
149
|
|
130
150
|
@logger.info("Generating notifications for event #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}")
|
131
|
-
|
151
|
+
generate_notification_messages(event, entity_check)
|
132
152
|
end
|
133
153
|
|
134
154
|
def update_keys(event, entity_check)
|
@@ -198,8 +218,8 @@ module Flapjack
|
|
198
218
|
end
|
199
219
|
|
200
220
|
# takes an event for which a notification needs to be generated, works out the type of
|
201
|
-
# notification, updates the notification history in redis,
|
202
|
-
def
|
221
|
+
# notification, updates the notification history in redis, generates the notifications
|
222
|
+
def generate_notification_messages(event, entity_check)
|
203
223
|
timestamp = Time.now.to_i
|
204
224
|
notification_type = 'unknown'
|
205
225
|
case event.type
|
@@ -219,7 +239,9 @@ module Flapjack
|
|
219
239
|
end
|
220
240
|
end
|
221
241
|
@redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
|
242
|
+
@redis.set("#{event.id}:last_#{event.state}_notification", timestamp) if event.failure?
|
222
243
|
@redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
|
244
|
+
@redis.rpush("#{event.id}:#{event.state}_notifications", timestamp) if event.failure?
|
223
245
|
@logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.")
|
224
246
|
|
225
247
|
contacts = entity_check.contacts
|
@@ -231,21 +253,133 @@ module Flapjack
|
|
231
253
|
|
232
254
|
notification = Flapjack::Data::Notification.for_event(event, :type => notification_type)
|
233
255
|
|
234
|
-
notification.messages(:contacts => contacts)
|
235
|
-
|
256
|
+
enqueue_messages( apply_notification_rules( notification.messages(:contacts => contacts) ) )
|
257
|
+
|
258
|
+
end
|
259
|
+
|
260
|
+
# time restrictions match?
|
261
|
+
# nil rule.time_restrictions matches
|
262
|
+
# times (start, end) within time restrictions will have any UTC offset removed and will be
|
263
|
+
# considered to be in the timezone of the contact
|
264
|
+
def rule_occurring_now?(rule, opts)
|
265
|
+
contact = opts[:contact]
|
266
|
+
return true if rule.time_restrictions.nil? or rule.time_restrictions.empty?
|
267
|
+
|
268
|
+
time_zone = contact.timezone(:default => @default_contact_timezone)
|
269
|
+
usertime = time_zone.now
|
270
|
+
|
271
|
+
match = rule.time_restrictions.any? do |tr|
|
272
|
+
# add contact's timezone to the time restriction hash
|
273
|
+
tr = Flapjack::Data::NotificationRule.time_restriction_to_ice_cube_hash(tr, time_zone)
|
274
|
+
|
275
|
+
schedule = IceCube::Schedule.from_hash(tr)
|
276
|
+
schedule.occurring_at?(usertime)
|
277
|
+
end
|
278
|
+
!!match
|
279
|
+
end
|
280
|
+
|
281
|
+
# delete messages based on entity name(s), tags, severity, time of day
|
282
|
+
def apply_notification_rules(messages)
|
283
|
+
# first get all rules matching entity and time
|
284
|
+
@logger.debug "apply_notification_rules: got messages with size #{messages.size}"
|
285
|
+
|
286
|
+
# don't consider notification rules if the contact has none
|
287
|
+
|
288
|
+
tuple = messages.map do |message|
|
289
|
+
@logger.debug "considering message: #{message.medium} #{message.notification.event.id} #{message.notification.event.state}"
|
290
|
+
@logger.debug "contact_id: #{message.contact.id}"
|
291
|
+
rules = message.contact.notification_rules
|
292
|
+
@logger.debug "found #{rules.length} rules for this message's contact"
|
293
|
+
event_id = message.notification.event.id
|
294
|
+
options = {}
|
295
|
+
options[:no_rules_for_contact] = true if rules.empty?
|
296
|
+
# filter based on entity, tags, severity, time of day
|
297
|
+
matchers = rules.find_all do |rule|
|
298
|
+
rule.match_entity?(event_id) && rule_occurring_now?(rule, :contact => message.contact)
|
299
|
+
end
|
300
|
+
[message, matchers, options]
|
301
|
+
end
|
302
|
+
|
303
|
+
# matchers are rules of the contact that have matched the current event
|
304
|
+
# for time and entity
|
305
|
+
|
306
|
+
@logger.debug "apply_notification_rules: num messages after entity and time matching: #{tuple.size}"
|
307
|
+
|
308
|
+
# delete the matcher for all entities if there are more specific matchers
|
309
|
+
tuple = tuple.map do |message, matchers, options|
|
310
|
+
if matchers.length > 1
|
311
|
+
have_specific = matchers.detect do |matcher|
|
312
|
+
matcher.entities or matcher.entity_tags
|
313
|
+
end
|
314
|
+
if have_specific
|
315
|
+
# delete the rule for all entities
|
316
|
+
matchers.map! do |matcher|
|
317
|
+
matcher.entities.nil? and matcher.entity_tags.nil? ? nil : matcher
|
318
|
+
end
|
319
|
+
end
|
320
|
+
end
|
321
|
+
[message, matchers, options]
|
322
|
+
end
|
323
|
+
|
324
|
+
# delete media based on blackholes
|
325
|
+
tuple = tuple.find_all do |message, matchers, options|
|
326
|
+
severity = message.notification.event.state
|
327
|
+
# or use message.notification.contents['state']
|
328
|
+
matchers.none? {|matcher| matcher.blackhole?(severity) }
|
329
|
+
end
|
330
|
+
|
331
|
+
@logger.debug "apply_notification_rules: num messages after removing blackhole matches: #{tuple.size}"
|
332
|
+
|
333
|
+
# delete any media that doesn't meet severity<->media constraints
|
334
|
+
tuple = tuple.find_all do |message, matchers, options|
|
335
|
+
severity = message.notification.event.state
|
336
|
+
options[:no_rules_for_contact] ||
|
337
|
+
matchers.any? {|matcher|
|
338
|
+
matcher.media_for_severity(severity).include?(message.medium) ||
|
339
|
+
(@logger.warn("got nil for matcher.media_for_severity(#{severity}), matcher: #{matcher.inspect}") && false)
|
340
|
+
}
|
341
|
+
end
|
342
|
+
|
343
|
+
@logger.debug "apply_notification_rules: num messages after severity-media constraints: #{tuple.size}"
|
344
|
+
|
345
|
+
# delete media based on notification interval
|
346
|
+
tuple = tuple.find_all do |message, matchers, options|
|
347
|
+
not message.contact.drop_notifications?(:media => message.medium,
|
348
|
+
:check => message.notification.event.id,
|
349
|
+
:state => message.notification.event.state)
|
350
|
+
end
|
351
|
+
|
352
|
+
@logger.debug "apply_notification_rules: num messages after pruning for notification intervals: #{tuple.size}"
|
236
353
|
|
237
|
-
|
238
|
-
|
354
|
+
tuple.map do |message, matchers, options|
|
355
|
+
message
|
356
|
+
end
|
357
|
+
end
|
358
|
+
|
359
|
+
def enqueue_messages(messages)
|
360
|
+
|
361
|
+
messages.each do |message|
|
362
|
+
media_type = message.medium
|
363
|
+
contents = message.contents
|
364
|
+
event_id = message.notification.event.id
|
365
|
+
|
366
|
+
@notifylog.info("#{Time.now.to_s} | #{event_id} | " +
|
367
|
+
"#{message.notification.type} | #{message.contact.id} | #{media_type} | #{message.address}")
|
239
368
|
|
240
|
-
unless @queues[media_type]
|
369
|
+
unless @queues[media_type.to_sym]
|
241
370
|
@logger.error("no queue for media type: #{media_type}")
|
242
|
-
|
371
|
+
return
|
243
372
|
end
|
244
373
|
|
245
|
-
|
374
|
+
@logger.info("Enqueueing #{media_type} alert for #{event_id} to #{message.address}")
|
375
|
+
|
376
|
+
message.contact.update_sent_alert_keys(:media => message.medium,
|
377
|
+
:check => message.notification.event.id,
|
378
|
+
:state => message.notification.event.state)
|
379
|
+
# drop_alerts_for_contact:#{self.id}:#{media}:#{check}:#{state}
|
246
380
|
|
247
381
|
# TODO consider changing Resque jobs to use raw blpop like the others
|
248
|
-
case media_type
|
382
|
+
case media_type.to_sym
|
249
383
|
when :sms
|
250
384
|
Resque.enqueue_to(@queues[:sms], Flapjack::Gateways::SmsMessagenet, contents)
|
251
385
|
when :email
|
@@ -257,9 +391,9 @@ module Flapjack
|
|
257
391
|
when :pagerduty
|
258
392
|
@redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(contents))
|
259
393
|
end
|
260
|
-
|
261
394
|
end
|
262
|
-
|
395
|
+
|
396
|
+
end
|
263
397
|
|
264
398
|
end
|
265
399
|
end
|