flapjack 0.6.61 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/Gemfile +2 -1
  2. data/README.md +8 -4
  3. data/features/events.feature +269 -146
  4. data/features/notification_rules.feature +93 -0
  5. data/features/steps/events_steps.rb +162 -21
  6. data/features/steps/notifications_steps.rb +1 -1
  7. data/features/steps/time_travel_steps.rb +30 -19
  8. data/features/support/env.rb +71 -1
  9. data/flapjack.gemspec +3 -0
  10. data/lib/flapjack/data/contact.rb +256 -57
  11. data/lib/flapjack/data/entity.rb +2 -1
  12. data/lib/flapjack/data/entity_check.rb +22 -7
  13. data/lib/flapjack/data/global.rb +1 -0
  14. data/lib/flapjack/data/message.rb +2 -0
  15. data/lib/flapjack/data/notification_rule.rb +172 -0
  16. data/lib/flapjack/data/tag.rb +7 -2
  17. data/lib/flapjack/data/tag_set.rb +16 -0
  18. data/lib/flapjack/executive.rb +147 -13
  19. data/lib/flapjack/filters/delays.rb +21 -9
  20. data/lib/flapjack/gateways/api.rb +407 -27
  21. data/lib/flapjack/gateways/pagerduty.rb +1 -1
  22. data/lib/flapjack/gateways/web.rb +50 -22
  23. data/lib/flapjack/gateways/web/views/self_stats.haml +2 -0
  24. data/lib/flapjack/utility.rb +10 -0
  25. data/lib/flapjack/version.rb +1 -1
  26. data/spec/lib/flapjack/data/contact_spec.rb +103 -6
  27. data/spec/lib/flapjack/data/global_spec.rb +2 -0
  28. data/spec/lib/flapjack/data/message_spec.rb +6 -0
  29. data/spec/lib/flapjack/data/notification_rule_spec.rb +22 -0
  30. data/spec/lib/flapjack/data/notification_spec.rb +6 -0
  31. data/spec/lib/flapjack/gateways/api_spec.rb +727 -4
  32. data/spec/lib/flapjack/gateways/jabber_spec.rb +1 -0
  33. data/spec/lib/flapjack/gateways/web_spec.rb +11 -1
  34. data/spec/spec_helper.rb +10 -0
  35. data/tmp/notification_rules.rb +73 -0
  36. data/tmp/test_json_post.rb +16 -0
  37. data/tmp/test_notification_rules_api.rb +170 -0
  38. metadata +59 -2
@@ -8,6 +8,7 @@ module Flapjack
8
8
 
9
9
  class Global
10
10
 
11
+ # TODO maybe this should be an EntityCheck class method?
11
12
  def self.unacknowledged_failing_checks(options = {})
12
13
  raise "Redis connection not set" unless redis = options[:redis]
13
14
 
@@ -17,6 +17,8 @@ module Flapjack
17
17
  def id
18
18
  return @id if @id
19
19
  t = Time.now
20
+ # FIXME: consider just using a UUID here
21
+ # this is planned to be used as part of alert history keys
20
22
  @id = self.object_id.to_i.to_s + '-' + t.to_i.to_s + '.' + t.tv_usec.to_s
21
23
  end
22
24
 
@@ -0,0 +1,172 @@
1
+ #!/usr/bin/env ruby
2
+
3
require 'securerandom'

require 'yajl/json_gem'
require 'active_support/time'
require 'ice_cube'
require 'flapjack/utility'
7
+
8
+ module Flapjack
9
+ module Data
10
+ class NotificationRule
11
+
12
+ extend Flapjack::Utility
13
+
14
+ attr_accessor :id, :contact_id, :entities, :entity_tags, :time_restrictions,
15
+ :warning_media, :critical_media, :warning_blackhole, :critical_blackhole
16
+
17
# Reports whether a notification rule is stored under the given id.
#
# rule_id - id of the rule; must be neither nil nor the empty string
# options - Hash; :redis (required) is the connection that is queried
#
# Returns whatever redis.exists returns for "notification_rule:<rule_id>".
# Raises RuntimeError when :redis is missing, or when rule_id is nil or ''.
def self.exists_with_id?(rule_id, options = {})
  raise "Redis connection not set" unless redis = options[:redis]
  raise "No id value passed" if rule_id.nil? || rule_id == ''
  redis.exists("notification_rule:#{rule_id}")
end
23
+
24
# Loads the notification rule stored under the given id.
#
# rule_id - id of the rule; must be neither nil nor the empty string
# options - Hash; :redis (required) is the connection to read from,
#           :logger (optional) is handed on to the new rule object
#
# Returns a refreshed rule object, or nil when redis reports that no
# "notification_rule:<rule_id>" key exists.
# Raises RuntimeError when :redis is missing, or when rule_id is nil or ''.
def self.find_by_id(rule_id, options = {})
  raise "Redis connection not set" unless redis = options[:redis]
  raise "No id value passed" if rule_id.nil? || rule_id == ''

  # sanity check
  return unless redis.exists("notification_rule:#{rule_id}")

  # the constructor reads opts[:logger], so pass the caller's logger through
  rule = self.new({:id => rule_id}, {:redis => redis, :logger => options[:logger]})
  rule.refresh
  rule
end
36
+
37
+ # replacing save! etc
38
# Stores a new notification rule under a freshly generated UUID.
#
# rule_data - Hash of rule attributes; it is merged into a new hash
#             together with the generated :id, so the argument itself
#             is not modified here
# options   - Hash; :redis (required) is the connection to write to.
#             Defaults to {} like the other class methods, so a missing
#             connection produces the usual error message.
#
# Returns the result of find_by_id for the new id.
# Raises RuntimeError when :redis is missing.
def self.add(rule_data, options = {})
  raise "Redis connection not set" unless redis = options[:redis]

  rule_id = SecureRandom.uuid
  self.add_or_update(rule_data.merge(:id => rule_id), :redis => redis)
  self.find_by_id(rule_id, :redis => redis)
end
45
+
46
+ # add user's timezone string to the hash, deserialise
47
+ # time in the user's timezone also
48
# Converts a stored time restriction into the hash layout that is later
# passed to IceCube::Schedule.from_hash.
#
# tr        - time restriction Hash; passed through symbolize first
# time_zone - object responding to parse(string) and name
#
# :start_time is moved to :start_date. String (or Hash) start and end
# values become { :time => time_zone.parse(...), :zone => time_zone.name }.
# Each rrule's :rule_type "X" is rewritten to "IceCube::XRule".
# A restriction without :rrules, :start_time or :end_time is tolerated.
#
# Returns the converted Hash.
def self.time_restriction_to_ice_cube_hash(tr, time_zone)
  tr = symbolize(tr)

  # wraps a time string in a hash and tags it with the given zone
  localise = lambda do |value|
    value = { :time => value } if value.is_a?(String)
    if value.is_a?(Hash)
      value[:time] = time_zone.parse(value[:time])
      value[:zone] = time_zone.name
    end
    value
  end

  start = tr.delete(:start_time)
  tr[:start_date] = localise.call(start.nil? ? nil : start.dup)
  tr[:end_time] = localise.call(tr[:end_time]) unless tr[:end_time].nil?

  # rewrite Weekly to IceCube::WeeklyRule, etc
  (tr[:rrules] || []).each do |rrule|
    rrule[:rule_type] = "IceCube::#{rrule[:rule_type]}Rule"
  end

  tr
end
77
+
78
# Converts an IceCube-style time restriction hash back to the stored form.
# The hash is modified in place and also returned.
#
# tr        - Hash with [:start_date][:time], [:end_time][:time] and,
#             optionally, :rrules
# time_zone - object responding to utc_to_local(time)
#
# Start and end times are run through time_zone.utc_to_local and formatted
# as "%Y-%m-%d %H:%M:%S". :start_date is replaced by :start_time. An rrule
# :rule_type of the form "<namespace>::<X>Rule" is shortened to "<X>"; any
# other rule type is left unchanged.
#
# Returns tr.
def self.time_restriction_from_ice_cube_hash(tr, time_zone)
  format = "%Y-%m-%d %H:%M:%S"
  start_time = time_zone.utc_to_local(tr[:start_date][:time]).strftime(format)
  tr[:end_time] = time_zone.utc_to_local(tr[:end_time][:time]).strftime(format)

  # rewrite IceCube::WeeklyRule to Weekly, etc
  (tr[:rrules] || []).each do |rrule|
    matched = /\A.*::(.*)Rule\z/.match(rrule[:rule_type])
    rrule[:rule_type] = matched[1] if matched
  end

  tr.delete(:start_date)
  tr[:start_time] = start_time

  tr
end
92
+
93
# Reloads this rule's attributes from the redis hash
# "notification_rule:<@id>".
#
# contact_id is taken as stored. The entity, tag, time restriction and
# media fields are run through Yajl::Parser.parse, with the empty string
# standing in for a missing field. The two blackhole flags are true only
# when the stored text, downcased, equals 'true'.
def refresh
  stored = @redis.hgetall("notification_rule:#{@id}")

  @contact_id = stored['contact_id']

  decode = lambda { |field| Yajl::Parser.parse(stored[field] || '') }
  @entity_tags       = decode.call('entity_tags')
  @entities          = decode.call('entities')
  @time_restrictions = decode.call('time_restrictions')
  @warning_media     = decode.call('warning_media')
  @critical_media    = decode.call('critical_media')

  flag = lambda { |field| (stored[field] || 'false').downcase == 'true' }
  @warning_blackhole  = flag.call('warning_blackhole')
  @critical_blackhole = flag.call('critical_blackhole')
end
106
+
107
# Writes the given attributes for this rule and reloads it from redis.
#
# rule_data - Hash of rule attributes. add_or_update builds its redis keys
#             from rule_data[:id], so :id defaults to this rule's id when
#             the caller leaves it out; otherwise the data would be written
#             to a key with a blank id. The caller's hash is not modified
#             by this method.
#
# Returns the result of refresh.
def update(rule_data)
  self.class.add_or_update({:id => @id}.merge(rule_data), :redis => @redis)
  self.refresh
end
111
+
112
# Serialises the rule's attributes as a JSON object.
#
# args - generator arguments; they are forwarded to Hash#to_json, as the
#        to_json methods of Tag and TagSet do with theirs
#
# Returns a JSON String with the keys id, contact_id, entity_tags,
# entities, time_restrictions, warning_media, critical_media,
# warning_blackhole and critical_blackhole, in that order.
def to_json(*args)
  attributes = [:id, :contact_id, :entity_tags, :entities,
                :time_restrictions, :warning_media, :critical_media,
                :warning_blackhole, :critical_blackhole]
  hash = {}
  attributes.each { |name| hash[name] = self.send(name) }
  hash.to_json(*args)
end
120
+
121
+ # tags or entity names match?
122
+ # nil @entity_tags and nil @entities matches
123
# Decides whether this rule applies to an event.
#
# event - event id String; the part before the first ':' is compared
#         against @entities
#
# Returns true when both @entity_tags and @entities are nil or empty, or
# when @entities includes the event's entity name; false otherwise.
# A rule with tags but no entities returns false rather than raising,
# because tag matching is not implemented yet.
def match_entity?(event)
  no_tags     = @entity_tags.nil? || @entity_tags.empty?
  no_entities = @entities.nil? || @entities.empty?

  return true if no_tags && no_entities
  return true if !no_entities && @entities.include?(event.split(':').first)
  # TODO: return true if event's entity tags match entity tag list on the rule
  false
end
130
+
131
# Reports whether notifications of the given severity are blackholed.
#
# severity - String, compared case-insensitively with 'warning' and
#            'critical'
#
# Returns true when the flag for that severity (@warning_blackhole or
# @critical_blackhole) is set, false for an unset flag or any other
# severity.
def blackhole?(severity)
  flag = case severity.downcase
         when 'warning'  then @warning_blackhole
         when 'critical' then @critical_blackhole
         end
  flag ? true : false
end
136
+
137
# Looks up the media list configured for a severity.
#
# severity - 'warning' or 'critical'; compared case-insensitively, the
#            same way blackhole? treats its argument
#
# Returns @warning_media or @critical_media, or nil for any other
# severity.
def media_for_severity(severity)
  case severity.to_s.downcase
  when 'warning'  then @warning_media
  when 'critical' then @critical_media
  end
end
146
+
147
+ private
148
+
149
# Builds a rule object; only the id is taken from rule_data here.
#
# rule_data - Hash; :id becomes the rule's id
# opts      - Hash; :redis is the connection to use (kept only if @redis
#             is not already set), :logger is stored in @logger
#
# Raises RuntimeError when no redis connection ends up being set.
def initialize(rule_data, opts = {})
  @logger = opts[:logger]
  @redis  = opts[:redis] unless @redis
  raise "a redis connection must be supplied" if !@redis
  @id = rule_data[:id]
end
155
+
156
# Writes a rule's attributes to redis.
#
# rule_data - Hash with :id and :contact_id plus the rule attributes. The
#             entities, entity_tags, time_restrictions, warning_media and
#             critical_media values are JSON-encoded for storage. Encoding
#             is done on a copy, so the caller's hash keeps its values.
# options   - Hash; :redis (required) is the connection to write to
#
# The rule id is added to the set "contact_notification_rules:<contact_id>"
# and every attribute is stored in the hash "notification_rule:<id>".
#
# Returns the result of the hmset call.
# Raises RuntimeError when :redis is missing.
def self.add_or_update(rule_data, options = {})
  raise "Redis connection not set" unless redis = options[:redis]

  stored = rule_data.dup
  [:entities, :entity_tags, :time_restrictions,
   :warning_media, :critical_media].each do |field|
    stored[field] = Yajl::Encoder.encode(rule_data[field])
  end

  redis.sadd("contact_notification_rules:#{stored[:contact_id]}", stored[:id])
  redis.hmset("notification_rule:#{stored[:id]}", *stored.flatten)
end
168
+
169
+ end
170
+ end
171
+ end
172
+
@@ -10,9 +10,10 @@ module Flapjack
10
10
 
11
11
  attr_accessor :name
12
12
 
13
- def initialize(opts)
13
+ def initialize(opts = {})
14
+ raise "Redis connection not set" unless @redis = opts[:redis]
15
+
14
16
  @name = opts[:name]
15
- @redis = opts[:redis]
16
17
  preset = @redis.smembers(@name)
17
18
  enum = opts[:create] || []
18
19
  @redis.sadd(@name, enum) unless enum.empty?
@@ -45,6 +46,10 @@ module Flapjack
45
46
  super(o)
46
47
  end
47
48
 
49
# Serialises this object as the JSON encoding of its to_a array.
# Any generator arguments are handed on to Array#to_json.
def to_json(*a)
  members = self.to_a
  members.to_json(*a)
end
52
+
48
53
  end
49
54
  end
50
55
  end
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'set'
4
+
5
module Flapjack
  module Data
    # A ::Set that serialises to JSON as a plain array of its members.
    class TagSet < ::Set

      # Returns the JSON encoding of the set's members as an array.
      # Generator arguments are passed through to Array#to_json.
      def to_json(*generator_args)
        members = to_a
        members.to_json(*generator_args)
      end

    end
  end
end
16
+
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'log4r'
4
4
  require 'log4r/outputter/fileoutputter'
5
+ require 'tzinfo'
6
+ require 'active_support/time'
5
7
 
6
8
  require 'flapjack/filters/acknowledgement'
7
9
  require 'flapjack/filters/ok'
@@ -14,6 +16,7 @@ require 'flapjack/data/entity_check'
14
16
  require 'flapjack/data/notification'
15
17
  require 'flapjack/data/event'
16
18
  require 'flapjack/redis_pool'
19
+ require 'flapjack/utility'
17
20
 
18
21
  require 'flapjack/gateways/email'
19
22
  require 'flapjack/gateways/sms_messagenet'
@@ -22,6 +25,8 @@ module Flapjack
22
25
 
23
26
  class Executive
24
27
 
28
+ include Flapjack::Utility
29
+
25
30
  def initialize(opts = {})
26
31
  @config = opts[:config]
27
32
  @redis_config = opts[:redis_config]
@@ -34,9 +39,24 @@ module Flapjack
34
39
  :pagerduty => @config['pagerduty_queue']}
35
40
 
36
41
  notifylog = @config['notification_log_file'] || 'log/notify.log'
42
+ if not File.directory?(File.dirname(notifylog))
43
+ puts "Parent directory for log file #{notifylog} doesn't exist"
44
+ puts "Exiting!"
45
+ exit
46
+ end
37
47
  @notifylog = Log4r::Logger.new("executive")
38
48
  @notifylog.add(Log4r::FileOutputter.new("notifylog", :filename => notifylog))
39
49
 
50
+ tz = nil
51
+ tz_string = @config['default_contact_timezone'] || ENV['TZ'] || 'UTC'
52
+ begin
53
+ tz = ActiveSupport::TimeZone.new(tz_string)
54
+ rescue ArgumentError
55
+ logger.error("Invalid timezone string specified in default_contact_timezone or TZ (#{tz_string})")
56
+ exit 1
57
+ end
58
+ @default_contact_timezone = tz
59
+
40
60
  # FIXME: Put loading filters into separate method
41
61
  # FIXME: should we make the filters more configurable by the end user?
42
62
  options = { :log => opts[:logger], :persistence => @redis }
@@ -128,7 +148,7 @@ module Flapjack
128
148
  end
129
149
 
130
150
  @logger.info("Generating notifications for event #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}")
131
- send_notification_messages(event, entity_check)
151
+ generate_notification_messages(event, entity_check)
132
152
  end
133
153
 
134
154
  def update_keys(event, entity_check)
@@ -198,8 +218,8 @@ module Flapjack
198
218
  end
199
219
 
200
220
  # takes an event for which a notification needs to be generated, works out the type of
201
- # notification, updates the notification history in redis, sends the notifications
202
- def send_notification_messages(event, entity_check)
221
+ # notification, updates the notification history in redis, generates the notifications
222
+ def generate_notification_messages(event, entity_check)
203
223
  timestamp = Time.now.to_i
204
224
  notification_type = 'unknown'
205
225
  case event.type
@@ -219,7 +239,9 @@ module Flapjack
219
239
  end
220
240
  end
221
241
  @redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
242
+ @redis.set("#{event.id}:last_#{event.state}_notification", timestamp) if event.failure?
222
243
  @redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
244
+ @redis.rpush("#{event.id}:#{event.state}_notifications", timestamp) if event.failure?
223
245
  @logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.")
224
246
 
225
247
  contacts = entity_check.contacts
@@ -231,21 +253,133 @@ module Flapjack
231
253
 
232
254
  notification = Flapjack::Data::Notification.for_event(event, :type => notification_type)
233
255
 
234
- notification.messages(:contacts => contacts).each do |msg|
235
- media_type = msg.medium.to_sym
256
+ enqueue_messages( apply_notification_rules( notification.messages(:contacts => contacts) ) )
257
+
258
+ end
259
+
260
+ # time restrictions match?
261
+ # nil rule.time_restrictions matches
262
+ # times (start, end) within time restrictions will have any UTC offset removed and will be
263
+ # considered to be in the timezone of the contact
264
# Checks whether a rule's time restrictions cover the present moment.
#
# rule - object responding to time_restrictions
# opts - Hash; :contact supplies the timezone via
#        contact.timezone(:default => @default_contact_timezone)
#
# Returns true when the rule has no time restrictions (nil or empty), or
# when at least one restriction, converted with
# NotificationRule.time_restriction_to_ice_cube_hash and loaded through
# IceCube::Schedule.from_hash, is occurring at the contact's current time.
# Returns false otherwise.
def rule_occurring_now?(rule, opts)
  contact = opts[:contact]
  restrictions = rule.time_restrictions
  return true if restrictions.nil? || restrictions.empty?

  zone = contact.timezone(:default => @default_contact_timezone)
  contact_now = zone.now

  restrictions.each do |restriction|
    # add contact's timezone to the time restriction hash
    ice_cube_hash = Flapjack::Data::NotificationRule.time_restriction_to_ice_cube_hash(restriction, zone)
    return true if IceCube::Schedule.from_hash(ice_cube_hash).occurring_at?(contact_now)
  end
  false
end
280
+
281
+ # delete messages based on entity name(s), tags, severity, time of day
282
# Filters candidate messages through each contact's notification rules.
#
# messages - Array of message objects responding to medium, contact and
#            notification (whose event gives id and state)
#
# Steps, in order:
# 1. For each message, collect the contact's rules that match the event's
#    entity (match_entity?) and the current time (rule_occurring_now?).
# 2. If more than one rule matched and at least one of them names entities
#    or tags, drop the matched rules that name neither. nil and empty lists
#    both count as "names nothing", as in match_entity?.
# 3. Drop the message if any remaining rule blackholes the event's state.
# 4. Drop the message unless the contact has no rules at all, or some
#    remaining rule lists the message's medium for the event's state.
# 5. Drop the message if contact.drop_notifications? says so.
#
# Returns the Array of messages that survive all steps.
def apply_notification_rules(messages)
  # first get all rules matching entity and time
  @logger.debug "apply_notification_rules: got messages with size #{messages.size}"

  # a rule is general when it names neither entities nor tags
  general = lambda do |rule|
    (rule.entities.nil? || rule.entities.empty?) &&
      (rule.entity_tags.nil? || rule.entity_tags.empty?)
  end

  tuple = messages.map do |message|
    @logger.debug "considering message: #{message.medium} #{message.notification.event.id} #{message.notification.event.state}"
    @logger.debug "contact_id: #{message.contact.id}"
    rules = message.contact.notification_rules
    @logger.debug "found #{rules.length} rules for this message's contact"
    event_id = message.notification.event.id
    options = {}
    # don't consider notification rules if the contact has none
    options[:no_rules_for_contact] = true if rules.empty?
    # filter based on entity, tags, severity, time of day
    matchers = rules.find_all do |rule|
      rule.match_entity?(event_id) && rule_occurring_now?(rule, :contact => message.contact)
    end
    [message, matchers, options]
  end

  # matchers are rules of the contact that have matched the current event
  # for time and entity

  @logger.debug "apply_notification_rules: num messages after entity and time matching: #{tuple.size}"

  # delete the matcher for all entities if there are more specific matchers
  tuple = tuple.map do |message, matchers, options|
    if matchers.length > 1 && !matchers.all?(&general)
      matchers = matchers.reject(&general)
    end
    [message, matchers, options]
  end

  # delete media based on blackholes
  tuple = tuple.find_all do |message, matchers, options|
    severity = message.notification.event.state
    # or use message.notification.contents['state']
    matchers.none? {|matcher| matcher.blackhole?(severity) }
  end

  @logger.debug "apply_notification_rules: num messages after removing blackhole matches: #{tuple.size}"

  # delete any media that doesn't meet severity<->media constraints
  tuple = tuple.find_all do |message, matchers, options|
    severity = message.notification.event.state
    medium_allowed = matchers.any? do |matcher|
      media = matcher.media_for_severity(severity)
      if media.nil?
        # a rule may have no media list for this severity; treat as no match
        @logger.warn("got nil for matcher.media_for_severity(#{severity}), matcher: #{matcher.inspect}")
        false
      else
        media.include?(message.medium)
      end
    end
    options[:no_rules_for_contact] || medium_allowed
  end

  @logger.debug "apply_notification_rules: num messages after severity-media constraints: #{tuple.size}"

  # delete media based on notification interval
  tuple = tuple.find_all do |message, matchers, options|
    not message.contact.drop_notifications?(:media => message.medium,
                                            :check => message.notification.event.id,
                                            :state => message.notification.event.state)
  end

  @logger.debug "apply_notification_rules: num messages after pruning for notification intervals: #{tuple.size}"

  tuple.map do |message, matchers, options|
    message
  end
end
358
+
359
+ def enqueue_messages(messages)
360
+
361
+ messages.each do |message|
362
+ media_type = message.medium
363
+ contents = message.contents
364
+ event_id = message.notification.event.id
365
+
366
+ @notifylog.info("#{Time.now.to_s} | #{event_id} | " +
367
+ "#{message.notification.type} | #{message.contact.id} | #{media_type} | #{message.address}")
239
368
 
240
- unless @queues[media_type]
369
+ unless @queues[media_type.to_sym]
241
370
  @logger.error("no queue for media type: #{media_type}")
242
- next
371
+ return
243
372
  end
244
373
 
245
- contents = msg.contents
374
+ @logger.info("Enqueueing #{media_type} alert for #{event_id} to #{message.address}")
375
+
376
+ message.contact.update_sent_alert_keys(:media => message.medium,
377
+ :check => message.notification.event.id,
378
+ :state => message.notification.event.state)
379
+ # drop_alerts_for_contact:#{self.id}:#{media}:#{check}:#{state}
246
380
 
247
381
  # TODO consider changing Resque jobs to use raw blpop like the others
248
- case media_type
382
+ case media_type.to_sym
249
383
  when :sms
250
384
  Resque.enqueue_to(@queues[:sms], Flapjack::Gateways::SmsMessagenet, contents)
251
385
  when :email
@@ -257,9 +391,9 @@ module Flapjack
257
391
  when :pagerduty
258
392
  @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(contents))
259
393
  end
260
-
261
394
  end
262
- end
395
+
396
+ end
263
397
 
264
398
  end
265
399
  end