flapjack 0.7.20 → 0.7.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. data/.gitignore +1 -0
  2. data/.travis.yml +3 -1
  3. data/CHANGELOG.md +10 -0
  4. data/Gemfile +1 -0
  5. data/bin/flapjack +11 -0
  6. data/bin/simulate-failed-check +5 -5
  7. data/features/notification_rules.feature +77 -19
  8. data/features/steps/events_steps.rb +15 -3
  9. data/lib/flapjack/coordinator.rb +3 -3
  10. data/lib/flapjack/data/contact.rb +1 -1
  11. data/lib/flapjack/data/entity.rb +12 -1
  12. data/lib/flapjack/data/entity_check.rb +9 -2
  13. data/lib/flapjack/data/event.rb +4 -4
  14. data/lib/flapjack/data/notification.rb +27 -20
  15. data/lib/flapjack/data/notification_rule.rb +26 -24
  16. data/lib/flapjack/data/tag.rb +5 -0
  17. data/lib/flapjack/gateways/api.rb +1 -1
  18. data/lib/flapjack/gateways/api/contact_methods.rb +3 -3
  19. data/lib/flapjack/gateways/email.rb +73 -46
  20. data/lib/flapjack/gateways/email/alert.html.erb +13 -4
  21. data/lib/flapjack/gateways/email/alert.text.erb +2 -2
  22. data/lib/flapjack/gateways/jabber.rb +22 -16
  23. data/lib/flapjack/gateways/pagerduty.rb +7 -3
  24. data/lib/flapjack/gateways/web.rb +1 -1
  25. data/lib/flapjack/gateways/web/views/check.html.erb +2 -2
  26. data/lib/flapjack/gateways/web/views/contact.html.erb +3 -3
  27. data/lib/flapjack/logger.rb +67 -35
  28. data/lib/flapjack/notifier.rb +9 -3
  29. data/lib/flapjack/pikelet.rb +3 -1
  30. data/lib/flapjack/processor.rb +34 -10
  31. data/lib/flapjack/version.rb +1 -1
  32. data/spec/lib/flapjack/coordinator_spec.rb +17 -13
  33. data/spec/lib/flapjack/data/contact_spec.rb +4 -3
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +10 -0
  35. data/spec/lib/flapjack/data/entity_spec.rb +60 -5
  36. data/spec/lib/flapjack/data/event_spec.rb +4 -4
  37. data/spec/lib/flapjack/data/notification_rule_spec.rb +9 -2
  38. data/spec/lib/flapjack/data/tag_spec.rb +0 -1
  39. data/spec/lib/flapjack/gateways/api/contact_methods_spec.rb +1 -1
  40. data/spec/lib/flapjack/gateways/email_spec.rb +2 -1
  41. data/spec/lib/flapjack/gateways/jabber_spec.rb +5 -3
  42. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +3 -1
  43. data/spec/lib/flapjack/logger_spec.rb +5 -5
  44. data/spec/lib/flapjack/pikelet_spec.rb +4 -2
  45. data/spec/lib/flapjack/processor_spec.rb +16 -7
  46. data/tasks/benchmarks.rake +228 -0
  47. data/tasks/events.rake +11 -10
  48. data/tasks/support/flapjack_config_benchmark.yaml +58 -0
  49. metadata +6 -4
@@ -4,6 +4,7 @@ require 'oj'
4
4
  require 'active_support/time'
5
5
  require 'ice_cube'
6
6
  require 'flapjack/utility'
7
+ require 'flapjack/data/tag_set'
7
8
 
8
9
  module Flapjack
9
10
  module Data
@@ -11,7 +12,7 @@ module Flapjack
11
12
 
12
13
  extend Flapjack::Utility
13
14
 
14
- attr_accessor :id, :contact_id, :entities, :entity_tags, :time_restrictions,
15
+ attr_accessor :id, :contact_id, :entities, :tags, :time_restrictions,
15
16
  :warning_media, :critical_media, :warning_blackhole, :critical_blackhole
16
17
 
17
18
  def self.exists_with_id?(rule_id, options = {})
@@ -68,19 +69,23 @@ module Flapjack
68
69
  end
69
70
 
# Serializes this notification rule to a JSON object string.
#
# The emitted keys are the whitelisted rule attributes: id, contact_id, tags,
# entities, time_restrictions and the per-severity media / blackhole
# settings. Each value is read by sending the attribute name to self.
#
# args - accepted so the method can be called with the usual to_json
#        arguments; they are not used.
#
# Returns the String produced by calling to_json on the assembled Hash.
def to_json(*args)
  attributes = [:id, :contact_id, :tags, :entities,
                :time_restrictions, :warning_media, :critical_media,
                :warning_blackhole, :critical_blackhole]

  # NOTE(review): hashify is defined outside this view; it presumably builds
  # a Hash from the [key, value] pair yielded for each name -- confirm.
  self.class.hashify(*attributes) { |k| [k, self.send(k)] }.to_json
end
77
78
 
# Does this rule list the entity that the given event belongs to?
#
# event_id - String of the form "entity_name:check_name"; only the part
#            before the first ':' is compared against the rule's entities.
#
# Returns true when that entity name is included in @entities. Returns false
# when the rule has no entity list, or when no event id was supplied.
def match_entity?(event_id)
  return false unless @entities && event_id

  entity_name = event_id.split(':', 2).first
  @entities.include?(entity_name)
end
84
+
# Are all of this rule's tags present among the event's tags?
#
# event_tags - the tags to test against; passed to Set#subset?, so it is
#              expected to be a Set (or subclass). nil is treated as
#              "no tags".
#
# Returns true only when the rule has at least one tag and every one of them
# is contained in event_tags. A rule without tags never matches by tag.
def match_tags?(event_tags)
  return false if @tags.nil? || @tags.empty? || event_tags.nil?

  @tags.subset?(event_tags)
end
85
90
 
86
91
  def blackhole?(severity)
@@ -99,7 +104,7 @@ module Flapjack
99
104
 
# Is this rule restricted to particular entities or tags?
#
# Returns true when @entities or @tags is present and non-empty, false when
# both are nil or empty.
def is_specific?
  [@entities, @tags].any? { |list| !list.nil? && !list.empty? }
end
104
109
 
105
110
  private
@@ -120,17 +125,21 @@ module Flapjack
120
125
  # make some assumptions about the incoming data
121
126
  rule_data[:warning_blackhole] = rule_data[:warning_blackhole] || false
122
127
  rule_data[:critical_blackhole] = rule_data[:critical_blackhole] || false
128
+ if rule_data[:tags].is_a?(Array)
129
+ rule_data[:tags] = Flapjack::Data::TagSet.new(rule_data[:tags])
130
+ end
123
131
 
124
132
  errors = self.validate_data(rule_data, options)
125
133
 
126
134
  return errors unless errors.nil? || errors.empty?
127
135
 
128
136
  # whitelisting fields, rather than passing through submitted data directly
137
+ tag_data = rule_data[:tags].is_a?(Set) ? rule_data[:tags].to_a : nil
129
138
  json_rule_data = {
130
139
  :id => rule_data[:id].to_s,
131
140
  :contact_id => rule_data[:contact_id].to_s,
132
141
  :entities => Oj.dump(rule_data[:entities]),
133
- :entity_tags => Oj.dump(rule_data[:entity_tags]),
142
+ :tags => Oj.dump(tag_data),
134
143
  :time_restrictions => Oj.dump(rule_data[:time_restrictions]),
135
144
  :warning_media => Oj.dump(rule_data[:warning_media]),
136
145
  :critical_media => Oj.dump(rule_data[:critical_media]),
@@ -221,19 +230,11 @@ module Flapjack
221
230
  d[:entities].all? {|e| e.is_a?(String)} ) } =>
222
231
  "entities must be a list of strings",
223
232
 
224
- proc { !d.has_key?(:entity_tags) ||
225
- ( d[:entity_tags].nil? ||
226
- d[:entity_tags].is_a?(Array) &&
227
- d[:entity_tags].all? {|et| et.is_a?(String)} ) } =>
228
- "entity_tags must be a list of strings",
229
-
230
- #proc { (d.has_key?(:entities) &&
231
- # d[:entities].is_a?(Array) &&
232
- # (d[:entities].size > 0)) ||
233
- # (d.has_key?(:entity_tags) &&
234
- # d[:entity_tags].is_a?(Array) &&
235
- # (d[:entity_tags].size > 0)) } =>
236
- #"entities or entity tags must have at least one value",
233
+ proc { !d.has_key?(:tags) ||
234
+ ( d[:tags].nil? ||
235
+ d[:tags].is_a?(Flapjack::Data::TagSet) &&
236
+ d[:tags].all? {|et| et.is_a?(String)} ) } =>
237
+ "tags must be a tag_set of strings",
237
238
 
238
239
  proc { !d.has_key?(:time_restrictions) ||
239
240
  ( d[:time_restrictions].nil? ||
@@ -284,7 +285,8 @@ module Flapjack
284
285
  rule_data = @redis.hgetall("notification_rule:#{@id}")
285
286
 
286
287
  @contact_id = rule_data['contact_id']
287
- @entity_tags = Oj.load(rule_data['entity_tags'] || '')
288
+ tags = Oj.load(rule_data['tags'] || '')
289
+ @tags = tags ? Flapjack::Data::TagSet.new(tags) : nil
288
290
  @entities = Oj.load(rule_data['entities'] || '')
289
291
  @time_restrictions = Oj.load(rule_data['time_restrictions'] || '')
290
292
  @warning_media = Oj.load(rule_data['warning_media'] || '')
@@ -25,6 +25,11 @@ module Flapjack
25
25
  :redis => opts[:redis])
26
26
  end
27
27
 
# Asks Redis for the intersection (SINTER) of the given sets.
#
# tags - the key(s) handed to SINTER as-is; presumably the Redis keys of the
#        tag sets to intersect -- verify against callers.
# opts - Hash; opts[:redis] must be the Redis connection to query.
#
# Returns whatever the connection's sinter call returns.
def self.find_intersection(tags, opts)
  # Keep the connection in a local: assigning it to @redis here would set a
  # class-level instance variable that every later call overwrites.
  redis = opts[:redis]
  redis.sinter(tags)
end
32
+
28
33
  def self.create(name, enum = [], opts)
29
34
  self.new(:name => name,
30
35
  :create => enum,
@@ -45,7 +45,7 @@ module Flapjack
45
45
 
46
46
  class << self
47
47
  def start
48
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
48
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2)
49
49
 
50
50
  @logger.info "starting api - class"
51
51
 
@@ -142,7 +142,7 @@ module Flapjack
142
142
 
143
143
  contact = find_contact(params[:contact_id])
144
144
 
145
- rule_data = hashify(:entities, :entity_tags,
145
+ rule_data = hashify(:entities, :tags,
146
146
  :warning_media, :critical_media, :time_restrictions,
147
147
  :warning_blackhole, :critical_blackhole) {|k| [k, params[k]]}
148
148
 
@@ -165,7 +165,7 @@ module Flapjack
165
165
  rule = find_rule(params[:id])
166
166
  contact = find_contact(rule.contact_id)
167
167
 
168
- rule_data = hashify(:entities, :entity_tags,
168
+ rule_data = hashify(:entities, :tags,
169
169
  :warning_media, :critical_media, :time_restrictions,
170
170
  :warning_blackhole, :critical_blackhole) {|k| [k, params[k]]}
171
171
 
@@ -339,4 +339,4 @@ module Flapjack
339
339
 
340
340
  end
341
341
 
342
- end
342
+ end
@@ -3,6 +3,7 @@
3
3
  require 'mail'
4
4
  require 'erb'
5
5
  require 'socket'
6
+ require 'chronic_duration'
6
7
 
7
8
  require 'em-synchrony'
8
9
  require 'em/protocols/smtpclient'
@@ -28,61 +29,86 @@ module Flapjack
28
29
  end
29
30
 
30
31
  def perform(notification)
31
- @logger.debug "Woo, got a notification to send out: #{notification.inspect}"
32
-
33
- @notification_type = notification['notification_type']
34
- @contact_first_name = notification['contact_first_name']
35
- @contact_last_name = notification['contact_last_name']
36
- @state = notification['state']
37
- @summary = notification['summary']
38
- @last_state = notification['last_state']
39
- @last_summary = notification['last_summary']
40
- @details = notification['details']
41
- @time = notification['time']
42
- @relative = relative_time_ago(Time.at(@time))
43
- @entity_name, @check = notification['event_id'].split(':', 2)
44
-
45
- entity_check = Flapjack::Data::EntityCheck.for_event_id(notification['event_id'],
46
- :redis => ::Resque.redis)
47
-
48
- @in_unscheduled_maintenance = entity_check.in_scheduled_maintenance?
49
- @in_scheduled_maintenance = entity_check.in_unscheduled_maintenance?
50
-
51
- headline_map = {'problem' => 'Problem: ',
52
- 'recovery' => 'Recovery: ',
53
- 'acknowledgement' => 'Acknowledgement: ',
54
- 'test' => 'Test Notification: ',
55
- 'unknown' => ''
56
- }
57
-
58
- headline = headline_map[@notification_type] || ''
59
-
60
- @subject = "#{headline}'#{@check}' on #{@entity_name}"
61
- @subject += " is #{@state.upcase}" unless ['acknowledgement', 'test'].include?(@notification_type)
32
+ begin
33
+ @logger.debug "Woo, got a notification to send out: #{notification.inspect}"
34
+
35
+ @notification_type = notification['notification_type']
36
+ @contact_first_name = notification['contact_first_name']
37
+ @contact_last_name = notification['contact_last_name']
38
+ @state = notification['state']
39
+ @summary = notification['summary']
40
+ @last_state = notification['last_state']
41
+ @last_summary = notification['last_summary']
42
+ @details = notification['details']
43
+ @time = notification['time']
44
+ @entity_name, @check = notification['event_id'].split(':', 2)
45
+
46
+ entity_check = Flapjack::Data::EntityCheck.for_event_id(notification['event_id'],
47
+ :redis => ::Resque.redis)
48
+
49
+ @in_unscheduled_maintenance = entity_check.in_scheduled_maintenance?
50
+ @in_scheduled_maintenance = entity_check.in_unscheduled_maintenance?
51
+
52
+ # FIXME: I can not get the entity_check.last_change to work in this context (Resque)
53
+ # it always returns nil, despite entity_check being a good looking EntityCheck object
54
+ # and all ...
55
+ if lc = entity_check.last_change
56
+ duration = (Time.now.to_i - lc)
57
+ @duration = (duration && duration > 40) ? duration : nil
58
+ end
59
+
60
+ headline_map = {'problem' => 'Problem: ',
61
+ 'recovery' => 'Recovery: ',
62
+ 'acknowledgement' => 'Acknowledgement: ',
63
+ 'test' => 'Test Notification: ',
64
+ 'unknown' => ''
65
+ }
66
+
67
+ headline = headline_map[@notification_type] || ''
68
+
69
+ @subject = "#{headline}'#{@check}' on #{@entity_name}"
70
+ @subject += " is #{@state.upcase}" unless ['acknowledgement', 'test'].include?(@notification_type)
71
+ rescue => e
72
+ @logger.error "Error preparing email to #{m_to}: #{e.class}: #{e.message}"
73
+ @logger.error e.backtrace.join("\n")
74
+ raise
75
+ end
62
76
 
63
77
  begin
64
78
  host = @smtp_config ? @smtp_config['host'] : nil
65
79
  port = @smtp_config ? @smtp_config['port'] : nil
80
+ starttls = @smtp_config ? !! @smtp_config['starttls'] : nil
81
+ if @smtp_config
82
+ if auth_config = @smtp_config['auth']
83
+ auth = {}
84
+ auth[:type] = auth_config['type'].to_sym || :plain
85
+ auth[:username] = auth_config['username']
86
+ auth[:password] = auth_config['password']
87
+ end
88
+ end
66
89
 
67
- fqdn = `/bin/hostname -f`.chomp
68
- m_from = "flapjack@#{fqdn}"
90
+ fqdn = `/bin/hostname -f`.chomp
91
+ m_from = "flapjack@#{fqdn}"
69
92
  @logger.debug("flapjack_mailer: set from to #{m_from}")
70
93
  m_reply_to = m_from
71
94
  m_to = notification['address']
72
95
 
73
- @logger.debug("sending Flapjack::Notification::Email " +
74
- "#{notification['id']} to: #{m_to} subject: #{@subject}")
96
+ @logger.debug("sending Flapjack::Notification::Email " +
97
+ "#{notification['id']} to: #{m_to} subject: #{@subject}")
75
98
 
76
- mail = prepare_email(:subject => @subject,
77
- :from => m_from, :to => m_to)
99
+ mail = prepare_email(:subject => @subject,
100
+ :from => m_from,
101
+ :to => m_to)
78
102
 
79
- email = EM::P::SmtpClient.send(
80
- :from => m_from,
81
- :to => m_to,
82
- :content => "#{mail.to_s}\r\n.\r\n",
83
- :domain => fqdn,
84
- :host => host || 'localhost',
85
- :port => port || 25)
103
+ smtp_args = {:from => m_from,
104
+ :to => m_to,
105
+ :content => "#{mail.to_s}\r\n.\r\n",
106
+ :domain => fqdn,
107
+ :host => host || 'localhost',
108
+ :port => port || 25,
109
+ :starttls => starttls}
110
+ smtp_args.merge!(:auth => auth) if auth
111
+ email = EM::P::SmtpClient.send(smtp_args)
86
112
 
87
113
  response = EM::Synchrony.sync(email)
88
114
 
@@ -97,9 +123,10 @@ module Flapjack
97
123
 
98
124
  @logger.info "Email response: #{response.inspect}"
99
125
 
100
- rescue Exception => e
101
- @logger.error "Error delivering email to #{m_to}: #{e.message}"
126
+ rescue => e
127
+ @logger.error "Error delivering email to #{m_to}: #{e.class}: #{e.message}"
102
128
  @logger.error e.backtrace.join("\n")
129
+ raise
103
130
  end
104
131
  end
105
132
 
@@ -44,10 +44,19 @@
44
44
  </tr>
45
45
  <% end %>
46
46
 
47
- <tr>
48
- <td>Time</td>
49
- <td><%= Time.at(@time.to_i).to_s %> (<%= @relative %> ago)</td>
50
- </tr>
47
+ <% if @time %>
48
+ <tr>
49
+ <td>Time</td>
50
+ <td><%= Time.at(@time.to_i).to_s %></td>
51
+ </tr>
52
+ <% end %>
53
+
54
+ <% if @duration %>
55
+ <tr>
56
+ <td>Duration</td>
57
+ <td><%= ChronicDuration.output(@duration) %></td>
58
+ </tr>
59
+ <% end %>
51
60
 
52
61
  <% if @last_state %>
53
62
  <tr>
@@ -1,4 +1,4 @@
1
- Hi <%= @contact_first_name %>,
1
+ Hi <%= @contact_first_name %>
2
2
 
3
3
  Monitoring has detected the following:
4
4
 
@@ -6,7 +6,7 @@ Entity: <%= @entity_name %>
6
6
  Check: <%= @check %>
7
7
  State: <%= @state %>
8
8
  Summary: <%= @summary %>
9
- Time: <%= Time.at(@time.to_i).to_s %> (<%= @relative %> ago)
9
+ <%= @time ? "Time: #{Time.at(@time.to_i).to_s}" : '' %>
10
10
  <%= @last_state ? "\nPrevious state: #{@last_state}" : '' %><%= @last_summary ? "\nPrevious summary: #{@last_summary}" : '' %>
11
11
 
12
12
  Cheers,
@@ -1,14 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'socket'
3
+ require 'em-hiredis'
4
4
 
5
- require 'eventmachine'
6
- # the redis/synchrony gems need to be required in this particular order, see
7
- # the redis-rb README for details
8
- require 'hiredis'
9
- require 'em-synchrony'
10
- require 'redis/connection/synchrony'
11
- require 'redis'
5
+ require 'socket'
12
6
 
13
7
  require 'blather/client/client'
14
8
  require 'chronic_duration'
@@ -49,10 +43,10 @@ module Flapjack
49
43
 
50
44
  def initialize(opts = {})
51
45
  @config = opts[:config]
52
- @redis_config = opts[:redis_config]
46
+ @redis_config = opts[:redis_config] || {}
53
47
  @boot_time = opts[:boot_time]
54
48
 
55
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
49
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2)
56
50
 
57
51
  @logger = opts[:logger]
58
52
 
@@ -63,7 +57,10 @@ module Flapjack
63
57
 
# Flags the pikelet for shutdown and pushes a 'shutdown' notification onto
# its own queue.
#
# The push goes over a newly opened EM::Hiredis connection rather than the
# @redis pool.
# NOTE(review): presumably because the pool's connections may be tied up in
# a blocking read on the queue -- confirm.
# NOTE(review): the connection opened here is never closed explicitly.
def stop
  @should_quit = true

  host = @redis_config[:host] || '127.0.0.1'
  port = @redis_config[:port] || '6379'
  db   = @redis_config[:db]   || '0'

  # NOTE(review): :path is passed to EM::Hiredis.connect unchanged, so it
  # must already be in a form that method accepts -- confirm for unix sockets.
  redis_uri = @redis_config[:path] || "redis://#{host}:#{port}/#{db}"

  shutdown_redis = EM::Hiredis.connect(redis_uri)
  shutdown_redis.rpush(@config['queue'], Oj.dump('notification_type' => 'shutdown'))
end
68
65
 
69
66
  def setup
@@ -406,11 +403,19 @@ module Flapjack
406
403
  @logger.info("starting")
407
404
  @logger.debug("new jabber pikelet with the following options: #{@config.inspect}")
408
405
 
409
- keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
410
- @logger.debug("calling keepalive on the jabber connection")
411
- if connected?
412
- EventMachine::Synchrony.next_tick do
413
- write(' ')
406
+ # the periodic timer can't be halted early (without doing EM.stop) so
407
+ # keep the time short and count the iterations ... could just use
408
+ # EM.sleep(1) in a loop, I suppose
409
+ ki = 0
410
+ keepalive_timer = EventMachine::Synchrony.add_periodic_timer(1) do
411
+ ki += 1
412
+ if ki == 60
413
+ ki = 0
414
+ @logger.debug("calling keepalive on the jabber connection")
415
+ if connected?
416
+ EventMachine::Synchrony.next_tick do
417
+ write(' ')
418
+ end
414
419
  end
415
420
  end
416
421
  end
@@ -435,6 +440,7 @@ module Flapjack
435
440
  @logger.debug('jabber notification event received')
436
441
  @logger.debug(event.inspect)
437
442
  if 'shutdown'.eql?(type)
443
+ @logger.debug("@should_quit: #{@should_quit}")
438
444
  if @should_quit
439
445
  EventMachine::Synchrony.next_tick do
440
446
  # get delays without the next_tick
@@ -20,8 +20,8 @@ module Flapjack
20
20
  def initialize(opts = {})
21
21
  @config = opts[:config]
22
22
  @logger = opts[:logger]
23
- @redis_config = opts[:redis_config]
24
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
23
+ @redis_config = opts[:redis_config] || {}
24
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2)
25
25
 
26
26
  @logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
27
27
 
@@ -32,7 +32,11 @@ module Flapjack
# Logs that the pikelet is stopping, flags it for shutdown and pushes a
# 'shutdown' notification onto its own queue.
#
# The push goes over a newly opened EM::Hiredis connection rather than the
# @redis pool.
# NOTE(review): presumably because the pool's connections may be tied up in
# a blocking read on the queue -- confirm.
# NOTE(review): no require of 'em-hiredis' is visible for this file; make
# sure EM::Hiredis is loaded before this runs.
# NOTE(review): the connection opened here is never closed explicitly.
def stop
  @logger.info("stopping")
  @should_quit = true

  host = @redis_config[:host] || '127.0.0.1'
  port = @redis_config[:port] || '6379'
  db   = @redis_config[:db]   || '0'

  # NOTE(review): :path is passed to EM::Hiredis.connect unchanged, so it
  # must already be in a form that method accepts -- confirm for unix sockets.
  redis_uri = @redis_config[:path] || "redis://#{host}:#{port}/#{db}"

  shutdown_redis = EM::Hiredis.connect(redis_uri)
  shutdown_redis.rpush(@config['queue'], Oj.dump('notification_type' => 'shutdown'))
end
37
41
 
38
42
  def start