flapjack 0.7.20 → 0.7.21

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/.gitignore +1 -0
  2. data/.travis.yml +3 -1
  3. data/CHANGELOG.md +10 -0
  4. data/Gemfile +1 -0
  5. data/bin/flapjack +11 -0
  6. data/bin/simulate-failed-check +5 -5
  7. data/features/notification_rules.feature +77 -19
  8. data/features/steps/events_steps.rb +15 -3
  9. data/lib/flapjack/coordinator.rb +3 -3
  10. data/lib/flapjack/data/contact.rb +1 -1
  11. data/lib/flapjack/data/entity.rb +12 -1
  12. data/lib/flapjack/data/entity_check.rb +9 -2
  13. data/lib/flapjack/data/event.rb +4 -4
  14. data/lib/flapjack/data/notification.rb +27 -20
  15. data/lib/flapjack/data/notification_rule.rb +26 -24
  16. data/lib/flapjack/data/tag.rb +5 -0
  17. data/lib/flapjack/gateways/api.rb +1 -1
  18. data/lib/flapjack/gateways/api/contact_methods.rb +3 -3
  19. data/lib/flapjack/gateways/email.rb +73 -46
  20. data/lib/flapjack/gateways/email/alert.html.erb +13 -4
  21. data/lib/flapjack/gateways/email/alert.text.erb +2 -2
  22. data/lib/flapjack/gateways/jabber.rb +22 -16
  23. data/lib/flapjack/gateways/pagerduty.rb +7 -3
  24. data/lib/flapjack/gateways/web.rb +1 -1
  25. data/lib/flapjack/gateways/web/views/check.html.erb +2 -2
  26. data/lib/flapjack/gateways/web/views/contact.html.erb +3 -3
  27. data/lib/flapjack/logger.rb +67 -35
  28. data/lib/flapjack/notifier.rb +9 -3
  29. data/lib/flapjack/pikelet.rb +3 -1
  30. data/lib/flapjack/processor.rb +34 -10
  31. data/lib/flapjack/version.rb +1 -1
  32. data/spec/lib/flapjack/coordinator_spec.rb +17 -13
  33. data/spec/lib/flapjack/data/contact_spec.rb +4 -3
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +10 -0
  35. data/spec/lib/flapjack/data/entity_spec.rb +60 -5
  36. data/spec/lib/flapjack/data/event_spec.rb +4 -4
  37. data/spec/lib/flapjack/data/notification_rule_spec.rb +9 -2
  38. data/spec/lib/flapjack/data/tag_spec.rb +0 -1
  39. data/spec/lib/flapjack/gateways/api/contact_methods_spec.rb +1 -1
  40. data/spec/lib/flapjack/gateways/email_spec.rb +2 -1
  41. data/spec/lib/flapjack/gateways/jabber_spec.rb +5 -3
  42. data/spec/lib/flapjack/gateways/pagerduty_spec.rb +3 -1
  43. data/spec/lib/flapjack/logger_spec.rb +5 -5
  44. data/spec/lib/flapjack/pikelet_spec.rb +4 -2
  45. data/spec/lib/flapjack/processor_spec.rb +16 -7
  46. data/tasks/benchmarks.rake +228 -0
  47. data/tasks/events.rake +11 -10
  48. data/tasks/support/flapjack_config_benchmark.yaml +58 -0
  49. metadata +6 -4
@@ -4,6 +4,7 @@ require 'oj'
4
4
  require 'active_support/time'
5
5
  require 'ice_cube'
6
6
  require 'flapjack/utility'
7
+ require 'flapjack/data/tag_set'
7
8
 
8
9
  module Flapjack
9
10
  module Data
@@ -11,7 +12,7 @@ module Flapjack
11
12
 
12
13
  extend Flapjack::Utility
13
14
 
14
- attr_accessor :id, :contact_id, :entities, :entity_tags, :time_restrictions,
15
+ attr_accessor :id, :contact_id, :entities, :tags, :time_restrictions,
15
16
  :warning_media, :critical_media, :warning_blackhole, :critical_blackhole
16
17
 
17
18
  def self.exists_with_id?(rule_id, options = {})
@@ -68,19 +69,23 @@ module Flapjack
68
69
  end
69
70
 
70
71
  def to_json(*args)
71
- self.class.hashify(:id, :contact_id, :entity_tags, :entities,
72
+ self.class.hashify(:id, :contact_id, :tags, :entities,
72
73
  :time_restrictions, :warning_media, :critical_media,
73
74
  :warning_blackhole, :critical_blackhole) {|k|
74
75
  [k, self.send(k)]
75
76
  }.to_json
76
77
  end
77
78
 
78
- # tags or entity names match?
79
- # nil @entity_tags and nil @entities matches
80
- def match_entity?(event)
81
- # TODO: return true if event's entity tags match entity tag list on the rule
82
- ((@entity_tags.nil? || @entity_tags.empty?) && (@entities.nil? || @entities.empty?)) ||
83
- (@entities.include?(event.split(':').first))
79
+ # entity names match?
80
+ def match_entity?(event_id)
81
+ return false unless @entities
82
+ @entities.include?(event_id.split(':').first)
83
+ end
84
+
85
+ # tags match?
86
+ def match_tags?(event_tags)
87
+ return false unless @tags && @tags.length > 0
88
+ @tags.subset?(event_tags)
84
89
  end
85
90
 
86
91
  def blackhole?(severity)
@@ -99,7 +104,7 @@ module Flapjack
99
104
 
100
105
  def is_specific?
101
106
  (!@entities.nil? && !@entities.empty?) ||
102
- (!@entity_tags.nil? && !@entity_tags.empty?)
107
+ (!@tags.nil? && !@tags.empty?)
103
108
  end
104
109
 
105
110
  private
@@ -120,17 +125,21 @@ module Flapjack
120
125
  # make some assumptions about the incoming data
121
126
  rule_data[:warning_blackhole] = rule_data[:warning_blackhole] || false
122
127
  rule_data[:critical_blackhole] = rule_data[:critical_blackhole] || false
128
+ if rule_data[:tags].is_a?(Array)
129
+ rule_data[:tags] = Flapjack::Data::TagSet.new(rule_data[:tags])
130
+ end
123
131
 
124
132
  errors = self.validate_data(rule_data, options)
125
133
 
126
134
  return errors unless errors.nil? || errors.empty?
127
135
 
128
136
  # whitelisting fields, rather than passing through submitted data directly
137
+ tag_data = rule_data[:tags].is_a?(Set) ? rule_data[:tags].to_a : nil
129
138
  json_rule_data = {
130
139
  :id => rule_data[:id].to_s,
131
140
  :contact_id => rule_data[:contact_id].to_s,
132
141
  :entities => Oj.dump(rule_data[:entities]),
133
- :entity_tags => Oj.dump(rule_data[:entity_tags]),
142
+ :tags => Oj.dump(tag_data),
134
143
  :time_restrictions => Oj.dump(rule_data[:time_restrictions]),
135
144
  :warning_media => Oj.dump(rule_data[:warning_media]),
136
145
  :critical_media => Oj.dump(rule_data[:critical_media]),
@@ -221,19 +230,11 @@ module Flapjack
221
230
  d[:entities].all? {|e| e.is_a?(String)} ) } =>
222
231
  "entities must be a list of strings",
223
232
 
224
- proc { !d.has_key?(:entity_tags) ||
225
- ( d[:entity_tags].nil? ||
226
- d[:entity_tags].is_a?(Array) &&
227
- d[:entity_tags].all? {|et| et.is_a?(String)} ) } =>
228
- "entity_tags must be a list of strings",
229
-
230
- #proc { (d.has_key?(:entities) &&
231
- # d[:entities].is_a?(Array) &&
232
- # (d[:entities].size > 0)) ||
233
- # (d.has_key?(:entity_tags) &&
234
- # d[:entity_tags].is_a?(Array) &&
235
- # (d[:entity_tags].size > 0)) } =>
236
- #"entities or entity tags must have at least one value",
233
+ proc { !d.has_key?(:tags) ||
234
+ ( d[:tags].nil? ||
235
+ d[:tags].is_a?(Flapjack::Data::TagSet) &&
236
+ d[:tags].all? {|et| et.is_a?(String)} ) } =>
237
+ "tags must be a tag_set of strings",
237
238
 
238
239
  proc { !d.has_key?(:time_restrictions) ||
239
240
  ( d[:time_restrictions].nil? ||
@@ -284,7 +285,8 @@ module Flapjack
284
285
  rule_data = @redis.hgetall("notification_rule:#{@id}")
285
286
 
286
287
  @contact_id = rule_data['contact_id']
287
- @entity_tags = Oj.load(rule_data['entity_tags'] || '')
288
+ tags = Oj.load(rule_data['tags'] || '')
289
+ @tags = tags ? Flapjack::Data::TagSet.new(tags) : nil
288
290
  @entities = Oj.load(rule_data['entities'] || '')
289
291
  @time_restrictions = Oj.load(rule_data['time_restrictions'] || '')
290
292
  @warning_media = Oj.load(rule_data['warning_media'] || '')
@@ -25,6 +25,11 @@ module Flapjack
25
25
  :redis => opts[:redis])
26
26
  end
27
27
 
28
+ def self.find_intersection(tags, opts)
29
+ @redis = opts[:redis]
30
+ @redis.sinter(tags)
31
+ end
32
+
28
33
  def self.create(name, enum = [], opts)
29
34
  self.new(:name => name,
30
35
  :create => enum,
@@ -45,7 +45,7 @@ module Flapjack
45
45
 
46
46
  class << self
47
47
  def start
48
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
48
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2)
49
49
 
50
50
  @logger.info "starting api - class"
51
51
 
@@ -142,7 +142,7 @@ module Flapjack
142
142
 
143
143
  contact = find_contact(params[:contact_id])
144
144
 
145
- rule_data = hashify(:entities, :entity_tags,
145
+ rule_data = hashify(:entities, :tags,
146
146
  :warning_media, :critical_media, :time_restrictions,
147
147
  :warning_blackhole, :critical_blackhole) {|k| [k, params[k]]}
148
148
 
@@ -165,7 +165,7 @@ module Flapjack
165
165
  rule = find_rule(params[:id])
166
166
  contact = find_contact(rule.contact_id)
167
167
 
168
- rule_data = hashify(:entities, :entity_tags,
168
+ rule_data = hashify(:entities, :tags,
169
169
  :warning_media, :critical_media, :time_restrictions,
170
170
  :warning_blackhole, :critical_blackhole) {|k| [k, params[k]]}
171
171
 
@@ -339,4 +339,4 @@ module Flapjack
339
339
 
340
340
  end
341
341
 
342
- end
342
+ end
@@ -3,6 +3,7 @@
3
3
  require 'mail'
4
4
  require 'erb'
5
5
  require 'socket'
6
+ require 'chronic_duration'
6
7
 
7
8
  require 'em-synchrony'
8
9
  require 'em/protocols/smtpclient'
@@ -28,61 +29,86 @@ module Flapjack
28
29
  end
29
30
 
30
31
  def perform(notification)
31
- @logger.debug "Woo, got a notification to send out: #{notification.inspect}"
32
-
33
- @notification_type = notification['notification_type']
34
- @contact_first_name = notification['contact_first_name']
35
- @contact_last_name = notification['contact_last_name']
36
- @state = notification['state']
37
- @summary = notification['summary']
38
- @last_state = notification['last_state']
39
- @last_summary = notification['last_summary']
40
- @details = notification['details']
41
- @time = notification['time']
42
- @relative = relative_time_ago(Time.at(@time))
43
- @entity_name, @check = notification['event_id'].split(':', 2)
44
-
45
- entity_check = Flapjack::Data::EntityCheck.for_event_id(notification['event_id'],
46
- :redis => ::Resque.redis)
47
-
48
- @in_unscheduled_maintenance = entity_check.in_scheduled_maintenance?
49
- @in_scheduled_maintenance = entity_check.in_unscheduled_maintenance?
50
-
51
- headline_map = {'problem' => 'Problem: ',
52
- 'recovery' => 'Recovery: ',
53
- 'acknowledgement' => 'Acknowledgement: ',
54
- 'test' => 'Test Notification: ',
55
- 'unknown' => ''
56
- }
57
-
58
- headline = headline_map[@notification_type] || ''
59
-
60
- @subject = "#{headline}'#{@check}' on #{@entity_name}"
61
- @subject += " is #{@state.upcase}" unless ['acknowledgement', 'test'].include?(@notification_type)
32
+ begin
33
+ @logger.debug "Woo, got a notification to send out: #{notification.inspect}"
34
+
35
+ @notification_type = notification['notification_type']
36
+ @contact_first_name = notification['contact_first_name']
37
+ @contact_last_name = notification['contact_last_name']
38
+ @state = notification['state']
39
+ @summary = notification['summary']
40
+ @last_state = notification['last_state']
41
+ @last_summary = notification['last_summary']
42
+ @details = notification['details']
43
+ @time = notification['time']
44
+ @entity_name, @check = notification['event_id'].split(':', 2)
45
+
46
+ entity_check = Flapjack::Data::EntityCheck.for_event_id(notification['event_id'],
47
+ :redis => ::Resque.redis)
48
+
49
+ @in_unscheduled_maintenance = entity_check.in_scheduled_maintenance?
50
+ @in_scheduled_maintenance = entity_check.in_unscheduled_maintenance?
51
+
52
+ # FIXME: I can not get the entity_check.last_change to work in this context (Resque)
53
+ # it always returns nil, despite entity_check being a good looking EntityCheck object
54
+ # and all ...
55
+ if lc = entity_check.last_change
56
+ duration = (Time.now.to_i - lc)
57
+ @duration = (duration && duration > 40) ? duration : nil
58
+ end
59
+
60
+ headline_map = {'problem' => 'Problem: ',
61
+ 'recovery' => 'Recovery: ',
62
+ 'acknowledgement' => 'Acknowledgement: ',
63
+ 'test' => 'Test Notification: ',
64
+ 'unknown' => ''
65
+ }
66
+
67
+ headline = headline_map[@notification_type] || ''
68
+
69
+ @subject = "#{headline}'#{@check}' on #{@entity_name}"
70
+ @subject += " is #{@state.upcase}" unless ['acknowledgement', 'test'].include?(@notification_type)
71
+ rescue => e
72
+ @logger.error "Error preparing email to #{m_to}: #{e.class}: #{e.message}"
73
+ @logger.error e.backtrace.join("\n")
74
+ raise
75
+ end
62
76
 
63
77
  begin
64
78
  host = @smtp_config ? @smtp_config['host'] : nil
65
79
  port = @smtp_config ? @smtp_config['port'] : nil
80
+ starttls = @smtp_config ? !! @smtp_config['starttls'] : nil
81
+ if @smtp_config
82
+ if auth_config = @smtp_config['auth']
83
+ auth = {}
84
+ auth[:type] = auth_config['type'].to_sym || :plain
85
+ auth[:username] = auth_config['username']
86
+ auth[:password] = auth_config['password']
87
+ end
88
+ end
66
89
 
67
- fqdn = `/bin/hostname -f`.chomp
68
- m_from = "flapjack@#{fqdn}"
90
+ fqdn = `/bin/hostname -f`.chomp
91
+ m_from = "flapjack@#{fqdn}"
69
92
  @logger.debug("flapjack_mailer: set from to #{m_from}")
70
93
  m_reply_to = m_from
71
94
  m_to = notification['address']
72
95
 
73
- @logger.debug("sending Flapjack::Notification::Email " +
74
- "#{notification['id']} to: #{m_to} subject: #{@subject}")
96
+ @logger.debug("sending Flapjack::Notification::Email " +
97
+ "#{notification['id']} to: #{m_to} subject: #{@subject}")
75
98
 
76
- mail = prepare_email(:subject => @subject,
77
- :from => m_from, :to => m_to)
99
+ mail = prepare_email(:subject => @subject,
100
+ :from => m_from,
101
+ :to => m_to)
78
102
 
79
- email = EM::P::SmtpClient.send(
80
- :from => m_from,
81
- :to => m_to,
82
- :content => "#{mail.to_s}\r\n.\r\n",
83
- :domain => fqdn,
84
- :host => host || 'localhost',
85
- :port => port || 25)
103
+ smtp_args = {:from => m_from,
104
+ :to => m_to,
105
+ :content => "#{mail.to_s}\r\n.\r\n",
106
+ :domain => fqdn,
107
+ :host => host || 'localhost',
108
+ :port => port || 25,
109
+ :starttls => starttls}
110
+ smtp_args.merge!(:auth => auth) if auth
111
+ email = EM::P::SmtpClient.send(smtp_args)
86
112
 
87
113
  response = EM::Synchrony.sync(email)
88
114
 
@@ -97,9 +123,10 @@ module Flapjack
97
123
 
98
124
  @logger.info "Email response: #{response.inspect}"
99
125
 
100
- rescue Exception => e
101
- @logger.error "Error delivering email to #{m_to}: #{e.message}"
126
+ rescue => e
127
+ @logger.error "Error delivering email to #{m_to}: #{e.class}: #{e.message}"
102
128
  @logger.error e.backtrace.join("\n")
129
+ raise
103
130
  end
104
131
  end
105
132
 
@@ -44,10 +44,19 @@
44
44
  </tr>
45
45
  <% end %>
46
46
 
47
- <tr>
48
- <td>Time</td>
49
- <td><%= Time.at(@time.to_i).to_s %> (<%= @relative %> ago)</td>
50
- </tr>
47
+ <% if @time %>
48
+ <tr>
49
+ <td>Time</td>
50
+ <td><%= Time.at(@time.to_i).to_s %></td>
51
+ </tr>
52
+ <% end %>
53
+
54
+ <% if @duration %>
55
+ <tr>
56
+ <td>Duration</td>
57
+ <td><%= ChronicDuration.output(@duration) %></td>
58
+ </tr>
59
+ <% end %>
51
60
 
52
61
  <% if @last_state %>
53
62
  <tr>
@@ -1,4 +1,4 @@
1
- Hi <%= @contact_first_name %>,
1
+ Hi <%= @contact_first_name %>
2
2
 
3
3
  Monitoring has detected the following:
4
4
 
@@ -6,7 +6,7 @@ Entity: <%= @entity_name %>
6
6
  Check: <%= @check %>
7
7
  State: <%= @state %>
8
8
  Summary: <%= @summary %>
9
- Time: <%= Time.at(@time.to_i).to_s %> (<%= @relative %> ago)
9
+ <%= @time ? "Time: #{Time.at(@time.to_i).to_s}" : '' %>
10
10
  <%= @last_state ? "\nPrevious state: #{@last_state}" : '' %><%= @last_summary ? "\nPrevious summary: #{@last_summary}" : '' %>
11
11
 
12
12
  Cheers,
@@ -1,14 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'socket'
3
+ require 'em-hiredis'
4
4
 
5
- require 'eventmachine'
6
- # the redis/synchrony gems need to be required in this particular order, see
7
- # the redis-rb README for details
8
- require 'hiredis'
9
- require 'em-synchrony'
10
- require 'redis/connection/synchrony'
11
- require 'redis'
5
+ require 'socket'
12
6
 
13
7
  require 'blather/client/client'
14
8
  require 'chronic_duration'
@@ -49,10 +43,10 @@ module Flapjack
49
43
 
50
44
  def initialize(opts = {})
51
45
  @config = opts[:config]
52
- @redis_config = opts[:redis_config]
46
+ @redis_config = opts[:redis_config] || {}
53
47
  @boot_time = opts[:boot_time]
54
48
 
55
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
49
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2)
56
50
 
57
51
  @logger = opts[:logger]
58
52
 
@@ -63,7 +57,10 @@ module Flapjack
63
57
 
64
58
  def stop
65
59
  @should_quit = true
66
- @redis.rpush(@config['queue'], Oj.dump('notification_type' => 'shutdown'))
60
+ redis_uri = @redis_config[:path] ||
61
+ "redis://#{@redis_config[:host] || '127.0.0.1'}:#{@redis_config[:port] || '6379'}/#{@redis_config[:db] || '0'}"
62
+ shutdown_redis = EM::Hiredis.connect(redis_uri)
63
+ shutdown_redis.rpush(@config['queue'], Oj.dump('notification_type' => 'shutdown'))
67
64
  end
68
65
 
69
66
  def setup
@@ -406,11 +403,19 @@ module Flapjack
406
403
  @logger.info("starting")
407
404
  @logger.debug("new jabber pikelet with the following options: #{@config.inspect}")
408
405
 
409
- keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
410
- @logger.debug("calling keepalive on the jabber connection")
411
- if connected?
412
- EventMachine::Synchrony.next_tick do
413
- write(' ')
406
+ # the periodic timer can't be halted early (without doing EM.stop) so
407
+ # keep the time short and count the iterations ... could just use
408
+ # EM.sleep(1) in a loop, I suppose
409
+ ki = 0
410
+ keepalive_timer = EventMachine::Synchrony.add_periodic_timer(1) do
411
+ ki += 1
412
+ if ki == 60
413
+ ki = 0
414
+ @logger.debug("calling keepalive on the jabber connection")
415
+ if connected?
416
+ EventMachine::Synchrony.next_tick do
417
+ write(' ')
418
+ end
414
419
  end
415
420
  end
416
421
  end
@@ -435,6 +440,7 @@ module Flapjack
435
440
  @logger.debug('jabber notification event received')
436
441
  @logger.debug(event.inspect)
437
442
  if 'shutdown'.eql?(type)
443
+ @logger.debug("@should_quit: #{@should_quit}")
438
444
  if @should_quit
439
445
  EventMachine::Synchrony.next_tick do
440
446
  # get delays without the next_tick
@@ -20,8 +20,8 @@ module Flapjack
20
20
  def initialize(opts = {})
21
21
  @config = opts[:config]
22
22
  @logger = opts[:logger]
23
- @redis_config = opts[:redis_config]
24
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
23
+ @redis_config = opts[:redis_config] || {}
24
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2)
25
25
 
26
26
  @logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
27
27
 
@@ -32,7 +32,11 @@ module Flapjack
32
32
  def stop
33
33
  @logger.info("stopping")
34
34
  @should_quit = true
35
- @redis.rpush(@config['queue'], Oj.dump('notification_type' => 'shutdown'))
35
+
36
+ redis_uri = @redis_config[:path] ||
37
+ "redis://#{@redis_config[:host] || '127.0.0.1'}:#{@redis_config[:port] || '6379'}/#{@redis_config[:db] || '0'}"
38
+ shutdown_redis = EM::Hiredis.connect(redis_uri)
39
+ shutdown_redis.rpush(@config['queue'], Oj.dump('notification_type' => 'shutdown'))
36
40
  end
37
41
 
38
42
  def start