flapjack 0.6.39 → 0.6.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. data/.gitignore +2 -2
  2. data/Gemfile +5 -1
  3. data/README.md +3 -2
  4. data/Rakefile +2 -1
  5. data/bin/flapjack +2 -2
  6. data/bin/flapjack-nagios-receiver +2 -8
  7. data/bin/flapjack-populator +11 -11
  8. data/etc/flapjack_config.yaml.example +28 -0
  9. data/features/steps/events_steps.rb +1 -1
  10. data/features/steps/notifications_steps.rb +7 -4
  11. data/features/support/env.rb +17 -6
  12. data/flapjack.gemspec +1 -0
  13. data/lib/flapjack/api.rb +72 -28
  14. data/lib/flapjack/configuration.rb +9 -1
  15. data/lib/flapjack/coordinator.rb +138 -162
  16. data/lib/flapjack/data/contact.rb +3 -1
  17. data/lib/flapjack/data/entity.rb +10 -1
  18. data/lib/flapjack/data/entity_check.rb +19 -21
  19. data/lib/flapjack/data/event.rb +26 -27
  20. data/lib/flapjack/data/message.rb +45 -0
  21. data/lib/flapjack/data/notification.rb +49 -0
  22. data/lib/flapjack/executive.rb +53 -74
  23. data/lib/flapjack/filters/acknowledgement.rb +14 -11
  24. data/lib/flapjack/jabber.rb +84 -18
  25. data/lib/flapjack/notification/email.rb +67 -37
  26. data/lib/flapjack/notification/sms.rb +40 -28
  27. data/lib/flapjack/oobetet.rb +1 -1
  28. data/lib/flapjack/pagerduty.rb +24 -15
  29. data/lib/flapjack/patches.rb +3 -1
  30. data/lib/flapjack/pikelet.rb +51 -20
  31. data/lib/flapjack/rack_logger.rb +8 -0
  32. data/lib/flapjack/version.rb +1 -1
  33. data/lib/flapjack/web.rb +51 -27
  34. data/spec/lib/flapjack/api_spec.rb +28 -3
  35. data/spec/lib/flapjack/coordinator_spec.rb +69 -43
  36. data/spec/lib/flapjack/data/contact_spec.rb +17 -9
  37. data/spec/lib/flapjack/data/entity_check_spec.rb +0 -25
  38. data/spec/lib/flapjack/data/entity_spec.rb +4 -0
  39. data/spec/lib/flapjack/data/global_spec.rb +6 -0
  40. data/spec/lib/flapjack/data/message_spec.rb +6 -0
  41. data/spec/lib/flapjack/data/notification_spec.rb +6 -0
  42. data/spec/lib/flapjack/executive_spec.rb +2 -2
  43. data/spec/lib/flapjack/jabber_spec.rb +8 -9
  44. data/spec/lib/flapjack/pagerduty_spec.rb +53 -45
  45. data/spec/lib/flapjack/utility_spec.rb +55 -0
  46. data/spec/lib/flapjack/web_spec.rb +7 -5
  47. data/tasks/events.rake +26 -59
  48. data/tasks/profile.rake +366 -0
  49. metadata +30 -19
  50. data/lib/flapjack/notification/common.rb +0 -23
  51. data/lib/flapjack/persistence/couch.rb +0 -5
  52. data/lib/flapjack/persistence/couch/connection.rb +0 -66
  53. data/lib/flapjack/persistence/couch/couch.rb +0 -63
  54. data/lib/flapjack/persistence/data_mapper.rb +0 -3
  55. data/lib/flapjack/persistence/data_mapper/data_mapper.rb +0 -67
  56. data/lib/flapjack/persistence/data_mapper/models/check.rb +0 -90
  57. data/lib/flapjack/persistence/data_mapper/models/check_template.rb +0 -20
  58. data/lib/flapjack/persistence/data_mapper/models/event.rb +0 -19
  59. data/lib/flapjack/persistence/data_mapper/models/node.rb +0 -18
  60. data/lib/flapjack/persistence/data_mapper/models/related_check.rb +0 -15
  61. data/lib/flapjack/persistence/sqlite3.rb +0 -3
  62. data/lib/flapjack/persistence/sqlite3/sqlite3.rb +0 -166
  63. data/lib/flapjack/transports/beanstalkd.rb +0 -50
  64. data/lib/flapjack/transports/result.rb +0 -58
  65. data/lib/flapjack/worker/application.rb +0 -121
  66. data/lib/flapjack/worker/cli.rb +0 -49
@@ -74,30 +74,26 @@ module Flapjack
74
74
  }
75
75
  end
76
76
 
77
- # creates, or modifies, an event object and adds it to the events list in redis
78
- # 'type' => 'service',
79
- # 'state' => state,
80
- # 'summary' => check_output,
81
- # 'time' => timestamp
82
- def create_event(event)
83
- event.merge!('entity' => @entity.name, 'check' => @check)
84
- event['time'] = Time.now.to_i if event['time'].nil?
85
- @redis.rpush('events', Yajl::Encoder.encode(event))
86
- end
87
-
88
- def create_acknowledgement(opts = {})
89
- defaults = {
90
- 'summary' => '...'
91
- }
92
- options = defaults.merge(opts)
93
-
77
+ def create_acknowledgement(options = {})
94
78
  event = { 'type' => 'action',
95
79
  'state' => 'acknowledgement',
96
80
  'summary' => options['summary'],
97
81
  'duration' => options['duration'],
98
- 'acknowledgement_id' => options['acknowledgement_id']
82
+ 'acknowledgement_id' => options['acknowledgement_id'],
83
+ 'entity' => @entity.name,
84
+ 'check' => @check
85
+ }
86
+ Flapjack::Data::Event.add(event, :redis => @redis)
87
+ end
88
+
89
+ def test_notifications(options = {})
90
+ event = { 'type' => 'action',
91
+ 'state' => 'test_notifications',
92
+ 'summary' => options['summary'],
93
+ 'entity' => @entity.name,
94
+ 'check' => @check
99
95
  }
100
- create_event(event)
96
+ Flapjack::Data::Event.add(event, :redis => @redis)
101
97
  end
102
98
 
103
99
  # FIXME: need to add summary to summary of existing unscheduled maintenance if there is
@@ -425,8 +421,10 @@ module Flapjack
425
421
  check = @check
426
422
 
427
423
  if @logger
428
- @logger.debug("contacts for #{@entity.id} (#{@entity.name}): " + @redis.smembers("contacts_for:#{@entity.id}").length.to_s)
429
- @logger.debug("contacts for #{check}: " + @redis.smembers("contacts_for:#{check}").length.to_s)
424
+ @logger.debug("contacts for #{@entity.id} (#{@entity.name}): " +
425
+ @redis.smembers("contacts_for:#{@entity.id}").length.to_s)
426
+ @logger.debug("contacts for #{check}: " +
427
+ @redis.smembers("contacts_for:#{check}").length.to_s)
430
428
  end
431
429
 
432
430
  union = @redis.sunion("contacts_for:#{@entity.id}", "contacts_for:#{check}")
@@ -17,43 +17,38 @@ module Flapjack
17
17
  # events on the queue.
18
18
  #
19
19
  def self.next(opts={})
20
+ raise "Redis connection not set" unless redis = opts[:redis]
21
+
20
22
  defaults = { :block => true }
21
23
  options = defaults.merge(opts)
22
- block = options[:block]
23
24
 
24
25
  # In production, we wait indefinitely for events coming from other systems.
25
- if block
26
- raw = opts[:persistence].blpop('events', 0).last
27
- event = ::JSON.parse(raw)
28
- self.new(event)
29
- else
30
- # In testing, we take care that there are no events on the queue.
31
- raw = opts[:persistence].lpop('events')
32
- result = nil
33
-
34
- if raw
35
- event = ::JSON.parse(raw)
36
- result = self.new(event)
37
- end
38
-
39
- result
26
+ if options[:block]
27
+ return self.new( ::JSON.parse( redis.blpop('events', 0).last ) )
40
28
  end
29
+
30
+ # In testing, we take care that there are no events on the queue.
31
+ return unless raw = redis.lpop('events')
32
+ self.new( ::JSON.parse(raw) )
33
+ end
34
+
35
+ # creates, or modifies, an event object and adds it to the events list in redis
36
+ # 'type' => 'service',
37
+ # 'state' => state,
38
+ # 'summary' => check_output,
39
+ # 'time' => timestamp
40
+ def self.add(evt, opts = {})
41
+ raise "Redis connection not set" unless redis = opts[:redis]
42
+
43
+ evt['time'] = Time.now.to_i if evt['time'].nil?
44
+ redis.rpush('events', Yajl::Encoder.encode(evt))
41
45
  end
42
46
 
43
47
  # Provide a count of the number of events on the queue to be processed.
44
48
  def self.pending_count(opts = {})
45
- opts[:persistence].llen('events')
46
- end
49
+ raise "Redis connection not set" unless redis = opts[:redis]
47
50
 
48
- # FIXME make this use a logger taken from the opts
49
- def self.purge_all(opts = {})
50
- events_size = opts[:redis].llen('events')
51
- puts "purging #{events_size} events..."
52
- timestamp = Time.now.to_i
53
- puts "renaming events to events.#{timestamp}"
54
- opts[:redis].rename('events', "events.#{timestamp}")
55
- puts "setting expiry of events.#{timestamp} to 8 hours"
56
- opts[:redis].expire("events.#{timestamp}", (60 * 60 * 8))
51
+ redis.llen('events')
57
52
  end
58
53
 
59
54
  def initialize(attrs={})
@@ -125,6 +120,10 @@ module Flapjack
125
120
  action? and state == 'acknowledgement'
126
121
  end
127
122
 
123
+ def test_notifications?
124
+ action? and state == 'test_notifications'
125
+ end
126
+
128
127
  def ok?
129
128
  (state == 'ok') or (state == 'up')
130
129
  end
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # 'Notification' refers to the template object created when an event occurs,
4
+ # from which individual 'Message' objects are created, one for each
5
+ # contact+media recipient.
6
+
7
module Flapjack
  module Data
    # A single outgoing message: one contact reached via one medium at one
    # address, optionally linked to the Notification it was generated from.
    class Message

      attr_accessor :medium, :address, :duration, :contact, :notification

      # The reader for id is defined explicitly below (it lazily generates a
      # value), so only the writer is declared here.
      attr_writer :id

      # Builds a message for the contact given as opts[:contact] (may be nil).
      def self.for_contact(opts = {})
        self.new(:contact => opts[:contact])
      end

      # Returns the message id. When none has been assigned, a fairly unique
      # one is generated from this object's id and the current time
      # (seconds '.' microseconds), then remembered for later calls.
      def id
        @id ||= begin
          t = Time.now
          "#{object_id}-#{t.to_i}.#{t.tv_usec}"
        end
      end

      # Returns a Hash describing this message: 'media', 'address' and 'id',
      # plus the contact's id and names when a contact is set, 'duration'
      # when set, and the linked notification's contents when one is attached.
      #
      # The hash is always returned, whether or not a contact or notification
      # is present.
      def contents
        c = {'media'   => medium,
             'address' => address,
             'id'      => id}

        # Hash#update merges in place; the non-mutating Hash#merge would
        # build a new hash and leave c untouched.
        if contact
          c.update('contact_id'         => contact.id,
                   'contact_first_name' => contact.first_name,
                   'contact_last_name'  => contact.last_name)
        end

        c['duration'] = duration if duration
        c.update(notification.contents) if notification
        c
      end

    private

      # Use Message.for_contact to build instances.
      def initialize(opts = {})
        @contact = opts[:contact]
      end

    end
  end
end
45
+
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'flapjack/data/message'
4
+
5
module Flapjack
  module Data
    # Template created for an event, from which one Message per
    # contact + medium is generated.
    class Notification

      attr_accessor :event, :type

      # Builds a notification for +event+; opts[:type] is stored as the
      # notification type. Raises RuntimeError ("Event not passed") when
      # +event+ is nil or false.
      def self.for_event(event, opts = {})
        self.new(:event => event, :type => opts[:type])
      end

      # Returns one Flapjack::Data::Message per contact and medium for the
      # contacts given in opts[:contacts], or [] when none are given.
      #
      # The result is cached, but only for the contact list it was built
      # from: a call with a different list rebuilds the messages.
      def messages(opts = {})
        contacts = opts[:contacts]
        return [] if contacts.nil?

        unless @messages && @messages_contacts == contacts
          # Snapshot the list so later in-place changes by the caller cannot
          # make the cache look current when it is not.
          @messages_contacts = contacts.dup
          @messages = contacts.collect { |contact| messages_for(contact) }.flatten
        end

        @messages
      end

      # Returns (and caches) the event details shared by every message:
      # event id, state, summary, time and the notification type.
      def contents
        @contents ||= {'event_id'          => event.id,
                       'state'             => event.state,
                       'summary'           => event.summary,
                       'time'              => event.time,
                       'notification_type' => type}
      end

    private

      # Use Notification.for_event to build instances.
      def initialize(opts = {})
        @event = opts[:event]
        raise "Event not passed" unless @event
        @type = opts[:type]
      end

      # Builds one message per medium of +contact+.
      # NOTE(review): assumes contact.media responds to #keys and #[]
      # (medium => address), as the lookups below require - confirm.
      def messages_for(contact)
        media = contact.media
        media.keys.collect do |medium|
          msg = Flapjack::Data::Message.for_contact(:contact => contact)
          msg.notification = self
          msg.medium       = medium
          msg.address      = media[medium]
          msg
        end
      end

    end
  end
end
49
+
@@ -12,19 +12,25 @@ require 'flapjack/filters/detect_mass_client_failures'
12
12
  require 'flapjack/filters/delays'
13
13
  require 'flapjack/data/contact'
14
14
  require 'flapjack/data/entity_check'
15
+ require 'flapjack/data/notification'
15
16
  require 'flapjack/data/event'
16
- require 'flapjack/notification/common'
17
17
  require 'flapjack/notification/sms'
18
18
  require 'flapjack/notification/email'
19
19
  require 'flapjack/pikelet'
20
+ require 'flapjack/redis_pool'
20
21
 
21
22
  module Flapjack
22
23
 
23
24
  class Executive
24
- include Flapjack::Pikelet
25
+ include Flapjack::GenericPikelet
25
26
 
26
- def setup
27
- @redis = build_redis_connection_pool
27
+ alias_method :generic_bootstrap, :bootstrap
28
+ alias_method :generic_cleanup, :cleanup
29
+
30
+ def bootstrap(opts = {})
31
+ generic_bootstrap(opts)
32
+
33
+ @redis = Flapjack::RedisPool.new(:config => opts[:redis_config], :size => 1)
28
34
 
29
35
  @queues = {:email => @config['email_queue'],
30
36
  :sms => @config['sms_queue'],
@@ -73,19 +79,20 @@ module Flapjack
73
79
  @redis.hset("event_counters:#{@instance_id}", 'action', 0)
74
80
  end
75
81
 
76
- def main
77
- setup
82
+ def cleanup
83
+ @redis.empty! if @redis
84
+ generic_cleanup
85
+ end
78
86
 
87
+ def main
79
88
  @logger.info("Booting main loop.")
80
89
 
81
- until should_quit?
90
+ until should_quit? && @received_shutdown
82
91
  @logger.info("Waiting for event...")
83
- event = Flapjack::Data::Event.next(:persistence => @redis)
92
+ event = Flapjack::Data::Event.next(:redis => @redis)
84
93
  process_event(event) unless event.nil?
85
94
  end
86
95
 
87
- @redis.empty! if @redis
88
-
89
96
  @logger.info("Exiting main loop.")
90
97
  end
91
98
 
@@ -102,7 +109,8 @@ module Flapjack
102
109
  private
103
110
 
104
111
  def process_event(event)
105
- @logger.debug("#{Flapjack::Data::Event.pending_count(:persistence => @redis)} events waiting on the queue")
112
+ pending = Flapjack::Data::Event.pending_count(:redis => @redis)
113
+ @logger.debug("#{pending} events waiting on the queue")
106
114
  @logger.debug("Raw event received: #{event.inspect}")
107
115
  time_at = event.time
108
116
  time_at_str = time_at ? ", #{Time.at(time_at).to_s}" : ''
@@ -129,14 +137,14 @@ module Flapjack
129
137
  end
130
138
 
131
139
  @logger.info("#{Time.now}: Sending notifications for event #{event.id}")
132
- generate_notification(event, entity_check)
140
+ send_notification_messages(event, entity_check)
133
141
  end
134
142
 
135
143
  def update_keys(event, entity_check)
136
144
  result = { :skip_filters => false }
137
145
  timestamp = Time.now.to_i
138
146
  @event_count = @redis.hincrby('event_counters', 'all', 1)
139
- @event_count = @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
147
+ @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
140
148
 
141
149
  # FIXME skip if entity_check.nil?
142
150
 
@@ -191,16 +199,15 @@ module Flapjack
191
199
  end
192
200
  when 'shutdown'
193
201
  # should this be logged as an action instead? being minimally invasive for now
194
- result[:shutdown] = true
202
+ result[:shutdown] = @received_shutdown = true
195
203
  end
196
204
 
197
205
  result
198
206
  end
199
207
 
200
208
  # takes an event for which a notification needs to be generated, works out the type of
201
- # notification, updates the notification history in redis, calls other methods to work out who
202
- # to notify, by what method, and finally to have the notifications sent
203
- def generate_notification(event, entity_check)
209
+ # notification, updates the notification history in redis, sends the notifications
210
+ def send_notification_messages(event, entity_check)
204
211
  timestamp = Time.now.to_i
205
212
  notification_type = 'unknown'
206
213
  case event.type
@@ -215,79 +222,51 @@ module Flapjack
215
222
  case event.state
216
223
  when 'acknowledgement'
217
224
  notification_type = 'acknowledgement'
225
+ when 'test_notifications'
226
+ notification_type = 'test'
218
227
  end
219
228
  end
220
229
  @redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
221
230
  @redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
222
231
  @logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.")
223
232
 
224
- send_notifications(event, notification_type, entity_check.contacts)
225
- end
226
-
227
- # takes an event, a notification type, and an array of contacts and creates jobs in resque
228
- # (eventually) for each notification
229
- def send_notifications(event, notification_type, contacts)
230
- notification = { 'event_id' => event.id,
231
- 'state' => event.state,
232
- 'summary' => event.summary,
233
- 'time' => event.time,
234
- 'notification_type' => notification_type }
233
+ contacts = entity_check.contacts
235
234
 
236
235
  if contacts.empty?
237
236
  @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | NO CONTACTS")
238
237
  return
239
- end
238
+ end
239
+
240
+ notification = Flapjack::Data::Notification.for_event(event, :type => notification_type)
240
241
 
241
- contacts.each {|contact|
242
+ notification.messages(:contacts => contacts).each do |msg|
243
+ media_type = msg.medium.to_sym
242
244
 
243
- if contact.media.empty?
244
- @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | #{contact.id} | NO MEDIA FOR CONTACT")
245
+ @notifylog.info("#{Time.now.to_s} | #{event.id} | " +
246
+ "#{notification_type} | #{msg.contact.id} | #{media_type.to_s} | #{msg.address}")
247
+
248
+ unless @queues[media_type]
249
+ # TODO log error
245
250
  next
246
251
  end
247
252
 
248
- notification.merge!({'contact_id' => contact.id,
249
- 'contact_first_name' => contact.first_name,
250
- 'contact_last_name' => contact.last_name, })
251
-
252
- contact.media.each_pair {|media_type, address|
253
-
254
- @notifylog.info("#{Time.now.to_s} | #{event.id} | " +
255
- "#{notification_type} | #{contact.id} | #{media_type} | #{address}")
256
-
257
- # queue this notification
258
- notif = notification.dup
259
- notif['media'] = media_type
260
- notif['address'] = address
261
- notif['id'] = fuid
262
- dur = event.duration
263
- notif['duration'] = dur if dur
264
- @logger.debug("send_notifications: sending notification: #{notif.inspect}")
265
-
266
- unless @queues[media_type.to_sym]
267
- # TODO log error
268
- next
269
- end
270
-
271
- # TODO consider changing Resque jobs to use raw blpop like the others
272
- case media_type
273
- when "sms"
274
- Resque.enqueue_to(@queues[:sms], Notification::Sms, notif)
275
- when "email"
276
- Resque.enqueue_to(@queues[:email], Notification::Email, notif)
277
- when "jabber"
278
- # TODO move next line up into other notif value setting above?
279
- notif['event_count'] = @event_count if @event_count
280
- @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(notif))
281
- when "pagerduty"
282
- @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(notif))
283
- end
284
- }
285
- }
286
- end
253
+ contents = msg.contents
254
+
255
+ # TODO consider changing Resque jobs to use raw blpop like the others
256
+ case media_type
257
+ when :sms
258
+ Resque.enqueue_to(@queues[:sms], Flapjack::Notification::Sms, contents)
259
+ when :email
260
+ Resque.enqueue_to(@queues[:email], Flapjack::Notification::Email, contents)
261
+ when :jabber
262
+ # TODO move next line up into other notif value setting above?
263
+ contents['event_count'] = @event_count if @event_count
264
+ @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(contents))
265
+ when :pagerduty
266
+ @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(contents))
267
+ end
287
268
 
288
- # generates a fairly unique identifier to use as a message id
289
- def fuid
290
- fuid = self.object_id.to_i.to_s + '-' + Time.now.to_i.to_s + '.' + Time.now.tv_usec.to_s
269
+ end
291
270
  end
292
271
 
293
272
  end
@@ -15,21 +15,24 @@ module Flapjack
15
15
  timestamp = Time.now.to_i
16
16
  result = false
17
17
  if event.type == 'action'
18
- if event.acknowledgement? and @persistence.zscore("failed_checks", event.id)
19
- ec = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
20
- if ec.nil?
21
- @log.error "Filter: Acknowledgement: unknown entity for event '#{event.id}'"
18
+ if event.acknowledgement?
19
+ if @persistence.zscore("failed_checks", event.id)
20
+ ec = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
21
+ if ec.nil?
22
+ @log.error "Filter: Acknowledgement: unknown entity for event '#{event.id}'"
23
+ else
24
+ ec.create_unscheduled_maintenance(:start_time => timestamp,
25
+ :duration => (event.duration || (4 * 60 * 60)),
26
+ :summary => event.summary)
27
+ message = "unscheduled maintenance created for #{event.id}"
28
+ end
22
29
  else
23
- ec.create_unscheduled_maintenance(:start_time => timestamp,
24
- :duration => (event.duration || (4 * 60 * 60)),
25
- :summary => event.summary)
26
- message = "unscheduled maintenance created for #{event.id}"
30
+ result = true
31
+ @log.debug("Filter: Acknowledgement: blocking because zscore of failed_checks for #{event.id} is false") unless @persistence.zscore("failed_checks", event.id)
27
32
  end
28
33
  else
29
34
  message = "no action taken"
30
- result = true
31
- @log.debug("Filter: Acknowledgement: blocking because event.acknowledgement? is false") unless event.acknowledgement?
32
- @log.debug("Filter: Acknowledgement: blocking because zscore of failed_checks for #{event.id} is false") unless @persistence.zscore("failed_checks", event.id)
35
+ result = false
33
36
  end
34
37
  end
35
38
  @log.debug("Filter: Acknowledgement: #{result ? "block" : "pass"} (#{message})")