flapjack 0.6.39 → 0.6.40

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/.gitignore +2 -2
  2. data/Gemfile +5 -1
  3. data/README.md +3 -2
  4. data/Rakefile +2 -1
  5. data/bin/flapjack +2 -2
  6. data/bin/flapjack-nagios-receiver +2 -8
  7. data/bin/flapjack-populator +11 -11
  8. data/etc/flapjack_config.yaml.example +28 -0
  9. data/features/steps/events_steps.rb +1 -1
  10. data/features/steps/notifications_steps.rb +7 -4
  11. data/features/support/env.rb +17 -6
  12. data/flapjack.gemspec +1 -0
  13. data/lib/flapjack/api.rb +72 -28
  14. data/lib/flapjack/configuration.rb +9 -1
  15. data/lib/flapjack/coordinator.rb +138 -162
  16. data/lib/flapjack/data/contact.rb +3 -1
  17. data/lib/flapjack/data/entity.rb +10 -1
  18. data/lib/flapjack/data/entity_check.rb +19 -21
  19. data/lib/flapjack/data/event.rb +26 -27
  20. data/lib/flapjack/data/message.rb +45 -0
  21. data/lib/flapjack/data/notification.rb +49 -0
  22. data/lib/flapjack/executive.rb +53 -74
  23. data/lib/flapjack/filters/acknowledgement.rb +14 -11
  24. data/lib/flapjack/jabber.rb +84 -18
  25. data/lib/flapjack/notification/email.rb +67 -37
  26. data/lib/flapjack/notification/sms.rb +40 -28
  27. data/lib/flapjack/oobetet.rb +1 -1
  28. data/lib/flapjack/pagerduty.rb +24 -15
  29. data/lib/flapjack/patches.rb +3 -1
  30. data/lib/flapjack/pikelet.rb +51 -20
  31. data/lib/flapjack/rack_logger.rb +8 -0
  32. data/lib/flapjack/version.rb +1 -1
  33. data/lib/flapjack/web.rb +51 -27
  34. data/spec/lib/flapjack/api_spec.rb +28 -3
  35. data/spec/lib/flapjack/coordinator_spec.rb +69 -43
  36. data/spec/lib/flapjack/data/contact_spec.rb +17 -9
  37. data/spec/lib/flapjack/data/entity_check_spec.rb +0 -25
  38. data/spec/lib/flapjack/data/entity_spec.rb +4 -0
  39. data/spec/lib/flapjack/data/global_spec.rb +6 -0
  40. data/spec/lib/flapjack/data/message_spec.rb +6 -0
  41. data/spec/lib/flapjack/data/notification_spec.rb +6 -0
  42. data/spec/lib/flapjack/executive_spec.rb +2 -2
  43. data/spec/lib/flapjack/jabber_spec.rb +8 -9
  44. data/spec/lib/flapjack/pagerduty_spec.rb +53 -45
  45. data/spec/lib/flapjack/utility_spec.rb +55 -0
  46. data/spec/lib/flapjack/web_spec.rb +7 -5
  47. data/tasks/events.rake +26 -59
  48. data/tasks/profile.rake +366 -0
  49. metadata +30 -19
  50. data/lib/flapjack/notification/common.rb +0 -23
  51. data/lib/flapjack/persistence/couch.rb +0 -5
  52. data/lib/flapjack/persistence/couch/connection.rb +0 -66
  53. data/lib/flapjack/persistence/couch/couch.rb +0 -63
  54. data/lib/flapjack/persistence/data_mapper.rb +0 -3
  55. data/lib/flapjack/persistence/data_mapper/data_mapper.rb +0 -67
  56. data/lib/flapjack/persistence/data_mapper/models/check.rb +0 -90
  57. data/lib/flapjack/persistence/data_mapper/models/check_template.rb +0 -20
  58. data/lib/flapjack/persistence/data_mapper/models/event.rb +0 -19
  59. data/lib/flapjack/persistence/data_mapper/models/node.rb +0 -18
  60. data/lib/flapjack/persistence/data_mapper/models/related_check.rb +0 -15
  61. data/lib/flapjack/persistence/sqlite3.rb +0 -3
  62. data/lib/flapjack/persistence/sqlite3/sqlite3.rb +0 -166
  63. data/lib/flapjack/transports/beanstalkd.rb +0 -50
  64. data/lib/flapjack/transports/result.rb +0 -58
  65. data/lib/flapjack/worker/application.rb +0 -121
  66. data/lib/flapjack/worker/cli.rb +0 -49
@@ -74,30 +74,26 @@ module Flapjack
74
74
  }
75
75
  end
76
76
 
77
- # creates, or modifies, an event object and adds it to the events list in redis
78
- # 'type' => 'service',
79
- # 'state' => state,
80
- # 'summary' => check_output,
81
- # 'time' => timestamp
82
- def create_event(event)
83
- event.merge!('entity' => @entity.name, 'check' => @check)
84
- event['time'] = Time.now.to_i if event['time'].nil?
85
- @redis.rpush('events', Yajl::Encoder.encode(event))
86
- end
87
-
88
- def create_acknowledgement(opts = {})
89
- defaults = {
90
- 'summary' => '...'
91
- }
92
- options = defaults.merge(opts)
93
-
77
+ def create_acknowledgement(options = {})
94
78
  event = { 'type' => 'action',
95
79
  'state' => 'acknowledgement',
96
80
  'summary' => options['summary'],
97
81
  'duration' => options['duration'],
98
- 'acknowledgement_id' => options['acknowledgement_id']
82
+ 'acknowledgement_id' => options['acknowledgement_id'],
83
+ 'entity' => @entity.name,
84
+ 'check' => @check
85
+ }
86
+ Flapjack::Data::Event.add(event, :redis => @redis)
87
+ end
88
+
89
+ def test_notifications(options = {})
90
+ event = { 'type' => 'action',
91
+ 'state' => 'test_notifications',
92
+ 'summary' => options['summary'],
93
+ 'entity' => @entity.name,
94
+ 'check' => @check
99
95
  }
100
- create_event(event)
96
+ Flapjack::Data::Event.add(event, :redis => @redis)
101
97
  end
102
98
 
103
99
  # FIXME: need to add summary to summary of existing unscheduled maintenance if there is
@@ -425,8 +421,10 @@ module Flapjack
425
421
  check = @check
426
422
 
427
423
  if @logger
428
- @logger.debug("contacts for #{@entity.id} (#{@entity.name}): " + @redis.smembers("contacts_for:#{@entity.id}").length.to_s)
429
- @logger.debug("contacts for #{check}: " + @redis.smembers("contacts_for:#{check}").length.to_s)
424
+ @logger.debug("contacts for #{@entity.id} (#{@entity.name}): " +
425
+ @redis.smembers("contacts_for:#{@entity.id}").length.to_s)
426
+ @logger.debug("contacts for #{check}: " +
427
+ @redis.smembers("contacts_for:#{check}").length.to_s)
430
428
  end
431
429
 
432
430
  union = @redis.sunion("contacts_for:#{@entity.id}", "contacts_for:#{check}")
@@ -17,43 +17,38 @@ module Flapjack
17
17
  # events on the queue.
18
18
  #
19
19
  def self.next(opts={})
20
+ raise "Redis connection not set" unless redis = opts[:redis]
21
+
20
22
  defaults = { :block => true }
21
23
  options = defaults.merge(opts)
22
- block = options[:block]
23
24
 
24
25
  # In production, we wait indefinitely for events coming from other systems.
25
- if block
26
- raw = opts[:persistence].blpop('events', 0).last
27
- event = ::JSON.parse(raw)
28
- self.new(event)
29
- else
30
- # In testing, we take care that there are no events on the queue.
31
- raw = opts[:persistence].lpop('events')
32
- result = nil
33
-
34
- if raw
35
- event = ::JSON.parse(raw)
36
- result = self.new(event)
37
- end
38
-
39
- result
26
+ if options[:block]
27
+ return self.new( ::JSON.parse( redis.blpop('events', 0).last ) )
40
28
  end
29
+
30
+ # In testing, we take care that there are no events on the queue.
31
+ return unless raw = redis.lpop('events')
32
+ self.new( ::JSON.parse(raw) )
33
+ end
34
+
35
# Pushes an event onto the tail of the 'events' list in redis, encoded with
# Yajl. An event looks like:
#   'type'    => 'service',
#   'state'   => state,
#   'summary' => check_output,
#   'time'    => timestamp
#
# evt          - the event hash; when its 'time' is nil it is set, in place,
#                to the current time in epoch seconds
# opts[:redis] - the redis connection to push to (required)
#
# Raises RuntimeError ("Redis connection not set") when no connection is
# given. Returns the result of the rpush call.
def self.add(evt, opts = {})
  connection = opts[:redis]
  raise "Redis connection not set" unless connection

  evt['time'] = Time.now.to_i if evt['time'].nil?

  encoded = Yajl::Encoder.encode(evt)
  connection.rpush('events', encoded)
end
42
46
 
43
47
  # Provide a count of the number of events on the queue to be processed.
44
48
  def self.pending_count(opts = {})
45
- opts[:persistence].llen('events')
46
- end
49
+ raise "Redis connection not set" unless redis = opts[:redis]
47
50
 
48
- # FIXME make this use a logger taken from the opts
49
- def self.purge_all(opts = {})
50
- events_size = opts[:redis].llen('events')
51
- puts "purging #{events_size} events..."
52
- timestamp = Time.now.to_i
53
- puts "renaming events to events.#{timestamp}"
54
- opts[:redis].rename('events', "events.#{timestamp}")
55
- puts "setting expiry of events.#{timestamp} to 8 hours"
56
- opts[:redis].expire("events.#{timestamp}", (60 * 60 * 8))
51
+ redis.llen('events')
57
52
  end
58
53
 
59
54
  def initialize(attrs={})
@@ -125,6 +120,10 @@ module Flapjack
125
120
  action? and state == 'acknowledgement'
126
121
  end
127
122
 
123
# True when this is an action event whose state is 'test_notifications'.
def test_notifications?
  action? && (state == 'test_notifications')
end
126
+
128
127
  def ok?
129
128
  (state == 'ok') or (state == 'up')
130
129
  end
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # 'Notification' refers to the template object created when an event occurs,
4
+ # from which individual 'Message' objects are created, one for each
5
+ # contact+media recipient.
6
+
7
module Flapjack
  module Data
    # A single outgoing message: one contact reached over one medium.
    # Messages are normally created by a notification, which assigns itself
    # to #notification and fills in #medium and #address.
    class Message

      attr_accessor :medium, :address, :duration, :contact, :notification

      # Only the writer is generated for id; the reader below lazily builds
      # a value when none has been assigned.
      attr_writer :id

      # Builds a message for a contact.
      #
      # opts[:contact] - the contact the message is addressed to (may be nil)
      #
      # Returns a new Message.
      def self.for_contact(opts = {})
        new(:contact => opts[:contact])
      end

      def initialize(opts = {})
        @contact = opts[:contact]
      end

      # Returns the assigned id, or generates (and remembers) one made of
      # this object's object_id plus the current time in seconds and
      # microseconds.
      def id
        @id ||= begin
          now = Time.now
          "#{object_id}-#{now.to_i}.#{now.tv_usec}"
        end
      end

      # Builds the payload hash for this message.
      #
      # Always contains 'media', 'address' and 'id'. Contact details are
      # added when a contact is set, 'duration' when a duration is set, and
      # the notification's contents are merged in last when a notification
      # is set.
      #
      # Returns a new Hash on every call; never nil.
      def contents
        payload = {'media'   => medium,
                   'address' => address,
                   'id'      => id}

        if contact
          # update mutates in place; a plain merge here would throw the
          # contact fields away.
          payload.update('contact_id'         => contact.id,
                         'contact_first_name' => contact.first_name,
                         'contact_last_name'  => contact.last_name)
        end

        payload['duration'] = duration if duration
        payload.update(notification.contents) if notification

        # Return the hash explicitly so a message without a notification
        # still yields its payload instead of nil.
        payload
      end

    end
  end
end
45
+
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'flapjack/data/message'
4
+
5
module Flapjack
  module Data
    # Template created for an event that needs notifying. It is expanded
    # into one Flapjack::Data::Message per contact and medium.
    class Notification

      attr_accessor :event, :type

      # Builds a notification for an event.
      #
      # event       - the event being notified about (required)
      # opts[:type] - the notification type reported in #contents
      #
      # Raises RuntimeError ("Event not passed") when event is nil or false.
      def self.for_event(event, opts = {})
        new(:event => event, :type => opts[:type])
      end

      def initialize(opts = {})
        @event = opts[:event]
        raise "Event not passed" unless @event
        @type = opts[:type]
      end

      # Builds one message per (contact, medium) pair.
      #
      # opts[:contacts] - contacts to notify; each must respond to #media
      #                   with a hash of medium => address
      #
      # Returns [] when no contacts are given. NOTE: the first non-nil
      # result is memoized, so later calls return the same messages even if
      # a different contact list is passed.
      def messages(opts = {})
        contacts = opts[:contacts]
        return [] if contacts.nil?

        @messages ||= contacts.flat_map do |contact|
          contact.media.map do |medium, address|
            build_message(contact, medium, address)
          end
        end
      end

      # Returns the (memoized) event details shared by every message of
      # this notification.
      def contents
        @contents ||= {'event_id'          => event.id,
                       'state'             => event.state,
                       'summary'           => event.summary,
                       'time'              => event.time,
                       'notification_type' => type}
      end

      private

      # Creates the message sent to one contact over one medium.
      def build_message(contact, medium, address)
        message = Flapjack::Data::Message.for_contact(:contact => contact)
        message.notification = self
        message.medium       = medium
        message.address      = address

        # Carry the event's duration (e.g. an acknowledgement's length) onto
        # the message so it reaches the payload; without this the message's
        # duration is never populated.
        duration = event.duration if event.respond_to?(:duration)
        message.duration = duration if duration

        message
      end

    end
  end
end
49
+
@@ -12,19 +12,25 @@ require 'flapjack/filters/detect_mass_client_failures'
12
12
  require 'flapjack/filters/delays'
13
13
  require 'flapjack/data/contact'
14
14
  require 'flapjack/data/entity_check'
15
+ require 'flapjack/data/notification'
15
16
  require 'flapjack/data/event'
16
- require 'flapjack/notification/common'
17
17
  require 'flapjack/notification/sms'
18
18
  require 'flapjack/notification/email'
19
19
  require 'flapjack/pikelet'
20
+ require 'flapjack/redis_pool'
20
21
 
21
22
  module Flapjack
22
23
 
23
24
  class Executive
24
- include Flapjack::Pikelet
25
+ include Flapjack::GenericPikelet
25
26
 
26
- def setup
27
- @redis = build_redis_connection_pool
27
+ alias_method :generic_bootstrap, :bootstrap
28
+ alias_method :generic_cleanup, :cleanup
29
+
30
+ def bootstrap(opts = {})
31
+ generic_bootstrap(opts)
32
+
33
+ @redis = Flapjack::RedisPool.new(:config => opts[:redis_config], :size => 1)
28
34
 
29
35
  @queues = {:email => @config['email_queue'],
30
36
  :sms => @config['sms_queue'],
@@ -73,19 +79,20 @@ module Flapjack
73
79
  @redis.hset("event_counters:#{@instance_id}", 'action', 0)
74
80
  end
75
81
 
76
- def main
77
- setup
82
# Calls empty! on the redis handle when one has been set up, then hands
# over to generic_cleanup and returns its result.
def cleanup
  pool = @redis
  pool.empty! if pool
  generic_cleanup
end
78
86
 
87
+ def main
79
88
  @logger.info("Booting main loop.")
80
89
 
81
- until should_quit?
90
+ until should_quit? && @received_shutdown
82
91
  @logger.info("Waiting for event...")
83
- event = Flapjack::Data::Event.next(:persistence => @redis)
92
+ event = Flapjack::Data::Event.next(:redis => @redis)
84
93
  process_event(event) unless event.nil?
85
94
  end
86
95
 
87
- @redis.empty! if @redis
88
-
89
96
  @logger.info("Exiting main loop.")
90
97
  end
91
98
 
@@ -102,7 +109,8 @@ module Flapjack
102
109
  private
103
110
 
104
111
  def process_event(event)
105
- @logger.debug("#{Flapjack::Data::Event.pending_count(:persistence => @redis)} events waiting on the queue")
112
+ pending = Flapjack::Data::Event.pending_count(:redis => @redis)
113
+ @logger.debug("#{pending} events waiting on the queue")
106
114
  @logger.debug("Raw event received: #{event.inspect}")
107
115
  time_at = event.time
108
116
  time_at_str = time_at ? ", #{Time.at(time_at).to_s}" : ''
@@ -129,14 +137,14 @@ module Flapjack
129
137
  end
130
138
 
131
139
  @logger.info("#{Time.now}: Sending notifications for event #{event.id}")
132
- generate_notification(event, entity_check)
140
+ send_notification_messages(event, entity_check)
133
141
  end
134
142
 
135
143
  def update_keys(event, entity_check)
136
144
  result = { :skip_filters => false }
137
145
  timestamp = Time.now.to_i
138
146
  @event_count = @redis.hincrby('event_counters', 'all', 1)
139
- @event_count = @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
147
+ @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
140
148
 
141
149
  # FIXME skip if entity_check.nil?
142
150
 
@@ -191,16 +199,15 @@ module Flapjack
191
199
  end
192
200
  when 'shutdown'
193
201
  # should this be logged as an action instead? being minimally invasive for now
194
- result[:shutdown] = true
202
+ result[:shutdown] = @received_shutdown = true
195
203
  end
196
204
 
197
205
  result
198
206
  end
199
207
 
200
208
  # takes an event for which a notification needs to be generated, works out the type of
201
- # notification, updates the notification history in redis, calls other methods to work out who
202
- # to notify, by what method, and finally to have the notifications sent
203
- def generate_notification(event, entity_check)
209
+ # notification, updates the notification history in redis, sends the notifications
210
+ def send_notification_messages(event, entity_check)
204
211
  timestamp = Time.now.to_i
205
212
  notification_type = 'unknown'
206
213
  case event.type
@@ -215,79 +222,51 @@ module Flapjack
215
222
  case event.state
216
223
  when 'acknowledgement'
217
224
  notification_type = 'acknowledgement'
225
+ when 'test_notifications'
226
+ notification_type = 'test'
218
227
  end
219
228
  end
220
229
  @redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
221
230
  @redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
222
231
  @logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.")
223
232
 
224
- send_notifications(event, notification_type, entity_check.contacts)
225
- end
226
-
227
- # takes an event, a notification type, and an array of contacts and creates jobs in resque
228
- # (eventually) for each notification
229
- def send_notifications(event, notification_type, contacts)
230
- notification = { 'event_id' => event.id,
231
- 'state' => event.state,
232
- 'summary' => event.summary,
233
- 'time' => event.time,
234
- 'notification_type' => notification_type }
233
+ contacts = entity_check.contacts
235
234
 
236
235
  if contacts.empty?
237
236
  @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | NO CONTACTS")
238
237
  return
239
- end
238
+ end
239
+
240
+ notification = Flapjack::Data::Notification.for_event(event, :type => notification_type)
240
241
 
241
- contacts.each {|contact|
242
+ notification.messages(:contacts => contacts).each do |msg|
243
+ media_type = msg.medium.to_sym
242
244
 
243
- if contact.media.empty?
244
- @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | #{contact.id} | NO MEDIA FOR CONTACT")
245
+ @notifylog.info("#{Time.now.to_s} | #{event.id} | " +
246
+ "#{notification_type} | #{msg.contact.id} | #{media_type.to_s} | #{msg.address}")
247
+
248
+ unless @queues[media_type]
249
+ # TODO log error
245
250
  next
246
251
  end
247
252
 
248
- notification.merge!({'contact_id' => contact.id,
249
- 'contact_first_name' => contact.first_name,
250
- 'contact_last_name' => contact.last_name, })
251
-
252
- contact.media.each_pair {|media_type, address|
253
-
254
- @notifylog.info("#{Time.now.to_s} | #{event.id} | " +
255
- "#{notification_type} | #{contact.id} | #{media_type} | #{address}")
256
-
257
- # queue this notification
258
- notif = notification.dup
259
- notif['media'] = media_type
260
- notif['address'] = address
261
- notif['id'] = fuid
262
- dur = event.duration
263
- notif['duration'] = dur if dur
264
- @logger.debug("send_notifications: sending notification: #{notif.inspect}")
265
-
266
- unless @queues[media_type.to_sym]
267
- # TODO log error
268
- next
269
- end
270
-
271
- # TODO consider changing Resque jobs to use raw blpop like the others
272
- case media_type
273
- when "sms"
274
- Resque.enqueue_to(@queues[:sms], Notification::Sms, notif)
275
- when "email"
276
- Resque.enqueue_to(@queues[:email], Notification::Email, notif)
277
- when "jabber"
278
- # TODO move next line up into other notif value setting above?
279
- notif['event_count'] = @event_count if @event_count
280
- @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(notif))
281
- when "pagerduty"
282
- @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(notif))
283
- end
284
- }
285
- }
286
- end
253
+ contents = msg.contents
254
+
255
+ # TODO consider changing Resque jobs to use raw blpop like the others
256
+ case media_type
257
+ when :sms
258
+ Resque.enqueue_to(@queues[:sms], Flapjack::Notification::Sms, contents)
259
+ when :email
260
+ Resque.enqueue_to(@queues[:email], Flapjack::Notification::Email, contents)
261
+ when :jabber
262
+ # TODO move next line up into other notif value setting above?
263
+ contents['event_count'] = @event_count if @event_count
264
+ @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(contents))
265
+ when :pagerduty
266
+ @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(contents))
267
+ end
287
268
 
288
- # generates a fairly unique identifier to use as a message id
289
- def fuid
290
- fuid = self.object_id.to_i.to_s + '-' + Time.now.to_i.to_s + '.' + Time.now.tv_usec.to_s
269
+ end
291
270
  end
292
271
 
293
272
  end
@@ -15,21 +15,24 @@ module Flapjack
15
15
  timestamp = Time.now.to_i
16
16
  result = false
17
17
  if event.type == 'action'
18
- if event.acknowledgement? and @persistence.zscore("failed_checks", event.id)
19
- ec = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
20
- if ec.nil?
21
- @log.error "Filter: Acknowledgement: unknown entity for event '#{event.id}'"
18
+ if event.acknowledgement?
19
+ if @persistence.zscore("failed_checks", event.id)
20
+ ec = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
21
+ if ec.nil?
22
+ @log.error "Filter: Acknowledgement: unknown entity for event '#{event.id}'"
23
+ else
24
+ ec.create_unscheduled_maintenance(:start_time => timestamp,
25
+ :duration => (event.duration || (4 * 60 * 60)),
26
+ :summary => event.summary)
27
+ message = "unscheduled maintenance created for #{event.id}"
28
+ end
22
29
  else
23
- ec.create_unscheduled_maintenance(:start_time => timestamp,
24
- :duration => (event.duration || (4 * 60 * 60)),
25
- :summary => event.summary)
26
- message = "unscheduled maintenance created for #{event.id}"
30
+ result = true
31
+ @log.debug("Filter: Acknowledgement: blocking because zscore of failed_checks for #{event.id} is false") unless @persistence.zscore("failed_checks", event.id)
27
32
  end
28
33
  else
29
34
  message = "no action taken"
30
- result = true
31
- @log.debug("Filter: Acknowledgement: blocking because event.acknowledgement? is false") unless event.acknowledgement?
32
- @log.debug("Filter: Acknowledgement: blocking because zscore of failed_checks for #{event.id} is false") unless @persistence.zscore("failed_checks", event.id)
35
+ result = false
33
36
  end
34
37
  end
35
38
  @log.debug("Filter: Acknowledgement: #{result ? "block" : "pass"} (#{message})")