flapjack 0.7.22 → 0.7.25

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. data/CHANGELOG.md +19 -0
  2. data/bin/flapjack +3 -1
  3. data/bin/flapjack-nagios-receiver +5 -4
  4. data/bin/receive-events +2 -2
  5. data/features/events.feature +101 -95
  6. data/features/notification_rules.feature +36 -4
  7. data/features/steps/notifications_steps.rb +4 -0
  8. data/flapjack.gemspec +3 -2
  9. data/lib/flapjack/coordinator.rb +8 -6
  10. data/lib/flapjack/data/entity_check.rb +20 -13
  11. data/lib/flapjack/data/event.rb +4 -7
  12. data/lib/flapjack/data/notification.rb +63 -45
  13. data/lib/flapjack/filters/acknowledgement.rb +26 -24
  14. data/lib/flapjack/filters/delays.rb +46 -42
  15. data/lib/flapjack/filters/ok.rb +31 -34
  16. data/lib/flapjack/filters/scheduled_maintenance.rb +2 -2
  17. data/lib/flapjack/filters/unscheduled_maintenance.rb +2 -3
  18. data/lib/flapjack/gateways/email.rb +111 -114
  19. data/lib/flapjack/gateways/email/alert.html.erb +11 -11
  20. data/lib/flapjack/gateways/email/alert.text.erb +19 -6
  21. data/lib/flapjack/gateways/sms_messagenet.rb +15 -5
  22. data/lib/flapjack/gateways/web.rb +3 -4
  23. data/lib/flapjack/gateways/web/public/css/flapjack.css +0 -2
  24. data/lib/flapjack/gateways/web/public/img/flapjack-favicon-32-16.ico +0 -0
  25. data/lib/flapjack/gateways/web/public/img/flapjack-favicon-64-32-24-16.ico +0 -0
  26. data/lib/flapjack/gateways/web/public/img/flapjack-transparent-300.png +0 -0
  27. data/lib/flapjack/gateways/web/public/img/flapjack-transparent-350-400.png +0 -0
  28. data/lib/flapjack/gateways/web/views/_head.html.erb +1 -0
  29. data/lib/flapjack/gateways/web/views/index.html.erb +1 -1
  30. data/lib/flapjack/notifier.rb +2 -3
  31. data/lib/flapjack/pikelet.rb +5 -4
  32. data/lib/flapjack/processor.rb +39 -27
  33. data/lib/flapjack/version.rb +1 -1
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +5 -0
  35. data/spec/lib/flapjack/data/event_spec.rb +0 -1
  36. data/spec/lib/flapjack/gateways/email_spec.rb +5 -9
  37. data/spec/lib/flapjack/gateways/sms_messagenet.spec.rb +80 -1
  38. data/spec/lib/flapjack/gateways/web_spec.rb +1 -1
  39. data/spec/lib/flapjack/pikelet_spec.rb +4 -3
  40. data/spec/lib/flapjack/processor_spec.rb +0 -1
  41. metadata +28 -11
  42. data/lib/flapjack/filters/detect_mass_client_failures.rb +0 -44
  43. data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +0 -6
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  ## Flapjack Changelog
2
2
 
3
+ # 0.7.25 - 2013-09-13
4
+ - Bug: EntityCheck last_update= isn't being called for update_state since refactoring gh-303 (@ali-graham)
5
+ - Bug: flapjack-nagios-receiver is double-escaping its JSON data gh-304 (@jessereynolds)
6
+
7
+ # 0.7.24 - 2013-09-12
8
+ - Bug: gem install of flapjack 0.7.23 fails with tzinfo-data dependency error gh-302 (@jessereynolds)
9
+
10
+ # 0.7.23 - 2013-09-12
11
+ - Bug: Quick ok -> warning -> ok -> warning triggers too many recovery notifications gh-119 (@jessereynolds)
12
+ - Bug: Blackhole notification rule doesn't block recovery notifications gh-282 (@jessereynolds)
13
+ - Chore: Shorten SMS messages to 159 chars on the Messagenet gateway gh-278 (@ali-graham)
14
+ - Chore: flapjack-nagios-receiver should use Event#add gh-275 (@ali-graham)
15
+ - Chore: Non-zero exit code after receiving SIGINT gh-266 (@ali-graham)
16
+ - Bug: Email notifications - remove "(about a minute ago)" and fix previous state fields gh-258 (@ali-graham)
17
+ - Chore: refactor delays filter, remove mass client failures filter gh-293 (@jessereynolds)
18
+ - Bug: creation of scheduled maintenance fails from web UI gh-296 (@ali-graham)
19
+ - Feature: flapjack UI needs a favicon gh-297 (@jessereynolds)
20
+ - Chore: email notification styling gh-298 (@jessereynolds)
21
+
3
22
  # 0.7.22 - 2013-08-08
4
23
  - Bug: fix potential exception in json serialisation of tags in notifications gh-281 (@jessereynolds)
5
24
 
data/bin/flapjack CHANGED
@@ -146,10 +146,12 @@ when "start"
146
146
  puts "Flapjack is already running."
147
147
  else
148
148
  print "Flapjack starting..."
149
+ return_value = nil
149
150
  runner.execute(:daemonize => daemonize) {
150
- flapjack_coord.call
151
+ return_value = flapjack_coord.call
151
152
  }
152
153
  puts " done."
154
+ exit(return_value + 128) unless (return_value.nil? || (return_value == 0))
153
155
  end
154
156
 
155
157
  when "stop"
@@ -14,6 +14,7 @@ Oj.default_options = { :indent => 0, :mode => :strict }
14
14
  require 'dante'
15
15
 
16
16
  require 'flapjack/configuration'
17
+ require 'flapjack/data/event'
17
18
 
18
19
  def pike(message)
19
20
  puts "piking out: #{message}"
@@ -47,16 +48,16 @@ def process_input(opts)
47
48
  state = 'ok' if state.downcase == 'up'
48
49
  state = 'critical' if state.downcase == 'down'
49
50
  details = check_long_output ? check_long_output.gsub(/\\n/, "\n") : nil
50
- event = Oj.dump({
51
+ event = {
51
52
  'entity' => entity,
52
53
  'check' => check,
53
54
  'type' => 'service',
54
55
  'state' => state,
55
56
  'summary' => check_output,
56
57
  'details' => details,
57
- 'timestamp' => timestamp,
58
- })
59
- redis.lpush 'events', event
58
+ 'time' => timestamp,
59
+ }
60
+ Flapjack::Data::Event.add(event, :redis => redis)
60
61
  end
61
62
  rescue Redis::CannotConnectError
62
63
  puts "Error, unable to to connect to the redis server (#{$!})"
data/bin/receive-events CHANGED
@@ -12,6 +12,7 @@ require 'oj'
12
12
  Oj.default_options = { :indent => 0, :mode => :strict }
13
13
 
14
14
  require 'flapjack/configuration'
15
+ require 'flapjack/data/event'
15
16
 
16
17
  def pike(message)
17
18
  puts "piking out: #{message}"
@@ -19,8 +20,7 @@ def pike(message)
19
20
  end
20
21
 
21
22
  def send_event(event, opts)
22
- redis = opts[:redis]
23
- redis.lpush 'events', event
23
+ Flapjack::Data::Event.add(event, :redis => opts[:redis])
24
24
  end
25
25
 
26
26
  def receive(opts)
@@ -123,7 +123,12 @@ Feature: events
123
123
 
124
124
  @time
125
125
  Scenario: Check critical to ok when acknowledged
126
- Given the check is in a critical state
126
+ Given the check is in an ok state
127
+ When a critical event is received
128
+ And one minute passes
129
+ And a critical event is received
130
+ Then a notification should be generated
131
+ # the above all needs to be just a call to the "Check ok to critical for 1 minute" Scenario if that's possible
127
132
  When an acknowledgement event is received
128
133
  Then a notification should be generated
129
134
  When 1 minute passes
@@ -297,101 +302,102 @@ Feature: events
297
302
 
298
303
  # commenting out this test for now, will revive it
299
304
  # when working on gh-119
300
- # @time
301
- # Scenario: a lot of quick ok -> warning -> ok -> warning
302
- # Given the check is in an ok state
303
- # When 10 seconds passes
304
- # And a warning event is received
305
- # Then a notification should not be generated
306
- # When 10 seconds passes
307
- # And an ok event is received
308
- # Then a notification should not be generated
309
- # When 10 seconds passes
310
- # And a warning event is received
311
- # Then a notification should not be generated
312
- # When 10 seconds passes
313
- # And a warning event is received
314
- # Then a notification should not be generated
315
- # When 10 seconds passes
316
- # And a warning event is received
317
- # Then a notification should not be generated
318
- # When 10 seconds passes
319
- # And an ok event is received
320
- # Then a notification should not be generated
321
- # When 10 seconds passes
322
- # And a warning event is received
323
- # Then a notification should not be generated
324
- # When 10 seconds passes
325
- # And an ok event is received
326
- # Then a notification should not be generated
327
- # When 10 seconds passes
328
- # And a warning event is received
329
- # Then a notification should not be generated
330
- # When 10 seconds passes
331
- # And a warning event is received
332
- # Then a notification should not be generated
333
- # When 10 seconds passes
334
- # And a warning event is received
335
- # Then a notification should not be generated
336
- # When 10 seconds passes
337
- # And a warning event is received
338
- # Then a notification should be generated
339
- # When 10 seconds passes
340
- # And a warning event is received
341
- # Then a notification should not be generated
342
- # When 10 seconds passes
343
- # And a warning event is received
344
- # Then a notification should not be generated
345
- # When 10 seconds passes
346
- # And an ok event is received
347
- # Then a notification should be generated
348
- # When 10 seconds passes
349
- # And a warning event is received
350
- # Then a notification should not be generated
351
- # When 10 seconds passes
352
- # And a warning event is received
353
- # Then a notification should not be generated
354
- # When 10 seconds passes
355
- # And a warning event is received
356
- # Then a notification should not be generated
357
- # When 10 seconds passes
358
- # And an ok event is received
359
- # Then a notification should not be generated
360
- # When 10 seconds passes
361
- # And a warning event is received
362
- # Then a notification should not be generated
363
- # When 10 seconds passes
364
- # And a warning event is received
365
- # Then a notification should not be generated
366
- # When 10 seconds passes
367
- # And a warning event is received
368
- # Then a notification should not be generated
369
- # When 10 seconds passes
370
- # And an ok event is received
371
- # Then a notification should not be generated
372
- # When 10 seconds passes
373
- # And an ok event is received
374
- # Then a notification should not be generated
375
- # When 10 seconds passes
376
- # And an ok event is received
377
- # Then a notification should not be generated
378
- # When 10 seconds passes
379
- # And an ok event is received
380
- # Then a notification should not be generated
381
- # When 10 seconds passes
382
- # And an ok event is received
383
- # Then a notification should not be generated
384
- # When 10 seconds passes
385
- # And a warning event is received
386
- # Then a notification should not be generated
387
- # When 10 seconds passes
388
- # And a warning event is received
389
- # Then a notification should not be generated
390
- # When 10 seconds passes
391
- # And an ok event is received
392
- # Then a notification should not be generated
305
+ @time
306
+ Scenario: a lot of quick ok -> warning -> ok -> warning
307
+ Given the check is in an ok state
308
+ When 10 seconds passes
309
+ And a warning event is received
310
+ Then a notification should not be generated
311
+ When 10 seconds passes
312
+ And an ok event is received
313
+ Then a notification should not be generated
314
+ When 10 seconds passes
315
+ And a warning event is received
316
+ Then a notification should not be generated
317
+ When 10 seconds passes
318
+ And a warning event is received
319
+ Then a notification should not be generated
320
+ When 10 seconds passes
321
+ And a warning event is received
322
+ Then a notification should not be generated
323
+ When 20 seconds passes
324
+ And an ok event is received
325
+ Then a notification should not be generated
326
+ When 10 seconds passes
327
+ And a warning event is received
328
+ Then a notification should not be generated
329
+ When 10 seconds passes
330
+ And an ok event is received
331
+ Then a notification should not be generated
332
+ When 10 seconds passes
333
+ And a warning event is received
334
+ Then a notification should not be generated
335
+ When 10 seconds passes
336
+ And a warning event is received
337
+ Then a notification should not be generated
338
+ When 10 seconds passes
339
+ And a warning event is received
340
+ Then a notification should not be generated
341
+ When 10 seconds passes
342
+ And a warning event is received
343
+ Then a notification should be generated
344
+ When 10 seconds passes
345
+ And a warning event is received
346
+ Then a notification should not be generated
347
+ When 10 seconds passes
348
+ And a warning event is received
349
+ Then a notification should not be generated
350
+ When 10 seconds passes
351
+ And an ok event is received
352
+ Then a notification should be generated
353
+ # recovered
354
+ When 10 seconds passes
355
+ And a warning event is received
356
+ Then a notification should not be generated
357
+ When 10 seconds passes
358
+ And a warning event is received
359
+ Then a notification should not be generated
360
+ When 10 seconds passes
361
+ And a warning event is received
362
+ Then a notification should not be generated
363
+ When 10 seconds passes
364
+ And an ok event is received
365
+ Then a notification should not be generated
366
+ When 10 seconds passes
367
+ And a warning event is received
368
+ Then a notification should not be generated
369
+ When 10 seconds passes
370
+ And a warning event is received
371
+ Then a notification should not be generated
372
+ When 10 seconds passes
373
+ And a warning event is received
374
+ Then a notification should not be generated
375
+ When 10 seconds passes
376
+ And an ok event is received
377
+ Then a notification should not be generated
378
+ When 10 seconds passes
379
+ And an ok event is received
380
+ Then a notification should not be generated
381
+ When 10 seconds passes
382
+ And an ok event is received
383
+ Then a notification should not be generated
384
+ When 10 seconds passes
385
+ And an ok event is received
386
+ Then a notification should not be generated
387
+ When 10 seconds passes
388
+ And an ok event is received
389
+ Then a notification should not be generated
390
+ When 10 seconds passes
391
+ And a warning event is received
392
+ Then a notification should not be generated
393
+ When 10 seconds passes
394
+ And a warning event is received
395
+ Then a notification should not be generated
396
+ When 10 seconds passes
397
+ And an ok event is received
398
+ Then a notification should not be generated
393
399
 
394
400
  Scenario: scheduled maintenance created for initial check reference
395
401
  Given the check has no state
396
402
  When an ok event is received
397
- Then scheduled maintenance should be generated
403
+ Then scheduled maintenance should be generated
@@ -45,6 +45,7 @@ Feature: Notification rules on a per contact basis
45
45
  | | | email | email | | | |
46
46
  | | | sms | sms | | | |
47
47
  | bar | | email | email,sms | | | |
48
+ | bar | wags | | | true | true | |
48
49
 
49
50
  And user 3 has the following notification rules:
50
51
  | entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
@@ -52,6 +53,7 @@ Feature: Notification rules on a per contact basis
52
53
  | baz | | sms | sms | | | |
53
54
  | buf | | email | email | | | |
54
55
  | buf | | sms | sms | | | |
56
+ | bar | | email | email | true | true | |
55
57
 
56
58
  And user 4 has the following notification rules:
57
59
  | entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
@@ -151,8 +153,38 @@ Feature: Notification rules on a per contact basis
151
153
  And an ok event is received
152
154
  Then 2 email alert should be queued for malak@example.com
153
155
 
154
- @blackhole
155
- Scenario: Drop alerts matching a blackhole rule
156
+ @blackhole @time
157
+ Scenario: Drop alerts matching a general blackhole rule
158
+ Given the check is check 'ping' on entity 'buf'
159
+ And the check is in an ok state
160
+ When a critical event is received
161
+ And 1 minute passes
162
+ And a critical event is received
163
+ Then 0 email alerts should be queued for malak@example.com
164
+
165
+ @blackhole @time
166
+ Scenario: Drop alerts matching a blackhole rule by entity
167
+ Given the check is check 'ping' on entity 'bar'
168
+ And the check is in an ok state
169
+ When a warning event is received
170
+ And 1 minute passes
171
+ And a warning event is received
172
+ Then 0 email alerts should be queued for malak@example.com
173
+ And 0 email alerts should be queued for vera@example.com
174
+ When an ok event is received
175
+ Then 0 email alerts should be queued for malak@example.com
176
+ And 0 email alerts should be queued for vera@example.com
177
+
178
+ @blackhole @time
179
+ Scenario: Drop alerts matching a blackhole rule by tags
180
+ Given the check is check 'wags the dog' on entity 'bar'
181
+ And the check is in an ok state
182
+ When a warning event is received
183
+ And 1 minute passes
184
+ And a warning event is received
185
+ Then 0 email alerts should be queued for imani@example.com
186
+ When an ok event is received
187
+ Then 0 email alerts should be queued for imani@example.com
156
188
 
157
189
  @intervals @time
158
190
  Scenario: Alerts according to custom interval
@@ -274,7 +306,7 @@ Feature: Notification rules on a per contact basis
274
306
 
275
307
  @time
276
308
  Scenario: Test notifications behave like a critical notification
277
- Given the check is check 'ping' on entity 'foo'
309
+ Given the check is check 'ping' on entity 'baz'
278
310
  And the check is in an ok state
279
311
  When a test event is received
280
312
  Then 1 email alert should be queued for malak@example.com
@@ -296,7 +328,7 @@ Feature: Notification rules on a per contact basis
296
328
  And 2 sms alert should be queued for +61400000001
297
329
 
298
330
  Scenario: Unknown event during unscheduled maintenance
299
- Given the check is check 'ping' on entity 'foo'
331
+ Given the check is check 'ping' on entity 'baz'
300
332
  And the check is in an ok state
301
333
  When an unknown event is received
302
334
  And 1 minute passes
@@ -139,6 +139,7 @@ When /^the SMS notification handler runs successfully$/ do
139
139
  @request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/)
140
140
 
141
141
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
142
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
142
143
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
143
144
  Flapjack::Gateways::SmsMessagenet.start
144
145
 
@@ -148,6 +149,7 @@ end
148
149
  When /^the SMS notification handler fails to send an SMS$/ do
149
150
  @request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/).to_return(:status => [500, "Internal Server Error"])
150
151
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
152
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
151
153
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
152
154
  Flapjack::Gateways::SmsMessagenet.start
153
155
 
@@ -157,6 +159,7 @@ end
157
159
  When /^the email notification handler runs successfully$/ do
158
160
  Resque.redis = @redis
159
161
  Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
162
+ Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
160
163
  Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
161
164
  Flapjack::Gateways::Email.start
162
165
 
@@ -175,6 +178,7 @@ end
175
178
  When /^the email notification handler fails to send an email$/ do
176
179
  Resque.redis = @redis
177
180
  Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
181
+ Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
178
182
  Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
179
183
  Flapjack::Gateways::Email.start
180
184
 
data/flapjack.gemspec CHANGED
@@ -33,9 +33,10 @@ Gem::Specification.new do |gem|
33
33
  gem.add_dependency 'blather', '~> 0.8.3'
34
34
  gem.add_dependency 'chronic'
35
35
  gem.add_dependency 'chronic_duration'
36
- gem.add_dependency 'activesupport'
36
+ gem.add_dependency 'activesupport', '~> 3.2.14'
37
37
  gem.add_dependency 'ice_cube'
38
- gem.add_dependency 'tzinfo'
38
+ gem.add_dependency 'tzinfo', '~> 1.0.1'
39
+ gem.add_dependency 'tzinfo-data'
39
40
 
40
41
  gem.add_development_dependency 'rake'
41
42
  end
@@ -31,11 +31,13 @@ module Flapjack
31
31
  setup_signals if options[:signals]
32
32
  add_pikelets(pikelets(@config.all))
33
33
  end
34
+
35
+ @exit_value
34
36
  end
35
37
 
36
- def stop
37
- return if @stopping
38
- @stopping = true
38
+ def stop(value = 0)
39
+ return unless @exit_value.nil?
40
+ @exit_value = value
39
41
  remove_pikelets(@pikelets, :shutdown => true)
40
42
  # Syslog.close if Syslog.opened? # TODO revisit in threading branch
41
43
  end
@@ -111,10 +113,10 @@ module Flapjack
111
113
  # within a single coordinator instance. Coordinator is essentially
112
114
  # a singleton anyway...
113
115
  def setup_signals
114
- Kernel.trap('INT') { stop }
115
- Kernel.trap('TERM') { stop }
116
+ Kernel.trap('INT') { stop(Signal.list['INT']) }
117
+ Kernel.trap('TERM') { stop(Signal.list['TERM']) }
116
118
  unless RbConfig::CONFIG['host_os'] =~ /mswin|windows|cygwin/i
117
- Kernel.trap('QUIT') { stop }
119
+ Kernel.trap('QUIT') { stop(Signal.list['QUIT']) }
118
120
  Kernel.trap('HUP') { reload }
119
121
  end
120
122
  end