flapjack 0.7.22 → 0.7.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. data/CHANGELOG.md +19 -0
  2. data/bin/flapjack +3 -1
  3. data/bin/flapjack-nagios-receiver +5 -4
  4. data/bin/receive-events +2 -2
  5. data/features/events.feature +101 -95
  6. data/features/notification_rules.feature +36 -4
  7. data/features/steps/notifications_steps.rb +4 -0
  8. data/flapjack.gemspec +3 -2
  9. data/lib/flapjack/coordinator.rb +8 -6
  10. data/lib/flapjack/data/entity_check.rb +20 -13
  11. data/lib/flapjack/data/event.rb +4 -7
  12. data/lib/flapjack/data/notification.rb +63 -45
  13. data/lib/flapjack/filters/acknowledgement.rb +26 -24
  14. data/lib/flapjack/filters/delays.rb +46 -42
  15. data/lib/flapjack/filters/ok.rb +31 -34
  16. data/lib/flapjack/filters/scheduled_maintenance.rb +2 -2
  17. data/lib/flapjack/filters/unscheduled_maintenance.rb +2 -3
  18. data/lib/flapjack/gateways/email.rb +111 -114
  19. data/lib/flapjack/gateways/email/alert.html.erb +11 -11
  20. data/lib/flapjack/gateways/email/alert.text.erb +19 -6
  21. data/lib/flapjack/gateways/sms_messagenet.rb +15 -5
  22. data/lib/flapjack/gateways/web.rb +3 -4
  23. data/lib/flapjack/gateways/web/public/css/flapjack.css +0 -2
  24. data/lib/flapjack/gateways/web/public/img/flapjack-favicon-32-16.ico +0 -0
  25. data/lib/flapjack/gateways/web/public/img/flapjack-favicon-64-32-24-16.ico +0 -0
  26. data/lib/flapjack/gateways/web/public/img/flapjack-transparent-300.png +0 -0
  27. data/lib/flapjack/gateways/web/public/img/flapjack-transparent-350-400.png +0 -0
  28. data/lib/flapjack/gateways/web/views/_head.html.erb +1 -0
  29. data/lib/flapjack/gateways/web/views/index.html.erb +1 -1
  30. data/lib/flapjack/notifier.rb +2 -3
  31. data/lib/flapjack/pikelet.rb +5 -4
  32. data/lib/flapjack/processor.rb +39 -27
  33. data/lib/flapjack/version.rb +1 -1
  34. data/spec/lib/flapjack/data/entity_check_spec.rb +5 -0
  35. data/spec/lib/flapjack/data/event_spec.rb +0 -1
  36. data/spec/lib/flapjack/gateways/email_spec.rb +5 -9
  37. data/spec/lib/flapjack/gateways/sms_messagenet.spec.rb +80 -1
  38. data/spec/lib/flapjack/gateways/web_spec.rb +1 -1
  39. data/spec/lib/flapjack/pikelet_spec.rb +4 -3
  40. data/spec/lib/flapjack/processor_spec.rb +0 -1
  41. metadata +28 -11
  42. data/lib/flapjack/filters/detect_mass_client_failures.rb +0 -44
  43. data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +0 -6
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  ## Flapjack Changelog
2
2
 
3
+ # 0.7.25 - 2013-09-13
4
+ - Bug: EntityCheck last_update= isn't being called for update_state since refactoring gh-303 (@ali-graham)
5
+ - Bug: flapjack-nagios-receiver is double-escaping its JSON data gh-304 (@jessereynolds)
6
+
7
+ # 0.7.24 - 2013-09-12
8
+ - Bug: gem install of flapjack 0.7.23 fails with tzinfo-data dependency error gh-302 (@jessereynolds)
9
+
10
+ # 0.7.23 - 2013-09-12
11
+ - Bug: Quick ok -> warning -> ok -> warning triggers too many recovery notifications gh-119 (@jessereynolds)
12
+ - Bug: Blackhole notification rule doesn't block recovery notifications gh-282 (@jessereynolds)
13
+ - Chore: Shorten SMS messages to 159 chars on the Messagenet gateway gh-278 (@ali-graham)
14
+ - Chore: flapjack-nagios-receiver should use Event#add gh-275 (@ali-graham)
15
+ - Chore: Non-zero exit code after receiving SIGINT gh-266 (@ali-graham)
16
+ - Bug: Email notifications - remove "(about a minute ago)" and fix previous state fields gh-258 (@ali-graham)
17
+ - Chore: refactor delays filter, remove mass client failures filter gh-293 (@jessereynolds)
18
+ - Bug: creation of scheduled maintenance fails from web UI gh-296 (@ali-graham)
19
+ - Feature: flapjack UI needs a favicon gh-297 (@jessereynolds)
20
+ - Chore: email notification styling gh-298 (@jessereynolds)
21
+
3
22
  # 0.7.22 - 2013-08-08
4
23
  - Bug: fix potential exception in json serialisation of tags in notifications gh-281 (@jessereynolds)
5
24
 
data/bin/flapjack CHANGED
@@ -146,10 +146,12 @@ when "start"
146
146
  puts "Flapjack is already running."
147
147
  else
148
148
  print "Flapjack starting..."
149
+ return_value = nil
149
150
  runner.execute(:daemonize => daemonize) {
150
- flapjack_coord.call
151
+ return_value = flapjack_coord.call
151
152
  }
152
153
  puts " done."
154
+ exit(return_value + 128) unless (return_value.nil? || (return_value == 0))
153
155
  end
154
156
 
155
157
  when "stop"
data/bin/flapjack-nagios-receiver CHANGED
@@ -14,6 +14,7 @@ Oj.default_options = { :indent => 0, :mode => :strict }
14
14
  require 'dante'
15
15
 
16
16
  require 'flapjack/configuration'
17
+ require 'flapjack/data/event'
17
18
 
18
19
  def pike(message)
19
20
  puts "piking out: #{message}"
@@ -47,16 +48,16 @@ def process_input(opts)
47
48
  state = 'ok' if state.downcase == 'up'
48
49
  state = 'critical' if state.downcase == 'down'
49
50
  details = check_long_output ? check_long_output.gsub(/\\n/, "\n") : nil
50
- event = Oj.dump({
51
+ event = {
51
52
  'entity' => entity,
52
53
  'check' => check,
53
54
  'type' => 'service',
54
55
  'state' => state,
55
56
  'summary' => check_output,
56
57
  'details' => details,
57
- 'timestamp' => timestamp,
58
- })
59
- redis.lpush 'events', event
58
+ 'time' => timestamp,
59
+ }
60
+ Flapjack::Data::Event.add(event, :redis => redis)
60
61
  end
61
62
  rescue Redis::CannotConnectError
62
63
  puts "Error, unable to to connect to the redis server (#{$!})"
data/bin/receive-events CHANGED
@@ -12,6 +12,7 @@ require 'oj'
12
12
  Oj.default_options = { :indent => 0, :mode => :strict }
13
13
 
14
14
  require 'flapjack/configuration'
15
+ require 'flapjack/data/event'
15
16
 
16
17
  def pike(message)
17
18
  puts "piking out: #{message}"
@@ -19,8 +20,7 @@ def pike(message)
19
20
  end
20
21
 
21
22
  def send_event(event, opts)
22
- redis = opts[:redis]
23
- redis.lpush 'events', event
23
+ Flapjack::Data::Event.add(event, :redis => opts[:redis])
24
24
  end
25
25
 
26
26
  def receive(opts)
data/features/events.feature CHANGED
@@ -123,7 +123,12 @@ Feature: events
123
123
 
124
124
  @time
125
125
  Scenario: Check critical to ok when acknowledged
126
- Given the check is in a critical state
126
+ Given the check is in an ok state
127
+ When a critical event is received
128
+ And one minute passes
129
+ And a critical event is received
130
+ Then a notification should be generated
131
+ # the above all needs to be just a call to the "Check ok to critical for 1 minute" Scenario if that's possible
127
132
  When an acknowledgement event is received
128
133
  Then a notification should be generated
129
134
  When 1 minute passes
@@ -297,101 +302,102 @@ Feature: events
297
302
 
298
303
  # commenting out this test for now, will revive it
299
304
  # when working on gh-119
300
- # @time
301
- # Scenario: a lot of quick ok -> warning -> ok -> warning
302
- # Given the check is in an ok state
303
- # When 10 seconds passes
304
- # And a warning event is received
305
- # Then a notification should not be generated
306
- # When 10 seconds passes
307
- # And an ok event is received
308
- # Then a notification should not be generated
309
- # When 10 seconds passes
310
- # And a warning event is received
311
- # Then a notification should not be generated
312
- # When 10 seconds passes
313
- # And a warning event is received
314
- # Then a notification should not be generated
315
- # When 10 seconds passes
316
- # And a warning event is received
317
- # Then a notification should not be generated
318
- # When 10 seconds passes
319
- # And an ok event is received
320
- # Then a notification should not be generated
321
- # When 10 seconds passes
322
- # And a warning event is received
323
- # Then a notification should not be generated
324
- # When 10 seconds passes
325
- # And an ok event is received
326
- # Then a notification should not be generated
327
- # When 10 seconds passes
328
- # And a warning event is received
329
- # Then a notification should not be generated
330
- # When 10 seconds passes
331
- # And a warning event is received
332
- # Then a notification should not be generated
333
- # When 10 seconds passes
334
- # And a warning event is received
335
- # Then a notification should not be generated
336
- # When 10 seconds passes
337
- # And a warning event is received
338
- # Then a notification should be generated
339
- # When 10 seconds passes
340
- # And a warning event is received
341
- # Then a notification should not be generated
342
- # When 10 seconds passes
343
- # And a warning event is received
344
- # Then a notification should not be generated
345
- # When 10 seconds passes
346
- # And an ok event is received
347
- # Then a notification should be generated
348
- # When 10 seconds passes
349
- # And a warning event is received
350
- # Then a notification should not be generated
351
- # When 10 seconds passes
352
- # And a warning event is received
353
- # Then a notification should not be generated
354
- # When 10 seconds passes
355
- # And a warning event is received
356
- # Then a notification should not be generated
357
- # When 10 seconds passes
358
- # And an ok event is received
359
- # Then a notification should not be generated
360
- # When 10 seconds passes
361
- # And a warning event is received
362
- # Then a notification should not be generated
363
- # When 10 seconds passes
364
- # And a warning event is received
365
- # Then a notification should not be generated
366
- # When 10 seconds passes
367
- # And a warning event is received
368
- # Then a notification should not be generated
369
- # When 10 seconds passes
370
- # And an ok event is received
371
- # Then a notification should not be generated
372
- # When 10 seconds passes
373
- # And an ok event is received
374
- # Then a notification should not be generated
375
- # When 10 seconds passes
376
- # And an ok event is received
377
- # Then a notification should not be generated
378
- # When 10 seconds passes
379
- # And an ok event is received
380
- # Then a notification should not be generated
381
- # When 10 seconds passes
382
- # And an ok event is received
383
- # Then a notification should not be generated
384
- # When 10 seconds passes
385
- # And a warning event is received
386
- # Then a notification should not be generated
387
- # When 10 seconds passes
388
- # And a warning event is received
389
- # Then a notification should not be generated
390
- # When 10 seconds passes
391
- # And an ok event is received
392
- # Then a notification should not be generated
305
+ @time
306
+ Scenario: a lot of quick ok -> warning -> ok -> warning
307
+ Given the check is in an ok state
308
+ When 10 seconds passes
309
+ And a warning event is received
310
+ Then a notification should not be generated
311
+ When 10 seconds passes
312
+ And an ok event is received
313
+ Then a notification should not be generated
314
+ When 10 seconds passes
315
+ And a warning event is received
316
+ Then a notification should not be generated
317
+ When 10 seconds passes
318
+ And a warning event is received
319
+ Then a notification should not be generated
320
+ When 10 seconds passes
321
+ And a warning event is received
322
+ Then a notification should not be generated
323
+ When 20 seconds passes
324
+ And an ok event is received
325
+ Then a notification should not be generated
326
+ When 10 seconds passes
327
+ And a warning event is received
328
+ Then a notification should not be generated
329
+ When 10 seconds passes
330
+ And an ok event is received
331
+ Then a notification should not be generated
332
+ When 10 seconds passes
333
+ And a warning event is received
334
+ Then a notification should not be generated
335
+ When 10 seconds passes
336
+ And a warning event is received
337
+ Then a notification should not be generated
338
+ When 10 seconds passes
339
+ And a warning event is received
340
+ Then a notification should not be generated
341
+ When 10 seconds passes
342
+ And a warning event is received
343
+ Then a notification should be generated
344
+ When 10 seconds passes
345
+ And a warning event is received
346
+ Then a notification should not be generated
347
+ When 10 seconds passes
348
+ And a warning event is received
349
+ Then a notification should not be generated
350
+ When 10 seconds passes
351
+ And an ok event is received
352
+ Then a notification should be generated
353
+ # recovered
354
+ When 10 seconds passes
355
+ And a warning event is received
356
+ Then a notification should not be generated
357
+ When 10 seconds passes
358
+ And a warning event is received
359
+ Then a notification should not be generated
360
+ When 10 seconds passes
361
+ And a warning event is received
362
+ Then a notification should not be generated
363
+ When 10 seconds passes
364
+ And an ok event is received
365
+ Then a notification should not be generated
366
+ When 10 seconds passes
367
+ And a warning event is received
368
+ Then a notification should not be generated
369
+ When 10 seconds passes
370
+ And a warning event is received
371
+ Then a notification should not be generated
372
+ When 10 seconds passes
373
+ And a warning event is received
374
+ Then a notification should not be generated
375
+ When 10 seconds passes
376
+ And an ok event is received
377
+ Then a notification should not be generated
378
+ When 10 seconds passes
379
+ And an ok event is received
380
+ Then a notification should not be generated
381
+ When 10 seconds passes
382
+ And an ok event is received
383
+ Then a notification should not be generated
384
+ When 10 seconds passes
385
+ And an ok event is received
386
+ Then a notification should not be generated
387
+ When 10 seconds passes
388
+ And an ok event is received
389
+ Then a notification should not be generated
390
+ When 10 seconds passes
391
+ And a warning event is received
392
+ Then a notification should not be generated
393
+ When 10 seconds passes
394
+ And a warning event is received
395
+ Then a notification should not be generated
396
+ When 10 seconds passes
397
+ And an ok event is received
398
+ Then a notification should not be generated
393
399
 
394
400
  Scenario: scheduled maintenance created for initial check reference
395
401
  Given the check has no state
396
402
  When an ok event is received
397
- Then scheduled maintenance should be generated
403
+ Then scheduled maintenance should be generated
data/features/notification_rules.feature CHANGED
@@ -45,6 +45,7 @@ Feature: Notification rules on a per contact basis
45
45
  | | | email | email | | | |
46
46
  | | | sms | sms | | | |
47
47
  | bar | | email | email,sms | | | |
48
+ | bar | wags | | | true | true | |
48
49
 
49
50
  And user 3 has the following notification rules:
50
51
  | entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
@@ -52,6 +53,7 @@ Feature: Notification rules on a per contact basis
52
53
  | baz | | sms | sms | | | |
53
54
  | buf | | email | email | | | |
54
55
  | buf | | sms | sms | | | |
56
+ | bar | | email | email | true | true | |
55
57
 
56
58
  And user 4 has the following notification rules:
57
59
  | entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
@@ -151,8 +153,38 @@ Feature: Notification rules on a per contact basis
151
153
  And an ok event is received
152
154
  Then 2 email alert should be queued for malak@example.com
153
155
 
154
- @blackhole
155
- Scenario: Drop alerts matching a blackhole rule
156
+ @blackhole @time
157
+ Scenario: Drop alerts matching a general blackhole rule
158
+ Given the check is check 'ping' on entity 'buf'
159
+ And the check is in an ok state
160
+ When a critical event is received
161
+ And 1 minute passes
162
+ And a critical event is received
163
+ Then 0 email alerts should be queued for malak@example.com
164
+
165
+ @blackhole @time
166
+ Scenario: Drop alerts matching a blackhole rule by entity
167
+ Given the check is check 'ping' on entity 'bar'
168
+ And the check is in an ok state
169
+ When a warning event is received
170
+ And 1 minute passes
171
+ And a warning event is received
172
+ Then 0 email alerts should be queued for malak@example.com
173
+ And 0 email alerts should be queued for vera@example.com
174
+ When an ok event is received
175
+ Then 0 email alerts should be queued for malak@example.com
176
+ And 0 email alerts should be queued for vera@example.com
177
+
178
+ @blackhole @time
179
+ Scenario: Drop alerts matching a blackhole rule by tags
180
+ Given the check is check 'wags the dog' on entity 'bar'
181
+ And the check is in an ok state
182
+ When a warning event is received
183
+ And 1 minute passes
184
+ And a warning event is received
185
+ Then 0 email alerts should be queued for imani@example.com
186
+ When an ok event is received
187
+ Then 0 email alerts should be queued for imani@example.com
156
188
 
157
189
  @intervals @time
158
190
  Scenario: Alerts according to custom interval
@@ -274,7 +306,7 @@ Feature: Notification rules on a per contact basis
274
306
 
275
307
  @time
276
308
  Scenario: Test notifications behave like a critical notification
277
- Given the check is check 'ping' on entity 'foo'
309
+ Given the check is check 'ping' on entity 'baz'
278
310
  And the check is in an ok state
279
311
  When a test event is received
280
312
  Then 1 email alert should be queued for malak@example.com
@@ -296,7 +328,7 @@ Feature: Notification rules on a per contact basis
296
328
  And 2 sms alert should be queued for +61400000001
297
329
 
298
330
  Scenario: Unknown event during unscheduled maintenance
299
- Given the check is check 'ping' on entity 'foo'
331
+ Given the check is check 'ping' on entity 'baz'
300
332
  And the check is in an ok state
301
333
  When an unknown event is received
302
334
  And 1 minute passes
data/features/steps/notifications_steps.rb CHANGED
@@ -139,6 +139,7 @@ When /^the SMS notification handler runs successfully$/ do
139
139
  @request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/)
140
140
 
141
141
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
142
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
142
143
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
143
144
  Flapjack::Gateways::SmsMessagenet.start
144
145
 
@@ -148,6 +149,7 @@ end
148
149
  When /^the SMS notification handler fails to send an SMS$/ do
149
150
  @request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/).to_return(:status => [500, "Internal Server Error"])
150
151
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
152
+ Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
151
153
  Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
152
154
  Flapjack::Gateways::SmsMessagenet.start
153
155
 
@@ -157,6 +159,7 @@ end
157
159
  When /^the email notification handler runs successfully$/ do
158
160
  Resque.redis = @redis
159
161
  Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
162
+ Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
160
163
  Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
161
164
  Flapjack::Gateways::Email.start
162
165
 
@@ -175,6 +178,7 @@ end
175
178
  When /^the email notification handler fails to send an email$/ do
176
179
  Resque.redis = @redis
177
180
  Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
181
+ Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
178
182
  Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
179
183
  Flapjack::Gateways::Email.start
180
184
 
data/flapjack.gemspec CHANGED
@@ -33,9 +33,10 @@ Gem::Specification.new do |gem|
33
33
  gem.add_dependency 'blather', '~> 0.8.3'
34
34
  gem.add_dependency 'chronic'
35
35
  gem.add_dependency 'chronic_duration'
36
- gem.add_dependency 'activesupport'
36
+ gem.add_dependency 'activesupport', '~> 3.2.14'
37
37
  gem.add_dependency 'ice_cube'
38
- gem.add_dependency 'tzinfo'
38
+ gem.add_dependency 'tzinfo', '~> 1.0.1'
39
+ gem.add_dependency 'tzinfo-data'
39
40
 
40
41
  gem.add_development_dependency 'rake'
41
42
  end
data/lib/flapjack/coordinator.rb CHANGED
@@ -31,11 +31,13 @@ module Flapjack
31
31
  setup_signals if options[:signals]
32
32
  add_pikelets(pikelets(@config.all))
33
33
  end
34
+
35
+ @exit_value
34
36
  end
35
37
 
36
- def stop
37
- return if @stopping
38
- @stopping = true
38
+ def stop(value = 0)
39
+ return unless @exit_value.nil?
40
+ @exit_value = value
39
41
  remove_pikelets(@pikelets, :shutdown => true)
40
42
  # Syslog.close if Syslog.opened? # TODO revisit in threading branch
41
43
  end
@@ -111,10 +113,10 @@ module Flapjack
111
113
  # within a single coordinator instance. Coordinator is essentially
112
114
  # a singleton anyway...
113
115
  def setup_signals
114
- Kernel.trap('INT') { stop }
115
- Kernel.trap('TERM') { stop }
116
+ Kernel.trap('INT') { stop(Signal.list['INT']) }
117
+ Kernel.trap('TERM') { stop(Signal.list['TERM']) }
116
118
  unless RbConfig::CONFIG['host_os'] =~ /mswin|windows|cygwin/i
117
- Kernel.trap('QUIT') { stop }
119
+ Kernel.trap('QUIT') { stop(Signal.list['QUIT']) }
118
120
  Kernel.trap('HUP') { reload }
119
121
  end
120
122
  end