flapjack 0.7.22 → 0.7.25
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +19 -0
- data/bin/flapjack +3 -1
- data/bin/flapjack-nagios-receiver +5 -4
- data/bin/receive-events +2 -2
- data/features/events.feature +101 -95
- data/features/notification_rules.feature +36 -4
- data/features/steps/notifications_steps.rb +4 -0
- data/flapjack.gemspec +3 -2
- data/lib/flapjack/coordinator.rb +8 -6
- data/lib/flapjack/data/entity_check.rb +20 -13
- data/lib/flapjack/data/event.rb +4 -7
- data/lib/flapjack/data/notification.rb +63 -45
- data/lib/flapjack/filters/acknowledgement.rb +26 -24
- data/lib/flapjack/filters/delays.rb +46 -42
- data/lib/flapjack/filters/ok.rb +31 -34
- data/lib/flapjack/filters/scheduled_maintenance.rb +2 -2
- data/lib/flapjack/filters/unscheduled_maintenance.rb +2 -3
- data/lib/flapjack/gateways/email.rb +111 -114
- data/lib/flapjack/gateways/email/alert.html.erb +11 -11
- data/lib/flapjack/gateways/email/alert.text.erb +19 -6
- data/lib/flapjack/gateways/sms_messagenet.rb +15 -5
- data/lib/flapjack/gateways/web.rb +3 -4
- data/lib/flapjack/gateways/web/public/css/flapjack.css +0 -2
- data/lib/flapjack/gateways/web/public/img/flapjack-favicon-32-16.ico +0 -0
- data/lib/flapjack/gateways/web/public/img/flapjack-favicon-64-32-24-16.ico +0 -0
- data/lib/flapjack/gateways/web/public/img/flapjack-transparent-300.png +0 -0
- data/lib/flapjack/gateways/web/public/img/flapjack-transparent-350-400.png +0 -0
- data/lib/flapjack/gateways/web/views/_head.html.erb +1 -0
- data/lib/flapjack/gateways/web/views/index.html.erb +1 -1
- data/lib/flapjack/notifier.rb +2 -3
- data/lib/flapjack/pikelet.rb +5 -4
- data/lib/flapjack/processor.rb +39 -27
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/data/entity_check_spec.rb +5 -0
- data/spec/lib/flapjack/data/event_spec.rb +0 -1
- data/spec/lib/flapjack/gateways/email_spec.rb +5 -9
- data/spec/lib/flapjack/gateways/sms_messagenet.spec.rb +80 -1
- data/spec/lib/flapjack/gateways/web_spec.rb +1 -1
- data/spec/lib/flapjack/pikelet_spec.rb +4 -3
- data/spec/lib/flapjack/processor_spec.rb +0 -1
- metadata +28 -11
- data/lib/flapjack/filters/detect_mass_client_failures.rb +0 -44
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +0 -6
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
## Flapjack Changelog
|
2
2
|
|
3
|
+
# 0.7.25 - 2013-09-13
|
4
|
+
- Bug: EntityCheck last_update= isn't being called for update_state since refactoring gh-303 (@ali-graham)
|
5
|
+
- Bug: flapjack-nagios-receiver is double-escaping its JSON data gh-304 (@jessereynolds)
|
6
|
+
|
7
|
+
# 0.7.24 - 2013-09-12
|
8
|
+
- Bug: gem install of flapjack 0.7.23 fails with tzinfo-data dependency error gh-302 (@jessereynolds)
|
9
|
+
|
10
|
+
# 0.7.23 - 2013-09-12
|
11
|
+
- Bug: Quick ok -> warning -> ok -> warning triggers too many recovery notifications gh-119 (@jessereynolds)
|
12
|
+
- Bug: Blackhole notification rule doesn't block recovery notifications gh-282 (@jessereynolds)
|
13
|
+
- Chore: Shorten SMS messages to 159 chars on the Messagenet gateway gh-278 (@ali-graham)
|
14
|
+
- Chore: flapjack-nagios-receiver should use Event#add gh-275 (@ali-graham)
|
15
|
+
- Chore: Non-zero exit code after receiving SIGINT gh-266 (@ali-graham)
|
16
|
+
- Bug: Email notifications - remove "(about a minute ago)" and fix previous state fields gh-258 (@ali-graham)
|
17
|
+
- Chore: refactor delays filter, remove mass client failures filter gh-293 (@jessereynolds)
|
18
|
+
- Bug: creation of scheduled maintenance fails from web UI gh-296 (@ali-graham)
|
19
|
+
- Feature: flapjack UI needs a favicon gh-297 (@jessereynolds)
|
20
|
+
- Chore: email notification styling gh-298 (@jessereynolds)
|
21
|
+
|
3
22
|
# 0.7.22 - 2013-08-08
|
4
23
|
- Bug: fix potential exception in json serialisation of tags in notifications gh-281 (@jessereynolds)
|
5
24
|
|
data/bin/flapjack
CHANGED
@@ -146,10 +146,12 @@ when "start"
|
|
146
146
|
puts "Flapjack is already running."
|
147
147
|
else
|
148
148
|
print "Flapjack starting..."
|
149
|
+
return_value = nil
|
149
150
|
runner.execute(:daemonize => daemonize) {
|
150
|
-
flapjack_coord.call
|
151
|
+
return_value = flapjack_coord.call
|
151
152
|
}
|
152
153
|
puts " done."
|
154
|
+
exit(return_value + 128) unless (return_value.nil? || (return_value == 0))
|
153
155
|
end
|
154
156
|
|
155
157
|
when "stop"
|
data/bin/flapjack-nagios-receiver
CHANGED
@@ -14,6 +14,7 @@ Oj.default_options = { :indent => 0, :mode => :strict }
|
|
14
14
|
require 'dante'
|
15
15
|
|
16
16
|
require 'flapjack/configuration'
|
17
|
+
require 'flapjack/data/event'
|
17
18
|
|
18
19
|
def pike(message)
|
19
20
|
puts "piking out: #{message}"
|
@@ -47,16 +48,16 @@ def process_input(opts)
|
|
47
48
|
state = 'ok' if state.downcase == 'up'
|
48
49
|
state = 'critical' if state.downcase == 'down'
|
49
50
|
details = check_long_output ? check_long_output.gsub(/\\n/, "\n") : nil
|
50
|
-
event =
|
51
|
+
event = {
|
51
52
|
'entity' => entity,
|
52
53
|
'check' => check,
|
53
54
|
'type' => 'service',
|
54
55
|
'state' => state,
|
55
56
|
'summary' => check_output,
|
56
57
|
'details' => details,
|
57
|
-
'
|
58
|
-
}
|
59
|
-
|
58
|
+
'time' => timestamp,
|
59
|
+
}
|
60
|
+
Flapjack::Data::Event.add(event, :redis => redis)
|
60
61
|
end
|
61
62
|
rescue Redis::CannotConnectError
|
62
63
|
puts "Error, unable to to connect to the redis server (#{$!})"
|
data/bin/receive-events
CHANGED
@@ -12,6 +12,7 @@ require 'oj'
|
|
12
12
|
Oj.default_options = { :indent => 0, :mode => :strict }
|
13
13
|
|
14
14
|
require 'flapjack/configuration'
|
15
|
+
require 'flapjack/data/event'
|
15
16
|
|
16
17
|
def pike(message)
|
17
18
|
puts "piking out: #{message}"
|
@@ -19,8 +20,7 @@ def pike(message)
|
|
19
20
|
end
|
20
21
|
|
21
22
|
def send_event(event, opts)
|
22
|
-
redis
|
23
|
-
redis.lpush 'events', event
|
23
|
+
Flapjack::Data::Event.add(event, :redis => opts[:redis])
|
24
24
|
end
|
25
25
|
|
26
26
|
def receive(opts)
|
data/features/events.feature
CHANGED
@@ -123,7 +123,12 @@ Feature: events
|
|
123
123
|
|
124
124
|
@time
|
125
125
|
Scenario: Check critical to ok when acknowledged
|
126
|
-
Given the check is in
|
126
|
+
Given the check is in an ok state
|
127
|
+
When a critical event is received
|
128
|
+
And one minute passes
|
129
|
+
And a critical event is received
|
130
|
+
Then a notification should be generated
|
131
|
+
# the above all needs to be just a call to the "Check ok to critical for 1 minute" Scenario if that's possible
|
127
132
|
When an acknowledgement event is received
|
128
133
|
Then a notification should be generated
|
129
134
|
When 1 minute passes
|
@@ -297,101 +302,102 @@ Feature: events
|
|
297
302
|
|
298
303
|
# commenting out this test for now, will revive it
|
299
304
|
# when working on gh-119
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
#
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
305
|
+
@time
|
306
|
+
Scenario: a lot of quick ok -> warning -> ok -> warning
|
307
|
+
Given the check is in an ok state
|
308
|
+
When 10 seconds passes
|
309
|
+
And a warning event is received
|
310
|
+
Then a notification should not be generated
|
311
|
+
When 10 seconds passes
|
312
|
+
And an ok event is received
|
313
|
+
Then a notification should not be generated
|
314
|
+
When 10 seconds passes
|
315
|
+
And a warning event is received
|
316
|
+
Then a notification should not be generated
|
317
|
+
When 10 seconds passes
|
318
|
+
And a warning event is received
|
319
|
+
Then a notification should not be generated
|
320
|
+
When 10 seconds passes
|
321
|
+
And a warning event is received
|
322
|
+
Then a notification should not be generated
|
323
|
+
When 20 seconds passes
|
324
|
+
And an ok event is received
|
325
|
+
Then a notification should not be generated
|
326
|
+
When 10 seconds passes
|
327
|
+
And a warning event is received
|
328
|
+
Then a notification should not be generated
|
329
|
+
When 10 seconds passes
|
330
|
+
And an ok event is received
|
331
|
+
Then a notification should not be generated
|
332
|
+
When 10 seconds passes
|
333
|
+
And a warning event is received
|
334
|
+
Then a notification should not be generated
|
335
|
+
When 10 seconds passes
|
336
|
+
And a warning event is received
|
337
|
+
Then a notification should not be generated
|
338
|
+
When 10 seconds passes
|
339
|
+
And a warning event is received
|
340
|
+
Then a notification should not be generated
|
341
|
+
When 10 seconds passes
|
342
|
+
And a warning event is received
|
343
|
+
Then a notification should be generated
|
344
|
+
When 10 seconds passes
|
345
|
+
And a warning event is received
|
346
|
+
Then a notification should not be generated
|
347
|
+
When 10 seconds passes
|
348
|
+
And a warning event is received
|
349
|
+
Then a notification should not be generated
|
350
|
+
When 10 seconds passes
|
351
|
+
And an ok event is received
|
352
|
+
Then a notification should be generated
|
353
|
+
# recovered
|
354
|
+
When 10 seconds passes
|
355
|
+
And a warning event is received
|
356
|
+
Then a notification should not be generated
|
357
|
+
When 10 seconds passes
|
358
|
+
And a warning event is received
|
359
|
+
Then a notification should not be generated
|
360
|
+
When 10 seconds passes
|
361
|
+
And a warning event is received
|
362
|
+
Then a notification should not be generated
|
363
|
+
When 10 seconds passes
|
364
|
+
And an ok event is received
|
365
|
+
Then a notification should not be generated
|
366
|
+
When 10 seconds passes
|
367
|
+
And a warning event is received
|
368
|
+
Then a notification should not be generated
|
369
|
+
When 10 seconds passes
|
370
|
+
And a warning event is received
|
371
|
+
Then a notification should not be generated
|
372
|
+
When 10 seconds passes
|
373
|
+
And a warning event is received
|
374
|
+
Then a notification should not be generated
|
375
|
+
When 10 seconds passes
|
376
|
+
And an ok event is received
|
377
|
+
Then a notification should not be generated
|
378
|
+
When 10 seconds passes
|
379
|
+
And an ok event is received
|
380
|
+
Then a notification should not be generated
|
381
|
+
When 10 seconds passes
|
382
|
+
And an ok event is received
|
383
|
+
Then a notification should not be generated
|
384
|
+
When 10 seconds passes
|
385
|
+
And an ok event is received
|
386
|
+
Then a notification should not be generated
|
387
|
+
When 10 seconds passes
|
388
|
+
And an ok event is received
|
389
|
+
Then a notification should not be generated
|
390
|
+
When 10 seconds passes
|
391
|
+
And a warning event is received
|
392
|
+
Then a notification should not be generated
|
393
|
+
When 10 seconds passes
|
394
|
+
And a warning event is received
|
395
|
+
Then a notification should not be generated
|
396
|
+
When 10 seconds passes
|
397
|
+
And an ok event is received
|
398
|
+
Then a notification should not be generated
|
393
399
|
|
394
400
|
Scenario: scheduled maintenance created for initial check reference
|
395
401
|
Given the check has no state
|
396
402
|
When an ok event is received
|
397
|
-
Then scheduled maintenance should be generated
|
403
|
+
Then scheduled maintenance should be generated
|
data/features/notification_rules.feature
CHANGED
@@ -45,6 +45,7 @@ Feature: Notification rules on a per contact basis
|
|
45
45
|
| | | email | email | | | |
|
46
46
|
| | | sms | sms | | | |
|
47
47
|
| bar | | email | email,sms | | | |
|
48
|
+
| bar | wags | | | true | true | |
|
48
49
|
|
49
50
|
And user 3 has the following notification rules:
|
50
51
|
| entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
|
@@ -52,6 +53,7 @@ Feature: Notification rules on a per contact basis
|
|
52
53
|
| baz | | sms | sms | | | |
|
53
54
|
| buf | | email | email | | | |
|
54
55
|
| buf | | sms | sms | | | |
|
56
|
+
| bar | | email | email | true | true | |
|
55
57
|
|
56
58
|
And user 4 has the following notification rules:
|
57
59
|
| entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
|
@@ -151,8 +153,38 @@ Feature: Notification rules on a per contact basis
|
|
151
153
|
And an ok event is received
|
152
154
|
Then 2 email alert should be queued for malak@example.com
|
153
155
|
|
154
|
-
@blackhole
|
155
|
-
Scenario: Drop alerts matching a blackhole rule
|
156
|
+
@blackhole @time
|
157
|
+
Scenario: Drop alerts matching a general blackhole rule
|
158
|
+
Given the check is check 'ping' on entity 'buf'
|
159
|
+
And the check is in an ok state
|
160
|
+
When a critical event is received
|
161
|
+
And 1 minute passes
|
162
|
+
And a critical event is received
|
163
|
+
Then 0 email alerts should be queued for malak@example.com
|
164
|
+
|
165
|
+
@blackhole @time
|
166
|
+
Scenario: Drop alerts matching a blackhole rule by entity
|
167
|
+
Given the check is check 'ping' on entity 'bar'
|
168
|
+
And the check is in an ok state
|
169
|
+
When a warning event is received
|
170
|
+
And 1 minute passes
|
171
|
+
And a warning event is received
|
172
|
+
Then 0 email alerts should be queued for malak@example.com
|
173
|
+
And 0 email alerts should be queued for vera@example.com
|
174
|
+
When an ok event is received
|
175
|
+
Then 0 email alerts should be queued for malak@example.com
|
176
|
+
And 0 email alerts should be queued for vera@example.com
|
177
|
+
|
178
|
+
@blackhole @time
|
179
|
+
Scenario: Drop alerts matching a blackhole rule by tags
|
180
|
+
Given the check is check 'wags the dog' on entity 'bar'
|
181
|
+
And the check is in an ok state
|
182
|
+
When a warning event is received
|
183
|
+
And 1 minute passes
|
184
|
+
And a warning event is received
|
185
|
+
Then 0 email alerts should be queued for imani@example.com
|
186
|
+
When an ok event is received
|
187
|
+
Then 0 email alerts should be queued for imani@example.com
|
156
188
|
|
157
189
|
@intervals @time
|
158
190
|
Scenario: Alerts according to custom interval
|
@@ -274,7 +306,7 @@ Feature: Notification rules on a per contact basis
|
|
274
306
|
|
275
307
|
@time
|
276
308
|
Scenario: Test notifications behave like a critical notification
|
277
|
-
Given the check is check 'ping' on entity '
|
309
|
+
Given the check is check 'ping' on entity 'baz'
|
278
310
|
And the check is in an ok state
|
279
311
|
When a test event is received
|
280
312
|
Then 1 email alert should be queued for malak@example.com
|
@@ -296,7 +328,7 @@ Feature: Notification rules on a per contact basis
|
|
296
328
|
And 2 sms alert should be queued for +61400000001
|
297
329
|
|
298
330
|
Scenario: Unknown event during unscheduled maintenance
|
299
|
-
Given the check is check 'ping' on entity '
|
331
|
+
Given the check is check 'ping' on entity 'baz'
|
300
332
|
And the check is in an ok state
|
301
333
|
When an unknown event is received
|
302
334
|
And 1 minute passes
|
data/features/steps/notifications_steps.rb
CHANGED
@@ -139,6 +139,7 @@ When /^the SMS notification handler runs successfully$/ do
|
|
139
139
|
@request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/)
|
140
140
|
|
141
141
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
|
142
|
+
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
|
142
143
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
|
143
144
|
Flapjack::Gateways::SmsMessagenet.start
|
144
145
|
|
@@ -148,6 +149,7 @@ end
|
|
148
149
|
When /^the SMS notification handler fails to send an SMS$/ do
|
149
150
|
@request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/).to_return(:status => [500, "Internal Server Error"])
|
150
151
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
|
152
|
+
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
|
151
153
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
|
152
154
|
Flapjack::Gateways::SmsMessagenet.start
|
153
155
|
|
@@ -157,6 +159,7 @@ end
|
|
157
159
|
When /^the email notification handler runs successfully$/ do
|
158
160
|
Resque.redis = @redis
|
159
161
|
Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
|
162
|
+
Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
|
160
163
|
Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
|
161
164
|
Flapjack::Gateways::Email.start
|
162
165
|
|
@@ -175,6 +178,7 @@ end
|
|
175
178
|
When /^the email notification handler fails to send an email$/ do
|
176
179
|
Resque.redis = @redis
|
177
180
|
Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
|
181
|
+
Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
|
178
182
|
Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
|
179
183
|
Flapjack::Gateways::Email.start
|
180
184
|
|
data/flapjack.gemspec
CHANGED
@@ -33,9 +33,10 @@ Gem::Specification.new do |gem|
|
|
33
33
|
gem.add_dependency 'blather', '~> 0.8.3'
|
34
34
|
gem.add_dependency 'chronic'
|
35
35
|
gem.add_dependency 'chronic_duration'
|
36
|
-
gem.add_dependency 'activesupport'
|
36
|
+
gem.add_dependency 'activesupport', '~> 3.2.14'
|
37
37
|
gem.add_dependency 'ice_cube'
|
38
|
-
gem.add_dependency 'tzinfo'
|
38
|
+
gem.add_dependency 'tzinfo', '~> 1.0.1'
|
39
|
+
gem.add_dependency 'tzinfo-data'
|
39
40
|
|
40
41
|
gem.add_development_dependency 'rake'
|
41
42
|
end
|
data/lib/flapjack/coordinator.rb
CHANGED
@@ -31,11 +31,13 @@ module Flapjack
|
|
31
31
|
setup_signals if options[:signals]
|
32
32
|
add_pikelets(pikelets(@config.all))
|
33
33
|
end
|
34
|
+
|
35
|
+
@exit_value
|
34
36
|
end
|
35
37
|
|
36
|
-
def stop
|
37
|
-
return
|
38
|
-
@
|
38
|
+
def stop(value = 0)
|
39
|
+
return unless @exit_value.nil?
|
40
|
+
@exit_value = value
|
39
41
|
remove_pikelets(@pikelets, :shutdown => true)
|
40
42
|
# Syslog.close if Syslog.opened? # TODO revisit in threading branch
|
41
43
|
end
|
@@ -111,10 +113,10 @@ module Flapjack
|
|
111
113
|
# within a single coordinator instance. Coordinator is essentially
|
112
114
|
# a singleton anyway...
|
113
115
|
def setup_signals
|
114
|
-
Kernel.trap('INT') { stop }
|
115
|
-
Kernel.trap('TERM') { stop }
|
116
|
+
Kernel.trap('INT') { stop(Signal.list['INT']) }
|
117
|
+
Kernel.trap('TERM') { stop(Signal.list['TERM']) }
|
116
118
|
unless RbConfig::CONFIG['host_os'] =~ /mswin|windows|cygwin/i
|
117
|
-
Kernel.trap('QUIT') { stop }
|
119
|
+
Kernel.trap('QUIT') { stop(Signal.list['QUIT']) }
|
118
120
|
Kernel.trap('HUP') { reload }
|
119
121
|
end
|
120
122
|
end
|