flapjack 0.7.22 → 0.7.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +19 -0
- data/bin/flapjack +3 -1
- data/bin/flapjack-nagios-receiver +5 -4
- data/bin/receive-events +2 -2
- data/features/events.feature +101 -95
- data/features/notification_rules.feature +36 -4
- data/features/steps/notifications_steps.rb +4 -0
- data/flapjack.gemspec +3 -2
- data/lib/flapjack/coordinator.rb +8 -6
- data/lib/flapjack/data/entity_check.rb +20 -13
- data/lib/flapjack/data/event.rb +4 -7
- data/lib/flapjack/data/notification.rb +63 -45
- data/lib/flapjack/filters/acknowledgement.rb +26 -24
- data/lib/flapjack/filters/delays.rb +46 -42
- data/lib/flapjack/filters/ok.rb +31 -34
- data/lib/flapjack/filters/scheduled_maintenance.rb +2 -2
- data/lib/flapjack/filters/unscheduled_maintenance.rb +2 -3
- data/lib/flapjack/gateways/email.rb +111 -114
- data/lib/flapjack/gateways/email/alert.html.erb +11 -11
- data/lib/flapjack/gateways/email/alert.text.erb +19 -6
- data/lib/flapjack/gateways/sms_messagenet.rb +15 -5
- data/lib/flapjack/gateways/web.rb +3 -4
- data/lib/flapjack/gateways/web/public/css/flapjack.css +0 -2
- data/lib/flapjack/gateways/web/public/img/flapjack-favicon-32-16.ico +0 -0
- data/lib/flapjack/gateways/web/public/img/flapjack-favicon-64-32-24-16.ico +0 -0
- data/lib/flapjack/gateways/web/public/img/flapjack-transparent-300.png +0 -0
- data/lib/flapjack/gateways/web/public/img/flapjack-transparent-350-400.png +0 -0
- data/lib/flapjack/gateways/web/views/_head.html.erb +1 -0
- data/lib/flapjack/gateways/web/views/index.html.erb +1 -1
- data/lib/flapjack/notifier.rb +2 -3
- data/lib/flapjack/pikelet.rb +5 -4
- data/lib/flapjack/processor.rb +39 -27
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/data/entity_check_spec.rb +5 -0
- data/spec/lib/flapjack/data/event_spec.rb +0 -1
- data/spec/lib/flapjack/gateways/email_spec.rb +5 -9
- data/spec/lib/flapjack/gateways/sms_messagenet.spec.rb +80 -1
- data/spec/lib/flapjack/gateways/web_spec.rb +1 -1
- data/spec/lib/flapjack/pikelet_spec.rb +4 -3
- data/spec/lib/flapjack/processor_spec.rb +0 -1
- metadata +28 -11
- data/lib/flapjack/filters/detect_mass_client_failures.rb +0 -44
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +0 -6
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
## Flapjack Changelog
|
2
2
|
|
3
|
+
# 0.7.25 - 2013-09-13
|
4
|
+
- Bug: EntityCheck last_update= isn't being called for update_state since refactoring gh-303 (@ali-graham)
|
5
|
+
- Bug: flapjack-nagios-receiver is double-escaping its JSON data gh-304 (@jessereynolds)
|
6
|
+
|
7
|
+
# 0.7.24 - 2013-09-12
|
8
|
+
- Bug: gem install of flapjack 0.7.23 fails with tzinfo-data dependency error gh-302 (@jessereynolds)
|
9
|
+
|
10
|
+
# 0.7.23 - 2013-09-12
|
11
|
+
- Bug: Quick ok -> warning -> ok -> warning triggers too many recovery notifications gh-119 (@jessereynolds)
|
12
|
+
- Bug: Blackhole notification rule doesn't block recovery notifications gh-282 (@jessereynolds)
|
13
|
+
- Chore: Shorten SMS messages to 159 chars on the Messagenet gateway gh-278 (@ali-graham)
|
14
|
+
- Chore: flapjack-nagios-receiver should use Event#add gh-275 (@ali-graham)
|
15
|
+
- Chore: Non-zero exit code after receiving SIGINT gh-266 (@ali-graham)
|
16
|
+
- Bug: Email notifications - remove "(about a minute ago)" and fix previous state fields gh-258 (@ali-graham)
|
17
|
+
- Chore: refactor delays filter, remove mass client failures filter gh-293 (@jessereynolds)
|
18
|
+
- Bug: creation of scheduled maintenance fails from web UI gh-296 (@ali-graham)
|
19
|
+
- Feature: flapjack UI needs a favicon gh-297 (@jessereynolds)
|
20
|
+
- Chore: email notification styling gh-298 (@jessereynolds)
|
21
|
+
|
3
22
|
# 0.7.22 - 2013-08-08
|
4
23
|
- Bug: fix potential exception in json serialisation of tags in notifications gh-281 (@jessereynolds)
|
5
24
|
|
data/bin/flapjack
CHANGED
@@ -146,10 +146,12 @@ when "start"
|
|
146
146
|
puts "Flapjack is already running."
|
147
147
|
else
|
148
148
|
print "Flapjack starting..."
|
149
|
+
return_value = nil
|
149
150
|
runner.execute(:daemonize => daemonize) {
|
150
|
-
flapjack_coord.call
|
151
|
+
return_value = flapjack_coord.call
|
151
152
|
}
|
152
153
|
puts " done."
|
154
|
+
exit(return_value + 128) unless (return_value.nil? || (return_value == 0))
|
153
155
|
end
|
154
156
|
|
155
157
|
when "stop"
|
data/bin/flapjack-nagios-receiver
CHANGED
@@ -14,6 +14,7 @@ Oj.default_options = { :indent => 0, :mode => :strict }
|
|
14
14
|
require 'dante'
|
15
15
|
|
16
16
|
require 'flapjack/configuration'
|
17
|
+
require 'flapjack/data/event'
|
17
18
|
|
18
19
|
def pike(message)
|
19
20
|
puts "piking out: #{message}"
|
@@ -47,16 +48,16 @@ def process_input(opts)
|
|
47
48
|
state = 'ok' if state.downcase == 'up'
|
48
49
|
state = 'critical' if state.downcase == 'down'
|
49
50
|
details = check_long_output ? check_long_output.gsub(/\\n/, "\n") : nil
|
50
|
-
event =
|
51
|
+
event = {
|
51
52
|
'entity' => entity,
|
52
53
|
'check' => check,
|
53
54
|
'type' => 'service',
|
54
55
|
'state' => state,
|
55
56
|
'summary' => check_output,
|
56
57
|
'details' => details,
|
57
|
-
'
|
58
|
-
}
|
59
|
-
|
58
|
+
'time' => timestamp,
|
59
|
+
}
|
60
|
+
Flapjack::Data::Event.add(event, :redis => redis)
|
60
61
|
end
|
61
62
|
rescue Redis::CannotConnectError
|
62
63
|
puts "Error, unable to to connect to the redis server (#{$!})"
|
data/bin/receive-events
CHANGED
@@ -12,6 +12,7 @@ require 'oj'
|
|
12
12
|
Oj.default_options = { :indent => 0, :mode => :strict }
|
13
13
|
|
14
14
|
require 'flapjack/configuration'
|
15
|
+
require 'flapjack/data/event'
|
15
16
|
|
16
17
|
def pike(message)
|
17
18
|
puts "piking out: #{message}"
|
@@ -19,8 +20,7 @@ def pike(message)
|
|
19
20
|
end
|
20
21
|
|
21
22
|
def send_event(event, opts)
|
22
|
-
redis
|
23
|
-
redis.lpush 'events', event
|
23
|
+
Flapjack::Data::Event.add(event, :redis => opts[:redis])
|
24
24
|
end
|
25
25
|
|
26
26
|
def receive(opts)
|
data/features/events.feature
CHANGED
@@ -123,7 +123,12 @@ Feature: events
|
|
123
123
|
|
124
124
|
@time
|
125
125
|
Scenario: Check critical to ok when acknowledged
|
126
|
-
Given the check is in
|
126
|
+
Given the check is in an ok state
|
127
|
+
When a critical event is received
|
128
|
+
And one minute passes
|
129
|
+
And a critical event is received
|
130
|
+
Then a notification should be generated
|
131
|
+
# the above all needs to be just a call to the "Check ok to critical for 1 minute" Scenario if that's possible
|
127
132
|
When an acknowledgement event is received
|
128
133
|
Then a notification should be generated
|
129
134
|
When 1 minute passes
|
@@ -297,101 +302,102 @@ Feature: events
|
|
297
302
|
|
298
303
|
# commenting out this test for now, will revive it
|
299
304
|
# when working on gh-119
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
#
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
305
|
+
@time
|
306
|
+
Scenario: a lot of quick ok -> warning -> ok -> warning
|
307
|
+
Given the check is in an ok state
|
308
|
+
When 10 seconds passes
|
309
|
+
And a warning event is received
|
310
|
+
Then a notification should not be generated
|
311
|
+
When 10 seconds passes
|
312
|
+
And an ok event is received
|
313
|
+
Then a notification should not be generated
|
314
|
+
When 10 seconds passes
|
315
|
+
And a warning event is received
|
316
|
+
Then a notification should not be generated
|
317
|
+
When 10 seconds passes
|
318
|
+
And a warning event is received
|
319
|
+
Then a notification should not be generated
|
320
|
+
When 10 seconds passes
|
321
|
+
And a warning event is received
|
322
|
+
Then a notification should not be generated
|
323
|
+
When 20 seconds passes
|
324
|
+
And an ok event is received
|
325
|
+
Then a notification should not be generated
|
326
|
+
When 10 seconds passes
|
327
|
+
And a warning event is received
|
328
|
+
Then a notification should not be generated
|
329
|
+
When 10 seconds passes
|
330
|
+
And an ok event is received
|
331
|
+
Then a notification should not be generated
|
332
|
+
When 10 seconds passes
|
333
|
+
And a warning event is received
|
334
|
+
Then a notification should not be generated
|
335
|
+
When 10 seconds passes
|
336
|
+
And a warning event is received
|
337
|
+
Then a notification should not be generated
|
338
|
+
When 10 seconds passes
|
339
|
+
And a warning event is received
|
340
|
+
Then a notification should not be generated
|
341
|
+
When 10 seconds passes
|
342
|
+
And a warning event is received
|
343
|
+
Then a notification should be generated
|
344
|
+
When 10 seconds passes
|
345
|
+
And a warning event is received
|
346
|
+
Then a notification should not be generated
|
347
|
+
When 10 seconds passes
|
348
|
+
And a warning event is received
|
349
|
+
Then a notification should not be generated
|
350
|
+
When 10 seconds passes
|
351
|
+
And an ok event is received
|
352
|
+
Then a notification should be generated
|
353
|
+
# recovered
|
354
|
+
When 10 seconds passes
|
355
|
+
And a warning event is received
|
356
|
+
Then a notification should not be generated
|
357
|
+
When 10 seconds passes
|
358
|
+
And a warning event is received
|
359
|
+
Then a notification should not be generated
|
360
|
+
When 10 seconds passes
|
361
|
+
And a warning event is received
|
362
|
+
Then a notification should not be generated
|
363
|
+
When 10 seconds passes
|
364
|
+
And an ok event is received
|
365
|
+
Then a notification should not be generated
|
366
|
+
When 10 seconds passes
|
367
|
+
And a warning event is received
|
368
|
+
Then a notification should not be generated
|
369
|
+
When 10 seconds passes
|
370
|
+
And a warning event is received
|
371
|
+
Then a notification should not be generated
|
372
|
+
When 10 seconds passes
|
373
|
+
And a warning event is received
|
374
|
+
Then a notification should not be generated
|
375
|
+
When 10 seconds passes
|
376
|
+
And an ok event is received
|
377
|
+
Then a notification should not be generated
|
378
|
+
When 10 seconds passes
|
379
|
+
And an ok event is received
|
380
|
+
Then a notification should not be generated
|
381
|
+
When 10 seconds passes
|
382
|
+
And an ok event is received
|
383
|
+
Then a notification should not be generated
|
384
|
+
When 10 seconds passes
|
385
|
+
And an ok event is received
|
386
|
+
Then a notification should not be generated
|
387
|
+
When 10 seconds passes
|
388
|
+
And an ok event is received
|
389
|
+
Then a notification should not be generated
|
390
|
+
When 10 seconds passes
|
391
|
+
And a warning event is received
|
392
|
+
Then a notification should not be generated
|
393
|
+
When 10 seconds passes
|
394
|
+
And a warning event is received
|
395
|
+
Then a notification should not be generated
|
396
|
+
When 10 seconds passes
|
397
|
+
And an ok event is received
|
398
|
+
Then a notification should not be generated
|
393
399
|
|
394
400
|
Scenario: scheduled maintenance created for initial check reference
|
395
401
|
Given the check has no state
|
396
402
|
When an ok event is received
|
397
|
-
Then scheduled maintenance should be generated
|
403
|
+
Then scheduled maintenance should be generated
|
data/features/notification_rules.feature
CHANGED
@@ -45,6 +45,7 @@ Feature: Notification rules on a per contact basis
|
|
45
45
|
| | | email | email | | | |
|
46
46
|
| | | sms | sms | | | |
|
47
47
|
| bar | | email | email,sms | | | |
|
48
|
+
| bar | wags | | | true | true | |
|
48
49
|
|
49
50
|
And user 3 has the following notification rules:
|
50
51
|
| entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
|
@@ -52,6 +53,7 @@ Feature: Notification rules on a per contact basis
|
|
52
53
|
| baz | | sms | sms | | | |
|
53
54
|
| buf | | email | email | | | |
|
54
55
|
| buf | | sms | sms | | | |
|
56
|
+
| bar | | email | email | true | true | |
|
55
57
|
|
56
58
|
And user 4 has the following notification rules:
|
57
59
|
| entities | tags | warning_media | critical_media | warning_blackhole | critical_blackhole | time_restrictions |
|
@@ -151,8 +153,38 @@ Feature: Notification rules on a per contact basis
|
|
151
153
|
And an ok event is received
|
152
154
|
Then 2 email alert should be queued for malak@example.com
|
153
155
|
|
154
|
-
@blackhole
|
155
|
-
Scenario: Drop alerts matching a blackhole rule
|
156
|
+
@blackhole @time
|
157
|
+
Scenario: Drop alerts matching a general blackhole rule
|
158
|
+
Given the check is check 'ping' on entity 'buf'
|
159
|
+
And the check is in an ok state
|
160
|
+
When a critical event is received
|
161
|
+
And 1 minute passes
|
162
|
+
And a critical event is received
|
163
|
+
Then 0 email alerts should be queued for malak@example.com
|
164
|
+
|
165
|
+
@blackhole @time
|
166
|
+
Scenario: Drop alerts matching a blackhole rule by entity
|
167
|
+
Given the check is check 'ping' on entity 'bar'
|
168
|
+
And the check is in an ok state
|
169
|
+
When a warning event is received
|
170
|
+
And 1 minute passes
|
171
|
+
And a warning event is received
|
172
|
+
Then 0 email alerts should be queued for malak@example.com
|
173
|
+
And 0 email alerts should be queued for vera@example.com
|
174
|
+
When an ok event is received
|
175
|
+
Then 0 email alerts should be queued for malak@example.com
|
176
|
+
And 0 email alerts should be queued for vera@example.com
|
177
|
+
|
178
|
+
@blackhole @time
|
179
|
+
Scenario: Drop alerts matching a blackhole rule by tags
|
180
|
+
Given the check is check 'wags the dog' on entity 'bar'
|
181
|
+
And the check is in an ok state
|
182
|
+
When a warning event is received
|
183
|
+
And 1 minute passes
|
184
|
+
And a warning event is received
|
185
|
+
Then 0 email alerts should be queued for imani@example.com
|
186
|
+
When an ok event is received
|
187
|
+
Then 0 email alerts should be queued for imani@example.com
|
156
188
|
|
157
189
|
@intervals @time
|
158
190
|
Scenario: Alerts according to custom interval
|
@@ -274,7 +306,7 @@ Feature: Notification rules on a per contact basis
|
|
274
306
|
|
275
307
|
@time
|
276
308
|
Scenario: Test notifications behave like a critical notification
|
277
|
-
Given the check is check 'ping' on entity '
|
309
|
+
Given the check is check 'ping' on entity 'baz'
|
278
310
|
And the check is in an ok state
|
279
311
|
When a test event is received
|
280
312
|
Then 1 email alert should be queued for malak@example.com
|
@@ -296,7 +328,7 @@ Feature: Notification rules on a per contact basis
|
|
296
328
|
And 2 sms alert should be queued for +61400000001
|
297
329
|
|
298
330
|
Scenario: Unknown event during unscheduled maintenance
|
299
|
-
Given the check is check 'ping' on entity '
|
331
|
+
Given the check is check 'ping' on entity 'baz'
|
300
332
|
And the check is in an ok state
|
301
333
|
When an unknown event is received
|
302
334
|
And 1 minute passes
|
data/features/steps/notifications_steps.rb
CHANGED
@@ -139,6 +139,7 @@ When /^the SMS notification handler runs successfully$/ do
|
|
139
139
|
@request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/)
|
140
140
|
|
141
141
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
|
142
|
+
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
|
142
143
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
|
143
144
|
Flapjack::Gateways::SmsMessagenet.start
|
144
145
|
|
@@ -148,6 +149,7 @@ end
|
|
148
149
|
When /^the SMS notification handler fails to send an SMS$/ do
|
149
150
|
@request = stub_request(:get, /^#{Regexp.escape(Flapjack::Gateways::SmsMessagenet::MESSAGENET_URL)}/).to_return(:status => [500, "Internal Server Error"])
|
150
151
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@config', {'username' => 'abcd', 'password' => 'efgh'})
|
152
|
+
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@redis', @redis)
|
151
153
|
Flapjack::Gateways::SmsMessagenet.instance_variable_set('@logger', @logger)
|
152
154
|
Flapjack::Gateways::SmsMessagenet.start
|
153
155
|
|
@@ -157,6 +159,7 @@ end
|
|
157
159
|
When /^the email notification handler runs successfully$/ do
|
158
160
|
Resque.redis = @redis
|
159
161
|
Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
|
162
|
+
Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
|
160
163
|
Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
|
161
164
|
Flapjack::Gateways::Email.start
|
162
165
|
|
@@ -175,6 +178,7 @@ end
|
|
175
178
|
When /^the email notification handler fails to send an email$/ do
|
176
179
|
Resque.redis = @redis
|
177
180
|
Flapjack::Gateways::Email.instance_variable_set('@config', {'smtp_config' => {'host' => '127.0.0.1', 'port' => 2525}})
|
181
|
+
Flapjack::Gateways::Email.instance_variable_set('@redis', @redis)
|
178
182
|
Flapjack::Gateways::Email.instance_variable_set('@logger', @logger)
|
179
183
|
Flapjack::Gateways::Email.start
|
180
184
|
|
data/flapjack.gemspec
CHANGED
@@ -33,9 +33,10 @@ Gem::Specification.new do |gem|
|
|
33
33
|
gem.add_dependency 'blather', '~> 0.8.3'
|
34
34
|
gem.add_dependency 'chronic'
|
35
35
|
gem.add_dependency 'chronic_duration'
|
36
|
-
gem.add_dependency 'activesupport'
|
36
|
+
gem.add_dependency 'activesupport', '~> 3.2.14'
|
37
37
|
gem.add_dependency 'ice_cube'
|
38
|
-
gem.add_dependency 'tzinfo'
|
38
|
+
gem.add_dependency 'tzinfo', '~> 1.0.1'
|
39
|
+
gem.add_dependency 'tzinfo-data'
|
39
40
|
|
40
41
|
gem.add_development_dependency 'rake'
|
41
42
|
end
|
data/lib/flapjack/coordinator.rb
CHANGED
@@ -31,11 +31,13 @@ module Flapjack
|
|
31
31
|
setup_signals if options[:signals]
|
32
32
|
add_pikelets(pikelets(@config.all))
|
33
33
|
end
|
34
|
+
|
35
|
+
@exit_value
|
34
36
|
end
|
35
37
|
|
36
|
-
def stop
|
37
|
-
return
|
38
|
-
@
|
38
|
+
def stop(value = 0)
|
39
|
+
return unless @exit_value.nil?
|
40
|
+
@exit_value = value
|
39
41
|
remove_pikelets(@pikelets, :shutdown => true)
|
40
42
|
# Syslog.close if Syslog.opened? # TODO revisit in threading branch
|
41
43
|
end
|
@@ -111,10 +113,10 @@ module Flapjack
|
|
111
113
|
# within a single coordinator instance. Coordinator is essentially
|
112
114
|
# a singleton anyway...
|
113
115
|
def setup_signals
|
114
|
-
Kernel.trap('INT') { stop }
|
115
|
-
Kernel.trap('TERM') { stop }
|
116
|
+
Kernel.trap('INT') { stop(Signal.list['INT']) }
|
117
|
+
Kernel.trap('TERM') { stop(Signal.list['TERM']) }
|
116
118
|
unless RbConfig::CONFIG['host_os'] =~ /mswin|windows|cygwin/i
|
117
|
-
Kernel.trap('QUIT') { stop }
|
119
|
+
Kernel.trap('QUIT') { stop(Signal.list['QUIT']) }
|
118
120
|
Kernel.trap('HUP') { reload }
|
119
121
|
end
|
120
122
|
end
|