flapjack 2.0.0b1 → 2.0.0rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +5 -5
- data/CHANGELOG.md +4 -0
- data/features/events.feature +87 -13
- data/features/steps/events_steps.rb +12 -14
- data/lib/flapjack.rb +4 -0
- data/lib/flapjack/data/check.rb +29 -15
- data/lib/flapjack/data/event.rb +13 -6
- data/lib/flapjack/filters/delays.rb +80 -50
- data/lib/flapjack/filters/ok.rb +3 -4
- data/lib/flapjack/processor.rb +11 -0
- data/lib/flapjack/version.rb +1 -1
- data/spec/service_consumers/fixture_data.rb +2 -2
- data/spec/service_consumers/pacts/flapjack-diner_v2.0.json +2447 -2433
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ea7398f861ed32dc81a2ebc3bf46e34a69b2457
|
4
|
+
data.tar.gz: 25e571baec4fbcd2cc7404eb49f254bf6b8ab70c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab8b5bba15913b850cf817da2478b17e9bee841d17c6660f6c7cbd409b266b05e39391e05019e4d854c2034d7e297e4d6d286d263216b47cba4b4af7ac5d9d57
|
7
|
+
data.tar.gz: 745eb50a67ba61cbfca7a8315564d4abbf80909daa015f11f7fa2ce1d12189cdc3edb314286f5ba0e1ddd1148b05fdb3d37b42174e872c770bb9176c6eb1c3b4
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.3.0
|
data/.travis.yml
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
sudo: false
|
2
2
|
language: ruby
|
3
3
|
rvm:
|
4
|
-
- '2.0'
|
5
|
-
- '2.1'
|
6
|
-
- '2.2'
|
7
|
-
- '
|
4
|
+
- '2.0.0-p648'
|
5
|
+
- '2.1.8'
|
6
|
+
- '2.2.4'
|
7
|
+
- '2.3.0'
|
8
|
+
- 'jruby-9.0.4.0'
|
8
9
|
jdk: oraclejdk8
|
9
10
|
script: bundle exec rspec spec && bundle exec rake pact:verify && bundle exec cucumber features && cd src/flapjack && go test -v
|
10
11
|
env: GOPATH=$TRAVIS_BUILD_DIR
|
@@ -16,7 +17,6 @@ before_script:
|
|
16
17
|
- mkdir -p ./log
|
17
18
|
before_install:
|
18
19
|
- git submodule update --init --recursive
|
19
|
-
- gem install bundler
|
20
20
|
install:
|
21
21
|
- go get github.com/garyburd/redigo/redis
|
22
22
|
- go get github.com/go-martini/martini
|
data/CHANGELOG.md
CHANGED
data/features/events.feature
CHANGED
@@ -31,12 +31,31 @@ Feature: events
|
|
31
31
|
|
32
32
|
@time
|
33
33
|
Scenario: Check critical to critical after 10 seconds, with an initial delay of 5 seconds
|
34
|
-
Given
|
35
|
-
|
34
|
+
Given event initial failure delay is 5 seconds
|
35
|
+
And the check is in an ok state
|
36
|
+
When a critical event is received
|
36
37
|
And 10 seconds passes
|
37
|
-
And a critical event
|
38
|
+
And a critical event is received
|
38
39
|
Then 1 notification should have been generated
|
39
40
|
|
41
|
+
@time
|
42
|
+
Scenario: Check recovery with recovery delay
|
43
|
+
Given event initial recovery delay is 30 seconds
|
44
|
+
And the check is in an ok state
|
45
|
+
When a critical event is received
|
46
|
+
And 1 minute passes
|
47
|
+
And a critical event is received
|
48
|
+
Then 1 notification should have been generated
|
49
|
+
When 10 seconds passes
|
50
|
+
And an ok event is received
|
51
|
+
Then 1 notification should have been generated
|
52
|
+
When 10 seconds passes
|
53
|
+
And an ok event is received
|
54
|
+
Then 1 notification should have been generated
|
55
|
+
When 25 seconds passes
|
56
|
+
And an ok event is received
|
57
|
+
Then 2 notifications should have been generated
|
58
|
+
|
40
59
|
@time
|
41
60
|
Scenario: Check ok to warning for 1 minute
|
42
61
|
Given the check is in an ok state
|
@@ -55,10 +74,11 @@ Feature: events
|
|
55
74
|
|
56
75
|
@time
|
57
76
|
Scenario: Check ok to critical for 1 minute, with an initial delay of 2 minutes
|
77
|
+
Given event initial failure delay is 120 seconds
|
58
78
|
And the check is in an ok state
|
59
|
-
When a critical event
|
79
|
+
When a critical event is received
|
60
80
|
And 1 minute passes
|
61
|
-
And a critical event
|
81
|
+
And a critical event is received
|
62
82
|
Then no notifications should have been generated
|
63
83
|
|
64
84
|
@time
|
@@ -88,13 +108,14 @@ Feature: events
|
|
88
108
|
|
89
109
|
@time
|
90
110
|
Scenario: Check critical and alerted to critical for 40 seconds, with a repeat delay of 20 seconds
|
91
|
-
Given
|
92
|
-
|
111
|
+
Given event repeat failure delay is 20 seconds
|
112
|
+
And the check is in an ok state
|
113
|
+
When a critical event is received
|
93
114
|
And 1 minute passes
|
94
|
-
And a critical event
|
115
|
+
And a critical event is received
|
95
116
|
Then 1 notification should have been generated
|
96
117
|
When 40 seconds passes
|
97
|
-
And a critical event
|
118
|
+
And a critical event is received
|
98
119
|
Then 2 notifications should have been generated
|
99
120
|
|
100
121
|
@time
|
@@ -110,13 +131,14 @@ Feature: events
|
|
110
131
|
|
111
132
|
@time
|
112
133
|
Scenario: Check critical and alerted to critical for 6 minutes, with a repeat delay of 10 minutes
|
113
|
-
Given
|
114
|
-
|
134
|
+
Given event repeat failure delay is 600 seconds
|
135
|
+
And the check is in an ok state
|
136
|
+
When a critical event is received
|
115
137
|
And 1 minute passes
|
116
|
-
And a critical event
|
138
|
+
And a critical event is received
|
117
139
|
Then 1 notification should have been generated
|
118
140
|
When 6 minutes passes
|
119
|
-
And a critical event
|
141
|
+
And a critical event is received
|
120
142
|
Then 1 notification should have been generated
|
121
143
|
|
122
144
|
@time
|
@@ -199,6 +221,8 @@ Feature: events
|
|
199
221
|
Then 2 notifications should have been generated
|
200
222
|
When 6 minutes passes
|
201
223
|
And a critical event is received
|
224
|
+
And 45 seconds passes
|
225
|
+
And a critical event is received
|
202
226
|
Then 3 notifications should have been generated
|
203
227
|
When 6 minutes passes
|
204
228
|
And a critical event is received
|
@@ -220,6 +244,8 @@ Feature: events
|
|
220
244
|
Then 2 notifications should have been generated
|
221
245
|
When 10 seconds passes
|
222
246
|
And a critical event is received
|
247
|
+
And 45 seconds passes
|
248
|
+
And a critical event is received
|
223
249
|
Then 3 notifications should have been generated
|
224
250
|
When 50 seconds passes
|
225
251
|
And a critical event is received
|
@@ -329,6 +355,8 @@ Feature: events
|
|
329
355
|
When 10 seconds passes
|
330
356
|
# 120 seconds
|
331
357
|
And a critical event is received
|
358
|
+
And 45 seconds passes
|
359
|
+
And a critical event is received
|
332
360
|
Then 3 notifications should have been generated
|
333
361
|
When 10 seconds passes
|
334
362
|
And a critical event is received
|
@@ -438,6 +466,8 @@ Scenario: a lot of quick ok -> warning -> ok -> warning
|
|
438
466
|
Then 2 notifications should have been generated
|
439
467
|
When 10 seconds passes
|
440
468
|
And a warning event is received
|
469
|
+
And 45 seconds passes
|
470
|
+
And a warning event is received
|
441
471
|
Then 3 notifications should have been generated
|
442
472
|
When 10 seconds passes
|
443
473
|
And a warning event is received
|
@@ -445,3 +475,47 @@ Scenario: a lot of quick ok -> warning -> ok -> warning
|
|
445
475
|
When 10 seconds passes
|
446
476
|
And an ok event is received
|
447
477
|
Then 4 notifications should have been generated
|
478
|
+
|
479
|
+
@time
|
480
|
+
Scenario: a transient recovery
|
481
|
+
Given event initial recovery delay is 30 seconds
|
482
|
+
Given the check is in a critical state
|
483
|
+
When 35 seconds passes
|
484
|
+
# 'event 1: critical'
|
485
|
+
And a critical event is received
|
486
|
+
Then 1 notification should have been generated
|
487
|
+
|
488
|
+
When 5 seconds passes
|
489
|
+
# 'event 2: ok - no event, before initial_recovery_delay'
|
490
|
+
And an ok event is received
|
491
|
+
Then 1 notification should have been generated
|
492
|
+
|
493
|
+
When 5 seconds passes
|
494
|
+
# 'event 3: ok - no event, still before initial_recovery_delay'
|
495
|
+
And an ok event is received
|
496
|
+
Then 1 notification should have been generated
|
497
|
+
|
498
|
+
When 10 seconds passes
|
499
|
+
# 'event 4: critical, no event because we were in the initial_failure_delay'
|
500
|
+
And a critical event is received
|
501
|
+
Then 1 notification should have been generated
|
502
|
+
|
503
|
+
When 30 seconds passes
|
504
|
+
# 'event 5: critical, no event because we are in the repeat_failure_delay'
|
505
|
+
And a critical event is received
|
506
|
+
Then 1 notification should have been generated
|
507
|
+
|
508
|
+
When 10 seconds passes
|
509
|
+
# 'event 6: ok - no event, before initial_recovery_delay'
|
510
|
+
And an ok event is received
|
511
|
+
Then 1 notification should have been generated
|
512
|
+
|
513
|
+
When 60 seconds passes
|
514
|
+
# 'event 7: ok - send event, after initial_recovery_delay'
|
515
|
+
And an ok event is received
|
516
|
+
Then 2 notifications should have been generated
|
517
|
+
|
518
|
+
When 60 seconds passes
|
519
|
+
# 'event 8: ok - no event, we have already sent the recovery'
|
520
|
+
And an ok event is received
|
521
|
+
Then 2 notifications should have been generated
|
@@ -64,7 +64,7 @@ def set_state(entity_name, check_name, condition, last_update = Time.now)
|
|
64
64
|
check.states << state
|
65
65
|
end
|
66
66
|
|
67
|
-
def submit_event(condition, entity_name, check_name
|
67
|
+
def submit_event(condition, entity_name, check_name)
|
68
68
|
err_rate = case condition
|
69
69
|
when 'ok'
|
70
70
|
'0'
|
@@ -80,9 +80,10 @@ def submit_event(condition, entity_name, check_name, opts = {})
|
|
80
80
|
'entity' => entity_name,
|
81
81
|
'check' => check_name,
|
82
82
|
}
|
83
|
-
['
|
84
|
-
|
85
|
-
|
83
|
+
['initial_failure', 'repeat_failure', 'initial_recovery'].each do |delay_type|
|
84
|
+
delay = instance_variable_get("@event_#{delay_type}_delay")
|
85
|
+
next if delay.nil? || delay < 0
|
86
|
+
event.update("#{delay_type}_delay".to_sym => delay)
|
86
87
|
end
|
87
88
|
Flapjack.redis.rpush('events', Flapjack.dump_json(event))
|
88
89
|
end
|
@@ -151,18 +152,15 @@ Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in unsched
|
|
151
152
|
set_unscheduled_maintenance(entity_name, check_name, 60*60*2)
|
152
153
|
end
|
153
154
|
|
154
|
-
|
155
|
+
# Set the delays for next events
|
156
|
+
Given /^event (initial failure|repeat failure|initial recovery) delay is (\d+) seconds$/ do |delay_type, delay|
|
157
|
+
instance_variable_set("@event_#{delay_type.sub(/ /, '_')}_delay", delay.to_i)
|
158
|
+
end
|
159
|
+
|
160
|
+
When /^an? (ok|failure|critical|warning|unknown) event is received(?: for check '([\w\.\-]+)' on entity '([\w\.\-]+)')?$/ do |condition, check_name, entity_name|
|
155
161
|
check_name ||= @check_name
|
156
162
|
entity_name ||= @entity_name
|
157
|
-
|
158
|
-
when 'initial'
|
159
|
-
{:initial_failure_delay => failure_delay}
|
160
|
-
when 'repeat'
|
161
|
-
{:repeat_failure_delay => failure_delay}
|
162
|
-
else
|
163
|
-
{}
|
164
|
-
end
|
165
|
-
submit_event(condition, entity_name, check_name, opts)
|
163
|
+
submit_event(condition, entity_name, check_name)
|
166
164
|
drain_events
|
167
165
|
end
|
168
166
|
|
data/lib/flapjack.rb
CHANGED
@@ -16,6 +16,10 @@ module Flapjack
|
|
16
16
|
DEFAULT_INITIAL_FAILURE_DELAY = 30
|
17
17
|
DEFAULT_REPEAT_FAILURE_DELAY = 60
|
18
18
|
|
19
|
+
# defaulting to 0 for backwards compatibility; can be overridden in config,
|
20
|
+
# or per check / event
|
21
|
+
DEFAULT_INITIAL_RECOVERY_DELAY = 0
|
22
|
+
|
19
23
|
def self.load_json(data)
|
20
24
|
ActiveSupport::JSON.decode(data)
|
21
25
|
end
|
data/lib/flapjack/data/check.rb
CHANGED
@@ -31,15 +31,16 @@ module Flapjack
|
|
31
31
|
include Flapjack::Data::Extensions::Associations
|
32
32
|
include Flapjack::Data::Extensions::ShortName
|
33
33
|
|
34
|
-
define_attributes :name
|
35
|
-
:enabled
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
34
|
+
define_attributes :name => :string,
|
35
|
+
:enabled => :boolean,
|
36
|
+
:initial_failure_delay => :integer,
|
37
|
+
:repeat_failure_delay => :integer,
|
38
|
+
:initial_recovery_delay => :integer,
|
39
|
+
:ack_hash => :string,
|
40
|
+
:notification_count => :integer,
|
41
|
+
:condition => :string,
|
42
|
+
:failing => :boolean,
|
43
|
+
:alertable => :boolean
|
43
44
|
|
44
45
|
index_by :enabled, :failing, :alertable
|
45
46
|
unique_index_by :name, :ack_hash
|
@@ -204,6 +205,9 @@ module Flapjack
|
|
204
205
|
validates :repeat_failure_delay, :allow_nil => true,
|
205
206
|
:numericality => {:greater_than_or_equal_to => 0, :only_integer => true}
|
206
207
|
|
208
|
+
validates :initial_recovery_delay, :allow_nil => true,
|
209
|
+
:numericality => {:greater_than_or_equal_to => 0, :only_integer => true}
|
210
|
+
|
207
211
|
before_validation :create_ack_hash
|
208
212
|
validates :ack_hash, :presence => true
|
209
213
|
|
@@ -213,7 +217,8 @@ module Flapjack
|
|
213
217
|
|
214
218
|
swagger_schema :Check do
|
215
219
|
key :required, [:id, :type, :name, :enabled, :initial_failure_delay,
|
216
|
-
:repeat_failure_delay, :failing
|
220
|
+
:repeat_failure_delay, :initial_recovery_delay, :failing, :condition,
|
221
|
+
:ack_hash]
|
217
222
|
property :id do
|
218
223
|
key :type, :string
|
219
224
|
key :format, :uuid
|
@@ -235,6 +240,9 @@ module Flapjack
|
|
235
240
|
property :repeat_failure_delay do
|
236
241
|
key :type, :integer
|
237
242
|
end
|
243
|
+
property :initial_recovery_delay do
|
244
|
+
key :type, :integer
|
245
|
+
end
|
238
246
|
property :failing do
|
239
247
|
key :type, :boolean
|
240
248
|
key :enum, [true, false]
|
@@ -286,8 +294,7 @@ module Flapjack
|
|
286
294
|
end
|
287
295
|
|
288
296
|
swagger_schema :CheckCreate do
|
289
|
-
key :required, [:type, :name, :enabled
|
290
|
-
:repeat_failure_delay]
|
297
|
+
key :required, [:type, :name, :enabled]
|
291
298
|
property :id do
|
292
299
|
key :type, :string
|
293
300
|
key :format, :uuid
|
@@ -309,6 +316,9 @@ module Flapjack
|
|
309
316
|
property :repeat_failure_delay do
|
310
317
|
key :type, :integer
|
311
318
|
end
|
319
|
+
property :initial_recovery_delay do
|
320
|
+
key :type, :integer
|
321
|
+
end
|
312
322
|
property :relationships do
|
313
323
|
key :"$ref", :CheckCreateLinks
|
314
324
|
end
|
@@ -343,6 +353,9 @@ module Flapjack
|
|
343
353
|
property :repeat_failure_delay do
|
344
354
|
key :type, :integer
|
345
355
|
end
|
356
|
+
property :initial_recovery_delay do
|
357
|
+
key :type, :integer
|
358
|
+
end
|
346
359
|
property :relationships do
|
347
360
|
key :"$ref", :CheckUpdateLinks
|
348
361
|
end
|
@@ -372,7 +385,7 @@ module Flapjack
|
|
372
385
|
@jsonapi_methods ||= {
|
373
386
|
:post => Flapjack::Gateways::JSONAPI::Data::MethodDescriptor.new(
|
374
387
|
:attributes => [:name, :enabled, :initial_failure_delay,
|
375
|
-
:repeat_failure_delay],
|
388
|
+
:repeat_failure_delay, :initial_recovery_delay],
|
376
389
|
:descriptions => {
|
377
390
|
:singular => "Create a check.",
|
378
391
|
:multiple => "Create checks."
|
@@ -380,7 +393,8 @@ module Flapjack
|
|
380
393
|
),
|
381
394
|
:get => Flapjack::Gateways::JSONAPI::Data::MethodDescriptor.new(
|
382
395
|
:attributes => [:name, :enabled, :initial_failure_delay,
|
383
|
-
:repeat_failure_delay, :
|
396
|
+
:repeat_failure_delay, :initial_recovery_delay,
|
397
|
+
:failing, :condition, :ack_hash],
|
384
398
|
:descriptions => {
|
385
399
|
:singular => "Returns data for a check.",
|
386
400
|
:multiple => "Returns data for multiple check records."
|
@@ -388,7 +402,7 @@ module Flapjack
|
|
388
402
|
),
|
389
403
|
:patch => Flapjack::Gateways::JSONAPI::Data::MethodDescriptor.new(
|
390
404
|
:attributes => [:name, :enabled, :initial_failure_delay,
|
391
|
-
:repeat_failure_delay],
|
405
|
+
:repeat_failure_delay, :initial_recovery_delay],
|
392
406
|
:descriptions => {
|
393
407
|
:singular => "Update a check.",
|
394
408
|
:multiple => "Update checks."
|
data/lib/flapjack/data/event.rb
CHANGED
@@ -14,8 +14,8 @@ module Flapjack
|
|
14
14
|
# tags are now ignored, tags on the checks are used for rule matching
|
15
15
|
REQUIRED_KEYS = %w(state check)
|
16
16
|
OPTIONAL_KEYS = %w(entity time initial_failure_delay
|
17
|
-
repeat_failure_delay summary details
|
18
|
-
perfdata type)
|
17
|
+
repeat_failure_delay initial_recovery_delay summary details
|
18
|
+
acknowledgement_id duration tags perfdata type)
|
19
19
|
|
20
20
|
VALIDATIONS = {
|
21
21
|
proc {|e| e['state'].is_a?(String) &&
|
@@ -46,6 +46,11 @@ module Flapjack
|
|
46
46
|
(e['repeat_failure_delay'].is_a?(String) && !!(e['repeat_failure_delay'] =~ /^\d+$/)) } =>
|
47
47
|
"repeat_failure_delay must be a positive integer, or a string castable to one",
|
48
48
|
|
49
|
+
proc {|e| e['initial_recovery_delay'].nil? ||
|
50
|
+
e['initial_recovery_delay'].is_a?(Integer) ||
|
51
|
+
(e['initial_recovery_delay'].is_a?(String) && !!(e['initial_recovery_delay'] =~ /^\d+$/)) } =>
|
52
|
+
"initial_recovery_delay must be a positive integer, or a string castable to one",
|
53
|
+
|
49
54
|
proc {|e| e['summary'].nil? || e['summary'].is_a?(String) } =>
|
50
55
|
"summary must be a string",
|
51
56
|
|
@@ -115,7 +120,8 @@ module Flapjack
|
|
115
120
|
# 'check' => check,
|
116
121
|
# 'time' => timestamp,
|
117
122
|
# 'initial_failure_delay' => initial_failure_delay,
|
118
|
-
# 'repeat_failure_delay' => repeat_failure_delay
|
123
|
+
# 'repeat_failure_delay' => repeat_failure_delay,
|
124
|
+
# 'initial_recovery_delay' => initial_recovery_delay,
|
119
125
|
# 'type' => 'service',
|
120
126
|
# 'state' => state,
|
121
127
|
# 'summary' => check_output,
|
@@ -176,8 +182,8 @@ module Flapjack
|
|
176
182
|
"#{attrs['entity']}:#{attrs['check']}"
|
177
183
|
end
|
178
184
|
[:state, :time, :initial_failure_delay, :repeat_failure_delay,
|
179
|
-
:summary, :details, :perfdata,
|
180
|
-
:duration].each do |key|
|
185
|
+
:initial_recovery_delay, :summary, :details, :perfdata,
|
186
|
+
:acknowledgement_id, :duration].each do |key|
|
181
187
|
|
182
188
|
instance_variable_set("@#{key.to_s}", attrs[key.to_s])
|
183
189
|
end
|
@@ -199,7 +205,8 @@ module Flapjack
|
|
199
205
|
@state.downcase
|
200
206
|
end
|
201
207
|
|
202
|
-
[:time, :initial_failure_delay, :repeat_failure_delay,
|
208
|
+
[:time, :initial_failure_delay, :repeat_failure_delay,
|
209
|
+
:initial_recovery_delay, :duration].each do |num_prop|
|
203
210
|
define_method(num_prop) do
|
204
211
|
prop = instance_variable_get("@#{num_prop}")
|
205
212
|
return if prop.nil?
|
@@ -28,66 +28,96 @@ module Flapjack
|
|
28
28
|
initial_failure_delay = opts[:initial_failure_delay]
|
29
29
|
repeat_failure_delay = opts[:repeat_failure_delay]
|
30
30
|
|
31
|
-
|
31
|
+
initial_recovery_delay = opts[:initial_recovery_delay]
|
32
32
|
|
33
|
-
|
34
|
-
Flapjack::Data::Condition.healthy?(new_state.condition)
|
33
|
+
label = 'Filter: Delays:'
|
35
34
|
|
35
|
+
if new_state.nil? || !new_state.action.nil?
|
36
36
|
Flapjack.logger.debug {
|
37
|
-
"#{label} pass - not a service event in a
|
37
|
+
"#{label} pass - not a service event in a known state"
|
38
38
|
}
|
39
39
|
return false
|
40
40
|
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
last_recovery = check.latest_notifications.
|
45
|
-
intersect(:condition => Flapjack::Data::Condition.healthy.keys).first
|
46
|
-
last_ack = check.latest_notifications.
|
47
|
-
intersect(:action => 'acknowledgement').first
|
48
|
-
|
49
|
-
last_problem_time = last_problem.nil? ? nil : last_problem.created_at
|
50
|
-
last_notif = [last_problem, last_recovery, last_ack].compact.
|
51
|
-
sort_by(&:created_at).last
|
52
|
-
|
53
|
-
last_change_time = old_state.nil? ? nil : old_state.created_at
|
54
|
-
|
55
|
-
alert_type = Flapjack::Data::Alert.notification_type(new_state.action,
|
56
|
-
new_state.condition)
|
57
|
-
|
58
|
-
last_alert_type = last_notif.nil? ? nil :
|
59
|
-
Flapjack::Data::Alert.notification_type(last_notif.action, last_notif.condition)
|
42
|
+
if (old_state.nil? || Flapjack::Data::Condition.healthy?(old_state.condition)) &&
|
43
|
+
!Flapjack::Data::Condition.healthy?(new_state.condition)
|
60
44
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
"alert type: [#{alert_type}], " +
|
69
|
-
"last_alert_type == alert_type ? #{last_alert_type == alert_type}")
|
70
|
-
|
71
|
-
if !current_condition_duration.nil? && (current_condition_duration < initial_failure_delay)
|
72
|
-
Flapjack.logger.debug("#{label} block - duration of current failure " +
|
73
|
-
"(#{current_condition_duration}) is less than failure_delay (#{initial_failure_delay})")
|
74
|
-
return true
|
75
|
-
end
|
76
|
-
|
77
|
-
if !(last_problem_time.nil? || time_since_last_alert.nil?) &&
|
78
|
-
(time_since_last_alert <= repeat_failure_delay) &&
|
79
|
-
(last_alert_type == alert_type)
|
45
|
+
# just failed
|
46
|
+
if initial_failure_delay > 0
|
47
|
+
Flapjack.logger.debug("#{label} block - just failed, failure_delay = #{initial_failure_delay}")
|
48
|
+
return true
|
49
|
+
end
|
50
|
+
elsif !old_state.nil? && !Flapjack::Data::Condition.healthy?(old_state.condition) &&
|
51
|
+
Flapjack::Data::Condition.healthy?(new_state.condition)
|
80
52
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
53
|
+
# just recovered
|
54
|
+
if initial_recovery_delay > 0
|
55
|
+
Flapjack.logger.debug("#{label} block - just recovered, recovery_delay = #{initial_recovery_delay}")
|
56
|
+
return true
|
57
|
+
end
|
58
|
+
else
|
59
|
+
last_change_time = old_state.nil? ? nil : old_state.created_at
|
60
|
+
current_condition_duration = last_change_time.nil? ? nil : (timestamp - last_change_time)
|
61
|
+
|
62
|
+
if Flapjack::Data::Condition.healthy?(new_state.condition)
|
63
|
+
if !current_condition_duration.nil? && (current_condition_duration < initial_recovery_delay)
|
64
|
+
Flapjack.logger.debug("#{label} block - duration of current success " +
|
65
|
+
"(#{current_condition_duration}) is less than recovery_delay (#{initial_recovery_delay})")
|
66
|
+
return true
|
67
|
+
end
|
68
|
+
|
69
|
+
Flapjack.logger.debug("#{label} pass - not blocking due to recovery delay - " \
|
70
|
+
"current duration #{current_condition_duration}, initial_recovery_delay #{initial_recovery_delay}")
|
71
|
+
false
|
72
|
+
else
|
73
|
+
if !current_condition_duration.nil? && (current_condition_duration < initial_failure_delay)
|
74
|
+
Flapjack.logger.debug("#{label} block - duration of current failure " +
|
75
|
+
"(#{current_condition_duration}) is less than failure_delay (#{initial_failure_delay})")
|
76
|
+
return true
|
77
|
+
end
|
78
|
+
|
79
|
+
last_problem = check.latest_notifications.
|
80
|
+
intersect(:condition => Flapjack::Data::Condition.unhealthy.keys).first
|
81
|
+
last_recovery = check.latest_notifications.
|
82
|
+
intersect(:condition => Flapjack::Data::Condition.healthy.keys).first
|
83
|
+
last_ack = check.latest_notifications.
|
84
|
+
intersect(:action => 'acknowledgement').first
|
85
|
+
|
86
|
+
last_problem_time = last_problem.nil? ? nil : last_problem.created_at
|
87
|
+
last_notif = [last_problem, last_recovery, last_ack].compact.
|
88
|
+
sort_by(&:created_at).last
|
89
|
+
|
90
|
+
alert_type = Flapjack::Data::Alert.notification_type(new_state.action,
|
91
|
+
new_state.condition)
|
92
|
+
|
93
|
+
last_alert_type = last_notif.nil? ? nil :
|
94
|
+
Flapjack::Data::Alert.notification_type(last_notif.action, last_notif.condition)
|
95
|
+
|
96
|
+
time_since_last_alert = last_problem_time.nil? ? nil : (timestamp - last_problem_time)
|
97
|
+
|
98
|
+
Flapjack.logger.debug("#{label} last_problem: #{last_problem_time || 'nil'}, " +
|
99
|
+
"last_change: #{last_change_time || 'nil'}, " +
|
100
|
+
"current_condition_duration: #{current_condition_duration || 'nil'}, " +
|
101
|
+
"time_since_last_alert: #{time_since_last_alert || 'nil'}, " +
|
102
|
+
"alert type: [#{alert_type}], " +
|
103
|
+
"last_alert_type == alert_type ? #{last_alert_type == alert_type}")
|
104
|
+
|
105
|
+
if !(last_problem_time.nil? || time_since_last_alert.nil?) &&
|
106
|
+
(time_since_last_alert <= repeat_failure_delay) &&
|
107
|
+
(last_alert_type == alert_type)
|
108
|
+
|
109
|
+
Flapjack.logger.debug("#{label} block - time since last alert for " +
|
110
|
+
"current problem (#{time_since_last_alert}) is less than " +
|
111
|
+
"repeat_failure_delay (#{repeat_failure_delay}) and last alert type (#{last_alert_type}) " +
|
112
|
+
"is equal to current alert type (#{alert_type})")
|
113
|
+
return true
|
114
|
+
end
|
115
|
+
|
116
|
+
Flapjack.logger.debug("#{label} pass - not blocking because neither of the time comparison " +
|
117
|
+
"conditions were met")
|
118
|
+
false
|
119
|
+
end
|
86
120
|
end
|
87
|
-
|
88
|
-
Flapjack.logger.debug("#{label} pass - not blocking because neither of the time comparison " +
|
89
|
-
"conditions were met")
|
90
|
-
false
|
91
121
|
end
|
92
122
|
end
|
93
123
|
end
|