flapjack 2.0.0b1 → 2.0.0rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +5 -5
- data/CHANGELOG.md +4 -0
- data/features/events.feature +87 -13
- data/features/steps/events_steps.rb +12 -14
- data/lib/flapjack.rb +4 -0
- data/lib/flapjack/data/check.rb +29 -15
- data/lib/flapjack/data/event.rb +13 -6
- data/lib/flapjack/filters/delays.rb +80 -50
- data/lib/flapjack/filters/ok.rb +3 -4
- data/lib/flapjack/processor.rb +11 -0
- data/lib/flapjack/version.rb +1 -1
- data/spec/service_consumers/fixture_data.rb +2 -2
- data/spec/service_consumers/pacts/flapjack-diner_v2.0.json +2447 -2433
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ea7398f861ed32dc81a2ebc3bf46e34a69b2457
|
4
|
+
data.tar.gz: 25e571baec4fbcd2cc7404eb49f254bf6b8ab70c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab8b5bba15913b850cf817da2478b17e9bee841d17c6660f6c7cbd409b266b05e39391e05019e4d854c2034d7e297e4d6d286d263216b47cba4b4af7ac5d9d57
|
7
|
+
data.tar.gz: 745eb50a67ba61cbfca7a8315564d4abbf80909daa015f11f7fa2ce1d12189cdc3edb314286f5ba0e1ddd1148b05fdb3d37b42174e872c770bb9176c6eb1c3b4
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.3.0
|
data/.travis.yml
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
sudo: false
|
2
2
|
language: ruby
|
3
3
|
rvm:
|
4
|
-
- '2.0'
|
5
|
-
- '2.1'
|
6
|
-
- '2.2'
|
7
|
-
- '
|
4
|
+
- '2.0.0-p648'
|
5
|
+
- '2.1.8'
|
6
|
+
- '2.2.4'
|
7
|
+
- '2.3.0'
|
8
|
+
- 'jruby-9.0.4.0'
|
8
9
|
jdk: oraclejdk8
|
9
10
|
script: bundle exec rspec spec && bundle exec rake pact:verify && bundle exec cucumber features && cd src/flapjack && go test -v
|
10
11
|
env: GOPATH=$TRAVIS_BUILD_DIR
|
@@ -16,7 +17,6 @@ before_script:
|
|
16
17
|
- mkdir -p ./log
|
17
18
|
before_install:
|
18
19
|
- git submodule update --init --recursive
|
19
|
-
- gem install bundler
|
20
20
|
install:
|
21
21
|
- go get github.com/garyburd/redigo/redis
|
22
22
|
- go get github.com/go-martini/martini
|
data/CHANGELOG.md
CHANGED
data/features/events.feature
CHANGED
@@ -31,12 +31,31 @@ Feature: events
|
|
31
31
|
|
32
32
|
@time
|
33
33
|
Scenario: Check critical to critical after 10 seconds, with an initial delay of 5 seconds
|
34
|
-
Given
|
35
|
-
|
34
|
+
Given event initial failure delay is 5 seconds
|
35
|
+
And the check is in an ok state
|
36
|
+
When a critical event is received
|
36
37
|
And 10 seconds passes
|
37
|
-
And a critical event
|
38
|
+
And a critical event is received
|
38
39
|
Then 1 notification should have been generated
|
39
40
|
|
41
|
+
@time
|
42
|
+
Scenario: Check recovery with recovery delay
|
43
|
+
Given event initial recovery delay is 30 seconds
|
44
|
+
And the check is in an ok state
|
45
|
+
When a critical event is received
|
46
|
+
And 1 minute passes
|
47
|
+
And a critical event is received
|
48
|
+
Then 1 notification should have been generated
|
49
|
+
When 10 seconds passes
|
50
|
+
And an ok event is received
|
51
|
+
Then 1 notification should have been generated
|
52
|
+
When 10 seconds passes
|
53
|
+
And an ok event is received
|
54
|
+
Then 1 notification should have been generated
|
55
|
+
When 25 seconds passes
|
56
|
+
And an ok event is received
|
57
|
+
Then 2 notifications should have been generated
|
58
|
+
|
40
59
|
@time
|
41
60
|
Scenario: Check ok to warning for 1 minute
|
42
61
|
Given the check is in an ok state
|
@@ -55,10 +74,11 @@ Feature: events
|
|
55
74
|
|
56
75
|
@time
|
57
76
|
Scenario: Check ok to critical for 1 minute, with an initial delay of 2 minutes
|
77
|
+
Given event initial failure delay is 120 seconds
|
58
78
|
And the check is in an ok state
|
59
|
-
When a critical event
|
79
|
+
When a critical event is received
|
60
80
|
And 1 minute passes
|
61
|
-
And a critical event
|
81
|
+
And a critical event is received
|
62
82
|
Then no notifications should have been generated
|
63
83
|
|
64
84
|
@time
|
@@ -88,13 +108,14 @@ Feature: events
|
|
88
108
|
|
89
109
|
@time
|
90
110
|
Scenario: Check critical and alerted to critical for 40 seconds, with a repeat delay of 20 seconds
|
91
|
-
Given
|
92
|
-
|
111
|
+
Given event repeat failure delay is 20 seconds
|
112
|
+
And the check is in an ok state
|
113
|
+
When a critical event is received
|
93
114
|
And 1 minute passes
|
94
|
-
And a critical event
|
115
|
+
And a critical event is received
|
95
116
|
Then 1 notification should have been generated
|
96
117
|
When 40 seconds passes
|
97
|
-
And a critical event
|
118
|
+
And a critical event is received
|
98
119
|
Then 2 notifications should have been generated
|
99
120
|
|
100
121
|
@time
|
@@ -110,13 +131,14 @@ Feature: events
|
|
110
131
|
|
111
132
|
@time
|
112
133
|
Scenario: Check critical and alerted to critical for 6 minutes, with a repeat delay of 10 minutes
|
113
|
-
Given
|
114
|
-
|
134
|
+
Given event repeat failure delay is 600 seconds
|
135
|
+
And the check is in an ok state
|
136
|
+
When a critical event is received
|
115
137
|
And 1 minute passes
|
116
|
-
And a critical event
|
138
|
+
And a critical event is received
|
117
139
|
Then 1 notification should have been generated
|
118
140
|
When 6 minutes passes
|
119
|
-
And a critical event
|
141
|
+
And a critical event is received
|
120
142
|
Then 1 notification should have been generated
|
121
143
|
|
122
144
|
@time
|
@@ -199,6 +221,8 @@ Feature: events
|
|
199
221
|
Then 2 notifications should have been generated
|
200
222
|
When 6 minutes passes
|
201
223
|
And a critical event is received
|
224
|
+
And 45 seconds passes
|
225
|
+
And a critical event is received
|
202
226
|
Then 3 notifications should have been generated
|
203
227
|
When 6 minutes passes
|
204
228
|
And a critical event is received
|
@@ -220,6 +244,8 @@ Feature: events
|
|
220
244
|
Then 2 notifications should have been generated
|
221
245
|
When 10 seconds passes
|
222
246
|
And a critical event is received
|
247
|
+
And 45 seconds passes
|
248
|
+
And a critical event is received
|
223
249
|
Then 3 notifications should have been generated
|
224
250
|
When 50 seconds passes
|
225
251
|
And a critical event is received
|
@@ -329,6 +355,8 @@ Feature: events
|
|
329
355
|
When 10 seconds passes
|
330
356
|
# 120 seconds
|
331
357
|
And a critical event is received
|
358
|
+
And 45 seconds passes
|
359
|
+
And a critical event is received
|
332
360
|
Then 3 notifications should have been generated
|
333
361
|
When 10 seconds passes
|
334
362
|
And a critical event is received
|
@@ -438,6 +466,8 @@ Scenario: a lot of quick ok -> warning -> ok -> warning
|
|
438
466
|
Then 2 notifications should have been generated
|
439
467
|
When 10 seconds passes
|
440
468
|
And a warning event is received
|
469
|
+
And 45 seconds passes
|
470
|
+
And a warning event is received
|
441
471
|
Then 3 notifications should have been generated
|
442
472
|
When 10 seconds passes
|
443
473
|
And a warning event is received
|
@@ -445,3 +475,47 @@ Scenario: a lot of quick ok -> warning -> ok -> warning
|
|
445
475
|
When 10 seconds passes
|
446
476
|
And an ok event is received
|
447
477
|
Then 4 notifications should have been generated
|
478
|
+
|
479
|
+
@time
|
480
|
+
Scenario: a transient recovery
|
481
|
+
Given event initial recovery delay is 30 seconds
|
482
|
+
Given the check is in a critical state
|
483
|
+
When 35 seconds passes
|
484
|
+
# 'event 1: critical'
|
485
|
+
And a critical event is received
|
486
|
+
Then 1 notification should have been generated
|
487
|
+
|
488
|
+
When 5 seconds passes
|
489
|
+
# 'event 2: ok - no event, before initial_recovery_delay'
|
490
|
+
And an ok event is received
|
491
|
+
Then 1 notification should have been generated
|
492
|
+
|
493
|
+
When 5 seconds passes
|
494
|
+
# 'event 3: ok - no event, still before initial_recovery_delay'
|
495
|
+
And an ok event is received
|
496
|
+
Then 1 notification should have been generated
|
497
|
+
|
498
|
+
When 10 seconds passes
|
499
|
+
# 'event 4: critical, no event because we were in the initial_failure_delay'
|
500
|
+
And a critical event is received
|
501
|
+
Then 1 notification should have been generated
|
502
|
+
|
503
|
+
When 30 seconds passes
|
504
|
+
# 'event 5: critical, no event because we are in the repeat_failure_delay'
|
505
|
+
And a critical event is received
|
506
|
+
Then 1 notification should have been generated
|
507
|
+
|
508
|
+
When 10 seconds passes
|
509
|
+
# 'event 6: ok - no event, before initial_recovery_delay'
|
510
|
+
And an ok event is received
|
511
|
+
Then 1 notification should have been generated
|
512
|
+
|
513
|
+
When 60 seconds passes
|
514
|
+
# 'event 7: ok - send event, after initial_recovery_delay'
|
515
|
+
And an ok event is received
|
516
|
+
Then 2 notifications should have been generated
|
517
|
+
|
518
|
+
When 60 seconds passes
|
519
|
+
# 'event 8: ok - no event, we have already sent the recovery'
|
520
|
+
And an ok event is received
|
521
|
+
Then 2 notifications should have been generated
|
data/features/steps/events_steps.rb
CHANGED
@@ -64,7 +64,7 @@ def set_state(entity_name, check_name, condition, last_update = Time.now)
|
|
64
64
|
check.states << state
|
65
65
|
end
|
66
66
|
|
67
|
-
def submit_event(condition, entity_name, check_name, opts = {})
|
67
|
+
def submit_event(condition, entity_name, check_name)
|
68
68
|
err_rate = case condition
|
69
69
|
when 'ok'
|
70
70
|
'0'
|
@@ -80,9 +80,10 @@ def submit_event(condition, entity_name, check_name, opts = {})
|
|
80
80
|
'entity' => entity_name,
|
81
81
|
'check' => check_name,
|
82
82
|
}
|
83
|
-
['
|
84
|
-
|
85
|
-
|
83
|
+
['initial_failure', 'repeat_failure', 'initial_recovery'].each do |delay_type|
|
84
|
+
delay = instance_variable_get("@event_#{delay_type}_delay")
|
85
|
+
next if delay.nil? || delay < 0
|
86
|
+
event.update("#{delay_type}_delay".to_sym => delay)
|
86
87
|
end
|
87
88
|
Flapjack.redis.rpush('events', Flapjack.dump_json(event))
|
88
89
|
end
|
@@ -151,18 +152,15 @@ Given /^(?:the check|check '([\w\.\-]+)' for entity '([\w\.\-]+)') is in unsched
|
|
151
152
|
set_unscheduled_maintenance(entity_name, check_name, 60*60*2)
|
152
153
|
end
|
153
154
|
|
154
|
-
|
155
|
+
# Set the delays for next events
|
156
|
+
Given /^event (initial failure|repeat failure|initial recovery) delay is (\d+) seconds$/ do |delay_type, delay|
|
157
|
+
instance_variable_set("@event_#{delay_type.sub(/ /, '_')}_delay", delay.to_i)
|
158
|
+
end
|
159
|
+
|
160
|
+
When /^an? (ok|failure|critical|warning|unknown) event is received(?: for check '([\w\.\-]+)' on entity '([\w\.\-]+)')?$/ do |condition, check_name, entity_name|
|
155
161
|
check_name ||= @check_name
|
156
162
|
entity_name ||= @entity_name
|
157
|
-
|
158
|
-
when 'initial'
|
159
|
-
{:initial_failure_delay => failure_delay}
|
160
|
-
when 'repeat'
|
161
|
-
{:repeat_failure_delay => failure_delay}
|
162
|
-
else
|
163
|
-
{}
|
164
|
-
end
|
165
|
-
submit_event(condition, entity_name, check_name, opts)
|
163
|
+
submit_event(condition, entity_name, check_name)
|
166
164
|
drain_events
|
167
165
|
end
|
168
166
|
|
data/lib/flapjack.rb
CHANGED
@@ -16,6 +16,10 @@ module Flapjack
|
|
16
16
|
DEFAULT_INITIAL_FAILURE_DELAY = 30
|
17
17
|
DEFAULT_REPEAT_FAILURE_DELAY = 60
|
18
18
|
|
19
|
+
# defaulting to 0 for backwards compatibility; can be overridden in config,
|
20
|
+
# or per check / event
|
21
|
+
DEFAULT_INITIAL_RECOVERY_DELAY = 0
|
22
|
+
|
19
23
|
def self.load_json(data)
|
20
24
|
ActiveSupport::JSON.decode(data)
|
21
25
|
end
|
data/lib/flapjack/data/check.rb
CHANGED
@@ -31,15 +31,16 @@ module Flapjack
|
|
31
31
|
include Flapjack::Data::Extensions::Associations
|
32
32
|
include Flapjack::Data::Extensions::ShortName
|
33
33
|
|
34
|
-
define_attributes :name
|
35
|
-
:enabled
|
36
|
-
:
|
37
|
-
:
|
38
|
-
:
|
39
|
-
:
|
40
|
-
:
|
41
|
-
:
|
42
|
-
:
|
34
|
+
define_attributes :name => :string,
|
35
|
+
:enabled => :boolean,
|
36
|
+
:initial_failure_delay => :integer,
|
37
|
+
:repeat_failure_delay => :integer,
|
38
|
+
:initial_recovery_delay => :integer,
|
39
|
+
:ack_hash => :string,
|
40
|
+
:notification_count => :integer,
|
41
|
+
:condition => :string,
|
42
|
+
:failing => :boolean,
|
43
|
+
:alertable => :boolean
|
43
44
|
|
44
45
|
index_by :enabled, :failing, :alertable
|
45
46
|
unique_index_by :name, :ack_hash
|
@@ -204,6 +205,9 @@ module Flapjack
|
|
204
205
|
validates :repeat_failure_delay, :allow_nil => true,
|
205
206
|
:numericality => {:greater_than_or_equal_to => 0, :only_integer => true}
|
206
207
|
|
208
|
+
validates :initial_recovery_delay, :allow_nil => true,
|
209
|
+
:numericality => {:greater_than_or_equal_to => 0, :only_integer => true}
|
210
|
+
|
207
211
|
before_validation :create_ack_hash
|
208
212
|
validates :ack_hash, :presence => true
|
209
213
|
|
@@ -213,7 +217,8 @@ module Flapjack
|
|
213
217
|
|
214
218
|
swagger_schema :Check do
|
215
219
|
key :required, [:id, :type, :name, :enabled, :initial_failure_delay,
|
216
|
-
:repeat_failure_delay, :failing
|
220
|
+
:repeat_failure_delay, :initial_recovery_delay, :failing, :condition,
|
221
|
+
:ack_hash]
|
217
222
|
property :id do
|
218
223
|
key :type, :string
|
219
224
|
key :format, :uuid
|
@@ -235,6 +240,9 @@ module Flapjack
|
|
235
240
|
property :repeat_failure_delay do
|
236
241
|
key :type, :integer
|
237
242
|
end
|
243
|
+
property :initial_recovery_delay do
|
244
|
+
key :type, :integer
|
245
|
+
end
|
238
246
|
property :failing do
|
239
247
|
key :type, :boolean
|
240
248
|
key :enum, [true, false]
|
@@ -286,8 +294,7 @@ module Flapjack
|
|
286
294
|
end
|
287
295
|
|
288
296
|
swagger_schema :CheckCreate do
|
289
|
-
key :required, [:type, :name, :enabled
|
290
|
-
:repeat_failure_delay]
|
297
|
+
key :required, [:type, :name, :enabled]
|
291
298
|
property :id do
|
292
299
|
key :type, :string
|
293
300
|
key :format, :uuid
|
@@ -309,6 +316,9 @@ module Flapjack
|
|
309
316
|
property :repeat_failure_delay do
|
310
317
|
key :type, :integer
|
311
318
|
end
|
319
|
+
property :initial_recovery_delay do
|
320
|
+
key :type, :integer
|
321
|
+
end
|
312
322
|
property :relationships do
|
313
323
|
key :"$ref", :CheckCreateLinks
|
314
324
|
end
|
@@ -343,6 +353,9 @@ module Flapjack
|
|
343
353
|
property :repeat_failure_delay do
|
344
354
|
key :type, :integer
|
345
355
|
end
|
356
|
+
property :initial_recovery_delay do
|
357
|
+
key :type, :integer
|
358
|
+
end
|
346
359
|
property :relationships do
|
347
360
|
key :"$ref", :CheckUpdateLinks
|
348
361
|
end
|
@@ -372,7 +385,7 @@ module Flapjack
|
|
372
385
|
@jsonapi_methods ||= {
|
373
386
|
:post => Flapjack::Gateways::JSONAPI::Data::MethodDescriptor.new(
|
374
387
|
:attributes => [:name, :enabled, :initial_failure_delay,
|
375
|
-
:repeat_failure_delay],
|
388
|
+
:repeat_failure_delay, :initial_recovery_delay],
|
376
389
|
:descriptions => {
|
377
390
|
:singular => "Create a check.",
|
378
391
|
:multiple => "Create checks."
|
@@ -380,7 +393,8 @@ module Flapjack
|
|
380
393
|
),
|
381
394
|
:get => Flapjack::Gateways::JSONAPI::Data::MethodDescriptor.new(
|
382
395
|
:attributes => [:name, :enabled, :initial_failure_delay,
|
383
|
-
:repeat_failure_delay, :
|
396
|
+
:repeat_failure_delay, :initial_recovery_delay,
|
397
|
+
:failing, :condition, :ack_hash],
|
384
398
|
:descriptions => {
|
385
399
|
:singular => "Returns data for a check.",
|
386
400
|
:multiple => "Returns data for multiple check records."
|
@@ -388,7 +402,7 @@ module Flapjack
|
|
388
402
|
),
|
389
403
|
:patch => Flapjack::Gateways::JSONAPI::Data::MethodDescriptor.new(
|
390
404
|
:attributes => [:name, :enabled, :initial_failure_delay,
|
391
|
-
:repeat_failure_delay],
|
405
|
+
:repeat_failure_delay, :initial_recovery_delay],
|
392
406
|
:descriptions => {
|
393
407
|
:singular => "Update a check.",
|
394
408
|
:multiple => "Update checks."
|
data/lib/flapjack/data/event.rb
CHANGED
@@ -14,8 +14,8 @@ module Flapjack
|
|
14
14
|
# tags are now ignored, tags on the checks are used for rule matching
|
15
15
|
REQUIRED_KEYS = %w(state check)
|
16
16
|
OPTIONAL_KEYS = %w(entity time initial_failure_delay
|
17
|
-
repeat_failure_delay summary details
|
18
|
-
perfdata type)
|
17
|
+
repeat_failure_delay initial_recovery_delay summary details
|
18
|
+
acknowledgement_id duration tags perfdata type)
|
19
19
|
|
20
20
|
VALIDATIONS = {
|
21
21
|
proc {|e| e['state'].is_a?(String) &&
|
@@ -46,6 +46,11 @@ module Flapjack
|
|
46
46
|
(e['repeat_failure_delay'].is_a?(String) && !!(e['repeat_failure_delay'] =~ /^\d+$/)) } =>
|
47
47
|
"repeat_failure_delay must be a positive integer, or a string castable to one",
|
48
48
|
|
49
|
+
proc {|e| e['initial_recovery_delay'].nil? ||
|
50
|
+
e['initial_recovery_delay'].is_a?(Integer) ||
|
51
|
+
(e['initial_recovery_delay'].is_a?(String) && !!(e['initial_recovery_delay'] =~ /^\d+$/)) } =>
|
52
|
+
"initial_recovery_delay must be a positive integer, or a string castable to one",
|
53
|
+
|
49
54
|
proc {|e| e['summary'].nil? || e['summary'].is_a?(String) } =>
|
50
55
|
"summary must be a string",
|
51
56
|
|
@@ -115,7 +120,8 @@ module Flapjack
|
|
115
120
|
# 'check' => check,
|
116
121
|
# 'time' => timestamp,
|
117
122
|
# 'initial_failure_delay' => initial_failure_delay,
|
118
|
-
# 'repeat_failure_delay' => repeat_failure_delay
|
123
|
+
# 'repeat_failure_delay' => repeat_failure_delay,
|
124
|
+
# 'initial_recovery_delay' => initial_recovery_delay,
|
119
125
|
# 'type' => 'service',
|
120
126
|
# 'state' => state,
|
121
127
|
# 'summary' => check_output,
|
@@ -176,8 +182,8 @@ module Flapjack
|
|
176
182
|
"#{attrs['entity']}:#{attrs['check']}"
|
177
183
|
end
|
178
184
|
[:state, :time, :initial_failure_delay, :repeat_failure_delay,
|
179
|
-
:summary, :details, :perfdata,
|
180
|
-
:duration].each do |key|
|
185
|
+
:initial_recovery_delay, :summary, :details, :perfdata,
|
186
|
+
:acknowledgement_id, :duration].each do |key|
|
181
187
|
|
182
188
|
instance_variable_set("@#{key.to_s}", attrs[key.to_s])
|
183
189
|
end
|
@@ -199,7 +205,8 @@ module Flapjack
|
|
199
205
|
@state.downcase
|
200
206
|
end
|
201
207
|
|
202
|
-
[:time, :initial_failure_delay, :repeat_failure_delay,
|
208
|
+
[:time, :initial_failure_delay, :repeat_failure_delay,
|
209
|
+
:initial_recovery_delay, :duration].each do |num_prop|
|
203
210
|
define_method(num_prop) do
|
204
211
|
prop = instance_variable_get("@#{num_prop}")
|
205
212
|
return if prop.nil?
|
data/lib/flapjack/filters/delays.rb
CHANGED
@@ -28,66 +28,96 @@ module Flapjack
|
|
28
28
|
initial_failure_delay = opts[:initial_failure_delay]
|
29
29
|
repeat_failure_delay = opts[:repeat_failure_delay]
|
30
30
|
|
31
|
-
|
31
|
+
initial_recovery_delay = opts[:initial_recovery_delay]
|
32
32
|
|
33
|
-
|
34
|
-
Flapjack::Data::Condition.healthy?(new_state.condition)
|
33
|
+
label = 'Filter: Delays:'
|
35
34
|
|
35
|
+
if new_state.nil? || !new_state.action.nil?
|
36
36
|
Flapjack.logger.debug {
|
37
|
-
"#{label} pass - not a service event in a
|
37
|
+
"#{label} pass - not a service event in a known state"
|
38
38
|
}
|
39
39
|
return false
|
40
40
|
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
last_recovery = check.latest_notifications.
|
45
|
-
intersect(:condition => Flapjack::Data::Condition.healthy.keys).first
|
46
|
-
last_ack = check.latest_notifications.
|
47
|
-
intersect(:action => 'acknowledgement').first
|
48
|
-
|
49
|
-
last_problem_time = last_problem.nil? ? nil : last_problem.created_at
|
50
|
-
last_notif = [last_problem, last_recovery, last_ack].compact.
|
51
|
-
sort_by(&:created_at).last
|
52
|
-
|
53
|
-
last_change_time = old_state.nil? ? nil : old_state.created_at
|
54
|
-
|
55
|
-
alert_type = Flapjack::Data::Alert.notification_type(new_state.action,
|
56
|
-
new_state.condition)
|
57
|
-
|
58
|
-
last_alert_type = last_notif.nil? ? nil :
|
59
|
-
Flapjack::Data::Alert.notification_type(last_notif.action, last_notif.condition)
|
42
|
+
if (old_state.nil? || Flapjack::Data::Condition.healthy?(old_state.condition)) &&
|
43
|
+
!Flapjack::Data::Condition.healthy?(new_state.condition)
|
60
44
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
"alert type: [#{alert_type}], " +
|
69
|
-
"last_alert_type == alert_type ? #{last_alert_type == alert_type}")
|
70
|
-
|
71
|
-
if !current_condition_duration.nil? && (current_condition_duration < initial_failure_delay)
|
72
|
-
Flapjack.logger.debug("#{label} block - duration of current failure " +
|
73
|
-
"(#{current_condition_duration}) is less than failure_delay (#{initial_failure_delay})")
|
74
|
-
return true
|
75
|
-
end
|
76
|
-
|
77
|
-
if !(last_problem_time.nil? || time_since_last_alert.nil?) &&
|
78
|
-
(time_since_last_alert <= repeat_failure_delay) &&
|
79
|
-
(last_alert_type == alert_type)
|
45
|
+
# just failed
|
46
|
+
if initial_failure_delay > 0
|
47
|
+
Flapjack.logger.debug("#{label} block - just failed, failure_delay = #{initial_failure_delay}")
|
48
|
+
return true
|
49
|
+
end
|
50
|
+
elsif !old_state.nil? && !Flapjack::Data::Condition.healthy?(old_state.condition) &&
|
51
|
+
Flapjack::Data::Condition.healthy?(new_state.condition)
|
80
52
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
53
|
+
# just recovered
|
54
|
+
if initial_recovery_delay > 0
|
55
|
+
Flapjack.logger.debug("#{label} block - just recovered, recovery_delay = #{initial_recovery_delay}")
|
56
|
+
return true
|
57
|
+
end
|
58
|
+
else
|
59
|
+
last_change_time = old_state.nil? ? nil : old_state.created_at
|
60
|
+
current_condition_duration = last_change_time.nil? ? nil : (timestamp - last_change_time)
|
61
|
+
|
62
|
+
if Flapjack::Data::Condition.healthy?(new_state.condition)
|
63
|
+
if !current_condition_duration.nil? && (current_condition_duration < initial_recovery_delay)
|
64
|
+
Flapjack.logger.debug("#{label} block - duration of current success " +
|
65
|
+
"(#{current_condition_duration}) is less than recovery_delay (#{initial_recovery_delay})")
|
66
|
+
return true
|
67
|
+
end
|
68
|
+
|
69
|
+
Flapjack.logger.debug("#{label} pass - not blocking due to recovery delay - " \
|
70
|
+
"current duration #{current_condition_duration}, initial_recovery_delay #{initial_recovery_delay}")
|
71
|
+
false
|
72
|
+
else
|
73
|
+
if !current_condition_duration.nil? && (current_condition_duration < initial_failure_delay)
|
74
|
+
Flapjack.logger.debug("#{label} block - duration of current failure " +
|
75
|
+
"(#{current_condition_duration}) is less than failure_delay (#{initial_failure_delay})")
|
76
|
+
return true
|
77
|
+
end
|
78
|
+
|
79
|
+
last_problem = check.latest_notifications.
|
80
|
+
intersect(:condition => Flapjack::Data::Condition.unhealthy.keys).first
|
81
|
+
last_recovery = check.latest_notifications.
|
82
|
+
intersect(:condition => Flapjack::Data::Condition.healthy.keys).first
|
83
|
+
last_ack = check.latest_notifications.
|
84
|
+
intersect(:action => 'acknowledgement').first
|
85
|
+
|
86
|
+
last_problem_time = last_problem.nil? ? nil : last_problem.created_at
|
87
|
+
last_notif = [last_problem, last_recovery, last_ack].compact.
|
88
|
+
sort_by(&:created_at).last
|
89
|
+
|
90
|
+
alert_type = Flapjack::Data::Alert.notification_type(new_state.action,
|
91
|
+
new_state.condition)
|
92
|
+
|
93
|
+
last_alert_type = last_notif.nil? ? nil :
|
94
|
+
Flapjack::Data::Alert.notification_type(last_notif.action, last_notif.condition)
|
95
|
+
|
96
|
+
time_since_last_alert = last_problem_time.nil? ? nil : (timestamp - last_problem_time)
|
97
|
+
|
98
|
+
Flapjack.logger.debug("#{label} last_problem: #{last_problem_time || 'nil'}, " +
|
99
|
+
"last_change: #{last_change_time || 'nil'}, " +
|
100
|
+
"current_condition_duration: #{current_condition_duration || 'nil'}, " +
|
101
|
+
"time_since_last_alert: #{time_since_last_alert || 'nil'}, " +
|
102
|
+
"alert type: [#{alert_type}], " +
|
103
|
+
"last_alert_type == alert_type ? #{last_alert_type == alert_type}")
|
104
|
+
|
105
|
+
if !(last_problem_time.nil? || time_since_last_alert.nil?) &&
|
106
|
+
(time_since_last_alert <= repeat_failure_delay) &&
|
107
|
+
(last_alert_type == alert_type)
|
108
|
+
|
109
|
+
Flapjack.logger.debug("#{label} block - time since last alert for " +
|
110
|
+
"current problem (#{time_since_last_alert}) is less than " +
|
111
|
+
"repeat_failure_delay (#{repeat_failure_delay}) and last alert type (#{last_alert_type}) " +
|
112
|
+
"is equal to current alert type (#{alert_type})")
|
113
|
+
return true
|
114
|
+
end
|
115
|
+
|
116
|
+
Flapjack.logger.debug("#{label} pass - not blocking because neither of the time comparison " +
|
117
|
+
"conditions were met")
|
118
|
+
false
|
119
|
+
end
|
86
120
|
end
|
87
|
-
|
88
|
-
Flapjack.logger.debug("#{label} pass - not blocking because neither of the time comparison " +
|
89
|
-
"conditions were met")
|
90
|
-
false
|
91
121
|
end
|
92
122
|
end
|
93
123
|
end
|