flapjack 0.6.24 → 0.6.25
Sign up to get free protection for your applications and to get access to all the features.
- data/etc/flapjack_config.yaml.example +15 -0
- data/features/events.feature +25 -3
- data/lib/flapjack/data/entity_check.rb +21 -0
- data/lib/flapjack/filters/delays.rb +20 -8
- data/lib/flapjack/jabber.rb +17 -4
- data/lib/flapjack/version.rb +1 -1
- data/lib/flapjack/web.rb +13 -9
- data/spec/lib/flapjack/jabber_spec.rb +5 -1
- data/spec/lib/flapjack/web_spec.rb +6 -4
- metadata +2 -2
@@ -42,6 +42,21 @@ development:
|
|
42
42
|
rooms:
|
43
43
|
- "gimp@conference.jabber.domain.tld"
|
44
44
|
- "log@conference.jabber.domain.tld"
|
45
|
+
oobetet:
|
46
|
+
enabled: yes
|
47
|
+
server: "jabber.domain.tld"
|
48
|
+
port: 5222
|
49
|
+
jabberid: "flapjacktest@jabber.domain.tld"
|
50
|
+
password: "nuther-good-password"
|
51
|
+
alias: "flapjacktest"
|
52
|
+
watched_check: "PING"
|
53
|
+
watched_entity: "foo.bar.net"
|
54
|
+
max_latency: 300
|
55
|
+
pagerduty_contact: "11111111111111111111111111111111"
|
56
|
+
rooms:
|
57
|
+
- "flapjacktest@conference.jabber.domain.tld"
|
58
|
+
- "gimp@conference.jabber.domain.tld"
|
59
|
+
- "log@conference.jabber.domain.tld"
|
45
60
|
pagerduty_gateway:
|
46
61
|
enabled: yes
|
47
62
|
queue: pagerduty_notifications
|
data/features/events.feature
CHANGED
@@ -11,7 +11,6 @@ Feature: events
|
|
11
11
|
Given check 'abc' for entity 'def' is in an ok state
|
12
12
|
When an ok event is received for check 'abc' on entity 'def'
|
13
13
|
Then a notification should not be generated for check 'abc' on entity 'def'
|
14
|
-
# And show me the output
|
15
14
|
|
16
15
|
Scenario: Check ok to failed
|
17
16
|
Given check 'abc' for entity 'def' is in an ok state
|
@@ -40,11 +39,9 @@ Feature: events
|
|
40
39
|
When a failure event is received for check 'abc' on entity 'def'
|
41
40
|
And 1 minute passes
|
42
41
|
And a failure event is received for check 'abc' on entity 'def'
|
43
|
-
And show me the notifications
|
44
42
|
Then a notification should be generated for check 'abc' on entity 'def'
|
45
43
|
When 1 minute passes
|
46
44
|
And a failure event is received for check 'abc' on entity 'def'
|
47
|
-
And show me the notifications
|
48
45
|
Then a notification should not be generated for check 'abc' on entity 'def'
|
49
46
|
|
50
47
|
@time
|
@@ -121,6 +118,31 @@ Feature: events
|
|
121
118
|
And a failure event is received for check 'abc' on entity 'def'
|
122
119
|
Then a notification should be generated for check 'abc' on entity 'def'
|
123
120
|
|
121
|
+
@time
|
122
|
+
Scenario: Oscillating state, period of two minutes
|
123
|
+
Given check 'abc' for entity 'def' is in an ok state
|
124
|
+
When a failure event is received for check 'abc' on entity 'def'
|
125
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
126
|
+
When 50 seconds passes
|
127
|
+
And a failure event is received for check 'abc' on entity 'def'
|
128
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
129
|
+
When 10 seconds passes
|
130
|
+
And an ok event is received for check 'abc' on entity 'def'
|
131
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
132
|
+
When 50 seconds passes
|
133
|
+
And an ok event is received for check 'abc' on entity 'def'
|
134
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
135
|
+
When 10 seconds passes
|
136
|
+
And a failure event is received for check 'abc' on entity 'def'
|
137
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
138
|
+
When 50 seconds passes
|
139
|
+
And a failure event is received for check 'abc' on entity 'def'
|
140
|
+
#And show me the notifications
|
141
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
142
|
+
When 10 seconds passes
|
143
|
+
And an ok event is received for check 'abc' on entity 'def'
|
144
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
145
|
+
|
124
146
|
Scenario: Acknowledgement when ok
|
125
147
|
Given check 'abc' for entity 'def' is in an ok state
|
126
148
|
When an acknowledgement event is received for check 'abc' on entity 'def'
|
@@ -248,6 +248,27 @@ module Flapjack
|
|
248
248
|
lan.to_i
|
249
249
|
end
|
250
250
|
|
251
|
+
def last_notifications_of_each_type
|
252
|
+
ln = {:problem => last_problem_notification,
|
253
|
+
:recovery => last_recovery_notification,
|
254
|
+
:acknowledgement => last_acknowledgement_notification }
|
255
|
+
puts "***** last_notifications_of_each_type for #{@key.inspect}: #{ln.inspect}"
|
256
|
+
ln
|
257
|
+
end
|
258
|
+
|
259
|
+
# unpredictable results if there are multiple notifications of different
|
260
|
+
# types sent at the same time
|
261
|
+
def last_notification
|
262
|
+
nils = { :type => nil, :timestamp => nil }
|
263
|
+
lne = last_notifications_of_each_type
|
264
|
+
ln = lne.delete_if {|type, timestamp|
|
265
|
+
timestamp.nil? || timestamp.to_i == 0
|
266
|
+
}
|
267
|
+
return nils unless ln.length > 0
|
268
|
+
lns = ln.sort_by { |type, timestamp| timestamp }.last
|
269
|
+
{ :type => lns[0], :timestamp => lns[1] }
|
270
|
+
end
|
271
|
+
|
251
272
|
def event_count_at(timestamp)
|
252
273
|
eca = @redis.get("#{@key}:#{timestamp}:count")
|
253
274
|
return unless (eca && eca =~ /^\d+$/)
|
@@ -9,7 +9,7 @@ module Flapjack
|
|
9
9
|
# * If the service event’s state is a failure, and the time since the ok => failure state change
|
10
10
|
# is below a threshold (e.g. 30 seconds), then don't alert
|
11
11
|
# * If the service event’s state is a failure, and the time since the last alert is below a
|
12
|
-
# threshold (5 minutes), then don’t alert
|
12
|
+
# threshold (5 minutes), and the last notification was not a recovery, then don’t alert
|
13
13
|
class Delays
|
14
14
|
include Base
|
15
15
|
|
@@ -25,20 +25,32 @@ module Flapjack
|
|
25
25
|
current_time = Time.now.to_i
|
26
26
|
|
27
27
|
if entity_check.failed?
|
28
|
-
last_problem_alert
|
29
|
-
last_change
|
28
|
+
last_problem_alert = entity_check.last_problem_notification
|
29
|
+
last_change = entity_check.last_change
|
30
|
+
last_notification = entity_check.last_notification
|
31
|
+
last_alert_type = last_notification[:type]
|
32
|
+
last_alert_timestamp = last_notification[:timestamp]
|
30
33
|
|
31
34
|
current_failure_duration = current_time - last_change
|
32
35
|
time_since_last_alert = current_time - last_problem_alert unless last_problem_alert.nil?
|
33
|
-
@log.debug("Filter: Delays: last_problem_alert: #{last_problem_alert.to_s},
|
36
|
+
@log.debug("Filter: Delays: last_problem_alert: #{last_problem_alert.to_s}, " +
|
37
|
+
"last_change: #{last_change.to_s}, " +
|
38
|
+
"current_failure_duration: #{current_failure_duration}, " +
|
39
|
+
"time_since_last_alert: #{time_since_last_alert.to_s}")
|
34
40
|
if (current_failure_duration < failure_delay)
|
35
41
|
result = true
|
36
|
-
@log.debug("Filter: Delays: blocking because duration of current failure
|
37
|
-
|
42
|
+
@log.debug("Filter: Delays: blocking because duration of current failure " +
|
43
|
+
"(#{current_failure_duration}) is less than failure_delay (#{failure_delay})")
|
44
|
+
elsif !last_problem_alert.nil? && (time_since_last_alert < resend_delay) &&
|
45
|
+
(last_alert_type !~ /recovery/i)
|
46
|
+
|
38
47
|
result = true
|
39
|
-
@log.debug("Filter: Delays: blocking because time since last alert for
|
48
|
+
@log.debug("Filter: Delays: blocking because time since last alert for " +
|
49
|
+
"current problem (#{time_since_last_alert}) is less than " +
|
50
|
+
"resend_delay (#{resend_delay}) and last alert type (#{last_alert_type}) was not a recovery")
|
40
51
|
else
|
41
|
-
@log.debug("Filter: Delays: not blocking because neither of the time comparison
|
52
|
+
@log.debug("Filter: Delays: not blocking because neither of the time comparison " +
|
53
|
+
"conditions were met")
|
42
54
|
end
|
43
55
|
else
|
44
56
|
@log.debug("Filter: Delays: entity_check.failed? returned false ...")
|
data/lib/flapjack/jabber.rb
CHANGED
@@ -32,9 +32,13 @@ module Flapjack
|
|
32
32
|
log.level = Logger::INFO
|
33
33
|
Blather.logger = log
|
34
34
|
|
35
|
+
def initialize
|
36
|
+
@buffer = []
|
37
|
+
@hostname = Socket.gethostname
|
38
|
+
end
|
39
|
+
|
35
40
|
def setup
|
36
41
|
@redis = build_redis_connection_pool
|
37
|
-
@hostname = Socket.gethostname
|
38
42
|
@flapjack_jid = Blather::JID.new((@config['jabberid'] || 'flapjack') + '/' + @hostname)
|
39
43
|
|
40
44
|
super(@flapjack_jid, @config['password'], @config['server'], @config['port'].to_i)
|
@@ -96,10 +100,13 @@ module Flapjack
|
|
96
100
|
say(room, "flapjack jabber gateway started at #{Time.now}, hello!", :groupchat)
|
97
101
|
end
|
98
102
|
end
|
103
|
+
return if @buffer.empty?
|
104
|
+
while stanza = @buffer.shift
|
105
|
+
@logger.debug("Sending a buffered jabber message to: #{stanza.to}, using: #{stanza.type}, message: #{stanza.body}")
|
106
|
+
end
|
99
107
|
end
|
100
108
|
|
101
109
|
def interpreter(command)
|
102
|
-
|
103
110
|
msg = nil
|
104
111
|
action = nil
|
105
112
|
entity_check = nil
|
@@ -219,8 +226,14 @@ module Flapjack
|
|
219
226
|
end
|
220
227
|
|
221
228
|
def say(to, msg, using = :chat)
|
222
|
-
|
223
|
-
|
229
|
+
stanza = Blather::Stanza::Message.new(to, msg, using)
|
230
|
+
if connected?
|
231
|
+
@logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
|
232
|
+
write(stanza)
|
233
|
+
else
|
234
|
+
@logger.debug("Buffering a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
|
235
|
+
@buffer << stanza
|
236
|
+
end
|
224
237
|
end
|
225
238
|
|
226
239
|
def add_shutdown_event(opts = {})
|
data/lib/flapjack/version.rb
CHANGED
data/lib/flapjack/web.rb
CHANGED
@@ -62,6 +62,7 @@ module Flapjack
|
|
62
62
|
end
|
63
63
|
|
64
64
|
get '/check' do
|
65
|
+
begin
|
65
66
|
@entity = params[:entity]
|
66
67
|
@check = params[:check]
|
67
68
|
|
@@ -74,11 +75,7 @@ module Flapjack
|
|
74
75
|
@check_last_update = entity_check.last_update
|
75
76
|
@check_last_change = last_change
|
76
77
|
@check_summary = entity_check.summary
|
77
|
-
@last_notifications =
|
78
|
-
{:problem => entity_check.last_problem_notification,
|
79
|
-
:recovery => entity_check.last_recovery_notification,
|
80
|
-
:acknowledgement => entity_check.last_acknowledgement_notification
|
81
|
-
}
|
78
|
+
@last_notifications = entity_check.last_notifications_of_each_type
|
82
79
|
@in_scheduled_maintenance = entity_check.in_scheduled_maintenance?
|
83
80
|
@in_unscheduled_maintenance = entity_check.in_unscheduled_maintenance?
|
84
81
|
@scheduled_maintenances = entity_check.maintenances(nil, nil, :scheduled => true)
|
@@ -86,15 +83,20 @@ module Flapjack
|
|
86
83
|
entity_check.event_count_at(entity_check.last_change) : nil
|
87
84
|
|
88
85
|
haml :check
|
86
|
+
rescue Exception => e
|
87
|
+
puts e.message
|
88
|
+
puts e.backtrace.join("\n")
|
89
|
+
end
|
90
|
+
|
89
91
|
end
|
90
92
|
|
91
93
|
post '/acknowledgements/:entity/:check' do
|
92
|
-
@entity
|
93
|
-
@check
|
94
|
-
@summary
|
94
|
+
@entity = params[:entity]
|
95
|
+
@check = params[:check]
|
96
|
+
@summary = params[:summary]
|
95
97
|
@acknowledgement_id = params[:acknowledgement_id]
|
96
98
|
|
97
|
-
dur
|
99
|
+
dur = ChronicDuration.parse(params[:duration] || '')
|
98
100
|
@duration = (dur.nil? || (dur <= 0)) ? (4 * 60 * 60) : dur
|
99
101
|
|
100
102
|
entity_check = get_entity_check(@entity, @check)
|
@@ -102,6 +104,8 @@ module Flapjack
|
|
102
104
|
|
103
105
|
ack = entity_check.create_acknowledgement('summary' => (@summary || ''),
|
104
106
|
'acknowledgement_id' => @acknowledgement_id, 'duration' => @duration)
|
107
|
+
|
108
|
+
# FIXME: make this a flash message on the check page and delete the acknowledge page
|
105
109
|
@acknowledge_success = !!ack
|
106
110
|
[201, haml(:acknowledge)]
|
107
111
|
end
|
@@ -48,6 +48,7 @@ describe Flapjack::Jabber do
|
|
48
48
|
fj = Flapjack::Jabber.new
|
49
49
|
fj.bootstrap(:config => config)
|
50
50
|
|
51
|
+
fj.should_receive(:connected?).and_return(true)
|
51
52
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Presence))
|
52
53
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
|
53
54
|
|
@@ -79,6 +80,7 @@ describe Flapjack::Jabber do
|
|
79
80
|
fj.bootstrap(:config => config)
|
80
81
|
fj.instance_variable_set('@redis_handler', redis)
|
81
82
|
|
83
|
+
fj.should_receive(:connected?).and_return(true)
|
82
84
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
|
83
85
|
|
84
86
|
fj.on_groupchat(stanza)
|
@@ -93,6 +95,7 @@ describe Flapjack::Jabber do
|
|
93
95
|
fj = Flapjack::Jabber.new
|
94
96
|
fj.bootstrap(:config => config)
|
95
97
|
|
98
|
+
fj.should_receive(:connected?).and_return(true)
|
96
99
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
|
97
100
|
|
98
101
|
fj.on_groupchat(stanza)
|
@@ -132,9 +135,10 @@ describe Flapjack::Jabber do
|
|
132
135
|
|
133
136
|
fj = Flapjack::Jabber.new
|
134
137
|
fj.bootstrap(:config => config)
|
138
|
+
fj.should_receive(:register_handler).exactly(4).times
|
135
139
|
|
136
140
|
fj.should_receive(:connect)
|
137
|
-
fj.should_receive(:connected?).
|
141
|
+
fj.should_receive(:connected?).exactly(3).times.and_return(true)
|
138
142
|
fj.should_receive(:should_quit?).exactly(3).times.and_return(false, false, true)
|
139
143
|
redis.should_receive(:blpop).twice.and_return(
|
140
144
|
["jabber_notifications", %q{{"notification_type":"problem","event_id":"main-example.com:ping","state":"critical","summary":"!!!"}}],
|
@@ -92,13 +92,15 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
|
|
92
92
|
it "shows the state of a check for an entity" do
|
93
93
|
time = Time.now.to_i
|
94
94
|
|
95
|
+
last_notifications = {:problem => time - ((3 * 60 * 60) + (5 * 60)),
|
96
|
+
:recovery => time - (3 * 60 * 60),
|
97
|
+
:acknowledgement => nil }
|
98
|
+
|
95
99
|
entity_check.should_receive(:state).and_return('ok')
|
96
100
|
entity_check.should_receive(:last_update).and_return(time - (3 * 60 * 60))
|
97
101
|
entity_check.should_receive(:last_change).and_return(time - (3 * 60 * 60))
|
98
102
|
entity_check.should_receive(:summary).and_return('all good')
|
99
|
-
entity_check.should_receive(:last_problem_notification).and_return(time - ((3 * 60 * 60) + (5 * 60)))
|
100
|
-
entity_check.should_receive(:last_recovery_notification).and_return(time - (3 * 60 * 60))
|
101
|
-
entity_check.should_receive(:last_acknowledgement_notification).and_return(nil)
|
103
|
+
entity_check.should_receive(:last_notifications_of_each_type).and_return(last_notifications)
|
102
104
|
entity_check.should_receive(:in_scheduled_maintenance?).and_return(false)
|
103
105
|
entity_check.should_receive(:in_unscheduled_maintenance?).and_return(false)
|
104
106
|
entity_check.should_receive(:maintenances).with(nil, nil, :scheduled => true).and_return([])
|
@@ -185,4 +187,4 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
|
|
185
187
|
last_response.status.should == 302
|
186
188
|
end
|
187
189
|
|
188
|
-
end
|
190
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flapjack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.24
|
4
|
+
version: 0.6.25
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-09-
|
14
|
+
date: 2012-09-21 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: daemons
|