flapjack 0.6.24 → 0.6.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/etc/flapjack_config.yaml.example +15 -0
- data/features/events.feature +25 -3
- data/lib/flapjack/data/entity_check.rb +21 -0
- data/lib/flapjack/filters/delays.rb +20 -8
- data/lib/flapjack/jabber.rb +17 -4
- data/lib/flapjack/version.rb +1 -1
- data/lib/flapjack/web.rb +13 -9
- data/spec/lib/flapjack/jabber_spec.rb +5 -1
- data/spec/lib/flapjack/web_spec.rb +6 -4
- metadata +2 -2
@@ -42,6 +42,21 @@ development:
|
|
42
42
|
rooms:
|
43
43
|
- "gimp@conference.jabber.domain.tld"
|
44
44
|
- "log@conference.jabber.domain.tld"
|
45
|
+
oobetet:
|
46
|
+
enabled: yes
|
47
|
+
server: "jabber.domain.tld"
|
48
|
+
port: 5222
|
49
|
+
jabberid: "flapjacktest@jabber.domain.tld"
|
50
|
+
password: "nuther-good-password"
|
51
|
+
alias: "flapjacktest"
|
52
|
+
watched_check: "PING"
|
53
|
+
watched_entity: "foo.bar.net"
|
54
|
+
max_latency: 300
|
55
|
+
pagerduty_contact: "11111111111111111111111111111111"
|
56
|
+
rooms:
|
57
|
+
- "flapjacktest@conference.jabber.domain.tld"
|
58
|
+
- "gimp@conference.jabber.domain.tld"
|
59
|
+
- "log@conference.jabber.domain.tld"
|
45
60
|
pagerduty_gateway:
|
46
61
|
enabled: yes
|
47
62
|
queue: pagerduty_notifications
|
data/features/events.feature
CHANGED
@@ -11,7 +11,6 @@ Feature: events
|
|
11
11
|
Given check 'abc' for entity 'def' is in an ok state
|
12
12
|
When an ok event is received for check 'abc' on entity 'def'
|
13
13
|
Then a notification should not be generated for check 'abc' on entity 'def'
|
14
|
-
# And show me the output
|
15
14
|
|
16
15
|
Scenario: Check ok to failed
|
17
16
|
Given check 'abc' for entity 'def' is in an ok state
|
@@ -40,11 +39,9 @@ Feature: events
|
|
40
39
|
When a failure event is received for check 'abc' on entity 'def'
|
41
40
|
And 1 minute passes
|
42
41
|
And a failure event is received for check 'abc' on entity 'def'
|
43
|
-
And show me the notifications
|
44
42
|
Then a notification should be generated for check 'abc' on entity 'def'
|
45
43
|
When 1 minute passes
|
46
44
|
And a failure event is received for check 'abc' on entity 'def'
|
47
|
-
And show me the notifications
|
48
45
|
Then a notification should not be generated for check 'abc' on entity 'def'
|
49
46
|
|
50
47
|
@time
|
@@ -121,6 +118,31 @@ Feature: events
|
|
121
118
|
And a failure event is received for check 'abc' on entity 'def'
|
122
119
|
Then a notification should be generated for check 'abc' on entity 'def'
|
123
120
|
|
121
|
+
@time
|
122
|
+
Scenario: Oscillating state, period of two minutes
|
123
|
+
Given check 'abc' for entity 'def' is in an ok state
|
124
|
+
When a failure event is received for check 'abc' on entity 'def'
|
125
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
126
|
+
When 50 seconds passes
|
127
|
+
And a failure event is received for check 'abc' on entity 'def'
|
128
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
129
|
+
When 10 seconds passes
|
130
|
+
And an ok event is received for check 'abc' on entity 'def'
|
131
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
132
|
+
When 50 seconds passes
|
133
|
+
And an ok event is received for check 'abc' on entity 'def'
|
134
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
135
|
+
When 10 seconds passes
|
136
|
+
And a failure event is received for check 'abc' on entity 'def'
|
137
|
+
Then a notification should not be generated for check 'abc' on entity 'def'
|
138
|
+
When 50 seconds passes
|
139
|
+
And a failure event is received for check 'abc' on entity 'def'
|
140
|
+
#And show me the notifications
|
141
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
142
|
+
When 10 seconds passes
|
143
|
+
And an ok event is received for check 'abc' on entity 'def'
|
144
|
+
Then a notification should be generated for check 'abc' on entity 'def'
|
145
|
+
|
124
146
|
Scenario: Acknowledgement when ok
|
125
147
|
Given check 'abc' for entity 'def' is in an ok state
|
126
148
|
When an acknowledgement event is received for check 'abc' on entity 'def'
|
@@ -248,6 +248,27 @@ module Flapjack
|
|
248
248
|
lan.to_i
|
249
249
|
end
|
250
250
|
|
251
|
+
def last_notifications_of_each_type
|
252
|
+
ln = {:problem => last_problem_notification,
|
253
|
+
:recovery => last_recovery_notification,
|
254
|
+
:acknowledgement => last_acknowledgement_notification }
|
255
|
+
puts "***** last_notifications_of_each_type for #{@key.inspect}: #{ln.inspect}"
|
256
|
+
ln
|
257
|
+
end
|
258
|
+
|
259
|
+
# unpredictable results if there are multiple notifications of different
|
260
|
+
# types sent at the same time
|
261
|
+
def last_notification
|
262
|
+
nils = { :type => nil, :timestamp => nil }
|
263
|
+
lne = last_notifications_of_each_type
|
264
|
+
ln = lne.delete_if {|type, timestamp|
|
265
|
+
timestamp.nil? || timestamp.to_i == 0
|
266
|
+
}
|
267
|
+
return nils unless ln.length > 0
|
268
|
+
lns = ln.sort_by { |type, timestamp| timestamp }.last
|
269
|
+
{ :type => lns[0], :timestamp => lns[1] }
|
270
|
+
end
|
271
|
+
|
251
272
|
def event_count_at(timestamp)
|
252
273
|
eca = @redis.get("#{@key}:#{timestamp}:count")
|
253
274
|
return unless (eca && eca =~ /^\d+$/)
|
@@ -9,7 +9,7 @@ module Flapjack
|
|
9
9
|
# * If the service event’s state is a failure, and the time since the ok => failure state change
|
10
10
|
# is below a threshold (e.g. 30 seconds), then don't alert
|
11
11
|
# * If the service event’s state is a failure, and the time since the last alert is below a
|
12
|
-
# threshold (5 minutes), then don’t alert
|
12
|
+
# threshold (5 minutes), and the last notification was not a recovery, then don’t alert
|
13
13
|
class Delays
|
14
14
|
include Base
|
15
15
|
|
@@ -25,20 +25,32 @@ module Flapjack
|
|
25
25
|
current_time = Time.now.to_i
|
26
26
|
|
27
27
|
if entity_check.failed?
|
28
|
-
last_problem_alert
|
29
|
-
last_change
|
28
|
+
last_problem_alert = entity_check.last_problem_notification
|
29
|
+
last_change = entity_check.last_change
|
30
|
+
last_notification = entity_check.last_notification
|
31
|
+
last_alert_type = last_notification[:type]
|
32
|
+
last_alert_timestamp = last_notification[:timestamp]
|
30
33
|
|
31
34
|
current_failure_duration = current_time - last_change
|
32
35
|
time_since_last_alert = current_time - last_problem_alert unless last_problem_alert.nil?
|
33
|
-
@log.debug("Filter: Delays: last_problem_alert: #{last_problem_alert.to_s},
|
36
|
+
@log.debug("Filter: Delays: last_problem_alert: #{last_problem_alert.to_s}, " +
|
37
|
+
"last_change: #{last_change.to_s}, " +
|
38
|
+
"current_failure_duration: #{current_failure_duration}, " +
|
39
|
+
"time_since_last_alert: #{time_since_last_alert.to_s}")
|
34
40
|
if (current_failure_duration < failure_delay)
|
35
41
|
result = true
|
36
|
-
@log.debug("Filter: Delays: blocking because duration of current failure
|
37
|
-
|
42
|
+
@log.debug("Filter: Delays: blocking because duration of current failure " +
|
43
|
+
"(#{current_failure_duration}) is less than failure_delay (#{failure_delay})")
|
44
|
+
elsif !last_problem_alert.nil? && (time_since_last_alert < resend_delay) &&
|
45
|
+
(last_alert_type !~ /recovery/i)
|
46
|
+
|
38
47
|
result = true
|
39
|
-
@log.debug("Filter: Delays: blocking because time since last alert for
|
48
|
+
@log.debug("Filter: Delays: blocking because time since last alert for " +
|
49
|
+
"current problem (#{time_since_last_alert}) is less than " +
|
50
|
+
"resend_delay (#{resend_delay}) and last alert type (#{last_alert_type}) was not a recovery")
|
40
51
|
else
|
41
|
-
@log.debug("Filter: Delays: not blocking because neither of the time comparison
|
52
|
+
@log.debug("Filter: Delays: not blocking because neither of the time comparison " +
|
53
|
+
"conditions were met")
|
42
54
|
end
|
43
55
|
else
|
44
56
|
@log.debug("Filter: Delays: entity_check.failed? returned false ...")
|
data/lib/flapjack/jabber.rb
CHANGED
@@ -32,9 +32,13 @@ module Flapjack
|
|
32
32
|
log.level = Logger::INFO
|
33
33
|
Blather.logger = log
|
34
34
|
|
35
|
+
def initialize
|
36
|
+
@buffer = []
|
37
|
+
@hostname = Socket.gethostname
|
38
|
+
end
|
39
|
+
|
35
40
|
def setup
|
36
41
|
@redis = build_redis_connection_pool
|
37
|
-
@hostname = Socket.gethostname
|
38
42
|
@flapjack_jid = Blather::JID.new((@config['jabberid'] || 'flapjack') + '/' + @hostname)
|
39
43
|
|
40
44
|
super(@flapjack_jid, @config['password'], @config['server'], @config['port'].to_i)
|
@@ -96,10 +100,13 @@ module Flapjack
|
|
96
100
|
say(room, "flapjack jabber gateway started at #{Time.now}, hello!", :groupchat)
|
97
101
|
end
|
98
102
|
end
|
103
|
+
return if @buffer.empty?
|
104
|
+
while stanza = @buffer.shift
|
105
|
+
@logger.debug("Sending a buffered jabber message to: #{stanza.to}, using: #{stanza.type}, message: #{stanza.body}")
|
106
|
+
end
|
99
107
|
end
|
100
108
|
|
101
109
|
def interpreter(command)
|
102
|
-
|
103
110
|
msg = nil
|
104
111
|
action = nil
|
105
112
|
entity_check = nil
|
@@ -219,8 +226,14 @@ module Flapjack
|
|
219
226
|
end
|
220
227
|
|
221
228
|
def say(to, msg, using = :chat)
|
222
|
-
|
223
|
-
|
229
|
+
stanza = Blather::Stanza::Message.new(to, msg, using)
|
230
|
+
if connected?
|
231
|
+
@logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
|
232
|
+
write(stanza)
|
233
|
+
else
|
234
|
+
@logger.debug("Buffering a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
|
235
|
+
@buffer << stanza
|
236
|
+
end
|
224
237
|
end
|
225
238
|
|
226
239
|
def add_shutdown_event(opts = {})
|
data/lib/flapjack/version.rb
CHANGED
data/lib/flapjack/web.rb
CHANGED
@@ -62,6 +62,7 @@ module Flapjack
|
|
62
62
|
end
|
63
63
|
|
64
64
|
get '/check' do
|
65
|
+
begin
|
65
66
|
@entity = params[:entity]
|
66
67
|
@check = params[:check]
|
67
68
|
|
@@ -74,11 +75,7 @@ module Flapjack
|
|
74
75
|
@check_last_update = entity_check.last_update
|
75
76
|
@check_last_change = last_change
|
76
77
|
@check_summary = entity_check.summary
|
77
|
-
@last_notifications =
|
78
|
-
{:problem => entity_check.last_problem_notification,
|
79
|
-
:recovery => entity_check.last_recovery_notification,
|
80
|
-
:acknowledgement => entity_check.last_acknowledgement_notification
|
81
|
-
}
|
78
|
+
@last_notifications = entity_check.last_notifications_of_each_type
|
82
79
|
@in_scheduled_maintenance = entity_check.in_scheduled_maintenance?
|
83
80
|
@in_unscheduled_maintenance = entity_check.in_unscheduled_maintenance?
|
84
81
|
@scheduled_maintenances = entity_check.maintenances(nil, nil, :scheduled => true)
|
@@ -86,15 +83,20 @@ module Flapjack
|
|
86
83
|
entity_check.event_count_at(entity_check.last_change) : nil
|
87
84
|
|
88
85
|
haml :check
|
86
|
+
rescue Exception => e
|
87
|
+
puts e.message
|
88
|
+
puts e.backtrace.join("\n")
|
89
|
+
end
|
90
|
+
|
89
91
|
end
|
90
92
|
|
91
93
|
post '/acknowledgements/:entity/:check' do
|
92
|
-
@entity
|
93
|
-
@check
|
94
|
-
@summary
|
94
|
+
@entity = params[:entity]
|
95
|
+
@check = params[:check]
|
96
|
+
@summary = params[:summary]
|
95
97
|
@acknowledgement_id = params[:acknowledgement_id]
|
96
98
|
|
97
|
-
dur
|
99
|
+
dur = ChronicDuration.parse(params[:duration] || '')
|
98
100
|
@duration = (dur.nil? || (dur <= 0)) ? (4 * 60 * 60) : dur
|
99
101
|
|
100
102
|
entity_check = get_entity_check(@entity, @check)
|
@@ -102,6 +104,8 @@ module Flapjack
|
|
102
104
|
|
103
105
|
ack = entity_check.create_acknowledgement('summary' => (@summary || ''),
|
104
106
|
'acknowledgement_id' => @acknowledgement_id, 'duration' => @duration)
|
107
|
+
|
108
|
+
# FIXME: make this a flash message on the check page and delete the acknowledge page
|
105
109
|
@acknowledge_success = !!ack
|
106
110
|
[201, haml(:acknowledge)]
|
107
111
|
end
|
@@ -48,6 +48,7 @@ describe Flapjack::Jabber do
|
|
48
48
|
fj = Flapjack::Jabber.new
|
49
49
|
fj.bootstrap(:config => config)
|
50
50
|
|
51
|
+
fj.should_receive(:connected?).and_return(true)
|
51
52
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Presence))
|
52
53
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
|
53
54
|
|
@@ -79,6 +80,7 @@ describe Flapjack::Jabber do
|
|
79
80
|
fj.bootstrap(:config => config)
|
80
81
|
fj.instance_variable_set('@redis_handler', redis)
|
81
82
|
|
83
|
+
fj.should_receive(:connected?).and_return(true)
|
82
84
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
|
83
85
|
|
84
86
|
fj.on_groupchat(stanza)
|
@@ -93,6 +95,7 @@ describe Flapjack::Jabber do
|
|
93
95
|
fj = Flapjack::Jabber.new
|
94
96
|
fj.bootstrap(:config => config)
|
95
97
|
|
98
|
+
fj.should_receive(:connected?).and_return(true)
|
96
99
|
fj.should_receive(:write).with(an_instance_of(Blather::Stanza::Message))
|
97
100
|
|
98
101
|
fj.on_groupchat(stanza)
|
@@ -132,9 +135,10 @@ describe Flapjack::Jabber do
|
|
132
135
|
|
133
136
|
fj = Flapjack::Jabber.new
|
134
137
|
fj.bootstrap(:config => config)
|
138
|
+
fj.should_receive(:register_handler).exactly(4).times
|
135
139
|
|
136
140
|
fj.should_receive(:connect)
|
137
|
-
fj.should_receive(:connected?).
|
141
|
+
fj.should_receive(:connected?).exactly(3).times.and_return(true)
|
138
142
|
fj.should_receive(:should_quit?).exactly(3).times.and_return(false, false, true)
|
139
143
|
redis.should_receive(:blpop).twice.and_return(
|
140
144
|
["jabber_notifications", %q{{"notification_type":"problem","event_id":"main-example.com:ping","state":"critical","summary":"!!!"}}],
|
@@ -92,13 +92,15 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
|
|
92
92
|
it "shows the state of a check for an entity" do
|
93
93
|
time = Time.now.to_i
|
94
94
|
|
95
|
+
last_notifications = {:problem => time - ((3 * 60 * 60) + (5 * 60)),
|
96
|
+
:recovery => time - (3 * 60 * 60),
|
97
|
+
:acknowledgement => nil }
|
98
|
+
|
95
99
|
entity_check.should_receive(:state).and_return('ok')
|
96
100
|
entity_check.should_receive(:last_update).and_return(time - (3 * 60 * 60))
|
97
101
|
entity_check.should_receive(:last_change).and_return(time - (3 * 60 * 60))
|
98
102
|
entity_check.should_receive(:summary).and_return('all good')
|
99
|
-
entity_check.should_receive(:
|
100
|
-
entity_check.should_receive(:last_recovery_notification).and_return(time - (3 * 60 * 60))
|
101
|
-
entity_check.should_receive(:last_acknowledgement_notification).and_return(nil)
|
103
|
+
entity_check.should_receive(:last_notifications_of_each_type).and_return(last_notifications)
|
102
104
|
entity_check.should_receive(:in_scheduled_maintenance?).and_return(false)
|
103
105
|
entity_check.should_receive(:in_unscheduled_maintenance?).and_return(false)
|
104
106
|
entity_check.should_receive(:maintenances).with(nil, nil, :scheduled => true).and_return([])
|
@@ -185,4 +187,4 @@ describe Flapjack::Web, :sinatra => true, :redis => true do
|
|
185
187
|
last_response.status.should == 302
|
186
188
|
end
|
187
189
|
|
188
|
-
end
|
190
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flapjack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.25
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-09-
|
14
|
+
date: 2012-09-21 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: daemons
|