flapjack 0.6.32 → 0.6.33
Sign up to get free protection for your applications and to get access to all the features.
data/doc/DEBUGGING.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
Here are some useful ways of debugging various things in Flapjack.
|
2
|
+
|
3
|
+
Redis Connection Leaks
|
4
|
+
|
5
|
+
The following command reports, every five seconds, the number of connections to redis (as reported by lsof) and the total number of EM external protocol connections currently in place:
|
6
|
+
|
7
|
+
production paths:
|
8
|
+
|
9
|
+
while true ; do
|
10
|
+
echo -n "EM connection count: "
|
11
|
+
tail -50000 /var/log/flapjack/flapjack.log | grep -i "connection count" \
|
12
|
+
| tail -1 | awk '{ print $5 }'
|
13
|
+
echo -n "lsof redis: "
|
14
|
+
sudo lsof -p `cat /var/run/flapjack/flapjack.pid` | grep :6379 | wc -l
|
15
|
+
sleep 5
|
16
|
+
done
|
17
|
+
|
18
|
+
development paths:
|
19
|
+
|
20
|
+
while true ; do
|
21
|
+
echo -n "EM connection count: "
|
22
|
+
tail -50000 log/flapjack.log | grep -i "connection count" \
|
23
|
+
| tail -1 | awk '{ print $5 }'
|
24
|
+
echo -n "lsof redis: "
|
25
|
+
sudo lsof -p `cat tmp/pids/flapjack.pid` | grep localhost:6379 | wc -l
|
26
|
+
sleep 5
|
27
|
+
done
|
28
|
+
|
@@ -27,23 +27,25 @@ module Flapjack
|
|
27
27
|
num_states = hist_states.size
|
28
28
|
|
29
29
|
hist_states.each_with_index do |obj, index|
|
30
|
-
|
30
|
+
ts = obj.delete(:timestamp)
|
31
|
+
if index == (num_states - 1)
|
32
|
+
# last (even if the only one)
|
33
|
+
obj[:start_time] = start_time ? [ts, start_time].max : ts
|
34
|
+
obj[:end_time] = end_time
|
35
|
+
elsif (index == 0)
|
31
36
|
# initial
|
32
|
-
ts = obj.delete(:timestamp)
|
33
37
|
obj[:start_time] = start_time ? [ts, start_time].max : ts
|
34
38
|
obj[:end_time] = hist_states[index + 1][:timestamp]
|
35
|
-
elsif index == (num_states - 1)
|
36
|
-
# last
|
37
|
-
obj[:start_time] = obj.delete(:timestamp)
|
38
|
-
obj[:end_time] = end_time
|
39
39
|
else
|
40
40
|
# except for first and last
|
41
|
-
obj[:start_time] =
|
41
|
+
obj[:start_time] = ts
|
42
42
|
obj[:end_time] = hist_states[index + 1][:timestamp]
|
43
43
|
end
|
44
44
|
obj[:duration] = obj[:end_time] ? (obj[:end_time] - obj[:start_time]) : nil
|
45
45
|
end
|
46
46
|
|
47
|
+
# p hist_states
|
48
|
+
|
47
49
|
hist_states.reject {|obj| obj[:state] == 'ok'}
|
48
50
|
end
|
49
51
|
|
@@ -107,16 +109,18 @@ module Flapjack
|
|
107
109
|
split_outs = []
|
108
110
|
|
109
111
|
outs.each { |o|
|
110
|
-
next unless o[:start_time] < sm[:start_time] &&
|
111
|
-
o[:end_time] > sm[:end_time]
|
112
|
+
next unless o[:end_time] && (o[:start_time] < sm[:start_time]) &&
|
113
|
+
(o[:end_time] > sm[:end_time])
|
112
114
|
o[:delete] = true
|
113
115
|
split_outs += [{:state => o[:state],
|
114
116
|
:start_time => o[:start_time],
|
115
117
|
:end_time => sm[:start_time],
|
118
|
+
:duration => sm[:start_time] - o[:start_time],
|
116
119
|
:summary => "#{o[:summary]} [split start]"},
|
117
120
|
{:state => o[:state],
|
118
121
|
:start_time => sm[:end_time],
|
119
122
|
:end_time => o[:end_time],
|
123
|
+
:duration => o[:end_time] - sm[:end_time],
|
120
124
|
:summary => "#{o[:summary]} [split finish]"}]
|
121
125
|
}
|
122
126
|
|
@@ -130,7 +134,7 @@ module Flapjack
|
|
130
134
|
sched_maintenances.each do |sm|
|
131
135
|
|
132
136
|
outs.each do |o|
|
133
|
-
next unless (sm[:start_time] < o[:end_time]) &&
|
137
|
+
next unless o[:end_time] && (sm[:start_time] < o[:end_time]) &&
|
134
138
|
(sm[:end_time] > o[:start_time])
|
135
139
|
|
136
140
|
if sm[:start_time] <= o[:start_time] &&
|
@@ -142,9 +146,11 @@ module Flapjack
|
|
142
146
|
elsif sm[:start_time] <= o[:start_time]
|
143
147
|
# partially overlapping on the earlier side
|
144
148
|
o[:start_time] = sm[:end_time]
|
149
|
+
o[:duration] = o[:end_time] - o[:start_time]
|
145
150
|
elsif sm[:end_time] >= o[:end_time]
|
146
151
|
# partially overlapping on the later side
|
147
152
|
o[:end_time] = sm[:start_time]
|
153
|
+
o[:duration] = o[:end_time] - o[:start_time]
|
148
154
|
end
|
149
155
|
end
|
150
156
|
|
@@ -152,7 +158,7 @@ module Flapjack
|
|
152
158
|
end
|
153
159
|
|
154
160
|
total_secs = outs.inject(total_secs) {|ret, o|
|
155
|
-
ret[o[:state]] +=
|
161
|
+
ret[o[:state]] += o[:duration] if o[:duration]
|
156
162
|
ret
|
157
163
|
}
|
158
164
|
|
@@ -14,7 +14,7 @@ module Flapjack
|
|
14
14
|
def perform(notification)
|
15
15
|
bootstrap
|
16
16
|
@logger.debug "Woo, got a notification to send out: #{notification.inspect}"
|
17
|
-
dispatch(notification, :logger => @logger, :redis => ::
|
17
|
+
dispatch(notification, :logger => @logger, :redis => ::Resque.redis)
|
18
18
|
end
|
19
19
|
|
20
20
|
end
|
data/lib/flapjack/version.rb
CHANGED
@@ -72,6 +72,19 @@ describe 'Flapjack::API::EntityCheck::Presenter' do
|
|
72
72
|
# TODO check the data in those hashes
|
73
73
|
end
|
74
74
|
|
75
|
+
it "returns a (small) outage hash for a single state change" do
|
76
|
+
entity_check.should_receive(:historical_states).
|
77
|
+
with(nil, nil).and_return([{:state => 'critical', :timestamp => time - (4 * 60 * 60)}])
|
78
|
+
entity_check.should_receive(:historical_state_before).
|
79
|
+
with(time - (4 * 60 * 60)).and_return(nil)
|
80
|
+
|
81
|
+
ecp = Flapjack::API::EntityCheckPresenter.new(entity_check)
|
82
|
+
outages = ecp.outages(nil, nil)
|
83
|
+
outages.should_not be_nil
|
84
|
+
outages.should be_an(Array)
|
85
|
+
outages.should have(1).time_range
|
86
|
+
end
|
87
|
+
|
75
88
|
it "a list of unscheduled maintenances for an entity check" do
|
76
89
|
entity_check.should_receive(:maintenances).
|
77
90
|
with(time - (12 * 60 * 60), time, :scheduled => false).and_return(maintenances)
|
@@ -120,7 +133,6 @@ describe 'Flapjack::API::EntityCheck::Presenter' do
|
|
120
133
|
ecp = Flapjack::API::EntityCheckPresenter.new(entity_check)
|
121
134
|
downtimes = ecp.downtime(time - (12 * 60 * 60), time)
|
122
135
|
|
123
|
-
|
124
136
|
# 22 minutes, 3 + 8 + 11
|
125
137
|
downtimes.should be_a(Hash)
|
126
138
|
downtimes[:total_seconds].should == {'critical' => (22 * 60),
|
@@ -156,4 +168,30 @@ describe 'Flapjack::API::EntityCheck::Presenter' do
|
|
156
168
|
downtimes[:downtime].should have(4).time_ranges
|
157
169
|
end
|
158
170
|
|
171
|
+
it "returns downtime and handles an unfinished problem state" do
|
172
|
+
current = [{:state => 'critical', :timestamp => time - (4 * 60 * 60)},
|
173
|
+
{:state => 'ok', :timestamp => time - (4 * 60 * 60) + (5 * 60)},
|
174
|
+
{:state => 'critical', :timestamp => time - (3 * 60 * 60)}]
|
175
|
+
|
176
|
+
entity_check.should_receive(:historical_states).
|
177
|
+
with(nil, nil).and_return(current)
|
178
|
+
|
179
|
+
entity_check.should_receive(:historical_state_before).
|
180
|
+
with(time - (4 * 60 * 60)).and_return(nil)
|
181
|
+
|
182
|
+
entity_check.should_receive(:maintenances).
|
183
|
+
with(nil, nil, :scheduled => true).and_return([])
|
184
|
+
|
185
|
+
ecp = Flapjack::API::EntityCheckPresenter.new(entity_check)
|
186
|
+
downtimes = ecp.downtime(nil, nil)
|
187
|
+
|
188
|
+
downtimes.should be_a(Hash)
|
189
|
+
downtimes[:total_seconds].should == {'critical' => (5 * 60)}
|
190
|
+
downtimes[:percentages].should == {'critical' => nil}
|
191
|
+
downtimes[:downtime].should be_an(Array)
|
192
|
+
# the last outage gets split by the intervening maintenance period,
|
193
|
+
# but the fully covered one gets removed.
|
194
|
+
downtimes[:downtime].should have(2).time_ranges
|
195
|
+
end
|
196
|
+
|
159
197
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flapjack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.32
|
4
|
+
version: 0.6.33
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-09-
|
14
|
+
date: 2012-09-25 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: daemons
|
@@ -382,6 +382,7 @@ files:
|
|
382
382
|
- dist/puppet/ruby/manifests/rubygems.pp
|
383
383
|
- dist/puppet/sqlite3/manifests/dev.pp
|
384
384
|
- doc/CONFIGURING.md
|
385
|
+
- doc/DEBUGGING.md
|
385
386
|
- doc/DEVELOPING.md
|
386
387
|
- doc/GLOSSARY.md
|
387
388
|
- doc/INSTALL.md
|