flapjack 0.7.34 → 0.7.35
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +7 -0
- data/Gemfile +1 -0
- data/bin/flapper +22 -11
- data/lib/flapjack/data/alert.rb +4 -2
- data/lib/flapjack/data/entity.rb +10 -0
- data/lib/flapjack/data/entity_check.rb +74 -0
- data/lib/flapjack/gateways/api.rb +46 -42
- data/lib/flapjack/gateways/web.rb +11 -10
- data/lib/flapjack/gateways/web/public/img/flapjack-favicon-64-32-24-16.ico +0 -0
- data/lib/flapjack/gateways/web/views/contact.html.erb +27 -27
- data/lib/flapjack/gateways/web/views/self_stats.html.erb +10 -0
- data/lib/flapjack/logger.rb +6 -0
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/gateways/api/contact_methods_spec.rb +55 -58
- data/spec/lib/flapjack/gateways/api/entity_methods_spec.rb +56 -61
- data/spec/lib/flapjack/gateways/web_spec.rb +13 -14
- data/spec/spec_helper.rb +9 -0
- metadata +4 -7
- data/lib/flapjack/checks/http_content +0 -15
- data/lib/flapjack/checks/ping +0 -10
- data/lib/flapjack/gateways/web/public/img/flapjack-favicon-32-16.ico +0 -0
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,12 @@
|
|
1
1
|
## Flapjack Changelog
|
2
2
|
|
3
|
+
# 0.7.35 - 2013-12-10
|
4
|
+
- Feature: allow flapper to flap with an arbitrary interval gh-383 (@jessereynolds)
|
5
|
+
- Feature: Expose statistics for currency of all checks gh-386 (@jessereynolds)
|
6
|
+
- Chore: WebUI: contacts page - move notification rules section higher gh-376 (@jessereynolds)
|
7
|
+
- Bug: 500 errors in api are not logged by default gh-379 (@ali-graham)
|
8
|
+
- Bug: Exception generating notifications when state_duration is nil gh-372 (@jessereynolds)
|
9
|
+
|
3
10
|
# 0.7.34 - 2013-11-20
|
4
11
|
- Feature: update logoage (@jessereynolds)
|
5
12
|
- Bug: flapjack-nagios-receiver to start daemonized from init script (@jessereynolds)
|
data/Gemfile
CHANGED
data/bin/flapper
CHANGED
@@ -25,13 +25,14 @@ ensure
|
|
25
25
|
Socket.do_not_reverse_lookup = orig
|
26
26
|
end
|
27
27
|
|
28
|
-
def main(bind_ip)
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
def main(bind_ip, bind_port, frequency)
|
29
|
+
raise "bind_port must be an integer" unless bind_port.is_a?(Integer)
|
30
|
+
start_every = frequency
|
31
|
+
stop_after = frequency.to_f / 2
|
32
|
+
|
32
33
|
EM.run {
|
33
34
|
|
34
|
-
puts "#{Time.now}: starting server"
|
35
|
+
puts "#{Time.now}: starting server on #{bind_ip}:#{bind_port}"
|
35
36
|
server_init = EM.start_server bind_ip, bind_port, Flapper
|
36
37
|
EM.add_timer(stop_after) do
|
37
38
|
puts "#{Time.now}: stopping server"
|
@@ -39,7 +40,7 @@ def main(bind_ip)
|
|
39
40
|
end
|
40
41
|
|
41
42
|
EM.add_periodic_timer(start_every) do
|
42
|
-
puts "#{Time.now}: starting server"
|
43
|
+
puts "#{Time.now}: starting server on #{bind_ip}:#{bind_port}"
|
43
44
|
server = EM.start_server bind_ip, bind_port, Flapper
|
44
45
|
EM.add_timer(stop_after) do
|
45
46
|
puts "#{Time.now}: stopping server"
|
@@ -82,12 +83,22 @@ OptionParser.new do |opts|
|
|
82
83
|
options.bind_ip = b
|
83
84
|
end
|
84
85
|
|
86
|
+
opts.on("-P", "--bind-port [PORT]", String, "PORT for flapper to bind to") do |p|
|
87
|
+
options.bind_port = p.to_i
|
88
|
+
end
|
89
|
+
|
90
|
+
opts.on("-f", "--frequency [SECONDS]", String, "oscillate with a frequency of SECONDS [120]") do |f|
|
91
|
+
options.frequency = f.to_f
|
92
|
+
end
|
93
|
+
|
85
94
|
end.parse!(ARGV)
|
86
95
|
|
87
|
-
pidfile = options.pidfile || "/var/run/flapjack/#{exe}.pid"
|
88
|
-
logfile = options.log_path || "/var/log/flapjack/#{exe}.log"
|
89
96
|
daemonize = options.daemonize.nil? ? true : options.daemonize
|
90
|
-
|
97
|
+
pidfile = options.pidfile || "/var/run/flapjack/#{exe}.pid"
|
98
|
+
logfile = options.log_path || "/var/log/flapjack/#{exe}.log"
|
99
|
+
bind_ip = options.bind_ip || local_ip
|
100
|
+
bind_port = options.bind_port || 12345
|
101
|
+
frequency = options.frequency || 120.0
|
91
102
|
|
92
103
|
runner = Dante::Runner.new(exe, :pid_path => pidfile, :log_path => logfile)
|
93
104
|
|
@@ -99,7 +110,7 @@ when "start"
|
|
99
110
|
else
|
100
111
|
print "#{exe} starting..."
|
101
112
|
runner.execute(:daemonize => daemonize) {
|
102
|
-
main(bind_ip)
|
113
|
+
main(bind_ip, bind_port, frequency)
|
103
114
|
}
|
104
115
|
puts " done."
|
105
116
|
end
|
@@ -117,7 +128,7 @@ when "stop"
|
|
117
128
|
when "restart"
|
118
129
|
print "#{exe} restarting..."
|
119
130
|
runner.execute(:daemonize => true, :restart => true) {
|
120
|
-
main(bind_ip)
|
131
|
+
main(bind_ip, bind_port, frequency)
|
121
132
|
}
|
122
133
|
puts " done."
|
123
134
|
|
data/lib/flapjack/data/alert.rb
CHANGED
@@ -84,8 +84,10 @@ module Flapjack
|
|
84
84
|
raise "state #{@state.inspect} is invalid" unless
|
85
85
|
allowed_states.include?(@state)
|
86
86
|
|
87
|
-
|
88
|
-
|
87
|
+
if @state_duration
|
88
|
+
raise "state_duration (#{@state_duration.inspect}) is invalid" unless
|
89
|
+
@state_duration.is_a?(Integer) && @state_duration >= 0
|
90
|
+
end
|
89
91
|
|
90
92
|
if @rollup_alerts
|
91
93
|
raise "rollup_alerts should be nil or a hash" unless @rollup_alerts.is_a?(Hash)
|
data/lib/flapjack/data/entity.rb
CHANGED
@@ -116,6 +116,16 @@ module Flapjack
|
|
116
116
|
Flapjack::Data::EntityCheck.find_all_failing_by_entity(:redis => redis).keys
|
117
117
|
end
|
118
118
|
|
119
|
+
def self.find_all_current(options)
|
120
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
121
|
+
redis.zrange('current_entities', 0, -1)
|
122
|
+
end
|
123
|
+
|
124
|
+
def self.find_all_current_with_last_update(options)
|
125
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
126
|
+
redis.zrange('current_entities', 0, -1, {:withscores => true})
|
127
|
+
end
|
128
|
+
|
119
129
|
def contacts
|
120
130
|
contact_ids = @redis.smembers("contacts_for:#{id}")
|
121
131
|
|
@@ -130,6 +130,80 @@ module Flapjack
|
|
130
130
|
redis.hget("check:#{event_id}", 'state')
|
131
131
|
end
|
132
132
|
|
133
|
+
# takes an array of ages (in seconds) to split all checks up by
|
134
|
+
# - age means how long since the last update
|
135
|
+
# - 0 age is implied if not explicitly passed
|
136
|
+
# returns arrays of all current checks hashed by age range lower bound (each key holds checks whose age is >= that key and < the next larger key), eg:
|
137
|
+
#
|
138
|
+
# EntityCheck.find_all_split_by_freshness([60, 300], opts) =>
|
139
|
+
# { 0 => [ 'foo-app-01:SSH' ],
|
140
|
+
# 60 => [ 'foo-app-01:Ping', 'foo-app-01:Disk / Utilisation' ],
|
141
|
+
# 300 => [] }
|
142
|
+
#
|
143
|
+
# you can also set :counts to true in options and you'll just get the counts, eg:
|
144
|
+
#
|
145
|
+
# EntityCheck.find_all_split_by_freshness([60, 300], opts.merge(:counts => true)) =>
|
146
|
+
# { 0 => 1,
|
147
|
+
# 60 => 2,
|
148
|
+
# 300 => 0 }
|
149
|
+
#
|
150
|
+
# and you can get the last update time with each check too by passing :with_times => true eg:
|
151
|
+
#
|
152
|
+
# EntityCheck.find_all_split_by_freshness([60, 300], opts.merge(:with_times => true)) =>
|
153
|
+
# { 0 => [ ['foo-app-01:SSH', 1382329923.0] ],
|
154
|
+
# 60 => [ ['foo-app-01:Ping', 1382329922.0], ['foo-app-01:Disk / Utilisation', 1382329921.0] ],
|
155
|
+
# 300 => [] }
|
156
|
+
#
|
157
|
+
def self.find_all_split_by_freshness(ages, options)
|
158
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
159
|
+
|
160
|
+
raise "ages does not respond_to? :each and :each_with_index" unless ages.respond_to?(:each) && ages.respond_to?(:each_with_index)
|
161
|
+
raise "age values must respond_to? :to_i" unless ages.all? {|age| age.respond_to?(:to_i) }
|
162
|
+
|
163
|
+
ages << 0
|
164
|
+
ages = ages.sort.uniq
|
165
|
+
|
166
|
+
start_time = Time.now
|
167
|
+
|
168
|
+
checks = []
|
169
|
+
# get all the current checks, with last update time
|
170
|
+
Flapjack::Data::Entity.find_all_current(:redis => redis).each do |entity|
|
171
|
+
redis.zrange("current_checks:#{entity}", 0, -1, {:withscores => true}).each do |check|
|
172
|
+
check[0] = "#{entity}:#{check[0]}"
|
173
|
+
checks << check
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
skeleton = ages.inject({}) {|memo, age| memo[age] = [] ; memo }
|
178
|
+
age_ranges = ages.reverse.each_cons(2)
|
179
|
+
results_with_times = checks.inject(skeleton) do |memo, check|
|
180
|
+
check_age = start_time.to_i - check[1]
|
181
|
+
check_age = 0 unless check_age > 0
|
182
|
+
if check_age >= ages.last
|
183
|
+
memo[ages.last] << check
|
184
|
+
else
|
185
|
+
age_range = age_ranges.detect {|a, b| check_age < a && check_age >= b }
|
186
|
+
memo[age_range.last] << check unless age_range.nil?
|
187
|
+
end
|
188
|
+
memo
|
189
|
+
end
|
190
|
+
|
191
|
+
case
|
192
|
+
when options[:with_times]
|
193
|
+
results_with_times
|
194
|
+
when options[:counts]
|
195
|
+
results_with_times.inject({}) do |memo, (age, checks)|
|
196
|
+
memo[age] = checks.length
|
197
|
+
memo
|
198
|
+
end
|
199
|
+
else
|
200
|
+
results_with_times.inject({}) do |memo, (age, checks)|
|
201
|
+
memo[age] = checks.map { |check| check[0] }
|
202
|
+
memo
|
203
|
+
end
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
133
207
|
def entity_name
|
134
208
|
entity.name
|
135
209
|
end
|
@@ -29,16 +29,42 @@ module Flapjack
|
|
29
29
|
|
30
30
|
set :show_exceptions, false
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
rescue_exception = Proc.new { |env, exception|
|
33
|
+
|
34
|
+
error = proc {|status, exception, *msg|
|
35
|
+
if !msg || msg.empty?
|
36
|
+
trace = exception.backtrace.join("\n")
|
37
|
+
msg = "#{exception.class} - #{exception.message}"
|
38
|
+
msg_str = "#{msg}\n#{trace}"
|
39
|
+
else
|
40
|
+
msg_str = msg.join(", ")
|
41
|
+
end
|
42
|
+
logger = Flapjack::Gateways::API.instance_variable_get('@logger')
|
43
|
+
case
|
44
|
+
when status < 500
|
45
|
+
logger.warn "Error: #{msg_str}"
|
46
|
+
else
|
47
|
+
logger.error "Error: #{msg_str}"
|
48
|
+
end
|
49
|
+
[status, {}, {:errors => msg}.to_json]
|
50
|
+
}
|
51
|
+
|
52
|
+
e = env['sinatra.error']
|
53
|
+
|
54
|
+
case exception
|
55
|
+
when Flapjack::Gateways::API::ContactNotFound
|
56
|
+
error.call(403, e, "could not find contact '#{e.contact_id}'")
|
57
|
+
when Flapjack::Gateways::API::NotificationRuleNotFound
|
58
|
+
error.call(403, e, "could not find notification rule '#{e.rule_id}'")
|
59
|
+
when Flapjack::Gateways::API::EntityNotFound
|
60
|
+
error.call(403, e, "could not find entity '#{e.entity}'")
|
61
|
+
when Flapjack::Gateways::API::EntityCheckNotFound
|
62
|
+
error.call(403, e, "could not find entity check '#{e.check}'")
|
63
|
+
else
|
64
|
+
error.call(500, exception)
|
65
|
+
end
|
66
|
+
}
|
67
|
+
use Rack::FiberPool, :size => 25, :rescue_exception => rescue_exception
|
42
68
|
|
43
69
|
use Rack::MethodOverride
|
44
70
|
use Rack::JsonParamsParser
|
@@ -65,11 +91,16 @@ module Flapjack
|
|
65
91
|
end
|
66
92
|
|
67
93
|
before do
|
68
|
-
input =
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
94
|
+
input = nil
|
95
|
+
if logger.debug?
|
96
|
+
input = env['rack.input'].read
|
97
|
+
logger.debug("#{request.request_method} #{request.path_info}#{request.query_string} #{input}")
|
98
|
+
elsif logger.info?
|
99
|
+
input = env['rack.input'].read
|
100
|
+
input_short = input.gsub(/\n/, '').gsub(/\s+/, ' ')
|
101
|
+
logger.info("#{request.request_method} #{request.path_info}#{request.query_string} #{input_short[0..80]}")
|
102
|
+
end
|
103
|
+
env['rack.input'].rewind unless input.nil?
|
73
104
|
end
|
74
105
|
|
75
106
|
after do
|
@@ -81,35 +112,9 @@ module Flapjack
|
|
81
112
|
register Flapjack::Gateways::API::ContactMethods
|
82
113
|
|
83
114
|
not_found do
|
84
|
-
logger.debug("in not_found :-(")
|
85
115
|
err(404, "not routable")
|
86
116
|
end
|
87
117
|
|
88
|
-
error Flapjack::Gateways::API::ContactNotFound do
|
89
|
-
e = env['sinatra.error']
|
90
|
-
err(403, "could not find contact '#{e.contact_id}'")
|
91
|
-
end
|
92
|
-
|
93
|
-
error Flapjack::Gateways::API::NotificationRuleNotFound do
|
94
|
-
e = env['sinatra.error']
|
95
|
-
err(403, "could not find notification rule '#{e.rule_id}'")
|
96
|
-
end
|
97
|
-
|
98
|
-
error Flapjack::Gateways::API::EntityNotFound do
|
99
|
-
e = env['sinatra.error']
|
100
|
-
err(403, "could not find entity '#{e.entity}'")
|
101
|
-
end
|
102
|
-
|
103
|
-
error Flapjack::Gateways::API::EntityCheckNotFound do
|
104
|
-
e = env['sinatra.error']
|
105
|
-
err(403, "could not find entity check '#{e.check}'")
|
106
|
-
end
|
107
|
-
|
108
|
-
error do
|
109
|
-
e = env['sinatra.error']
|
110
|
-
err(response.status, "#{e.class} - #{e.message}")
|
111
|
-
end
|
112
|
-
|
113
118
|
private
|
114
119
|
|
115
120
|
def err(status, *msg)
|
@@ -117,7 +122,6 @@ module Flapjack
|
|
117
122
|
logger.info "Error: #{msg_str}"
|
118
123
|
[status, {}, {:errors => msg}.to_json]
|
119
124
|
end
|
120
|
-
|
121
125
|
end
|
122
126
|
|
123
127
|
end
|
@@ -132,11 +132,11 @@ module Flapjack
|
|
132
132
|
entity_stats
|
133
133
|
check_stats
|
134
134
|
{
|
135
|
-
'events_queued'
|
136
|
-
'all_entities'
|
137
|
-
'failing_entities'
|
138
|
-
'all_checks'
|
139
|
-
'failing_checks'
|
135
|
+
'events_queued' => @events_queued,
|
136
|
+
'all_entities' => @count_all_entities,
|
137
|
+
'failing_entities' => @count_failing_entities,
|
138
|
+
'all_checks' => @count_all_checks,
|
139
|
+
'failing_checks' => @count_failing_checks,
|
140
140
|
'processed_events' => {
|
141
141
|
'all_time' => {
|
142
142
|
'total' => @event_counters['all'].to_i,
|
@@ -145,10 +145,11 @@ module Flapjack
|
|
145
145
|
'action' => @event_counters['action'].to_i,
|
146
146
|
}
|
147
147
|
},
|
148
|
-
'
|
149
|
-
'
|
150
|
-
'
|
151
|
-
'
|
148
|
+
'check_freshness' => @current_checks_ages,
|
149
|
+
'total_keys' => @dbsize,
|
150
|
+
'uptime' => @uptime_string,
|
151
|
+
'boottime' => @boot_time,
|
152
|
+
'current_time' => Time.now,
|
152
153
|
'executive_instances' => @executive_instances,
|
153
154
|
}.to_json
|
154
155
|
end
|
@@ -283,7 +284,6 @@ module Flapjack
|
|
283
284
|
end
|
284
285
|
|
285
286
|
get '/contacts' do
|
286
|
-
#self_stats
|
287
287
|
@contacts = Flapjack::Data::Contact.all(:redis => redis)
|
288
288
|
|
289
289
|
erb 'contacts.html'.to_sym
|
@@ -394,6 +394,7 @@ module Flapjack
|
|
394
394
|
end
|
395
395
|
@event_counters = redis.hgetall('event_counters')
|
396
396
|
@events_queued = redis.llen('events')
|
397
|
+
@current_checks_ages = Flapjack::Data::EntityCheck.find_all_split_by_freshness([0, 60, 300, 900, 3600], {:redis => redis, :counts => true } )
|
397
398
|
end
|
398
399
|
|
399
400
|
def entity_stats
|
Binary file
|
@@ -106,32 +106,6 @@
|
|
106
106
|
</table>
|
107
107
|
<% end %>
|
108
108
|
|
109
|
-
<h3>All Entities and Checks</h3>
|
110
|
-
<% if !@entities_and_checks || @entities_and_checks.empty? %>
|
111
|
-
<p>No entities</p>
|
112
|
-
<% else %>
|
113
|
-
<table class="table table-bordered table-hover table-condensed">
|
114
|
-
<tr>
|
115
|
-
<th>Entity</th>
|
116
|
-
<th>Checks</th>
|
117
|
-
</tr>
|
118
|
-
<% @entities_and_checks.each do |ec| %>
|
119
|
-
<%
|
120
|
-
entity = ec[:entity]
|
121
|
-
checks = ec[:checks]
|
122
|
-
%>
|
123
|
-
<tr>
|
124
|
-
<td><a href="/entity/<%= u(entity.name) %>" title="entity status"><%= h entity.name %></a></td>
|
125
|
-
<td>
|
126
|
-
<% checks.each do |check| %>
|
127
|
-
<%= "<a href=\"/check?entity=#{u(entity.name)}&check=#{u(check)}\" title=\"check status\">#{ h check }</a>" %>
|
128
|
-
<% end %>
|
129
|
-
</td>
|
130
|
-
</tr>
|
131
|
-
<% end %>
|
132
|
-
</table>
|
133
|
-
<% end %>
|
134
|
-
|
135
109
|
<h3>Notification Rules</h3>
|
136
110
|
<% rules = @contact.notification_rules %>
|
137
111
|
<% if !rules || rules.empty? %>
|
@@ -161,7 +135,33 @@
|
|
161
135
|
<td><%= h(blackholes.join(', ')) %></td>
|
162
136
|
</tr>
|
163
137
|
<% end %>
|
164
|
-
|
138
|
+
</table>
|
139
|
+
<% end %>
|
140
|
+
|
141
|
+
<h3>All Entities and Checks</h3>
|
142
|
+
<% if !@entities_and_checks || @entities_and_checks.empty? %>
|
143
|
+
<p>No entities</p>
|
144
|
+
<% else %>
|
145
|
+
<table class="table table-bordered table-hover table-condensed">
|
146
|
+
<tr>
|
147
|
+
<th>Entity</th>
|
148
|
+
<th>Checks</th>
|
149
|
+
</tr>
|
150
|
+
<% @entities_and_checks.each do |ec| %>
|
151
|
+
<%
|
152
|
+
entity = ec[:entity]
|
153
|
+
checks = ec[:checks]
|
154
|
+
%>
|
155
|
+
<tr>
|
156
|
+
<td><a href="/entity/<%= u(entity.name) %>" title="entity status"><%= h entity.name %></a></td>
|
157
|
+
<td>
|
158
|
+
<% checks.each do |check| %>
|
159
|
+
<%= "<a href=\"/check?entity=#{u(entity.name)}&check=#{u(check)}\" title=\"check status\">#{ h check }</a>" %>
|
160
|
+
<% end %>
|
161
|
+
</td>
|
162
|
+
</tr>
|
163
|
+
<% end %>
|
164
|
+
</table>
|
165
165
|
<% end %>
|
166
166
|
|
167
167
|
</div>
|
@@ -50,6 +50,16 @@
|
|
50
50
|
</ul>
|
51
51
|
</td>
|
52
52
|
</tr>
|
53
|
+
<tr>
|
54
|
+
<td>Check Freshness</td>
|
55
|
+
<td>
|
56
|
+
<ul>
|
57
|
+
<% @current_checks_ages.each_pair do |age, check_count| %>
|
58
|
+
<li>>= <%= age %>: <%= check_count %></li>
|
59
|
+
<% end %>
|
60
|
+
</ul>
|
61
|
+
</td>
|
62
|
+
</tr>
|
53
63
|
<tr>
|
54
64
|
<td>Total keys in redis</td>
|
55
65
|
<td><%= h @dbsize %></td>
|