flapjack 0.6.38 → 0.6.39
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitmodules +3 -0
- data/.travis.yml +2 -0
- data/README.md +15 -186
- data/Rakefile +11 -7
- data/bin/flapjack +9 -17
- data/bin/flapjack-nagios-receiver +19 -32
- data/bin/flapjack-populator +7 -2
- data/etc/flapjack_config.yaml.example +20 -0
- data/flapjack.gemspec +1 -3
- data/lib/flapjack/api.rb +1 -5
- data/lib/flapjack/coordinator.rb +24 -28
- data/lib/flapjack/data/contact.rb +12 -10
- data/lib/flapjack/data/entity.rb +2 -2
- data/lib/flapjack/data/entity_check.rb +0 -9
- data/lib/flapjack/data/global.rb +25 -0
- data/lib/flapjack/executive.rb +1 -0
- data/lib/flapjack/jabber.rb +11 -19
- data/lib/flapjack/oobetet.rb +34 -32
- data/lib/flapjack/pagerduty.rb +140 -140
- data/lib/flapjack/redis_pool.rb +1 -1
- data/lib/flapjack/version.rb +1 -1
- data/lib/flapjack/web/views/check.haml +1 -1
- data/spec/lib/flapjack/coordinator_spec.rb +162 -5
- data/spec/lib/flapjack/data/contact_spec.rb +34 -2
- data/spec/lib/flapjack/data/entity_spec.rb +25 -11
- data/spec/lib/flapjack/jabber_spec.rb +2 -3
- data/spec/lib/flapjack/oobetet_spec.rb +140 -0
- data/spec/lib/flapjack/pagerduty_spec.rb +177 -0
- data/spec/lib/flapjack/redis_pool_spec.rb +24 -0
- data/spec/spec_helper.rb +3 -0
- metadata +10 -40
- data/doc/CONFIGURING.md +0 -38
- data/doc/DEBUGGING.md +0 -28
- data/doc/DEVELOPING.md +0 -35
- data/doc/GLOSSARY.md +0 -19
- data/doc/INSTALL.md +0 -64
- data/doc/PACKAGING.md +0 -25
data/flapjack.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |gem|
|
|
10
10
|
|
11
11
|
# see http://yehudakatz.com/2010/12/16/clarifying-the-roles-of-the-gemspec-and-gemfile/
|
12
12
|
# following a middle road here, not shipping it with the gem :)
|
13
|
-
gem.files = `git ls-files`.split($\) - ['Gemfile.lock'
|
13
|
+
gem.files = `git ls-files`.split($\) - ['Gemfile.lock']
|
14
14
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
15
15
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
16
16
|
gem.name = "flapjack"
|
@@ -34,8 +34,6 @@ Gem::Specification.new do |gem|
|
|
34
34
|
gem.add_dependency 'blather'
|
35
35
|
gem.add_dependency 'chronic'
|
36
36
|
gem.add_dependency 'chronic_duration'
|
37
|
-
gem.add_dependency 'httparty'
|
38
37
|
|
39
38
|
gem.add_development_dependency 'rake'
|
40
|
-
gem.add_development_dependency 'colorize'
|
41
39
|
end
|
data/lib/flapjack/api.rb
CHANGED
@@ -283,7 +283,6 @@ module Flapjack
|
|
283
283
|
end
|
284
284
|
|
285
285
|
post '/contacts' do
|
286
|
-
begin
|
287
286
|
pass unless 'application/json'.eql?(request.content_type)
|
288
287
|
content_type :json
|
289
288
|
|
@@ -292,7 +291,7 @@ module Flapjack
|
|
292
291
|
|
293
292
|
contacts = params[:contacts]
|
294
293
|
if contacts && contacts.is_a?(Enumerable) && contacts.any? {|c| !c['id'].nil?}
|
295
|
-
Flapjack::Data::Contact.delete_all
|
294
|
+
Flapjack::Data::Contact.delete_all(:redis => @@redis)
|
296
295
|
contacts.each do |contact|
|
297
296
|
unless contact['id']
|
298
297
|
logger.warn "Contact not imported as it has no id: #{contact.inspect}"
|
@@ -306,9 +305,6 @@ module Flapjack
|
|
306
305
|
errors << "No valid contacts were submitted"
|
307
306
|
end
|
308
307
|
errors.empty? ? ret : [ret, {}, {:errors => [errors]}.to_json]
|
309
|
-
rescue Exception => e
|
310
|
-
puts e.message
|
311
|
-
end
|
312
308
|
end
|
313
309
|
|
314
310
|
not_found do
|
data/lib/flapjack/coordinator.rb
CHANGED
@@ -40,8 +40,7 @@ module Flapjack
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def start(options = {})
|
43
|
-
|
44
|
-
|
43
|
+
@signals = options[:signals]
|
45
44
|
if options[:daemonize]
|
46
45
|
daemonize
|
47
46
|
else
|
@@ -75,7 +74,6 @@ module Flapjack
|
|
75
74
|
end
|
76
75
|
|
77
76
|
EM.synchrony do
|
78
|
-
|
79
77
|
@logger.debug "config keys: #{@config.keys}"
|
80
78
|
|
81
79
|
pikelet_keys = ['executive', 'jabber_gateway', 'pagerduty_gateway',
|
@@ -99,7 +97,7 @@ module Flapjack
|
|
99
97
|
end
|
100
98
|
end
|
101
99
|
|
102
|
-
setup_signals
|
100
|
+
setup_signals if @signals
|
103
101
|
end
|
104
102
|
|
105
103
|
end
|
@@ -126,10 +124,9 @@ module Flapjack
|
|
126
124
|
end
|
127
125
|
return unless pikelet_class
|
128
126
|
|
127
|
+
pikelet = pikelet_class.new
|
129
128
|
f = Fiber.new {
|
130
129
|
begin
|
131
|
-
pikelet = pikelet_class.new
|
132
|
-
@pikelets.detect {|p| p[:type] == pikelet_type}[:instance] = pikelet
|
133
130
|
pikelet.bootstrap(:redis => @redis_options, :config => pikelet_cfg)
|
134
131
|
pikelet.main
|
135
132
|
rescue Exception => e
|
@@ -138,7 +135,7 @@ module Flapjack
|
|
138
135
|
stop
|
139
136
|
end
|
140
137
|
}
|
141
|
-
@pikelets << {:fiber => f, :type => pikelet_type}
|
138
|
+
@pikelets << {:fiber => f, :type => pikelet_type, :instance => pikelet}
|
142
139
|
f.resume
|
143
140
|
@logger.debug "new fiber created for #{pikelet_type}"
|
144
141
|
end
|
@@ -159,13 +156,13 @@ module Flapjack
|
|
159
156
|
|
160
157
|
port = 3001 if (port.nil? || port <= 0 || port > 65535)
|
161
158
|
|
162
|
-
|
163
|
-
|
159
|
+
pikelet_class.class_variable_set('@@redis',
|
160
|
+
Flapjack::RedisPool.new(:config => @redis_options))
|
164
161
|
|
165
162
|
Thin::Logging.silent = true
|
166
163
|
|
167
164
|
pikelet = Thin::Server.new('0.0.0.0', port, pikelet_class, :signals => false)
|
168
|
-
@pikelets << {:instance => pikelet, :type => pikelet_type
|
165
|
+
@pikelets << {:instance => pikelet, :type => pikelet_type}
|
169
166
|
pikelet.start
|
170
167
|
@logger.debug "new thin server instance started for #{pikelet_type}"
|
171
168
|
end
|
@@ -181,10 +178,11 @@ module Flapjack
|
|
181
178
|
|
182
179
|
# set up connection pooling, stop resque errors (ensure that it's only
|
183
180
|
# done once)
|
184
|
-
|
181
|
+
@resque_pool = nil
|
185
182
|
if (['email_notifier', 'sms_notifier'] & @pikelets.collect {|p| p[:type]}).empty?
|
186
183
|
pool = Flapjack::RedisPool.new(:config => @redis_options)
|
187
184
|
::Resque.redis = pool
|
185
|
+
@resque_pool = pool
|
188
186
|
## NB: can override the default 'resque' namespace like this
|
189
187
|
#::Resque.redis.namespace = 'flapjack'
|
190
188
|
end
|
@@ -210,14 +208,14 @@ module Flapjack
|
|
210
208
|
|
211
209
|
pikelet_class.class_variable_set('@@config', pikelet_cfg)
|
212
210
|
|
211
|
+
# TODO error if pikelet_cfg['queue'].nil?
|
212
|
+
pikelet = EM::Resque::Worker.new(pikelet_cfg['queue'])
|
213
|
+
# # Use these to debug the resque workers
|
214
|
+
# pikelet.verbose = true
|
215
|
+
# pikelet.very_verbose = true
|
216
|
+
|
213
217
|
f = Fiber.new {
|
214
218
|
begin
|
215
|
-
# TODO error if pikelet_cfg['queue'].nil?
|
216
|
-
pikelet = EM::Resque::Worker.new(pikelet_cfg['queue'])
|
217
|
-
@pikelets.detect {|p| p[:type] == pikelet_type}[:instance] = pikelet
|
218
|
-
# # Use these to debug the resque workers
|
219
|
-
# flapjack_rsq.verbose = true
|
220
|
-
#flapjack_rsq.very_verbose = true
|
221
219
|
pikelet.work(0.1)
|
222
220
|
rescue Exception => e
|
223
221
|
trace = e.backtrace.join("\n")
|
@@ -225,9 +223,7 @@ module Flapjack
|
|
225
223
|
stop
|
226
224
|
end
|
227
225
|
}
|
228
|
-
|
229
|
-
pikelet_values[:pool] = pool if pool
|
230
|
-
@pikelets << pikelet_values
|
226
|
+
@pikelets << {:fiber => f, :type => pikelet_type, :instance => pikelet}
|
231
227
|
f.resume
|
232
228
|
@logger.debug "new fiber created for #{pikelet_type}"
|
233
229
|
end
|
@@ -244,6 +240,8 @@ module Flapjack
|
|
244
240
|
# end
|
245
241
|
# end
|
246
242
|
|
243
|
+
# TODO when merged with other changes, have this check pik[:class] instead,
|
244
|
+
# makes tests neater
|
247
245
|
def shutdown
|
248
246
|
@pikelets.each do |pik|
|
249
247
|
case pik[:instance]
|
@@ -278,15 +276,13 @@ module Flapjack
|
|
278
276
|
if fibers.any?(&:alive?) || thin_pikelets.any?{|tp| !tp.backend.empty? }
|
279
277
|
EM::Synchrony.sleep 0.25
|
280
278
|
else
|
281
|
-
@
|
282
|
-
tp[:pool].empty!
|
283
|
-
end
|
284
|
-
|
285
|
-
rsq_p = @pikelets.detect {|p|
|
286
|
-
['email_notifier', 'sms_notifier'].include?(p[:type]) && !p[:pool].nil?
|
287
|
-
}
|
279
|
+
@resque_pool.empty! if @resque_pool
|
288
280
|
|
289
|
-
|
281
|
+
[Flapjack::Web, Flapjack::API].each do |klass|
|
282
|
+
next unless klass.class_variable_defined?('@@redis') &&
|
283
|
+
redis = klass.class_variable_get('@@redis')
|
284
|
+
redis.empty!
|
285
|
+
end
|
290
286
|
|
291
287
|
EM.stop
|
292
288
|
break
|
@@ -69,16 +69,18 @@ module Flapjack
|
|
69
69
|
redis.hmset("contact:#{contact['id']}",
|
70
70
|
*['first_name', 'last_name', 'email'].collect {|f| [f, contact[f]]})
|
71
71
|
|
72
|
-
contact['media'].
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
72
|
+
unless contact['media'].nil?
|
73
|
+
contact['media'].each_pair {|medium, address|
|
74
|
+
case medium
|
75
|
+
when 'pagerduty'
|
76
|
+
redis.hset("contact_media:#{contact['id']}", medium, address['service_key'])
|
77
|
+
redis.hmset("contact_pagerduty:#{contact['id']}",
|
78
|
+
*['subdomain', 'username', 'password'].collect {|f| [f, address[f]]})
|
79
|
+
else
|
80
|
+
redis.hset("contact_media:#{contact['id']}", medium, address)
|
81
|
+
end
|
82
|
+
}
|
83
|
+
end
|
82
84
|
end
|
83
85
|
|
84
86
|
def pagerduty_credentials
|
data/lib/flapjack/data/entity.rb
CHANGED
@@ -12,7 +12,7 @@ module Flapjack
|
|
12
12
|
raise "Redis connection not set" unless redis = options[:redis]
|
13
13
|
redis.keys("entity_id:*").collect {|k|
|
14
14
|
k =~ /^entity_id:(.+)$/; entity_name = $1
|
15
|
-
self.new(:name => entity_name, :id => redis.get("entity_id:#{entity_name}")
|
15
|
+
self.new(:name => entity_name, :id => redis.get("entity_id:#{entity_name}"), :redis => redis)
|
16
16
|
}
|
17
17
|
end
|
18
18
|
|
@@ -51,7 +51,7 @@ module Flapjack
|
|
51
51
|
self.add({'name' => entity_name}, :redis => redis)
|
52
52
|
end
|
53
53
|
self.new(:name => entity_name,
|
54
|
-
:id => (entity_id.nil? || entity_id.empty?) ? nil : entity_id
|
54
|
+
:id => (entity_id.nil? || entity_id.empty?) ? nil : entity_id,
|
55
55
|
:redis => redis)
|
56
56
|
end
|
57
57
|
|
@@ -368,15 +368,6 @@ module Flapjack
|
|
368
368
|
:summary => @redis.get("#{@key}:#{ts.first}:summary")}
|
369
369
|
end
|
370
370
|
|
371
|
-
def historical_state_after(timestamp)
|
372
|
-
pos = @redis.zrank("#{@key}:sorted_state_timestamps", timestamp)
|
373
|
-
ts = @redis.zrange("#{@key}:sorted_state_timestamps", pos + 1, pos + 2)
|
374
|
-
return if ts.nil? || ts.empty?
|
375
|
-
{:timestamp => ts.first.to_i,
|
376
|
-
:state => @redis.get("#{@key}:#{ts.first}:state"),
|
377
|
-
:summary => @redis.get("#{@key}:#{ts.first}:summary")}
|
378
|
-
end
|
379
|
-
|
380
371
|
# Returns a list of maintenance periods (either unscheduled or scheduled) for this
|
381
372
|
# entity check, sorted by timestamp.
|
382
373
|
#
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/data/entity_check'
|
4
|
+
|
5
|
+
module Flapjack
|
6
|
+
|
7
|
+
module Data
|
8
|
+
|
9
|
+
class Global
|
10
|
+
|
11
|
+
def self.unacknowledged_failing_checks(options = {})
|
12
|
+
raise "Redis connection not set" unless redis = options[:redis]
|
13
|
+
|
14
|
+
redis.zrange('failed_checks', '0', '-1').reject {|entity_check|
|
15
|
+
redis.exists(entity_check + ':unscheduled_maintenance')
|
16
|
+
}.collect {|entity_check|
|
17
|
+
Flapjack::Data::EntityCheck.for_event_id(entity_check, :redis => redis)
|
18
|
+
}
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
data/lib/flapjack/executive.rb
CHANGED
@@ -36,6 +36,7 @@ module Flapjack
|
|
36
36
|
@notifylog.add(Log4r::FileOutputter.new("notifylog", :filename => notifylog))
|
37
37
|
|
38
38
|
# FIXME: Put loading filters into separate method
|
39
|
+
# FIXME: should we make the filters more configurable by the end user?
|
39
40
|
options = { :log => @logger, :persistence => @redis }
|
40
41
|
@filters = []
|
41
42
|
@filters << Flapjack::Filters::Ok.new(options)
|
data/lib/flapjack/jabber.rb
CHANGED
@@ -51,35 +51,27 @@ module Flapjack
|
|
51
51
|
@config['password'].to_s)
|
52
52
|
|
53
53
|
register_handler :ready do |stanza|
|
54
|
-
|
55
|
-
|
56
|
-
on_ready(stanza)
|
57
|
-
end
|
54
|
+
EventMachine::Synchrony.next_tick do
|
55
|
+
on_ready(stanza)
|
58
56
|
end
|
59
57
|
end
|
60
58
|
|
61
59
|
register_handler :message, :groupchat?, :body => /^flapjack:\s+/ do |stanza|
|
62
|
-
|
63
|
-
|
64
|
-
on_groupchat(stanza)
|
65
|
-
end
|
60
|
+
EventMachine::Synchrony.next_tick do
|
61
|
+
on_groupchat(stanza)
|
66
62
|
end
|
67
63
|
end
|
68
64
|
|
69
65
|
register_handler :message, :chat? do |stanza|
|
70
|
-
|
71
|
-
|
72
|
-
on_chat(stanza)
|
73
|
-
end
|
66
|
+
EventMachine::Synchrony.next_tick do
|
67
|
+
on_chat(stanza)
|
74
68
|
end
|
75
69
|
end
|
76
70
|
|
77
71
|
register_handler :disconnected do |stanza|
|
78
72
|
ret = true
|
79
|
-
|
80
|
-
|
81
|
-
ret = on_disconnect(stanza)
|
82
|
-
end
|
73
|
+
EventMachine::Synchrony.next_tick do
|
74
|
+
ret = on_disconnect(stanza)
|
83
75
|
end
|
84
76
|
ret
|
85
77
|
end
|
@@ -130,7 +122,7 @@ module Flapjack
|
|
130
122
|
end
|
131
123
|
|
132
124
|
four_hours = 4 * 60 * 60
|
133
|
-
duration = (dur.nil? || (dur <= 0)
|
125
|
+
duration = (dur.nil? || (dur <= 0)) ? four_hours : dur
|
134
126
|
|
135
127
|
event_id = @redis_handler.hget('unacknowledged_failures', ackid)
|
136
128
|
|
@@ -278,7 +270,7 @@ module Flapjack
|
|
278
270
|
logger.debug(event.inspect)
|
279
271
|
if 'shutdown'.eql?(type)
|
280
272
|
if should_quit?
|
281
|
-
|
273
|
+
EventMachine::Synchrony.next_tick do
|
282
274
|
# get delays without the next_tick
|
283
275
|
close # Blather::Client.close
|
284
276
|
end
|
@@ -303,7 +295,7 @@ module Flapjack
|
|
303
295
|
|
304
296
|
chat_type = :chat
|
305
297
|
chat_type = :groupchat if @config['rooms'] && @config['rooms'].include?(address)
|
306
|
-
|
298
|
+
EventMachine::Synchrony.next_tick do
|
307
299
|
say(Blather::JID.new(address), msg, chat_type)
|
308
300
|
end
|
309
301
|
end
|
data/lib/flapjack/oobetet.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
3
|
+
require 'socket'
|
4
4
|
|
5
5
|
require 'eventmachine'
|
6
6
|
require 'em-synchrony'
|
@@ -52,35 +52,32 @@ module Flapjack
|
|
52
52
|
:last_ack_sent => t }
|
53
53
|
|
54
54
|
@last_alert = nil
|
55
|
+
end
|
55
56
|
|
57
|
+
# split out to ease testing
|
58
|
+
def register_handlers
|
56
59
|
register_handler :ready do |stanza|
|
57
|
-
|
58
|
-
|
59
|
-
on_ready(stanza)
|
60
|
-
end
|
60
|
+
EventMachine::Synchrony.next_tick do
|
61
|
+
on_ready(stanza)
|
61
62
|
end
|
62
63
|
end
|
63
64
|
|
64
65
|
register_handler :message, :groupchat? do |stanza|
|
65
|
-
|
66
|
-
|
67
|
-
on_groupchat(stanza)
|
68
|
-
end
|
66
|
+
EventMachine::Synchrony.next_tick do
|
67
|
+
on_groupchat(stanza)
|
69
68
|
end
|
70
69
|
end
|
71
70
|
|
72
71
|
register_handler :disconnected do |stanza|
|
73
72
|
ret = true
|
74
|
-
|
75
|
-
|
76
|
-
ret = on_disconnect(stanza)
|
77
|
-
end
|
73
|
+
EventMachine::Synchrony.next_tick do
|
74
|
+
ret = on_disconnect(stanza)
|
78
75
|
end
|
79
76
|
ret
|
80
77
|
end
|
81
|
-
|
82
78
|
end
|
83
79
|
|
80
|
+
|
84
81
|
# Join the MUC Chat room after connecting.
|
85
82
|
def on_ready(stanza)
|
86
83
|
return if should_quit?
|
@@ -111,31 +108,37 @@ module Flapjack
|
|
111
108
|
|
112
109
|
def on_groupchat(stanza)
|
113
110
|
return if should_quit?
|
114
|
-
|
111
|
+
|
112
|
+
stanza_body = stanza.body
|
113
|
+
|
114
|
+
logger.debug("groupchat stanza body: " + stanza_body)
|
115
115
|
logger.debug("groupchat message received: #{stanza.inspect}")
|
116
116
|
|
117
|
-
if
|
117
|
+
if (stanza_body =~ /^(?:problem|recovery|acknowledgement)/i) &&
|
118
|
+
(stanza_body =~ /^(\w+).*#{Regexp.escape(@check_matcher)}/)
|
119
|
+
|
118
120
|
# got something interesting
|
119
|
-
|
120
|
-
|
121
|
+
status = $1.downcase
|
122
|
+
t = Time.now.to_i
|
123
|
+
logger.debug("groupchat found the following state for #{@check_matcher}: #{status}")
|
124
|
+
|
125
|
+
case status
|
121
126
|
when 'problem'
|
122
127
|
logger.debug("updating @times last_problem")
|
123
|
-
@times[:last_problem] =
|
128
|
+
@times[:last_problem] = t
|
124
129
|
when 'recovery'
|
125
130
|
logger.debug("updating @times last_recovery")
|
126
|
-
@times[:last_recovery] =
|
131
|
+
@times[:last_recovery] = t
|
127
132
|
when 'acknowledgement'
|
128
133
|
logger.debug("updating @times last_ack")
|
129
|
-
@times[:last_ack] =
|
134
|
+
@times[:last_ack] = t
|
130
135
|
end
|
131
|
-
|
132
136
|
end
|
133
137
|
logger.debug("@times: #{@times.inspect}")
|
134
|
-
|
135
138
|
end
|
136
139
|
|
137
140
|
def check_timers
|
138
|
-
t = Time.
|
141
|
+
t = Time.now.to_i
|
139
142
|
breach = nil
|
140
143
|
@logger.debug("check_timers: inspecting @times #{@times.inspect}")
|
141
144
|
case
|
@@ -145,7 +148,7 @@ module Flapjack
|
|
145
148
|
breach = "haven't seen a test recovery notification in the last #{@max_latency} seconds"
|
146
149
|
end
|
147
150
|
|
148
|
-
|
151
|
+
unless @flapjack_ok || breach
|
149
152
|
emit_jabber("Flapjack Self Monitoring is OK")
|
150
153
|
emit_pagerduty("Flapjack Self Monitoring is OK", 'resolve')
|
151
154
|
end
|
@@ -218,17 +221,16 @@ module Flapjack
|
|
218
221
|
write(' ') if connected?
|
219
222
|
end
|
220
223
|
|
221
|
-
check_timers_timer = EM::Synchrony.add_periodic_timer(10) do
|
222
|
-
check_timers
|
223
|
-
end
|
224
|
-
|
225
224
|
setup
|
225
|
+
register_handlers
|
226
226
|
connect # Blather::Client.connect
|
227
227
|
|
228
|
-
|
229
|
-
|
230
|
-
|
228
|
+
until should_quit?
|
229
|
+
EM::Synchrony.sleep(10)
|
230
|
+
check_timers
|
231
231
|
end
|
232
|
+
|
233
|
+
keepalive_timer.cancel
|
232
234
|
end
|
233
235
|
|
234
236
|
end
|