flapjack 0.6.23 → 0.6.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +8 -39
- data/flapjack.gemspec +5 -5
- data/lib/flapjack/coordinator.rb +7 -3
- data/lib/flapjack/jabber.rb +22 -10
- data/lib/flapjack/oobetet.rb +245 -0
- data/lib/flapjack/version.rb +1 -1
- data/spec/lib/flapjack/jabber_spec.rb +1 -0
- metadata +8 -4
data/Rakefile
CHANGED
@@ -14,8 +14,14 @@ Dir['tasks/**/*.rake'].each { |t| load t }
|
|
14
14
|
require 'cucumber'
|
15
15
|
require 'cucumber/rake/task'
|
16
16
|
require 'colorize'
|
17
|
+
require 'rake/clean'
|
18
|
+
require 'bundler'
|
19
|
+
Bundler::GemHelper.install_tasks
|
20
|
+
|
17
21
|
Cucumber::Rake::Task.new(:features) do |t|
|
18
|
-
t.cucumber_opts = "features --format pretty"
|
22
|
+
#t.cucumber_opts = "features --format pretty"
|
23
|
+
#t.cucumber_opts = "--format progress"
|
24
|
+
t.cucumber_opts = "--format fuubar"
|
19
25
|
end
|
20
26
|
|
21
27
|
require 'rspec/core/rake_task'
|
@@ -23,42 +29,6 @@ RSpec::Core::RakeTask.new(:spec)
|
|
23
29
|
|
24
30
|
task :default => :spec
|
25
31
|
|
26
|
-
|
27
|
-
desc "build gem"
|
28
|
-
task :build => :verify do
|
29
|
-
build_output = `gem build flapjack.gemspec`
|
30
|
-
puts build_output
|
31
|
-
|
32
|
-
gem_filename = build_output[/File: (.*)/,1]
|
33
|
-
pkg_path = "pkg"
|
34
|
-
FileUtils.mkdir_p(pkg_path)
|
35
|
-
FileUtils.mv(gem_filename, pkg_path)
|
36
|
-
|
37
|
-
puts "Gem built in #{pkg_path}/#{gem_filename}".green
|
38
|
-
end
|
39
|
-
|
40
|
-
desc "push gem"
|
41
|
-
task :push do
|
42
|
-
filenames = Dir.glob("pkg/*.gem")
|
43
|
-
filenames_with_times = filenames.map do |filename|
|
44
|
-
[filename, File.mtime(filename)]
|
45
|
-
end
|
46
|
-
|
47
|
-
newest = filenames_with_times.sort_by { |tuple| tuple.last }.last
|
48
|
-
newest_filename = newest.first
|
49
|
-
|
50
|
-
command = "gem push #{newest_filename}"
|
51
|
-
system(command)
|
52
|
-
end
|
53
|
-
|
54
|
-
desc "clean up various generated files"
|
55
|
-
task :clean do
|
56
|
-
[ "pkg/"].each do |filename|
|
57
|
-
puts "Removing #{filename}"
|
58
|
-
FileUtils.rm_rf(filename)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
32
|
namespace :verify do
|
63
33
|
task :uncommitted do
|
64
34
|
uncommitted = `git ls-files -m`.split("\n")
|
@@ -75,5 +45,4 @@ namespace :verify do
|
|
75
45
|
end
|
76
46
|
|
77
47
|
# FIXME: getting that intermittent gherken lexing error so removing :features from verify list
|
78
|
-
|
79
|
-
task :verify => [ 'verify:all', :spec]
|
48
|
+
task :verify => [ 'verify:all', :spec, :features]
|
data/flapjack.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
require File.expand_path('../lib/flapjack/version', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
|
-
gem.authors = ["Lindsay Holmwood"]
|
6
|
-
gem.email =
|
7
|
-
gem.description =
|
8
|
-
gem.summary =
|
9
|
-
gem.homepage =
|
5
|
+
gem.authors = [ "Lindsay Holmwood", "Jesse Reynolds", "Ali Graham" ]
|
6
|
+
gem.email = "lindsay@holmwood.id.au"
|
7
|
+
gem.description = "Flapjack is distributed monitoring notification system that provides a scalable method for processing streams of events from Nagios and deciding who should be notified"
|
8
|
+
gem.summary = "Intelligent, scalable, distributed monitoring notification system."
|
9
|
+
gem.homepage = "http://flapjack-project.com/"
|
10
10
|
|
11
11
|
# see http://yehudakatz.com/2010/12/16/clarifying-the-roles-of-the-gemspec-and-gemfile/
|
12
12
|
# following a middle road here, not shipping it with the gem :)
|
data/lib/flapjack/coordinator.rb
CHANGED
@@ -17,6 +17,7 @@ require 'flapjack/api'
|
|
17
17
|
require 'flapjack/daemonizing'
|
18
18
|
require 'flapjack/executive'
|
19
19
|
require 'flapjack/jabber'
|
20
|
+
require 'flapjack/oobetet'
|
20
21
|
require 'flapjack/pagerduty'
|
21
22
|
require 'flapjack/notification/email'
|
22
23
|
require 'flapjack/notification/sms'
|
@@ -77,17 +78,18 @@ module Flapjack
|
|
77
78
|
@logger.debug "config keys: #{@config.keys}"
|
78
79
|
|
79
80
|
pikelet_keys = ['executive', 'jabber_gateway', 'pagerduty_gateway',
|
80
|
-
'email_notifier', 'sms_notifier', 'web', 'api'
|
81
|
+
'email_notifier', 'sms_notifier', 'web', 'api',
|
82
|
+
'oobetet']
|
81
83
|
|
82
84
|
@config.keys.each do |pikelet_type|
|
83
|
-
next unless pikelet_keys.include?(pikelet_type) &&
|
85
|
+
next unless pikelet_keys.include?(pikelet_type) &&
|
84
86
|
@config[pikelet_type].is_a?(Hash) &&
|
85
87
|
@config[pikelet_type]['enabled']
|
86
88
|
@logger.debug "coordinator is now initialising the #{pikelet_type} pikelet"
|
87
89
|
pikelet_cfg = @config[pikelet_type]
|
88
90
|
|
89
91
|
case pikelet_type
|
90
|
-
when 'executive', 'jabber_gateway', 'pagerduty_gateway'
|
92
|
+
when 'executive', 'jabber_gateway', 'pagerduty_gateway', 'oobetet'
|
91
93
|
build_pikelet(pikelet_type, pikelet_cfg)
|
92
94
|
when 'web', 'api'
|
93
95
|
build_thin_pikelet(pikelet_type, pikelet_cfg)
|
@@ -118,6 +120,8 @@ module Flapjack
|
|
118
120
|
Flapjack::Jabber
|
119
121
|
when 'pagerduty_gateway'
|
120
122
|
Flapjack::Pagerduty
|
123
|
+
when 'oobetet'
|
124
|
+
Flapjack::Oobetet
|
121
125
|
end
|
122
126
|
return unless pikelet_class
|
123
127
|
|
data/lib/flapjack/jabber.rb
CHANGED
@@ -85,14 +85,16 @@ module Flapjack
|
|
85
85
|
@redis_handler ||= build_redis_connection_pool
|
86
86
|
@connected_at = Time.now.to_i
|
87
87
|
logger.info("Jabber Connected")
|
88
|
-
@config['rooms'].
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
88
|
+
if @config['rooms'] && @config['rooms'].length > 0
|
89
|
+
@config['rooms'].each do |room|
|
90
|
+
logger.info("Joining room #{room}")
|
91
|
+
presence = Blather::Stanza::Presence.new
|
92
|
+
presence.from = @flapjack_jid
|
93
|
+
presence.to = Blather::JID.new("#{room}/#{@config['alias']}")
|
94
|
+
presence << "<x xmlns='http://jabber.org/protocol/muc'/>"
|
95
|
+
write presence
|
96
|
+
say(room, "flapjack jabber gateway started at #{Time.now}, hello!", :groupchat)
|
97
|
+
end
|
96
98
|
end
|
97
99
|
end
|
98
100
|
|
@@ -129,6 +131,10 @@ module Flapjack
|
|
129
131
|
error = "unknown entity" if entity_check.nil?
|
130
132
|
end
|
131
133
|
|
134
|
+
if entity_check && entity_check.in_unscheduled_maintenance?
|
135
|
+
error = "#{event_id} is already acknowledged"
|
136
|
+
end
|
137
|
+
|
132
138
|
if error
|
133
139
|
msg = "ERROR - couldn't ACK #{ackid} - #{error}"
|
134
140
|
else
|
@@ -185,7 +191,13 @@ module Flapjack
|
|
185
191
|
return if should_quit?
|
186
192
|
logger.debug("chat message received: #{stanza.inspect}")
|
187
193
|
|
188
|
-
|
194
|
+
if stanza.body =~ /^flapjack:\s+(.*)/
|
195
|
+
command = $1
|
196
|
+
else
|
197
|
+
command = stanza.body
|
198
|
+
end
|
199
|
+
|
200
|
+
results = interpreter(command)
|
189
201
|
msg = results[:msg]
|
190
202
|
action = results[:action]
|
191
203
|
|
@@ -274,7 +286,7 @@ module Flapjack
|
|
274
286
|
msg = "#{type.upcase} #{ack_str}::: \"#{check}\" on #{entity} #{maint_str} ::: #{summary}"
|
275
287
|
|
276
288
|
chat_type = :chat
|
277
|
-
chat_type = :groupchat if @config['rooms'].include?(address)
|
289
|
+
chat_type = :groupchat if @config['rooms'] && @config['rooms'].include?(address)
|
278
290
|
EM.next_tick do
|
279
291
|
say(Blather::JID.new(address), msg, chat_type)
|
280
292
|
end
|
data/lib/flapjack/oobetet.rb
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
#require 'socket'
|
4
|
+
|
5
|
+
require 'eventmachine'
|
6
|
+
# the redis/synchrony gems need to be required in this particular order, see # the redis-rb README for details
|
7
|
+
#require 'hiredis'
|
8
|
+
require 'em-synchrony'
|
9
|
+
#require 'redis/connection/synchrony'
|
10
|
+
#require 'redis'
|
11
|
+
|
12
|
+
#require 'chronic_duration'
|
13
|
+
|
14
|
+
require 'blather/client/client'
|
15
|
+
require 'em-synchrony/fiber_iterator'
|
16
|
+
require 'yajl/json_gem'
|
17
|
+
|
18
|
+
#require 'flapjack/data/entity_check'
|
19
|
+
require 'flapjack/pikelet'
|
20
|
+
require 'flapjack/utility'
|
21
|
+
|
22
|
+
module Flapjack
|
23
|
+
|
24
|
+
class Oobetet < Blather::Client
|
25
|
+
|
26
|
+
include Flapjack::Pikelet
|
27
|
+
include Flapjack::Utility
|
28
|
+
|
29
|
+
log = Logger.new(STDOUT)
|
30
|
+
# log.level = Logger::DEBUG
|
31
|
+
log.level = Logger::INFO
|
32
|
+
Blather.logger = log
|
33
|
+
|
34
|
+
def setup
|
35
|
+
@hostname = Socket.gethostname
|
36
|
+
@flapjacktest_jid = Blather::JID.new((@config['jabberid'] || 'flapjacktest') + "/#{@hostname}:#{Process.pid}")
|
37
|
+
|
38
|
+
super(@flapjacktest_jid, @config['password'], @config['server'], @config['port'].to_i)
|
39
|
+
|
40
|
+
logger.debug("Building jabber connection with jabberid: " +
|
41
|
+
@flapjacktest_jid.to_s + ", port: " + @config['port'].to_s +
|
42
|
+
", server: " + @config['server'].to_s + ", password: " +
|
43
|
+
@config['password'].to_s)
|
44
|
+
|
45
|
+
@pagerduty_events_api_url = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
|
46
|
+
|
47
|
+
if !@config['watched_check'] or !@config['watched_entity']
|
48
|
+
raise RuntimeError, 'Flapjack::Oobetet: watched_check and watched_entity must be defined in the config'
|
49
|
+
end
|
50
|
+
|
51
|
+
@check_matcher = '"' + @config['watched_check'] + '" on ' + @config['watched_entity']
|
52
|
+
@max_latency = @config['max_latency'] || 300
|
53
|
+
@flapjack_ok = true
|
54
|
+
|
55
|
+
t = Time.now.to_i
|
56
|
+
@times = { :last_problem => t,
|
57
|
+
:last_recovery => t,
|
58
|
+
:last_ack => t,
|
59
|
+
:last_ack_sent => t }
|
60
|
+
|
61
|
+
@last_alert = nil
|
62
|
+
|
63
|
+
register_handler :ready do |stanza|
|
64
|
+
EM.next_tick do
|
65
|
+
EM.synchrony do
|
66
|
+
on_ready(stanza)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
register_handler :message, :groupchat? do |stanza|
|
72
|
+
EM.next_tick do
|
73
|
+
EM.synchrony do
|
74
|
+
on_groupchat(stanza)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
register_handler :disconnected do |stanza|
|
80
|
+
ret = true
|
81
|
+
EM.next_tick do
|
82
|
+
EM.synchrony do
|
83
|
+
ret = on_disconnect(stanza)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
ret
|
87
|
+
end
|
88
|
+
|
89
|
+
end
|
90
|
+
|
91
|
+
# Join the MUC Chat room after connecting.
|
92
|
+
def on_ready(stanza)
|
93
|
+
return if should_quit?
|
94
|
+
@connected_at = Time.now.to_i
|
95
|
+
logger.info("Jabber Connected")
|
96
|
+
if @config['rooms'] && @config['rooms'].length > 0
|
97
|
+
@config['rooms'].each do |room|
|
98
|
+
logger.info("Joining room #{room}")
|
99
|
+
presence = Blather::Stanza::Presence.new
|
100
|
+
presence.from = @flapjacktest_jid
|
101
|
+
presence.to = Blather::JID.new("#{room}/#{@config['alias']}")
|
102
|
+
presence << "<x xmlns='http://jabber.org/protocol/muc'/>"
|
103
|
+
write presence
|
104
|
+
say(room, "flapjack self monitoring (oobetet) started at #{Time.now}, g'day!", :groupchat)
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
# returning true to prevent the reactor loop from stopping
|
110
|
+
def on_disconnect(stanza)
|
111
|
+
return true if should_quit?
|
112
|
+
logger.warn("jabbers disconnected! reconnecting in 1 second ...")
|
113
|
+
EventMachine::Timer.new(1) do
|
114
|
+
connect # Blather::Client.connect
|
115
|
+
end
|
116
|
+
true
|
117
|
+
end
|
118
|
+
|
119
|
+
def on_groupchat(stanza)
|
120
|
+
return if should_quit?
|
121
|
+
logger.debug("groupchat stanza body: " + stanza.body)
|
122
|
+
logger.debug("groupchat message received: #{stanza.inspect}")
|
123
|
+
|
124
|
+
if stanza.body =~ /^(\w+).*#{Regexp.escape(@check_matcher)}/
|
125
|
+
# got something interesting
|
126
|
+
logger.debug("groupchat found the following state for #{@check_matcher}: #{$1.downcase}")
|
127
|
+
case $1.downcase
|
128
|
+
when 'problem'
|
129
|
+
logger.debug("updating @times last_problem")
|
130
|
+
@times[:last_problem] = Time.new.to_i
|
131
|
+
when 'recovery'
|
132
|
+
logger.debug("updating @times last_recovery")
|
133
|
+
@times[:last_recovery] = Time.new.to_i
|
134
|
+
when 'acknowledgement'
|
135
|
+
logger.debug("updating @times last_ack")
|
136
|
+
@times[:last_ack] = Time.new.to_i
|
137
|
+
end
|
138
|
+
|
139
|
+
end
|
140
|
+
logger.debug("@times: #{@times.inspect}")
|
141
|
+
|
142
|
+
end
|
143
|
+
|
144
|
+
def check_timers
|
145
|
+
t = Time.new.to_i
|
146
|
+
breach = nil
|
147
|
+
@logger.debug("check_timers: inspecting @times #{@times.inspect}")
|
148
|
+
case
|
149
|
+
when @times[:last_problem] < (t - @max_latency)
|
150
|
+
breach = "haven't seen a test problem notification in the last #{@max_latency} seconds"
|
151
|
+
when @times[:last_recovery] < (t - @max_latency)
|
152
|
+
breach = "haven't seen a test recovery notification in the last #{@max_latency} seconds"
|
153
|
+
end
|
154
|
+
|
155
|
+
if !@flapjack_ok and !breach
|
156
|
+
emit_jabber("Flapjack Self Monitoring is OK")
|
157
|
+
emit_pagerduty("Flapjack Self Monitoring is OK", 'resolve')
|
158
|
+
end
|
159
|
+
|
160
|
+
@flapjack_ok = !breach
|
161
|
+
|
162
|
+
return unless breach
|
163
|
+
@logger.error("Self monitoring has detected the following breach: #{breach}")
|
164
|
+
summary = "Flapjack Self Monitoring is Critical: #{breach} for #{@check_matcher}, "
|
165
|
+
summary += "from #{@hostname} at #{Time.now}"
|
166
|
+
|
167
|
+
if !@last_alert or @last_alert < (t - 55)
|
168
|
+
|
169
|
+
emit_jabber(summary)
|
170
|
+
emit_pagerduty(summary, 'trigger')
|
171
|
+
|
172
|
+
if !@last_alert or @last_alert < (t - 55)
|
173
|
+
msg = "NOTICE: Self monitoring has detected a failure and is unable to tell "
|
174
|
+
msg += "anyone about it. DON'T PANIC."
|
175
|
+
@logger.error msg
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
def emit_jabber(summary)
|
182
|
+
if @config['rooms'] && @config['rooms'].length > 0
|
183
|
+
@config['rooms'].each do |room|
|
184
|
+
say(room, summary, :groupchat)
|
185
|
+
end
|
186
|
+
@last_alert = Time.now.to_i
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def emit_pagerduty(summary, event_type = 'trigger')
|
191
|
+
if @config['pagerduty_contact']
|
192
|
+
pagerduty_event = { :service_key => @config['pagerduty_contact'],
|
193
|
+
:incident_key => "Flapjack Self Monitoring from #{@hostname}",
|
194
|
+
:event_type => event_type,
|
195
|
+
:description => summary }
|
196
|
+
status, response = send_pagerduty_event(pagerduty_event)
|
197
|
+
if status == 200
|
198
|
+
@logger.debug("successfully sent pagerduty event")
|
199
|
+
@last_alert = Time.now.to_i
|
200
|
+
else
|
201
|
+
@logger.error("pagerduty returned #{status} #{response.inspect}")
|
202
|
+
end
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
def say(to, msg, using = :chat)
|
207
|
+
@logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
|
208
|
+
write Blather::Stanza::Message.new(to, msg, using)
|
209
|
+
end
|
210
|
+
|
211
|
+
def send_pagerduty_event(event)
|
212
|
+
options = { :body => Yajl::Encoder.encode(event) }
|
213
|
+
http = EM::HttpRequest.new(@pagerduty_events_api_url).post(options)
|
214
|
+
response = Yajl::Parser.parse(http.response)
|
215
|
+
status = http.response_header.status
|
216
|
+
logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
|
217
|
+
[status, response]
|
218
|
+
end
|
219
|
+
|
220
|
+
def main
|
221
|
+
logger.debug("New oobetet pikelet with the following options: #{@config.inspect}")
|
222
|
+
|
223
|
+
keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
|
224
|
+
logger.debug("calling keepalive on the jabber connection")
|
225
|
+
write(' ') if connected?
|
226
|
+
end
|
227
|
+
|
228
|
+
check_timers_timer = EM::Synchrony.add_periodic_timer(10) do
|
229
|
+
check_timers
|
230
|
+
end
|
231
|
+
|
232
|
+
setup
|
233
|
+
connect # Blather::Client.connect
|
234
|
+
|
235
|
+
if should_quit?
|
236
|
+
keepalive_timer.cancel
|
237
|
+
check_timers_timer.cancel
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
|
245
|
+
|
data/lib/flapjack/version.rb
CHANGED
data/spec/lib/flapjack/jabber_spec.rb
CHANGED
@@ -65,6 +65,7 @@ describe Flapjack::Jabber do
|
|
65
65
|
and_return('main-example.com:ping')
|
66
66
|
|
67
67
|
entity_check = mock(Flapjack::Data::EntityCheck)
|
68
|
+
entity_check.should_receive(:in_unscheduled_maintenance?)
|
68
69
|
entity_check.should_receive(:create_acknowledgement).
|
69
70
|
with('summary' => 'fixing now', 'acknowledgement_id' => '876', 'duration' => (90 * 60))
|
70
71
|
entity_check.should_receive(:entity_name).and_return('main-example.com')
|
metadata
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: flapjack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.23
|
4
|
+
version: 0.6.24
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Lindsay Holmwood
|
9
|
+
- Jesse Reynolds
|
10
|
+
- Ali Graham
|
9
11
|
autorequire:
|
10
12
|
bindir: bin
|
11
13
|
cert_chain: []
|
@@ -331,8 +333,9 @@ dependencies:
|
|
331
333
|
- - ! '>='
|
332
334
|
- !ruby/object:Gem::Version
|
333
335
|
version: '0'
|
334
|
-
description: Flapjack is
|
335
|
-
|
336
|
+
description: Flapjack is distributed monitoring notification system that provides
|
337
|
+
a scalable method for processing streams of events from Nagios and deciding who
|
338
|
+
should be notified
|
336
339
|
email: lindsay@holmwood.id.au
|
337
340
|
executables:
|
338
341
|
- flapjack
|
@@ -428,6 +431,7 @@ files:
|
|
428
431
|
- lib/flapjack/notifiers/mailer/mailer.rb
|
429
432
|
- lib/flapjack/notifiers/xmpp/init.rb
|
430
433
|
- lib/flapjack/notifiers/xmpp/xmpp.rb
|
434
|
+
- lib/flapjack/oobetet.rb
|
431
435
|
- lib/flapjack/pagerduty.rb
|
432
436
|
- lib/flapjack/patches.rb
|
433
437
|
- lib/flapjack/persistence/couch.rb
|
@@ -539,7 +543,7 @@ rubyforge_project:
|
|
539
543
|
rubygems_version: 1.8.23
|
540
544
|
signing_key:
|
541
545
|
specification_version: 3
|
542
|
-
summary:
|
546
|
+
summary: Intelligent, scalable, distributed monitoring notification system.
|
543
547
|
test_files:
|
544
548
|
- features/events.feature
|
545
549
|
- features/notifications.feature
|