flapjack 0.5.5 → 0.6.23
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.rspec +10 -0
- data/Gemfile +18 -0
- data/Guardfile +14 -0
- data/README.md +152 -173
- data/Rakefile +53 -150
- data/bin/flapjack +72 -0
- data/bin/flapjack-nagios-receiver +111 -0
- data/bin/flapjack-nagios-receiver-control +15 -0
- data/bin/flapjack-netsaint-parser +0 -2
- data/bin/flapjack-populator +133 -16
- data/bin/install-flapjack-systemwide +2 -2
- data/config.ru +11 -0
- data/dist/etc/init.d/flapjack +46 -0
- data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
- data/doc/GLOSSARY.md +19 -0
- data/etc/flapjack_config.yaml.example +90 -0
- data/features/events.feature +132 -0
- data/features/notifications.feature +57 -0
- data/features/packaging-lintian.feature +5 -3
- data/features/steps/events_steps.rb +164 -0
- data/features/steps/flapjack-importer_steps.rb +2 -5
- data/features/steps/flapjack-worker_steps.rb +13 -6
- data/features/steps/notifications_steps.rb +178 -0
- data/features/steps/packaging-lintian_steps.rb +14 -0
- data/features/steps/time_travel_steps.rb +34 -0
- data/features/support/env.rb +63 -36
- data/flapjack.gemspec +35 -186
- data/lib/flapjack.rb +2 -0
- data/lib/flapjack/api.rb +274 -0
- data/lib/flapjack/api/entity_check_presenter.rb +184 -0
- data/lib/flapjack/api/entity_presenter.rb +66 -0
- data/lib/flapjack/cli/worker_manager.rb +1 -2
- data/lib/flapjack/configuration.rb +11 -0
- data/lib/flapjack/coordinator.rb +288 -0
- data/lib/flapjack/daemonizing.rb +186 -0
- data/lib/flapjack/data/contact.rb +45 -0
- data/lib/flapjack/data/entity.rb +89 -0
- data/lib/flapjack/data/entity_check.rb +396 -0
- data/lib/flapjack/data/event.rb +144 -0
- data/lib/flapjack/data/notification.rb +13 -0
- data/lib/flapjack/executive.rb +289 -0
- data/lib/flapjack/filters/acknowledgement.rb +39 -0
- data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
- data/lib/flapjack/filters/delays.rb +53 -0
- data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
- data/lib/flapjack/filters/ok.rb +25 -5
- data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
- data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
- data/lib/flapjack/jabber.rb +294 -0
- data/lib/flapjack/notification/common.rb +23 -0
- data/lib/flapjack/notification/email.rb +107 -0
- data/lib/flapjack/notification/email/alert.html.haml +48 -0
- data/lib/flapjack/notification/email/alert.text.erb +14 -0
- data/lib/flapjack/notification/sms.rb +42 -0
- data/lib/flapjack/notification/sms/messagenet.rb +49 -0
- data/lib/flapjack/notifier_engine.rb +4 -4
- data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
- data/lib/flapjack/pagerduty.rb +230 -0
- data/lib/flapjack/patches.rb +108 -19
- data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
- data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
- data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
- data/lib/flapjack/pikelet.rb +56 -0
- data/lib/flapjack/transports/beanstalkd.rb +1 -1
- data/lib/flapjack/transports/result.rb +6 -6
- data/lib/flapjack/utility.rb +46 -0
- data/lib/flapjack/version.rb +5 -0
- data/lib/flapjack/web.rb +198 -0
- data/lib/flapjack/web/views/acknowledge.haml +55 -0
- data/lib/flapjack/web/views/check.haml +162 -0
- data/lib/flapjack/web/views/index.haml +92 -0
- data/lib/flapjack/web/views/self_stats.haml +56 -0
- data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
- data/lib/flapjack/worker/cli.rb +49 -0
- data/{spec → spec.old}/check_sandbox/echo +0 -0
- data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
- data/{spec → spec.old}/configs/recipients.ini +0 -0
- data/{spec → spec.old}/helpers.rb +0 -0
- data/{spec → spec.old}/inifile_spec.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
- data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
- data/{spec → spec.old}/notifier_application_spec.rb +0 -0
- data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_spec.rb +0 -0
- data/{spec → spec.old}/notifier_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
- data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
- data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
- data/{spec → spec.old}/simple.ini +0 -0
- data/{spec → spec.old}/spec.opts +0 -0
- data/{spec → spec.old}/test-filters/blocker.rb +0 -0
- data/{spec → spec.old}/test-filters/mock.rb +0 -0
- data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
- data/{spec → spec.old}/transports/mock_transport.rb +0 -0
- data/{spec → spec.old}/worker_application_spec.rb +0 -0
- data/{spec → spec.old}/worker_options_spec.rb +0 -0
- data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
- data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
- data/spec/lib/flapjack/api_spec.rb +170 -0
- data/spec/lib/flapjack/coordinator_spec.rb +16 -0
- data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
- data/spec/lib/flapjack/data/entity_spec.rb +71 -0
- data/spec/lib/flapjack/data/event_spec.rb +6 -0
- data/spec/lib/flapjack/executive_spec.rb +59 -0
- data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
- data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
- data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
- data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/jabber_spec.rb +150 -0
- data/spec/lib/flapjack/notification/email_spec.rb +6 -0
- data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
- data/spec/lib/flapjack/pikelet_spec.rb +28 -0
- data/spec/lib/flapjack/web_spec.rb +188 -0
- data/spec/spec_helper.rb +44 -0
- data/spec/support/profile_all_formatter.rb +44 -0
- data/spec/support/uncolored_doc_formatter.rb +9 -0
- data/tasks/events.rake +85 -0
- data/tmp/acknowledge.rb +14 -0
- data/tmp/create_config_yaml.rb +16 -0
- data/tmp/create_events_failure.rb +33 -0
- data/tmp/create_events_ok.rb +33 -0
- data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
- data/tmp/create_events_ok_failure.rb +40 -0
- data/tmp/create_events_ok_failure_ack.rb +54 -0
- data/tmp/dummy_entities.json +1 -0
- data/tmp/generate_nagios_test_hosts.rb +16 -0
- data/tmp/parse_config_yaml.rb +7 -0
- data/tmp/redis_delete_all_keys.rb +11 -0
- data/tmp/test_entities.json +1 -0
- metadata +482 -221
- data/TODO.md +0 -36
- data/VERSION +0 -1
- data/bin/flapjack-benchmark +0 -50
- data/bin/flapjack-notifier +0 -21
- data/bin/flapjack-notifier-manager +0 -43
- data/bin/flapjack-stats +0 -27
- data/bin/flapjack-worker +0 -13
- data/bin/flapjack-worker-manager +0 -35
- data/dist/etc/init.d/flapjack-notifier +0 -47
- data/dist/etc/init.d/flapjack-workers +0 -44
- data/features/flapjack-notifier-manager.feature +0 -19
- data/features/flapjack-worker-manager.feature +0 -27
- data/features/flapjack-worker.feature +0 -27
- data/features/netsaint-config-converter.feature +0 -126
- data/features/persistence/couch.feature +0 -105
- data/features/persistence/sqlite3.feature +0 -105
- data/features/persistence/steps/couch_steps.rb +0 -25
- data/features/persistence/steps/generic_steps.rb +0 -102
- data/features/persistence/steps/sqlite3_steps.rb +0 -13
- data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
- data/features/steps/flapjack-worker-manager_steps.rb +0 -48
- data/lib/flapjack/applications/notifier.rb +0 -222
- data/lib/flapjack/cli/notifier.rb +0 -108
- data/lib/flapjack/cli/notifier_manager.rb +0 -86
- data/lib/flapjack/cli/worker.rb +0 -51
@@ -1,13 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
1
3
|
module Flapjack
|
2
4
|
module Filters
|
3
|
-
|
5
|
+
module Base
|
4
6
|
# Stores the collaborators shared by every filter that mixes in this module.
#
# opts - Hash of options (default: {}):
#        :log         - object the filters call `debug` on
#        :persistence - store handle the filters query and pass on to
#                       EntityCheck lookups as :redis
def initialize(opts={})
  @log, @persistence = opts.values_at(:log, :persistence)
end
|
8
10
|
|
9
|
-
def
|
10
|
-
|
11
|
+
# Returns the including object's class name with any enclosing namespace
# stripped, e.g. "Delays" for Flapjack::Filters::Delays.
def name
  qualified_name = self.class.to_s
  qualified_name.split('::').last
end
|
12
14
|
end
|
13
15
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/data/entity_check'
|
4
|
+
require 'flapjack/filters/base'
|
5
|
+
|
6
|
+
module Flapjack
|
7
|
+
module Filters
|
8
|
+
|
9
|
+
# * If the service event’s state is a failure, and the time since the ok => failure state change
|
10
|
+
# is below a threshold (e.g. 30 seconds), then don't alert
|
11
|
+
# * If the service event’s state is a failure, and the time since the last alert is below a
|
12
|
+
# threshold (5 minutes), then don’t alert
|
13
|
+
class Delays
|
14
|
+
include Base
|
15
|
+
|
16
|
+
# Decides whether a notification for +event+ should be held back because
# the failure is too young or an alert was sent too recently.
#
# Only 'service' events in a critical or warning state are examined. For
# those, the event is blocked when either
#   * the current failure has lasted less than failure_delay seconds, or
#   * the last problem notification was sent less than resend_delay
#     seconds ago.
#
# event - object responding to type, id, critical? and warning?
#
# Returns true to block the notification, false to let it pass.
def block?(event)
  failure_delay = 30   # seconds a failure must persist before alerting
  resend_delay  = 300  # minimum seconds between repeat alerts

  result = false

  if (event.type == 'service') && (event.critical? || event.warning?)

    entity_check = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
    current_time = Time.now.to_i

    if entity_check.nil?
      # The lookup can come back empty (the Jabber gateway guards against
      # the same thing); there is nothing to time against, so don't block.
      @log.debug("Filter: Delays: no entity check found for event id #{event.id}, not blocking")
    elsif entity_check.failed?
      last_problem_alert = entity_check.last_problem_notification
      last_change        = entity_check.last_change

      # Either timestamp may be absent; leave the derived value nil rather
      # than raising on `Integer - nil`.
      current_failure_duration = current_time - last_change unless last_change.nil?
      time_since_last_alert    = current_time - last_problem_alert unless last_problem_alert.nil?
      @log.debug("Filter: Delays: last_problem_alert: #{last_problem_alert.to_s}, last_change: #{last_change.to_s}, current_failure_duration: #{current_failure_duration}, time_since_last_alert: #{time_since_last_alert.to_s}")
      if !current_failure_duration.nil? && (current_failure_duration < failure_delay)
        result = true
        @log.debug("Filter: Delays: blocking because duration of current failure (#{current_failure_duration}) is less than failure_delay (#{failure_delay})")
      elsif !last_problem_alert.nil? && (time_since_last_alert < resend_delay)
        result = true
        @log.debug("Filter: Delays: blocking because time since last alert for current problem (#{time_since_last_alert}) is less than resend_delay (#{resend_delay})")
      else
        @log.debug("Filter: Delays: not blocking because neither of the time comparison conditions were met")
      end
    else
      @log.debug("Filter: Delays: entity_check.failed? returned false ...")
    end
  end

  @log.debug("Filter: Delays: #{result ? "block" : "pass"}")
  result
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/filters/base'
|
4
|
+
|
5
|
+
module Flapjack
|
6
|
+
module Filters
|
7
|
+
|
8
|
+
# * If the service event’s state is a failure, and the total number of failing client checks is
|
9
|
+
# over a threshold (e.g. 10 checks are failing), then set a meta flag noting the threshold has
|
10
|
+
# been tripped, and generate an event for this meta check
|
11
|
+
# * If the service event’s state is ok, and the meta flag is set, and the total number of
|
12
|
+
# failing client checks is less than a threshold (eg 10), then unset the flag, and generate an
|
13
|
+
# event for this meta check
|
14
|
+
class DetectMassClientFailures
|
15
|
+
include Base
|
16
|
+
|
17
|
+
# Tracks whether a client currently has a "mass failure" (at least
# client_mass_fail_threshold entries in its failed_checks sorted set) and
# records the start and duration of each such episode.
#
# For 'service' events:
#   * when the failing count reaches the threshold and no flag is set,
#     the flag key is set to the current timestamp and an entry with
#     score 0 is added to the episode sorted set;
#   * when the count is below the threshold and the flag is set, the flag
#     is deleted and the episode entry is re-scored with its duration in
#     seconds.
#
# event - object responding to type and client
#
# Always returns false: this filter only records state, it never blocks.
def block?(event)
  client_mass_fail_threshold = 10
  timestamp = Time.now.to_i

  if event.type == 'service'
    flag_key     = "mass_failed_client:#{event.client}"
    episodes_key = "mass_failure_events_client:#{event.client}"

    client_fail_count = @persistence.zcount("failed_checks:#{event.client}", '-inf', '+inf')
    start_mf = @persistence.get(flag_key)

    if client_fail_count >= client_mass_fail_threshold
      # set the flag
      # FIXME: perhaps implement this with tagging
      #
      # Only on the first trip: overwriting the start time on every later
      # event would make the recorded duration meaningless. The flag is
      # read back with GET below, so it has to be written with SET.
      if start_mf.nil?
        @persistence.set(flag_key, timestamp)
        @persistence.zadd(episodes_key, 0, timestamp)
      end
    elsif start_mf
      # unset the flag -- only when there is one; otherwise there is no
      # episode to close and no member to score
      duration = timestamp - start_mf.to_i
      @persistence.del(flag_key)
      @persistence.zadd(episodes_key, duration, start_mf)
    end
  end

  result = false
  @log.debug("Filter: DetectMassClientFailures: #{result ? "block" : "pass"}")
  result
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
data/lib/flapjack/filters/ok.rb
CHANGED
@@ -1,12 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/filters/base'
|
4
|
+
|
1
5
|
module Flapjack
|
2
6
|
module Filters
|
7
|
+
|
8
|
+
# * If the service event’s state is ok and the previous state was ok, don’t alert
|
9
|
+
# * If the service event's state is ok and there is unscheduled downtime set, end the unscheduled
|
10
|
+
# downtime
|
3
11
|
class Ok
|
4
|
-
|
5
|
-
|
6
|
-
|
12
|
+
include Base
|
13
|
+
|
14
|
+
# Suppresses repeat "ok" notifications and ends unscheduled maintenance
# when a check recovers.
#
# When the event is ok:
#   * it is blocked if the previous state was also 'ok';
#   * any unscheduled maintenance on the matching entity check is ended.
# Events in any other state pass through untouched.
#
# event - object responding to ok?, previous_state and id
#
# Returns true to block the notification, false to let it pass.
def block?(event)
  result = false

  if event.ok?
    if event.previous_state == 'ok'
      @log.debug("Filter: Ok: existing state was ok, and the previous state was ok, so blocking")
      result = true
    end

    # end any unscheduled downtime
    entity_check = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
    if entity_check
      entity_check.end_unscheduled_maintenance
    else
      # The lookup can come back empty (the Jabber gateway guards against
      # the same thing); there is no maintenance to end in that case.
      @log.debug("Filter: Ok: no entity check found for event id #{event.id}, no unscheduled maintenance to end")
    end
  end

  @log.debug("Filter: Ok: #{result ? "block" : "pass"}")
  result
end
|
11
31
|
end
|
12
32
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/filters/base'
|
4
|
+
|
5
|
+
module Flapjack
|
6
|
+
module Filters
|
7
|
+
class ScheduledMaintenance
|
8
|
+
include Base
|
9
|
+
|
10
|
+
# Blocks the notification when a "<event id>:scheduled_maintenance" key
# exists in the persistence store.
#
# event - object responding to id
#
# Returns whatever the store's `exists` call returned (truthy blocks).
def block?(event)
  maintenance_key = "#{event.id}:scheduled_maintenance"
  in_maintenance = @persistence.exists(maintenance_key)
  verdict = in_maintenance ? "block" : "pass"
  @log.debug("Filter: Scheduled Maintenance: #{verdict}")
  in_maintenance
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/filters/base'
|
4
|
+
|
5
|
+
module Flapjack
|
6
|
+
module Filters
|
7
|
+
class UnscheduledMaintenance
|
8
|
+
include Base
|
9
|
+
|
10
|
+
# Blocks the notification when a "<event id>:unscheduled_maintenance" key
# exists in the persistence store.
#
# event - object responding to id
#
# Returns whatever the store's `exists` call returned (truthy blocks).
def block?(event)
  maintenance_key = "#{event.id}:unscheduled_maintenance"
  in_maintenance = @persistence.exists(maintenance_key)
  verdict = in_maintenance ? "block" : "pass"
  @log.debug("Filter: Unscheduled Maintenance: #{verdict}")
  in_maintenance
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,294 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'socket'
|
4
|
+
|
5
|
+
require 'eventmachine'
|
6
|
+
# the redis/synchrony gems need to be required in this particular order, see
|
7
|
+
# the redis-rb README for details
|
8
|
+
require 'hiredis'
|
9
|
+
require 'em-synchrony'
|
10
|
+
require 'redis/connection/synchrony'
|
11
|
+
require 'redis'
|
12
|
+
|
13
|
+
require 'chronic_duration'
|
14
|
+
|
15
|
+
require 'blather/client/client'
|
16
|
+
require 'em-synchrony/fiber_iterator'
|
17
|
+
require 'yajl/json_gem'
|
18
|
+
|
19
|
+
require 'flapjack/data/entity_check'
|
20
|
+
require 'flapjack/pikelet'
|
21
|
+
require 'flapjack/utility'
|
22
|
+
|
23
|
+
module Flapjack
|
24
|
+
|
25
|
+
class Jabber < Blather::Client
|
26
|
+
|
27
|
+
include Flapjack::Pikelet
|
28
|
+
include Flapjack::Utility
|
29
|
+
|
30
|
+
# Send Blather's own log output to STDOUT at INFO level. Swap in the
# commented-out line below to get DEBUG output instead.
blather_logger = Logger.new(STDOUT)
# blather_logger.level = Logger::DEBUG
blather_logger.level = Logger::INFO
Blather.logger = blather_logger
|
34
|
+
|
35
|
+
# Prepares the XMPP client: builds the Redis connection used by the main
# loop, derives the full JID ("<jabberid>/<hostname>", with jabberid
# defaulting to 'flapjack'), hands the credentials to the superclass
# setup, and registers the stanza handlers.
#
# Every handler defers its work to the next reactor tick inside an
# EM.synchrony block before calling the matching on_* method.
def setup
  @redis = build_redis_connection_pool
  @hostname = Socket.gethostname
  @flapjack_jid = Blather::JID.new((@config['jabberid'] || 'flapjack') + '/' + @hostname)

  super(@flapjack_jid, @config['password'], @config['server'], @config['port'].to_i)

  # The account password is deliberately masked: credentials do not belong
  # in log files.
  logger.debug("Building jabber connection with jabberid: " +
    @flapjack_jid.to_s + ", port: " + @config['port'].to_s +
    ", server: " + @config['server'].to_s + ", password: [FILTERED]")

  register_handler :ready do |stanza|
    EM.next_tick do
      EM.synchrony do
        on_ready(stanza)
      end
    end
  end

  register_handler :message, :groupchat?, :body => /^flapjack:\s+/ do |stanza|
    EM.next_tick do
      EM.synchrony do
        on_groupchat(stanza)
      end
    end
  end

  register_handler :message, :chat? do |stanza|
    EM.next_tick do
      EM.synchrony do
        on_chat(stanza)
      end
    end
  end

  register_handler :disconnected do |stanza|
    EM.next_tick do
      EM.synchrony do
        on_disconnect(stanza)
      end
    end
    # on_disconnect runs on a later tick, so its result cannot be observed
    # here; the handler itself always answers true.
    true
  end
end
|
81
|
+
|
82
|
+
# Join the MUC Chat room after connecting.
|
83
|
+
# Runs once the XMPP connection is ready (registered for :ready in setup).
#
# Lazily builds the Redis connection used by chat commands, records the
# connection time, then for each configured room sends a MUC presence
# addressed to "<room>/<alias>" and posts a greeting to the room.
#
# Does nothing when should_quit? is true. A missing 'rooms' setting is
# treated as "no rooms" rather than raising.
#
# stanza - the stanza passed by the :ready handler (not used here)
def on_ready(stanza)
  return if should_quit?
  @redis_handler ||= build_redis_connection_pool
  @connected_at = Time.now.to_i
  logger.info("Jabber Connected")
  Array(@config['rooms']).each do |room|
    logger.info("Joining room #{room}")
    presence = Blather::Stanza::Presence.new
    presence.from = @flapjack_jid
    presence.to = Blather::JID.new("#{room}/#{@config['alias']}")
    presence << "<x xmlns='http://jabber.org/protocol/muc'/>"
    write presence
    say(room, "flapjack jabber gateway started at #{Time.now}, hello!", :groupchat)
  end
end
|
98
|
+
|
99
|
+
# Translates one chat command into a reply and an optional follow-up action.
#
# Commands, as matched by the patterns below:
#   ACKID <id> <comment> [duration <time spec>]
#       Looks the id up in the 'unacknowledged_failures' hash and, when the
#       entity check is found and a comment was given, prepares an
#       acknowledgement. The duration falls back to four hours when it is
#       missing, unparseable, non-positive or longer than four hours.
#   help      - lists the commands
#   identify  - reports pid, host name, CPU times and `uname -a`
# Any other string gets a "what do you mean" reply; nil yields no reply.
#
# command - String command text, or nil
#
# Returns a Hash: :msg is the reply String (or nil), :action is a Proc to
# call after the reply has been sent (or nil).
def interpreter(command)
  msg = nil
  action = nil

  case command
  when /^ACKID\s+(\d+)(?:\s*(.*?)(?:\s*duration.*?(\d+.*\w+.*))?)$/i
    ackid, comment, duration_str = $1, $2, $3

    problem = nil
    parsed_duration = nil

    if comment.nil? || comment.empty?
      problem = "please provide a comment, eg \"flapjack: ACKID #{ackid} AL looking\""
    elsif duration_str
      # a fairly liberal match above, we'll let chronic_duration do the heavy lifting
      parsed_duration = ChronicDuration.parse(duration_str)
    end

    four_hours = 4 * 60 * 60
    usable = !parsed_duration.nil? && (parsed_duration > 0) && (parsed_duration <= four_hours)
    duration = usable ? parsed_duration : four_hours

    entity_check = nil
    event_id = @redis_handler.hget('unacknowledged_failures', ackid)

    if event_id.nil?
      problem = "not found"
    else
      entity_check = Flapjack::Data::EntityCheck.for_event_id(event_id, :redis => @redis_handler)
      problem = "unknown entity" if entity_check.nil?
    end

    if problem
      msg = "ERROR - couldn't ACK #{ackid} - #{problem}"
    else
      msg = "ACKing #{entity_check.check} on #{entity_check.entity_name} (#{ackid})"
      action = proc do
        entity_check.create_acknowledgement('summary' => (comment || ''),
          'acknowledgement_id' => ackid, 'duration' => duration)
      end
    end

  when /^help$/
    msg = [
      "commands: \n",
      " ACKID <id> <comment> [duration: <time spec>] \n",
      " identify \n",
      " help \n"
    ].join

  when /^identify$/
    cpu = Process.times

    msg = "Flapjack process #{Process.pid} on #{`hostname -f`.chomp} \n" +
          "User CPU Time: #{cpu.utime}\n" +
          "System CPU Time: #{cpu.stime}\n" +
          `uname -a`.chomp + "\n"

  when /^(.*)/
    words = $1
    msg = "what do you mean, '#{words}'? Type 'help' for a list of acceptable commands."
  end

  {:msg => msg, :action => action}
end
|
164
|
+
|
165
|
+
# Handles a group chat message addressed to flapjack.
#
# Extracts the text following the "flapjack:" prefix, runs it through the
# interpreter, posts any reply back to the room the message came from and
# then runs the follow-up action, if there is one. Does nothing when
# should_quit? is true.
#
# stanza - message stanza responding to body and from
def on_groupchat(stanza)
  return if should_quit?
  logger.debug("groupchat message received: #{stanza.inspect}")

  command = (stanza.body =~ /^flapjack:\s+(.*)/) ? $1 : nil

  reply, followup = interpreter(command).values_at(:msg, :action)
  return unless reply || followup

  say(stanza.from.stripped, reply, :groupchat)
  logger.debug("Sent to group chat: #{reply}")
  followup.call if followup
end
|
183
|
+
|
184
|
+
# Handles a one-to-one chat message.
#
# The whole message body is passed to the interpreter; any reply goes back
# to the sender and the follow-up action, if there is one, runs after the
# reply has been sent. Does nothing when should_quit? is true.
#
# stanza - message stanza responding to body and from
def on_chat(stanza)
  return if should_quit?
  logger.debug("chat message received: #{stanza.inspect}")

  reply, followup = interpreter(stanza.body).values_at(:msg, :action)
  return unless reply || followup

  say(stanza.from.stripped, reply, :chat)
  logger.debug("Sent to #{stanza.from.stripped}: #{reply}")
  followup.call if followup
end
|
198
|
+
|
199
|
+
# returning true to prevent the reactor loop from stopping
|
200
|
+
# Schedules a reconnect one second after the connection drops, unless the
# pikelet is shutting down.
#
# stanza - the stanza passed by the :disconnected handler (not used here)
#
# Always returns true.
def on_disconnect(stanza)
  unless should_quit?
    logger.warn("jabbers disconnected! reconnecting in 1 second ...")
    EventMachine::Timer.new(1) { connect } # Blather::Client.connect
  end
  true
end
|
208
|
+
|
209
|
+
# Sends a chat message over the XMPP connection.
#
# to    - recipient (a JID or room address)
# msg   - message body
# using - message type passed to the stanza, e.g. :chat or :groupchat
#         (default: :chat)
def say(to, msg, using = :chat)
  # Use the `logger` accessor like every other method in this class
  # instead of reaching for the instance variable directly.
  logger.debug("Sending a jabber message to: #{to.to_s}, using: #{using.to_s}, message: #{msg}")
  write Blather::Stanza::Message.new(to, msg, using)
end
|
213
|
+
|
214
|
+
# Pushes a shutdown notification onto this pikelet's queue so that the
# blocking pop in the main loop returns.
#
# opts - Hash of options (default: {}):
#        :redis - connection to push with; nothing happens without one
#
# Returns the result of the push, or nil when no connection was given.
def add_shutdown_event(opts = {})
  redis = opts[:redis]
  return unless redis

  shutdown_notice = JSON.generate('notification_type' => 'shutdown')
  redis.rpush(@config['queue'], shutdown_notice)
end
|
218
|
+
|
219
|
+
# Main loop of the Jabber pikelet.
#
# Starts two periodic timers (a connection-count debug log every 30s and a
# whitespace keepalive every 60s), sets up and opens the XMPP connection,
# then repeatedly pops notifications off the configured queue and relays
# each one as a chat message until should_quit? is true. While the client
# is not connected it sleeps for a second between checks.
#
# A notification whose 'notification_type' is 'shutdown' closes the
# connection instead of being relayed. Both timers are cancelled when the
# loop ends.
def main
  # The config holds the account password; mask it before logging.
  loggable_config = @config.key?('password') ? @config.merge('password' => '[FILTERED]') : @config
  logger.debug("New Jabber pikelet with the following options: #{loggable_config.inspect}")

  count_timer = EM::Synchrony.add_periodic_timer(30) do
    logger.debug("connection count: #{EM.connection_count} #{Time.now.to_s}.#{Time.now.usec.to_s}")
  end

  keepalive_timer = EM::Synchrony.add_periodic_timer(60) do
    logger.debug("calling keepalive on the jabber connection")
    write(' ') if connected?
  end

  setup
  connect # Blather::Client.connect

  # simplified to use a single queue only as it makes the shutdown logic easier
  queue = @config['queue']

  until should_quit?

    # FIXME: should also check if presence has been established in any group chat rooms that are
    # configured before starting to process events, otherwise the first few may get lost (sent
    # before joining the group chat rooms)
    if connected?
      logger.debug("jabber is connected so commencing blpop on #{queue}")
      _popped_from, payload = @redis.blpop(queue)
      event = Yajl::Parser.parse(payload)
      type = event['notification_type']
      logger.debug('jabber notification event received')
      logger.debug(event.inspect)
      if 'shutdown'.eql?(type)
        EM.next_tick do
          # get delays without the next_tick
          close # Blather::Client.close
        end
        # FIXME: should we also set something so should_quit? returns true
        # to prevent retrieving more notifications from the queue while closing?
        # or does close only return once the connection is really and truly closed?
      else
        # Split on the first colon only: everything after it is the check
        # name, which may itself contain colons.
        entity, check = event['event_id'].split(':', 2)
        state = event['state']
        summary = event['summary']
        duration = event['duration'] ? time_period_in_words(event['duration']) : '4 hours'
        address = event['address']

        logger.debug("processing jabber notification address: #{address}, event: #{entity}:#{check}, state: #{state}, summary: #{summary}")

        ack_str = event['event_count'] && !state.eql?('ok') && !'acknowledgement'.eql?(type) ?
          "::: flapjack: ACKID #{event['event_count']} " : ''

        # type and state may be missing from the event; to_s keeps a nil
        # from raising on upcase.
        maint_str = 'acknowledgement'.eql?(type) ?
          "has been acknowledged, unscheduled maintenance created for #{duration}" :
          "is #{state.to_s.upcase}"

        msg = "#{type.to_s.upcase} #{ack_str}::: \"#{check}\" on #{entity} #{maint_str} ::: #{summary}"

        # Addresses listed under 'rooms' are group chats; 'rooms' may be unset.
        chat_type = Array(@config['rooms']).include?(address) ? :groupchat : :chat
        EM.next_tick do
          say(Blather::JID.new(address), msg, chat_type)
        end
      end
    else
      logger.debug("not connected, sleep 1 before retry")
      EM::Synchrony.sleep(1)
    end
  end

  count_timer.cancel
  keepalive_timer.cancel
end
|
291
|
+
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|