flapjack 0.5.5 → 0.6.23
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.rspec +10 -0
- data/Gemfile +18 -0
- data/Guardfile +14 -0
- data/README.md +152 -173
- data/Rakefile +53 -150
- data/bin/flapjack +72 -0
- data/bin/flapjack-nagios-receiver +111 -0
- data/bin/flapjack-nagios-receiver-control +15 -0
- data/bin/flapjack-netsaint-parser +0 -2
- data/bin/flapjack-populator +133 -16
- data/bin/install-flapjack-systemwide +2 -2
- data/config.ru +11 -0
- data/dist/etc/init.d/flapjack +46 -0
- data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
- data/doc/GLOSSARY.md +19 -0
- data/etc/flapjack_config.yaml.example +90 -0
- data/features/events.feature +132 -0
- data/features/notifications.feature +57 -0
- data/features/packaging-lintian.feature +5 -3
- data/features/steps/events_steps.rb +164 -0
- data/features/steps/flapjack-importer_steps.rb +2 -5
- data/features/steps/flapjack-worker_steps.rb +13 -6
- data/features/steps/notifications_steps.rb +178 -0
- data/features/steps/packaging-lintian_steps.rb +14 -0
- data/features/steps/time_travel_steps.rb +34 -0
- data/features/support/env.rb +63 -36
- data/flapjack.gemspec +35 -186
- data/lib/flapjack.rb +2 -0
- data/lib/flapjack/api.rb +274 -0
- data/lib/flapjack/api/entity_check_presenter.rb +184 -0
- data/lib/flapjack/api/entity_presenter.rb +66 -0
- data/lib/flapjack/cli/worker_manager.rb +1 -2
- data/lib/flapjack/configuration.rb +11 -0
- data/lib/flapjack/coordinator.rb +288 -0
- data/lib/flapjack/daemonizing.rb +186 -0
- data/lib/flapjack/data/contact.rb +45 -0
- data/lib/flapjack/data/entity.rb +89 -0
- data/lib/flapjack/data/entity_check.rb +396 -0
- data/lib/flapjack/data/event.rb +144 -0
- data/lib/flapjack/data/notification.rb +13 -0
- data/lib/flapjack/executive.rb +289 -0
- data/lib/flapjack/filters/acknowledgement.rb +39 -0
- data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
- data/lib/flapjack/filters/delays.rb +53 -0
- data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
- data/lib/flapjack/filters/ok.rb +25 -5
- data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
- data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
- data/lib/flapjack/jabber.rb +294 -0
- data/lib/flapjack/notification/common.rb +23 -0
- data/lib/flapjack/notification/email.rb +107 -0
- data/lib/flapjack/notification/email/alert.html.haml +48 -0
- data/lib/flapjack/notification/email/alert.text.erb +14 -0
- data/lib/flapjack/notification/sms.rb +42 -0
- data/lib/flapjack/notification/sms/messagenet.rb +49 -0
- data/lib/flapjack/notifier_engine.rb +4 -4
- data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
- data/lib/flapjack/pagerduty.rb +230 -0
- data/lib/flapjack/patches.rb +108 -19
- data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
- data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
- data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
- data/lib/flapjack/pikelet.rb +56 -0
- data/lib/flapjack/transports/beanstalkd.rb +1 -1
- data/lib/flapjack/transports/result.rb +6 -6
- data/lib/flapjack/utility.rb +46 -0
- data/lib/flapjack/version.rb +5 -0
- data/lib/flapjack/web.rb +198 -0
- data/lib/flapjack/web/views/acknowledge.haml +55 -0
- data/lib/flapjack/web/views/check.haml +162 -0
- data/lib/flapjack/web/views/index.haml +92 -0
- data/lib/flapjack/web/views/self_stats.haml +56 -0
- data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
- data/lib/flapjack/worker/cli.rb +49 -0
- data/{spec → spec.old}/check_sandbox/echo +0 -0
- data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
- data/{spec → spec.old}/configs/recipients.ini +0 -0
- data/{spec → spec.old}/helpers.rb +0 -0
- data/{spec → spec.old}/inifile_spec.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
- data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
- data/{spec → spec.old}/notifier_application_spec.rb +0 -0
- data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_spec.rb +0 -0
- data/{spec → spec.old}/notifier_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
- data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
- data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
- data/{spec → spec.old}/simple.ini +0 -0
- data/{spec → spec.old}/spec.opts +0 -0
- data/{spec → spec.old}/test-filters/blocker.rb +0 -0
- data/{spec → spec.old}/test-filters/mock.rb +0 -0
- data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
- data/{spec → spec.old}/transports/mock_transport.rb +0 -0
- data/{spec → spec.old}/worker_application_spec.rb +0 -0
- data/{spec → spec.old}/worker_options_spec.rb +0 -0
- data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
- data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
- data/spec/lib/flapjack/api_spec.rb +170 -0
- data/spec/lib/flapjack/coordinator_spec.rb +16 -0
- data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
- data/spec/lib/flapjack/data/entity_spec.rb +71 -0
- data/spec/lib/flapjack/data/event_spec.rb +6 -0
- data/spec/lib/flapjack/executive_spec.rb +59 -0
- data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
- data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
- data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
- data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/jabber_spec.rb +150 -0
- data/spec/lib/flapjack/notification/email_spec.rb +6 -0
- data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
- data/spec/lib/flapjack/pikelet_spec.rb +28 -0
- data/spec/lib/flapjack/web_spec.rb +188 -0
- data/spec/spec_helper.rb +44 -0
- data/spec/support/profile_all_formatter.rb +44 -0
- data/spec/support/uncolored_doc_formatter.rb +9 -0
- data/tasks/events.rake +85 -0
- data/tmp/acknowledge.rb +14 -0
- data/tmp/create_config_yaml.rb +16 -0
- data/tmp/create_events_failure.rb +33 -0
- data/tmp/create_events_ok.rb +33 -0
- data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
- data/tmp/create_events_ok_failure.rb +40 -0
- data/tmp/create_events_ok_failure_ack.rb +54 -0
- data/tmp/dummy_entities.json +1 -0
- data/tmp/generate_nagios_test_hosts.rb +16 -0
- data/tmp/parse_config_yaml.rb +7 -0
- data/tmp/redis_delete_all_keys.rb +11 -0
- data/tmp/test_entities.json +1 -0
- metadata +482 -221
- data/TODO.md +0 -36
- data/VERSION +0 -1
- data/bin/flapjack-benchmark +0 -50
- data/bin/flapjack-notifier +0 -21
- data/bin/flapjack-notifier-manager +0 -43
- data/bin/flapjack-stats +0 -27
- data/bin/flapjack-worker +0 -13
- data/bin/flapjack-worker-manager +0 -35
- data/dist/etc/init.d/flapjack-notifier +0 -47
- data/dist/etc/init.d/flapjack-workers +0 -44
- data/features/flapjack-notifier-manager.feature +0 -19
- data/features/flapjack-worker-manager.feature +0 -27
- data/features/flapjack-worker.feature +0 -27
- data/features/netsaint-config-converter.feature +0 -126
- data/features/persistence/couch.feature +0 -105
- data/features/persistence/sqlite3.feature +0 -105
- data/features/persistence/steps/couch_steps.rb +0 -25
- data/features/persistence/steps/generic_steps.rb +0 -102
- data/features/persistence/steps/sqlite3_steps.rb +0 -13
- data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
- data/features/steps/flapjack-worker-manager_steps.rb +0 -48
- data/lib/flapjack/applications/notifier.rb +0 -222
- data/lib/flapjack/cli/notifier.rb +0 -108
- data/lib/flapjack/cli/notifier_manager.rb +0 -86
- data/lib/flapjack/cli/worker.rb +0 -51
@@ -0,0 +1,144 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
module Flapjack
|
4
|
+
module Data
|
5
|
+
class Event

  attr_accessor :previous_state

  # Helper method for getting the next event from the 'events' list.
  #
  # Options:
  #   :persistence - the Redis-like connection that holds the 'events' list.
  #   :block       - true (the default) pops with blpop, waiting for events
  #                  coming from other systems; false pops with lpop and
  #                  returns nil when there are no events on the queue.
  #
  # Returns an Event built from the parsed JSON, or nil when nothing was popped.
  def self.next(opts={})
    options = { :block => true }.merge(opts)
    redis   = options[:persistence]

    raw = if options[:block]
      # The serialised event is the last element of what blpop hands back.
      popped = redis.blpop('events')
      popped && popped.last
    else
      redis.lpop('events')
    end

    raw ? self.new(::JSON.parse(raw)) : nil
  end

  # Provide a count of the number of events on the queue to be processed.
  def self.pending_count(opts = {})
    opts[:persistence].llen('events')
  end

  # attrs - Hash of event attributes keyed by String ('entity', 'check',
  #         'type', 'state', 'summary', 'time', ...).
  #
  # A missing 'time' is filled in with the current time.
  def initialize(attrs={})
    # Work on a copy so stamping 'time' does not modify the caller's hash.
    @attrs = attrs.dup
    @attrs['time'] = Time.now.to_i unless @attrs.has_key?('time')
  end

  # Lower-cased event state, or nil when the event has none.
  def state
    lowercased('state')
  end

  # Lower-cased entity name, or nil when the event has none.
  def entity
    lowercased('entity')
  end

  def check
    @attrs['check']
  end

  # FIXME some values are only set for certain event types --
  # this may not be the best way to do this
  def acknowledgement_id
    @attrs['acknowledgement_id']
  end

  # Duration as an Integer, or nil when not supplied.
  def duration
    return unless @attrs['duration']
    @attrs['duration'].to_i
  end
  # end FIXME

  # "entity:check" identifier; a missing part is rendered as '-'.
  def id
    (entity || '-') + ':' + (check || '-')
  end

  # FIXME: site specific
  # The part of the entity name before the first '-'.
  def client
    return unless entity
    entity.split('-').first
  end

  # Lower-cased event type, or nil when the event has none.
  def type
    lowercased('type')
  end

  def summary
    @attrs['summary']
  end

  # Event time as an Integer, or nil when the stored value is nil/false.
  def time
    return unless @attrs['time']
    @attrs['time'].to_i
  end

  def action?
    type == 'action'
  end

  def service?
    type == 'service'
  end

  def acknowledgement?
    action? && state == 'acknowledgement'
  end

  def ok?
    %w(ok up).include?(state)
  end

  def unknown?
    state == 'unknown'
  end

  def unreachable?
    state == 'unreachable'
  end

  def warning?
    state == 'warning'
  end

  def critical?
    state == 'critical'
  end

  def failure?
    warning? || critical?
  end

  private

  # Lower-cased value stored under +key+, or nil when it is absent.
  def lowercased(key)
    value = @attrs[key]
    value ? value.downcase : nil
  end

end
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
@@ -0,0 +1,289 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'log4r'
|
4
|
+
require 'log4r/outputter/fileoutputter'
|
5
|
+
|
6
|
+
require 'flapjack'
|
7
|
+
require 'flapjack/filters/acknowledgement'
|
8
|
+
require 'flapjack/filters/ok'
|
9
|
+
require 'flapjack/filters/scheduled_maintenance'
|
10
|
+
require 'flapjack/filters/unscheduled_maintenance'
|
11
|
+
require 'flapjack/filters/detect_mass_client_failures'
|
12
|
+
require 'flapjack/filters/delays'
|
13
|
+
require 'flapjack/data/contact'
|
14
|
+
require 'flapjack/data/entity_check'
|
15
|
+
require 'flapjack/data/event'
|
16
|
+
require 'flapjack/notification/common'
|
17
|
+
require 'flapjack/notification/sms'
|
18
|
+
require 'flapjack/notification/email'
|
19
|
+
require 'flapjack/pikelet'
|
20
|
+
|
21
|
+
module Flapjack
|
22
|
+
|
23
|
+
class Executive
|
24
|
+
include Flapjack::Pikelet
|
25
|
+
|
26
|
+
# Prepares everything the main loop needs: the redis connection, the
# per-media queue names from the config, the notification log, the filter
# chain, and the boot time / event counters stored in redis.
def setup
  @redis = build_redis_connection_pool
  @logger.debug("Flapjack::Executive.initialize: @redis client status: " + @redis.client.inspect)

  # Queue name per notification medium, read from '<medium>_queue' in the config.
  @queues = {}
  [:email, :sms, :jabber, :pagerduty].each do |medium|
    @queues[medium] = @config["#{medium}_queue"]
  end

  notify_path = @config['notification_log_file'] || 'log/notify.log'
  @notifylog = Log4r::Logger.new("executive")
  @notifylog.add(Log4r::FileOutputter.new("notifylog", :filename => notify_path))

  # FIXME: Put loading filters into separate method
  # The order of this list is the order in which process_event consults them.
  filter_opts = { :log => @logger, :persistence => @redis }
  @filters = [
    Flapjack::Filters::Ok,
    Flapjack::Filters::ScheduledMaintenance,
    Flapjack::Filters::UnscheduledMaintenance,
    Flapjack::Filters::DetectMassClientFailures,
    Flapjack::Filters::Delays,
    Flapjack::Filters::Acknowledgement
  ].map {|filter_class| filter_class.new(filter_opts) }

  @boot_time = Time.now

  # FIXME: all of the below keys assume there is only ever one executive running;
  # we could generate a fuid and save it to disk, and prepend it from that
  # point on...

  # TODO unset on exit?
  @redis.set('boot_time', @boot_time.to_i)

  # FIXME: add an administrative function to reset all event counters
  # Only zero the counters when the 'all' counter has never been written.
  if @redis.hget('event_counters', 'all').nil?
    %w(all ok failure action).map {|counter|
      @redis.hset('event_counters', counter, 0)
    }.last
  end
end
|
67
|
+
|
68
|
+
# Runs setup, then keeps fetching events and handing them to process_event
# until should_quit? reports true.
def main
  setup

  @logger.info("Booting main loop.")

  while !should_quit?
    @logger.info("Waiting for event...")
    incoming = Flapjack::Data::Event.next(:persistence => @redis)
    next if incoming.nil?
    process_event(incoming)
  end

  @logger.info("Exiting main loop.")
end
|
80
|
+
|
81
|
+
# this must use a separate connection to the main Executive one, as it's running
|
82
|
+
# from a different fiber while the main one is blocking.
|
83
|
+
# Appends a 'shutdown' event to the 'events' list using the connection given
# in opts[:redis]. Does nothing (returns nil) when no connection is supplied.
def add_shutdown_event(opts = {})
  redis = opts[:redis]
  return unless redis

  shutdown_event = { 'type'    => 'shutdown',
                     'host'    => '',
                     'service' => '',
                     'state'   => '' }
  redis.rpush('events', JSON.generate(shutdown_event))
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
# Handles a single event: logs it, records it via update_keys, and then
# either stops (shutdown event, filtering skipped, or a filter blocked it)
# or generates notifications for it.
def process_event(event)
  queued = Flapjack::Data::Event.pending_count(:persistence => @redis)
  @logger.debug("#{queued} events waiting on the queue")
  @logger.debug("Raw event received: #{event.inspect}")

  event_time  = event.time
  time_suffix = event_time ? ", #{Time.at(event_time).to_s}" : ''
  @logger.info("Processing Event: #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_suffix}")

  # Shutdown events are not tied to an entity check, so nothing is looked up.
  entity_check = nil
  unless event.type == 'shutdown'
    entity_check = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @redis)
  end

  outcome = update_keys(event, entity_check)
  return if outcome[:shutdown]

  if outcome[:skip_filters]
    @logger.info("#{Time.now}: Not sending notifications for event #{event.id} because filtering was skipped")
    return
  end

  # The first filter that blocks wins; later filters are not consulted.
  blocking_filter = @filters.find {|filter| filter.block?(event) }
  if blocking_filter
    blocked_by = [ blocking_filter.name ].join(', ')
    @logger.info("#{Time.now}: Not sending notifications for event #{event.id} because these filters blocked: #{blocked_by}")
    return
  end

  @logger.info("#{Time.now}: Sending notifications for event #{event.id}")
  generate_notification(event, entity_check)
end
|
123
|
+
|
124
|
+
# Records an incoming event in redis and on its entity check.
#
# event        - the event being processed.
# entity_check - the entity check the event refers to; may be nil when none
#                was found for the event id (and is nil for shutdown events).
#
# Returns a Hash with :skip_filters (true when no notification should be
# considered for this event) and, for shutdown events, :shutdown => true.
def update_keys(event, entity_check)
  result = { :skip_filters => false }
  timestamp = Time.now.to_i
  @event_count = @redis.hincrby('event_counters', 'all', 1)

  # FIXME: validate that the event is sane before we ever get here
  # FIXME: create an event if there is dodgy data

  case event.type
  # Service events represent changes in state on monitored systems
  when 'service'
    # Without an entity check there is nothing to update; bail out here
    # rather than raising NoMethodError on nil and taking the caller down.
    if entity_check.nil?
      @logger.error("update_keys: no entity check found for service event '#{event.id}', skipping it")
      result[:skip_filters] = true
      return result
    end

    # Track when we last saw an event for a particular entity:check pair
    entity_check.last_update = timestamp

    if event.ok?
      @redis.hincrby('event_counters', 'ok', 1)
    elsif event.failure?
      @redis.hincrby('event_counters', 'failure', 1)
      @redis.hset('unacknowledged_failures', @event_count, event.id)
    end

    event.previous_state = entity_check.state
    @logger.info("No previous state for event #{event.id}") if event.previous_state.nil?

    # If there is a state change, update record with: the time, the new state
    if event.state != event.previous_state
      entity_check.update_state(event.state, :timestamp => timestamp,
        :summary => event.summary, :client => event.client,
        :count => @event_count)
    end

    # If the service event's state is ok and there was no previous state, don't alert.
    # This stops new checks from alerting as "recovery" after they have been added.
    if !event.previous_state && event.ok?
      @logger.debug("setting skip_filters to true because there was no previous state and event is ok")
      result[:skip_filters] = true
    end

    entity_check.update_scheduled_maintenance

  # Action events represent human or automated interaction with Flapjack
  when 'action'
    # When an action event is processed, store the event.
    @redis.hset(event.id + ':actions', timestamp, event.state)
    # NOTE(review): this only counts action events whose state is ok/up, so
    # acknowledgements are never counted -- confirm whether the condition is
    # intended before relying on the 'action' counter.
    @redis.hincrby('event_counters', 'action', 1) if event.ok?

    if event.acknowledgement? && event.acknowledgement_id
      @redis.hdel('unacknowledged_failures', event.acknowledgement_id)
    end
  when 'shutdown'
    # should this be logged as an action instead? being minimally invasive for now
    result[:shutdown] = true
  end

  result
end
|
184
|
+
|
185
|
+
# takes an event for which a notification needs to be generated, works out the type of
|
186
|
+
# notification, updates the notification history in redis, calls other methods to work out who
|
187
|
+
# to notify, by what method, and finally to have the notifications sent
|
188
|
+
# Works out the notification type for the event from its type and state,
# records the notification time in redis, looks up the contacts for the
# entity check and passes everything on to send_notifications.
def generate_notification(event, entity_check)
  timestamp = Time.now.to_i

  # event type => event state => notification type; anything not listed
  # falls back to 'unknown'.
  type_table = {
    'service' => { 'ok'       => 'recovery',
                   'unknown'  => 'recovery',
                   'warning'  => 'problem',
                   'critical' => 'problem' },
    'action'  => { 'acknowledgement' => 'acknowledgement' }
  }
  notification_type = (type_table[event.type] || {})[event.state] || 'unknown'

  @redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
  @redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
  @logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.")

  contacts = Flapjack::Data::Contact.find_all_for_entity_check(entity_check, :redis => @redis)
  send_notifications(event, notification_type, contacts)
end
|
212
|
+
|
213
|
+
# takes an event, a notification type, and an array of contacts and creates jobs in resque
|
214
|
+
# (eventually) for each notification
|
215
|
+
# Queues one notification per contact per medium.
#
# event             - the event being notified about.
# notification_type - String such as 'problem', 'recovery', 'acknowledgement'.
# contacts          - Array of contact ids.
#
# For every contact the media hash is looked up with media_for_contact and
# each medium/address pair is written to the notification log and, when a
# queue is configured for that medium, queued: sms and email through Resque,
# jabber and pagerduty as encoded entries on a redis list.
def send_notifications(event, notification_type, contacts)
  notification = { 'event_id'          => event.id,
                   'state'             => event.state,
                   'summary'           => event.summary,
                   'time'              => event.time,
                   'notification_type' => notification_type }

  contacts.each {|contact_id|
    media = media_for_contact(contact_id)

    # Per-contact copy, so the shared base hash never carries one contact's
    # details over to the next.
    contact_notification = notification.merge(
      'contact_id'         => contact_id,
      'contact_first_name' => @redis.hget("contact:#{contact_id}", 'first_name'),
      'contact_last_name'  => @redis.hget("contact:#{contact_id}", 'last_name'))

    media.each_pair {|media_type, address|

      # Log the medium being used, not the contact's whole media hash.
      @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | #{contact_id} | #{media_type} | #{address}")
      # queue this notification
      # FIXME: make a Contact class perhaps
      notif = contact_notification.dup
      notif['media']   = media_type
      notif['address'] = address
      notif['id']      = fuid
      dur = event.duration
      notif['duration'] = dur if dur
      @logger.debug("send_notifications: sending notification: #{notif.inspect}")

      # Media without a configured queue are logged above but not queued.
      case media_type
      when "sms"
        if @queues[:sms]
          Resque.enqueue_to(@queues[:sms], Notification::Sms, notif)
        end
      when "email"
        if @queues[:email]
          Resque.enqueue_to(@queues[:email], Notification::Email, notif)
        end
      when "jabber"
        if @queues[:jabber]
          notif['event_count'] = @event_count if @event_count
          # puts a notification into the jabber queue (redis list)
          @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(notif))
        end
      when "pagerduty"
        if @queues[:pagerduty]
          @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(notif))
        end
      end
    }
    if media.length == 0
      @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | #{contact_id} | NO MEDIA FOR CONTACT")
    end
  }
  if contacts.length == 0
    @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | NO CONTACTS")
  end
end
|
273
|
+
|
274
|
+
# takes a contact ID and returns a hash containing each of the media the contact wishes to be
|
275
|
+
# contacted by, and the associated address for each.
|
276
|
+
# eg:
|
277
|
+
# media_for_contact('123') -> { :sms => "+61401234567", :email => "gno@free.dom" }
|
278
|
+
#
|
279
|
+
# Returns everything stored in the redis hash "contact_media:<contact>" for
# the given contact id.
def media_for_contact(contact)
  media_key = "contact_media:#{contact}"
  @redis.hgetall(media_key)
end
|
282
|
+
|
283
|
+
# generates a fairly unique identifier to use as a message id
|
284
|
+
# Builds a message id of the form "<object_id>-<seconds>.<microseconds>"
# from this object's id and the current time.
def fuid
  # Read the clock once so the seconds and microseconds belong to the same
  # instant.
  now = Time.now
  "#{self.object_id}-#{now.to_i}.#{now.tv_usec}"
end
|
287
|
+
|
288
|
+
end
|
289
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'flapjack/filters/base'
|
4
|
+
|
5
|
+
module Flapjack
|
6
|
+
module Filters
|
7
|
+
# * If the action event’s state is an acknowledgement, and the corresponding check is in a
|
8
|
+
# failure state, then set unscheduled maintenance for 4 hours on the check
|
9
|
+
# * If the action event’s state is an acknowledgement, and the corresponding check is not in a
|
10
|
+
# failure state, then don’t alert
|
11
|
+
class Acknowledgement
|
12
|
+
include Base
|
13
|
+
|
14
|
+
# Decides whether this filter blocks notification of the event.
#
# Only action events are considered; any other event type passes. An
# acknowledgement for a check listed in "failed_checks" creates unscheduled
# maintenance on that check (event.duration seconds, 4 hours by default) and
# passes. Every other action event is blocked.
#
# Returns true to block, false to pass.
def block?(event)
  timestamp = Time.now.to_i
  result = false
  message = "not an action event"

  if event.type == 'action'
    acknowledged = event.acknowledgement?
    # Look the check up once, so the decision and the debug output below are
    # based on the same answer.
    failing = @persistence.zscore("failed_checks", event.id)

    if acknowledged && failing
      ec = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
      if ec.nil?
        # NOTE(review): the event still passes the filter in this case --
        # confirm that is intended.
        @log.error "Filter: Acknowledgement: unknown entity for event '#{event.id}'"
        message = "unknown entity for #{event.id}"
      else
        ec.create_unscheduled_maintenance(:start_time => timestamp,
          :duration => (event.duration || (4 * 60 * 60)))
        message = "unscheduled maintenance created for #{event.id}"
      end
    else
      message = "no action taken"
      result = true
      @log.debug("Filter: Acknowledgement: blocking because event.acknowledgement? is false") unless acknowledged
      @log.debug("Filter: Acknowledgement: blocking because zscore of failed_checks for #{event.id} is false") unless failing
    end
  end

  @log.debug("Filter: Acknowledgement: #{result ? "block" : "pass"} (#{message})")
  result
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|