flapjack 0.5.5 → 0.6.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.rspec +10 -0
- data/Gemfile +18 -0
- data/Guardfile +14 -0
- data/README.md +152 -173
- data/Rakefile +53 -150
- data/bin/flapjack +72 -0
- data/bin/flapjack-nagios-receiver +111 -0
- data/bin/flapjack-nagios-receiver-control +15 -0
- data/bin/flapjack-netsaint-parser +0 -2
- data/bin/flapjack-populator +133 -16
- data/bin/install-flapjack-systemwide +2 -2
- data/config.ru +11 -0
- data/dist/etc/init.d/flapjack +46 -0
- data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
- data/doc/GLOSSARY.md +19 -0
- data/etc/flapjack_config.yaml.example +90 -0
- data/features/events.feature +132 -0
- data/features/notifications.feature +57 -0
- data/features/packaging-lintian.feature +5 -3
- data/features/steps/events_steps.rb +164 -0
- data/features/steps/flapjack-importer_steps.rb +2 -5
- data/features/steps/flapjack-worker_steps.rb +13 -6
- data/features/steps/notifications_steps.rb +178 -0
- data/features/steps/packaging-lintian_steps.rb +14 -0
- data/features/steps/time_travel_steps.rb +34 -0
- data/features/support/env.rb +63 -36
- data/flapjack.gemspec +35 -186
- data/lib/flapjack.rb +2 -0
- data/lib/flapjack/api.rb +274 -0
- data/lib/flapjack/api/entity_check_presenter.rb +184 -0
- data/lib/flapjack/api/entity_presenter.rb +66 -0
- data/lib/flapjack/cli/worker_manager.rb +1 -2
- data/lib/flapjack/configuration.rb +11 -0
- data/lib/flapjack/coordinator.rb +288 -0
- data/lib/flapjack/daemonizing.rb +186 -0
- data/lib/flapjack/data/contact.rb +45 -0
- data/lib/flapjack/data/entity.rb +89 -0
- data/lib/flapjack/data/entity_check.rb +396 -0
- data/lib/flapjack/data/event.rb +144 -0
- data/lib/flapjack/data/notification.rb +13 -0
- data/lib/flapjack/executive.rb +289 -0
- data/lib/flapjack/filters/acknowledgement.rb +39 -0
- data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
- data/lib/flapjack/filters/delays.rb +53 -0
- data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
- data/lib/flapjack/filters/ok.rb +25 -5
- data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
- data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
- data/lib/flapjack/jabber.rb +294 -0
- data/lib/flapjack/notification/common.rb +23 -0
- data/lib/flapjack/notification/email.rb +107 -0
- data/lib/flapjack/notification/email/alert.html.haml +48 -0
- data/lib/flapjack/notification/email/alert.text.erb +14 -0
- data/lib/flapjack/notification/sms.rb +42 -0
- data/lib/flapjack/notification/sms/messagenet.rb +49 -0
- data/lib/flapjack/notifier_engine.rb +4 -4
- data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
- data/lib/flapjack/pagerduty.rb +230 -0
- data/lib/flapjack/patches.rb +108 -19
- data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
- data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
- data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
- data/lib/flapjack/pikelet.rb +56 -0
- data/lib/flapjack/transports/beanstalkd.rb +1 -1
- data/lib/flapjack/transports/result.rb +6 -6
- data/lib/flapjack/utility.rb +46 -0
- data/lib/flapjack/version.rb +5 -0
- data/lib/flapjack/web.rb +198 -0
- data/lib/flapjack/web/views/acknowledge.haml +55 -0
- data/lib/flapjack/web/views/check.haml +162 -0
- data/lib/flapjack/web/views/index.haml +92 -0
- data/lib/flapjack/web/views/self_stats.haml +56 -0
- data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
- data/lib/flapjack/worker/cli.rb +49 -0
- data/{spec → spec.old}/check_sandbox/echo +0 -0
- data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
- data/{spec → spec.old}/configs/recipients.ini +0 -0
- data/{spec → spec.old}/helpers.rb +0 -0
- data/{spec → spec.old}/inifile_spec.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
- data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
- data/{spec → spec.old}/notifier_application_spec.rb +0 -0
- data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_spec.rb +0 -0
- data/{spec → spec.old}/notifier_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
- data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
- data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
- data/{spec → spec.old}/simple.ini +0 -0
- data/{spec → spec.old}/spec.opts +0 -0
- data/{spec → spec.old}/test-filters/blocker.rb +0 -0
- data/{spec → spec.old}/test-filters/mock.rb +0 -0
- data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
- data/{spec → spec.old}/transports/mock_transport.rb +0 -0
- data/{spec → spec.old}/worker_application_spec.rb +0 -0
- data/{spec → spec.old}/worker_options_spec.rb +0 -0
- data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
- data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
- data/spec/lib/flapjack/api_spec.rb +170 -0
- data/spec/lib/flapjack/coordinator_spec.rb +16 -0
- data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
- data/spec/lib/flapjack/data/entity_spec.rb +71 -0
- data/spec/lib/flapjack/data/event_spec.rb +6 -0
- data/spec/lib/flapjack/executive_spec.rb +59 -0
- data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
- data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
- data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
- data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/jabber_spec.rb +150 -0
- data/spec/lib/flapjack/notification/email_spec.rb +6 -0
- data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
- data/spec/lib/flapjack/pikelet_spec.rb +28 -0
- data/spec/lib/flapjack/web_spec.rb +188 -0
- data/spec/spec_helper.rb +44 -0
- data/spec/support/profile_all_formatter.rb +44 -0
- data/spec/support/uncolored_doc_formatter.rb +9 -0
- data/tasks/events.rake +85 -0
- data/tmp/acknowledge.rb +14 -0
- data/tmp/create_config_yaml.rb +16 -0
- data/tmp/create_events_failure.rb +33 -0
- data/tmp/create_events_ok.rb +33 -0
- data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
- data/tmp/create_events_ok_failure.rb +40 -0
- data/tmp/create_events_ok_failure_ack.rb +54 -0
- data/tmp/dummy_entities.json +1 -0
- data/tmp/generate_nagios_test_hosts.rb +16 -0
- data/tmp/parse_config_yaml.rb +7 -0
- data/tmp/redis_delete_all_keys.rb +11 -0
- data/tmp/test_entities.json +1 -0
- metadata +482 -221
- data/TODO.md +0 -36
- data/VERSION +0 -1
- data/bin/flapjack-benchmark +0 -50
- data/bin/flapjack-notifier +0 -21
- data/bin/flapjack-notifier-manager +0 -43
- data/bin/flapjack-stats +0 -27
- data/bin/flapjack-worker +0 -13
- data/bin/flapjack-worker-manager +0 -35
- data/dist/etc/init.d/flapjack-notifier +0 -47
- data/dist/etc/init.d/flapjack-workers +0 -44
- data/features/flapjack-notifier-manager.feature +0 -19
- data/features/flapjack-worker-manager.feature +0 -27
- data/features/flapjack-worker.feature +0 -27
- data/features/netsaint-config-converter.feature +0 -126
- data/features/persistence/couch.feature +0 -105
- data/features/persistence/sqlite3.feature +0 -105
- data/features/persistence/steps/couch_steps.rb +0 -25
- data/features/persistence/steps/generic_steps.rb +0 -102
- data/features/persistence/steps/sqlite3_steps.rb +0 -13
- data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
- data/features/steps/flapjack-worker-manager_steps.rb +0 -48
- data/lib/flapjack/applications/notifier.rb +0 -222
- data/lib/flapjack/cli/notifier.rb +0 -108
- data/lib/flapjack/cli/notifier_manager.rb +0 -86
- data/lib/flapjack/cli/worker.rb +0 -51
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
module Flapjack
|
|
4
|
+
module Data
|
|
5
|
+
# A single monitoring event, built from the decoded JSON hash that was
# pushed onto the 'events' list of the persistence store. Exposes
# normalised accessors for the hash's fields plus state/type predicates.
class Event

  # State of the check before this event was processed; assigned by the
  # code that processes the event, never by Event itself.
  attr_accessor :previous_state

  # Helper method for getting the next event.
  #
  # opts[:persistence] - object responding to blpop/lpop (required).
  # opts[:block]       - true (the default) waits on blpop until an event
  #                      arrives; false uses lpop and gives up immediately
  #                      if the list is empty.
  #
  # Returns an Event, or nil when no raw event was obtained.
  def self.next(opts = {})
    options = { :block => true }.merge(opts)
    queue   = options[:persistence]

    raw = if options[:block]
      # blpop is expected to hand back a [list, value] pair, hence #last
      queue.blpop('events').last
    else
      queue.lpop('events')
    end

    raw ? new(::JSON.parse(raw)) : nil
  end

  # Provide a count of the number of events on the queue to be processed.
  #
  # opts[:persistence] - object responding to llen (required).
  def self.pending_count(opts = {})
    opts[:persistence].llen('events')
  end

  # attrs - Hash with string keys ('state', 'entity', 'check', 'type',
  #         'summary', 'time', ...). It is copied, so stamping a default
  #         'time' never modifies the caller's hash.
  def initialize(attrs = {})
    @attrs = attrs.dup
    @attrs['time'] = Time.now.to_i unless @attrs.has_key?('time')
  end

  # Lower-cased 'state' attribute, or nil if it is absent.
  def state
    return unless @attrs['state']
    @attrs['state'].downcase
  end

  # Lower-cased 'entity' attribute, or nil if it is absent.
  def entity
    return unless @attrs['entity']
    @attrs['entity'].downcase
  end

  # The 'check' attribute exactly as supplied (case is preserved).
  def check
    @attrs['check']
  end

  # FIXME some values are only set for certain event types --
  # this may not be the best way to do this
  def acknowledgement_id
    @attrs['acknowledgement_id']
  end

  # The 'duration' attribute as an Integer, or nil if it is absent.
  def duration
    return unless @attrs['duration']
    @attrs['duration'].to_i
  end
  # end FIXME

  # "entity:check" identifier; a missing part is rendered as '-'.
  def id
    (entity || '-') + ':' + (check || '-')
  end

  # FIXME: site specific
  # Everything in the entity name before its first '-'.
  def client
    return unless entity
    entity.split('-').first
  end

  # Lower-cased 'type' attribute, or nil if it is absent.
  def type
    return unless @attrs['type']
    @attrs['type'].downcase
  end

  # The 'summary' attribute exactly as supplied.
  def summary
    @attrs['summary']
  end

  # The 'time' attribute as an Integer (defaulted in #initialize).
  def time
    return unless @attrs['time']
    @attrs['time'].to_i
  end

  def action?
    type == 'action'
  end

  def service?
    type == 'service'
  end

  def acknowledgement?
    action? && state == 'acknowledgement'
  end

  # Both 'ok' and 'up' count as ok.
  def ok?
    state == 'ok' || state == 'up'
  end

  def unknown?
    state == 'unknown'
  end

  def unreachable?
    state == 'unreachable'
  end

  def warning?
    state == 'warning'
  end

  def critical?
    state == 'critical'
  end

  # Only 'warning' and 'critical' count as failures.
  def failure?
    warning? || critical?
  end

end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'log4r'
|
|
4
|
+
require 'log4r/outputter/fileoutputter'
|
|
5
|
+
|
|
6
|
+
require 'flapjack'
|
|
7
|
+
require 'flapjack/filters/acknowledgement'
|
|
8
|
+
require 'flapjack/filters/ok'
|
|
9
|
+
require 'flapjack/filters/scheduled_maintenance'
|
|
10
|
+
require 'flapjack/filters/unscheduled_maintenance'
|
|
11
|
+
require 'flapjack/filters/detect_mass_client_failures'
|
|
12
|
+
require 'flapjack/filters/delays'
|
|
13
|
+
require 'flapjack/data/contact'
|
|
14
|
+
require 'flapjack/data/entity_check'
|
|
15
|
+
require 'flapjack/data/event'
|
|
16
|
+
require 'flapjack/notification/common'
|
|
17
|
+
require 'flapjack/notification/sms'
|
|
18
|
+
require 'flapjack/notification/email'
|
|
19
|
+
require 'flapjack/pikelet'
|
|
20
|
+
|
|
21
|
+
module Flapjack
|
|
22
|
+
|
|
23
|
+
class Executive
|
|
24
|
+
include Flapjack::Pikelet
|
|
25
|
+
|
|
26
|
+
# Prepares everything the main loop needs: the redis connection, the
# per-media queue names from @config, the notification log, the filter
# chain, and the boot-time / event-counter keys in redis.
def setup
  @redis = build_redis_connection_pool
  @logger.debug("Flapjack::Executive.initialize: @redis client status: " + @redis.client.inspect)

  # queue names come straight from the config; a missing entry stays nil
  @queues = {
    :email     => @config['email_queue'],
    :sms       => @config['sms_queue'],
    :jabber    => @config['jabber_queue'],
    :pagerduty => @config['pagerduty_queue']
  }

  notify_log_path = @config['notification_log_file'] || 'log/notify.log'
  @notifylog = Log4r::Logger.new("executive")
  @notifylog.add(Log4r::FileOutputter.new("notifylog", :filename => notify_log_path))

  # FIXME: Put loading filters into separate method
  # Every filter is handed the same options hash; the order below is the
  # order in which process_event consults them.
  filter_options = { :log => @logger, :persistence => @redis }
  @filters = [
    Flapjack::Filters::Ok,
    Flapjack::Filters::ScheduledMaintenance,
    Flapjack::Filters::UnscheduledMaintenance,
    Flapjack::Filters::DetectMassClientFailures,
    Flapjack::Filters::Delays,
    Flapjack::Filters::Acknowledgement
  ].map { |filter_class| filter_class.new(filter_options) }

  @boot_time = Time.now

  # FIXME: all of the below keys assume there is only ever one executive running;
  # we could generate a fuid and save it to disk, and prepend it from that
  # point on...

  # TODO unset on exit?
  @redis.set('boot_time', @boot_time.to_i)

  # FIXME: add an administrative function to reset all event counters
  # Counters are only zeroed when the 'all' counter has never been written.
  if @redis.hget('event_counters', 'all').nil?
    %w(all ok failure action).each do |counter|
      @redis.hset('event_counters', counter, 0)
    end
  end
end
|
|
67
|
+
|
|
68
|
+
# Runs setup, then keeps pulling events off the queue and processing them
# until should_quit? becomes true.
def main
  setup

  @logger.info("Booting main loop.")

  until should_quit?
    @logger.info("Waiting for event...")
    incoming = Flapjack::Data::Event.next(:persistence => @redis)
    # Event.next may return nil, in which case there is nothing to process
    next if incoming.nil?
    process_event(incoming)
  end

  @logger.info("Exiting main loop.")
end
|
|
80
|
+
|
|
81
|
+
# this must use a separate connection to the main Executive one, as it's running
|
|
82
|
+
# from a different fiber while the main one is blocking.
|
|
83
|
+
# Pushes a 'shutdown' event onto the 'events' list so that a blocked main
# loop receives something to wake up on.
#
# opts[:redis] - connection to push with; when it is missing nothing is
#                done and nil is returned.
def add_shutdown_event(opts = {})
  redis = opts[:redis]
  return unless redis

  shutdown_event = { 'type'    => 'shutdown',
                     'host'    => '',
                     'service' => '',
                     'state'   => '' }
  redis.rpush('events', JSON.generate(shutdown_event))
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
# Handles one event end to end: logs it, looks up its entity check
# (skipped for shutdown events), updates the bookkeeping keys, consults
# the filters and, if nothing blocks, generates notifications.
def process_event(event)
  pending = Flapjack::Data::Event.pending_count(:persistence => @redis)
  @logger.debug("#{pending} events waiting on the queue")
  @logger.debug("Raw event received: #{event.inspect}")

  time_at = event.time
  time_at_str = ''
  time_at_str = ", #{Time.at(time_at).to_s}" if time_at
  @logger.info("Processing Event: #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}")

  # shutdown events carry no entity/check, so no lookup is attempted
  entity_check = nil
  unless event.type == 'shutdown'
    entity_check = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @redis)
  end

  outcome = update_keys(event, entity_check)
  return if outcome[:shutdown]

  if outcome[:skip_filters]
    @logger.info("#{Time.now}: Not sending notifications for event #{event.id} because filtering was skipped")
    return
  end

  # only the first filter that blocks is consulted and reported
  blocker = @filters.find { |candidate| candidate.block?(event) }
  unless blocker
    @logger.info("#{Time.now}: Sending notifications for event #{event.id}")
    generate_notification(event, entity_check)
    return
  end

  blocker_names = [ blocker.name ]
  @logger.info("#{Time.now}: Not sending notifications for event #{event.id} because these filters blocked: #{blocker_names.join(', ')}")
  nil
end
|
|
123
|
+
|
|
124
|
+
# Records bookkeeping for an incoming event: bumps the event counters,
# stores state changes for service events, stores action events, and
# flags shutdown events.
#
# event        - the event being processed.
# entity_check - the entity check for the event; may be nil.
#
# Returns a Hash: :skip_filters is true when the caller should not run
# the filters or notify; :shutdown is true for shutdown events.
def update_keys(event, entity_check)
  result = { :skip_filters => false }
  timestamp = Time.now.to_i
  @event_count = @redis.hincrby('event_counters', 'all', 1)

  # FIXME: validate that the event is sane before we ever get here
  # FIXME: create an event if there is dodgy data

  case event.type
  # Service events represent changes in state on monitored systems
  when 'service'
    # Without an entity check there is nothing to record state against;
    # bail out instead of raising NoMethodError on nil further down.
    if entity_check.nil?
      @logger.error("No entity check found for service event #{event.id}, skipping state update and filters")
      result[:skip_filters] = true
      return result
    end

    # Track when we last saw an event for a particular entity:check pair
    entity_check.last_update = timestamp

    if event.ok?
      @redis.hincrby('event_counters', 'ok', 1)
    elsif event.failure?
      @redis.hincrby('event_counters', 'failure', 1)
      # keyed by the running event count so an acknowledgement can refer to it
      @redis.hset('unacknowledged_failures', @event_count, event.id)
    end

    event.previous_state = entity_check.state
    @logger.info("No previous state for event #{event.id}") if event.previous_state.nil?

    # If there is a state change, update record with: the time, the new state
    if event.state != event.previous_state
      entity_check.update_state(event.state, :timestamp => timestamp,
        :summary => event.summary, :client => event.client,
        :count => @event_count)
    end

    # If the service event's state is ok and there was no previous state, don't alert.
    # This stops new checks from alerting as "recovery" after they have been added.
    if !event.previous_state && event.ok?
      @logger.debug("setting skip_filters to true because there was no previous state and event is ok")
      result[:skip_filters] = true
    end

    entity_check.update_scheduled_maintenance

  # Action events represent human or automated interaction with Flapjack
  when 'action'
    # When an action event is processed, store the event.
    @redis.hset(event.id + ':actions', timestamp, event.state)
    # NOTE(review): the action counter only moves when the action's state
    # is ok/up, which is never the case for an acknowledgement -- confirm
    # this is intended.
    @redis.hincrby('event_counters', 'action', 1) if event.ok?

    if event.acknowledgement? && event.acknowledgement_id
      @redis.hdel('unacknowledged_failures', event.acknowledgement_id)
    end
  when 'shutdown'
    # should this be logged as an action instead? being minimally invasive for now
    result[:shutdown] = true
  end

  result
end
|
|
184
|
+
|
|
185
|
+
# takes an event for which a notification needs to be generated, works out the type of
|
|
186
|
+
# notification, updates the notification history in redis, calls other methods to work out who
|
|
187
|
+
# to notify, by what method, and finally to have the notifications sent
|
|
188
|
+
# Works out the notification type for the event, records the notification
# time in redis under "<event id>:last_<type>_notification" and
# "<event id>:<type>_notifications", then passes the event on to
# send_notifications together with the contacts for the entity check.
#
# event        - the event to notify about.
# entity_check - the entity check used to look up contacts.
def generate_notification(event, entity_check)
  timestamp = Time.now.to_i

  notification_type =
    case event.type
    when 'service'
      case event.state
      # 'up' is listed because Event#ok? treats it the same as 'ok'.
      # NOTE(review): 'unknown' mapping to a recovery looks surprising -- confirm.
      when 'ok', 'up', 'unknown' then 'recovery'
      when 'warning', 'critical' then 'problem'
      end
    when 'action'
      'acknowledgement' if event.state == 'acknowledgement'
    end
  # anything not matched above is reported with the literal type 'unknown'
  notification_type ||= 'unknown'

  @redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
  @redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
  @logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.")

  send_notifications(event, notification_type,
    Flapjack::Data::Contact.find_all_for_entity_check(entity_check, :redis => @redis))
end
|
|
212
|
+
|
|
213
|
+
# takes an event, a notification type, and an array of contacts and creates jobs in resque
|
|
214
|
+
# (eventually) for each notification
|
|
215
|
+
# Queues one notification per (contact, media) pair and writes a line to
# the notification log for each.
#
# event             - the event being notified about.
# notification_type - String such as 'problem' or 'recovery'.
# contacts          - collection of contact ids.
#
# sms and email go through Resque; jabber and pagerduty are pushed onto
# their redis lists. A media type whose queue is not configured in
# @queues is logged but not queued.
def send_notifications(event, notification_type, contacts)
  base_notification = { 'event_id'          => event.id,
                        'state'             => event.state,
                        'summary'           => event.summary,
                        'time'              => event.time,
                        'notification_type' => notification_type }

  contacts.each do |contact_id|
    media = media_for_contact(contact_id)
    contact_key = "contact:#{contact_id}"

    # built per contact, so nothing carries over from one contact to the next
    contact_notification = base_notification.merge(
      'contact_id'         => contact_id,
      'contact_first_name' => @redis.hget(contact_key, 'first_name'),
      'contact_last_name'  => @redis.hget(contact_key, 'last_name'))

    media.each_pair do |media_type, address|
      # log the single media type being used, not the contact's whole media hash
      @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | #{contact_id} | #{media_type} | #{address}")

      # queue this notification
      # FIXME: make a Contact class perhaps
      notif = contact_notification.dup
      notif['media']   = media_type
      notif['address'] = address
      notif['id']      = fuid
      dur = event.duration
      notif['duration'] = dur if dur
      @logger.debug("send_notifications: sending notification: #{notif.inspect}")

      case media_type
      when "sms"
        Resque.enqueue_to(@queues[:sms], Notification::Sms, notif) if @queues[:sms]
      when "email"
        Resque.enqueue_to(@queues[:email], Notification::Email, notif) if @queues[:email]
      when "jabber"
        if @queues[:jabber]
          notif['event_count'] = @event_count if @event_count
          # puts a notification into the jabber queue (redis list)
          @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(notif))
        end
      when "pagerduty"
        @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(notif)) if @queues[:pagerduty]
      end
    end

    if media.empty?
      @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | #{contact_id} | NO MEDIA FOR CONTACT")
    end
  end

  if contacts.empty?
    @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | NO CONTACTS")
  end
end
|
|
273
|
+
|
|
274
|
+
# takes a contact ID and returns a hash containing each of the media the contact wishes to be
|
|
275
|
+
# contacted by, and the associated address for each.
|
|
276
|
+
# eg:
|
|
277
|
+
#   media_for_contact('123') -> { "sms" => "+61401234567", "email" => "gno@free.dom" }
|
|
278
|
+
#
|
|
279
|
+
# Reads the whole "contact_media:<contact>" hash from redis and returns it.
def media_for_contact(contact)
  media_key = "contact_media:#{contact}"
  @redis.hgetall(media_key)
end
|
|
282
|
+
|
|
283
|
+
# generates a fairly unique identifier to use as a message id
|
|
284
|
+
# Builds an id of the form "<object_id>-<epoch seconds>.<microseconds>".
# The clock is read once so both time components describe the same instant.
def fuid
  now = Time.now
  "#{object_id}-#{now.to_i}.#{now.tv_usec}"
end
|
|
287
|
+
|
|
288
|
+
end
|
|
289
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'flapjack/filters/base'
|
|
4
|
+
|
|
5
|
+
module Flapjack
|
|
6
|
+
module Filters
|
|
7
|
+
# * If the action event’s state is an acknowledgement, and the corresponding check is in a
|
|
8
|
+
# failure state, then set unscheduled maintenance for 4 hours on the check
|
|
9
|
+
# * If the action event’s state is an acknowledgement, and the corresponding check is not in a
|
|
10
|
+
# failure state, then don’t alert
|
|
11
|
+
class Acknowledgement
|
|
12
|
+
include Base
|
|
13
|
+
|
|
14
|
+
# Decides whether this filter blocks notifications for the event.
#
# Only action events are examined; anything else passes. An
# acknowledgement for a check present in "failed_checks" creates
# unscheduled maintenance (event.duration, or 4 hours when it is absent)
# and passes. Any other action event is blocked.
#
# Returns true to block, false to pass.
def block?(event)
  timestamp = Time.now.to_i
  result = false
  message = "not an action event"

  if event.type == 'action'
    # look each fact up once and reuse it for the decision and the logging
    acknowledged = event.acknowledgement?
    failing = @persistence.zscore("failed_checks", event.id)

    if acknowledged && failing
      ec = Flapjack::Data::EntityCheck.for_event_id(event.id, :redis => @persistence)
      if ec.nil?
        @log.error "Filter: Acknowledgement: unknown entity for event '#{event.id}'"
        message = "unknown entity for #{event.id}"
      else
        ec.create_unscheduled_maintenance(:start_time => timestamp,
          :duration => (event.duration || (4 * 60 * 60)))
        message = "unscheduled maintenance created for #{event.id}"
      end
    else
      message = "no action taken"
      result = true
      @log.debug("Filter: Acknowledgement: blocking because event.acknowledgement? is false") unless acknowledged
      @log.debug("Filter: Acknowledgement: blocking because zscore of failed_checks for #{event.id} is false") unless failing
    end
  end

  @log.debug("Filter: Acknowledgement: #{result ? "block" : "pass"} (#{message})")
  result
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|