flapjack 0.5.5 → 0.6.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +10 -0
- data/.rbenv-version +1 -0
- data/.rspec +10 -0
- data/Gemfile +18 -0
- data/Guardfile +14 -0
- data/README.md +152 -173
- data/Rakefile +53 -150
- data/bin/flapjack +72 -0
- data/bin/flapjack-nagios-receiver +111 -0
- data/bin/flapjack-nagios-receiver-control +15 -0
- data/bin/flapjack-netsaint-parser +0 -2
- data/bin/flapjack-populator +133 -16
- data/bin/install-flapjack-systemwide +2 -2
- data/config.ru +11 -0
- data/dist/etc/init.d/flapjack +46 -0
- data/dist/etc/init.d/flapjack-nagios-receiver +36 -0
- data/doc/GLOSSARY.md +19 -0
- data/etc/flapjack_config.yaml.example +90 -0
- data/features/events.feature +132 -0
- data/features/notifications.feature +57 -0
- data/features/packaging-lintian.feature +5 -3
- data/features/steps/events_steps.rb +164 -0
- data/features/steps/flapjack-importer_steps.rb +2 -5
- data/features/steps/flapjack-worker_steps.rb +13 -6
- data/features/steps/notifications_steps.rb +178 -0
- data/features/steps/packaging-lintian_steps.rb +14 -0
- data/features/steps/time_travel_steps.rb +34 -0
- data/features/support/env.rb +63 -36
- data/flapjack.gemspec +35 -186
- data/lib/flapjack.rb +2 -0
- data/lib/flapjack/api.rb +274 -0
- data/lib/flapjack/api/entity_check_presenter.rb +184 -0
- data/lib/flapjack/api/entity_presenter.rb +66 -0
- data/lib/flapjack/cli/worker_manager.rb +1 -2
- data/lib/flapjack/configuration.rb +11 -0
- data/lib/flapjack/coordinator.rb +288 -0
- data/lib/flapjack/daemonizing.rb +186 -0
- data/lib/flapjack/data/contact.rb +45 -0
- data/lib/flapjack/data/entity.rb +89 -0
- data/lib/flapjack/data/entity_check.rb +396 -0
- data/lib/flapjack/data/event.rb +144 -0
- data/lib/flapjack/data/notification.rb +13 -0
- data/lib/flapjack/executive.rb +289 -0
- data/lib/flapjack/filters/acknowledgement.rb +39 -0
- data/lib/flapjack/filters/{any_parents_failed.rb → base.rb} +6 -4
- data/lib/flapjack/filters/delays.rb +53 -0
- data/lib/flapjack/filters/detect_mass_client_failures.rb +44 -0
- data/lib/flapjack/filters/ok.rb +25 -5
- data/lib/flapjack/filters/scheduled_maintenance.rb +17 -0
- data/lib/flapjack/filters/unscheduled_maintenance.rb +17 -0
- data/lib/flapjack/jabber.rb +294 -0
- data/lib/flapjack/notification/common.rb +23 -0
- data/lib/flapjack/notification/email.rb +107 -0
- data/lib/flapjack/notification/email/alert.html.haml +48 -0
- data/lib/flapjack/notification/email/alert.text.erb +14 -0
- data/lib/flapjack/notification/sms.rb +42 -0
- data/lib/flapjack/notification/sms/messagenet.rb +49 -0
- data/lib/flapjack/notifier_engine.rb +4 -4
- data/lib/flapjack/notifiers/mailer/mailer.rb +6 -7
- data/lib/flapjack/notifiers/xmpp/xmpp.rb +12 -12
- data/lib/flapjack/pagerduty.rb +230 -0
- data/lib/flapjack/patches.rb +108 -19
- data/lib/flapjack/persistence/data_mapper/models/check.rb +5 -3
- data/lib/flapjack/persistence/data_mapper/models/check_template.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/event.rb +2 -0
- data/lib/flapjack/persistence/data_mapper/models/node.rb +3 -1
- data/lib/flapjack/persistence/data_mapper/models/related_check.rb +3 -1
- data/lib/flapjack/pikelet.rb +56 -0
- data/lib/flapjack/transports/beanstalkd.rb +1 -1
- data/lib/flapjack/transports/result.rb +6 -6
- data/lib/flapjack/utility.rb +46 -0
- data/lib/flapjack/version.rb +5 -0
- data/lib/flapjack/web.rb +198 -0
- data/lib/flapjack/web/views/acknowledge.haml +55 -0
- data/lib/flapjack/web/views/check.haml +162 -0
- data/lib/flapjack/web/views/index.haml +92 -0
- data/lib/flapjack/web/views/self_stats.haml +56 -0
- data/lib/flapjack/{applications/worker.rb → worker/application.rb} +0 -0
- data/lib/flapjack/worker/cli.rb +49 -0
- data/{spec → spec.old}/check_sandbox/echo +0 -0
- data/{spec → spec.old}/check_sandbox/sandboxed_check +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier-couchdb.ini +0 -0
- data/{spec → spec.old}/configs/flapjack-notifier.ini +0 -0
- data/{spec → spec.old}/configs/recipients.ini +0 -0
- data/{spec → spec.old}/helpers.rb +0 -0
- data/{spec → spec.old}/inifile_spec.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/init.rb +0 -0
- data/{spec → spec.old}/mock-notifiers/mock/mock.rb +0 -0
- data/{spec → spec.old}/notifier-directories/spoons/testmailer/init.rb +0 -0
- data/{spec → spec.old}/notifier_application_spec.rb +0 -0
- data/{spec → spec.old}/notifier_filters_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_multiplexer_spec.rb +0 -0
- data/{spec → spec.old}/notifier_options_spec.rb +0 -0
- data/{spec → spec.old}/notifier_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/mailer_spec.rb +0 -0
- data/{spec → spec.old}/notifiers/xmpp_spec.rb +0 -0
- data/{spec → spec.old}/persistence/datamapper_spec.rb +0 -0
- data/{spec → spec.old}/persistence/mock_persistence_backend.rb +0 -0
- data/{spec → spec.old}/simple.ini +0 -0
- data/{spec → spec.old}/spec.opts +0 -0
- data/{spec → spec.old}/test-filters/blocker.rb +0 -0
- data/{spec → spec.old}/test-filters/mock.rb +0 -0
- data/{spec → spec.old}/transports/beanstalkd_spec.rb +0 -0
- data/{spec → spec.old}/transports/mock_transport.rb +0 -0
- data/{spec → spec.old}/worker_application_spec.rb +0 -0
- data/{spec → spec.old}/worker_options_spec.rb +0 -0
- data/spec/lib/flapjack/api/entity_check_presenter_spec.rb +117 -0
- data/spec/lib/flapjack/api/entity_presenter_spec.rb +92 -0
- data/spec/lib/flapjack/api_spec.rb +170 -0
- data/spec/lib/flapjack/coordinator_spec.rb +16 -0
- data/spec/lib/flapjack/data/entity_check_spec.rb +398 -0
- data/spec/lib/flapjack/data/entity_spec.rb +71 -0
- data/spec/lib/flapjack/data/event_spec.rb +6 -0
- data/spec/lib/flapjack/executive_spec.rb +59 -0
- data/spec/lib/flapjack/filters/acknowledgement_spec.rb +6 -0
- data/spec/lib/flapjack/filters/delays_spec.rb +6 -0
- data/spec/lib/flapjack/filters/detect_mass_client_failures_spec.rb +6 -0
- data/spec/lib/flapjack/filters/ok_spec.rb +6 -0
- data/spec/lib/flapjack/filters/scheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/filters/unscheduled_maintenance_spec.rb +6 -0
- data/spec/lib/flapjack/jabber_spec.rb +150 -0
- data/spec/lib/flapjack/notification/email_spec.rb +6 -0
- data/spec/lib/flapjack/notification/sms_spec.rb +6 -0
- data/spec/lib/flapjack/pikelet_spec.rb +28 -0
- data/spec/lib/flapjack/web_spec.rb +188 -0
- data/spec/spec_helper.rb +44 -0
- data/spec/support/profile_all_formatter.rb +44 -0
- data/spec/support/uncolored_doc_formatter.rb +9 -0
- data/tasks/events.rake +85 -0
- data/tmp/acknowledge.rb +14 -0
- data/tmp/create_config_yaml.rb +16 -0
- data/tmp/create_events_failure.rb +33 -0
- data/tmp/create_events_ok.rb +33 -0
- data/tmp/create_events_ok_fail_ack_ok.rb +54 -0
- data/tmp/create_events_ok_failure.rb +40 -0
- data/tmp/create_events_ok_failure_ack.rb +54 -0
- data/tmp/dummy_entities.json +1 -0
- data/tmp/generate_nagios_test_hosts.rb +16 -0
- data/tmp/parse_config_yaml.rb +7 -0
- data/tmp/redis_delete_all_keys.rb +11 -0
- data/tmp/test_entities.json +1 -0
- metadata +482 -221
- data/TODO.md +0 -36
- data/VERSION +0 -1
- data/bin/flapjack-benchmark +0 -50
- data/bin/flapjack-notifier +0 -21
- data/bin/flapjack-notifier-manager +0 -43
- data/bin/flapjack-stats +0 -27
- data/bin/flapjack-worker +0 -13
- data/bin/flapjack-worker-manager +0 -35
- data/dist/etc/init.d/flapjack-notifier +0 -47
- data/dist/etc/init.d/flapjack-workers +0 -44
- data/features/flapjack-notifier-manager.feature +0 -19
- data/features/flapjack-worker-manager.feature +0 -27
- data/features/flapjack-worker.feature +0 -27
- data/features/netsaint-config-converter.feature +0 -126
- data/features/persistence/couch.feature +0 -105
- data/features/persistence/sqlite3.feature +0 -105
- data/features/persistence/steps/couch_steps.rb +0 -25
- data/features/persistence/steps/generic_steps.rb +0 -102
- data/features/persistence/steps/sqlite3_steps.rb +0 -13
- data/features/steps/flapjack-notifier-manager_steps.rb +0 -24
- data/features/steps/flapjack-worker-manager_steps.rb +0 -48
- data/lib/flapjack/applications/notifier.rb +0 -222
- data/lib/flapjack/cli/notifier.rb +0 -108
- data/lib/flapjack/cli/notifier_manager.rb +0 -86
- data/lib/flapjack/cli/worker.rb +0 -51
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
module Flapjack
|
|
4
|
+
|
|
5
|
+
module Data
|
|
6
|
+
|
|
7
|
+
class Contact

  # Looks up the contacts that are interested in a check, or in the check's
  # entity.
  #
  # Reads the redis sets "contacts_for:<entity id>" and "contacts_for:<check>"
  # and returns their union.
  #
  # entity_check - object responding to #entity (which responds to #id and
  #                #name) and to #check
  # options      - :redis  => redis connection (required)
  #                :logger => object responding to #debug (optional); when
  #                           given, the size of each set is logged
  #
  # Returns the result of redis.sunion (the contact ids).
  # Raises RuntimeError if options[:redis] is not set.
  def self.find_all_for_entity_check(entity_check, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    # NB: the logger used to be unconditionally reset to nil here, which made
    # the debug output below unreachable even when a logger was supplied.
    logger = options[:logger]

    entity = entity_check.entity
    check  = entity_check.check

    entity_contacts_key = "contacts_for:#{entity.id}"
    check_contacts_key  = "contacts_for:#{check}"

    if logger
      logger.debug("contacts for #{entity.id} (#{entity.name}): " + redis.smembers(entity_contacts_key).length.to_s)
      logger.debug("contacts for #{check}: " + redis.smembers(check_contacts_key).length.to_s)
    end

    union = redis.sunion(entity_contacts_key, check_contacts_key)
    logger.debug("contacts for union of #{entity.id} and #{check}: " + union.length.to_s) if logger
    union
  end

  # Fetches the pagerduty details stored for a contact.
  #
  # contact - contact id, interpolated into the redis key names
  # options - :redis => redis connection (required)
  #
  # Returns nil when the "contact_media:<contact>" hash has no 'pagerduty'
  # field; otherwise the "contact_pagerduty:<contact>" hash with that field's
  # value merged in under 'service_key'.
  # Raises RuntimeError if options[:redis] is not set.
  def self.pagerduty_credentials_for_contact(contact, options = {})
    raise "Redis connection not set" unless redis = options[:redis]

    service_key = redis.hget("contact_media:#{contact}", 'pagerduty')
    return nil unless service_key

    redis.hgetall("contact_pagerduty:#{contact}").merge('service_key' => service_key)
  end

end
|
|
42
|
+
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
module Flapjack
|
|
4
|
+
|
|
5
|
+
module Data
|
|
6
|
+
|
|
7
|
+
class Entity

  attr_accessor :name, :id

  # Builds an Entity for every "entity_id:<name>" key found in redis.
  #
  # options - :redis => redis connection (required)
  #
  # Returns an array of Entity objects. An entity stored without an id (empty
  # string value) gets a nil id, the same as find_by_name reports for it.
  # Raises RuntimeError if options[:redis] is not set.
  def self.all(options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    prefix = 'entity_id:'
    entity_names = redis.keys("#{prefix}*").map {|key| key[prefix.length..-1] }
    entity_names.reject {|entity_name| entity_name.nil? || entity_name.empty? }.map {|entity_name|
      new(:name => entity_name,
          :id => normalise_id(redis.get("#{prefix}#{entity_name}")),
          :redis => redis)
    }
  end

  # Stores an entity in redis.
  #
  # entity  - hash with string keys: 'name' (required, non-empty), 'id'
  #           (optional) and 'contacts' (optional, anything responding to #each)
  # options - :redis => redis connection (required)
  #
  # With an id, the name <-> id mappings and the entity's contact set are
  # rewritten inside a MULTI/EXEC transaction; a mapping left behind under a
  # previous name for the same id is removed. Without an id only an
  # "entity_id:<name>" key with an empty value is written.
  #
  # Raises RuntimeError if the redis connection or the entity name is missing.
  def self.add(entity, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    raise "Entity name not provided" unless entity['name'] && !entity['name'].empty?

    entity_name = entity['name']
    entity_id   = entity['id']

    if entity_id
      # The previous name has to be read *before* the transaction is opened:
      # commands issued after MULTI are only queued, so their return value is
      # not the stored data.
      existing_name = redis.hget("entity:#{entity_id}", 'name')
      contacts = entity['contacts']

      redis.multi do |r|
        r.del("entity_id:#{existing_name}") unless existing_name.nil? || existing_name == entity_name
        r.set("entity_id:#{entity_name}", entity_id)
        r.hset("entity:#{entity_id}", 'name', entity_name)

        r.del("contacts_for:#{entity_id}")
        if contacts && contacts.respond_to?(:each)
          contacts.each {|contact| r.sadd("contacts_for:#{entity_id}", contact) }
        end
      end
    else
      # empty string is the redis equivalent of a Ruby nil, i.e. key with
      # no value
      redis.set("entity_id:#{entity_name}", '')
    end
  end

  # Finds an entity by name.
  #
  # entity_name - name of the entity
  # options     - :redis  => redis connection (required)
  #               :create => when truthy, an unknown entity is added (without
  #                          an id) instead of nil being returned
  #
  # Returns an Entity (whose id is nil when none is stored), or nil when the
  # entity is unknown and :create was not requested.
  # Raises RuntimeError if options[:redis] is not set.
  def self.find_by_name(entity_name, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    entity_id = redis.get("entity_id:#{entity_name}")
    if entity_id.nil?
      # key doesn't exist
      return unless options[:create]
      add({'name' => entity_name}, :redis => redis)
    end
    new(:name => entity_name, :id => normalise_id(entity_id), :redis => redis)
  end

  # Finds an entity by id, using the 'name' field of the "entity:<id>" hash.
  #
  # entity_id - id of the entity; stored on the result exactly as passed in
  # options   - :redis => redis connection (required)
  #
  # Returns an Entity, or nil if no non-empty name is stored for the id.
  # Raises RuntimeError if options[:redis] is not set.
  def self.find_by_id(entity_id, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    entity_name = redis.hget("entity:#{entity_id}", 'name')
    return if entity_name.nil? || entity_name.empty?
    new(:name => entity_name, :id => entity_id, :redis => redis)
  end

  # Converts a raw "entity_id:<name>" value into an id: nil for a missing or
  # empty value, otherwise the value's integer form.
  def self.normalise_id(raw_id)
    (raw_id.nil? || raw_id.to_s.empty?) ? nil : raw_id.to_i
  end
  private_class_method :normalise_id

  # Returns the names of this entity's checks, taken from the redis keys named
  # "check:<entity name>:<check>".
  #
  # The key prefix is stripped as a plain string rather than through a regexp
  # built from the entity name, so names containing regexp metacharacters
  # (e.g. "a+b") are handled.
  def check_list
    prefix = "check:#{@name}:"
    matching = @redis.keys("#{prefix}*").select {|key|
      key.start_with?(prefix) && key.length > prefix.length
    }
    matching.map {|key| key[prefix.length..-1] }
  end

  # Returns the number of checks found by check_list (nil if check_list
  # returns nil).
  def check_count
    checks = check_list
    return if checks.nil?
    checks.length
  end

private

  # NB: initializer should not be used directly -- instead one of the finder methods
  # above will call it
  #
  # options - :redis (required), :name (required), :id, :logger
  #
  # Raises RuntimeError if the redis connection or the name is missing.
  def initialize(options = {})
    raise "Redis connection not set" unless @redis = options[:redis]
    raise "Entity name not set" unless @name = options[:name]
    @id = options[:id]
    @logger = options[:logger]
  end

end
|
|
86
|
+
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
end
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require 'yajl/json_gem'
|
|
4
|
+
|
|
5
|
+
require 'flapjack/patches'
|
|
6
|
+
|
|
7
|
+
require 'flapjack/data/entity'
|
|
8
|
+
|
|
9
|
+
module Flapjack
|
|
10
|
+
|
|
11
|
+
module Data
|
|
12
|
+
|
|
13
|
+
class EntityCheck

  STATE_OK       = 'ok'.freeze
  STATE_WARNING  = 'warning'.freeze
  STATE_CRITICAL = 'critical'.freeze
  STATE_UNKNOWN  = 'unknown'.freeze

  # NB: the redis key prefix ("<entity name>:<check>") is computed once in the
  # initializer; assigning entity or check afterwards does not change it.
  attr_accessor :entity, :check

  # Builds an EntityCheck from an event id of the form "<entity name>:<check>".
  # Only the first ':' separates the two parts, so a check name may itself
  # contain ':'. The entity is created if it does not exist yet.
  #
  # options - :redis => redis connection (required), :logger (optional)
  #
  # TODO probably shouldn't always be creating on query -- work out when this should be happening
  def self.for_event_id(event_id, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    entity_name, check = event_id.split(':', 2)
    new(Flapjack::Data::Entity.find_by_name(entity_name, :redis => redis, :create => true), check,
        :redis => redis, :logger => options[:logger])
  end

  # Builds an EntityCheck for the named entity, creating the entity if it does
  # not exist yet.
  #
  # options - :redis => redis connection (required), :logger (optional)
  #
  # TODO probably shouldn't always be creating on query -- work out when this should be happening
  def self.for_entity_name(entity_name, check, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    new(Flapjack::Data::Entity.find_by_name(entity_name, :redis => redis, :create => true), check,
        :redis => redis, :logger => options[:logger])
  end

  # Builds an EntityCheck for the entity with the given id. Raises
  # RuntimeError ("Invalid entity") when the lookup returns nothing.
  #
  # options - :redis => redis connection (required), :logger (optional)
  def self.for_entity_id(entity_id, check, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    new(Flapjack::Data::Entity.find_by_id(entity_id, :redis => redis), check,
        :redis => redis, :logger => options[:logger])
  end

  # Builds an EntityCheck for an already loaded entity object.
  #
  # options - :redis => redis connection (required), :logger (optional)
  def self.for_entity(entity, check, options = {})
    raise "Redis connection not set" unless redis = options[:redis]
    new(entity, check, :redis => redis, :logger => options[:logger])
  end

  # Name of the entity this check belongs to.
  def entity_name
    @entity.name
  end

  # returns true if the check is in unscheduled maintenance, i.e. the
  # "<key>:unscheduled_maintenance" key exists
  def in_unscheduled_maintenance?
    @redis.exists("#{@key}:unscheduled_maintenance")
  end

  # returns true if the check is in scheduled maintenance, i.e. the
  # "<key>:scheduled_maintenance" key exists
  def in_scheduled_maintenance?
    @redis.exists("#{@key}:scheduled_maintenance")
  end

  # Adds an event for this check to the 'events' list in redis.
  #
  # event - hash with string keys, e.g.
  #   'type'    => 'service',
  #   'state'   => state,
  #   'summary' => check_output,
  #   'time'    => timestamp
  #
  # NB: the hash passed in is modified in place -- 'entity' and 'check' are
  # set, and 'time' defaults to the current time when missing.
  def create_event(event)
    event.merge!('entity' => @entity.name, 'check' => @check)
    event['time'] = Time.now.to_i if event['time'].nil?
    @redis.rpush('events', Yajl::Encoder.encode(event))
  end

  # Queues an acknowledgement event for this check.
  #
  # opts - hash with *string* keys: 'summary' (defaults to '...'), 'duration'
  #        and 'acknowledgement_id'
  def create_acknowledgement(opts = {})
    options = { 'summary' => '...' }.merge(opts)

    create_event('type'               => 'action',
                 'state'              => 'acknowledgement',
                 'summary'            => options['summary'],
                 'duration'           => options['duration'],
                 'acknowledgement_id' => options['acknowledgement_id'])
  end

  # Records an unscheduled maintenance period, and flags the check as being in
  # unscheduled maintenance if the period has not already ended.
  #
  # opts - :start_time (unix timestamp), :duration (seconds), :summary
  #
  # FIXME: need to add summary to summary of existing unscheduled maintenance if there is
  # one, and extend duration / expiry time, instead of creating a separate unscheduled
  # outage as we are doing now...
  def create_unscheduled_maintenance(opts = {})
    start_time = opts[:start_time] # unix timestamp
    duration   = opts[:duration]   # seconds
    summary    = opts[:summary]

    # the flag key expires by itself when the maintenance period is over
    time_remaining = (start_time + duration) - Time.now.to_i
    if time_remaining > 0
      @redis.setex("#{@key}:unscheduled_maintenance", time_remaining, start_time)
    end

    @redis.zadd("#{@key}:unscheduled_maintenances", duration, start_time)
    @redis.set("#{@key}:#{start_time}:unscheduled_maintenance:summary", summary)

    @redis.zadd("#{@key}:sorted_unscheduled_maintenance_timestamps", start_time, start_time)
  end

  # ends any unscheduled maintenance, and updates the recorded duration of the
  # period to match the actual end time
  #
  # opts - :end_time (unix timestamp, defaults to the current time)
  def end_unscheduled_maintenance(opts = {})
    end_time = { :end_time => Time.now.to_i }.merge(opts)[:end_time]

    um_start = @redis.get("#{@key}:unscheduled_maintenance")
    unless um_start
      @logger.debug("end_unscheduled_maintenance called for #{@key} but none found") if @logger
      return
    end

    duration = end_time - um_start.to_i
    @logger.debug("ending unscheduled downtime for #{@key} at #{Time.at(end_time).to_s}") if @logger
    @redis.del("#{@key}:unscheduled_maintenance")
    @redis.zadd("#{@key}:unscheduled_maintenances", duration, um_start)
    @redis.zadd("#{@key}:sorted_unscheduled_maintenance_timestamps", um_start, um_start)
  end

  # creates a scheduled maintenance period for a check
  #
  # opts - :start_time (unix timestamp), :duration (seconds), :summary
  #
  # TODO: consider adding some validation to the data we're adding in here
  # eg start_time is a believable unix timestamp (not in the past and not too
  # far in the future), duration is within some bounds...
  def create_scheduled_maintenance(opts = {})
    start_time = opts[:start_time] # unix timestamp
    duration   = opts[:duration]   # seconds
    summary    = opts[:summary]

    @redis.zadd("#{@key}:scheduled_maintenances", duration, start_time)
    @redis.set("#{@key}:#{start_time}:scheduled_maintenance:summary", summary)

    @redis.zadd("#{@key}:sorted_scheduled_maintenance_timestamps", start_time, start_time)

    # scheduled maintenance periods have changed, revalidate
    update_scheduled_maintenance(:revalidate => true)
  end

  # delete a scheduled maintenance
  #
  # opts - :start_time (unix timestamp identifying the period)
  def delete_scheduled_maintenance(opts = {})
    start_time = opts[:start_time]

    @redis.del("#{@key}:#{start_time}:scheduled_maintenance:summary")
    @redis.zrem("#{@key}:scheduled_maintenances", start_time)

    @redis.zremrangebyscore("#{@key}:sorted_scheduled_maintenance_timestamps", start_time, start_time)

    # scheduled maintenance periods have changed, revalidate
    update_scheduled_maintenance(:revalidate => true)
  end

  # if not in scheduled maintenance, looks in scheduled maintenance list for a check to see if
  # current state should be set to scheduled maintenance, and sets it as appropriate
  #
  # opts - :revalidate => when truthy, the current flag is dropped and
  #        recomputed even if the check is already in scheduled maintenance
  def update_scheduled_maintenance(opts = {})
    if opts[:revalidate]
      @redis.del("#{@key}:scheduled_maintenance")
    elsif in_scheduled_maintenance?
      return
    end

    # are we within a scheduled maintenance period?
    now = Time.now.to_i
    current_sched_ms = maintenances(nil, nil, :scheduled => true).select {|sm|
      (sm[:start_time] <= now) && (now < sm[:end_time])
    }
    return if current_sched_ms.empty?

    # yes! so set current scheduled maintenance
    # if multiple scheduled maintenances found, find the end_time furthest in the future
    most_futuristic = current_sched_ms.max_by {|sm| sm[:end_time] }

    # the flag must expire when the period ends, not a full duration from now;
    # the select above guarantees this is positive
    time_remaining = most_futuristic[:end_time] - now
    @redis.setex("#{@key}:scheduled_maintenance", time_remaining, most_futuristic[:start_time])
  end

  # returns nil if no previous state; this must be considered as a possible
  # state by classes using this model
  def state
    @redis.hget("check:#{@key}", 'state')
  end

  # Records a new state for the check. Does nothing (and returns nil) unless
  # state is one of the STATE_* constants.
  #
  # options - :timestamp (defaults to the current time), :client, :summary,
  #           :count
  def update_state(state, options = {})
    return unless validate_state(state)
    timestamp = options[:timestamp] || Time.now.to_i
    client    = options[:client]
    summary   = options[:summary]
    count     = options[:count]

    check_hash = "check:#{@key}"

    # Note the current state (for speedy lookups)
    @redis.hset(check_hash, 'state', state)

    # FIXME: rename to last_state_change?
    @redis.hset(check_hash, 'last_change', timestamp)

    # Retain all state changes for entity:check pair
    @redis.rpush("#{@key}:states", timestamp)
    @redis.set("#{@key}:#{timestamp}:state", state)
    @redis.set("#{@key}:#{timestamp}:summary", summary) if summary
    @redis.set("#{@key}:#{timestamp}:count", count) if count

    @redis.zadd("#{@key}:sorted_state_timestamps", timestamp, timestamp)

    case state
    when STATE_WARNING, STATE_CRITICAL
      @redis.zadd('failed_checks', timestamp, @key)
      # FIXME: Iterate through a list of tags associated with an entity:check pair, and update counters
      @redis.zadd("failed_checks:client:#{client}", timestamp, @key) if client
    else
      @redis.zrem("failed_checks", @key)
      # FIXME: Iterate through a list of tags associated with an entity:check pair, and update counters
      @redis.zrem("failed_checks:client:#{client}", @key) if client
    end
  end

  # Integer 'last_update' field of the check hash, or nil if unset or not
  # numeric.
  def last_update
    integer_or_nil(@redis.hget("check:#{@key}", 'last_update'))
  end

  # Stores the 'last_update' field of the check hash.
  def last_update=(timestamp)
    @redis.hset("check:#{@key}", 'last_update', timestamp)
  end

  # Integer 'last_change' field of the check hash, or nil if unset or not
  # numeric.
  def last_change
    integer_or_nil(@redis.hget("check:#{@key}", 'last_change'))
  end

  # Integer value of "<key>:last_problem_notification", or nil if unset or
  # not numeric.
  def last_problem_notification
    integer_or_nil(@redis.get("#{@key}:last_problem_notification"))
  end

  # Integer value of "<key>:last_recovery_notification", or nil if unset or
  # not numeric.
  def last_recovery_notification
    integer_or_nil(@redis.get("#{@key}:last_recovery_notification"))
  end

  # Integer value of "<key>:last_acknowledgement_notification", or nil if
  # unset or not numeric.
  def last_acknowledgement_notification
    integer_or_nil(@redis.get("#{@key}:last_acknowledgement_notification"))
  end

  # Integer event count stored for the state change at timestamp, or nil if
  # unset or not numeric.
  def event_count_at(timestamp)
    integer_or_nil(@redis.get("#{@key}:#{timestamp}:count"))
  end

  # true if the current state is warning or critical
  def failed?
    [STATE_WARNING, STATE_CRITICAL].include?( state )
  end

  # true if the current state is ok
  def ok?
    [STATE_OK].include?( state )
  end

  # Summary stored with the most recently recorded state change.
  def summary
    timestamp = @redis.lindex("#{@key}:states", -1)
    @redis.get("#{@key}:#{timestamp}:summary")
  end

  # Returns a list of states for this entity check, sorted by timestamp.
  #
  # start_time and end_time should be passed as integer timestamps; these timestamps
  # will be considered inclusively, so, e.g. coverage for a day should go
  # from midnight to 11:59:59 PM. Pass nil for either end to leave that
  # side unbounded.
  #
  # opts - :order => 'desc' for newest first (anything else: oldest first)
  #
  # Each element is a hash with :timestamp, :state and :summary.
  def historical_states(start_time, end_time, opts = {})
    state_ts = timestamps_between("#{@key}:sorted_state_timestamps",
                                  start_time, end_time, opts[:order])

    state_data = nil

    @redis.multi do |r|
      state_data = state_ts.map {|ts|
        {:timestamp => ts.to_i,
         :state     => r.get("#{@key}:#{ts}:state"),
         :summary   => r.get("#{@key}:#{ts}:summary")}
      }
    end

    state_data.map {|sd| resolve_futures(sd) }
  end

  # requires a known state timestamp, i.e. probably one returned via
  # historical_states. will find the one before that in the sorted set,
  # if any. returns nil when the timestamp is unknown or is the earliest one.
  def historical_state_before(timestamp)
    pos = @redis.zrank("#{@key}:sorted_state_timestamps", timestamp)
    return if pos.nil? || pos < 1
    state_at_position(pos - 1)
  end

  # as historical_state_before, but for the state following the given
  # timestamp. returns nil when the timestamp is unknown or is the latest one.
  def historical_state_after(timestamp)
    pos = @redis.zrank("#{@key}:sorted_state_timestamps", timestamp)
    return if pos.nil?
    state_at_position(pos + 1)
  end

  # Returns a list of maintenance periods (either unscheduled or scheduled) for this
  # entity check, sorted by timestamp.
  #
  # start_time and end_time should be passed as integer timestamps; these timestamps
  # will be considered inclusively, so, e.g. coverage for a day should go
  # from midnight to 11:59:59 PM. Pass nil for either end to leave that
  # side unbounded.
  #
  # opts - :scheduled => truthy for scheduled periods, otherwise unscheduled
  #        :order     => 'desc' for newest first (anything else: oldest first)
  #
  # Each element is a hash with :start_time, :duration, :summary and :end_time.
  def maintenances(start_time, end_time, opts = {})
    sched = opts[:scheduled] ? 'scheduled' : 'unscheduled'

    maint_ts = timestamps_between("#{@key}:sorted_#{sched}_maintenance_timestamps",
                                  start_time, end_time, opts[:order])

    maint_data = nil

    @redis.multi do |r|
      maint_data = maint_ts.map {|ts|
        {:start_time => ts.to_i,
         :duration   => r.zscore("#{@key}:#{sched}_maintenances", ts),
         :summary    => r.get("#{@key}:#{ts}:#{sched}_maintenance:summary")}
      }
    end

    maint_data.map {|md|
      resolve_futures(md)
      md[:end_time] = (md[:start_time] + md[:duration]).floor
      md
    }
  end

  # returns an array of pagerduty credentials. If more than one contact for this entity_check
  # has pagerduty credentials then there'll be one hash in the array for each set of
  # credentials.
  #
  # options - :redis  => redis connection (defaults to this object's own)
  #           :logger => optional logger, passed on to the contact lookups
  #
  # NOTE(review): the visible requires for this file do not load
  # flapjack/data/contact, so this appears to rely on the caller having loaded
  # Flapjack::Data::Contact -- confirm.
  def pagerduty_credentials(options = {})
    raise "Redis connection not set" unless redis = (options[:redis] || @redis)
    lookup_opts = { :redis => redis, :logger => options[:logger] }

    contacts = Flapjack::Data::Contact.find_all_for_entity_check(self, lookup_opts)
    contacts.map {|contact|
      Flapjack::Data::Contact.pagerduty_credentials_for_contact(contact, lookup_opts)
    }.compact
  end

private

  # Passing around the redis handle like this is a SMELL.
  #
  # options - :redis (required), :logger (optional, used for debug output)
  #
  # Raises RuntimeError if the redis connection, entity or check is missing.
  def initialize(entity, check, options = {})
    raise "Redis connection not set" unless @redis = options[:redis]
    raise "Invalid entity" unless @entity = entity
    raise "Invalid check" unless @check = check
    @logger = options[:logger]
    @key = "#{entity.name}:#{check}"
  end

  # true if state is one of the STATE_* constants
  def validate_state(state)
    [STATE_OK, STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN].include?(state)
  end

  # Integer form of a value read from redis, or nil unless it consists solely
  # of digits.
  def integer_or_nil(raw)
    return unless raw.to_s =~ /\A\d+\z/
    raw.to_i
  end

  # Members of the sorted set zset_key whose score lies between start_time and
  # end_time (inclusive; nil leaves that side unbounded). ZREVRANGEBYSCORE
  # takes its bounds as (max, min), so they are swapped for descending order.
  def timestamps_between(zset_key, start_time, end_time, order)
    lower = start_time || '-inf'
    upper = end_time   || '+inf'
    if order && order.to_s.downcase == 'desc'
      @redis.zrevrangebyscore(zset_key, upper, lower)
    else
      @redis.zrangebyscore(zset_key, lower, upper)
    end
  end

  # State record (:timestamp, :state, :summary) for the entry at the given
  # rank in the sorted state timestamps, or nil if there is none.
  def state_at_position(position)
    ts = @redis.zrange("#{@key}:sorted_state_timestamps", position, position)
    return if ts.nil? || ts.empty?
    {:timestamp => ts.first.to_i,
     :state     => @redis.get("#{@key}:#{ts.first}:state"),
     :summary   => @redis.get("#{@key}:#{ts.first}:summary")}
  end

  # The redis commands in a multi block return future objects, which
  # must be evaluated. This relies on a patch in flapjack/patches.rb to
  # make the Future objects report their class.
  def resolve_futures(record)
    record.keys.each {|field|
      value = record[field]
      record[field] = value.value if value.class == Redis::Future
    }
    record
  end

end
|
|
393
|
+
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
end
|