sensu-plugins-aws 0.0.1.alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +2 -0
- data/CHANGELOG.md +15 -0
- data/LICENSE +22 -0
- data/README.md +125 -0
- data/bin/autoscaling-instance-count-metrics.rb +87 -0
- data/bin/check-dynamodb-capacity.rb +183 -0
- data/bin/check-dynamodb-throttle.rb +177 -0
- data/bin/check-ec2-network.rb +131 -0
- data/bin/check-elb-certs.rb +149 -0
- data/bin/check-elb-health-fog.rb +121 -0
- data/bin/check-elb-health-sdk.rb +124 -0
- data/bin/check-elb-health.rb +122 -0
- data/bin/check-elb-latency.rb +175 -0
- data/bin/check-elb-nodes.rb +145 -0
- data/bin/check-elb-sum-requests.rb +168 -0
- data/bin/check-instance-events.rb +130 -0
- data/bin/check-rds-events.rb +84 -0
- data/bin/check-rds.rb +251 -0
- data/bin/check-redshift-events.rb +120 -0
- data/bin/check-ses-limit.rb +91 -0
- data/bin/check-sqs-messages.rb +107 -0
- data/bin/check_vpc_vpn.py +42 -0
- data/bin/ec2-count-metrics.rb +144 -0
- data/bin/ec2-node.rb +157 -0
- data/bin/elasticache-metrics.rb +200 -0
- data/bin/elb-full-metrics.rb +144 -0
- data/bin/elb-latency-metrics.rb +150 -0
- data/bin/elb-metrics.rb +150 -0
- data/bin/sqs-metrics.rb +84 -0
- data/lib/sensu-plugins-AWS.rb +7 -0
- data.tar.gz.sig +0 -0
- metadata +330 -0
- metadata.gz.sig +0 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#! /usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# check-elb-latency
|
|
4
|
+
#
|
|
5
|
+
#
|
|
6
|
+
# DESCRIPTION:
|
|
7
|
+
# This plugin checks the latency of Amazon Elastic Load Balancers using CloudWatch metrics.
|
|
8
|
+
#
|
|
9
|
+
# OUTPUT:
|
|
10
|
+
# plain-text
|
|
11
|
+
#
|
|
12
|
+
# PLATFORMS:
|
|
13
|
+
# Linux
|
|
14
|
+
#
|
|
15
|
+
# DEPENDENCIES:
|
|
16
|
+
# gem: aws-sdk
|
|
17
|
+
# gem: sensu-plugin
|
|
18
|
+
#
|
|
19
|
+
# USAGE:
|
|
20
|
+
# Warning if any load balancer's latency is over 1 second, critical if over 3 seconds.
|
|
21
|
+
# check-elb-latency --warning-over 1 --critical-over 3
|
|
22
|
+
#
|
|
23
|
+
# Critical if "app" load balancer's latency is over 5 seconds, maximum of last one hour
|
|
24
|
+
# check-elb-latency --elb-names app --critical-over 5 --statistics maximum --period 3600
|
|
25
|
+
#
|
|
26
|
+
# NOTES:
|
|
27
|
+
#
|
|
28
|
+
# LICENSE:
|
|
29
|
+
# Copyright 2014 github.com/y13i
|
|
30
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
|
31
|
+
# for details.
|
|
32
|
+
#
|
|
33
|
+
|
|
34
|
+
require 'rubygems' if RUBY_VERSION < '1.9.0'
|
|
35
|
+
require 'sensu-plugin/check/cli'
|
|
36
|
+
require 'aws-sdk'
|
|
37
|
+
|
|
38
|
+
# Sensu check that reads the CloudWatch 'Latency' metric (namespace 'AWS/ELB')
# for each selected load balancer and flags a warning or critical when the
# most recent datapoint is at or above the configured threshold.
class CheckELBLatency < Sensu::Plugin::Check::CLI
  option :access_key_id,
         short:       '-k N',
         long:        '--access-key-id ID',
         description: 'AWS access key ID'

  option :secret_access_key,
         short:       '-s N',
         long:        '--secret-access-key KEY',
         description: 'AWS secret access key'

  option :region,
         short:       '-r R',
         long:        '--region REGION',
         description: 'AWS region'

  option :elb_names,
         short:       '-l N',
         long:        '--elb-names NAMES',
         proc:        proc { |a| a.split(/[,;]\s*/) },
         description: 'Load balancer names to check. Separated by , or ;. If not specified, check all load balancers'

  option :end_time,
         short:       '-t T',
         long:        '--end-time TIME',
         default:     Time.now,
         proc:        proc { |a| Time.parse a },
         description: 'CloudWatch metric statistics end time'

  option :period,
         short:       '-p N',
         long:        '--period SECONDS',
         default:     60,
         proc:        proc(&:to_i),
         description: 'CloudWatch metric statistics period'

  option :statistics,
         short:       '-S N',
         long:        '--statistics NAME',
         default:     :average,
         proc:        proc { |a| a.downcase.intern },
         description: 'CloudWatch statistics method'

  # Declares --warning-over and --critical-over; both are optional floats.
  %w(warning critical).each do |severity|
    option :"#{severity}_over",
           long:        "--#{severity}-over SECONDS",
           proc:        proc(&:to_f),
           description: "Trigger a #{severity} if latency is over specified seconds"
  end

  # Builds the options hash handed to the AWS clients. Credentials are only
  # included when both the key id and the secret were supplied; the region is
  # only included when one was supplied.
  def aws_config
    hash = {}
    hash.update access_key_id: config[:access_key_id], secret_access_key: config[:secret_access_key] if config[:access_key_id] && config[:secret_access_key]
    hash.update region: config[:region] if config[:region]
    hash
  end

  # Memoized ELB client.
  def elb
    @elb ||= AWS::ELB.new aws_config
  end

  # Memoized CloudWatch client.
  def cloud_watch
    @cloud_watch ||= AWS::CloudWatch.new aws_config
  end

  # Load balancers to check: all of them, or only those whose name was listed
  # in --elb-names. The result is cached after the first call.
  def elbs
    return @elbs if @elbs
    @elbs = elb.load_balancers.to_a
    @elbs.select! { |balancer| config[:elb_names].include? balancer.name } if config[:elb_names]
    @elbs
  end

  # First CloudWatch metric matching AWS/ELB 'Latency' for the given
  # load balancer name.
  def latency_metric(elb_name)
    cloud_watch.metrics.with_namespace('AWS/ELB').with_metric_name('Latency').with_dimensions(name: 'LoadBalancerName', value: elb_name).first
  end

  # Query window: one period ending at --end-time, using the statistic chosen
  # with --statistics (capitalized, e.g. :average -> 'Average').
  def statistics_options
    {
      start_time: config[:end_time] - config[:period],
      end_time:   config[:end_time],
      statistics: [config[:statistics].to_s.capitalize],
      period:     config[:period]
    }
  end

  # Value of the configured statistic on the datapoint with the latest
  # timestamp. Raises when the metric is nil or no datapoint was returned;
  # check_latency rescues that.
  def latest_value(metric)
    metric.statistics(statistics_options.merge unit: 'Seconds').datapoints.sort_by { |datapoint| datapoint[:timestamp] }.last[config[:statistics]]
  end

  # Marks a severity as triggered and appends its explanation to the output.
  def flag_alert(severity, message)
    @severities[severity] = true
    @message += message
  end

  # Compares one load balancer's latest latency against the thresholds.
  # Severities are visited in the order they were inserted into @severities
  # (critical first), and only the first breached one is reported.
  def check_latency(elb)
    metric = latency_metric elb.name
    metric_value = begin
                     latest_value metric
                   rescue
                     # Any failure while fetching the datapoint is treated as
                     # a latency of 0 seconds.
                     0
                   end

    @severities.keys.each do |severity|
      threshold = config[:"#{severity}_over"]
      next unless threshold
      next if metric_value < threshold
      flag_alert severity,
                 "; #{elbs.size == 1 ? nil : "#{elb.inspect}'s"} Latency is #{sprintf '%.3f', metric_value} seconds. (expected lower than #{sprintf '%.3f', threshold})" # rubocop:disable LineLength
      break
    end
  end

  # Entry point: checks every selected load balancer, appends a summary of
  # the query window and reports the most severe status that was flagged.
  def run
    @message = if elbs.size == 1
                 elbs.first.inspect
               else
                 "#{elbs.size} load balancers total"
               end

    @severities = {
      critical: false,
      warning:  false
    }

    elbs.each { |balancer| check_latency balancer }

    @message += "; (#{config[:statistics].to_s.capitalize} within #{config[:period]} seconds "
    @message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"

    if @severities[:critical]
      critical @message
    elsif @severities[:warning]
      warning @message
    else
      ok @message
    end
  end
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
#! /usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# check-elb-nodes
|
|
4
|
+
#
|
|
5
|
+
# DESCRIPTION:
|
|
6
|
+
# This plugin checks an AWS Elastic Load Balancer to ensure a minimum number
|
|
7
|
+
# or percentage of nodes are InService on the ELB
|
|
8
|
+
#
|
|
9
|
+
# OUTPUT:
|
|
10
|
+
# plain-text
|
|
11
|
+
#
|
|
12
|
+
# PLATFORMS:
|
|
13
|
+
# Linux
|
|
14
|
+
#
|
|
15
|
+
# DEPENDENCIES:
|
|
16
|
+
# gem: aws-sdk
|
|
17
|
+
# gem: sensu-plugin
|
|
18
|
+
#
|
|
19
|
+
# USAGE:
|
|
20
|
+
# Warning if the "app" load balancer has 3 or fewer InService nodes, critical if it has 1 or fewer.
|
|
21
|
+
# check-elb-nodes --name app --warn 3 --crit 1
|
|
22
|
+
#
|
|
23
|
+
# Critical if 50 percent or fewer of the "app" load balancer's nodes are InService
|
|
24
|
+
# check-elb-nodes --name app --crit_perc 50
|
|
25
|
+
#
|
|
26
|
+
# NOTES:
|
|
27
|
+
#
|
|
28
|
+
# LICENSE:
|
|
29
|
+
# Copyright (c) 2013, Justin Lambert <jlambert@letsevenup.com>
|
|
30
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
|
31
|
+
# for details.
|
|
32
|
+
#
|
|
33
|
+
|
|
34
|
+
require 'rubygems' if RUBY_VERSION < '1.9.0'
|
|
35
|
+
require 'sensu-plugin/check/cli'
|
|
36
|
+
require 'aws-sdk'
|
|
37
|
+
|
|
38
|
+
# Sensu check that counts the InService / OutOfService / Unknown instances
# behind one load balancer and compares the InService count (absolute and as
# a percentage) against the configured thresholds.
class CheckELBNodes < Sensu::Plugin::Check::CLI
  option :aws_access_key,
         short:       '-a AWS_ACCESS_KEY',
         long:        '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY_ID'] or provide it as an option"

  option :aws_secret_access_key,
         short:       '-s AWS_SECRET_ACCESS_KEY',
         long:        '--aws-secret-access-key AWS_SECRET_ACCESS_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_ACCESS_KEY'] or provide it as an option"

  option :aws_region,
         short:       '-r AWS_REGION',
         long:        '--aws-region REGION',
         description: 'AWS Region (such as eu-west-1).',
         default:     'us-east-1'

  option :load_balancer,
         short:       '-n ELB_NAME',
         long:        '--name ELB_NAME',
         description: 'The name of the ELB',
         required:    true

  option :warn_under,
         short:       '-w WARN_NUM',
         long:        '--warn WARN_NUM',
         description: 'Minimum number of nodes InService on the ELB to be considered a warning',
         default:     -1,
         proc:        proc(&:to_i)

  option :crit_under,
         short:       '-c CRIT_NUM',
         long:        '--crit CRIT_NUM',
         description: 'Minimum number of nodes InService on the ELB to be considered critical',
         default:     -1,
         proc:        proc(&:to_i)

  option :warn_percent,
         short:       '-W WARN_PERCENT',
         long:        '--warn_perc WARN_PERCENT',
         description: 'Warn when the percentage of InService nodes is at or below this number',
         default:     -1,
         proc:        proc(&:to_i)

  option :crit_percent,
         short:       '-C CRIT_PERCENT',
         long:        '--crit_perc CRIT_PERCENT',
         description: 'Minimum percentage of nodes needed to be InService',
         default:     -1,
         proc:        proc(&:to_i)

  # Options hash for the AWS client: credentials only when both parts were
  # given on the command line, region always.
  def aws_config
    settings = {}
    if config[:aws_access_key] && config[:aws_secret_access_key]
      settings[:access_key_id] = config[:aws_access_key]
      settings[:secret_access_key] = config[:aws_secret_access_key]
    end
    settings[:region] = config[:aws_region]
    settings
  end

  # Entry point: fetches instance health for the named load balancer, builds
  # a per-state summary and reports unknown / critical / warning / ok.
  def run
    AWS.start_memoizing
    client = AWS::ELB.new aws_config

    begin
      instances = client.load_balancers[config[:load_balancer]].instances.health
    rescue AWS::ELB::Errors::LoadBalancerNotFound
      unknown "A load balancer with the name '#{config[:load_balancer]}' was not found"
    end

    # Float so the percentage computation below is not integer division.
    num_instances = instances.count.to_f
    state = { 'OutOfService' => [], 'InService' => [], 'Unknown' => [] }
    instances.each do |instance|
      # Memoization is switched off as soon as an instance reports 'Unknown'
      # (described in the original code as forcing a requery of state).
      AWS.stop_memoizing if instance[:state] == 'Unknown'
      state[instance[:state]] << instance[:instance].id
    end
    AWS.stop_memoizing

    # "<State>: <count>" per state, followed by the instance ids in
    # parentheses when there are any.
    message = %w(InService OutOfService Unknown).map do |label|
      ids = state[label]
      header = "#{label}: #{ids.count}"
      ids.count > 0 ? "#{header} (#{ids.join(', ')})" : header
    end.join('; ')

    in_service = state['InService'].count
    in_service_percent = (in_service / num_instances) * 100
    # A threshold only applies when it is positive (the defaults are -1) and
    # is breached when the observed value is at or below it.
    breached = ->(limit, observed) { limit > 0 && limit >= observed }

    if state['Unknown'].count == num_instances
      unknown 'All nodes in unknown state'
    elsif in_service == 0 ||
          breached.call(config[:crit_under], in_service) ||
          breached.call(config[:crit_percent], in_service_percent)
      critical message
    elsif breached.call(config[:warn_under], in_service) ||
          breached.call(config[:warn_percent], in_service_percent)
      warning message
    else
      ok message
    end
  end
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#! /usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# check-elb-sum-requests
|
|
4
|
+
#
|
|
5
|
+
# DESCRIPTION:
|
|
6
|
+
# Check ELB Sum Requests by CloudWatch API.
|
|
7
|
+
#
|
|
8
|
+
# OUTPUT:
|
|
9
|
+
# plain-text
|
|
10
|
+
#
|
|
11
|
+
# PLATFORMS:
|
|
12
|
+
# Linux
|
|
13
|
+
#
|
|
14
|
+
# DEPENDENCIES:
|
|
15
|
+
# gem: aws-sdk
|
|
16
|
+
# gem: sensu-plugin
|
|
17
|
+
#
|
|
18
|
+
# USAGE:
|
|
19
|
+
# Warning if any load balancer's sum request count is over 1000, critical if over 2000.
|
|
20
|
+
# check-elb-sum-requests --warning-over 1000 --critical-over 2000
|
|
21
|
+
#
|
|
22
|
+
# Critical if "app" load balancer's sum request count is over 10000, within last one hour
|
|
23
|
+
# check-elb-sum-requests --elb-names app --critical-over 10000 --period 3600
|
|
24
|
+
#
|
|
25
|
+
# NOTES:
|
|
26
|
+
#
|
|
27
|
+
# LICENSE:
|
|
28
|
+
# Copyright 2014 github.com/y13i
|
|
29
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
|
30
|
+
# for details.
|
|
31
|
+
#
|
|
32
|
+
|
|
33
|
+
require 'rubygems' if RUBY_VERSION < '1.9.0'
|
|
34
|
+
require 'sensu-plugin/check/cli'
|
|
35
|
+
require 'aws-sdk'
|
|
36
|
+
|
|
37
|
+
# Sensu check that reads the CloudWatch 'RequestCount' metric (namespace
# 'AWS/ELB', statistic 'Sum') for each selected load balancer and flags a
# warning or critical when the most recent datapoint is at or above the
# configured threshold.
class CheckELBSumRequests < Sensu::Plugin::Check::CLI
  option :access_key_id,
         short:       '-k N',
         long:        '--access-key-id ID',
         description: 'AWS access key ID'

  option :secret_access_key,
         short:       '-s N',
         long:        '--secret-access-key KEY',
         description: 'AWS secret access key'

  option :region,
         short:       '-r R',
         long:        '--region REGION',
         description: 'AWS region'

  option :elb_names,
         short:       '-l N',
         long:        '--elb-names NAMES',
         proc:        proc { |a| a.split(/[,;]\s*/) },
         description: 'Load balancer names to check. Separated by , or ;. If not specified, check all load balancers'

  option :end_time,
         short:       '-t T',
         long:        '--end-time TIME',
         default:     Time.now,
         proc:        proc { |a| Time.parse a },
         description: 'CloudWatch metric statistics end time'

  option :period,
         short:       '-p N',
         long:        '--period SECONDS',
         default:     60,
         proc:        proc(&:to_i),
         description: 'CloudWatch metric statistics period'

  # Declares --warning-over and --critical-over; both are optional floats.
  %w(warning critical).each do |severity|
    option :"#{severity}_over",
           long:        "--#{severity}-over COUNT",
           proc:        proc(&:to_f),
           description: "Trigger a #{severity} if sum requests is over specified count"
  end

  # Builds the options hash handed to the AWS clients. Credentials are only
  # included when both the key id and the secret were supplied; the region is
  # only included when one was supplied.
  def aws_config
    hash = {}
    hash.update access_key_id: config[:access_key_id], secret_access_key: config[:secret_access_key] if config[:access_key_id] && config[:secret_access_key]
    hash.update region: config[:region] if config[:region]
    hash
  end

  # Memoized ELB client.
  def elb
    @elb ||= AWS::ELB.new aws_config
  end

  # Memoized CloudWatch client.
  def cloud_watch
    @cloud_watch ||= AWS::CloudWatch.new aws_config
  end

  # Load balancers to check: all of them, or only those whose name was listed
  # in --elb-names. The result is cached after the first call.
  def elbs
    return @elbs if @elbs
    @elbs = elb.load_balancers.to_a
    @elbs.select! { |balancer| config[:elb_names].include? balancer.name } if config[:elb_names]
    @elbs
  end

  # First CloudWatch metric matching AWS/ELB 'RequestCount' for the given
  # load balancer name. NOTE: despite its name this method does not look up
  # latency; the name is kept so existing callers keep working.
  def latency_metric(elb_name)
    cloud_watch.metrics.with_namespace('AWS/ELB').with_metric_name('RequestCount').with_dimensions(name: 'LoadBalancerName', value: elb_name).first
  end

  # Query window: one period ending at --end-time, always using 'Sum'.
  def statistics_options
    {
      start_time: config[:end_time] - config[:period],
      end_time:   config[:end_time],
      statistics: ['Sum'],
      period:     config[:period]
    }
  end

  # :sum of the datapoint with the latest timestamp. Raises when the metric
  # is nil or no datapoint was returned; check_sum_requests rescues that.
  def latest_value(metric)
    metric.statistics(statistics_options.merge unit: 'Count').datapoints.sort_by { |datapoint| datapoint[:timestamp] }.last[:sum]
  end

  # Marks a severity as triggered and appends its explanation to the output.
  def flag_alert(severity, message)
    @severities[severity] = true
    @message += message
  end

  # Compares one load balancer's latest request sum against the thresholds.
  # Severities are visited in the order they were inserted into @severities
  # (critical first), and only the first breached one is reported.
  def check_sum_requests(elb)
    metric = latency_metric elb.name
    # The begin block must evaluate to the datapoint itself. Ending it with a
    # print statement would make metric_value nil (puts returns nil) and the
    # comparison below would raise NoMethodError.
    metric_value = begin
                     latest_value metric
                   rescue
                     # Any failure while fetching the datapoint is treated as
                     # a request count of 0.
                     0
                   end

    @severities.keys.each do |severity|
      threshold = config[:"#{severity}_over"]
      next unless threshold
      next if metric_value < threshold
      flag_alert severity,
                 "; #{elbs.size == 1 ? nil : "#{elb.inspect}'s"} Sum Requests is #{metric_value}. (expected lower than #{threshold})"
      break
    end
  end

  # Entry point: checks every selected load balancer, appends a summary of
  # the query window and reports the most severe status that was flagged.
  def run
    @message = if elbs.size == 1
                 elbs.first.inspect
               else
                 "#{elbs.size} load balancers total"
               end

    @severities = {
      critical: false,
      warning:  false
    }

    elbs.each { |balancer| check_sum_requests balancer }

    # This check declares no --statistics option; the statistic is always
    # 'Sum' (see statistics_options), so name it explicitly.
    @message += "; (Sum within #{config[:period]} seconds "
    @message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"

    if @severities[:critical]
      critical @message
    elsif @severities[:warning]
      warning @message
    else
      ok @message
    end
  end
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#! /usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# check-instance-events
|
|
4
|
+
#
|
|
5
|
+
# DESCRIPTION:
|
|
6
|
+
# This plugin looks up all instances in an account and alerts if one or more have a scheduled
|
|
7
|
+
# event (reboot, retirement, etc)
|
|
8
|
+
#
|
|
9
|
+
# OUTPUT:
|
|
10
|
+
# plain-text
|
|
11
|
+
#
|
|
12
|
+
# PLATFORMS:
|
|
13
|
+
# Linux
|
|
14
|
+
#
|
|
15
|
+
# DEPENDENCIES:
|
|
16
|
+
# gem: aws-sdk
|
|
17
|
+
# gem: sensu-plugin
|
|
18
|
+
#
|
|
19
|
+
# USAGE:
|
|
20
|
+
# ./check-instance-events.rb -r ${your_region} -a ${your_aws_access_key} -s ${your_aws_secret_access_key}
|
|
21
|
+
#
|
|
22
|
+
# NOTES:
|
|
23
|
+
#
|
|
24
|
+
# LICENSE:
|
|
25
|
+
# Copyright (c) 2014, Tim Smith, tim@cozy.co
|
|
26
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
|
27
|
+
# for details.
|
|
28
|
+
#
|
|
29
|
+
|
|
30
|
+
require 'rubygems' if RUBY_VERSION < '1.9.0'
|
|
31
|
+
require 'sensu-plugin/check/cli'
|
|
32
|
+
require 'aws-sdk'
|
|
33
|
+
|
|
34
|
+
# Sensu check that lists the EC2 instance statuses in a region and goes
# critical when at least one instance has a scheduled event other than an
# already completed reboot.
class CheckInstanceEvents < Sensu::Plugin::Check::CLI
  option :aws_access_key,
         short:       '-a AWS_ACCESS_KEY',
         long:        '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY_ID'] or provide it as an option",
         default:     ENV['AWS_ACCESS_KEY_ID']

  option :use_iam_role,
         short:       '-u',
         long:        '--use-iam',
         description: 'Use IAM role authentication. Instance must have IAM role assigned for this to work'

  option :include_name,
         short:       '-n',
         long:        '--include-name',
         description: "Includes any offending instance's 'Name' tag in the check output",
         default:     false

  option :aws_secret_access_key,
         short:       '-s AWS_SECRET_ACCESS_KEY',
         long:        '--aws-secret-access-key AWS_SECRET_ACCESS_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_ACCESS_KEY'] or provide it as an option",
         default:     ENV['AWS_SECRET_ACCESS_KEY']

  option :aws_region,
         short:       '-r AWS_REGION',
         long:        '--aws-region REGION',
         description: 'AWS Region (such as eu-west-1).',
         default:     'us-east-1'

  # Options hash for an AWS client: credentials only when both parts are
  # present, region always. Not used by #run, which builds its own options
  # so it can honour --use-iam.
  def aws_config
    hash = {}
    hash.update access_key_id: config[:aws_access_key], secret_access_key: config[:aws_secret_access_key]\
      if config[:aws_access_key] && config[:aws_secret_access_key]
    hash.update region: config[:aws_region]
    hash
  end

  # Entry point: collects the ids of instances with pending scheduled events,
  # optionally decorates them with their 'Name' tag, and reports critical
  # when any were found.
  def run
    event_instances = []
    # Named differently from the aws_config method to avoid shadowing it.
    client_options = {}

    # Explicit credentials are passed unless --use-iam was given.
    if config[:use_iam_role].nil?
      client_options.merge!(
        access_key_id:     config[:aws_access_key],
        secret_access_key: config[:aws_secret_access_key]
      )
    end

    ec2 = AWS::EC2::Client.new(client_options.merge!(region: config[:aws_region]))
    begin
      ec2.describe_instance_status[:instance_status_set].each do |status|
        # Drop completed reboots, then flag the instance only when some event
        # is left. Testing the filtered Array directly would always be
        # truthy, even when empty, so the emptiness check is explicit.
        pending_events = status[:events_set].reject { |event| completed_reboot?(event) }
        event_instances << status[:instance_id] unless pending_events.empty?
      end
    rescue => e
      unknown "An error occurred processing AWS EC2 API: #{e.message}"
    end

    if config[:include_name]
      event_instances = event_instances.map do |id|
        name = instance_name(ec2, id)
        # 'name(i-xxx)' when a Name tag was found, otherwise just the id.
        name.empty? ? id : "#{name}(#{id})"
      end
    end

    if event_instances.count > 0
      critical("#{event_instances.count} instances #{event_instances.count > 1 ? 'have' : 'has'} upcoming scheduled events: #{event_instances.join(',')}")
    else
      ok
    end
  end

  private

  # True for a reboot event that has already completed. The events API
  # apparently keeps returning these after the fact, for example:
  #   "events_set": [
  #     {
  #       "code": "system-reboot",
  #       "description": "[Completed] Scheduled reboot",
  #       "not_before": "2015-01-05 12:00:00 UTC",
  #       "not_after": "2015-01-05 18:00:00 UTC"
  #     }
  #   ]
  def completed_reboot?(event)
    event[:code] == 'system-reboot' && event[:description] =~ /\[Completed\]/ ? true : false
  end

  # Value of the instance's 'Name' tag, or '' when the instance has no such
  # tag or its details cannot be fetched (the failure is printed, not raised).
  def instance_name(ec2, id)
    details = ec2.describe_instances(instance_ids: [id])
    tags = details[:reservation_index][id][:instances_set][0][:tag_set]
    name_tag = tags.find { |tag| tag[:key] == 'Name' }
    name_tag ? name_tag[:value].to_s : ''
  rescue => e
    puts "Issue getting instance details for #{id}. Exception = #{e}"
    ''
  end
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#! /usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# check-rds-events
|
|
4
|
+
#
|
|
5
|
+
#
|
|
6
|
+
# DESCRIPTION:
|
|
7
|
+
# This plugin checks rds clusters for critical events.
|
|
8
|
+
# Due to the number of events types on RDS clusters the check searches for
|
|
9
|
+
# events containing the text string 'has started' or 'is being'. These events all have
|
|
10
|
+
# accompanying completion events and are impacting events
|
|
11
|
+
#
|
|
12
|
+
# OUTPUT:
|
|
13
|
+
# plain-text
|
|
14
|
+
#
|
|
15
|
+
# PLATFORMS:
|
|
16
|
+
# Linux
|
|
17
|
+
#
|
|
18
|
+
# DEPENDENCIES:
|
|
19
|
+
# gem: aws-sdk
|
|
20
|
+
# gem: sensu-plugin
|
|
21
|
+
#
|
|
22
|
+
# USAGE:
|
|
23
|
+
# ./check-rds-events.rb -r ${your_region} -s ${your_aws_secret_access_key} -a ${your_aws_access_key}
|
|
24
|
+
#
|
|
25
|
+
# NOTES:
|
|
26
|
+
#
|
|
27
|
+
# LICENSE:
|
|
28
|
+
# Tim Smith <tim@cozy.co>
|
|
29
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
|
30
|
+
# for details.
|
|
31
|
+
#
|
|
32
|
+
|
|
33
|
+
require 'sensu-plugin/check/cli'
|
|
34
|
+
require 'aws-sdk'
|
|
35
|
+
|
|
36
|
+
# Sensu check that looks at the last two hours of events for every RDS
# instance in a region and goes critical when an instance's most recent
# event message matches a "maintenance in progress" pattern.
class CheckRDSEvents < Sensu::Plugin::Check::CLI
  option :aws_access_key,
         short:       '-a AWS_ACCESS_KEY',
         long:        '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY_ID'] or provide it as an option",
         default:     ENV['AWS_ACCESS_KEY_ID']

  option :aws_secret_access_key,
         short:       '-s AWS_SECRET_ACCESS_KEY',
         long:        '--aws-secret-access-key AWS_SECRET_ACCESS_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_ACCESS_KEY'] or provide it as an option",
         default:     ENV['AWS_SECRET_ACCESS_KEY']

  option :aws_region,
         short:       '-r AWS_REGION',
         long:        '--aws-region REGION',
         description: 'AWS Region (such as eu-west-1).',
         default:     'us-east-1'

  # Entry point: reports ok when no instance is in maintenance, otherwise
  # critical with the comma-separated identifiers of the affected instances.
  def run # rubocop:disable AbcSize
    rds = AWS::RDS::Client.new(
      access_key_id:     config[:aws_access_key],
      secret_access_key: config[:aws_secret_access_key],
      region:            config[:aws_region])

    begin
      # Identifiers of every DB instance returned by the API.
      identifiers = rds.describe_db_instances[:db_instances].map { |db| db[:db_instance_identifier] }

      # Keep the instances whose latest event within the last 2 hours
      # (7200 seconds) is a "start of maintenance" style message.
      maint_clusters = identifiers.select do |identifier|
        events = rds.describe_events(start_time: (Time.now - 7200).iso8601, source_type: 'db-instance', source_identifier: identifier)[:events]
        !events.empty? && events[-1][:message] =~ /has started|is being|off-line|shutdown/
      end
    rescue => e
      unknown "An error occurred processing AWS RDS API: #{e.message}"
    end

    if maint_clusters.empty?
      ok
    else
      critical("Clusters w/ critical events: #{maint_clusters.join(',')}")
    end
  end
end
|