sensu-plugins-aws 3.2.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -1
- data/README.md +33 -0
- data/bin/check-asg-instances-created.rb +127 -0
- data/bin/check-asg-instances-inservice.rb +109 -0
- data/bin/check-cloudfront-tag.rb +70 -0
- data/bin/check-cloudwatch-metric.rb +7 -1
- data/bin/check-ebs-burst-limit.rb +96 -0
- data/bin/check-ec2-cpu_balance.rb +2 -2
- data/bin/check-ec2-filter.rb +38 -5
- data/bin/check-ecs-service-health.rb +37 -6
- data/bin/check-elb-instances-inservice.rb +103 -0
- data/bin/check-instance-events.rb +8 -11
- data/bin/check-instance-health.rb +24 -1
- data/bin/check-instances-count.rb +13 -1
- data/bin/check-rds-events.rb +3 -6
- data/bin/check-rds.rb +92 -52
- data/bin/check-route53-domain-expiration.rb +79 -0
- data/bin/check-s3-object.rb +59 -20
- data/bin/check-s3-tag.rb +70 -0
- data/bin/check-ses-limit.rb +1 -1
- data/bin/check-sqs-messages.rb +16 -18
- data/bin/check-vpc-vpn.rb +42 -47
- data/bin/metrics-asg.rb +156 -0
- data/bin/metrics-autoscaling-instance-count.rb +26 -10
- data/bin/metrics-billing.rb +98 -0
- data/bin/metrics-elasticache.rb +118 -167
- data/bin/metrics-elb-full.rb +1 -1
- data/bin/metrics-elb.rb +68 -59
- data/bin/metrics-rds.rb +135 -0
- data/bin/metrics-s3.rb +105 -0
- data/lib/sensu-plugins-aws.rb +1 -0
- data/lib/sensu-plugins-aws/cloudwatch-common.rb +2 -1
- data/lib/sensu-plugins-aws/version.rb +3 -3
- metadata +39 -3
@@ -63,7 +63,19 @@ class CheckInstanceCount < Sensu::Plugin::Check::CLI
|
|
63
63
|
default: 25
|
64
64
|
|
65
65
|
# Counts the instances of the configured Auto Scaling group that are both in
# the 'InService' lifecycle state and reported as 'Healthy'.
#
# Reads config[:groupname]. Any StandardError raised on the way is reported
# through `critical` together with the exception message.
def instance_count
  asg = Aws::AutoScaling::Client.new
  groups = asg.describe_auto_scaling_groups(
    auto_scaling_group_names: [config[:groupname]]
  ).to_h[:auto_scaling_groups]

  healthy_ids = []
  groups.each do |group|
    group[:instances].each do |member|
      next unless member[:lifecycle_state] == 'InService' && member[:health_status] == 'Healthy'
      healthy_ids << member[:instance_id]
    end
  end
  healthy_ids.length
rescue => e
  critical "There was an error reaching AWS - #{e.message}"
end
|
data/bin/check-rds-events.rb
CHANGED
@@ -41,7 +41,7 @@
|
|
41
41
|
#
|
42
42
|
|
43
43
|
require 'sensu-plugin/check/cli'
|
44
|
-
require 'aws-sdk
|
44
|
+
require 'aws-sdk'
|
45
45
|
|
46
46
|
class CheckRDSEvents < Sensu::Plugin::Check::CLI
|
47
47
|
option :aws_access_key,
|
@@ -74,10 +74,7 @@ class CheckRDSEvents < Sensu::Plugin::Check::CLI
|
|
74
74
|
end
|
75
75
|
|
76
76
|
# Returns the names of all regions of the 'aws' partition.
def rds_regions
  aws_partition = Aws.partition('aws')
  aws_partition.regions.map { |region| region.name }
end
|
82
79
|
|
83
80
|
def run
|
@@ -102,7 +99,7 @@ class CheckRDSEvents < Sensu::Plugin::Check::CLI
|
|
102
99
|
end
|
103
100
|
|
104
101
|
aws_regions.each do |r|
|
105
|
-
rds =
|
102
|
+
rds = Aws::RDS::Client.new aws_config.merge!(region: r)
|
106
103
|
|
107
104
|
begin
|
108
105
|
if !config[:db_instance_id].nil? && !config[:db_instance_id].empty?
|
data/bin/check-rds.rb
CHANGED
@@ -86,6 +86,11 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
86
86
|
long: '--db-instance-id NAME',
|
87
87
|
description: 'DB instance identifier'
|
88
88
|
|
89
|
+
# Identifier of the DB cluster to check (read as config[:db_cluster_id]).
option :db_cluster_id,
       description: 'DB cluster identifier',
       long: '--db-cluster-id NAME',
       short: '-l N'
|
93
|
+
|
89
94
|
option :end_time,
|
90
95
|
short: '-t T',
|
91
96
|
long: '--end-time TIME',
|
@@ -96,7 +101,7 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
96
101
|
# CloudWatch statistics period; the raw argument is converted with to_i and
# defaults to 180 when the flag is not given.
option :period,
       description: 'CloudWatch metric statistics period',
       long: '--period SECONDS',
       short: '-p N',
       proc: proc(&:to_i),
       default: 180
|
102
107
|
|
@@ -154,6 +159,12 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
154
159
|
db
|
155
160
|
end
|
156
161
|
|
162
|
+
# Looks up the writer member of a DB cluster.
#
# @param id [String] DB cluster identifier passed to describe_db_clusters
# @return [String] db_instance_identifier of the member flagged as writer
#
# Reports `unknown` when the response holds no cluster or when no member is
# flagged as writer. The previous version tested an undefined local (`cl`)
# and only after the whole call chain had already been dereferenced, so a
# missing cluster surfaced as NameError/NoMethodError instead of `unknown`.
# Errors raised by the API call itself are not handled here.
def find_db_cluster_writer(id)
  cluster = rds.describe_db_clusters(db_cluster_identifier: id).db_clusters[0]
  unknown 'DB cluster not found.' if cluster.nil?

  writer = cluster.db_cluster_members.detect(&:is_cluster_writer)
  unknown 'DB cluster writer not found.' if writer.nil?

  writer.db_instance_identifier
end
|
167
|
+
|
157
168
|
def cloud_watch_metric(metric_name, unit)
|
158
169
|
cloud_watch.get_metric_statistics(
|
159
170
|
namespace: 'AWS/RDS',
|
@@ -183,11 +194,6 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
183
194
|
end
|
184
195
|
end
|
185
196
|
|
186
|
-
def flag_alert(severity, message)
|
187
|
-
@severities[severity] = true
|
188
|
-
@message += message
|
189
|
-
end
|
190
|
-
|
191
197
|
def memory_total_bytes(instance_class)
|
192
198
|
memory_total_gigabytes = {
|
193
199
|
'db.cr1.8xlarge' => 244.0,
|
@@ -224,84 +230,118 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
224
230
|
|
225
231
|
# Compares an instance's availability zone with the expected one.
#
# @param severity [Symbol] key to raise in @severities on mismatch
# @param expected_az [String] availability zone the instance should be in
# @param instance [#availability_zone, nil] instance to inspect; falls back
#   to @db_instance. The parameter is optional so that both the two-argument
#   form and the three-argument call made from `collect` are accepted (the
#   latter used to raise ArgumentError).
# @return [String, nil] message fragment on mismatch, nil when the zone matches
def check_az(severity, expected_az, instance = nil)
  instance ||= @db_instance
  actual_az = instance.availability_zone
  return if actual_az == expected_az

  @severities[severity] = true
  "; AZ is #{actual_az} (expected #{expected_az})"
end
|
229
236
|
|
230
237
|
# Flags `severity` when the latest CPUUtilization value is not below the
# given limit.
#
# @return [String, nil] message fragment when the limit is reached, else nil
def check_cpu(severity, expected_lower_than)
  utilization = latest_value(cloud_watch_metric('CPUUtilization', 'Percent'))
  return if utilization < expected_lower_than

  @severities[severity] = true
  "; CPUUtilization is #{sprintf '%.2f', utilization}% (expected lower than #{expected_lower_than}%)"
end
|
236
244
|
|
237
245
|
# Flags `severity` when memory usage, derived from the latest FreeableMemory
# value and the total returned by memory_total_bytes for the instance class,
# is not below the given percentage.
#
# @return [String, nil] message fragment when the limit is reached, else nil
def check_memory(severity, expected_lower_than)
  freeable = latest_value(cloud_watch_metric('FreeableMemory', 'Bytes'))
  total = memory_total_bytes(@db_instance.db_instance_class)
  used_percentage = (total - freeable) / total * 100
  return if used_percentage < expected_lower_than

  @severities[severity] = true
  "; Memory usage is #{sprintf '%.2f', used_percentage}% (expected lower than #{expected_lower_than}%)"
end
|
246
255
|
|
247
256
|
# Flags `severity` when disk usage, derived from the latest FreeStorageSpace
# value and the instance's allocated_storage, is not below the given
# percentage.
#
# @param severity [Symbol] key to raise in @severities
# @param expected_lower_than [Numeric] usage percentage that triggers the alert
# @return [String, nil] message fragment when the limit is reached, else nil
def check_disk(severity, expected_lower_than)
  free_bytes = latest_value(cloud_watch_metric('FreeStorageSpace', 'Bytes'))
  # allocated_storage is multiplied by 1024**3 to obtain a byte count.
  total_bytes = @db_instance.allocated_storage * 1024**3
  # Force float division: with Integer operands the ratio would truncate to 0
  # and the check could never fire.
  used_percentage = (total_bytes - free_bytes).to_f / total_bytes * 100
  return if used_percentage < expected_lower_than

  @severities[severity] = true
  "; Disk usage is #{sprintf '%.2f', used_percentage}% (expected lower than #{expected_lower_than}%)"
end
|
256
266
|
|
257
267
|
# Flags `severity` when the latest DatabaseConnections value is not below the
# given limit.
#
# @return [String, nil] message fragment when the limit is reached, else nil
def check_connections(severity, expected_lower_than)
  open_connections = latest_value(cloud_watch_metric('DatabaseConnections', 'Count'))
  return if open_connections < expected_lower_than

  @severities[severity] = true
  "; DatabaseConnections are #{sprintf '%d', open_connections} (expected lower than #{expected_lower_than})"
end
|
263
274
|
|
264
275
|
# Flags `severity` when the sum of the latest ReadIOPS and WriteIOPS values is
# not below the given limit.
#
# @return [String, nil] message fragment when the limit is reached, else nil
def check_iops(severity, expected_lower_than)
  reads = latest_value(cloud_watch_metric('ReadIOPS', 'Count/Second'))
  writes = latest_value(cloud_watch_metric('WriteIOPS', 'Count/Second'))
  total_iops = reads + writes
  return if total_iops < expected_lower_than

  @severities[severity] = true
  "; IOPS are #{sprintf '%d', total_iops} (expected lower than #{expected_lower_than})"
end
|
273
285
|
|
274
286
|
# Entry point: selects the DB instances to inspect, runs `collect` on each and
# reports the combined result.
#
# Instance selection:
# * config[:db_cluster_id] given  -> the cluster's writer instance
# * config[:db_instance_id] given -> that instance (in addition to the writer
#   when both options are given)
# * neither given                 -> every instance from describe_db_instances
#
# Fixes over the previous version: the cluster id was read from an undefined
# local (`db_cluster_id`, NameError), and passing only a cluster id also
# appended every instance, so the cluster option did not narrow the check.
#
# Only the messages of instances that raised critical or warning are reported;
# critical wins over warning.
def run
  cluster_id = config[:db_cluster_id]
  instance_id = config[:db_instance_id]
  cluster_given = !(cluster_id.nil? || cluster_id.empty?)
  instance_given = !(instance_id.nil? || instance_id.empty?)

  instances = []
  instances << find_db_instance(find_db_cluster_writer(cluster_id)) if cluster_given
  if instance_given
    instances << find_db_instance(instance_id)
  elsif !cluster_given
    rds.describe_db_instances[:db_instances].each { |db| instances << db }
  end

  messages = ''
  severities = { critical: false, warning: false }
  instances.each do |instance|
    # The check_* helpers read the instance under test from @db_instance.
    @db_instance = instance
    text, flags = collect(instance)
    level = [:critical, :warning].find { |name| flags[name] }
    next unless level

    severities[level] = true
    messages += text
  end

  if severities[:critical]
    critical messages
  elsif severities[:warning]
    warning messages
  else
    ok messages
  end
end
|
324
|
+
|
325
|
+
# Runs every configured check against one DB instance.
#
# @param instance [#[]] DB instance description; only
#   instance[:db_instance_identifier] is read here (the check_* helpers use
#   @db_instance, which the caller sets)
# @return [Array(String, Hash)] the message for this instance and the
#   @severities hash ({ critical: Boolean, warning: Boolean })
#
# Fixes over the previous version: check_az was called with three arguments
# although it is defined with two (ArgumentError), and its nil result (zone
# matches) was concatenated to the message (TypeError).
def collect(instance)
  message = "\n#{instance[:db_instance_identifier]}: "
  @severities = {
    critical: false,
    warning: false
  }
  metrics = %w(cpu memory disk connections iops)

  @severities.keys.each do |severity|
    expected_az = config[:"availability_zone_#{severity}"]
    # Checks return nil when nothing is wrong; to_s turns that into ''.
    message += check_az(severity, expected_az).to_s if expected_az

    metrics.each do |item|
      limit = config[:"#{item}_#{severity}_over"]
      next unless limit

      message += send("check_#{item}", severity, limit).to_s
    end
  end

  # Append the evaluated time window whenever at least one metric threshold
  # is configured.
  if metrics.any? { |item| %w(warning critical).any? { |severity| config[:"#{item}_#{severity}_over"] } }
    message += "(#{config[:statistics].to_s.capitalize} within #{config[:period]}s "
    message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"
  end
  [message, @severities]
end
|
307
347
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-route53-domain-expiration
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# Alert when Route53 registered domains are close to expiration
|
7
|
+
#
|
8
|
+
# OUTPUT:
|
9
|
+
# plain-text
|
10
|
+
#
|
11
|
+
# DEPENDENCIES:
|
12
|
+
# gem: aws-sdk
|
13
|
+
# gem: sensu-plugin
|
14
|
+
#
|
15
|
+
# USAGE:
|
16
|
+
# check-route53-domain-expiration.rb
|
17
|
+
#
|
18
|
+
# LICENSE:
|
19
|
+
# Eric Heydrick <eheydrick@gmail.com>
|
20
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
21
|
+
# for details.
|
22
|
+
|
23
|
+
require 'sensu-plugins-aws'
|
24
|
+
require 'sensu-plugin/check/cli'
|
25
|
+
require 'aws-sdk'
|
26
|
+
|
27
|
+
# Alerts when domains registered through Route53 are close to expiration.
#
# Critical when any domain expires within config[:crit] days, warning when any
# expires within config[:warn] days, ok otherwise. Errors raised while talking
# to the API are reported as unknown.
class CheckRoute53DomainExpiration < Sensu::Plugin::Check::CLI
  include Common

  SECONDS_PER_DAY = 86_400

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default: 'us-east-1'

  option :warn,
         short: '-w WARN',
         long: '--warning WARN',
         description: 'Warn if domain expires in less than this many days (default: 30)',
         default: 30,
         proc: proc(&:to_i)

  option :crit,
         short: '-c CRITICAL',
         long: '--critical CRITICAL',
         description: 'Critical if domain expires in less than this many days (default: 7)',
         default: 7,
         proc: proc(&:to_i)

  def run
    crit_domains = {}
    warn_domains = {}

    r53 = Aws::Route53Domains::Client.new(aws_config)
    begin
      registered_domains(r53).each do |domain|
        # Whole days left, truncated toward zero. Plain Time arithmetic
        # replaces the DateTime.parse(expiry.to_s) round-trip.
        # NOTE(review): assumes domain.expiry is a Time - confirm against the SDK.
        days_until_expiration = ((domain.expiry - Time.now) / SECONDS_PER_DAY).to_i
        if days_until_expiration <= config[:crit]
          crit_domains[domain.domain_name] = days_until_expiration
        elsif days_until_expiration <= config[:warn]
          warn_domains[domain.domain_name] = days_until_expiration
        end
      end

      if !crit_domains.empty?
        critical "Domains are expiring in less than #{config[:crit]} days: " + expiry_summary(crit_domains)
      elsif !warn_domains.empty?
        warning "Domains are expiring in less than #{config[:warn]} days: " + expiry_summary(warn_domains)
      else
        ok 'No domains are expiring soon'
      end
    rescue => e
      unknown "An error occurred communicating with the Route53 API: #{e.message}"
    end
  end

  private

  # Collects every registered domain by following next_page_marker until the
  # API stops returning one; a single list_domains call only yields one page.
  def registered_domains(client)
    domains = []
    marker = nil
    loop do
      page = client.list_domains(marker ? { marker: marker } : {})
      domains.concat(page.domains)
      marker = page.next_page_marker
      break if marker.nil? || marker.empty?
    end
    domains
  end

  # Renders "name (in N days)" entries joined by ", ".
  def expiry_summary(days_by_name)
    days_by_name.map { |name, days| "#{name} (in #{days} days)" }.join(', ')
  end
end
|
data/bin/check-s3-object.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
#
|
3
|
-
# check-s3-
|
3
|
+
# check-s3-object
|
4
4
|
#
|
5
5
|
# DESCRIPTION:
|
6
6
|
# This plugin checks if a file exists in a bucket and/or is not too old.
|
@@ -31,7 +31,7 @@
|
|
31
31
|
require 'sensu-plugin/check/cli'
|
32
32
|
require 'aws-sdk'
|
33
33
|
|
34
|
-
class
|
34
|
+
class CheckS3Object < Sensu::Plugin::Check::CLI
|
35
35
|
option :aws_access_key,
|
36
36
|
short: '-a AWS_ACCESS_KEY',
|
37
37
|
long: '--aws-access-key AWS_ACCESS_KEY',
|
@@ -67,13 +67,6 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
|
|
67
67
|
description: 'The name of key in the bucket',
|
68
68
|
required: true
|
69
69
|
|
70
|
-
option :ok_zero_size,
|
71
|
-
description: 'OK if file has zero size',
|
72
|
-
short: '-z',
|
73
|
-
long: '--ok-zero-size',
|
74
|
-
boolean: true,
|
75
|
-
default: false
|
76
|
-
|
77
70
|
option :warning_age,
|
78
71
|
description: 'Warn if mtime greater than provided age in seconds',
|
79
72
|
short: '-w SECONDS',
|
@@ -84,17 +77,58 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
|
|
84
77
|
short: '-c SECONDS',
|
85
78
|
long: '--critical SECONDS'
|
86
79
|
|
80
|
+
# When set, an object whose size is 0 is not reported as critical.
option :ok_zero_size,
       description: 'OK if file has zero size',
       short: '-z',
       long: '--ok-zero-size',
       boolean: true,
       default: false

# Size thresholds, compared using the operator selected by :compare_size.
# NOTE(review): presumably expressed in bytes, like the object's
# content_length they are compared against - confirm.
option :warning_size,
       description: 'Warning threshold for size',
       long: '--warning-size COUNT'

option :critical_size,
       description: 'Critical threshold for size',
       long: '--critical-size COUNT'

# Help text spelling fixed ("Comparision" -> "Comparison").
option :compare_size,
       description: 'Comparison operator for threshold: equal, not, greater, less',
       short: '-o OPERATION',
       long: '--operator-size OPERATION',
       default: 'equal'
|
100
|
+
|
87
101
|
# Builds the client options hash (credentials and region) from the parsed
# command-line configuration. A new hash is returned on every call.
def aws_config
  {
    access_key_id: config[:aws_access_key],
    secret_access_key: config[:aws_secret_access_key],
    region: config[:aws_region]
  }
end
|
92
106
|
|
93
|
-
def
|
94
|
-
|
95
|
-
|
96
|
-
|
107
|
+
# Returns the comparison lambda used by run_check.
#
# The lambda takes (type, a, b) and answers whether value `a` breaches
# threshold `b`:
# * type 'age'  -> a > b
# * type 'size' -> chosen by config[:compare_size]: 'greater' (a > b),
#   'less' (a < b), 'not' (a != b); anything else, including the default
#   'equal', compares with a == b
# * any other type -> a == b
#
# Previously the equality fallback was attached to the outer `case`, so a
# 'size' check with the default 'equal' operator returned nil and never fired.
def operator
  lambda do |type, a, b|
    case type
    when 'age'
      a > b
    when 'size'
      case config[:compare_size]
      when 'greater' then a > b
      when 'less' then a < b
      when 'not' then a != b
      else a == b
      end
    else
      a == b
    end
  end
end
|
126
|
+
|
127
|
+
# Evaluates one threshold and emits the matching check result.
#
# type  - 'age' or 'size'; combined with level to build the config key
#         (e.g. :critical_age)
# level - name of the result method to invoke through `send`
# value - measured value handed to the comparison lambda
# msg   - format string receiving key name, value and bucket name
#
# Does nothing when the threshold is not configured or not breached.
def run_check(type, level, value, msg)
  threshold = config[:"#{level}_#{type}"]
  return if threshold.nil?
  return unless operator.call(type, value, threshold.to_i)

  send(level, msg % [config[:key_name], value, config[:bucket_name]])
end
|
99
133
|
|
100
134
|
def run
|
@@ -107,21 +141,26 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
|
|
107
141
|
s3 = Aws::S3::Client.new(aws_config.merge!(region: config[:aws_region]))
|
108
142
|
begin
|
109
143
|
output = s3.head_object(bucket: config[:bucket_name], key: config[:key_name])
|
144
|
+
age = Time.now.to_i - output[:last_modified].to_i
|
145
|
+
size = output[:content_length]
|
110
146
|
|
111
|
-
|
112
|
-
|
147
|
+
[:critical, :warning].each do |level|
|
148
|
+
run_check('age', level, age, 'S3 object %s is %s seconds old (bucket %s)')
|
113
149
|
end
|
114
150
|
|
115
|
-
if
|
116
|
-
|
117
|
-
run_check(:critical, age) || run_check(:warning, age) || ok("S3 object #{config[:key_name]} is #{age} seconds old (bucket #{config[:bucket_name]})")
|
151
|
+
if size == 0
|
152
|
+
critical "S3 object #{config[:key_name]} is empty (bucket #{config[:bucket_name]})" unless config[:ok_zero_size]
|
118
153
|
else
|
119
|
-
|
154
|
+
[:critical, :warning].each do |level|
|
155
|
+
run_check('size', level, size, 'S3 %s object\'size : %s octets (bucket %s)')
|
156
|
+
end
|
120
157
|
end
|
158
|
+
|
159
|
+
ok("S3 object #{config[:key_name]} exists in bucket #{config[:bucket_name]}")
|
121
160
|
rescue Aws::S3::Errors::NotFound => _
|
122
161
|
critical "S3 object #{config[:key_name]} not found in bucket #{config[:bucket_name]}"
|
123
162
|
rescue => e
|
124
|
-
critical "S3 object #{config[:key_name]} in bucket #{config[:bucket_name]} - #{e.message}"
|
163
|
+
critical "S3 object #{config[:key_name]} in bucket #{config[:bucket_name]} - #{e.message} - #{e.backtrace}"
|
125
164
|
end
|
126
165
|
end
|
127
166
|
end
|