sensu-plugins-aws 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +45 -1
- data/README.md +33 -0
- data/bin/check-asg-instances-created.rb +127 -0
- data/bin/check-asg-instances-inservice.rb +109 -0
- data/bin/check-cloudfront-tag.rb +70 -0
- data/bin/check-cloudwatch-metric.rb +7 -1
- data/bin/check-ebs-burst-limit.rb +96 -0
- data/bin/check-ec2-cpu_balance.rb +2 -2
- data/bin/check-ec2-filter.rb +38 -5
- data/bin/check-ecs-service-health.rb +37 -6
- data/bin/check-elb-instances-inservice.rb +103 -0
- data/bin/check-instance-events.rb +8 -11
- data/bin/check-instance-health.rb +24 -1
- data/bin/check-instances-count.rb +13 -1
- data/bin/check-rds-events.rb +3 -6
- data/bin/check-rds.rb +92 -52
- data/bin/check-route53-domain-expiration.rb +79 -0
- data/bin/check-s3-object.rb +59 -20
- data/bin/check-s3-tag.rb +70 -0
- data/bin/check-ses-limit.rb +1 -1
- data/bin/check-sqs-messages.rb +16 -18
- data/bin/check-vpc-vpn.rb +42 -47
- data/bin/metrics-asg.rb +156 -0
- data/bin/metrics-autoscaling-instance-count.rb +26 -10
- data/bin/metrics-billing.rb +98 -0
- data/bin/metrics-elasticache.rb +118 -167
- data/bin/metrics-elb-full.rb +1 -1
- data/bin/metrics-elb.rb +68 -59
- data/bin/metrics-rds.rb +135 -0
- data/bin/metrics-s3.rb +105 -0
- data/lib/sensu-plugins-aws.rb +1 -0
- data/lib/sensu-plugins-aws/cloudwatch-common.rb +2 -1
- data/lib/sensu-plugins-aws/version.rb +3 -3
- metadata +39 -3
@@ -63,7 +63,19 @@ class CheckInstanceCount < Sensu::Plugin::Check::CLI
|
|
63
63
|
default: 25
|
64
64
|
|
65
65
|
# Counts the instances of the configured auto scaling group
# (config[:groupname]) whose lifecycle state is 'InService' and whose health
# status is 'Healthy'.
#
# Any StandardError raised while querying AWS is reported through `critical`.
def instance_count
  client = Aws::AutoScaling::Client.new
  resp = client.describe_auto_scaling_groups(
    auto_scaling_group_names: [config[:groupname]]
  ).to_h
  # A group entry without an :instances key contributes nothing instead of
  # raising on nil.
  members = resp[:auto_scaling_groups].flat_map { |group| group.fetch(:instances, []) }
  members.count do |instance|
    instance[:lifecycle_state] == 'InService' && instance[:health_status] == 'Healthy'
  end
rescue => e
  critical "There was an error reaching AWS - #{e.message}"
end
|
data/bin/check-rds-events.rb
CHANGED
@@ -41,7 +41,7 @@
|
|
41
41
|
#
|
42
42
|
|
43
43
|
require 'sensu-plugin/check/cli'
|
44
|
-
require 'aws-sdk
|
44
|
+
require 'aws-sdk'
|
45
45
|
|
46
46
|
class CheckRDSEvents < Sensu::Plugin::Check::CLI
|
47
47
|
option :aws_access_key,
|
@@ -74,10 +74,7 @@ class CheckRDSEvents < Sensu::Plugin::Check::CLI
|
|
74
74
|
end
|
75
75
|
|
76
76
|
# Returns the name of every region listed in the 'aws' partition.
def rds_regions
  Aws.partition('aws').regions.map { |region| region.name }
end
|
82
79
|
|
83
80
|
def run
|
@@ -102,7 +99,7 @@ class CheckRDSEvents < Sensu::Plugin::Check::CLI
|
|
102
99
|
end
|
103
100
|
|
104
101
|
aws_regions.each do |r|
|
105
|
-
rds =
|
102
|
+
rds = Aws::RDS::Client.new aws_config.merge!(region: r)
|
106
103
|
|
107
104
|
begin
|
108
105
|
if !config[:db_instance_id].nil? && !config[:db_instance_id].empty?
|
data/bin/check-rds.rb
CHANGED
@@ -86,6 +86,11 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
86
86
|
long: '--db-instance-id NAME',
|
87
87
|
description: 'DB instance identifier'
|
88
88
|
|
89
|
+
option :db_cluster_id,
|
90
|
+
short: '-l N',
|
91
|
+
long: '--db-cluster-id NAME',
|
92
|
+
description: 'DB cluster identifier'
|
93
|
+
|
89
94
|
option :end_time,
|
90
95
|
short: '-t T',
|
91
96
|
long: '--end-time TIME',
|
@@ -96,7 +101,7 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
96
101
|
option :period,
|
97
102
|
short: '-p N',
|
98
103
|
long: '--period SECONDS',
|
99
|
-
default:
|
104
|
+
default: 180,
|
100
105
|
proc: proc(&:to_i),
|
101
106
|
description: 'CloudWatch metric statistics period'
|
102
107
|
|
@@ -154,6 +159,12 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
154
159
|
db
|
155
160
|
end
|
156
161
|
|
162
|
+
# Looks up the DB cluster with the given identifier and returns the instance
# identifier of the member flagged as the cluster writer.
#
# Reports `unknown` when the response holds no cluster or when no member is
# flagged as writer. The lookup is done step by step so a missing cluster or
# writer is reported rather than surfacing as a NoMethodError on nil.
def find_db_cluster_writer(id)
  cluster = rds.describe_db_clusters(db_cluster_identifier: id).db_clusters.first
  unknown 'DB cluster not found.' if cluster.nil?

  writer = cluster.db_cluster_members.detect(&:is_cluster_writer)
  unknown 'DB cluster writer not found.' if writer.nil?

  writer.db_instance_identifier
end
|
167
|
+
|
157
168
|
def cloud_watch_metric(metric_name, unit)
|
158
169
|
cloud_watch.get_metric_statistics(
|
159
170
|
namespace: 'AWS/RDS',
|
@@ -183,11 +194,6 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
183
194
|
end
|
184
195
|
end
|
185
196
|
|
186
|
-
def flag_alert(severity, message)
|
187
|
-
@severities[severity] = true
|
188
|
-
@message += message
|
189
|
-
end
|
190
|
-
|
191
197
|
def memory_total_bytes(instance_class)
|
192
198
|
memory_total_gigabytes = {
|
193
199
|
'db.cr1.8xlarge' => 244.0,
|
@@ -224,84 +230,118 @@ class CheckRDS < Sensu::Plugin::Check::CLI
|
|
224
230
|
|
225
231
|
# Compares the availability zone of @db_instance with the expected one.
#
# Returns nil when they match; otherwise flags `severity` in @severities and
# returns a message fragment describing the mismatch.
def check_az(severity, expected_az)
  if @db_instance.availability_zone == expected_az
    nil
  else
    @severities[severity] = true
    "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
  end
end
|
229
236
|
|
230
237
|
# Fetches the latest 'CPUUtilization' (Percent) value and compares it with
# the threshold.
#
# Returns nil when the value is below `expected_lower_than`; otherwise flags
# `severity` in @severities and returns a message fragment.
def check_cpu(severity, expected_lower_than)
  utilization = latest_value(cloud_watch_metric('CPUUtilization', 'Percent'))
  return if utilization < expected_lower_than

  @severities[severity] = true
  "; CPUUtilization is #{format('%.2f', utilization)}% (expected lower than #{expected_lower_than}%)"
end
|
236
244
|
|
237
245
|
# Derives the memory usage percentage from the latest 'FreeableMemory'
# (Bytes) value and the total memory of the instance class of @db_instance.
#
# Returns nil when usage is below `expected_lower_than`; otherwise flags
# `severity` in @severities and returns a message fragment.
def check_memory(severity, expected_lower_than)
  freeable_bytes = latest_value(cloud_watch_metric('FreeableMemory', 'Bytes'))
  total_bytes = memory_total_bytes(@db_instance.db_instance_class)
  used_percentage = (total_bytes - freeable_bytes) / total_bytes * 100
  return if used_percentage < expected_lower_than

  @severities[severity] = true
  "; Memory usage is #{format('%.2f', used_percentage)}% (expected lower than #{expected_lower_than}%)"
end
|
246
255
|
|
247
256
|
# Derives the disk usage percentage from the latest 'FreeStorageSpace'
# (Bytes) value and the allocated storage of @db_instance, which is
# multiplied by 1024**3 to obtain a byte count.
#
# Returns nil when usage is below `expected_lower_than`; otherwise flags
# `severity` in @severities and returns a message fragment.
def check_disk(severity, expected_lower_than)
  free_bytes = latest_value(cloud_watch_metric('FreeStorageSpace', 'Bytes'))
  total_bytes = @db_instance.allocated_storage * 1024**3
  # fdiv forces floating-point division: with `/`, an Integer metric value
  # and an Integer total would truncate the ratio to 0.
  used_percentage = (total_bytes - free_bytes).fdiv(total_bytes) * 100
  return if used_percentage < expected_lower_than

  @severities[severity] = true
  "; Disk usage is #{format('%.2f', used_percentage)}% (expected lower than #{expected_lower_than}%)"
end
|
256
266
|
|
257
267
|
# Fetches the latest 'DatabaseConnections' (Count) value and compares it with
# the threshold.
#
# Returns nil when the value is below `expected_lower_than`; otherwise flags
# `severity` in @severities and returns a message fragment.
def check_connections(severity, expected_lower_than)
  connections = latest_value(cloud_watch_metric('DatabaseConnections', 'Count'))
  return if connections < expected_lower_than

  @severities[severity] = true
  "; DatabaseConnections are #{format('%d', connections)} (expected lower than #{expected_lower_than})"
end
|
263
274
|
|
264
275
|
# Adds the latest 'ReadIOPS' and 'WriteIOPS' (Count/Second) values and
# compares the sum with the threshold.
#
# Returns nil when the sum is below `expected_lower_than`; otherwise flags
# `severity` in @severities and returns a message fragment.
def check_iops(severity, expected_lower_than)
  read_iops = latest_value(cloud_watch_metric('ReadIOPS', 'Count/Second'))
  write_iops = latest_value(cloud_watch_metric('WriteIOPS', 'Count/Second'))
  total_iops = read_iops + write_iops
  return if total_iops < expected_lower_than

  @severities[severity] = true
  "; IOPS are #{format('%d', total_iops)} (expected lower than #{expected_lower_than})"
end
|
273
285
|
|
274
286
|
# Entry point of the check: selects the DB instances to inspect, runs
# `collect` on each and reports the aggregated result.
#
# Instance selection:
# * config[:db_cluster_id] set    -> the writer instance of that cluster
# * config[:db_instance_id] set   -> that instance (in addition to a cluster
#                                    writer, when both options are given)
# * neither option set            -> every instance returned by
#                                    describe_db_instances
#
# The cluster id is read from config; the previous code referenced an
# undefined `db_cluster_id` name. A cluster-only invocation no longer falls
# through to checking every instance.
#
# Only messages of instances flagged critical or warning are reported; the
# overall status is critical if any instance is critical, else warning if any
# instance is warning, else ok.
def run
  cluster_id = config[:db_cluster_id]
  instance_id = config[:db_instance_id]
  instance_requested = !(instance_id.nil? || instance_id.empty?)

  instances = []
  instances << find_db_instance(find_db_cluster_writer(cluster_id)) if cluster_id

  if instance_requested
    instances << find_db_instance(instance_id)
  elsif !cluster_id
    rds.describe_db_instances[:db_instances].each { |db| instances << db }
  end

  messages = ''
  severities = {
    critical: false,
    warning: false
  }

  instances.each do |instance|
    # The check_* helpers read the instance under inspection from this ivar.
    @db_instance = instance
    message, flagged = collect(instance)
    level = [:critical, :warning].find { |candidate| flagged[candidate] }
    next unless level

    severities[level] = true
    messages += message
  end

  if severities[:critical]
    critical messages
  elsif severities[:warning]
    warning messages
  else
    ok messages
  end
end
|
324
|
+
|
325
|
+
# Runs every configured check against one DB instance.
#
# Resets @severities, then for each severity (:critical, :warning) runs the
# availability-zone check and the cpu/memory/disk/connections/iops checks
# whose thresholds are present in config, appending the fragment each check
# returns to the message.
#
# instance - object answering [:db_instance_identifier]; used for the
#            message prefix.
#
# Returns a two-element array: [message String, @severities Hash].
def collect(instance)
  metric_items = %w(cpu memory disk connections iops)
  message = "\n#{instance[:db_instance_identifier]}: "
  @severities = {
    critical: false,
    warning: false
  }

  @severities.keys.each do |severity|
    expected_az = config[:"availability_zone_#{severity}"]
    # check_az takes (severity, expected_az) and returns nil when the zone
    # matches, so call it with two arguments and coerce nil to ''.
    message += check_az(severity, expected_az).to_s if expected_az

    metric_items.each do |item|
      threshold = config[:"#{item}_#{severity}_over"]
      next unless threshold

      # Checks return nil when the metric is within bounds.
      message += send("check_#{item}", severity, threshold).to_s
    end
  end

  # Describe the sampling window only when at least one metric threshold is
  # configured.
  if metric_items.any? { |item| %w(warning critical).any? { |severity| config[:"#{item}_#{severity}_over"] } }
    message += "(#{config[:statistics].to_s.capitalize} within #{config[:period]}s "
    message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"
  end

  [message, @severities]
end
|
307
347
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-route53-domain-expiration
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# Alert when Route53 registered domains are close to expiration
|
7
|
+
#
|
8
|
+
# OUTPUT:
|
9
|
+
# plain-text
|
10
|
+
#
|
11
|
+
# DEPENDENCIES:
|
12
|
+
# gem: aws-sdk
|
13
|
+
# gem: sensu-plugin
|
14
|
+
#
|
15
|
+
# USAGE:
|
16
|
+
# check-route53-domain-expiration.rb
|
17
|
+
#
|
18
|
+
# LICENSE:
|
19
|
+
# Eric Heydrick <eheydrick@gmail.com>
|
20
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
21
|
+
# for details.
|
22
|
+
|
23
|
+
require 'sensu-plugins-aws'
|
24
|
+
require 'sensu-plugin/check/cli'
|
25
|
+
require 'aws-sdk'
|
26
|
+
|
27
|
+
# Check that alerts when domains returned by the Route53 Domains API are
# close to their expiration date.
#
# Options:
#   --aws-region  region used for the API client (default 'us-east-1')
#   --warning     warn when a domain expires within this many days (default 30)
#   --critical    critical when a domain expires within this many days (default 7)
class CheckRoute53DomainExpiration < Sensu::Plugin::Check::CLI
  include Common

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default: 'us-east-1'

  option :warn,
         short: '-w WARN',
         long: '--warning WARN',
         description: 'Warn if domain expires in less than this many days (default: 30)',
         default: 30,
         proc: proc(&:to_i)

  option :crit,
         short: '-c CRITICAL',
         long: '--critical CRITICAL',
         description: 'Critical if domain expires in less than this many days (default: 7)',
         default: 7,
         proc: proc(&:to_i)

  # Lists the registered domains, classifies each by the whole number of days
  # left until expiry, and reports critical / warning / ok accordingly.
  # StandardErrors raised along the way are reported as unknown.
  def run
    warn_domains = {}
    crit_domains = {}

    r53 = Aws::Route53Domains::Client.new(aws_config)
    begin
      # list_domains returns one page at a time; walk every page so domains
      # beyond the first page are checked too.
      r53.list_domains.each_page do |page|
        page.domains.each do |domain|
          expiration = DateTime.parse(domain.expiry.to_s)
          days_until_expiration = (expiration - DateTime.now).to_i
          if days_until_expiration <= config[:crit]
            crit_domains[domain] = days_until_expiration
          elsif days_until_expiration <= config[:warn]
            warn_domains[domain] = days_until_expiration
          end
        end
      end

      if !crit_domains.empty?
        critical "Domains are expiring in less than #{config[:crit]} days: " + crit_domains.map { |d, v| "#{d.domain_name} (in #{v} days)" }.join(', ')
      elsif !warn_domains.empty?
        warning "Domains are expiring in less than #{config[:warn]} days: " + warn_domains.map { |d, v| "#{d.domain_name} (in #{v} days)" }.join(', ')
      else
        ok 'No domains are expiring soon'
      end
    rescue => e
      unknown "An error occurred communicating with the Route53 API: #{e.message}"
    end
  end
end
|
data/bin/check-s3-object.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
#
|
3
|
-
# check-s3-
|
3
|
+
# check-s3-object
|
4
4
|
#
|
5
5
|
# DESCRIPTION:
|
6
6
|
# This plugin checks if a file exists in a bucket and/or is not too old.
|
@@ -31,7 +31,7 @@
|
|
31
31
|
require 'sensu-plugin/check/cli'
|
32
32
|
require 'aws-sdk'
|
33
33
|
|
34
|
-
class
|
34
|
+
class CheckS3Object < Sensu::Plugin::Check::CLI
|
35
35
|
option :aws_access_key,
|
36
36
|
short: '-a AWS_ACCESS_KEY',
|
37
37
|
long: '--aws-access-key AWS_ACCESS_KEY',
|
@@ -67,13 +67,6 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
|
|
67
67
|
description: 'The name of key in the bucket',
|
68
68
|
required: true
|
69
69
|
|
70
|
-
option :ok_zero_size,
|
71
|
-
description: 'OK if file has zero size',
|
72
|
-
short: '-z',
|
73
|
-
long: '--ok-zero-size',
|
74
|
-
boolean: true,
|
75
|
-
default: false
|
76
|
-
|
77
70
|
option :warning_age,
|
78
71
|
description: 'Warn if mtime greater than provided age in seconds',
|
79
72
|
short: '-w SECONDS',
|
@@ -84,17 +77,58 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
|
|
84
77
|
short: '-c SECONDS',
|
85
78
|
long: '--critical SECONDS'
|
86
79
|
|
80
|
+
option :ok_zero_size,
|
81
|
+
description: 'OK if file has zero size',
|
82
|
+
short: '-z',
|
83
|
+
long: '--ok-zero-size',
|
84
|
+
boolean: true,
|
85
|
+
default: false
|
86
|
+
|
87
|
+
option :warning_size,
|
88
|
+
description: 'Warning threshold for size',
|
89
|
+
long: '--warning-size COUNT'
|
90
|
+
|
91
|
+
option :critical_size,
|
92
|
+
description: 'Critical threshold for size',
|
93
|
+
long: '--critical-size COUNT'
|
94
|
+
|
95
|
+
option :compare_size,
|
96
|
+
description: 'Comparision operator for threshold: equal, not, greater, less',
|
97
|
+
short: '-o OPERATION',
|
98
|
+
long: '--operator-size OPERATION',
|
99
|
+
default: 'equal'
|
100
|
+
|
87
101
|
# Builds the AWS client settings hash (:access_key_id, :secret_access_key,
# :region) from the corresponding command-line options.
def aws_config
  option_for = {
    access_key_id: :aws_access_key,
    secret_access_key: :aws_secret_access_key,
    region: :aws_region
  }
  option_for.each_with_object({}) do |(setting, option_name), settings|
    settings[setting] = config[option_name]
  end
end
|
92
106
|
|
93
|
-
def
|
94
|
-
|
95
|
-
|
96
|
-
|
107
|
+
# Returns a lambda (type, a, b) that decides whether value `a` breaches
# threshold `b`.
#
# * type 'age'  -> a > b
# * type 'size' -> comparison chosen by config[:compare_size]:
#                  'greater' (a > b), 'less' (a < b), 'not' (a != b),
#                  'equal' (a == b). Any other operator name yields nil, so
#                  no alert fires.
# * other types -> a == b
#
# The 'equal' operator is the option's default; it previously had no branch
# inside the 'size' case, so size thresholds never fired with the default.
def operator
  lambda do |type, a, b|
    case type
    when 'age'
      a > b
    when 'size'
      case config[:compare_size]
      when 'greater' then a > b
      when 'less' then a < b
      when 'not' then a != b
      when 'equal' then a == b
      end
    else
      a == b
    end
  end
end
|
126
|
+
|
127
|
+
# Evaluates one threshold and raises the matching alert.
#
# type  - 'age' or 'size'; combined with `level` to find the option
#         (e.g. :critical_age).
# level - name of the status method to call (e.g. :critical, :warning).
# value - measured value, compared with the option converted via to_i.
# msg   - format string filled with key name, value and bucket name.
#
# Does nothing when the option is unset or the comparison from `operator`
# is not truthy.
def run_check(type, level, value, msg)
  threshold = config[:"#{level}_#{type}"]
  return if threshold.nil?
  return unless operator.call(type, value, threshold.to_i)

  send(level, format(msg, config[:key_name], value, config[:bucket_name]))
end
|
99
133
|
|
100
134
|
def run
|
@@ -107,21 +141,26 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
|
|
107
141
|
s3 = Aws::S3::Client.new(aws_config.merge!(region: config[:aws_region]))
|
108
142
|
begin
|
109
143
|
output = s3.head_object(bucket: config[:bucket_name], key: config[:key_name])
|
144
|
+
age = Time.now.to_i - output[:last_modified].to_i
|
145
|
+
size = output[:content_length]
|
110
146
|
|
111
|
-
|
112
|
-
|
147
|
+
[:critical, :warning].each do |level|
|
148
|
+
run_check('age', level, age, 'S3 object %s is %s seconds old (bucket %s)')
|
113
149
|
end
|
114
150
|
|
115
|
-
if
|
116
|
-
|
117
|
-
run_check(:critical, age) || run_check(:warning, age) || ok("S3 object #{config[:key_name]} is #{age} seconds old (bucket #{config[:bucket_name]})")
|
151
|
+
if size == 0
|
152
|
+
critical "S3 object #{config[:key_name]} is empty (bucket #{config[:bucket_name]})" unless config[:ok_zero_size]
|
118
153
|
else
|
119
|
-
|
154
|
+
[:critical, :warning].each do |level|
|
155
|
+
run_check('size', level, size, 'S3 %s object\'size : %s octets (bucket %s)')
|
156
|
+
end
|
120
157
|
end
|
158
|
+
|
159
|
+
ok("S3 object #{config[:key_name]} exists in bucket #{config[:bucket_name]}")
|
121
160
|
rescue Aws::S3::Errors::NotFound => _
|
122
161
|
critical "S3 object #{config[:key_name]} not found in bucket #{config[:bucket_name]}"
|
123
162
|
rescue => e
|
124
|
-
critical "S3 object #{config[:key_name]} in bucket #{config[:bucket_name]} - #{e.message}"
|
163
|
+
critical "S3 object #{config[:key_name]} in bucket #{config[:bucket_name]} - #{e.message} - #{e.backtrace}"
|
125
164
|
end
|
126
165
|
end
|
127
166
|
end
|