sensu-plugins-aws-boutetnico 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +22 -0
- data/README.md +333 -0
- data/bin/check-alb-target-group-health.rb +100 -0
- data/bin/check-asg-instances-created.rb +129 -0
- data/bin/check-asg-instances-inservice.rb +109 -0
- data/bin/check-autoscaling-cpucredits.rb +160 -0
- data/bin/check-beanstalk-elb-metric.rb +123 -0
- data/bin/check-beanstalk-health.rb +123 -0
- data/bin/check-certificate-expiry.rb +123 -0
- data/bin/check-cloudfront-tag.rb +70 -0
- data/bin/check-cloudwatch-alarm.rb +102 -0
- data/bin/check-cloudwatch-alarms.rb +89 -0
- data/bin/check-cloudwatch-composite-metric.rb +199 -0
- data/bin/check-cloudwatch-metric.rb +123 -0
- data/bin/check-configservice-rules.rb +76 -0
- data/bin/check-direct-connect-virtual-interfaces.rb +84 -0
- data/bin/check-dynamodb-capacity.rb +194 -0
- data/bin/check-dynamodb-throttle.rb +188 -0
- data/bin/check-ebs-burst-limit.rb +143 -0
- data/bin/check-ebs-snapshots.rb +104 -0
- data/bin/check-ec2-cpu_balance.rb +139 -0
- data/bin/check-ec2-filter.rb +190 -0
- data/bin/check-ec2-network.rb +133 -0
- data/bin/check-ecs-service-health.rb +155 -0
- data/bin/check-efs-metric.rb +145 -0
- data/bin/check-eip-allocation.rb +64 -0
- data/bin/check-elasticache-failover.rb +113 -0
- data/bin/check-elb-certs.rb +132 -0
- data/bin/check-elb-health-fog.rb +114 -0
- data/bin/check-elb-health-sdk.rb +176 -0
- data/bin/check-elb-health.rb +116 -0
- data/bin/check-elb-instances-inservice.rb +103 -0
- data/bin/check-elb-latency.rb +166 -0
- data/bin/check-elb-nodes.rb +133 -0
- data/bin/check-elb-sum-requests.rb +157 -0
- data/bin/check-emr-cluster.rb +144 -0
- data/bin/check-emr-steps.rb +90 -0
- data/bin/check-eni-status.rb +110 -0
- data/bin/check-expiring-reservations.rb +117 -0
- data/bin/check-instance-events.rb +154 -0
- data/bin/check-instance-health.rb +108 -0
- data/bin/check-instance-reachability.rb +107 -0
- data/bin/check-instances-count.rb +94 -0
- data/bin/check-kms-key.rb +73 -0
- data/bin/check-rds-events.rb +141 -0
- data/bin/check-rds-pending.rb +91 -0
- data/bin/check-rds.rb +382 -0
- data/bin/check-redshift-events.rb +108 -0
- data/bin/check-reserved-instances.rb +80 -0
- data/bin/check-route.rb +122 -0
- data/bin/check-route53-domain-expiration.rb +78 -0
- data/bin/check-s3-bucket-visibility.rb +176 -0
- data/bin/check-s3-bucket.rb +86 -0
- data/bin/check-s3-object.rb +205 -0
- data/bin/check-s3-tag.rb +70 -0
- data/bin/check-sensu-client.rb +184 -0
- data/bin/check-ses-limit.rb +89 -0
- data/bin/check-ses-statistics.rb +149 -0
- data/bin/check-sns-subscriptions.rb +52 -0
- data/bin/check-sqs-messages.rb +168 -0
- data/bin/check-subnet-ip-consumption.rb +234 -0
- data/bin/check-trustedadvisor-service-limits.rb +90 -0
- data/bin/check-vpc-nameservers.rb +87 -0
- data/bin/check-vpc-vpn.rb +98 -0
- data/bin/handler-ec2_node.rb +241 -0
- data/bin/handler-scale-asg-down.rb +131 -0
- data/bin/handler-scale-asg-up.rb +131 -0
- data/bin/handler-ses.rb +107 -0
- data/bin/handler-sns.rb +64 -0
- data/bin/metrics-asg.rb +156 -0
- data/bin/metrics-autoscaling-instance-count.rb +101 -0
- data/bin/metrics-billing.rb +97 -0
- data/bin/metrics-cloudfront.rb +159 -0
- data/bin/metrics-ec2-count.rb +137 -0
- data/bin/metrics-ec2-filter.rb +97 -0
- data/bin/metrics-elasticache.rb +166 -0
- data/bin/metrics-elb.rb +169 -0
- data/bin/metrics-emr-steps.rb +82 -0
- data/bin/metrics-rds.rb +153 -0
- data/bin/metrics-reservation-utilization.rb +84 -0
- data/bin/metrics-s3.rb +107 -0
- data/bin/metrics-ses.rb +62 -0
- data/bin/metrics-sqs.rb +98 -0
- data/bin/metrics-waf.rb +111 -0
- data/lib/sensu-plugins-aws.rb +4 -0
- data/lib/sensu-plugins-aws/cloudwatch-common.rb +92 -0
- data/lib/sensu-plugins-aws/common.rb +35 -0
- data/lib/sensu-plugins-aws/filter.rb +47 -0
- data/lib/sensu-plugins-aws/version.rb +8 -0
- metadata +456 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-kms-key
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# Check KMS values by KMS API.
|
7
|
+
#
|
8
|
+
# OUTPUT:
|
9
|
+
# plain-text
|
10
|
+
#
|
11
|
+
# PLATFORMS:
|
12
|
+
# Linux
|
13
|
+
#
|
14
|
+
# DEPENDENCIES:
|
15
|
+
# gem: aws-sdk
|
16
|
+
# gem: sensu-plugin
|
17
|
+
#
|
18
|
+
# USAGE:
|
19
|
+
# check-kms-key -k key_id
|
20
|
+
#
|
21
|
+
# Critical if KMS key id doesn't exist
|
22
|
+
# Warning if KMS key id exists but is not enabled
|
23
|
+
# Ok if KMS key id exists and is enabled
|
24
|
+
# Unknown if no key_id is provided
|
25
|
+
#
|
26
|
+
# NOTES:
|
27
|
+
#
|
28
|
+
# LICENSE:
|
29
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
30
|
+
# for details.
|
31
|
+
#
|
32
|
+
|
33
|
+
require 'sensu-plugin/check/cli'
|
34
|
+
require 'sensu-plugins-aws'
|
35
|
+
require 'aws-sdk'
|
36
|
+
|
37
|
+
# Sensu check that reports whether a KMS key exists and is enabled.
#
# Outcomes (as produced by the status helpers called below):
#   unknown  - no key id was supplied, or the lookup failed unexpectedly
#   critical - KMS answered with NotFoundException for the key id
#   warning  - the key exists but its metadata says it is not enabled
#   ok       - the key exists and is enabled
#
# NOTE(review): `Common` is mixed in from sensu-plugins-aws and is not visible
# here; presumably it applies the region option to the AWS SDK - confirm.
class CheckKMSKey < Sensu::Plugin::Check::CLI
  include Common

  option :aws_region,
         short:       '-r AWS_REGION',
         long:        '--aws-region REGION',
         description: 'AWS Region.',
         default:     'us-east-1'

  option :key_id,
         short:       '-k ID',
         long:        '--key-id ID',
         description: 'KMS key identifier',
         default:     nil

  # Lazily built KMS client, created once and reused for later calls.
  def kms_client
    @kms_client = Aws::KMS::Client.new unless @kms_client
    @kms_client
  end

  # Looks the key up through DescribeKey and returns the `enabled` flag found
  # in the response's key metadata.
  #
  # A NotFoundException is reported as critical; any other StandardError is
  # reported as unknown, with the key id and the error in the message.
  def check_key(id)
    begin
      metadata = kms_client.describe_key(key_id: id)['key_metadata']
      metadata['enabled']
    rescue Aws::KMS::Errors::NotFoundException
      critical 'Key doesnt exist'
    rescue StandardError => e
      unknown "Failed to check key #{id}: #{e}"
    end
  end

  # Entry point: validates that a key id was given, then maps the key's
  # enabled flag onto ok / warning.
  def run
    requested_key = config[:key_id]
    return unknown('No KMS key id provided. See help for usage details') if requested_key.nil?
    return ok('Key exists and is enabled') if check_key(requested_key)

    warning 'Key exists but is not enabled'
  end
end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-rds-events
|
4
|
+
#
|
5
|
+
#
|
6
|
+
# DESCRIPTION:
|
7
|
+
# This plugin checks rds clusters for critical events.
|
8
|
+
# Due to the number of events types on RDS clusters, the check
|
9
|
+
# should filter out non-disruptive events that are part of
|
10
|
+
# basic operations.
|
11
|
+
#
|
12
|
+
# More info on RDS events:
|
13
|
+
# http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_Events.html
|
14
|
+
#
|
15
|
+
# OUTPUT:
|
16
|
+
# plain-text
|
17
|
+
#
|
18
|
+
# PLATFORMS:
|
19
|
+
# Linux
|
20
|
+
#
|
21
|
+
# DEPENDENCIES:
|
22
|
+
# gem: aws-sdk
|
23
|
+
# gem: sensu-plugin
|
24
|
+
#
|
25
|
+
# USAGE:
|
26
|
+
# Checks a specific RDS instance in a specific region for critical events
|
27
|
+
# check-rds-events.rb -r ${your_region} -k ${your_aws_secret_access_key} -a ${your_aws_access_key} -i ${your_rds_instance_id_name}
|
28
|
+
#
|
29
|
+
# Checks all RDS instances in a specific region
|
30
|
+
# check-rds-events.rb -r ${your_region} -k ${your_aws_secret_access_key} -a ${your_aws_access_key}
|
31
|
+
#
|
32
|
+
# Checks all RDS instances in a specific region, should be using IAM role
|
33
|
+
# check-rds-events.rb -r ${your_region}
|
34
|
+
#
|
35
|
+
# NOTES:
|
36
|
+
#
|
37
|
+
# LICENSE:
|
38
|
+
# Tim Smith <tsmith@chef.io>
|
39
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
40
|
+
# for details.
|
41
|
+
#
|
42
|
+
|
43
|
+
require 'sensu-plugin/check/cli'
|
44
|
+
require 'aws-sdk'
|
45
|
+
|
46
|
+
# Sensu check that looks at the most recent RDS event (last 15 minutes) of
# each DB instance and goes critical when that event is not one of the
# known non-disruptive messages (backups, restore from snapshot, replication
# resumed).
#
# The region option may be a single region name or 'all' (case-insensitive),
# in which case every region of the 'aws' partition is scanned.
class CheckRDSEvents < Sensu::Plugin::Check::CLI
  option :aws_access_key,
         short:       '-a AWS_ACCESS_KEY',
         long:        '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY'] or provide it as an option",
         default:     ENV['AWS_ACCESS_KEY']

  option :aws_secret_access_key,
         short:       '-k AWS_SECRET_KEY',
         long:        '--aws-secret-access-key AWS_SECRET_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_KEY'] or provide it as an option",
         default:     ENV['AWS_SECRET_KEY']

  option :aws_region,
         short:       '-r AWS_REGION',
         long:        '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default:     'us-east-1'

  option :db_instance_id,
         short:       '-i N',
         long:        '--db-instance-id NAME',
         description: 'DB instance identifier'

  # Client options assembled from the command-line configuration.
  # A new hash is returned on every call.
  def aws_config
    {
      access_key_id:     config[:aws_access_key],
      secret_access_key: config[:aws_secret_access_key],
      region:            config[:aws_region]
    }
  end

  # Names of all regions in the 'aws' partition, as known to the SDK.
  def rds_regions
    Aws.partition('aws').regions.map { |region| region.name }
  end

  # Entry point: ok when nothing was flagged, critical with the list of
  # flagged instances otherwise.
  def run
    flagged = maint_clusters
    return ok if flagged.empty?

    critical("Clusters w/ critical events: #{flagged.join(', ')}")
  end

  # Collects "<instance> (<region>) <message>" strings for every instance
  # whose latest event in the past 15 minutes is not filtered out.
  #
  # An unrecognised region name is reported as critical; any StandardError
  # raised while talking to RDS for a region is reported as unknown.
  def maint_clusters
    regions = rds_regions
    requested_region = config[:aws_region]

    # Narrow the scan to the requested region unless 'all' was asked for.
    unless requested_region.casecmp('all').zero?
      if regions.include?(requested_region)
        regions = [requested_region]
      else
        critical 'Invalid region specified!'
      end
    end

    regions.each_with_object([]) do |region, flagged|
      rds = Aws::RDS::Client.new(aws_config.merge(region: region))

      begin
        wanted_id = config[:db_instance_id]

        if wanted_id.nil? || wanted_id.empty?
          # no specific instance requested: take every identifier returned
          instance_ids = rds.describe_db_instances[:db_instances].map { |db| db[:db_instance_identifier] }
        else
          lookup = rds.describe_db_instances(db_instance_identifier: wanted_id)
          if lookup.nil? || lookup.empty?
            unknown "#{wanted_id} instance not found"
          else
            instance_ids = [wanted_id]
          end
        end

        # Only the last 15 minutes are inspected so that the check reports
        # persistent conditions rather than every historical event.
        instance_ids.each do |instance_id|
          events = rds.describe_events(
            start_time:        (Time.now.utc - 900).iso8601,
            source_type:       'db-instance',
            source_identifier: instance_id
          )[:events]
          next if events.empty?

          latest_message = events[-1][:message]

          # routine backup / restore operations are not worth alerting on
          next if latest_message =~ /Backing up DB instance|Finished DB Instance backup|Restored from snapshot/
          # neither is replication resuming
          next if latest_message =~ /Replication for the Read Replica resumed/
          # further filters can be added here to skip more event types

          flagged.push("#{instance_id} (#{region}) #{latest_message}")
        end
      rescue StandardError => e
        unknown "An error occurred processing AWS RDS API (#{region}): #{e.message}"
      end
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-rds-pending
|
4
|
+
#
|
5
|
+
#
|
6
|
+
# DESCRIPTION:
|
7
|
+
# This plugin checks rds clusters for pending maintenance action.
|
8
|
+
#
|
9
|
+
# OUTPUT:
|
10
|
+
# plain-text
|
11
|
+
#
|
12
|
+
# PLATFORMS:
|
13
|
+
# Linux
|
14
|
+
#
|
15
|
+
# DEPENDENCIES:
|
16
|
+
# gem: aws-sdk
|
17
|
+
# gem: sensu-plugin
|
18
|
+
#
|
19
|
+
# USAGE:
|
20
|
+
# ./check-rds-pending.rb -r ${your_region}
|
21
|
+
#
|
22
|
+
# NOTES:
|
23
|
+
#
|
24
|
+
# LICENSE:
|
25
|
+
# Tim Smith <tim@cozy.co>
|
26
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
27
|
+
# for details.
|
28
|
+
#
|
29
|
+
|
30
|
+
require 'sensu-plugin/check/cli'
|
31
|
+
require 'sensu-plugins-aws'
|
32
|
+
require 'aws-sdk'
|
33
|
+
|
34
|
+
# Sensu check that goes critical when RDS reports pending maintenance
# actions for the selected DB instance(s).
#
# NOTE(review): `Common` comes from sensu-plugins-aws and is not visible here;
# presumably it feeds the region option into the AWS SDK - confirm.
class CheckRDSPending < Sensu::Plugin::Check::CLI
  include Common

  option :aws_region,
         short:       '-r AWS_REGION',
         long:        '--aws-region REGION',
         description: 'AWS Region (such as eu-west-1).',
         default:     'us-east-1'

  option :db_instance_identifier,
         short:       '-d DB_INSTANCE_IDENTIFIER',
         long:        '--db-instance-identifier DB_INSTANCE_IDENTIFIER',
         description: 'The DB Identifier of the instance to check',
         default:     nil

  # Entry point: gathers the pending maintenance action details of the
  # selected instances; ok when there are none, critical otherwise.
  # Any StandardError raised by the RDS calls is reported as unknown.
  def run
    pending_details = []

    begin
      if clusters.any?
        # ask only about the instances we resolved, using the id filter
        id_filter = { name: 'db-instance-id', values: clusters }
        actions = rds.describe_pending_maintenance_actions(filters: [id_filter])[:pending_maintenance_actions]
        actions.each { |action| pending_details << action[:pending_maintenance_action_details] }
      end
    rescue StandardError => e
      unknown "An error occurred processing AWS RDS API: #{e.message}"
    end

    return ok if pending_details.empty?

    critical("Clusters w/ pending maintenance required: #{pending_details.join(',')}")
  end

  private

  # Lazily built RDS client, reused across calls.
  def rds
    @rds = Aws::RDS::Client.new unless @rds
    @rds
  end

  # Identifiers of the instances to inspect: the one given on the command
  # line, or every instance returned by DescribeDBInstances when none was
  # given. The result is memoized.
  def clusters
    return @clusters if @clusters

    wanted = config[:db_instance_identifier]
    query = wanted ? { db_instance_identifier: wanted } : {}
    @clusters = rds.describe_db_instances(query)[:db_instances].map { |db| db[:db_instance_identifier] }
  end
end
|
data/bin/check-rds.rb
ADDED
@@ -0,0 +1,382 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# check-rds
|
4
|
+
#
|
5
|
+
# DESCRIPTION:
|
6
|
+
# Check RDS instance statuses by RDS and CloudWatch API.
|
7
|
+
#
|
8
|
+
# OUTPUT:
|
9
|
+
# plain-text
|
10
|
+
#
|
11
|
+
# PLATFORMS:
|
12
|
+
# Linux
|
13
|
+
#
|
14
|
+
# DEPENDENCIES:
|
15
|
+
# gem: aws-sdk
|
16
|
+
# gem: sensu-plugin
|
17
|
+
#
|
18
|
+
# USAGE:
|
19
|
+
# Critical if DB instance "sensu-admin-db" is not on ap-northeast-1a
|
20
|
+
# check-rds -i sensu-admin-db --availability-zone-critical ap-northeast-1a
|
21
|
+
#
|
22
|
+
# Warning if CPUUtilization is over 80%, critical if over 90%
|
23
|
+
# check-rds -i sensu-admin-db --cpu-warning-over 80 --cpu-critical-over 90
|
24
|
+
#
|
25
|
+
# Critical if CPUUtilization is over 90%, maximum of last one hour
|
26
|
+
# check-rds -i sensu-admin-db --cpu-critical-over 90 --statistics maximum --period 3600
|
27
|
+
#
|
28
|
+
# Warning if DatabaseConnections are over 100, critical over 120
|
29
|
+
# check-rds -i sensu-admin-db --connections-critical-over 120 --connections-warning-over 100 --statistics maximum --period 3600
|
30
|
+
#
|
31
|
+
# Warning if IOPS are over 100, critical over 200
|
32
|
+
# check-rds -i sensu-admin-db --iops-critical-over 200 --iops-warning-over 100 --period 300
|
33
|
+
#
|
34
|
+
# Warning if memory usage is over 80%, maximum of last 2 hour
|
35
|
+
# specifying "minimum" is intended actually since memory usage is calculated from CloudWatch "FreeableMemory" metric.
|
36
|
+
# check-rds -i sensu-admin-db --memory-warning-over 80 --statistics minimum --period 7200
|
37
|
+
#
|
38
|
+
# Disk usage, same as memory
|
39
|
+
# check-rds -i sensu-admin-db --disk-warning-over 80 --period 7200
|
40
|
+
#
|
41
|
+
# You can check multiple metrics simultaneously. Highest severity will be reported
|
42
|
+
# check-rds -i sensu-admin-db --cpu-warning-over 80 --cpu-critical-over 90 --memory-warning-over 60 --memory-critical-over 80
|
43
|
+
#
|
44
|
+
# You can accept nil values returned for a time period from CloudWatch as being OK. Amazon falls behind in their
|
45
|
+
# metrics from time to time and this prevents false positives
|
46
|
+
# check-rds -i sensu-admin-db --cpu-critical-over 90 -n
|
47
|
+
#
|
48
|
+
# NOTES:
|
49
|
+
#
|
50
|
+
# LICENSE:
|
51
|
+
# Copyright 2014 github.com/y13i
|
52
|
+
# Released under the same terms as Sensu (the MIT license); see LICENSE
|
53
|
+
# for details.
|
54
|
+
#
|
55
|
+
|
56
|
+
require 'sensu-plugin/check/cli'
|
57
|
+
require 'aws-sdk'
|
58
|
+
require 'time'
|
59
|
+
|
60
|
+
# Sensu check for RDS instances: compares the instance's availability zone
# and CloudWatch metrics (CPU, memory, disk, connections, IOPS) against the
# warning/critical thresholds given on the command line and reports the
# highest severity found.
#
# NOTE(review): the status helpers (ok / warning / critical / unknown) come
# from sensu-plugin and are assumed to terminate the check when called -
# confirm against the sensu-plugin version in use.
class CheckRDS < Sensu::Plugin::Check::CLI
  # Severities for which threshold options are generated.
  THRESHOLD_SEVERITIES = %w[warning critical].freeze

  # Metric families that have a `--<item>-<severity>-over` option and a
  # matching `check_<item>` method.
  THRESHOLD_ITEMS = %w[cpu memory disk connections iops].freeze

  # Total memory, in gigabytes, per DB instance class. Needed because
  # CloudWatch only exposes FreeableMemory, not the total.
  INSTANCE_MEMORY_GIGABYTES = {
    'db.cr1.8xlarge' => 244.0,
    'db.m1.small' => 1.7,
    'db.m1.medium' => 3.75,
    'db.m1.large' => 7.5,
    'db.m1.xlarge' => 15.0,
    'db.m2.xlarge' => 17.1,
    'db.m2.2xlarge' => 34.2,
    'db.m2.4xlarge' => 68.4,
    'db.m3.medium' => 3.75,
    'db.m3.large' => 7.5,
    'db.m3.xlarge' => 15.0,
    'db.m3.2xlarge' => 30.0,
    'db.m4.large' => 8.0,
    'db.m4.xlarge' => 16.0,
    'db.m4.2xlarge' => 32.0,
    'db.m4.4xlarge' => 64.0,
    'db.m4.10xlarge' => 160.0,
    'db.m4.16xlarge' => 256.0,
    'db.m5.large' => 8.0,
    'db.m5.xlarge' => 16.0,
    'db.m5.2xlarge' => 32.0,
    'db.m5.4xlarge' => 64.0,
    'db.m5.12xlarge' => 192.0,
    'db.m5.24xlarge' => 384.0,
    'db.r3.large' => 15.0,
    'db.r3.xlarge' => 30.5,
    'db.r3.2xlarge' => 61.0,
    'db.r3.4xlarge' => 122.0,
    'db.r3.8xlarge' => 244.0,
    'db.r4.large' => 15.25,
    'db.r4.xlarge' => 30.5,
    'db.r4.2xlarge' => 61.0,
    'db.r4.4xlarge' => 122.0,
    'db.r4.8xlarge' => 244.0,
    'db.r4.16xlarge' => 488.0,
    'db.r5.large' => 16.0,
    'db.r5.xlarge' => 32.0,
    'db.r5.2xlarge' => 64.0,
    'db.r5.4xlarge' => 128.0,
    'db.r5.12xlarge' => 384.0,
    'db.r5.24xlarge' => 768.0,
    'db.t1.micro' => 0.615,
    'db.t2.micro' => 1.0,
    'db.t2.small' => 2.0,
    'db.t2.medium' => 4.0,
    'db.t2.large' => 8.0,
    'db.t2.xlarge' => 16.0,
    'db.t2.2xlarge' => 32.0,
    'db.t3.micro' => 1.0,
    'db.t3.small' => 2.0,
    'db.t3.medium' => 4.0,
    'db.t3.large' => 8.0,
    'db.t3.xlarge' => 16.0,
    'db.t3.2xlarge' => 32.0,
    'db.x1.16xlarge' => 976.0,
    'db.x1.32xlarge' => 1952.0,
    'db.x1e.xlarge' => 122.0,
    'db.x1e.2xlarge' => 244.0,
    'db.x1e.4xlarge' => 488.0,
    'db.x1e.8xlarge' => 976.0,
    'db.x1e.16xlarge' => 1952.0,
    'db.x1e.32xlarge' => 3904.0
  }.freeze

  option :aws_access_key,
         short: '-a AWS_ACCESS_KEY',
         long: '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY'] or provide it as an option",
         default: ENV['AWS_ACCESS_KEY']

  option :aws_secret_access_key,
         short: '-k AWS_SECRET_KEY',
         long: '--aws-secret-access-key AWS_SECRET_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_KEY'] or provide it as an option",
         default: ENV['AWS_SECRET_KEY']

  option :role_arn,
         long: '--role-arn ROLE_ARN',
         description: 'AWS role arn of the role of the third party account to switch to',
         default: false

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default: 'us-east-1'

  option :db_instance_id,
         short: '-i N',
         long: '--db-instance-id NAME',
         description: 'DB instance identifier'

  option :db_cluster_id,
         short: '-l N',
         long: '--db-cluster-id NAME',
         description: 'DB cluster identifier'

  option :end_time,
         short: '-t T',
         long: '--end-time TIME',
         default: Time.now,
         proc: proc { |a| Time.parse a },
         description: 'CloudWatch metric statistics end time'

  option :period,
         short: '-p N',
         long: '--period SECONDS',
         default: 180,
         proc: proc(&:to_i),
         description: 'CloudWatch metric statistics period'

  option :statistics,
         short: '-S N',
         long: '--statistics NAME',
         default: :average,
         proc: proc { |a| a.downcase.intern },
         description: 'CloudWatch statistics method'

  option :accept_nil,
         short: '-n',
         long: '--accept_nil',
         description: 'Continue if CloudWatch provides no metrics for the time period',
         default: false

  # Generate the per-severity threshold options:
  #   --availability-zone-<severity> AZ
  #   --<item>-<severity>-over N
  THRESHOLD_SEVERITIES.each do |severity|
    option :"availability_zone_#{severity}",
           long: "--availability-zone-#{severity} AZ",
           description: "Trigger a #{severity} if availability zone is different than given argument"

    THRESHOLD_ITEMS.each do |item|
      option :"#{item}_#{severity}_over",
             long: "--#{item}-#{severity}-over N",
             proc: proc(&:to_f),
             description: "Trigger a #{severity} if #{item} usage is over a percentage"
    end
  end

  # Client options assembled from the command-line configuration.
  def aws_config
    { access_key_id: config[:aws_access_key],
      secret_access_key: config[:aws_secret_access_key],
      region: config[:aws_region] }
  end

  # Credentials obtained by assuming config[:role_arn].
  # Memoized so that the RDS and CloudWatch clients share one set of
  # credentials instead of each triggering its own AssumeRole call.
  def role_credentials
    @role_credentials ||= Aws::AssumeRoleCredentials.new(
      client: Aws::STS::Client.new(aws_config),
      role_arn: config[:role_arn],
      role_session_name: "role@#{Time.now.to_i}"
    )
  end

  # Memoized RDS client; uses the assumed-role credentials when a role ARN
  # was supplied, the plain key/region configuration otherwise.
  def rds
    @rds ||= if config[:role_arn]
               Aws::RDS::Client.new(credentials: role_credentials, region: aws_config[:region])
             else
               Aws::RDS::Client.new(aws_config)
             end
  end

  # Memoized CloudWatch client, built the same way as #rds.
  def cloud_watch
    @cloud_watch ||= if config[:role_arn]
                       Aws::CloudWatch::Client.new(credentials: role_credentials, region: aws_config[:region])
                     else
                       Aws::CloudWatch::Client.new(aws_config)
                     end
  end

  # Returns the DB instance whose identifier equals +id+ from the
  # DescribeDBInstances result, or reports unknown when there is none.
  def find_db_instance(id)
    db = rds.describe_db_instances.db_instances.detect { |db_instance| db_instance.db_instance_identifier == id }
    unknown 'DB instance not found.' if db.nil?
    db
  end

  # Returns the instance identifier of the writer member of cluster +id+.
  #
  # Each step is nil-checked before it is dereferenced: previously a cluster
  # without a writer member (or an empty result) raised NoMethodError before
  # the intended "not found" report could be issued.
  def find_db_cluster_writer(id)
    cluster = rds.describe_db_clusters(db_cluster_identifier: id).db_clusters.first
    unknown 'DB cluster not found.' if cluster.nil?

    writer = cluster.db_cluster_members.detect(&:is_cluster_writer)
    unknown 'DB cluster writer not found.' if writer.nil?

    writer.db_instance_identifier
  end

  # Fetches statistics for +metric_name+ (in +unit+) of the instance
  # currently held in @db_instance, over the window
  # [end_time - period, end_time] with a single period-sized bucket.
  def cloud_watch_metric(metric_name, unit)
    cloud_watch.get_metric_statistics(
      namespace: 'AWS/RDS',
      metric_name: metric_name,
      dimensions: [
        {
          name: 'DBInstanceIdentifier',
          value: @db_instance.db_instance_identifier
        }
      ],
      start_time: config[:end_time] - config[:period],
      end_time: config[:end_time],
      statistics: [config[:statistics].to_s.capitalize],
      period: config[:period],
      unit: unit
    )
  end

  # Returns the configured statistic of the most recent datapoint in +metric+.
  #
  # When CloudWatch returned no datapoints (e.g. the window is too small),
  # the check reports ok if --accept_nil was given and unknown otherwise.
  def latest_value(metric)
    values = metric.datapoints.sort_by { |datapoint| datapoint[:timestamp] }

    if values.empty?
      if config[:accept_nil]
        ok('Cloudwatch returned no results for time period. Accept nil passed so OK')
      else
        unknown('Requested time period did not return values from Cloudwatch. Try increasing your time period.')
      end
    else
      values.last[config[:statistics]]
    end
  end

  # Total memory of +instance_class+ in bytes.
  # Reports unknown (instead of failing with a bare KeyError) when the
  # instance class is missing from INSTANCE_MEMORY_GIGABYTES.
  def memory_total_bytes(instance_class)
    gigabytes = INSTANCE_MEMORY_GIGABYTES.fetch(instance_class) do
      unknown "Memory size of DB instance class #{instance_class} is not known; cannot compute memory usage."
    end

    gigabytes * 1024**3
  end

  # Flags +severity+ and returns a message fragment when the instance is not
  # in +expected_az+; returns nil when the availability zone matches.
  def check_az(severity, expected_az)
    return if @db_instance.availability_zone == expected_az

    @severities[severity] = true
    "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
  end

  # Flags +severity+ when CPUUtilization is not below +expected_lower_than+
  # percent. Returns a message fragment, or nil when within the limit.
  def check_cpu(severity, expected_lower_than)
    cpu_percent = latest_value(cloud_watch_metric('CPUUtilization', 'Percent'))
    return if cpu_percent < expected_lower_than

    @severities[severity] = true
    "; CPUUtilization is #{format('%.2f', cpu_percent)}% (expected lower than #{expected_lower_than}%)"
  end

  # Flags +severity+ when memory usage, derived from FreeableMemory and the
  # instance class's total memory, is not below +expected_lower_than+ percent.
  def check_memory(severity, expected_lower_than)
    freeable_bytes = latest_value(cloud_watch_metric('FreeableMemory', 'Bytes'))
    total_bytes = memory_total_bytes(@db_instance.db_instance_class)
    usage_percentage = (total_bytes - freeable_bytes) / total_bytes * 100
    return if usage_percentage < expected_lower_than

    @severities[severity] = true
    "; Memory usage is #{format('%.2f', usage_percentage)}% (expected lower than #{expected_lower_than}%)"
  end

  # Flags +severity+ when disk usage, derived from FreeStorageSpace and the
  # instance's allocated storage (GiB), is not below +expected_lower_than+
  # percent.
  def check_disk(severity, expected_lower_than)
    free_bytes = latest_value(cloud_watch_metric('FreeStorageSpace', 'Bytes'))
    total_bytes = @db_instance.allocated_storage * 1024**3
    # to_f guards against integer division should the metric ever be integral
    usage_percentage = (total_bytes - free_bytes).to_f / total_bytes * 100
    return if usage_percentage < expected_lower_than

    @severities[severity] = true
    "; Disk usage is #{format('%.2f', usage_percentage)}% (expected lower than #{expected_lower_than}%)"
  end

  # Flags +severity+ when DatabaseConnections is not below
  # +expected_lower_than+ (an absolute count).
  def check_connections(severity, expected_lower_than)
    connections = latest_value(cloud_watch_metric('DatabaseConnections', 'Count'))
    return if connections < expected_lower_than

    @severities[severity] = true
    "; DatabaseConnections are #{format('%d', connections)} (expected lower than #{expected_lower_than})"
  end

  # Flags +severity+ when ReadIOPS + WriteIOPS is not below
  # +expected_lower_than+ (operations per second).
  def check_iops(severity, expected_lower_than)
    read_iops = latest_value(cloud_watch_metric('ReadIOPS', 'Count/Second'))
    write_iops = latest_value(cloud_watch_metric('WriteIOPS', 'Count/Second'))
    total_iops = read_iops + write_iops
    return if total_iops < expected_lower_than

    @severities[severity] = true
    "; IOPS are #{format('%d', total_iops)} (expected lower than #{expected_lower_than})"
  end

  # Entry point. Selects the instances to inspect, evaluates each one and
  # reports the highest severity together with the messages of every
  # instance that triggered a warning or critical.
  #
  # Instance selection:
  #   --db-cluster-id   -> the cluster's writer instance
  #   --db-instance-id  -> that instance
  #   both              -> both of the above
  #   neither           -> every instance returned by DescribeDBInstances
  #
  # Previously a cluster id given on its own also pulled in every instance,
  # which checked unrelated instances and the writer twice.
  def run
    instances = []
    cluster_id = config[:db_cluster_id]
    instance_id = config[:db_instance_id]
    instance_id_given = !(instance_id.nil? || instance_id.empty?)

    instances << find_db_instance(find_db_cluster_writer(cluster_id)) if cluster_id

    if instance_id_given
      instances << find_db_instance(instance_id)
    elsif !cluster_id
      instances.concat(rds.describe_db_instances[:db_instances].to_a)
    end

    messages = ''
    overall = { critical: false, warning: false }

    instances.each do |instance|
      # the check_* helpers and cloud_watch_metric read the current instance
      # from this instance variable
      @db_instance = instance
      message, severities = collect(instance)

      level = %i[critical warning].find { |candidate| severities[candidate] }
      next if level.nil?

      overall[level] = true
      messages += message
    end

    if overall[:critical]
      critical messages
    elsif overall[:warning]
      warning messages
    else
      ok messages
    end
  end

  # Evaluates every configured threshold for +instance+.
  #
  # Returns a two-element array: the message for this instance and the
  # severities hash ({ critical: bool, warning: bool }) that the check_*
  # helpers filled in.
  def collect(instance)
    message = "\n#{instance[:db_instance_identifier]}: "
    @severities = {
      critical: false,
      warning: false
    }

    @severities.each_key do |severity|
      expected_az = config[:"availability_zone_#{severity}"]
      # check_az takes (severity, expected_az) and returns nil on a match,
      # hence the two-argument call and the to_s before concatenation
      message += check_az(severity, expected_az).to_s if expected_az

      THRESHOLD_ITEMS.each do |item|
        threshold = config[:"#{item}_#{severity}_over"]
        next unless threshold

        message += send("check_#{item}", severity, threshold).to_s
      end
    end

    # When any metric threshold is configured, append the statistic and the
    # time window the values were taken from.
    metric_threshold_set = THRESHOLD_ITEMS.any? do |item|
      THRESHOLD_SEVERITIES.any? { |severity| config[:"#{item}_#{severity}_over"] }
    end

    if metric_threshold_set
      message += "(#{config[:statistics].to_s.capitalize} within #{config[:period]}s "
      message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"
    end

    [message, @severities]
  end
end
|