sensu-plugins-aws-boutetnico 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +22 -0
  4. data/README.md +333 -0
  5. data/bin/check-alb-target-group-health.rb +100 -0
  6. data/bin/check-asg-instances-created.rb +129 -0
  7. data/bin/check-asg-instances-inservice.rb +109 -0
  8. data/bin/check-autoscaling-cpucredits.rb +160 -0
  9. data/bin/check-beanstalk-elb-metric.rb +123 -0
  10. data/bin/check-beanstalk-health.rb +123 -0
  11. data/bin/check-certificate-expiry.rb +123 -0
  12. data/bin/check-cloudfront-tag.rb +70 -0
  13. data/bin/check-cloudwatch-alarm.rb +102 -0
  14. data/bin/check-cloudwatch-alarms.rb +89 -0
  15. data/bin/check-cloudwatch-composite-metric.rb +199 -0
  16. data/bin/check-cloudwatch-metric.rb +123 -0
  17. data/bin/check-configservice-rules.rb +76 -0
  18. data/bin/check-direct-connect-virtual-interfaces.rb +84 -0
  19. data/bin/check-dynamodb-capacity.rb +194 -0
  20. data/bin/check-dynamodb-throttle.rb +188 -0
  21. data/bin/check-ebs-burst-limit.rb +143 -0
  22. data/bin/check-ebs-snapshots.rb +104 -0
  23. data/bin/check-ec2-cpu_balance.rb +139 -0
  24. data/bin/check-ec2-filter.rb +190 -0
  25. data/bin/check-ec2-network.rb +133 -0
  26. data/bin/check-ecs-service-health.rb +155 -0
  27. data/bin/check-efs-metric.rb +145 -0
  28. data/bin/check-eip-allocation.rb +64 -0
  29. data/bin/check-elasticache-failover.rb +113 -0
  30. data/bin/check-elb-certs.rb +132 -0
  31. data/bin/check-elb-health-fog.rb +114 -0
  32. data/bin/check-elb-health-sdk.rb +176 -0
  33. data/bin/check-elb-health.rb +116 -0
  34. data/bin/check-elb-instances-inservice.rb +103 -0
  35. data/bin/check-elb-latency.rb +166 -0
  36. data/bin/check-elb-nodes.rb +133 -0
  37. data/bin/check-elb-sum-requests.rb +157 -0
  38. data/bin/check-emr-cluster.rb +144 -0
  39. data/bin/check-emr-steps.rb +90 -0
  40. data/bin/check-eni-status.rb +110 -0
  41. data/bin/check-expiring-reservations.rb +117 -0
  42. data/bin/check-instance-events.rb +154 -0
  43. data/bin/check-instance-health.rb +108 -0
  44. data/bin/check-instance-reachability.rb +107 -0
  45. data/bin/check-instances-count.rb +94 -0
  46. data/bin/check-kms-key.rb +73 -0
  47. data/bin/check-rds-events.rb +141 -0
  48. data/bin/check-rds-pending.rb +91 -0
  49. data/bin/check-rds.rb +382 -0
  50. data/bin/check-redshift-events.rb +108 -0
  51. data/bin/check-reserved-instances.rb +80 -0
  52. data/bin/check-route.rb +122 -0
  53. data/bin/check-route53-domain-expiration.rb +78 -0
  54. data/bin/check-s3-bucket-visibility.rb +176 -0
  55. data/bin/check-s3-bucket.rb +86 -0
  56. data/bin/check-s3-object.rb +205 -0
  57. data/bin/check-s3-tag.rb +70 -0
  58. data/bin/check-sensu-client.rb +184 -0
  59. data/bin/check-ses-limit.rb +89 -0
  60. data/bin/check-ses-statistics.rb +149 -0
  61. data/bin/check-sns-subscriptions.rb +52 -0
  62. data/bin/check-sqs-messages.rb +168 -0
  63. data/bin/check-subnet-ip-consumption.rb +234 -0
  64. data/bin/check-trustedadvisor-service-limits.rb +90 -0
  65. data/bin/check-vpc-nameservers.rb +87 -0
  66. data/bin/check-vpc-vpn.rb +98 -0
  67. data/bin/handler-ec2_node.rb +241 -0
  68. data/bin/handler-scale-asg-down.rb +131 -0
  69. data/bin/handler-scale-asg-up.rb +131 -0
  70. data/bin/handler-ses.rb +107 -0
  71. data/bin/handler-sns.rb +64 -0
  72. data/bin/metrics-asg.rb +156 -0
  73. data/bin/metrics-autoscaling-instance-count.rb +101 -0
  74. data/bin/metrics-billing.rb +97 -0
  75. data/bin/metrics-cloudfront.rb +159 -0
  76. data/bin/metrics-ec2-count.rb +137 -0
  77. data/bin/metrics-ec2-filter.rb +97 -0
  78. data/bin/metrics-elasticache.rb +166 -0
  79. data/bin/metrics-elb.rb +169 -0
  80. data/bin/metrics-emr-steps.rb +82 -0
  81. data/bin/metrics-rds.rb +153 -0
  82. data/bin/metrics-reservation-utilization.rb +84 -0
  83. data/bin/metrics-s3.rb +107 -0
  84. data/bin/metrics-ses.rb +62 -0
  85. data/bin/metrics-sqs.rb +98 -0
  86. data/bin/metrics-waf.rb +111 -0
  87. data/lib/sensu-plugins-aws.rb +4 -0
  88. data/lib/sensu-plugins-aws/cloudwatch-common.rb +92 -0
  89. data/lib/sensu-plugins-aws/common.rb +35 -0
  90. data/lib/sensu-plugins-aws/filter.rb +47 -0
  91. data/lib/sensu-plugins-aws/version.rb +8 -0
  92. metadata +456 -0
@@ -0,0 +1,73 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # check-kms-key
4
+ #
5
+ # DESCRIPTION:
6
+ # Check KMS values by KMS API.
7
+ #
8
+ # OUTPUT:
9
+ # plain-text
10
+ #
11
+ # PLATFORMS:
12
+ # Linux
13
+ #
14
+ # DEPENDENCIES:
15
+ # gem: aws-sdk
16
+ # gem: sensu-plugin
17
+ #
18
+ # USAGE:
19
+ # check-kms-key -k key_id
20
+ #
21
+ # Critical if KMS key id doesn't exist
22
+ # Warning if KMS key id exists but is not enabled
23
+ # Ok if KMS key id exists and is enabled
24
+ # Unknown if no key_id is provided
25
+ #
26
+ # NOTES:
27
+ #
28
+ # LICENSE:
29
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
30
+ # for details.
31
+ #
32
+
33
+ require 'sensu-plugin/check/cli'
34
+ require 'sensu-plugins-aws'
35
+ require 'aws-sdk'
36
+
37
# Sensu check that reports on a single KMS key:
#   * unknown  - no key id was supplied, or the lookup failed unexpectedly
#   * critical - the KMS API reports the key as not found
#   * warning  - the key exists but is not enabled
#   * ok       - the key exists and is enabled
class CheckKMSKey < Sensu::Plugin::Check::CLI
  include Common

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region.',
         default: 'us-east-1'

  option :key_id,
         short: '-k ID',
         long: '--key-id ID',
         description: 'KMS key identifier',
         default: nil

  # KMS API client, built on first use and reused afterwards.
  # NOTE(review): no region/credentials are passed here; presumably the
  # Common mixin configures the SDK globally - confirm.
  def kms_client
    return @kms_client if @kms_client

    @kms_client = Aws::KMS::Client.new
  end

  # Looks up the key and returns the `enabled` flag from its metadata.
  #
  # id - KMS key identifier handed to describe_key.
  #
  # A NotFoundException from the API is reported as critical; any other
  # StandardError is reported as unknown together with the error text.
  def check_key(id)
    metadata = kms_client.describe_key(key_id: id)['key_metadata']
    metadata['enabled']
  rescue Aws::KMS::Errors::NotFoundException
    critical 'Key doesnt exist'
  rescue StandardError => e
    unknown "Failed to check key #{id}: #{e}"
  end

  # Check entry point: validates that a key id was given, then maps the
  # key's enabled flag onto ok / warning.
  def run
    key_id = config[:key_id]
    return unknown('No KMS key id provided. See help for usage details') if key_id.nil?

    if check_key(key_id)
      ok 'Key exists and is enabled'
    else
      warning 'Key exists but is not enabled'
    end
  end
end
@@ -0,0 +1,141 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # check-rds-events
4
+ #
5
+ #
6
+ # DESCRIPTION:
7
+ # This plugin checks rds clusters for critical events.
8
+ # Due to the number of events types on RDS clusters, the check
9
+ # should filter out non-disruptive events that are part of
10
+ # basic operations.
11
+ #
12
+ # More info on RDS events:
13
+ # http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_Events.html
14
+ #
15
+ # OUTPUT:
16
+ # plain-text
17
+ #
18
+ # PLATFORMS:
19
+ # Linux
20
+ #
21
+ # DEPENDENCIES:
22
+ # gem: aws-sdk
23
+ # gem: sensu-plugin
24
+ #
25
+ # USAGE:
26
+ # Checks a specific RDS instance in a specific region for critical events
27
+ # check-rds-events.rb -r ${your_region} -k ${your_aws_secret_access_key} -a ${your_aws_access_key} -i ${your_rds_instance_id_name}
28
+ #
29
+ # Checks all RDS instances in a specific region
30
+ # check-rds-events.rb -r ${your_region} -k ${your_aws_secret_access_key} -a ${your_aws_access_key}
31
+ #
32
+ # Checks all RDS instances in a specific region, should be using IAM role
33
+ # check-rds-events.rb -r ${your_region}
34
+ #
35
+ # NOTES:
36
+ #
37
+ # LICENSE:
38
+ # Tim Smith <tsmith@chef.io>
39
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
40
+ # for details.
41
+ #
42
+
43
+ require 'sensu-plugin/check/cli'
44
+ require 'aws-sdk'
45
+
46
# Sensu check that scans RDS instances for recent events and goes critical
# when the latest event of any instance is not a known routine operation.
class CheckRDSEvents < Sensu::Plugin::Check::CLI
  # Event messages treated as routine (backups, snapshot restores,
  # replication resuming). Add more patterns here to skip more events.
  ROUTINE_EVENT_PATTERNS = [
    /Backing up DB instance|Finished DB Instance backup|Restored from snapshot/,
    /Replication for the Read Replica resumed/
  ].freeze

  option :aws_access_key,
         short: '-a AWS_ACCESS_KEY',
         long: '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY'] or provide it as an option",
         default: ENV['AWS_ACCESS_KEY']

  option :aws_secret_access_key,
         short: '-k AWS_SECRET_KEY',
         long: '--aws-secret-access-key AWS_SECRET_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_KEY'] or provide it as an option",
         default: ENV['AWS_SECRET_KEY']

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default: 'us-east-1'

  option :db_instance_id,
         short: '-i N',
         long: '--db-instance-id NAME',
         description: 'DB instance identifier'

  # Client options assembled from the command-line / environment settings.
  def aws_config
    {
      access_key_id: config[:aws_access_key],
      secret_access_key: config[:aws_secret_access_key],
      region: config[:aws_region]
    }
  end

  # Names of every region in the 'aws' partition known to the SDK.
  def rds_regions
    Aws.partition('aws').regions.map { |region| region.name }
  end

  # Check entry point: ok when nothing was flagged, critical otherwise.
  def run
    flagged = maint_clusters
    return ok if flagged.empty?

    critical("Clusters w/ critical events: #{flagged.join(', ')}")
  end

  # Builds the list of "<instance> (<region>) <message>" strings for every
  # instance whose latest event in the last 15 minutes is not routine.
  #
  # The region option may be 'all' (case-insensitive) to scan every region;
  # any other value must be one of rds_regions or the check goes critical.
  # API failures are reported as unknown.
  def maint_clusters
    flagged = []
    regions = rds_regions
    requested_region = config[:aws_region]
    scan_everything = requested_region.casecmp('all').zero?

    unless scan_everything
      if regions.include?(requested_region)
        regions = [requested_region]
      else
        critical 'Invalid region specified!'
      end
    end

    regions.each do |region|
      client = Aws::RDS::Client.new(aws_config.merge(region: region))

      begin
        wanted_id = config[:db_instance_id]
        if wanted_id.nil? || wanted_id.empty?
          # no instance requested: collect every instance identifier
          instance_names = client.describe_db_instances[:db_instances].map { |db| db[:db_instance_identifier] }
        else
          lookup = client.describe_db_instances(db_instance_identifier: wanted_id)
          if lookup.nil? || lookup.empty?
            unknown "#{wanted_id} instance not found"
          else
            instance_names = [wanted_id]
          end
        end

        # Only the last 15 minutes are inspected, so alerts are limited to
        # persistent notifications worth caring about.
        instance_names.each do |instance_name|
          record = client.describe_events(
            start_time: (Time.now.utc - 900).iso8601,
            source_type: 'db-instance',
            source_identifier: instance_name
          )
          events = record[:events]
          next if events.empty?

          # Only the final entry of the returned list is examined.
          latest_message = events[-1][:message]
          next if ROUTINE_EVENT_PATTERNS.any? { |pattern| latest_message =~ pattern }

          flagged.push("#{instance_name} (#{region}) #{latest_message}")
        end
      rescue StandardError => e
        unknown "An error occurred processing AWS RDS API (#{region}): #{e.message}"
      end
    end

    flagged
  end
end
@@ -0,0 +1,91 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # check-rds-pending
4
+ #
5
+ #
6
+ # DESCRIPTION:
7
+ # This plugin checks rds clusters for pending maintenance action.
8
+ #
9
+ # OUTPUT:
10
+ # plain-text
11
+ #
12
+ # PLATFORMS:
13
+ # Linux
14
+ #
15
+ # DEPENDENCIES:
16
+ # gem: aws-sdk
17
+ # gem: sensu-plugin
18
+ #
19
+ # USAGE:
20
+ # ./check-rds-pending.rb -r ${your_region}
21
+ #
22
+ # NOTES:
23
+ #
24
+ # LICENSE:
25
+ # Tim Smith <tim@cozy.co>
26
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
27
+ # for details.
28
+ #
29
+
30
+ require 'sensu-plugin/check/cli'
31
+ require 'sensu-plugins-aws'
32
+ require 'aws-sdk'
33
+
34
# Sensu check that goes critical when any RDS instance (or the single
# instance selected with --db-instance-identifier) has pending maintenance
# actions, and ok otherwise. API failures are reported as unknown.
class CheckRDSPending < Sensu::Plugin::Check::CLI
  include Common

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (such as eu-west-1).',
         default: 'us-east-1'

  option :db_instance_identifier,
         short: '-d DB_INSTANCE_IDENTIFIER',
         long: '--db-instance-identifier DB_INSTANCE_IDENTIFIER',
         description: 'The DB Identifier of the instance to check',
         default: nil

  # Check entry point.
  #
  # Each flagged entry is rendered as "<resource identifier> (<actions>)"
  # so the alert names the affected resource and what is pending, instead
  # of dumping the raw action-detail structs.
  def run
    maint_clusters = []

    begin
      if clusters.any?
        # Ask only about the instances we resolved above.
        pending_record = rds.describe_pending_maintenance_actions(
          filters: [{ name: 'db-instance-id', values: clusters }]
        )
        pending_record[:pending_maintenance_actions].each do |resource|
          actions = Array(resource[:pending_maintenance_action_details]).map { |detail| detail[:action] }
          maint_clusters.push("#{resource[:resource_identifier]} (#{actions.join('/')})")
        end
      end
    rescue StandardError => e
      unknown "An error occurred processing AWS RDS API: #{e.message}"
    end

    if maint_clusters.empty?
      ok
    else
      critical("Clusters w/ pending maintenance required: #{maint_clusters.join(',')}")
    end
  end

  private

  # RDS API client, built on first use.
  # NOTE(review): no region/credentials are passed here; presumably the
  # Common mixin configures the SDK globally - confirm.
  def rds
    @rds ||= Aws::RDS::Client.new
  end

  # Identifiers of the DB instances to inspect: the one named by
  # --db-instance-identifier when given, otherwise every instance returned
  # by describe_db_instances. The result is memoized.
  def clusters
    @clusters ||= begin
      params = if config[:db_instance_identifier]
                 { db_instance_identifier: config[:db_instance_identifier] }
               else
                 {}
               end

      rds.describe_db_instances(params)[:db_instances].map do |db|
        db[:db_instance_identifier]
      end
    end
  end
end
data/bin/check-rds.rb ADDED
@@ -0,0 +1,382 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # check-rds
4
+ #
5
+ # DESCRIPTION:
6
+ # Check RDS instance statuses by RDS and CloudWatch API.
7
+ #
8
+ # OUTPUT:
9
+ # plain-text
10
+ #
11
+ # PLATFORMS:
12
+ # Linux
13
+ #
14
+ # DEPENDENCIES:
15
+ # gem: aws-sdk
16
+ # gem: sensu-plugin
17
+ #
18
+ # USAGE:
19
+ # Critical if DB instance "sensu-admin-db" is not on ap-northeast-1a
20
+ # check-rds -i sensu-admin-db --availability-zone-critical ap-northeast-1a
21
+ #
22
+ # Warning if CPUUtilization is over 80%, critical if over 90%
23
+ # check-rds -i sensu-admin-db --cpu-warning-over 80 --cpu-critical-over 90
24
+ #
25
+ # Critical if CPUUtilization is over 90%, maximum of last one hour
26
+ # check-rds -i sensu-admin-db --cpu-critical-over 90 --statistics maximum --period 3600
27
+ #
28
+ # Warning if DatabaseConnections are over 100, critical over 120
29
+ # check-rds -i sensu-admin-db --connections-critical-over 120 --connections-warning-over 100 --statistics maximum --period 3600
30
+ #
31
+ # Warning if IOPS are over 100, critical over 200
32
+ # check-rds -i sensu-admin-db --iops-critical-over 200 --iops-warning-over 100 --period 300
33
+ #
34
+ # Warning if memory usage is over 80%, maximum of the last 2 hours
35
+ # specifying "minimum" is intended actually since memory usage is calculated from CloudWatch "FreeableMemory" metric.
36
+ # check-rds -i sensu-admin-db --memory-warning-over 80 --statistics minimum --period 7200
37
+ #
38
+ # Disk usage, same as memory
39
+ # check-rds -i sensu-admin-db --disk-warning-over 80 --period 7200
40
+ #
41
+ # You can check multiple metrics simultaneously. Highest severity will be reported
42
+ # check-rds -i sensu-admin-db --cpu-warning-over 80 --cpu-critical-over 90 --memory-warning-over 60 --memory-critical-over 80
43
+ #
44
+ # You can accept nil values returned for a time period from CloudWatch as being OK. Amazon falls behind in their
45
+ # metrics from time to time and this prevents false positives
46
+ # check-rds -i sensu-admin-db --cpu-critical-over 90 -n
47
+ #
48
+ # NOTES:
49
+ #
50
+ # LICENSE:
51
+ # Copyright 2014 github.com/y13i
52
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
53
+ # for details.
54
+ #
55
+
56
+ require 'sensu-plugin/check/cli'
57
+ require 'aws-sdk'
58
+ require 'time'
59
+
60
# Sensu check for RDS instances. Depending on the thresholds supplied it
# compares the availability zone and CloudWatch-derived CPU, memory, disk,
# connection and IOPS figures against warning/critical limits and reports
# the highest severity found across all inspected instances.
class CheckRDS < Sensu::Plugin::Check::CLI
  # Items that have a --<item>-<severity>-over option and a matching
  # check_<item> method.
  METRIC_ITEMS = %w[cpu memory disk connections iops].freeze

  # Total memory per DB instance class, in GiB. FreeableMemory is reported
  # in bytes, so this table is what turns it into a usage percentage.
  MEMORY_TOTAL_GIGABYTES = {
    'db.cr1.8xlarge' => 244.0,
    'db.m1.small' => 1.7,
    'db.m1.medium' => 3.75,
    'db.m1.large' => 7.5,
    'db.m1.xlarge' => 15.0,
    'db.m2.xlarge' => 17.1,
    'db.m2.2xlarge' => 34.2,
    'db.m2.4xlarge' => 68.4,
    'db.m3.medium' => 3.75,
    'db.m3.large' => 7.5,
    'db.m3.xlarge' => 15.0,
    'db.m3.2xlarge' => 30.0,
    'db.m4.large' => 8.0,
    'db.m4.xlarge' => 16.0,
    'db.m4.2xlarge' => 32.0,
    'db.m4.4xlarge' => 64.0,
    'db.m4.10xlarge' => 160.0,
    'db.m4.16xlarge' => 256.0,
    'db.m5.large' => 8.0,
    'db.m5.xlarge' => 16.0,
    'db.m5.2xlarge' => 32.0,
    'db.m5.4xlarge' => 64.0,
    'db.m5.12xlarge' => 192.0,
    'db.m5.24xlarge' => 384.0,
    'db.r3.large' => 15.0,
    'db.r3.xlarge' => 30.5,
    'db.r3.2xlarge' => 61.0,
    'db.r3.4xlarge' => 122.0,
    'db.r3.8xlarge' => 244.0,
    'db.r4.large' => 15.25,
    'db.r4.xlarge' => 30.5,
    'db.r4.2xlarge' => 61.0,
    'db.r4.4xlarge' => 122.0,
    'db.r4.8xlarge' => 244.0,
    'db.r4.16xlarge' => 488.0,
    'db.r5.large' => 16.0,
    'db.r5.xlarge' => 32.0,
    'db.r5.2xlarge' => 64.0,
    'db.r5.4xlarge' => 128.0,
    'db.r5.12xlarge' => 384.0,
    'db.r5.24xlarge' => 768.0,
    'db.t1.micro' => 0.615,
    'db.t2.micro' => 1.0,
    'db.t2.small' => 2.0,
    'db.t2.medium' => 4.0,
    'db.t2.large' => 8.0,
    'db.t2.xlarge' => 16.0,
    'db.t2.2xlarge' => 32.0,
    'db.t3.micro' => 1.0,
    'db.t3.small' => 2.0,
    'db.t3.medium' => 4.0,
    'db.t3.large' => 8.0,
    'db.t3.xlarge' => 16.0,
    'db.t3.2xlarge' => 32.0,
    'db.x1.16xlarge' => 976.0,
    'db.x1.32xlarge' => 1952.0,
    'db.x1e.xlarge' => 122.0,
    'db.x1e.2xlarge' => 244.0,
    'db.x1e.4xlarge' => 488.0,
    'db.x1e.8xlarge' => 976.0,
    'db.x1e.16xlarge' => 1952.0,
    'db.x1e.32xlarge' => 3904.0
  }.freeze

  option :aws_access_key,
         short: '-a AWS_ACCESS_KEY',
         long: '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY'] or provide it as an option",
         default: ENV['AWS_ACCESS_KEY']

  option :aws_secret_access_key,
         short: '-k AWS_SECRET_KEY',
         long: '--aws-secret-access-key AWS_SECRET_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_KEY'] or provide it as an option",
         default: ENV['AWS_SECRET_KEY']

  option :role_arn,
         long: '--role-arn ROLE_ARN',
         description: 'AWS role arn of the role of the third party account to switch to',
         default: false

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default: 'us-east-1'

  option :db_instance_id,
         short: '-i N',
         long: '--db-instance-id NAME',
         description: 'DB instance identifier'

  option :db_cluster_id,
         short: '-l N',
         long: '--db-cluster-id NAME',
         description: 'DB cluster identifier'

  option :end_time,
         short: '-t T',
         long: '--end-time TIME',
         default: Time.now,
         proc: proc { |a| Time.parse a },
         description: 'CloudWatch metric statistics end time'

  option :period,
         short: '-p N',
         long: '--period SECONDS',
         default: 180,
         proc: proc(&:to_i),
         description: 'CloudWatch metric statistics period'

  option :statistics,
         short: '-S N',
         long: '--statistics NAME',
         default: :average,
         proc: proc { |a| a.downcase.intern },
         description: 'CloudWatch statistics method'

  option :accept_nil,
         short: '-n',
         long: '--accept_nil',
         description: 'Continue if CloudWatch provides no metrics for the time period',
         default: false

  # One AZ option plus one threshold option per metric item, for each
  # severity (e.g. --cpu-warning-over, --disk-critical-over).
  %w[warning critical].each do |severity|
    option :"availability_zone_#{severity}",
           long: "--availability-zone-#{severity} AZ",
           description: "Trigger a #{severity} if availability zone is different than given argument"

    METRIC_ITEMS.each do |item|
      option :"#{item}_#{severity}_over",
             long: "--#{item}-#{severity}-over N",
             proc: proc(&:to_f),
             description: "Trigger a #{severity} if #{item} usage is over a percentage"
    end
  end

  # Client options assembled from the command-line / environment settings.
  def aws_config
    { access_key_id: config[:aws_access_key],
      secret_access_key: config[:aws_secret_access_key],
      region: config[:aws_region] }
  end

  # Credentials obtained by assuming --role-arn. Memoized so the RDS and
  # CloudWatch clients share one assumed-role session instead of each
  # triggering its own AssumeRole call.
  def role_credentials
    @role_credentials ||= Aws::AssumeRoleCredentials.new(
      client: Aws::STS::Client.new(aws_config),
      role_arn: config[:role_arn],
      role_session_name: "role@#{Time.now.to_i}"
    )
  end

  # RDS client; uses the assumed role when --role-arn is set.
  def rds
    @rds ||= if config[:role_arn]
               Aws::RDS::Client.new(credentials: role_credentials, region: aws_config[:region])
             else
               Aws::RDS::Client.new(aws_config)
             end
  end

  # CloudWatch client; uses the assumed role when --role-arn is set.
  def cloud_watch
    @cloud_watch ||= if config[:role_arn]
                       Aws::CloudWatch::Client.new(credentials: role_credentials, region: aws_config[:region])
                     else
                       Aws::CloudWatch::Client.new(aws_config)
                     end
  end

  # Returns the DB instance whose identifier equals +id+, or reports
  # unknown when describe_db_instances does not list it.
  def find_db_instance(id)
    db = rds.describe_db_instances.db_instances.detect { |db_instance| db_instance.db_instance_identifier == id }
    unknown 'DB instance not found.' if db.nil?
    db
  end

  # Returns the instance identifier of the writer member of cluster +id+.
  #
  # Reports unknown when the response has no cluster or the cluster has no
  # member flagged as writer; the nil checks happen before any attribute
  # is read so a missing writer no longer surfaces as NoMethodError.
  def find_db_cluster_writer(id)
    cluster = rds.describe_db_clusters(db_cluster_identifier: id).db_clusters.first
    writer = cluster && cluster.db_cluster_members.detect(&:is_cluster_writer)
    unknown 'DB cluster not found.' if writer.nil?
    writer.db_instance_identifier
  end

  # Fetches AWS/RDS statistics for +metric_name+ (in +unit+) for the
  # instance currently held in @db_instance, over the window
  # [end_time - period, end_time] using the configured statistic.
  def cloud_watch_metric(metric_name, unit)
    cloud_watch.get_metric_statistics(
      namespace: 'AWS/RDS',
      metric_name: metric_name,
      dimensions: [
        {
          name: 'DBInstanceIdentifier',
          value: @db_instance.db_instance_identifier
        }
      ],
      start_time: config[:end_time] - config[:period],
      end_time: config[:end_time],
      statistics: [config[:statistics].to_s.capitalize],
      period: config[:period],
      unit: unit
    )
  end

  # Returns the configured statistic from the datapoint with the greatest
  # timestamp. With no datapoints the check ends as ok (when --accept_nil
  # is set) or unknown.
  def latest_value(metric)
    values = metric.datapoints.sort_by { |datapoint| datapoint[:timestamp] }

    # Time periods that are too small may return no usable values.
    if values.empty?
      config[:accept_nil] ? ok('Cloudwatch returned no results for time period. Accept nil passed so OK') : unknown('Requested time period did not return values from Cloudwatch. Try increasing your time period.')
    else
      values.last[config[:statistics]]
    end
  end

  # Total memory of +instance_class+ in bytes. Instance classes missing
  # from MEMORY_TOTAL_GIGABYTES are reported as unknown with an explicit
  # message rather than failing with a bare KeyError.
  def memory_total_bytes(instance_class)
    gigabytes = MEMORY_TOTAL_GIGABYTES.fetch(instance_class) do
      unknown "Memory size of DB instance class #{instance_class} is not known to this check."
    end
    gigabytes * 1024**3
  end

  # Flags +severity+ and returns a message fragment when the instance is
  # not in +expected_az+; returns nil when it is.
  def check_az(severity, expected_az)
    return if @db_instance.availability_zone == expected_az

    @severities[severity] = true
    "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
  end

  # Flags +severity+ when CPUUtilization is not below the threshold.
  # Returns the message fragment, or nil when within limits.
  def check_cpu(severity, expected_lower_than)
    cpu_value = latest_value(cloud_watch_metric('CPUUtilization', 'Percent'))
    return if cpu_value < expected_lower_than

    @severities[severity] = true
    "; CPUUtilization is #{format('%.2f', cpu_value)}% (expected lower than #{expected_lower_than}%)"
  end

  # Flags +severity+ when memory usage (total for the instance class minus
  # FreeableMemory, as a percentage) is not below the threshold.
  def check_memory(severity, expected_lower_than)
    free_bytes = latest_value(cloud_watch_metric('FreeableMemory', 'Bytes'))
    total_bytes = memory_total_bytes(@db_instance.db_instance_class)
    usage_percentage = (total_bytes - free_bytes) / total_bytes * 100
    return if usage_percentage < expected_lower_than

    @severities[severity] = true
    "; Memory usage is #{format('%.2f', usage_percentage)}% (expected lower than #{expected_lower_than}%)"
  end

  # Flags +severity+ when disk usage (allocated storage, treated as GiB,
  # minus FreeStorageSpace, as a percentage) is not below the threshold.
  def check_disk(severity, expected_lower_than)
    free_bytes = latest_value(cloud_watch_metric('FreeStorageSpace', 'Bytes'))
    total_bytes = @db_instance.allocated_storage * 1024**3
    usage_percentage = (total_bytes - free_bytes) / total_bytes * 100
    return if usage_percentage < expected_lower_than

    @severities[severity] = true
    "; Disk usage is #{format('%.2f', usage_percentage)}% (expected lower than #{expected_lower_than}%)"
  end

  # Flags +severity+ when DatabaseConnections is not below the threshold.
  def check_connections(severity, expected_lower_than)
    connections = latest_value(cloud_watch_metric('DatabaseConnections', 'Count'))
    return if connections < expected_lower_than

    @severities[severity] = true
    "; DatabaseConnections are #{format('%d', connections)} (expected lower than #{expected_lower_than})"
  end

  # Flags +severity+ when ReadIOPS + WriteIOPS is not below the threshold.
  def check_iops(severity, expected_lower_than)
    read_iops = latest_value(cloud_watch_metric('ReadIOPS', 'Count/Second'))
    write_iops = latest_value(cloud_watch_metric('WriteIOPS', 'Count/Second'))
    total_iops = read_iops + write_iops
    return if total_iops < expected_lower_than

    @severities[severity] = true
    "; IOPS are #{format('%d', total_iops)} (expected lower than #{expected_lower_than})"
  end

  # Check entry point: resolves the instances to inspect, evaluates each
  # one, and exits with the highest severity seen. Only instances that
  # tripped a warning or critical contribute to the output message.
  def run
    instances = []
    if config[:db_cluster_id]
      db_cluster_writer_id = find_db_cluster_writer(config[:db_cluster_id])
      instances << find_db_instance(db_cluster_writer_id)
    end

    # NOTE(review): this branch is independent of the cluster branch above,
    # so --db-cluster-id without --db-instance-id also adds every instance.
    # Confirm whether that is intended before changing it.
    if config[:db_instance_id].nil? || config[:db_instance_id].empty?
      instances.concat(rds.describe_db_instances[:db_instances])
    else
      instances << find_db_instance(config[:db_instance_id])
    end

    messages = ''
    severities = {
      critical: false,
      warning: false
    }
    instances.each do |instance|
      @db_instance = instance
      message, instance_severities = collect(instance)
      if instance_severities[:critical]
        severities[:critical] = true
        messages += message
      elsif instance_severities[:warning]
        severities[:warning] = true
        messages += message
      end
    end

    if severities[:critical]
      critical messages
    elsif severities[:warning]
      warning messages
    else
      ok messages
    end
  end

  # Runs every configured check against +instance+.
  #
  # Returns [message, severities] where message starts with the instance
  # identifier and severities is a { critical:, warning: } hash of booleans.
  def collect(instance)
    # The check_* helpers read @db_instance; set it here as well so this
    # method does not depend on the caller having done so.
    @db_instance = instance
    message = "\n#{instance[:db_instance_identifier]}: "
    @severities = {
      critical: false,
      warning: false
    }

    @severities.each_key do |severity|
      expected_az = config[:"availability_zone_#{severity}"]
      # check_az takes (severity, expected_az) and returns nil on a match,
      # hence the two-argument call and the to_s.
      message += check_az(severity, expected_az).to_s if expected_az

      METRIC_ITEMS.each do |item|
        threshold = config[:"#{item}_#{severity}_over"]
        next unless threshold

        message += send("check_#{item}", severity, threshold).to_s
      end
    end

    # Describe the CloudWatch window only when a metric threshold was given.
    if METRIC_ITEMS.any? { |item| %w[warning critical].any? { |severity| config[:"#{item}_#{severity}_over"] } }
      message += "(#{config[:statistics].to_s.capitalize} within #{config[:period]}s "
      message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"
    end
    [message, @severities]
  end
end