sensu-plugins-aws 2.1.0 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +13 -5
  2. checksums.yaml.gz.sig +0 -0
  3. data/CHANGELOG.md +26 -1
  4. data/README.md +4 -1
  5. data/bin/check-autoscaling-cpucredits.rb +21 -21
  6. data/bin/check-beanstalk-elb-metric.rb +3 -3
  7. data/bin/check-certificate-expiry.rb +5 -5
  8. data/bin/check-cloudwatch-metric.rb +2 -2
  9. data/bin/check-dynamodb-capacity.rb +29 -18
  10. data/bin/check-dynamodb-throttle.rb +28 -17
  11. data/bin/check-ec2-cpu_balance.rb +107 -0
  12. data/bin/check-ec2-filter.rb +5 -5
  13. data/bin/check-ec2-network.rb +19 -15
  14. data/bin/check-elb-certs.rb +2 -2
  15. data/bin/check-elb-health-fog.rb +3 -5
  16. data/bin/check-elb-health-sdk.rb +5 -5
  17. data/bin/check-elb-latency.rb +1 -1
  18. data/bin/check-elb-sum-requests.rb +1 -1
  19. data/bin/check-emr-cluster.rb +3 -5
  20. data/bin/check-instance-events.rb +2 -4
  21. data/bin/check-instance-health.rb +80 -0
  22. data/bin/check-rds-events.rb +18 -7
  23. data/bin/check-rds.rb +30 -27
  24. data/bin/check-s3-bucket.rb +2 -4
  25. data/bin/check-s3-object.rb +2 -4
  26. data/bin/check-vpc-vpn.rb +4 -6
  27. data/bin/handler-ec2_node.rb +6 -15
  28. data/bin/handler-ses.rb +5 -5
  29. data/bin/handler-sns.rb +12 -14
  30. data/bin/metrics-autoscaling-instance-count.rb +5 -5
  31. data/bin/metrics-ec2-count.rb +10 -10
  32. data/bin/metrics-elasticache.rb +5 -5
  33. data/bin/metrics-elb-full.rb +5 -5
  34. data/bin/metrics-sqs.rb +5 -5
  35. data/lib/sensu-plugins-aws/cloudwatch-common.rb +9 -8
  36. data/lib/sensu-plugins-aws/common.rb +1 -3
  37. data/lib/sensu-plugins-aws/filter.rb +2 -2
  38. data/lib/sensu-plugins-aws/version.rb +1 -1
  39. data.tar.gz.sig +0 -0
  40. metadata +139 -125
  41. metadata.gz.sig +0 -0
@@ -107,11 +107,11 @@ class EC2Filter < Sensu::Plugin::Check::CLI
107
107
 
108
108
  filter = Filter.parse(config[:filter])
109
109
 
110
- if filter.empty?
111
- options = {}
112
- else
113
- options = { filters: filter }
114
- end
110
+ options = if filter.empty?
111
+ {}
112
+ else
113
+ { filters: filter }
114
+ end
115
115
 
116
116
  data = client.describe_instances(options)
117
117
 
@@ -12,7 +12,7 @@
12
12
  # Linux
13
13
  #
14
14
  # DEPENDENCIES:
15
- # gem: aws-sdk-v1
15
+ # gem: aws-sdk
16
16
  # gem: sensu-plugin
17
17
  #
18
18
  # USAGE:
@@ -29,7 +29,7 @@
29
29
  #
30
30
 
31
31
  require 'sensu-plugin/check/cli'
32
- require 'aws-sdk-v1'
32
+ require 'aws-sdk'
33
33
 
34
34
  class CheckEc2Network < Sensu::Plugin::Check::CLI
35
35
  option :aws_access_key,
@@ -87,34 +87,38 @@ class CheckEc2Network < Sensu::Plugin::Check::CLI
87
87
  end
88
88
 
89
89
  def ec2
90
- @ec2 ||= AWS::EC2.new aws_config
90
+ @ec2 ||= Aws::EC2::Client.new aws_config
91
91
  end
92
92
 
93
93
  def cloud_watch
94
- @cloud_watch ||= AWS::CloudWatch.new aws_config
94
+ @cloud_watch ||= Aws::CloudWatch::Client.new aws_config
95
95
  end
96
96
 
97
97
  def network_metric(instance)
98
- cloud_watch.metrics.with_namespace('AWS/EC2').with_metric_name("#{config[:direction]}").with_dimensions(name: 'InstanceId', value: instance).first
99
- end
100
-
101
- def statistics_options
102
- {
98
+ cloud_watch.get_metric_statistics(
99
+ namespace: 'AWS/EC2',
100
+ metric_name: config[:direction].to_s,
101
+ dimensions: [
102
+ {
103
+ name: 'InstanceId',
104
+ value: instance
105
+ }
106
+ ],
103
107
  start_time: config[:end_time] - 300,
104
- end_time: config[:end_time],
108
+ end_time: config[:end_time],
105
109
  statistics: ['Average'],
106
- period: config[:period]
107
- }
110
+ period: config[:period],
111
+ unit: 'Bytes'
112
+ )
108
113
  end
109
114
 
110
- def latest_value(metric)
111
- value = metric.statistics(statistics_options.merge unit: 'Bytes')
115
+ def latest_value(value)
112
116
  value.datapoints[0][:average].to_f unless value.datapoints[0].nil?
113
117
  end
114
118
 
115
119
  def check_metric(instance)
116
120
  metric = network_metric instance
117
- latest_value metric
121
+ latest_value metric unless metric.nil?
118
122
  end
119
123
 
120
124
  def run
@@ -128,11 +128,11 @@ class CheckELBCerts < Sensu::Plugin::Check::CLI
128
128
  unknown "An error occurred processing AWS ELB API: #{e.message}"
129
129
  end
130
130
 
131
- if critical_message.length > 0
131
+ if !critical_message.length.empty?
132
132
  message = cert_message(critical_message.length, 'expiring within', config[:crit_under])
133
133
  message += ': ' + critical_message.sort.join(' ')
134
134
  critical message
135
- elsif warning_message.length > 0
135
+ elsif !warning_message.empty?
136
136
  message = cert_message(warning_message.length, 'expiring within', config[:warn_under])
137
137
  message += ': ' + warning_message.sort.join(' ')
138
138
  warning message
@@ -103,12 +103,10 @@ class ELBHealth < Sensu::Plugin::Check::CLI
103
103
  end
104
104
  if unhealthy_instances.empty?
105
105
  ok "All instances on ELB #{aws_region}::#{config[:elb_name]} healthy!"
106
+ elsif config[:verbose]
107
+ critical "Unhealthy instances detected: #{unhealthy_instances.map { |id, state| '[' + id + '::' + state + ']' }.join(' ')}"
106
108
  else
107
- if config[:verbose]
108
- critical "Unhealthy instances detected: #{unhealthy_instances.map { |id, state| '[' + id + '::' + state + ']' }.join(' ')}"
109
- else
110
- critical "Detected [#{unhealthy_instances.size}] unhealthy instances"
111
- end
109
+ critical "Detected [#{unhealthy_instances.size}] unhealthy instances"
112
110
  end
113
111
  rescue => e
114
112
  warning "An issue occured while communicating with the AWS EC2 API: #{e.message}"
@@ -92,11 +92,11 @@ class ELBHealth < Sensu::Plugin::Check::CLI
92
92
 
93
93
  def check_health(elb)
94
94
  unhealthy_instances = {}
95
- if config[:instances]
96
- instance_health_hash = elb.instances.health(config[:instances])
97
- else
98
- instance_health_hash = elb.instances.health
99
- end
95
+ instance_health_hash = if config[:instances]
96
+ elb.instances.health(config[:instances])
97
+ else
98
+ elb.instances.health
99
+ end
100
100
  instance_health_hash.each do |instance_health|
101
101
  if instance_health[:state] != 'InService'
102
102
  unhealthy_instances[instance_health[:instance].id] = instance_health[:state]
@@ -123,7 +123,7 @@ class CheckELBLatency < Sensu::Plugin::Check::CLI
123
123
  end
124
124
 
125
125
  def latest_value(metric)
126
- metric.statistics(statistics_options.merge unit: 'Seconds').datapoints.sort_by { |datapoint| datapoint[:timestamp] }.last[config[:statistics]]
126
+ metric.statistics(statistics_options.merge(unit: 'Seconds')).datapoints.sort_by { |datapoint| datapoint[:timestamp] }.last[config[:statistics]]
127
127
  end
128
128
 
129
129
  def flag_alert(severity, message)
@@ -115,7 +115,7 @@ class CheckELBSumRequests < Sensu::Plugin::Check::CLI
115
115
  end
116
116
 
117
117
  def latest_value(metric)
118
- metric.statistics(statistics_options.merge unit: 'Count').datapoints.sort_by { |datapoint| datapoint[:timestamp] }.last[:sum]
118
+ metric.statistics(statistics_options.merge(unit: 'Count')).datapoints.sort_by { |datapoint| datapoint[:timestamp] }.last[:sum]
119
119
  end
120
120
 
121
121
  def flag_alert(severity, message)
@@ -106,10 +106,8 @@ class CheckEMRCluster < Sensu::Plugin::Check::CLI
106
106
  def run
107
107
  aws_config = {}
108
108
  if config[:use_iam_role].nil?
109
- aws_config.merge!(
110
- access_key_id: config[:aws_access_key],
111
- secret_access_key: config[:aws_secret_access_key]
112
- )
109
+ aws_config[:access_key_id] = config[:aws_access_key]
110
+ aws_config[:secret_access_key] = config[:aws_secret_access_key]
113
111
  end
114
112
 
115
113
  emr = Aws::EMR::Client.new(aws_config.merge!(region: config[:aws_region]))
@@ -118,7 +116,7 @@ class CheckEMRCluster < Sensu::Plugin::Check::CLI
118
116
  clusters = emr_clusters.select { |c| c.name == config[:cluster_name] }
119
117
 
120
118
  critical "EMR cluster #{config[:cluster_name]} appears #{clusters.size} times" if clusters.size > 1
121
- critical "EMR cluster #{config[:cluster_name]} not found" if clusters.size == 0
119
+ critical "EMR cluster #{config[:cluster_name]} not found" if clusters.empty?
122
120
 
123
121
  cluster = clusters.first
124
122
  state = cluster.status.state
@@ -72,10 +72,8 @@ class CheckInstanceEvents < Sensu::Plugin::Check::CLI
72
72
  aws_config = {}
73
73
 
74
74
  if config[:use_iam_role].nil?
75
- aws_config.merge!(
76
- access_key_id: config[:aws_access_key],
77
- secret_access_key: config[:aws_secret_access_key]
78
- )
75
+ aws_config[:access_key_id] = config[:aws_access_key]
76
+ aws_config[:secret_access_key] = config[:aws_secret_access_key]
79
77
  end
80
78
 
81
79
  ec2 = AWS::EC2::Client.new(aws_config.merge!(region: config[:aws_region]))
@@ -0,0 +1,80 @@
1
+ #! /usr/bin/env ruby
2
+ #
3
+ # check-instance-health
4
+ #
5
+ # DESCRIPTION:
6
+ # This plugin looks up all instances in an account and checks event data, system status
7
+ #
8
+ # OUTPUT:
9
+ # plain-text
10
+ #
11
+ # PLATFORMS:
12
+ # Linux
13
+ #
14
+ # DEPENDENCIES:
15
+ # gem: aws-sdk
16
+ # gem: sensu-plugin
17
+ #
18
+ # USAGE:
19
+ # #YELLOW
20
+ #
21
+ # NOTES:
22
+ #
23
+ # LICENSE:
24
+ # Shane Starcher
25
+ # Copyright (c) 2016
26
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
27
+ # for details.
28
+ #
29
+
30
+ require 'sensu-plugins-aws'
31
+ require 'sensu-plugin/check/cli'
32
+ require 'aws-sdk'
33
+
34
+ class CheckInstanceEvents < Sensu::Plugin::Check::CLI
35
+ include Common
36
+
37
+ option :aws_region,
38
+ short: '-r AWS_REGION',
39
+ long: '--aws-region REGION',
40
+ description: 'AWS Region (defaults to us-east-1).',
41
+ default: 'us-east-1'
42
+
43
+ def gather_events(events)
44
+ useful_events = events.reject { |x| x[:code] == 'system-reboot' && x[:description] =~ /\[Completed\]/ }
45
+ !useful_events.empty?
46
+ end
47
+
48
+ def gather_status(status_checks)
49
+ ['impaired', 'insufficient-data'].include? status_checks.status
50
+ end
51
+
52
+ def run
53
+ messages = []
54
+ ec2 = Aws::EC2::Client.new
55
+ begin
56
+ ec2.describe_instance_status.instance_statuses.each do |item|
57
+ id = item.instance_id
58
+ if gather_events(item.events)
59
+ messages << "#{id} has unscheduled events"
60
+ end
61
+
62
+ if gather_status(item.system_status)
63
+ messages << "#{id} has failed system status checks"
64
+ end
65
+
66
+ if gather_status(item.instance_status)
67
+ messages << "#{id} has failed instance status checks"
68
+ end
69
+ end
70
+ rescue => e
71
+ unknown "An error occurred processing AWS EC2 API: #{e.message}"
72
+ end
73
+
74
+ if messages.count > 0
75
+ critical("#{messages.count} instances #{messages.count > 1 ? 'have' : 'has'}: #{messages.join(',')}")
76
+ else
77
+ ok
78
+ end
79
+ end
80
+ end
@@ -5,9 +5,12 @@
5
5
  #
6
6
  # DESCRIPTION:
7
7
  # This plugin checks rds clusters for critical events.
8
- # Due to the number of events types on RDS clusters the check searches for
9
- # events containing the text string 'has started' or 'is being'. These events all have
10
- # accompanying completiion events and are impacting events
8
+ # Due to the number of events types on RDS clusters, the check
9
+ # should filter out non-disruptive events that are part of
10
+ # basic operations.
11
+ #
12
+ # More info on RDS events:
13
+ # http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_Events.html
11
14
  #
12
15
  # OUTPUT:
13
16
  # plain-text
@@ -76,14 +79,22 @@ class CheckRDSEvents < Sensu::Plugin::Check::CLI
76
79
  clusters = rds.describe_db_instances[:db_instances].map { |db| db[:db_instance_identifier] }
77
80
  maint_clusters = []
78
81
 
79
- # fetch the last 2 hours of events for each cluster
82
+ # fetch the last 15 minutes of events for each cluster
83
+ # that way, we're only spammed with persistent notifications that we'd care about.
80
84
  clusters.each do |cluster_name|
81
- events_record = rds.describe_events(start_time: (Time.now - 7200).iso8601, source_type: 'db-instance', source_identifier: cluster_name)
85
+ events_record = rds.describe_events(start_time: (Time.now - 900).iso8601, source_type: 'db-instance', source_identifier: cluster_name)
82
86
  next if events_record[:events].empty?
83
87
 
84
- # if the last event is a start maint event then the cluster is still in maint
88
+ # we will need to filter out non-disruptive/basic operation events.
89
+ # ie. the regular backup operations
90
+ next if events_record[:events][-1][:message] =~ /Backing up DB instance|Finished DB Instance backup|Restored from snapshot/
91
+ # ie. Replication resumed
92
+ next if events_record[:events][-1][:message] =~ /Replication for the Read Replica resumed/
93
+ # you can add more filters to skip more events.
94
+
95
+ # draft the messages
85
96
  cluster_name_long = "#{cluster_name} (#{aws_config[:region]}) #{events_record[:events][-1][:message]}"
86
- maint_clusters.push(cluster_name_long) if events_record[:events][-1][:message] =~ /has started|is being|off-line|shutdown/
97
+ maint_clusters.push(cluster_name_long)
87
98
  end
88
99
  rescue => e
89
100
  unknown "An error occurred processing AWS RDS API: #{e.message}"
data/bin/check-rds.rb CHANGED
@@ -12,7 +12,7 @@
12
12
  # Linux
13
13
  #
14
14
  # DEPENDENCIES:
15
- # gem: aws-sdk-v1
15
+ # gem: aws-sdk
16
16
  # gem: sensu-plugin
17
17
  #
18
18
  # USAGE:
@@ -51,7 +51,7 @@
51
51
  #
52
52
 
53
53
  require 'sensu-plugin/check/cli'
54
- require 'aws-sdk-v1'
54
+ require 'aws-sdk'
55
55
  require 'time'
56
56
 
57
57
  class CheckRDS < Sensu::Plugin::Check::CLI
@@ -126,36 +126,39 @@ class CheckRDS < Sensu::Plugin::Check::CLI
126
126
  end
127
127
 
128
128
  def rds
129
- @rds ||= AWS::RDS.new aws_config
129
+ @rds ||= Aws::RDS::Client.new aws_config
130
130
  end
131
131
 
132
132
  def cloud_watch
133
- @cloud_watch ||= AWS::CloudWatch.new aws_config
133
+ @cloud_watch ||= Aws::CloudWatch::Client.new aws_config
134
134
  end
135
135
 
136
136
  def find_db_instance(id)
137
- db = rds.instances[id]
138
- fail unless db.exists?
137
+ db = rds.describe_db_instances.db_instances.find { |db_instance| db_instance.db_instance_identifier == id } # single match (or nil), not an Array
138
+ unknown 'DB instance not found.' if db.nil?
139
139
  db
140
- rescue
141
- unknown 'DB instance not found.'
142
140
  end
143
141
 
144
- def cloud_watch_metric(metric_name)
145
- cloud_watch.metrics.with_namespace('AWS/RDS').with_metric_name(metric_name).with_dimensions(name: 'DBInstanceIdentifier', value: @db_instance.id).first
146
- end
147
-
148
- def statistics_options
149
- {
142
+ def cloud_watch_metric(metric_name, unit)
143
+ cloud_watch.get_metric_statistics(
144
+ namespace: 'AWS/RDS',
145
+ metric_name: metric_name,
146
+ dimensions: [
147
+ {
148
+ name: 'DBInstanceIdentifier',
149
+ value: @db_instance.id
150
+ }
151
+ ],
150
152
  start_time: config[:end_time] - config[:period],
151
- end_time: config[:end_time],
153
+ end_time: config[:end_time],
152
154
  statistics: [config[:statistics].to_s.capitalize],
153
- period: config[:period]
154
- }
155
+ period: config[:period],
156
+ unit: unit
157
+ )
155
158
  end
156
159
 
157
- def latest_value(metric, unit)
158
- values = metric.statistics(statistics_options.merge unit: unit).datapoints.sort_by { |datapoint| datapoint[:timestamp] }
160
+ def latest_value(metric)
161
+ values = metric.datapoints.sort_by { |datapoint| datapoint[:timestamp] }
159
162
 
160
163
  # handle time periods that are too small to return usable values. # this is a cozy addition that wouldn't port upstream.
161
164
  if values.empty?
@@ -205,20 +208,20 @@ class CheckRDS < Sensu::Plugin::Check::CLI
205
208
  end
206
209
 
207
210
  def check_az(severity, expected_az)
208
- return if @db_instance.availability_zone_name == expected_az
209
- flag_alert severity, "; AZ is #{@db_instance.availability_zone_name} (expected #{expected_az})"
211
+ return if @db_instance.availability_zone == expected_az
212
+ flag_alert severity, "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
210
213
  end
211
214
 
212
215
  def check_cpu(severity, expected_lower_than)
213
- @cpu_metric ||= cloud_watch_metric 'CPUUtilization'
214
- @cpu_metric_value ||= latest_value @cpu_metric, 'Percent'
216
+ @cpu_metric ||= cloud_watch_metric 'CPUUtilization', 'Percent'
217
+ @cpu_metric_value ||= latest_value @cpu_metric
215
218
  return if @cpu_metric_value < expected_lower_than
216
219
  flag_alert severity, "; CPUUtilization is #{sprintf '%.2f', @cpu_metric_value}% (expected lower than #{expected_lower_than}%)"
217
220
  end
218
221
 
219
222
  def check_memory(severity, expected_lower_than)
220
- @memory_metric ||= cloud_watch_metric 'FreeableMemory'
221
- @memory_metric_value ||= latest_value @memory_metric, 'Bytes'
223
+ @memory_metric ||= cloud_watch_metric 'FreeableMemory', 'Bytes'
224
+ @memory_metric_value ||= latest_value @memory_metric
222
225
  @memory_total_bytes ||= memory_total_bytes @db_instance.db_instance_class
223
226
  @memory_usage_bytes ||= @memory_total_bytes - @memory_metric_value
224
227
  @memory_usage_percentage ||= @memory_usage_bytes / @memory_total_bytes * 100
@@ -227,8 +230,8 @@ class CheckRDS < Sensu::Plugin::Check::CLI
227
230
  end
228
231
 
229
232
  def check_disk(severity, expected_lower_than)
230
- @disk_metric ||= cloud_watch_metric 'FreeStorageSpace'
231
- @disk_metric_value ||= latest_value @disk_metric, 'Bytes'
233
+ @disk_metric ||= cloud_watch_metric 'FreeStorageSpace', 'Bytes'
234
+ @disk_metric_value ||= latest_value @disk_metric
232
235
  @disk_total_bytes ||= @db_instance.allocated_storage * 1024**3
233
236
  @disk_usage_bytes ||= @disk_total_bytes - @disk_metric_value
234
237
  @disk_usage_percentage ||= @disk_usage_bytes / @disk_total_bytes * 100
@@ -70,10 +70,8 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
70
70
  aws_config = {}
71
71
 
72
72
  if config[:use_iam_role].nil?
73
- aws_config.merge!(
74
- access_key_id: config[:aws_access_key],
75
- secret_access_key: config[:aws_secret_access_key]
76
- )
73
+ aws_config[:access_key_id] = config[:aws_access_key]
74
+ aws_config[:secret_access_key] = config[:aws_secret_access_key]
77
75
  end
78
76
 
79
77
  s3 = Aws::S3::Client.new(aws_config.merge!(region: config[:aws_region]))
@@ -101,10 +101,8 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
101
101
  def run
102
102
  aws_config = {}
103
103
  if config[:use_iam_role].nil?
104
- aws_config.merge!(
105
- access_key_id: config[:aws_access_key],
106
- secret_access_key: config[:aws_secret_access_key]
107
- )
104
+ aws_config[:access_key_id] = config[:aws_access_key]
105
+ aws_config[:secret_access_key] = config[:aws_secret_access_key]
108
106
  end
109
107
 
110
108
  s3 = Aws::S3::Client.new(aws_config.merge!(region: config[:aws_region]))