sensu-plugins-aws 3.2.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -63,7 +63,19 @@ class CheckInstanceCount < Sensu::Plugin::Check::CLI
63
63
  default: 25
64
64
 
65
65
  def instance_count
66
- AWS::AutoScaling.new.groups[config[:groupname]].auto_scaling_instances.map(&:lifecycle_state).count('InService').to_i
66
+ client = Aws::AutoScaling::Client.new
67
+ resp = client.describe_auto_scaling_groups(
68
+ auto_scaling_group_names: [config[:groupname]]
69
+ ).to_h
70
+ instances = []
71
+ resp[:auto_scaling_groups].each do |g|
72
+ g[:instances].each do |i|
73
+ if i[:lifecycle_state] == 'InService' && i[:health_status] == 'Healthy'
74
+ instances << i[:instance_id]
75
+ end
76
+ end
77
+ end
78
+ instances.length
67
79
  rescue => e
68
80
  critical "There was an error reaching AWS - #{e.message}"
69
81
  end
@@ -41,7 +41,7 @@
41
41
  #
42
42
 
43
43
  require 'sensu-plugin/check/cli'
44
- require 'aws-sdk-v1'
44
+ require 'aws-sdk'
45
45
 
46
46
  class CheckRDSEvents < Sensu::Plugin::Check::CLI
47
47
  option :aws_access_key,
@@ -74,10 +74,7 @@ class CheckRDSEvents < Sensu::Plugin::Check::CLI
74
74
  end
75
75
 
76
76
  def rds_regions
77
- # This is for SDK v2
78
- # Aws.partition('aws').regions.map(&:name)
79
-
80
- AWS::RDS.regions.map(&:name)
77
+ Aws.partition('aws').regions.map(&:name)
81
78
  end
82
79
 
83
80
  def run
@@ -102,7 +99,7 @@ class CheckRDSEvents < Sensu::Plugin::Check::CLI
102
99
  end
103
100
 
104
101
  aws_regions.each do |r|
105
- rds = AWS::RDS::Client.new aws_config.merge!(region: r)
102
+ rds = Aws::RDS::Client.new aws_config.merge!(region: r)
106
103
 
107
104
  begin
108
105
  if !config[:db_instance_id].nil? && !config[:db_instance_id].empty?
data/bin/check-rds.rb CHANGED
@@ -86,6 +86,11 @@ class CheckRDS < Sensu::Plugin::Check::CLI
86
86
  long: '--db-instance-id NAME',
87
87
  description: 'DB instance identifier'
88
88
 
89
+ option :db_cluster_id,
90
+ short: '-l N',
91
+ long: '--db-cluster-id NAME',
92
+ description: 'DB cluster identifier'
93
+
89
94
  option :end_time,
90
95
  short: '-t T',
91
96
  long: '--end-time TIME',
@@ -96,7 +101,7 @@ class CheckRDS < Sensu::Plugin::Check::CLI
96
101
  option :period,
97
102
  short: '-p N',
98
103
  long: '--period SECONDS',
99
- default: 60,
104
+ default: 180,
100
105
  proc: proc(&:to_i),
101
106
  description: 'CloudWatch metric statistics period'
102
107
 
@@ -154,6 +159,12 @@ class CheckRDS < Sensu::Plugin::Check::CLI
154
159
  db
155
160
  end
156
161
 
162
# Resolves the instance identifier of the writer member of a DB cluster.
#
# @param id [String] DB cluster identifier passed to describe_db_clusters
# @return [String] db_instance_identifier of the member whose
#   is_cluster_writer flag is set
# Reports through `unknown` when the lookup returns no cluster, or when the
# cluster has no member flagged as writer.
def find_db_cluster_writer(id)
  cluster = rds.describe_db_clusters(db_cluster_identifier: id).db_clusters.first
  unknown 'DB cluster not found.' if cluster.nil?

  writer = cluster.db_cluster_members.detect(&:is_cluster_writer)
  unknown 'DB cluster writer not found.' if writer.nil?

  writer.db_instance_identifier
end
167
+
157
168
  def cloud_watch_metric(metric_name, unit)
158
169
  cloud_watch.get_metric_statistics(
159
170
  namespace: 'AWS/RDS',
@@ -183,11 +194,6 @@ class CheckRDS < Sensu::Plugin::Check::CLI
183
194
  end
184
195
  end
185
196
 
186
- def flag_alert(severity, message)
187
- @severities[severity] = true
188
- @message += message
189
- end
190
-
191
197
  def memory_total_bytes(instance_class)
192
198
  memory_total_gigabytes = {
193
199
  'db.cr1.8xlarge' => 244.0,
@@ -224,84 +230,118 @@ class CheckRDS < Sensu::Plugin::Check::CLI
224
230
 
225
231
  def check_az(severity, expected_az)
226
232
  return if @db_instance.availability_zone == expected_az
227
- flag_alert severity, "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
233
+ @severities[severity] = true
234
+ "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
228
235
  end
229
236
 
230
237
  def check_cpu(severity, expected_lower_than)
231
- @cpu_metric ||= cloud_watch_metric 'CPUUtilization', 'Percent'
232
- @cpu_metric_value ||= latest_value @cpu_metric
233
- return if @cpu_metric_value < expected_lower_than
234
- flag_alert severity, "; CPUUtilization is #{sprintf '%.2f', @cpu_metric_value}% (expected lower than #{expected_lower_than}%)"
238
+ cpu_metric ||= cloud_watch_metric 'CPUUtilization', 'Percent'
239
+ cpu_metric_value ||= latest_value cpu_metric
240
+ return if cpu_metric_value < expected_lower_than
241
+ @severities[severity] = true
242
+ "; CPUUtilization is #{sprintf '%.2f', cpu_metric_value}% (expected lower than #{expected_lower_than}%)"
235
243
  end
236
244
 
237
245
  def check_memory(severity, expected_lower_than)
238
- @memory_metric ||= cloud_watch_metric 'FreeableMemory', 'Bytes'
239
- @memory_metric_value ||= latest_value @memory_metric
240
- @memory_total_bytes ||= memory_total_bytes @db_instance.db_instance_class
241
- @memory_usage_bytes ||= @memory_total_bytes - @memory_metric_value
242
- @memory_usage_percentage ||= @memory_usage_bytes / @memory_total_bytes * 100
243
- return if @memory_usage_percentage < expected_lower_than
244
- flag_alert severity, "; Memory usage is #{sprintf '%.2f', @memory_usage_percentage}% (expected lower than #{expected_lower_than}%)"
246
+ memory_metric ||= cloud_watch_metric 'FreeableMemory', 'Bytes'
247
+ memory_metric_value ||= latest_value memory_metric
248
+ memory_total_bytes ||= memory_total_bytes @db_instance.db_instance_class
249
+ memory_usage_bytes ||= memory_total_bytes - memory_metric_value
250
+ memory_usage_percentage ||= memory_usage_bytes / memory_total_bytes * 100
251
+ return if memory_usage_percentage < expected_lower_than
252
+ @severities[severity] = true
253
+ "; Memory usage is #{sprintf '%.2f', memory_usage_percentage}% (expected lower than #{expected_lower_than}%)"
245
254
  end
246
255
 
247
256
  def check_disk(severity, expected_lower_than)
248
- @disk_metric ||= cloud_watch_metric 'FreeStorageSpace', 'Bytes'
249
- @disk_metric_value ||= latest_value @disk_metric
250
- @disk_total_bytes ||= @db_instance.allocated_storage * 1024**3
251
- @disk_usage_bytes ||= @disk_total_bytes - @disk_metric_value
252
- @disk_usage_percentage ||= @disk_usage_bytes / @disk_total_bytes * 100
253
- return if @disk_usage_percentage < expected_lower_than
254
- flag_alert severity, "; Disk usage is #{sprintf '%.2f', @disk_usage_percentage}% (expected lower than #{expected_lower_than}%)"
257
+ disk_metric ||= cloud_watch_metric 'FreeStorageSpace', 'Bytes'
258
+ disk_metric_value ||= latest_value disk_metric
259
+ disk_total_bytes ||= @db_instance.allocated_storage * 1024**3
260
+ disk_usage_bytes ||= disk_total_bytes - disk_metric_value
261
+ disk_usage_percentage ||= disk_usage_bytes / disk_total_bytes * 100
262
+ return if disk_usage_percentage < expected_lower_than
263
+ @severities[severity] = true
264
+ "; Disk usage is #{sprintf '%.2f', disk_usage_percentage}% (expected lower than #{expected_lower_than}%)"
255
265
  end
256
266
 
257
267
  def check_connections(severity, expected_lower_than)
258
- @connections_metric ||= cloud_watch_metric 'DatabaseConnections', 'Count'
259
- @connections_metric_value ||= latest_value @connections_metric
260
- return if @connections_metric_value < expected_lower_than
261
- flag_alert severity, "; DatabaseConnections are #{sprintf '%d', @connections_metric_value} (expected lower than #{expected_lower_than})"
268
+ connections_metric ||= cloud_watch_metric 'DatabaseConnections', 'Count'
269
+ connections_metric_value ||= latest_value connections_metric
270
+ return if connections_metric_value < expected_lower_than
271
+ @severities[severity] = true
272
+ "; DatabaseConnections are #{sprintf '%d', connections_metric_value} (expected lower than #{expected_lower_than})"
262
273
  end
263
274
 
264
275
  def check_iops(severity, expected_lower_than)
265
- @read_iops_metric ||= cloud_watch_metric 'ReadIOPS', 'Count/Second'
266
- @read_iops_metric_value ||= latest_value @read_iops_metric
267
- @write_iops_metric ||= cloud_watch_metric 'WriteIOPS', 'Count/Second'
268
- @write_iops_metric_value ||= latest_value @write_iops_metric
269
- @iops_metric_value ||= @read_iops_metric_value + @write_iops_metric_value
270
- return if @iops_metric_value < expected_lower_than
271
- flag_alert severity, "; IOPS are #{sprintf '%d', @iops_metric_value} (expected lower than #{expected_lower_than})"
276
+ read_iops_metric ||= cloud_watch_metric 'ReadIOPS', 'Count/Second'
277
+ read_iops_metric_value ||= latest_value read_iops_metric
278
+ write_iops_metric ||= cloud_watch_metric 'WriteIOPS', 'Count/Second'
279
+ write_iops_metric_value ||= latest_value write_iops_metric
280
+ iops_metric_value ||= read_iops_metric_value + write_iops_metric_value
281
+ return if iops_metric_value < expected_lower_than
282
+ @severities[severity] = true
283
+ "; IOPS are #{sprintf '%d', iops_metric_value} (expected lower than #{expected_lower_than})"
272
284
  end
273
285
 
274
286
  def run
287
+ instances = []
288
+ if config[:db_cluster_id]
289
+ db_cluster_writer_id = find_db_cluster_writer(db_cluster_id)
290
+ instances << find_db_instance(db_cluster_writer_id)
291
+ end
292
+
275
293
  if config[:db_instance_id].nil? || config[:db_instance_id].empty?
276
- unknown 'No DB instance provided. See help for usage details'
294
+ rds.describe_db_instances[:db_instances].map { |db| instances << db }
295
+ else
296
+ instances << find_db_instance(config[:db_instance_id])
297
+ end
298
+
299
+ messages = ''
300
+ severities = {
301
+ critical: false,
302
+ warning: false
303
+ }
304
+ instances.each do |instance|
305
+ @db_instance = instance
306
+ result = collect(instance)
307
+ if result[1][:critical]
308
+ messages += result[0]
309
+ severities[:critical] = true
310
+ elsif result[1][:warning]
311
+ severities[:warning] = true
312
+ messages += result[0]
313
+ end
277
314
  end
278
315
 
279
- @db_instance = find_db_instance config[:db_instance_id]
280
- @message = "#{config[:db_instance_id]}: "
281
- @severities = {
316
+ if severities[:critical]
317
+ critical messages
318
+ elsif severities[:warning]
319
+ warning messages
320
+ else
321
+ ok messages
322
+ end
323
+ end
324
+
325
# Runs every configured check against one DB instance.
#
# For each severity (:critical, :warning) the availability-zone check runs
# when config[:availability_zone_<severity>] is set, and each of the
# cpu/memory/disk/connections/iops checks runs when
# config[:<item>_<severity>_over] is set. A check returns a message
# fragment, or nil when it passes; nil results are skipped.
#
# @param instance [#[]] DB instance; its :db_instance_identifier prefixes
#   the message
# @return [Array(String, Hash)] the assembled message and the @severities
#   hash ({critical: Boolean, warning: Boolean}) filled in by the checks
def collect(instance)
  # The check_* helpers read the instance under test from @db_instance.
  @db_instance = instance
  message = "\n#{instance[:db_instance_identifier]}: "
  @severities = {
    critical: false,
    warning: false
  }
  metric_items = %w(cpu memory disk connections iops)

  @severities.keys.each do |severity|
    findings = []

    expected_az = config[:"availability_zone_#{severity}"]
    findings << check_az(severity, expected_az) if expected_az

    metric_items.each do |item|
      threshold = config[:"#{item}_#{severity}_over"]
      findings << send("check_#{item}", severity, threshold) if threshold
    end

    # A passing check yields nil; only real findings are appended.
    message += findings.compact.join
  end

  # Describe the CloudWatch window only when at least one metric check is configured.
  if metric_items.any? { |item| %w(warning critical).any? { |severity| config[:"#{item}_#{severity}_over"] } }
    message += "(#{config[:statistics].to_s.capitalize} within #{config[:period]}s "
    message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"
  end
  [message, @severities]
end
307
347
  end
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # check-route53-domain-expiration
4
+ #
5
+ # DESCRIPTION:
6
+ # Alert when Route53 registered domains are close to expiration
7
+ #
8
+ # OUTPUT:
9
+ # plain-text
10
+ #
11
+ # DEPENDENCIES:
12
+ # gem: aws-sdk
13
+ # gem: sensu-plugin
14
+ #
15
+ # USAGE:
16
+ # check-route53-domain-expiration.rb
17
+ #
18
+ # LICENSE:
19
+ # Eric Heydrick <eheydrick@gmail.com>
20
+ # Released under the same terms as Sensu (the MIT license); see LICENSE
21
+ # for details.
22
+
23
+ require 'sensu-plugins-aws'
24
+ require 'sensu-plugin/check/cli'
25
+ require 'aws-sdk'
26
+
27
# Sensu check that alerts when domains registered through Route53 are
# close to their expiry date.
#
# Thresholds are given in days: a domain expiring within config[:crit]
# days is critical, within config[:warn] days is a warning.
class CheckRoute53DomainExpiration < Sensu::Plugin::Check::CLI
  include Common

  option :aws_region,
         short: '-r AWS_REGION',
         long: '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default: 'us-east-1'

  option :warn,
         short: '-w WARN',
         long: '--warning WARN',
         description: 'Warn if domain expires in less than this many days (default: 30)',
         default: 30,
         proc: proc(&:to_i)

  option :crit,
         short: '-c CRITICAL',
         long: '--critical CRITICAL',
         description: 'Critical if domain expires in less than this many days (default: 7)',
         default: 7,
         proc: proc(&:to_i)

  # Lists the registered domains, classifies each by days left until
  # expiry, and reports critical / warning / ok. Any StandardError raised
  # while talking to the API is reported as unknown.
  def run
    warn_domains = {}
    crit_domains = {}

    r53 = Aws::Route53Domains::Client.new(aws_config)
    begin
      # list_domains is paginated; walk every page so domains beyond the
      # first page are checked as well.
      r53.list_domains.each_page do |page|
        page.domains.each do |domain|
          days_until_expiration = days_until(domain.expiry)
          if days_until_expiration <= config[:crit]
            crit_domains[domain.domain_name] = days_until_expiration
          elsif days_until_expiration <= config[:warn]
            warn_domains[domain.domain_name] = days_until_expiration
          end
        end
      end

      if !crit_domains.empty?
        critical "Domains are expiring in less than #{config[:crit]} days: " + expiry_summary(crit_domains)
      elsif !warn_domains.empty?
        warning "Domains are expiring in less than #{config[:warn]} days: " + expiry_summary(warn_domains)
      else
        ok 'No domains are expiring soon'
      end
    rescue => e
      unknown "An error occurred communicating with the Route53 API: #{e.message}"
    end
  end

  private

  # Whole days between now and the given expiry timestamp (truncated).
  def days_until(expiry)
    (DateTime.parse(expiry.to_s) - DateTime.now).to_i
  end

  # Renders {domain_name => days} as "name (in N days), ...".
  def expiry_summary(domains)
    domains.map { |name, days| "#{name} (in #{days} days)" }.join(', ')
  end
end
@@ -1,6 +1,6 @@
1
1
  #! /usr/bin/env ruby
2
2
  #
3
- # check-s3-bucket
3
+ # check-s3-object
4
4
  #
5
5
  # DESCRIPTION:
6
6
  # This plugin checks if a file exists in a bucket and/or is not too old.
@@ -31,7 +31,7 @@
31
31
  require 'sensu-plugin/check/cli'
32
32
  require 'aws-sdk'
33
33
 
34
- class CheckS3Bucket < Sensu::Plugin::Check::CLI
34
+ class CheckS3Object < Sensu::Plugin::Check::CLI
35
35
  option :aws_access_key,
36
36
  short: '-a AWS_ACCESS_KEY',
37
37
  long: '--aws-access-key AWS_ACCESS_KEY',
@@ -67,13 +67,6 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
67
67
  description: 'The name of key in the bucket',
68
68
  required: true
69
69
 
70
- option :ok_zero_size,
71
- description: 'OK if file has zero size',
72
- short: '-z',
73
- long: '--ok-zero-size',
74
- boolean: true,
75
- default: false
76
-
77
70
  option :warning_age,
78
71
  description: 'Warn if mtime greater than provided age in seconds',
79
72
  short: '-w SECONDS',
@@ -84,17 +77,58 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
84
77
  short: '-c SECONDS',
85
78
  long: '--critical SECONDS'
86
79
 
80
+ option :ok_zero_size,
81
+ description: 'OK if file has zero size',
82
+ short: '-z',
83
+ long: '--ok-zero-size',
84
+ boolean: true,
85
+ default: false
86
+
87
+ option :warning_size,
88
+ description: 'Warning threshold for size',
89
+ long: '--warning-size COUNT'
90
+
91
+ option :critical_size,
92
+ description: 'Critical threshold for size',
93
+ long: '--critical-size COUNT'
94
+
95
+ option :compare_size,
96
+ description: 'Comparision operator for threshold: equal, not, greater, less',
97
+ short: '-o OPERATION',
98
+ long: '--operator-size OPERATION',
99
+ default: 'equal'
100
+
87
101
  def aws_config
88
102
  { access_key_id: config[:aws_access_key],
89
103
  secret_access_key: config[:aws_secret_access_key],
90
104
  region: config[:aws_region] }
91
105
  end
92
106
 
93
- def run_check(type, age)
94
- to_check = config["#{type}_age".to_sym].to_i
95
- if to_check > 0 && age >= to_check # rubocop:disable GuardClause
96
- send(type, "S3 object #{config[:key_name]} is #{age - to_check} seconds past (bucket #{config[:bucket_name]})")
107
# Builds the comparison used by run_check.
#
# The returned lambda takes (type, a, b):
# * type 'age'  -> a > b
# * type 'size' -> chosen by config[:compare_size]: 'greater' (a > b),
#   'less' (a < b), 'not' (a != b); any other value, including the
#   default 'equal', compares with a == b
# * any other type -> a == b
#
# @return [Proc] lambda returning true or false
def operator
  lambda do |type, a, b|
    case type
    when 'age'
      a > b
    when 'size'
      case config[:compare_size]
      when 'greater' then a > b
      when 'less' then a < b
      when 'not' then a != b
      else a == b
      end
    else
      a == b
    end
  end
end
126
+
127
# Evaluates one threshold and raises the matching Sensu status when it trips.
#
# @param type [String] 'age' or 'size'; combined with level to find the
#   threshold in config (e.g. config[:critical_age])
# @param level [Symbol] status method to call (:critical or :warning)
# @param value [Numeric] measured value compared with the threshold
# @param msg [String] format string taking key name, value, bucket name
# @return [nil] when the threshold is not configured or does not trip
def run_check(type, level, value, msg)
  threshold = config[:"#{level}_#{type}"]
  return if threshold.nil?
  return unless operator.call(type, value, threshold.to_i)
  send(level, format(msg, config[:key_name], value, config[:bucket_name]))
end
99
133
 
100
134
  def run
@@ -107,21 +141,26 @@ class CheckS3Bucket < Sensu::Plugin::Check::CLI
107
141
  s3 = Aws::S3::Client.new(aws_config.merge!(region: config[:aws_region]))
108
142
  begin
109
143
  output = s3.head_object(bucket: config[:bucket_name], key: config[:key_name])
144
+ age = Time.now.to_i - output[:last_modified].to_i
145
+ size = output[:content_length]
110
146
 
111
- if output[:content_length] == 0 && !config[:ok_zero_size]
112
- critical "S3 object #{config[:key_name]} has zero size (bucket #{config[:bucket_name]})"
147
+ [:critical, :warning].each do |level|
148
+ run_check('age', level, age, 'S3 object %s is %s seconds old (bucket %s)')
113
149
  end
114
150
 
115
- if config[:warning_age] || config[:critical_age]
116
- age = Time.now.to_i - output[:last_modified].to_i
117
- run_check(:critical, age) || run_check(:warning, age) || ok("S3 object #{config[:key_name]} is #{age} seconds old (bucket #{config[:bucket_name]})")
151
+ if size == 0
152
+ critical "S3 object #{config[:key_name]} is empty (bucket #{config[:bucket_name]})" unless config[:ok_zero_size]
118
153
  else
119
- ok("S3 object #{config[:key_name]} exists (bucket #{config[:bucket_name]})")
154
+ [:critical, :warning].each do |level|
155
+ run_check('size', level, size, 'S3 %s object\'size : %s octets (bucket %s)')
156
+ end
120
157
  end
158
+
159
+ ok("S3 object #{config[:key_name]} exists in bucket #{config[:bucket_name]}")
121
160
  rescue Aws::S3::Errors::NotFound => _
122
161
  critical "S3 object #{config[:key_name]} not found in bucket #{config[:bucket_name]}"
123
162
  rescue => e
124
- critical "S3 object #{config[:key_name]} in bucket #{config[:bucket_name]} - #{e.message}"
163
+ critical "S3 object #{config[:key_name]} in bucket #{config[:bucket_name]} - #{e.message} - #{e.backtrace}"
125
164
  end
126
165
  end
127
166
  end