cfn-guardian 0.3.4 → 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/build-gem.yml +25 -0
  3. data/.github/workflows/release-gem.yml +25 -0
  4. data/.github/workflows/release-image.yml +33 -0
  5. data/.rspec +1 -0
  6. data/Gemfile.lock +24 -24
  7. data/README.md +4 -772
  8. data/cfn-guardian.gemspec +1 -3
  9. data/docs/alarm_templates.md +130 -0
  10. data/docs/cli.md +182 -0
  11. data/docs/composite_alarms.md +24 -0
  12. data/docs/custom_checks/azure_file_check.md +28 -0
  13. data/docs/custom_checks/domain_expiry.md +10 -0
  14. data/docs/custom_checks/http.md +59 -0
  15. data/docs/custom_checks/log_group_metric_filters.md +27 -0
  16. data/docs/custom_checks/nrpe.md +29 -0
  17. data/docs/custom_checks/port.md +40 -0
  18. data/docs/custom_checks/sftp.md +73 -0
  19. data/docs/custom_checks/sql.md +44 -0
  20. data/docs/custom_checks/tls.md +25 -0
  21. data/docs/custom_metrics.md +71 -0
  22. data/docs/event_subscriptions.md +67 -0
  23. data/docs/maintenance_mode.md +85 -0
  24. data/docs/notifiers.md +33 -0
  25. data/docs/overview.md +22 -0
  26. data/docs/resources.md +93 -0
  27. data/docs/variables.md +58 -0
  28. data/lib/cfnguardian.rb +76 -62
  29. data/lib/cfnguardian/cloudwatch.rb +43 -32
  30. data/lib/cfnguardian/compile.rb +90 -4
  31. data/lib/cfnguardian/config/defaults.yaml +9 -0
  32. data/lib/cfnguardian/deploy.rb +2 -16
  33. data/lib/cfnguardian/display_formatter.rb +1 -2
  34. data/lib/cfnguardian/error.rb +4 -0
  35. data/lib/cfnguardian/models/alarm.rb +102 -30
  36. data/lib/cfnguardian/models/check.rb +30 -12
  37. data/lib/cfnguardian/models/event.rb +43 -15
  38. data/lib/cfnguardian/models/event_subscription.rb +111 -0
  39. data/lib/cfnguardian/resources/amazonmq_rabbitmq.rb +136 -0
  40. data/lib/cfnguardian/resources/azure_file.rb +20 -0
  41. data/lib/cfnguardian/resources/base.rb +126 -26
  42. data/lib/cfnguardian/resources/batch.rb +14 -0
  43. data/lib/cfnguardian/resources/ec2_instance.rb +11 -0
  44. data/lib/cfnguardian/resources/glue.rb +23 -0
  45. data/lib/cfnguardian/resources/http.rb +1 -0
  46. data/lib/cfnguardian/resources/rds_cluster.rb +14 -0
  47. data/lib/cfnguardian/resources/rds_instance.rb +80 -0
  48. data/lib/cfnguardian/resources/redshift_cluster.rb +2 -2
  49. data/lib/cfnguardian/resources/step_functions.rb +41 -0
  50. data/lib/cfnguardian/stacks/main.rb +9 -8
  51. data/lib/cfnguardian/stacks/resources.rb +35 -6
  52. data/lib/cfnguardian/version.rb +1 -1
  53. metadata +39 -10
@@ -0,0 +1,20 @@
module CfnGuardian::Resource
  # Resource definition for an Azure file check. Contributes one default
  # alarm, one default event and one default check, each built from @resource.
  class AzureFile < Base

    # Appends the 'FileExpired' alarm (metric 'FileExpired') to @alarms
    # and returns the @alarms list.
    def default_alarms
      file_expired = CfnGuardian::Models::AzureFileAlarm.new(@resource).tap do |a|
        a.name = 'FileExpired'
        a.metric_name = 'FileExpired'
      end
      @alarms << file_expired
    end

    # Appends an AzureFileEvent built from @resource to @events.
    def default_events
      @events << CfnGuardian::Models::AzureFileEvent.new(@resource)
    end

    # Appends an AzureFileCheck built from @resource to @checks.
    def default_checks
      @checks << CfnGuardian::Models::AzureFileCheck.new(@resource)
    end

  end
end
@@ -4,6 +4,7 @@ require 'cfnguardian/models/alarm'
4
4
  require 'cfnguardian/models/event'
5
5
  require 'cfnguardian/models/check'
6
6
  require 'cfnguardian/models/metric_filter'
7
+ require 'cfnguardian/models/event_subscription'
7
8
 
8
9
  module CfnGuardian::Resource
9
10
  class Base
@@ -16,6 +17,7 @@ module CfnGuardian::Resource
16
17
  @events = []
17
18
  @checks = []
18
19
  @metric_filters = []
20
+ @event_subscriptions = []
19
21
  end
20
22
 
21
23
  # Overridden by inheriting classes to define default alarms
@@ -23,57 +25,78 @@ module CfnGuardian::Resource
23
25
  return @alarms
24
26
  end
25
27
 
26
- def get_alarms(overides={},resource={})
28
+ def get_alarms(group,overides={})
27
29
  # generate default alarms
28
30
  default_alarms()
29
-
31
+
32
+ # override any group properties
33
+ group_overrides = overides.has_key?('GroupOverrides') ? overides['GroupOverrides'] : {}
34
+ overides.delete('GroupOverrides')
35
+ if group_overrides.any?
36
+ @alarms.each do |alarm|
37
+ logger.debug("overriding #{alarm.name} alarm properties for resource #{alarm.resource_id} in resource group #{group} via group overrides")
38
+ group_overrides.each {|attr,value| update_object(alarm,attr,value)}
39
+ end
40
+ end
41
+
30
42
  # loop over each override template for the service
31
- overides.each do |name,properties|
32
-
43
+ overides.each do |name,properties|
33
44
  # disable default alarms
34
45
  if [false].include?(properties)
35
- alarm = find_alarm(name)
46
+ alarms = find_alarms(name)
36
47
 
37
- if !alarm.nil?
38
- alarm.enabled = false
39
- logger.debug "Disabling alarm '#{name}' for resource #{alarm.resource_id}"
48
+ if !alarms.nil?
49
+ alarms.each do |alarm|
50
+ alarm.enabled = false
51
+ logger.info "disabling alarm '#{name}' for resource #{alarm.resource_id}"
52
+ end
40
53
  next
41
54
  end
42
55
  end
43
-
56
+
44
57
  # continue if the override is in the incorrect format
45
58
  unless properties.is_a?(Hash)
46
59
  if name != 'Inherit'
47
- logger.warn "Incorrect format for alarm '#{name}'. Should be of type 'Hash', instead got type '#{properties.group}'"
60
+ logger.warn "incorrect format for alarm '#{name}'. Should be of type 'Hash', instead got type '#{properties.group}'"
48
61
  end
49
62
  next
50
63
  end
51
-
64
+
65
+ properties.merge!(group_overrides)
66
+
52
67
  # Create a new alarm inheriting the defaults of an existing alarm
53
68
  if properties.has_key?('Inherit')
54
69
  alarm = find_alarm(properties['Inherit'])
55
70
  if !alarm.nil?
71
+ logger.debug("creating new alarm #{name} for alarm group #{self.class.to_s.split('::').last} inheriting properties from alarm #{properties['Inherit']}")
56
72
  inheritited_alarm = alarm.clone
57
73
  alarm.name = name
58
- properties.each {|attr,value| update_alarm(inheritited_alarm,attr,value)}
74
+ properties.each {|attr,value| update_object(inheritited_alarm,attr,value)}
59
75
  @alarms.push(inheritited_alarm)
60
76
  else
61
- logger.warn "Alarm '#{properties['Inherit']}' doesn't exists and cannot be inherited"
77
+ logger.warn "alarm '#{properties['Inherit']}' doesn't exists and cannot be inherited"
62
78
  end
63
79
  next
64
80
  end
65
81
 
66
- alarm = find_alarm(name)
67
-
68
- if alarm.nil?
69
- # if alarm doesn't exist create a new one
70
- alarm = Kernel.const_get("CfnGuardian::Models::#{self.class.to_s.split('::').last}Alarm").new(resource)
71
- properties.each {|attr,value| update_alarm(alarm,attr,value)}
72
- alarm.name = name
73
- @alarms.push(alarm)
82
+ alarms = find_alarms(name)
83
+
84
+ if alarms.empty?
85
+ # if the alarm doesn't exist and it's not being inherited from another alarm create a new alarm
86
+ resources = @resource.has_key?('Hosts') ? @resource['Hosts'] : [@resource]
87
+ resources.each do |res|
88
+ alarm = Kernel.const_get("CfnGuardian::Models::#{self.class.to_s.split('::').last}Alarm").new(res)
89
+ properties.each {|attr,value| update_object(alarm,attr,value)}
90
+ alarm.name = name
91
+ logger.debug("created new alarm #{alarm.name} for resource #{alarm.resource_id} in resource group #{group}")
92
+ @alarms.push(alarm)
93
+ end
74
94
  else
75
95
  # if there is an existing alarm update the properties
76
- properties.each {|attr,value| update_alarm(alarm,attr,value)}
96
+ alarms.each do |alarm|
97
+ logger.debug("overriding #{alarm.name} alarm properties for resource #{alarm.resource_id} in resource group #{group} via alarm overrides")
98
+ properties.each {|attr,value| update_object(alarm,attr,value)}
99
+ end
77
100
  end
78
101
  end
79
102
 
@@ -81,6 +104,20 @@ module CfnGuardian::Resource
81
104
  @alarms.each {|a| a.group = @override_group}
82
105
  end
83
106
 
107
+ # String interpolation for alarm dimensions
108
+ @alarms.each do |alarm|
109
+ next if alarm.dimensions.nil?
110
+ alarm.dimensions.each do |k,v|
111
+ if v.is_a?(String) && v.match?(/^\${Resource::.*[A-Za-z]}$/)
112
+ resource_key = v.tr('${}', '').split('Resource::').last
113
+ if @resource.has_key?(resource_key)
114
+ logger.debug "overriding alarm #{alarm.name} dimension key '#{k}' with value '#{@resource[resource_key]}'"
115
+ alarm.dimensions[k] = @resource[resource_key]
116
+ end
117
+ end
118
+ end
119
+ end
120
+
84
121
  return @alarms.select{|a| a.enabled}
85
122
  end
86
123
 
@@ -113,6 +150,60 @@ module CfnGuardian::Resource
113
150
  default_metric_filters()
114
151
  return @metric_filters
115
152
  end
153
+
# Overridden by inheriting classes to define the default event subscriptions.
# The base implementation adds nothing and returns the current list.
def default_event_subscriptions()
  return @event_subscriptions
end

# Builds the event subscriptions for this resource: the defaults from
# default_event_subscriptions plus the supplied overrides.
#
# group    - resource group name; only used in log messages.
# overides - Hash of subscription name => override (defaults to none; nil is
#            treated as none). For each entry:
#            * false disables the existing subscription with that name;
#            * a Hash containing 'Inherit' clones the subscription named by
#              'Inherit', enables the clone and registers it under the new name;
#            * any other Hash updates the existing subscription of that name,
#              or creates a new "<ResourceClass>EventSubscription" model;
#            * any non-Hash value other than false is ignored.
#            Every key of the Hash is then applied through update_object.
#
# Returns the Array of subscriptions whose `enabled` is truthy.
def get_event_subscriptions(group, overides={})
  # generate default event subscriptions
  default_event_subscriptions()

  # override the defaults
  (overides || {}).each do |name, properties|
    event_subscription = find_event_subscriptions(name)

    # disable the event subscription if the value is false
    if properties == false
      unless event_subscription.nil?
        event_subscription.enabled = false
        logger.info "Disabling event subscription #{name} for #{group} #{event_subscription.resource_id}"
      end

      next
    end

    # ignore all properties not in a proper format
    next unless properties.is_a?(Hash)

    # create a new event subscription by inheriting an existing one
    if properties.has_key?('Inherit')
      inherit_event_subscription = find_event_subscriptions(properties['Inherit'])

      if inherit_event_subscription.nil?
        # the subscription being created is `name`; the previous message
        # interpolated an undefined local and raised NameError here
        logger.warn "Unable to create #{name} RDSEventSubscription by inheriting #{properties['Inherit']} as it cannot be found"
        next
      end

      event_subscription = inherit_event_subscription.clone
      event_subscription.enabled = true
      event_subscription.name = name
      @event_subscriptions.push(event_subscription)
      logger.debug "Inheriting RDSEventSubscription #{properties['Inherit']}"
    end

    # nothing to update or inherit from: build a fresh model for this resource class
    if event_subscription.nil?
      event_subscription = Kernel.const_get("CfnGuardian::Models::#{self.class.to_s.split('::').last}EventSubscription").new(@resource)
      event_subscription.name = name
      @event_subscriptions.push(event_subscription)
    end

    properties.each {|attr,value| update_object(event_subscription,attr,value)}
  end

  return @event_subscriptions.select {|es| es.enabled }
end
116
207
 
117
208
  def get_cost()
118
209
  return @alarms.length * 0.10
@@ -123,13 +214,22 @@ module CfnGuardian::Resource
123
214
  def find_alarm(name)
124
215
  @alarms.detect {|alarm| alarm.name == name}
125
216
  end
126
-
127
- def update_alarm(alarm,attr,value)
217
+
# Returns every alarm in @alarms whose name equals `name`
# (an empty Array when none match).
def find_alarms(name)
  @alarms.select { |alarm| alarm.name == name }
end
221
+
# Returns the first event subscription in @event_subscriptions whose name
# equals `name`, or nil when there is none.
def find_event_subscriptions(name)
  @event_subscriptions.find { |es| es.name == name }
end
225
+
226
+ def update_object(obj,attr,value)
227
+ logger.debug("overriding #{obj.type} property '#{attr}' with value #{value} for resource id: #{obj.resource_id}")
128
228
  begin
129
- alarm.send("#{attr.to_underscore}=",value)
229
+ obj.send("#{attr.to_underscore}=",value.clone)
130
230
  rescue NoMethodError => e
131
231
  if !e.message.match?(/inherit/)
132
- logger.warn "Unknown key '#{attr}' for #{alarm.resource_id} alarm #{alarm.name}"
232
+ logger.warn "Unknown property '#{attr}' for type: #{obj.type} and resource id: #{obj.resource_id}"
133
233
  end
134
234
  end
135
235
  end
@@ -0,0 +1,14 @@
module CfnGuardian::Resource
  # Resource definition for an AWS Batch job queue named by @resource['Id'].
  class Batch < Base
    # Appends the default 'FailedBatch' event subscription to
    # @event_subscriptions and returns that list. The subscription's detail
    # selects 'Batch Job State Change' events with status FAILED for this
    # resource's job-queue ARN.
    def default_event_subscriptions()
      failed_job = CfnGuardian::Models::BatchEventSubscription.new(@resource)
      failed_job.name = 'FailedBatch'
      failed_job.detail_type = 'Batch Job State Change'

      # ${AWS::Region} / ${AWS::AccountId} are left as literal text; only the queue id is interpolated by Ruby
      queue_arn = "arn:aws:batch:${AWS::Region}:${AWS::AccountId}:job-queue/#{@resource['Id']}"
      # symbol keys (:status, :jobQueue)
      failed_job.detail = { status: ['FAILED'], jobQueue: [queue_arn] }

      @event_subscriptions << failed_job
    end
  end
end
@@ -19,6 +19,17 @@ module CfnGuardian
19
19
  @alarms.push(alarm)
20
20
  end
21
21
 
# Appends the default 'InstanceTerminated' event subscription to
# @event_subscriptions and returns that list. The detail selects
# 'EC2 Instance State-change Notification' events for this instance id
# with state 'terminated'.
def default_event_subscriptions()
  terminated = CfnGuardian::Models::Ec2InstanceEventSubscription.new(@resource).tap do |sub|
    sub.name = 'InstanceTerminated'
    sub.detail_type = 'EC2 Instance State-change Notification'
    sub.detail = { 'instance-id' => [@resource['Id']], 'state' => ['terminated'] }
  end
  @event_subscriptions << terminated
end
32
+
22
33
  end
23
34
  end
24
35
  end
@@ -0,0 +1,23 @@
module CfnGuardian::Resource
  # Resource definition for AWS Glue jobs, matched on jobName by the
  # prefix @resource['Id'].
  class Glue < Base
    # Appends two default event subscriptions, 'FailedGlueJob' (state FAILED)
    # and 'TimeoutGlueJob' (state TIMEOUT), both on 'Glue Job State Change'
    # events, then returns @event_subscriptions.
    def default_event_subscriptions()
      { 'FailedGlueJob' => 'FAILED', 'TimeoutGlueJob' => 'TIMEOUT' }.each do |subscription_name, job_state|
        # NOTE(review): these are built from BatchEventSubscription rather than a
        # Glue-specific model; confirm that this is intended.
        subscription = CfnGuardian::Models::BatchEventSubscription.new(@resource)
        subscription.name = subscription_name
        subscription.detail_type = 'Glue Job State Change'
        # symbol keys (:state, :jobName, :prefix)
        subscription.detail = {
          state: [job_state],
          jobName: [{ prefix: @resource['Id'] }]
        }
        @event_subscriptions.push(subscription)
      end

      @event_subscriptions
    end
  end
end
@@ -41,6 +41,7 @@ module CfnGuardian::Resource
41
41
  alarm = CfnGuardian::Models::SslAlarm.new(@resource)
42
42
  alarm.name = 'ExpiresInDaysTask'
43
43
  alarm.metric_name = 'ExpiresInDays'
44
+ alarm.alarm_action = 'Task'
44
45
  alarm.threshold = 30
45
46
  @alarms.push(alarm)
46
47
  end
@@ -0,0 +1,14 @@
module CfnGuardian::Resource
  # Resource definition for an RDS cluster.
  class RDSCluster < Base

    # Appends the default 'FailoverFailed' event subscription (RDS event
    # category 'failover') to @event_subscriptions and returns that list.
    def default_event_subscriptions()
      failover_failed = CfnGuardian::Models::RDSClusterEventSubscription.new(@resource).tap do |sub|
        sub.name = 'FailoverFailed'
        sub.rds_event_category = 'failover'
        sub.message = 'A failover for the DB cluster has failed.'
      end
      @event_subscriptions << failover_failed
    end

  end
end
14
+
@@ -41,7 +41,87 @@ module CfnGuardian::Resource
41
41
  alarm.threshold = 45
42
42
  alarm.evaluation_periods = 10
43
43
  @alarms.push(alarm)
44
+
45
+ alarm = CfnGuardian::Models::RDSInstanceAlarm.new(@resource)
46
+ alarm.name = 'ReplicaLag'
47
+ alarm.metric_name = 'ReplicaLag'
48
+ alarm.threshold = 30 # seconds
49
+ alarm.evaluation_periods = 5
50
+ alarm.alarm_action = 'Warning'
51
+ alarm.enabled = false
52
+ @alarms.push(alarm)
44
53
  end
45
54
 
# Appends the default RDS instance event subscriptions to
# @event_subscriptions and returns that list.
#
# Each table row is [name, RDS event category, message, enabled]. The fourth
# column is only given for the read-replica subscriptions, which are created
# with enabled = false; for every other row `enabled` is left at whatever the
# model sets on construction.
def default_event_subscriptions()
  [
    ['MasterPasswordReset', 'configuration change',
     'The master password for the DB instance has been reset.'],
    ['MasterPasswordResetFailure', 'configuration change',
     'An attempt to reset the master password for the DB instance has failed.'],
    ['Deletion', 'deletion',
     'The DB instance has been deleted.'],
    ['MultiAZFailoverStarted', 'failover',
     'A Multi-AZ failover that resulted in the promotion of a standby instance has started.'],
    ['MultiAZFailoverComplete', 'failover',
     'A Multi-AZ failover has completed.'],
    ['DBFailure', 'failure',
     'The DB instance has failed due to an incompatible configuration or an underlying storage issue. Begin a point-in-time-restore for the DB instance.'],
    ['TableCountExceedsRecommended', 'notification',
     'The number of tables you have for your DB instance exceeds the recommended best practices for Amazon RDS.'],
    ['DatabasesCountExceedsRecommended', 'notification',
     'The number of databases you have for your DB instance exceeds the recommended best practices for Amazon RDS.'],
    ['ReplicationFailure', 'read replica',
     'An error has occurred in the read replication process.', false],
    ['ReplicationTerminated', 'read replica',
     'Replication on the read replica was terminated.', false],
    ['ReplicationStopped', 'read replica',
     'Replication on the read replica was manually stopped.', false]
  ].each do |name, category, message, enabled|
    event_subscription = CfnGuardian::Models::RDSInstanceEventSubscription.new(@resource)
    event_subscription.name = name
    # rows without a fourth column destructure to nil: leave the model's own value
    event_subscription.enabled = enabled unless enabled.nil?
    event_subscription.rds_event_category = category
    event_subscription.message = message
    @event_subscriptions.push(event_subscription)
  end

  @event_subscriptions
end
125
+
46
126
  end
47
127
  end
@@ -20,9 +20,9 @@ module CfnGuardian::Resource
20
20
  alarm = CfnGuardian::Models::RedshiftClusterAlarm.new(@resource)
21
21
  alarm.name = 'UnHealthyCluster'
22
22
  alarm.metric_name = 'HealthStatus'
23
- alarm.threshold = 0
23
+ alarm.comparison_operator = 'LessThanThreshold'
24
+ alarm.threshold = 1
24
25
  alarm.evaluation_periods = 10
25
- alarm.treat_missing_data = 'notBreaching'
26
26
  @alarms.push(alarm)
27
27
  end
28
28
 
@@ -0,0 +1,41 @@
module CfnGuardian::Resource
  # Resource definition for AWS Step Functions. Contributes four default
  # alarms, each evaluated over 5 periods with missing data treated as
  # 'notBreaching'.
  class StepFunctions < Base

    # Appends the default alarms to @alarms and returns that list.
    #
    # Each table row is [alarm name (also used as metric name), threshold,
    # alarm action]. A nil action leaves alarm_action at whatever the model
    # sets on construction.
    def default_alarms
      [
        ['ExecutionsFailed',   1,  nil],
        ['ExecutionsTimedOut', 1,  nil],
        ['ExecutionThrottled', 1,  'Warning'],
        ['ExecutionTime',      60, 'Warning']
      ].each do |metric, limit, action|
        alarm = CfnGuardian::Models::StepFunctionsAlarm.new(@resource)
        alarm.name = metric
        # separate String object so name and metric_name never alias each other
        alarm.metric_name = metric.dup
        alarm.threshold = limit
        alarm.evaluation_periods = 5
        alarm.alarm_action = action unless action.nil?
        alarm.treat_missing_data = 'notBreaching'
        @alarms.push(alarm)
      end

      @alarms
    end

  end
end