cfn-guardian 0.3.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/build-gem.yml +25 -0
  3. data/.github/workflows/release-gem.yml +25 -0
  4. data/.github/workflows/release-image.yml +33 -0
  5. data/.rspec +1 -0
  6. data/Gemfile.lock +24 -24
  7. data/README.md +4 -772
  8. data/cfn-guardian.gemspec +1 -3
  9. data/docs/alarm_templates.md +130 -0
  10. data/docs/cli.md +182 -0
  11. data/docs/composite_alarms.md +24 -0
  12. data/docs/custom_checks/azure_file_check.md +28 -0
  13. data/docs/custom_checks/domain_expiry.md +10 -0
  14. data/docs/custom_checks/http.md +59 -0
  15. data/docs/custom_checks/log_group_metric_filters.md +27 -0
  16. data/docs/custom_checks/nrpe.md +29 -0
  17. data/docs/custom_checks/port.md +40 -0
  18. data/docs/custom_checks/sftp.md +73 -0
  19. data/docs/custom_checks/sql.md +44 -0
  20. data/docs/custom_checks/tls.md +25 -0
  21. data/docs/custom_metrics.md +71 -0
  22. data/docs/event_subscriptions.md +67 -0
  23. data/docs/maintenance_mode.md +85 -0
  24. data/docs/notifiers.md +33 -0
  25. data/docs/overview.md +22 -0
  26. data/docs/resources.md +93 -0
  27. data/docs/variables.md +58 -0
  28. data/lib/cfnguardian.rb +76 -62
  29. data/lib/cfnguardian/cloudwatch.rb +43 -32
  30. data/lib/cfnguardian/compile.rb +87 -4
  31. data/lib/cfnguardian/config/defaults.yaml +9 -0
  32. data/lib/cfnguardian/deploy.rb +2 -16
  33. data/lib/cfnguardian/display_formatter.rb +1 -2
  34. data/lib/cfnguardian/error.rb +4 -0
  35. data/lib/cfnguardian/models/alarm.rb +101 -29
  36. data/lib/cfnguardian/models/check.rb +30 -12
  37. data/lib/cfnguardian/models/event.rb +43 -15
  38. data/lib/cfnguardian/models/event_subscription.rb +96 -0
  39. data/lib/cfnguardian/resources/amazonmq_rabbitmq.rb +136 -0
  40. data/lib/cfnguardian/resources/azure_file.rb +20 -0
  41. data/lib/cfnguardian/resources/base.rb +126 -26
  42. data/lib/cfnguardian/resources/ec2_instance.rb +11 -0
  43. data/lib/cfnguardian/resources/http.rb +1 -0
  44. data/lib/cfnguardian/resources/internal_http.rb +8 -8
  45. data/lib/cfnguardian/resources/internal_port.rb +4 -4
  46. data/lib/cfnguardian/resources/internal_sftp.rb +8 -8
  47. data/lib/cfnguardian/resources/log_group.rb +2 -2
  48. data/lib/cfnguardian/resources/rds_cluster.rb +14 -0
  49. data/lib/cfnguardian/resources/rds_instance.rb +80 -0
  50. data/lib/cfnguardian/resources/redshift_cluster.rb +2 -2
  51. data/lib/cfnguardian/resources/sftp.rb +1 -1
  52. data/lib/cfnguardian/resources/sql.rb +2 -2
  53. data/lib/cfnguardian/stacks/main.rb +9 -8
  54. data/lib/cfnguardian/stacks/resources.rb +35 -6
  55. data/lib/cfnguardian/version.rb +1 -1
  56. metadata +33 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b64db0c4a4a45a9432c2cf750ac07b2600b54938e080fc24e3a50201144bfaa
4
- data.tar.gz: 8e6561a49bd7be8d9185fd64f55e6d3d1842683e325c6c12b6c61ebad1058e43
3
+ metadata.gz: 9df1f4d7843a5283660b98138d46976465f2e64418c9a76b9a88cfb8ce8d2c59
4
+ data.tar.gz: f05a68bf8dc81f31f70185e79f9aedce3497d0087013d9ed1bc5738786b3b3ea
5
5
  SHA512:
6
- metadata.gz: 4585cdc22260486afe530c5abd07f490d4d71a1a63f435d64e1bdc964ebb175a852b9d45af9b6ada79ee025bde0c7df88c1352ef2fc5bb799f507553186b3377
7
- data.tar.gz: eca47bd9aab11dc4888e29aa7a4992512344db581ad06455d4283ec7009d1998a46dcb122191b37d5e300c60a6ee28bf0979a0c6a6c2cfc8281fe785b34ebafe
6
+ metadata.gz: 1bafaf7b5dcbb19b3b3365cc514683426e1f26e98ad9c516606e20ca7463ab2de9fa102c0ac2a9e4bdbdf76b107c2c7f0c02e24f26894944d5d3429f055ad551
7
+ data.tar.gz: 833a797750326a35d09cd96b1fc1d9e0d895793962da45e75dcf01c0197a1ad9283222e7c0ccadcf319446684eb5b66f4ce4b544012f5a7f01bc68efc2f20d57
@@ -0,0 +1,25 @@
1
+ name: test and build gem
2
+ on:
3
+ push:
4
+ branches: [ master ]
5
+ pull_request:
6
+ branches: [ master ]
7
+
8
+ jobs:
9
+ build:
10
+ name: test + build
11
+ runs-on: ubuntu-latest
12
+
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+ - name: set up ruby 2.7
16
+ uses: actions/setup-ruby@v1
17
+ with:
18
+ ruby-version: 2.7.x
19
+ - name: rspec
20
+ run: |
21
+ gem install rspec
22
+ rspec
23
+ - name: build gem
24
+ run: |
25
+ gem build cfn-guardian.gemspec
@@ -0,0 +1,25 @@
1
+ name: release gem
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ build:
9
+ name: Build and publish gem
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Check out the repo
14
+ uses: actions/checkout@v2
15
+
16
+ - name: Set up ruby 2.7
17
+ uses: actions/setup-ruby@v1
18
+ with:
19
+ ruby-version: 2.7.x
20
+
21
+ - name: Publish gem
22
+ uses: dawidd6/action-publish-gem@v1
23
+ with:
24
+ api_key: ${{secrets.RUBYGEMS_API_KEY}}
25
+ github_token: ${{secrets.GITHUB_TOKEN}}
@@ -0,0 +1,33 @@
1
+ name: release docker image
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ build:
9
+ name: Build + Publish Container Image
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Check out the repo
14
+ uses: actions/checkout@v2
15
+
16
+ - name: Set up Docker Buildx
17
+ uses: docker/setup-buildx-action@v1
18
+
19
+ - name: Login to GitHub Container Repository
20
+ uses: docker/login-action@v1
21
+ with:
22
+ registry: ghcr.io
23
+ username: ${{ github.repository_owner }}
24
+ password: ${{ secrets.GHCR_PUSH_TOKEN }}
25
+
26
+ - name: Build and push Container Image to GitHub Container Repository
27
+ uses: docker/build-push-action@v2
28
+ with:
29
+ context: .
30
+ file: ./Dockerfile
31
+ push: true
32
+ tags: ghcr.io/base2services/guardian:${{ github.event.release.tag_name }}
33
+ build-args: GUARDIAN_VERSION=${{ github.event.release.tag_name }}
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --require spec_helper
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cfn-guardian (0.3.1)
4
+ cfn-guardian (0.6.0)
5
5
  aws-sdk-cloudformation (~> 1.31, < 2)
6
6
  aws-sdk-cloudwatch (~> 1.28, < 2)
7
7
  aws-sdk-codecommit (~> 1.28, < 2)
@@ -15,46 +15,46 @@ PATH
15
15
  GEM
16
16
  remote: https://rubygems.org/
17
17
  specs:
18
- aws-eventstream (1.0.3)
19
- aws-partitions (1.281.0)
20
- aws-sdk-cloudformation (1.31.0)
21
- aws-sdk-core (~> 3, >= 3.71.0)
18
+ aws-eventstream (1.1.0)
19
+ aws-partitions (1.390.0)
20
+ aws-sdk-cloudformation (1.44.0)
21
+ aws-sdk-core (~> 3, >= 3.109.0)
22
22
  aws-sigv4 (~> 1.1)
23
- aws-sdk-cloudwatch (1.34.0)
24
- aws-sdk-core (~> 3, >= 3.71.0)
23
+ aws-sdk-cloudwatch (1.40.0)
24
+ aws-sdk-core (~> 3, >= 3.99.0)
25
25
  aws-sigv4 (~> 1.1)
26
- aws-sdk-codecommit (1.31.0)
27
- aws-sdk-core (~> 3, >= 3.71.0)
26
+ aws-sdk-codecommit (1.36.0)
27
+ aws-sdk-core (~> 3, >= 3.99.0)
28
28
  aws-sigv4 (~> 1.1)
29
- aws-sdk-codepipeline (1.28.0)
30
- aws-sdk-core (~> 3, >= 3.71.0)
29
+ aws-sdk-codepipeline (1.33.0)
30
+ aws-sdk-core (~> 3, >= 3.99.0)
31
31
  aws-sigv4 (~> 1.1)
32
- aws-sdk-core (3.91.0)
33
- aws-eventstream (~> 1.0, >= 1.0.2)
32
+ aws-sdk-core (3.109.2)
33
+ aws-eventstream (~> 1, >= 1.0.2)
34
34
  aws-partitions (~> 1, >= 1.239.0)
35
35
  aws-sigv4 (~> 1.1)
36
36
  jmespath (~> 1.0)
37
- aws-sdk-kms (1.30.0)
38
- aws-sdk-core (~> 3, >= 3.71.0)
37
+ aws-sdk-kms (1.39.0)
38
+ aws-sdk-core (~> 3, >= 3.109.0)
39
39
  aws-sigv4 (~> 1.1)
40
- aws-sdk-s3 (1.61.0)
41
- aws-sdk-core (~> 3, >= 3.83.0)
40
+ aws-sdk-s3 (1.84.0)
41
+ aws-sdk-core (~> 3, >= 3.109.0)
42
42
  aws-sdk-kms (~> 1)
43
43
  aws-sigv4 (~> 1.1)
44
- aws-sigv4 (1.1.1)
45
- aws-eventstream (~> 1.0, >= 1.0.2)
46
- cfndsl (1.0.5)
44
+ aws-sigv4 (1.2.2)
45
+ aws-eventstream (~> 1, >= 1.0.2)
46
+ cfndsl (1.2.0)
47
47
  hana (~> 1.3)
48
- hana (1.3.5)
48
+ hana (1.3.6)
49
49
  jmespath (1.4.0)
50
- rake (10.5.0)
50
+ rake (13.0.1)
51
51
  sync (0.5.0)
52
52
  term-ansicolor (1.7.1)
53
53
  tins (~> 1.0)
54
54
  terminal-table (1.8.0)
55
55
  unicode-display_width (~> 1.1, >= 1.1.1)
56
56
  thor (0.20.3)
57
- tins (1.24.1)
57
+ tins (1.25.0)
58
58
  sync
59
59
  unicode-display_width (1.7.0)
60
60
 
@@ -64,7 +64,7 @@ PLATFORMS
64
64
  DEPENDENCIES
65
65
  bundler (~> 2.0)
66
66
  cfn-guardian!
67
- rake (~> 10.0)
67
+ rake (~> 13.0)
68
68
 
69
69
  BUNDLED WITH
70
70
  2.0.2
data/README.md CHANGED
@@ -1,11 +1,14 @@
1
1
  # CfnGuardian
2
2
 
3
+ [Documentation](docs/overview.md)
4
+
3
5
  CfnGuardian is a AWS monitoring tool with a few capabilities:
4
6
 
5
7
  - creates cloudwatch alarms through cloudformation based upon resources defined in a YAML config
6
8
  - alerting through SNS using 4 levels of severity [ Critical, Warning, Task, Informational ]
7
9
  - has a standard set of default alarms across many AWS resources
8
10
  - creates cloudwatch log metric filters with default alarms
11
+ - creates specific aws events with sns targets
9
12
  - creates custom metrics for external checks through lambda functions such as
10
13
  - http endpoint availability
11
14
  - http status code matching
@@ -20,7 +23,7 @@ CfnGuardian is a AWS monitoring tool with a few capabilities:
20
23
 
21
24
  **Supported AWS Resources**
22
25
 
23
- - AmazonMq
26
+ - AmazonMq(RabbitMQ and ActiveMQ)
24
27
  - ApiGateway
25
28
  - Application Targetgroups
26
29
  - Network TargetGroups
@@ -38,774 +41,3 @@ CfnGuardian is a AWS monitoring tool with a few capabilities:
38
41
  - Redshift Cluster
39
42
  - SQS Queues
40
43
  - LogGroup Metric Filters
41
-
42
- ## Installation
43
-
44
- ```ruby
45
- gem install cfn-guardian
46
- ```
47
-
48
- ## Commands
49
-
50
- **compile**
51
-
52
- Generates CloudFormation templates from the alarm configuration and output to the out/ directory.
53
-
54
- ```bash
55
- Usage:
56
- cfn-guardian compile c, --config=CONFIG
57
-
58
- Options:
59
- c, --config=CONFIG # yaml config file
60
- [--validate], [--no-validate] # validate cfn templates
61
- # Default: true
62
- [--bucket=BUCKET] # provide custom bucket name, will create a default bucket if not provided
63
- r, [--region=REGION] # set the AWS region
64
- [--debug], [--no-debug] # enable debug logging
65
- ```
66
-
67
- **deploy**
68
-
69
- Generates CloudFormation templates from the alarm configuration and output to the out/ directory. Then copies the files to the s3 bucket and deploys the Cloudformation.
70
-
71
- ```bash
72
- Usage:
73
- cfn-guardian deploy c, --config=CONFIG
74
-
75
- Options:
76
- c, --config=CONFIG # yaml config file
77
- [--bucket=BUCKET] # provide custom bucket name, will create a default bucket if not provided
78
- r, [--region=REGION] # set the AWS region
79
- s, [--stack-name=STACK_NAME] # set the Cloudformation stack name. Defaults to `guardian`
80
- [--sns-critical=SNS_CRITICAL] # sns topic arn for the critical alamrs
81
- [--sns-warning=SNS_WARNING] # sns topic arn for the warning alamrs
82
- [--sns-task=SNS_TASK] # sns topic arn for the task alamrs
83
- [--sns-informational=SNS_INFORMATIONAL] # sns topic arn for the informational alamrs
84
- [--debug], [--no-debug] # enable debug logging
85
- ```
86
-
87
- **show-alarms**
88
-
89
- Displays the configured settings for each alarm. Can be filtered by resource group and alarm name. Defaults to show all configured alarms.
90
-
91
- ```bash
92
- Usage:
93
- cfn-guardian show-alarms c, --config=CONFIG
94
-
95
- Options:
96
- c, --config=CONFIG # yaml config file
97
- g, [--group=GROUP] # resource group
98
- a, [--alarm=ALARM] # alarm name
99
- [--id=ID] # resource id
100
- [--compare], [--no-compare] # compare config to deployed alarms
101
- [--defaults], [--no-defaults] # show default alarm and properites
102
- [--debug], [--no-debug] # enable debug logging
103
- ```
104
-
105
- **show-history**
106
-
107
- Displays the alarm state or config history for the last 7 days. Alarms can be described in 2 different ways:
108
-
109
- 1. Using the config to describe the alarms and filter via the group, alarm and resource id.
110
- 2. Supplying a list of alarm names with the `--alarm-names` option.
111
-
112
- *NOTE: Option 2 may find alarms not in the guardian stack.*
113
-
114
- ```bash
115
- Usage:
116
- cfn-guardian show-history
117
-
118
- Options:
119
- c, [--config=CONFIG] # yaml config file
120
- g, [--group=GROUP] # resource group
121
- a, [--alarm=ALARM] # alarm name
122
- [--alarm-names=one two three] # CloudWatch alarm name if not providing config
123
- [--id=ID] # resource id
124
- t, [--type=TYPE] # filter by alarm state
125
- # Default: state
126
- # Possible values: state, config
127
- [--debug], [--no-debug] # enable debug logging
128
- ```
129
-
130
- **show-state**
131
-
132
- Displays the current CloudWatch alarm state. Alarms can be described in 3 different ways:
133
-
134
- 1. Using the config to describe the alarms and filter via the group, alarm and resource id.
135
- 2. Supplying a list of alarm names with the `--alarm-names` option.
136
- 3. Supplying the alarm name prefix using the `--alarm-prefix` option. For example `--alarm-prefix ECS` will find all the ECSCluster related alarms.
137
-
138
- *NOTE: Options 2 and 3 may find alarms not in the guardian stack.*
139
-
140
- ```bash
141
- Usage:
142
- cfn-guardian show-state
143
-
144
- Options:
145
- c, [--config=CONFIG] # yaml config file
146
- g, [--group=GROUP] # resource group
147
- a, [--alarm=ALARM] # alarm name
148
- [--id=ID] # resource id
149
- s, [--state=STATE] # filter by alarm state
150
- # Possible values: OK, ALARM, INSUFFICIENT_DATA
151
- [--alarm-names=one two three] # CloudWatch alarm name if not providing config
152
- [--alarm-prefix=ALARM_PREFIX] # CloudWatch alarm name prefix if not providing config
153
- [--debug], [--no-debug] # enable debug logging
154
- ```
155
-
156
- **show-drift**
157
-
158
- Displays any Cloudformation drift detection in the CloudWatch alarms from the deployed stacks.
159
-
160
- ```bash
161
- Usage:
162
- cfn-guardian show-drift
163
-
164
- Options:
165
- s, [--stack-name=STACK_NAME] # set the Cloudformation stack name
166
- # Default: guardian
167
- [--debug], [--no-debug] # enable debug logging
168
- ```
169
-
170
- ## Configuration
171
-
172
- Config is stored in a standard YAML file which will default to `alarms.yaml`. This can be overridden by supplying the `--config` switch.
173
-
174
- ### AWS Resources
175
-
176
- The resources key is where the resources are defined.
177
-
178
- ```yaml
179
- Resources:
180
- # resource group
181
- Ec2Instance:
182
- # Array of resources defining the resource id with the Id: key
183
- - Id: i-1a2b3c4d5e
184
- ```
185
-
186
- There are some resources that require more than the resource id to generate the alarm; for these cases additional key:values are required.
187
-
188
- ```yaml
189
- Resources:
190
- ApplicationTargetGroup:
191
- - Id: target-group-id
192
- # Target group requires the loadbalancer id for the alarm
193
- Loadbalancer: app/application-loadbalancer-id
194
- ```
195
-
196
- | Resource Group | Require Keys |
197
- | --------------------------- | ---------------- |
198
- | ApiGateway | Id |
199
- | AmazonMQBroker | Id |
200
- | AutoScalingGroup | Id |
201
- | DynamoDBTable | Id |
202
- | ElastiCacheReplicationGroup | Id |
203
- | ElasticFileSystem | Id |
204
- | Ec2Instance | Id |
205
- | EcsCluster | Id |
206
- | EcsService | Id, Cluster |
207
- | NetworkTargetGroup | Id, LoadBalancer |
208
- | ApplicationTargetGroup | Id, LoadBalancer |
209
- | ElasticLoadBalancer | Id |
210
- | RDSInstance | Id |
211
- | RDSClusterInstance | Id |
212
- | RedshiftCluster | Id |
213
- | Lambda | Id |
214
- | CloudFrontDistribution | Id |
215
- | SQSQueue | Id |
216
-
217
- ### Alarm Defaults
218
-
219
- To list the default alarms use the `show-alarms` command with the `--defaults` switch.
220
- The list can be filtered using the `--group ApplicationTargetGroup` and `--alarm TargetResponseTime` optional switches
221
-
222
- ```sh
223
- cfn-guardian show-alarms --defaults --group ApplicationTargetGroup --alarm TargetResponseTime
224
-
225
- +-------------------------+----------------------------------+
226
- | ApplicationTargetGroup::TargetResponseTime |
227
- | guardian-ApplicationTargetGroup-Default-TargetResponseTime |
228
- +-------------------------+----------------------------------+
229
- | Property | Config |
230
- +-------------------------+----------------------------------+
231
- | ResourceId | Default |
232
- | ResourceHash | 7a1920d61156abc05a60135aefe8bc67 |
233
- | Enabled | true |
234
- | MetricName | TargetResponseTime |
235
- | Dimensions | |
236
- | Threshold | 5 |
237
- | Period | 60 |
238
- | EvaluationPeriods | 5 |
239
- | ComparisonOperator | GreaterThanThreshold |
240
- | Statistic | Maximum |
241
- | ActionsEnabled | true |
242
- | AlarmAction | Critical |
243
- | TreatMissingData | notBreaching |
244
- +-------------------------+----------------------------------+
245
- ```
246
-
247
- ### Friendly Resource Names
248
-
249
- You can set a friendly name which will replace the resource id in the alarm name.
250
- The resource id will still be available in the alarm description.
251
-
252
- ```yaml
253
- Resources:
254
- ApplicationTargetGroup:
255
- - Id: target-group-id
256
- Loadbalancer: app/application-loadbalancer-id
257
- Name: webapp
258
- ```
259
-
260
- ### Log Group Metric Filters
261
-
262
- Each entry under `MetricFilters` creates a metric filter and a corresponding alarm.
263
- Cloudwatch NameSpace: `MetricFilters`
264
-
265
- AWS [documentation](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/FilterAndPatternSyntax.html) of pattern syntax
266
-
267
- ```yaml
268
- Resources:
269
- LogGroup:
270
- # Log group name
271
- - Id: /aws/lambda/myfuntion
272
- # List of metric filters
273
- MetricFilters:
274
- # Name of the cloud watch metric
275
- - MetricName: MyFunctionErrors
276
- # search pattern, see aws docs for syntax
277
- Pattern: error
278
- # metric to push to cloudwatch. Optional as it defaults to 1
279
- MetricValue: 1
280
-
281
- Templates:
282
- LogGroup:
283
- # use the MetricName name to override the alarm defaults
284
- MyFunctionErrors:
285
- Threshold: 10
286
- ```
287
-
288
- ### Custom Metric Resources
289
-
290
- These are also defined under the resources key but more detail is required and differs per group.
291
-
292
- #### Http
293
-
294
- Cloudwatch NameSpace: `HttpCheck`
295
-
296
- ```yaml
297
- Resources:
298
- Http:
299
- # Array of resources defining the http endpoint with the Id: key
300
- - Id: https://api.example.com
301
- # enables the status code check
302
- StatusCode: 200
303
- # enables the SSL check
304
- Ssl: true
305
- # boolean to request a compressed response
306
- Compressed: true
307
- - Id: https://www.example.com
308
- StatusCode: 301
309
- - Id: https://example.com
310
- StatusCode: 200
311
- Ssl: true
312
- # enables the body regex check
313
- BodyRegex: 'helloworld'
314
- - Id: http://www.example.com/images/cat.jpg
315
- StatusCode: 200
316
- # md5 hash of the image
317
- BodyRegex: ae49b4246a89efcb5c639f00a013e812
318
- - Id: https://api.example.com/user
319
- StatusCode: 201
320
- # default method is get but can be overridden to support post/put/head etc
321
- Method: post
322
- # specify headers using "key=value key=value"
323
- Headers: content-type=application/json
324
- # pass in custom payload for the request
325
- Payload: '{"name": "john"}'
326
- ```
327
-
328
- #### InternalHttp
329
-
330
- Cloudwatch NameSpace: `InternalHttpCheck`
331
-
332
- ```yaml
333
- Resources:
334
- InternalHttp:
335
- # Array of host groups with the uniq identifier of Environment.
336
- # This will create a nrpe lambda per group attach to the defined vpc and subnets
337
- - Environment: Prod
338
- # VPC id for the vpc the EC2 hosts are running in
339
- VpcId: vpc-1234
340
- # Array of subnets to attach to the lambda function. Supply multiple if you want to be multi AZ.
341
- # Multiple subnets from the same AZ cannot be used!
342
- Subnets:
343
- - subnet-abcd
344
- Hosts:
345
- # Array of resources defining the http endpoint with the Id: key
346
- # All the same options as Http including ssl check on the internal endpoint
347
- - Id: http://api.example.com
348
- ```
349
-
350
- #### Port
351
-
352
- Cloudwatch NameSpace: `PortCheck`
353
-
354
- ```yaml
355
- Resources:
356
- Port:
357
- # Array of resources defining the endpoint with the Id: key and Port: Int
358
- - Id: api.example.com
359
- Port: 443
360
- # can override the default timeout of 120 seconds
361
- Timeout: 60
362
- ```
363
-
364
- #### InternalPort
365
-
366
- Cloudwatch NameSpace: `InternalPortCheck`
367
-
368
- ```yaml
369
- Resources:
370
- InternalPort:
371
- # Array of host groups with the uniq identifier of Environment.
372
- # This will create a nrpe lambda per group attach to the defined vpc and subnets
373
- - Environment: Prod
374
- # VPC id for the vpc the EC2 hosts are running in
375
- VpcId: vpc-1234
376
- # Array of subnets to attach to the lambda function. Supply multiple if you want to be multi AZ.
377
- # Multiple subnets from the same AZ cannot be used!
378
- Subnets:
379
- - subnet-abcd
380
- Hosts:
381
- # Array of resources defining the endpoint with the Id: key and Port: Int
382
- # All the same options as Port
383
- - Id: api.example.com
384
- Port: 8080
385
- ```
386
-
387
- #### DomainExpiry
388
-
389
- Cloudwatch NameSpace: `DNS`
390
-
391
- ```yaml
392
- Resources:
393
- DomainExpiry:
394
- # Array of resources defining the domain with the Id: key
395
- - Id: example.com
396
- ```
397
-
398
- #### Nrpe
399
-
400
- Cloudwatch NameSpace: `NRPE`
401
-
402
- *Note: This requires the nrpe agent running and configured on your EC2 Host*
403
-
404
- ```yaml
405
- Resources:
406
- Nrpe:
407
- # Array of host groups with the uniq identifier of Environment.
408
- # This will create a nrpe lambda per group attach to the defined vpc and subnets
409
- - Environment: Prod
410
- # VPC id for the vpc the EC2 hosts are running in
411
- VpcId: vpc-1234
412
- # Array of subnets to attach to the lambda function. Supply multiple if you want to be multi AZ.
413
- # Multiple subnets from the same AZ cannot be used!
414
- Subnets:
415
- - subnet-abcd
416
- Hosts:
417
- # Array of hosts with the Id: key defining the host private ip address
418
- - Id: 10.150.10.6
419
- # Array of nrpe commands to run against the host.
420
- # A custom metric and alarm is created for each command
421
- Commands:
422
- - check_disk
423
- - Id: 10.150.10.6
424
- Commands:
425
- - check_disk
426
- ```
427
-
428
- #### Sql
429
-
430
- Cloudwatch NameSpace: `SQL`
431
-
432
- ```yaml
433
- Resources:
434
- Sql:
435
- # Array of host groups with the uniq identifier of Environment.
436
- # This will create a sql lambda per group attach to the defined vpc and subnets
437
- - Environment: Prod
438
- # VPC id for the vpc the EC2 hosts are running in
439
- VpcId: vpc-1234
440
- # Array of subnets to attach to the lambda function. Supply multiple if you want to be multi AZ.
441
- # Multiple subnets from the same AZ cannot be used!
442
- Subnets:
443
- - subnet-1234
444
- Hosts:
445
- # Array of hosts with the Id: key defining the host private ip address
446
- - Id: my-rds-instance.example.com
447
- # Secret manager secret where the sql:// connection string key:value is defined
448
- # { "connectionString": "sql://username:password@mydb:3306/information_schema"}
449
- SecretId: MyTestDatabaseSecret
450
- # Database engine. supports mysql | postgres | mssql
451
- Engine: mysql
452
- Queries:
453
- # Array of SQL queries
454
- # MetricName used to create the custom metric and alarm
455
- - MetricName: LongRunningTransactions
456
- # SQL Query to execute
457
- Query: >-
458
- SELECT pl.host,trx_id,trx_started,trx_query
459
- FROM information_schema.INNODB_TRX it INNER
460
- JOIN information_schema.PROCESSLIST pl
461
- ON pl.Id=it.trx_mysql_thread_id
462
- WHERE it.trx_started < (NOW() - INTERVAL 4 HOUR);
463
- ```
464
-
465
- Create secretmanager secret:
466
-
467
- ```bash
468
- aws secretsmanager create-secret --name MyTestDatabaseSecret \
469
- --description "My test database secret for use with guardian sql check" \
470
- --secret-string '{"connectionString":"sql://username:password@mydb:3306/information_schema"}'
471
- ```
472
-
473
- #### SFTP
474
-
475
- CloudWatch Namespace: `SftpCheck`
476
-
477
- ```yaml
478
- Resources:
479
- SFTP:
480
- # sftp endpoint, can accept both ip address or dns endpoint
481
- - Id: example.com
482
- # sftp user to test connection with
483
- User: user
484
- # optionally set port, defaults to port 22
485
- Port: 22
486
- # for added security you can use allowed hosts when creating a
487
- # connection to the sftp by supplying the public key of the sftp server.
488
- # this removes the security risk for man in the middle attacks.
489
- ServerKey: public-server-key
490
- # ssm parameter path for the password for the SFTP user.
491
- Password: /ssm/path/password
492
- # ssm parameter path for the private key for the SFTP user
493
- PrivateKey: /ssm/path/privatekey
494
- # ssm parameter path for the password for the private key
495
- PrivateKeyPass: /ssm/path/privatekey/password
496
- # optionally set a file to check its existence and test the time it takes to get the file
497
- File: file.txt
498
- # optionally check for a regex match pattern in the body of the file
499
- FileRegexMatch: ok
500
- ```
501
-
502
- #### InternalSFTP
503
-
504
- CloudWatch Namespace: `InternalSftpCheck`
505
-
506
- ```yaml
507
- Resources:
508
- InternalSFTP:
509
- # Array of host groups with the uniq identifier of Environment.
510
- # This will create a sql lambda per group attach to the defined vpc and subnets
511
- - Environment: Prod
512
- # VPC id for the vpc the EC2 hosts are running in
513
- VpcId: vpc-1234
514
- # Array of subnets to attach to the lambda function. Supply multiple if you want to be multi AZ.
515
- # Multiple subnets from the same AZ cannot be used!
516
- Subnets:
517
- - subnet-1234
518
- Hosts:
519
- # Array of sftp hosts with the Id: key defining the host private ip address
520
- - Id: example.com
521
- User: user
522
- Port: 22
523
- ServerKey: public-server-key
524
- Password: /ssm/path/password
525
- PrivateKey: /ssm/path/privatekey
526
- PrivateKeyPass: /ssm/path/privatekey/password
527
- File: file.txt
528
- FileRegexMatch: ok
529
- ```
530
-
531
- #### TLS
532
-
533
- CloudWatch Namespace: `TLSVersionCheck`
534
-
535
- ```yaml
536
- Resources:
537
- TLS:
538
- # endpoint
539
- - Id: example.com
540
- # port to check, defaults to 443
541
- Port: 443
542
- # list of tls versions to validate against
543
- # there is a metric for each version with a 0 being not supported and 1 for supported
544
- # alarm thresholds will have to be adjusted to suit your checking requirements
545
- # defaults to all versions shown below
546
- Versions:
547
- - SSLv2
548
- - SSLv3
549
- - TLSv1
550
- - TLSv1.1
551
- - TLSv1.2
552
- # checks and reports the max tls version supported as an int
553
- # ['SSLv2 => 1', 'SSLv3 => 2', 'TLSv1 => 3','TLSv1.1 => 4', 'TLSv1.2 => 5']
554
- MaxSupported: '1'
555
- ```
556
-
557
- ## Alarm Templates
558
-
559
- Each resource group has a set of default alarm templates which defines all the cloudwatch alarm options such as Threshold, Statistic, EvaluationPeriods etc. These can be manipulated in a few ways to change the values or create new alarms.
560
-
561
- Custom alarm templates are defined within the same YAML config file under the `Templates` key.
562
-
563
- ### Overriding Defaults
564
-
565
- ```yaml
566
- Templates:
567
- # define the resource group
568
- Ec2Instance:
569
- # define the Alarm name you want to override
570
- CPUUtilizationHigh:
571
- # supply the key value of the alarm property you want to override
572
- Threshold: 80
573
- ```
574
-
575
- ### Creating A New Alarm From A Default
576
-
577
- You can create a new alarm from a default alarm using the `Inherit:` key. This will inherit all properties from the default alarm, which can then be overridden.
578
-
579
- ```yaml
580
- Templates:
581
- # define the resource group
582
- Ec2Instance:
583
- # define the Alarm name you want to override
584
- CPUUtilizationWarning:
585
- # Inherit the CPUUtilizationHigh alarm
586
- Inherit: CPUUtilizationHigh
587
- # supply the key value of the alarm property you want to override
588
- Threshold: 75
589
- EvaluationPeriods: 60
590
- AlarmAction: Warning
591
- ```
592
-
593
- ### Creating A New Alarm With No Defaults
594
-
595
- You can create a new alarm without inheriting an existing one. This will then inherit the default properties for the resource group.
596
-
597
- ```yaml
598
- Templates:
599
- # define the resource group
600
- Ec2Instance:
601
- # define the Alarm name you want to override
602
- CPUUtilizationWarning:
603
- # metric name must be provided
604
- MetricName: CPUUtilization
605
- # supply the key value of the alarm property you want to override
606
- Statistic: Minimum
607
- Threshold: 75
608
- EvaluationPeriods: 60
609
- AlarmAction: Warning
610
- ```
611
-
612
- ### Disabling An Alarm
613
-
614
- You can disable an alarm by setting the alarm to `false`
615
-
616
- ```yaml
617
- Templates:
618
- # define the resource group
619
- Ec2Instance:
620
- # define the Alarm and set the value to false
621
- CPUUtilizationHigh: false
622
- ```
623
-
624
- ### Creating A New Resource Group
625
-
626
- You can create a new resource group based upon an existing resource group. For example if you had 2 target groups and wanted to disable an alarm for one but not the other you can create a new resource group which will inherit all the ApplicationTargetGroup alarms and then disable the selected alarm.
627
-
628
- ```yaml
629
- Resources:
630
- # the default resource group
631
- ApplicationTargetGroup:
632
- - Id: ApiTG
633
- LoadBalancer: MyPublicLB
634
- - Id: WebTG
635
- LoadBalancer: MyPublicLB
636
- - Id: ServiceTG
637
- LoadBalancer: MyPublicLB
638
-
639
- # my new custom resource group
640
- RedirectTargetGroup:
641
- - Id: RedirectTG
642
- LoadBalancer: MyPublicLB
643
-
644
- Templates:
645
- # create the new resource group
646
- RedirectTargetGroup:
647
- # inherit the ApplicationTargetGroup resource group
648
- Inherit: ApplicationTargetGroup
649
- # disable the selected alarm
650
- TargetResponseTime: false
651
- ```
652
-
653
- ## SNS Topics
654
-
655
- Create the topics before launching the guardian stack
656
-
657
- ```bash
658
- aws sns create-topic --name Guardian-Critical
659
- aws sns create-topic --name Guardian-Warning
660
- aws sns create-topic --name Guardian-Task
661
- aws sns create-topic --name Guardian-Informational
662
- ```
663
-
664
- SNS topics can be defined in the YAML config or during the `deploy` command using the sns switches. The full ARN must be used.
665
-
666
- ```yaml
667
- Topics:
668
- Critical: arn:aws:sns:ap-southeast-2:111111111111:Guardian-Critical
669
- Warning: arn:aws:sns:ap-southeast-2:111111111111:Guardian-Warning
670
- Task: arn:aws:sns:ap-southeast-2:111111111111:Guardian-Task
671
- Informational: arn:aws:sns:ap-southeast-2:111111111111:Guardian-Informational
672
- ```
673
-
674
- ## M Out Of N Metric Data Points
675
-
676
- This can be good to alert on groups of spikes within a certain time frame without getting alerts for individual spikes.
677
- It works by setting the `EvaluationPeriods` as N value and `DatapointsToAlarm` as the M value.
678
- The following example will trigger the alarm if 6 out of 10 data points crossed the threshold of 90% CPU utilisation in a 10 minute period.
679
-
680
- ```yaml
681
- Templates:
682
- Ec2Instance:
683
- CPUUtilizationHigh:
684
- Threshold: 90
685
- Period: 60
686
- EvaluationPeriods: 10
687
- DatapointsToAlarm: 6
688
- ```
689
-
690
- ## Composite Alarms
691
-
692
- Composite alarms take into account a combination of alarm states and only alarm when all conditions in the rule are met. See the AWS [documentation](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_PutCompositeAlarm.html) for rule syntax.
693
-
694
- Using the `Composites:` top level key, create the alarm using the following syntax.
695
-
696
- **NOTE:** Each composite alarm costs $0.50/month
697
-
698
- ```yaml
699
- Composites:
700
-
701
- # the key is used as the alarm name
702
- AlarmName:
703
- # Set the notification SNS topic, defaults to no notifications
704
- Action: Informational
705
- # Set a meaningful alarm description
706
- Description: test
707
- # Set the alarm rule by providing the alarm names. See above for rule syntax.
708
- # Use the show-state command to get a list of the alarm names.
709
- Rule: >-
710
- ALARM(guardian-alarm-1)
711
- AND
712
- ALARM(guardian-alarm-2)
713
- ```
714
-
715
- ## Maintenance Mode
716
-
717
- CloudWatch alarms can be enabled and disabled to allow maintenance periods without getting alert notifications.
718
- Alarms can be provided to the function the following ways
719
-
720
- **Alarm Names**
721
-
722
- Alarm names can be provided as a space-delimited list using the `--alarms` switch.
723
-
724
- ```bash
725
- cfn-guardian disable-alarms --group alarm-1 alarm-2
726
- cfn-guardian enable-alarms --group alarm-1 alarm-2
727
- ```
728
-
729
- **Alarm Name Prefix**
730
-
731
- Alarm name prefix will find the alarms in the account and region that start with the provided string.
732
- This can be useful if required to disable all guardian alarms, disable all alarms for a resource group or for a specific resource.
733
- Alarm names are created using the following convention.
734
-
735
- `guardian` - `ResourceGroupName` - `ResourceId` or `FriendlyName` - `AlarmName`
736
-
737
- The following example would disable/enable all alarms for all ECS Services
738
-
739
- ```bash
740
- cfn-guardian disable-alarms --alarm-prefix guardian-ECSService
741
- cfn-guardian enable-alarms --alarm-prefix guardian-ECSService
742
- ```
743
-
744
- The following example would disable/enable all alarms for the ECS Service app
745
-
746
- ```bash
747
- cfn-guardian disable-alarms --alarm-prefix guardian-ECSService-app
748
- cfn-guardian enable-alarms --alarm-prefix guardian-ECSService-app
749
- ```
750
-
751
- **Maintenance Groups**
752
-
753
- Maintenance groups are defined in the `alarms.yaml` config and create a logical mapping between alarms.
754
-
755
- ```yaml
756
- Resources:
757
-
758
- ApplicationTargetGroup:
759
- - Id: app-tg
760
- LoadBalancer: public-lb
761
-
762
- AutoScalingGroup:
763
- - Id: ecs-asg
764
-
765
- ECSCluster:
766
- - Id: prod
767
-
768
- ECSService:
769
- - Id: app
770
- Cluster: prod
771
-
772
- Http:
773
- - Id: https://myapp.com
774
- StatusCode: 200
775
-
776
- # Define the top level key
777
- MaintenaceGroups:
778
-
779
- # Define the group name
780
- AppUpdate:
781
- # Define the resource group
782
- ECSService:
783
- # define the alarms in the resource group
784
- UnhealthyTaskCritical:
785
- # define the resource id's
786
- - Id: app
787
- # or the friendly name
788
- - Name: app
789
- Http:
790
- EndpointAvailable:
791
- - Id: https://myapp.com
792
- EndpointStatusCodeMatch:
793
- - Id: https://myapp.com
794
- ```
795
-
796
- ```bash
797
- cfn-guardian disable-alarms --group AppUpdate
798
- cfn-guardian enable-alarms --group AppUpdate
799
- ```
800
-
801
- ## Severities
802
-
803
- Severities are defined in each alarm using the `AlarmAction` key. There are 4 options: `[ Critical, Warning, Task, Informational ]`
804
-
805
- ## Contributing
806
-
807
- Bug reports and pull requests are welcome on GitHub at https://github.com/base2services/cfn-guardian.
808
-
809
- ## License
810
-
811
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).