@hiiretail/gcp-infra-cli 0.70.0 → 0.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ {
2
+ "name": "Root Cause Analysis documentation",
3
+ "description": "Create a Root Cause Analysis template"
4
+ }
@@ -0,0 +1,44 @@
1
+ const chalk = require('chalk');
2
+ const BaseGenerator = require('../../../src/BaseGenerator');
3
+ const { required } = require('../../../src/validators');
4
+
5
+ module.exports = class extends BaseGenerator {
6
+ prompting() {
7
+ const prompts = [
8
+ {
9
+ type: 'input',
10
+ name: 'description',
11
+ message: 'A very short description of the problem',
12
+ validate: required,
13
+ },
14
+ {
15
+ type: 'input',
16
+ name: 'date',
17
+ message: 'Date of the incident',
18
+ validate: required,
19
+ },
20
+ ];
21
+
22
+ return this.prompt(prompts).then((props) => {
23
+ this.answers = props;
24
+ });
25
+ }
26
+
27
+ writing() {
28
+ const { date } = this.answers;
29
+ const filename = `rca_${date}`;
30
+
31
+ this.fs.copyTpl(
32
+ this.templatePath('docs', 'rca.md'),
33
+ this.destinationPath('docs', 'rca', `${filename}.md`),
34
+ this.answers,
35
+ );
36
+ }
37
+
38
+ end() {
39
+ this.log(`
40
+ ${chalk.green('Your RCA template has now been created.')}
41
+ Next, push this change in a feature branch and open a pull request.
42
+ `);
43
+ }
44
+ };
@@ -0,0 +1,75 @@
1
+ # RCA - <%-description%> - <%-date%>
2
+
3
+ ## Overview
4
+
5
+ **Incident start**: <!-- The date and time when the incident started, for example: 2022-06-14 14.36 CET -->
6
+
7
+ **Incident end**: <!-- The date and time when the incident was resolved, for example: 2022-06-14 14.53 CET -->
8
+
9
+ **Problem statement**: <!-- Describe, in short, what the problem was -->
10
+
11
+ **Impacted customer(s)**: <!-- Which customer(s) were affected -->
12
+
13
+ **Impact to customer**: <!-- Describe how the customer, and end customers, were affected by the incident -->
14
+
15
+ **Ticket information**: <!-- Add link(s) to any Jira issues -->
16
+
17
+ **Services involved**: <!-- List the services that were involved in the incident -->
18
+
19
+ ## Sequence of Events
20
+
21
+ <!--
22
+ Describe the events that caused the incident, starting from first getting notified about the incident until the incident was resolved.
23
+
24
+ Example:
25
+
26
+ 2022-06-14 14.36 - Alert X was triggered
27
+ 2022-06-14 14.36 - Team started working on the incident
28
+ 2022-06-14 14.49 - A fix was pushed and deployed
29
+ 2022-06-14 14.53 - Incident was resolved
30
+ -->
31
+
32
+ ## Five Whys
33
+
34
+ <!--
35
+ Five whys is a technique used to explore the cause and effect underlying a particular problem. The primary goal of the technique is to determine the root cause of a problem by repeating the question "Why?". Each answer forms the basis of the next question.
36
+
37
+ Example:
38
+
39
+ Problem: The vehicle won't start
40
+
41
+ 1. __Why?__ - The battery is dead (First why)
42
+ 2. __Why?__ - The alternator is not functioning (Second why)
43
+ 3. __Why?__ - The alternator belt is broken (Third why)
44
+ 4. __Why?__ - The alternator belt was well beyond its useful service life and not replaced. (Fourth why)
45
+ 5. __Why?__ - The vehicle was not maintained according to the recommended service schedule. (Fifth why, the root cause)
46
+ -->
47
+
48
+ 1. __Why?__
49
+ 2. __Why?__
50
+ 3. __Why?__
51
+ 4. __Why?__
52
+ 5. __Why?__
53
+
54
+ ## Summary
55
+
56
+ <!--
57
+ Write a short summary of what the problem was, what the root cause was, and which action items, if any, were taken.
58
+ -->
59
+
60
+ ## Action items
61
+
62
+ <!--
63
+ A table that describes the different actions that were the outcome of the analysis, who is the owner of each task, and the status of each task.
64
+ The status should be updated until the action is completed.
65
+
66
+ Example:
67
+ | Description | Owner | Date | Status |
68
+ |-------------|-------|------|--------|
69
+ | Create alert for high CPU Usage | Bob the Builder | 2022-06-14 | Not started |
70
+ | | | | |
71
+ -->
72
+
73
+ | Description | Owner | Date | Status |
74
+ |-------------|-------|------|--------|
75
+ | | | | |
@@ -2,49 +2,50 @@
2
2
 
3
3
  ## General
4
4
 
5
- Describe in short what the purpose of the solution is.
5
+ <!-- Describe in short what the purpose of the solution is. -->
6
6
 
7
7
  ## Architecture
8
8
 
9
- Include C4 diagrams or links to the Software Guidebook.
9
+ <!-- Include C4 diagrams or links to the Software Guidebook. -->
10
10
 
11
11
  ## Business Continuity and Disaster Recovery Plan
12
12
 
13
- Link to the Business Continuity and Disaster Recovery Plan documentation.
13
+ <!-- Link to the Business Continuity and Disaster Recovery Plan documentation. -->
14
14
 
15
15
  ## Services
16
16
 
17
- A short description of what the purpose of each service is. Links to the log files of all the services that are included in the solution.
17
+ <!-- A short description of what the purpose of each service is. Links to the log files of all the services that are included in the solution. -->
18
18
 
19
19
  ## Dashboard
20
20
 
21
- Links to one or multiple dashboards.
21
+ <!-- Links to one or multiple dashboards. -->
22
22
 
23
23
  ## Service Level Objectives
24
24
 
25
- What are the SLOs?
25
+ <!-- What are the SLOs? -->
26
26
 
27
27
  ## Alerts
28
28
 
29
- What are the alerts that has been setup, where is alert sent to and what are the steps to mitigate the issue?
29
+ <!-- Which alerts have been set up, where are they sent, and what are the steps to mitigate the issue? -->
30
30
 
31
31
  ## Health Checks
32
32
 
33
- Links to the configured uptime checks that has been setup in GCP.
33
+ <!-- Links to the configured uptime checks that have been set up in GCP. -->
34
34
 
35
35
  ## How do I..?
36
36
 
37
- Good to know things. Such as `How do I check the price for a specific item?`
37
+ <!-- Good to know things. Such as `How do I check the price for a specific item?` -->
38
38
 
39
39
  ## Known Issues
40
40
 
41
- Are there any known issues? If yes, what is the workaround to solve them?
41
+ <!-- Are there any known issues? If yes, what is the workaround to solve them? -->
42
42
 
43
43
  ## Contact & Escalation Matrix
44
44
 
45
- If the team is unable to resolve the issue, who is the first in line to contact?
45
+ <!-- If the team is unable to resolve the issue, who is the first in line to contact?
46
46
 
47
47
  | # | Name | Role | E-Mail | Phone number |
48
48
  | --- | --- | --- | --- | --- |
49
49
  | 1 | Mr X | My Role | mr.x@x.com | 1234567 |
50
50
  | 2 | Mr Y | My Role | mr.y@y.com | 7654321 |
51
+ -->
@@ -1,9 +1,43 @@
1
1
  # Observability and SRE
2
2
 
3
- ## SLIs
3
+ <!--
4
+ Explain what metrics you will monitor and how you will monitor them. Read through the SRE Chapter documentation
5
+ and adopt the guidelines and naming conventions that exist: https://github.com/extenda/chap-sre-docs/blob/master/README.md
4
6
 
5
- ## SLOs
7
+ List
6
8
 
7
- ## Observability
9
+ * that the clan will use GCP Operations to monitor their service and GCP-resources
10
+ * that the clan will keep this documentation up-to-date through the lifecycle of the service
11
+ * that the clan will create and maintain a Runbook
12
+ -->
8
13
 
9
- ## Operations monitoring and alert
14
+ ## Service A
15
+
16
+ <!--
17
+ List the SLIs and SLOs per service. More information about SLIs and SLOs can be found here: https://github.com/extenda/chap-sre-docs/blob/master/docs/service-levels.md
18
+ All services must list and explain how they will monitor and have SLOs set for:
19
+
20
+ * Availability
21
+ * Latency
22
+ * Throughput
23
+ * Error rate
24
+
25
+ If the team needs to monitor any additional SLOs, it must describe how it plans to monitor them.
26
+
27
+ If not using fully-managed services by Google, SLIs and SLOs need to be set for those as well. When using fully-managed services, only SLIs need to be monitored.
28
+ Example:
29
+
30
+ * A clan is using Cloud SQL
31
+ * Then they need to set SLIs and SLOs based on when they need to increase CPU and Memory for example
32
+
33
+ -->
34
+
35
+ ### SLIs
36
+
37
+ ### SLOs
38
+
39
+ ## Service B
40
+
41
+ ### SLIs
42
+
43
+ ### SLOs
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hiiretail/gcp-infra-cli",
3
- "version": "0.70.0",
3
+ "version": "0.71.0",
4
4
  "description": "Infrastructure as code generator for GCP.",
5
5
  "main": "src/cli.js",
6
6
  "bin": {