@hiiretail/gcp-infra-cli 0.70.1 → 0.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/generators/docs/rca/generator.json +4 -0
- package/generators/docs/rca/index.js +44 -0
- package/generators/docs/rca/templates/docs/rca.md +75 -0
- package/generators/init/clan-infra/templates/env/project.hcl +4 -3
- package/generators/init/clan-infra/templates/infra/.terraform-version +1 -1
- package/generators/init/clan-infra/templates/infra/.terragrunt-version +1 -1
- package/generators/organization/clan-project/index.js +1 -1
- package/generators/organization/clan-project/templates/clan/clan.yaml +4 -2
- package/generators/organization/clan-project/templates/clan-project/terragrunt.hcl +1 -1
- package/generators/resources/monitoring/append.js +94 -0
- package/generators/resources/monitoring/generator.json +1 -1
- package/generators/resources/monitoring/handle-slos.js +26 -0
- package/generators/resources/monitoring/handle-uptime.js +28 -0
- package/generators/resources/monitoring/index.js +137 -25
- package/generators/resources/monitoring/templates/notification-channels/terragrunt.hcl +2 -0
- package/generators/resources/monitoring/templates/slos/slos.yaml +32 -0
- package/generators/resources/monitoring/templates/slos/terragrunt.hcl +36 -0
- package/generators/resources/monitoring/templates/uptime-checks/terragrunt.hcl +36 -0
- package/generators/resources/monitoring/templates/uptime-checks/uptime-checks.yaml +3 -0
- package/generators/resources/monitoring/validate.js +18 -0
- package/generators/tribe-resources/tribe-project/templates/project/terragrunt.hcl +1 -1
- package/package.json +1 -1

package/generators/docs/rca/index.js
@@ -0,0 +1,44 @@
+const chalk = require('chalk');
+const BaseGenerator = require('../../../src/BaseGenerator');
+const { required } = require('../../../src/validators');
+
+module.exports = class extends BaseGenerator {
+  prompting() {
+    const prompts = [
+      {
+        type: 'input',
+        name: 'description',
+        message: 'A very short description of the problem',
+        validate: required,
+      },
+      {
+        type: 'input',
+        name: 'date',
+        message: 'Date of the incident',
+        validate: required,
+      },
+    ];
+
+    return this.prompt(prompts).then((props) => {
+      this.answers = props;
+    });
+  }
+
+  writing() {
+    const { date } = this.answers;
+    const filename = `rca_${date}`;
+
+    this.fs.copyTpl(
+      this.templatePath('docs', 'rca.md'),
+      this.destinationPath('docs', 'rca', `${filename}.md`),
+      this.answers,
+    );
+  }
+
+  end() {
+    this.log(`
+${chalk.green('Your RCA template have now been created.')}
+Next, push this change in a feature branch and open a pull request.
+`);
+  }
+};

package/generators/docs/rca/templates/docs/rca.md
@@ -0,0 +1,75 @@
+# RCA - <%-description%> - <%-date%>
+
+## Overview
+
+**Incident start**: <!-- The date and time when the incident started, for example: 2022-06-14 14.36 CET -->
+
+**Incident end**: <!-- The date and time when the incident was resolved, for example: 2022-06-14 14.53 CET -->
+
+**Problem statement**: <!-- Describe, in short, what the problem was -->
+
+**Impacted customer(s)**: <!-- What customer(s) that were affected -->
+
+**Impact to customer**: <!-- Describe how the customer, and end customers, was affected by the incident -->
+
+**Ticket information**: <!-- Add link(s) to any Jira issues -->
+
+**Services involved**: <!-- List the services that were involved in the incident -->
+
+## Sequence of Events
+
+<!--
+Describe the events that caused the incident, starting from first getting notified about the incident until the incident was resolved.
+
+Example:
+
+2022-06-14 14.36 - Alert X was triggered
+2022-06-14 14.36 - Team started working on the incident
+2022-06-14 14.49 - A fix was pushed and deployed
+2022-06-14 14.53 - Incident was resolved
+-->
+
+## Five Whys
+
+<!--
+Five whys is a technique used to explore the cause and effect underlying a particular problem. The primary goal of the technique is to determine the root cause of a problem by repeating the question "Why?". Each answer forms the basis of the next question.
+
+Example:
+
+Problem: The vehicle won't start
+
+1. __Why?__ - The battery is dead (First why)
+2. __Why?__ - The alternator is not functioning (Second why)
+3. __Why?__ - The alternator belt is broken (Third why)
+4. __Why?__ - The alternator belt was well beyond its useful service life and not replaced. (Fourth why)
+5. __Why?__ - The vehicle was not maintained according to the recommended service schedule. (Fifth why, the root cause)
+-->
+
+1. __Why?__
+2. __Why?__
+3. __Why?__
+4. __Why?__
+5. __Why?__
+
+## Summary
+
+<!--
+Write a short summary of what the problem was, what the root cause was and what potentially action items that were taken.
+-->
+
+## Action items
+
+<!--
+A table that describes the different actions that was the outcome of the analysis, who is the owner of the task and the status of the task.
+The status should be updated until the action is completed.
+
+Example:
+| Description | Owner | Date | Status |
+|-------------|-------|------|--------|
+| Create alert for high CPU Usage | Bob the Builder | 2022-06-14 | Not started |
+| | | | |
+-->
+
+| Description | Owner | Date | Status |
+|-------------|-------|------|--------|
+| | | | |

package/generators/init/clan-infra/templates/env/project.hcl
@@ -5,7 +5,8 @@ locals {
   project_id = "<%-clanProject%>"
   project_env = "<%-env%>"
 
-  project
-  network
-  tribe_project_id
+  project = local.project_id
+  network = "tribe-network"
+  tribe_project_id = "<%-tribeProject%>"
+  monitoring_project_id = "<%-tribeProject%>" # possibly will be changed to hiiretail-monitoring-prod-6500 later
 }

package/generators/init/clan-infra/templates/infra/.terraform-version
@@ -1 +1 @@
-1.
+1.2.7

package/generators/init/clan-infra/templates/infra/.terragrunt-version
@@ -1 +1 @@
-0.
+0.38.0

package/generators/organization/clan-project/index.js
@@ -123,7 +123,7 @@ module.exports = class extends BaseGenerator {
     this.log(`
 ${chalk.green('Your clan projects have now been created. To finalize your configuration, please continue with manual editing of the generated files.')}
 ${chalk.green('1.')} Add clan members and groups
-\u2192 ${chalk.cyan(path.join(clanDir, '
+\u2192 ${chalk.cyan(path.join(clanDir, 'clan.yaml'))}
 ${chalk.green('2.')} Configure APIs, service accounts and repositories
 \u2192 ${chalk.cyan(path.join(clanDir, 'prod', 'project.yaml'))}
 \u2192 ${chalk.cyan(path.join(clanDir, 'staging', 'project.yaml'))}

package/generators/organization/clan-project/templates/clan/clan.yaml
@@ -8,8 +8,10 @@
 # members:
 #   groups: []
 #   users:
-# -
-#
+#     - name: Alice Test
+#       email: alice@extendaretail.com
+#     - name: Bob Test
+#       email: bob@extendaretail.com
 ###
 ---
 common-infra-repo: <%-commonInfraRepo%>

package/generators/resources/monitoring/append.js
@@ -0,0 +1,94 @@
+const fs = require('fs');
+const yaml = require('js-yaml');
+
+const appendIncludeConfigSlo = async (fileContent, originalContentYaml, slosFilePath, inputs) => {
+  if (fileContent !== null && fileContent !== '') {
+    const configArray = Object.values(originalContentYaml);
+    const yamlPullArray = yaml.dump(configArray);
+    fs.writeFileSync(slosFilePath, `${yamlPullArray}`);
+  }
+
+  const newPullArray = [];
+
+  if (inputs.sli === 'availability') {
+    newPullArray.push(
+      {
+        display_name: 'Month - Availability',
+        slo_id: 'month-availability',
+        goal: 0.998,
+        calendar_period: 'MONTH',
+        type: 'windows_based_sli',
+        method: 'boolean_filter',
+        window_period: '60s',
+      },
+    );
+  }
+  if (inputs.sli === 'error-rate') {
+    newPullArray.push(
+      {
+        display_name: 'Month - Error rate',
+        slo_id: 'month-error-rate',
+        goal: 0.999,
+        calendar_period: 'MONTH',
+        type: 'request_based_sli',
+        method: 'good_total_ratio',
+        bad_service_filter:
+          `metric.type="knative.dev/serving/revision/request_count"
+          resource.type="knative_revision"
+          metric.labels.response_code_class="5xx"
+          resource.labels.service_name="${inputs.serviceName}"`,
+        total_service_filter:
+          `metric.type="knative.dev/serving/revision/request_count"
+          resource.type="knative_revision"
+          resource.labels.service_name=${inputs.serviceName}"`,
+      },
+    );
+  }
+  if (inputs.sli === 'latency') {
+    newPullArray.push(
+      {
+        display_name: 'Month - Latency',
+        slo_id: 'month-latency',
+        goal: 0.95,
+        calendar_period: 'MONTH',
+        type: 'request_based_sli',
+        method: 'distribution_cut',
+        metric_filter:
+          `metric.type="knative.dev/serving/revision/request_latencies"
+          resource.type="knative_revision"
+          resource.labels.service_name="${inputs.serviceName}"`,
+        range_min: 0,
+        range_max: 100,
+      },
+    );
+  }
+
+  const finalYamlPullArray = yaml.dump(newPullArray);
+  fs.appendFileSync(slosFilePath, finalYamlPullArray);
+};
+
+const appendIncludeConfigUptime = async (fileContent, uptimeContentYml, uptimeFilePath, inputs) => {
+  if (fileContent !== null && fileContent !== '') {
+    const configArray = Object.values(uptimeContentYml);
+    const yamlPullArray = yaml.dump(configArray);
+    fs.writeFileSync(uptimeFilePath, `${yamlPullArray}`);
+  }
+
+  const newPullArray = [];
+
+  newPullArray.push(
+    {
+      service_name: `${inputs.systemName}.${inputs.serviceName}`,
+      hostname: inputs.hostname,
+      path: inputs.path,
+    },
+  );
+
+  const finalYamlPullArray = yaml.dump(newPullArray);
+  fs.appendFileSync(uptimeFilePath, finalYamlPullArray);
+};
+
+module.exports = {
+  appendIncludeConfigSlo,
+  appendIncludeConfigUptime,
+};

package/generators/resources/monitoring/handle-slos.js
@@ -0,0 +1,26 @@
+const fs = require('fs');
+const yaml = require('js-yaml');
+const { appendIncludeConfigSlo } = require('./append');
+
+const handleSlosFile = async (answers, slosFilePath) => {
+  const {
+    serviceName,
+    sli,
+    systemName,
+  } = answers;
+
+  const sloFileContent = fs.readFileSync(slosFilePath, 'utf8');
+
+  const inputs = {
+    ...this.answers,
+    serviceName,
+    sli,
+    systemName,
+  };
+
+  const originalContentYaml = yaml.load(sloFileContent);
+  const fileContent = sloFileContent;
+  await appendIncludeConfigSlo(fileContent, originalContentYaml, slosFilePath, inputs);
+};
+
+module.exports = handleSlosFile;

package/generators/resources/monitoring/handle-uptime.js
@@ -0,0 +1,28 @@
+const fs = require('fs');
+const yaml = require('js-yaml');
+const { appendIncludeConfigUptime } = require('./append');
+
+const handleUptimeFile = async (answers, uptimeFilePath) => {
+  const {
+    serviceName,
+    hostname,
+    path,
+    systemName,
+  } = answers;
+
+  const uptimeFileContent = fs.readFileSync(uptimeFilePath, 'utf8');
+
+  const inputs = {
+    ...this.answers,
+    serviceName,
+    hostname,
+    path,
+    systemName,
+  };
+
+  const originalContentYaml = yaml.load(uptimeFileContent);
+  const fileContent = uptimeFileContent;
+  await appendIncludeConfigUptime(fileContent, originalContentYaml, uptimeFilePath, inputs);
+};
+
+module.exports = handleUptimeFile;

package/generators/resources/monitoring/index.js
@@ -1,6 +1,11 @@
 const path = require('path');
 const chalk = require('chalk');
+const fs = require('fs');
 const BaseGenerator = require('../../../src/BaseGenerator');
+const { required } = require('../../../src/validators');
+const helper = require('./validate');
+const handleSlosFile = require('./handle-slos');
+const handleUptimeFile = require('./handle-uptime');
 
 module.exports = class extends BaseGenerator {
   prompting() {
@@ -8,9 +13,51 @@ module.exports = class extends BaseGenerator {
       {
         type: 'list',
         name: 'monitoringResource',
-        message: 'Select the resource
-        default: '
-        choices: ['
+        message: 'Select the resource you want to create',
+        default: 'uptime-checks',
+        choices: ['uptime-checks', 'slos'],
+      },
+      {
+        when: (response) => response.monitoringResource === 'uptime-checks' || 'slos',
+        type: 'input',
+        name: 'systemName',
+        message: 'Please provide three-letter system name as defined in Styra',
+        validate: required && helper.validSystemName,
+      },
+      {
+        when: (response) => response.monitoringResource === 'uptime-checks' || 'slos',
+        type: 'input',
+        name: 'serviceName',
+        message: 'Please provide the namespace where the service resides',
+        validate: required,
+      },
+      {
+        when: (response) => response.monitoringResource === 'uptime-checks',
+        type: 'input',
+        name: 'hostname',
+        message: 'Please provide the base hostname of the service (example: my-service.retailsvc.com)',
+        validate: required && helper.validHostname,
+      },
+      {
+        when: (response) => response.monitoringResource === 'uptime-checks',
+        type: 'input',
+        name: 'path',
+        message: 'Please provide the path to the page to run the check against. (example: /health)',
+        validate: required,
+      },
+      {
+        when: (response) => response.monitoringResource === 'slos',
+        type: 'list',
+        name: 'sli',
+        message: 'Please select the SLI',
+        default: 'availability',
+        choices: ['availability', 'error-rate', 'latency'],
+      },
+      {
+        when: (response) => response.monitoringResource === 'slos' && response.sli === 'availability',
+        type: 'confirm',
+        name: 'info',
+        message: 'WARNING: Make sure that an uptime check has been created before applying availability SLI',
       },
     ];
 
@@ -19,37 +66,102 @@ module.exports = class extends BaseGenerator {
     });
   }
 
-  writing() {
+  async writing() {
     const {
       monitoringResource,
+      serviceName,
+      hostname,
+      sli,
+      systemName,
     } = this.answers;
 
-
-
-
-      path.join('infra', env, 'monitoring', monitoringResource),
-      {
-        ...this.answers,
-        env,
-      },
-    );
-    });
+    const serviceFolderName = serviceName.replace(/ /g, '-').toLowerCase();
+    const serviceDir = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource, serviceFolderName);
+    const uptimeDirPath = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource);
 
-
-
-
-
-
-
-
-
-
+    if (monitoringResource === 'uptime-checks') {
+      if (!fs.existsSync(uptimeDirPath)) {
+        fs.mkdirSync(uptimeDirPath, { recursive: true });
+      }
+
+      const uptimeYamlFile = `${uptimeDirPath}/uptime-checks.yaml`;
+      if (!fs.existsSync(uptimeYamlFile)) {
+        this.copyDir(
+          'uptime-checks',
+          uptimeDirPath,
+          {
+            ...this.answers,
+            serviceName,
+            hostname,
+            systemName,
+          },
+        );
+      } else {
+        await handleUptimeFile(this.answers, uptimeYamlFile);
+      }
+    }
+
+    if (monitoringResource === 'slos') {
+      const fileContainsFilter = (fileName, str) => {
+        const contents = fs.readFileSync(fileName, 'utf-8');
+        const result = contents.includes(str);
+        return result;
+      };
+
+      if (!fs.existsSync(serviceDir)) {
+        fs.mkdirSync(serviceDir, { recursive: true });
+      }
+
+      if (fs.existsSync(`${serviceDir}/terragrunt.hcl`)) {
+        if (fileContainsFilter(`${serviceDir}/terragrunt.hcl`, 'metric_filter') === false) {
+          this.fs.copyTpl(
+            this.templatePath('slos/terragrunt.hcl'),
+            this.destinationPath(`${serviceDir}/terragrunt.hcl`),
+            {
+              ...this.answers,
+              monitoringResource,
+              serviceName,
+              systemName,
+            },
+          );
+        }
+      } else {
+        this.fs.copyTpl(
+          this.templatePath('slos/terragrunt.hcl'),
+          this.destinationPath(`${serviceDir}/terragrunt.hcl`),
+          {
+            ...this.answers,
+            monitoringResource,
+            serviceName,
+            systemName,
+          },
+        );
+      }
+
+      const sloYamlFile = `${serviceDir}/slos.yaml`;
+      if (!fs.existsSync(sloYamlFile)) {
+        this.fs.copyTpl(
+          this.templatePath('slos/slos.yaml'),
+          this.destinationPath(sloYamlFile),
+          {
+            ...this.answers,
+            monitoringResource,
+            serviceName,
+            systemName,
+            sli,
+          },
+        );
+      } else {
+        await handleSlosFile(this.answers, sloYamlFile);
+      }
+    }
   }
 
   end() {
     this.log(`
 ${chalk.green('Your Monitoring resources have now been created.')}
-${chalk.green('1.')}
+${chalk.green('1.')} To finalize your configuration, please continue with manual editing of the generated files.
+${chalk.green('2.')} Push the changes in a feature branch and open a pull request.
+`);
   }
 };

package/generators/resources/monitoring/templates/notification-channels/terragrunt.hcl
@@ -18,5 +18,7 @@ locals {
 inputs = merge(local.project_vars.locals, local.common_vars.locals,
   {
     clan_project_id = local.project_vars.locals.project_id
+    # Use var below if we decide to go with hiiretail-monitoring-prod project
+    #tribe_project_id = local.common_vars.locals.monitoring_project_id
   }
 )

package/generators/resources/monitoring/templates/slos/slos.yaml
@@ -0,0 +1,32 @@
+<% if (sli === 'latency') { %>- display_name: Month - Latency
+  slo_id: month-latency
+  goal: 0.95
+  calendar_period: MONTH
+  type: request_based_sli
+  method: distribution_cut
+  metric_filter: |-
+    metric.type="knative.dev/serving/revision/request_latencies"
+    resource.type="knative_revision"
+    resource.labels.service_name="<%-serviceName%>"
+  range_min: 0
+  range_max: 100<% } %><% if (sli === 'availability') { %>- display_name: Month - Availability
+  slo_id: month-availability
+  goal: 0.998
+  calendar_period: MONTH
+  type: windows_based_sli
+  method: boolean_filter
+  window_period: 60s<% } %><% if (sli === 'error-rate') { %>- display_name: Month - Error rate
+  slo_id: month-error-rate
+  goal: 0.999
+  calendar_period: MONTH
+  type: request_based_sli
+  method: good_total_ratio
+  bad_service_filter: |-
+    metric.type="knative.dev/serving/revision/request_count"
+    resource.type="knative_revision"
+    metric.labels.response_code_class="5xx"
+    resource.labels.service_name="<%-serviceName%>"
+  total_service_filter: |-
+    metric.type="knative.dev/serving/revision/request_count"
+    resource.type="knative_revision"
+    resource.labels.service_name="<%-serviceName%>"<% } %>

package/generators/resources/monitoring/templates/slos/terragrunt.hcl
@@ -0,0 +1,36 @@
+# Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
+# working directory, into a temporary folder, and execute your Terraform commands in that folder.
+terraform {
+  source = "git::https://github.com/extenda/tf-module-gcp-slo//?ref=v0.1.0"
+}
+
+# Include all settings from the root terragrunt.hcl file
+include {
+  path = find_in_parent_folders("terragrunt_root.hcl")
+}
+
+dependency "uptimecheck_id" {
+  config_path = "../../uptime-checks"
+  mock_outputs = {
+    uptime_check_ids = ["dummy-id"]
+  }
+}
+
+locals {
+  project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
+}
+
+# These are the variables we have to pass in to use the module specified in the terragrunt configuration above
+inputs = merge(
+  local.project_vars.locals,
+  {
+    service_name = "<%-systemName%>.<%-serviceName%>"
+    slos = yamldecode(file("${get_terragrunt_dir()}/slos.yaml")),
+    <% if (sli === 'availability') { %>
+    metric_filter = {
+      "metric.type" = "monitoring.googleapis.com/uptime_check/check_passed"
+      "resource.type" = "uptime_url"
+      "metric.labels.check_id" = dependency.uptimecheck_id.outputs.uptime_check_ids["<%-systemName%>.<%-serviceName%>"]
+    }<% } %>
+  }
+)

package/generators/resources/monitoring/templates/uptime-checks/terragrunt.hcl
@@ -0,0 +1,36 @@
+# Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
+# working directory, into a temporary folder, and execute your Terraform commands in that folder.
+terraform {
+  source = "git::https://github.com/extenda/tf-module-gcp-uptime-check//?ref=v0.1.0"
+}
+
+# Include all settings from the root terragrunt.hcl file
+include {
+  path = find_in_parent_folders("terragrunt_root.hcl")
+}
+
+dependency "notification_channels" {
+  config_path = "../notification-channels"
+  mock_outputs = {
+    notification_channels = ["dummy-channel"]
+  }
+}
+
+locals {
+  project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
+  common_vars = read_terragrunt_config(find_in_parent_folders("common.hcl"))
+}
+
+# These are the variables we have to pass in to use the module specified in the terragrunt configuration above
+inputs = merge(
+  local.project_vars.locals,
+  local.common_vars.locals,
+  {
+    notification_channels = dependency.notification_channels.outputs.notification_channels
+    uptime_checks = yamldecode(file("${get_terragrunt_dir()}/uptime-checks.yaml")),
+    labels = {
+      clan = local.common_vars.locals.clan_name
+      cc = local.common_vars.locals.cost_center
+    }
+  }
+)

package/generators/resources/monitoring/validate.js
@@ -0,0 +1,18 @@
+const helper = {};
+
+helper.validHostname = (input) => {
+  const regex = new RegExp(/^(?:[a-z-]+\.){1,3}[a-z-]+$/g);
+  if (input.match(regex)) {
+    return true;
+  }
+  return 'Hostname must not include path to the page to run the check against or spaces';
+};
+
+helper.validSystemName = (input) => {
+  if (input.replace(/\s/g, '').length === 3) {
+    return true;
+  }
+  return 'System name must be 3 characters';
+};
+
+module.exports = helper;