@hiiretail/gcp-infra-cli 0.75.2 → 0.76.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/generators/resources/cloud-storage/templates/cloud-storage/storage.yaml +23 -0
- package/generators/resources/cloud-storage/templates/cloud-storage/terragrunt.hcl +1 -2
- package/generators/resources/monitoring/handle-yaml.js +28 -0
- package/generators/resources/monitoring/index.js +55 -116
- package/generators/resources/monitoring/templates/alerts/alerts.yaml +8 -102
- package/generators/resources/monitoring/templates/alerts/terragrunt.hcl +4 -3
- package/generators/resources/monitoring/templates/slos/slos.yaml +25 -19
- package/generators/resources/monitoring/templates/slos/terragrunt.hcl +2 -14
- package/generators/resources/monitoring/templates/uptime-checks/terragrunt.hcl +7 -5
- package/generators/resources/monitoring/templates/uptime-checks/uptime-checks.yaml +3 -3
- package/generators/resources/monitoring/validate.js +23 -12
- package/package.json +1 -1
- package/generators/resources/cloud-storage/templates/cloud-storage/spec.hcl +0 -47
- package/generators/resources/monitoring/append.js +0 -105
- package/generators/resources/monitoring/handle-alerts.js +0 -11
- package/generators/resources/monitoring/handle-slos.js +0 -28
- package/generators/resources/monitoring/handle-uptime.js +0 -28
package/generators/resources/cloud-storage/templates/cloud-storage/storage.yaml
@@ -0,0 +1,23 @@
+names:
+- "<%-bucketName%>"
+prefix: "<%-prefix%>"
+storage_class: "STANDARD" <% if (env == 'prod') { %>
+location: "EU" <% } else { %>
+location: "EUROPE-WEST1"<% } %>
+versioning:
+"<%-bucketName%>": <%-versioning%>
+set_viewer_roles: true <% if (lifecycleRules == 'true') { %>
+lifecycle_rules:
+action:
+type: "<%-action%>" <% if (action == 'SetStorageClass') { %>
+storage_class: "<%-storageClass%>" <% } else { %>
+storage_class: null <% } %>
+condition:
+age: <%-age%> <% if (createdBefore != 'null') { %>
+created_before: "<%-createdBefore%>" <% } else { %>
+created_before: null <% } %> <% if (withState != 'null') { %>
+with_state: "<%-withState%>" <% } else { %>
+with_state: null <% } %> <% if (matchesStorageClass != 'null') { %>
+matches_storage_class: "<%-matchesStorageClass%>" <% } else { %>
+matches_storage_class: null <% } %>
+num_newer_versions: <%-numNewerVersions%> <% } %>
package/generators/resources/cloud-storage/templates/cloud-storage/terragrunt.hcl
@@ -10,14 +10,13 @@ include {
 }
 
 locals {
-spec_vars = read_terragrunt_config("${get_terragrunt_dir()}/spec.hcl")
 project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
 common_vars = read_terragrunt_config(find_in_parent_folders("common.hcl"))
 }
 
 # These are the variables we have to pass in to use the module specified in the terragrunt configuration above
 inputs = merge(
-
+yamldecode(file("${get_terragrunt_dir()}/storage.yaml")),
 local.project_vars.locals,
 {
 project_id = local.project_vars.locals.project_id
package/generators/resources/monitoring/handle-yaml.js
@@ -0,0 +1,28 @@
+const ejs = require('ejs');
+
+const handleAlerts = (alerts, templates, answers) => {
+const template = templates[`${answers.alertResource}`][`${answers.alert}`];
+const newAlert = JSON.parse(ejs.render(JSON.stringify(template), answers));
+
+alerts.push(newAlert);
+return alerts;
+};
+
+const handleSlos = (slos, templates, answers) => {
+const template = templates[`${answers.sli}`];
+const newSLO = JSON.parse(ejs.render(JSON.stringify(template), answers));
+
+if (answers.burnRateAlert === 'no') newSLO.alert = {};
+
+slos.push(newSLO);
+return slos;
+};
+
+const handleUptimeChecks = (slos, templates, answers) => {
+const newCheck = JSON.parse(ejs.render(JSON.stringify(templates), answers));
+
+slos.push(newCheck);
+return slos;
+};
+
+module.exports = { handleAlerts, handleSlos, handleUptimeChecks };
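The three handlers above share one pattern: serialize a YAML template to JSON, render it with EJS using the prompt answers, parse it back, and append it to whatever list is already on disk. A minimal sketch of that round trip, assuming the generator's template layout and an answers object shaped like the prompts in index.js (the concrete answer values and the output path are illustrative, not taken from the package):

const fs = require('fs');
const yaml = require('js-yaml');
const { handleAlerts } = require('./handle-yaml');

// Templates shipped with the generator (same path index.js reads).
const alertTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/alerts/alerts.yaml`));

// Hypothetical answers, mirroring the prompt names used by the generator.
const answers = {
  alertResource: 'cloud_run',
  alert: 'request_latency',
  systemName: 'sre',
  serviceName: 'my-service',
  runbookLink: '',
};

const yamlPath = 'infra/prod/monitoring/alerts/alerts.yaml'; // illustrative output path
const existing = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || []; // current policies, or an empty list
const updated = handleAlerts(existing, alertTemplates, answers); // render the template and append it
fs.writeFileSync(yamlPath, yaml.dump(updated)); // write the merged list back as YAML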
package/generators/resources/monitoring/index.js
@@ -4,12 +4,12 @@ const fs = require('fs');
 const yaml = require('js-yaml');
 const BaseGenerator = require('../../../src/BaseGenerator');
 const { required } = require('../../../src/validators');
-const
-const
-const handleUptimeFile = require('./handle-uptime');
-const handleAlerts = require('./handle-alerts');
+const validate = require('./validate');
+const { handleSlos, handleAlerts, handleUptimeChecks } = require('./handle-yaml');
 
+const uptimeCheckTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/uptime-checks/uptime-checks.yaml`));
 const alertTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/alerts/alerts.yaml`));
+const sloTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/slos/slos.yaml`));
 
 module.exports = class extends BaseGenerator {
 async prompting() {
@@ -33,14 +33,14 @@ module.exports = class extends BaseGenerator {
 choices: (answers) => Object.keys(alertTemplates[`${answers.alertResource}`]),
 },
 {
-when: (response) =>
+when: (response) => ['alerts', 'slos', 'uptime-checks'].includes(response.monitoringResource),
 type: 'input',
 name: 'systemName',
-message: 'Please provide three-letter system name as defined in Styra',
-validate: required &&
+message: 'Please provide three-letter system name as defined in Styra (example: sre, ptf, sda, che, pnp, iam...)',
+validate: required && validate.systemName,
 },
 {
-when: (response) =>
+when: (response) => ['slos', 'uptime-checks'].includes(response.monitoringResource) || response.alertResource === 'cloud_run',
 type: 'input',
 name: 'serviceName',
 message: 'Please provide the namespace where the service resides',
@@ -51,7 +51,7 @@ module.exports = class extends BaseGenerator {
 type: 'input',
 name: 'runbookLink',
 message: 'Please provide the full URL to your runbook in confluence (Leave empty if none)',
-validate: required &&
+validate: required && validate.url,
 },
 {
 when: (response) => response.alertResource === 'cloud_scheduler',
@@ -65,34 +65,35 @@ module.exports = class extends BaseGenerator {
 type: 'input',
 name: 'databaseId',
 message: 'Please provide the "database id"',
-validate: required,
+validate: required && validate.databaseId,
 },
 {
 when: (response) => response.alertResource === 'memorystore',
 type: 'input',
 name: 'instanceId',
 message: 'Please provide the "instance id"',
-validate: required,
+validate: required && validate.instanceID,
 },
 {
 when: (response) => response.alertResource === 'pub_sub',
 type: 'input',
 name: 'subscriptionId',
 message: 'Please provide the "subscription id"',
-validate: required,
+validate: required && validate.pubSubSubscription,
 },
 {
 when: (response) => response.monitoringResource === 'uptime-checks',
 type: 'input',
 name: 'hostname',
 message: 'Please provide the base hostname of the service (example: my-service.retailsvc.com)',
-validate: required &&
+validate: required && validate.hostName,
 },
 {
 when: (response) => response.monitoringResource === 'uptime-checks',
 type: 'input',
 name: 'path',
-message: 'Please provide the path to
+message: 'Please provide the path/endpoint to run the check against',
+default: '/health',
 validate: required,
 },
 {
@@ -100,132 +101,70 @@ module.exports = class extends BaseGenerator {
 type: 'list',
 name: 'sli',
 message: 'Please select the SLI',
-choices:
+choices: Object.keys(sloTemplates),
+},
+{
+when: (response) => response.monitoringResource === 'slos' && response.sli === 'availability',
+type: 'input',
+name: 'uptimeCheckId',
+message: 'Please provide the "Uptime Check ID" (NOTE: The "Uptime check" needs to have been created first in order to fetch the ID)',
+validate: required,
 },
 {
 when: (response) => response.monitoringResource === 'slos',
 type: 'list',
-name: '
-message: 'Please select yes if you want to
+name: 'burnRateAlert',
+message: 'Please select yes if you want to have burn-rate alerts included',
 default: 'yes',
 choices: ['yes', 'no'],
 },
-{
-when: (response) => response.monitoringResource === 'slos' && response.sli === 'availability',
-type: 'confirm',
-name: 'info',
-message: 'WARNING: Make sure that an uptime check has been created before applying availability SLI',
-},
 ]);
 }
 
 async writing() {
-const {
-
-serviceName,
-hostname,
-sli,
-systemName,
-burnRateAlerts,
-} = this.answers;
+const { monitoringResource, serviceName } = this.answers;
+const resourceDir = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource);
 
-const
+const copyTemplate = (resource, resourcePath, yamlPath) => {
+if (!fs.existsSync(resourcePath)) fs.mkdirSync(resourcePath, { recursive: true });
+if (!fs.existsSync(yamlPath)) fs.writeFileSync(yamlPath, '');
+if (!fs.existsSync(`${path}/terragrunt.hcl`)) {
+this.fs.copyTpl(
+this.templatePath(`${resource}/terragrunt.hcl`),
+this.destinationPath(`${resourcePath}/terragrunt.hcl`),
+this.answers,
+);
+}
+};
 
 if (monitoringResource === 'uptime-checks') {
-
-fs.mkdirSync(resourcePath, { recursive: true });
-}
+const yamlPath = `${resourceDir}/uptime-checks.yaml`;
 
-
-
-
-
-
-
-...this.answers,
-serviceName,
-hostname,
-systemName,
-},
-);
-} else {
-await handleUptimeFile(this.answers, uptimeYamlFile);
-}
+copyTemplate('uptime-checks', resourceDir, yamlPath);
+
+const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
+const newYaml = await handleUptimeChecks(oldYaml, uptimeCheckTemplates, this.answers);
+
+fs.writeFileSync(yamlPath, yaml.dump(newYaml));
 }
 
 if (monitoringResource === 'slos') {
-const
-const
-const
-const contents = fs.readFileSync(fileName, 'utf-8');
-const result = contents.includes(str);
-return result;
-};
-
-if (!fs.existsSync(serviceDir)) {
-fs.mkdirSync(serviceDir, { recursive: true });
-}
+const service = serviceName.replace(/ /g, '-').toLowerCase();
+const servicePath = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource, service);
+const yamlPath = `${servicePath}/slos.yaml`;
 
-
-if (fileContainsFilter(`${serviceDir}/terragrunt.hcl`, 'metric_filter') === false) {
-this.fs.copyTpl(
-this.templatePath('slos/terragrunt.hcl'),
-this.destinationPath(`${serviceDir}/terragrunt.hcl`),
-{
-...this.answers,
-monitoringResource,
-serviceName,
-systemName,
-burnRateAlerts,
-},
-);
-}
-} else {
-this.fs.copyTpl(
-this.templatePath('slos/terragrunt.hcl'),
-this.destinationPath(`${serviceDir}/terragrunt.hcl`),
-{
-...this.answers,
-monitoringResource,
-serviceName,
-systemName,
-burnRateAlerts,
-},
-);
-}
+copyTemplate('slos', servicePath, yamlPath);
 
-const
-
-
-
-this.destinationPath(sloYamlFile),
-{
-...this.answers,
-monitoringResource,
-serviceName,
-systemName,
-sli,
-burnRateAlerts,
-},
-);
-} else {
-await handleSlosFile(this.answers, sloYamlFile);
-}
+const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
+const newYaml = await handleSlos(oldYaml, sloTemplates, this.answers);
+
+fs.writeFileSync(yamlPath, yaml.dump(newYaml));
 }
 
 if (monitoringResource === 'alerts') {
-const yamlPath = `${
-const terraPath = `${resourcePath}/terragrunt.hcl`;
-if (!fs.existsSync(resourcePath)) fs.mkdirSync(resourcePath, { recursive: true });
-if (!fs.existsSync(yamlPath)) fs.writeFileSync(yamlPath, '');
+const yamlPath = `${resourceDir}/alerts.yaml`;
 
-
-this.fs.copyTpl(
-this.templatePath('alerts/terragrunt.hcl'),
-this.destinationPath(terraPath),
-this.answers,
-);
-}
+copyTemplate('alerts', resourceDir, yamlPath);
 
 const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
 const newYaml = await handleAlerts(oldYaml, alertTemplates, this.answers);
package/generators/resources/monitoring/templates/alerts/alerts.yaml
@@ -1,37 +1,4 @@
 cloud_run:
-error_count:
-display_name: "[P3] <%-systemName%>.<%-serviceName%> | 5xx Error Request Count above 1"
-conditions:
-- display_name: Cloud Run Anthos - 5xx error Request Count above 1
-condition_threshold:
-filter: |
-resource.type="knative_revision"
-resource.labels.service_name="<%-serviceName%>"
-metric.type="knative.dev/serving/revision/request_count"
-metric.labels.response_code_class="5xx"
-threshold_value: 1
-aggregations:
-- alignment_period: 60s
-cross_series_reducer: REDUCE_SUM
-group_by_fields:
-- metric.label.response_code_class
-per_series_aligner: ALIGN_DELTA
-documentation:
-content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
-error_rate:
-display_name: "[P3] <%-systemName%>.<%-serviceName%> | High 5xx Error Rate"
-conditions:
-- display_name: Cloud Run Anthos - 3% of all requests during 10min are 5xx
-condition_monitoring_query_language:
-query: |
-fetch knative_revision::knative.dev/serving/revision/request_count
-| filter service_name = "store-data-resolver"
-| align int_mean_aligner(10m)
-| group_by [], sum(if(metric.response_code_class == '5xx', val(), 0)) / sum(val())
-| condition val() > 0.03
-| every 10m
-documentation:
-content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 request_latency:
 display_name: "[P3] <%-systemName%>.<%-serviceName%> | High Request Latency"
 conditions:
@@ -45,7 +12,6 @@ cloud_run:
 duration: 300s
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_PERCENTILE_95
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
@@ -63,13 +29,12 @@ cloud_scheduler:
 threshold_value: 1
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_COUNT
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 cloud_sql:
 cpu_over_65:
-display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 65%"
+display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId.substring(databaseId.lastIndexOf(':') + 1)%> - CPU over 65%"
 conditions:
 - display_name: Cloud SQL Database - CPU utilization above 65% over 5 min
 condition_threshold:
@@ -81,12 +46,11 @@ cloud_sql:
 duration: 300s
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_MAX
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 cpu_over_85:
-display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 85%"
+display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId.substring(databaseId.lastIndexOf(':') + 1)%> - CPU over 85%"
 conditions:
 - display_name: "Cloud SQL Database - CPU-usage above 85% over 1 min"
 condition_threshold:
@@ -98,12 +62,11 @@ cloud_sql:
 duration: 60s
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_MAX
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 cpu_over_90:
-display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 90%"
+display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId.substring(databaseId.lastIndexOf(':') + 1)%> - CPU over 90%"
 conditions:
 - display_name: Cloud SQL Database - CPU-usage above 90%
 condition_threshold:
@@ -114,63 +77,11 @@ cloud_sql:
 threshold_value: 0.9
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_MAX
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
-memory_over_50:
-display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 50%"
-conditions:
-- display_name: Cloud SQL Database - Memory utilization above 50% over 5 min
-condition_threshold:
-filter: |
-resource.type="cloudsql_database"
-resource.labels.database_id="<%-databaseId%>"
-metric.type="cloudsql.googleapis.com/database/memory/utilization"
-threshold_value: 50
-duration: 300s
-aggregations:
-- alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
-per_series_aligner: ALIGN_MAX
-documentation:
-content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
-memory_over_75:
-display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 75%"
-conditions:
-- display_name: Cloud SQL Database - Memory utilization above 75% over 5 min
-condition_threshold:
-filter: |
-resource.type="cloudsql_database"
-resource.labels.database_id="<%-databaseId%>"
-metric.type="cloudsql.googleapis.com/database/memory/utilization"
-threshold_value: 75
-duration: 300s
-aggregations:
-- alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
-per_series_aligner: ALIGN_MAX
-documentation:
-content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
-memory_over_90:
-display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 90%"
-conditions:
-- display_name: Cloud SQL Database - Memory utilization above 90%
-condition_threshold:
-filter: |
-resource.type="cloudsql_database"
-resource.labels.database_id="<%-databaseId%>"
-metric.type="cloudsql.googleapis.com/database/memory/utilization"
-threshold_value: 90
-duration: 60s
-aggregations:
-- alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
-per_series_aligner: ALIGN_MAX
-documentation:
-content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 query_over_1s:
-display_name: "[P4] <%-systemName%> - CloudSQL | <%-databaseId%> - Query resolve time"
+display_name: "[P4] <%-systemName%> - CloudSQL | <%-databaseId.substring(databaseId.lastIndexOf(':') + 1)%> - Query resolve time"
 conditions:
 - display_name: Cloud SQL Instance Database - Per query execution times above 1000 ms
 condition_threshold:
@@ -181,13 +92,12 @@ cloud_sql:
 threshold_value: 1000000
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_DELTA
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 memorystore:
 memory_over_50:
-display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 50%"
+display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId.substring(instanceId.lastIndexOf('/') + 1)%> - Memory over 50%"
 conditions:
 - display_name: Memorystore Redis Instance - Memory Usage above 50% over 5 min
 condition_threshold:
@@ -199,12 +109,11 @@ memorystore:
 duration: 300s
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_MAX
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 memory_over_75:
-display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 75%"
+display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId.substring(instanceId.lastIndexOf('/') + 1)%> - Memory over 75%"
 conditions:
 - display_name: Memorystore Redis Instance - Memory Usage above 75% for 5min
 condition_threshold:
@@ -216,12 +125,11 @@ memorystore:
 duration: 300s
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_MAX
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 memory_over_90:
-display_name: "[P2] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 90%"
+display_name: "[P2] <%-systemName%> - Memorystore | <%-instanceId.substring(instanceId.lastIndexOf('/') + 1)%> - Memory over 90%"
 conditions:
 - display_name: Memorystore Redis Instance - Memory Usage above 90%
 condition_threshold:
@@ -233,13 +141,12 @@ memorystore:
 duration: 60s
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_MAX
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
 pub_sub:
 unacknowledged_messages:
-display_name: "[P4] <%-systemName%> - Pub/Sub | <%-subscriptionId%> - Undelivered message(s)"
+display_name: "[P4] <%-systemName%> - Pub/Sub | <%-subscriptionId.substring(subscriptionId.lastIndexOf('/') + 1)%> - Undelivered message(s)"
 conditions:
 - display_name: Cloud Pub/Sub Subscription - Undelivered messages above 1 for 5 min
 condition_threshold:
@@ -251,7 +158,6 @@ pub_sub:
 duration: 300s
 aggregations:
 - alignment_period: 60s
-cross_series_reducer: REDUCE_NONE
 per_series_aligner: ALIGN_MEAN
 documentation:
 content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
package/generators/resources/monitoring/templates/alerts/terragrunt.hcl
@@ -23,11 +23,12 @@ locals {
 
 # These are the variables we have to pass in to use the module specified in the terragrunt configuration above
 inputs = {
-monitoring_project_id = local.project_vars.locals
+monitoring_project_id = lookup(local.project_vars.locals, "monitoring_project_id", local.project_vars.locals.tribe_project_id),
 notification_channels = dependency.notification_channels.outputs.notification_channels,
 policies = yamldecode(file("${get_terragrunt_dir()}/alerts.yaml")),
 user_labels = {
-cc
-clan
+cc = local.common_vars.locals.cost_center
+clan = local.common_vars.locals.clan_name
+jira_project_key = lookup(local.project_vars.locals, "jira_project_key", null)
 },
 }
package/generators/resources/monitoring/templates/slos/slos.yaml
@@ -1,35 +1,41 @@
-
-
-goal: 0.95
-calendar_period: MONTH
-type: request_based_sli
-method: distribution_cut
-metric_filter: |-
-metric.type="knative.dev/serving/revision/request_latencies"
-resource.type="knative_revision"
-resource.labels.service_name="<%-serviceName%>"
-range_min: 0
-range_max: 100<% if (burnRateAlerts === 'no') { %>
-alert: {}<% } %><% } %><% if (sli === 'availability') { %>- display_name: Month - Availability
+availability:
+display_name: Month - Availability
 slo_id: month-availability
 goal: 0.998
 calendar_period: MONTH
 type: windows_based_sli
 method: boolean_filter
-window_period: 60s
-
+window_period: 60s
+metric_filter: |
+metric.type="monitoring.googleapis.com/uptime_check/check_passed"
+resource.type="uptime_url"
+metric.labels.check_id="<%-uptimeCheckId%>"
+error-rate:
+display_name: Month - Error rate
 slo_id: month-error-rate
 goal: 0.999
 calendar_period: MONTH
 type: request_based_sli
 method: good_total_ratio
-bad_service_filter:
+bad_service_filter: |
 metric.type="knative.dev/serving/revision/request_count"
 resource.type="knative_revision"
 metric.labels.response_code_class="5xx"
 resource.labels.service_name="<%-serviceName%>"
-total_service_filter:
+total_service_filter: |
 metric.type="knative.dev/serving/revision/request_count"
 resource.type="knative_revision"
-resource.labels.service_name="<%-serviceName%>"
-
+resource.labels.service_name="<%-serviceName%>"
+latency:
+display_name: Month - Latency
+slo_id: month-latency
+goal: 0.95
+calendar_period: MONTH
+type: request_based_sli
+method: distribution_cut
+range_min: 0
+range_max: 100
+metric_filter: |
+metric.type="knative.dev/serving/revision/request_latencies"
+resource.type="knative_revision"
+resource.labels.service_name="<%-serviceName%>"
package/generators/resources/monitoring/templates/slos/terragrunt.hcl
@@ -9,13 +9,6 @@ include {
 path = find_in_parent_folders("terragrunt_root.hcl")
 }
 
-dependency "uptimecheck_id" {
-config_path = "../../uptime-checks"
-mock_outputs = {
-uptime_check_ids = ["dummy-id"]
-}
-}
-
 locals {
 project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
 }
@@ -25,13 +18,8 @@ inputs = merge(
 local.project_vars.locals,
 {
 service_name = "<%-systemName%>.<%-serviceName%>"
+monitoring_project_id = lookup(local.project_vars.locals, "monitoring_project_id", local.project_vars.locals.tribe_project_id),
 slos = yamldecode(file("${get_terragrunt_dir()}/slos.yaml")),
-telemetry_resource_name = "//container.googleapis.com/projects/${local.project_vars.locals.
-<% if (sli === 'availability') { %>
-metric_filter = {
-"metric.type" = "monitoring.googleapis.com/uptime_check/check_passed"
-"resource.type" = "uptime_url"
-"metric.labels.check_id" = dependency.uptimecheck_id.outputs.uptime_check_ids["<%-systemName%>.<%-serviceName%>"]
-}<% } %>
+telemetry_resource_name = "//container.googleapis.com/projects/${lookup(local.project_vars.locals, "monitoring_project_id", local.project_vars.locals.tribe_project_id)}/locations/europe-west1/clusters/k8s-cluster/k8s/namespaces/<%-serviceName%>"
 }
 )
package/generators/resources/monitoring/templates/uptime-checks/terragrunt.hcl
@@ -26,11 +26,13 @@ inputs = merge(
 local.project_vars.locals,
 local.common_vars.locals,
 {
-
-
-
-
-
+monitoring_project_id = lookup(local.project_vars.locals, "monitoring_project_id", local.project_vars.locals.tribe_project_id),
+notification_channels = dependency.notification_channels.outputs.notification_channels
+uptime_checks = yamldecode(file("${get_terragrunt_dir()}/uptime-checks.yaml")),
+labels = {
+clan = local.common_vars.locals.clan_name
+cc = local.common_vars.locals.cost_center
+jira_project_key = lookup(local.project_vars.locals, "jira_project_key", null)
 }
 }
 )
package/generators/resources/monitoring/templates/uptime-checks/uptime-checks.yaml
@@ -1,3 +1,3 @@
-
-
-
+service_name: <%-systemName%>.<%-serviceName%>
+hostname: <%-hostname%>
+path: <%-path%>
package/generators/resources/monitoring/validate.js
@@ -1,25 +1,36 @@
-const
+const validate = {};
 
-
+validate.hostName = (input) => {
 const regex = new RegExp(/^(?:[a-z-]+\.){1,3}[a-z-]+$/g);
-if (input.match(regex))
-return true;
-}
+if (input.match(regex)) return true;
 return 'Hostname must not include path to the page to run the check against or spaces';
 };
 
-
-if (input.replace(/\s/g, '').length === 3)
-return true;
-}
+validate.systemName = (input) => {
+if (input.replace(/\s/g, '').length === 3) return true;
 return 'System name must be 3 characters';
 };
 
-
+validate.url = (input) => {
 // eslint-disable-next-line no-useless-escape
 const regex = new RegExp(/^https:\/\/[a-zA-Z]*.[a-zA-Z]*.[a-zA-Z]*\/[a-zA-Z\/+_-]*.$/g);
 if (regex.test(input) || input === '') return true;
-return '
+return 'You must enter a valid URL';
 };
 
-
+validate.instanceID = (input) => {
+if (input.split('/').length === 6) return true;
+return 'You must enter the full instance path (example: projects/example/locations/europe-west1/instances/instanceID)';
+};
+
+validate.databaseId = (input) => {
+if (input.split(':').length === 2) return true;
+return 'You must enter the full database path (example: my-project:databaseID)';
+};
+
+validate.pubSubSubscription = (input) => {
+if (input.split('/').length === 4) return true;
+return 'You must enter the full subscription path (example: projects/example/subscriptions/subscriptionId)';
+};
+
+module.exports = validate;
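The validators added above follow the Inquirer-style convention these prompts appear to use: return true to accept the input, or a string that is shown as the error message. A quick sketch of exercising them directly (the sample values are made up, not taken from the package):

const validate = require('./validate');

console.log(validate.systemName('sre')); // true (exactly three characters)
console.log(validate.systemName('sr')); // 'System name must be 3 characters'
console.log(validate.hostName('my-service.retailsvc.com')); // true
console.log(validate.databaseId('my-project:my-db')); // true (project:database form)
console.log(validate.pubSubSubscription('projects/example/subscriptions/my-sub')); // true
console.log(validate.url('https://example.company.com/runbooks/my-service')); // true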
package/package.json: changed (diff not shown)
package/generators/resources/cloud-storage/templates/cloud-storage/spec.hcl
@@ -1,47 +0,0 @@
-locals {
-###################
-# REQUIRED INPUTS #
-###################
-
-names = ["<%-bucketName%>"]
-prefix = "<%-prefix%>"
-
-
-###################
-# OPTIONAL INPUTS #
-###################
-
-# The Storage Class of the new bucket.
-# Supported values include: STANDARD, MULTI_REGIONAL, REGIONAL, NEARLINE, COLDLINE, ARCHIVE.
-storage_class = "STANDARD"
-
-# The GCS location - 'EU' for multi-regional buckets.
-<% if (env == 'prod') { %>
-location = "EU"
-<% } else { %>
-location = "EUROPE-WEST1"
-<% } %>
-versioning = {
-"<%-bucketName%>" = <%-versioning%>
-}
-set_viewer_roles = true
-
-<% if (lifecycleRules == 'true') { %>
-lifecycle_rules = [{
-action = {
-type = "<%-action%>" <% if (action == 'SetStorageClass') { %>
-storage_class = "<%-storageClass%>" <% } else { %>
-storage_class = null <% } %>
-}
-condition = {
-age = <%-age%> <% if (createdBefore != 'null') { %>
-created_before = "<%-createdBefore%>" <% } else { %>
-created_before = null <% } %> <% if (withState != 'null') { %>
-with_state = "<%-withState%>" <% } else { %>
-with_state = null <% } %> <% if (matchesStorageClass != 'null') { %>
-matches_storage_class = "<%-matchesStorageClass%>" <% } else { %>
-matches_storage_class = null <% } %>
-num_newer_versions = <%-numNewerVersions%>
-}
-}] <% } %>
-}
package/generators/resources/monitoring/append.js
@@ -1,105 +0,0 @@
-const fs = require('fs');
-const yaml = require('js-yaml');
-
-const appendIncludeConfigSlo = async (fileContent, originalContentYaml, slosFilePath, inputs) => {
-if (fileContent !== null && fileContent !== '') {
-const configArray = Object.values(originalContentYaml);
-const yamlPullArray = yaml.dump(configArray);
-fs.writeFileSync(slosFilePath, `${yamlPullArray}`);
-}
-
-const newPullArray = [];
-
-const availabilityConf = {
-display_name: 'Month - Availability',
-slo_id: 'month-availability',
-goal: 0.998,
-calendar_period: 'MONTH',
-type: 'windows_based_sli',
-method: 'boolean_filter',
-window_period: '60s',
-};
-
-if (inputs.sli === 'availability') {
-if (inputs.burnRateAlerts === 'no') {
-availabilityConf.alert = {};
-}
-newPullArray.push(availabilityConf);
-}
-
-const errorRateConf = {
-display_name: 'Month - Error rate',
-slo_id: 'month-error-rate',
-goal: 0.999,
-calendar_period: 'MONTH',
-type: 'request_based_sli',
-method: 'good_total_ratio',
-bad_service_filter:
-`metric.type="knative.dev/serving/revision/request_count"
-resource.type="knative_revision"
-metric.labels.response_code_class="5xx"
-resource.labels.service_name="${inputs.serviceName}"`,
-total_service_filter:
-`metric.type="knative.dev/serving/revision/request_count"
-resource.type="knative_revision"
-resource.labels.service_name=${inputs.serviceName}"`,
-};
-
-if (inputs.sli === 'error-rate') {
-if (inputs.burnRateAlerts === 'no') {
-errorRateConf.alert = {};
-}
-newPullArray.push(errorRateConf);
-}
-
-const latencyConf = {
-display_name: 'Month - Latency',
-slo_id: 'month-latency',
-goal: 0.95,
-calendar_period: 'MONTH',
-type: 'request_based_sli',
-method: 'distribution_cut',
-metric_filter:
-`metric.type="knative.dev/serving/revision/request_latencies"
-resource.type="knative_revision"
-resource.labels.service_name="${inputs.serviceName}"`,
-range_min: 0,
-range_max: 100,
-};
-
-if (inputs.sli === 'latency') {
-if (inputs.burnRateAlerts === 'no') {
-latencyConf.alert = {};
-}
-newPullArray.push(latencyConf);
-}
-
-const finalYamlPullArray = yaml.dump(newPullArray);
-fs.appendFileSync(slosFilePath, finalYamlPullArray);
-};
-
-const appendIncludeConfigUptime = async (fileContent, uptimeContentYml, uptimeFilePath, inputs) => {
-if (fileContent !== null && fileContent !== '') {
-const configArray = Object.values(uptimeContentYml);
-const yamlPullArray = yaml.dump(configArray);
-fs.writeFileSync(uptimeFilePath, `${yamlPullArray}`);
-}
-
-const newPullArray = [];
-
-newPullArray.push(
-{
-service_name: `${inputs.systemName}.${inputs.serviceName}`,
-hostname: inputs.hostname,
-path: inputs.path,
-},
-);
-
-const finalYamlPullArray = yaml.dump(newPullArray);
-fs.appendFileSync(uptimeFilePath, finalYamlPullArray);
-};
-
-module.exports = {
-appendIncludeConfigSlo,
-appendIncludeConfigUptime,
-};
package/generators/resources/monitoring/handle-alerts.js
@@ -1,11 +0,0 @@
-const ejs = require('ejs');
-
-const handleAlerts = (alerts, templates, answers) => {
-const template = templates[`${answers.alertResource}`][`${answers.alert}`];
-const newAlert = ejs.render(JSON.stringify(template), answers);
-
-alerts.push(JSON.parse(newAlert));
-return alerts;
-};
-
-module.exports = handleAlerts;
package/generators/resources/monitoring/handle-slos.js
@@ -1,28 +0,0 @@
-const fs = require('fs');
-const yaml = require('js-yaml');
-const { appendIncludeConfigSlo } = require('./append');
-
-const handleSlosFile = async (answers, slosFilePath) => {
-const {
-serviceName,
-sli,
-systemName,
-burnRateAlerts,
-} = answers;
-
-const sloFileContent = fs.readFileSync(slosFilePath, 'utf8');
-
-const inputs = {
-...this.answers,
-serviceName,
-sli,
-systemName,
-burnRateAlerts,
-};
-
-const originalContentYaml = yaml.load(sloFileContent);
-const fileContent = sloFileContent;
-await appendIncludeConfigSlo(fileContent, originalContentYaml, slosFilePath, inputs);
-};
-
-module.exports = handleSlosFile;
package/generators/resources/monitoring/handle-uptime.js
@@ -1,28 +0,0 @@
-const fs = require('fs');
-const yaml = require('js-yaml');
-const { appendIncludeConfigUptime } = require('./append');
-
-const handleUptimeFile = async (answers, uptimeFilePath) => {
-const {
-serviceName,
-hostname,
-path,
-systemName,
-} = answers;
-
-const uptimeFileContent = fs.readFileSync(uptimeFilePath, 'utf8');
-
-const inputs = {
-...this.answers,
-serviceName,
-hostname,
-path,
-systemName,
-};
-
-const originalContentYaml = yaml.load(uptimeFileContent);
-const fileContent = uptimeFileContent;
-await appendIncludeConfigUptime(fileContent, originalContentYaml, uptimeFilePath, inputs);
-};
-
-module.exports = handleUptimeFile;