@hiiretail/gcp-infra-cli 0.72.0 → 0.75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/generators/resources/monitoring/append.js +49 -38
- package/generators/resources/monitoring/handle-alerts.js +11 -0
- package/generators/resources/monitoring/handle-slos.js +2 -0
- package/generators/resources/monitoring/index.js +96 -19
- package/generators/resources/monitoring/templates/alerts/alerts.yaml +257 -0
- package/generators/resources/monitoring/templates/alerts/terragrunt.hcl +33 -0
- package/generators/resources/monitoring/templates/slos/slos.yaml +6 -3
- package/generators/resources/monitoring/templates/slos/terragrunt.hcl +3 -2
- package/generators/resources/monitoring/templates/uptime-checks/uptime-checks.yaml +3 -3
- package/generators/resources/monitoring/validate.js +7 -0
- package/generators/resources/pubsub/append.js +26 -4
- package/generators/resources/pubsub/handle-subscribers.js +24 -5
- package/generators/resources/pubsub/index.js +25 -2
- package/generators/resources/pubsub/templates/pubsub/subscribers.yaml +4 -2
- package/generators/resources/pubsub/templates/pubsub-dlq/terragrunt.hcl +41 -0
- package/generators/resources/pubsub/templates/pubsub-external/subscribers.yaml +4 -2
- package/package.json +1 -1
|
@@ -10,57 +10,68 @@ const appendIncludeConfigSlo = async (fileContent, originalContentYaml, slosFile
|
|
|
10
10
|
|
|
11
11
|
const newPullArray = [];
|
|
12
12
|
|
|
13
|
+
const availabilityConf = {
|
|
14
|
+
display_name: 'Month - Availability',
|
|
15
|
+
slo_id: 'month-availability',
|
|
16
|
+
goal: 0.998,
|
|
17
|
+
calendar_period: 'MONTH',
|
|
18
|
+
type: 'windows_based_sli',
|
|
19
|
+
method: 'boolean_filter',
|
|
20
|
+
window_period: '60s',
|
|
21
|
+
};
|
|
22
|
+
|
|
13
23
|
if (inputs.sli === 'availability') {
|
|
14
|
-
|
|
15
|
-
{
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
goal: 0.998,
|
|
19
|
-
calendar_period: 'MONTH',
|
|
20
|
-
type: 'windows_based_sli',
|
|
21
|
-
method: 'boolean_filter',
|
|
22
|
-
window_period: '60s',
|
|
23
|
-
},
|
|
24
|
-
);
|
|
24
|
+
if (inputs.burnRateAlerts === 'no') {
|
|
25
|
+
availabilityConf.alert = {};
|
|
26
|
+
}
|
|
27
|
+
newPullArray.push(availabilityConf);
|
|
25
28
|
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
bad_service_filter:
|
|
29
|
+
|
|
30
|
+
const errorRateConf = {
|
|
31
|
+
display_name: 'Month - Error rate',
|
|
32
|
+
slo_id: 'month-error-rate',
|
|
33
|
+
goal: 0.999,
|
|
34
|
+
calendar_period: 'MONTH',
|
|
35
|
+
type: 'request_based_sli',
|
|
36
|
+
method: 'good_total_ratio',
|
|
37
|
+
bad_service_filter:
|
|
36
38
|
`metric.type="knative.dev/serving/revision/request_count"
|
|
37
39
|
resource.type="knative_revision"
|
|
38
40
|
metric.labels.response_code_class="5xx"
|
|
39
41
|
resource.labels.service_name="${inputs.serviceName}"`,
|
|
40
|
-
|
|
42
|
+
total_service_filter:
|
|
41
43
|
`metric.type="knative.dev/serving/revision/request_count"
|
|
42
44
|
resource.type="knative_revision"
|
|
43
45
|
resource.labels.service_name=${inputs.serviceName}"`,
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
if (inputs.sli === 'error-rate') {
|
|
49
|
+
if (inputs.burnRateAlerts === 'no') {
|
|
50
|
+
errorRateConf.alert = {};
|
|
51
|
+
}
|
|
52
|
+
newPullArray.push(errorRateConf);
|
|
46
53
|
}
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
metric_filter:
|
|
54
|
+
|
|
55
|
+
const latencyConf = {
|
|
56
|
+
display_name: 'Month - Latency',
|
|
57
|
+
slo_id: 'month-latency',
|
|
58
|
+
goal: 0.95,
|
|
59
|
+
calendar_period: 'MONTH',
|
|
60
|
+
type: 'request_based_sli',
|
|
61
|
+
method: 'distribution_cut',
|
|
62
|
+
metric_filter:
|
|
57
63
|
`metric.type="knative.dev/serving/revision/request_latencies"
|
|
58
64
|
resource.type="knative_revision"
|
|
59
65
|
resource.labels.service_name="${inputs.serviceName}"`,
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
66
|
+
range_min: 0,
|
|
67
|
+
range_max: 100,
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
if (inputs.sli === 'latency') {
|
|
71
|
+
if (inputs.burnRateAlerts === 'no') {
|
|
72
|
+
latencyConf.alert = {};
|
|
73
|
+
}
|
|
74
|
+
newPullArray.push(latencyConf);
|
|
64
75
|
}
|
|
65
76
|
|
|
66
77
|
const finalYamlPullArray = yaml.dump(newPullArray);
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
const ejs = require('ejs');

/**
 * Recursively renders every string inside a template node with EJS.
 *
 * Rendering each string on its own (instead of rendering the JSON text of
 * the whole template) means answers containing quotes or backslashes cannot
 * corrupt the structure of the resulting object.
 *
 * @param {*} node - Template value: string, array, plain object or scalar.
 * @param {object} data - Values made available to the EJS tags.
 * @returns {*} A copy of `node` with all strings rendered.
 */
const renderTemplateStrings = (node, data) => {
  if (typeof node === 'string') {
    return ejs.render(node, data);
  }
  if (Array.isArray(node)) {
    return node.map((item) => renderTemplateStrings(item, data));
  }
  if (node !== null && typeof node === 'object') {
    return Object.keys(node).reduce((rendered, key) => ({
      ...rendered,
      [key]: renderTemplateStrings(node[key], data),
    }), {});
  }
  // Numbers, booleans and null are copied through untouched.
  return node;
};

/**
 * Renders the alert template selected by the prompt answers and appends it
 * to the list of alert policies.
 *
 * @param {Array<object>} alerts - Existing alert policies; the new one is pushed onto it.
 * @param {object} templates - Alert templates keyed by resource, then by alert name.
 * @param {object} answers - Prompt answers; `alertResource` and `alert` select the
 *   template, and the whole object is exposed to the EJS tags.
 * @returns {Array<object>} The same `alerts` array, including the new policy.
 * @throws {Error} When no template exists for the selected resource/alert.
 */
const handleAlerts = (alerts, templates, answers) => {
  const resourceTemplates = templates[answers.alertResource];
  const template = resourceTemplates && resourceTemplates[answers.alert];
  if (!template) {
    throw new Error(`No alert template found for "${answers.alertResource}/${answers.alert}"`);
  }

  alerts.push(renderTemplateStrings(template, answers));
  return alerts;
};

module.exports = handleAlerts;
|
|
@@ -7,6 +7,7 @@ const handleSlosFile = async (answers, slosFilePath) => {
|
|
|
7
7
|
serviceName,
|
|
8
8
|
sli,
|
|
9
9
|
systemName,
|
|
10
|
+
burnRateAlerts,
|
|
10
11
|
} = answers;
|
|
11
12
|
|
|
12
13
|
const sloFileContent = fs.readFileSync(slosFilePath, 'utf8');
|
|
@@ -16,6 +17,7 @@ const handleSlosFile = async (answers, slosFilePath) => {
|
|
|
16
17
|
serviceName,
|
|
17
18
|
sli,
|
|
18
19
|
systemName,
|
|
20
|
+
burnRateAlerts,
|
|
19
21
|
};
|
|
20
22
|
|
|
21
23
|
const originalContentYaml = yaml.load(sloFileContent);
|
|
@@ -1,36 +1,86 @@
|
|
|
1
1
|
const path = require('path');
|
|
2
2
|
const chalk = require('chalk');
|
|
3
3
|
const fs = require('fs');
|
|
4
|
+
const yaml = require('js-yaml');
|
|
4
5
|
const BaseGenerator = require('../../../src/BaseGenerator');
|
|
5
6
|
const { required } = require('../../../src/validators');
|
|
6
7
|
const helper = require('./validate');
|
|
7
8
|
const handleSlosFile = require('./handle-slos');
|
|
8
9
|
const handleUptimeFile = require('./handle-uptime');
|
|
10
|
+
const handleAlerts = require('./handle-alerts');
|
|
11
|
+
|
|
12
|
+
const alertTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/alerts/alerts.yaml`));
|
|
9
13
|
|
|
10
14
|
module.exports = class extends BaseGenerator {
|
|
11
|
-
prompting() {
|
|
12
|
-
|
|
15
|
+
async prompting() {
|
|
16
|
+
this.answers = await this.prompt([
|
|
13
17
|
{
|
|
14
18
|
type: 'list',
|
|
15
19
|
name: 'monitoringResource',
|
|
16
20
|
message: 'Select the resource you want to create',
|
|
17
|
-
|
|
18
|
-
|
|
21
|
+
choices: ['alerts', 'uptime-checks', 'slos'],
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
when: (response) => response.monitoringResource === 'alerts',
|
|
25
|
+
type: 'list',
|
|
26
|
+
name: 'alertResource',
|
|
27
|
+
choices: Object.keys(alertTemplates),
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
when: (response) => response.monitoringResource === 'alerts',
|
|
31
|
+
type: 'list',
|
|
32
|
+
name: 'alert',
|
|
33
|
+
choices: (answers) => Object.keys(alertTemplates[`${answers.alertResource}`]),
|
|
19
34
|
},
|
|
20
35
|
{
|
|
21
|
-
when: (response) => response.monitoringResource === '
|
|
36
|
+
when: (response) => response.monitoringResource === ('alerts' || 'slos' || 'uptime-checks'),
|
|
22
37
|
type: 'input',
|
|
23
38
|
name: 'systemName',
|
|
24
39
|
message: 'Please provide three-letter system name as defined in Styra',
|
|
25
40
|
validate: required && helper.validSystemName,
|
|
26
41
|
},
|
|
27
42
|
{
|
|
28
|
-
when: (response) => response.monitoringResource === 'uptime-checks' || '
|
|
43
|
+
when: (response) => response.monitoringResource === ('slos' || 'uptime-checks') || response.alertResource === 'cloud_run',
|
|
29
44
|
type: 'input',
|
|
30
45
|
name: 'serviceName',
|
|
31
46
|
message: 'Please provide the namespace where the service resides',
|
|
32
47
|
validate: required,
|
|
33
48
|
},
|
|
49
|
+
{
|
|
50
|
+
when: (response) => response.monitoringResource === 'alerts',
|
|
51
|
+
type: 'input',
|
|
52
|
+
name: 'runbookLink',
|
|
53
|
+
message: 'Please provide the full URL to your runbook in confluence (Leave empty if none)',
|
|
54
|
+
validate: required && helper.validUrl,
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
when: (response) => response.alertResource === 'cloud_scheduler',
|
|
58
|
+
type: 'input',
|
|
59
|
+
name: 'jobId',
|
|
60
|
+
message: 'Please provide the "job id"',
|
|
61
|
+
validate: required,
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
when: (response) => response.alertResource === 'cloud_sql',
|
|
65
|
+
type: 'input',
|
|
66
|
+
name: 'databaseId',
|
|
67
|
+
message: 'Please provide the "database id"',
|
|
68
|
+
validate: required,
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
when: (response) => response.alertResource === 'memorystore',
|
|
72
|
+
type: 'input',
|
|
73
|
+
name: 'instanceId',
|
|
74
|
+
message: 'Please provide the "instance id"',
|
|
75
|
+
validate: required,
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
when: (response) => response.alertResource === 'pub_sub',
|
|
79
|
+
type: 'input',
|
|
80
|
+
name: 'subscriptionId',
|
|
81
|
+
message: 'Please provide the "subscription id"',
|
|
82
|
+
validate: required,
|
|
83
|
+
},
|
|
34
84
|
{
|
|
35
85
|
when: (response) => response.monitoringResource === 'uptime-checks',
|
|
36
86
|
type: 'input',
|
|
@@ -50,20 +100,23 @@ module.exports = class extends BaseGenerator {
|
|
|
50
100
|
type: 'list',
|
|
51
101
|
name: 'sli',
|
|
52
102
|
message: 'Please select the SLI',
|
|
53
|
-
default: 'availability',
|
|
54
103
|
choices: ['availability', 'error-rate', 'latency'],
|
|
55
104
|
},
|
|
105
|
+
{
|
|
106
|
+
when: (response) => response.monitoringResource === 'slos',
|
|
107
|
+
type: 'list',
|
|
108
|
+
name: 'burnRateAlerts',
|
|
109
|
+
message: 'Please select yes if you want to create burn rate alert for the SLI',
|
|
110
|
+
default: 'yes',
|
|
111
|
+
choices: ['yes', 'no'],
|
|
112
|
+
},
|
|
56
113
|
{
|
|
57
114
|
when: (response) => response.monitoringResource === 'slos' && response.sli === 'availability',
|
|
58
115
|
type: 'confirm',
|
|
59
116
|
name: 'info',
|
|
60
117
|
message: 'WARNING: Make sure that an uptime check has been created before applying availability SLI',
|
|
61
118
|
},
|
|
62
|
-
];
|
|
63
|
-
|
|
64
|
-
return this.prompt(prompts).then((props) => {
|
|
65
|
-
this.answers = props;
|
|
66
|
-
});
|
|
119
|
+
]);
|
|
67
120
|
}
|
|
68
121
|
|
|
69
122
|
async writing() {
|
|
@@ -73,22 +126,21 @@ module.exports = class extends BaseGenerator {
|
|
|
73
126
|
hostname,
|
|
74
127
|
sli,
|
|
75
128
|
systemName,
|
|
129
|
+
burnRateAlerts,
|
|
76
130
|
} = this.answers;
|
|
77
131
|
|
|
78
|
-
const
|
|
79
|
-
const serviceDir = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource, serviceFolderName);
|
|
80
|
-
const uptimeDirPath = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource);
|
|
132
|
+
const resourcePath = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource);
|
|
81
133
|
|
|
82
134
|
if (monitoringResource === 'uptime-checks') {
|
|
83
|
-
if (!fs.existsSync(
|
|
84
|
-
fs.mkdirSync(
|
|
135
|
+
if (!fs.existsSync(resourcePath)) {
|
|
136
|
+
fs.mkdirSync(resourcePath, { recursive: true });
|
|
85
137
|
}
|
|
86
138
|
|
|
87
|
-
const uptimeYamlFile = `${
|
|
139
|
+
const uptimeYamlFile = `${resourcePath}/uptime-checks.yaml`;
|
|
88
140
|
if (!fs.existsSync(uptimeYamlFile)) {
|
|
89
141
|
this.copyDir(
|
|
90
142
|
'uptime-checks',
|
|
91
|
-
|
|
143
|
+
resourcePath,
|
|
92
144
|
{
|
|
93
145
|
...this.answers,
|
|
94
146
|
serviceName,
|
|
@@ -102,6 +154,8 @@ module.exports = class extends BaseGenerator {
|
|
|
102
154
|
}
|
|
103
155
|
|
|
104
156
|
if (monitoringResource === 'slos') {
|
|
157
|
+
const serviceFolderName = serviceName.replace(/ /g, '-').toLowerCase();
|
|
158
|
+
const serviceDir = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource, serviceFolderName);
|
|
105
159
|
const fileContainsFilter = (fileName, str) => {
|
|
106
160
|
const contents = fs.readFileSync(fileName, 'utf-8');
|
|
107
161
|
const result = contents.includes(str);
|
|
@@ -122,6 +176,7 @@ module.exports = class extends BaseGenerator {
|
|
|
122
176
|
monitoringResource,
|
|
123
177
|
serviceName,
|
|
124
178
|
systemName,
|
|
179
|
+
burnRateAlerts,
|
|
125
180
|
},
|
|
126
181
|
);
|
|
127
182
|
}
|
|
@@ -134,6 +189,7 @@ module.exports = class extends BaseGenerator {
|
|
|
134
189
|
monitoringResource,
|
|
135
190
|
serviceName,
|
|
136
191
|
systemName,
|
|
192
|
+
burnRateAlerts,
|
|
137
193
|
},
|
|
138
194
|
);
|
|
139
195
|
}
|
|
@@ -149,12 +205,33 @@ module.exports = class extends BaseGenerator {
|
|
|
149
205
|
serviceName,
|
|
150
206
|
systemName,
|
|
151
207
|
sli,
|
|
208
|
+
burnRateAlerts,
|
|
152
209
|
},
|
|
153
210
|
);
|
|
154
211
|
} else {
|
|
155
212
|
await handleSlosFile(this.answers, sloYamlFile);
|
|
156
213
|
}
|
|
157
214
|
}
|
|
215
|
+
|
|
216
|
+
if (monitoringResource === 'alerts') {
|
|
217
|
+
const yamlPath = `${resourcePath}/alerts.yaml`;
|
|
218
|
+
const terraPath = `${resourcePath}/terragrunt.hcl`;
|
|
219
|
+
if (!fs.existsSync(resourcePath)) fs.mkdirSync(resourcePath, { recursive: true });
|
|
220
|
+
if (!fs.existsSync(yamlPath)) fs.writeFileSync(yamlPath, '');
|
|
221
|
+
|
|
222
|
+
if (!fs.existsSync(terraPath)) {
|
|
223
|
+
this.fs.copyTpl(
|
|
224
|
+
this.templatePath('alerts/terragrunt.hcl'),
|
|
225
|
+
this.destinationPath(terraPath),
|
|
226
|
+
this.answers,
|
|
227
|
+
);
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
|
|
231
|
+
const newYaml = await handleAlerts(oldYaml, alertTemplates, this.answers);
|
|
232
|
+
|
|
233
|
+
fs.writeFileSync(yamlPath, yaml.dump(newYaml));
|
|
234
|
+
}
|
|
158
235
|
}
|
|
159
236
|
|
|
160
237
|
end() {
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
cloud_run:
|
|
2
|
+
error_count:
|
|
3
|
+
display_name: "[P3] <%-systemName%>.<%-serviceName%> | 5xx Error Request Count above 1"
|
|
4
|
+
conditions:
|
|
5
|
+
- display_name: Cloud Run Anthos - 5xx error Request Count above 1
|
|
6
|
+
condition_threshold:
|
|
7
|
+
filter: |
|
|
8
|
+
resource.type="knative_revision"
|
|
9
|
+
resource.labels.service_name="<%-serviceName%>"
|
|
10
|
+
metric.type="knative.dev/serving/revision/request_count"
|
|
11
|
+
metric.labels.response_code_class="5xx"
|
|
12
|
+
threshold_value: 1
|
|
13
|
+
aggregations:
|
|
14
|
+
- alignment_period: 60s
|
|
15
|
+
cross_series_reducer: REDUCE_SUM
|
|
16
|
+
group_by_fields:
|
|
17
|
+
- metric.label.response_code_class
|
|
18
|
+
per_series_aligner: ALIGN_DELTA
|
|
19
|
+
documentation:
|
|
20
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
21
|
+
error_rate:
|
|
22
|
+
display_name: "[P3] <%-systemName%>.<%-serviceName%> | High 5xx Error Rate"
|
|
23
|
+
conditions:
|
|
24
|
+
- display_name: Cloud Run Anthos - 3% of all requests during 10min are 5xx
|
|
25
|
+
condition_monitoring_query_language:
|
|
26
|
+
query: |
|
|
27
|
+
fetch knative_revision::knative.dev/serving/revision/request_count
|
|
28
|
+
| filter service_name = "<%-serviceName%>"
|
|
29
|
+
| align int_mean_aligner(10m)
|
|
30
|
+
| group_by [], sum(if(metric.response_code_class == '5xx', val(), 0)) / sum(val())
|
|
31
|
+
| condition val() > 0.03
|
|
32
|
+
| every 10m
|
|
33
|
+
documentation:
|
|
34
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
35
|
+
request_latency:
|
|
36
|
+
display_name: "[P3] <%-systemName%>.<%-serviceName%> | High Request Latency"
|
|
37
|
+
conditions:
|
|
38
|
+
- display_name: Cloud Run Anthos - Response Time (95%) above 1s for 5 min
|
|
39
|
+
condition_threshold:
|
|
40
|
+
filter: |
|
|
41
|
+
resource.type="knative_revision"
|
|
42
|
+
resource.labels.service_name="<%-serviceName%>"
|
|
43
|
+
metric.type="knative.dev/serving/revision/request_latencies"
|
|
44
|
+
threshold_value: 1000
|
|
45
|
+
duration: 300s
|
|
46
|
+
aggregations:
|
|
47
|
+
- alignment_period: 60s
|
|
48
|
+
cross_series_reducer: REDUCE_NONE
|
|
49
|
+
per_series_aligner: ALIGN_PERCENTILE_95
|
|
50
|
+
documentation:
|
|
51
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
52
|
+
cloud_scheduler:
|
|
53
|
+
failed_job:
|
|
54
|
+
display_name: "[P4] <%-systemName%> - Cloud Scheduler | <%-jobId%> - Job Failed"
|
|
55
|
+
conditions:
|
|
56
|
+
- display_name: Cloud Scheduler Job - Log entries with SEVERITY=Error exceed threshold
|
|
57
|
+
condition_threshold:
|
|
58
|
+
filter: |
|
|
59
|
+
resource.type="cloud_scheduler_job"
|
|
60
|
+
resource.labels.job_id="<%-jobId%>"
|
|
61
|
+
metric.type="logging.googleapis.com/log_entry_count"
|
|
62
|
+
metric.labels.severity="ERROR"
|
|
63
|
+
threshold_value: 1
|
|
64
|
+
aggregations:
|
|
65
|
+
- alignment_period: 60s
|
|
66
|
+
cross_series_reducer: REDUCE_NONE
|
|
67
|
+
per_series_aligner: ALIGN_COUNT
|
|
68
|
+
documentation:
|
|
69
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
70
|
+
cloud_sql:
|
|
71
|
+
cpu_over_65:
|
|
72
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 65%"
|
|
73
|
+
conditions:
|
|
74
|
+
- display_name: Cloud SQL Database - CPU utilization above 65% over 5 min
|
|
75
|
+
condition_threshold:
|
|
76
|
+
filter: |
|
|
77
|
+
resource.type="cloudsql_database"
|
|
78
|
+
resource.labels.database_id="<%-databaseId%>"
|
|
79
|
+
metric.type="cloudsql.googleapis.com/database/cpu/utilization"
|
|
80
|
+
threshold_value: 0.65
|
|
81
|
+
duration: 300s
|
|
82
|
+
aggregations:
|
|
83
|
+
- alignment_period: 60s
|
|
84
|
+
cross_series_reducer: REDUCE_NONE
|
|
85
|
+
per_series_aligner: ALIGN_MAX
|
|
86
|
+
documentation:
|
|
87
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
88
|
+
cpu_over_85:
|
|
89
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 85%"
|
|
90
|
+
conditions:
|
|
91
|
+
- display_name: "Cloud SQL Database - CPU-usage above 85% over 1 min"
|
|
92
|
+
condition_threshold:
|
|
93
|
+
filter: |
|
|
94
|
+
resource.type="cloudsql_database"
|
|
95
|
+
resource.labels.database_id="<%-databaseId%>"
|
|
96
|
+
metric.type="cloudsql.googleapis.com/database/cpu/utilization"
|
|
97
|
+
threshold_value: 0.85
|
|
98
|
+
duration: 60s
|
|
99
|
+
aggregations:
|
|
100
|
+
- alignment_period: 60s
|
|
101
|
+
cross_series_reducer: REDUCE_NONE
|
|
102
|
+
per_series_aligner: ALIGN_MAX
|
|
103
|
+
documentation:
|
|
104
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
105
|
+
cpu_over_90:
|
|
106
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 90%"
|
|
107
|
+
conditions:
|
|
108
|
+
- display_name: Cloud SQL Database - CPU-usage above 90%
|
|
109
|
+
condition_threshold:
|
|
110
|
+
filter: |
|
|
111
|
+
resource.type="cloudsql_database"
|
|
112
|
+
resource.labels.database_id="<%-databaseId%>"
|
|
113
|
+
metric.type="cloudsql.googleapis.com/database/cpu/utilization"
|
|
114
|
+
threshold_value: 0.9
|
|
115
|
+
aggregations:
|
|
116
|
+
- alignment_period: 60s
|
|
117
|
+
cross_series_reducer: REDUCE_NONE
|
|
118
|
+
per_series_aligner: ALIGN_MAX
|
|
119
|
+
documentation:
|
|
120
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
121
|
+
memory_over_50:
|
|
122
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 50%"
|
|
123
|
+
conditions:
|
|
124
|
+
- display_name: Cloud SQL Database - Memory utilization above 50% over 5 min
|
|
125
|
+
condition_threshold:
|
|
126
|
+
filter: |
|
|
127
|
+
resource.type="cloudsql_database"
|
|
128
|
+
resource.labels.database_id="<%-databaseId%>"
|
|
129
|
+
metric.type="cloudsql.googleapis.com/database/memory/utilization"
|
|
130
|
+
threshold_value: 0.5 # utilization metric is a 0.0-1.0 fraction, not a percentage
|
|
131
|
+
duration: 300s
|
|
132
|
+
aggregations:
|
|
133
|
+
- alignment_period: 60s
|
|
134
|
+
cross_series_reducer: REDUCE_NONE
|
|
135
|
+
per_series_aligner: ALIGN_MAX
|
|
136
|
+
documentation:
|
|
137
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
138
|
+
memory_over_75:
|
|
139
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 75%"
|
|
140
|
+
conditions:
|
|
141
|
+
- display_name: Cloud SQL Database - Memory utilization above 75% over 5 min
|
|
142
|
+
condition_threshold:
|
|
143
|
+
filter: |
|
|
144
|
+
resource.type="cloudsql_database"
|
|
145
|
+
resource.labels.database_id="<%-databaseId%>"
|
|
146
|
+
metric.type="cloudsql.googleapis.com/database/memory/utilization"
|
|
147
|
+
threshold_value: 0.75 # utilization metric is a 0.0-1.0 fraction, not a percentage
|
|
148
|
+
duration: 300s
|
|
149
|
+
aggregations:
|
|
150
|
+
- alignment_period: 60s
|
|
151
|
+
cross_series_reducer: REDUCE_NONE
|
|
152
|
+
per_series_aligner: ALIGN_MAX
|
|
153
|
+
documentation:
|
|
154
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
155
|
+
memory_over_90:
|
|
156
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 90%"
|
|
157
|
+
conditions:
|
|
158
|
+
- display_name: Cloud SQL Database - Memory utilization above 90%
|
|
159
|
+
condition_threshold:
|
|
160
|
+
filter: |
|
|
161
|
+
resource.type="cloudsql_database"
|
|
162
|
+
resource.labels.database_id="<%-databaseId%>"
|
|
163
|
+
metric.type="cloudsql.googleapis.com/database/memory/utilization"
|
|
164
|
+
threshold_value: 0.9 # utilization metric is a 0.0-1.0 fraction, not a percentage
|
|
165
|
+
duration: 60s
|
|
166
|
+
aggregations:
|
|
167
|
+
- alignment_period: 60s
|
|
168
|
+
cross_series_reducer: REDUCE_NONE
|
|
169
|
+
per_series_aligner: ALIGN_MAX
|
|
170
|
+
documentation:
|
|
171
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
172
|
+
query_over_1s:
|
|
173
|
+
display_name: "[P4] <%-systemName%> - CloudSQL | <%-databaseId%> - Query resolve time"
|
|
174
|
+
conditions:
|
|
175
|
+
- display_name: Cloud SQL Instance Database - Per query execution times above 1000 ms
|
|
176
|
+
condition_threshold:
|
|
177
|
+
filter: |
|
|
178
|
+
resource.type="cloudsql_instance_database"
|
|
179
|
+
resource.labels.resource_id="<%-databaseId%>"
|
|
180
|
+
metric.type="cloudsql.googleapis.com/database/postgresql/insights/perquery/execution_time"
|
|
181
|
+
threshold_value: 1000000
|
|
182
|
+
aggregations:
|
|
183
|
+
- alignment_period: 60s
|
|
184
|
+
cross_series_reducer: REDUCE_NONE
|
|
185
|
+
per_series_aligner: ALIGN_DELTA
|
|
186
|
+
documentation:
|
|
187
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
188
|
+
memorystore:
|
|
189
|
+
memory_over_50:
|
|
190
|
+
display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 50%"
|
|
191
|
+
conditions:
|
|
192
|
+
- display_name: Memorystore Redis Instance - Memory Usage above 50% over 5 min
|
|
193
|
+
condition_threshold:
|
|
194
|
+
filter: |
|
|
195
|
+
resource.type="redis_instance"
|
|
196
|
+
resource.labels.instance_id="<%-instanceId%>"
|
|
197
|
+
metric.type="redis.googleapis.com/stats/memory/usage_ratio"
|
|
198
|
+
threshold_value: 0.5
|
|
199
|
+
duration: 300s
|
|
200
|
+
aggregations:
|
|
201
|
+
- alignment_period: 60s
|
|
202
|
+
cross_series_reducer: REDUCE_NONE
|
|
203
|
+
per_series_aligner: ALIGN_MAX
|
|
204
|
+
documentation:
|
|
205
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
206
|
+
memory_over_75:
|
|
207
|
+
display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 75%"
|
|
208
|
+
conditions:
|
|
209
|
+
- display_name: Memorystore Redis Instance - Memory Usage above 75% for 5min
|
|
210
|
+
condition_threshold:
|
|
211
|
+
filter: |
|
|
212
|
+
resource.type="redis_instance"
|
|
213
|
+
resource.labels.instance_id="<%-instanceId%>"
|
|
214
|
+
metric.type="redis.googleapis.com/stats/memory/usage_ratio"
|
|
215
|
+
threshold_value: 0.75
|
|
216
|
+
duration: 300s
|
|
217
|
+
aggregations:
|
|
218
|
+
- alignment_period: 60s
|
|
219
|
+
cross_series_reducer: REDUCE_NONE
|
|
220
|
+
per_series_aligner: ALIGN_MAX
|
|
221
|
+
documentation:
|
|
222
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
223
|
+
memory_over_90:
|
|
224
|
+
display_name: "[P2] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 90%"
|
|
225
|
+
conditions:
|
|
226
|
+
- display_name: Memorystore Redis Instance - Memory Usage above 90%
|
|
227
|
+
condition_threshold:
|
|
228
|
+
filter: |
|
|
229
|
+
resource.type="redis_instance"
|
|
230
|
+
resource.labels.instance_id="<%-instanceId%>"
|
|
231
|
+
metric.type="redis.googleapis.com/stats/memory/usage_ratio"
|
|
232
|
+
threshold_value: 0.90
|
|
233
|
+
duration: 60s
|
|
234
|
+
aggregations:
|
|
235
|
+
- alignment_period: 60s
|
|
236
|
+
cross_series_reducer: REDUCE_NONE
|
|
237
|
+
per_series_aligner: ALIGN_MAX
|
|
238
|
+
documentation:
|
|
239
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
240
|
+
pub_sub:
|
|
241
|
+
unacknowledged_messages:
|
|
242
|
+
display_name: "[P4] <%-systemName%> - Pub/Sub | <%-subscriptionId%> - Undelivered message(s)"
|
|
243
|
+
conditions:
|
|
244
|
+
- display_name: Cloud Pub/Sub Subscription - Undelivered messages above 1 for 5 min
|
|
245
|
+
condition_threshold:
|
|
246
|
+
filter: |
|
|
247
|
+
resource.type="pubsub_subscription"
|
|
248
|
+
resource.labels.subscription_id="<%-subscriptionId%>"
|
|
249
|
+
metric.type="pubsub.googleapis.com/subscription/num_undelivered_messages"
|
|
250
|
+
threshold_value: 1
|
|
251
|
+
duration: 300s
|
|
252
|
+
aggregations:
|
|
253
|
+
- alignment_period: 60s
|
|
254
|
+
cross_series_reducer: REDUCE_NONE
|
|
255
|
+
per_series_aligner: ALIGN_MEAN
|
|
256
|
+
documentation:
|
|
257
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
|
|
2
|
+
# working directory, into a temporary folder, and execute your Terraform commands in that folder.
|
|
3
|
+
terraform {
|
|
4
|
+
source = "git::https://github.com/extenda/tf-module-gcp-alert-policy//?ref=v0.1.0"
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
# Include all settings from the root terragrunt.hcl file
|
|
8
|
+
include {
|
|
9
|
+
path = find_in_parent_folders("terragrunt_root.hcl")
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
dependency "notification_channels" {
|
|
13
|
+
config_path = "../notification-channels"
|
|
14
|
+
mock_outputs = {
|
|
15
|
+
notification_channels = ["dummy-channel"]
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
locals {
|
|
20
|
+
project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
|
|
21
|
+
common_vars = read_terragrunt_config(find_in_parent_folders("common.hcl"))
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
# These are the variables we have to pass in to use the module specified in the terragrunt configuration above
|
|
25
|
+
inputs = {
|
|
26
|
+
monitoring_project_id = local.project_vars.locals.monitoring_project_id,
|
|
27
|
+
notification_channels = dependency.notification_channels.outputs.notification_channels,
|
|
28
|
+
policies = yamldecode(file("${get_terragrunt_dir()}/alerts.yaml")),
|
|
29
|
+
user_labels = {
|
|
30
|
+
cc = local.common_vars.locals.cost_center
|
|
31
|
+
clan = local.common_vars.locals.clan_name
|
|
32
|
+
},
|
|
33
|
+
}
|
|
@@ -9,13 +9,15 @@
|
|
|
9
9
|
resource.type="knative_revision"
|
|
10
10
|
resource.labels.service_name="<%-serviceName%>"
|
|
11
11
|
range_min: 0
|
|
12
|
-
range_max: 100<%
|
|
12
|
+
range_max: 100<% if (burnRateAlerts === 'no') { %>
|
|
13
|
+
alert: {}<% } %><% } %><% if (sli === 'availability') { %>- display_name: Month - Availability
|
|
13
14
|
slo_id: month-availability
|
|
14
15
|
goal: 0.998
|
|
15
16
|
calendar_period: MONTH
|
|
16
17
|
type: windows_based_sli
|
|
17
18
|
method: boolean_filter
|
|
18
|
-
window_period: 60s<%
|
|
19
|
+
window_period: 60s<% if (burnRateAlerts === 'no') { %>
|
|
20
|
+
alert: {}<% } %><% } %><% if (sli === 'error-rate') { %>- display_name: Month - Error rate
|
|
19
21
|
slo_id: month-error-rate
|
|
20
22
|
goal: 0.999
|
|
21
23
|
calendar_period: MONTH
|
|
@@ -29,4 +31,5 @@
|
|
|
29
31
|
total_service_filter: |-
|
|
30
32
|
metric.type="knative.dev/serving/revision/request_count"
|
|
31
33
|
resource.type="knative_revision"
|
|
32
|
-
resource.labels.service_name="<%-serviceName%>"<%
|
|
34
|
+
resource.labels.service_name="<%-serviceName%>"<% if (burnRateAlerts === 'no') { %>
|
|
35
|
+
alert: {}<% } %><% } %>
|
|
@@ -24,8 +24,9 @@ locals {
|
|
|
24
24
|
inputs = merge(
|
|
25
25
|
local.project_vars.locals,
|
|
26
26
|
{
|
|
27
|
-
service_name
|
|
28
|
-
slos
|
|
27
|
+
service_name = "<%-systemName%>.<%-serviceName%>"
|
|
28
|
+
slos = yamldecode(file("${get_terragrunt_dir()}/slos.yaml")),
|
|
29
|
+
telemetry_resource_name = "//container.googleapis.com/projects/${local.project_vars.locals.monitoring_project_id}/locations/europe-west1/clusters/k8s-cluster/k8s/namespaces/<%-serviceName%>"
|
|
29
30
|
<% if (sli === 'availability') { %>
|
|
30
31
|
metric_filter = {
|
|
31
32
|
"metric.type" = "monitoring.googleapis.com/uptime_check/check_passed"
|
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
- service_name:
|
|
2
|
-
hostname:
|
|
3
|
-
path:
|
|
1
|
+
- service_name: <%-systemName%>.<%-serviceName%>
|
|
2
|
+
hostname: <%-hostname%>
|
|
3
|
+
path: <%-path%>
|
|
@@ -15,4 +15,11 @@ helper.validSystemName = (input) => {
|
|
|
15
15
|
return 'System name must be 3 characters';
|
|
16
16
|
};
|
|
17
17
|
|
|
18
|
+
/**
 * Prompt validator for the optional runbook link.
 *
 * Accepts an empty answer (no runbook) or any absolute `https:` URL. The URL
 * is parsed with the WHATWG `URL` class rather than a hand-written pattern so
 * that digits, hyphens, query strings and fragments are all allowed.
 *
 * @param {string} input - Raw answer typed by the user.
 * @returns {true|string} `true` when valid, otherwise the error message to show.
 */
helper.validUrl = (input) => {
  // The prompt explicitly allows leaving the link empty.
  if (input === '') return true;

  try {
    if (new URL(input).protocol === 'https:') return true;
  } catch (err) {
    // Not parseable as an absolute URL; fall through to the error message.
  }
  return 'Enter a valid URL';
};
|
|
24
|
+
|
|
18
25
|
module.exports = helper;
|
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
const yaml = require('js-yaml');
|
|
2
2
|
const fs = require('fs');
|
|
3
3
|
|
|
4
|
-
const
|
|
4
|
+
/**
 * Sets `dead_letter_topic` on the first subscription of `yamlArray` when
 * generating for the prod environment. The array is modified in place.
 *
 * Declared `async` so existing `await addDLQ(...)` call sites keep receiving
 * a Promise.
 *
 * @param {Array<object>} yamlArray - Subscription definitions; only index 0 is touched.
 * @param {string} env - Target environment; anything other than 'prod' is a no-op.
 * @param {string} dlqTopic - Dead-letter topic to attach.
 * @returns {Promise<void>}
 */
const addDLQ = async (yamlArray, env, dlqTopic) => {
  // Nothing to annotate outside prod or when there is no subscription entry.
  if (env !== 'prod' || yamlArray.length === 0) return;

  // eslint-disable-next-line no-param-reassign
  yamlArray[0].dead_letter_topic = dlqTopic;
};
|
|
11
|
+
|
|
12
|
+
const appendNotIncludePull = async (inputs, subscriptionFilePath, dlqTopic) => {
|
|
5
13
|
const pullArray = [];
|
|
6
14
|
|
|
7
15
|
pullArray.push(
|
|
@@ -12,11 +20,17 @@ const appendNotIncludePull = async (inputs, subscriptionFilePath) => {
|
|
|
12
20
|
},
|
|
13
21
|
);
|
|
14
22
|
|
|
23
|
+
await addDLQ(pullArray, inputs.env, dlqTopic);
|
|
15
24
|
const yamlPullArray = yaml.dump(pullArray);
|
|
16
25
|
fs.appendFileSync(subscriptionFilePath, `pull_subscriptions:\n${yamlPullArray}`);
|
|
17
26
|
};
|
|
18
27
|
|
|
19
|
-
const appendIncludePull = async (
|
|
28
|
+
const appendIncludePull = async (
|
|
29
|
+
fileContent,
|
|
30
|
+
originalContentYaml,
|
|
31
|
+
subscriptionFilePath,
|
|
32
|
+
input,
|
|
33
|
+
dlqTopic) => {
|
|
20
34
|
if (fileContent.includes('pull_subscriptions')) {
|
|
21
35
|
const pullArray = Object.values(originalContentYaml.pull_subscriptions);
|
|
22
36
|
const yamlPullArray = yaml.dump(pullArray);
|
|
@@ -33,6 +47,7 @@ const appendIncludePull = async (fileContent, originalContentYaml, subscriptionF
|
|
|
33
47
|
},
|
|
34
48
|
);
|
|
35
49
|
|
|
50
|
+
await addDLQ(newPullArray, input.env, dlqTopic);
|
|
36
51
|
const finalYamlPullArray = yaml.dump(newPullArray);
|
|
37
52
|
fs.appendFileSync(subscriptionFilePath, finalYamlPullArray);
|
|
38
53
|
|
|
@@ -43,7 +58,7 @@ const appendIncludePull = async (fileContent, originalContentYaml, subscriptionF
|
|
|
43
58
|
}
|
|
44
59
|
};
|
|
45
60
|
|
|
46
|
-
const appendNotIncludePush = async (inputs, subscriptionFilePath) => {
|
|
61
|
+
const appendNotIncludePush = async (inputs, subscriptionFilePath, dlqTopic) => {
|
|
47
62
|
const pushArray = [];
|
|
48
63
|
|
|
49
64
|
pushArray.push(
|
|
@@ -56,11 +71,17 @@ const appendNotIncludePush = async (inputs, subscriptionFilePath) => {
|
|
|
56
71
|
},
|
|
57
72
|
);
|
|
58
73
|
|
|
74
|
+
await addDLQ(pushArray, inputs.env, dlqTopic);
|
|
59
75
|
const yamlPushArray = yaml.dump(pushArray);
|
|
60
76
|
fs.appendFileSync(subscriptionFilePath, `push_subscriptions:\n${yamlPushArray}`);
|
|
61
77
|
};
|
|
62
78
|
|
|
63
|
-
const appendIncludePush = async (
|
|
79
|
+
const appendIncludePush = async (
|
|
80
|
+
fileContent,
|
|
81
|
+
originalContentYaml,
|
|
82
|
+
subscriptionFilePath,
|
|
83
|
+
input,
|
|
84
|
+
dlqTopic) => {
|
|
64
85
|
if (fileContent.includes('push_subscriptions')) {
|
|
65
86
|
const pushArray = Object.values(originalContentYaml.push_subscriptions);
|
|
66
87
|
const yamlPushArray = yaml.dump(pushArray);
|
|
@@ -78,6 +99,7 @@ const appendIncludePush = async (fileContent, originalContentYaml, subscriptionF
|
|
|
78
99
|
expiration_policy: '',
|
|
79
100
|
},
|
|
80
101
|
);
|
|
102
|
+
await addDLQ(newPushArray, input.env, dlqTopic);
|
|
81
103
|
const yamlPushArray = yaml.dump(newPushArray);
|
|
82
104
|
fs.appendFileSync(subscriptionFilePath, yamlPushArray);
|
|
83
105
|
|
|
@@ -4,7 +4,13 @@ const {
|
|
|
4
4
|
appendNotIncludePull, appendIncludePull, appendNotIncludePush, appendIncludePush,
|
|
5
5
|
} = require('./append');
|
|
6
6
|
|
|
7
|
-
const handleSubscribers = async (
|
|
7
|
+
const handleSubscribers = async (
|
|
8
|
+
env,
|
|
9
|
+
answers,
|
|
10
|
+
oidcEmail,
|
|
11
|
+
pushEndpoint,
|
|
12
|
+
subscriptionFilePath,
|
|
13
|
+
dlqTopic) => {
|
|
8
14
|
const {
|
|
9
15
|
subscriberName,
|
|
10
16
|
existingTopic,
|
|
@@ -22,22 +28,35 @@ const handleSubscribers = async (env, answers, oidcEmail, pushEndpoint, subscrip
|
|
|
22
28
|
audience,
|
|
23
29
|
oidcEmail,
|
|
24
30
|
pushEndpoint,
|
|
31
|
+
dlqTopic,
|
|
25
32
|
};
|
|
26
33
|
|
|
27
34
|
if (pushOrPull === 'pull') {
|
|
28
35
|
if (subscriptionFileContent.length === 0 || !subscriptionFileContent.includes('pull_subscriptions')) {
|
|
29
|
-
await appendNotIncludePull(inputs, subscriptionFilePath);
|
|
36
|
+
await appendNotIncludePull(inputs, subscriptionFilePath, dlqTopic);
|
|
30
37
|
} else {
|
|
31
38
|
const originalContentYaml = yaml.load(subscriptionFileContent);
|
|
32
39
|
const fileContent = subscriptionFileContent;
|
|
33
|
-
await appendIncludePull(
|
|
40
|
+
await appendIncludePull(
|
|
41
|
+
fileContent,
|
|
42
|
+
originalContentYaml,
|
|
43
|
+
subscriptionFilePath,
|
|
44
|
+
inputs,
|
|
45
|
+
dlqTopic,
|
|
46
|
+
);
|
|
34
47
|
}
|
|
35
48
|
} else if (subscriptionFileContent.length === 0 || !subscriptionFileContent.includes('push_subscriptions')) {
|
|
36
|
-
await appendNotIncludePush(inputs, subscriptionFilePath);
|
|
49
|
+
await appendNotIncludePush(inputs, subscriptionFilePath, dlqTopic);
|
|
37
50
|
} else {
|
|
38
51
|
const originalContentYaml = yaml.load(subscriptionFileContent);
|
|
39
52
|
const fileContent = subscriptionFileContent;
|
|
40
|
-
await appendIncludePush(
|
|
53
|
+
await appendIncludePush(
|
|
54
|
+
fileContent,
|
|
55
|
+
originalContentYaml,
|
|
56
|
+
subscriptionFilePath,
|
|
57
|
+
inputs,
|
|
58
|
+
dlqTopic,
|
|
59
|
+
);
|
|
41
60
|
}
|
|
42
61
|
};
|
|
43
62
|
|
|
@@ -170,6 +170,18 @@ module.exports = class extends BaseGenerator {
|
|
|
170
170
|
externalSub,
|
|
171
171
|
} = this.answers;
|
|
172
172
|
|
|
173
|
+
const dlqTopicName = `${getProjectId('prod').split('-')[0]}-common-dlq`;
|
|
174
|
+
let dlqTopic = `projects/${getProjectId('prod')}/topics/${dlqTopicName}`;
|
|
175
|
+
|
|
176
|
+
const dlqTopicDirPath = path.join(process.cwd(), 'infra', 'prod', 'pubsub', dlqTopicName);
|
|
177
|
+
this.fs.copyTpl(
|
|
178
|
+
this.templatePath('pubsub-dlq/terragrunt.hcl'),
|
|
179
|
+
this.destinationPath(`${dlqTopicDirPath}/terragrunt.hcl`),
|
|
180
|
+
{
|
|
181
|
+
...this.answers,
|
|
182
|
+
},
|
|
183
|
+
);
|
|
184
|
+
|
|
173
185
|
['prod', 'staging'].forEach(async (env) => {
|
|
174
186
|
if (createResource === 'topic') {
|
|
175
187
|
const topicDirPath = path.join(process.cwd(), 'infra', env, 'pubsub', topicName);
|
|
@@ -182,6 +194,7 @@ module.exports = class extends BaseGenerator {
|
|
|
182
194
|
env,
|
|
183
195
|
topicName,
|
|
184
196
|
subscriberName,
|
|
197
|
+
dlqTopic,
|
|
185
198
|
},
|
|
186
199
|
);
|
|
187
200
|
});
|
|
@@ -212,6 +225,7 @@ module.exports = class extends BaseGenerator {
|
|
|
212
225
|
audience,
|
|
213
226
|
oidcEmail,
|
|
214
227
|
pushEndpoint,
|
|
228
|
+
dlqTopic,
|
|
215
229
|
},
|
|
216
230
|
);
|
|
217
231
|
}
|
|
@@ -228,7 +242,7 @@ module.exports = class extends BaseGenerator {
|
|
|
228
242
|
);
|
|
229
243
|
});
|
|
230
244
|
|
|
231
|
-
await handleSubscribers(env, this.answers, oidcEmail, pushEndpoint, `${subscriptionDirPath}/subscribers.yaml
|
|
245
|
+
await handleSubscribers(env, this.answers, oidcEmail, pushEndpoint, `${subscriptionDirPath}/subscribers.yaml`, dlqTopic);
|
|
232
246
|
}
|
|
233
247
|
if (createResource === 'subscription' && externalSub === 'yes') {
|
|
234
248
|
const externalDirPath = path.join(process.cwd(), 'infra', env, 'pubsub', existingTopic, clanName);
|
|
@@ -259,6 +273,7 @@ module.exports = class extends BaseGenerator {
|
|
|
259
273
|
oidcEmail = `${oidcName}@${stagingProjectIdConsumer}.iam.gserviceaccount.com`;
|
|
260
274
|
} else {
|
|
261
275
|
oidcEmail = `${oidcName}@${prodProjectIdConsumer}.iam.gserviceaccount.com`;
|
|
276
|
+
dlqTopic = `projects/${prodProjectIdConsumer}/topics/${prodProjectIdConsumer.split('-')[0]}-common-dlq`;
|
|
262
277
|
}
|
|
263
278
|
if (!fs.existsSync(externalSubPath)) {
|
|
264
279
|
fs.writeFileSync(externalSubPath, '');
|
|
@@ -273,11 +288,19 @@ module.exports = class extends BaseGenerator {
|
|
|
273
288
|
audience,
|
|
274
289
|
oidcEmail,
|
|
275
290
|
pushEndpoint,
|
|
291
|
+
dlqTopic,
|
|
276
292
|
},
|
|
277
293
|
);
|
|
278
294
|
}
|
|
279
295
|
|
|
280
|
-
await handleSubscribers(
|
|
296
|
+
await handleSubscribers(
|
|
297
|
+
env,
|
|
298
|
+
this.answers,
|
|
299
|
+
oidcEmail,
|
|
300
|
+
pushEndpoint,
|
|
301
|
+
externalSubPath,
|
|
302
|
+
dlqTopic,
|
|
303
|
+
);
|
|
281
304
|
}
|
|
282
305
|
});
|
|
283
306
|
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
<% if (createResource == 'topic') { %><% } %><% if (createResource == 'subscription' && pushOrPull == 'push') { %>push_subscriptions:
|
|
2
2
|
- name: "<%-existingTopic%>+<%-subscriberName%>"
|
|
3
|
-
push_endpoint: "<%-pushEndpoint%>"
|
|
3
|
+
push_endpoint: "<%-pushEndpoint%>" <% if (env == 'prod') { %>
|
|
4
|
+
dead_letter_topic: "<%-dlqTopic%>" <% } %>
|
|
4
5
|
oidc_service_account_email: "<%-oidcEmail%>"
|
|
5
6
|
audience: "<%-audience%>"
|
|
6
7
|
expiration_policy: ""<% } %><% if (createResource == 'subscription' && pushOrPull == 'pull') { %>pull_subscriptions:
|
|
7
8
|
- name: "<%-existingTopic%>+<%-subscriberName%>"
|
|
8
|
-
ack_deadline_seconds: "60"
|
|
9
|
+
ack_deadline_seconds: "60" <% if (env == 'prod') { %>
|
|
10
|
+
dead_letter_topic: "<%-dlqTopic%>" <% } %>
|
|
9
11
|
expiration_policy: ""<% } %>
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
|
|
2
|
+
# working directory, into a temporary folder, and execute your Terraform commands in that folder.
|
|
3
|
+
terraform {
|
|
4
|
+
source = "git::https://github.com/terraform-google-modules/terraform-google-pubsub//?ref=v3.0.0"
|
|
5
|
+
}
|
|
6
|
+
|
|
7
|
+
# Include all settings from the root terragrunt.hcl file
|
|
8
|
+
include {
|
|
9
|
+
path = find_in_parent_folders("terragrunt_root.hcl")
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
locals {
|
|
13
|
+
project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
|
|
14
|
+
common_vars = read_terragrunt_config(find_in_parent_folders("common.hcl"))
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
# These are the variables we have to pass in to use the module specified in the terragrunt configuration above
|
|
18
|
+
inputs = merge (
|
|
19
|
+
local.project_vars.locals,
|
|
20
|
+
{
|
|
21
|
+
topic = "${local.common_vars.locals.clan_name}-common-dlq"
|
|
22
|
+
create_subscriptions = true
|
|
23
|
+
create_topic = true
|
|
24
|
+
push_subscriptions = [
|
|
25
|
+
{
|
|
26
|
+
name = "dlq-message-handler-subscription",
|
|
27
|
+
push_endpoint = "https://europe-west1-sre-prod-5462.cloudfunctions.net/dlq-message-handler",
|
|
28
|
+
audience = "https://europe-west1-sre-prod-5462.cloudfunctions.net/dlq-message-handler",
|
|
29
|
+
expiration_policy = "",
|
|
30
|
+
oidc_service_account_email = "pubsub-dlq-handler@${local.project_vars.locals.project_id}.iam.gserviceaccount.com",
|
|
31
|
+
},
|
|
32
|
+
],
|
|
33
|
+
topic_labels = {
|
|
34
|
+
cc = local.common_vars.locals.cost_center
|
|
35
|
+
}
|
|
36
|
+
subscription_labels = {
|
|
37
|
+
cc = local.common_vars.locals.cost_center
|
|
38
|
+
}
|
|
39
|
+
grant_token_creator = false,
|
|
40
|
+
}
|
|
41
|
+
)
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
<% if (pushOrPull == 'push') { %>push_subscriptions:
|
|
2
2
|
- name: "<%-existingTopic%>+<%-subscriberName%>"
|
|
3
|
-
push_endpoint: "<%-pushEndpoint%>"
|
|
3
|
+
push_endpoint: "<%-pushEndpoint%>" <% if (env == 'prod') { %>
|
|
4
|
+
dead_letter_topic: "<%-dlqTopic%>" <% } %>
|
|
4
5
|
oidc_service_account_email: "<%-oidcEmail%>"
|
|
5
6
|
audience: "<%-audience%>"
|
|
6
7
|
expiration_policy: ""<% } %><% if (pushOrPull == 'pull') { %>pull_subscriptions:
|
|
7
8
|
- name: "<%-existingTopic%>+<%-subscriberName%>"
|
|
8
|
-
ack_deadline_seconds: "60"
|
|
9
|
+
ack_deadline_seconds: "60" <% if (env == 'prod') { %>
|
|
10
|
+
dead_letter_topic: "<%-dlqTopic%>" <% } %>
|
|
9
11
|
expiration_policy: ""<% } %>
|