@hiiretail/gcp-infra-cli 0.71.0 → 0.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/generators/init/clan-infra/templates/env/project.hcl +4 -3
  2. package/generators/init/clan-infra/templates/infra/.terraform-version +1 -1
  3. package/generators/init/clan-infra/templates/infra/.terragrunt-version +1 -1
  4. package/generators/organization/clan-project/index.js +1 -1
  5. package/generators/organization/clan-project/templates/clan/clan.yaml +4 -2
  6. package/generators/organization/clan-project/templates/clan-project/terragrunt.hcl +1 -1
  7. package/generators/resources/monitoring/append.js +105 -0
  8. package/generators/resources/monitoring/generator.json +1 -1
  9. package/generators/resources/monitoring/handle-alerts.js +11 -0
  10. package/generators/resources/monitoring/handle-slos.js +28 -0
  11. package/generators/resources/monitoring/handle-uptime.js +28 -0
  12. package/generators/resources/monitoring/index.js +222 -33
  13. package/generators/resources/monitoring/templates/alerts/alerts.yaml +257 -0
  14. package/generators/resources/monitoring/templates/alerts/terragrunt.hcl +33 -0
  15. package/generators/resources/monitoring/templates/notification-channels/terragrunt.hcl +2 -0
  16. package/generators/resources/monitoring/templates/slos/slos.yaml +35 -0
  17. package/generators/resources/monitoring/templates/slos/terragrunt.hcl +37 -0
  18. package/generators/resources/monitoring/templates/uptime-checks/terragrunt.hcl +36 -0
  19. package/generators/resources/monitoring/templates/uptime-checks/uptime-checks.yaml +3 -0
  20. package/generators/resources/monitoring/validate.js +25 -0
  21. package/generators/tribe-resources/tribe-project/templates/project/terragrunt.hcl +1 -1
  22. package/package.json +1 -1
@@ -5,7 +5,8 @@ locals {
5
5
  project_id = "<%-clanProject%>"
6
6
  project_env = "<%-env%>"
7
7
 
8
- project = local.project_id
9
- network = "tribe-network"
10
- tribe_project_id = "<%-tribeProject%>"
8
+ project = local.project_id
9
+ network = "tribe-network"
10
+ tribe_project_id = "<%-tribeProject%>"
11
+ monitoring_project_id = "<%-tribeProject%>" # possibly will be changed to hiiretail-monitoring-prod-6500 later
11
12
  }
@@ -1 +1 @@
1
- 1.0.7
1
+ 1.2.7
@@ -1 +1 @@
1
- 0.31.8
1
+ 0.38.0
@@ -123,7 +123,7 @@ module.exports = class extends BaseGenerator {
123
123
  this.log(`
124
124
  ${chalk.green('Your clan projects have now been created. To finalize your configuration, please continue with manual editing of the generated files.')}
125
125
  ${chalk.green('1.')} Add clan members and groups
126
- \u2192 ${chalk.cyan(path.join(clanDir, 'project.yaml'))}
126
+ \u2192 ${chalk.cyan(path.join(clanDir, 'clan.yaml'))}
127
127
  ${chalk.green('2.')} Configure APIs, service accounts and repositories
128
128
  \u2192 ${chalk.cyan(path.join(clanDir, 'prod', 'project.yaml'))}
129
129
  \u2192 ${chalk.cyan(path.join(clanDir, 'staging', 'project.yaml'))}
@@ -8,8 +8,10 @@
8
8
  # members:
9
9
  # groups: []
10
10
  # users:
11
- # - alice@extendaretail.com
12
- # - bob@extendaretail.com
11
+ # - name: Alice Test
12
+ # email: alice@extendaretail.com
13
+ # - name: Bob Test
14
+ # email: bob@extendaretail.com
13
15
  ###
14
16
  ---
15
17
  common-infra-repo: <%-commonInfraRepo%>
@@ -1,5 +1,5 @@
1
1
  terraform {
2
- source = "git::https://github.com/extenda/tf-module-gcp-project//?ref=v1.0.6"
2
+ source = "git::https://github.com/extenda/tf-module-gcp-project//?ref=v1.0.8"
3
3
  }
4
4
 
5
5
  dependency "parent_folder" {
@@ -0,0 +1,105 @@
1
+ const fs = require('fs');
2
+ const yaml = require('js-yaml');
3
+
4
/**
 * Builds the SLO entry for the SLI selected in the generator prompts.
 *
 * @param {object} inputs - Prompt answers; reads `sli`, `serviceName` and `burnRateAlerts`.
 * @returns {object|null} The SLO entry, or null when `inputs.sli` is not a known SLI.
 */
const buildSloEntry = (inputs) => {
  const serviceFilter = `resource.labels.service_name="${inputs.serviceName}"`;
  const revisionFilter = 'resource.type="knative_revision"';
  const requestCountFilter = 'metric.type="knative.dev/serving/revision/request_count"';

  const entries = {
    availability: {
      display_name: 'Month - Availability',
      slo_id: 'month-availability',
      goal: 0.998,
      calendar_period: 'MONTH',
      type: 'windows_based_sli',
      method: 'boolean_filter',
      window_period: '60s',
    },
    'error-rate': {
      display_name: 'Month - Error rate',
      slo_id: 'month-error-rate',
      goal: 0.999,
      calendar_period: 'MONTH',
      type: 'request_based_sli',
      method: 'good_total_ratio',
      bad_service_filter: [
        requestCountFilter,
        revisionFilter,
        'metric.labels.response_code_class="5xx"',
        serviceFilter,
      ].join('\n'),
      // The service name must be fully quoted here, exactly as in bad_service_filter.
      total_service_filter: [
        requestCountFilter,
        revisionFilter,
        serviceFilter,
      ].join('\n'),
    },
    latency: {
      display_name: 'Month - Latency',
      slo_id: 'month-latency',
      goal: 0.95,
      calendar_period: 'MONTH',
      type: 'request_based_sli',
      method: 'distribution_cut',
      metric_filter: [
        'metric.type="knative.dev/serving/revision/request_latencies"',
        revisionFilter,
        serviceFilter,
      ].join('\n'),
      range_min: 0,
      range_max: 100,
    },
  };

  if (!Object.prototype.hasOwnProperty.call(entries, inputs.sli)) {
    return null;
  }

  const entry = entries[inputs.sli];
  if (inputs.burnRateAlerts === 'no') {
    // Answering "no" is recorded as an empty `alert` object on the entry.
    entry.alert = {};
  }
  return entry;
};

/**
 * Adds the SLO entry selected by `inputs.sli` to an existing slos.yaml file.
 *
 * The existing entries and the new one are dumped together in a single write,
 * so the file stays valid YAML even when the previous content was an empty
 * list (`[]`) or contained nothing but comments/whitespace.
 *
 * @param {string|null} fileContent - Raw text currently stored in the file.
 * @param {object|Array|null|undefined} originalContentYaml - `fileContent` parsed as YAML.
 * @param {string} slosFilePath - Path of the slos.yaml file to rewrite.
 * @param {object} inputs - Prompt answers (`sli`, `serviceName`, `burnRateAlerts`).
 * @returns {Promise<void>} Resolves once the file has been written.
 */
const appendIncludeConfigSlo = async (fileContent, originalContentYaml, slosFilePath, inputs) => {
  // A non-empty file can still parse to null/undefined (comments only), so check both.
  const hasExistingEntries = fileContent !== null
    && fileContent !== ''
    && originalContentYaml !== null
    && typeof originalContentYaml === 'object';
  const existingEntries = hasExistingEntries ? Object.values(originalContentYaml) : [];

  const newEntry = buildSloEntry(inputs);
  const allEntries = newEntry === null ? existingEntries : [...existingEntries, newEntry];

  fs.writeFileSync(slosFilePath, yaml.dump(allEntries));
};
80
+
81
/**
 * Adds a new uptime check entry to an existing uptime-checks.yaml file.
 *
 * Existing entries and the new one are dumped together in a single write, so
 * the result stays valid YAML even when the previous content was an empty
 * list (`[]`) or contained nothing but comments/whitespace.
 *
 * @param {string|null} fileContent - Raw text currently stored in the file.
 * @param {object|Array|null|undefined} uptimeContentYml - `fileContent` parsed as YAML.
 * @param {string} uptimeFilePath - Path of the uptime-checks.yaml file to rewrite.
 * @param {object} inputs - Prompt answers; reads `systemName`, `serviceName`,
 *   `hostname` and `path`.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
const appendIncludeConfigUptime = async (fileContent, uptimeContentYml, uptimeFilePath, inputs) => {
  // A non-empty file can still parse to null/undefined (comments only), so check both.
  const hasExistingChecks = fileContent !== null
    && fileContent !== ''
    && uptimeContentYml !== null
    && typeof uptimeContentYml === 'object';
  const existingChecks = hasExistingChecks ? Object.values(uptimeContentYml) : [];

  const newCheck = {
    service_name: `${inputs.systemName}.${inputs.serviceName}`,
    hostname: inputs.hostname,
    path: inputs.path,
  };

  fs.writeFileSync(uptimeFilePath, yaml.dump([...existingChecks, newCheck]));
};
101
+
102
+ module.exports = {
103
+ appendIncludeConfigSlo,
104
+ appendIncludeConfigUptime,
105
+ };
@@ -1,4 +1,4 @@
1
1
  {
2
2
  "name": "Monitoring",
3
- "description": "Create monitoring dashboards and alers"
3
+ "description": "Create monitoring resources"
4
4
  }
@@ -0,0 +1,11 @@
1
+ const ejs = require('ejs');
2
+
3
/**
 * Renders the selected alert template with the prompt answers and appends the
 * resulting policy to `alerts`.
 *
 * The template object is serialised to JSON, rendered with EJS and parsed back.
 * String answers are therefore JSON-escaped before rendering, otherwise a
 * quote, backslash or newline in an answer would corrupt the JSON document.
 *
 * @param {Array<object>} alerts - Existing alert policies; the new policy is pushed onto it.
 * @param {object} templates - Alert templates keyed by resource and then by alert name.
 * @param {object} answers - Prompt answers; `alertResource` and `alert` select the template,
 *   all answers are available to the template as EJS variables.
 * @returns {Array<object>} The same `alerts` array, including the new policy.
 * @throws {Error} When no template exists for the selected resource/alert pair.
 */
const handleAlerts = (alerts, templates, answers) => {
  const { alertResource, alert } = answers;
  const resourceTemplates = templates[alertResource];
  const template = resourceTemplates ? resourceTemplates[alert] : undefined;
  if (!template) {
    throw new Error(`No alert template found for "${alertResource}.${alert}"`);
  }

  const jsonSafeAnswers = {};
  Object.keys(answers).forEach((key) => {
    const value = answers[key];
    // JSON.stringify adds surrounding quotes; strip them and keep the escaped content.
    jsonSafeAnswers[key] = typeof value === 'string'
      ? JSON.stringify(value).slice(1, -1)
      : value;
  });

  const renderedAlert = ejs.render(JSON.stringify(template), jsonSafeAnswers);

  alerts.push(JSON.parse(renderedAlert));
  return alerts;
};
10
+
11
+ module.exports = handleAlerts;
@@ -0,0 +1,28 @@
1
+ const fs = require('fs');
2
+ const yaml = require('js-yaml');
3
+ const { appendIncludeConfigSlo } = require('./append');
4
+
5
/**
 * Reads an existing slos.yaml file and appends the SLO selected in the prompts.
 *
 * Only the answers the SLO writer needs are forwarded. This is a module-level
 * arrow function, so there is no generator instance behind `this`; everything
 * comes from the `answers` argument.
 *
 * @param {object} answers - Prompt answers; reads `serviceName`, `sli`,
 *   `systemName` and `burnRateAlerts`.
 * @param {string} slosFilePath - Path of the existing slos.yaml file.
 * @returns {Promise<void>} Resolves once the file has been updated.
 */
const handleSlosFile = async (answers, slosFilePath) => {
  const {
    serviceName,
    sli,
    systemName,
    burnRateAlerts,
  } = answers;

  const sloFileContent = fs.readFileSync(slosFilePath, 'utf8');
  const originalContentYaml = yaml.load(sloFileContent);

  const inputs = {
    serviceName,
    sli,
    systemName,
    burnRateAlerts,
  };

  await appendIncludeConfigSlo(sloFileContent, originalContentYaml, slosFilePath, inputs);
};
27
+
28
+ module.exports = handleSlosFile;
@@ -0,0 +1,28 @@
1
+ const fs = require('fs');
2
+ const yaml = require('js-yaml');
3
+ const { appendIncludeConfigUptime } = require('./append');
4
+
5
/**
 * Reads an existing uptime-checks.yaml file and appends the uptime check
 * described by the prompt answers.
 *
 * Only the answers the uptime writer needs are forwarded. This is a
 * module-level arrow function, so there is no generator instance behind
 * `this`; everything comes from the `answers` argument.
 *
 * @param {object} answers - Prompt answers; reads `serviceName`, `hostname`,
 *   `path` and `systemName`.
 * @param {string} uptimeFilePath - Path of the existing uptime-checks.yaml file.
 * @returns {Promise<void>} Resolves once the file has been updated.
 */
const handleUptimeFile = async (answers, uptimeFilePath) => {
  const {
    serviceName,
    hostname,
    path: checkPath,
    systemName,
  } = answers;

  const uptimeFileContent = fs.readFileSync(uptimeFilePath, 'utf8');
  const originalContentYaml = yaml.load(uptimeFileContent);

  const inputs = {
    serviceName,
    hostname,
    path: checkPath,
    systemName,
  };

  await appendIncludeConfigUptime(uptimeFileContent, originalContentYaml, uptimeFilePath, inputs);
};
27
+
28
+ module.exports = handleUptimeFile;
@@ -1,55 +1,244 @@
1
1
  const path = require('path');
2
2
  const chalk = require('chalk');
3
+ const fs = require('fs');
4
+ const yaml = require('js-yaml');
3
5
  const BaseGenerator = require('../../../src/BaseGenerator');
6
+ const { required } = require('../../../src/validators');
7
+ const helper = require('./validate');
8
+ const handleSlosFile = require('./handle-slos');
9
+ const handleUptimeFile = require('./handle-uptime');
10
+ const handleAlerts = require('./handle-alerts');
11
+
12
+ const alertTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/alerts/alerts.yaml`));
4
13
 
5
14
  module.exports = class extends BaseGenerator {
6
- prompting() {
7
- const prompts = [
15
+ async prompting() {
16
+ this.answers = await this.prompt([
8
17
  {
9
18
  type: 'list',
10
19
  name: 'monitoringResource',
11
- message: 'Select the resource that you want to monitor',
12
- default: 'dataflow',
13
- choices: ['cloudrun', 'cloudfunction', 'dataflow'],
20
+ message: 'Select the resource you want to create',
21
+ choices: ['alerts', 'uptime-checks', 'slos'],
14
22
  },
15
- ];
16
-
17
- return this.prompt(prompts).then((props) => {
18
- this.answers = props;
19
- });
23
+ {
24
+ when: (response) => response.monitoringResource === 'alerts',
25
+ type: 'list',
26
+ name: 'alertResource',
27
+ choices: Object.keys(alertTemplates),
28
+ },
29
+ {
30
+ when: (response) => response.monitoringResource === 'alerts',
31
+ type: 'list',
32
+ name: 'alert',
33
+ choices: (answers) => Object.keys(alertTemplates[`${answers.alertResource}`]),
34
+ },
35
+ {
36
+ when: (response) => response.monitoringResource === ('alerts' || 'slos' || 'uptime-checks'),
37
+ type: 'input',
38
+ name: 'systemName',
39
+ message: 'Please provide three-letter system name as defined in Styra',
40
+ validate: required && helper.validSystemName,
41
+ },
42
+ {
43
+ when: (response) => response.monitoringResource === ('slos' || 'uptime-checks') || response.alertResource === 'cloud_run',
44
+ type: 'input',
45
+ name: 'serviceName',
46
+ message: 'Please provide the namespace where the service resides',
47
+ validate: required,
48
+ },
49
+ {
50
+ when: (response) => response.monitoringResource === 'alerts',
51
+ type: 'input',
52
+ name: 'runbookLink',
53
+ message: 'Please provide the full URL to your runbook in confluence (Leave empty if none)',
54
+ validate: required && helper.validUrl,
55
+ },
56
+ {
57
+ when: (response) => response.alertResource === 'cloud_scheduler',
58
+ type: 'input',
59
+ name: 'jobId',
60
+ message: 'Please provide the "job id"',
61
+ validate: required,
62
+ },
63
+ {
64
+ when: (response) => response.alertResource === 'cloud_sql',
65
+ type: 'input',
66
+ name: 'databaseId',
67
+ message: 'Please provide the "database id"',
68
+ validate: required,
69
+ },
70
+ {
71
+ when: (response) => response.alertResource === 'memorystore',
72
+ type: 'input',
73
+ name: 'instanceId',
74
+ message: 'Please provide the "instance id"',
75
+ validate: required,
76
+ },
77
+ {
78
+ when: (response) => response.alertResource === 'pub_sub',
79
+ type: 'input',
80
+ name: 'subscriptionId',
81
+ message: 'Please provide the "subscription id"',
82
+ validate: required,
83
+ },
84
+ {
85
+ when: (response) => response.monitoringResource === 'uptime-checks',
86
+ type: 'input',
87
+ name: 'hostname',
88
+ message: 'Please provide the base hostname of the service (example: my-service.retailsvc.com)',
89
+ validate: required && helper.validHostname,
90
+ },
91
+ {
92
+ when: (response) => response.monitoringResource === 'uptime-checks',
93
+ type: 'input',
94
+ name: 'path',
95
+ message: 'Please provide the path to the page to run the check against. (example: /health)',
96
+ validate: required,
97
+ },
98
+ {
99
+ when: (response) => response.monitoringResource === 'slos',
100
+ type: 'list',
101
+ name: 'sli',
102
+ message: 'Please select the SLI',
103
+ choices: ['availability', 'error-rate', 'latency'],
104
+ },
105
+ {
106
+ when: (response) => response.monitoringResource === 'slos',
107
+ type: 'list',
108
+ name: 'burnRateAlerts',
109
+ message: 'Please select yes if you want to create burn rate alert for the SLI',
110
+ default: 'yes',
111
+ choices: ['yes', 'no'],
112
+ },
113
+ {
114
+ when: (response) => response.monitoringResource === 'slos' && response.sli === 'availability',
115
+ type: 'confirm',
116
+ name: 'info',
117
+ message: 'WARNING: Make sure that an uptime check has been created before applying availability SLI',
118
+ },
119
+ ]);
20
120
  }
21
121
 
22
- writing() {
122
+ async writing() {
23
123
  const {
24
124
  monitoringResource,
125
+ serviceName,
126
+ hostname,
127
+ sli,
128
+ systemName,
129
+ burnRateAlerts,
25
130
  } = this.answers;
26
131
 
27
- ['prod', 'staging'].forEach((env) => {
28
- this.copyDir(
29
- path.join(monitoringResource),
30
- path.join('infra', env, 'monitoring', monitoringResource),
31
- {
32
- ...this.answers,
33
- env,
34
- },
35
- );
36
- });
37
-
38
- ['prod', 'staging'].forEach((env) => {
39
- this.copyDir(
40
- 'notification-channels',
41
- path.join('infra', env, 'monitoring', 'notification-channels'),
42
- {
43
- ...this.answers,
44
- env,
45
- },
46
- );
47
- });
132
+ const resourcePath = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource);
133
+
134
+ if (monitoringResource === 'uptime-checks') {
135
+ if (!fs.existsSync(resourcePath)) {
136
+ fs.mkdirSync(resourcePath, { recursive: true });
137
+ }
138
+
139
+ const uptimeYamlFile = `${resourcePath}/uptime-checks.yaml`;
140
+ if (!fs.existsSync(uptimeYamlFile)) {
141
+ this.copyDir(
142
+ 'uptime-checks',
143
+ resourcePath,
144
+ {
145
+ ...this.answers,
146
+ serviceName,
147
+ hostname,
148
+ systemName,
149
+ },
150
+ );
151
+ } else {
152
+ await handleUptimeFile(this.answers, uptimeYamlFile);
153
+ }
154
+ }
155
+
156
+ if (monitoringResource === 'slos') {
157
+ const serviceFolderName = serviceName.replace(/ /g, '-').toLowerCase();
158
+ const serviceDir = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource, serviceFolderName);
159
+ const fileContainsFilter = (fileName, str) => {
160
+ const contents = fs.readFileSync(fileName, 'utf-8');
161
+ const result = contents.includes(str);
162
+ return result;
163
+ };
164
+
165
+ if (!fs.existsSync(serviceDir)) {
166
+ fs.mkdirSync(serviceDir, { recursive: true });
167
+ }
168
+
169
+ if (fs.existsSync(`${serviceDir}/terragrunt.hcl`)) {
170
+ if (fileContainsFilter(`${serviceDir}/terragrunt.hcl`, 'metric_filter') === false) {
171
+ this.fs.copyTpl(
172
+ this.templatePath('slos/terragrunt.hcl'),
173
+ this.destinationPath(`${serviceDir}/terragrunt.hcl`),
174
+ {
175
+ ...this.answers,
176
+ monitoringResource,
177
+ serviceName,
178
+ systemName,
179
+ burnRateAlerts,
180
+ },
181
+ );
182
+ }
183
+ } else {
184
+ this.fs.copyTpl(
185
+ this.templatePath('slos/terragrunt.hcl'),
186
+ this.destinationPath(`${serviceDir}/terragrunt.hcl`),
187
+ {
188
+ ...this.answers,
189
+ monitoringResource,
190
+ serviceName,
191
+ systemName,
192
+ burnRateAlerts,
193
+ },
194
+ );
195
+ }
196
+
197
+ const sloYamlFile = `${serviceDir}/slos.yaml`;
198
+ if (!fs.existsSync(sloYamlFile)) {
199
+ this.fs.copyTpl(
200
+ this.templatePath('slos/slos.yaml'),
201
+ this.destinationPath(sloYamlFile),
202
+ {
203
+ ...this.answers,
204
+ monitoringResource,
205
+ serviceName,
206
+ systemName,
207
+ sli,
208
+ burnRateAlerts,
209
+ },
210
+ );
211
+ } else {
212
+ await handleSlosFile(this.answers, sloYamlFile);
213
+ }
214
+ }
215
+
216
+ if (monitoringResource === 'alerts') {
217
+ const yamlPath = `${resourcePath}/alerts.yaml`;
218
+ const terraPath = `${resourcePath}/terragrunt.hcl`;
219
+ if (!fs.existsSync(resourcePath)) fs.mkdirSync(resourcePath, { recursive: true });
220
+ if (!fs.existsSync(yamlPath)) fs.writeFileSync(yamlPath, '');
221
+
222
+ if (!fs.existsSync(terraPath)) {
223
+ this.fs.copyTpl(
224
+ this.templatePath('alerts/terragrunt.hcl'),
225
+ this.destinationPath(terraPath),
226
+ this.answers,
227
+ );
228
+ }
229
+
230
+ const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
231
+ const newYaml = await handleAlerts(oldYaml, alertTemplates, this.answers);
232
+
233
+ fs.writeFileSync(yamlPath, yaml.dump(newYaml));
234
+ }
48
235
  }
49
236
 
50
237
  end() {
51
238
  this.log(`
52
239
  ${chalk.green('Your Monitoring resources have now been created.')}
53
- ${chalk.green('1.')} Push this change in a feature branch and open a pull request.`);
240
+ ${chalk.green('1.')} To finalize your configuration, please continue with manual editing of the generated files.
241
+ ${chalk.green('2.')} Push the changes in a feature branch and open a pull request.
242
+ `);
54
243
  }
55
244
  };
@@ -0,0 +1,257 @@
1
+ cloud_run:
2
+ error_count:
3
+ display_name: "[P3] <%-systemName%>.<%-serviceName%> | 5xx Error Request Count above 1"
4
+ conditions:
5
+ - display_name: Cloud Run Anthos - 5xx error Request Count above 1
6
+ condition_threshold:
7
+ filter: |
8
+ resource.type="knative_revision"
9
+ resource.labels.service_name="<%-serviceName%>"
10
+ metric.type="knative.dev/serving/revision/request_count"
11
+ metric.labels.response_code_class="5xx"
12
+ threshold_value: 1
13
+ aggregations:
14
+ - alignment_period: 60s
15
+ cross_series_reducer: REDUCE_SUM
16
+ group_by_fields:
17
+ - metric.label.response_code_class
18
+ per_series_aligner: ALIGN_DELTA
19
+ documentation:
20
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
21
+ error_rate:
22
+ display_name: "[P3] <%-systemName%>.<%-serviceName%> | High 5xx Error Rate"
23
+ conditions:
24
+ - display_name: Cloud Run Anthos - 3% of all requests during 10min are 5xx
25
+ condition_monitoring_query_language:
26
+ query: |
27
+ fetch knative_revision::knative.dev/serving/revision/request_count
28
+ | filter service_name = "<%-serviceName%>"
29
+ | align int_mean_aligner(10m)
30
+ | group_by [], sum(if(metric.response_code_class == '5xx', val(), 0)) / sum(val())
31
+ | condition val() > 0.03
32
+ | every 10m
33
+ documentation:
34
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
35
+ request_latency:
36
+ display_name: "[P3] <%-systemName%>.<%-serviceName%> | High Request Latency"
37
+ conditions:
38
+ - display_name: Cloud Run Anthos - Response Time (95%) above 1s for 5 min
39
+ condition_threshold:
40
+ filter: |
41
+ resource.type="knative_revision"
42
+ resource.labels.service_name="<%-serviceName%>"
43
+ metric.type="knative.dev/serving/revision/request_latencies"
44
+ threshold_value: 1000
45
+ duration: 300s
46
+ aggregations:
47
+ - alignment_period: 60s
48
+ cross_series_reducer: REDUCE_NONE
49
+ per_series_aligner: ALIGN_PERCENTILE_95
50
+ documentation:
51
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
52
+ cloud_scheduler:
53
+ failed_job:
54
+ display_name: "[P4] <%-systemName%> - Cloud Scheduler | <%-jobId%> - Job Failed"
55
+ conditions:
56
+ - display_name: Cloud Scheduler Job - Log entries with SEVERITY=Error exceed threshold
57
+ condition_threshold:
58
+ filter: |
59
+ resource.type="cloud_scheduler_job"
60
+ resource.labels.job_id="<%-jobId%>"
61
+ metric.type="logging.googleapis.com/log_entry_count"
62
+ metric.labels.severity="ERROR"
63
+ threshold_value: 1
64
+ aggregations:
65
+ - alignment_period: 60s
66
+ cross_series_reducer: REDUCE_NONE
67
+ per_series_aligner: ALIGN_COUNT
68
+ documentation:
69
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
70
+ cloud_sql:
71
+ cpu_over_65:
72
+ display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 65%"
73
+ conditions:
74
+ - display_name: Cloud SQL Database - CPU utilization above 65% over 5 min
75
+ condition_threshold:
76
+ filter: |
77
+ resource.type="cloudsql_database"
78
+ resource.labels.database_id="<%-databaseId%>"
79
+ metric.type="cloudsql.googleapis.com/database/cpu/utilization"
80
+ threshold_value: 0.65
81
+ duration: 300s
82
+ aggregations:
83
+ - alignment_period: 60s
84
+ cross_series_reducer: REDUCE_NONE
85
+ per_series_aligner: ALIGN_MAX
86
+ documentation:
87
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
88
+ cpu_over_85:
89
+ display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 85%"
90
+ conditions:
91
+ - display_name: "Cloud SQL Database - CPU-usage above 85% over 1 min"
92
+ condition_threshold:
93
+ filter: |
94
+ resource.type="cloudsql_database"
95
+ resource.labels.database_id="<%-databaseId%>"
96
+ metric.type="cloudsql.googleapis.com/database/cpu/utilization"
97
+ threshold_value: 0.85
98
+ duration: 60s
99
+ aggregations:
100
+ - alignment_period: 60s
101
+ cross_series_reducer: REDUCE_NONE
102
+ per_series_aligner: ALIGN_MAX
103
+ documentation:
104
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
105
+ cpu_over_90:
106
+ display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - CPU over 90%"
107
+ conditions:
108
+ - display_name: Cloud SQL Database - CPU-usage above 90%
109
+ condition_threshold:
110
+ filter: |
111
+ resource.type="cloudsql_database"
112
+ resource.labels.database_id="<%-databaseId%>"
113
+ metric.type="cloudsql.googleapis.com/database/cpu/utilization"
114
+ threshold_value: 0.9
115
+ aggregations:
116
+ - alignment_period: 60s
117
+ cross_series_reducer: REDUCE_NONE
118
+ per_series_aligner: ALIGN_MAX
119
+ documentation:
120
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
121
+ memory_over_50:
122
+ display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 50%"
123
+ conditions:
124
+ - display_name: Cloud SQL Database - Memory utilization above 50% over 5 min
125
+ condition_threshold:
126
+ filter: |
127
+ resource.type="cloudsql_database"
128
+ resource.labels.database_id="<%-databaseId%>"
129
+ metric.type="cloudsql.googleapis.com/database/memory/utilization"
130
+ threshold_value: 50
131
+ duration: 300s
132
+ aggregations:
133
+ - alignment_period: 60s
134
+ cross_series_reducer: REDUCE_NONE
135
+ per_series_aligner: ALIGN_MAX
136
+ documentation:
137
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
138
+ memory_over_75:
139
+ display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 75%"
140
+ conditions:
141
+ - display_name: Cloud SQL Database - Memory utilization above 75% over 5 min
142
+ condition_threshold:
143
+ filter: |
144
+ resource.type="cloudsql_database"
145
+ resource.labels.database_id="<%-databaseId%>"
146
+ metric.type="cloudsql.googleapis.com/database/memory/utilization"
147
+ threshold_value: 75
148
+ duration: 300s
149
+ aggregations:
150
+ - alignment_period: 60s
151
+ cross_series_reducer: REDUCE_NONE
152
+ per_series_aligner: ALIGN_MAX
153
+ documentation:
154
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
155
+ memory_over_90:
156
+ display_name: "[P3] <%-systemName%> - CloudSQL | <%-databaseId%> - Memory over 90%"
157
+ conditions:
158
+ - display_name: Cloud SQL Database - Memory utilization above 90%
159
+ condition_threshold:
160
+ filter: |
161
+ resource.type="cloudsql_database"
162
+ resource.labels.database_id="<%-databaseId%>"
163
+ metric.type="cloudsql.googleapis.com/database/memory/utilization"
164
+ threshold_value: 90
165
+ duration: 60s
166
+ aggregations:
167
+ - alignment_period: 60s
168
+ cross_series_reducer: REDUCE_NONE
169
+ per_series_aligner: ALIGN_MAX
170
+ documentation:
171
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
172
+ query_over_1s:
173
+ display_name: "[P4] <%-systemName%> - CloudSQL | <%-databaseId%> - Query resolve time"
174
+ conditions:
175
+ - display_name: Cloud SQL Instance Database - Per query execution times above 1000 ms
176
+ condition_threshold:
177
+ filter: |
178
+ resource.type="cloudsql_instance_database"
179
+ resource.labels.resource_id="<%-databaseId%>"
180
+ metric.type="cloudsql.googleapis.com/database/postgresql/insights/perquery/execution_time"
181
+ threshold_value: 1000000
182
+ aggregations:
183
+ - alignment_period: 60s
184
+ cross_series_reducer: REDUCE_NONE
185
+ per_series_aligner: ALIGN_DELTA
186
+ documentation:
187
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
188
+ memorystore:
189
+ memory_over_50:
190
+ display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 50%"
191
+ conditions:
192
+ - display_name: Memorystore Redis Instance - Memory Usage above 50% over 5 min
193
+ condition_threshold:
194
+ filter: |
195
+ resource.type="redis_instance"
196
+ resource.labels.instance_id="<%-instanceId%>"
197
+ metric.type="redis.googleapis.com/stats/memory/usage_ratio"
198
+ threshold_value: 0.5
199
+ duration: 300s
200
+ aggregations:
201
+ - alignment_period: 60s
202
+ cross_series_reducer: REDUCE_NONE
203
+ per_series_aligner: ALIGN_MAX
204
+ documentation:
205
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
206
+ memory_over_75:
207
+ display_name: "[P4] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 75%"
208
+ conditions:
209
+ - display_name: Memorystore Redis Instance - Memory Usage above 75% for 5min
210
+ condition_threshold:
211
+ filter: |
212
+ resource.type="redis_instance"
213
+ resource.labels.instance_id="<%-instanceId%>"
214
+ metric.type="redis.googleapis.com/stats/memory/usage_ratio"
215
+ threshold_value: 0.75
216
+ duration: 300s
217
+ aggregations:
218
+ - alignment_period: 60s
219
+ cross_series_reducer: REDUCE_NONE
220
+ per_series_aligner: ALIGN_MAX
221
+ documentation:
222
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
223
+ memory_over_90:
224
+ display_name: "[P2] <%-systemName%> - Memorystore | <%-instanceId%> - Memory over 90%"
225
+ conditions:
226
+ - display_name: Memorystore Redis Instance - Memory Usage above 90%
227
+ condition_threshold:
228
+ filter: |
229
+ resource.type="redis_instance"
230
+ resource.labels.instance_id="<%-instanceId%>"
231
+ metric.type="redis.googleapis.com/stats/memory/usage_ratio"
232
+ threshold_value: 0.90
233
+ duration: 60s
234
+ aggregations:
235
+ - alignment_period: 60s
236
+ cross_series_reducer: REDUCE_NONE
237
+ per_series_aligner: ALIGN_MAX
238
+ documentation:
239
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
240
+ pub_sub:
241
+ unacknowledged_messages:
242
+ display_name: "[P4] <%-systemName%> - Pub/Sub | <%-subscriptionId%> - Undelivered message(s)"
243
+ conditions:
244
+ - display_name: Cloud Pub/Sub Subscription - Undelivered messages above 1 for 5 min
245
+ condition_threshold:
246
+ filter: |
247
+ resource.type="pubsub_subscription"
248
+ resource.labels.subscription_id="<%-subscriptionId%>"
249
+ metric.type="pubsub.googleapis.com/subscription/num_undelivered_messages"
250
+ threshold_value: 1
251
+ duration: 300s
252
+ aggregations:
253
+ - alignment_period: 60s
254
+ cross_series_reducer: REDUCE_NONE
255
+ per_series_aligner: ALIGN_MEAN
256
+ documentation:
257
+ content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
@@ -0,0 +1,33 @@
1
+ # Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
2
+ # working directory, into a temporary folder, and execute your Terraform commands in that folder.
3
+ terraform {
4
+ source = "git::https://github.com/extenda/tf-module-gcp-alert-policy//?ref=v0.1.0"
5
+ }
6
+
7
+ # Include all settings from the root terragrunt.hcl file
8
+ include {
9
+ path = find_in_parent_folders("terragrunt_root.hcl")
10
+ }
11
+
12
+ dependency "notification_channels" {
13
+ config_path = "../notification-channels"
14
+ mock_outputs = {
15
+ notification_channels = ["dummy-channel"]
16
+ }
17
+ }
18
+
19
+ locals {
20
+ project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
21
+ common_vars = read_terragrunt_config(find_in_parent_folders("common.hcl"))
22
+ }
23
+
24
+ # These are the variables we have to pass in to use the module specified in the terragrunt configuration above
25
+ inputs = {
26
+ monitoring_project_id = local.project_vars.locals.monitoring_project_id,
27
+ notification_channels = dependency.notification_channels.outputs.notification_channels,
28
+ policies = yamldecode(file("${get_terragrunt_dir()}/alerts.yaml")),
29
+ user_labels = {
30
+ cc = local.common_vars.locals.cost_center
31
+ clan = local.common_vars.locals.clan_name
32
+ },
33
+ }
@@ -18,5 +18,7 @@ locals {
18
18
  inputs = merge(local.project_vars.locals, local.common_vars.locals,
19
19
  {
20
20
  clan_project_id = local.project_vars.locals.project_id
21
+ # Use var below if we decide to go with hiiretail-monitoring-prod project
22
+ #tribe_project_id = local.common_vars.locals.monitoring_project_id
21
23
  }
22
24
  )
@@ -0,0 +1,35 @@
1
+ <% if (sli === 'latency') { %>- display_name: Month - Latency
2
+ slo_id: month-latency
3
+ goal: 0.95
4
+ calendar_period: MONTH
5
+ type: request_based_sli
6
+ method: distribution_cut
7
+ metric_filter: |-
8
+ metric.type="knative.dev/serving/revision/request_latencies"
9
+ resource.type="knative_revision"
10
+ resource.labels.service_name="<%-serviceName%>"
11
+ range_min: 0
12
+ range_max: 100<% if (burnRateAlerts === 'no') { %>
13
+ alert: {}<% } %><% } %><% if (sli === 'availability') { %>- display_name: Month - Availability
14
+ slo_id: month-availability
15
+ goal: 0.998
16
+ calendar_period: MONTH
17
+ type: windows_based_sli
18
+ method: boolean_filter
19
+ window_period: 60s<% if (burnRateAlerts === 'no') { %>
20
+ alert: {}<% } %><% } %><% if (sli === 'error-rate') { %>- display_name: Month - Error rate
21
+ slo_id: month-error-rate
22
+ goal: 0.999
23
+ calendar_period: MONTH
24
+ type: request_based_sli
25
+ method: good_total_ratio
26
+ bad_service_filter: |-
27
+ metric.type="knative.dev/serving/revision/request_count"
28
+ resource.type="knative_revision"
29
+ metric.labels.response_code_class="5xx"
30
+ resource.labels.service_name="<%-serviceName%>"
31
+ total_service_filter: |-
32
+ metric.type="knative.dev/serving/revision/request_count"
33
+ resource.type="knative_revision"
34
+ resource.labels.service_name="<%-serviceName%>"<% if (burnRateAlerts === 'no') { %>
35
+ alert: {}<% } %><% } %>
@@ -0,0 +1,37 @@
1
+ # Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
2
+ # working directory, into a temporary folder, and execute your Terraform commands in that folder.
3
+ terraform {
4
+ source = "git::https://github.com/extenda/tf-module-gcp-slo//?ref=v0.1.0"
5
+ }
6
+
7
+ # Include all settings from the root terragrunt.hcl file
8
+ include {
9
+ path = find_in_parent_folders("terragrunt_root.hcl")
10
+ }
11
+
12
+ dependency "uptimecheck_id" {
13
+ config_path = "../../uptime-checks"
14
+ mock_outputs = {
15
+ uptime_check_ids = ["dummy-id"]
16
+ }
17
+ }
18
+
19
+ locals {
20
+ project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
21
+ }
22
+
23
+ # These are the variables we have to pass in to use the module specified in the terragrunt configuration above
24
+ inputs = merge(
25
+ local.project_vars.locals,
26
+ {
27
+ service_name = "<%-systemName%>.<%-serviceName%>"
28
+ slos = yamldecode(file("${get_terragrunt_dir()}/slos.yaml")),
29
+ telemetry_resource_name = "//container.googleapis.com/projects/${local.project_vars.locals.monitoring_project_id}/locations/europe-west1/clusters/k8s-cluster/k8s/namespaces/<%-serviceName%>"
30
+ <% if (sli === 'availability') { %>
31
+ metric_filter = {
32
+ "metric.type" = "monitoring.googleapis.com/uptime_check/check_passed"
33
+ "resource.type" = "uptime_url"
34
+ "metric.labels.check_id" = dependency.uptimecheck_id.outputs.uptime_check_ids["<%-systemName%>.<%-serviceName%>"]
35
+ }<% } %>
36
+ }
37
+ )
@@ -0,0 +1,36 @@
1
+ # Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
2
+ # working directory, into a temporary folder, and execute your Terraform commands in that folder.
3
+ terraform {
4
+ source = "git::https://github.com/extenda/tf-module-gcp-uptime-check//?ref=v0.1.0"
5
+ }
6
+
7
+ # Include all settings from the root terragrunt.hcl file
8
+ include {
9
+ path = find_in_parent_folders("terragrunt_root.hcl")
10
+ }
11
+
12
+ dependency "notification_channels" {
13
+ config_path = "../notification-channels"
14
+ mock_outputs = {
15
+ notification_channels = ["dummy-channel"]
16
+ }
17
+ }
18
+
19
+ locals {
20
+ project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
21
+ common_vars = read_terragrunt_config(find_in_parent_folders("common.hcl"))
22
+ }
23
+
24
+ # These are the variables we have to pass in to use the module specified in the terragrunt configuration above
25
+ inputs = merge(
26
+ local.project_vars.locals,
27
+ local.common_vars.locals,
28
+ {
29
+ notification_channels = dependency.notification_channels.outputs.notification_channels
30
+ uptime_checks = yamldecode(file("${get_terragrunt_dir()}/uptime-checks.yaml")),
31
+ labels = {
32
+ clan = local.common_vars.locals.clan_name
33
+ cc = local.common_vars.locals.cost_center
34
+ }
35
+ }
36
+ )
@@ -0,0 +1,3 @@
1
+ - service_name: <%-systemName%>.<%-serviceName%>
2
+ hostname: <%-hostname%>
3
+ path: <%-path%>
@@ -0,0 +1,25 @@
1
// Validators for the monitoring prompts. Each one returns `true` when the
// input is accepted, otherwise the error message to show to the user.
const helper = {};

/**
 * Validates a bare hostname made of two to four dot-separated labels.
 * Labels may contain lowercase letters, digits and hyphens; paths, schemes
 * and whitespace are rejected.
 *
 * @param {string} input - Hostname typed by the user.
 * @returns {true|string} `true` when valid, otherwise an error message.
 */
helper.validHostname = (input) => {
  const hostnamePattern = /^(?:[a-z0-9-]+\.){1,3}[a-z0-9-]+$/;
  if (hostnamePattern.test(input)) {
    return true;
  }
  return 'Hostname must not include path to the page to run the check against or spaces';
};

/**
 * Validates that the system name is exactly three non-whitespace characters.
 *
 * @param {string} input - System name typed by the user.
 * @returns {true|string} `true` when valid, otherwise an error message.
 */
helper.validSystemName = (input) => {
  // Whitespace is rejected rather than stripped, because the raw input is
  // what ends up in the generated files.
  if (/^\S{3}$/.test(input)) {
    return true;
  }
  return 'System name must be 3 characters';
};

/**
 * Validates an optional runbook link: empty input is accepted, anything else
 * must be a well-formed `https://` URL without whitespace.
 *
 * @param {string} input - URL typed by the user, or an empty string.
 * @returns {true|string} `true` when valid, otherwise an error message.
 */
helper.validUrl = (input) => {
  if (input === '') return true;
  if (!/\s/.test(input)) {
    try {
      if (new URL(input).protocol === 'https:') return true;
    } catch (err) {
      // Not parseable as a URL: fall through to the error message below.
    }
  }
  return 'Enter a valid URL';
};
24
+
25
+ module.exports = helper;
@@ -1,5 +1,5 @@
1
1
  terraform {
2
- source = "git::https://github.com/extenda/tf-module-gcp-project//?ref=v1.0.6"
2
+ source = "git::https://github.com/extenda/tf-module-gcp-project//?ref=v1.0.8"
3
3
  }
4
4
 
5
5
  dependency "tribe_folder" {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hiiretail/gcp-infra-cli",
3
- "version": "0.71.0",
3
+ "version": "0.74.0",
4
4
  "description": "Infrastructure as code generator for GCP.",
5
5
  "main": "src/cli.js",
6
6
  "bin": {