@hiiretail/gcp-infra-cli 0.94.0 → 0.95.0

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -5,30 +5,43 @@ function cleanAlerts(alerts) {
5
5
  const copy = [...alerts];
6
6
  return copy.map((obj) => {
7
7
  /* eslint-disable no-param-reassign */
8
- if (_.get(obj, 'documentation.content') === ' ') delete obj.documentation;
9
8
  if (_.get(obj, 'enabled') === true) delete obj.enabled;
10
9
 
11
10
  return obj;
12
11
  });
13
12
  }
14
13
 
14
+ const handleAlert = (templates, answers) => {
15
+ const alerts = [];
16
+
17
+ Object.entries(templates).forEach(([, template]) => {
18
+ const newAlert = JSON.parse(ejs.render(JSON.stringify(template), answers));
19
+ alerts.push(newAlert);
20
+ });
21
+
22
+ const cleanedAlerts = cleanAlerts(alerts);
23
+
24
+ return Array.isArray(cleanedAlerts) ? cleanedAlerts : [cleanedAlerts];
25
+ };
26
+
15
27
  const handleAlerts = (alerts, templates, answers) => {
16
- const template = templates[`${answers.alert}`];
17
- const newAlert = JSON.parse(ejs.render(JSON.stringify(template), answers));
28
+ const newAlertsList = [];
29
+
30
+ const newAlerts = handleAlert(templates, answers);
18
31
 
19
- alerts.push(newAlert);
20
- alerts = cleanAlerts(alerts);
21
- return alerts;
32
+ newAlertsList.push(...newAlerts);
33
+ return newAlertsList;
22
34
  };
23
35
 
24
36
  const handleUptimeChecks = (slos, templates, answers) => {
25
- const newCheck = JSON.parse(ejs.render(JSON.stringify(templates), answers));
37
+ const newUptimeCheck = JSON.parse(ejs.render(JSON.stringify(templates), answers));
26
38
 
27
- slos.push(newCheck);
39
+ slos.push(newUptimeCheck);
28
40
  return slos;
29
41
  };
30
42
 
31
43
  module.exports = {
44
+ handleAlert,
32
45
  handleAlerts,
33
46
  handleUptimeChecks,
34
47
  cleanAlerts,
@@ -1,24 +1,30 @@
1
1
  const path = require('path');
2
2
  const chalk = require('chalk');
3
3
  const fs = require('fs');
4
+ const ejs = require('ejs');
4
5
  const yaml = require('js-yaml');
5
6
  const BaseGenerator = require('../../../src/BaseGenerator');
6
7
  const { required } = require('../../../src/validators');
7
8
  const validate = require('./validate');
8
- const { handleAlerts, handleUptimeChecks } = require('./handle-yaml');
9
+ const {
10
+ handleAlerts,
11
+ handleUptimeChecks,
12
+ cleanAlerts,
13
+ } = require('./handle-yaml');
9
14
 
10
15
  const uptimeCheckTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/uptime-checks/uptime-checks.yaml`));
11
- const alertTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/alerts/service.yaml`));
16
+ const alertTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/alerts/generic-infra.yaml`));
12
17
 
18
+ /* istanbul ignore next */
13
19
  module.exports = class extends BaseGenerator {
14
20
  async prompting() {
15
21
  this.answers = await this.prompt([
16
22
  {
17
23
  type: 'list',
18
24
  name: 'monitoringResource',
19
- message: 'Select the resource you want to create',
25
+ message: 'Select the resource you want to create:',
20
26
  choices: [
21
- // 'alerts', // Temporarily disabled due to migration to GKE Autopilot
27
+ 'alerts',
22
28
  'uptime-checks',
23
29
  ],
24
30
  },
@@ -26,55 +32,108 @@ module.exports = class extends BaseGenerator {
26
32
  when: (response) => response.monitoringResource === 'alerts',
27
33
  type: 'list',
28
34
  name: 'alert',
35
+ message: 'Select the category of alerts you want to create:',
29
36
  choices: Object.keys(alertTemplates),
30
37
  },
38
+
39
+ {
40
+ when: (response) => response.monitoringResource === 'alerts',
41
+ type: 'list',
42
+ name: 'allOrSpecific',
43
+ message: 'Do you want to create a specific alert or all recommended alerts?',
44
+ choices: (response) => [
45
+ `All (${Object.keys(alertTemplates[response.alert] || {}).length})`,
46
+ 'Specific',
47
+ ],
48
+ },
49
+ {
50
+ when: (response) => response.monitoringResource === 'alerts' && response.allOrSpecific === 'Specific',
51
+ type: 'list',
52
+ name: 'specificAlert',
53
+ message: (response) => `Select the specific alert you want to create for ${response.alert}:`,
54
+ choices: (response) => Object.keys(alertTemplates[response.alert]),
55
+ },
56
+ {
57
+ when: (response) => response.monitoringResource === 'alerts',
58
+ type: 'input',
59
+ name: 'clan',
60
+ message: 'Please provide the clan name (ex: SRE, PNP, IAM...):',
61
+ default: () => {
62
+ const gitPath = path.resolve(process.cwd(), '.git');
63
+ if (fs.existsSync(gitPath)) {
64
+ const repoName = path.basename(process.cwd());
65
+ const regex = /-([a-z]{3})-/i;
66
+ const match = regex.exec(repoName);
67
+ return match ? match[1].toUpperCase() : '';
68
+ }
69
+ return '';
70
+ },
71
+ validate: required && validate.clan,
72
+ },
73
+ {
74
+ when: (response) => response.monitoringResource === 'alerts',
75
+ type: 'input',
76
+ name: 'projectId',
77
+ message: 'Please provide the project ID:',
78
+ validate: required && validate.projectId,
79
+ },
31
80
  {
32
81
  when: (response) => ['uptime-checks', 'alerts'].includes(response.monitoringResource),
33
82
  type: 'input',
34
83
  name: 'systemName',
35
- message: 'Please provide three-letter system name as defined in Styra (example: sre, ptf, sda, che, pnp, iam...)',
84
+ message: 'Please provide three-letter system name (ex: sre, pnp, iam...):',
85
+ default: () => {
86
+ const gitPath = path.resolve(process.cwd(), '.git');
87
+ if (fs.existsSync(gitPath)) {
88
+ const repoName = path.basename(process.cwd());
89
+ const regex = /-([a-z]{3})-/i;
90
+ const match = regex.exec(repoName);
91
+ return match ? match[1] : '';
92
+ }
93
+ return '';
94
+ },
36
95
  validate: required && validate.systemName,
37
96
  },
38
97
  {
39
- when: (response) => ['uptime-checks', 'alerts'].includes(response.monitoringResource),
98
+ when: (response) => response.monitoringResource === 'uptime-checks',
40
99
  type: 'input',
41
100
  name: 'serviceName',
42
- message: 'Please provide the namespace where the service resides',
43
- validate: required,
101
+ message: 'Please provide the namespace where the service resides:',
102
+ default: (response) => `${response.systemName}-service`,
103
+ validate: required && validate.serviceName,
44
104
  },
45
105
  {
46
- when: (response) => response.monitoringResource === 'alerts',
106
+ when: (response) => response.monitoringResource === 'uptime-checks',
47
107
  type: 'input',
48
- name: 'runbookLink',
49
- message: 'Please provide the full URL to your runbook in confluence (Leave empty if none)',
50
- validate: required && validate.confluenceUrl,
108
+ name: 'hostname',
109
+ message: 'Please provide the base hostname of the service (ex: my-service.retailsvc.com):',
110
+ validate: required && validate.hostname,
51
111
  },
52
112
  {
53
113
  when: (response) => response.monitoringResource === 'uptime-checks',
54
114
  type: 'input',
55
- name: 'hostname',
56
- message: 'Please provide the base hostname of the service (example: my-service.retailsvc.com)',
57
- validate: required && validate.hostName,
115
+ name: 'runbookLink',
116
+ message: 'Please provide the full URL to your runbook in confluence (Leave empty if none):',
117
+ validate: required && validate.runbookLink,
58
118
  },
59
119
  {
60
120
  when: (response) => response.monitoringResource === 'uptime-checks',
61
121
  type: 'input',
62
122
  name: 'path',
63
- message: 'Please provide the path/endpoint to run the check against',
64
- default: '/health',
65
- validate: required,
123
+ message: 'Please provide the path/endpoint to run the check against (ex: /health):',
124
+ validate: required && validate.path,
66
125
  },
67
126
  ]);
68
127
  }
69
128
 
70
129
  async writing() {
71
- const { monitoringResource, serviceName } = this.answers;
130
+ const { monitoringResource } = this.answers;
72
131
  const resourceDir = path.join(process.cwd(), 'infra', 'prod', 'monitoring', monitoringResource);
73
132
 
74
133
  const copyTemplate = (resource, resourcePath, yamlPath) => {
75
134
  if (!fs.existsSync(resourcePath)) fs.mkdirSync(resourcePath, { recursive: true });
76
- if (!fs.existsSync(yamlPath)) fs.writeFileSync(yamlPath, '');
77
- if (!fs.existsSync(`${path}/terragrunt.hcl`)) {
135
+ if (!fs.existsSync(yamlPath)) fs.appendFileSync(yamlPath, '');
136
+ if (!fs.existsSync(`${resourcePath}/terragrunt.hcl`)) {
78
137
  this.fs.copyTpl(
79
138
  this.templatePath(`${resource}/terragrunt.hcl`),
80
139
  this.destinationPath(`${resourcePath}/terragrunt.hcl`),
@@ -83,15 +142,48 @@ module.exports = class extends BaseGenerator {
83
142
  }
84
143
  };
85
144
 
86
- if (monitoringResource === 'alerts') {
87
- const yamlPath = `${resourceDir}/${serviceName}.yaml`;
145
+ /* istanbul ignore next */
146
+ if (monitoringResource === 'alerts' && this.answers.allOrSpecific === 'Specific') {
147
+ const yamlPath = `${resourceDir}/base-specific.yaml`;
88
148
 
89
149
  copyTemplate('alerts', resourceDir, yamlPath);
90
150
 
91
151
  const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
92
- const newYaml = await handleAlerts(oldYaml, alertTemplates, this.answers);
152
+ const template = alertTemplates[this.answers.alert][this.answers.specificAlert];
153
+ const newAlert = JSON.parse(ejs.render(JSON.stringify(template), this.answers));
93
154
 
94
- fs.writeFileSync(yamlPath, yaml.dump(newYaml, { lineWidth: 250, noArrayIndent: true }));
155
+ oldYaml.push(newAlert);
156
+ const cleanedAlerts = cleanAlerts(oldYaml);
157
+
158
+ fs.writeFileSync(
159
+ yamlPath,
160
+ yaml.dump(cleanedAlerts, {
161
+ lineWidth: 250,
162
+ noArrayIndent: false,
163
+ }),
164
+ );
165
+ }
166
+
167
+ /* istanbul ignore next */
168
+ if (monitoringResource === 'alerts' && this.answers.allOrSpecific === `All (${Object.keys(alertTemplates[this.answers.alert] || {}).length})`) {
169
+ const yamlPath = `${resourceDir}/base.yaml`;
170
+
171
+ copyTemplate('alerts', resourceDir, yamlPath);
172
+
173
+ const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
174
+ const newAlerts = handleAlerts(oldYaml, alertTemplates[this.answers.alert], this.answers);
175
+ const currentContent = fs.readFileSync(yamlPath, 'utf8');
176
+
177
+ if (currentContent !== '') {
178
+ fs.appendFileSync(yamlPath, '\n');
179
+ }
180
+
181
+ newAlerts.forEach((alert, index) => {
182
+ fs.appendFileSync(yamlPath, yaml.dump([alert], { lineWidth: 250, noArrayIndent: false }));
183
+ if (index !== newAlerts.length - 1) {
184
+ fs.appendFileSync(yamlPath, '\n');
185
+ }
186
+ });
95
187
  }
96
188
 
97
189
  if (monitoringResource === 'uptime-checks') {
@@ -108,9 +200,10 @@ module.exports = class extends BaseGenerator {
108
200
 
109
201
  end() {
110
202
  this.log(`
111
- ${chalk.green('Your Monitoring resources have now been created.')}
112
- ${chalk.green('1.')} To finalize your configuration, please continue with manual editing of the generated files.
113
- ${chalk.green('2.')} Push the changes in a feature branch and open a pull request.
203
+ ${chalk.yellow('Your monitoring resources have now been created.')}
204
+ ${chalk.green('1.')} Please note that these are just example configurations and might need further adjustments.
205
+ ${chalk.green('2.')} To finalize your configuration, please continue with manual editing of the generated files.
206
+ ${chalk.green('3.')} Push the changes in a feature branch and open a pull request.
114
207
  `);
115
208
  }
116
209
  };
@@ -13,9 +13,30 @@ cloud_scheduler:
13
13
  aggregations:
14
14
  - alignment_period: 60s
15
15
  per_series_aligner: ALIGN_COUNT
16
- group_by_fields: ["resource.label.job_id"]
16
+ group_by_fields:
17
+ - resource.label.job_id
17
18
  documentation:
18
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
19
+ subject: 'Job: `$${resource.label.job_id}`'
20
+ failed_executions:
21
+ display_name: "[P3] <%-clan%> - Cloud Scheduler | Failed Executions"
22
+ conditions:
23
+ - display_name: Cloud Scheduler - Failed Executions
24
+ condition_threshold:
25
+ filter: |
26
+ resource.type="cloud_scheduler_job"
27
+ metric.type="logging.googleapis.com/log_entry_count"
28
+ metric.labels.log="cloudscheduler.googleapis.com/executions"
29
+ metric.labels.severity="ERROR"
30
+ resource.labels.project_id="<%-projectId%>"
31
+ threshold_value: 1
32
+ duration: 300s
33
+ aggregations:
34
+ - alignment_period: 60s
35
+ per_series_aligner: ALIGN_COUNT
36
+ group_by_fields:
37
+ - resource.label.job_id
38
+ documentation:
39
+ subject: 'Job: `$${resource.label.job_id}`'
19
40
  cloud_sql:
20
41
  cpu_over_65:
21
42
  display_name: "[P3] <%-clan%> - CloudSQL | CPU over 65%"
@@ -31,9 +52,10 @@ cloud_sql:
31
52
  aggregations:
32
53
  - alignment_period: 60s
33
54
  per_series_aligner: ALIGN_MAX
34
- group_by_fields: ["resource.label.database_id"]
55
+ group_by_fields:
56
+ - resource.label.database_id
35
57
  documentation:
36
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
58
+ subject: 'Database: `$${resource.label.database_id}`'
37
59
  cpu_over_85:
38
60
  display_name: "[P3] <%-clan%> - CloudSQL | CPU over 85%"
39
61
  conditions:
@@ -48,9 +70,10 @@ cloud_sql:
48
70
  aggregations:
49
71
  - alignment_period: 60s
50
72
  per_series_aligner: ALIGN_MAX
51
- group_by_fields: ["resource.label.database_id"]
73
+ group_by_fields:
74
+ - resource.label.database_id
52
75
  documentation:
53
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
76
+ subject: 'Database: `$${resource.label.database_id}`'
54
77
  cpu_over_90:
55
78
  display_name: "[P3] <%-clan%> - CloudSQL | CPU over 90%"
56
79
  conditions:
@@ -64,9 +87,10 @@ cloud_sql:
64
87
  aggregations:
65
88
  - alignment_period: 60s
66
89
  per_series_aligner: ALIGN_MAX
67
- group_by_fields: ["resource.label.database_id"]
90
+ group_by_fields:
91
+ - resource.label.database_id
68
92
  documentation:
69
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
93
+ subject: 'Database: `$${resource.label.database_id}`'
70
94
  memory_over_90:
71
95
  display_name: "[P2] <%-clan%> - CloudSQL | Memory utilization above 90%"
72
96
  conditions:
@@ -81,9 +105,10 @@ cloud_sql:
81
105
  aggregations:
82
106
  - alignment_period: 60s
83
107
  per_series_aligner: ALIGN_MAX
84
- group_by_fields: ["resource.label.database_id"]
108
+ group_by_fields:
109
+ - resource.label.database_id
85
110
  documentation:
86
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
111
+ subject: 'Database: `$${resource.label.database_id}`'
87
112
  query_over_1s:
88
113
  display_name: "[P4] <%-clan%> - CloudSQL | Slow query"
89
114
  conditions:
@@ -102,7 +127,43 @@ cloud_sql:
102
127
  - metric.label.querystring
103
128
  - resource.label.resource_id
104
129
  documentation:
105
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
130
+ subject: 'Database: `$${resource.label.resource_id}`'
131
+ postgresql_connections:
132
+ display_name: "[P3] <%-clan%> - CloudSQL | PostgreSQL Connections"
133
+ conditions:
134
+ - display_name: Cloud SQL Database - PostgreSQL Connections
135
+ condition_threshold:
136
+ filter: |
137
+ resource.type="cloudsql_database"
138
+ metric.type="cloudsql.googleapis.com/database/postgresql/num_backends"
139
+ resource.labels.project_id="<%-projectId%>"
140
+ threshold_value: 50
141
+ duration: 300s
142
+ aggregations:
143
+ - alignment_period: 60s
144
+ per_series_aligner: ALIGN_MAX
145
+ group_by_fields:
146
+ - resource.label.database_id
147
+ documentation:
148
+ subject: 'Database: `$${resource.label.database_id}`'
149
+ aggregated_lock_time:
150
+ display_name: "[P4] <%-clan%> - CloudSQL | Aggregated Lock Time"
151
+ conditions:
152
+ - display_name: Cloud SQL Database - Aggregated Lock Time
153
+ condition_threshold:
154
+ filter: |
155
+ resource.type="cloudsql_database"
156
+ metric.type="cloudsql.googleapis.com/database/postgresql/insights/aggregate/lock_time"
157
+ resource.labels.project_id="<%-projectId%>"
158
+ threshold_value: 1000
159
+ duration: 300s
160
+ aggregations:
161
+ - alignment_period: 60s
162
+ per_series_aligner: ALIGN_MAX
163
+ group_by_fields:
164
+ - resource.label.database_id
165
+ documentation:
166
+ subject: 'Database: `$${resource.label.database_id}`'
106
167
  memorystore:
107
168
  memory_over_50:
108
169
  display_name: "[P4] <%-clan%> - Memorystore | Memory over 50%"
@@ -118,9 +179,10 @@ memorystore:
118
179
  aggregations:
119
180
  - alignment_period: 60s
120
181
  per_series_aligner: ALIGN_MAX
121
- group_by_fields: ["resource.label.instance_id"]
182
+ group_by_fields:
183
+ - resource.label.instance_id
122
184
  documentation:
123
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
185
+ subject: 'Instance: `$${resource.label.instance_id}`'
124
186
  memory_over_75:
125
187
  display_name: "[P4] <%-clan%> - Memorystore | Memory over 75%"
126
188
  conditions:
@@ -135,9 +197,10 @@ memorystore:
135
197
  aggregations:
136
198
  - alignment_period: 60s
137
199
  per_series_aligner: ALIGN_MAX
138
- group_by_fields: ["resource.label.instance_id"]
200
+ group_by_fields:
201
+ - resource.label.instance_id
139
202
  documentation:
140
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
203
+ subject: 'Instance: `$${resource.label.instance_id}`'
141
204
  memory_over_90:
142
205
  display_name: "[P2] <%-clan%> - Memorystore | Memory over 90%"
143
206
  conditions:
@@ -152,9 +215,65 @@ memorystore:
152
215
  aggregations:
153
216
  - alignment_period: 60s
154
217
  per_series_aligner: ALIGN_MAX
155
- group_by_fields: ["resource.label.instance_id"]
218
+ group_by_fields:
219
+ - resource.label.instance_id
220
+ documentation:
221
+ subject: 'Instance: `$${resource.label.instance_id}`'
222
+ cpu_utilization:
223
+ display_name: "[P2] <%-clan%> - Memorystore | CPU Utilization"
224
+ conditions:
225
+ - display_name: Memorystore - CPU Utilization
226
+ condition_threshold:
227
+ filter: |
228
+ resource.type="redis_instance"
229
+ metric.type="redis.googleapis.com/stats/cpu_utilization"
230
+ resource.labels.project_id="<%-projectId%>"
231
+ threshold_value: 0.8
232
+ duration: 300s
233
+ aggregations:
234
+ - alignment_period: 60s
235
+ per_series_aligner: ALIGN_MEAN
236
+ group_by_fields:
237
+ - resource.label.instance_id
238
+ documentation:
239
+ subject: 'Instance: `$${resource.label.instance_id}`'
240
+ system_memory_overload_duration:
241
+ display_name: "[P1] <%-clan%> - Memorystore | System Memory Overload Duration"
242
+ conditions:
243
+ - display_name: Memorystore - System Memory Overload Duration
244
+ condition_threshold:
245
+ filter: |
246
+ resource.type="redis_instance"
247
+ metric.type="redis.googleapis.com/stats/memory/system_memory_overload_duration"
248
+ resource.labels.project_id="<%-projectId%>"
249
+ threshold_value: 60
250
+ duration: 300s
251
+ aggregations:
252
+ - alignment_period: 60s
253
+ per_series_aligner: ALIGN_SUM
254
+ group_by_fields:
255
+ - resource.label.instance_id
256
+ documentation:
257
+ subject: 'Instance: `$${resource.label.instance_id}`'
258
+ calls:
259
+ display_name: "[P3] <%-clan%> - Memorystore | Calls"
260
+ conditions:
261
+ - display_name: Memorystore - Calls
262
+ condition_threshold:
263
+ filter: |
264
+ resource.type="redis_instance"
265
+ metric.type="redis.googleapis.com/commands/calls"
266
+ resource.labels.project_id="<%-projectId%>"
267
+ resource.labels.role="primary"
268
+ threshold_value: 1000
269
+ duration: 300s
270
+ aggregations:
271
+ - alignment_period: 60s
272
+ per_series_aligner: ALIGN_SUM
273
+ group_by_fields:
274
+ - resource.label.instance_id
156
275
  documentation:
157
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
276
+ subject: 'Instance: `$${resource.label.instance_id}`'
158
277
  pub_sub:
159
278
  unacknowledged_messages:
160
279
  display_name: "[P3] <%-clan%> - Pub/Sub | Undelivered message(s)"
@@ -170,9 +289,10 @@ pub_sub:
170
289
  aggregations:
171
290
  - alignment_period: 60s
172
291
  per_series_aligner: ALIGN_MEAN
173
- group_by_fields: ["resource.label.subscription_id"]
292
+ group_by_fields:
293
+ - resource.label.subscription_id
174
294
  documentation:
175
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
295
+ subject: 'Subscription: `$${resource.label.subscription_id}`'
176
296
  messages_in_dlq:
177
297
  display_name: "[P3] <%-clan%> - Pub/Sub | Message(s) in DLQ"
178
298
  conditions:
@@ -187,9 +307,10 @@ pub_sub:
187
307
  aggregations:
188
308
  - alignment_period: 60s
189
309
  per_series_aligner: ALIGN_COUNT
190
- group_by_fields: ["resource.label.subscription_id"]
310
+ group_by_fields:
311
+ - resource.label.subscription_id
191
312
  documentation:
192
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
313
+ subject: 'Subscription: `$${resource.label.subscription_id}`'
193
314
  latency:
194
315
  display_name: "[P3] <%-clan%> - Pub/Sub | Response latency distribution"
195
316
  conditions:
@@ -205,9 +326,85 @@ pub_sub:
205
326
  - alignment_period: 60s
206
327
  per_series_aligner: ALIGN_DELTA
207
328
  cross_series_reducer: REDUCE_PERCENTILE_95
208
- group_by_fields: ["resource.label.subscription_id"]
329
+ group_by_fields:
330
+ - resource.label.subscription_id
331
+ documentation:
332
+ subject: 'Subscription: `$${resource.label.subscription_id}`'
333
+ oldest_unacked_message_age:
334
+ display_name: "[P3] <%-clan%> - Pub/Sub | Oldest unacked message age"
335
+ conditions:
336
+ - display_name: Cloud Pub/Sub Subscription - Oldest unacked message age
337
+ condition_threshold:
338
+ filter: |
339
+ resource.type="pubsub_subscription"
340
+ metric.type="pubsub.googleapis.com/subscription/oldest_unacked_message_age"
341
+ resource.labels.project_id="<%-projectId%>"
342
+ threshold_value: 3600
343
+ duration: 300s
344
+ aggregations:
345
+ - alignment_period: 60s
346
+ per_series_aligner: ALIGN_MEAN
347
+ group_by_fields:
348
+ - resource.label.subscription_id
349
+ documentation:
350
+ subject: 'Subscription: `$${resource.label.subscription_id}`'
351
+ push_requests_ack:
352
+ display_name: "[P3] <%-clan%> - Pub/Sub | Push requests ack"
353
+ conditions:
354
+ - display_name: Cloud Pub/Sub Subscription - Push requests ack
355
+ condition_threshold:
356
+ filter: |
357
+ resource.type="pubsub_subscription"
358
+ metric.type="pubsub.googleapis.com/subscription/push_request_count"
359
+ metric.label.response_class!="ack"
360
+ resource.labels.project_id="<%-projectId%>"
361
+ threshold_value: 50
362
+ duration: 300s
363
+ aggregations:
364
+ - alignment_period: 60s
365
+ per_series_aligner: ALIGN_DELTA
366
+ group_by_fields:
367
+ - resource.label.subscription_id
209
368
  documentation:
210
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
369
+ subject: 'Subscription: `$${resource.label.subscription_id}`'
370
+ push_requests_success:
371
+ display_name: "[P3] <%-clan%> - Pub/Sub | Push requests success"
372
+ conditions:
373
+ - display_name: Cloud Pub/Sub Subscription - Push requests success
374
+ condition_threshold:
375
+ filter: |
376
+ resource.type="pubsub_subscription"
377
+ metric.type="pubsub.googleapis.com/subscription/push_request_count"
378
+ metric.label.response_code!="200"
379
+ resource.labels.project_id="<%-projectId%>"
380
+ threshold_value: 50
381
+ duration: 300s
382
+ aggregations:
383
+ - alignment_period: 60s
384
+ per_series_aligner: ALIGN_DELTA
385
+ group_by_fields:
386
+ - resource.label.subscription_id
387
+ documentation:
388
+ subject: 'Subscription: `$${resource.label.subscription_id}`'
389
+ delivery_latency_health_score:
390
+ display_name: "[P3] <%-clan%> - Pub/Sub | Delivery latency health score"
391
+ conditions:
392
+ - display_name: Cloud Pub/Sub Subscription - Delivery latency health score above 0.5
393
+ condition_threshold:
394
+ filter: |
395
+ resource.type="pubsub_subscription"
396
+ metric.type="pubsub.googleapis.com/subscription/delivery_latency_health_score"
397
+ resource.labels.project_id="<%-projectId%>"
398
+ threshold_value: 0.5
399
+ duration: 300s
400
+ aggregations:
401
+ - alignment_period: 60s
402
+ per_series_aligner: ALIGN_DELTA
403
+ cross_series_reducer: REDUCE_PERCENTILE_95
404
+ group_by_fields:
405
+ - resource.label.subscription_id
406
+ documentation:
407
+ subject: 'Subscription: `$${resource.label.subscription_id}`'
211
408
  cloud_function:
212
409
  failed_execution:
213
410
  display_name: "[P2] <%-clan%> - Cloud Function | Failed job execution"
@@ -228,4 +425,149 @@ cloud_function:
228
425
  - metric.label.status
229
426
  - resource.label.function_name
230
427
  documentation:
231
- content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
428
+ subject: 'Function: `$${resource.label.function_name}`'
429
+ cloud_run:
430
+ error_count:
431
+ display_name: "[P3] <%-clan%> - Cloud Run | Error Count"
432
+ conditions:
433
+ - display_name: Cloud Run - Error Count
434
+ condition_threshold:
435
+ filter: |
436
+ resource.type="cloud_run_revision"
437
+ metric.type="run.googleapis.com/request_count"
438
+ metric.label.response_code_class="5xx"
439
+ resource.labels.project_id="<%-projectId%>"
440
+ threshold_value: 50
441
+ aggregations:
442
+ - alignment_period: 60s
443
+ per_series_aligner: ALIGN_SUM
444
+ group_by_fields:
445
+ - resource.label.service_name
446
+ documentation:
447
+ subject: 'Service: `$${resource.label.service_name}`'
448
+ request_latency:
449
+ display_name: "[P3] <%-clan%> - Cloud Run | Request Latency"
450
+ conditions:
451
+ - display_name: Cloud Run - Request Latency
452
+ condition_threshold:
453
+ filter: |
454
+ resource.type="cloud_run_revision"
455
+ metric.type="run.googleapis.com/request_latencies"
456
+ resource.labels.project_id="<%-projectId%>"
457
+ threshold_value: 1000
458
+ aggregations:
459
+ - alignment_period: 60s
460
+ per_series_aligner: ALIGN_PERCENTILE_95
461
+ group_by_fields:
462
+ - resource.label.service_name
463
+ documentation:
464
+ subject: 'Service: `$${resource.label.service_name}`'
465
+ cpu_utilization:
466
+ display_name: "[P3] <%-clan%> - Cloud Run | CPU Utilization"
467
+ conditions:
468
+ - display_name: Cloud Run - CPU Utilization
469
+ condition_threshold:
470
+ filter: |
471
+ resource.type="cloud_run_revision"
472
+ metric.type="run.googleapis.com/container/cpu/utilization"
473
+ resource.labels.project_id="<%-projectId%>"
474
+ threshold_value: 0.8
475
+ duration: 300s
476
+ aggregations:
477
+ - alignment_period: 60s
478
+ per_series_aligner: ALIGN_MEAN
479
+ group_by_fields:
480
+ - resource.label.service_name
481
+ documentation:
482
+ subject: 'Service: `$${resource.label.service_name}`'
483
+ memory_utilization:
484
+ display_name: "[P3] <%-clan%> - Cloud Run | Memory Utilization"
485
+ conditions:
486
+ - display_name: Cloud Run - Memory Utilization
487
+ condition_threshold:
488
+ filter: |
489
+ resource.type="cloud_run_revision"
490
+ metric.type="run.googleapis.com/container/memory/utilization"
491
+ resource.labels.project_id="<%-projectId%>"
492
+ threshold_value: 0.8
493
+ duration: 300s
494
+ aggregations:
495
+ - alignment_period: 60s
496
+ per_series_aligner: ALIGN_MEAN
497
+ group_by_fields:
498
+ - resource.label.service_name
499
+ documentation:
500
+ subject: 'Service: `$${resource.label.service_name}`'
501
+ startup_latency:
502
+ display_name: "[P4] <%-clan%> - Cloud Run | Startup Latency"
503
+ conditions:
504
+ - display_name: Cloud Run - Startup Latency
505
+ condition_threshold:
506
+ filter: |
507
+ resource.type="cloud_run_revision"
508
+ metric.type="run.googleapis.com/container/startup_latencies"
509
+ resource.labels.project_id="<%-projectId%>"
510
+ threshold_value: 1000
511
+ duration: 300s
512
+ aggregations:
513
+ - alignment_period: 60s
514
+ per_series_aligner: ALIGN_PERCENTILE_95
515
+ group_by_fields:
516
+ - resource.label.service_name
517
+ documentation:
518
+ subject: 'Service: `$${resource.label.service_name}`'
519
+ spanner:
520
+ cpu_utilization_by_priority:
521
+ display_name: "[P1] <%-clan%> - Spanner | CPU Utilization by Priority"
522
+ conditions:
523
+ - display_name: Spanner - CPU Utilization by Priority
524
+ condition_threshold:
525
+ filter: |
526
+ resource.type="spanner_instance"
527
+ metric.type="spanner.googleapis.com/instance/cpu/utilization_by_priority"
528
+ resource.labels.project_id="<%-projectId%>"
529
+ threshold_value: 0.9
530
+ duration: 300s
531
+ aggregations:
532
+ - alignment_period: 60s
533
+ per_series_aligner: ALIGN_MEAN
534
+ group_by_fields:
535
+ - resource.label.instance_id
536
+ documentation:
537
+ subject: 'Instance: `$${resource.label.instance_id}`'
538
+ api_request_count:
539
+ display_name: "[P2] <%-clan%> - Spanner | API Request Count"
540
+ conditions:
541
+ - display_name: Spanner - API Request Count
542
+ condition_threshold:
543
+ filter: |
544
+ resource.type="spanner_instance"
545
+ metric.type="spanner.googleapis.com/api/api_request_count"
546
+ resource.labels.project_id="<%-projectId%>"
547
+ threshold_value: 1000
548
+ duration: 300s
549
+ aggregations:
550
+ - alignment_period: 60s
551
+ per_series_aligner: ALIGN_SUM
552
+ group_by_fields:
553
+ - resource.label.instance_id
554
+ documentation:
555
+ subject: 'Instance: `$${resource.label.instance_id}`'
556
+ request_latencies:
557
+ display_name: "[P3] <%-clan%> - Spanner | Request Latencies"
558
+ conditions:
559
+ - display_name: Spanner - Request Latencies
560
+ condition_threshold:
561
+ filter: |
562
+ resource.type="spanner_instance"
563
+ metric.type="spanner.googleapis.com/api/request_latencies"
564
+ resource.labels.project_id="<%-projectId%>"
565
+ threshold_value: 500
566
+ duration: 300s
567
+ aggregations:
568
+ - alignment_period: 60s
569
+ per_series_aligner: ALIGN_MEAN
570
+ group_by_fields:
571
+ - resource.label.instance_id
572
+ documentation:
573
+ subject: 'Instance: `$${resource.label.instance_id}`'
@@ -0,0 +1,37 @@
1
+ # Terragrunt will copy the Terraform configurations specified by the source parameter, along with any files in the
2
+ # working directory, into a temporary folder, and execute your Terraform commands in that folder.
3
+ terraform {
4
+ source = "git::https://github.com/extenda/tf-module-gcp-alert-policy//?ref=v2.0.0"
5
+ }
6
+
7
+ # Include all settings from the root terragrunt.hcl file
8
+ include {
9
+ path = find_in_parent_folders("terragrunt_root.hcl")
10
+ }
11
+
12
+ dependency "notification_channels" {
13
+ config_path = "../notification-channels"
14
+ mock_outputs = {
15
+ notification_channels = ["dummy-channel"]
16
+ }
17
+ }
18
+
19
+ locals {
20
+ project_vars = read_terragrunt_config(find_in_parent_folders("project.hcl"))
21
+ common_vars = read_terragrunt_config(find_in_parent_folders("common.hcl"))
22
+ alerts = flatten([for file in fileset(get_terragrunt_dir(), "./*.yaml") : yamldecode(file(file))])
23
+ labels = merge(local.common_vars.locals.default_user_labels, local.project_vars.locals.default_user_labels, {
24
+ component = local.common_vars.locals.component
25
+ product = local.common_vars.locals.product
26
+ tenant-alias = local.common_vars.locals.tenant_alias
27
+ })
28
+ }
29
+
30
+ # These are the variables we have to pass in to use the module specified in the terragrunt configuration above
31
+ inputs = {
32
+ project = local.project_vars.locals.project_id,
33
+ policies = local.alerts,
34
+ notification_channel_ids = dependency.notification_channels.outputs.notification_channels,
35
+ fallback_notification_channels = dependency.notification_channels.outputs.fallback_channels_ids,
36
+ default_user_labels = local.labels,
37
+ }
@@ -10,7 +10,17 @@ const isUrl = (str) => {
10
10
  }
11
11
  };
12
12
 
13
- validate.hostName = (input) => {
13
+ validate.clan = (input) => {
14
+ if (!hasWhitespace(input)) return true;
15
+ return 'Clan name must not include spaces';
16
+ };
17
+
18
+ validate.projectId = (input) => {
19
+ if (input.trim() !== '' && !hasWhitespace(input)) return true;
20
+ return 'Project ID must not be empty or contain spaces';
21
+ };
22
+
23
+ validate.hostname = (input) => {
14
24
  if (input.split('.').at(-1) === 'com' && !hasWhitespace(input)) return true;
15
25
  return 'Hostname must not include path to the page to run the check against or spaces';
16
26
  };
@@ -40,4 +50,9 @@ validate.pubSubSubscription = (input) => {
40
50
  return 'You must enter the full subscription path (example: projects/example/subscriptions/subscriptionId)';
41
51
  };
42
52
 
53
+ validate.path = (input) => {
54
+ if (input.includes('/') && !hasWhitespace(input)) return true;
55
+ return 'Path must contain a slash and not include spaces';
56
+ };
57
+
43
58
  module.exports = validate;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hiiretail/gcp-infra-cli",
3
- "version": "0.94.0",
3
+ "version": "0.95.0",
4
4
  "description": "Infrastructure as code generator for GCP.",
5
5
  "main": "src/cli.js",
6
6
  "bin": {