@hiiretail/gcp-infra-cli 0.77.1 → 0.78.0
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
|
@@ -6,10 +6,12 @@ const BaseGenerator = require('../../../src/BaseGenerator');
|
|
|
6
6
|
const { required } = require('../../../src/validators');
|
|
7
7
|
const validate = require('./validate');
|
|
8
8
|
const { handleSlos, handleAlerts, handleUptimeChecks } = require('./handle-yaml');
|
|
9
|
+
const { getProjectId } = require('../pubsub/get-gcp-projects');
|
|
9
10
|
|
|
10
11
|
const uptimeCheckTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/uptime-checks/uptime-checks.yaml`));
|
|
11
12
|
const alertTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/alerts/alerts.yaml`));
|
|
12
13
|
const sloTemplates = yaml.load(fs.readFileSync(`${__dirname}/templates/slos/slos.yaml`));
|
|
14
|
+
const projectId = getProjectId('prod');
|
|
13
15
|
|
|
14
16
|
module.exports = class extends BaseGenerator {
|
|
15
17
|
async prompting() {
|
|
@@ -54,32 +56,11 @@ module.exports = class extends BaseGenerator {
|
|
|
54
56
|
validate: required && validate.confluenceUrl,
|
|
55
57
|
},
|
|
56
58
|
{
|
|
57
|
-
when: (response) => response.
|
|
58
|
-
type: '
|
|
59
|
-
name: '
|
|
60
|
-
message: 'Please
|
|
61
|
-
|
|
62
|
-
},
|
|
63
|
-
{
|
|
64
|
-
when: (response) => response.alertResource === 'cloud_sql',
|
|
65
|
-
type: 'input',
|
|
66
|
-
name: 'databaseId',
|
|
67
|
-
message: 'Please provide the "database id"',
|
|
68
|
-
validate: required && validate.databaseId,
|
|
69
|
-
},
|
|
70
|
-
{
|
|
71
|
-
when: (response) => response.alertResource === 'memorystore',
|
|
72
|
-
type: 'input',
|
|
73
|
-
name: 'instanceId',
|
|
74
|
-
message: 'Please provide the "instance id"',
|
|
75
|
-
validate: required && validate.instanceID,
|
|
76
|
-
},
|
|
77
|
-
{
|
|
78
|
-
when: (response) => response.alertResource === 'pub_sub',
|
|
79
|
-
type: 'input',
|
|
80
|
-
name: 'subscriptionId',
|
|
81
|
-
message: 'Please provide the "subscription id"',
|
|
82
|
-
validate: required && validate.pubSubSubscription,
|
|
59
|
+
when: (response) => response.monitoringResource === 'alerts',
|
|
60
|
+
type: 'list',
|
|
61
|
+
name: 'projectId',
|
|
62
|
+
message: 'Please select clan project id',
|
|
63
|
+
choices: [`${projectId}`],
|
|
83
64
|
},
|
|
84
65
|
{
|
|
85
66
|
when: (response) => response.monitoringResource === 'uptime-checks',
|
|
@@ -165,7 +146,6 @@ module.exports = class extends BaseGenerator {
|
|
|
165
146
|
const yamlPath = `${resourceDir}/alerts.yaml`;
|
|
166
147
|
|
|
167
148
|
copyTemplate('alerts', resourceDir, yamlPath);
|
|
168
|
-
|
|
169
149
|
const oldYaml = yaml.load(fs.readFileSync(yamlPath, 'utf8')) || [];
|
|
170
150
|
const newYaml = await handleAlerts(oldYaml, alertTemplates, this.answers);
|
|
171
151
|
|
|
@@ -8,6 +8,7 @@ cloud_run:
|
|
|
8
8
|
resource.type="knative_revision"
|
|
9
9
|
resource.labels.service_name="<%-serviceName%>"
|
|
10
10
|
metric.type="knative.dev/serving/revision/request_latencies"
|
|
11
|
+
resource.labels.project_id="<%-projectId%>"
|
|
11
12
|
threshold_value: 1000
|
|
12
13
|
duration: 300s
|
|
13
14
|
aggregations:
|
|
@@ -17,147 +18,210 @@ cloud_run:
|
|
|
17
18
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
18
19
|
cloud_scheduler:
|
|
19
20
|
failed_job:
|
|
20
|
-
display_name: "[P4] <%-systemName%> - Cloud Scheduler |
|
|
21
|
+
display_name: "[P4] <%-systemName%> - Cloud Scheduler | Job Failed"
|
|
21
22
|
conditions:
|
|
22
23
|
- display_name: Cloud Scheduler Job - Log entries with SEVERITY=Error exceed threshold
|
|
23
24
|
condition_threshold:
|
|
24
25
|
filter: |
|
|
25
26
|
resource.type="cloud_scheduler_job"
|
|
26
|
-
resource.labels.job_id="<%-jobId%>"
|
|
27
27
|
metric.type="logging.googleapis.com/log_entry_count"
|
|
28
28
|
metric.labels.severity="ERROR"
|
|
29
|
+
resource.labels.project_id="<%-projectId%>"
|
|
29
30
|
threshold_value: 1
|
|
30
31
|
aggregations:
|
|
31
32
|
- alignment_period: 60s
|
|
32
33
|
per_series_aligner: ALIGN_COUNT
|
|
34
|
+
group_by_fields: ["resource.label.job_id"]
|
|
33
35
|
documentation:
|
|
34
36
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
35
37
|
cloud_sql:
|
|
36
38
|
cpu_over_65:
|
|
37
|
-
display_name: "[P3] <%-systemName%> - CloudSQL |
|
|
39
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | CPU over 65%"
|
|
38
40
|
conditions:
|
|
39
41
|
- display_name: Cloud SQL Database - CPU utilization above 65% over 5 min
|
|
40
42
|
condition_threshold:
|
|
41
43
|
filter: |
|
|
42
44
|
resource.type="cloudsql_database"
|
|
43
|
-
resource.labels.database_id="<%-databaseId%>"
|
|
44
45
|
metric.type="cloudsql.googleapis.com/database/cpu/utilization"
|
|
46
|
+
resource.labels.project_id="<%-projectId%>"
|
|
45
47
|
threshold_value: 0.65
|
|
46
48
|
duration: 300s
|
|
47
49
|
aggregations:
|
|
48
50
|
- alignment_period: 60s
|
|
49
51
|
per_series_aligner: ALIGN_MAX
|
|
52
|
+
group_by_fields: ["resource.label.database_id"]
|
|
50
53
|
documentation:
|
|
51
54
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
52
55
|
cpu_over_85:
|
|
53
|
-
display_name: "[P3] <%-systemName%> - CloudSQL |
|
|
56
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | CPU over 85%"
|
|
54
57
|
conditions:
|
|
55
58
|
- display_name: "Cloud SQL Database - CPU-usage above 85% over 1 min"
|
|
56
59
|
condition_threshold:
|
|
57
60
|
filter: |
|
|
58
61
|
resource.type="cloudsql_database"
|
|
59
|
-
resource.labels.database_id="<%-databaseId%>"
|
|
60
62
|
metric.type="cloudsql.googleapis.com/database/cpu/utilization"
|
|
63
|
+
resource.labels.project_id="<%-projectId%>"
|
|
61
64
|
threshold_value: 0.85
|
|
62
65
|
duration: 60s
|
|
63
66
|
aggregations:
|
|
64
67
|
- alignment_period: 60s
|
|
65
68
|
per_series_aligner: ALIGN_MAX
|
|
69
|
+
group_by_fields: ["resource.label.database_id"]
|
|
66
70
|
documentation:
|
|
67
71
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
68
72
|
cpu_over_90:
|
|
69
|
-
display_name: "[P3] <%-systemName%> - CloudSQL |
|
|
73
|
+
display_name: "[P3] <%-systemName%> - CloudSQL | CPU over 90%"
|
|
70
74
|
conditions:
|
|
71
75
|
- display_name: Cloud SQL Database - CPU-usage above 90%
|
|
72
76
|
condition_threshold:
|
|
73
77
|
filter: |
|
|
74
78
|
resource.type="cloudsql_database"
|
|
75
|
-
resource.labels.database_id="<%-databaseId%>"
|
|
76
79
|
metric.type="cloudsql.googleapis.com/database/cpu/utilization"
|
|
80
|
+
resource.labels.project_id="<%-projectId%>"
|
|
77
81
|
threshold_value: 0.9
|
|
78
82
|
aggregations:
|
|
79
83
|
- alignment_period: 60s
|
|
80
84
|
per_series_aligner: ALIGN_MAX
|
|
85
|
+
group_by_fields: ["resource.label.database_id"]
|
|
81
86
|
documentation:
|
|
82
87
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
83
88
|
query_over_1s:
|
|
84
|
-
display_name: "[P4] <%-systemName%> - CloudSQL |
|
|
89
|
+
display_name: "[P4] <%-systemName%> - CloudSQL | Query resolve time"
|
|
85
90
|
conditions:
|
|
86
91
|
- display_name: Cloud SQL Instance Database - Per query execution times above 1000 ms
|
|
87
92
|
condition_threshold:
|
|
88
93
|
filter: |
|
|
89
94
|
resource.type="cloudsql_instance_database"
|
|
90
|
-
resource.labels.resource_id="<%-databaseId%>"
|
|
91
95
|
metric.type="cloudsql.googleapis.com/database/postgresql/insights/perquery/execution_time"
|
|
96
|
+
resource.labels.project_id="<%-projectId%>"
|
|
92
97
|
threshold_value: 1000000
|
|
93
98
|
aggregations:
|
|
94
99
|
- alignment_period: 60s
|
|
95
100
|
per_series_aligner: ALIGN_DELTA
|
|
101
|
+
group_by_fields: ["resource.label.resource_id"]
|
|
96
102
|
documentation:
|
|
97
103
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
98
104
|
memorystore:
|
|
99
105
|
memory_over_50:
|
|
100
|
-
display_name: "[P4] <%-systemName%> - Memorystore |
|
|
106
|
+
display_name: "[P4] <%-systemName%> - Memorystore | Memory over 50%"
|
|
101
107
|
conditions:
|
|
102
108
|
- display_name: Memorystore Redis Instance - Memory Usage above 50% over 5 min
|
|
103
109
|
condition_threshold:
|
|
104
110
|
filter: |
|
|
105
111
|
resource.type="redis_instance"
|
|
106
|
-
resource.labels.instance_id="<%-instanceId%>"
|
|
107
112
|
metric.type="redis.googleapis.com/stats/memory/usage_ratio"
|
|
113
|
+
resource.labels.project_id="<%-projectId%>"
|
|
108
114
|
threshold_value: 0.5
|
|
109
115
|
duration: 300s
|
|
110
116
|
aggregations:
|
|
111
117
|
- alignment_period: 60s
|
|
112
118
|
per_series_aligner: ALIGN_MAX
|
|
119
|
+
group_by_fields: ["resource.label.instance_id"]
|
|
113
120
|
documentation:
|
|
114
121
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
115
122
|
memory_over_75:
|
|
116
|
-
display_name: "[P4] <%-systemName%> - Memorystore |
|
|
123
|
+
display_name: "[P4] <%-systemName%> - Memorystore | Memory over 75%"
|
|
117
124
|
conditions:
|
|
118
125
|
- display_name: Memorystore Redis Instance - Memory Usage above 75% for 5min
|
|
119
126
|
condition_threshold:
|
|
120
127
|
filter: |
|
|
121
128
|
resource.type="redis_instance"
|
|
122
|
-
resource.labels.instance_id="<%-instanceId%>"
|
|
123
129
|
metric.type="redis.googleapis.com/stats/memory/usage_ratio"
|
|
130
|
+
resource.labels.project_id="<%-projectId%>"
|
|
124
131
|
threshold_value: 0.75
|
|
125
132
|
duration: 300s
|
|
126
133
|
aggregations:
|
|
127
134
|
- alignment_period: 60s
|
|
128
135
|
per_series_aligner: ALIGN_MAX
|
|
136
|
+
group_by_fields: ["resource.label.instance_id"]
|
|
129
137
|
documentation:
|
|
130
138
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
131
139
|
memory_over_90:
|
|
132
|
-
display_name: "[P2] <%-systemName%> - Memorystore |
|
|
140
|
+
display_name: "[P2] <%-systemName%> - Memorystore | Memory over 90%"
|
|
133
141
|
conditions:
|
|
134
142
|
- display_name: Memorystore Redis Instance - Memory Usage above 90%
|
|
135
143
|
condition_threshold:
|
|
136
144
|
filter: |
|
|
137
145
|
resource.type="redis_instance"
|
|
138
|
-
resource.labels.instance_id="<%-instanceId%>"
|
|
139
146
|
metric.type="redis.googleapis.com/stats/memory/usage_ratio"
|
|
147
|
+
resource.labels.project_id="<%-projectId%>"
|
|
140
148
|
threshold_value: 0.90
|
|
141
149
|
duration: 60s
|
|
142
150
|
aggregations:
|
|
143
151
|
- alignment_period: 60s
|
|
144
152
|
per_series_aligner: ALIGN_MAX
|
|
153
|
+
group_by_fields: ["resource.label.instance_id"]
|
|
145
154
|
documentation:
|
|
146
155
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
147
156
|
pub_sub:
|
|
148
157
|
unacknowledged_messages:
|
|
149
|
-
display_name: "[
|
|
158
|
+
display_name: "[P3] <%-systemName%> - Pub/Sub | Undelivered message(s)"
|
|
150
159
|
conditions:
|
|
151
160
|
- display_name: Cloud Pub/Sub Subscription - Undelivered messages above 1 for 5 min
|
|
152
161
|
condition_threshold:
|
|
153
162
|
filter: |
|
|
154
163
|
resource.type="pubsub_subscription"
|
|
155
|
-
resource.labels.subscription_id="<%-subscriptionId%>"
|
|
156
164
|
metric.type="pubsub.googleapis.com/subscription/num_undelivered_messages"
|
|
165
|
+
resource.labels.project_id="<%-projectId%>"
|
|
157
166
|
threshold_value: 1
|
|
158
167
|
duration: 300s
|
|
159
168
|
aggregations:
|
|
160
169
|
- alignment_period: 60s
|
|
161
170
|
per_series_aligner: ALIGN_MEAN
|
|
171
|
+
group_by_fields: ["resource.label.subscription_id"]
|
|
172
|
+
documentation:
|
|
173
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
174
|
+
messages_in_dlq:
|
|
175
|
+
display_name: "[P3] <%-systemName%> - Pub/Sub | Message(s) in DLQ"
|
|
176
|
+
conditions:
|
|
177
|
+
- display_name: Cloud Pub/Sub Subscription - Number of undelivered message(s) forwarded to DLQ
|
|
178
|
+
condition_threshold:
|
|
179
|
+
filter: |
|
|
180
|
+
resource.type="pubsub_subscription"
|
|
181
|
+
metric.type="pubsub.googleapis.com/subscription/dead_letter_message_count"
|
|
182
|
+
resource.labels.project_id="<%-projectId%>"
|
|
183
|
+
threshold_value: 0
|
|
184
|
+
duration: 60s
|
|
185
|
+
aggregations:
|
|
186
|
+
- alignment_period: 60s
|
|
187
|
+
per_series_aligner: ALIGN_COUNT
|
|
188
|
+
group_by_fields: ["resource.label.subscription_id"]
|
|
189
|
+
documentation:
|
|
190
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
191
|
+
latency:
|
|
192
|
+
display_name: "[P3] <%-systemName%> - Pub/Sub | Response latency distribution"
|
|
193
|
+
conditions:
|
|
194
|
+
- display_name: Cloud Pub/Sub Subscription - Latency above 3s
|
|
195
|
+
condition_threshold:
|
|
196
|
+
filter: |
|
|
197
|
+
resource.type="pubsub_subscription"
|
|
198
|
+
metric.type="pubsub.googleapis.com/subscription/push_request_latencies"
|
|
199
|
+
resource.labels.project_id="<%-projectId%>"
|
|
200
|
+
threshold_value: 3000000
|
|
201
|
+
duration: 120s
|
|
202
|
+
aggregations:
|
|
203
|
+
- alignment_period: 60s
|
|
204
|
+
per_series_aligner: ALIGN_DELTA
|
|
205
|
+
cross_series_reducer: REDUCE_PERCENTILE_95
|
|
206
|
+
group_by_fields: ["resource.label.subscription_id"]
|
|
207
|
+
documentation:
|
|
208
|
+
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|
|
209
|
+
cloud_function:
|
|
210
|
+
failed_execution:
|
|
211
|
+
display_name: "[P2] <%-systemName%> - Cloud Function | Failed job execution"
|
|
212
|
+
conditions:
|
|
213
|
+
- display_name: Cloud Function - Execution error count
|
|
214
|
+
condition_threshold:
|
|
215
|
+
filter: |
|
|
216
|
+
resource.type="cloud_function"
|
|
217
|
+
metric.type="cloudfunctions.googleapis.com/function/execution_count"
|
|
218
|
+
metric.label.status!="ok"
|
|
219
|
+
resource.labels.project_id="<%-projectId%>"
|
|
220
|
+
threshold_value: 0
|
|
221
|
+
duration: 60s
|
|
222
|
+
aggregations:
|
|
223
|
+
- alignment_period: 60s
|
|
224
|
+
per_series_aligner: ALIGN_COUNT
|
|
225
|
+
group_by_fields: ["metric.label.status", "resource.label.function_name"]
|
|
162
226
|
documentation:
|
|
163
227
|
content: <% if (runbookLink) { %>[Runbook](<%-runbookLink%>)<%} else { %> <% } %>
|