@checkstack/healthcheck-backend 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # @checkstack/healthcheck-backend
2
2
 
3
+ ## 0.6.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 11d2679: Add the ability to pause health check configurations globally. When paused, health checks continue to be scheduled but execution is skipped for all systems using that configuration. Users with manage access can pause/resume from the Health Checks config page.
8
+ - cce5453: Add notification suppression for incidents
9
+
10
+ - Added `suppressNotifications` field to incidents, allowing active incidents to optionally suppress health check notifications
11
+ - When enabled, health status change notifications will not be sent for affected systems while the incident is active (not resolved)
12
+ - Mirrors the existing maintenance notification suppression pattern
13
+ - Added toggle UI in the IncidentEditor dialog
14
+ - Added `hasActiveIncidentWithSuppression` RPC endpoint for service-to-service queries
15
+
16
+ ### Patch Changes
17
+
18
+ - Updated dependencies [11d2679]
19
+ - Updated dependencies [cce5453]
20
+ - @checkstack/healthcheck-common@0.6.0
21
+ - @checkstack/incident-common@0.4.0
22
+
3
23
  ## 0.5.0
4
24
 
5
25
  ### Minor Changes
@@ -0,0 +1 @@
1
+ ALTER TABLE "health_check_configurations" ADD COLUMN "paused" boolean DEFAULT false NOT NULL;
@@ -0,0 +1,420 @@
1
+ {
2
+ "id": "86171dc8-efcc-4246-a95a-665fdefb1a1f",
3
+ "prevId": "bb50b71f-3f81-4cb2-aac6-7e7564060fa1",
4
+ "version": "7",
5
+ "dialect": "postgresql",
6
+ "tables": {
7
+ "public.health_check_aggregates": {
8
+ "name": "health_check_aggregates",
9
+ "schema": "",
10
+ "columns": {
11
+ "id": {
12
+ "name": "id",
13
+ "type": "uuid",
14
+ "primaryKey": true,
15
+ "notNull": true,
16
+ "default": "gen_random_uuid()"
17
+ },
18
+ "configuration_id": {
19
+ "name": "configuration_id",
20
+ "type": "uuid",
21
+ "primaryKey": false,
22
+ "notNull": true
23
+ },
24
+ "system_id": {
25
+ "name": "system_id",
26
+ "type": "text",
27
+ "primaryKey": false,
28
+ "notNull": true
29
+ },
30
+ "bucket_start": {
31
+ "name": "bucket_start",
32
+ "type": "timestamp",
33
+ "primaryKey": false,
34
+ "notNull": true
35
+ },
36
+ "bucket_size": {
37
+ "name": "bucket_size",
38
+ "type": "bucket_size",
39
+ "typeSchema": "public",
40
+ "primaryKey": false,
41
+ "notNull": true
42
+ },
43
+ "run_count": {
44
+ "name": "run_count",
45
+ "type": "integer",
46
+ "primaryKey": false,
47
+ "notNull": true
48
+ },
49
+ "healthy_count": {
50
+ "name": "healthy_count",
51
+ "type": "integer",
52
+ "primaryKey": false,
53
+ "notNull": true
54
+ },
55
+ "degraded_count": {
56
+ "name": "degraded_count",
57
+ "type": "integer",
58
+ "primaryKey": false,
59
+ "notNull": true
60
+ },
61
+ "unhealthy_count": {
62
+ "name": "unhealthy_count",
63
+ "type": "integer",
64
+ "primaryKey": false,
65
+ "notNull": true
66
+ },
67
+ "latency_sum_ms": {
68
+ "name": "latency_sum_ms",
69
+ "type": "integer",
70
+ "primaryKey": false,
71
+ "notNull": false
72
+ },
73
+ "avg_latency_ms": {
74
+ "name": "avg_latency_ms",
75
+ "type": "integer",
76
+ "primaryKey": false,
77
+ "notNull": false
78
+ },
79
+ "min_latency_ms": {
80
+ "name": "min_latency_ms",
81
+ "type": "integer",
82
+ "primaryKey": false,
83
+ "notNull": false
84
+ },
85
+ "max_latency_ms": {
86
+ "name": "max_latency_ms",
87
+ "type": "integer",
88
+ "primaryKey": false,
89
+ "notNull": false
90
+ },
91
+ "p95_latency_ms": {
92
+ "name": "p95_latency_ms",
93
+ "type": "integer",
94
+ "primaryKey": false,
95
+ "notNull": false
96
+ },
97
+ "aggregated_result": {
98
+ "name": "aggregated_result",
99
+ "type": "jsonb",
100
+ "primaryKey": false,
101
+ "notNull": false
102
+ }
103
+ },
104
+ "indexes": {
105
+ "health_check_aggregates_bucket_unique": {
106
+ "name": "health_check_aggregates_bucket_unique",
107
+ "columns": [
108
+ {
109
+ "expression": "configuration_id",
110
+ "isExpression": false,
111
+ "asc": true,
112
+ "nulls": "last"
113
+ },
114
+ {
115
+ "expression": "system_id",
116
+ "isExpression": false,
117
+ "asc": true,
118
+ "nulls": "last"
119
+ },
120
+ {
121
+ "expression": "bucket_start",
122
+ "isExpression": false,
123
+ "asc": true,
124
+ "nulls": "last"
125
+ },
126
+ {
127
+ "expression": "bucket_size",
128
+ "isExpression": false,
129
+ "asc": true,
130
+ "nulls": "last"
131
+ }
132
+ ],
133
+ "isUnique": true,
134
+ "concurrently": false,
135
+ "method": "btree",
136
+ "with": {}
137
+ }
138
+ },
139
+ "foreignKeys": {
140
+ "health_check_aggregates_configuration_id_health_check_configurations_id_fk": {
141
+ "name": "health_check_aggregates_configuration_id_health_check_configurations_id_fk",
142
+ "tableFrom": "health_check_aggregates",
143
+ "tableTo": "health_check_configurations",
144
+ "columnsFrom": [
145
+ "configuration_id"
146
+ ],
147
+ "columnsTo": [
148
+ "id"
149
+ ],
150
+ "onDelete": "cascade",
151
+ "onUpdate": "no action"
152
+ }
153
+ },
154
+ "compositePrimaryKeys": {},
155
+ "uniqueConstraints": {},
156
+ "policies": {},
157
+ "checkConstraints": {},
158
+ "isRLSEnabled": false
159
+ },
160
+ "public.health_check_configurations": {
161
+ "name": "health_check_configurations",
162
+ "schema": "",
163
+ "columns": {
164
+ "id": {
165
+ "name": "id",
166
+ "type": "uuid",
167
+ "primaryKey": true,
168
+ "notNull": true,
169
+ "default": "gen_random_uuid()"
170
+ },
171
+ "name": {
172
+ "name": "name",
173
+ "type": "text",
174
+ "primaryKey": false,
175
+ "notNull": true
176
+ },
177
+ "strategy_id": {
178
+ "name": "strategy_id",
179
+ "type": "text",
180
+ "primaryKey": false,
181
+ "notNull": true
182
+ },
183
+ "config": {
184
+ "name": "config",
185
+ "type": "jsonb",
186
+ "primaryKey": false,
187
+ "notNull": true
188
+ },
189
+ "collectors": {
190
+ "name": "collectors",
191
+ "type": "jsonb",
192
+ "primaryKey": false,
193
+ "notNull": false
194
+ },
195
+ "interval_seconds": {
196
+ "name": "interval_seconds",
197
+ "type": "integer",
198
+ "primaryKey": false,
199
+ "notNull": true
200
+ },
201
+ "is_template": {
202
+ "name": "is_template",
203
+ "type": "boolean",
204
+ "primaryKey": false,
205
+ "notNull": false,
206
+ "default": false
207
+ },
208
+ "paused": {
209
+ "name": "paused",
210
+ "type": "boolean",
211
+ "primaryKey": false,
212
+ "notNull": true,
213
+ "default": false
214
+ },
215
+ "created_at": {
216
+ "name": "created_at",
217
+ "type": "timestamp",
218
+ "primaryKey": false,
219
+ "notNull": true,
220
+ "default": "now()"
221
+ },
222
+ "updated_at": {
223
+ "name": "updated_at",
224
+ "type": "timestamp",
225
+ "primaryKey": false,
226
+ "notNull": true,
227
+ "default": "now()"
228
+ }
229
+ },
230
+ "indexes": {},
231
+ "foreignKeys": {},
232
+ "compositePrimaryKeys": {},
233
+ "uniqueConstraints": {},
234
+ "policies": {},
235
+ "checkConstraints": {},
236
+ "isRLSEnabled": false
237
+ },
238
+ "public.health_check_runs": {
239
+ "name": "health_check_runs",
240
+ "schema": "",
241
+ "columns": {
242
+ "id": {
243
+ "name": "id",
244
+ "type": "uuid",
245
+ "primaryKey": true,
246
+ "notNull": true,
247
+ "default": "gen_random_uuid()"
248
+ },
249
+ "configuration_id": {
250
+ "name": "configuration_id",
251
+ "type": "uuid",
252
+ "primaryKey": false,
253
+ "notNull": true
254
+ },
255
+ "system_id": {
256
+ "name": "system_id",
257
+ "type": "text",
258
+ "primaryKey": false,
259
+ "notNull": true
260
+ },
261
+ "status": {
262
+ "name": "status",
263
+ "type": "health_check_status",
264
+ "typeSchema": "public",
265
+ "primaryKey": false,
266
+ "notNull": true
267
+ },
268
+ "latency_ms": {
269
+ "name": "latency_ms",
270
+ "type": "integer",
271
+ "primaryKey": false,
272
+ "notNull": false
273
+ },
274
+ "result": {
275
+ "name": "result",
276
+ "type": "jsonb",
277
+ "primaryKey": false,
278
+ "notNull": false
279
+ },
280
+ "timestamp": {
281
+ "name": "timestamp",
282
+ "type": "timestamp",
283
+ "primaryKey": false,
284
+ "notNull": true,
285
+ "default": "now()"
286
+ }
287
+ },
288
+ "indexes": {},
289
+ "foreignKeys": {
290
+ "health_check_runs_configuration_id_health_check_configurations_id_fk": {
291
+ "name": "health_check_runs_configuration_id_health_check_configurations_id_fk",
292
+ "tableFrom": "health_check_runs",
293
+ "tableTo": "health_check_configurations",
294
+ "columnsFrom": [
295
+ "configuration_id"
296
+ ],
297
+ "columnsTo": [
298
+ "id"
299
+ ],
300
+ "onDelete": "cascade",
301
+ "onUpdate": "no action"
302
+ }
303
+ },
304
+ "compositePrimaryKeys": {},
305
+ "uniqueConstraints": {},
306
+ "policies": {},
307
+ "checkConstraints": {},
308
+ "isRLSEnabled": false
309
+ },
310
+ "public.system_health_checks": {
311
+ "name": "system_health_checks",
312
+ "schema": "",
313
+ "columns": {
314
+ "system_id": {
315
+ "name": "system_id",
316
+ "type": "text",
317
+ "primaryKey": false,
318
+ "notNull": true
319
+ },
320
+ "configuration_id": {
321
+ "name": "configuration_id",
322
+ "type": "uuid",
323
+ "primaryKey": false,
324
+ "notNull": true
325
+ },
326
+ "enabled": {
327
+ "name": "enabled",
328
+ "type": "boolean",
329
+ "primaryKey": false,
330
+ "notNull": true,
331
+ "default": true
332
+ },
333
+ "state_thresholds": {
334
+ "name": "state_thresholds",
335
+ "type": "jsonb",
336
+ "primaryKey": false,
337
+ "notNull": false
338
+ },
339
+ "retention_config": {
340
+ "name": "retention_config",
341
+ "type": "jsonb",
342
+ "primaryKey": false,
343
+ "notNull": false
344
+ },
345
+ "created_at": {
346
+ "name": "created_at",
347
+ "type": "timestamp",
348
+ "primaryKey": false,
349
+ "notNull": true,
350
+ "default": "now()"
351
+ },
352
+ "updated_at": {
353
+ "name": "updated_at",
354
+ "type": "timestamp",
355
+ "primaryKey": false,
356
+ "notNull": true,
357
+ "default": "now()"
358
+ }
359
+ },
360
+ "indexes": {},
361
+ "foreignKeys": {
362
+ "system_health_checks_configuration_id_health_check_configurations_id_fk": {
363
+ "name": "system_health_checks_configuration_id_health_check_configurations_id_fk",
364
+ "tableFrom": "system_health_checks",
365
+ "tableTo": "health_check_configurations",
366
+ "columnsFrom": [
367
+ "configuration_id"
368
+ ],
369
+ "columnsTo": [
370
+ "id"
371
+ ],
372
+ "onDelete": "cascade",
373
+ "onUpdate": "no action"
374
+ }
375
+ },
376
+ "compositePrimaryKeys": {
377
+ "system_health_checks_system_id_configuration_id_pk": {
378
+ "name": "system_health_checks_system_id_configuration_id_pk",
379
+ "columns": [
380
+ "system_id",
381
+ "configuration_id"
382
+ ]
383
+ }
384
+ },
385
+ "uniqueConstraints": {},
386
+ "policies": {},
387
+ "checkConstraints": {},
388
+ "isRLSEnabled": false
389
+ }
390
+ },
391
+ "enums": {
392
+ "public.bucket_size": {
393
+ "name": "bucket_size",
394
+ "schema": "public",
395
+ "values": [
396
+ "hourly",
397
+ "daily"
398
+ ]
399
+ },
400
+ "public.health_check_status": {
401
+ "name": "health_check_status",
402
+ "schema": "public",
403
+ "values": [
404
+ "healthy",
405
+ "unhealthy",
406
+ "degraded"
407
+ ]
408
+ }
409
+ },
410
+ "schemas": {},
411
+ "sequences": {},
412
+ "roles": {},
413
+ "policies": {},
414
+ "views": {},
415
+ "_meta": {
416
+ "columns": {},
417
+ "schemas": {},
418
+ "tables": {}
419
+ }
420
+ }
@@ -57,6 +57,13 @@
57
57
  "when": 1768921130785,
58
58
  "tag": "0007_tense_misty_knight",
59
59
  "breakpoints": true
60
+ },
61
+ {
62
+ "idx": 8,
63
+ "version": "7",
64
+ "when": 1768934529918,
65
+ "tag": "0008_broad_black_tom",
66
+ "breakpoints": true
60
67
  }
61
68
  ]
62
69
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@checkstack/healthcheck-backend",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "type": "module",
5
5
  "main": "src/index.ts",
6
6
  "scripts": {
@@ -16,6 +16,7 @@
16
16
  "@checkstack/healthcheck-common": "workspace:*",
17
17
  "@checkstack/integration-backend": "workspace:*",
18
18
  "@checkstack/maintenance-common": "workspace:*",
19
+ "@checkstack/incident-common": "workspace:*",
19
20
  "@checkstack/queue-api": "workspace:*",
20
21
  "@checkstack/signal-common": "workspace:*",
21
22
  "@checkstack/command-backend": "workspace:*",
package/src/index.ts CHANGED
@@ -24,6 +24,7 @@ import { HealthCheckService } from "./service";
24
24
  import { catalogHooks } from "@checkstack/catalog-backend";
25
25
  import { CatalogApi } from "@checkstack/catalog-common";
26
26
  import { MaintenanceApi } from "@checkstack/maintenance-common";
27
+ import { IncidentApi } from "@checkstack/incident-common";
27
28
  import { healthCheckHooks } from "./hooks";
28
29
  import { registerSearchProvider } from "@checkstack/command-backend";
29
30
  import { resolveRoute } from "@checkstack/common";
@@ -117,6 +118,9 @@ export default createBackendPlugin({
117
118
  // Create maintenance client for notification suppression checks
118
119
  const maintenanceClient = rpcClient.forPlugin(MaintenanceApi);
119
120
 
121
+ // Create incident client for notification suppression checks
122
+ const incidentClient = rpcClient.forPlugin(IncidentApi);
123
+
120
124
  // Setup queue-based health check worker
121
125
  await setupHealthCheckWorker({
122
126
  db: database,
@@ -127,6 +131,7 @@ export default createBackendPlugin({
127
131
  signalService,
128
132
  catalogClient,
129
133
  maintenanceClient,
134
+ incidentClient,
130
135
  getEmitHook: () => storedEmitHook,
131
136
  });
132
137
 
@@ -92,6 +92,23 @@ const createMockMaintenanceClient = () => ({
92
92
  deleteMaintenance: mock(async () => ({ success: true })),
93
93
  });
94
94
 
95
+ // Helper to create mock incident client for notification suppression checks
96
+ const createMockIncidentClient = () => ({
97
+ hasActiveIncidentWithSuppression: mock(async () => ({
98
+ suppressed: false,
99
+ })),
100
+ // Other methods not used in queue-executor
101
+ listIncidents: mock(async () => ({ incidents: [] })),
102
+ getIncident: mock(async () => null),
103
+ getIncidentsForSystem: mock(async () => []),
104
+ getBulkIncidentsForSystems: mock(async () => ({ incidents: {} })),
105
+ createIncident: mock(async () => ({})),
106
+ updateIncident: mock(async () => ({})),
107
+ addUpdate: mock(async () => ({})),
108
+ resolveIncident: mock(async () => ({})),
109
+ deleteIncident: mock(async () => ({ success: true })),
110
+ });
111
+
95
112
  describe("Queue-Based Health Check Executor", () => {
96
113
  describe("scheduleHealthCheck", () => {
97
114
  it("should enqueue a health check with delay and deterministic jobId", async () => {
@@ -145,6 +162,7 @@ describe("Queue-Based Health Check Executor", () => {
145
162
  const mockQueueManager = createMockQueueManager();
146
163
  const mockCatalogClient = createMockCatalogClient();
147
164
  const mockMaintenanceClient = createMockMaintenanceClient();
165
+ const mockIncidentClient = createMockIncidentClient();
148
166
 
149
167
  await setupHealthCheckWorker({
150
168
  db: mockDb as unknown as Parameters<
@@ -164,6 +182,9 @@ describe("Queue-Based Health Check Executor", () => {
164
182
  maintenanceClient: mockMaintenanceClient as unknown as Parameters<
165
183
  typeof setupHealthCheckWorker
166
184
  >[0]["maintenanceClient"],
185
+ incidentClient: mockIncidentClient as unknown as Parameters<
186
+ typeof setupHealthCheckWorker
187
+ >[0]["incidentClient"],
167
188
  getEmitHook: () => undefined,
168
189
  });
169
190
 
@@ -264,4 +285,116 @@ describe("Queue-Based Health Check Executor", () => {
264
285
  );
265
286
  });
266
287
  });
288
+
289
+ describe("executeHealthCheckJob - paused behavior", () => {
290
+ it("should skip execution when configuration is paused", async () => {
291
+ const mockDb = createMockDb();
292
+ const mockRegistry = createMockRegistry();
293
+ const mockLogger = createMockLogger();
294
+ const mockQueueManager = createMockQueueManager();
295
+ const mockCatalogClient = createMockCatalogClient();
296
+ const mockMaintenanceClient = createMockMaintenanceClient();
297
+ const mockIncidentClient = createMockIncidentClient();
298
+ const mockSignalService = createMockSignalService();
299
+
300
+ // Mock the database to return a paused configuration
301
+ let selectCallCount = 0;
302
+ (mockDb.select as any) = mock(() => {
303
+ selectCallCount++;
304
+ if (selectCallCount === 1) {
305
+ // First call: get previous system health status
306
+ return {
307
+ from: mock(() => ({
308
+ innerJoin: mock(() => ({
309
+ where: mock(() => Promise.resolve([])),
310
+ })),
311
+ })),
312
+ };
313
+ } else if (selectCallCount === 2) {
314
+ // Second call: fetch configuration (return paused config)
315
+ return {
316
+ from: mock(() => ({
317
+ innerJoin: mock(() => ({
318
+ where: mock(() =>
319
+ Promise.resolve([
320
+ {
321
+ configId: "config-1",
322
+ configName: "Test Check",
323
+ strategyId: "test-strategy",
324
+ config: {},
325
+ collectors: [],
326
+ interval: 30,
327
+ enabled: true,
328
+ paused: true, // Configuration is paused
329
+ },
330
+ ]),
331
+ ),
332
+ })),
333
+ })),
334
+ };
335
+ }
336
+ // Default
337
+ return {
338
+ from: mock(() => ({
339
+ innerJoin: mock(() => ({
340
+ where: mock(() => Promise.resolve([])),
341
+ })),
342
+ })),
343
+ };
344
+ });
345
+
346
+ // Setup worker and get handler
347
+ const queue =
348
+ mockQueueManager.getQueue<HealthCheckJobPayload>("health-checks");
349
+ let capturedHandler:
350
+ | ((job: { data: HealthCheckJobPayload }) => Promise<void>)
351
+ | undefined;
352
+ (queue.consume as any) = mock(
353
+ async (
354
+ handler: (job: { data: HealthCheckJobPayload }) => Promise<void>,
355
+ ) => {
356
+ capturedHandler = handler;
357
+ },
358
+ );
359
+
360
+ await setupHealthCheckWorker({
361
+ db: mockDb as unknown as Parameters<
362
+ typeof setupHealthCheckWorker
363
+ >[0]["db"],
364
+ registry: mockRegistry,
365
+ collectorRegistry:
366
+ createMockCollectorRegistry() as unknown as Parameters<
367
+ typeof setupHealthCheckWorker
368
+ >[0]["collectorRegistry"],
369
+ logger: mockLogger,
370
+ queueManager: mockQueueManager,
371
+ signalService: mockSignalService,
372
+ catalogClient: mockCatalogClient as unknown as Parameters<
373
+ typeof setupHealthCheckWorker
374
+ >[0]["catalogClient"],
375
+ maintenanceClient: mockMaintenanceClient as unknown as Parameters<
376
+ typeof setupHealthCheckWorker
377
+ >[0]["maintenanceClient"],
378
+ incidentClient: mockIncidentClient as unknown as Parameters<
379
+ typeof setupHealthCheckWorker
380
+ >[0]["incidentClient"],
381
+ getEmitHook: () => undefined,
382
+ });
383
+
384
+ // Execute a paused health check
385
+ if (capturedHandler) {
386
+ await capturedHandler({
387
+ data: { configId: "config-1", systemId: "system-1" },
388
+ });
389
+ }
390
+
391
+ // Verify execution was skipped with appropriate log
392
+ expect(mockLogger.debug).toHaveBeenCalledWith(
393
+ expect.stringContaining("is paused, skipping execution"),
394
+ );
395
+
396
+ // Verify no signal was broadcast (since execution was skipped)
397
+ expect(mockSignalService.getRecordedSignals()).toHaveLength(0);
398
+ });
399
+ });
267
400
  });
@@ -22,6 +22,7 @@ import {
22
22
  } from "@checkstack/healthcheck-common";
23
23
  import { CatalogApi, catalogRoutes } from "@checkstack/catalog-common";
24
24
  import { MaintenanceApi } from "@checkstack/maintenance-common";
25
+ import { IncidentApi } from "@checkstack/incident-common";
25
26
  import { resolveRoute, type InferClient } from "@checkstack/common";
26
27
  import { HealthCheckService } from "./service";
27
28
  import { healthCheckHooks } from "./hooks";
@@ -29,6 +30,7 @@ import { healthCheckHooks } from "./hooks";
29
30
  type Db = SafeDatabase<typeof schema>;
30
31
  type CatalogClient = InferClient<typeof CatalogApi>;
31
32
  type MaintenanceClient = InferClient<typeof MaintenanceApi>;
33
+ type IncidentClient = InferClient<typeof IncidentApi>;
32
34
 
33
35
  /**
34
36
  * Payload for health check queue jobs
@@ -90,7 +92,7 @@ export async function scheduleHealthCheck(props: {
90
92
 
91
93
  /**
92
94
  * Notify system subscribers about a health state change.
93
- * Skips notification if the system has active maintenance with suppression enabled.
95
+ * Skips the notification if the system has an active maintenance or incident with suppression enabled.
94
96
  */
95
97
  async function notifyStateChange(props: {
96
98
  systemId: string;
@@ -98,6 +100,7 @@ async function notifyStateChange(props: {
98
100
  newStatus: HealthCheckStatus;
99
101
  catalogClient: CatalogClient;
100
102
  maintenanceClient: MaintenanceClient;
103
+ incidentClient: IncidentClient;
101
104
  logger: Logger;
102
105
  }): Promise<void> {
103
106
  const {
@@ -106,6 +109,7 @@ async function notifyStateChange(props: {
106
109
  newStatus,
107
110
  catalogClient,
108
111
  maintenanceClient,
112
+ incidentClient,
109
113
  logger,
110
114
  } = props;
111
115
 
@@ -132,6 +136,24 @@ async function notifyStateChange(props: {
132
136
  );
133
137
  }
134
138
 
139
+ // Check if notifications should be suppressed due to active incident
140
+ try {
141
+ const { suppressed } =
142
+ await incidentClient.hasActiveIncidentWithSuppression({ systemId });
143
+ if (suppressed) {
144
+ logger.debug(
145
+ `Skipping notification for ${systemId}: active incident with suppression enabled`,
146
+ );
147
+ return;
148
+ }
149
+ } catch (error) {
150
+ // Log but continue with notification - suppression check failure shouldn't block notifications
151
+ logger.warn(
152
+ `Failed to check incident suppression for ${systemId}, proceeding with notification:`,
153
+ error,
154
+ );
155
+ }
156
+
135
157
  const isRecovery = newStatus === "healthy" && previousStatus !== "healthy";
136
158
  const isDegraded = newStatus === "degraded";
137
159
  const isUnhealthy = newStatus === "unhealthy";
@@ -196,6 +218,7 @@ async function executeHealthCheckJob(props: {
196
218
  signalService: SignalService;
197
219
  catalogClient: CatalogClient;
198
220
  maintenanceClient: MaintenanceClient;
221
+ incidentClient: IncidentClient;
199
222
  getEmitHook: () => EmitHookFn | undefined;
200
223
  }): Promise<void> {
201
224
  const {
@@ -207,6 +230,7 @@ async function executeHealthCheckJob(props: {
207
230
  signalService,
208
231
  catalogClient,
209
232
  maintenanceClient,
233
+ incidentClient,
210
234
  getEmitHook,
211
235
  } = props;
212
236
  const { configId, systemId } = payload;
@@ -229,6 +253,7 @@ async function executeHealthCheckJob(props: {
229
253
  collectors: healthCheckConfigurations.collectors,
230
254
  interval: healthCheckConfigurations.intervalSeconds,
231
255
  enabled: systemHealthChecks.enabled,
256
+ paused: healthCheckConfigurations.paused,
232
257
  })
233
258
  .from(systemHealthChecks)
234
259
  .innerJoin(
@@ -251,6 +276,14 @@ async function executeHealthCheckJob(props: {
251
276
  return;
252
277
  }
253
278
 
279
+ // If configuration is paused, skip execution (job continues to be scheduled)
280
+ if (configRow.paused) {
281
+ logger.debug(
282
+ `Health check ${configId} is paused, skipping execution for system ${systemId}`,
283
+ );
284
+ return;
285
+ }
286
+
254
287
  // Fetch system name for signal payload
255
288
  let systemName = systemId;
256
289
  try {
@@ -322,6 +355,7 @@ async function executeHealthCheckJob(props: {
322
355
  newStatus: newState.status,
323
356
  catalogClient,
324
357
  maintenanceClient,
358
+ incidentClient,
325
359
  logger,
326
360
  });
327
361
  }
@@ -470,6 +504,7 @@ async function executeHealthCheckJob(props: {
470
504
  newStatus: newState.status,
471
505
  catalogClient,
472
506
  maintenanceClient,
507
+ incidentClient,
473
508
  logger,
474
509
  });
475
510
 
@@ -564,6 +599,7 @@ async function executeHealthCheckJob(props: {
564
599
  newStatus: newState.status,
565
600
  catalogClient,
566
601
  maintenanceClient,
602
+ incidentClient,
567
603
  logger,
568
604
  });
569
605
 
@@ -619,6 +655,7 @@ export async function setupHealthCheckWorker(props: {
619
655
  signalService: SignalService;
620
656
  catalogClient: CatalogClient;
621
657
  maintenanceClient: MaintenanceClient;
658
+ incidentClient: IncidentClient;
622
659
  getEmitHook: () => EmitHookFn | undefined;
623
660
  }): Promise<void> {
624
661
  const {
@@ -630,6 +667,7 @@ export async function setupHealthCheckWorker(props: {
630
667
  signalService,
631
668
  catalogClient,
632
669
  maintenanceClient,
670
+ incidentClient,
633
671
  getEmitHook,
634
672
  } = props;
635
673
 
@@ -648,6 +686,7 @@ export async function setupHealthCheckWorker(props: {
648
686
  signalService,
649
687
  catalogClient,
650
688
  maintenanceClient,
689
+ incidentClient,
651
690
  getEmitHook,
652
691
  });
653
692
  },
package/src/router.ts CHANGED
@@ -105,6 +105,14 @@ export const createHealthCheckRouter = (
105
105
  await service.deleteConfiguration(input);
106
106
  }),
107
107
 
108
+ pauseConfiguration: os.pauseConfiguration.handler(async ({ input }) => {
109
+ await service.pauseConfiguration(input);
110
+ }),
111
+
112
+ resumeConfiguration: os.resumeConfiguration.handler(async ({ input }) => {
113
+ await service.resumeConfiguration(input);
114
+ }),
115
+
108
116
  getSystemConfigurations: os.getSystemConfigurations.handler(
109
117
  async ({ input }) => {
110
118
  return service.getSystemConfigurations(input);
package/src/schema.ts CHANGED
@@ -45,6 +45,8 @@ export const healthCheckConfigurations = pgTable(
45
45
  collectors: jsonb("collectors").$type<CollectorConfigEntry[]>(),
46
46
  intervalSeconds: integer("interval_seconds").notNull(),
47
47
  isTemplate: boolean("is_template").default(false),
48
+ /** Whether this configuration is paused (execution skipped for all systems) */
49
+ paused: boolean("paused").default(false).notNull(),
48
50
  createdAt: timestamp("created_at").defaultNow().notNull(),
49
51
  updatedAt: timestamp("updated_at").defaultNow().notNull(),
50
52
  },
@@ -0,0 +1,50 @@
1
+ import { describe, it, expect, mock, beforeEach } from "bun:test";
2
+ import { HealthCheckService } from "./service";
3
+ import { createMockDb } from "@checkstack/test-utils-backend";
4
+
5
+ describe("HealthCheckService - pause/resume", () => {
6
+ let mockDb: ReturnType<typeof createMockDb>;
7
+ let service: HealthCheckService;
8
+ let mockUpdate: ReturnType<typeof mock>;
9
+ let mockSet: ReturnType<typeof mock>;
10
+ let mockWhere: ReturnType<typeof mock>;
11
+
12
+ beforeEach(() => {
13
+ mockDb = createMockDb();
14
+ mockWhere = mock(() => Promise.resolve());
15
+ mockSet = mock(() => ({ where: mockWhere }));
16
+ mockUpdate = mock(() => ({ set: mockSet }));
17
+ (mockDb.update as any) = mockUpdate;
18
+ service = new HealthCheckService(mockDb as any);
19
+ });
20
+
21
+ describe("pauseConfiguration", () => {
22
+ it("should update paused to true and set updatedAt", async () => {
23
+ await service.pauseConfiguration("config-123");
24
+
25
+ expect(mockUpdate).toHaveBeenCalled();
26
+ expect(mockSet).toHaveBeenCalledWith(
27
+ expect.objectContaining({
28
+ paused: true,
29
+ updatedAt: expect.any(Date),
30
+ }),
31
+ );
32
+ expect(mockWhere).toHaveBeenCalled();
33
+ });
34
+ });
35
+
36
+ describe("resumeConfiguration", () => {
37
+ it("should update paused to false and set updatedAt", async () => {
38
+ await service.resumeConfiguration("config-456");
39
+
40
+ expect(mockUpdate).toHaveBeenCalled();
41
+ expect(mockSet).toHaveBeenCalledWith(
42
+ expect.objectContaining({
43
+ paused: false,
44
+ updatedAt: expect.any(Date),
45
+ }),
46
+ );
47
+ expect(mockWhere).toHaveBeenCalled();
48
+ });
49
+ });
50
+ });
package/src/service.ts CHANGED
@@ -105,6 +105,20 @@ export class HealthCheckService {
105
105
  .where(eq(healthCheckConfigurations.id, id));
106
106
  }
107
107
 
108
+ async pauseConfiguration(id: string): Promise<void> {
109
+ await this.db
110
+ .update(healthCheckConfigurations)
111
+ .set({ paused: true, updatedAt: new Date() })
112
+ .where(eq(healthCheckConfigurations.id, id));
113
+ }
114
+
115
+ async resumeConfiguration(id: string): Promise<void> {
116
+ await this.db
117
+ .update(healthCheckConfigurations)
118
+ .set({ paused: false, updatedAt: new Date() })
119
+ .where(eq(healthCheckConfigurations.id, id));
120
+ }
121
+
108
122
  async getConfigurations(): Promise<HealthCheckConfiguration[]> {
109
123
  const configs = await this.db.select().from(healthCheckConfigurations);
110
124
  return configs.map((c) => this.mapConfig(c));
@@ -884,6 +898,7 @@ export class HealthCheckService {
884
898
  config: row.config,
885
899
  collectors: row.collectors ?? undefined,
886
900
  intervalSeconds: row.intervalSeconds,
901
+ paused: row.paused,
887
902
  createdAt: row.createdAt,
888
903
  updatedAt: row.updatedAt,
889
904
  };