@checkstack/healthcheck-backend 0.16.4 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/package.json +4 -4
- package/src/healthcheck-gitops-kinds.test.ts +13 -10
- package/src/healthcheck-gitops-kinds.ts +17 -1
- package/src/index.ts +8 -0
- package/src/queue-executor.ts +11 -6
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# @checkstack/healthcheck-backend
|
|
2
2
|
|
|
3
|
+
## 0.17.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 298bf42: ### Notification System Optimizations
|
|
8
|
+
|
|
9
|
+
**System context in notifications**: All notification senders (healthcheck, incident, maintenance, dependency) now include the affected system name in the notification title and body. Users can immediately identify which system is affected without clicking through to the detail page.
|
|
10
|
+
|
|
11
|
+
**Upstream notification deduplication**: When an upstream dependency goes down affecting multiple downstream systems, the dependency notification sidecar now sends **one personalized notification per user** instead of one notification per affected system. Each user's notification lists only the systems they are subscribed to, with a link to the upstream root cause system. This prevents notification floods for users subscribed to groups containing many dependent systems.
|
|
12
|
+
|
|
13
|
+
**New catalog endpoint**: Added `getSystemGroupIds` S2S RPC endpoint on the catalog to resolve which catalog groups contain a given system, used by the dependency plugin for efficient subscriber resolution during batched notification dispatch.
|
|
14
|
+
|
|
15
|
+
### Patch Changes
|
|
16
|
+
|
|
17
|
+
- Updated dependencies [298bf42]
|
|
18
|
+
- @checkstack/catalog-common@1.5.0
|
|
19
|
+
- @checkstack/catalog-backend@0.6.0
|
|
20
|
+
- @checkstack/satellite-backend@0.2.14
|
|
21
|
+
|
|
22
|
+
## 0.16.5
|
|
23
|
+
|
|
24
|
+
### Patch Changes
|
|
25
|
+
|
|
26
|
+
- 9a320fe: Fixed an issue where GitOps-provisioned health checks were not added to the background execution queue immediately upon association.
|
|
27
|
+
- @checkstack/satellite-backend@0.2.13
|
|
28
|
+
|
|
3
29
|
## 0.16.4
|
|
4
30
|
|
|
5
31
|
### Patch Changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@checkstack/healthcheck-backend",
|
|
3
|
-
"version": "0.16.4",
|
|
3
|
+
"version": "0.17.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "src/index.ts",
|
|
6
6
|
"checkstack": {
|
|
@@ -14,18 +14,18 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@checkstack/backend-api": "0.12.0",
|
|
17
|
-
"@checkstack/catalog-backend": "0.5.
|
|
17
|
+
"@checkstack/catalog-backend": "0.5.4",
|
|
18
18
|
"@checkstack/catalog-common": "1.4.1",
|
|
19
19
|
"@checkstack/command-backend": "0.1.19",
|
|
20
20
|
"@checkstack/common": "0.6.5",
|
|
21
|
-
"@checkstack/gitops-backend": "0.2.
|
|
21
|
+
"@checkstack/gitops-backend": "0.2.3",
|
|
22
22
|
"@checkstack/gitops-common": "0.2.0",
|
|
23
23
|
"@checkstack/healthcheck-common": "0.11.0",
|
|
24
24
|
"@checkstack/incident-common": "0.4.7",
|
|
25
25
|
"@checkstack/integration-backend": "0.1.19",
|
|
26
26
|
"@checkstack/maintenance-common": "0.4.9",
|
|
27
27
|
"@checkstack/queue-api": "0.2.13",
|
|
28
|
-
"@checkstack/satellite-backend": "0.2.
|
|
28
|
+
"@checkstack/satellite-backend": "0.2.13",
|
|
29
29
|
"@checkstack/signal-common": "0.1.9",
|
|
30
30
|
"@hono/zod-validator": "^0.7.6",
|
|
31
31
|
"drizzle-orm": "^0.45.0",
|
|
package/src/healthcheck-gitops-kinds.test.ts
CHANGED
|
@@ -115,6 +115,10 @@ function createMockService() {
|
|
|
115
115
|
})
|
|
116
116
|
.filter(Boolean);
|
|
117
117
|
}),
|
|
118
|
+
getConfiguration: mock(async (id: string) => {
|
|
119
|
+
const config = configs.find((c) => c.id === id);
|
|
120
|
+
return config as unknown as HealthCheckConfiguration | undefined;
|
|
121
|
+
}),
|
|
118
122
|
};
|
|
119
123
|
}
|
|
120
124
|
|
|
@@ -217,16 +221,13 @@ describe("Healthcheck GitOps Kind: Healthcheck", () => {
|
|
|
217
221
|
});
|
|
218
222
|
|
|
219
223
|
function buildKind() {
|
|
220
|
-
|
|
221
|
-
createService: () =>
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
getHealthCheckRegistry: () => mockHCRegistry as never,
|
|
228
|
-
getCollectorRegistry: () => mockCollectorRegistry as never,
|
|
229
|
-
});
|
|
224
|
+
const mockDeps = {
|
|
225
|
+
createService: () => mockService as any,
|
|
226
|
+
getHealthCheckRegistry: () => mockHCRegistry as any,
|
|
227
|
+
getCollectorRegistry: () => mockCollectorRegistry as any,
|
|
228
|
+
getQueueManager: () => ({ getQueue: () => ({ scheduleRecurring: async () => "job-123" }) } as any),
|
|
229
|
+
};
|
|
230
|
+
return buildHealthcheckKind(mockDeps);
|
|
230
231
|
}
|
|
231
232
|
|
|
232
233
|
it("creates a new healthcheck configuration and returns entityId", async () => {
|
|
@@ -488,9 +489,11 @@ describe("Healthcheck GitOps Kind: System Extension", () => {
|
|
|
488
489
|
associateSystem: mockService.associateSystem,
|
|
489
490
|
disassociateSystem: mockService.disassociateSystem,
|
|
490
491
|
getSystemConfigurations: mockService.getSystemConfigurations,
|
|
492
|
+
getConfiguration: mockService.getConfiguration,
|
|
491
493
|
}) as never,
|
|
492
494
|
getHealthCheckRegistry: () => createMockHealthCheckRegistry() as never,
|
|
493
495
|
getCollectorRegistry: () => createMockCollectorRegistry() as never,
|
|
496
|
+
getQueueManager: () => ({ getQueue: () => ({ scheduleRecurring: async () => "job-123" }) } as any),
|
|
494
497
|
});
|
|
495
498
|
}
|
|
496
499
|
|
|
package/src/healthcheck-gitops-kinds.ts
CHANGED
|
@@ -22,6 +22,8 @@ import {
|
|
|
22
22
|
arrayField,
|
|
23
23
|
enumField,
|
|
24
24
|
} from "@checkstack/backend-api";
|
|
25
|
+
import type { QueueManager } from "@checkstack/queue-api";
|
|
26
|
+
import { scheduleHealthCheck } from "./queue-executor";
|
|
25
27
|
|
|
26
28
|
/**
|
|
27
29
|
* Lazy accessor functions — populated during init(), consumed during reconcile.
|
|
@@ -32,6 +34,7 @@ interface HealthcheckGitOpsKindsDeps {
|
|
|
32
34
|
createService: () => HealthCheckService;
|
|
33
35
|
getHealthCheckRegistry: () => HealthCheckRegistry;
|
|
34
36
|
getCollectorRegistry: () => CollectorRegistry;
|
|
37
|
+
getQueueManager: () => QueueManager;
|
|
35
38
|
}
|
|
36
39
|
|
|
37
40
|
// ─── Healthcheck Spec Schema ───────────────────────────────────────────────
|
|
@@ -323,8 +326,21 @@ export function buildSystemHealthcheckExtension(
|
|
|
323
326
|
includeLocal: entry.includeLocal,
|
|
324
327
|
});
|
|
325
328
|
|
|
329
|
+
// Retrieve config to get the interval for scheduling
|
|
330
|
+
const config = await service.getConfiguration(configId);
|
|
331
|
+
if (config) {
|
|
332
|
+
await scheduleHealthCheck({
|
|
333
|
+
queueManager: deps.getQueueManager(),
|
|
334
|
+
payload: {
|
|
335
|
+
configId,
|
|
336
|
+
systemId: systemEntityId,
|
|
337
|
+
},
|
|
338
|
+
intervalSeconds: config.intervalSeconds,
|
|
339
|
+
});
|
|
340
|
+
}
|
|
341
|
+
|
|
326
342
|
context.logger.info(
|
|
327
|
-
`GitOps: associated ${entry.ref.kind} "${entry.ref.name}" (${configId}) with System "${entity.metadata.name}"`,
|
|
343
|
+
`GitOps: associated ${entry.ref.kind} "${entry.ref.name}" (${configId}) with System "${entity.metadata.name}" and scheduled execution`,
|
|
328
344
|
);
|
|
329
345
|
}
|
|
330
346
|
|
package/src/index.ts
CHANGED
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
type HealthCheckRegistry,
|
|
20
20
|
type CollectorRegistry,
|
|
21
21
|
} from "@checkstack/backend-api";
|
|
22
|
+
import type { QueueManager } from "@checkstack/queue-api";
|
|
22
23
|
import { integrationEventExtensionPoint } from "@checkstack/integration-backend";
|
|
23
24
|
import { entityKindExtensionPoint } from "@checkstack/gitops-backend";
|
|
24
25
|
import { z } from "zod";
|
|
@@ -99,6 +100,7 @@ export default createBackendPlugin({
|
|
|
99
100
|
let gitopsDb: SafeDatabase<typeof schema> | undefined;
|
|
100
101
|
let gitopsHealthCheckRegistry: HealthCheckRegistry | undefined;
|
|
101
102
|
let gitopsCollectorRegistry: CollectorRegistry | undefined;
|
|
103
|
+
let gitopsQueueManager: QueueManager | undefined;
|
|
102
104
|
|
|
103
105
|
const kindRegistry = env.getExtensionPoint(entityKindExtensionPoint);
|
|
104
106
|
registerHealthcheckGitOpsKinds({
|
|
@@ -125,6 +127,11 @@ export default createBackendPlugin({
|
|
|
125
127
|
throw new Error("CollectorRegistry not initialized");
|
|
126
128
|
return gitopsCollectorRegistry;
|
|
127
129
|
},
|
|
130
|
+
getQueueManager: () => {
|
|
131
|
+
if (!gitopsQueueManager)
|
|
132
|
+
throw new Error("QueueManager not initialized");
|
|
133
|
+
return gitopsQueueManager;
|
|
134
|
+
},
|
|
128
135
|
});
|
|
129
136
|
|
|
130
137
|
env.registerInit({
|
|
@@ -155,6 +162,7 @@ export default createBackendPlugin({
|
|
|
155
162
|
gitopsDb = database;
|
|
156
163
|
gitopsHealthCheckRegistry = healthCheckRegistry;
|
|
157
164
|
gitopsCollectorRegistry = collectorRegistry;
|
|
165
|
+
gitopsQueueManager = queueManager;
|
|
158
166
|
|
|
159
167
|
// Create catalog client for notification delegation
|
|
160
168
|
const catalogClient = rpcClient.forPlugin(CatalogApi);
|
package/src/queue-executor.ts
CHANGED
|
@@ -101,6 +101,7 @@ export async function scheduleHealthCheck(props: {
|
|
|
101
101
|
*/
|
|
102
102
|
async function notifyStateChange(props: {
|
|
103
103
|
systemId: string;
|
|
104
|
+
systemName: string;
|
|
104
105
|
previousStatus: HealthCheckStatus;
|
|
105
106
|
newStatus: HealthCheckStatus;
|
|
106
107
|
catalogClient: CatalogClient;
|
|
@@ -110,6 +111,7 @@ async function notifyStateChange(props: {
|
|
|
110
111
|
}): Promise<void> {
|
|
111
112
|
const {
|
|
112
113
|
systemId,
|
|
114
|
+
systemName,
|
|
113
115
|
previousStatus,
|
|
114
116
|
newStatus,
|
|
115
117
|
catalogClient,
|
|
@@ -168,18 +170,18 @@ async function notifyStateChange(props: {
|
|
|
168
170
|
let importance: "info" | "warning" | "critical";
|
|
169
171
|
|
|
170
172
|
if (isRecovery) {
|
|
171
|
-
title =
|
|
173
|
+
title = `System health restored: ${systemName}`;
|
|
172
174
|
body =
|
|
173
|
-
|
|
175
|
+
`All health checks for **${systemName}** are now passing. The system has returned to normal operation.`;
|
|
174
176
|
importance = "info";
|
|
175
177
|
} else if (isUnhealthy) {
|
|
176
|
-
title =
|
|
177
|
-
body =
|
|
178
|
+
title = `System health critical: ${systemName}`;
|
|
179
|
+
body = `Health checks indicate **${systemName}** is unhealthy and may be down.`;
|
|
178
180
|
importance = "critical";
|
|
179
181
|
} else if (isDegraded) {
|
|
180
|
-
title =
|
|
182
|
+
title = `System health degraded: ${systemName}`;
|
|
181
183
|
body =
|
|
182
|
-
|
|
184
|
+
`Some health checks for **${systemName}** are failing. The system may be experiencing issues.`;
|
|
183
185
|
importance = "warning";
|
|
184
186
|
} else {
|
|
185
187
|
// No notification for healthy → healthy (if somehow missed above)
|
|
@@ -535,6 +537,7 @@ async function executeHealthCheckJob(props: {
|
|
|
535
537
|
if (newState.status !== previousStatus) {
|
|
536
538
|
await notifyStateChange({
|
|
537
539
|
systemId,
|
|
540
|
+
systemName,
|
|
538
541
|
previousStatus,
|
|
539
542
|
newStatus: newState.status,
|
|
540
543
|
catalogClient,
|
|
@@ -615,6 +618,7 @@ async function executeHealthCheckJob(props: {
|
|
|
615
618
|
if (newState.status !== previousStatus) {
|
|
616
619
|
await notifyStateChange({
|
|
617
620
|
systemId,
|
|
621
|
+
systemName,
|
|
618
622
|
previousStatus,
|
|
619
623
|
newStatus: newState.status,
|
|
620
624
|
catalogClient,
|
|
@@ -732,6 +736,7 @@ async function executeHealthCheckJob(props: {
|
|
|
732
736
|
if (newState.status !== previousStatus) {
|
|
733
737
|
await notifyStateChange({
|
|
734
738
|
systemId,
|
|
739
|
+
systemName,
|
|
735
740
|
previousStatus,
|
|
736
741
|
newStatus: newState.status,
|
|
737
742
|
catalogClient,
|