@checkstack/healthcheck-backend 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +212 -0
- package/package.json +14 -14
- package/src/automations.test.ts +255 -0
- package/src/automations.ts +340 -0
- package/src/hooks.ts +69 -4
- package/src/index.ts +37 -52
- package/src/queue-executor.test.ts +137 -0
- package/src/queue-executor.ts +130 -27
- package/src/router.test.ts +5 -0
- package/src/router.ts +12 -1
- package/src/service-assignments.test.ts +184 -0
- package/src/service.ts +65 -0
- package/tsconfig.json +3 -3
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
/**
 * Healthcheck triggers + actions registered with the Automation Platform.
 *
 * Triggers:
 *   - `healthcheck.system.degraded` — existing directional hook
 *   - `healthcheck.system.healthy` — existing directional hook
 *   - `healthcheck.system.health_changed` — new umbrella hook,
 *     fires on every aggregated-health transition. Carries both the
 *     previous and new statuses so subscribers don't have to listen
 *     to two hooks and coalesce themselves.
 *   - `healthcheck.check.failed` — fires when an individual check run
 *     completes with a non-`healthy` status.
 *   - `healthcheck.flapping_detected` — fires when N unhealthy
 *     transitions are observed within the policy window; re-fires on
 *     every additional transition while flapping.
 *
 * Actions:
 *   - `healthcheck.run_now`: enqueue a one-off run of a specific
 *     `(systemId, configurationId)` assignment. The recurring
 *     schedule keeps ticking; this just nudges the queue.
 *   - `healthcheck.enable_assignment` /
 *     `healthcheck.disable_assignment`: flip the `enabled` flag on an
 *     existing assignment via `service.setAssignmentEnabled`. Emits
 *     the existing `assignmentChanged` hook so the satellite-config
 *     relay picks up the change.
 *
 * Mutation actions emit hooks themselves (via the `emitHook` factory
 * dep) so downstream automations + caches react the same way as
 * RPC-driven mutations.
 */
|
|
26
|
+
import { z } from "zod";
|
|
27
|
+
import { Versioned, type Hook } from "@checkstack/backend-api";
|
|
28
|
+
import type { QueueManager } from "@checkstack/queue-api";
|
|
29
|
+
import type {
|
|
30
|
+
ActionDefinition,
|
|
31
|
+
TriggerDefinition,
|
|
32
|
+
} from "@checkstack/automation-backend";
|
|
33
|
+
import { HealthCheckStatusSchema } from "@checkstack/healthcheck-common";
|
|
34
|
+
|
|
35
|
+
import { healthCheckHooks } from "./hooks";
|
|
36
|
+
import {
|
|
37
|
+
HEALTH_CHECK_QUEUE,
|
|
38
|
+
type HealthCheckJobPayload,
|
|
39
|
+
} from "./queue-executor";
|
|
40
|
+
import type { HealthCheckService } from "./service";
|
|
41
|
+
|
|
42
|
+
// ─── Payload schemas — match the hook payloads exactly ─────────────────
|
|
43
|
+
|
|
44
|
+
/**
 * Payload validated for the `system_degraded` trigger.
 *
 * Both status fields use the shared `HealthCheckStatusSchema`; `systemName`
 * is the only field that may be absent.
 */
const systemDegradedPayloadSchema = z.object({
  systemId: z.string(),
  systemName: z.optional(z.string()),
  previousStatus: HealthCheckStatusSchema,
  newStatus: HealthCheckStatusSchema,
  healthyChecks: z.number(),
  totalChecks: z.number(),
  timestamp: z.string(),
});
|
|
53
|
+
|
|
54
|
+
/**
 * Payload validated for the `system_healthy` trigger.
 *
 * Unlike the degraded payload there is no `newStatus` field; only the status
 * the system recovered from (`previousStatus`) is carried.
 */
const systemHealthyPayloadSchema = z.object({
  systemId: z.string(),
  systemName: z.optional(z.string()),
  previousStatus: HealthCheckStatusSchema,
  healthyChecks: z.number(),
  totalChecks: z.number(),
  timestamp: z.string(),
});
|
|
62
|
+
|
|
63
|
+
/**
 * Payload validated for the `system_health_changed` trigger.
 *
 * Carries both the previous and the new aggregated status, plus the
 * healthy/total check counts and a timestamp string.
 */
const systemHealthChangedPayloadSchema = z.object({
  systemId: z.string(),
  systemName: z.optional(z.string()),
  previousStatus: HealthCheckStatusSchema,
  newStatus: HealthCheckStatusSchema,
  healthyChecks: z.number(),
  totalChecks: z.number(),
  timestamp: z.string(),
});
|
|
72
|
+
|
|
73
|
+
/**
 * Payload validated for the `check_failed` trigger.
 *
 * Identifies the individual check by `(systemId, configurationId)`.
 * `latencyMs` and `result` may be absent; `result` is an open record of
 * string keys to unknown values.
 */
const checkFailedPayloadSchema = z.object({
  systemId: z.string(),
  configurationId: z.string(),
  status: HealthCheckStatusSchema,
  latencyMs: z.optional(z.number()),
  result: z.optional(z.record(z.string(), z.unknown())),
  timestamp: z.string(),
});
|
|
81
|
+
|
|
82
|
+
/**
 * Payload validated for the `flapping_detected` trigger.
 *
 * Reports how many transitions were observed (`transitionCount`) and the
 * length of the observation window in minutes (`windowMinutes`) for one
 * `(systemId, configurationId)` pair.
 */
const flappingDetectedPayloadSchema = z.object({
  systemId: z.string(),
  configurationId: z.string(),
  transitionCount: z.number(),
  windowMinutes: z.number(),
  timestamp: z.string(),
});
|
|
89
|
+
|
|
90
|
+
// ─── Triggers ──────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
/**
 * Automation trigger bound to `healthCheckHooks.systemDegraded`.
 *
 * Payloads are validated with `systemDegradedPayloadSchema`; the context key
 * is the affected system's id.
 */
export const systemDegradedTrigger: TriggerDefinition<
  z.infer<typeof systemDegradedPayloadSchema>
> = {
  id: "system_degraded",
  displayName: "System Health Degraded",
  description:
    "Fires when a system's health transitions from healthy to degraded/unhealthy",
  category: "Health",
  icon: "HeartPulse",
  payloadSchema: systemDegradedPayloadSchema,
  hook: healthCheckHooks.systemDegraded,
  contextKey: ({ systemId }) => systemId,
};
|
|
105
|
+
|
|
106
|
+
/**
 * Automation trigger bound to `healthCheckHooks.systemHealthy`.
 *
 * Payloads are validated with `systemHealthyPayloadSchema`; the context key
 * is the recovered system's id.
 */
export const systemHealthyTrigger: TriggerDefinition<
  z.infer<typeof systemHealthyPayloadSchema>
> = {
  id: "system_healthy",
  displayName: "System Health Restored",
  description: "Fires when a system's health recovers to healthy",
  category: "Health",
  icon: "HeartPulse",
  payloadSchema: systemHealthyPayloadSchema,
  hook: healthCheckHooks.systemHealthy,
  contextKey: ({ systemId }) => systemId,
};
|
|
118
|
+
|
|
119
|
+
/**
 * Automation trigger bound to `healthCheckHooks.systemHealthChanged`, the
 * umbrella hook that carries both the previous and the new status.
 *
 * Payloads are validated with `systemHealthChangedPayloadSchema`; the context
 * key is the affected system's id.
 */
export const systemHealthChangedTrigger: TriggerDefinition<
  z.infer<typeof systemHealthChangedPayloadSchema>
> = {
  id: "system_health_changed",
  displayName: "System Health Changed",
  description:
    "Fires on every aggregated-health transition — carries previous + new status",
  category: "Health",
  icon: "HeartPulse",
  payloadSchema: systemHealthChangedPayloadSchema,
  hook: healthCheckHooks.systemHealthChanged,
  contextKey: ({ systemId }) => systemId,
};
|
|
132
|
+
|
|
133
|
+
/**
 * Automation trigger bound to `healthCheckHooks.checkFailed`.
 *
 * Payloads are validated with `checkFailedPayloadSchema`.
 *
 * NOTE(review): the context key is the system id only, although the payload
 * also identifies the check via `configurationId` — confirm that per-system
 * scoping is what the automation platform expects here.
 */
export const checkFailedTrigger: TriggerDefinition<
  z.infer<typeof checkFailedPayloadSchema>
> = {
  id: "check_failed",
  displayName: "Health Check Failed",
  description:
    "Fires when an individual check run completes with a non-`healthy` status",
  category: "Health",
  icon: "TriangleAlert",
  payloadSchema: checkFailedPayloadSchema,
  hook: healthCheckHooks.checkFailed,
  contextKey: ({ systemId }) => systemId,
};
|
|
146
|
+
|
|
147
|
+
/**
 * Automation trigger bound to `healthCheckHooks.flappingDetected`.
 *
 * Payloads are validated with `flappingDetectedPayloadSchema`. As the
 * description string states, the event re-fires on each further transition
 * while flapping, so automations may need to debounce.
 *
 * NOTE(review): the context key is the system id only, although the payload
 * also carries `configurationId` — confirm that per-system scoping is
 * intended.
 */
export const flappingDetectedTrigger: TriggerDefinition<
  z.infer<typeof flappingDetectedPayloadSchema>
> = {
  id: "flapping_detected",
  displayName: "Health Check Flapping",
  description:
    "Fires when N unhealthy transitions are observed within the policy window. Re-fires on every additional transition while flapping; debounce in the automation if needed.",
  category: "Health",
  icon: "Repeat",
  payloadSchema: flappingDetectedPayloadSchema,
  hook: healthCheckHooks.flappingDetected,
  contextKey: ({ systemId }) => systemId,
};
|
|
160
|
+
|
|
161
|
+
/**
 * Every healthcheck trigger, in registration order.
 *
 * Each entry is widened to `TriggerDefinition<unknown>` so definitions with
 * different payload types can share a single array.
 */
export const healthCheckTriggers: TriggerDefinition<unknown>[] = [
  // System-level (aggregated health) triggers.
  systemDegradedTrigger as TriggerDefinition<unknown>,
  systemHealthyTrigger as TriggerDefinition<unknown>,
  systemHealthChangedTrigger as TriggerDefinition<unknown>,
  // Per-check triggers.
  checkFailedTrigger as TriggerDefinition<unknown>,
  flappingDetectedTrigger as TriggerDefinition<unknown>,
];
|
|
168
|
+
|
|
169
|
+
// ─── Action configs ────────────────────────────────────────────────────
|
|
170
|
+
|
|
171
|
+
/**
 * Configuration accepted by the `run_now` action: the non-empty ids of the
 * system and of the health-check configuration to run.
 */
const runNowConfigSchema = z.object({
  systemId: z.string().min(1).describe("Target system id"),
  configurationId: z.string().min(1).describe("Target health-check configuration id"),
});
|
|
178
|
+
|
|
179
|
+
/**
 * Configuration shared by the `enable_assignment` and `disable_assignment`
 * actions: the non-empty ids identifying the system↔configuration assignment.
 *
 * The fields carry the same `.describe()` text as `runNowConfigSchema` so
 * both action configs expose consistent field descriptions. The descriptions
 * are metadata only; validation is unchanged.
 */
const assignmentToggleConfigSchema = z.object({
  systemId: z.string().min(1).describe("Target system id"),
  configurationId: z
    .string()
    .min(1)
    .describe("Target health-check configuration id"),
});
|
|
183
|
+
|
|
184
|
+
// ─── Artifact ──────────────────────────────────────────────────────────
|
|
185
|
+
|
|
186
|
+
/**
 * Shape of the artifact returned by the healthcheck actions.
 *
 * `systemId` and `configurationId` are always present. `enabled` is set by
 * the enable/disable actions and `enqueued` by the run-now action, so both
 * are optional.
 */
const assignmentArtifactSchema = z.object({
  systemId: z.string(),
  configurationId: z.string(),
  enabled: z.optional(z.boolean()),
  enqueued: z.optional(z.boolean()),
});

/** Static type of an assignment artifact, inferred from its schema. */
export type AssignmentArtifact = z.infer<typeof assignmentArtifactSchema>;
|
|
194
|
+
|
|
195
|
+
/**
 * Artifact-type descriptor for healthcheck assignments, validated by
 * `assignmentArtifactSchema`.
 *
 * NOTE(review): the actions in this file declare
 * `produces: "healthcheck.assignment"` while the id here is `"assignment"` —
 * presumably the platform qualifies the id with the plugin id; confirm.
 */
export const assignmentArtifactType = {
  id: "assignment",
  displayName: "Healthcheck Assignment",
  description:
    "Identifies the system↔configuration assignment touched by an automation action",
  schema: assignmentArtifactSchema,
} as const;
|
|
202
|
+
|
|
203
|
+
// ─── Action factory ────────────────────────────────────────────────────
|
|
204
|
+
|
|
205
|
+
/**
 * Dependencies closed over by the actions built in
 * `createHealthCheckActions`.
 */
export interface HealthCheckActionDeps {
  /** Used by the enable/disable actions to call `setAssignmentEnabled`. */
  service: HealthCheckService;

  /** Used by the run-now action to obtain the health-check queue. */
  queueManager: QueueManager;

  /** Emits a hook with its matching payload; awaited by the toggle actions. */
  emitHook: <T>(hook: Hook<T>, payload: T) => Promise<void>;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Builds the healthcheck actions exposed to the Automation Platform.
 *
 * Three actions are returned, in this order:
 *   - `run_now` — enqueues one job on `HEALTH_CHECK_QUEUE` for the configured
 *     `(systemId, configurationId)` pair.
 *   - `enable_assignment` / `disable_assignment` — call
 *     `service.setAssignmentEnabled` and, when it returns a truthy result,
 *     emit `healthCheckHooks.assignmentChanged`.
 *
 * Every successful run reports an `externalId` of the form
 * `<systemId>:<configurationId>` and an assignment artifact.
 *
 * @param deps Service, queue manager and hook emitter the actions close over.
 * @returns The actions, each widened to `ActionDefinition<unknown, unknown>`
 *   so they can share one array.
 */
export function createHealthCheckActions(
  deps: HealthCheckActionDeps,
): ActionDefinition<unknown, unknown>[] {
  type ToggleAction = ActionDefinition<
    z.infer<typeof assignmentToggleConfigSchema>,
    AssignmentArtifact
  >;

  const runNow: ActionDefinition<
    z.infer<typeof runNowConfigSchema>,
    AssignmentArtifact
  > = {
    id: "run_now",
    displayName: "Run Health Check Now",
    description:
      "Enqueue a one-off run of the given assignment. Doesn't disturb the recurring schedule.",
    category: "Health",
    icon: "Play",
    config: new Versioned({ version: 1, schema: runNowConfigSchema }),
    produces: "healthcheck.assignment",
    execute: async ({ config, logger }) => {
      const { systemId, configurationId } = config;
      const queue = deps.queueManager.getQueue<HealthCheckJobPayload>(
        HEALTH_CHECK_QUEUE,
      );
      // The job payload names the configuration id `configId`.
      // NOTE(review): the job is enqueued without checking that the
      // assignment exists or is enabled — confirm the queue consumer
      // handles unknown pairs.
      await queue.enqueue({ configId: configurationId, systemId });
      logger.info(
        `Automation enqueued run for ${systemId}:${configurationId}`,
      );
      return {
        success: true,
        externalId: `${systemId}:${configurationId}`,
        artifact: { systemId, configurationId, enqueued: true },
      };
    },
  };

  /**
   * Builds the enable (`enabled === true`) or disable (`enabled === false`)
   * action. The two differ only in id, labels, icon, log wording and the
   * flag value passed to the service.
   */
  const createToggleAction = (enabled: boolean): ToggleAction => ({
    id: enabled ? "enable_assignment" : "disable_assignment",
    displayName: enabled
      ? "Enable Health Check Assignment"
      : "Disable Health Check Assignment",
    description: enabled
      ? "Flip the `enabled` flag on an existing system↔configuration assignment to true."
      : "Flip the `enabled` flag on an existing system↔configuration assignment to false.",
    category: "Health",
    icon: enabled ? "Power" : "PowerOff",
    config: new Versioned({ version: 1, schema: assignmentToggleConfigSchema }),
    produces: "healthcheck.assignment",
    execute: async ({ config, logger }) => {
      const { systemId, configurationId } = config;
      const updated = await deps.service.setAssignmentEnabled(
        systemId,
        configurationId,
        enabled,
      );
      // A falsy result is reported as a failed action rather than thrown.
      if (!updated) {
        return {
          success: false,
          error: `Assignment not found: ${systemId} ↔ ${configurationId}`,
        };
      }
      // Emit the hook only after the service reported a successful update.
      await deps.emitHook(healthCheckHooks.assignmentChanged, {
        systemId,
        configurationId,
      });
      logger.info(
        `Automation ${enabled ? "enabled" : "disabled"} assignment ${systemId}:${configurationId}`,
      );
      return {
        success: true,
        externalId: `${systemId}:${configurationId}`,
        artifact: { systemId, configurationId, enabled },
      };
    },
  });

  const enableAssignment = createToggleAction(true);
  const disableAssignment = createToggleAction(false);

  return [
    runNow as ActionDefinition<unknown, unknown>,
    enableAssignment as ActionDefinition<unknown, unknown>,
    disableAssignment as ActionDefinition<unknown, unknown>,
  ];
}
|
package/src/hooks.ts
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
import { createHook } from "@checkstack/backend-api";
|
|
2
|
+
import type { HealthCheckStatus } from "@checkstack/healthcheck-common";
|
|
2
3
|
|
|
3
4
|
/**
|
|
4
5
|
* Health check hooks for cross-plugin communication and external integrations.
|
|
5
6
|
* These hooks are registered as integration events for webhook subscriptions.
|
|
7
|
+
*
|
|
8
|
+
* `status` / `previousStatus` / `newStatus` carry the canonical
|
|
9
|
+
* `HealthCheckStatus` enum values, so automation triggers built on
|
|
10
|
+
* these hooks can offer the known values for `==` comparisons in the
|
|
11
|
+
* editor.
|
|
6
12
|
*/
|
|
7
13
|
export const healthCheckHooks = {
|
|
8
14
|
/**
|
|
@@ -13,8 +19,8 @@ export const healthCheckHooks = {
|
|
|
13
19
|
systemDegraded: createHook<{
|
|
14
20
|
systemId: string;
|
|
15
21
|
systemName?: string;
|
|
16
|
-
previousStatus:
|
|
17
|
-
newStatus:
|
|
22
|
+
previousStatus: HealthCheckStatus;
|
|
23
|
+
newStatus: HealthCheckStatus;
|
|
18
24
|
healthyChecks: number;
|
|
19
25
|
totalChecks: number;
|
|
20
26
|
timestamp: string;
|
|
@@ -27,7 +33,7 @@ export const healthCheckHooks = {
|
|
|
27
33
|
systemHealthy: createHook<{
|
|
28
34
|
systemId: string;
|
|
29
35
|
systemName?: string;
|
|
30
|
-
previousStatus:
|
|
36
|
+
previousStatus: HealthCheckStatus;
|
|
31
37
|
healthyChecks: number;
|
|
32
38
|
totalChecks: number;
|
|
33
39
|
timestamp: string;
|
|
@@ -50,9 +56,68 @@ export const healthCheckHooks = {
|
|
|
50
56
|
checkCompleted: createHook<{
|
|
51
57
|
systemId: string;
|
|
52
58
|
configurationId: string;
|
|
53
|
-
status:
|
|
59
|
+
status: HealthCheckStatus;
|
|
54
60
|
latencyMs: number | undefined;
|
|
55
61
|
result: Record<string, unknown> | undefined;
|
|
56
62
|
timestamp: string;
|
|
57
63
|
}>("healthcheck.check.completed"),
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Umbrella variant of `systemDegraded` + `systemHealthy` — fires on
|
|
67
|
+
* **any** aggregated-health transition, carrying both the previous
|
|
68
|
+
* and new statuses. Subscribers (e.g. an automation that wants to
|
|
69
|
+
* react to every state change without subscribing to two hooks
|
|
70
|
+
* and coalescing themselves) prefer this one.
|
|
71
|
+
*
|
|
72
|
+
* Emitted alongside the directional hooks, never instead of them,
|
|
73
|
+
* so existing subscribers keep working unchanged.
|
|
74
|
+
*/
|
|
75
|
+
systemHealthChanged: createHook<{
|
|
76
|
+
systemId: string;
|
|
77
|
+
systemName?: string;
|
|
78
|
+
previousStatus: HealthCheckStatus;
|
|
79
|
+
newStatus: HealthCheckStatus;
|
|
80
|
+
healthyChecks: number;
|
|
81
|
+
totalChecks: number;
|
|
82
|
+
timestamp: string;
|
|
83
|
+
}>("healthcheck.system.health_changed"),
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* Narrow variant of `checkCompleted` — fires only when an individual
|
|
87
|
+
* check run completed with a non-`healthy` status. Carries the
|
|
88
|
+
* latency + raw result so subscribers can branch on collector-
|
|
89
|
+
* specific fields without re-querying. Operators usually prefer
|
|
90
|
+
* this over `checkCompleted` for incident-style automation because
|
|
91
|
+
* a "trigger on any completion, then filter" automation is harder
|
|
92
|
+
* to read at a glance than a typed `check_failed` entry point.
|
|
93
|
+
*/
|
|
94
|
+
checkFailed: createHook<{
|
|
95
|
+
systemId: string;
|
|
96
|
+
configurationId: string;
|
|
97
|
+
status: HealthCheckStatus;
|
|
98
|
+
latencyMs: number | undefined;
|
|
99
|
+
result: Record<string, unknown> | undefined;
|
|
100
|
+
timestamp: string;
|
|
101
|
+
}>("healthcheck.check.failed"),
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Emitted when the flapping-detector observes ≥ N unhealthy
|
|
105
|
+
* transitions in the policy's configured window. Fires regardless
|
|
106
|
+
* of whether `autoOpenIncidentOnUnhealthy` is enabled — the hook is
|
|
107
|
+
* informational; the auto-incident pipeline still gates on the
|
|
108
|
+
* policy.
|
|
109
|
+
*
|
|
110
|
+
* Re-fires on every additional transition past the threshold while
|
|
111
|
+
* the check stays in a flapping pattern, so automations that want
|
|
112
|
+
* "page once and only once" should debounce on `(systemId,
|
|
113
|
+
* configurationId)`. Carrying the observed transition count + the
|
|
114
|
+
* window length lets subscribers reason about both.
|
|
115
|
+
*/
|
|
116
|
+
flappingDetected: createHook<{
|
|
117
|
+
systemId: string;
|
|
118
|
+
configurationId: string;
|
|
119
|
+
transitionCount: number;
|
|
120
|
+
windowMinutes: number;
|
|
121
|
+
timestamp: string;
|
|
122
|
+
}>("healthcheck.flapping_detected"),
|
|
58
123
|
} as const;
|
package/src/index.ts
CHANGED
|
@@ -27,11 +27,19 @@ import {
|
|
|
27
27
|
type CollectorRegistry,
|
|
28
28
|
} from "@checkstack/backend-api";
|
|
29
29
|
import type { QueueManager } from "@checkstack/queue-api";
|
|
30
|
-
import {
|
|
30
|
+
import {
|
|
31
|
+
automationActionExtensionPoint,
|
|
32
|
+
automationArtifactTypeExtensionPoint,
|
|
33
|
+
automationTriggerExtensionPoint,
|
|
34
|
+
} from "@checkstack/automation-backend";
|
|
31
35
|
import { entityKindExtensionPoint } from "@checkstack/gitops-backend";
|
|
32
|
-
import { z } from "zod";
|
|
33
36
|
import { createHealthCheckRouter } from "./router";
|
|
34
37
|
import { HealthCheckService } from "./service";
|
|
38
|
+
import {
|
|
39
|
+
assignmentArtifactType,
|
|
40
|
+
createHealthCheckActions,
|
|
41
|
+
healthCheckTriggers,
|
|
42
|
+
} from "./automations";
|
|
35
43
|
import { registerHealthcheckGitOpsKinds, registerHealthcheckGitOpsDocumentation } from "./healthcheck-gitops-kinds";
|
|
36
44
|
import { catalogHooks } from "@checkstack/catalog-backend";
|
|
37
45
|
import { satelliteHooks } from "@checkstack/satellite-backend";
|
|
@@ -42,34 +50,10 @@ import { CatalogApi } from "@checkstack/catalog-common";
|
|
|
42
50
|
import { MaintenanceApi } from "@checkstack/maintenance-common";
|
|
43
51
|
import { IncidentApi } from "@checkstack/incident-common";
|
|
44
52
|
import { GitOpsApi } from "@checkstack/gitops-common";
|
|
45
|
-
import { healthCheckHooks } from "./hooks";
|
|
46
53
|
import { registerSearchProvider } from "@checkstack/command-backend";
|
|
47
54
|
import { resolveRoute } from "@checkstack/common";
|
|
48
55
|
import { createHealthCheckCache } from "./cache";
|
|
49
56
|
|
|
50
|
-
// =============================================================================
|
|
51
|
-
// Integration Event Payload Schemas
|
|
52
|
-
// =============================================================================
|
|
53
|
-
|
|
54
|
-
const systemDegradedPayloadSchema = z.object({
|
|
55
|
-
systemId: z.string(),
|
|
56
|
-
systemName: z.string().optional(),
|
|
57
|
-
previousStatus: z.string(),
|
|
58
|
-
newStatus: z.string(),
|
|
59
|
-
healthyChecks: z.number(),
|
|
60
|
-
totalChecks: z.number(),
|
|
61
|
-
timestamp: z.string(),
|
|
62
|
-
});
|
|
63
|
-
|
|
64
|
-
const systemHealthyPayloadSchema = z.object({
|
|
65
|
-
systemId: z.string(),
|
|
66
|
-
systemName: z.string().optional(),
|
|
67
|
-
previousStatus: z.string(),
|
|
68
|
-
healthyChecks: z.number(),
|
|
69
|
-
totalChecks: z.number(),
|
|
70
|
-
timestamp: z.string(),
|
|
71
|
-
});
|
|
72
|
-
|
|
73
57
|
// Store emitHook reference for use during Phase 2 init
|
|
74
58
|
let storedEmitHook: EmitHookFn | undefined;
|
|
75
59
|
|
|
@@ -82,33 +66,19 @@ export default createBackendPlugin({
|
|
|
82
66
|
healthcheckGroupSubscription,
|
|
83
67
|
]);
|
|
84
68
|
|
|
85
|
-
//
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
{
|
|
92
|
-
hook: healthCheckHooks.systemDegraded,
|
|
93
|
-
displayName: "System Health Degraded",
|
|
94
|
-
description:
|
|
95
|
-
"Fired when a system's health status transitions from healthy to degraded/unhealthy",
|
|
96
|
-
category: "Health",
|
|
97
|
-
payloadSchema: systemDegradedPayloadSchema,
|
|
98
|
-
},
|
|
99
|
-
pluginMetadata,
|
|
100
|
-
);
|
|
101
|
-
|
|
102
|
-
integrationEvents.registerEvent(
|
|
103
|
-
{
|
|
104
|
-
hook: healthCheckHooks.systemHealthy,
|
|
105
|
-
displayName: "System Health Restored",
|
|
106
|
-
description: "Fired when a system's health status recovers to healthy",
|
|
107
|
-
category: "Health",
|
|
108
|
-
payloadSchema: systemHealthyPayloadSchema,
|
|
109
|
-
},
|
|
110
|
-
pluginMetadata,
|
|
69
|
+
// ─── Automation Platform: triggers + artifact type ─────────────────
|
|
70
|
+
// Buffered behind the extension point until automation-backend's
|
|
71
|
+
// register() runs. Actions are wired in afterPluginsReady where
|
|
72
|
+
// `emitHook` becomes available.
|
|
73
|
+
const automationTriggers = env.getExtensionPoint(
|
|
74
|
+
automationTriggerExtensionPoint,
|
|
111
75
|
);
|
|
76
|
+
for (const trigger of healthCheckTriggers) {
|
|
77
|
+
automationTriggers.registerTrigger(trigger, pluginMetadata);
|
|
78
|
+
}
|
|
79
|
+
env
|
|
80
|
+
.getExtensionPoint(automationArtifactTypeExtensionPoint)
|
|
81
|
+
.registerArtifactType(assignmentArtifactType, pluginMetadata);
|
|
112
82
|
|
|
113
83
|
// ─── GitOps Entity Kind Registration ───────────────────────────────
|
|
114
84
|
// Mutable refs — populated during init(), consumed by reconcile closures.
|
|
@@ -249,6 +219,7 @@ export default createBackendPlugin({
|
|
|
249
219
|
getEmitHook: () => storedEmitHook,
|
|
250
220
|
cache,
|
|
251
221
|
configService: config,
|
|
222
|
+
catalogClient,
|
|
252
223
|
});
|
|
253
224
|
rpc.registerRouter(healthCheckRouter, healthCheckContract);
|
|
254
225
|
|
|
@@ -325,6 +296,20 @@ export default createBackendPlugin({
|
|
|
325
296
|
healthCheckRegistry,
|
|
326
297
|
collectorRegistry,
|
|
327
298
|
);
|
|
299
|
+
|
|
300
|
+
// Register automation actions now that `emitHook` + `queueManager`
|
|
301
|
+
// are both available.
|
|
302
|
+
const automationActions = env.getExtensionPoint(
|
|
303
|
+
automationActionExtensionPoint,
|
|
304
|
+
);
|
|
305
|
+
for (const action of createHealthCheckActions({
|
|
306
|
+
service,
|
|
307
|
+
queueManager,
|
|
308
|
+
emitHook,
|
|
309
|
+
})) {
|
|
310
|
+
automationActions.registerAction(action, pluginMetadata);
|
|
311
|
+
}
|
|
312
|
+
|
|
328
313
|
onHook(
|
|
329
314
|
catalogHooks.systemDeleted,
|
|
330
315
|
async (payload) => {
|