@checkstack/incident-backend 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +217 -0
- package/package.json +18 -18
- package/src/automations.test.ts +356 -5
- package/src/automations.ts +322 -34
- package/src/hooks.ts +8 -53
- package/src/incident-entity.test.ts +266 -0
- package/src/incident-entity.ts +192 -0
- package/src/index.ts +105 -17
- package/src/router.ts +162 -98
- package/src/service.test.ts +235 -1
- package/src/service.ts +152 -4
package/src/automations.test.ts
CHANGED
|
@@ -5,16 +5,35 @@
|
|
|
5
5
|
* `core/automation-backend` cover registration validity.
|
|
6
6
|
*/
|
|
7
7
|
import { describe, it, expect, mock } from "bun:test";
|
|
8
|
+
import { SYSTEM_ACTOR } from "@checkstack/common";
|
|
8
9
|
import { createMockLogger } from "@checkstack/test-utils-backend";
|
|
9
10
|
|
|
10
11
|
import { createIncidentActions } from "./automations";
|
|
12
|
+
import {
|
|
13
|
+
deriveIncidentTriggerEvents,
|
|
14
|
+
INCIDENT_TRIGGER_EVENTS,
|
|
15
|
+
} from "./incident-entity";
|
|
11
16
|
import type { IncidentService } from "./service";
|
|
12
17
|
|
|
18
|
+
/**
|
|
19
|
+
* A default existing incident the stubbed `getIncident` returns. The
|
|
20
|
+
* status-flipping actions (resolve / add_update / update_status) now route
|
|
21
|
+
* through the reactive entity, which re-reads post-write state via
|
|
22
|
+
* `getIncident`, so the stub must answer it.
|
|
23
|
+
*/
|
|
24
|
+
const DEFAULT_INCIDENT = {
|
|
25
|
+
id: "INC-1",
|
|
26
|
+
status: "investigating" as const,
|
|
27
|
+
severity: "critical" as const,
|
|
28
|
+
systemIds: ["sys-1"],
|
|
29
|
+
};
|
|
30
|
+
|
|
13
31
|
const makeServiceStub = (overrides: Partial<IncidentService> = {}) =>
|
|
14
32
|
({
|
|
15
33
|
createIncident: mock(),
|
|
16
34
|
resolveIncident: mock(),
|
|
17
35
|
addUpdate: mock(),
|
|
36
|
+
getIncident: mock(async () => DEFAULT_INCIDENT),
|
|
18
37
|
...overrides,
|
|
19
38
|
}) as unknown as IncidentService;
|
|
20
39
|
|
|
@@ -59,20 +78,212 @@ describe("incident automation actions", () => {
|
|
|
59
78
|
expect((result.artifact as { incidentId: string }).incidentId).toBe(
|
|
60
79
|
"INC-1",
|
|
61
80
|
);
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
81
|
+
// The action now reserves an id up front and passes it (with no user)
|
|
82
|
+
// so the reactive `incident` entity can key on it and snapshot a null
|
|
83
|
+
// `prev` before the insert (§10.1).
|
|
84
|
+
expect(service.createIncident).toHaveBeenCalledWith(
|
|
85
|
+
{
|
|
86
|
+
title: "DB down",
|
|
87
|
+
description: undefined,
|
|
88
|
+
severity: "critical",
|
|
89
|
+
systemIds: ["sys-1"],
|
|
90
|
+
initialMessage: undefined,
|
|
91
|
+
suppressNotifications: false,
|
|
92
|
+
},
|
|
93
|
+
undefined,
|
|
94
|
+
expect.any(String),
|
|
95
|
+
);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it("dedupe_open_for_system reuses an existing open incident on the system", async () => {
|
|
99
|
+
const existing = {
|
|
100
|
+
id: "INC-OPEN",
|
|
101
|
+
status: "investigating",
|
|
65
102
|
severity: "critical",
|
|
66
103
|
systemIds: ["sys-1"],
|
|
67
|
-
|
|
68
|
-
|
|
104
|
+
};
|
|
105
|
+
const service = makeServiceStub({
|
|
106
|
+
// The action now delegates the dedup-serialized find-then-create to
|
|
107
|
+
// the service (which wraps it in an advisory lock).
|
|
108
|
+
createIncidentDedupedForSystem: mock(async () => ({
|
|
109
|
+
incident: existing,
|
|
110
|
+
reused: true,
|
|
111
|
+
})) as unknown as IncidentService["createIncidentDedupedForSystem"],
|
|
112
|
+
createIncident: mock() as unknown as IncidentService["createIncident"],
|
|
113
|
+
});
|
|
114
|
+
const [createAction] = createIncidentActions({ service });
|
|
115
|
+
const result = await createAction.execute({
|
|
116
|
+
...actionContext,
|
|
117
|
+
config: {
|
|
118
|
+
title: "DB down",
|
|
119
|
+
severity: "critical",
|
|
120
|
+
systemIds: ["sys-1"],
|
|
121
|
+
suppressNotifications: false,
|
|
122
|
+
dedupe_open_for_system: true,
|
|
123
|
+
} as never,
|
|
124
|
+
});
|
|
125
|
+
expect(result.success).toBe(true);
|
|
126
|
+
expect((result.artifact as { incidentId: string }).incidentId).toBe(
|
|
127
|
+
"INC-OPEN",
|
|
128
|
+
);
|
|
129
|
+
// Reused via the dedup method — no direct createIncident call.
|
|
130
|
+
expect(service.createIncident).not.toHaveBeenCalled();
|
|
131
|
+
expect(service.createIncidentDedupedForSystem).toHaveBeenCalledTimes(1);
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
it("dedupe_open_for_system creates when no open incident exists", async () => {
|
|
135
|
+
const created = {
|
|
136
|
+
id: "INC-NEW",
|
|
137
|
+
status: "investigating",
|
|
138
|
+
severity: "critical",
|
|
139
|
+
systemIds: ["sys-1"],
|
|
140
|
+
};
|
|
141
|
+
const service = makeServiceStub({
|
|
142
|
+
createIncidentDedupedForSystem: mock(async () => ({
|
|
143
|
+
incident: created,
|
|
144
|
+
reused: false,
|
|
145
|
+
})) as unknown as IncidentService["createIncidentDedupedForSystem"],
|
|
146
|
+
});
|
|
147
|
+
const [createAction] = createIncidentActions({ service });
|
|
148
|
+
const result = await createAction.execute({
|
|
149
|
+
...actionContext,
|
|
150
|
+
config: {
|
|
151
|
+
title: "DB down",
|
|
152
|
+
severity: "critical",
|
|
153
|
+
systemIds: ["sys-1"],
|
|
154
|
+
suppressNotifications: false,
|
|
155
|
+
dedupe_open_for_system: true,
|
|
156
|
+
} as never,
|
|
157
|
+
});
|
|
158
|
+
expect((result.artifact as { incidentId: string }).incidentId).toBe(
|
|
159
|
+
"INC-NEW",
|
|
160
|
+
);
|
|
161
|
+
expect(service.createIncidentDedupedForSystem).toHaveBeenCalledTimes(1);
|
|
162
|
+
});
|
|
163
|
+
|
|
164
|
+
it("without the flag always creates (no dedup lookup)", async () => {
|
|
165
|
+
const service = makeServiceStub({
|
|
166
|
+
findActiveIncidentForSystem: mock(
|
|
167
|
+
async () => ({
|
|
168
|
+
id: "INC-OPEN",
|
|
169
|
+
status: "investigating",
|
|
170
|
+
severity: "critical",
|
|
171
|
+
systemIds: ["sys-1"],
|
|
172
|
+
}),
|
|
173
|
+
) as unknown as IncidentService["findActiveIncidentForSystem"],
|
|
174
|
+
createIncident: mock(
|
|
175
|
+
async () => ({
|
|
176
|
+
id: "INC-NEW",
|
|
177
|
+
status: "investigating",
|
|
178
|
+
severity: "critical",
|
|
179
|
+
systemIds: ["sys-1"],
|
|
180
|
+
}),
|
|
181
|
+
) as unknown as IncidentService["createIncident"],
|
|
182
|
+
});
|
|
183
|
+
const [createAction] = createIncidentActions({ service });
|
|
184
|
+
const result = await createAction.execute({
|
|
185
|
+
...actionContext,
|
|
186
|
+
config: {
|
|
187
|
+
title: "DB down",
|
|
188
|
+
severity: "critical",
|
|
189
|
+
systemIds: ["sys-1"],
|
|
190
|
+
suppressNotifications: false,
|
|
191
|
+
// dedupe_open_for_system omitted (defaults false)
|
|
192
|
+
} as never,
|
|
193
|
+
});
|
|
194
|
+
expect((result.artifact as { incidentId: string }).incidentId).toBe(
|
|
195
|
+
"INC-NEW",
|
|
196
|
+
);
|
|
197
|
+
expect(service.findActiveIncidentForSystem).not.toHaveBeenCalled();
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
// 6(a): an action-created incident is now reactive — the create runs
|
|
201
|
+
// through `handle.mutate`, so the deriver fires `incident.created`.
|
|
202
|
+
it("drives the create through handle.mutate (action-created incident is reactive)", async () => {
|
|
203
|
+
const created = {
|
|
204
|
+
id: "INC-NEW",
|
|
205
|
+
status: "investigating" as const,
|
|
206
|
+
severity: "critical" as const,
|
|
207
|
+
systemIds: ["sys-1"],
|
|
208
|
+
};
|
|
209
|
+
const service = makeServiceStub({
|
|
210
|
+
createIncident: mock(
|
|
211
|
+
async () => created,
|
|
212
|
+
) as unknown as IncidentService["createIncident"],
|
|
213
|
+
});
|
|
214
|
+
const mutate = mock(
|
|
215
|
+
async (input: { id: string; apply: () => Promise<unknown> }) =>
|
|
216
|
+
input.apply(),
|
|
217
|
+
);
|
|
218
|
+
const handle = { kind: "incident", mutate } as never;
|
|
219
|
+
const [createAction] = createIncidentActions({
|
|
220
|
+
service,
|
|
221
|
+
getIncidentEntity: () => handle,
|
|
222
|
+
});
|
|
223
|
+
await createAction.execute({
|
|
224
|
+
...actionContext,
|
|
225
|
+
config: {
|
|
226
|
+
title: "DB down",
|
|
227
|
+
severity: "critical",
|
|
228
|
+
systemIds: ["sys-1"],
|
|
229
|
+
suppressNotifications: false,
|
|
230
|
+
} as never,
|
|
231
|
+
});
|
|
232
|
+
// The create was routed through the entity handle (reactive), keyed on
|
|
233
|
+
// the reserved id passed to the service create.
|
|
234
|
+
expect(mutate).toHaveBeenCalledTimes(1);
|
|
235
|
+
const mutateArg = mutate.mock.calls[0]![0] as { id: string };
|
|
236
|
+
expect(service.createIncident).toHaveBeenCalledWith(
|
|
237
|
+
expect.anything(),
|
|
238
|
+
undefined,
|
|
239
|
+
mutateArg.id,
|
|
240
|
+
);
|
|
241
|
+
});
|
|
242
|
+
|
|
243
|
+
it("dedupe reuse drives NO handle.mutate (no duplicate incident.created)", async () => {
|
|
244
|
+
const existing = {
|
|
245
|
+
id: "INC-OPEN",
|
|
246
|
+
status: "investigating" as const,
|
|
247
|
+
severity: "critical" as const,
|
|
248
|
+
systemIds: ["sys-1"],
|
|
249
|
+
};
|
|
250
|
+
const service = makeServiceStub({
|
|
251
|
+
createIncidentDedupedForSystem: mock(async () => ({
|
|
252
|
+
incident: existing,
|
|
253
|
+
reused: true,
|
|
254
|
+
})) as unknown as IncidentService["createIncidentDedupedForSystem"],
|
|
255
|
+
});
|
|
256
|
+
const mutate = mock(
|
|
257
|
+
async (input: { id: string; apply: () => Promise<unknown> }) =>
|
|
258
|
+
input.apply(),
|
|
259
|
+
);
|
|
260
|
+
const handle = { kind: "incident", mutate } as never;
|
|
261
|
+
const [createAction] = createIncidentActions({
|
|
262
|
+
service,
|
|
263
|
+
getIncidentEntity: () => handle,
|
|
264
|
+
});
|
|
265
|
+
await createAction.execute({
|
|
266
|
+
...actionContext,
|
|
267
|
+
config: {
|
|
268
|
+
title: "DB down",
|
|
269
|
+
severity: "critical",
|
|
270
|
+
systemIds: ["sys-1"],
|
|
271
|
+
suppressNotifications: false,
|
|
272
|
+
dedupe_open_for_system: true,
|
|
273
|
+
} as never,
|
|
69
274
|
});
|
|
275
|
+
// A reused incident is unchanged → no entity write at all.
|
|
276
|
+
expect(mutate).not.toHaveBeenCalled();
|
|
70
277
|
});
|
|
71
278
|
});
|
|
72
279
|
|
|
73
280
|
describe("incident.resolve", () => {
|
|
74
281
|
it("returns failure when the incident doesn't exist", async () => {
|
|
75
282
|
const service = makeServiceStub({
|
|
283
|
+
// The existence guard (re-read before the driven write) sees no row.
|
|
284
|
+
getIncident: mock(
|
|
285
|
+
async () => undefined,
|
|
286
|
+
) as unknown as IncidentService["getIncident"],
|
|
76
287
|
resolveIncident: mock(
|
|
77
288
|
async () => undefined,
|
|
78
289
|
) as unknown as IncidentService["resolveIncident"],
|
|
@@ -85,6 +296,8 @@ describe("incident automation actions", () => {
|
|
|
85
296
|
});
|
|
86
297
|
expect(result.success).toBe(false);
|
|
87
298
|
expect(result.error).toMatch(/not found/i);
|
|
299
|
+
// Guard short-circuits before attempting the resolve.
|
|
300
|
+
expect(service.resolveIncident).not.toHaveBeenCalled();
|
|
88
301
|
});
|
|
89
302
|
|
|
90
303
|
it("calls service.resolveIncident on the happy path", async () => {
|
|
@@ -108,6 +321,73 @@ describe("incident automation actions", () => {
|
|
|
108
321
|
expect(result.success).toBe(true);
|
|
109
322
|
expect(service.resolveIncident).toHaveBeenCalledWith("INC-1", "Fixed");
|
|
110
323
|
});
|
|
324
|
+
|
|
325
|
+
// 6(b) regression: an action-driven resolve must route through the reactive
|
|
326
|
+
// entity (like the RPC router) so it appends an `entity_transitions` row,
|
|
327
|
+
// emits `ENTITY_CHANGED` (waking `wait_until`), and fires the
|
|
328
|
+
// `incident.resolved` deriver — not call the service directly.
|
|
329
|
+
it("routes the resolve through handle.mutate (transition + incident.resolved deriver)", async () => {
|
|
330
|
+
const resolved = {
|
|
331
|
+
id: "INC-1",
|
|
332
|
+
status: "resolved" as const,
|
|
333
|
+
severity: "critical" as const,
|
|
334
|
+
systemIds: ["sys-1"],
|
|
335
|
+
};
|
|
336
|
+
const service = makeServiceStub({
|
|
337
|
+
// `prev` (before resolve) for the deriver assertion below.
|
|
338
|
+
getIncident: mock(async () => ({
|
|
339
|
+
id: "INC-1",
|
|
340
|
+
status: "investigating" as const,
|
|
341
|
+
severity: "critical" as const,
|
|
342
|
+
systemIds: ["sys-1"],
|
|
343
|
+
})) as unknown as IncidentService["getIncident"],
|
|
344
|
+
resolveIncident: mock(
|
|
345
|
+
async () => resolved,
|
|
346
|
+
) as unknown as IncidentService["resolveIncident"],
|
|
347
|
+
});
|
|
348
|
+
const mutate = mock(
|
|
349
|
+
async (input: {
|
|
350
|
+
id: string;
|
|
351
|
+
opts?: { runId?: string };
|
|
352
|
+
apply: () => Promise<unknown>;
|
|
353
|
+
}) => input.apply(),
|
|
354
|
+
);
|
|
355
|
+
const handle = { kind: "incident", mutate } as never;
|
|
356
|
+
const resolveAction = createIncidentActions({
|
|
357
|
+
service,
|
|
358
|
+
getIncidentEntity: () => handle,
|
|
359
|
+
})[1];
|
|
360
|
+
|
|
361
|
+
const result = await resolveAction.execute({
|
|
362
|
+
...actionContext,
|
|
363
|
+
config: { incidentId: "INC-1", message: "Fixed" } as never,
|
|
364
|
+
});
|
|
365
|
+
|
|
366
|
+
expect(result.success).toBe(true);
|
|
367
|
+
// The write was driven through the entity handle, keyed on the incident
|
|
368
|
+
// id, with the dispatch `runId` for secret masking.
|
|
369
|
+
expect(mutate).toHaveBeenCalledTimes(1);
|
|
370
|
+
const mutateArg = mutate.mock.calls[0]![0] as {
|
|
371
|
+
id: string;
|
|
372
|
+
opts?: { runId?: string };
|
|
373
|
+
};
|
|
374
|
+
expect(mutateArg.id).toBe("INC-1");
|
|
375
|
+
expect(mutateArg.opts?.runId).toBe("run-1");
|
|
376
|
+
|
|
377
|
+
// The post-write reactive state is `resolved` — feeding the prev→next
|
|
378
|
+
// change through the deriver fires `incident.resolved` (the wake/route).
|
|
379
|
+
const events = deriveIncidentTriggerEvents({
|
|
380
|
+
kind: "incident",
|
|
381
|
+
id: "INC-1",
|
|
382
|
+
prev: { status: "investigating", severity: "critical", systemIds: ["sys-1"] },
|
|
383
|
+
next: { status: "resolved", severity: "critical", systemIds: ["sys-1"] },
|
|
384
|
+
delta: { status: "resolved" },
|
|
385
|
+
changedFields: ["status"],
|
|
386
|
+
actor: SYSTEM_ACTOR,
|
|
387
|
+
occurredAt: new Date().toISOString(),
|
|
388
|
+
});
|
|
389
|
+
expect(events).toEqual([INCIDENT_TRIGGER_EVENTS.resolved]);
|
|
390
|
+
});
|
|
111
391
|
});
|
|
112
392
|
|
|
113
393
|
describe("incident.add_update", () => {
|
|
@@ -169,4 +449,75 @@ describe("incident automation actions", () => {
|
|
|
169
449
|
});
|
|
170
450
|
});
|
|
171
451
|
});
|
|
452
|
+
|
|
453
|
+
describe("incident artifact (produces / consumes)", () => {
|
|
454
|
+
it("incident.create declares produces: incident", () => {
|
|
455
|
+
const [createAction] = createIncidentActions({
|
|
456
|
+
service: makeServiceStub(),
|
|
457
|
+
});
|
|
458
|
+
expect(createAction.produces).toBe("incident");
|
|
459
|
+
});
|
|
460
|
+
|
|
461
|
+
it("incident.resolve consumes the upstream incident artifact when incidentId is omitted", async () => {
|
|
462
|
+
const resolved = {
|
|
463
|
+
id: "INC-9",
|
|
464
|
+
status: "resolved",
|
|
465
|
+
severity: "critical",
|
|
466
|
+
systemIds: ["sys-1"],
|
|
467
|
+
};
|
|
468
|
+
const service = makeServiceStub({
|
|
469
|
+
resolveIncident: mock(
|
|
470
|
+
async () => resolved,
|
|
471
|
+
) as unknown as IncidentService["resolveIncident"],
|
|
472
|
+
});
|
|
473
|
+
const resolveAction = createIncidentActions({ service })[1];
|
|
474
|
+
expect(resolveAction.consumes).toEqual(["incident"]);
|
|
475
|
+
|
|
476
|
+
const result = await resolveAction.execute({
|
|
477
|
+
...actionContext,
|
|
478
|
+
// No incidentId in config — falls back to the consumed artifact.
|
|
479
|
+
config: { message: "recovered" } as never,
|
|
480
|
+
consumedArtifacts: {
|
|
481
|
+
incident: { incidentId: "INC-9", status: "investigating" },
|
|
482
|
+
},
|
|
483
|
+
});
|
|
484
|
+
expect(result.success).toBe(true);
|
|
485
|
+
expect(service.resolveIncident).toHaveBeenCalledWith("INC-9", "recovered");
|
|
486
|
+
});
|
|
487
|
+
|
|
488
|
+
it("incident.resolve config incidentId takes priority over the artifact", async () => {
|
|
489
|
+
const service = makeServiceStub({
|
|
490
|
+
resolveIncident: mock(
|
|
491
|
+
async () => ({
|
|
492
|
+
id: "INC-CONFIG",
|
|
493
|
+
status: "resolved",
|
|
494
|
+
severity: "high",
|
|
495
|
+
systemIds: [],
|
|
496
|
+
}),
|
|
497
|
+
) as unknown as IncidentService["resolveIncident"],
|
|
498
|
+
});
|
|
499
|
+
const resolveAction = createIncidentActions({ service })[1];
|
|
500
|
+
await resolveAction.execute({
|
|
501
|
+
...actionContext,
|
|
502
|
+
config: { incidentId: "INC-CONFIG" } as never,
|
|
503
|
+
consumedArtifacts: { incident: { incidentId: "INC-ARTIFACT" } },
|
|
504
|
+
});
|
|
505
|
+
expect(service.resolveIncident).toHaveBeenCalledWith(
|
|
506
|
+
"INC-CONFIG",
|
|
507
|
+
undefined,
|
|
508
|
+
);
|
|
509
|
+
});
|
|
510
|
+
|
|
511
|
+
it("incident.resolve fails clearly when neither config nor artifact has an id", async () => {
|
|
512
|
+
const service = makeServiceStub();
|
|
513
|
+
const resolveAction = createIncidentActions({ service })[1];
|
|
514
|
+
const result = await resolveAction.execute({
|
|
515
|
+
...actionContext,
|
|
516
|
+
config: {} as never,
|
|
517
|
+
consumedArtifacts: {},
|
|
518
|
+
});
|
|
519
|
+
expect(result.success).toBe(false);
|
|
520
|
+
expect(service.resolveIncident).not.toHaveBeenCalled();
|
|
521
|
+
});
|
|
522
|
+
});
|
|
172
523
|
});
|