@checkstack/incident-backend 1.3.0 → 1.4.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/router.ts CHANGED
@@ -14,10 +14,19 @@ import type { IncidentService } from "./service";
14
14
  import { CatalogApi } from "@checkstack/catalog-common";
15
15
  import { AuthApi } from "@checkstack/auth-common";
16
16
  import type { InferClient } from "@checkstack/common";
17
- import { incidentHooks } from "./hooks";
18
17
  import { notifyAffectedSystems } from "./notifications";
19
- import type { IncidentUpdate } from "@checkstack/incident-common";
18
+ import type {
19
+ IncidentUpdate,
20
+ IncidentWithSystems,
21
+ } from "@checkstack/incident-common";
20
22
  import type { IncidentCache } from "./cache";
23
+ import type { EntityHandle } from "@checkstack/automation-backend";
24
+ import {
25
+ writeIncidentEntity,
26
+ removeIncidentEntity,
27
+ toIncidentEntityState,
28
+ type IncidentEntityState,
29
+ } from "./incident-entity";
21
30
 
22
31
  export function createRouter(
23
32
  service: IncidentService,
@@ -29,6 +38,8 @@ export function createRouter(
29
38
  authClient: InferClient<typeof AuthApi>,
30
39
  logger: Logger,
31
40
  cache: IncidentCache,
41
+ /** Resolver for the reactive `incident` entity (§10.1). Undefined in tests. */
42
+ getIncidentEntity?: () => EntityHandle<IncidentEntityState> | undefined,
32
43
  ) {
33
44
  /**
34
45
  * Resolve user IDs to profile names for a list of updates.
@@ -148,7 +159,23 @@ export function createRouter(
148
159
  createIncident: os.createIncident.handler(async ({ input, context }) => {
149
160
  const userId =
150
161
  context.user && "id" in context.user ? context.user.id : undefined;
151
- const result = await service.createIncident(input, userId);
162
+
163
+ // Drive the create through the reactive `incident` entity (§10.1):
164
+ // `apply` performs the REAL `incidents`/junction write (the plugin's own
165
+ // db/tx) and returns the new reactive state; the deriver fires
166
+ // `incident.created` from the resulting change. The id is generated up
167
+ // front so the handle is keyed on it and the create's `prev` snapshot
168
+ // correctly reads the not-yet-existing row as absent.
169
+ const incidentId = crypto.randomUUID();
170
+ let result!: Awaited<ReturnType<typeof service.createIncident>>;
171
+ await writeIncidentEntity({
172
+ handle: getIncidentEntity?.(),
173
+ incidentId,
174
+ apply: async () => {
175
+ result = await service.createIncident(input, userId, incidentId);
176
+ return toIncidentEntityState(result);
177
+ },
178
+ });
152
179
 
153
180
  // Invalidate before signal so any frontend that refetches in response
154
181
  // sees fresh data. The mutation invariant for every handler in this
@@ -165,17 +192,6 @@ export function createRouter(
165
192
  action: "created",
166
193
  });
167
194
 
168
- // Emit hook for cross-plugin coordination
169
- await context.emitHook(incidentHooks.incidentCreated, {
170
- incidentId: result.id,
171
- systemIds: result.systemIds,
172
- title: result.title,
173
- description: result.description,
174
- severity: result.severity,
175
- status: result.status,
176
- createdAt: result.createdAt.toISOString(),
177
- });
178
-
179
195
  // Send notifications to system subscribers
180
196
  const systemNames = await resolveSystemNames(result.systemIds);
181
197
  await notifyAffectedSystems({
@@ -193,12 +209,32 @@ export function createRouter(
193
209
  return result;
194
210
  }),
195
211
 
196
- updateIncident: os.updateIncident.handler(async ({ input, context }) => {
197
- const result = await service.updateIncident(input);
198
- if (!result) {
212
+ updateIncident: os.updateIncident.handler(async ({ input }) => {
213
+ // Probe existence first so a missing incident still surfaces as
214
+ // NOT_FOUND without driving an entity write.
215
+ const exists = await service.getIncident(input.id);
216
+ if (!exists) {
199
217
  throw new ORPCError("NOT_FOUND", { message: "Incident not found" });
200
218
  }
201
219
 
220
+ // Drive the update through the reactive `incident` entity (§10.1);
221
+ // `apply` performs the REAL update (the plugin's own db/tx) and returns
222
+ // the new reactive state. The deriver fires `incident.updated` (or
223
+ // `incident.resolved` on a resolution) from the resulting change.
224
+ let result!: NonNullable<Awaited<ReturnType<typeof service.updateIncident>>>;
225
+ await writeIncidentEntity({
226
+ handle: getIncidentEntity?.(),
227
+ incidentId: input.id,
228
+ apply: async () => {
229
+ const updated = await service.updateIncident(input);
230
+ if (!updated) {
231
+ throw new ORPCError("NOT_FOUND", { message: "Incident not found" });
232
+ }
233
+ result = updated;
234
+ return toIncidentEntityState(result);
235
+ },
236
+ });
237
+
202
238
  await cache.invalidateForMutation({
203
239
  incidentId: result.id,
204
240
  systemIds: result.systemIds,
@@ -211,16 +247,6 @@ export function createRouter(
211
247
  action: "updated",
212
248
  });
213
249
 
214
- // Emit hook for cross-plugin coordination
215
- await context.emitHook(incidentHooks.incidentUpdated, {
216
- incidentId: result.id,
217
- systemIds: result.systemIds,
218
- title: result.title,
219
- description: result.description,
220
- severity: result.severity,
221
- status: result.status,
222
- });
223
-
224
250
  // Send notifications to system subscribers
225
251
  const systemNames = await resolveSystemNames(result.systemIds);
226
252
  await notifyAffectedSystems({
@@ -248,11 +274,30 @@ export function createRouter(
248
274
  : undefined;
249
275
  const previousStatus = previousIncident?.status;
250
276
 
251
- const result = await service.addUpdate(input, userId);
277
+ // Drive the update through the reactive `incident` entity (§10.1).
278
+ // `apply` posts the update row + (optionally) flips status in the
279
+ // plugin's own db/tx, then re-reads the post-write reactive state. The
280
+ // deriver fires `incident.resolved` on a transition to resolved,
281
+ // otherwise `incident.updated` — purely from the entity diff (so the
282
+ // `statusChange` branch collapses into the single driven write). When
283
+ // the status is unchanged, the diff is empty and no event fires.
284
+ let result!: Awaited<ReturnType<typeof service.addUpdate>>;
285
+ let incident: Awaited<ReturnType<typeof service.getIncident>>;
286
+ await writeIncidentEntity({
287
+ handle: getIncidentEntity?.(),
288
+ incidentId: input.incidentId,
289
+ apply: async () => {
290
+ result = await service.addUpdate(input, userId);
291
+ incident = await service.getIncident(input.incidentId);
292
+ // The incident must exist (the update FK-references it); guard for
293
+ // the type and to fail loudly if it vanished mid-write.
294
+ if (!incident) {
295
+ throw new ORPCError("NOT_FOUND", { message: "Incident not found" });
296
+ }
297
+ return toIncidentEntityState(incident);
298
+ },
299
+ });
252
300
 
253
- // Read post-write state directly from the service so the broadcast
254
- // payload is fresh; the cache is invalidated below before the signal.
255
- const incident = await service.getIncident(input.incidentId);
256
301
  if (incident) {
257
302
  await cache.invalidateForMutation({
258
303
  incidentId: input.incidentId,
@@ -265,28 +310,6 @@ export function createRouter(
265
310
  action: "updated",
266
311
  });
267
312
 
268
- // Emit hook for cross-plugin coordination
269
- await context.emitHook(incidentHooks.incidentUpdated, {
270
- incidentId: input.incidentId,
271
- systemIds: incident.systemIds,
272
- title: incident.title,
273
- description: incident.description,
274
- severity: incident.severity,
275
- status: incident.status,
276
- statusChange: input.statusChange,
277
- });
278
-
279
- // If status changed to resolved, emit resolved hook
280
- if (input.statusChange === "resolved") {
281
- await context.emitHook(incidentHooks.incidentResolved, {
282
- incidentId: input.incidentId,
283
- systemIds: incident.systemIds,
284
- title: incident.title,
285
- severity: incident.severity,
286
- resolvedAt: new Date().toISOString(),
287
- });
288
- }
289
-
290
313
  // Send notifications when status changes
291
314
  if (input.statusChange && previousStatus !== input.statusChange) {
292
315
  // Determine notification action based on status transition
@@ -321,15 +344,34 @@ export function createRouter(
321
344
  resolveIncident: os.resolveIncident.handler(async ({ input, context }) => {
322
345
  const userId =
323
346
  context.user && "id" in context.user ? context.user.id : undefined;
324
- const result = await service.resolveIncident(
325
- input.id,
326
- input.message,
327
- userId,
328
- );
329
- if (!result) {
347
+
348
+ const exists = await service.getIncident(input.id);
349
+ if (!exists) {
330
350
  throw new ORPCError("NOT_FOUND", { message: "Incident not found" });
331
351
  }
332
352
 
353
+ // Drive the resolve through the reactive `incident` entity (§10.1);
354
+ // `apply` performs the REAL resolve (the plugin's own db/tx) and returns
355
+ // the new reactive state. The deriver fires `incident.resolved` from the
356
+ // status → resolved transition.
357
+ let result!: NonNullable<Awaited<ReturnType<typeof service.resolveIncident>>>;
358
+ await writeIncidentEntity({
359
+ handle: getIncidentEntity?.(),
360
+ incidentId: input.id,
361
+ apply: async () => {
362
+ const resolved = await service.resolveIncident(
363
+ input.id,
364
+ input.message,
365
+ userId,
366
+ );
367
+ if (!resolved) {
368
+ throw new ORPCError("NOT_FOUND", { message: "Incident not found" });
369
+ }
370
+ result = resolved;
371
+ return toIncidentEntityState(result);
372
+ },
373
+ });
374
+
333
375
  await cache.invalidateForMutation({
334
376
  incidentId: result.id,
335
377
  systemIds: result.systemIds,
@@ -342,15 +384,6 @@ export function createRouter(
342
384
  action: "resolved",
343
385
  });
344
386
 
345
- // Emit hook for cross-plugin coordination
346
- await context.emitHook(incidentHooks.incidentResolved, {
347
- incidentId: result.id,
348
- systemIds: result.systemIds,
349
- title: result.title,
350
- severity: result.severity,
351
- resolvedAt: new Date().toISOString(),
352
- });
353
-
354
387
  // Send notifications to system subscribers
355
388
  const systemNames = await resolveSystemNames(result.systemIds);
356
389
  await notifyAffectedSystems({
@@ -369,10 +402,27 @@ export function createRouter(
369
402
  }),
370
403
 
371
404
  deleteIncident: os.deleteIncident.handler(async ({ input }) => {
372
- // Get incident before deleting to get systemIds
405
+ // Get incident before deleting to get systemIds.
373
406
  const incident = await service.getIncident(input.id);
374
- const success = await service.deleteIncident(input.id);
375
- if (success && incident) {
407
+ if (!incident) {
408
+ return { success: false };
409
+ }
410
+
411
+ // Drive the delete through the reactive `incident` entity tombstone
412
+ // (§10.1). `apply` performs the REAL delete (the plugin's own db/tx);
413
+ // the framework records the tombstone transition and emits a tombstone
414
+ // change. No `incident.deleted` trigger event exists, so the deriver
415
+ // fires nothing. `success` tracks whether the row was actually deleted.
416
+ let success = false;
417
+ await removeIncidentEntity({
418
+ handle: getIncidentEntity?.(),
419
+ incidentId: input.id,
420
+ apply: async () => {
421
+ success = await service.deleteIncident(input.id);
422
+ },
423
+ });
424
+
425
+ if (success) {
376
426
  await cache.invalidateForMutation({
377
427
  incidentId: input.id,
378
428
  systemIds: incident.systemIds,
@@ -396,11 +446,22 @@ export function createRouter(
396
446
  }),
397
447
 
398
448
  createAutoIncident: os.createAutoIncident.handler(
399
- async ({ input, context }) => {
400
- // No user context for service-initiated incidents; createdBy
401
- // stays null and the timeline shows the originating plugin via
402
- // the hook payload.
403
- const result = await service.createIncident(input);
449
+ async ({ input }) => {
450
+ // No user context for service-initiated incidents; createdBy stays
451
+ // null and the timeline shows the originating plugin via the entity
452
+ // state. Driven through the reactive `incident` entity (§10.1); the
453
+ // deriver fires `incident.created` from the resulting change. The id
454
+ // is generated up front so the create's `prev` snapshot is null.
455
+ const incidentId = crypto.randomUUID();
456
+ let result!: Awaited<ReturnType<typeof service.createIncident>>;
457
+ await writeIncidentEntity({
458
+ handle: getIncidentEntity?.(),
459
+ incidentId,
460
+ apply: async () => {
461
+ result = await service.createIncident(input, undefined, incidentId);
462
+ return toIncidentEntityState(result);
463
+ },
464
+ });
404
465
 
405
466
  await cache.invalidateForMutation({
406
467
  incidentId: result.id,
@@ -413,16 +474,6 @@ export function createRouter(
413
474
  action: "created",
414
475
  });
415
476
 
416
- await context.emitHook(incidentHooks.incidentCreated, {
417
- incidentId: result.id,
418
- systemIds: result.systemIds,
419
- title: result.title,
420
- description: result.description,
421
- severity: result.severity,
422
- status: result.status,
423
- createdAt: result.createdAt.toISOString(),
424
- });
425
-
426
477
  const systemNames = await resolveSystemNames(result.systemIds);
427
478
  await notifyAffectedSystems({
428
479
  catalogClient,
@@ -441,10 +492,31 @@ export function createRouter(
441
492
  ),
442
493
 
443
494
  resolveAutoIncident: os.resolveAutoIncident.handler(
444
- async ({ input, context }) => {
445
- const result = await service.resolveIncident(input.id, input.message);
446
- // Idempotent: a missing or already-resolved incident is treated
447
- // as success so the auto-close worker can be re-run safely.
495
+ async ({ input }) => {
496
+ // Idempotent: a missing incident is treated as success so the
497
+ // auto-close worker can be re-run safely. Probe first so the no-op
498
+ // case never drives an entity write.
499
+ const exists = await service.getIncident(input.id);
500
+ if (!exists) {
501
+ return { success: true };
502
+ }
503
+
504
+ // Drive the resolve through the reactive `incident` entity (§10.1):
505
+ // the REAL resolve runs INSIDE `apply`, so `prev` is snapshotted
506
+ // before the status flips and the deriver fires `incident.resolved`
507
+ // from the status → resolved transition. An already-resolved incident
508
+ // yields an empty diff and no event — the idempotent re-run case.
509
+ let result: IncidentWithSystems | undefined;
510
+ await writeIncidentEntity({
511
+ handle: getIncidentEntity?.(),
512
+ incidentId: input.id,
513
+ apply: async () => {
514
+ result = await service.resolveIncident(input.id, input.message);
515
+ // The probe found it; a race could still delete it mid-write.
516
+ // Fall back to the pre-write state so the diff is a no-op.
517
+ return toIncidentEntityState(result ?? exists);
518
+ },
519
+ });
448
520
  if (!result) {
449
521
  return { success: true };
450
522
  }
@@ -460,14 +532,6 @@ export function createRouter(
460
532
  action: "resolved",
461
533
  });
462
534
 
463
- await context.emitHook(incidentHooks.incidentResolved, {
464
- incidentId: result.id,
465
- systemIds: result.systemIds,
466
- title: result.title,
467
- severity: result.severity,
468
- resolvedAt: new Date().toISOString(),
469
- });
470
-
471
535
  const systemNames = await resolveSystemNames(result.systemIds);
472
536
  await notifyAffectedSystems({
473
537
  catalogClient,
@@ -1,5 +1,11 @@
1
1
  import { describe, it, expect, mock, beforeEach } from "bun:test";
2
2
  import { IncidentService } from "./service";
3
+ import {
4
+ incidents,
5
+ incidentSystems,
6
+ incidentUpdates,
7
+ incidentLinks,
8
+ } from "./schema";
3
9
 
4
10
  /**
5
11
  * Programmable mock DB that records each `select(...).from(...).where(...)`
@@ -124,3 +130,196 @@ describe("IncidentService.hasActiveIncidentWithSuppression", () => {
124
130
  expect(dbHelper.getCallCount()).toBe(1);
125
131
  });
126
132
  });
133
+
134
// Covers the plugin-backed entity read: `getManyEntityStates` is expected to
// project `{ status, severity, systemIds }` per incident id, to leave unknown
// ids out of the result, and to issue no more queries than it needs.
describe("IncidentService.getManyEntityStates (plugin-backed entity read)", () => {
  it("returns {} for an empty id set without querying", async () => {
    const selectDb = createProgrammableSelectDb([]);
    const incidentService = new IncidentService(selectDb.db as never);

    const states = await incidentService.getManyEntityStates([]);

    expect(states).toEqual({});
    // An empty id list must short-circuit before any select is issued.
    expect(selectDb.getCallCount()).toBe(0);
  });

  it("projects { status, severity, systemIds } from incidents + junction", async () => {
    // Result set handed back by the first query: the matching incident rows.
    const incidentRows = [
      { id: "inc-1", status: "investigating", severity: "major" },
      { id: "inc-2", status: "resolved", severity: "minor" },
    ];
    // Result set handed back by the second query: the junction rows.
    const junctionRows = [
      { incidentId: "inc-1", systemId: "sys-a" },
      { incidentId: "inc-1", systemId: "sys-b" },
      { incidentId: "inc-2", systemId: "sys-c" },
    ];
    const selectDb = createProgrammableSelectDb([incidentRows, junctionRows]);
    const incidentService = new IncidentService(selectDb.db as never);

    const states = await incidentService.getManyEntityStates([
      "inc-1",
      "inc-2",
      "inc-x",
    ]);

    expect(states).toEqual({
      "inc-1": {
        status: "investigating",
        severity: "major",
        systemIds: ["sys-a", "sys-b"],
      },
      "inc-2": { status: "resolved", severity: "minor", systemIds: ["sys-c"] },
    });
    // Ids with no incident row are left out entirely rather than mapped to
    // null/undefined.
    expect("inc-x" in states).toBe(false);
  });

  it("returns {} when none of the ids exist (no junction query)", async () => {
    // Only the incidents query is primed, and it comes back empty.
    const selectDb = createProgrammableSelectDb([[]]);
    const incidentService = new IncidentService(selectDb.db as never);

    const states = await incidentService.getManyEntityStates(["ghost"]);

    expect(states).toEqual({});
    // Exactly one select: the junction lookup is skipped when nothing matched.
    expect(selectDb.getCallCount()).toBe(1);
  });

  it("yields an empty systemIds array for an incident with no systems", async () => {
    const incidentRows = [
      { id: "inc-1", status: "monitoring", severity: "critical" },
    ];
    // The junction query returns no rows for this incident.
    const selectDb = createProgrammableSelectDb([incidentRows, []]);
    const incidentService = new IncidentService(selectDb.db as never);

    const states = await incidentService.getManyEntityStates(["inc-1"]);

    expect(states["inc-1"]).toEqual({
      status: "monitoring",
      severity: "critical",
      systemIds: [],
    });
  });
});
194
+
195
/**
 * Table-backed fake `db` for the dedup-create path. Models just enough of
 * the Drizzle surface the service touches: `select`/`insert` routed by TABLE
 * IDENTITY, `.from`/`.where`/`.limit`, and a `transaction` whose `tx.execute`
 * models `pg_advisory_xact_lock`.
 *
 * Lock model: the lock is taken when `tx.execute(...)` runs, keyed on the
 * `incident.dedupe-open-for-system:<id>` string found in the executed SQL
 * object, and is held until the surrounding `transaction` callback settles.
 * Callers that execute the lock statement for the same key therefore run the
 * rest of their transaction one-at-a-time, while different keys do not block
 * each other. A statement whose key cannot be recognized falls back to one
 * shared `"default"` key. A transaction that never calls `tx.execute` is NOT
 * serialized, so a service that forgets to take the lock is exposed by the
 * concurrency test instead of being masked by the fake.
 *
 * WHERE clauses are ignored: `.where()` returns every row of the table, so
 * tests using this fake must confine their data to a single system.
 *
 * @returns `db` (typed `unknown`; cast at the call site) and the backing
 *   `store` so tests can assert on inserted rows.
 */
function createDedupFakeDb() {
  type Row = Record<string, unknown>;

  const store = {
    incidents: [] as Row[],
    incidentSystems: [] as Row[],
    incidentUpdates: [] as Row[],
    incidentLinks: [] as Row[],
  };

  // Maps a schema table object (compared by identity) to its backing list.
  const tableKey = (table: unknown): keyof typeof store | undefined => {
    if (table === incidents) return "incidents";
    if (table === incidentSystems) return "incidentSystems";
    if (table === incidentUpdates) return "incidentUpdates";
    if (table === incidentLinks) return "incidentLinks";
    return undefined;
  };

  // Extracts the advisory-lock key from the serialized SQL object.
  const LOCK_KEY_PATTERN = /incident\.dedupe-open-for-system:[^"\\]+/;
  const FALLBACK_LOCK_KEY = "default";

  // Per-key queue tail: resolves once every current holder/waiter of that
  // key has released (the xact-lock model).
  const tails = new Map<string, Promise<unknown>>();

  function buildSelect() {
    return (projection?: Record<string, unknown>) => {
      // With a projection, keep only the projected keys of each row.
      const project = (list: Row[]): Row[] => {
        if (!projection) return list;
        const keys = Object.keys(projection);
        return list.map((r) => {
          const out: Row = {};
          for (const k of keys) out[k] = r[k];
          return out;
        });
      };
      const from = (table: unknown) => {
        const key = tableKey(table);
        // Snapshot the table at `.from()` time; unknown tables read as empty.
        const rows = key ? project([...store[key]]) : [];
        const limit = (n: number) => Promise.resolve(rows.slice(0, n));
        // `.where()` does no filtering — see the function doc.
        const where = () => Object.assign(Promise.resolve(rows), { limit });
        // Awaitable directly, or chainable through `.where()` / `.limit()`.
        return Object.assign(Promise.resolve(rows), { where, limit });
      };
      return { from };
    };
  }

  function buildInsert() {
    return (table: unknown) => ({
      values: (vals: Row) => {
        const key = tableKey(table);
        // Store a shallow copy so later caller-side mutation cannot leak in.
        if (key) store[key].push({ ...vals });
        return Promise.resolve();
      },
    });
  }

  const db = {
    select: buildSelect(),
    insert: buildInsert(),
    async transaction<T>(fn: (tx: unknown) => Promise<T>): Promise<T> {
      // Locks held by THIS transaction: key -> release callback.
      const releases = new Map<string, () => void>();
      const tx = {
        execute: async (sqlObj: unknown) => {
          // Drizzle sql`` keeps the interpolated key among its query chunks;
          // serialize them (or the raw object) and look for the key string.
          const chunks = (
            sqlObj as { queryChunks?: unknown[] } | null | undefined
          )?.queryChunks;
          const serialized: string | undefined = JSON.stringify(
            chunks ?? sqlObj,
          );
          const lockKey =
            serialized?.match(LOCK_KEY_PATTERN)?.[0] ?? FALLBACK_LOCK_KEY;

          // Re-entrant within one transaction: a key already held by this
          // transaction is not queued for again (that would self-deadlock).
          if (!releases.has(lockKey)) {
            const prior = tails.get(lockKey) ?? Promise.resolve();
            let release!: () => void;
            const held = new Promise<void>((resolve) => {
              release = resolve;
            });
            // Queue behind the current tail; the next waiter chains on us.
            tails.set(
              lockKey,
              prior.then(() => held),
            );
            releases.set(lockKey, release);
            // Block until every earlier holder of this key has released.
            await prior;
          }
          return { rows: [] };
        },
      };
      try {
        return await fn(tx);
      } finally {
        // Transaction end releases every lock it took, on success or error.
        for (const release of releases.values()) release();
      }
    },
  };

  return { db: db as unknown, store };
}
299
+
300
// Concurrency guard for the dedup-create path (M3): racing creates for the
// same system must converge on a single incident.
describe("IncidentService.createIncidentDedupedForSystem (M3)", () => {
  it("two concurrent dedupe creates for one system open exactly ONE incident", async () => {
    const fake = createDedupFakeDb();
    const incidentService = new IncidentService(fake.db as never);

    const payload = {
      title: "Down",
      severity: "critical" as const,
      systemIds: ["sys-1"],
      suppressNotifications: false,
    };

    // Both creates are started before either is awaited, mimicking the
    // sustained and flapping triggers firing together for one system. If the
    // find-then-create were not serialized per system, each call would see no
    // open incident and each would insert its own.
    const [first, second] = await Promise.all([
      incidentService.createIncidentDedupedForSystem(payload, "sys-1"),
      incidentService.createIncidentDedupedForSystem(payload, "sys-1"),
    ]);

    // Only a single incident row may have been written.
    expect(fake.store.incidents).toHaveLength(1);
    // Both callers end up pointing at that same incident...
    expect(first.incident.id).toBe(second.incident.id);
    // ...and exactly one of them reports having reused it.
    const reusedFlags = [first.reused, second.reused];
    expect(reusedFlags.filter(Boolean)).toHaveLength(1);
  });
});