@checkstack/incident-backend 1.4.0 → 1.6.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/service.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { eq, and, inArray, ne } from "drizzle-orm";
2
- import { withXactLock, type SafeDatabase } from "@checkstack/backend-api";
2
+ import type { AdvisoryLockService, SafeDatabase } from "@checkstack/backend-api";
3
3
  import * as schema from "./schema";
4
4
  import {
5
5
  incidents,
@@ -27,7 +27,10 @@ function generateId(): string {
27
27
  }
28
28
 
29
29
  export class IncidentService {
30
- constructor(private db: Db) {}
30
+ constructor(
31
+ private db: Db,
32
+ private advisoryLock: AdvisoryLockService,
33
+ ) {}
31
34
 
32
35
  /**
33
36
  * List incidents with optional filters
@@ -235,33 +238,38 @@ export class IncidentService {
235
238
  id: string = generateId(),
236
239
  ): Promise<IncidentWithSystems> {
237
240
 
238
- await this.db.insert(incidents).values({
239
- id,
240
- title: input.title,
241
- description: input.description,
242
- status: "investigating",
243
- severity: input.severity,
244
- suppressNotifications: input.suppressNotifications ?? false,
245
- });
246
-
247
- // Insert system associations
248
- for (const systemId of input.systemIds) {
249
- await this.db.insert(incidentSystems).values({
250
- incidentId: id,
251
- systemId,
241
+ // Atomic: the incident row, its system associations, and any initial update
242
+ // must all commit together. Without the transaction a failure mid-loop left
243
+ // a committed incident with only some (or none) of its system links.
244
+ await this.db.transaction(async (tx) => {
245
+ await tx.insert(incidents).values({
246
+ id,
247
+ title: input.title,
248
+ description: input.description,
249
+ status: "investigating",
250
+ severity: input.severity,
251
+ suppressNotifications: input.suppressNotifications ?? false,
252
252
  });
253
- }
254
253
 
255
- // Add initial update if provided
256
- if (input.initialMessage) {
257
- await this.db.insert(incidentUpdates).values({
258
- id: generateId(),
259
- incidentId: id,
260
- message: input.initialMessage,
261
- statusChange: "investigating",
262
- createdBy: userId,
263
- });
264
- }
254
+ // Insert system associations
255
+ for (const systemId of input.systemIds) {
256
+ await tx.insert(incidentSystems).values({
257
+ incidentId: id,
258
+ systemId,
259
+ });
260
+ }
261
+
262
+ // Add initial update if provided
263
+ if (input.initialMessage) {
264
+ await tx.insert(incidentUpdates).values({
265
+ id: generateId(),
266
+ incidentId: id,
267
+ message: input.initialMessage,
268
+ statusChange: "investigating",
269
+ createdBy: userId,
270
+ });
271
+ }
272
+ });
265
273
 
266
274
  return (await this.getIncident(id))!;
267
275
  }
@@ -290,24 +298,29 @@ export class IncidentService {
290
298
  if (input.suppressNotifications !== undefined)
291
299
  updateData.suppressNotifications = input.suppressNotifications;
292
300
 
293
- await this.db
294
- .update(incidents)
295
- .set(updateData)
296
- .where(eq(incidents.id, input.id));
297
-
298
- // Update system associations if provided
299
- if (input.systemIds !== undefined) {
300
- await this.db
301
- .delete(incidentSystems)
302
- .where(eq(incidentSystems.incidentId, input.id));
303
-
304
- for (const systemId of input.systemIds) {
305
- await this.db.insert(incidentSystems).values({
306
- incidentId: input.id,
307
- systemId,
308
- });
301
+ // Atomic: the field update and the delete-then-reinsert of system links must
302
+ // commit together. Without the transaction a failure after the delete left
303
+ // the incident with ALL system associations wiped.
304
+ await this.db.transaction(async (tx) => {
305
+ await tx
306
+ .update(incidents)
307
+ .set(updateData)
308
+ .where(eq(incidents.id, input.id));
309
+
310
+ // Update system associations if provided
311
+ if (input.systemIds !== undefined) {
312
+ await tx
313
+ .delete(incidentSystems)
314
+ .where(eq(incidentSystems.incidentId, input.id));
315
+
316
+ for (const systemId of input.systemIds) {
317
+ await tx.insert(incidentSystems).values({
318
+ incidentId: input.id,
319
+ systemId,
320
+ });
321
+ }
309
322
  }
310
- }
323
+ });
311
324
 
312
325
  return (await this.getIncident(input.id))!;
313
326
  }
@@ -321,20 +334,25 @@ export class IncidentService {
321
334
  ): Promise<IncidentUpdate> {
322
335
  const id = generateId();
323
336
 
324
- // If status change is provided, update the incident status
325
- if (input.statusChange) {
326
- await this.db
327
- .update(incidents)
328
- .set({ status: input.statusChange, updatedAt: new Date() })
329
- .where(eq(incidents.id, input.incidentId));
330
- }
337
+ // Atomic: the status flip and the timeline entry that records it must commit
338
+ // together. Without the transaction a failed insert left the incident in a
339
+ // new status with no update row explaining it (status/timeline divergence).
340
+ await this.db.transaction(async (tx) => {
341
+ // If status change is provided, update the incident status
342
+ if (input.statusChange) {
343
+ await tx
344
+ .update(incidents)
345
+ .set({ status: input.statusChange, updatedAt: new Date() })
346
+ .where(eq(incidents.id, input.incidentId));
347
+ }
331
348
 
332
- await this.db.insert(incidentUpdates).values({
333
- id,
334
- incidentId: input.incidentId,
335
- message: input.message,
336
- statusChange: input.statusChange,
337
- createdBy: userId,
349
+ await tx.insert(incidentUpdates).values({
350
+ id,
351
+ incidentId: input.incidentId,
352
+ message: input.message,
353
+ statusChange: input.statusChange,
354
+ createdBy: userId,
355
+ });
338
356
  });
339
357
 
340
358
  const [update] = await this.db
@@ -364,18 +382,21 @@ export class IncidentService {
364
382
 
365
383
  if (!existing) return undefined;
366
384
 
367
- await this.db
368
- .update(incidents)
369
- .set({ status: "resolved", updatedAt: new Date() })
370
- .where(eq(incidents.id, id));
385
+ // Atomic: mark resolved + write the resolution timeline entry together.
386
+ await this.db.transaction(async (tx) => {
387
+ await tx
388
+ .update(incidents)
389
+ .set({ status: "resolved", updatedAt: new Date() })
390
+ .where(eq(incidents.id, id));
371
391
 
372
- // Add resolution update entry
373
- await this.db.insert(incidentUpdates).values({
374
- id: generateId(),
375
- incidentId: id,
376
- message: message ?? "Incident resolved",
377
- statusChange: "resolved",
378
- createdBy: userId,
392
+ // Add resolution update entry
393
+ await tx.insert(incidentUpdates).values({
394
+ id: generateId(),
395
+ incidentId: id,
396
+ message: message ?? "Incident resolved",
397
+ statusChange: "resolved",
398
+ createdBy: userId,
399
+ });
379
400
  });
380
401
 
381
402
  return (await this.getIncident(id))!;
@@ -528,15 +549,16 @@ export class IncidentService {
528
549
  create: () => Promise<IncidentWithSystems>,
529
550
  ) => Promise<IncidentWithSystems> = (create) => create(),
530
551
  ): Promise<{ incident: IncidentWithSystems; reused: boolean }> {
531
- return withXactLock({
532
- db: this.db,
552
+ return this.advisoryLock.withXactLock({
533
553
  key: `incident.dedupe-open-for-system:${dedupeSystemId}`,
534
- // The find + create run on `this.db` (the pool), NOT on `tx`. That is
535
- // safe here because `pg_advisory_xact_lock` BLOCKS every other holder
536
- // of this key until this transaction commits: a racing caller waits
537
- // at lock-acquire, so its find can't observe "no open incident" until
538
- // ours has already committed the insert. The critical section is thus
539
- // serialized by the lock window even though it doesn't ride `tx`.
554
+ // The find + create deliberately run on `this.db` (the admin pool), NOT
555
+ // on the lock connection. That is safe because `pg_advisory_xact_lock`
556
+ // BLOCKS every other holder of this key until this lock transaction
557
+ // commits: a racing caller waits at lock-acquire, so its find can't
558
+ // observe "no open incident" until ours has already committed the
559
+ // insert. Crucially, the lock transaction lives on the DEDICATED lock
560
+ // pool (see `createAdvisoryLockService(lockPool)`), so holding it open
561
+ // while the work runs on the admin pool cannot starve the admin pool.
540
562
  fn: async () => {
541
563
  const existing = await this.findActiveIncidentForSystem(dedupeSystemId);
542
564
  if (existing) {
package/tsconfig.json CHANGED
@@ -4,6 +4,12 @@
4
4
  "src"
5
5
  ],
6
6
  "references": [
7
+ {
8
+ "path": "../ai-backend"
9
+ },
10
+ {
11
+ "path": "../ai-common"
12
+ },
7
13
  {
8
14
  "path": "../auth-common"
9
15
  },