@checkstack/incident-backend 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +157 -0
- package/drizzle/0003_careful_ken_ellis.sql +10 -0
- package/drizzle/meta/0003_snapshot.json +300 -0
- package/drizzle/meta/_journal.json +7 -0
- package/package.json +14 -11
- package/src/ai/incident-add-link.test.ts +62 -0
- package/src/ai/incident-add-link.ts +63 -0
- package/src/ai/incident-add-update.test.ts +56 -0
- package/src/ai/incident-add-update.ts +66 -0
- package/src/ai/incident-create.test.ts +70 -0
- package/src/ai/incident-create.ts +65 -0
- package/src/ai/incident-delete.test.ts +88 -0
- package/src/ai/incident-delete.ts +76 -0
- package/src/ai/incident-get.projection.test.ts +43 -0
- package/src/ai/incident-remove-link.test.ts +56 -0
- package/src/ai/incident-remove-link.ts +69 -0
- package/src/ai/incident-resolve.test.ts +61 -0
- package/src/ai/incident-resolve.ts +69 -0
- package/src/ai/incident-update.test.ts +87 -0
- package/src/ai/incident-update.ts +94 -0
- package/src/ai/register-ai-tools.ts +33 -0
- package/src/ai-projection.test.ts +38 -0
- package/src/automations.test.ts +2 -0
- package/src/index.ts +54 -2
- package/src/schema.ts +20 -9
- package/src/service.test.ts +47 -6
- package/src/service.ts +98 -76
- package/tsconfig.json +6 -0
package/src/service.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { eq, and, inArray, ne } from "drizzle-orm";
|
|
2
|
-
import {
|
|
2
|
+
import type { AdvisoryLockService, SafeDatabase } from "@checkstack/backend-api";
|
|
3
3
|
import * as schema from "./schema";
|
|
4
4
|
import {
|
|
5
5
|
incidents,
|
|
@@ -27,7 +27,10 @@ function generateId(): string {
|
|
|
27
27
|
}
|
|
28
28
|
|
|
29
29
|
export class IncidentService {
|
|
30
|
-
constructor(
|
|
30
|
+
constructor(
|
|
31
|
+
private db: Db,
|
|
32
|
+
private advisoryLock: AdvisoryLockService,
|
|
33
|
+
) {}
|
|
31
34
|
|
|
32
35
|
/**
|
|
33
36
|
* List incidents with optional filters
|
|
@@ -235,33 +238,38 @@ export class IncidentService {
|
|
|
235
238
|
id: string = generateId(),
|
|
236
239
|
): Promise<IncidentWithSystems> {
|
|
237
240
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
await this.db.insert(incidentSystems).values({
|
|
250
|
-
incidentId: id,
|
|
251
|
-
systemId,
|
|
241
|
+
// Atomic: the incident row, its system associations, and any initial update
|
|
242
|
+
// must all commit together. Without the transaction a failure mid-loop left
|
|
243
|
+
// a committed incident with only some (or none) of its system links.
|
|
244
|
+
await this.db.transaction(async (tx) => {
|
|
245
|
+
await tx.insert(incidents).values({
|
|
246
|
+
id,
|
|
247
|
+
title: input.title,
|
|
248
|
+
description: input.description,
|
|
249
|
+
status: "investigating",
|
|
250
|
+
severity: input.severity,
|
|
251
|
+
suppressNotifications: input.suppressNotifications ?? false,
|
|
252
252
|
});
|
|
253
|
-
}
|
|
254
253
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
254
|
+
// Insert system associations
|
|
255
|
+
for (const systemId of input.systemIds) {
|
|
256
|
+
await tx.insert(incidentSystems).values({
|
|
257
|
+
incidentId: id,
|
|
258
|
+
systemId,
|
|
259
|
+
});
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
// Add initial update if provided
|
|
263
|
+
if (input.initialMessage) {
|
|
264
|
+
await tx.insert(incidentUpdates).values({
|
|
265
|
+
id: generateId(),
|
|
266
|
+
incidentId: id,
|
|
267
|
+
message: input.initialMessage,
|
|
268
|
+
statusChange: "investigating",
|
|
269
|
+
createdBy: userId,
|
|
270
|
+
});
|
|
271
|
+
}
|
|
272
|
+
});
|
|
265
273
|
|
|
266
274
|
return (await this.getIncident(id))!;
|
|
267
275
|
}
|
|
@@ -290,24 +298,29 @@ export class IncidentService {
|
|
|
290
298
|
if (input.suppressNotifications !== undefined)
|
|
291
299
|
updateData.suppressNotifications = input.suppressNotifications;
|
|
292
300
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
incidentId
|
|
307
|
-
|
|
308
|
-
|
|
301
|
+
// Atomic: the field update and the delete-then-reinsert of system links must
|
|
302
|
+
// commit together. Without the transaction a failure after the delete left
|
|
303
|
+
// the incident with ALL system associations wiped.
|
|
304
|
+
await this.db.transaction(async (tx) => {
|
|
305
|
+
await tx
|
|
306
|
+
.update(incidents)
|
|
307
|
+
.set(updateData)
|
|
308
|
+
.where(eq(incidents.id, input.id));
|
|
309
|
+
|
|
310
|
+
// Update system associations if provided
|
|
311
|
+
if (input.systemIds !== undefined) {
|
|
312
|
+
await tx
|
|
313
|
+
.delete(incidentSystems)
|
|
314
|
+
.where(eq(incidentSystems.incidentId, input.id));
|
|
315
|
+
|
|
316
|
+
for (const systemId of input.systemIds) {
|
|
317
|
+
await tx.insert(incidentSystems).values({
|
|
318
|
+
incidentId: input.id,
|
|
319
|
+
systemId,
|
|
320
|
+
});
|
|
321
|
+
}
|
|
309
322
|
}
|
|
310
|
-
}
|
|
323
|
+
});
|
|
311
324
|
|
|
312
325
|
return (await this.getIncident(input.id))!;
|
|
313
326
|
}
|
|
@@ -321,20 +334,25 @@ export class IncidentService {
|
|
|
321
334
|
): Promise<IncidentUpdate> {
|
|
322
335
|
const id = generateId();
|
|
323
336
|
|
|
324
|
-
//
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
337
|
+
// Atomic: the status flip and the timeline entry that records it must commit
|
|
338
|
+
// together. Without the transaction a failed insert left the incident in a
|
|
339
|
+
// new status with no update row explaining it (status/timeline divergence).
|
|
340
|
+
await this.db.transaction(async (tx) => {
|
|
341
|
+
// If status change is provided, update the incident status
|
|
342
|
+
if (input.statusChange) {
|
|
343
|
+
await tx
|
|
344
|
+
.update(incidents)
|
|
345
|
+
.set({ status: input.statusChange, updatedAt: new Date() })
|
|
346
|
+
.where(eq(incidents.id, input.incidentId));
|
|
347
|
+
}
|
|
331
348
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
349
|
+
await tx.insert(incidentUpdates).values({
|
|
350
|
+
id,
|
|
351
|
+
incidentId: input.incidentId,
|
|
352
|
+
message: input.message,
|
|
353
|
+
statusChange: input.statusChange,
|
|
354
|
+
createdBy: userId,
|
|
355
|
+
});
|
|
338
356
|
});
|
|
339
357
|
|
|
340
358
|
const [update] = await this.db
|
|
@@ -364,18 +382,21 @@ export class IncidentService {
|
|
|
364
382
|
|
|
365
383
|
if (!existing) return undefined;
|
|
366
384
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
385
|
+
// Atomic: mark resolved + write the resolution timeline entry together.
|
|
386
|
+
await this.db.transaction(async (tx) => {
|
|
387
|
+
await tx
|
|
388
|
+
.update(incidents)
|
|
389
|
+
.set({ status: "resolved", updatedAt: new Date() })
|
|
390
|
+
.where(eq(incidents.id, id));
|
|
371
391
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
392
|
+
// Add resolution update entry
|
|
393
|
+
await tx.insert(incidentUpdates).values({
|
|
394
|
+
id: generateId(),
|
|
395
|
+
incidentId: id,
|
|
396
|
+
message: message ?? "Incident resolved",
|
|
397
|
+
statusChange: "resolved",
|
|
398
|
+
createdBy: userId,
|
|
399
|
+
});
|
|
379
400
|
});
|
|
380
401
|
|
|
381
402
|
return (await this.getIncident(id))!;
|
|
@@ -528,15 +549,16 @@ export class IncidentService {
|
|
|
528
549
|
create: () => Promise<IncidentWithSystems>,
|
|
529
550
|
) => Promise<IncidentWithSystems> = (create) => create(),
|
|
530
551
|
): Promise<{ incident: IncidentWithSystems; reused: boolean }> {
|
|
531
|
-
return withXactLock({
|
|
532
|
-
db: this.db,
|
|
552
|
+
return this.advisoryLock.withXactLock({
|
|
533
553
|
key: `incident.dedupe-open-for-system:${dedupeSystemId}`,
|
|
534
|
-
// The find + create run on `this.db` (the pool), NOT
|
|
535
|
-
// safe
|
|
536
|
-
// of this key until this transaction
|
|
537
|
-
// at lock-acquire, so its find can't
|
|
538
|
-
// ours has already committed the
|
|
539
|
-
//
|
|
554
|
+
// The find + create deliberately run on `this.db` (the admin pool), NOT
|
|
555
|
+
// on the lock connection. That is safe because `pg_advisory_xact_lock`
|
|
556
|
+
// BLOCKS every other holder of this key until this lock transaction
|
|
557
|
+
// commits: a racing caller waits at lock-acquire, so its find can't
|
|
558
|
+
// observe "no open incident" until ours has already committed the
|
|
559
|
+
// insert. Crucially, the lock transaction lives on the DEDICATED lock
|
|
560
|
+
// pool (see `createAdvisoryLockService(lockPool)`), so holding it open
|
|
561
|
+
// while the work runs on the admin pool cannot starve the admin pool.
|
|
540
562
|
fn: async () => {
|
|
541
563
|
const existing = await this.findActiveIncidentForSystem(dedupeSystemId);
|
|
542
564
|
if (existing) {
|