@checkstack/incident-backend 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +217 -0
- package/package.json +18 -18
- package/src/automations.test.ts +356 -5
- package/src/automations.ts +322 -34
- package/src/hooks.ts +8 -53
- package/src/incident-entity.test.ts +266 -0
- package/src/incident-entity.ts +192 -0
- package/src/index.ts +105 -17
- package/src/router.ts +162 -98
- package/src/service.test.ts +235 -1
- package/src/service.ts +152 -4
package/src/service.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { eq, and, inArray, ne } from "drizzle-orm";
|
|
2
|
-
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
2
|
+
import type { AdvisoryLockService, SafeDatabase } from "@checkstack/backend-api";
|
|
3
3
|
import * as schema from "./schema";
|
|
4
4
|
import {
|
|
5
5
|
incidents,
|
|
@@ -17,6 +17,7 @@ import type {
|
|
|
17
17
|
UpdateIncidentInput,
|
|
18
18
|
AddIncidentUpdateInput,
|
|
19
19
|
IncidentStatus,
|
|
20
|
+
IncidentSeverity,
|
|
20
21
|
} from "@checkstack/incident-common";
|
|
21
22
|
|
|
22
23
|
type Db = SafeDatabase<typeof schema>;
|
|
@@ -26,7 +27,10 @@ function generateId(): string {
|
|
|
26
27
|
}
|
|
27
28
|
|
|
28
29
|
export class IncidentService {
|
|
29
|
-
constructor(
|
|
30
|
+
/**
 * @param db Database handle used for all incident table reads and writes.
 * @param advisoryLock Lock service used to serialize the dedup-aware
 *   create (see `createIncidentDedupedForSystem`).
 *
 * Both dependencies are injected once and never reassigned by the methods
 * visible in this file, so they are declared `readonly`.
 */
constructor(
  private readonly db: Db,
  private readonly advisoryLock: AdvisoryLockService,
) {}
|
|
30
34
|
|
|
31
35
|
/**
|
|
32
36
|
* List incidents with optional filters
|
|
@@ -125,6 +129,62 @@ export class IncidentService {
|
|
|
125
129
|
};
|
|
126
130
|
}
|
|
127
131
|
|
|
132
|
+
/**
 * Batched read of the reactive state of the `incident` entity.
 *
 * For every id in `ids` that has a row in `incidents`, the result holds
 * that incident's `{ status, severity, systemIds }`; ids without a row are
 * simply absent from the result. `systemIds` is collected from
 * `incidentSystems` and is an empty array when the incident has no linked
 * systems.
 *
 * The data comes straight from the `incidents` and `incidentSystems`
 * tables. Per the original design note, this is the plugin-backed `read`
 * accessor (Model B) that `handle.mutate` snapshots `prev` from and that
 * `get`/`getMany`/scope enrichment route through, with no separate
 * framework `entity_state` storage involved.
 *
 * @param ids Incident ids to look up; an empty list short-circuits to `{}`.
 * @returns A record keyed by incident id.
 */
async getManyEntityStates(
  ids: ReadonlyArray<string>,
): Promise<
  Record<string, { status: IncidentStatus; severity: IncidentSeverity; systemIds: string[] }>
> {
  type EntityState = {
    status: IncidentStatus;
    severity: IncidentSeverity;
    systemIds: string[];
  };

  // Nothing requested: skip the database entirely.
  if (ids.length === 0) {
    return {};
  }

  const incidentRows = await this.db
    .select({
      id: incidents.id,
      status: incidents.status,
      severity: incidents.severity,
    })
    .from(incidents)
    .where(inArray(incidents.id, [...ids]));

  // None of the requested ids exist: no need to query the link table.
  if (incidentRows.length === 0) {
    return {};
  }

  // Only look up system links for the incidents that were actually found.
  const links = await this.db
    .select({
      incidentId: incidentSystems.incidentId,
      systemId: incidentSystems.systemId,
    })
    .from(incidentSystems)
    .where(
      inArray(
        incidentSystems.incidentId,
        incidentRows.map((found) => found.id),
      ),
    );

  // Seed one entry per found incident, then attach its system ids.
  const states: Record<string, EntityState> = {};
  for (const { id, status, severity } of incidentRows) {
    states[id] = { status, severity, systemIds: [] };
  }
  for (const { incidentId, systemId } of links) {
    const entry = states[incidentId];
    if (entry) {
      entry.systemIds.push(systemId);
    }
  }

  return states;
}
|
|
187
|
+
|
|
128
188
|
/**
|
|
129
189
|
* Get active incidents for a system
|
|
130
190
|
*/
|
|
@@ -165,13 +225,18 @@ export class IncidentService {
|
|
|
165
225
|
}
|
|
166
226
|
|
|
167
227
|
/**
|
|
168
|
-
* Create a new incident
|
|
228
|
+
* Create a new incident.
|
|
229
|
+
*
|
|
230
|
+
* `id` may be supplied by the caller so the reactive `incident` entity can
|
|
231
|
+
* be keyed on a known id BEFORE the insert runs (the create's `prev`
|
|
232
|
+
* snapshot must read the not-yet-existing row as absent — see §10.1). When
|
|
233
|
+
* omitted, a fresh id is generated. The id is server-owned either way.
|
|
169
234
|
*/
|
|
170
235
|
async createIncident(
|
|
171
236
|
input: CreateIncidentInput,
|
|
172
237
|
userId?: string,
|
|
238
|
+
id: string = generateId(),
|
|
173
239
|
): Promise<IncidentWithSystems> {
|
|
174
|
-
const id = generateId();
|
|
175
240
|
|
|
176
241
|
await this.db.insert(incidents).values({
|
|
177
242
|
id,
|
|
@@ -407,4 +472,87 @@ export class IncidentService {
|
|
|
407
472
|
|
|
408
473
|
return !!match;
|
|
409
474
|
}
|
|
475
|
+
|
|
476
|
+
/**
 * Look up one open incident (status other than `"resolved"`) that is
 * linked to `systemId`.
 *
 * Two steps: collect the ids of every incident linked to the system, then
 * pick at most one of them whose status is not `"resolved"`. No ordering
 * is applied, so when several open incidents exist the choice among them
 * is left to the database.
 *
 * Per the original note, this mirrors the former auto-incident
 * `findActiveAutoIncident(systemId)` dedup semantic and backs the opt-in
 * `dedupe_open_for_system` flag of `incident.create`, so a repeated
 * trigger reuses the open incident instead of opening a duplicate.
 *
 * @param systemId System whose open incident is wanted.
 * @returns The incident as loaded by `getIncident`, or `undefined` when
 *   the system has no linked incident that is still open.
 */
async findActiveIncidentForSystem(
  systemId: string,
): Promise<IncidentWithSystems | undefined> {
  const linkRows = await this.db
    .select({ incidentId: incidentSystems.incidentId })
    .from(incidentSystems)
    .where(eq(incidentSystems.systemId, systemId));

  // The system has never been attached to any incident.
  if (linkRows.length === 0) {
    return undefined;
  }

  const candidateIds = linkRows.map(({ incidentId }) => incidentId);
  const isLinkedToSystem = inArray(incidents.id, candidateIds);
  const isStillOpen = ne(incidents.status, "resolved");

  const openRows = await this.db
    .select({ id: incidents.id })
    .from(incidents)
    .where(and(isLinkedToSystem, isStillOpen))
    .limit(1);

  const firstOpen = openRows[0];
  return firstOpen ? this.getIncident(firstOpen.id) : undefined;
}
|
|
504
|
+
|
|
505
|
+
/**
 * Create an incident unless `dedupeSystemId` already has an open one.
 *
 * Used by the `incident.create` automation action when
 * `dedupe_open_for_system` is set. The "find open incident, otherwise
 * create" sequence runs inside `advisoryLock.withXactLock` under a
 * per-system key, so two concurrent triggers for the same system cannot
 * both see "no open incident" and both create one.
 *
 * @param input Payload forwarded to `createIncident` when a new incident
 *   is needed.
 * @param dedupeSystemId System used both for the lock key and for the
 *   open-incident lookup.
 * @param userId Forwarded to `createIncident`.
 * @param newId Optional pre-chosen id forwarded to `createIncident`; when
 *   omitted, `createIncident` falls back to its own default.
 * @param onCreate Optional wrapper around the create (§10.1, 6(a)). It is
 *   handed a thunk that performs the create and must call it exactly once
 *   and return its result. It runs inside the lock, which lets a reactive
 *   `incident` entity write (which snapshots `prev` before the insert) be
 *   serialized with the dedup check. Defaults to invoking the thunk
 *   directly.
 * @returns `{ incident, reused }`, where `reused` is `true` when an
 *   existing open incident was returned and `false` when one was created.
 */
async createIncidentDedupedForSystem(
  input: CreateIncidentInput,
  dedupeSystemId: string,
  userId?: string,
  newId?: string,
  onCreate: (
    create: () => Promise<IncidentWithSystems>,
  ) => Promise<IncidentWithSystems> = (create) => create(),
): Promise<{ incident: IncidentWithSystems; reused: boolean }> {
  // Bound create handed to `onCreate`; it only runs if no open incident exists.
  const createFresh = (): Promise<IncidentWithSystems> =>
    this.createIncident(input, userId, newId);

  // Critical section executed while the per-system lock is held.
  //
  // Original author's note: the find + create deliberately run on
  // `this.db` (the admin pool), NOT on the lock connection. That is safe
  // because `pg_advisory_xact_lock` blocks every other holder of this key
  // until the lock transaction commits, so a racing caller waits at
  // lock-acquire and its find only runs after our insert has committed.
  // The lock transaction lives on the dedicated lock pool (see
  // `createAdvisoryLockService(lockPool)`), so holding it open while the
  // work runs on the admin pool cannot starve the admin pool.
  const findOrCreate = async (): Promise<{
    incident: IncidentWithSystems;
    reused: boolean;
  }> => {
    const alreadyOpen = await this.findActiveIncidentForSystem(dedupeSystemId);
    if (alreadyOpen) {
      return { incident: alreadyOpen, reused: true };
    }

    // Going through the caller's wrapper keeps the `incident.created`
    // emit serialized inside the dedup lock.
    const created = await onCreate(createFresh);
    return { incident: created, reused: false };
  };

  return this.advisoryLock.withXactLock({
    key: `incident.dedupe-open-for-system:${dedupeSystemId}`,
    fn: findOrCreate,
  });
}
|
|
410
558
|
}
|