@checkstack/incident-backend 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +157 -0
- package/package.json +18 -18
- package/src/automations.test.ts +356 -5
- package/src/automations.ts +322 -34
- package/src/hooks.ts +8 -53
- package/src/incident-entity.test.ts +266 -0
- package/src/incident-entity.ts +192 -0
- package/src/index.ts +96 -16
- package/src/router.ts +162 -98
- package/src/service.test.ts +199 -0
- package/src/service.ts +147 -3
package/src/service.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { eq, and, inArray, ne } from "drizzle-orm";
|
|
2
|
-
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
2
|
+
import { withXactLock, type SafeDatabase } from "@checkstack/backend-api";
|
|
3
3
|
import * as schema from "./schema";
|
|
4
4
|
import {
|
|
5
5
|
incidents,
|
|
@@ -17,6 +17,7 @@ import type {
|
|
|
17
17
|
UpdateIncidentInput,
|
|
18
18
|
AddIncidentUpdateInput,
|
|
19
19
|
IncidentStatus,
|
|
20
|
+
IncidentSeverity,
|
|
20
21
|
} from "@checkstack/incident-common";
|
|
21
22
|
|
|
22
23
|
type Db = SafeDatabase<typeof schema>;
|
|
@@ -125,6 +126,62 @@ export class IncidentService {
|
|
|
125
126
|
};
|
|
126
127
|
}
|
|
127
128
|
|
|
129
|
+
/**
|
|
130
|
+
* Batched reactive-state read for the `incident` entity (Model B
|
|
131
|
+
* plugin-backed `read` accessor). Given incident ids, return the reactive
|
|
132
|
+
* subset `{ status, severity, systemIds }` for each that exists (missing
|
|
133
|
+
* ids omitted). Reads the AUTHORITATIVE `incidents` + `incident_systems`
|
|
134
|
+
* tables — no framework `entity_state` storage. This is the single source
|
|
135
|
+
* of truth `handle.mutate` snapshots `prev` from and `get`/`getMany`/scope
|
|
136
|
+
* enrichment route through.
|
|
137
|
+
*/
|
|
138
|
+
async getManyEntityStates(
|
|
139
|
+
ids: ReadonlyArray<string>,
|
|
140
|
+
): Promise<
|
|
141
|
+
Record<string, { status: IncidentStatus; severity: IncidentSeverity; systemIds: string[] }>
|
|
142
|
+
> {
|
|
143
|
+
if (ids.length === 0) return {};
|
|
144
|
+
|
|
145
|
+
const rows = await this.db
|
|
146
|
+
.select({
|
|
147
|
+
id: incidents.id,
|
|
148
|
+
status: incidents.status,
|
|
149
|
+
severity: incidents.severity,
|
|
150
|
+
})
|
|
151
|
+
.from(incidents)
|
|
152
|
+
.where(inArray(incidents.id, [...ids]));
|
|
153
|
+
if (rows.length === 0) return {};
|
|
154
|
+
|
|
155
|
+
const presentIds = rows.map((r) => r.id);
|
|
156
|
+
const systemRows = await this.db
|
|
157
|
+
.select({
|
|
158
|
+
incidentId: incidentSystems.incidentId,
|
|
159
|
+
systemId: incidentSystems.systemId,
|
|
160
|
+
})
|
|
161
|
+
.from(incidentSystems)
|
|
162
|
+
.where(inArray(incidentSystems.incidentId, presentIds));
|
|
163
|
+
|
|
164
|
+
const systemsByIncident = new Map<string, string[]>();
|
|
165
|
+
for (const r of systemRows) {
|
|
166
|
+
const list = systemsByIncident.get(r.incidentId);
|
|
167
|
+
if (list) list.push(r.systemId);
|
|
168
|
+
else systemsByIncident.set(r.incidentId, [r.systemId]);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
const out: Record<
|
|
172
|
+
string,
|
|
173
|
+
{ status: IncidentStatus; severity: IncidentSeverity; systemIds: string[] }
|
|
174
|
+
> = {};
|
|
175
|
+
for (const row of rows) {
|
|
176
|
+
out[row.id] = {
|
|
177
|
+
status: row.status,
|
|
178
|
+
severity: row.severity,
|
|
179
|
+
systemIds: systemsByIncident.get(row.id) ?? [],
|
|
180
|
+
};
|
|
181
|
+
}
|
|
182
|
+
return out;
|
|
183
|
+
}
|
|
184
|
+
|
|
128
185
|
/**
|
|
129
186
|
* Get active incidents for a system
|
|
130
187
|
*/
|
|
@@ -165,13 +222,18 @@ export class IncidentService {
|
|
|
165
222
|
}
|
|
166
223
|
|
|
167
224
|
/**
|
|
168
|
-
* Create a new incident
|
|
225
|
+
* Create a new incident.
|
|
226
|
+
*
|
|
227
|
+
* `id` may be supplied by the caller so the reactive `incident` entity can
|
|
228
|
+
* be keyed on a known id BEFORE the insert runs (the create's `prev`
|
|
229
|
+
* snapshot must read the not-yet-existing row as absent — see §10.1). When
|
|
230
|
+
* omitted, a fresh id is generated. The id is server-owned either way.
|
|
169
231
|
*/
|
|
170
232
|
async createIncident(
|
|
171
233
|
input: CreateIncidentInput,
|
|
172
234
|
userId?: string,
|
|
235
|
+
id: string = generateId(),
|
|
173
236
|
): Promise<IncidentWithSystems> {
|
|
174
|
-
const id = generateId();
|
|
175
237
|
|
|
176
238
|
await this.db.insert(incidents).values({
|
|
177
239
|
id,
|
|
@@ -407,4 +469,86 @@ export class IncidentService {
|
|
|
407
469
|
|
|
408
470
|
return !!match;
|
|
409
471
|
}
|
|
472
|
+
|
|
473
|
+
/**
|
|
474
|
+
* Find a single OPEN (not-resolved) incident affecting `systemId`, if
|
|
475
|
+
* any. Returns the incident with its systems, mirroring the old
|
|
476
|
+
* auto-incident `findActiveAutoIncident(systemId)` dedup semantic. Used
|
|
477
|
+
* by `incident.create`'s opt-in `dedupe_open_for_system` flag so a
|
|
478
|
+
* second trigger for an already-incidented system reuses the open
|
|
479
|
+
* incident rather than opening a duplicate.
|
|
480
|
+
*/
|
|
481
|
+
async findActiveIncidentForSystem(
|
|
482
|
+
systemId: string,
|
|
483
|
+
): Promise<IncidentWithSystems | undefined> {
|
|
484
|
+
const systemIncidents = await this.db
|
|
485
|
+
.select({ incidentId: incidentSystems.incidentId })
|
|
486
|
+
.from(incidentSystems)
|
|
487
|
+
.where(eq(incidentSystems.systemId, systemId));
|
|
488
|
+
|
|
489
|
+
const ids = systemIncidents.map((r) => r.incidentId);
|
|
490
|
+
if (ids.length === 0) return undefined;
|
|
491
|
+
|
|
492
|
+
const [match] = await this.db
|
|
493
|
+
.select({ id: incidents.id })
|
|
494
|
+
.from(incidents)
|
|
495
|
+
.where(and(inArray(incidents.id, ids), ne(incidents.status, "resolved")))
|
|
496
|
+
.limit(1);
|
|
497
|
+
|
|
498
|
+
if (!match) return undefined;
|
|
499
|
+
return this.getIncident(match.id);
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
/**
|
|
503
|
+
* Dedup-aware create for a single system, used by the `incident.create`
|
|
504
|
+
* automation action when `dedupe_open_for_system` is set. Serializes the
|
|
505
|
+
* check-then-create per system with a transaction-scoped advisory lock so
|
|
506
|
+
* two concurrent triggers for the same system (e.g. sustained + flapping)
|
|
507
|
+
* can't both observe "no open incident" and both create one. The critical
|
|
508
|
+
* section is short (a find + an insert), so a transaction-scoped lock is
|
|
509
|
+
* the right primitive (it auto-releases at COMMIT, no leak possible).
|
|
510
|
+
*
|
|
511
|
+
* Returns `{ incident, reused }` — `reused` is true when an already-open
|
|
512
|
+
* incident for the system was found and returned instead of creating.
|
|
513
|
+
*/
|
|
514
|
+
async createIncidentDedupedForSystem(
|
|
515
|
+
input: CreateIncidentInput,
|
|
516
|
+
dedupeSystemId: string,
|
|
517
|
+
userId?: string,
|
|
518
|
+
newId?: string,
|
|
519
|
+
/**
|
|
520
|
+
* Optional create wrapper (§10.1, 6(a)). When the dedup decides to CREATE,
|
|
521
|
+
* the actual create runs through this wrapper INSIDE the advisory lock, so
|
|
522
|
+
* the reactive `incident` entity write (which snapshots `prev` before the
|
|
523
|
+
* insert) is serialized with the dedup check. The wrapper receives the
|
|
524
|
+
* bound create thunk and MUST call it exactly once, returning its result.
|
|
525
|
+
* Defaults to calling the create directly (non-reactive).
|
|
526
|
+
*/
|
|
527
|
+
onCreate: (
|
|
528
|
+
create: () => Promise<IncidentWithSystems>,
|
|
529
|
+
) => Promise<IncidentWithSystems> = (create) => create(),
|
|
530
|
+
): Promise<{ incident: IncidentWithSystems; reused: boolean }> {
|
|
531
|
+
return withXactLock({
|
|
532
|
+
db: this.db,
|
|
533
|
+
key: `incident.dedupe-open-for-system:${dedupeSystemId}`,
|
|
534
|
+
// The find + create run on `this.db` (the pool), NOT on `tx`. That is
|
|
535
|
+
// safe here because `pg_advisory_xact_lock` BLOCKS every other holder
|
|
536
|
+
// of this key until this transaction commits: a racing caller waits
|
|
537
|
+
// at lock-acquire, so its find can't observe "no open incident" until
|
|
538
|
+
// ours has already committed the insert. The critical section is thus
|
|
539
|
+
// serialized by the lock window even though it doesn't ride `tx`.
|
|
540
|
+
fn: async () => {
|
|
541
|
+
const existing = await this.findActiveIncidentForSystem(dedupeSystemId);
|
|
542
|
+
if (existing) {
|
|
543
|
+
return { incident: existing, reused: true };
|
|
544
|
+
}
|
|
545
|
+
// Create through the caller's wrapper (reactive entity write) so the
|
|
546
|
+
// `incident.created` emit is serialized inside the dedup lock.
|
|
547
|
+
const incident = await onCreate(() =>
|
|
548
|
+
this.createIncident(input, userId, newId),
|
|
549
|
+
);
|
|
550
|
+
return { incident, reused: false };
|
|
551
|
+
},
|
|
552
|
+
});
|
|
553
|
+
}
|
|
410
554
|
}
|