@checkstack/incident-backend 1.1.5 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +124 -0
- package/package.json +18 -16
- package/src/automations.test.ts +172 -0
- package/src/automations.ts +313 -0
- package/src/hooks.ts +15 -6
- package/src/index.ts +26 -72
- package/src/router.ts +90 -0
- package/tsconfig.json +6 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,129 @@
|
|
|
1
1
|
# @checkstack/incident-backend
|
|
2
2
|
|
|
3
|
+
## 1.3.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 41c77f4: feat(automation): type enum-able trigger/artifact fields as enums for editor value autocompletion
|
|
8
|
+
|
|
9
|
+
The automation editor's staged completion offers concrete values after a
|
|
10
|
+
comparator (`{{ trigger.payload.severity == "high" }}`) only when the
|
|
11
|
+
field's JSON Schema carries an `enum`. Several trigger payload + artifact
|
|
12
|
+
schemas declared closed-set fields as loose `z.string()`, so no values
|
|
13
|
+
were suggested. Tightened them to the canonical enums that already
|
|
14
|
+
existed in each plugin's `-common` package (and matched the hook payload
|
|
15
|
+
types in lockstep so the trigger's `payloadSchema` and `hook` keep the
|
|
16
|
+
same `TPayload`):
|
|
17
|
+
|
|
18
|
+
- **incident** — trigger payloads: `severity` → `IncidentSeverityEnum`,
|
|
19
|
+
`status` / `statusChange` → `IncidentStatusEnum`.
|
|
20
|
+
- **healthcheck** — trigger payloads: `previousStatus` / `newStatus` /
|
|
21
|
+
`status` → `HealthCheckStatusSchema` (across systemDegraded,
|
|
22
|
+
systemHealthy, systemHealthChanged, checkFailed; plus checkCompleted's
|
|
23
|
+
hook type).
|
|
24
|
+
- **dependency** — trigger + artifact: `impactType` → `ImpactTypeSchema`;
|
|
25
|
+
impactPropagated `previousState` / `newState` → `DerivedStateSchema`.
|
|
26
|
+
Also deduped the inline `impactTypeSchema` action-config enum to reuse
|
|
27
|
+
the canonical `ImpactTypeSchema`.
|
|
28
|
+
- **maintenance** — trigger + artifact: `status` →
|
|
29
|
+
`MaintenanceStatusEnum`; deduped the inline `maintenanceStatusEnum`
|
|
30
|
+
(used by `add_update.statusChange`) to the canonical one.
|
|
31
|
+
- **slo** — `achievement.unlocked` trigger + hook: `achievement` →
|
|
32
|
+
`AchievementTypeSchema`.
|
|
33
|
+
|
|
34
|
+
Runtime behaviour is unchanged — these fields always carried valid enum
|
|
35
|
+
values (the underlying records are enum-constrained); only the schema
|
|
36
|
+
types were loose. The hook payload generics are now precise too, which
|
|
37
|
+
caught one stale test fixture asserting an invalid `impactType: "soft"`.
|
|
38
|
+
|
|
39
|
+
Fields that look enum-ish but are genuinely free-form were intentionally
|
|
40
|
+
left as `z.string()`: satellite `region` (user-entered), Jira issue
|
|
41
|
+
`status` (per-instance workflow name), notification `strategyQualifiedId`
|
|
42
|
+
/ `errorMessage`, healthcheck collector `result`, and script
|
|
43
|
+
`stdout` / `stderr`.
|
|
44
|
+
|
|
45
|
+
- 41c77f4: feat(incident): register incident lifecycle as automation triggers + actions
|
|
46
|
+
|
|
47
|
+
Adds three triggers (`incident.created`, `incident.updated`,
|
|
48
|
+
`incident.resolved`) backed by the existing hooks, each exposing
|
|
49
|
+
`incidentId` as the context key so `wait_for_trigger` waits match the
|
|
50
|
+
same incident across the run. Adds four actions (`incident.create`,
|
|
51
|
+
`incident.resolve`, `incident.add_update`, `incident.update_status`)
|
|
52
|
+
wrapping the existing `IncidentService` methods so operators can compose
|
|
53
|
+
incident flows in the Automation editor.
|
|
54
|
+
|
|
55
|
+
### Patch Changes
|
|
56
|
+
|
|
57
|
+
- Updated dependencies [e2d6f25]
|
|
58
|
+
- Updated dependencies [41c77f4]
|
|
59
|
+
- Updated dependencies [e1a2077]
|
|
60
|
+
- Updated dependencies [41c77f4]
|
|
61
|
+
- Updated dependencies [41c77f4]
|
|
62
|
+
- Updated dependencies [41c77f4]
|
|
63
|
+
- Updated dependencies [41c77f4]
|
|
64
|
+
- Updated dependencies [41c77f4]
|
|
65
|
+
- Updated dependencies [41c77f4]
|
|
66
|
+
- Updated dependencies [41c77f4]
|
|
67
|
+
- Updated dependencies [41c77f4]
|
|
68
|
+
- Updated dependencies [4832e33]
|
|
69
|
+
- Updated dependencies [6d52276]
|
|
70
|
+
- Updated dependencies [6d52276]
|
|
71
|
+
- Updated dependencies [35bc682]
|
|
72
|
+
- @checkstack/automation-backend@0.2.0
|
|
73
|
+
- @checkstack/automation-common@0.2.0
|
|
74
|
+
- @checkstack/integration-backend@0.2.0
|
|
75
|
+
- @checkstack/integration-common@0.6.0
|
|
76
|
+
- @checkstack/catalog-backend@1.2.0
|
|
77
|
+
- @checkstack/common@0.12.0
|
|
78
|
+
- @checkstack/backend-api@0.18.0
|
|
79
|
+
- @checkstack/catalog-common@2.2.3
|
|
80
|
+
- @checkstack/incident-common@1.3.1
|
|
81
|
+
- @checkstack/auth-common@0.7.2
|
|
82
|
+
- @checkstack/command-backend@0.1.31
|
|
83
|
+
- @checkstack/notification-common@1.2.1
|
|
84
|
+
- @checkstack/signal-common@0.2.5
|
|
85
|
+
- @checkstack/cache-api@0.3.6
|
|
86
|
+
- @checkstack/cache-utils@0.2.11
|
|
87
|
+
|
|
88
|
+
## 1.2.0
|
|
89
|
+
|
|
90
|
+
### Minor Changes
|
|
91
|
+
|
|
92
|
+
- ba07ae2: Quiet down notification spam on flapping systems, auto-open incidents when a check goes critical, and let operators land directly on the broken checks.
|
|
93
|
+
|
|
94
|
+
Notification policy lives **per healthcheck assignment** (one row per `system × configuration`). Different checks on the same system are fully independent — disabling a setting on one check does not affect the others. Defaults preserve existing behaviour for `suppressDeEscalations`; **auto-incident defaults to on** for new and existing assignments.
|
|
95
|
+
|
|
96
|
+
- **`suppressDeEscalations`** (off by default). When on, transitions from a worse state to a better-but-still-failing state (e.g. `unhealthy → degraded`) no longer fire a notification. Escalations and full recoveries to `healthy` are unaffected. The policy is resolved per assignment (the check that just ran is the one driving any aggregate transition).
|
|
97
|
+
- **`autoOpenIncidentOnUnhealthy`** (on by default). Either of two independent triggers can open the auto-incident:
|
|
98
|
+
- **`sustainedUnhealthyTrigger`** (default 30 min) — opens when the check stays continuously unhealthy for the configured duration. Catches real outages.
|
|
99
|
+
- **`flappingTrigger`** (default 3 transitions in 60 min) — opens when the check flips to unhealthy that many times in the window. Catches persistent flapping where each unhealthy phase is too brief for the sustained trigger.
|
|
100
|
+
Each trigger can be individually disabled. One incident per system: triggering checks attach to an existing active auto-incident.
|
|
101
|
+
- **`useNotificationSuppression`** (on by default, only meaningful when auto-open is on). Controls whether the auto-opened incident is created with `suppressNotifications: true` — leaving this off opens the incident but still pings operators on each transition.
|
|
102
|
+
- **`skipDuringMaintenance`** (on by default). No auto-incident is opened while the system has an active maintenance window with suppression. The system is intentionally down and shouldn't trip the on-call.
|
|
103
|
+
- **`autoCloseAfterMinutes`** (default 30). Auto-close cooldown is now per-assignment and snapshotted per-incident at open time — later policy edits don't alter in-flight incidents. Setting `null` ("Never auto-close") leaves the incident for manual resolution.
|
|
104
|
+
- **Require-recovery rule.** After any auto-incident closes (manual or auto), no new auto-incident can open until the check has logged at least one healthy run. Prevents an "operator dismissed but it's still broken" loop.
|
|
105
|
+
- **Auto-close worker** ticks every 60s and resolves auto-opened incidents whose systems have been healthy for their per-row `cooldownMinutes`. Rows with `null` cooldown are skipped entirely. Failures are isolated per incident: a failed close attempt is logged but never aborts the sweep.
|
|
106
|
+
- **`incidentResolved` hook subscriber** syncs the auto-incident mapping when an operator manually resolves the incident, so the require-recovery rule sees the close immediately.
|
|
107
|
+
- **Platform-wide defaults.** New admin RPCs `getPlatformNotificationDefaults` / `setPlatformNotificationDefaults` (under the existing `healthcheck.configuration.{read,manage}` access rules) let operators set notification policy once for the whole instance. Per-assignment rows with `notificationPolicy: null` inherit the platform defaults at read time. UI: a "Notification defaults" button in the Assignment IDE opens a modal editor. The per-assignment Notifications panel shows an inheritance banner — "Using platform defaults" (read-only) with an "Override" button, or "Custom override" with a "Use platform defaults" button to revert. The all-or-nothing model keeps the mental model simple: each assignment is either fully inherited or fully overridden.
|
|
108
|
+
- **New service-level RPCs** on the incident plugin (`createAutoIncident`, `resolveAutoIncident`) let other plugins open/close incidents without a user context. Reused by the healthcheck auto-incident flow.
|
|
109
|
+
- **Health-state notification CTA** now deep-links to `?filter=failing` on the system detail page for non-recovery transitions (label changes to "View failing checks"). The system overview gains an `All / Failing / Healthy` segmented filter wired to the same `?filter=…` param.
|
|
110
|
+
- **Notification bell badge** now counts collapse groups instead of raw rows, so the number matches what the user sees in the notifications list. Built on `COUNT(DISTINCT COALESCE(collapse_key, id))` — notifications without a collapse key still each count as one.
|
|
111
|
+
- **`statusFilter` on `getHistory` / `getDetailedHistory`** lets the run-history page and the drawer's Recent Runs panel filter to `All / Healthy / Failing` via shared pills, with the page resetting to the first page on filter change.
|
|
112
|
+
- **Pagination defaults aligned with selector options.** Several pages defaulted to a page size (5 or 20) that wasn't in the dropdown's options (`[10, 25, 50, 100]`), so the page-size `<Select>` rendered empty. The drawer's Recent Runs now defaults to 10; the Run History, History List, and Delivery Logs pages now default to 25.
|
|
113
|
+
|
|
114
|
+
Includes Drizzle migrations adding the `notification_policy` jsonb column to `system_health_checks`, plus two new tables: `health_check_unhealthy_transitions` (for threshold counting) and `health_check_auto_incidents` (for mapping back to incident ids during auto-close).
|
|
115
|
+
|
|
116
|
+
### Patch Changes
|
|
117
|
+
|
|
118
|
+
- Updated dependencies [ba07ae2]
|
|
119
|
+
- @checkstack/incident-common@1.3.0
|
|
120
|
+
- @checkstack/backend-api@0.17.1
|
|
121
|
+
- @checkstack/cache-api@0.3.5
|
|
122
|
+
- @checkstack/catalog-backend@1.1.6
|
|
123
|
+
- @checkstack/command-backend@0.1.30
|
|
124
|
+
- @checkstack/integration-backend@0.1.30
|
|
125
|
+
- @checkstack/cache-utils@0.2.10
|
|
126
|
+
|
|
3
127
|
## 1.1.5
|
|
4
128
|
|
|
5
129
|
### Patch Changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@checkstack/incident-backend",
|
|
3
|
-
"version": "1.1.5",
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"license": "Elastic-2.0",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.ts",
|
|
@@ -14,27 +14,29 @@
|
|
|
14
14
|
"lint:code": "eslint . --max-warnings 0"
|
|
15
15
|
},
|
|
16
16
|
"dependencies": {
|
|
17
|
-
"@checkstack/backend-api": "0.
|
|
18
|
-
"@checkstack/cache-api": "0.3.
|
|
19
|
-
"@checkstack/cache-utils": "0.2.
|
|
20
|
-
"@checkstack/incident-common": "1.
|
|
21
|
-
"@checkstack/catalog-common": "2.2.
|
|
22
|
-
"@checkstack/catalog-backend": "1.1.
|
|
23
|
-
"@checkstack/notification-common": "1.
|
|
24
|
-
"@checkstack/auth-common": "0.7.
|
|
25
|
-
"@checkstack/command-backend": "0.1.
|
|
26
|
-
"@checkstack/signal-common": "0.2.
|
|
27
|
-
"@checkstack/integration-backend": "0.1.
|
|
28
|
-
"@checkstack/integration-common": "0.
|
|
29
|
-
"@checkstack/
|
|
17
|
+
"@checkstack/backend-api": "0.17.1",
|
|
18
|
+
"@checkstack/cache-api": "0.3.5",
|
|
19
|
+
"@checkstack/cache-utils": "0.2.10",
|
|
20
|
+
"@checkstack/incident-common": "1.3.0",
|
|
21
|
+
"@checkstack/catalog-common": "2.2.2",
|
|
22
|
+
"@checkstack/catalog-backend": "1.1.6",
|
|
23
|
+
"@checkstack/notification-common": "1.2.0",
|
|
24
|
+
"@checkstack/auth-common": "0.7.1",
|
|
25
|
+
"@checkstack/command-backend": "0.1.30",
|
|
26
|
+
"@checkstack/signal-common": "0.2.4",
|
|
27
|
+
"@checkstack/integration-backend": "0.1.30",
|
|
28
|
+
"@checkstack/integration-common": "0.5.0",
|
|
29
|
+
"@checkstack/automation-backend": "0.1.0",
|
|
30
|
+
"@checkstack/automation-common": "0.1.0",
|
|
31
|
+
"@checkstack/common": "0.11.0",
|
|
30
32
|
"drizzle-orm": "^0.45.0",
|
|
31
33
|
"zod": "^4.2.1",
|
|
32
34
|
"@orpc/server": "^1.13.2"
|
|
33
35
|
},
|
|
34
36
|
"devDependencies": {
|
|
35
37
|
"@checkstack/drizzle-helper": "0.0.5",
|
|
36
|
-
"@checkstack/scripts": "0.3.
|
|
37
|
-
"@checkstack/test-utils-backend": "0.1.
|
|
38
|
+
"@checkstack/scripts": "0.3.3",
|
|
39
|
+
"@checkstack/test-utils-backend": "0.1.30",
|
|
38
40
|
"@checkstack/tsconfig": "0.0.7",
|
|
39
41
|
"@types/bun": "^1.0.0",
|
|
40
42
|
"drizzle-kit": "^0.31.10",
|
package/src/automations.test.ts
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Behaviour tests for the incident automation actions. Triggers don't
|
|
3
|
+
* need their own tests — they're plain shape declarations against the
|
|
4
|
+
* existing hooks (`incidentHooks`) and the registry tests in
|
|
5
|
+
* `core/automation-backend` cover registration validity.
|
|
6
|
+
*/
|
|
7
|
+
import { describe, it, expect, mock } from "bun:test";
|
|
8
|
+
import { createMockLogger } from "@checkstack/test-utils-backend";
|
|
9
|
+
|
|
10
|
+
import { createIncidentActions } from "./automations";
|
|
11
|
+
import type { IncidentService } from "./service";
|
|
12
|
+
|
|
13
|
+
const makeServiceStub = (overrides: Partial<IncidentService> = {}) =>
|
|
14
|
+
({
|
|
15
|
+
createIncident: mock(),
|
|
16
|
+
resolveIncident: mock(),
|
|
17
|
+
addUpdate: mock(),
|
|
18
|
+
...overrides,
|
|
19
|
+
}) as unknown as IncidentService;
|
|
20
|
+
|
|
21
|
+
const logger = createMockLogger();
|
|
22
|
+
|
|
23
|
+
const actionContext = {
|
|
24
|
+
consumedArtifacts: {},
|
|
25
|
+
runId: "run-1",
|
|
26
|
+
automationId: "auto-1",
|
|
27
|
+
contextKey: "INC-1",
|
|
28
|
+
logger,
|
|
29
|
+
getService: async <T,>(): Promise<T> => {
|
|
30
|
+
throw new Error("not used");
|
|
31
|
+
},
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
describe("incident automation actions", () => {
|
|
35
|
+
describe("incident.create", () => {
|
|
36
|
+
it("calls service.createIncident with the config payload", async () => {
|
|
37
|
+
const created = {
|
|
38
|
+
id: "INC-1",
|
|
39
|
+
status: "investigating",
|
|
40
|
+
severity: "critical",
|
|
41
|
+
systemIds: ["sys-1"],
|
|
42
|
+
};
|
|
43
|
+
const service = makeServiceStub({
|
|
44
|
+
createIncident: mock(
|
|
45
|
+
async () => created,
|
|
46
|
+
) as unknown as IncidentService["createIncident"],
|
|
47
|
+
});
|
|
48
|
+
const [createAction] = createIncidentActions({ service });
|
|
49
|
+
const result = await createAction.execute({
|
|
50
|
+
...actionContext,
|
|
51
|
+
config: {
|
|
52
|
+
title: "DB down",
|
|
53
|
+
severity: "critical",
|
|
54
|
+
systemIds: ["sys-1"],
|
|
55
|
+
suppressNotifications: false,
|
|
56
|
+
} as never,
|
|
57
|
+
});
|
|
58
|
+
expect(result.success).toBe(true);
|
|
59
|
+
expect((result.artifact as { incidentId: string }).incidentId).toBe(
|
|
60
|
+
"INC-1",
|
|
61
|
+
);
|
|
62
|
+
expect(service.createIncident).toHaveBeenCalledWith({
|
|
63
|
+
title: "DB down",
|
|
64
|
+
description: undefined,
|
|
65
|
+
severity: "critical",
|
|
66
|
+
systemIds: ["sys-1"],
|
|
67
|
+
initialMessage: undefined,
|
|
68
|
+
suppressNotifications: false,
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
describe("incident.resolve", () => {
|
|
74
|
+
it("returns failure when the incident doesn't exist", async () => {
|
|
75
|
+
const service = makeServiceStub({
|
|
76
|
+
resolveIncident: mock(
|
|
77
|
+
async () => undefined,
|
|
78
|
+
) as unknown as IncidentService["resolveIncident"],
|
|
79
|
+
});
|
|
80
|
+
const actions = createIncidentActions({ service });
|
|
81
|
+
const resolveAction = actions[1];
|
|
82
|
+
const result = await resolveAction.execute({
|
|
83
|
+
...actionContext,
|
|
84
|
+
config: { incidentId: "missing" } as never,
|
|
85
|
+
});
|
|
86
|
+
expect(result.success).toBe(false);
|
|
87
|
+
expect(result.error).toMatch(/not found/i);
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
it("calls service.resolveIncident on the happy path", async () => {
|
|
91
|
+
const resolved = {
|
|
92
|
+
id: "INC-1",
|
|
93
|
+
status: "resolved",
|
|
94
|
+
severity: "critical",
|
|
95
|
+
systemIds: ["sys-1"],
|
|
96
|
+
};
|
|
97
|
+
const service = makeServiceStub({
|
|
98
|
+
resolveIncident: mock(
|
|
99
|
+
async () => resolved,
|
|
100
|
+
) as unknown as IncidentService["resolveIncident"],
|
|
101
|
+
});
|
|
102
|
+
const actions = createIncidentActions({ service });
|
|
103
|
+
const resolveAction = actions[1];
|
|
104
|
+
const result = await resolveAction.execute({
|
|
105
|
+
...actionContext,
|
|
106
|
+
config: { incidentId: "INC-1", message: "Fixed" } as never,
|
|
107
|
+
});
|
|
108
|
+
expect(result.success).toBe(true);
|
|
109
|
+
expect(service.resolveIncident).toHaveBeenCalledWith("INC-1", "Fixed");
|
|
110
|
+
});
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
describe("incident.add_update", () => {
|
|
114
|
+
it("forwards message + statusChange to service.addUpdate", async () => {
|
|
115
|
+
const update = {
|
|
116
|
+
id: "upd-1",
|
|
117
|
+
incidentId: "INC-1",
|
|
118
|
+
message: "msg",
|
|
119
|
+
createdAt: new Date(),
|
|
120
|
+
};
|
|
121
|
+
const service = makeServiceStub({
|
|
122
|
+
addUpdate: mock(
|
|
123
|
+
async () => update,
|
|
124
|
+
) as unknown as IncidentService["addUpdate"],
|
|
125
|
+
});
|
|
126
|
+
const actions = createIncidentActions({ service });
|
|
127
|
+
const addUpdateAction = actions[2];
|
|
128
|
+
const result = await addUpdateAction.execute({
|
|
129
|
+
...actionContext,
|
|
130
|
+
config: {
|
|
131
|
+
incidentId: "INC-1",
|
|
132
|
+
message: "Investigating",
|
|
133
|
+
statusChange: "identified",
|
|
134
|
+
} as never,
|
|
135
|
+
});
|
|
136
|
+
expect(result.success).toBe(true);
|
|
137
|
+
expect(service.addUpdate).toHaveBeenCalledWith({
|
|
138
|
+
incidentId: "INC-1",
|
|
139
|
+
message: "Investigating",
|
|
140
|
+
statusChange: "identified",
|
|
141
|
+
});
|
|
142
|
+
});
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
describe("incident.update_status", () => {
|
|
146
|
+
it("delegates to addUpdate with a generated audit message", async () => {
|
|
147
|
+
const update = {
|
|
148
|
+
id: "upd-2",
|
|
149
|
+
incidentId: "INC-1",
|
|
150
|
+
message: "Status changed to monitoring",
|
|
151
|
+
createdAt: new Date(),
|
|
152
|
+
};
|
|
153
|
+
const service = makeServiceStub({
|
|
154
|
+
addUpdate: mock(
|
|
155
|
+
async () => update,
|
|
156
|
+
) as unknown as IncidentService["addUpdate"],
|
|
157
|
+
});
|
|
158
|
+
const actions = createIncidentActions({ service });
|
|
159
|
+
const updateStatusAction = actions[3];
|
|
160
|
+
const result = await updateStatusAction.execute({
|
|
161
|
+
...actionContext,
|
|
162
|
+
config: { incidentId: "INC-1", status: "monitoring" } as never,
|
|
163
|
+
});
|
|
164
|
+
expect(result.success).toBe(true);
|
|
165
|
+
expect(service.addUpdate).toHaveBeenCalledWith({
|
|
166
|
+
incidentId: "INC-1",
|
|
167
|
+
message: "Status changed to monitoring",
|
|
168
|
+
statusChange: "monitoring",
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
});
|
|
172
|
+
});
|
package/src/automations.ts
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Incident triggers + actions registered with the Automation platform.
|
|
3
|
+
*
|
|
4
|
+
* Triggers re-expose the existing incident hooks as automation entry
|
|
5
|
+
* points; actions wrap the existing `IncidentService` methods so
|
|
6
|
+
* operators can compose them into automation flows (e.g. "when an
|
|
7
|
+
* incident is created, file a Jira ticket and post an update").
|
|
8
|
+
*
|
|
9
|
+
* Each trigger declares a `contextKey` extractor returning the
|
|
10
|
+
* `incidentId` — the dispatch engine uses it to scope artifact lookups
|
|
11
|
+
* and to match `wait_for_trigger` waits against the same incident.
|
|
12
|
+
*/
|
|
13
|
+
import { z } from "zod";
|
|
14
|
+
import { Versioned } from "@checkstack/backend-api";
|
|
15
|
+
import type {
|
|
16
|
+
ActionDefinition,
|
|
17
|
+
TriggerDefinition,
|
|
18
|
+
} from "@checkstack/automation-backend";
|
|
19
|
+
import {
|
|
20
|
+
IncidentSeverityEnum,
|
|
21
|
+
IncidentStatusEnum,
|
|
22
|
+
} from "@checkstack/incident-common";
|
|
23
|
+
|
|
24
|
+
import { incidentHooks } from "./hooks";
|
|
25
|
+
import type { IncidentService } from "./service";
|
|
26
|
+
|
|
27
|
+
// ─── Payload schemas — match the hook payloads exactly ─────────────────
|
|
28
|
+
|
|
29
|
+
const incidentCreatedPayloadSchema = z.object({
|
|
30
|
+
incidentId: z.string(),
|
|
31
|
+
systemIds: z.array(z.string()),
|
|
32
|
+
title: z.string(),
|
|
33
|
+
description: z.string().optional(),
|
|
34
|
+
severity: IncidentSeverityEnum,
|
|
35
|
+
status: IncidentStatusEnum,
|
|
36
|
+
createdAt: z.string(),
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
const incidentUpdatedPayloadSchema = z.object({
|
|
40
|
+
incidentId: z.string(),
|
|
41
|
+
systemIds: z.array(z.string()),
|
|
42
|
+
title: z.string(),
|
|
43
|
+
description: z.string().optional(),
|
|
44
|
+
severity: IncidentSeverityEnum,
|
|
45
|
+
status: IncidentStatusEnum,
|
|
46
|
+
statusChange: IncidentStatusEnum.optional(),
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
const incidentResolvedPayloadSchema = z.object({
|
|
50
|
+
incidentId: z.string(),
|
|
51
|
+
systemIds: z.array(z.string()),
|
|
52
|
+
title: z.string(),
|
|
53
|
+
severity: IncidentSeverityEnum,
|
|
54
|
+
resolvedAt: z.string(),
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
// ─── Triggers ──────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
export const incidentCreatedTrigger: TriggerDefinition<
|
|
60
|
+
z.infer<typeof incidentCreatedPayloadSchema>
|
|
61
|
+
> = {
|
|
62
|
+
id: "created",
|
|
63
|
+
displayName: "Incident Created",
|
|
64
|
+
description: "Fires when a new incident is created",
|
|
65
|
+
category: "Incidents",
|
|
66
|
+
icon: "CircleAlert",
|
|
67
|
+
payloadSchema: incidentCreatedPayloadSchema,
|
|
68
|
+
hook: incidentHooks.incidentCreated,
|
|
69
|
+
contextKey: (p) => p.incidentId,
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
export const incidentUpdatedTrigger: TriggerDefinition<
|
|
73
|
+
z.infer<typeof incidentUpdatedPayloadSchema>
|
|
74
|
+
> = {
|
|
75
|
+
id: "updated",
|
|
76
|
+
displayName: "Incident Updated",
|
|
77
|
+
description: "Fires when an incident is updated (info or status change)",
|
|
78
|
+
category: "Incidents",
|
|
79
|
+
icon: "CircleAlert",
|
|
80
|
+
payloadSchema: incidentUpdatedPayloadSchema,
|
|
81
|
+
hook: incidentHooks.incidentUpdated,
|
|
82
|
+
contextKey: (p) => p.incidentId,
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
export const incidentResolvedTrigger: TriggerDefinition<
|
|
86
|
+
z.infer<typeof incidentResolvedPayloadSchema>
|
|
87
|
+
> = {
|
|
88
|
+
id: "resolved",
|
|
89
|
+
displayName: "Incident Resolved",
|
|
90
|
+
description: "Fires when an incident is marked as resolved",
|
|
91
|
+
category: "Incidents",
|
|
92
|
+
icon: "CircleCheck",
|
|
93
|
+
payloadSchema: incidentResolvedPayloadSchema,
|
|
94
|
+
hook: incidentHooks.incidentResolved,
|
|
95
|
+
contextKey: (p) => p.incidentId,
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* All incident triggers as a heterogeneous list. Typed as
|
|
100
|
+
* `TriggerDefinition<unknown>[]` so the array can be iterated in the
|
|
101
|
+
* plugin entry without TypeScript collapsing the union to a single
|
|
102
|
+
* payload shape.
|
|
103
|
+
*/
|
|
104
|
+
export const incidentTriggers: TriggerDefinition<unknown>[] = [
|
|
105
|
+
incidentCreatedTrigger as TriggerDefinition<unknown>,
|
|
106
|
+
incidentUpdatedTrigger as TriggerDefinition<unknown>,
|
|
107
|
+
incidentResolvedTrigger as TriggerDefinition<unknown>,
|
|
108
|
+
];
|
|
109
|
+
|
|
110
|
+
// ─── Action configs ────────────────────────────────────────────────────
|
|
111
|
+
|
|
112
|
+
const incidentCreateConfigSchema = z.object({
|
|
113
|
+
title: z.string().min(1),
|
|
114
|
+
description: z.string().optional(),
|
|
115
|
+
severity: IncidentSeverityEnum,
|
|
116
|
+
systemIds: z.array(z.string()).min(1),
|
|
117
|
+
initialMessage: z.string().optional(),
|
|
118
|
+
suppressNotifications: z.boolean().optional().default(false),
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
const incidentResolveConfigSchema = z.object({
|
|
122
|
+
incidentId: z.string().min(1),
|
|
123
|
+
message: z.string().optional(),
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
const incidentAddUpdateConfigSchema = z.object({
|
|
127
|
+
incidentId: z.string().min(1),
|
|
128
|
+
message: z.string().min(1),
|
|
129
|
+
statusChange: IncidentStatusEnum.optional(),
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
const incidentUpdateStatusConfigSchema = z.object({
|
|
133
|
+
incidentId: z.string().min(1),
|
|
134
|
+
status: IncidentStatusEnum,
|
|
135
|
+
/**
|
|
136
|
+
* Optional accompanying message. Defaults to a generic transition note
|
|
137
|
+
* so the audit trail is never empty.
|
|
138
|
+
*/
|
|
139
|
+
message: z.string().optional(),
|
|
140
|
+
});
|
|
141
|
+
|
|
142
|
+
// ─── Action artifact shapes ────────────────────────────────────────────
|
|
143
|
+
|
|
144
|
+
interface IncidentArtifact {
|
|
145
|
+
incidentId: string;
|
|
146
|
+
status: string;
|
|
147
|
+
severity: string;
|
|
148
|
+
systemIds: string[];
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
interface IncidentUpdateArtifact {
|
|
152
|
+
updateId: string;
|
|
153
|
+
incidentId: string;
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// ─── Actions ───────────────────────────────────────────────────────────
|
|
157
|
+
|
|
158
|
+
export interface IncidentActionDeps {
|
|
159
|
+
service: IncidentService;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
export function createIncidentActions(
|
|
163
|
+
deps: IncidentActionDeps,
|
|
164
|
+
): ActionDefinition<unknown, unknown>[] {
|
|
165
|
+
const { service } = deps;
|
|
166
|
+
|
|
167
|
+
const createAction: ActionDefinition<
|
|
168
|
+
z.infer<typeof incidentCreateConfigSchema>,
|
|
169
|
+
IncidentArtifact
|
|
170
|
+
> = {
|
|
171
|
+
id: "create",
|
|
172
|
+
displayName: "Create Incident",
|
|
173
|
+
description: "Open a new incident affecting one or more systems",
|
|
174
|
+
category: "Incidents",
|
|
175
|
+
icon: "CircleAlert",
|
|
176
|
+
config: new Versioned({
|
|
177
|
+
version: 1,
|
|
178
|
+
schema: incidentCreateConfigSchema,
|
|
179
|
+
}),
|
|
180
|
+
execute: async ({ config, logger }) => {
|
|
181
|
+
const incident = await service.createIncident({
|
|
182
|
+
title: config.title,
|
|
183
|
+
description: config.description,
|
|
184
|
+
severity: config.severity,
|
|
185
|
+
systemIds: config.systemIds,
|
|
186
|
+
initialMessage: config.initialMessage,
|
|
187
|
+
suppressNotifications: config.suppressNotifications,
|
|
188
|
+
});
|
|
189
|
+
logger.info(`Automation created incident ${incident.id}`);
|
|
190
|
+
return {
|
|
191
|
+
success: true,
|
|
192
|
+
externalId: incident.id,
|
|
193
|
+
artifact: {
|
|
194
|
+
incidentId: incident.id,
|
|
195
|
+
status: incident.status,
|
|
196
|
+
severity: incident.severity,
|
|
197
|
+
systemIds: incident.systemIds,
|
|
198
|
+
},
|
|
199
|
+
};
|
|
200
|
+
},
|
|
201
|
+
};
|
|
202
|
+
|
|
203
|
+
const resolveAction: ActionDefinition<
|
|
204
|
+
z.infer<typeof incidentResolveConfigSchema>,
|
|
205
|
+
IncidentArtifact
|
|
206
|
+
> = {
|
|
207
|
+
id: "resolve",
|
|
208
|
+
displayName: "Resolve Incident",
|
|
209
|
+
description: "Mark an existing incident as resolved",
|
|
210
|
+
category: "Incidents",
|
|
211
|
+
icon: "CircleCheck",
|
|
212
|
+
config: new Versioned({
|
|
213
|
+
version: 1,
|
|
214
|
+
schema: incidentResolveConfigSchema,
|
|
215
|
+
}),
|
|
216
|
+
execute: async ({ config, logger }) => {
|
|
217
|
+
const incident = await service.resolveIncident(
|
|
218
|
+
config.incidentId,
|
|
219
|
+
config.message,
|
|
220
|
+
);
|
|
221
|
+
if (!incident) {
|
|
222
|
+
return {
|
|
223
|
+
success: false,
|
|
224
|
+
error: `Incident ${config.incidentId} not found`,
|
|
225
|
+
};
|
|
226
|
+
}
|
|
227
|
+
logger.info(`Automation resolved incident ${incident.id}`);
|
|
228
|
+
return {
|
|
229
|
+
success: true,
|
|
230
|
+
externalId: incident.id,
|
|
231
|
+
artifact: {
|
|
232
|
+
incidentId: incident.id,
|
|
233
|
+
status: incident.status,
|
|
234
|
+
severity: incident.severity,
|
|
235
|
+
systemIds: incident.systemIds,
|
|
236
|
+
},
|
|
237
|
+
};
|
|
238
|
+
},
|
|
239
|
+
};
|
|
240
|
+
|
|
241
|
+
const addUpdateAction: ActionDefinition<
|
|
242
|
+
z.infer<typeof incidentAddUpdateConfigSchema>,
|
|
243
|
+
IncidentUpdateArtifact
|
|
244
|
+
> = {
|
|
245
|
+
id: "add_update",
|
|
246
|
+
displayName: "Add Incident Update",
|
|
247
|
+
description: "Post a status update to an existing incident",
|
|
248
|
+
category: "Incidents",
|
|
249
|
+
icon: "MessageSquare",
|
|
250
|
+
config: new Versioned({
|
|
251
|
+
version: 1,
|
|
252
|
+
schema: incidentAddUpdateConfigSchema,
|
|
253
|
+
}),
|
|
254
|
+
execute: async ({ config, logger }) => {
|
|
255
|
+
const update = await service.addUpdate({
|
|
256
|
+
incidentId: config.incidentId,
|
|
257
|
+
message: config.message,
|
|
258
|
+
statusChange: config.statusChange,
|
|
259
|
+
});
|
|
260
|
+
logger.info(
|
|
261
|
+
`Automation added update ${update.id} to incident ${config.incidentId}`,
|
|
262
|
+
);
|
|
263
|
+
return {
|
|
264
|
+
success: true,
|
|
265
|
+
externalId: update.id,
|
|
266
|
+
artifact: {
|
|
267
|
+
updateId: update.id,
|
|
268
|
+
incidentId: update.incidentId,
|
|
269
|
+
},
|
|
270
|
+
};
|
|
271
|
+
},
|
|
272
|
+
};
|
|
273
|
+
|
|
274
|
+
const updateStatusAction: ActionDefinition<
|
|
275
|
+
z.infer<typeof incidentUpdateStatusConfigSchema>,
|
|
276
|
+
IncidentUpdateArtifact
|
|
277
|
+
> = {
|
|
278
|
+
id: "update_status",
|
|
279
|
+
displayName: "Update Incident Status",
|
|
280
|
+
description: "Change an incident's status and post an audit update",
|
|
281
|
+
category: "Incidents",
|
|
282
|
+
icon: "Activity",
|
|
283
|
+
config: new Versioned({
|
|
284
|
+
version: 1,
|
|
285
|
+
schema: incidentUpdateStatusConfigSchema,
|
|
286
|
+
}),
|
|
287
|
+
execute: async ({ config, logger }) => {
|
|
288
|
+
const update = await service.addUpdate({
|
|
289
|
+
incidentId: config.incidentId,
|
|
290
|
+
message: config.message ?? `Status changed to ${config.status}`,
|
|
291
|
+
statusChange: config.status,
|
|
292
|
+
});
|
|
293
|
+
logger.info(
|
|
294
|
+
`Automation set incident ${config.incidentId} status → ${config.status}`,
|
|
295
|
+
);
|
|
296
|
+
return {
|
|
297
|
+
success: true,
|
|
298
|
+
externalId: update.id,
|
|
299
|
+
artifact: {
|
|
300
|
+
updateId: update.id,
|
|
301
|
+
incidentId: update.incidentId,
|
|
302
|
+
},
|
|
303
|
+
};
|
|
304
|
+
},
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
return [
|
|
308
|
+
createAction as ActionDefinition<unknown, unknown>,
|
|
309
|
+
resolveAction as ActionDefinition<unknown, unknown>,
|
|
310
|
+
addUpdateAction as ActionDefinition<unknown, unknown>,
|
|
311
|
+
updateStatusAction as ActionDefinition<unknown, unknown>,
|
|
312
|
+
];
|
|
313
|
+
}
|
package/src/hooks.ts
CHANGED
|
@@ -1,8 +1,17 @@
|
|
|
1
1
|
import { createHook } from "@checkstack/backend-api";
|
|
2
|
+
import type {
|
|
3
|
+
IncidentSeverity,
|
|
4
|
+
IncidentStatus,
|
|
5
|
+
} from "@checkstack/incident-common";
|
|
2
6
|
|
|
3
7
|
/**
|
|
4
8
|
* Incident hooks for cross-plugin communication.
|
|
5
9
|
* Other plugins can subscribe to these hooks to react to incident lifecycle events.
|
|
10
|
+
*
|
|
11
|
+
* `severity` / `status` carry the canonical enum values
|
|
12
|
+
* (`IncidentSeverity` / `IncidentStatus`) rather than loose strings, so
|
|
13
|
+
* automation triggers built on these hooks can offer the known values
|
|
14
|
+
* for `==` comparisons in the editor.
|
|
6
15
|
*/
|
|
7
16
|
export const incidentHooks = {
|
|
8
17
|
/**
|
|
@@ -14,8 +23,8 @@ export const incidentHooks = {
|
|
|
14
23
|
systemIds: string[];
|
|
15
24
|
title: string;
|
|
16
25
|
description?: string;
|
|
17
|
-
severity:
|
|
18
|
-
status:
|
|
26
|
+
severity: IncidentSeverity;
|
|
27
|
+
status: IncidentStatus;
|
|
19
28
|
createdAt: string;
|
|
20
29
|
}>("incident.created"),
|
|
21
30
|
|
|
@@ -28,9 +37,9 @@ export const incidentHooks = {
|
|
|
28
37
|
systemIds: string[];
|
|
29
38
|
title: string;
|
|
30
39
|
description?: string;
|
|
31
|
-
severity:
|
|
32
|
-
status:
|
|
33
|
-
statusChange?:
|
|
40
|
+
severity: IncidentSeverity;
|
|
41
|
+
status: IncidentStatus;
|
|
42
|
+
statusChange?: IncidentStatus;
|
|
34
43
|
}>("incident.updated"),
|
|
35
44
|
|
|
36
45
|
/**
|
|
@@ -41,7 +50,7 @@ export const incidentHooks = {
|
|
|
41
50
|
incidentId: string;
|
|
42
51
|
systemIds: string[];
|
|
43
52
|
title: string;
|
|
44
|
-
severity:
|
|
53
|
+
severity: IncidentSeverity;
|
|
45
54
|
resolvedAt: string;
|
|
46
55
|
}>("incident.resolved"),
|
|
47
56
|
} as const;
|
package/src/index.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import * as schema from "./schema";
|
|
2
2
|
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
3
|
-
import { z } from "zod";
|
|
4
3
|
import {
|
|
5
4
|
incidentAccessRules,
|
|
6
5
|
incidentAccess,
|
|
@@ -11,7 +10,10 @@ import {
|
|
|
11
10
|
incidentGroupSubscription,
|
|
12
11
|
} from "@checkstack/incident-common";
|
|
13
12
|
import { createBackendPlugin, coreServices } from "@checkstack/backend-api";
|
|
14
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
automationActionExtensionPoint,
|
|
15
|
+
automationTriggerExtensionPoint,
|
|
16
|
+
} from "@checkstack/automation-backend";
|
|
15
17
|
import {
|
|
16
18
|
NotificationApi,
|
|
17
19
|
specToRegistration,
|
|
@@ -23,40 +25,8 @@ import { AuthApi } from "@checkstack/auth-common";
|
|
|
23
25
|
import { catalogHooks } from "@checkstack/catalog-backend";
|
|
24
26
|
import { registerSearchProvider } from "@checkstack/command-backend";
|
|
25
27
|
import { resolveRoute } from "@checkstack/common";
|
|
26
|
-
import { incidentHooks } from "./hooks";
|
|
27
28
|
import { createIncidentCache } from "./cache";
|
|
28
|
-
|
|
29
|
-
// =============================================================================
|
|
30
|
-
// Integration Event Payload Schemas
|
|
31
|
-
// =============================================================================
|
|
32
|
-
|
|
33
|
-
const incidentCreatedPayloadSchema = z.object({
|
|
34
|
-
incidentId: z.string(),
|
|
35
|
-
systemIds: z.array(z.string()),
|
|
36
|
-
title: z.string(),
|
|
37
|
-
description: z.string().optional(),
|
|
38
|
-
severity: z.string(),
|
|
39
|
-
status: z.string(),
|
|
40
|
-
createdAt: z.string(),
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
const incidentUpdatedPayloadSchema = z.object({
|
|
44
|
-
incidentId: z.string(),
|
|
45
|
-
systemIds: z.array(z.string()),
|
|
46
|
-
title: z.string(),
|
|
47
|
-
description: z.string().optional(),
|
|
48
|
-
severity: z.string(),
|
|
49
|
-
status: z.string(),
|
|
50
|
-
statusChange: z.string().optional(),
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
const incidentResolvedPayloadSchema = z.object({
|
|
54
|
-
incidentId: z.string(),
|
|
55
|
-
systemIds: z.array(z.string()),
|
|
56
|
-
title: z.string(),
|
|
57
|
-
severity: z.string(),
|
|
58
|
-
resolvedAt: z.string(),
|
|
59
|
-
});
|
|
29
|
+
import { createIncidentActions, incidentTriggers } from "./automations";
|
|
60
30
|
|
|
61
31
|
// =============================================================================
|
|
62
32
|
// Plugin Definition
|
|
@@ -71,44 +41,16 @@ export default createBackendPlugin({
|
|
|
71
41
|
incidentGroupSubscription,
|
|
72
42
|
]);
|
|
73
43
|
|
|
74
|
-
// Register hooks as
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
{
|
|
81
|
-
hook: incidentHooks.incidentCreated,
|
|
82
|
-
displayName: "Incident Created",
|
|
83
|
-
description: "Fired when a new incident is created",
|
|
84
|
-
category: "Incidents",
|
|
85
|
-
payloadSchema: incidentCreatedPayloadSchema,
|
|
86
|
-
},
|
|
87
|
-
pluginMetadata,
|
|
88
|
-
);
|
|
89
|
-
|
|
90
|
-
integrationEvents.registerEvent(
|
|
91
|
-
{
|
|
92
|
-
hook: incidentHooks.incidentUpdated,
|
|
93
|
-
displayName: "Incident Updated",
|
|
94
|
-
description:
|
|
95
|
-
"Fired when an incident is updated (info or status change)",
|
|
96
|
-
category: "Incidents",
|
|
97
|
-
payloadSchema: incidentUpdatedPayloadSchema,
|
|
98
|
-
},
|
|
99
|
-
pluginMetadata,
|
|
100
|
-
);
|
|
101
|
-
|
|
102
|
-
integrationEvents.registerEvent(
|
|
103
|
-
{
|
|
104
|
-
hook: incidentHooks.incidentResolved,
|
|
105
|
-
displayName: "Incident Resolved",
|
|
106
|
-
description: "Fired when an incident is marked as resolved",
|
|
107
|
-
category: "Incidents",
|
|
108
|
-
payloadSchema: incidentResolvedPayloadSchema,
|
|
109
|
-
},
|
|
110
|
-
pluginMetadata,
|
|
44
|
+
// Register hooks as automation triggers — buffered until the
|
|
45
|
+
// automation plugin's `register()` runs and the extension point
|
|
46
|
+
// resolves. Triggers expose `contextKey` so wait_for_trigger can
|
|
47
|
+
// match resume events back to the originating incident.
|
|
48
|
+
const automationTriggers = env.getExtensionPoint(
|
|
49
|
+
automationTriggerExtensionPoint,
|
|
111
50
|
);
|
|
51
|
+
for (const trigger of incidentTriggers) {
|
|
52
|
+
automationTriggers.registerTrigger(trigger, pluginMetadata);
|
|
53
|
+
}
|
|
112
54
|
|
|
113
55
|
let incidentCache:
|
|
114
56
|
| ReturnType<typeof createIncidentCache>
|
|
@@ -153,6 +95,18 @@ export default createBackendPlugin({
|
|
|
153
95
|
);
|
|
154
96
|
rpc.registerRouter(router, incidentContract);
|
|
155
97
|
|
|
98
|
+
// Register incident actions with the Automation platform. We
|
|
99
|
+
// capture the service in closure here (rather than via a
|
|
100
|
+
// service ref + ctx.getService at execute time) because the
|
|
101
|
+
// service has no per-request state — one instance for the life
|
|
102
|
+
// of the plugin is correct.
|
|
103
|
+
const automationActions = env.getExtensionPoint(
|
|
104
|
+
automationActionExtensionPoint,
|
|
105
|
+
);
|
|
106
|
+
for (const action of createIncidentActions({ service })) {
|
|
107
|
+
automationActions.registerAction(action, pluginMetadata);
|
|
108
|
+
}
|
|
109
|
+
|
|
156
110
|
// Register "Create Incident" command in the command palette
|
|
157
111
|
registerSearchProvider({
|
|
158
112
|
pluginMetadata,
|
package/src/router.ts
CHANGED
|
@@ -395,6 +395,96 @@ export function createRouter(
|
|
|
395
395
|
return { suppressed };
|
|
396
396
|
}),
|
|
397
397
|
|
|
398
|
+
// Service-initiated incident creation. There is no user context on this
// path, so createdBy stays null and the timeline shows the originating
// plugin via the hook payload.
//
// Steps, in order: create the incident, invalidate the cache for the
// mutation, broadcast INCIDENT_UPDATED ("created"), emit the
// incidentCreated hook, then notify the affected systems. Returns the new
// incident's id.
createAutoIncident: os.createAutoIncident.handler(
  async ({ input, context }) => {
    const incident = await service.createIncident(input);
    const { id: incidentId, systemIds, title, severity } = incident;

    await cache.invalidateForMutation({ incidentId, systemIds });

    await signalService.broadcast(INCIDENT_UPDATED, {
      incidentId,
      systemIds,
      action: "created",
    });

    await context.emitHook(incidentHooks.incidentCreated, {
      incidentId,
      systemIds,
      title,
      description: incident.description,
      severity,
      status: incident.status,
      createdAt: incident.createdAt.toISOString(),
    });

    // Names are resolved only after the hook has been emitted, matching
    // the order of the other steps above.
    const systemNames = await resolveSystemNames(systemIds);
    await notifyAffectedSystems({
      catalogClient,
      notificationClient,
      logger,
      incidentId,
      incidentTitle: title,
      systemIds,
      systemNames,
      action: "created",
      severity,
    });

    return { id: incidentId };
  },
),
|
|
442
|
+
|
|
443
|
+
// Service-initiated incident resolution.
//
// Idempotent: when `service.resolveIncident` yields nothing (a missing or
// already-resolved incident), the handler still reports success so the
// auto-close worker can be re-run safely. Otherwise it invalidates the
// cache for the mutation, broadcasts INCIDENT_UPDATED ("resolved"), emits
// the incidentResolved hook and notifies the affected systems.
resolveAutoIncident: os.resolveAutoIncident.handler(
  async ({ input, context }) => {
    const resolved = await service.resolveIncident(input.id, input.message);

    if (resolved) {
      const { id: incidentId, systemIds, title, severity } = resolved;

      await cache.invalidateForMutation({ incidentId, systemIds });

      await signalService.broadcast(INCIDENT_UPDATED, {
        incidentId,
        systemIds,
        action: "resolved",
      });

      await context.emitHook(incidentHooks.incidentResolved, {
        incidentId,
        systemIds,
        title,
        severity,
        // Timestamp is taken at emit time, not read from the incident row.
        resolvedAt: new Date().toISOString(),
      });

      const systemNames = await resolveSystemNames(systemIds);
      await notifyAffectedSystems({
        catalogClient,
        notificationClient,
        logger,
        incidentId,
        incidentTitle: title,
        systemIds,
        systemNames,
        action: "resolved",
        severity,
      });
    }

    return { success: true };
  },
),
|
|
487
|
+
|
|
398
488
|
addLink: os.addLink.handler(async ({ input }) => {
|
|
399
489
|
// Verify incident exists so the FK violation surfaces as NOT_FOUND.
|
|
400
490
|
const incident = await service.getIncident(input.incidentId);
|