@checkstack/maintenance-backend 1.1.6 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,519 @@
1
+ /**
2
+ * Maintenance actions registered with the Automation Platform.
3
+ *
4
+ * The `maintenance.created` / `maintenance.updated` entry points used to be
5
+ * hook-backed triggers re-exposing `maintenanceHooks`. Phase 4 (reactive
6
+ * automation engine §10.2) migrated the maintenance domain onto the entity
7
+ * state machine, and the domain is now a Model-B PLUGIN-BACKED entity: the
8
+ * `maintenances` table IS the current-state storage. The triggers + their
9
+ * hooks are removed, and the same qualified trigger event ids are derived
10
+ * from `maintenance` entity changes (see `./entity.ts`). Mutation actions
11
+ * therefore drive the REAL write through `handle.mutate` (the write runs
12
+ * inside `apply`) instead of emitting a hook; the deriver re-fires the
13
+ * equivalent trigger events for downstream automations.
14
+ *
15
+ * Actions wrap `MaintenanceService` for `create`, `update`, and
16
+ * `add_update`, plus two "system-shaped" actions:
17
+ *
18
+ * - `set_system`: schedule a maintenance window that starts now and
19
+ * covers a single system, for a given duration. The convenient
20
+ * "park this system for an hour" operation.
21
+ * - `clear_system`: close every active/scheduled maintenance that
22
+ * covers a given system. The convenient "let it back into rotation
23
+ * even if maintenance was over-scheduled" operation.
24
+ */
25
+ import { z } from "zod";
26
+ import { Versioned } from "@checkstack/backend-api";
27
+ import type {
28
+ ActionDefinition,
29
+ EntityHandle,
30
+ TriggerDefinition,
31
+ } from "@checkstack/automation-backend";
32
+ import { makeEntityDrivenTriggerSetup } from "@checkstack/automation-backend";
33
+ import { SYSTEM_ACTOR } from "@checkstack/common";
34
+ import {
35
+ MaintenanceStatusEnum,
36
+ type MaintenanceStatus,
37
+ } from "@checkstack/maintenance-common";
38
+
39
+ import type {
40
+ ActionRunScope,
41
+ EntityMutationOpts,
42
+ } from "@checkstack/automation-backend";
43
+ import type { MaintenanceService } from "./service";
44
+ import {
45
+ toMaintenanceEntityState,
46
+ writeMaintenanceEntity,
47
+ type MaintenanceEntityState,
48
+ } from "./entity";
49
+
50
+ // ─── Triggers ──────────────────────────────────────────────────────────
51
+ //
52
+ // These two triggers are ENTITY-DRIVEN (reactive automation engine §10.2): the
53
+ // `maintenance` entity's change deriver fires `maintenance.created` /
54
+ // `maintenance.updated` via Stage-1 routing, so they no longer subscribe to a
55
+ // hook. A no-op `setup` (`makeEntityDrivenTriggerSetup`) keeps them in the
56
+ // editor's trigger catalog (and payload-introspectable) without re-introducing
57
+ // a hook — mirroring how the incident / catalog / dependency / healthcheck
58
+ // domains kept their registrations after migrating. The runtime
59
+ // `trigger.payload` matches these schemas via the `maintenanceChangeToPayload`
60
+ // mapper registered alongside the deriver.
61
+ //
62
+ // The reactive `maintenance` entity state is `{ status, systemIds, startAt,
63
+ // endAt }`. The descriptive fields the old hook carried (`title`,
64
+ // `description`) are NOT derivable from an entity change, so they are OMITTED
65
+ // from the entity-driven payload; the schemas declare only what the mapper
66
+ // produces.
67
/**
 * Field set shared by both entity-driven maintenance trigger payloads: the
 * maintenance id plus the `status` / `systemIds` / `startAt` / `endAt` values.
 */
const maintenanceTriggerPayloadShape = {
  maintenanceId: z.string(),
  status: MaintenanceStatusEnum,
  systemIds: z.array(z.string()),
  startAt: z.string(),
  endAt: z.string(),
};

/** Payload schema declared by the `created` trigger. */
const maintenanceCreatedPayloadSchema = z.object(
  maintenanceTriggerPayloadShape,
);

/** Payload schema declared by the `updated` trigger (same fields as `created`). */
const maintenanceUpdatedPayloadSchema = z.object(
  maintenanceTriggerPayloadShape,
);
82
+
83
/** Payload delivered to automations listening on the `created` trigger. */
type MaintenanceCreatedPayload = z.infer<
  typeof maintenanceCreatedPayloadSchema
>;

/**
 * Catalog registration for the `created` maintenance trigger.
 *
 * `setup` comes from `makeEntityDrivenTriggerSetup`, so this definition does
 * not subscribe to a hook itself; runs are keyed by the maintenance id.
 */
export const maintenanceCreatedTrigger: TriggerDefinition<MaintenanceCreatedPayload> =
  {
    id: "created",
    displayName: "Maintenance Created",
    description: "Fires when a new maintenance window is scheduled",
    category: "Maintenance",
    icon: "Wrench",
    payloadSchema: maintenanceCreatedPayloadSchema,
    setup: makeEntityDrivenTriggerSetup<MaintenanceCreatedPayload>(),
    contextKey: ({ maintenanceId }) => maintenanceId,
  };
97
+
98
/** Payload delivered to automations listening on the `updated` trigger. */
type MaintenanceUpdatedPayload = z.infer<
  typeof maintenanceUpdatedPayloadSchema
>;

/**
 * Catalog registration for the `updated` maintenance trigger.
 *
 * `setup` comes from `makeEntityDrivenTriggerSetup`, so this definition does
 * not subscribe to a hook itself; runs are keyed by the maintenance id.
 */
export const maintenanceUpdatedTrigger: TriggerDefinition<MaintenanceUpdatedPayload> =
  {
    id: "updated",
    displayName: "Maintenance Updated",
    description:
      "Fires when a maintenance window's status, schedule, or affected systems change",
    category: "Maintenance",
    icon: "Wrench",
    payloadSchema: maintenanceUpdatedPayloadSchema,
    setup: makeEntityDrivenTriggerSetup<MaintenanceUpdatedPayload>(),
    contextKey: ({ maintenanceId }) => maintenanceId,
  };
113
+
114
/**
 * Every maintenance trigger, in registration order (`created`, `updated`).
 *
 * Each entry is widened to `TriggerDefinition<unknown>` so the plugin entry can
 * iterate the list without TypeScript collapsing it to one payload shape.
 */
export const maintenanceTriggers: TriggerDefinition<unknown>[] = [
  maintenanceCreatedTrigger,
  maintenanceUpdatedTrigger,
].map((trigger) => trigger as TriggerDefinition<unknown>);
123
+
124
/**
 * Builds the mutation options for an entity write that originates from an
 * action run.
 *
 * @param args.runId - Id of the automation run performing the write.
 * @param args.scope - Optional run scope; when present, the actor of its
 *   trigger is attributed to the write.
 * @returns Options carrying the run id and the trigger's actor, or
 *   `SYSTEM_ACTOR` when no scope/actor is available.
 */
function mutationOpts(args: {
  runId: string;
  scope?: ActionRunScope;
}): EntityMutationOpts {
  const { runId, scope } = args;
  // Attribute the write to whoever fired the trigger; fall back to the system.
  const actor = scope?.trigger.actor ?? SYSTEM_ACTOR;
  return { runId, actor };
}
139
+
140
+ // ─── Action configs ────────────────────────────────────────────────────
141
+
142
/** Longest window `set_system` accepts: 30 days, expressed in minutes. */
const MAX_SET_SYSTEM_DURATION_MINUTES = 30 * 24 * 60;

/** Config of the `create` action: a titled window over one or more systems. */
const createConfigSchema = z.object({
  title: z.string().min(1),
  description: z.string().optional(),
  systemIds: z.array(z.string()).min(1),
  startAt: z.string().describe("ISO timestamp when the window starts"),
  endAt: z.string().describe("ISO timestamp when the window ends"),
  suppressNotifications: z.boolean().optional().default(false),
});

/** Config of the `update` action: only `maintenanceId` is required. */
const updateConfigSchema = z.object({
  maintenanceId: z.string().min(1),
  title: z.string().optional(),
  description: z.string().optional(),
  systemIds: z.array(z.string()).optional(),
  startAt: z.string().optional(),
  endAt: z.string().optional(),
  suppressNotifications: z.boolean().optional(),
});

/** Config of the `add_update` action: a note plus an optional status change. */
const addUpdateConfigSchema = z.object({
  maintenanceId: z.string().min(1),
  message: z.string().min(1),
  statusChange: MaintenanceStatusEnum.optional(),
});

/** Config of the `set_system` action: one system and a whole-minute duration. */
const setSystemConfigSchema = z.object({
  systemId: z.string().min(1),
  title: z.string().optional(),
  description: z.string().optional(),
  durationMinutes: z
    .number()
    .int()
    .min(1)
    .max(MAX_SET_SYSTEM_DURATION_MINUTES)
    .describe("Window length in minutes (1 min – 30 days)"),
  suppressNotifications: z.boolean().optional().default(false),
});

/** Config of the `clear_system` action: the system and an optional log note. */
const clearSystemConfigSchema = z.object({
  systemId: z.string().min(1),
  message: z
    .string()
    .optional()
    .describe(
      "Note appended to the maintenance update log; defaults to a generic 'cleared by automation' message",
    ),
});
191
+
192
+ // ─── Artifact ──────────────────────────────────────────────────────────
193
+
194
/**
 * Shape of the artifact describing a single maintenance window: its id,
 * status, covered systems and the window bounds as strings.
 */
const maintenanceArtifactSchema = z.object({
  maintenanceId: z.string(),
  status: MaintenanceStatusEnum,
  systemIds: z.array(z.string()),
  startAt: z.string(),
  endAt: z.string(),
});

/** Static type of a maintenance-window artifact, inferred from its schema. */
export type MaintenanceArtifact = z.infer<typeof maintenanceArtifactSchema>;

/** Descriptor (id, display metadata, schema) of the `window` artifact type. */
export const maintenanceArtifactType = {
  id: "window",
  displayName: "Maintenance Window",
  description:
    "Maintenance row touched (created/updated/closed) by an automation",
  schema: maintenanceArtifactSchema,
} as const;
210
+
211
+ // ─── Action factory ────────────────────────────────────────────────────
212
+
213
/** Collaborators injected into `createMaintenanceActions`. */
export interface MaintenanceActionDeps {
  /** Service performing the actual maintenance reads and writes. */
  service: MaintenanceService;
  /**
   * Handle of the reactive `maintenance` entity. Actions route their writes
   * through it (via `writeMaintenanceEntity`) rather than emitting hooks.
   */
  entityHandle: EntityHandle<MaintenanceEntityState>;
  /**
   * Clock override. `set_system` is the only consumer: it reads the current
   * time as the window start and adds `durationMinutes` to get the end. When
   * omitted, `new Date()` is used.
   */
  now?: () => Date;
}
230
+
231
/**
 * Projects a maintenance record onto the artifact shape handed to later
 * automation steps.
 *
 * @param maint - Record fields to project; `startAt` / `endAt` may be `Date`
 *   instances or strings.
 * @returns The artifact, with `Date` bounds rendered via `toISOString()` and
 *   string bounds passed through untouched.
 */
function toArtifact(maint: {
  id: string;
  status: MaintenanceStatus;
  systemIds: string[];
  startAt: Date | string;
  endAt: Date | string;
}): MaintenanceArtifact {
  const { id, status, systemIds, startAt, endAt } = maint;
  // Only genuine Date instances are converted; anything else is forwarded.
  const render = (bound: Date | string): string =>
    bound instanceof Date ? bound.toISOString() : bound;

  return {
    maintenanceId: id,
    status,
    systemIds,
    startAt: render(startAt),
    endAt: render(endAt),
  };
}
247
+
248
/**
 * Parses a timestamp string taken from an action config.
 *
 * @param value - Raw string as configured (expected to be an ISO timestamp).
 * @returns The parsed `Date`, or `undefined` when the `Date` constructor
 *   cannot interpret `value` (i.e. it would produce an "Invalid Date").
 */
function parseConfigTimestamp(value: string): Date | undefined {
  const parsed = new Date(value);
  return Number.isNaN(parsed.getTime()) ? undefined : parsed;
}

/**
 * Builds the maintenance actions (`create`, `update`, `add_update`,
 * `set_system`, `clear_system`) bound to the given dependencies.
 *
 * Every mutation is routed through `writeMaintenanceEntity` with the service
 * call performed inside `apply`, and `apply` returns the post-write entity
 * state produced by `toMaintenanceEntityState`.
 *
 * @param deps - Service, entity handle and optional clock used by the actions.
 * @returns The five action definitions, widened to
 *   `ActionDefinition<unknown, unknown>` so they can share one array.
 */
export function createMaintenanceActions(
  deps: MaintenanceActionDeps,
): ActionDefinition<unknown, unknown>[] {
  const now = deps.now ?? (() => new Date());

  /** Run identity forwarded to `mutationOpts` for every entity write. */
  type RunContext = Parameters<typeof mutationOpts>[0];
  type CreatedMaintenance = Awaited<
    ReturnType<typeof deps.service.createMaintenance>
  >;

  /**
   * Shared create path for `create` and `set_system`: generates the id up
   * front, runs `write` inside the entity mutation's `apply`, and returns the
   * row that `write` produced.
   */
  const createThroughEntity = async (
    run: RunContext,
    write: (maintenanceId: string) => Promise<CreatedMaintenance>,
  ): Promise<CreatedMaintenance> => {
    // The id is chosen before the write so the same value reaches both the
    // entity mutation and the service call.
    const maintenanceId = crypto.randomUUID();
    let created!: CreatedMaintenance;
    await writeMaintenanceEntity({
      handle: deps.entityHandle,
      maintenanceId,
      opts: mutationOpts(run),
      apply: async () => {
        created = await write(maintenanceId);
        return toMaintenanceEntityState(created);
      },
    });
    return created;
  };

  const createAction: ActionDefinition<
    z.infer<typeof createConfigSchema>,
    MaintenanceArtifact
  > = {
    id: "create",
    displayName: "Schedule Maintenance",
    description: "Schedule a new maintenance window",
    category: "Maintenance",
    icon: "Wrench",
    config: new Versioned({ version: 1, schema: createConfigSchema }),
    produces: "maintenance.window",
    execute: async ({ config, logger, runId, scope }) => {
      // Reject unparseable timestamps up front: an Invalid Date would
      // otherwise be handed to the service and fail later in a less obvious
      // place (e.g. when it is serialised).
      const startAt = parseConfigTimestamp(config.startAt);
      if (!startAt) {
        return {
          success: false,
          error: `Invalid startAt timestamp: ${config.startAt}`,
        };
      }
      const endAt = parseConfigTimestamp(config.endAt);
      if (!endAt) {
        return {
          success: false,
          error: `Invalid endAt timestamp: ${config.endAt}`,
        };
      }
      const created = await createThroughEntity(
        { runId, scope },
        async (maintenanceId) =>
          deps.service.createMaintenance(
            {
              title: config.title,
              description: config.description,
              systemIds: config.systemIds,
              startAt,
              endAt,
              suppressNotifications: config.suppressNotifications,
            },
            maintenanceId,
          ),
      );
      const artifact = toArtifact(created);
      logger.info(`Automation scheduled maintenance ${created.id}`);
      return {
        success: true,
        externalId: created.id,
        artifact,
      };
    },
  };

  const updateAction: ActionDefinition<
    z.infer<typeof updateConfigSchema>,
    MaintenanceArtifact
  > = {
    id: "update",
    displayName: "Update Maintenance",
    description: "Update an existing maintenance window's metadata or schedule",
    category: "Maintenance",
    icon: "Wrench",
    config: new Versioned({ version: 1, schema: updateConfigSchema }),
    produces: "maintenance.window",
    execute: async ({ config, logger, runId, scope }) => {
      // Probe existence first so a missing window is a clean failure and no
      // entity write is started for it.
      const exists = await deps.service.getMaintenance(config.maintenanceId);
      if (!exists) {
        return {
          success: false,
          error: `Maintenance not found: ${config.maintenanceId}`,
        };
      }
      // Absent or empty bounds mean "leave unchanged"; a non-empty bound that
      // does not parse is reported instead of being written as Invalid Date.
      let startAt: Date | undefined;
      if (config.startAt) {
        startAt = parseConfigTimestamp(config.startAt);
        if (!startAt) {
          return {
            success: false,
            error: `Invalid startAt timestamp: ${config.startAt}`,
          };
        }
      }
      let endAt: Date | undefined;
      if (config.endAt) {
        endAt = parseConfigTimestamp(config.endAt);
        if (!endAt) {
          return {
            success: false,
            error: `Invalid endAt timestamp: ${config.endAt}`,
          };
        }
      }
      let updated!: NonNullable<
        Awaited<ReturnType<typeof deps.service.updateMaintenance>>
      >;
      await writeMaintenanceEntity({
        handle: deps.entityHandle,
        maintenanceId: config.maintenanceId,
        opts: mutationOpts({ runId, scope }),
        apply: async () => {
          const result = await deps.service.updateMaintenance({
            id: config.maintenanceId,
            title: config.title,
            description: config.description,
            systemIds: config.systemIds,
            startAt,
            endAt,
            suppressNotifications: config.suppressNotifications,
          });
          // The row can still disappear between the probe and the write.
          if (!result) {
            throw new Error(`Maintenance not found: ${config.maintenanceId}`);
          }
          updated = result;
          return toMaintenanceEntityState(updated);
        },
      });
      const artifact = toArtifact(updated);
      logger.info(`Automation updated maintenance ${updated.id}`);
      return { success: true, externalId: updated.id, artifact };
    },
  };

  const addUpdateAction: ActionDefinition<
    z.infer<typeof addUpdateConfigSchema>,
    MaintenanceArtifact
  > = {
    id: "add_update",
    displayName: "Add Maintenance Update",
    description: "Append a status-update note to a maintenance window",
    category: "Maintenance",
    icon: "MessageSquarePlus",
    config: new Versioned({ version: 1, schema: addUpdateConfigSchema }),
    produces: "maintenance.window",
    execute: async ({ config, logger, runId, scope }) => {
      let refreshed: Awaited<ReturnType<typeof deps.service.getMaintenance>>;
      // Set only when the post-write re-read finds nothing, so the catch
      // below can tell that soft failure apart from every other error.
      let missing = false;
      try {
        await writeMaintenanceEntity({
          handle: deps.entityHandle,
          maintenanceId: config.maintenanceId,
          opts: mutationOpts({ runId, scope }),
          apply: async () => {
            await deps.service.addUpdate({
              maintenanceId: config.maintenanceId,
              message: config.message,
              statusChange: config.statusChange,
            });
            // Re-read so both the artifact and the returned entity state
            // reflect the row after the update.
            refreshed = await deps.service.getMaintenance(
              config.maintenanceId,
            );
            if (!refreshed) {
              missing = true;
              throw new Error(
                `Maintenance ${config.maintenanceId} not found after update`,
              );
            }
            return toMaintenanceEntityState(refreshed);
          },
        });
      } catch (error) {
        // "Vanished mid-write" becomes a failed action result below; any
        // other error still fails the run.
        if (!missing) throw error;
      }
      if (!refreshed) {
        return {
          success: false,
          error: `Maintenance ${config.maintenanceId} not found after update`,
        };
      }
      const artifact = toArtifact(refreshed);
      logger.info(`Automation added update to maintenance ${refreshed.id}`);
      return { success: true, externalId: refreshed.id, artifact };
    },
  };

  const setSystemAction: ActionDefinition<
    z.infer<typeof setSystemConfigSchema>,
    MaintenanceArtifact
  > = {
    id: "set_system",
    displayName: "Set System Maintenance",
    description:
      "Schedule a maintenance window covering a single system, starting now and lasting `durationMinutes` minutes.",
    category: "Maintenance",
    icon: "Wrench",
    config: new Versioned({ version: 1, schema: setSystemConfigSchema }),
    produces: "maintenance.window",
    execute: async ({ config, logger, runId, scope }) => {
      // The window starts at the (injectable) current time and ends
      // `durationMinutes` later.
      const startAt = now();
      const endAt = new Date(
        startAt.getTime() + config.durationMinutes * 60_000,
      );
      const created = await createThroughEntity(
        { runId, scope },
        async (maintenanceId) =>
          deps.service.createMaintenance(
            {
              title:
                config.title ?? `Automation maintenance (${config.systemId})`,
              description: config.description,
              systemIds: [config.systemId],
              startAt,
              endAt,
              suppressNotifications: config.suppressNotifications,
            },
            maintenanceId,
          ),
      );
      const artifact = toArtifact(created);
      logger.info(
        `Automation parked system ${config.systemId} via maintenance ${created.id}`,
      );
      return { success: true, externalId: created.id, artifact };
    },
  };

  /** Result of `clear_system`: the system and the windows that were closed. */
  interface ClearSystemArtifact {
    systemId: string;
    closedMaintenanceIds: string[];
  }

  // NOTE(review): this action declares `produces: "maintenance.window"`, but
  // its artifact is `{ systemId, closedMaintenanceIds }`, which does not match
  // `maintenanceArtifactSchema`. Left unchanged to keep the external contract;
  // confirm whether a dedicated artifact type is intended.
  const clearSystemAction: ActionDefinition<
    z.infer<typeof clearSystemConfigSchema>,
    ClearSystemArtifact
  > = {
    id: "clear_system",
    displayName: "Clear System Maintenance",
    description:
      "Close every active or scheduled maintenance window that covers this system.",
    category: "Maintenance",
    icon: "Wrench",
    config: new Versioned({ version: 1, schema: clearSystemConfigSchema }),
    produces: "maintenance.window",
    execute: async ({ config, logger, runId, scope }) => {
      const active = await deps.service.getMaintenancesForSystem(
        config.systemId,
      );
      const closedIds: string[] = [];
      const message = config.message ?? "Cleared by automation";
      // Windows are closed one at a time, each through its own entity write.
      for (const window of active) {
        let closed: Awaited<ReturnType<typeof deps.service.closeMaintenance>>;
        await writeMaintenanceEntity({
          handle: deps.entityHandle,
          maintenanceId: window.id,
          opts: mutationOpts({ runId, scope }),
          apply: async () => {
            closed = await deps.service.closeMaintenance(window.id, message);
            // If the close returned nothing, report the pre-close window as
            // the entity state; the loop then skips this id.
            return toMaintenanceEntityState(closed ?? window);
          },
        });
        if (!closed) continue;
        closedIds.push(closed.id);
      }
      logger.info(
        `Automation cleared maintenance for system ${config.systemId} (${closedIds.length} window(s))`,
      );
      return {
        success: true,
        externalId: config.systemId,
        artifact: { systemId: config.systemId, closedMaintenanceIds: closedIds },
      };
    },
  };

  return [
    createAction as ActionDefinition<unknown, unknown>,
    updateAction as ActionDefinition<unknown, unknown>,
    addUpdateAction as ActionDefinition<unknown, unknown>,
    setSystemAction as ActionDefinition<unknown, unknown>,
    clearSystemAction as ActionDefinition<unknown, unknown>,
  ];
}