@checkstack/automation-backend 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. package/CHANGELOG.md +453 -0
  2. package/drizzle/0000_acoustic_diamondback.sql +80 -0
  3. package/drizzle/0001_mute_vindicator.sql +12 -0
  4. package/drizzle/0002_silky_omega_red.sql +12 -0
  5. package/drizzle/meta/0000_snapshot.json +688 -0
  6. package/drizzle/meta/0001_snapshot.json +785 -0
  7. package/drizzle/meta/0002_snapshot.json +861 -0
  8. package/drizzle/meta/_journal.json +27 -0
  9. package/drizzle.config.ts +12 -0
  10. package/package.json +41 -0
  11. package/src/action-registry.ts +83 -0
  12. package/src/action-types.ts +324 -0
  13. package/src/artifact-store.ts +140 -0
  14. package/src/artifact-type-registry.ts +64 -0
  15. package/src/automation-store.ts +227 -0
  16. package/src/builtin-actions.test.ts +185 -0
  17. package/src/builtin-actions.ts +132 -0
  18. package/src/builtin-triggers.test.ts +264 -0
  19. package/src/builtin-triggers.ts +365 -0
  20. package/src/dispatch/action-kind.ts +44 -0
  21. package/src/dispatch/condition.ts +61 -0
  22. package/src/dispatch/delay-queue.ts +91 -0
  23. package/src/dispatch/engine.test.ts +1198 -0
  24. package/src/dispatch/engine.ts +1672 -0
  25. package/src/dispatch/path-nav.ts +65 -0
  26. package/src/dispatch/render.test.ts +75 -0
  27. package/src/dispatch/render.ts +136 -0
  28. package/src/dispatch/run-state-store.ts +143 -0
  29. package/src/dispatch/run-state.ts +298 -0
  30. package/src/dispatch/scope.test.ts +40 -0
  31. package/src/dispatch/scope.ts +125 -0
  32. package/src/dispatch/stalled-sweeper.ts +164 -0
  33. package/src/dispatch/test-fixtures.ts +558 -0
  34. package/src/dispatch/trigger-subscriber.ts +397 -0
  35. package/src/dispatch/types.ts +259 -0
  36. package/src/extension-points.ts +88 -0
  37. package/src/index.ts +379 -0
  38. package/src/migration/from-webhook-subscriptions.test.ts +237 -0
  39. package/src/migration/from-webhook-subscriptions.ts +398 -0
  40. package/src/registries.test.ts +357 -0
  41. package/src/router.test.ts +724 -0
  42. package/src/router.ts +556 -0
  43. package/src/schema.ts +310 -0
  44. package/src/trigger-registry.ts +99 -0
  45. package/src/validate-definition.test.ts +306 -0
  46. package/src/validate-definition.ts +304 -0
  47. package/tsconfig.json +41 -0
@@ -0,0 +1,298 @@
1
+ /**
2
+ * Drizzle-backed implementation of `RunStore`. The dispatch engine uses
3
+ * this for every run / step / wait-lock write so durability survives
4
+ * process restarts.
5
+ *
6
+ * Kept thin: each method maps almost 1:1 to a DB statement. Concurrency
7
+ * and consistency concerns live in the calling code (the dispatcher and
8
+ * trigger subscriber).
9
+ */
10
+ import { and, desc, eq, inArray, isNotNull, isNull, lte, sql } from "drizzle-orm";
11
+ import type { SafeDatabase } from "@checkstack/backend-api";
12
+
13
+ import {
14
+ automationRunSteps,
15
+ automationRuns,
16
+ automationWaitLocks,
17
+ } from "../schema";
18
+ import type {
19
+ CreateRunInput,
20
+ CreateStepInput,
21
+ CreateWaitLockInput,
22
+ LoadedRun,
23
+ LoadedStep,
24
+ LoadedWaitLock,
25
+ RunStore,
26
+ } from "./types";
27
+
28
/**
 * The subset of the automation schema this store reads and writes. Used
 * as the type argument of the `SafeDatabase` handle passed to
 * `createRunStore`.
 */
type Schema = {
  automationRuns: typeof automationRuns;
  automationRunSteps: typeof automationRunSteps;
  automationWaitLocks: typeof automationWaitLocks;
};
33
+
34
/**
 * Run statuses that count as "still in flight". The active-run queries
 * (count / exists / cancel) all filter on this list.
 */
const ACTIVE_STATUSES = [
  "pending",
  "running",
  "waiting",
] as const;
35
+
36
/**
 * WHERE clause selecting the runs of `automationId` whose status is one
 * of `ACTIVE_STATUSES`. Shared by the count / exists / cancel queries so
 * the three can never drift apart.
 */
const activeRunsOf = (automationId: string) =>
  and(
    eq(automationRuns.automationId, automationId),
    inArray(automationRuns.status, [...ACTIVE_STATUSES]),
  );

/**
 * Map a wait-lock table row onto the `LoadedWaitLock` shape. The `kind`
 * column is narrowed with a cast, exactly as each call site did before
 * this helper existed.
 */
function toLoadedWaitLock(
  row: Omit<LoadedWaitLock, "kind"> & { kind: string },
): LoadedWaitLock {
  return {
    id: row.id,
    runId: row.runId,
    actionPath: row.actionPath,
    kind: row.kind as "trigger" | "delay",
    eventId: row.eventId,
    contextKey: row.contextKey,
    filterTemplate: row.filterTemplate,
    timeoutAt: row.timeoutAt,
    createdAt: row.createdAt,
  };
}

/**
 * Build the Drizzle-backed `RunStore`.
 *
 * Every method issues a single statement against `db`; no transactions
 * or locking are performed here.
 *
 * @param db Database handle typed against the run / step / wait-lock tables.
 * @returns A `RunStore` whose methods read and write those tables.
 */
export function createRunStore(db: SafeDatabase<Schema>): RunStore {
  return {
    /**
     * Insert a new run with status "running" and return its id.
     * @throws Error when the insert returns no row.
     */
    async createRun(input: CreateRunInput): Promise<string> {
      const [row] = await db
        .insert(automationRuns)
        .values({
          automationId: input.automationId,
          triggerId: input.triggerId,
          triggerEventId: input.triggerEventId,
          triggerPayload: input.triggerPayload,
          contextKey: input.contextKey,
          status: "running",
        })
        .returning({ id: automationRuns.id });
      if (!row) throw new Error("createRun: insert returned no rows");
      return row.id;
    },

    /**
     * Overwrite a run's status and error message. `finishedAt` is stamped
     * with the current time for terminal statuses and reset to null for
     * every other status; an omitted `errorMessage` is stored as null.
     */
    async updateRunStatus(runId, status, errorMessage): Promise<void> {
      const isTerminal =
        status === "success" ||
        status === "failed" ||
        status === "cancelled" ||
        status === "skipped";
      await db
        .update(automationRuns)
        .set({
          status,
          errorMessage: errorMessage ?? null,
          finishedAt: isTerminal ? new Date() : null,
        })
        .where(eq(automationRuns.id, runId));
    },

    /** Load one run by id, or `undefined` when no such row exists. */
    async loadRun(runId: string): Promise<LoadedRun | undefined> {
      const rows = await db
        .select()
        .from(automationRuns)
        .where(eq(automationRuns.id, runId))
        .limit(1);
      const row = rows[0];
      if (!row) return;
      return {
        id: row.id,
        automationId: row.automationId,
        triggerId: row.triggerId,
        triggerEventId: row.triggerEventId,
        triggerPayload: row.triggerPayload,
        contextKey: row.contextKey,
        status: row.status,
        errorMessage: row.errorMessage,
        startedAt: row.startedAt,
        finishedAt: row.finishedAt,
      };
    },

    /** Number of runs of `automationId` that are in an active status. */
    async countActiveRuns(automationId: string): Promise<number> {
      const rows = await db
        .select({ count: sql<number>`count(*)::int` })
        .from(automationRuns)
        .where(activeRunsOf(automationId));
      return rows[0]?.count ?? 0;
    },

    /** Whether at least one run of `automationId` is in an active status. */
    async hasActiveRun(automationId: string): Promise<boolean> {
      const rows = await db
        .select({ id: automationRuns.id })
        .from(automationRuns)
        .where(activeRunsOf(automationId))
        .limit(1);
      return rows.length > 0;
    },

    /**
     * Mark every active run of `automationId` as "cancelled", storing
     * `reason` as the error message, and return the ids of the runs that
     * were updated. Wait locks belonging to those runs are not touched.
     */
    async cancelActiveRuns(
      automationId: string,
      reason: string,
    ): Promise<string[]> {
      const rows = await db
        .update(automationRuns)
        .set({
          status: "cancelled",
          errorMessage: reason,
          finishedAt: new Date(),
        })
        .where(activeRunsOf(automationId))
        .returning({ id: automationRuns.id });
      return rows.map((r) => r.id);
    },

    /**
     * Insert a step row with status "running" and `attempts` = 1, and
     * return its id.
     * @throws Error when the insert returns no row.
     */
    async createStep(input: CreateStepInput): Promise<string> {
      const [row] = await db
        .insert(automationRunSteps)
        .values({
          runId: input.runId,
          actionPath: input.actionPath,
          actionId: input.actionId,
          actionKind: input.actionKind,
          providerActionId: input.providerActionId,
          status: "running",
          attempts: 1,
        })
        .returning({ id: automationRunSteps.id });
      if (!row) throw new Error("createStep: insert returned no rows");
      return row.id;
    },

    /**
     * Apply `patch` to a step. `errorMessage` and `resultPayload` are
     * written as null when the patch omits them. `finishedAt` is stamped
     * only for terminal statuses, and `attempts` is incremented in SQL
     * when `patch.incrementAttempts` is set.
     */
    async updateStep(stepId, patch): Promise<void> {
      const isTerminal =
        patch.status === "success" ||
        patch.status === "failed" ||
        patch.status === "skipped";
      const set: Record<string, unknown> = {
        status: patch.status,
        errorMessage: patch.errorMessage ?? null,
        resultPayload: patch.resultPayload ?? null,
      };
      if (isTerminal) set.finishedAt = new Date();
      if (patch.incrementAttempts) {
        // Increment in the database rather than read-modify-write.
        set.attempts = sql`${automationRunSteps.attempts} + 1`;
      }
      await db
        .update(automationRunSteps)
        .set(set)
        .where(eq(automationRunSteps.id, stepId));
    },

    /**
     * Return the most recently started step of `runId` at `actionPath`,
     * or `undefined` when there is none.
     */
    async findStepByPath(runId, actionPath): Promise<LoadedStep | undefined> {
      const rows = await db
        .select()
        .from(automationRunSteps)
        .where(
          and(
            eq(automationRunSteps.runId, runId),
            eq(automationRunSteps.actionPath, actionPath),
          ),
        )
        .orderBy(desc(automationRunSteps.startedAt))
        .limit(1);
      const row = rows[0];
      if (!row) return;
      return {
        id: row.id,
        runId: row.runId,
        actionPath: row.actionPath,
        actionId: row.actionId,
        actionKind: row.actionKind,
        status: row.status,
        attempts: row.attempts,
        errorMessage: row.errorMessage,
        resultPayload: row.resultPayload,
        startedAt: row.startedAt,
        finishedAt: row.finishedAt,
      };
    },

    /**
     * Insert a wait lock and return its id.
     * @throws Error when the insert returns no row.
     */
    async createWaitLock(input: CreateWaitLockInput): Promise<string> {
      const [row] = await db
        .insert(automationWaitLocks)
        .values({
          runId: input.runId,
          actionPath: input.actionPath,
          kind: input.kind,
          eventId: input.eventId,
          contextKey: input.contextKey,
          filterTemplate: input.filterTemplate,
          timeoutAt: input.timeoutAt,
        })
        .returning({ id: automationWaitLocks.id });
      if (!row) throw new Error("createWaitLock: insert returned no rows");
      return row.id;
    },

    /** Load one wait lock by id, or `undefined` when no such row exists. */
    async loadWaitLock(id) {
      const rows = await db
        .select()
        .from(automationWaitLocks)
        .where(eq(automationWaitLocks.id, id))
        .limit(1);
      const row = rows[0];
      if (!row) return;
      return toLoadedWaitLock(row);
    },

    /**
     * Find every wait lock registered for `eventId`. A null `contextKey`
     * matches only locks whose context key is NULL; otherwise the key
     * must be equal.
     */
    async findWaitLocksFor(
      eventId: string,
      contextKey: string | null,
    ): Promise<LoadedWaitLock[]> {
      const contextFilter =
        contextKey === null
          ? isNull(automationWaitLocks.contextKey)
          : eq(automationWaitLocks.contextKey, contextKey);
      const rows = await db
        .select()
        .from(automationWaitLocks)
        .where(and(eq(automationWaitLocks.eventId, eventId), contextFilter));
      return rows.map((r) => toLoadedWaitLock(r));
    },

    /** Delete one wait lock by id. */
    async deleteWaitLock(id: string): Promise<void> {
      await db.delete(automationWaitLocks).where(eq(automationWaitLocks.id, id));
    },

    /**
     * Return every wait lock that has a `timeoutAt` at or before `now`.
     * This only selects; deleting the locks is left to the caller.
     */
    async sweepExpiredWaitLocks(now: Date): Promise<LoadedWaitLock[]> {
      const rows = await db
        .select()
        .from(automationWaitLocks)
        .where(
          and(
            isNotNull(automationWaitLocks.timeoutAt),
            lte(automationWaitLocks.timeoutAt, now),
          ),
        );
      return rows.map((r) => toLoadedWaitLock(r));
    },
  };
}
@@ -0,0 +1,40 @@
1
+ import { describe, it, expect } from "bun:test";
2
+ import { SYSTEM_ACTOR } from "@checkstack/common";
3
+ import { buildInitialScope } from "./scope";
4
+
5
describe("buildInitialScope — trigger.actor", () => {
  // Arguments shared by every case; individual tests layer `actor` on top.
  const baseArgs = {
    triggerId: "t1",
    triggerEventId: "incident.created",
    payload: { incidentId: "i1" },
    startedAt: new Date("2026-05-30T00:00:00.000Z"),
  };

  it("defaults trigger.actor to the system actor when none is supplied", () => {
    const { trigger } = buildInitialScope(baseArgs);
    expect((trigger as { actor: unknown }).actor).toEqual(SYSTEM_ACTOR);
  });

  it("exposes the supplied actor under trigger.actor alongside the payload", () => {
    const suppliedActor = { type: "user", id: "user-1", name: "Nico" } as const;
    const { trigger } = buildInitialScope({ ...baseArgs, actor: suppliedActor });
    const view = trigger as { actor: unknown; payload: unknown };
    expect(view.actor).toEqual(suppliedActor);
    expect(view.payload).toEqual({ incidentId: "i1" });
  });

  it("exposes trigger.id and the canonical trigger.event (with eventId alias)", () => {
    const { trigger } = buildInitialScope(baseArgs);
    const view = trigger as { id: unknown; event: unknown; eventId: unknown };
    expect(view.id).toBe("t1");
    // `event` is the canonical key (editor + script contract); `eventId`
    // is kept as a back-compat alias and must carry the same value.
    expect(view.event).toBe("incident.created");
    expect(view.eventId).toBe("incident.created");
  });
});
@@ -0,0 +1,125 @@
1
+ /**
2
+ * Variable scope construction for the dispatch engine.
3
+ *
4
+ * The shape exposed to templates is:
5
+ *
6
+ * trigger.id, trigger.event, trigger.payload.*
7
+ * trigger.actor.{type,id,name} (who/what caused the event)
8
+ * variables.* (from `variables` blocks)
9
+ * artifacts.<actionId>.<localArtifactName>.* (set when an action produces)
10
+ * repeat.item, repeat.index (only inside a repeat)
11
+ * now (helper, ISO string of dispatch start)
12
+ *
13
+ * Keep this shape stable — the editor's intellisense reads it.
14
+ */
15
+ import { SYSTEM_ACTOR, type Actor } from "@checkstack/common";
16
+ import type { DispatchContext } from "./types";
17
+
18
/**
 * Create the root variable scope for a run.
 *
 * The returned object has four top-level keys:
 *  - `trigger`: `id`, `event`, `eventId`, `actor`, `payload`
 *  - `variables`: empty object
 *  - `artifacts`: empty object
 *  - `now`: `startedAt` rendered as an ISO-8601 string
 *
 * `trigger.eventId` duplicates `trigger.event`; it is a back-compat alias
 * so references to the older key keep resolving. When `actor` is omitted
 * the system actor is used, so the scope is always complete.
 *
 * @param args Trigger identity, payload, optional actor and run start time.
 * @returns A fresh scope object.
 */
export function buildInitialScope(args: {
  triggerId: string;
  triggerEventId: string;
  payload: Record<string, unknown>;
  actor?: Actor;
  startedAt: Date;
}): Record<string, unknown> {
  const { triggerId, triggerEventId, payload, startedAt } = args;
  const actor = args.actor ?? SYSTEM_ACTOR;

  const trigger = {
    id: triggerId,
    event: triggerEventId,
    // Alias of `event`, retained for older references.
    eventId: triggerEventId,
    actor,
    payload,
  };

  return {
    trigger,
    variables: {},
    artifacts: {},
    now: startedAt.toISOString(),
  };
}
50
+
51
/**
 * Derive a child scope: a new object holding every top-level key of
 * `scope`, overridden or extended by the keys of `patch`.
 *
 * The copy is shallow. Nested objects are shared by reference between
 * parent and child, so callers must treat them as immutable. Neither
 * input object is modified.
 *
 * @param scope Parent scope.
 * @param patch Top-level keys to add or replace.
 * @returns The new child scope.
 */
export function extendScope(
  scope: Record<string, unknown>,
  patch: Record<string, unknown>,
): Record<string, unknown> {
  const child: Record<string, unknown> = { ...scope, ...patch };
  return child;
}
64
+
65
/**
 * Return a copy of `scope` whose `variables` namespace is the existing
 * one merged with `newVars` (new values win on key collisions). A scope
 * without a `variables` entry is treated as having an empty one. The
 * input scope and its `variables` object are left untouched.
 *
 * @param scope Current scope.
 * @param newVars Variables to add or overwrite.
 * @returns The new scope.
 */
export function extendVariables(
  scope: Record<string, unknown>,
  newVars: Record<string, unknown>,
): Record<string, unknown> {
  const current = (scope.variables as Record<string, unknown>) ?? {};
  const variables = { ...current, ...newVars };
  return { ...scope, variables };
}
76
+
77
/**
 * Return a copy of `scope` with its `repeat` key set to the given
 * iteration info, replacing any `repeat` already present. The `repeat`
 * object is attached by reference, not cloned.
 *
 * @param scope Scope of the enclosing block.
 * @param repeat Current iteration index and, optionally, the current item.
 * @returns The scope to use for the iteration body.
 */
export function withRepeatContext(
  scope: Record<string, unknown>,
  repeat: { index: number; item?: unknown },
): Record<string, unknown> {
  const iterationScope: Record<string, unknown> = { ...scope };
  iterationScope.repeat = repeat;
  return iterationScope;
}
86
+
87
/**
 * Look up the artifacts an action declared in `consumes`.
 *
 * Each entry of `consumes` is a local artifact id. It is qualified as
 * `${ownerPluginId}.${localId}` for the store lookup, which is restricted
 * to open artifacts of the current run's automation and context key.
 * Found artifacts are returned keyed by the *local* id; ids with no
 * match are simply absent from the result.
 *
 * Lookups are issued one at a time, in the order given. Which artifact
 * is picked when several match is decided by `artifactStore.find`, not
 * by this function.
 *
 * @param ctx Dispatch context supplying the artifact store and current run.
 * @param consumes Local artifact ids the action wants to read.
 * @param ownerPluginId Plugin id used to qualify each local id.
 * @returns Map from local artifact id to the found artifact's `data`.
 */
export async function resolveConsumedArtifacts(
  ctx: DispatchContext,
  consumes: ReadonlyArray<string>,
  ownerPluginId: string,
): Promise<Record<string, unknown>> {
  const resolved: Record<string, unknown> = {};
  if (consumes.length === 0) return resolved;

  for (const localId of consumes) {
    // Stored artifact types are fully qualified with the producing
    // plugin's id; same-plugin handoff means the consumer's own plugin id
    // is the right prefix.
    const match = await ctx.deps.artifactStore.find({
      automationId: ctx.run.automation.id,
      contextKey: ctx.run.contextKey,
      artifactType: `${ownerPluginId}.${localId}`,
      onlyOpen: true,
    });
    if (!match) continue;
    // Keyed by local id so the action reads `consumedArtifacts[localId]`.
    resolved[localId] = match.data;
  }

  return resolved;
}
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Stalled-run sweeper.
3
+ *
4
+ * Periodically scans for runs whose heartbeat is older than a
5
+ * configurable threshold and resumes them. Combined with the per-run
6
+ * Postgres advisory lock, this gives the platform restart safety + safe
7
+ * horizontal scaling: when an instance crashes mid-execution, another
8
+ * instance picks up the dropped runs after the heartbeat threshold
9
+ * elapses.
10
+ *
11
+ * Also sweeps expired wait locks:
12
+ * - `kind: "delay"` locks past `timeoutAt` resume the run (in case
13
+ * the queue scheduler lost the job).
14
+ * - `kind: "trigger"` locks past `timeoutAt` fail the run with a
15
+ * clear "wait timed out" error.
16
+ */
17
+ import type { Logger } from "@checkstack/backend-api";
18
+
19
+ import type { AutomationStore } from "../automation-store";
20
+ import { recoverStalledRun, resumeRun } from "./engine";
21
+ import type { DispatchDeps } from "./types";
22
+
23
/** Everything `startStalledSweeper` needs to run its periodic sweep. */
export interface StalledSweeperArgs {
  /** Dispatch dependencies; the sweeper uses `runStore` and `runStateStore`. */
  deps: DispatchDeps;
  /** Used to look up the automation a run belongs to. */
  automationStore: AutomationStore;
  /** Receives debug / info / warn messages from the sweeper. */
  logger: Logger;
  /**
   * Heartbeat age in milliseconds above which a run is treated as
   * stalled. Defaults to 60 000 (one minute).
   */
  staleAfterMs?: number;
  /** Delay between sweeps in milliseconds. Defaults to 30 000. */
  intervalMs?: number;
}
32
+
33
/** Handle returned by `startStalledSweeper`. */
export interface StalledSweeper {
  /**
   * Execute a single sweep cycle immediately, independent of the timer.
   * Mainly useful in tests.
   */
  sweep: () => Promise<void>;
  /** Cancel the periodic timer. Calling it more than once is harmless. */
  stop: () => void;
}
39
+
40
/** Default heartbeat age after which a run counts as stalled: one minute. */
const DEFAULT_STALE_MS = 60 * 1_000;
/** Default pause between two sweeps: thirty seconds. */
const DEFAULT_INTERVAL_MS = 30 * 1_000;
42
+
43
/**
 * Start the periodic sweeper and return a handle to it.
 *
 * Each cycle first recovers stalled runs, then processes expired wait
 * locks. A cycle started by the timer never rejects: failures are logged
 * as warnings and the next tick tries again. A cycle started through the
 * returned `sweep` propagates its error to the caller.
 *
 * @param args Dependencies plus optional `staleAfterMs` / `intervalMs`
 *   overrides (defaults: `DEFAULT_STALE_MS`, `DEFAULT_INTERVAL_MS`).
 * @returns `sweep` to run one cycle on demand and `stop` to cancel the timer.
 */
export function startStalledSweeper(
  args: StalledSweeperArgs,
): StalledSweeper {
  const staleMs = args.staleAfterMs ?? DEFAULT_STALE_MS;
  const intervalMs = args.intervalMs ?? DEFAULT_INTERVAL_MS;

  const sweep = async (): Promise<void> => {
    await sweepStalledRuns(args, staleMs);
    await sweepExpiredWaitLocks(args);
  };

  let timer: ReturnType<typeof setInterval> | undefined = setInterval(() => {
    sweep().catch((error: unknown) => {
      // Anything can be thrown, not just Error instances; narrow before
      // reading `.message` so the log never says "undefined".
      const reason = error instanceof Error ? error.message : String(error);
      args.logger.warn(`automation stalled sweeper failed: ${reason}`);
    });
  }, intervalMs);

  return {
    sweep,
    stop: () => {
      if (timer) {
        clearInterval(timer);
        // Forget the handle so repeated stop() calls are no-ops.
        timer = undefined;
      }
    },
  };
}
72
+
73
/**
 * Recover every run whose heartbeat is older than `staleMs`.
 *
 * For each stalled run id the advisory lock is tried first; runs whose
 * lock is already held are skipped. With the lock held:
 *  - a run that no longer exists is skipped;
 *  - a run whose automation no longer exists is marked "failed" and its
 *    run state is cleared;
 *  - otherwise the run is handed to `recoverStalledRun`.
 *
 * A failure while handling one run is logged and does not stop the loop.
 * The advisory lock is always released in `finally`.
 *
 * @param args Sweeper dependencies.
 * @param staleMs Heartbeat age (ms) above which a run counts as stalled.
 */
async function sweepStalledRuns(
  args: StalledSweeperArgs,
  staleMs: number,
): Promise<void> {
  const threshold = new Date(Date.now() - staleMs);
  const stalled = await args.deps.runStateStore.findStalledRunIds(threshold);
  if (stalled.length === 0) return;
  args.logger.debug(
    `automation sweeper: ${stalled.length} stalled run(s) detected`,
  );

  for (const runId of stalled) {
    const acquired = await args.deps.runStateStore.tryAdvisoryLock(runId);
    if (!acquired) continue; // another instance already on it
    try {
      const run = await args.deps.runStore.loadRun(runId);
      if (!run) continue;
      const automation = await args.automationStore.getById(run.automationId);
      if (!automation) {
        // Nothing left to execute against: close the run out.
        await args.deps.runStore.updateRunStatus(
          runId,
          "failed",
          "automation deleted while run was stalled",
        );
        await args.deps.runStateStore.clear(runId);
        continue;
      }
      args.logger.info(`automation sweeper: recovering run ${runId}`);
      await recoverStalledRun(args.deps, {
        runId,
        automation: {
          id: automation.id,
          name: automation.name,
          status: automation.status,
          definition: automation.definition,
        },
      });
    } catch (error: unknown) {
      // Narrow before reading `.message`: thrown values are not
      // guaranteed to be Error instances.
      const reason = error instanceof Error ? error.message : String(error);
      args.logger.warn(
        `automation sweeper failed to recover ${runId}: ${reason}`,
      );
    } finally {
      await args.deps.runStateStore.releaseAdvisoryLock(runId);
    }
  }
}
119
+
120
/**
 * Run statuses that are final. A run in one of these must not have its
 * status overwritten when a leftover wait lock expires.
 */
const TERMINAL_RUN_STATUSES: ReadonlySet<string> = new Set([
  "success",
  "failed",
  "cancelled",
  "skipped",
]);

/**
 * Process every wait lock whose `timeoutAt` has passed.
 *
 *  - `kind: "delay"`: the lock is deleted and the run is resumed at the
 *    lock's action path. If the run or its automation no longer exists,
 *    the lock is deleted and nothing else happens.
 *  - any other kind (trigger waits): the lock is deleted and the run is
 *    marked "failed" with a timeout message, unless the run is already
 *    in a terminal status. A wait lock can outlive its run's
 *    cancellation or completion, and such a run must keep its original
 *    outcome. Run state is cleared either way.
 *
 * Each lock is handled in isolation: a failure is logged as a warning
 * and the remaining locks are still processed.
 *
 * @param args Sweeper dependencies.
 */
async function sweepExpiredWaitLocks(
  args: StalledSweeperArgs,
): Promise<void> {
  const now = new Date();
  const expired = await args.deps.runStore.sweepExpiredWaitLocks(now);
  if (expired.length === 0) return;

  for (const lock of expired) {
    try {
      const run = await args.deps.runStore.loadRun(lock.runId);

      if (lock.kind === "delay") {
        // The queue scheduler may have lost the job — wake the run
        // ourselves. Idempotent: resumeRun takes the advisory lock and
        // skips if someone else already resumed.
        if (!run) {
          await args.deps.runStore.deleteWaitLock(lock.id);
          continue;
        }
        const automation = await args.automationStore.getById(
          run.automationId,
        );
        if (!automation) {
          await args.deps.runStore.deleteWaitLock(lock.id);
          continue;
        }
        await args.deps.runStore.deleteWaitLock(lock.id);
        await resumeRun(args.deps, {
          runId: lock.runId,
          automation: {
            id: automation.id,
            name: automation.name,
            status: automation.status,
            definition: automation.definition,
          },
          waitedAtPath: lock.actionPath,
        });
        continue;
      }

      // Trigger lock expired without firing.
      await args.deps.runStore.deleteWaitLock(lock.id);
      const alreadyFinished =
        run !== undefined && TERMINAL_RUN_STATUSES.has(run.status);
      if (!alreadyFinished) {
        await args.deps.runStore.updateRunStatus(
          lock.runId,
          "failed",
          `wait_for_trigger timed out waiting for ${lock.eventId}`,
        );
      }
      await args.deps.runStateStore.clear(lock.runId);
    } catch (error: unknown) {
      // One bad lock must not block the rest of the sweep.
      const reason = error instanceof Error ? error.message : String(error);
      args.logger.warn(
        `automation sweeper failed to process expired wait lock ${lock.id}: ${reason}`,
      );
    }
  }
}