@checkstack/automation-backend 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +453 -0
- package/drizzle/0000_acoustic_diamondback.sql +80 -0
- package/drizzle/0001_mute_vindicator.sql +12 -0
- package/drizzle/0002_silky_omega_red.sql +12 -0
- package/drizzle/meta/0000_snapshot.json +688 -0
- package/drizzle/meta/0001_snapshot.json +785 -0
- package/drizzle/meta/0002_snapshot.json +861 -0
- package/drizzle/meta/_journal.json +27 -0
- package/drizzle.config.ts +12 -0
- package/package.json +41 -0
- package/src/action-registry.ts +83 -0
- package/src/action-types.ts +324 -0
- package/src/artifact-store.ts +140 -0
- package/src/artifact-type-registry.ts +64 -0
- package/src/automation-store.ts +227 -0
- package/src/builtin-actions.test.ts +185 -0
- package/src/builtin-actions.ts +132 -0
- package/src/builtin-triggers.test.ts +264 -0
- package/src/builtin-triggers.ts +365 -0
- package/src/dispatch/action-kind.ts +44 -0
- package/src/dispatch/condition.ts +61 -0
- package/src/dispatch/delay-queue.ts +91 -0
- package/src/dispatch/engine.test.ts +1198 -0
- package/src/dispatch/engine.ts +1672 -0
- package/src/dispatch/path-nav.ts +65 -0
- package/src/dispatch/render.test.ts +75 -0
- package/src/dispatch/render.ts +136 -0
- package/src/dispatch/run-state-store.ts +143 -0
- package/src/dispatch/run-state.ts +298 -0
- package/src/dispatch/scope.test.ts +40 -0
- package/src/dispatch/scope.ts +125 -0
- package/src/dispatch/stalled-sweeper.ts +164 -0
- package/src/dispatch/test-fixtures.ts +558 -0
- package/src/dispatch/trigger-subscriber.ts +397 -0
- package/src/dispatch/types.ts +259 -0
- package/src/extension-points.ts +88 -0
- package/src/index.ts +379 -0
- package/src/migration/from-webhook-subscriptions.test.ts +237 -0
- package/src/migration/from-webhook-subscriptions.ts +398 -0
- package/src/registries.test.ts +357 -0
- package/src/router.test.ts +724 -0
- package/src/router.ts +556 -0
- package/src/schema.ts +310 -0
- package/src/trigger-registry.ts +99 -0
- package/src/validate-definition.test.ts +306 -0
- package/src/validate-definition.ts +304 -0
- package/tsconfig.json +41 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Drizzle-backed implementation of `RunStore`. The dispatch engine uses
|
|
3
|
+
* this for every run / step / wait-lock write so durability survives
|
|
4
|
+
* process restarts.
|
|
5
|
+
*
|
|
6
|
+
* Kept thin: each method maps almost 1:1 to a DB statement. Concurrency
|
|
7
|
+
* and consistency concerns live in the calling code (the dispatcher and
|
|
8
|
+
* trigger subscriber).
|
|
9
|
+
*/
|
|
10
|
+
import { and, desc, eq, inArray, isNotNull, isNull, lte, sql } from "drizzle-orm";
|
|
11
|
+
import type { SafeDatabase } from "@checkstack/backend-api";
|
|
12
|
+
|
|
13
|
+
import {
|
|
14
|
+
automationRunSteps,
|
|
15
|
+
automationRuns,
|
|
16
|
+
automationWaitLocks,
|
|
17
|
+
} from "../schema";
|
|
18
|
+
import type {
|
|
19
|
+
CreateRunInput,
|
|
20
|
+
CreateStepInput,
|
|
21
|
+
CreateWaitLockInput,
|
|
22
|
+
LoadedRun,
|
|
23
|
+
LoadedStep,
|
|
24
|
+
LoadedWaitLock,
|
|
25
|
+
RunStore,
|
|
26
|
+
} from "./types";
|
|
27
|
+
|
|
28
|
+
/**
 * Drizzle tables the run store touches; used as the schema type argument
 * of `SafeDatabase`.
 */
type Schema = {
  automationRuns: typeof automationRuns;
  automationRunSteps: typeof automationRunSteps;
  automationWaitLocks: typeof automationWaitLocks;
};

/** Run statuses matched by the count / has / cancel "active run" queries. */
const ACTIVE_STATUSES = ["pending", "running", "waiting"] as const;

/**
 * WHERE predicate selecting the runs of `automationId` whose status is one
 * of `ACTIVE_STATUSES`. Shared by `countActiveRuns`, `hasActiveRun` and
 * `cancelActiveRuns` so the three queries cannot drift apart.
 */
function activeRunsOf(automationId: string) {
  return and(
    eq(automationRuns.automationId, automationId),
    inArray(automationRuns.status, [...ACTIVE_STATUSES]),
  );
}

/**
 * Copy a wait-lock row into the `LoadedWaitLock` shape, field by field.
 * `kind` is asserted to the `"trigger" | "delay"` union exactly as the
 * individual methods previously did inline.
 */
function toLoadedWaitLock(
  row: Omit<LoadedWaitLock, "kind"> & { kind: string },
): LoadedWaitLock {
  return {
    id: row.id,
    runId: row.runId,
    actionPath: row.actionPath,
    kind: row.kind as "trigger" | "delay",
    eventId: row.eventId,
    contextKey: row.contextKey,
    filterTemplate: row.filterTemplate,
    timeoutAt: row.timeoutAt,
    createdAt: row.createdAt,
  };
}

/**
 * Build a `RunStore` on top of the given database handle. Every method
 * issues a single statement against one of the three automation tables.
 *
 * @param db Database handle typed against the run / step / wait-lock tables.
 * @returns The `RunStore` implementation.
 */
export function createRunStore(db: SafeDatabase<Schema>): RunStore {
  return {
    /**
     * Insert a run row with status `"running"` and return its id.
     * @throws Error when the insert returns no row.
     */
    async createRun(input: CreateRunInput): Promise<string> {
      const [inserted] = await db
        .insert(automationRuns)
        .values({
          automationId: input.automationId,
          triggerId: input.triggerId,
          triggerEventId: input.triggerEventId,
          triggerPayload: input.triggerPayload,
          contextKey: input.contextKey,
          status: "running",
        })
        .returning({ id: automationRuns.id });
      if (!inserted) throw new Error("createRun: insert returned no rows");
      return inserted.id;
    },

    /**
     * Overwrite a run's status and error message. `finishedAt` is stamped
     * with the current time for terminal statuses and reset to `null`
     * otherwise; a missing error message is stored as `null`.
     */
    async updateRunStatus(runId, status, errorMessage): Promise<void> {
      const isTerminal =
        status === "success" ||
        status === "failed" ||
        status === "cancelled" ||
        status === "skipped";
      await db
        .update(automationRuns)
        .set({
          status,
          errorMessage: errorMessage ?? null,
          finishedAt: isTerminal ? new Date() : null,
        })
        .where(eq(automationRuns.id, runId));
    },

    /** Fetch one run by id, or `undefined` when no row matches. */
    async loadRun(runId: string): Promise<LoadedRun | undefined> {
      const [row] = await db
        .select()
        .from(automationRuns)
        .where(eq(automationRuns.id, runId))
        .limit(1);
      if (!row) return undefined;
      return {
        id: row.id,
        automationId: row.automationId,
        triggerId: row.triggerId,
        triggerEventId: row.triggerEventId,
        triggerPayload: row.triggerPayload,
        contextKey: row.contextKey,
        status: row.status,
        errorMessage: row.errorMessage,
        startedAt: row.startedAt,
        finishedAt: row.finishedAt,
      };
    },

    /** Number of runs of the automation in an active status (0 if none). */
    async countActiveRuns(automationId: string): Promise<number> {
      const [row] = await db
        .select({ count: sql<number>`count(*)::int` })
        .from(automationRuns)
        .where(activeRunsOf(automationId));
      return row?.count ?? 0;
    },

    /** Whether at least one run of the automation is in an active status. */
    async hasActiveRun(automationId: string): Promise<boolean> {
      const rows = await db
        .select({ id: automationRuns.id })
        .from(automationRuns)
        .where(activeRunsOf(automationId))
        .limit(1);
      return rows.length > 0;
    },

    /**
     * Mark every active run of the automation as `"cancelled"`, storing
     * `reason` as the error message and stamping `finishedAt`.
     * @returns Ids of the runs that were updated.
     */
    async cancelActiveRuns(
      automationId: string,
      reason: string,
    ): Promise<string[]> {
      const cancelled = await db
        .update(automationRuns)
        .set({
          status: "cancelled",
          errorMessage: reason,
          finishedAt: new Date(),
        })
        .where(activeRunsOf(automationId))
        .returning({ id: automationRuns.id });
      return cancelled.map((run) => run.id);
    },

    /**
     * Insert a step row with status `"running"` and `attempts` 1, and
     * return its id.
     * @throws Error when the insert returns no row.
     */
    async createStep(input: CreateStepInput): Promise<string> {
      const [inserted] = await db
        .insert(automationRunSteps)
        .values({
          runId: input.runId,
          actionPath: input.actionPath,
          actionId: input.actionId,
          actionKind: input.actionKind,
          providerActionId: input.providerActionId,
          status: "running",
          attempts: 1,
        })
        .returning({ id: automationRunSteps.id });
      if (!inserted) throw new Error("createStep: insert returned no rows");
      return inserted.id;
    },

    /**
     * Apply a patch to a step. Error message and result payload are always
     * written (missing values become `null`); `finishedAt` is stamped only
     * for the terminal statuses; `attempts` is incremented in SQL when the
     * patch asks for it.
     */
    async updateStep(stepId, patch): Promise<void> {
      const isTerminal =
        patch.status === "success" ||
        patch.status === "failed" ||
        patch.status === "skipped";
      const set: Record<string, unknown> = {
        status: patch.status,
        errorMessage: patch.errorMessage ?? null,
        resultPayload: patch.resultPayload ?? null,
      };
      if (isTerminal) set.finishedAt = new Date();
      if (patch.incrementAttempts) {
        // Increment inside the statement rather than read-modify-write.
        set.attempts = sql`${automationRunSteps.attempts} + 1`;
      }
      await db
        .update(automationRunSteps)
        .set(set)
        .where(eq(automationRunSteps.id, stepId));
    },

    /**
     * Most recently started step of a run at the given action path, or
     * `undefined` when the run has no step there.
     */
    async findStepByPath(runId, actionPath): Promise<LoadedStep | undefined> {
      const [row] = await db
        .select()
        .from(automationRunSteps)
        .where(
          and(
            eq(automationRunSteps.runId, runId),
            eq(automationRunSteps.actionPath, actionPath),
          ),
        )
        .orderBy(desc(automationRunSteps.startedAt))
        .limit(1);
      if (!row) return;
      return {
        id: row.id,
        runId: row.runId,
        actionPath: row.actionPath,
        actionId: row.actionId,
        actionKind: row.actionKind,
        status: row.status,
        attempts: row.attempts,
        errorMessage: row.errorMessage,
        resultPayload: row.resultPayload,
        startedAt: row.startedAt,
        finishedAt: row.finishedAt,
      };
    },

    /**
     * Insert a wait-lock row and return its id.
     * @throws Error when the insert returns no row.
     */
    async createWaitLock(input: CreateWaitLockInput): Promise<string> {
      const [inserted] = await db
        .insert(automationWaitLocks)
        .values({
          runId: input.runId,
          actionPath: input.actionPath,
          kind: input.kind,
          eventId: input.eventId,
          contextKey: input.contextKey,
          filterTemplate: input.filterTemplate,
          timeoutAt: input.timeoutAt,
        })
        .returning({ id: automationWaitLocks.id });
      if (!inserted) throw new Error("createWaitLock: insert returned no rows");
      return inserted.id;
    },

    /** Fetch one wait lock by id, or `undefined` when no row matches. */
    async loadWaitLock(id) {
      const [row] = await db
        .select()
        .from(automationWaitLocks)
        .where(eq(automationWaitLocks.id, id))
        .limit(1);
      if (!row) return;
      return toLoadedWaitLock(row);
    },

    /**
     * Wait locks registered for `eventId`. A `null` context key matches
     * only locks whose context key is NULL; otherwise the key must be equal.
     */
    async findWaitLocksFor(
      eventId: string,
      contextKey: string | null,
    ): Promise<LoadedWaitLock[]> {
      const contextFilter =
        contextKey === null
          ? isNull(automationWaitLocks.contextKey)
          : eq(automationWaitLocks.contextKey, contextKey);
      const rows = await db
        .select()
        .from(automationWaitLocks)
        .where(and(eq(automationWaitLocks.eventId, eventId), contextFilter));
      return rows.map((row) => toLoadedWaitLock(row));
    },

    /** Delete the wait lock with the given id. */
    async deleteWaitLock(id: string): Promise<void> {
      await db.delete(automationWaitLocks).where(eq(automationWaitLocks.id, id));
    },

    /**
     * Wait locks that have a `timeoutAt` at or before `now`. This only
     * reads; the rows are not deleted here.
     */
    async sweepExpiredWaitLocks(now: Date): Promise<LoadedWaitLock[]> {
      const rows = await db
        .select()
        .from(automationWaitLocks)
        .where(
          and(
            isNotNull(automationWaitLocks.timeoutAt),
            lte(automationWaitLocks.timeoutAt, now),
          ),
        );
      return rows.map((row) => toLoadedWaitLock(row));
    },
  };
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { SYSTEM_ACTOR } from "@checkstack/common";
|
|
3
|
+
import { buildInitialScope } from "./scope";
|
|
4
|
+
|
|
5
|
+
describe("buildInitialScope — trigger.actor", () => {
  // Arguments shared by every case; a test adds `actor` on top when needed.
  const sharedArgs = {
    triggerId: "t1",
    triggerEventId: "incident.created",
    payload: { incidentId: "i1" },
    startedAt: new Date("2026-05-30T00:00:00.000Z"),
  };

  it("defaults trigger.actor to the system actor when none is supplied", () => {
    const { trigger } = buildInitialScope(sharedArgs);
    expect((trigger as { actor: unknown }).actor).toEqual(SYSTEM_ACTOR);
  });

  it("exposes the supplied actor under trigger.actor alongside the payload", () => {
    const suppliedActor = { type: "user", id: "user-1", name: "Nico" } as const;
    const { trigger } = buildInitialScope({
      ...sharedArgs,
      actor: suppliedActor,
    });
    const view = trigger as { actor: unknown; payload: unknown };
    expect(view.actor).toEqual(suppliedActor);
    expect(view.payload).toEqual({ incidentId: "i1" });
  });

  it("exposes trigger.id and the canonical trigger.event (with eventId alias)", () => {
    const { trigger } = buildInitialScope(sharedArgs);
    const view = trigger as { id: unknown; event: unknown; eventId: unknown };
    expect(view.id).toBe("t1");
    // `event` is the canonical key (editor + script contract); `eventId`
    // is kept as a back-compat alias carrying the same value.
    expect(view.event).toBe("incident.created");
    expect(view.eventId).toBe("incident.created");
  });
});
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Variable scope construction for the dispatch engine.
|
|
3
|
+
*
|
|
4
|
+
* The shape exposed to templates is:
|
|
5
|
+
*
|
|
6
|
+
* trigger.id, trigger.event, trigger.payload.*
|
|
7
|
+
* trigger.actor.{type,id,name} (who/what caused the event)
|
|
8
|
+
* variables.* (from `variables` blocks)
|
|
9
|
+
* artifacts.<actionId>.<localArtifactName>.* (set when an action produces)
|
|
10
|
+
* repeat.item, repeat.index (only inside a repeat)
|
|
11
|
+
* now (helper, ISO string of dispatch start)
|
|
12
|
+
*
|
|
13
|
+
* Keep this shape stable — the editor's intellisense reads it.
|
|
14
|
+
*/
|
|
15
|
+
import { SYSTEM_ACTOR, type Actor } from "@checkstack/common";
|
|
16
|
+
import type { DispatchContext } from "./types";
|
|
17
|
+
|
|
18
|
+
/**
 * Create the scope a run starts with. The returned object has four
 * top-level keys: `trigger` (id, event, the `eventId` alias, actor and
 * payload), empty `variables` and `artifacts` namespaces, and `now` — the
 * ISO string of `startedAt`.
 *
 * @param args.actor Who/what caused the originating event. When absent,
 *   `SYSTEM_ACTOR` is used so the scope is always complete.
 * @returns A freshly built scope object.
 */
export function buildInitialScope(args: {
  triggerId: string;
  triggerEventId: string;
  payload: Record<string, unknown>;
  actor?: Actor;
  startedAt: Date;
}): Record<string, unknown> {
  const trigger = {
    id: args.triggerId,
    event: args.triggerEventId,
    // `eventId` duplicates `event`: it is the former internal key and is
    // kept so older saved scope snapshots / automations that reference it
    // keep resolving.
    eventId: args.triggerEventId,
    actor: args.actor ?? SYSTEM_ACTOR,
    payload: args.payload,
  };
  return {
    trigger,
    variables: {},
    artifacts: {},
    now: args.startedAt.toISOString(),
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Produce a child scope: a new object holding every top-level key of
 * `scope`, with the keys of `patch` added or overriding. Neither input is
 * modified. The copy is shallow — nested objects are shared with the
 * parent scope, not cloned.
 */
export function extendScope(
  scope: Record<string, unknown>,
  patch: Record<string, unknown>,
): Record<string, unknown> {
  const child: Record<string, unknown> = { ...scope, ...patch };
  return child;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Return a copy of `scope` whose `variables` namespace is the existing
 * one merged with `newVars` (new values win on key clashes). A missing
 * `variables` entry is treated as empty. The input scope and its
 * `variables` object are left untouched.
 */
export function extendVariables(
  scope: Record<string, unknown>,
  newVars: Record<string, unknown>,
): Record<string, unknown> {
  const inherited = (scope.variables as Record<string, unknown>) ?? {};
  const variables = { ...inherited, ...newVars };
  return { ...scope, variables };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Return a copy of `scope` with its `repeat` key set to the given
 * iteration info (replacing any `repeat` already present). Used for the
 * body of a repeat iteration.
 */
export function withRepeatContext(
  scope: Record<string, unknown>,
  repeat: { index: number; item?: unknown },
): Record<string, unknown> {
  const iterationScope: Record<string, unknown> = { ...scope, repeat: repeat };
  return iterationScope;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Look up the artifacts an action declared in `consumes` and return their
 * data keyed by the local artifact id.
 *
 * Each local id is qualified as `${ownerPluginId}.${localId}` and looked up
 * through `ctx.deps.artifactStore.find`, restricted to the current
 * automation, the run's context key, and open artifacts. Ids with no match
 * are simply absent from the result. Lookups run one after another, in the
 * order given.
 *
 * Which artifact wins when several match is decided by the artifact store,
 * not here.
 *
 * @param ctx Dispatch context supplying the artifact store and current run.
 * @param consumes Local artifact ids the calling action consumes.
 * @param ownerPluginId Plugin id of the consuming action, used to qualify ids.
 * @returns Map from local artifact id to the found artifact's `data`.
 */
export async function resolveConsumedArtifacts(
  ctx: DispatchContext,
  consumes: ReadonlyArray<string>,
  ownerPluginId: string,
): Promise<Record<string, unknown>> {
  const consumed: Record<string, unknown> = {};
  if (consumes.length === 0) return consumed;

  for (const localId of consumes) {
    // Stored artifact types are fully qualified; the result stays keyed by
    // the local id because that is what the action's `execute` reads.
    const match = await ctx.deps.artifactStore.find({
      automationId: ctx.run.automation.id,
      contextKey: ctx.run.contextKey,
      artifactType: `${ownerPluginId}.${localId}`,
      onlyOpen: true,
    });
    if (!match) continue;
    consumed[localId] = match.data;
  }

  return consumed;
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stalled-run sweeper.
|
|
3
|
+
*
|
|
4
|
+
* Periodically scans for runs whose heartbeat is older than a
|
|
5
|
+
* configurable threshold and resumes them. Combined with the per-run
|
|
6
|
+
* Postgres advisory lock, this gives the platform restart safety + safe
|
|
7
|
+
* horizontal scaling: when an instance crashes mid-execution, another
|
|
8
|
+
* instance picks up the dropped runs after the heartbeat threshold
|
|
9
|
+
* elapses.
|
|
10
|
+
*
|
|
11
|
+
* Also sweeps expired wait locks:
|
|
12
|
+
* - `kind: "delay"` locks past `timeoutAt` resume the run (in case
|
|
13
|
+
* the queue scheduler lost the job).
|
|
14
|
+
* - `kind: "trigger"` locks past `timeoutAt` fail the run with a
|
|
15
|
+
* clear "wait timed out" error.
|
|
16
|
+
*/
|
|
17
|
+
import type { Logger } from "@checkstack/backend-api";
|
|
18
|
+
|
|
19
|
+
import type { AutomationStore } from "../automation-store";
|
|
20
|
+
import { recoverStalledRun, resumeRun } from "./engine";
|
|
21
|
+
import type { DispatchDeps } from "./types";
|
|
22
|
+
|
|
23
|
+
/** Dependencies and tuning knobs for {@link startStalledSweeper}. */
export interface StalledSweeperArgs {
  deps: DispatchDeps;
  automationStore: AutomationStore;
  logger: Logger;
  /** Heartbeat age (ms) above which a run is considered stalled. */
  staleAfterMs?: number;
  /** Poll interval (ms). */
  intervalMs?: number;
}

/** Handle returned by {@link startStalledSweeper}. */
export interface StalledSweeper {
  /** Run one sweep cycle. Useful in tests. */
  sweep: () => Promise<void>;
  /** Stop the periodic polling. */
  stop: () => void;
}

/** Default stale threshold: 1 minute. */
const DEFAULT_STALE_MS = 60_000;
/** Default poll interval: 30 seconds. */
const DEFAULT_INTERVAL_MS = 30_000;

/**
 * Start the periodic sweeper. Every `intervalMs` it runs one sweep cycle:
 * stalled runs first, then expired wait locks. A cycle that rejects is
 * logged as a warning and does not stop the polling.
 *
 * @param args Dependencies plus optional `staleAfterMs` / `intervalMs`
 *   overrides (defaults: 60 000 ms and 30 000 ms).
 * @returns A handle exposing `sweep` (run one cycle on demand; its
 *   rejection is propagated to the caller) and `stop` (cancel the timer;
 *   safe to call more than once).
 */
export function startStalledSweeper(
  args: StalledSweeperArgs,
): StalledSweeper {
  const staleMs = args.staleAfterMs ?? DEFAULT_STALE_MS;
  const intervalMs = args.intervalMs ?? DEFAULT_INTERVAL_MS;

  const sweep = async (): Promise<void> => {
    await sweepStalledRuns(args, staleMs);
    await sweepExpiredWaitLocks(args);
  };

  let timer: ReturnType<typeof setInterval> | undefined = setInterval(() => {
    sweep().catch((error: unknown) => {
      // A rejection is not guaranteed to be an Error; narrow before
      // reading `.message` so the log never says "undefined".
      const reason = error instanceof Error ? error.message : String(error);
      args.logger.warn(`automation stalled sweeper failed: ${reason}`);
    });
  }, intervalMs);

  return {
    sweep,
    stop: () => {
      if (timer) {
        clearInterval(timer);
        timer = undefined;
      }
    },
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * One pass over stalled runs.
 *
 * Asks the run-state store for run ids stalled relative to
 * `now - staleMs`, then handles them one at a time:
 *   - skip the run when its advisory lock cannot be acquired;
 *   - skip it when the run row no longer exists;
 *   - when its automation no longer exists, mark the run `"failed"` and
 *     clear its run state;
 *   - otherwise hand it to `recoverStalledRun`.
 * A failure while handling one run is logged as a warning and does not
 * stop the remaining runs. The advisory lock is released in every case
 * once it has been acquired.
 *
 * @param args Sweeper dependencies.
 * @param staleMs Heartbeat age (ms) above which a run counts as stalled.
 */
async function sweepStalledRuns(
  args: StalledSweeperArgs,
  staleMs: number,
): Promise<void> {
  const threshold = new Date(Date.now() - staleMs);
  const stalled = await args.deps.runStateStore.findStalledRunIds(threshold);
  if (stalled.length === 0) return;
  args.logger.debug(
    `automation sweeper: ${stalled.length} stalled run(s) detected`,
  );

  for (const runId of stalled) {
    const acquired = await args.deps.runStateStore.tryAdvisoryLock(runId);
    if (!acquired) continue; // another instance already on it
    try {
      const run = await args.deps.runStore.loadRun(runId);
      if (!run) continue;
      const automation = await args.automationStore.getById(run.automationId);
      if (!automation) {
        await args.deps.runStore.updateRunStatus(
          runId,
          "failed",
          "automation deleted while run was stalled",
        );
        await args.deps.runStateStore.clear(runId);
        continue;
      }
      args.logger.info(`automation sweeper: recovering run ${runId}`);
      await recoverStalledRun(args.deps, {
        runId,
        automation: {
          id: automation.id,
          name: automation.name,
          status: automation.status,
          definition: automation.definition,
        },
      });
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances; narrow
      // before reading `.message` so the log never says "undefined".
      const reason = error instanceof Error ? error.message : String(error);
      args.logger.warn(
        `automation sweeper failed to recover ${runId}: ${reason}`,
      );
    } finally {
      await args.deps.runStateStore.releaseAdvisoryLock(runId);
    }
  }
}
|
|
119
|
+
|
|
120
|
+
/**
 * One pass over expired wait locks.
 *
 * Fetches the locks the run store reports as expired at the current time
 * and handles them one at a time:
 *   - `kind: "delay"`: load the run and its automation, delete the lock,
 *     then call `resumeRun` at the lock's action path. When the run or the
 *     automation no longer exists the lock is deleted and nothing is
 *     resumed.
 *   - any other kind (trigger waits): delete the lock, mark the run
 *     `"failed"` with a "timed out" message, and clear its run state.
 *
 * Each lock is handled in isolation: a failure on one lock is logged as a
 * warning and the remaining locks are still processed, matching how
 * stalled runs are handled. Only the initial fetch of expired locks can
 * still reject this function.
 *
 * @param args Sweeper dependencies.
 */
async function sweepExpiredWaitLocks(
  args: StalledSweeperArgs,
): Promise<void> {
  const now = new Date();
  const expired = await args.deps.runStore.sweepExpiredWaitLocks(now);
  if (expired.length === 0) return;

  for (const lock of expired) {
    try {
      if (lock.kind === "delay") {
        // The queue scheduler may have lost the job — wake the run
        // ourselves. Per the original author's note, resumeRun takes the
        // advisory lock and skips if someone else already resumed.
        const run = await args.deps.runStore.loadRun(lock.runId);
        const automation = run
          ? await args.automationStore.getById(run.automationId)
          : undefined;
        // The lock is dropped whether or not there is anything to resume,
        // and always before resumeRun is called.
        await args.deps.runStore.deleteWaitLock(lock.id);
        if (!run || !automation) continue;
        await resumeRun(args.deps, {
          runId: lock.runId,
          automation: {
            id: automation.id,
            name: automation.name,
            status: automation.status,
            definition: automation.definition,
          },
          waitedAtPath: lock.actionPath,
        });
        continue;
      }

      // Trigger lock expired without firing — fail the run.
      // NOTE(review): the run's current status is not checked before it is
      // overwritten with "failed"; confirm a lock cannot outlive a run that
      // already reached a terminal status.
      await args.deps.runStore.deleteWaitLock(lock.id);
      await args.deps.runStore.updateRunStatus(
        lock.runId,
        "failed",
        `wait_for_trigger timed out waiting for ${lock.eventId}`,
      );
      await args.deps.runStateStore.clear(lock.runId);
    } catch (error: unknown) {
      // Isolate failures so one bad lock cannot starve the rest of the
      // batch; it is picked up again on a later sweep if it still exists.
      const reason = error instanceof Error ? error.message : String(error);
      args.logger.warn(
        `automation sweeper failed to process expired wait lock ${lock.id}: ${reason}`,
      );
    }
  }
}
|