@checkstack/automation-backend 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +453 -0
  2. package/drizzle/0000_acoustic_diamondback.sql +80 -0
  3. package/drizzle/0001_mute_vindicator.sql +12 -0
  4. package/drizzle/0002_silky_omega_red.sql +12 -0
  5. package/drizzle/meta/0000_snapshot.json +688 -0
  6. package/drizzle/meta/0001_snapshot.json +785 -0
  7. package/drizzle/meta/0002_snapshot.json +861 -0
  8. package/drizzle/meta/_journal.json +27 -0
  9. package/drizzle.config.ts +12 -0
  10. package/package.json +41 -0
  11. package/src/action-registry.ts +83 -0
  12. package/src/action-types.ts +324 -0
  13. package/src/artifact-store.ts +140 -0
  14. package/src/artifact-type-registry.ts +64 -0
  15. package/src/automation-store.ts +227 -0
  16. package/src/builtin-actions.test.ts +185 -0
  17. package/src/builtin-actions.ts +132 -0
  18. package/src/builtin-triggers.test.ts +264 -0
  19. package/src/builtin-triggers.ts +365 -0
  20. package/src/dispatch/action-kind.ts +44 -0
  21. package/src/dispatch/condition.ts +61 -0
  22. package/src/dispatch/delay-queue.ts +91 -0
  23. package/src/dispatch/engine.test.ts +1198 -0
  24. package/src/dispatch/engine.ts +1672 -0
  25. package/src/dispatch/path-nav.ts +65 -0
  26. package/src/dispatch/render.test.ts +75 -0
  27. package/src/dispatch/render.ts +136 -0
  28. package/src/dispatch/run-state-store.ts +143 -0
  29. package/src/dispatch/run-state.ts +298 -0
  30. package/src/dispatch/scope.test.ts +40 -0
  31. package/src/dispatch/scope.ts +125 -0
  32. package/src/dispatch/stalled-sweeper.ts +164 -0
  33. package/src/dispatch/test-fixtures.ts +558 -0
  34. package/src/dispatch/trigger-subscriber.ts +397 -0
  35. package/src/dispatch/types.ts +259 -0
  36. package/src/extension-points.ts +88 -0
  37. package/src/index.ts +379 -0
  38. package/src/migration/from-webhook-subscriptions.test.ts +237 -0
  39. package/src/migration/from-webhook-subscriptions.ts +398 -0
  40. package/src/registries.test.ts +357 -0
  41. package/src/router.test.ts +724 -0
  42. package/src/router.ts +556 -0
  43. package/src/schema.ts +310 -0
  44. package/src/trigger-registry.ts +99 -0
  45. package/src/validate-definition.test.ts +306 -0
  46. package/src/validate-definition.ts +304 -0
  47. package/tsconfig.json +41 -0
@@ -0,0 +1,1672 @@
1
+ /**
2
+ * Automation dispatch engine.
3
+ *
4
+ * Walks an automation's action tree, executing each of the 10 primitive
5
+ * kinds (`action`, `choose`, `parallel`, `delay`, `repeat`, `variables`,
6
+ * `condition`, `stop`, `wait_for_trigger`, `sequence`). Persists run +
7
+ * step state via the `RunStore`, scope snapshots via the
8
+ * `RunStateStore`, and queue-backed suspensions via the `QueueManager`.
9
+ *
10
+ * Durability properties guaranteed by this engine:
11
+ *
12
+ * 1. **Restart safety.** After every successful step the engine
13
+ * writes a scope snapshot keyed on `runId`. If the host process
14
+ * dies, the stalled-run sweeper (`stalled-sweeper.ts`) picks up
15
+ * any run whose heartbeat is older than the threshold, acquires
16
+ * a Postgres advisory lock, and resumes from the snapshot.
17
+ *
18
+ * 2. **Horizontal scaling.** All trigger subscriptions use
19
+ * `mode: "work-queue"` so exactly one instance processes a given
20
+ * trigger event. Resume paths additionally take a Postgres
21
+ * session-level advisory lock per runId, so even two sweepers
22
+ * racing for the same stalled run can't both execute it.
23
+ *
24
+ * 3. **Suspensions anywhere.** `wait_for_trigger` and `delay` are
25
+ * supported at the top level, inside any depth of `choose`, inside
26
+ * `parallel` branches (each branch's state is tracked on the
27
+ * parallel step's `result_payload.branchOutcomes` so a resume
28
+ * doesn't re-execute siblings), and inside `repeat` iterations
29
+ * (the loop continues from the iteration after the resumed one;
30
+ * for_each lists are cached on the step so the iteration order is
31
+ * stable across the suspension). The `sequence` primitive wraps
32
+ * multi-action branches so a parallel branch can hold a full
33
+ * open/wait/close lifecycle.
34
+ *
35
+ * 4. **Queue-backed delay.** Every `delay` action persists a wait
36
+ * lock of `kind: "delay"` AND enqueues a scheduled job on the
37
+ * `automation-delay` queue. The queue is the wake-up trigger;
38
+ * the lock is the durable state. If the queue job is lost (Redis
39
+ * flush, etc.) the stalled sweeper still catches the expired
40
+ * lock at its `timeoutAt`.
41
+ *
42
+ * 5. **Stalled-recovery safety boundary.** The sweeper refuses to
43
+ * recover a run whose last-completed action is inside a `parallel`
44
+ * branch when no wait lock exists — branch concurrency state was
45
+ * lost, so neither re-running nor skip-and-continue is safe.
46
+ * Intentional waits inside parallel branches use the wait-lock
47
+ * resume path and are unaffected.
48
+ */
49
+ import type {
50
+ Action,
51
+ ChooseInput,
52
+ ConditionGuardInput,
53
+ DelayInput,
54
+ ParallelInput,
55
+ ProviderAction,
56
+ RepeatInput,
57
+ SequenceInput,
58
+ StopInput,
59
+ VariablesInput,
60
+ WaitForTriggerInput,
61
+ } from "@checkstack/automation-common";
62
+ import { SYSTEM_ACTOR, type Actor } from "@checkstack/common";
63
+ import type {
64
+ TemplateContext,
65
+ } from "@checkstack/template-engine";
66
+
67
+ import type { ActionRunScope } from "../action-types";
68
+ import { detectActionKind, type ActionKind } from "./action-kind";
69
+ import { evaluateCondition } from "./condition";
70
+ import { parseActionPath } from "./path-nav";
71
+ import {
72
+ renderConfig,
73
+ renderExpression,
74
+ renderString,
75
+ renderValue,
76
+ } from "./render";
77
+ import {
78
+ buildInitialScope,
79
+ extendVariables,
80
+ resolveConsumedArtifacts,
81
+ withRepeatContext,
82
+ } from "./scope";
83
+ import {
84
+ formatActionPath,
85
+ type ActionPath,
86
+ type DispatchContext,
87
+ type DispatchDeps,
88
+ type LoadedAutomation,
89
+ type SequenceOutcome,
90
+ type StepOutcome,
91
+ } from "./types";
92
+
93
/** Durable queue on which crash-safe delay wake-ups are scheduled. */
export const DELAY_QUEUE_NAME = "automation-delay";

/**
 * Payload of a delay-resume message.
 *
 * Timing is not part of the payload: it travels on the queue's
 * `startDelay`. The payload only tells the consumer which run to wake.
 */
export interface DelayResumeJob {
  /** Identifier of the run to wake. */
  runId: string;
  /** Identifier of the wait lock this job refers to. */
  waitLockId: string;
}
104
+
105
+ // ─── Public entry points ──────────────────────────────────────────────────
106
+
107
/** Arguments accepted by `dispatchTrigger`. */
export interface DispatchTriggerArgs {
  /** Automation whose `definition.actions` are walked for this event. */
  automation: LoadedAutomation;
  /** Identifier of the trigger that fired. */
  triggerId: string;
  /** Identifier of the event that fired the trigger. */
  triggerEventId: string;
  /** Trigger payload; stored on the run row and used to seed the scope. */
  payload: Record<string, unknown>;
  /**
   * Who or what caused the originating event. It becomes part of the run's
   * scope snapshot and is visible to the automation as `trigger.actor`.
   * When the caller has none, the system actor is used.
   */
  actor?: Actor;
  /** Context key stored on the run row; `null` when there is none. */
  contextKey: string | null;
}
120
+
121
/**
 * Start a brand-new run for a trigger event.
 *
 * Steps, in order:
 *  - insert the `automation_runs` row;
 *  - build the initial scope and persist it as the first snapshot;
 *  - walk `definition.actions` sequentially, persisting step state;
 *  - write the final run status.
 *
 * @returns the assigned `runId` together with the status the run ended
 *   this invocation in.
 */
export async function dispatchTrigger(
  deps: DispatchDeps,
  args: DispatchTriggerArgs,
): Promise<{ runId: string; status: string }> {
  const { automation, triggerId, triggerEventId, payload, contextKey } = args;
  const startedAt = new Date();

  const runId = await deps.runStore.createRun({
    automationId: automation.id,
    triggerId,
    triggerEventId,
    triggerPayload: payload,
    contextKey,
  });

  const scope = buildInitialScope({
    triggerId,
    triggerEventId,
    payload,
    actor: args.actor,
    startedAt,
  });

  const ctx: DispatchContext = {
    deps,
    run: {
      runId,
      automation,
      triggerId,
      triggerEventId,
      contextKey,
      startedAt,
    },
    payload,
    scope,
    resuming: false,
  };

  // Snapshot before the first step runs, so a crash this early still
  // leaves the stalled sweeper with state to recover from.
  await deps.runStateStore.upsert({
    runId,
    scopeSnapshot: ctx.scope,
    lastActionPath: null,
  });

  const { actions } = automation.definition;
  const outcome = await walkSequence(actions, ["actions"], ctx);

  return await finaliseRun(ctx, outcome);
}
182
+
183
/** Arguments accepted by `resumeRun`. */
export interface ResumeRunArgs {
  /** Identifier of the suspended run. */
  runId: string;
  /** Automation whose `definition.actions` are walked on resume. */
  automation: LoadedAutomation;
  /**
   * Formatted path of the action the run suspended on (the
   * `wait_for_trigger` or `delay`). Walking picks up at the sibling that
   * follows this position and unwinds enclosing containers as needed.
   */
  waitedAtPath: string;
  /**
   * Payload of the event that satisfied the wait, if any. Downstream
   * actions see it as `resume.payload`.
   */
  payload?: Record<string, unknown>;
}
198
+
199
/**
 * Resume a suspended run after its `wait_for_trigger` or `delay` is
 * satisfied.
 *
 * Loads the persisted scope snapshot (falling back to a freshly built
 * initial scope when none is stored), rebuilds the dispatch context and
 * walks the action tree with a `resumeRemainder` pointing at the suspended
 * action. The walker skips ahead, treats the suspended action as already
 * complete, and continues with its successor.
 *
 * The remainder is forwarded into container actions (`choose`, `parallel`,
 * `repeat`, `sequence`), so the suspended action may sit at any nesting
 * depth.
 *
 * The call is a no-op that returns the run's current status when:
 *  - the run is no longer `running` / `waiting` (for example a duplicate or
 *    late wake-up for a run that already finished), or
 *  - another instance holds the per-run advisory lock.
 *
 * @throws Error when the run row does not exist.
 */
export async function resumeRun(
  deps: DispatchDeps,
  args: ResumeRunArgs,
): Promise<{ status: string }> {
  const run = await deps.runStore.loadRun(args.runId);
  if (!run) throw new Error(`Cannot resume — run ${args.runId} not found`);

  // Same guard as `recoverStalledRun`: never flip a terminal run back to
  // "running" and re-execute its remaining actions.
  if (run.status !== "running" && run.status !== "waiting") {
    deps.logger.debug(
      `resumeRun: run ${args.runId} already has status "${run.status}"; nothing to resume`,
    );
    return { status: run.status };
  }

  const waitedAt = parseActionPath(args.waitedAtPath);

  // Try to acquire the advisory lock so two resumers don't race.
  const acquired = await deps.runStateStore.tryAdvisoryLock(args.runId);
  if (!acquired) {
    deps.logger.debug(
      `resumeRun: another instance already holds the lock for run ${args.runId}; skipping`,
    );
    return { status: run.status };
  }

  try {
    const persisted = await deps.runStateStore.load(args.runId);
    // Shallow-copy the snapshot so adding `resume` below does not mutate
    // the object returned by the store.
    const scope = persisted?.scopeSnapshot
      ? { ...persisted.scopeSnapshot }
      : buildInitialScope({
          triggerId: run.triggerId,
          triggerEventId: run.triggerEventId,
          payload: run.triggerPayload,
          startedAt: run.startedAt,
        });
    if (args.payload !== undefined) {
      scope.resume = { payload: args.payload };
    }

    await deps.runStore.updateRunStatus(args.runId, "running");
    await deps.runStateStore.heartbeat(args.runId);

    const ctx: DispatchContext = {
      deps,
      run: {
        runId: args.runId,
        automation: args.automation,
        triggerId: run.triggerId,
        triggerEventId: run.triggerEventId,
        contextKey: run.contextKey,
        startedAt: run.startedAt,
      },
      payload: run.triggerPayload,
      scope,
      resuming: true,
    };

    // Drop the "actions" anchor before forwarding into the walker —
    // a resume always starts at the top-level actions list, so the
    // remainder is the rest of the suspended action's path.
    const remainder = waitedAt.slice(1);
    const outcome = await walkSequence(
      args.automation.definition.actions,
      ["actions"],
      ctx,
      { resumeRemainder: remainder },
    );

    return await finaliseRun(ctx, outcome);
  } finally {
    // Always release, including when the walk throws.
    await deps.runStateStore.releaseAdvisoryLock(args.runId);
  }
}
277
+
278
/**
 * Recover a stalled run from its durable snapshot.
 *
 * The resume target is the action after the last completed one. The
 * caller (`stalled-sweeper.ts`) is expected to hold the advisory lock, so
 * a `resumeRun` racing against this call skips cleanly.
 *
 * Outcomes:
 *  - run not `running` / `waiting`: returned untouched;
 *  - no snapshot, or last completed action inside a `parallel` branch:
 *    the run is marked `failed` and its durable state is cleared;
 *  - otherwise the run is walked to its next terminal/suspended state.
 *
 * @throws Error when the run row does not exist.
 */
export async function recoverStalledRun(
  deps: DispatchDeps,
  args: { runId: string; automation: LoadedAutomation },
): Promise<{ status: string }> {
  const { runId, automation } = args;

  const run = await deps.runStore.loadRun(runId);
  if (!run) throw new Error(`recoverStalledRun: run ${runId} not found`);

  const recoverable = run.status === "running" || run.status === "waiting";
  if (!recoverable) return { status: run.status };

  // Mark the run failed and drop its durable state.
  const abandon = async (reason: string): Promise<{ status: string }> => {
    await deps.runStore.updateRunStatus(runId, "failed", reason);
    await deps.runStateStore.clear(runId);
    return { status: "failed" };
  };

  const persisted = await deps.runStateStore.load(runId);
  if (!persisted) {
    // Without a snapshot the only option would be a re-run from scratch,
    // which could repeat side effects that already happened.
    return await abandon(
      "Stalled run had no persisted state; cannot safely recover",
    );
  }

  await deps.runStore.updateRunStatus(runId, "running");
  await deps.runStateStore.heartbeat(runId);

  const ctx: DispatchContext = {
    deps,
    run: {
      runId,
      automation,
      triggerId: run.triggerId,
      triggerEventId: run.triggerEventId,
      contextKey: run.contextKey,
      startedAt: run.startedAt,
    },
    payload: run.triggerPayload,
    scope: { ...persisted.scopeSnapshot },
    resuming: true,
  };

  // `undefined` means "crashed before the first step finished": walk
  // from the top with no resume target.
  let resumeRemainder: ActionPath | undefined;
  if (persisted.lastActionPath !== null) {
    const lastDone = parseActionPath(persisted.lastActionPath);
    if (lastDone.includes("parallel")) {
      // A stall inside a parallel branch with no wait lock means branch
      // concurrency state is gone: we cannot tell finished siblings
      // (side effects done) from ones still running on the dead host.
      // Re-running the whole parallel would double-fire finished
      // branches; recovering one branch alone would skip unfinished
      // ones. Fail loudly so an operator notices. Intentional waits
      // inside parallel branches go through the wait-lock resume path
      // (`sweepExpiredWaitLocks`, `wakeWaitingRuns`), not this one.
      return await abandon(
        `Stalled inside parallel branch at ${persisted.lastActionPath} — branch concurrency state lost; manual recovery required`,
      );
    }
    // Stalls in the middle of a repeat iteration are safe: iterations
    // are sequential and the path pinpoints the iteration to resume.
    resumeRemainder = lastDone.slice(1);
  }

  const outcome =
    resumeRemainder === undefined
      ? await walkSequence(automation.definition.actions, ["actions"], ctx)
      : await walkSequence(automation.definition.actions, ["actions"], ctx, {
          resumeRemainder,
        });
  return await finaliseRun(ctx, outcome);
}
366
+
367
+ // ─── Run finalisation ─────────────────────────────────────────────────────
368
+
369
/**
 * Translate a walk outcome into the run's stored status and settle the
 * durable run state accordingly.
 *
 *  - `completed` → `success`
 *  - `stopped`   → `failed` when flagged as an error, else `success`;
 *                  the stop reason is stored as the run's message
 *  - `suspended` → `waiting`
 *
 * @returns the run id and the status that was written.
 */
async function finaliseRun(
  ctx: DispatchContext,
  outcome: SequenceOutcome,
): Promise<{ runId: string; status: string }> {
  const { runId } = ctx.run;
  const { runStore, runStateStore } = ctx.deps;

  let status: "success" | "failed" | "waiting";
  let errorMessage: string | undefined;
  if (outcome.kind === "completed") {
    status = "success";
  } else if (outcome.kind === "stopped") {
    status = outcome.error ? "failed" : "success";
    errorMessage = outcome.reason;
  } else {
    // Only `suspended` remains.
    status = "waiting";
  }

  await runStore.updateRunStatus(runId, status, errorMessage);

  if (status === "waiting") {
    // A suspended run keeps its scope so the resume has something to
    // load.
    await runStateStore.upsert({
      runId,
      scopeSnapshot: ctx.scope,
      lastActionPath: null,
    });
  } else {
    // A terminal run no longer needs durable state.
    await runStateStore.clear(runId);
  }

  return { runId, status };
}
404
+
405
+ // ─── Sequence walker ──────────────────────────────────────────────────────
406
+
407
/** Optional behaviour switches for `walkSequence`. */
interface WalkOptions {
  /**
   * Presence of this path puts the walk into resume mode. Its first
   * segment is the index, within the sequence being walked, to skip
   * ahead to; once that target has been handled the rest of the
   * sequence is walked normally.
   */
  resumeRemainder?: ActionPath;
}
416
+
417
/**
 * Execute a list of actions in order, stopping at the first outcome that
 * terminates the sequence (stop, unhandled failure, or suspension).
 *
 * In resume mode (`options.resumeRemainder` non-empty) actions before the
 * target index are skipped. The target itself is either stepped over
 * (when it is the suspended action) or entered with the deeper remainder
 * (when it is a container holding the suspended action). Everything after
 * it runs normally.
 *
 * @throws TypeError when the first remainder segment is not a number.
 */
async function walkSequence(
  actions: ReadonlyArray<Action>,
  basePath: ActionPath,
  ctx: DispatchContext,
  options: WalkOptions = {},
): Promise<SequenceOutcome> {
  const requested = options.resumeRemainder;

  // `target` stays undefined for a plain walk (no remainder, or an empty
  // one, which means the caller targeted this sequence's own slot).
  let target: number | undefined;
  let beyondTarget: ActionPath = [];
  if (requested !== undefined && requested.length > 0) {
    const head = requested[0];
    if (typeof head !== "number") {
      throw new TypeError(
        `Resume path corrupt at ${basePath.join(".")}: expected numeric index, got ${String(head)}`,
      );
    }
    target = head;
    beyondTarget = requested.slice(1);
  }

  for (const [index, action] of actions.entries()) {
    if (target !== undefined && index < target) continue;

    const path: ActionPath = [...basePath, index];
    let outcome: StepOutcome;

    if (index === target) {
      if (beyondTarget.length === 0) {
        // This is the suspended action itself: it already ran, so
        // just note it and carry on with its successor.
        ctx.deps.logger.debug(
          `Resume target reached at ${formatActionPath(path)} — continuing past`,
        );
        continue;
      }
      // A container on the way to the suspended action: hand it the
      // deeper remainder so it can route into the right child.
      outcome = await executeAction(action, path, ctx, {
        resumeRemainder: beyondTarget,
      });
    } else {
      outcome = await executeAction(action, path, ctx);
    }

    const { terminal } = propagate(
      outcome,
      action.continue_on_error,
      ctx,
      path,
    );
    if (terminal) return terminal;
  }

  return { kind: "completed" };
}
480
+
481
/**
 * Map one step's outcome onto the outcome of the sequence containing it.
 *
 * An empty result (`terminal` absent) tells the walker to keep going.
 *
 *  - `stopped`   → ends the sequence with the same outcome object
 *  - `failed`    → ends the sequence as an error stop, unless
 *                  `continueOnError` is truthy (then logged and ignored)
 *  - `suspended` → ends the sequence as suspended on that step
 *  - anything else → keep walking
 */
function propagate(
  outcome: StepOutcome,
  continueOnError: boolean | undefined,
  ctx: DispatchContext,
  path: ActionPath,
): { terminal?: SequenceOutcome } {
  switch (outcome.kind) {
    case "stopped": {
      return { terminal: outcome };
    }
    case "failed": {
      if (!continueOnError) {
        return {
          terminal: { kind: "stopped", reason: outcome.error, error: true },
        };
      }
      ctx.deps.logger.warn(
        `Action ${formatActionPath(path)} failed but continue_on_error=true: ${outcome.error}`,
      );
      return {};
    }
    case "suspended": {
      return {
        terminal: { kind: "suspended", suspendingStepId: outcome.stepId },
      };
    }
    default: {
      return {};
    }
  }
}
512
+
513
+ // ─── Action dispatch (per kind) ───────────────────────────────────────────
514
+
515
/** Optional behaviour switches for `executeAction`. */
interface ExecuteOptions {
  /**
   * Remaining resume path, passed on to container actions (`choose`,
   * `parallel`, `repeat`, `sequence`) while resuming.
   */
  resumeRemainder?: ActionPath;
}
519
+
520
/**
 * Execute a single action by dispatching on its detected kind.
 *
 * An action with `enabled: false` is recorded as a skipped step and does
 * no work. Container kinds (`choose`, `parallel`, `repeat`, `sequence`)
 * receive `options.resumeRemainder` so a resume can route into the right
 * child; leaf kinds ignore it.
 *
 * A kind that is not handled below produces a `failed` outcome instead of
 * falling off the end of the switch (which would hand `undefined` to the
 * caller).
 */
async function executeAction(
  action: Action,
  path: ActionPath,
  ctx: DispatchContext,
  options: ExecuteOptions = {},
): Promise<StepOutcome> {
  const kind = detectActionKind(action);

  // `enabled: false` → record a skip, no work.
  if (action.enabled === false) {
    await recordSkipStep(ctx, path, action, kind, "disabled");
    return { kind: "skipped", reason: "disabled" };
  }

  switch (kind) {
    case "action": {
      return await executeProviderAction(action as ProviderAction, path, ctx);
    }
    case "choose": {
      return await executeChoose(
        action as ChooseInput,
        path,
        ctx,
        options.resumeRemainder,
      );
    }
    case "parallel": {
      return await executeParallel(
        action as ParallelInput,
        path,
        ctx,
        options.resumeRemainder,
      );
    }
    case "delay": {
      return await executeDelay(action as DelayInput, path, ctx);
    }
    case "repeat": {
      return await executeRepeat(
        action as RepeatInput,
        path,
        ctx,
        options.resumeRemainder,
      );
    }
    case "variables": {
      return await executeVariables(action as VariablesInput, path, ctx);
    }
    case "condition": {
      return await executeConditionGuard(
        action as ConditionGuardInput,
        path,
        ctx,
      );
    }
    case "stop": {
      return await executeStop(action as StopInput, path, ctx);
    }
    case "wait_for_trigger": {
      return await executeWaitForTrigger(
        action as WaitForTriggerInput,
        path,
        ctx,
      );
    }
    case "sequence": {
      return await executeSequence(
        action as SequenceInput,
        path,
        ctx,
        options.resumeRemainder,
      );
    }
    default: {
      // Compile-time exhaustiveness check: adding a member to
      // `ActionKind` without a case above becomes a type error here.
      const unhandled: never = kind;
      // Runtime safety net for data that bypassed the type system.
      return {
        kind: "failed",
        error: `Unsupported action kind "${String(unhandled)}" at ${formatActionPath(path)}`,
      };
    }
  }
}
591
+
592
+ // ─── Helpers ─────────────────────────────────────────────────────────────
593
+
594
/** View the run's current scope as the context handed to template rendering. */
function templateContext(ctx: DispatchContext): TemplateContext {
  const { scope } = ctx;
  return scope as TemplateContext;
}
597
+
598
/**
 * Build the {@link ActionRunScope} passed to an action's `execute` from
 * the dispatch scope.
 *
 * The internal `variables` key is exposed under its public name `vars`.
 * The trigger's `id`, `event` and `actor` are carried over so scripts can
 * branch on which trigger fired and who caused it. Missing or mistyped
 * values fall back to `""`, `{}` or the system actor. `repeat` is only
 * included when the scope holds a numeric `repeat.index`.
 */
function actionRunScope(ctx: DispatchContext): ActionRunScope {
  const raw = ctx.scope as {
    trigger?: {
      id?: unknown;
      event?: unknown;
      eventId?: unknown;
      actor?: unknown;
      payload?: unknown;
    };
    artifacts?: unknown;
    variables?: unknown;
    repeat?: { index?: unknown; item?: unknown };
  };
  const trigger = raw.trigger;

  // `event` is the canonical key. `eventId` is the legacy alias still
  // present in scope snapshots persisted before the rename.
  let event = "";
  if (typeof trigger?.event === "string") {
    event = trigger.event;
  } else if (typeof trigger?.eventId === "string") {
    event = trigger.eventId;
  }

  const projected: ActionRunScope = {
    trigger: {
      id: typeof trigger?.id === "string" ? trigger.id : "",
      event,
      actor: isActor(trigger?.actor) ? trigger.actor : SYSTEM_ACTOR,
      payload: coerceRecord(trigger?.payload),
    },
    artifacts: coerceRecord(raw.artifacts),
    vars: coerceRecord(raw.variables),
  };

  const repeat = raw.repeat;
  if (repeat && typeof repeat.index === "number") {
    projected.repeat = { index: repeat.index, item: repeat.item };
  }
  return projected;
}
642
+
643
/**
 * Treat an unknown scope value as a string-keyed record.
 *
 * @returns the value itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function coerceRecord(value: unknown): Record<string, unknown> {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
649
+
650
/**
 * Structural type guard for {@link Actor}: the value must be a non-null
 * object whose `type` and `id` are both strings.
 */
function isActor(value: unknown): value is Actor {
  if (typeof value !== "object" || value === null) return false;

  const candidate = value as Record<string, unknown>;
  if (typeof candidate.type !== "string") return false;
  return typeof candidate.id === "string";
}
658
+
659
/**
 * Persist a step row for an action that was not executed.
 *
 * The step is created and then updated to status `skipped`, with
 * `reason` stored in its error-message field. `providerActionId` is only
 * filled in for provider (`action`) steps.
 */
async function recordSkipStep(
  ctx: DispatchContext,
  path: ActionPath,
  action: Action,
  kind: ActionKind,
  reason: string,
): Promise<void> {
  const { runStore } = ctx.deps;
  const providerActionId =
    kind === "action" ? (action as ProviderAction).action : null;

  const stepId = await runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: kind,
    providerActionId,
  });

  await runStore.updateStep(stepId, {
    status: "skipped",
    errorMessage: reason,
  });
}
679
+
680
/**
 * Write the current scope, together with the path of the step that just
 * finished, to the run-state store so a stalled run can be resumed from
 * this point.
 *
 * NOTE(review): the original comment says this also records a heartbeat;
 * that presumably happens inside `runStateStore.upsert` — confirm.
 */
async function checkpoint(
  ctx: DispatchContext,
  lastDonePath: ActionPath,
): Promise<void> {
  const { runStateStore } = ctx.deps;
  const lastActionPath = formatActionPath(lastDonePath);
  await runStateStore.upsert({
    runId: ctx.run.runId,
    scopeSnapshot: ctx.scope,
    lastActionPath,
  });
}
695
+
696
+ // ─── Primitive: `action` (provider call) ─────────────────────────────────
697
+
698
/**
 * Execute a provider (`action`) step.
 *
 * Pipeline, each stage failing the step with a descriptive message:
 *  1. look up the registered action;
 *  2. render the configured templates against the current scope;
 *  3. validate the rendered config with the action's schema;
 *  4. resolve consumed artifacts and call the action's `execute`;
 *  5. when the action produces an artifact, record it and expose it in
 *     scope as `artifacts.<actionId>.<localName>`;
 *  6. mark the step successful and checkpoint the run.
 *
 * @returns `{ kind: "ok" }` on success, `{ kind: "failed", error }`
 *   otherwise.
 */
async function executeProviderAction(
  action: ProviderAction,
  path: ActionPath,
  ctx: DispatchContext,
): Promise<StepOutcome> {
  const { runStore } = ctx.deps;
  const registered = ctx.deps.registries.actions.getAction(action.action);

  const stepId = await runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "action",
    providerActionId: action.action,
  });

  // Mark this step failed and produce the matching outcome.
  const fail = async (message: string): Promise<StepOutcome> => {
    await runStore.updateStep(stepId, {
      status: "failed",
      errorMessage: message,
    });
    return { kind: "failed", error: message };
  };

  // Anything can be thrown, not just `Error`; keep a usable message
  // instead of ending up with `undefined`.
  const messageOf = (thrown: unknown): string => {
    if (thrown instanceof Error) return thrown.message;
    if (typeof thrown === "object" && thrown !== null) {
      const candidate = (thrown as { message?: unknown }).message;
      if (typeof candidate === "string") return candidate;
    }
    return String(thrown);
  };

  if (!registered) {
    return await fail(
      `Unknown action "${action.action}" — not registered by any plugin.`,
    );
  }

  let renderedConfig: unknown;
  try {
    renderedConfig = renderConfig({
      config: action.config,
      jsonSchema: registered.configJsonSchema,
      context: templateContext(ctx),
      filters: ctx.deps.filters,
    });
  } catch (error) {
    return await fail(`Failed to render config: ${messageOf(error)}`);
  }

  const parsed = registered.config.schema.safeParse(renderedConfig);
  if (!parsed.success) {
    return await fail(`Config validation failed: ${parsed.error.message}`);
  }

  const consumed = await resolveConsumedArtifacts(
    ctx,
    registered.consumes ?? [],
    registered.ownerPluginId,
  );

  let result: Awaited<ReturnType<typeof registered.execute>>;
  try {
    result = await registered.execute({
      config: parsed.data,
      consumedArtifacts: consumed,
      scope: actionRunScope(ctx),
      runId: ctx.run.runId,
      automationId: ctx.run.automation.id,
      contextKey: ctx.run.contextKey,
      logger: ctx.deps.logger,
      getService: ctx.deps.getService,
    });
  } catch (error) {
    return await fail(messageOf(error));
  }

  if (!result.success) {
    return await fail(result.error ?? "action returned success=false");
  }

  if (registered.produces && result.artifact !== undefined) {
    // Producers MUST have an id (enforced by validate-definition). Guard
    // defensively so a malformed definition fails loud rather than via a
    // non-null assertion.
    if (!action.id) {
      return await fail(
        `Action "${action.action}" produces an artifact but has no id; it cannot be referenced as artifacts.<id>.<name>`,
      );
    }
    await ctx.deps.artifactStore.record({
      automationId: ctx.run.automation.id,
      runId: ctx.run.runId,
      stepId,
      actionId: action.id,
      artifactType: registered.produces,
      data: result.artifact as Record<string, unknown>,
      contextKey: ctx.run.contextKey,
    });
    // The local artifact name is `produces` with the owning plugin prefix
    // stripped (e.g. `integration-jira.issue` → `issue`). Falls back to the
    // full `produces` if it somehow lacks the expected prefix.
    const prefix = `${registered.ownerPluginId}.`;
    const localName = registered.produces.startsWith(prefix)
      ? registered.produces.slice(prefix.length)
      : registered.produces;
    const existingArtifacts =
      (ctx.scope.artifacts as Record<string, unknown>) ?? {};
    const existingForAction = existingArtifacts[action.id];
    // Merge into whatever this action id already exposes, ignoring a
    // previous value that is not a plain record.
    const nestedForAction =
      existingForAction !== null &&
      typeof existingForAction === "object" &&
      !Array.isArray(existingForAction)
        ? (existingForAction as Record<string, unknown>)
        : {};
    ctx.scope.artifacts = {
      ...existingArtifacts,
      [action.id]: { ...nestedForAction, [localName]: result.artifact },
    };
  }

  await runStore.updateStep(stepId, {
    status: "success",
    resultPayload: { externalId: result.externalId },
  });
  await checkpoint(ctx, path);
  return { kind: "ok" };
}
836
+
837
+ // ─── Primitive: `choose` ─────────────────────────────────────────────────
838
+
839
/**
 * Run a `choose` action.
 *
 * A step row is created first. On a fresh run the branches' `when`
 * conditions are evaluated in order and the first truthy branch's
 * `sequence` is walked; if none match, a non-empty `else` list is walked;
 * if there is nothing to walk the step succeeds with `matchedBranch: null`.
 *
 * When `resumeRemainder` is non-empty no condition is re-evaluated: the
 * remainder must look like `["choose", branchIdx, "sequence", ...]` and the
 * walk re-enters that branch directly.
 *
 * A branch that suspends is recorded as `"waiting"` (not `"failed"`), which
 * matches how `sequence` and `repeat` steps record a suspension.
 *
 * @param action - The `choose` action definition.
 * @param path - Path of this action inside the automation.
 * @param ctx - Dispatch context (run, scope, dependencies).
 * @param resumeRemainder - Resume path below this action, when resuming.
 * @returns The step outcome derived from the walked branch, `failed` when a
 *   `when` cannot be evaluated or the resume target branch no longer exists.
 * @throws Error / TypeError when the resume path does not have the expected shape.
 */
async function executeChoose(
  action: ChooseInput,
  path: ActionPath,
  ctx: DispatchContext,
  resumeRemainder?: ActionPath,
): Promise<StepOutcome> {
  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "choose",
    providerActionId: null,
  });

  // Map a branch outcome onto the persisted step status. A suspended branch
  // is still in flight, so it must not be stored as a failure.
  const stepStatusFor = (
    outcome: SequenceOutcome,
  ): "success" | "waiting" | "failed" => {
    if (outcome.kind === "completed") return "success";
    return outcome.kind === "suspended" ? "waiting" : "failed";
  };

  // Resume path inside a choose looks like ["choose", branchIdx, "sequence", ...].
  if (resumeRemainder !== undefined && resumeRemainder.length > 0) {
    if (resumeRemainder[0] !== "choose") {
      throw new Error(
        `Resume path corrupt at ${formatActionPath(path)}: expected "choose", got ${String(
          resumeRemainder[0],
        )}`,
      );
    }
    const branchIdx = resumeRemainder[1];
    if (typeof branchIdx !== "number") {
      throw new TypeError(
        `Resume path corrupt at ${formatActionPath(path)}: expected branch index`,
      );
    }
    if (resumeRemainder[2] !== "sequence") {
      throw new Error(
        `Resume path corrupt at ${formatActionPath(path)}: expected "sequence", got ${String(
          resumeRemainder[2],
        )}`,
      );
    }
    const inner = resumeRemainder.slice(3);
    const branch = action.choose[branchIdx];
    if (!branch) {
      // The automation definition changed while the run was suspended.
      const message = `Resume target choose[${branchIdx}] no longer exists in this automation definition`;
      await ctx.deps.runStore.updateStep(stepId, {
        status: "failed",
        errorMessage: message,
      });
      return { kind: "failed", error: message };
    }
    const outcome = await walkSequence(
      branch.sequence,
      [...path, "choose", branchIdx, "sequence"],
      ctx,
      { resumeRemainder: inner },
    );
    await ctx.deps.runStore.updateStep(stepId, {
      status: stepStatusFor(outcome),
      resultPayload: { resumedBranch: branchIdx },
      errorMessage: outcome.kind === "stopped" ? outcome.reason : undefined,
    });
    if (outcome.kind === "completed") await checkpoint(ctx, path);
    return sequenceToStep(outcome);
  }

  // Normal first-time execution: evaluate `when`s in order.
  for (const [i, branch] of action.choose.entries()) {
    let take: boolean;
    try {
      take = evaluateCondition(
        branch.when,
        templateContext(ctx),
        ctx.deps.filters,
      );
    } catch (error) {
      const message = `Failed to evaluate choose[${i}].when: ${(error as Error).message}`;
      await ctx.deps.runStore.updateStep(stepId, {
        status: "failed",
        errorMessage: message,
      });
      return { kind: "failed", error: message };
    }
    if (take) {
      const outcome = await walkSequence(
        branch.sequence,
        [...path, "choose", i, "sequence"],
        ctx,
      );
      await ctx.deps.runStore.updateStep(stepId, {
        status: stepStatusFor(outcome),
        resultPayload: { matchedBranch: i },
        errorMessage:
          outcome.kind === "stopped" ? outcome.reason : undefined,
      });
      if (outcome.kind === "completed") await checkpoint(ctx, path);
      return sequenceToStep(outcome);
    }
  }

  // No branch matched: fall back to `else` when it has any actions.
  if (action.else && action.else.length > 0) {
    const outcome = await walkSequence(
      action.else,
      [...path, "else"],
      ctx,
    );
    await ctx.deps.runStore.updateStep(stepId, {
      status: stepStatusFor(outcome),
      resultPayload: { matchedBranch: "else" },
      errorMessage: outcome.kind === "stopped" ? outcome.reason : undefined,
    });
    if (outcome.kind === "completed") await checkpoint(ctx, path);
    return sequenceToStep(outcome);
  }

  // Nothing matched and nothing to fall back to: the choose is a no-op.
  await ctx.deps.runStore.updateStep(stepId, {
    status: "success",
    resultPayload: { matchedBranch: null },
  });
  await checkpoint(ctx, path);
  return { kind: "ok" };
}
956
+
957
+ // ─── Primitive: `parallel` ───────────────────────────────────────────────
958
+
959
/**
 * Terminal outcome of one `parallel` branch, as stored under
 * `result_payload.branchOutcomes` on the parallel step. A resume reads
 * this map to tell finished (completed / failed / stopped) branches from
 * the ones still suspended, so only the pending branch is walked again.
 */
interface StoredBranchOutcome {
  /** Where the branch ended up. */
  status: "completed" | "stopped" | "failed" | "suspended";
  /** Reason carried over from a stopped/failed branch outcome, if any. */
  reason?: string;
  /** Error flag carried over from a stopped/failed branch outcome, if any. */
  error?: boolean;
}
970
+
971
/**
 * Convert a branch's sequence outcome into the shape persisted in the
 * parallel step's `branchOutcomes` map.
 *
 * `completed` and `suspended` map to a bare status. Any other outcome is
 * stored as `"failed"` when its `error` flag is truthy and `"stopped"`
 * otherwise, keeping the original `reason` and `error` values.
 */
function outcomeToStored(outcome: SequenceOutcome): StoredBranchOutcome {
  switch (outcome.kind) {
    case "completed":
    case "suspended":
      return { status: outcome.kind };
    default: {
      const status = outcome.error ? "failed" : "stopped";
      return { status, reason: outcome.reason, error: outcome.error };
    }
  }
}
980
+
981
/**
 * Run a `parallel` action.
 *
 * A non-empty `resumeRemainder` is delegated to `resumeParallel`. On a
 * fresh run a step row is created, every entry of `action.parallel` is
 * walked concurrently as a one-action sequence, the per-branch outcomes
 * are stored on the step, and `finaliseParallel` folds them into the
 * step outcome.
 *
 * @param action - The `parallel` action definition.
 * @param path - Path of this action inside the automation.
 * @param ctx - Dispatch context shared by every branch.
 * @param resumeRemainder - Resume path below this action, when resuming.
 */
async function executeParallel(
  action: ParallelInput,
  path: ActionPath,
  ctx: DispatchContext,
  resumeRemainder?: ActionPath,
): Promise<StepOutcome> {
  const resuming = resumeRemainder !== undefined && resumeRemainder.length > 0;
  if (resuming) {
    return await resumeParallel(action, path, ctx, resumeRemainder);
  }

  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "parallel",
    providerActionId: null,
  });

  // Start every branch at once and wait until each one has reached a
  // terminal outcome (completed / failed / stopped / suspended). A branch
  // that suspends has written its own wait-lock; the remaining outcomes
  // are kept so the parallel as a whole can suspend and be resumed later.
  const outcomes = await Promise.all(
    action.parallel.map(async (branchAction, branchIdx) =>
      walkSequence([branchAction], [...path, "parallel", branchIdx], ctx),
    ),
  );

  const branchOutcomes: Record<string, StoredBranchOutcome> = {};
  let anySuspended = false;
  outcomes.forEach((branchOutcome, branchIdx) => {
    branchOutcomes[String(branchIdx)] = outcomeToStored(branchOutcome);
    if (branchOutcome.kind === "suspended") anySuspended = true;
  });

  await ctx.deps.runStore.updateStep(stepId, {
    status: anySuspended ? "waiting" : "running",
    resultPayload: { branchOutcomes },
  });
  return await finaliseParallel(action, ctx, stepId, branchOutcomes, path);
}
1019
+
1020
/**
 * Resume a suspended `parallel` action.
 *
 * The remainder must start with `["parallel", branchIdx, ...]`. Only that
 * branch is walked again (in resume mode); the outcomes of the other
 * branches are read back from the existing step row, the resumed branch's
 * outcome is merged in, and the result is handed to `finaliseParallel`.
 *
 * @returns `failed` when the target branch no longer exists or the original
 *   step row cannot be found; otherwise the aggregated parallel outcome.
 * @throws Error / TypeError when the resume path does not have the expected shape.
 */
async function resumeParallel(
  action: ParallelInput,
  path: ActionPath,
  ctx: DispatchContext,
  resumeRemainder: ActionPath,
): Promise<StepOutcome> {
  const marker = resumeRemainder[0];
  if (marker !== "parallel") {
    throw new Error(
      `Resume path corrupt at ${formatActionPath(path)}: expected "parallel", got ${String(marker)}`,
    );
  }
  const branchIdx = resumeRemainder[1];
  if (typeof branchIdx !== "number") {
    throw new TypeError(
      `Resume path corrupt at ${formatActionPath(path)}: expected numeric branch index`,
    );
  }
  const inner = resumeRemainder.slice(2);

  const branch = action.parallel[branchIdx];
  if (!branch) {
    // The definition changed while suspended: record a failed step row.
    const message = `Resume target parallel[${branchIdx}] no longer exists in this automation definition`;
    const failedStepId = await ctx.deps.runStore.createStep({
      runId: ctx.run.runId,
      actionPath: formatActionPath(path),
      actionId: action.id ?? null,
      actionKind: "parallel",
      providerActionId: null,
    });
    await ctx.deps.runStore.updateStep(failedStepId, {
      status: "failed",
      errorMessage: message,
    });
    return { kind: "failed", error: message };
  }

  const existing = await ctx.deps.runStore.findStepByPath(
    ctx.run.runId,
    formatActionPath(path),
  );
  if (!existing) {
    return {
      kind: "failed",
      error: `Cannot resume parallel at ${formatActionPath(path)} — original step record missing`,
    };
  }
  const stepId = existing.id;

  // Start from the outcomes stored before the suspension (copy, so the
  // loaded payload object itself is not mutated).
  const stored = existing.resultPayload?.branchOutcomes as
    | Record<string, StoredBranchOutcome>
    | undefined;
  const branchOutcomes: Record<string, StoredBranchOutcome> = { ...stored };

  // Re-enter only the suspended branch. The other branches already ran
  // their side effects before the original suspension and must not run
  // a second time.
  const resumed = await walkSequence(
    [branch],
    [...path, "parallel", branchIdx],
    ctx,
    { resumeRemainder: inner },
  );
  branchOutcomes[String(branchIdx)] = outcomeToStored(resumed);

  const anySuspended = Object.values(branchOutcomes).some(
    (stored) => stored.status === "suspended",
  );
  await ctx.deps.runStore.updateStep(stepId, {
    status: anySuspended ? "waiting" : "running",
    resultPayload: { branchOutcomes },
  });

  return await finaliseParallel(action, ctx, stepId, branchOutcomes, path);
}
1094
+
1095
/**
 * Aggregate the per-branch outcome map into a single step outcome.
 *
 * - Any branch still `"suspended"` keeps the parallel suspended; the step
 *   row is left untouched.
 * - Otherwise, the first `"failed"` branch (in branch order) fails the
 *   parallel unless `continue_on_error` is set. The same message is both
 *   persisted on the step and returned, falling back to
 *   `"parallel branch failed"` when the branch carried no reason.
 * - Otherwise the step is marked successful and a checkpoint is written.
 *
 * Only branches present in `action.parallel` are considered; map entries
 * for other indices are ignored.
 *
 * @param action - The `parallel` action definition.
 * @param ctx - Dispatch context.
 * @param stepId - Step row of the parallel action.
 * @param branchOutcomes - Stored outcome per branch index (stringified).
 * @param path - Path of the parallel action, used for the checkpoint.
 */
async function finaliseParallel(
  action: ParallelInput,
  ctx: DispatchContext,
  stepId: string,
  branchOutcomes: Record<string, StoredBranchOutcome>,
  path: ActionPath,
): Promise<StepOutcome> {
  const stillSuspended = action.parallel.some(
    (_, i) => branchOutcomes[String(i)]?.status === "suspended",
  );
  if (stillSuspended) {
    return { kind: "suspended", stepId };
  }

  const firstFailure = action.parallel
    .map((_, i) => branchOutcomes[String(i)])
    .find((o) => o?.status === "failed");
  if (firstFailure !== undefined && !action.continue_on_error) {
    // Resolve the fallback once so the stored step and the returned
    // outcome never disagree about why the parallel failed.
    const message = firstFailure.reason ?? "parallel branch failed";
    await ctx.deps.runStore.updateStep(stepId, {
      status: "failed",
      errorMessage: message,
    });
    return { kind: "failed", error: message };
  }

  await ctx.deps.runStore.updateStep(stepId, { status: "success" });
  await checkpoint(ctx, path);
  return { kind: "ok" };
}
1132
+
1133
+ // ─── Primitive: `delay` (queue-backed) ───────────────────────────────────
1134
+
1135
/**
 * Run a queue-backed `delay` action.
 *
 * The duration is either the literal `delay.seconds` or the result of
 * rendering `delay.template`. A rendered value is accepted only when it is
 * a number, a bigint, or a non-blank numeric string, and must be finite
 * and non-negative; it is then floored to whole seconds. Values such as
 * `null`, `""`, `[]` or booleans are rejected instead of being coerced to
 * `0`/`1` by `Number(...)`, so a template that renders empty cannot
 * silently turn into a zero-second delay.
 *
 * On success the function writes a `delay` wait-lock, checkpoints the
 * scope, enqueues a resume job on the delay queue with
 * `startDelay: seconds`, marks the step `"waiting"` and returns
 * `suspended`.
 *
 * @param action - The `delay` action definition.
 * @param path - Path of this action inside the automation.
 * @param ctx - Dispatch context.
 * @returns `suspended` once the resume job is queued, or `failed` when the
 *   template does not yield a valid duration.
 */
async function executeDelay(
  action: DelayInput,
  path: ActionPath,
  ctx: DispatchContext,
): Promise<StepOutcome> {
  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "delay",
    providerActionId: null,
  });

  let seconds: number;
  if ("seconds" in action.delay) {
    seconds = action.delay.seconds;
  } else {
    const rendered = renderExpression(
      action.delay.template,
      templateContext(ctx),
      ctx.deps.filters,
    );
    // Only convert values that genuinely represent a number. Everything
    // else becomes NaN and is reported as an invalid duration below.
    let n: number;
    if (typeof rendered === "number") {
      n = rendered;
    } else if (typeof rendered === "bigint") {
      n = Number(rendered);
    } else if (typeof rendered === "string" && rendered.trim() !== "") {
      n = Number(rendered);
    } else {
      n = Number.NaN;
    }
    if (!Number.isFinite(n) || n < 0) {
      const message = `delay template evaluated to invalid duration: ${String(rendered)}`;
      await ctx.deps.runStore.updateStep(stepId, {
        status: "failed",
        errorMessage: message,
      });
      return { kind: "failed", error: message };
    }
    seconds = Math.floor(n);
  }

  const timeoutAt = new Date(Date.now() + seconds * 1000);
  const waitLockId = await ctx.deps.runStore.createWaitLock({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    kind: "delay",
    eventId: `@@delay:${ctx.run.runId}:${formatActionPath(path)}`,
    contextKey: null,
    filterTemplate: null,
    timeoutAt,
  });

  // Persist scope BEFORE we suspend so a sweeper / queue resume can
  // load it without racing the step write below.
  await checkpoint(ctx, path);

  const queue = ctx.deps.queueManager.getQueue<DelayResumeJob>(DELAY_QUEUE_NAME);
  await queue.enqueue(
    { runId: ctx.run.runId, waitLockId },
    { startDelay: seconds, jobId: `${ctx.run.runId}:${waitLockId}` },
  );

  await ctx.deps.runStore.updateStep(stepId, {
    status: "waiting",
    resultPayload: { sleepSeconds: seconds, waitLockId, queueBacked: true },
  });
  return { kind: "suspended", stepId };
}
1196
+
1197
+ // ─── Primitive: `repeat` ─────────────────────────────────────────────────
1198
+
1199
/** Iteration cap for `while` / `until` repeats that set no `max_iterations`. */
const DEFAULT_MAX_ITERATIONS = 1_000;
1200
+
1201
/**
 * Run a `repeat` action.
 *
 * A non-empty `resumeRemainder` is delegated to `resumeRepeat`. On a fresh
 * run a step row is created; for `for_each` repeats the list expression is
 * rendered once and stored on the step so a later resume iterates over the
 * same list. The iterations themselves are driven by `runRepeatLoop`
 * starting at index 0.
 *
 * @returns `failed` when `for_each` does not render to an array, otherwise
 *   whatever `runRepeatLoop` / `resumeRepeat` returns.
 */
async function executeRepeat(
  action: RepeatInput,
  path: ActionPath,
  ctx: DispatchContext,
  resumeRemainder?: ActionPath,
): Promise<StepOutcome> {
  // Resuming: finish the suspended iteration first, then carry on with
  // the rest of the loop according to its mode.
  const resuming = resumeRemainder !== undefined && resumeRemainder.length > 0;
  if (resuming) {
    return await resumeRepeat(action, path, ctx, resumeRemainder);
  }

  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "repeat",
    providerActionId: null,
  });

  if (!("for_each" in action.repeat)) {
    return await runRepeatLoop(action, path, ctx, stepId, 0, undefined);
  }

  // for_each: evaluate the list once and cache it on the step so the
  // iteration order survives a resume even if later actions change the
  // data the expression reads from.
  const items = renderExpression(
    action.repeat.for_each,
    templateContext(ctx),
    ctx.deps.filters,
  );
  if (!Array.isArray(items)) {
    const message = `repeat.for_each expression did not evaluate to an array: ${String(items)}`;
    await ctx.deps.runStore.updateStep(stepId, {
      status: "failed",
      errorMessage: message,
    });
    return { kind: "failed", error: message };
  }
  const forEachList: unknown[] = items;
  await ctx.deps.runStore.updateStep(stepId, {
    status: "running",
    resultPayload: { forEachList },
  });

  return await runRepeatLoop(action, path, ctx, stepId, 0, forEachList);
}
1248
+
1249
/**
 * Resume a suspended `repeat` action.
 *
 * The remainder must look like `["repeat", iterIdx, "sequence", ...]`.
 * The suspended iteration is walked in resume mode with the same
 * `repeat.index` / `repeat.item` scope it had originally (the `for_each`
 * list is read back from the existing step row). If that iteration does
 * not complete, its outcome is recorded and propagated. If it completes,
 * the loop continues from `iterIdx + 1` via `runRepeatLoop`.
 *
 * For `until` repeats the `until` condition is evaluated right after the
 * resumed iteration completes, exactly as the fresh-run loop does after
 * each iteration. Without that check a resumed iteration that satisfies
 * `until` would still be followed by one extra iteration. An evaluation
 * error is treated as "not done", mirroring the fresh-run loop.
 *
 * @returns `failed` when the original step row is missing; otherwise the
 *   outcome of the resumed iteration or of the continued loop.
 * @throws Error / TypeError when the resume path does not have the expected shape.
 */
async function resumeRepeat(
  action: RepeatInput,
  path: ActionPath,
  ctx: DispatchContext,
  resumeRemainder: ActionPath,
): Promise<StepOutcome> {
  if (resumeRemainder[0] !== "repeat") {
    throw new Error(
      `Resume path corrupt at ${formatActionPath(path)}: expected "repeat", got ${String(
        resumeRemainder[0],
      )}`,
    );
  }
  const iterIdx = resumeRemainder[1];
  if (typeof iterIdx !== "number") {
    throw new TypeError(
      `Resume path corrupt at ${formatActionPath(path)}: expected numeric iteration index`,
    );
  }
  if (resumeRemainder[2] !== "sequence") {
    throw new Error(
      `Resume path corrupt at ${formatActionPath(path)}: expected "sequence", got ${String(
        resumeRemainder[2],
      )}`,
    );
  }
  const inner = resumeRemainder.slice(3);

  const existing = await ctx.deps.runStore.findStepByPath(
    ctx.run.runId,
    formatActionPath(path),
  );
  if (!existing) {
    return {
      kind: "failed",
      error: `Cannot resume repeat at ${formatActionPath(path)} — original step record missing`,
    };
  }
  const stepId = existing.id;
  const forEachList =
    (existing.resultPayload?.forEachList as unknown[] | undefined) ??
    undefined;

  // Build the iteration-N child scope (matches the original execution
  // so repeat.item / repeat.index resolve correctly during the
  // resumed sequence walk).
  const item =
    forEachList === undefined ? undefined : forEachList[iterIdx];
  const childScope = withRepeatContext(ctx.scope, {
    index: iterIdx,
    ...(item === undefined ? {} : { item }),
  });
  const childCtx = { ...ctx, scope: childScope };

  const iterOutcome = await walkSequence(
    action.repeat.sequence,
    [...path, "repeat", iterIdx, "sequence"],
    childCtx,
    { resumeRemainder: inner },
  );
  if (iterOutcome.kind !== "completed") {
    // Iteration didn't finish — propagate (suspended re-suspends the
    // repeat, stopped/failed terminates the loop).
    await ctx.deps.runStore.updateStep(stepId, {
      status: iterOutcome.kind === "suspended" ? "waiting" : "failed",
      resultPayload: { iterations: iterIdx, forEachList },
      errorMessage:
        iterOutcome.kind === "stopped" ? iterOutcome.reason : undefined,
    });
    return sequenceToStep(iterOutcome);
  }

  // `until` mode is the variant that is none of count / for_each / while
  // (the same discrimination the loop driver uses). Its exit condition is
  // checked AFTER each iteration, so it must also be checked after the
  // iteration that was just resumed to completion.
  const repeat = action.repeat;
  if (
    !("count" in repeat) &&
    !("for_each" in repeat) &&
    !("while" in repeat)
  ) {
    let done: boolean;
    try {
      done = evaluateCondition(
        repeat.until,
        templateContext(childCtx),
        ctx.deps.filters,
      );
    } catch {
      done = false;
    }
    if (done) {
      await ctx.deps.runStore.updateStep(stepId, {
        status: "success",
        resultPayload: { iterations: iterIdx + 1, forEachList },
        errorMessage: undefined,
      });
      await checkpoint(ctx, path);
      return { kind: "ok" };
    }
  }

  // Continue the loop from the next iteration.
  return await runRepeatLoop(action, path, ctx, stepId, iterIdx + 1, forEachList);
}
1324
+
1325
/**
 * Run the iterations of a `repeat` action from `startIter` onwards and
 * record the result on the step.
 *
 * Called with `startIter = 0` for a fresh run, and with `N + 1` after a
 * resumed iteration `N` has completed. The mode is picked from the keys of
 * `action.repeat`: `count`, `for_each`, `while`, otherwise `until`.
 * `while` / `until` loops are capped at `max_iterations`
 * (default `DEFAULT_MAX_ITERATIONS`), and a condition that throws while
 * being evaluated is treated as `false`.
 *
 * The loop stops at the first iteration that does not complete; that
 * outcome decides the stored step status and the returned step outcome.
 */
async function runRepeatLoop(
  action: RepeatInput,
  path: ActionPath,
  ctx: DispatchContext,
  stepId: string,
  startIter: number,
  forEachList: unknown[] | undefined,
): Promise<StepOutcome> {
  const spec = action.repeat;
  const iterationPath = (index: number): ActionPath => [
    ...path,
    "repeat",
    index,
    "sequence",
  ];

  let iterationsRun = startIter;
  let last: SequenceOutcome = { kind: "completed" };

  if ("count" in spec) {
    let i = startIter;
    while (i < spec.count) {
      const scope = withRepeatContext(ctx.scope, { index: i });
      last = await walkSequence(spec.sequence, iterationPath(i), {
        ...ctx,
        scope,
      });
      i += 1;
      iterationsRun = i;
      if (last.kind !== "completed") break;
    }
  } else if ("for_each" in spec) {
    if (!forEachList) {
      // Not expected: both the fresh run and the resume path provide the
      // list. Fail loudly rather than quietly iterating zero times.
      const message = `repeat at ${formatActionPath(path)} resumed without a cached for_each list`;
      await ctx.deps.runStore.updateStep(stepId, {
        status: "failed",
        errorMessage: message,
      });
      return { kind: "failed", error: message };
    }
    let i = startIter;
    while (i < forEachList.length) {
      const scope = withRepeatContext(ctx.scope, {
        index: i,
        item: forEachList[i],
      });
      last = await walkSequence(spec.sequence, iterationPath(i), {
        ...ctx,
        scope,
      });
      i += 1;
      iterationsRun = i;
      if (last.kind !== "completed") break;
    }
  } else if ("while" in spec) {
    // Pre-checked loop: the condition is evaluated before each iteration.
    const limit = spec.max_iterations ?? DEFAULT_MAX_ITERATIONS;
    for (let i = startIter; i < limit; ) {
      const iterationCtx = {
        ...ctx,
        scope: withRepeatContext(ctx.scope, { index: i }),
      };
      let keepGoing: boolean;
      try {
        keepGoing = evaluateCondition(
          spec.while,
          templateContext(iterationCtx),
          ctx.deps.filters,
        );
      } catch {
        // An unevaluable condition counts as false and ends the loop.
        keepGoing = false;
      }
      if (!keepGoing) break;
      last = await walkSequence(spec.sequence, iterationPath(i), iterationCtx);
      i += 1;
      iterationsRun = i;
      if (last.kind !== "completed") break;
    }
  } else {
    // Post-checked loop: the body runs first, then `until` is evaluated.
    const limit = spec.max_iterations ?? DEFAULT_MAX_ITERATIONS;
    for (let i = startIter; i < limit; ) {
      const iterationCtx = {
        ...ctx,
        scope: withRepeatContext(ctx.scope, { index: i }),
      };
      last = await walkSequence(spec.sequence, iterationPath(i), iterationCtx);
      i += 1;
      iterationsRun = i;
      if (last.kind !== "completed") break;
      let finished: boolean;
      try {
        finished = evaluateCondition(
          spec.until,
          templateContext(iterationCtx),
          ctx.deps.filters,
        );
      } catch {
        // An unevaluable condition counts as "not done"; the loop goes on.
        finished = false;
      }
      if (finished) break;
    }
  }

  let status: "success" | "waiting" | "failed";
  if (last.kind === "completed") {
    status = "success";
  } else if (last.kind === "suspended") {
    status = "waiting";
  } else {
    status = "failed";
  }
  await ctx.deps.runStore.updateStep(stepId, {
    status,
    resultPayload: { iterations: iterationsRun, forEachList },
    errorMessage: last.kind === "stopped" ? last.reason : undefined,
  });
  if (last.kind === "completed") await checkpoint(ctx, path);
  return sequenceToStep(last);
}
1444
+
1445
+ // ─── Primitive: `variables` ───────────────────────────────────────────────
1446
+
1447
/**
 * Run a `variables` action: render every entry of `action.variables`
 * against the current template context and merge the results into the
 * run scope.
 *
 * String values go through `renderString`, everything else through
 * `renderValue`. If any render throws, the step is marked failed and the
 * scope is left unchanged. On success the step records the names that were
 * defined and a checkpoint is written.
 */
async function executeVariables(
  action: VariablesInput,
  path: ActionPath,
  ctx: DispatchContext,
): Promise<StepOutcome> {
  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "variables",
    providerActionId: null,
  });

  const rendered: Record<string, unknown> = {};
  try {
    Object.entries(action.variables).forEach(([name, raw]) => {
      if (typeof raw === "string") {
        rendered[name] = renderString(raw, templateContext(ctx), ctx.deps.filters);
      } else {
        rendered[name] = renderValue(raw, templateContext(ctx), ctx.deps.filters);
      }
    });
  } catch (error) {
    const message = `Failed to render variables: ${(error as Error).message}`;
    await ctx.deps.runStore.updateStep(stepId, {
      status: "failed",
      errorMessage: message,
    });
    return { kind: "failed", error: message };
  }

  // Replace the scope on the context so later actions see the new values.
  ctx.scope = extendVariables(ctx.scope, rendered);
  await ctx.deps.runStore.updateStep(stepId, {
    status: "success",
    resultPayload: { defined: Object.keys(rendered) },
  });
  await checkpoint(ctx, path);
  return { kind: "ok" };
}
1485
+
1486
+ // ─── Primitive: `condition` (guard) ──────────────────────────────────────
1487
+
1488
/**
 * Run a `condition` guard: evaluate `action.condition` and either let the
 * sequence continue or stop it.
 *
 * - Evaluation throws: step failed, outcome `failed`.
 * - Condition false: step failed with "condition gate failed", outcome
 *   `stopped` with `error: false`.
 * - Condition true: step success, checkpoint written, outcome `ok`.
 */
async function executeConditionGuard(
  action: ConditionGuardInput,
  path: ActionPath,
  ctx: DispatchContext,
): Promise<StepOutcome> {
  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "condition",
    providerActionId: null,
  });

  let gateOpen: boolean;
  try {
    gateOpen = evaluateCondition(
      action.condition,
      templateContext(ctx),
      ctx.deps.filters,
    );
  } catch (error) {
    const message = `Failed to evaluate condition: ${(error as Error).message}`;
    await ctx.deps.runStore.updateStep(stepId, {
      status: "failed",
      errorMessage: message,
    });
    return { kind: "failed", error: message };
  }

  if (!gateOpen) {
    await ctx.deps.runStore.updateStep(stepId, {
      status: "failed",
      errorMessage: "condition gate failed",
    });
    return { kind: "stopped", reason: "condition gate failed", error: false };
  }

  await ctx.deps.runStore.updateStep(stepId, { status: "success" });
  await checkpoint(ctx, path);
  return { kind: "ok" };
}
1528
+
1529
+ // ─── Primitive: `stop` ───────────────────────────────────────────────────
1530
+
1531
/**
 * Run a `stop` action: record a successful step carrying the configured
 * `reason` / `error` values and return a `stopped` outcome with those same
 * values. No checkpoint is written.
 */
async function executeStop(
  action: StopInput,
  path: ActionPath,
  ctx: DispatchContext,
): Promise<StepOutcome> {
  const { runStore } = ctx.deps;
  const stepId = await runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "stop",
    providerActionId: null,
  });
  await runStore.updateStep(stepId, {
    status: "success",
    resultPayload: { reason: action.stop.reason, error: action.stop.error },
  });
  const stopped: StepOutcome = {
    kind: "stopped",
    reason: action.stop.reason,
    error: action.stop.error,
  };
  return stopped;
}
1553
+
1554
+ // ─── Primitive: `wait_for_trigger` ───────────────────────────────────────
1555
+
1556
/**
 * Run a `wait_for_trigger` action: write a `trigger` wait-lock for the
 * configured event and suspend the run.
 *
 * - `timeoutAt` is "now + timeout_seconds" when `timeout_seconds` is
 *   truthy, otherwise `null`.
 * - The lock's context key is the rendered `context_key` template when one
 *   is given, otherwise the run's own context key.
 *
 * The scope is checkpointed before the step is marked `"waiting"`.
 */
async function executeWaitForTrigger(
  action: WaitForTriggerInput,
  path: ActionPath,
  ctx: DispatchContext,
): Promise<StepOutcome> {
  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "wait_for_trigger",
    providerActionId: null,
  });

  const wait = action.wait_for_trigger;

  const timeoutAt = wait.timeout_seconds
    ? new Date(Date.now() + wait.timeout_seconds * 1000)
    : null;

  const contextKey =
    wait.context_key !== undefined
      ? renderString(wait.context_key, templateContext(ctx), ctx.deps.filters)
      : ctx.run.contextKey;

  await ctx.deps.runStore.createWaitLock({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    kind: "trigger",
    eventId: wait.event,
    contextKey,
    filterTemplate: wait.filter ?? null,
    timeoutAt,
  });

  // The resume path reloads the scope, so it has to be persisted before
  // the run is reported as suspended.
  await checkpoint(ctx, path);

  await ctx.deps.runStore.updateStep(stepId, { status: "waiting" });
  return { kind: "suspended", stepId };
}
1598
+
1599
+ // ─── Helpers ─────────────────────────────────────────────────────────────
1600
+
1601
+ // ─── Primitive: `sequence` ───────────────────────────────────────────────
1602
+
1603
/**
 * Run a `sequence` action: an ordered list of actions treated as one
 * action. The nested list is walked exactly like a top-level `actions:`
 * list, so suspension and resumption propagate through it.
 *
 * Typical use: giving a `parallel` branch more than one action.
 *
 * When resuming, the remainder must start with `"sequence"`; that marker
 * is consumed and the rest is passed on to the nested walk.
 *
 * @throws Error when a non-empty resume remainder does not start with
 *   `"sequence"`.
 */
async function executeSequence(
  action: SequenceInput,
  path: ActionPath,
  ctx: DispatchContext,
  resumeRemainder?: ActionPath,
): Promise<StepOutcome> {
  const stepId = await ctx.deps.runStore.createStep({
    runId: ctx.run.runId,
    actionPath: formatActionPath(path),
    actionId: action.id ?? null,
    actionKind: "sequence",
    providerActionId: null,
  });

  const walkFresh = (): Promise<SequenceOutcome> =>
    walkSequence(action.sequence, [...path, "sequence"], ctx);

  let outcome: SequenceOutcome;
  if (resumeRemainder === undefined || resumeRemainder.length === 0) {
    outcome = await walkFresh();
  } else {
    const marker = resumeRemainder[0];
    if (marker !== "sequence") {
      throw new Error(
        `Resume path corrupt at ${formatActionPath(path)}: expected "sequence", got ${String(marker)}`,
      );
    }
    outcome = await walkSequence(
      action.sequence,
      [...path, "sequence"],
      ctx,
      { resumeRemainder: resumeRemainder.slice(1) },
    );
  }

  let status: "success" | "waiting" | "failed";
  switch (outcome.kind) {
    case "completed":
      status = "success";
      break;
    case "suspended":
      status = "waiting";
      break;
    default:
      status = "failed";
  }
  await ctx.deps.runStore.updateStep(stepId, {
    status,
    errorMessage: outcome.kind === "stopped" ? outcome.reason : undefined,
  });
  if (outcome.kind === "completed") await checkpoint(ctx, path);
  return sequenceToStep(outcome);
}
1662
+
1663
/**
 * Translate the outcome of a nested sequence walk into the outcome of the
 * step that contained it.
 *
 * - `completed` becomes `ok`.
 * - `suspended` stays `suspended`, pointing at the suspending step.
 * - A stop flagged as an error becomes `failed` (with the reason, or
 *   "stopped with error" when there is none).
 * - Any other stop stays `stopped` with its reason.
 */
function sequenceToStep(seq: SequenceOutcome): StepOutcome {
  switch (seq.kind) {
    case "completed":
      return { kind: "ok" };
    case "suspended":
      return { kind: "suspended", stepId: seq.suspendingStepId };
    default:
      return seq.error
        ? { kind: "failed", error: seq.reason ?? "stopped with error" }
        : { kind: "stopped", reason: seq.reason };
  }
}