@cleocode/playbooks 2026.4.92 → 2026.4.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/runtime.ts ADDED
@@ -0,0 +1,912 @@
1
+ /**
2
+ * Playbook runtime — deterministic state machine executor for `.cantbook` flows.
3
+ *
4
+ * This module is the executable heart of CLEO's T910 "Orchestration Coherence v4"
5
+ * pipeline. It walks a validated {@link PlaybookDefinition} one node at a time,
6
+ * merging node outputs into a shared `context`, enforcing per-node iteration
7
+ * caps, and pausing for HITL approval gates via the signed resume-token
8
+ * protocol (see `approval.ts`).
9
+ *
10
+ * Design constraints (non-negotiable):
11
+ *
12
+ * 1. Pure dependency injection — the runtime never imports or instantiates
13
+ * subprocess code. Callers pass an {@link AgentDispatcher} for `agentic`
14
+ * nodes and an optional {@link DeterministicRunner} for `deterministic`
15
+ * nodes. Tests can therefore exercise every branch without mocking any
16
+ * `@cleocode/*` module.
17
+ * 2. Deterministic ordering — successor/predecessor adjacency is indexed up
18
+ * front from the {@link PlaybookDefinition.edges} graph (with `depends[]`
19
+ * treated as reverse edges, exactly as the parser's cycle check does) and
20
+ * walked one successor at a time, so order is stable for a given definition.
21
+ * 3. Fail-closed policy — unknown node kinds, missing successors, unresolved
22
+ * `inject_into` targets, or dispatcher errors all terminate the run with
23
+ * a typed `terminalStatus`. The runtime never silently swallows failures.
24
+ * 4. HITL gates persist — when an `approval` node executes, the run is
25
+ * marked `paused` in `playbook_runs`, a {@link PlaybookApproval} row is
26
+ * written with the HMAC-signed resume token, and the returned
27
+ * {@link ExecutePlaybookResult.approvalToken} is what the human reviewer
28
+ * must present via `resumePlaybook` to continue.
29
+ *
30
+ * @task T930 — Playbook Runtime State Machine
31
+ */
32
+
33
+ import type { DatabaseSync } from 'node:sqlite';
34
+ import type {
35
+ PlaybookAgenticNode,
36
+ PlaybookApprovalNode,
37
+ PlaybookDefinition,
38
+ PlaybookDeterministicNode,
39
+ PlaybookNode,
40
+ PlaybookRun,
41
+ PlaybookRunStatus,
42
+ } from '@cleocode/contracts';
43
+ import { createApprovalGate, getPlaybookSecret } from './approval.js';
44
+ import {
45
+ createPlaybookApproval,
46
+ createPlaybookRun,
47
+ getPlaybookApprovalByToken,
48
+ getPlaybookRun,
49
+ updatePlaybookRun,
50
+ } from './state.js';
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Public interfaces — dependency-injected executors
54
+ // ---------------------------------------------------------------------------
55
+
56
/**
 * Payload the runtime passes to {@link AgentDispatcher.dispatch} each time an
 * `agentic` node is executed. Dispatchers should treat every field as
 * read-only; the runtime owns the run lifecycle.
 */
export interface AgentDispatchInput {
  /** Identifier of the playbook run (FK into `playbook_runs.run_id`). */
  runId: string;
  /** Identifier of the node being executed within the run graph. */
  nodeId: string;
  /** Agent identity: `node.agent` when set, otherwise `node.skill`. */
  agentId: string;
  /** `context.taskId` when it is a non-empty string, otherwise the `runId`. */
  taskId: string;
  /** Shallow copy of the accumulated bindings taken at dispatch time. */
  context: Record<string, unknown>;
  /** 1-based attempt counter for this node (increments on every retry). */
  iteration: number;
}
75
+
76
/**
 * Envelope returned by {@link AgentDispatcher.dispatch}. On
 * `status === 'success'` the runtime merges {@link output} into the run
 * context; on `'failure'` it runs its retry / iteration-cap logic.
 */
export interface AgentDispatchResult {
  /** Whether the node's work succeeded. */
  status: 'success' | 'failure';
  /** Key-value pairs merged into the run context when the node succeeds. */
  output: Record<string, unknown>;
  /** Human-readable failure reason, persisted to `playbook_runs.error_context`. */
  error?: string;
}
88
+
89
/**
 * Dependency-injected contract for spawning subagents. Implementations MUST
 * NOT depend on any `@cleocode/*` module: all state arrives through
 * {@link AgentDispatchInput}, which lets tests supply deterministic stubs.
 */
export interface AgentDispatcher {
  /** Run one `agentic` node and report the outcome as a success/failure envelope. */
  dispatch(input: AgentDispatchInput): Promise<AgentDispatchResult>;
}
98
+
99
/**
 * Payload the runtime passes to {@link DeterministicRunner.run} each time a
 * `deterministic` node is executed.
 */
export interface DeterministicRunInput {
  /** Identifier of the playbook run. */
  runId: string;
  /** Identifier of the node being executed. */
  nodeId: string;
  /** Command taken from the node definition. */
  command: string;
  /** Arguments taken from the node definition. */
  args: readonly string[];
  /** Working directory; only set when the node declares one. */
  cwd?: string;
  /** Environment entries; only set when the node declares them. */
  env?: Readonly<Record<string, string>>;
  /** Timeout in milliseconds; `undefined` means the runner picks a default. */
  timeout_ms?: number;
  /** Shallow copy of the accumulated bindings taken at run time. */
  context: Record<string, unknown>;
  /** 1-based attempt counter for this node. */
  iteration: number;
}
115
+
116
/**
 * Envelope returned by {@link DeterministicRunner.run}. The shape deliberately
 * mirrors {@link AgentDispatchResult} so the runtime treats both node kinds
 * uniformly.
 */
export interface DeterministicRunResult {
  /** Whether the command succeeded. */
  status: 'success' | 'failure';
  /** Key-value pairs merged into the run context when the node succeeds. */
  output: Record<string, unknown>;
  /** Human-readable failure reason. */
  error?: string;
}
125
+
126
/**
 * Dependency-injected contract for executing `deterministic` nodes (CLI tools,
 * validators, decide scripts). When no runner is supplied the runtime falls
 * back to {@link AgentDispatcher.dispatch} with
 * `agentId = "deterministic:<command>"`, so one stub can serve both node kinds
 * in unit tests.
 */
export interface DeterministicRunner {
  /** Run one `deterministic` node and report the outcome. */
  run(input: DeterministicRunInput): Promise<DeterministicRunResult>;
}
135
+
136
+ // ---------------------------------------------------------------------------
137
+ // Public interfaces — runtime entry points
138
+ // ---------------------------------------------------------------------------
139
+
140
/**
 * Configuration for {@link executePlaybook}.
 */
export interface ExecutePlaybookOptions {
  /** Open `node:sqlite` handle with the T889 migration applied. */
  db: DatabaseSync;
  /** Validated playbook definition (output of {@link parsePlaybook}). */
  playbook: PlaybookDefinition;
  /** SHA-256 hex of the playbook source (output of {@link parsePlaybook}). */
  playbookHash: string;
  /** Bindings the run context starts from (e.g. `{ taskId }`). */
  initialContext: Record<string, unknown>;
  /** Dispatcher used for `agentic` nodes; required. */
  dispatcher: AgentDispatcher;
  /** Runner for `deterministic` nodes; when omitted, `dispatcher` is used. */
  deterministicRunner?: DeterministicRunner;
  /** Overrides the secret used to HMAC-sign resume tokens. */
  approvalSecret?: string;
  /**
   * Per-node iteration cap applied when `on_failure.max_iterations` is unset.
   * Must be a non-negative integer; defaults to 3.
   */
  maxIterationsDefault?: number;
  /** Epic id persisted to `playbook_runs.epic_id` for dashboard filtering. */
  epicId?: string;
  /** Session id persisted to `playbook_runs.session_id`. */
  sessionId?: string;
  /** Clock injected for deterministic tests (defaults to `() => new Date()`). */
  now?: () => Date;
}
167
+
168
/**
 * Configuration for {@link resumePlaybook}. Before continuing, the runtime
 * checks that the supplied token resolves to an `approved`
 * {@link PlaybookApproval} row.
 */
export interface ResumePlaybookOptions {
  /** Open `node:sqlite` handle holding the run and approval rows. */
  db: DatabaseSync;
  /** Playbook definition the paused run was started from. */
  playbook: PlaybookDefinition;
  /** The token previously returned in {@link ExecutePlaybookResult.approvalToken}. */
  approvalToken: string;
  /** Dispatcher used for `agentic` nodes after the resume point. */
  dispatcher: AgentDispatcher;
  /** Runner for `deterministic` nodes; when omitted, `dispatcher` is used. */
  deterministicRunner?: DeterministicRunner;
  /** Overrides the secret used to HMAC-sign resume tokens. */
  approvalSecret?: string;
  /** Per-node iteration cap applied when a node declares none; defaults to 3. */
  maxIterationsDefault?: number;
  /** Clock injected for deterministic tests (defaults to `() => new Date()`). */
  now?: () => Date;
}
184
+
185
/**
 * Terminal status values reported by the runtime:
 *
 * - `completed` — the walk reached a node with no successor.
 * - `failed` — a node failed and the run could not continue.
 * - `pending_approval` — an approval node paused the run.
 * - `exceeded_iteration_cap` — a node used up its allowed attempts.
 */
export type PlaybookTerminalStatus =
  | 'completed'
  | 'failed'
  | 'pending_approval'
  | 'exceeded_iteration_cap';
193
+
194
/**
 * Envelope returned by both {@link executePlaybook} and
 * {@link resumePlaybook}, describing where and why the runtime stopped.
 */
export interface ExecutePlaybookResult {
  /** Identifier of the run this result belongs to. */
  runId: string;
  /** How the run stopped. */
  terminalStatus: PlaybookTerminalStatus;
  /** Fully-merged bindings at the point the runtime stopped. */
  finalContext: Record<string, unknown>;
  /** Present when `terminalStatus === 'pending_approval'`. */
  approvalToken?: string;
  /** Present when the run stopped because a specific node failed. */
  failedNodeId?: string;
  /** Present when the run stopped because a node hit its iteration cap. */
  exceededNodeId?: string;
  /** Human-readable error reason mirrored from the last failing node. */
  errorContext?: string;
}
212
+
213
+ // ---------------------------------------------------------------------------
214
+ // Internal types — graph + iteration bookkeeping
215
+ // ---------------------------------------------------------------------------
216
+
217
/**
 * Pre-computed adjacency for the playbook graph, so both the successors and
 * the predecessors of a node can be fetched with a single map lookup during
 * execution.
 */
interface EdgeIndex {
  /** Node id → successor node ids, in declaration order. */
  readonly outgoing: ReadonlyMap<string, readonly string[]>;
  /** Node id → predecessor node ids, in declaration order. */
  readonly incoming: ReadonlyMap<string, readonly string[]>;
}
227
+
228
/**
 * Id-keyed lookup of playbook nodes. {@link PlaybookDefinition.nodes} is an
 * ordered array, so this map avoids a linear scan on every execution step.
 */
type NodeIndex = ReadonlyMap<string, PlaybookNode>;
233
+
234
/**
 * Result signalled by {@link executeAgenticNode},
 * {@link executeDeterministicNode}, and {@link executeApprovalNode}. The main
 * loop maps each variant to an action: `success` advances to the next node,
 * `awaiting_approval` pauses the run and returns the token, and `failure`
 * enters the retry / hand-off / termination logic.
 */
type NodeOutcome =
  | { kind: 'success'; output: Record<string, unknown> }
  | { kind: 'failure'; error: string }
  | { kind: 'awaiting_approval'; token: string; approvalId: string };
244
+
245
/**
 * Code prefixed onto errors the runtime throws for invalid inputs or broken
 * graph invariants. Exported for parity with the other `@cleocode/playbooks`
 * error codes.
 */
export const E_PLAYBOOK_RUNTIME_INVALID = 'E_PLAYBOOK_RUNTIME_INVALID' as const;

/**
 * Code prefixed onto errors thrown by {@link resumePlaybook} when the supplied
 * token does not allow the run to continue (for example, it does not resolve
 * to an `approved` gate).
 */
export const E_PLAYBOOK_RESUME_BLOCKED = 'E_PLAYBOOK_RESUME_BLOCKED' as const;
256
+
257
+ // ---------------------------------------------------------------------------
258
+ // Graph helpers
259
+ // ---------------------------------------------------------------------------
260
+
261
/**
 * Build the {@link EdgeIndex} for a validated playbook.
 *
 * Every node starts with empty successor/predecessor lists. Explicit
 * `edges` are added first, then each `depends[]` entry is folded in as a
 * reverse edge (`dep → node`), matching the cycle-detection logic in
 * `parser.ts::hasCycle`.
 *
 * Links are de-duplicated regardless of where they come from. Without this, a
 * playbook that states the same `from → to` relation twice (two identical
 * edges, or an edge plus a matching `depends[]`) would list the successor
 * twice and be rejected as fan-out by {@link resolveNextNodeId}.
 *
 * Endpoints that are not known node ids are ignored on the side that is
 * unknown; the parser is expected to have rejected such definitions already.
 *
 * @param def - Validated playbook definition.
 * @returns Adjacency maps whose value arrays are frozen copies.
 */
function buildEdgeIndex(def: PlaybookDefinition): EdgeIndex {
  const outgoing = new Map<string, string[]>();
  const incoming = new Map<string, string[]>();
  for (const n of def.nodes) {
    outgoing.set(n.id, []);
    incoming.set(n.id, []);
  }

  // Record `from → to` once on each side, preserving first-seen order.
  const link = (from: string, to: string): void => {
    const outs = outgoing.get(from);
    if (outs !== undefined && !outs.includes(to)) outs.push(to);
    const ins = incoming.get(to);
    if (ins !== undefined && !ins.includes(from)) ins.push(from);
  };

  for (const e of def.edges) link(e.from, e.to);
  for (const n of def.nodes) {
    // `dep → n` is a reverse edge derived from the node's own declaration.
    for (const dep of n.depends ?? []) link(dep, n.id);
  }

  // Hand out frozen copies so callers cannot mutate the adjacency lists.
  const freeze = (source: Map<string, string[]>): Map<string, readonly string[]> =>
    new Map(
      Array.from(source, ([id, ids]): [string, readonly string[]] => [
        id,
        Object.freeze([...ids]),
      ]),
    );

  return { outgoing: freeze(outgoing), incoming: freeze(incoming) };
}
295
+
296
/**
 * Pick the node execution starts from: the first node, in
 * {@link PlaybookDefinition.nodes} declaration order, whose predecessor list
 * is empty once `depends[]` has been folded into the index. Using declaration
 * order keeps the choice stable across process restarts when several nodes
 * qualify.
 *
 * @throws Error stamped with {@link E_PLAYBOOK_RUNTIME_INVALID} when no node
 *   qualifies (every node has a predecessor — impossible for a DAG, but
 *   checked defensively).
 */
function resolveEntryNode(def: PlaybookDefinition, idx: EdgeIndex): PlaybookNode {
  const entry = def.nodes.find((candidate) => idx.incoming.get(candidate.id)?.length === 0);
  if (entry !== undefined) return entry;
  throw new Error(
    `${E_PLAYBOOK_RUNTIME_INVALID}: no entry node (every node has a predecessor) in ${def.name}`,
  );
}
314
+
315
/**
 * Find the node that follows `nodeId`.
 *
 * @returns The single successor id, or `null` when `nodeId` has no outgoing
 *   edges (the "end" state in the design contract) or is absent from the index.
 * @throws Error stamped with {@link E_PLAYBOOK_RUNTIME_INVALID} on fan-out
 *   (more than one successor): the deterministic runtime does not branch
 *   without an explicit `decide`-node contract. A follow-up task can add
 *   guarded branching here — see README.
 */
function resolveNextNodeId(nodeId: string, idx: EdgeIndex): string | null {
  const successors = idx.outgoing.get(nodeId);
  const count = successors?.length ?? 0;
  if (count === 0) return null;
  if (count > 1) {
    throw new Error(
      `${E_PLAYBOOK_RUNTIME_INVALID}: node ${nodeId} has ${count} successors; branching requires an approval/decide node`,
    );
  }
  // Exactly one entry remains; the undefined check only guards indexed access.
  const only = successors?.[0];
  if (only === undefined) {
    throw new Error(`${E_PLAYBOOK_RUNTIME_INVALID}: node ${nodeId} has undefined successor`);
  }
  return only;
}
338
+
339
/**
 * Fetch a {@link PlaybookNode} by id.
 *
 * @throws Error stamped with {@link E_PLAYBOOK_RUNTIME_INVALID} when the id is
 *   not in the index, so invariant violations surface at the runtime boundary.
 */
function resolveNode(nodeId: string, idx: NodeIndex): PlaybookNode {
  const found = idx.get(nodeId);
  if (found !== undefined) return found;
  throw new Error(`${E_PLAYBOOK_RUNTIME_INVALID}: unknown node id "${nodeId}"`);
}
350
+
351
+ // ---------------------------------------------------------------------------
352
+ // Per-node execution
353
+ // ---------------------------------------------------------------------------
354
+
355
/**
 * Run one `agentic` node through the injected {@link AgentDispatcher}.
 *
 * The dispatcher receives a shallow copy of the current context. Its envelope
 * is translated into a {@link NodeOutcome}; anything thrown while dispatching
 * or reading the reply is normalised into a `failure` outcome rather than
 * propagated.
 *
 * @param node - The agentic node to execute.
 * @param runId - Identifier of the owning run.
 * @param context - Current bindings (not mutated here).
 * @param iteration - 1-based attempt counter for this node.
 * @param dispatcher - Injected agent dispatcher.
 */
async function executeAgenticNode(
  node: PlaybookAgenticNode,
  runId: string,
  context: Record<string, unknown>,
  iteration: number,
  dispatcher: AgentDispatcher,
): Promise<NodeOutcome> {
  const agentId = node.agent ?? node.skill;
  if (agentId === undefined) {
    // The parser guarantees at least one of the two; narrow defensively.
    return {
      kind: 'failure',
      error: `${E_PLAYBOOK_RUNTIME_INVALID}: node ${node.id} is agentic but has no skill or agent`,
    };
  }

  // Prefer an explicit, non-empty task id from the bindings; else use the run id.
  const boundTaskId = context['taskId'];
  const taskId = typeof boundTaskId === 'string' && boundTaskId.length > 0 ? boundTaskId : runId;

  try {
    const reply = await dispatcher.dispatch({
      runId,
      nodeId: node.id,
      agentId,
      taskId,
      context: { ...context },
      iteration,
    });
    return reply.status === 'success'
      ? { kind: 'success', output: reply.output }
      : { kind: 'failure', error: reply.error ?? `agent ${agentId} returned failure` };
  } catch (cause) {
    const error = cause instanceof Error ? cause.message : String(cause);
    return { kind: 'failure', error };
  }
}
398
+
399
/**
 * Run one `deterministic` node.
 *
 * When a dedicated {@link DeterministicRunner} is supplied it receives the
 * node's command, args and (only when declared) `cwd` / `env` / `timeout_ms`.
 * Otherwise the node goes to {@link AgentDispatcher.dispatch} under the
 * synthetic agent id `deterministic:<command>`, with the command details
 * placed in `context.__deterministic`, so one stub can cover both node kinds
 * in unit tests.
 *
 * Anything thrown along either path is normalised into a `failure` outcome.
 */
async function executeDeterministicNode(
  node: PlaybookDeterministicNode,
  runId: string,
  context: Record<string, unknown>,
  iteration: number,
  dispatcher: AgentDispatcher,
  runner: DeterministicRunner | undefined,
): Promise<NodeOutcome> {
  // Both executors reply with the same envelope shape; translate it in one place.
  const settle = (reply: {
    status: 'success' | 'failure';
    output: Record<string, unknown>;
    error?: string;
  }): NodeOutcome =>
    reply.status === 'success'
      ? { kind: 'success', output: reply.output }
      : { kind: 'failure', error: reply.error ?? `command ${node.command} returned failure` };

  try {
    if (runner === undefined) {
      // Fallback: dispatch as an agentic call with a synthetic agent id.
      const boundTaskId = context['taskId'];
      const taskId =
        typeof boundTaskId === 'string' && boundTaskId.length > 0 ? boundTaskId : runId;
      const agentId = `deterministic:${node.command}`;
      const reply = await dispatcher.dispatch({
        runId,
        nodeId: node.id,
        agentId,
        taskId,
        context: {
          ...context,
          __deterministic: {
            command: node.command,
            args: [...node.args],
            cwd: node.cwd,
            env: node.env,
            timeout_ms: node.timeout_ms,
          },
        },
        iteration,
      });
      return settle(reply);
    }

    const input: DeterministicRunInput = {
      runId,
      nodeId: node.id,
      command: node.command,
      args: node.args,
      context: { ...context },
      iteration,
    };
    // Optional fields are attached only when the node actually declares them.
    if (node.cwd !== undefined) input.cwd = node.cwd;
    if (node.env !== undefined) input.env = node.env;
    if (node.timeout_ms !== undefined) input.timeout_ms = node.timeout_ms;
    const reply = await runner.run(input);
    return settle(reply);
  } catch (cause) {
    const error = cause instanceof Error ? cause.message : String(cause);
    return { kind: 'failure', error };
  }
}
470
+
471
/**
 * Run one `approval` node by opening an approval gate for it via
 * `createApprovalGate`, passing the current bindings, the signing secret and
 * the node's prompt as the reason. The returned `awaiting_approval` outcome
 * carries the gate's token and approval id; the main loop turns it into a
 * `paused` run state.
 */
function executeApprovalNode(
  node: PlaybookApprovalNode,
  runId: string,
  context: Record<string, unknown>,
  db: DatabaseSync,
  secret: string,
): NodeOutcome {
  const { token, approvalId } = createApprovalGate(db, {
    runId,
    nodeId: node.id,
    bindings: context,
    secret,
    reason: node.prompt,
  });
  return { kind: 'awaiting_approval', token, approvalId };
}
492
+
493
+ // ---------------------------------------------------------------------------
494
+ // Main execution loop
495
+ // ---------------------------------------------------------------------------
496
+
497
/**
 * Work out the iteration cap that applies to `node`.
 *
 * A finite, non-negative `on_failure.max_iterations` wins; anything else
 * (unset, negative, `NaN`, infinite, non-numeric) falls back to
 * `runtimeDefault`. The parser already validates the upper bound of 10.
 *
 * @param node - Node whose failure policy is inspected.
 * @param runtimeDefault - Cap used when the node does not declare a usable one.
 */
function iterationCapFor(node: PlaybookNode, runtimeDefault: number): number {
  const declared = node.on_failure?.max_iterations;
  const usable = typeof declared === 'number' && Number.isFinite(declared) && declared >= 0;
  return usable ? declared : runtimeDefault;
}
507
+
508
/**
 * Core step-by-step executor shared by {@link executePlaybook} and
 * {@link resumePlaybook}. Starts at `startNodeId` and follows single
 * successors until a terminal outcome is reached.
 *
 * Persists:
 * - `playbook_runs.current_node` and `iteration_counts` before every attempt,
 *   so a crash mid-dispatch is recoverable.
 * - `playbook_runs.bindings` after every successful merge.
 * - `playbook_runs.status`/`error_context`/`completed_at` at termination.
 *
 * Failure handling per attempt:
 * - If `on_failure.inject_into` names a different node, control is handed to
 *   that node with `__lastError` / `__lastFailedNode` added to the context and
 *   the target's iteration counter reset. An unknown target ends the run as
 *   `failed`.
 * - Otherwise the same node is retried until `attempt >= cap`, after which the
 *   run ends as `exceeded_iteration_cap`. A cap of `0` or `1` therefore makes
 *   the first failure fatal.
 *
 * NOTE(review): the cap only bounds self-retries. A node whose `inject_into`
 * points elsewhere is handed off on every failure, including at or beyond its
 * cap, so a hand-off target that flows back into a persistently failing node
 * can loop without bound. Confirm whether that is intended.
 *
 * Fail-closed on exceptions: if anything inside the loop throws (unknown node
 * id, fan-out detected while resolving the successor, approval-gate or
 * persistence error), the run row is marked `failed` with the error message
 * before the original error is rethrown, so the row is not left in `running`.
 *
 * @internal
 */
async function runFromNode(args: {
  db: DatabaseSync;
  playbook: PlaybookDefinition;
  run: PlaybookRun;
  startNodeId: string;
  nodeIndex: NodeIndex;
  edgeIndex: EdgeIndex;
  context: Record<string, unknown>;
  iterationCounts: Record<string, number>;
  dispatcher: AgentDispatcher;
  deterministicRunner: DeterministicRunner | undefined;
  approvalSecret: string;
  maxIterationsDefault: number;
  now: () => Date;
}): Promise<ExecutePlaybookResult> {
  const {
    db,
    run,
    startNodeId,
    nodeIndex,
    edgeIndex,
    context,
    iterationCounts,
    dispatcher,
    deterministicRunner,
    approvalSecret,
    maxIterationsDefault,
    now,
  } = args;

  let currentId: string | null = startNodeId;
  let lastError: string | undefined;
  let failedNodeId: string | undefined;
  let exceededNodeId: string | undefined;

  try {
    while (currentId !== null) {
      const node = resolveNode(currentId, nodeIndex);
      const cap = iterationCapFor(node, maxIterationsDefault);

      // Advance iteration counter up front so a thrown dispatcher still bumps it.
      const attempt = (iterationCounts[node.id] ?? 0) + 1;
      iterationCounts[node.id] = attempt;

      // Persist per-step bookkeeping before dispatch so crashes are recoverable.
      updatePlaybookRun(db, run.runId, {
        currentNode: node.id,
        iterationCounts: { ...iterationCounts },
      });

      let outcome: NodeOutcome;
      if (node.type === 'agentic') {
        outcome = await executeAgenticNode(node, run.runId, context, attempt, dispatcher);
      } else if (node.type === 'deterministic') {
        outcome = await executeDeterministicNode(
          node,
          run.runId,
          context,
          attempt,
          dispatcher,
          deterministicRunner,
        );
      } else if (node.type === 'approval') {
        outcome = executeApprovalNode(node, run.runId, context, db, approvalSecret);
      } else {
        // Exhaustiveness guard — never type to force a compile-time error on
        // future PlaybookNodeType additions.
        const exhaustive: never = node;
        throw new Error(
          `${E_PLAYBOOK_RUNTIME_INVALID}: unknown node kind ${JSON.stringify(exhaustive)}`,
        );
      }

      if (outcome.kind === 'success') {
        // Merge outputs into context and persist.
        Object.assign(context, outcome.output);
        updatePlaybookRun(db, run.runId, { bindings: { ...context } });
        currentId = resolveNextNodeId(node.id, edgeIndex);
        continue;
      }

      if (outcome.kind === 'awaiting_approval') {
        // Persist pause + token and return. Caller resumes with the token.
        const pausedAt = now().toISOString();
        updatePlaybookRun(db, run.runId, {
          status: 'paused',
          errorContext: null,
          bindings: { ...context },
          iterationCounts: { ...iterationCounts },
        });
        return {
          runId: run.runId,
          terminalStatus: 'pending_approval',
          // `__pausedAt` is only added to the returned snapshot, not persisted.
          finalContext: { ...context, __pausedAt: pausedAt },
          approvalToken: outcome.token,
        };
      }

      // outcome.kind === 'failure' — record the error and evaluate hand-off/retry.
      lastError = outcome.error;
      const injectTarget = node.on_failure?.inject_into;

      // Hand-off takes precedence over the cap: whenever `inject_into` names a
      // different node, control moves there whether or not the cap was reached.
      if (injectTarget !== undefined && injectTarget !== node.id) {
        if (!nodeIndex.has(injectTarget)) {
          // Unresolved target: fail closed on the node that declared it.
          failedNodeId = node.id;
          break;
        }
        context['__lastError'] = outcome.error;
        context['__lastFailedNode'] = node.id;
        updatePlaybookRun(db, run.runId, {
          errorContext: outcome.error,
          bindings: { ...context },
        });
        currentId = injectTarget;
        // Reset the iteration counter on the injected target so it can retry
        // with the enriched context without immediately tripping its own cap.
        iterationCounts[injectTarget] = 0;
        continue;
      }

      // Self-retry path. `cap === 0` disables retries entirely; for cap > 0 up
      // to `cap` total attempts are allowed per node.
      if (attempt >= cap) {
        exceededNodeId = node.id;
        break;
      }

      // Retry the same node (currentId stays the same).
      updatePlaybookRun(db, run.runId, { errorContext: outcome.error });
    }
  } catch (err) {
    // Fail closed: do not leave the run row in `running` after an exception.
    try {
      updatePlaybookRun(db, run.runId, {
        status: 'failed',
        errorContext: err instanceof Error ? err.message : String(err),
        completedAt: now().toISOString(),
        bindings: { ...context },
        iterationCounts: { ...iterationCounts },
      });
    } catch {
      // Best effort only — the original error below is the one callers need.
    }
    throw err;
  }

  // Terminal transition — completed vs failed vs exceeded.
  const completedAt = now().toISOString();
  if (exceededNodeId !== undefined) {
    updatePlaybookRun(db, run.runId, {
      status: 'failed',
      errorContext: lastError ?? null,
      completedAt,
      bindings: { ...context },
      iterationCounts: { ...iterationCounts },
    });
    const result: ExecutePlaybookResult = {
      runId: run.runId,
      terminalStatus: 'exceeded_iteration_cap',
      finalContext: { ...context },
      exceededNodeId,
    };
    if (lastError !== undefined) result.errorContext = lastError;
    return result;
  }

  if (failedNodeId !== undefined) {
    updatePlaybookRun(db, run.runId, {
      status: 'failed',
      errorContext: lastError ?? null,
      completedAt,
      bindings: { ...context },
      iterationCounts: { ...iterationCounts },
    });
    const result: ExecutePlaybookResult = {
      runId: run.runId,
      terminalStatus: 'failed',
      finalContext: { ...context },
      failedNodeId,
    };
    if (lastError !== undefined) result.errorContext = lastError;
    return result;
  }

  // Reached terminal "end" state (no outgoing edges) — run completed.
  updatePlaybookRun(db, run.runId, {
    status: 'completed',
    currentNode: null,
    completedAt,
    bindings: { ...context },
    iterationCounts: { ...iterationCounts },
    errorContext: null,
  });
  return {
    runId: run.runId,
    terminalStatus: 'completed',
    finalContext: { ...context },
  };
}
721
+
722
+ // ---------------------------------------------------------------------------
723
+ // Public entry points
724
+ // ---------------------------------------------------------------------------
725
+
726
/**
 * Run a playbook from its entry node until it reaches a terminal state
 * (`completed`, `failed`, `exceeded_iteration_cap`, or `pending_approval`).
 *
 * Options and the graph's entry node are validated before a `playbook_runs`
 * row is created, so an invalid call leaves no row behind. From then on every
 * step is persisted, which lets HITL pauses continue through
 * {@link resumePlaybook}. The returned
 * {@link ExecutePlaybookResult.finalContext} is a fully-merged snapshot taken
 * at the moment the runtime stopped.
 *
 * @param options - Runtime configuration, including the injected dispatcher.
 * @returns Terminal envelope describing where the run stopped.
 * @throws Error stamped with {@link E_PLAYBOOK_RUNTIME_INVALID} when
 *   `maxIterationsDefault` is not a non-negative integer or the graph has no
 *   entry node.
 */
export async function executePlaybook(
  options: ExecutePlaybookOptions,
): Promise<ExecutePlaybookResult> {
  const { db, playbook, initialContext } = options;

  const now = options.now ?? (() => new Date());
  const maxIterationsDefault = options.maxIterationsDefault ?? 3;
  const capIsValid = Number.isInteger(maxIterationsDefault) && maxIterationsDefault >= 0;
  if (!capIsValid) {
    throw new Error(
      `${E_PLAYBOOK_RUNTIME_INVALID}: maxIterationsDefault must be a non-negative integer (got ${maxIterationsDefault})`,
    );
  }
  const approvalSecret = options.approvalSecret ?? getPlaybookSecret();

  // Index the graph and locate the entry node before touching the database.
  const nodeIndex: NodeIndex = new Map(playbook.nodes.map((n) => [n.id, n]));
  const edgeIndex = buildEdgeIndex(playbook);
  const entry = resolveEntryNode(playbook, edgeIndex);

  // Optional ids are only attached when the caller actually provided them.
  const createInput: Parameters<typeof createPlaybookRun>[1] = {
    playbookName: playbook.name,
    playbookHash: options.playbookHash,
    initialBindings: { ...initialContext },
  };
  if (options.epicId !== undefined) createInput.epicId = options.epicId;
  if (options.sessionId !== undefined) createInput.sessionId = options.sessionId;
  const run = createPlaybookRun(db, createInput);

  return runFromNode({
    db,
    playbook,
    run,
    startNodeId: entry.id,
    nodeIndex,
    edgeIndex,
    // The run works on its own copy so the caller's object is never mutated.
    context: { ...initialContext },
    iterationCounts: {},
    dispatcher: options.dispatcher,
    deterministicRunner: options.deterministicRunner,
    approvalSecret,
    maxIterationsDefault,
    now,
  });
}
783
+
784
/**
 * Resume a playbook run using a HITL approval token. The runtime checks that
 * the token maps to an `approved` {@link PlaybookApproval} row and that the
 * associated run is `paused` (or still `running`), then continues from the
 * approval node's single successor. If the approval node has no successor,
 * the run is marked `completed` immediately.
 *
 * Before continuing, the approval decision is placed in the context under
 * `__lastApproval` and an auto-passed trace row is written for audit purposes.
 *
 * Resuming against a `rejected` gate marks the run `failed` (if it is not
 * already) and then throws.
 *
 * `maxIterationsDefault` is validated exactly as in {@link executePlaybook};
 * without that check a `NaN` cap would never compare as reached and a failing
 * node would retry forever.
 *
 * @param options - Resume configuration, including the approval token.
 * @returns Terminal envelope describing where the resumed run stopped.
 * @throws Error stamped with {@link E_PLAYBOOK_RUNTIME_INVALID} if
 *   `maxIterationsDefault` is not a non-negative integer.
 * @throws Error stamped with {@link E_PLAYBOOK_RESUME_BLOCKED} if the token
 *   is unknown, the gate is still `pending`, the gate was `rejected`, the run
 *   no longer exists, the run is neither `paused` nor `running`, or the
 *   approval node is missing from the supplied playbook.
 */
export async function resumePlaybook(
  options: ResumePlaybookOptions,
): Promise<ExecutePlaybookResult> {
  const now = options.now ?? (() => new Date());

  // Validate caller input before reading or mutating any persisted state.
  const maxIterationsDefault = options.maxIterationsDefault ?? 3;
  if (!Number.isInteger(maxIterationsDefault) || maxIterationsDefault < 0) {
    throw new Error(
      `${E_PLAYBOOK_RUNTIME_INVALID}: maxIterationsDefault must be a non-negative integer (got ${maxIterationsDefault})`,
    );
  }

  const approval = getPlaybookApprovalByToken(options.db, options.approvalToken);
  if (approval === null) {
    throw new Error(
      `${E_PLAYBOOK_RESUME_BLOCKED}: no approval gate for token ${options.approvalToken}`,
    );
  }
  if (approval.status === 'pending') {
    throw new Error(
      `${E_PLAYBOOK_RESUME_BLOCKED}: gate ${approval.approvalId} is still pending — approve before resuming`,
    );
  }
  if (approval.status === 'rejected') {
    const rejectedRun = getPlaybookRun(options.db, approval.runId);
    // Mark run failed on resume-after-reject so dashboards stay consistent.
    if (rejectedRun !== null && rejectedRun.status !== 'failed') {
      updatePlaybookRun(options.db, approval.runId, {
        status: 'failed',
        errorContext: approval.reason ?? 'gate rejected',
        completedAt: now().toISOString(),
      });
    }
    throw new Error(
      `${E_PLAYBOOK_RESUME_BLOCKED}: gate ${approval.approvalId} was rejected` +
        (approval.reason ? ` (${approval.reason})` : ''),
    );
  }

  // approval.status === 'approved' past this point.
  const run = getPlaybookRun(options.db, approval.runId);
  if (run === null) {
    throw new Error(
      `${E_PLAYBOOK_RESUME_BLOCKED}: run ${approval.runId} no longer exists (deleted?)`,
    );
  }
  const validResumeStatuses: readonly PlaybookRunStatus[] = ['paused', 'running'];
  if (!validResumeStatuses.includes(run.status)) {
    throw new Error(
      `${E_PLAYBOOK_RESUME_BLOCKED}: run ${run.runId} is ${run.status}, expected paused|running`,
    );
  }

  // Validate the approval node is present and resolve its single successor.
  const nodeIndex: NodeIndex = new Map(options.playbook.nodes.map((n) => [n.id, n]));
  const edgeIndex = buildEdgeIndex(options.playbook);
  const approvalNode = nodeIndex.get(approval.nodeId);
  if (approvalNode === undefined || approvalNode.type !== 'approval') {
    throw new Error(
      `${E_PLAYBOOK_RESUME_BLOCKED}: approval node ${approval.nodeId} not found in playbook ${options.playbook.name}`,
    );
  }
  const successor = resolveNextNodeId(approvalNode.id, edgeIndex);
  if (successor === null) {
    // Approval at the tail of the graph completes the run immediately.
    const completedAt = now().toISOString();
    updatePlaybookRun(options.db, run.runId, {
      status: 'completed',
      currentNode: null,
      completedAt,
      errorContext: null,
    });
    return {
      runId: run.runId,
      terminalStatus: 'completed',
      finalContext: { ...run.bindings },
    };
  }

  // Resolve the signing secret before mutating the run, so a failure to obtain
  // it cannot leave the row in `running` with nothing executing.
  const approvalSecret = options.approvalSecret ?? getPlaybookSecret();

  // Return the run to `running` before proceeding so dashboards reflect activity.
  updatePlaybookRun(options.db, run.runId, {
    status: 'running',
    currentNode: successor,
    errorContext: null,
  });

  // Continue from the persisted state rather than from caller-supplied values.
  const context: Record<string, unknown> = { ...run.bindings };
  const iterationCounts: Record<string, number> = { ...run.iterationCounts };

  // Log the approval decision into the context so downstream nodes can act on it.
  context['__lastApproval'] = {
    nodeId: approval.nodeId,
    approvalId: approval.approvalId,
    approver: approval.approver,
    reason: approval.reason,
    approvedAt: approval.approvedAt,
  };

  // Persist an approval trace row for audit purposes. createPlaybookApproval
  // is distinct from createApprovalGate — the latter generates the HMAC
  // resume token, while this helper records arbitrary approval state.
  createPlaybookApproval(options.db, {
    runId: run.runId,
    nodeId: approval.nodeId,
    token: `resume:${approval.token}:${now().getTime()}`,
    autoPassed: true,
  });

  const runArgs: Parameters<typeof runFromNode>[0] = {
    db: options.db,
    playbook: options.playbook,
    run,
    startNodeId: successor,
    nodeIndex,
    edgeIndex,
    context,
    iterationCounts,
    dispatcher: options.dispatcher,
    deterministicRunner: options.deterministicRunner,
    approvalSecret,
    maxIterationsDefault,
    now,
  };
  return runFromNode(runArgs);
}