@juicesharp/rpiv-workflow 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +449 -0
  3. package/api.ts +557 -0
  4. package/audit.ts +217 -0
  5. package/built-ins.ts +65 -0
  6. package/command.ts +137 -0
  7. package/docs/cover.png +0 -0
  8. package/docs/cover.svg +120 -0
  9. package/docs/workflow-authoring.md +629 -0
  10. package/docs/workflow-basics.md +122 -0
  11. package/docs-protocol.ts +106 -0
  12. package/fanout.ts +96 -0
  13. package/host.ts +97 -0
  14. package/index.ts +230 -0
  15. package/internal-utils.ts +69 -0
  16. package/internal.ts +27 -0
  17. package/layers.ts +33 -0
  18. package/lifecycle.ts +274 -0
  19. package/load/cache.test.ts +82 -0
  20. package/load/cache.ts +40 -0
  21. package/load/index.ts +159 -0
  22. package/load/merge.ts +136 -0
  23. package/load/normalize.ts +73 -0
  24. package/load/paths.ts +32 -0
  25. package/load/resolve-default.ts +43 -0
  26. package/load/shape-guards.test.ts +74 -0
  27. package/load/shape-guards.ts +42 -0
  28. package/messages.ts +185 -0
  29. package/outcomes/collectors/directory-path.test.ts +64 -0
  30. package/outcomes/collectors/directory-path.ts +40 -0
  31. package/outcomes/collectors/index.ts +21 -0
  32. package/outcomes/collectors/tool-call.test.ts +110 -0
  33. package/outcomes/collectors/tool-call.ts +63 -0
  34. package/outcomes/collectors/transcript-path.test.ts +70 -0
  35. package/outcomes/collectors/transcript-path.ts +53 -0
  36. package/outcomes/collectors/union.test.ts +59 -0
  37. package/outcomes/collectors/union.ts +55 -0
  38. package/outcomes/collectors/url.test.ts +67 -0
  39. package/outcomes/collectors/url.ts +45 -0
  40. package/outcomes/collectors/workspace-diff.test.ts +107 -0
  41. package/outcomes/collectors/workspace-diff.ts +123 -0
  42. package/outcomes/git-commit.test.ts +194 -0
  43. package/outcomes/git-commit.ts +192 -0
  44. package/outcomes/index.ts +22 -0
  45. package/outcomes/parsers/index.ts +11 -0
  46. package/outcomes/parsers/json-body.test.ts +80 -0
  47. package/outcomes/parsers/json-body.ts +50 -0
  48. package/outcomes/side-effect.ts +26 -0
  49. package/output-spec.ts +170 -0
  50. package/output.ts +98 -0
  51. package/package.json +83 -0
  52. package/preview.ts +120 -0
  53. package/routing.ts +79 -0
  54. package/runner/chain-advance.ts +185 -0
  55. package/runner/index.ts +7 -0
  56. package/runner/runner.ts +356 -0
  57. package/runner/script-stage.ts +240 -0
  58. package/runner/stage-lifecycle.ts +447 -0
  59. package/sessions/extraction.ts +297 -0
  60. package/sessions/index.ts +7 -0
  61. package/sessions/sessions.ts +269 -0
  62. package/sessions/spawn.ts +135 -0
  63. package/state/index.ts +27 -0
  64. package/state/paths.ts +46 -0
  65. package/state/reads.ts +190 -0
  66. package/state/state.ts +115 -0
  67. package/state/writes.ts +58 -0
  68. package/transcript.ts +156 -0
  69. package/triggers.ts +27 -0
  70. package/typebox-adapter.ts +48 -0
  71. package/types.ts +237 -0
  72. package/validate-output.ts +120 -0
  73. package/validate-workflow.ts +491 -0
package/routing.ts ADDED
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Next-stage lookup over a `Workflow`'s edge graph.
3
+ *
4
+ * `nextStage` is the single chokepoint: given the current stage name + the
5
+ * runtime context, it returns a `RoutingResult` — `{ kind: "next", stage }`
6
+ * if the chain continues, `{ kind: "stop" }` for terminal stages (no
7
+ * outgoing edge OR explicit `STOP`), `{ kind: "err", reason }` if the
8
+ * routing layer detected a violation (an `EdgeFn` body threw, or an
9
+ * `EdgeFn` returned an undeclared target).
10
+ *
11
+ * Errors are returned, not thrown. The caller (runner) switches on
12
+ * `kind` and routes `"err"` through `recordTerminalFailure` — same as
13
+ * any other halt site.
14
+ */
15
+
16
+ import { type EdgeContext, type EdgeFn, STOP, type Workflow } from "./api.js";
17
+
/**
 * Outcome of a `nextStage` lookup, discriminated on `kind`:
 *
 * - `"next"` — the chain continues at `stage`.
 * - `"stop"` — the current stage is terminal.
 * - `"err"`  — the routing layer detected a violation; `reason` is a
 *   human-readable message.
 *
 * Follows the package convention (cf. `ExtractionOutcome`, `NormalizeResult`)
 * that every multi-state result carries an explicit `kind` discriminator.
 */
export type RoutingResult =
	| { kind: "next"; stage: string }
	| { kind: "stop" }
	| { kind: "err"; reason: string };
24
+
/**
 * Resolve the edge leaving `current` into a `RoutingResult`.
 *
 * - No edge, or an edge equal to `STOP` → `{ kind: "stop" }`.
 * - String edge → validated against the declared stages.
 * - `EdgeFn` edge → invoked with `ctx`; a throw becomes `{ kind: "err" }`,
 *   a `STOP` return becomes `{ kind: "stop" }`, and any other return value is
 *   validated against the declared stages.
 *
 * Load-time `validateWorkflow` should already reject undeclared targets for
 * predicates that carry `.targets` metadata; the check performed here is the
 * runtime last line of defense.
 *
 * @param workflow Workflow whose `edges` / `stages` are consulted.
 * @param current  Name of the stage that just completed.
 * @param ctx      Runtime context handed to an `EdgeFn` edge.
 * @returns A tagged result; errors are returned, never thrown.
 */
export function nextStage(workflow: Workflow, current: string, ctx: EdgeContext): RoutingResult {
	const edge = workflow.edges[current];

	// Terminal cases first: a missing edge and an explicit STOP both end the chain.
	if (edge === undefined) return { kind: "stop" };
	if (edge === STOP) return { kind: "stop" };

	let destination: string;
	if (typeof edge === "string") {
		destination = edge;
	} else {
		const picked = invokeEdgeFn(edge, ctx, current);
		if (picked.kind === "err") return picked;
		if (picked.value === STOP) return { kind: "stop" };
		destination = picked.value;
	}

	return resolveTarget(workflow, current, destination);
}
43
+
/**
 * Reports whether the edge leaving `current` is an `EdgeFn` (a function),
 * meaning a routing decision is made at runtime. The runner uses this to
 * decide whether to write a routing-audit row; string edges are deterministic
 * and are not audited.
 *
 * @returns `true` only when `workflow.edges[current]` is a function.
 */
export function edgeIsDecision(workflow: Workflow, current: string): boolean {
	const edge = workflow.edges[current];
	return typeof edge === "function";
}
52
+
53
+ // ---------------------------------------------------------------------------
54
+ // Internals
55
+ // ---------------------------------------------------------------------------
56
+
/**
 * Call an `EdgeFn` and convert a throw into a returned error.
 *
 * @param fn      Edge function to invoke.
 * @param ctx     Context passed through to `fn`.
 * @param current Name of the stage owning the edge; used only in the error text.
 * @returns `{ kind: "ok", value }` with whatever `fn` returned, or
 *          `{ kind: "err", reason }` describing what `fn` threw.
 */
function invokeEdgeFn(
	fn: EdgeFn,
	ctx: EdgeContext,
	current: string,
): { kind: "ok"; value: string } | { kind: "err"; reason: string } {
	let value: string;
	try {
		value = fn(ctx);
	} catch (thrown) {
		// Non-Error throwables (strings, plain objects) are stringified as-is.
		const detail = thrown instanceof Error ? thrown.message : String(thrown);
		return { kind: "err", reason: `workflow edge function at "${current}" threw: ${detail}` };
	}
	return { kind: "ok", value };
}
72
+
/**
 * Validate that `target` names a declared stage of `workflow`.
 *
 * The lookup requires an OWN property of `workflow.stages`: a bare
 * `stages[target]` would also see inherited members, so a target such as
 * `"constructor"` or `"toString"` would be accepted as a stage on a
 * plain-object map. An own entry holding a falsy value is still treated as
 * undeclared.
 *
 * @param workflow Workflow whose `stages` map is consulted.
 * @param current  Stage the edge originates from; used only in the error text.
 * @param target   Candidate next-stage name (static edge or `EdgeFn` return).
 * @returns `{ kind: "next", stage: target }` when declared, otherwise
 *          `{ kind: "err", reason }`.
 */
function resolveTarget(workflow: Workflow, current: string, target: string): RoutingResult {
	const isOwnEntry = Object.prototype.hasOwnProperty.call(workflow.stages, target);
	if (isOwnEntry && workflow.stages[target]) return { kind: "next", stage: target };
	return {
		kind: "err",
		reason: `workflow edge from "${current}" returned "${target}" which is not a declared stage in workflow "${workflow.name}"`,
	};
}
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Routing layer after a stage completes successfully: pick the next stage,
3
+ * audit predicate-mediated decisions, enforce the backward-jump guard,
4
+ * then recurse via `runStageOrRecordFailure`.
5
+ *
6
+ * `nextStage` returns a tagged union; `advanceChain` switches on `kind`
7
+ * instead of catching. `runStageOrRecordFailure` owns the catch for
8
+ * downstream-stage throws.
9
+ */
10
+
11
+ import { nowIso, recordTerminalFailure } from "../audit.js";
12
+ import { skillStageRef } from "../lifecycle.js";
13
+ import {
14
+ ERR_BACKWARD_JUMP_EXHAUSTED,
15
+ MSG_BACKWARD_JUMP_EXHAUSTED,
16
+ MSG_CHAIN_ADVANCE_FAILED,
17
+ MSG_ROUTING_AUDIT_DROPPED,
18
+ } from "../messages.js";
19
+ import { edgeIsDecision, nextStage } from "../routing.js";
20
+ import { appendRoutingDecision } from "../state/index.js";
21
+ import type { RunContext, RunnerCtx } from "../types.js";
22
+ import { finalizeWorkflow, lifecycleCtxFor, runStageOrRecordFailure } from "./runner.js";
23
+
/**
 * Route onward after `currentName` has completed successfully.
 *
 * Steps, in order:
 * 1. Mark `currentName` visited — done BEFORE the edge is consulted so the
 *    stage is recorded even when routing then fails.
 * 2. Ask `nextStage` for the routing result; an `"err"` result halts the run
 *    via `haltOnRoutingError`.
 * 3. On `"stop"`, fire `onRoute` with `"stop"` and finalize the workflow.
 * 4. On `"next"`: for decision edges only, write the routing audit row and
 *    apply the backward-jump guard (which may end the run); then fire
 *    `onRoute` and hand over to `runStageOrRecordFailure`, which owns the
 *    catch for throws out of the next stage so failures are attributed to
 *    that stage rather than to `currentName`.
 *
 * @param curCtx      Runner context valid for the current session.
 * @param currentName Stage that just completed.
 * @param idx         Zero-based position of `currentName` in this run.
 * @param run         Per-run context (workflow, state, lifecycle, ...).
 */
export async function advanceChain(
	curCtx: RunnerCtx,
	currentName: string,
	idx: number,
	run: RunContext,
): Promise<void> {
	run.visited.add(currentName);

	const viaPredicate = edgeIsDecision(run.workflow, currentName);
	const routed = nextStage(run.workflow, currentName, { output: run.state.output, state: run.state });

	if (routed.kind === "err") {
		await haltOnRoutingError(curCtx, run, currentName, routed.reason);
		return;
	}

	const stageNumber = idx + 1;
	const skill = run.workflow.stages[currentName]?.skill ?? currentName;
	const fromRef = skillStageRef(currentName, stageNumber, skill);

	if (routed.kind === "stop") {
		await run.lifecycle.fire(curCtx, "onRoute", fromRef, "stop", lifecycleCtxFor(run));
		finalizeWorkflow(curCtx, run);
		return;
	}

	const nextName = routed.stage;
	if (viaPredicate) {
		auditRoutingDecision(curCtx, run, idx, currentName, nextName);
		const mayContinue = await checkBackwardJumpGuard(curCtx, run, nextName);
		if (!mayContinue) return;
	}

	// Deterministic edges fire onRoute too, so listeners observe every transition.
	await run.lifecycle.fire(curCtx, "onRoute", fromRef, nextName, lifecycleCtxFor(run));

	await runStageOrRecordFailure(curCtx, nextName, stageNumber, run);
}
73
+
/**
 * Append a routing-decision audit row for a predicate-mediated transition.
 *
 * A failed write does not halt the run: the decision is pushed onto
 * `run.state.telemetry.droppedRoutingRows` and a warning is shown through
 * `curCtx.ui.notify`. The asymmetry with stage rows is deliberate — routing
 * rows are telemetry, not reconstruction inputs.
 *
 * @param curCtx      Runner context used for the warning notification.
 * @param run         Per-run context supplying `cwd`, `runId` and `state`.
 * @param idx         Zero-based index of the completed stage; the row stores `idx + 1`.
 * @param currentName Stage the decision was made from.
 * @param nextName    Stage the decision resolved to.
 */
function auditRoutingDecision(
	curCtx: RunnerCtx,
	run: RunContext,
	idx: number,
	currentName: string,
	nextName: string,
): void {
	const decisionRecord = { fromStageIndex: idx + 1, fromStage: currentName, decision: nextName };
	const persisted = appendRoutingDecision(run.cwd, run.runId, {
		type: "routing",
		...decisionRecord,
		ts: nowIso(),
	});
	if (persisted) return;

	run.state.telemetry.droppedRoutingRows.push(decisionRecord);
	curCtx.ui.notify(MSG_ROUTING_AUDIT_DROPPED(currentName, nextName), "warning");
}
105
+
/**
 * Enforce the per-loop cap on decision-edge retries.
 *
 * Called only for decision edges (see `advanceChain`). A decision that
 * resolves to an already-visited stage counts as one backward jump; a decision
 * that resolves to a not-yet-visited stage resets the counter to zero, so each
 * independent loop gets its own budget. Deterministic hops never reach this
 * function and therefore never consume budget.
 *
 * When the counter exceeds `run.maxBackwardJumps`, a terminal failure is
 * recorded against `nextName` — the stage the guard refused to re-enter.
 *
 * @returns `true` when the run may continue; `false` when the cap tripped and
 *          the failure has been recorded.
 */
async function checkBackwardJumpGuard(curCtx: RunnerCtx, run: RunContext, nextName: string): Promise<boolean> {
	const { telemetry } = run.state;

	const isRevisit = run.visited.has(nextName);
	if (!isRevisit) {
		// Fresh territory: the previous loop (if any) has been escaped.
		telemetry.backwardJumps = 0;
		return true;
	}

	telemetry.backwardJumps += 1;
	const used = telemetry.backwardJumps;
	const cap = run.maxBackwardJumps;
	if (used <= cap) return true;

	await recordTerminalFailure(
		curCtx,
		{
			cwd: run.cwd,
			runId: run.runId,
			state: run.state,
			stageName: nextName,
			skill: nextName,
			lifecycle: run.lifecycle,
			runIdentity: { workflow: run.workflow.name, totalStages: run.totalStages, trigger: run.trigger },
		},
		{
			status: "failed",
			notifyMsg: MSG_BACKWARD_JUMP_EXHAUSTED(used, cap),
			notifyLevel: "error",
			errMsg: ERR_BACKWARD_JUMP_EXHAUSTED(used, cap),
		},
	);
	return false;
}
155
+
/**
 * Record a terminal failure for a routing-layer error (an `EdgeFn` threw, or
 * an edge resolved to an undeclared stage). The failure is attributed to
 * `currentName`, because the faulty edge belongs to the just-completed stage.
 *
 * @param curCtx      Runner context passed through to `recordTerminalFailure`.
 * @param run         Per-run context supplying identity, state and lifecycle.
 * @param currentName Stage whose outgoing edge failed.
 * @param reason      Error text from the routing result; also used as `errMsg`.
 */
async function haltOnRoutingError(
	curCtx: RunnerCtx,
	run: RunContext,
	currentName: string,
	reason: string,
): Promise<void> {
	const runIdentity = { workflow: run.workflow.name, totalStages: run.totalStages, trigger: run.trigger };
	const auditCtx = {
		cwd: run.cwd,
		runId: run.runId,
		state: run.state,
		stageName: currentName,
		skill: currentName,
		lifecycle: run.lifecycle,
		runIdentity,
	};

	await recordTerminalFailure(curCtx, auditCtx, {
		status: "failed",
		notifyMsg: MSG_CHAIN_ADVANCE_FAILED(currentName, reason),
		notifyLevel: "error",
		errMsg: reason,
	});
}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Workflow runner public surface. The runner is internally split into
3
+ * three files (see `runner.ts`'s header for the module map); this barrel
4
+ * re-exports only the symbols the package itself needs to publish.
5
+ */
6
+
7
+ export { MAX_BACKWARD_JUMPS, type RunWorkflowOptions, type RunWorkflowResult, runWorkflow } from "./runner.js";
@@ -0,0 +1,356 @@
1
+ /**
2
+ * Workflow orchestration entry point. `runWorkflow` walks a `Workflow`'s
3
+ * edge graph stage-by-stage; per-stage work (sessions, extraction,
4
+ * validation, audit row writes) lives in sessions.ts + audit.ts. This
5
+ * directory owns graph traversal, per-stage prerequisites, and routing.
6
+ *
7
+ * Modules:
8
+ * - runner.ts — runWorkflow + countReachableStages +
9
+ * runStageOrRecordFailure + finalizeWorkflow.
10
+ * - stage-lifecycle.ts — runStage + StagePreflightError + preflight
11
+ * pipeline + outcome.collector.snapshot hook.
12
+ * - chain-advance.ts — advanceChain + routing audit + backward-jump
13
+ * guard + halt-on-error.
14
+ *
15
+ * Ctx lifecycle: every level only touches the ctx it was handed.
16
+ * - `newSession({cancelled: false})` invalidates the outer ctx; all
17
+ * further work runs on `freshCtx` inside `withSession`, and the
18
+ * outer function simply unwinds.
19
+ * - `cancelled: true` means no replacement happened — outer ctx remains
20
+ * valid.
21
+ * - Continue policy has no newSession — same ctx throughout.
22
+ *
23
+ * Vocabulary: "stage" = one stage activation in this run; "phase" = one
24
+ * `## Phase N:` subdivision inside an implement plan artifact.
25
+ */
26
+
27
+ import type { Workflow } from "../api.js";
28
+ import { notifyPartialArtifacts, nowIso, recordTerminalFailure } from "../audit.js";
29
+ import { handleToString } from "../handle.js";
30
+ import type { WorkflowContext, WorkflowHost } from "../host.js";
31
+ import { currentPrimaryArtifact } from "../internal-utils.js";
32
+ import { buildLifecycleContext, LifecycleDispatcher, type LifecycleListeners } from "../lifecycle.js";
33
+ import { MSG_STAGE_THREW, MSG_WORKFLOW_COMPLETE, STATUS_KEY } from "../messages.js";
34
+ import { generateRunId, writeHeader } from "../state/index.js";
35
+ import { DEFAULT_TRIGGER, type RunTrigger } from "../triggers.js";
36
+ import type { RunContext, RunnerCtx, RunState } from "../types.js";
37
+ import { runStage, StagePreflightError } from "./stage-lifecycle.js";
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Policy constants
41
+ // ---------------------------------------------------------------------------
42
+
/**
 * Default per-loop budget of decision-edge retries ("backward jumps").
 *
 * A backward jump is a *decision* edge resolving to an already-visited stage,
 * i.e. the predicate chose to retry. Deterministic edges that make up the body
 * of a loop are not counted, so the budget is spent per retry iteration, not
 * per hop. A decision that lands on a not-yet-visited stage resets the
 * counter, giving every independent loop its own budget.
 *
 * With the value 2, a loop runs once unconditionally and may be retried up to
 * two more times; the third retry decision trips the guard.
 */
export const MAX_BACKWARD_JUMPS = 2;
53
+
54
+ // ---------------------------------------------------------------------------
55
+ // Public surface
56
+ // ---------------------------------------------------------------------------
57
+
/** Inputs accepted by `runWorkflow`. */
export interface RunWorkflowOptions {
	/** Workflow to execute. The caller resolves it by name from `LoadedWorkflows`. */
	workflow: Workflow;
	/** Argument for the start stage; also recorded in the run's JSONL header. */
	input: string;
	/**
	 * Workflow host. Required when any stage uses the `"continue"` session
	 * policy (those stages go through `host.sendUserMessage`); `runWorkflow`
	 * rejects such a workflow up front when no host is supplied.
	 */
	host?: WorkflowHost;
	/** Backward-jump budget for this run. Defaults to `MAX_BACKWARD_JUMPS`. */
	maxBackwardJumps?: number;
	/**
	 * What triggered this run. `/wf` sets `{ kind: "command", name: "wf" }`;
	 * when omitted, `DEFAULT_TRIGGER` is used. The value is recorded in the
	 * JSONL header and surfaced on every lifecycle callback via
	 * `LifecycleContext.trigger`.
	 */
	trigger?: RunTrigger;
	/**
	 * Lifecycle listeners scoped to this call. They fire AFTER every globally
	 * registered bundle (see `registerLifecycle`). A listener that throws is
	 * caught and reported via `ctx.ui.notify(..., "warning")`; it never halts
	 * the run.
	 */
	lifecycle?: LifecycleListeners;
}
82
+
/** Envelope returned by `runWorkflow`, for both successful and failed runs. */
export interface RunWorkflowResult {
	/**
	 * On-disk identity of the run: the `<run-id>` part of
	 * `<cwd>/.rpiv/workflows/<run-id>.jsonl`. Consumers can pass it to
	 * `readLastStage` / `listArtifacts` without recomputing the slug.
	 *
	 * Undefined ONLY for entry rejections (start stage not declared, or
	 * continue-policy stages without a host), where no JSONL file was created.
	 */
	runId?: string;
	/** Number of stages that completed, taken from the run state at termination. */
	stagesCompleted: number;
	/** `true` only when the chain reached a terminal stage and was finalized. */
	success: boolean;
	/**
	 * Primary artifact at termination, serialised to its handle's canonical
	 * string form (fs → path, url → href, opaque → id). Undefined when no stage
	 * produced one. Callers needing the structured handle read
	 * `output.artifacts[0]` off the last recorded stage (via `readLastStage`).
	 */
	lastArtifact?: string;
	/** Failure message recorded at termination, if any. */
	error?: string;
	/**
	 * Routing decisions that were made in memory but whose JSONL audit row
	 * failed to persist. Present only when at least one row was dropped. Lets
	 * readers of the JSONL tell a missing routing row ("deterministic edge —
	 * never written") from a dropped one ("decision made, write failed"). The
	 * run can still succeed: routing rows are telemetry, not reconstruction
	 * inputs.
	 */
	droppedRoutingRows?: Array<{ fromStageIndex: number; fromStage: string; decision: string }>;
}
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // runWorkflow — workflow entry point
118
+ // ---------------------------------------------------------------------------
119
+
/**
 * Execute `options.workflow` from its start stage and return a result envelope.
 *
 * Entry rejections (returned, not thrown, with no JSONL file written):
 * - the start stage is not declared;
 * - some stage uses the `"continue"` session policy but no host was supplied.
 *
 * Otherwise the function writes the JSONL header, builds the `RunContext`,
 * fires `onWorkflowStart`, runs the start stage through
 * `runStageOrRecordFailure` (so a throw out of the start stage still leaves a
 * failure row rather than a header-only file), then builds the result and
 * fires `onWorkflowEnd`.
 *
 * Ctx note: each later `newSession()` is invoked on the fresh ctx of the
 * previous session — never on a captured outer ctx, which Pi invalidates once
 * the session is replaced.
 *
 * @param ctx     Context for the session the run starts in.
 * @param options Workflow, input and optional host / policy overrides.
 * @returns The run's result envelope.
 */
export async function runWorkflow(ctx: WorkflowContext, options: RunWorkflowOptions): Promise<RunWorkflowResult> {
	const { workflow, input, host } = options;

	/** Envelope for a rejection that happens before any run state exists. */
	const rejectAtEntry = (error: string): RunWorkflowResult => ({ stagesCompleted: 0, success: false, error });

	if (!workflow.stages[workflow.start]) {
		return rejectAtEntry(`Workflow "${workflow.name}" start stage "${workflow.start}" is not declared`);
	}

	// Without a host the first continue-policy stage would throw; rejecting
	// here gives embedders a clean envelope instead.
	if (host === undefined) {
		const hasContinueStage = Object.values(workflow.stages).some((stage) => stage.sessionPolicy === "continue");
		if (hasContinueStage) {
			return rejectAtEntry("workflow contains continue-policy stages which require a workflow host");
		}
	}

	const { cwd } = ctx;
	const runId = generateRunId();
	const totalStages = countReachableStages(workflow);
	const trigger = options.trigger ?? DEFAULT_TRIGGER;

	writeHeader(cwd, { runId, workflow: workflow.name, input, ts: nowIso(), trigger });

	const state: RunState = {
		originalInput: input,
		primaryArtifact: undefined,
		output: undefined,
		named: {},
		stagesCompleted: 0,
		lastAllocatedStageNumber: 0,
		telemetry: { backwardJumps: 0, droppedRoutingRows: [] },
		termination: { success: false, error: undefined },
	};

	const maxBackwardJumps = options.maxBackwardJumps ?? MAX_BACKWARD_JUMPS;
	const lifecycle = new LifecycleDispatcher(options.lifecycle);

	// The skill registry must be enumerated BEFORE any stage opens a fresh
	// session: Pi invalidates the host handle on the first `ctx.newSession()`.
	// From here on the runner reads `run.registeredSkills`; the host itself is
	// kept only as `run.continueHost` for continue-policy stages.
	const registeredSkills = host ? snapshotRegisteredSkills(host) : undefined;

	const run: RunContext = {
		cwd,
		runId,
		workflow,
		totalStages,
		state,
		visited: new Set(),
		registeredSkills,
		continueHost: host,
		maxBackwardJumps,
		trigger,
		lifecycle,
	};

	await lifecycle.fire(ctx, "onWorkflowStart", lifecycleCtxFor(run));

	// Same wrapper advanceChain uses for downstream stages.
	await runStageOrRecordFailure(ctx, workflow.start, 0, run);

	const primary = currentPrimaryArtifact(state);
	const result: RunWorkflowResult = {
		runId,
		stagesCompleted: state.stagesCompleted,
		success: state.termination.success,
		lastArtifact: primary ? handleToString(primary.handle) : undefined,
		error: state.termination.error,
	};
	// Expose the field only when something was actually dropped.
	if (state.telemetry.droppedRoutingRows.length > 0) {
		result.droppedRoutingRows = state.telemetry.droppedRoutingRows;
	}

	await lifecycle.fire(ctx, "onWorkflowEnd", result, lifecycleCtxFor(run));
	return result;
}
227
+
/**
 * Build a `LifecycleContext` from `run`. Called afresh for every lifecycle
 * fire, so listeners are handed the run's current `state` each time.
 */
export function lifecycleCtxFor(run: RunContext) {
	const { cwd, runId, totalStages, trigger, state } = run;
	return buildLifecycleContext({ cwd, runId, workflow: run.workflow.name, totalStages, trigger, state });
}
239
+
/**
 * Count the declared stages reachable from `workflow.start` by breadth-first
 * search over the edge graph. Used as the upper bound for the status-line
 * denominator.
 *
 * String edges contribute their target; `EdgeFn` edges contribute every entry
 * of their `.targets` array. `"stop"` entries and targets that are not
 * declared stages are ignored.
 *
 * `validate-workflow.ts` enforces `.targets` on every `EdgeFn` at load time.
 * Meeting one without it here means validation was bypassed (test fixture or
 * programmatic embedder), so the function fails loudly instead of silently
 * counting all declared stages.
 *
 * @param workflow Workflow to traverse.
 * @returns Number of distinct stage names visited, including the start stage.
 * @throws Error when a reachable `EdgeFn` edge has no `.targets` array.
 */
function countReachableStages(workflow: Workflow): number {
	const seen = new Set<string>();
	const queue: string[] = [workflow.start];

	// Cursor-based FIFO: advancing an index keeps each dequeue O(1), where
	// `Array.prototype.shift` would re-index the whole array every time.
	for (let head = 0; head < queue.length; head++) {
		const cur = queue[head];
		if (cur === undefined || seen.has(cur)) continue;
		seen.add(cur);

		const edge = workflow.edges[cur];
		if (edge === undefined || edge === "stop") continue;

		if (typeof edge === "string") {
			if (workflow.stages[edge] && !seen.has(edge)) queue.push(edge);
			continue;
		}

		if (!Array.isArray(edge.targets)) {
			throw new Error(
				`countReachableStages: edge from "${cur}" is an EdgeFn without .targets — validateWorkflow should have rejected this workflow`,
			);
		}
		for (const t of edge.targets) {
			if (t !== "stop" && workflow.stages[t] && !seen.has(t)) queue.push(t);
		}
	}

	return seen.size;
}
272
+
/**
 * Run stage `name` via `runStage`, turning any throw into a recorded terminal
 * failure attributed to `name` — the stage that actually threw — rather than
 * to the stage before it in the chain.
 *
 * Shared by `runWorkflow` (start stage) and `advanceChain` (every later
 * stage), so "stage threw" is translated into a JSONL failure row in exactly
 * one place.
 *
 * Two kinds of throw are handled:
 * - `StagePreflightError` — a known preflight failure; its own `skill`,
 *   `notifyMsg` and `errMsg` are recorded as carried, and when
 *   `notifyPartial` is set, `notifyPartialArtifacts` is passed along as a
 *   callback.
 * - Anything else — recorded with the generic `MSG_STAGE_THREW` message,
 *   using the stage name as the skill.
 *
 * @param curCtx Runner context the stage is started from.
 * @param name   Stage to run.
 * @param idx    Zero-based position of the stage in this run.
 * @param run    Per-run context.
 */
export async function runStageOrRecordFailure(
	curCtx: RunnerCtx,
	name: string,
	idx: number,
	run: RunContext,
): Promise<void> {
	try {
		await runStage(curCtx, name, idx, run);
	} catch (thrown) {
		if (!(thrown instanceof StagePreflightError)) {
			// Unexpected machinery failure: fall back to the generic message.
			const reason = thrown instanceof Error ? thrown.message : String(thrown);
			await recordTerminalFailure(curCtx, auditCtxFor(run, name, name), {
				status: "failed",
				notifyMsg: MSG_STAGE_THREW(name, reason),
				notifyLevel: "error",
				errMsg: reason,
			});
			return;
		}

		await recordTerminalFailure(
			curCtx,
			auditCtxFor(run, name, thrown.skill),
			{ status: "failed", notifyMsg: thrown.notifyMsg, notifyLevel: "error", errMsg: thrown.errMsg },
			thrown.notifyPartial ? (ctx) => notifyPartialArtifacts(ctx, run.cwd, run.runId) : undefined,
		);
	}
}
317
+
/**
 * Assemble the audit context passed to `recordTerminalFailure` for a stage
 * failure that escaped a session (preflight halts, downstream throws).
 *
 * @param run       Per-run context supplying identity, state and lifecycle.
 * @param stageName Stage the failure is attributed to.
 * @param skill     Skill name to record for that stage.
 */
function auditCtxFor(run: RunContext, stageName: string, skill: string) {
	const { cwd, runId, state, lifecycle, totalStages, trigger } = run;
	const runIdentity = { workflow: run.workflow.name, totalStages, trigger };
	return { cwd, runId, state, stageName, skill, lifecycle, runIdentity };
}
330
+
/**
 * Mark the run as successfully finished: clear the workflow status-line
 * entry, show the completion notice with the number of completed stages, and
 * set `run.state.termination.success`.
 */
export function finalizeWorkflow(curCtx: RunnerCtx, run: RunContext): void {
	const { ui } = curCtx;
	const { state } = run;

	ui.setStatus(STATUS_KEY, undefined);
	ui.notify(MSG_WORKFLOW_COMPLETE(state.stagesCompleted), "info");
	state.termination.success = true;
}
336
+
/**
 * Collect the names of all skill commands the host currently exposes; the
 * resulting set is the `registeredSkills` snapshot read by
 * `ensureSkillRegistered`.
 *
 * Pi prefixes skill-source commands with `"skill:"`; the prefix is stripped
 * so the set's entries compare directly against `stage.skill`. Commands whose
 * `source` is not `"skill"` (e.g. slash commands registered by extensions) are
 * skipped.
 *
 * Must run before any `ctx.newSession()` — that is when Pi marks the
 * `WorkflowHost` handle stale — which is why `runWorkflow` calls it once, up
 * front.
 *
 * @param host Host whose command list is enumerated.
 * @returns Skill names without the `"skill:"` prefix.
 */
function snapshotRegisteredSkills(host: WorkflowHost): ReadonlySet<string> {
	const SKILL_PREFIX = "skill:";
	const names = new Set<string>();

	for (const cmd of host.getCommands()) {
		if (cmd.source === "skill") {
			const raw = cmd.name;
			names.add(raw.startsWith(SKILL_PREFIX) ? raw.slice(SKILL_PREFIX.length) : raw);
		}
	}

	return names;
}