@juicesharp/rpiv-workflow 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +449 -0
- package/api.ts +557 -0
- package/audit.ts +217 -0
- package/built-ins.ts +65 -0
- package/command.ts +137 -0
- package/docs/cover.png +0 -0
- package/docs/cover.svg +120 -0
- package/docs/workflow-authoring.md +629 -0
- package/docs/workflow-basics.md +122 -0
- package/docs-protocol.ts +106 -0
- package/fanout.ts +96 -0
- package/host.ts +97 -0
- package/index.ts +230 -0
- package/internal-utils.ts +69 -0
- package/internal.ts +27 -0
- package/layers.ts +33 -0
- package/lifecycle.ts +274 -0
- package/load/cache.test.ts +82 -0
- package/load/cache.ts +40 -0
- package/load/index.ts +159 -0
- package/load/merge.ts +136 -0
- package/load/normalize.ts +73 -0
- package/load/paths.ts +32 -0
- package/load/resolve-default.ts +43 -0
- package/load/shape-guards.test.ts +74 -0
- package/load/shape-guards.ts +42 -0
- package/messages.ts +185 -0
- package/outcomes/collectors/directory-path.test.ts +64 -0
- package/outcomes/collectors/directory-path.ts +40 -0
- package/outcomes/collectors/index.ts +21 -0
- package/outcomes/collectors/tool-call.test.ts +110 -0
- package/outcomes/collectors/tool-call.ts +63 -0
- package/outcomes/collectors/transcript-path.test.ts +70 -0
- package/outcomes/collectors/transcript-path.ts +53 -0
- package/outcomes/collectors/union.test.ts +59 -0
- package/outcomes/collectors/union.ts +55 -0
- package/outcomes/collectors/url.test.ts +67 -0
- package/outcomes/collectors/url.ts +45 -0
- package/outcomes/collectors/workspace-diff.test.ts +107 -0
- package/outcomes/collectors/workspace-diff.ts +123 -0
- package/outcomes/git-commit.test.ts +194 -0
- package/outcomes/git-commit.ts +192 -0
- package/outcomes/index.ts +22 -0
- package/outcomes/parsers/index.ts +11 -0
- package/outcomes/parsers/json-body.test.ts +80 -0
- package/outcomes/parsers/json-body.ts +50 -0
- package/outcomes/side-effect.ts +26 -0
- package/output-spec.ts +170 -0
- package/output.ts +98 -0
- package/package.json +83 -0
- package/preview.ts +120 -0
- package/routing.ts +79 -0
- package/runner/chain-advance.ts +185 -0
- package/runner/index.ts +7 -0
- package/runner/runner.ts +356 -0
- package/runner/script-stage.ts +240 -0
- package/runner/stage-lifecycle.ts +447 -0
- package/sessions/extraction.ts +297 -0
- package/sessions/index.ts +7 -0
- package/sessions/sessions.ts +269 -0
- package/sessions/spawn.ts +135 -0
- package/state/index.ts +27 -0
- package/state/paths.ts +46 -0
- package/state/reads.ts +190 -0
- package/state/state.ts +115 -0
- package/state/writes.ts +58 -0
- package/transcript.ts +156 -0
- package/triggers.ts +27 -0
- package/typebox-adapter.ts +48 -0
- package/types.ts +237 -0
- package/validate-output.ts +120 -0
- package/validate-workflow.ts +491 -0
package/routing.ts
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Next-stage lookup over a `Workflow`'s edge graph.
|
|
3
|
+
*
|
|
4
|
+
* `nextStage` is the single chokepoint: given the current stage name + the
|
|
5
|
+
* runtime context, it returns a `RoutingResult` — `{ kind: "next", stage }`
|
|
6
|
+
* if the chain continues, `{ kind: "stop" }` for terminal stages (no
|
|
7
|
+
* outgoing edge OR explicit `STOP`), `{ kind: "err", reason }` if the
|
|
8
|
+
* routing layer detected a violation (an `EdgeFn` body threw, or an
|
|
9
|
+
* `EdgeFn` returned an undeclared target).
|
|
10
|
+
*
|
|
11
|
+
* Errors are returned, not thrown. The caller (runner) switches on
|
|
12
|
+
* `kind` and routes `"err"` through `recordTerminalFailure` — same as
|
|
13
|
+
* any other halt site.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { type EdgeContext, type EdgeFn, STOP, type Workflow } from "./api.js";
|
|
17
|
+
|
|
18
|
+
/**
 * Outcome of a `nextStage` lookup, discriminated by `kind`:
 *
 * - `"next"` — the chain continues at `stage`.
 * - `"stop"` — the chain ends at the current stage.
 * - `"err"`  — routing failed; `reason` carries a human-readable explanation.
 *
 * Follows the package-wide convention (also used by `ExtractionOutcome` and
 * `NormalizeResult`) that every multi-state result carries an explicit
 * `kind` discriminator.
 */
export type RoutingResult =
  | { kind: "next"; stage: string }
  | { kind: "stop" }
  | { kind: "err"; reason: string };
|
|
24
|
+
|
|
25
|
+
/**
 * Resolve the stage that follows `current` in `workflow`'s edge graph.
 *
 * @param workflow Workflow whose `edges` / `stages` tables are consulted.
 * @param current  Name of the stage that just completed.
 * @param ctx      Runtime context handed to the edge when it is an `EdgeFn`.
 * @returns `{ kind: "stop" }` when `current` has no outgoing edge or the edge
 *   (static, or as picked by the `EdgeFn`) is `STOP`; `{ kind: "next", stage }`
 *   when the edge resolves to a declared stage; `{ kind: "err", reason }` when
 *   the `EdgeFn` threw or the resolved target is not a declared stage.
 *
 * Never throws for routing problems — they are reported through the `"err"`
 * variant. Load-time `validateWorkflow` is expected to reject undeclared
 * targets for predicates carrying `.targets`; the check here is the runtime
 * backstop.
 */
export function nextStage(workflow: Workflow, current: string, ctx: EdgeContext): RoutingResult {
  const edge = workflow.edges[current];

  // Terminal stage: nothing is wired out of `current`, or it is wired to STOP.
  if (edge === undefined || edge === STOP) {
    return { kind: "stop" };
  }

  // Static edge: the target name is known without running any user code.
  if (typeof edge === "string") {
    return resolveTarget(workflow, current, edge);
  }

  // Predicate edge: run the user's function, then treat its pick like a static edge.
  const outcome = invokeEdgeFn(edge, ctx, current);
  if (outcome.kind === "err") {
    return outcome;
  }
  return outcome.value === STOP ? { kind: "stop" } : resolveTarget(workflow, current, outcome.value);
}
|
|
43
|
+
|
|
44
|
+
/**
 * Report whether the edge leaving `current` is an `EdgeFn` (a function) rather
 * than a static string edge or no edge at all.
 *
 * The runner uses the answer to decide whether a routing-audit row should be
 * written: static edges are deterministic, so only function edges represent
 * an actual routing decision.
 *
 * @param workflow Workflow whose `edges` table is inspected.
 * @param current  Name of the stage whose outgoing edge is checked.
 * @returns `true` iff `workflow.edges[current]` is a function.
 */
export function edgeIsDecision(workflow: Workflow, current: string): boolean {
  const edge = workflow.edges[current];
  return typeof edge === "function";
}
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Internals
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
/**
 * Run a user-supplied `EdgeFn` and convert a throw into an error result.
 *
 * @param fn      Edge function to call.
 * @param ctx     Context passed to `fn` as its only argument.
 * @param current Name of the stage the edge leaves; only used in the error text.
 * @returns `{ kind: "ok", value }` with whatever `fn` returned, or
 *   `{ kind: "err", reason }` when `fn` threw. Non-`Error` throwables are
 *   stringified with `String(...)`.
 */
function invokeEdgeFn(
  fn: EdgeFn,
  ctx: EdgeContext,
  current: string,
): { kind: "ok"; value: string } | { kind: "err"; reason: string } {
  let value: string;
  try {
    value = fn(ctx);
  } catch (thrown) {
    const detail = thrown instanceof Error ? thrown.message : String(thrown);
    return { kind: "err", reason: `workflow edge function at "${current}" threw: ${detail}` };
  }
  return { kind: "ok", value };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Turn an edge target name into a `RoutingResult`.
 *
 * @param workflow Workflow whose `stages` table defines the declared stages.
 * @param current  Name of the stage the edge leaves; only used in the error text.
 * @param target   Stage name the edge resolved to.
 * @returns `{ kind: "next", stage: target }` when `target` is a declared stage,
 *   otherwise `{ kind: "err", reason }`.
 */
function resolveTarget(workflow: Workflow, current: string, target: string): RoutingResult {
  // Require an OWN property: a bare `workflow.stages[target]` lookup is truthy
  // for names inherited from Object.prototype ("toString", "constructor", ...),
  // which would let an EdgeFn route to a stage that was never declared.
  const declared =
    Object.prototype.hasOwnProperty.call(workflow.stages, target) && Boolean(workflow.stages[target]);
  if (declared) {
    return { kind: "next", stage: target };
  }
  return {
    kind: "err",
    reason: `workflow edge from "${current}" returned "${target}" which is not a declared stage in workflow "${workflow.name}"`,
  };
}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Routing layer after a stage completes successfully: pick the next stage,
|
|
3
|
+
* audit predicate-mediated decisions, enforce the backward-jump guard,
|
|
4
|
+
* then recurse via `runStageOrRecordFailure`.
|
|
5
|
+
*
|
|
6
|
+
* `nextStage` returns a tagged union; `advanceChain` switches on `kind`
|
|
7
|
+
* instead of catching. `runStageOrRecordFailure` owns the catch for
|
|
8
|
+
* downstream-stage throws.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { nowIso, recordTerminalFailure } from "../audit.js";
|
|
12
|
+
import { skillStageRef } from "../lifecycle.js";
|
|
13
|
+
import {
|
|
14
|
+
ERR_BACKWARD_JUMP_EXHAUSTED,
|
|
15
|
+
MSG_BACKWARD_JUMP_EXHAUSTED,
|
|
16
|
+
MSG_CHAIN_ADVANCE_FAILED,
|
|
17
|
+
MSG_ROUTING_AUDIT_DROPPED,
|
|
18
|
+
} from "../messages.js";
|
|
19
|
+
import { edgeIsDecision, nextStage } from "../routing.js";
|
|
20
|
+
import { appendRoutingDecision } from "../state/index.js";
|
|
21
|
+
import type { RunContext, RunnerCtx } from "../types.js";
|
|
22
|
+
import { finalizeWorkflow, lifecycleCtxFor, runStageOrRecordFailure } from "./runner.js";
|
|
23
|
+
|
|
24
|
+
/**
 * Route onward after `currentName` has completed successfully.
 *
 * Steps, in order:
 *  1. Mark `currentName` as visited.
 *  2. Ask `nextStage` for the routing result; an `"err"` result halts the run
 *     via `haltOnRoutingError`.
 *  3. On `"stop"`, fire `onRoute` with `"stop"` and finalize the workflow.
 *  4. On `"next"`, when the edge was an `EdgeFn`, write the routing audit row
 *     and apply the backward-jump guard (which may halt the run).
 *  5. Fire `onRoute` with the next stage name, then run that stage through
 *     `runStageOrRecordFailure`.
 *
 * @param curCtx      Context the current level was handed.
 * @param currentName Name of the stage that just completed.
 * @param idx         Index of the completed stage; the next stage runs at `idx + 1`.
 * @param run         Shared per-run state.
 */
export async function advanceChain(
  curCtx: RunnerCtx,
  currentName: string,
  idx: number,
  run: RunContext,
): Promise<void> {
  // Record the visit before touching the edge, so the stage is marked even if
  // routing fails below.
  run.visited.add(currentName);

  const viaPredicate = edgeIsDecision(run.workflow, currentName);
  const routed = nextStage(run.workflow, currentName, { output: run.state.output, state: run.state });

  if (routed.kind === "err") {
    await haltOnRoutingError(curCtx, run, currentName, routed.reason);
    return;
  }

  // Falls back to the stage name when the stage entry or its `skill` is absent.
  const stageSkill = run.workflow.stages[currentName]?.skill ?? currentName;
  const fromRef = skillStageRef(currentName, idx + 1, stageSkill);

  if (routed.kind === "stop") {
    await run.lifecycle.fire(curCtx, "onRoute", fromRef, "stop", lifecycleCtxFor(run));
    finalizeWorkflow(curCtx, run);
    return;
  }

  const nextName = routed.stage;
  if (viaPredicate) {
    auditRoutingDecision(curCtx, run, idx, currentName, nextName);
    const mayContinue = await checkBackwardJumpGuard(curCtx, run, nextName);
    if (!mayContinue) {
      return;
    }
  }

  // onRoute fires for every transition — static edges included — after any
  // audit row has been written and before the next stage starts.
  await run.lifecycle.fire(curCtx, "onRoute", fromRef, nextName, lifecycleCtxFor(run));

  // A throw out of the next stage is caught by runStageOrRecordFailure, which
  // attributes the failure to `nextName` rather than to the stage that just
  // finished successfully.
  await runStageOrRecordFailure(curCtx, nextName, idx + 1, run);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Append a routing-decision audit row for a transition chosen by an `EdgeFn`.
 *
 * When `appendRoutingDecision` reports that the row was not written, the run
 * is NOT halted: the decision is pushed onto
 * `run.state.telemetry.droppedRoutingRows` and a warning is shown through
 * `curCtx.ui.notify`. Routing rows are treated as telemetry, so a missing row
 * degrades the audit trail without invalidating the in-memory decision.
 *
 * @param curCtx      Context used for the warning notification.
 * @param run         Shared per-run state (`cwd`, `runId`, telemetry).
 * @param idx         Index of the completed stage; the row stores `idx + 1`.
 * @param currentName Stage the decision was made from.
 * @param nextName    Stage the decision resolved to.
 */
function auditRoutingDecision(
  curCtx: RunnerCtx,
  run: RunContext,
  idx: number,
  currentName: string,
  nextName: string,
): void {
  const decisionRow = { fromStageIndex: idx + 1, fromStage: currentName, decision: nextName };
  const persisted = appendRoutingDecision(run.cwd, run.runId, {
    type: "routing",
    ...decisionRow,
    ts: nowIso(),
  });
  if (persisted) {
    return;
  }
  run.state.telemetry.droppedRoutingRows.push(decisionRow);
  curCtx.ui.notify(MSG_ROUTING_AUDIT_DROPPED(currentName, nextName), "warning");
}
|
|
105
|
+
|
|
106
|
+
/**
 * Enforce the per-loop cap on decision-edge retries.
 *
 * A "backward jump" is a decision edge resolving to a stage already in
 * `run.visited`. The caller only invokes this guard for `EdgeFn` edges, so
 * static edges inside a loop body never consume budget.
 *
 * - Target not yet visited: the counter resets to 0 (the decision left the
 *   current loop) and the run continues.
 * - Target already visited: the counter is incremented; the run continues
 *   while it does not exceed `run.maxBackwardJumps`.
 * - Cap exceeded: a terminal failure is recorded, attributed to `nextName`
 *   (the stage the guard refused to re-enter).
 *
 * @param curCtx   Context passed to `recordTerminalFailure`.
 * @param run      Shared per-run state.
 * @param nextName Stage the decision resolved to.
 * @returns `true` when the run may continue, `false` when the cap tripped and
 *   the failure has been recorded.
 */
async function checkBackwardJumpGuard(curCtx: RunnerCtx, run: RunContext, nextName: string): Promise<boolean> {
  const telemetry = run.state.telemetry;

  if (!run.visited.has(nextName)) {
    telemetry.backwardJumps = 0;
    return true;
  }

  telemetry.backwardJumps += 1;
  const used = telemetry.backwardJumps;
  const allowed = run.maxBackwardJumps;
  if (used <= allowed) {
    return true;
  }

  await recordTerminalFailure(
    curCtx,
    {
      cwd: run.cwd,
      runId: run.runId,
      state: run.state,
      stageName: nextName,
      skill: nextName,
      lifecycle: run.lifecycle,
      runIdentity: { workflow: run.workflow.name, totalStages: run.totalStages, trigger: run.trigger },
    },
    {
      status: "failed",
      notifyMsg: MSG_BACKWARD_JUMP_EXHAUSTED(used, allowed),
      notifyLevel: "error",
      errMsg: ERR_BACKWARD_JUMP_EXHAUSTED(used, allowed),
    },
  );
  return false;
}
|
|
155
|
+
|
|
156
|
+
/**
 * Record a terminal failure for a routing-layer error (the `"err"` result of
 * `nextStage`). The failure is attributed to `currentName`, because the
 * failing edge belongs to the stage that just completed.
 *
 * @param curCtx      Context passed to `recordTerminalFailure`.
 * @param run         Shared per-run state.
 * @param currentName Stage whose outgoing edge failed.
 * @param reason      Error text produced by the routing layer; stored as `errMsg`.
 */
async function haltOnRoutingError(
  curCtx: RunnerCtx,
  run: RunContext,
  currentName: string,
  reason: string,
): Promise<void> {
  const { cwd, runId, state, lifecycle, workflow, totalStages, trigger } = run;
  await recordTerminalFailure(
    curCtx,
    {
      cwd,
      runId,
      state,
      stageName: currentName,
      skill: currentName,
      lifecycle,
      runIdentity: { workflow: workflow.name, totalStages, trigger },
    },
    {
      status: "failed",
      notifyMsg: MSG_CHAIN_ADVANCE_FAILED(currentName, reason),
      notifyLevel: "error",
      errMsg: reason,
    },
  );
}
|
package/runner/index.ts
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow runner public surface. The runner is internally split into
|
|
3
|
+
* three files (see `runner.ts`'s header for the module map); this barrel
|
|
4
|
+
* re-exports only the symbols the package itself needs to publish.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
export { MAX_BACKWARD_JUMPS, type RunWorkflowOptions, type RunWorkflowResult, runWorkflow } from "./runner.js";
|
package/runner/runner.ts
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workflow orchestration entry point. `runWorkflow` walks a `Workflow`'s
|
|
3
|
+
* edge graph stage-by-stage; per-stage work (sessions, extraction,
|
|
4
|
+
* validation, audit row writes) lives in sessions.ts + audit.ts. This
|
|
5
|
+
* directory owns graph traversal, per-stage prerequisites, and routing.
|
|
6
|
+
*
|
|
7
|
+
* Modules:
|
|
8
|
+
* - runner.ts — runWorkflow + countReachableStages +
|
|
9
|
+
* runStageOrRecordFailure + finalizeWorkflow.
|
|
10
|
+
* - stage-lifecycle.ts — runStage + StagePreflightError + preflight
|
|
11
|
+
* pipeline + outcome.collector.snapshot hook.
|
|
12
|
+
* - chain-advance.ts — advanceChain + routing audit + backward-jump
|
|
13
|
+
* guard + halt-on-error.
|
|
14
|
+
*
|
|
15
|
+
* Ctx lifecycle: every level only touches the ctx it was handed.
|
|
16
|
+
* - `newSession({cancelled: false})` invalidates the outer ctx; all
|
|
17
|
+
* further work runs on `freshCtx` inside `withSession`, and the
|
|
18
|
+
* outer function simply unwinds.
|
|
19
|
+
* - `cancelled: true` means no replacement happened — outer ctx remains
|
|
20
|
+
* valid.
|
|
21
|
+
* - Continue policy has no newSession — same ctx throughout.
|
|
22
|
+
*
|
|
23
|
+
* Vocabulary: "stage" = one stage activation in this run; "phase" = one
|
|
24
|
+
* `## Phase N:` subdivision inside an implement plan artifact.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import type { Workflow } from "../api.js";
|
|
28
|
+
import { notifyPartialArtifacts, nowIso, recordTerminalFailure } from "../audit.js";
|
|
29
|
+
import { handleToString } from "../handle.js";
|
|
30
|
+
import type { WorkflowContext, WorkflowHost } from "../host.js";
|
|
31
|
+
import { currentPrimaryArtifact } from "../internal-utils.js";
|
|
32
|
+
import { buildLifecycleContext, LifecycleDispatcher, type LifecycleListeners } from "../lifecycle.js";
|
|
33
|
+
import { MSG_STAGE_THREW, MSG_WORKFLOW_COMPLETE, STATUS_KEY } from "../messages.js";
|
|
34
|
+
import { generateRunId, writeHeader } from "../state/index.js";
|
|
35
|
+
import { DEFAULT_TRIGGER, type RunTrigger } from "../triggers.js";
|
|
36
|
+
import type { RunContext, RunnerCtx, RunState } from "../types.js";
|
|
37
|
+
import { runStage, StagePreflightError } from "./stage-lifecycle.js";
|
|
38
|
+
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Policy constants
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
/**
 * Default per-loop budget for decision-edge retries; used when
 * `RunWorkflowOptions.maxBackwardJumps` is not supplied.
 *
 * A "backward jump" is a decision (predicate edge) that resolves to a stage
 * the run has already visited. Static edges walked inside a loop body do not
 * count, and a decision that lands on a not-yet-visited stage resets the
 * counter, so every independent loop starts with a fresh budget.
 *
 * With the value 2, a loop body runs once unconditionally and may be retried
 * at most two more times before the run is failed.
 */
export const MAX_BACKWARD_JUMPS = 2;
|
|
53
|
+
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Public surface
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
|
|
58
|
+
/** Options accepted by `runWorkflow`. */
export interface RunWorkflowOptions {
  /** The workflow to run. The caller is responsible for resolving it by name from `LoadedWorkflows`. */
  workflow: Workflow;
  /** Argument handed to the start stage. */
  input: string;
  /**
   * Host used by "continue"-policy stages (`host.sendUserMessage`).
   * `runWorkflow` rejects up front when the workflow contains such a stage
   * and no host is supplied.
   */
  host?: WorkflowHost;
  /** Backward-jump budget; falls back to `MAX_BACKWARD_JUMPS` when omitted. */
  maxBackwardJumps?: number;
  /**
   * Origin of this run. `/wf` passes `{ kind: "command", name: "wf" }`;
   * when omitted, `DEFAULT_TRIGGER` is used. The value is written to the
   * JSONL header and exposed to lifecycle callbacks as
   * `LifecycleContext.trigger`.
   */
  trigger?: RunTrigger;
  /**
   * Lifecycle listeners scoped to this call. They fire AFTER every globally
   * registered bundle (see `registerLifecycle`). A throwing listener is
   * caught and reported via `ctx.ui.notify(..., "warning")`; it never halts
   * the run.
   */
  lifecycle?: LifecycleListeners;
}
|
|
82
|
+
|
|
83
|
+
/** Result envelope returned by `runWorkflow`. */
export interface RunWorkflowResult {
  /**
   * On-disk identity of the run: the `<run-id>` part of
   * `<cwd>/.rpiv/workflows/<run-id>.jsonl`. Consumers can pass it straight to
   * `readLastStage` / `listArtifacts` (or future past-run inspection helpers)
   * without recomputing the slug.
   *
   * Absent ONLY when the run was rejected before any JSONL file was created:
   * the start stage is not declared, or the workflow has continue-policy
   * stages but no host was supplied.
   */
  runId?: string;
  /** Number of stages completed, taken from the run state at termination. */
  stagesCompleted: number;
  /** Whether the run terminated successfully. */
  success: boolean;
  /**
   * Primary artifact at termination, rendered as its handle's canonical
   * string (fs → path, url → href, opaque → id). Absent when no produces
   * stage yielded one. For the full structured handle, read
   * `output.artifacts[0]` from the run's last recorded stage (via
   * `readLastStage`).
   */
  lastArtifact?: string;
  /** Failure description; absent when no error was recorded. */
  error?: string;
  /**
   * Routing decisions that were taken in memory but whose JSONL audit row
   * could not be persisted. Omitted when nothing was dropped. Lets readers of
   * the JSONL tell "no routing row because the edge was deterministic" apart
   * from "a decision was made but the write failed". A dropped row does not
   * fail the run — routing rows are telemetry, not reconstruction inputs.
   */
  droppedRoutingRows?: { fromStageIndex: number; fromStage: string; decision: string }[];
}
|
|
115
|
+
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
// runWorkflow — workflow entry point
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
/**
 * Execute `options.workflow` from its start stage and return a result envelope.
 *
 * Pre-flight rejections (no JSONL file is written, `runId` is absent):
 *  - the start stage is not declared in `workflow.stages`;
 *  - a stage uses the "continue" session policy but no `host` was supplied.
 *
 * Otherwise the run header is written, `onWorkflowStart` fires, the start
 * stage runs through `runStageOrRecordFailure`, and `onWorkflowEnd` fires
 * with the final result.
 *
 * Session note: each later `newSession()` is issued on the fresh ctx returned
 * by the previous `withSession`, never on a captured outer ctx, because Pi
 * invalidates a ctx as soon as its session is replaced.
 *
 * @param ctx     Context of the caller; supplies `cwd` and is the ctx for the
 *   start stage and the workflow-level lifecycle events.
 * @param options See `RunWorkflowOptions`.
 * @returns The run's result envelope.
 */
export async function runWorkflow(ctx: WorkflowContext, options: RunWorkflowOptions): Promise<RunWorkflowResult> {
  const { workflow, input, host } = options;

  if (!workflow.stages[workflow.start]) {
    return {
      stagesCompleted: 0,
      success: false,
      error: `Workflow "${workflow.name}" start stage "${workflow.start}" is not declared`,
    };
  }

  // Continue-policy stages reach the previous session through the host's
  // sendUserMessage. Without a host, enforceSessionInvariants would throw at
  // the first such stage, so reject here and hand embedders a clean envelope.
  if (host === undefined && Object.values(workflow.stages).some((stage) => stage.sessionPolicy === "continue")) {
    return {
      stagesCompleted: 0,
      success: false,
      error: "workflow contains continue-policy stages which require a workflow host",
    };
  }

  const cwd = ctx.cwd;
  const runId = generateRunId();
  const totalStages = countReachableStages(workflow);
  const trigger = options.trigger ?? DEFAULT_TRIGGER;

  writeHeader(cwd, { runId, workflow: workflow.name, input, ts: nowIso(), trigger });

  const state: RunState = {
    originalInput: input,
    primaryArtifact: undefined,
    output: undefined,
    named: {},
    stagesCompleted: 0,
    lastAllocatedStageNumber: 0,
    telemetry: { backwardJumps: 0, droppedRoutingRows: [] },
    termination: { success: false, error: undefined },
  };

  const maxBackwardJumps = options.maxBackwardJumps ?? MAX_BACKWARD_JUMPS;
  const lifecycle = new LifecycleDispatcher(options.lifecycle);

  // Enumerate skills BEFORE any stage opens a fresh session: Pi invalidates
  // the WorkflowHost handle on the first ctx.newSession(). From here on the
  // runner reads run.registeredSkills; the host itself is kept only as
  // run.continueHost for the continue-policy session handler.
  const registeredSkills = host ? snapshotRegisteredSkills(host) : undefined;

  const run: RunContext = {
    cwd,
    runId,
    workflow,
    totalStages,
    state,
    visited: new Set(),
    registeredSkills,
    continueHost: host,
    maxBackwardJumps,
    trigger,
    lifecycle,
  };

  await lifecycle.fire(ctx, "onWorkflowStart", lifecycleCtxFor(run));

  // Go through runStageOrRecordFailure rather than bare runStage, so a throw
  // out of the start stage (notably an enforceSessionInvariants violation)
  // still produces a JSONL failure row for that stage instead of leaving a
  // header-only file. advanceChain uses the same wrapper for later stages.
  await runStageOrRecordFailure(ctx, workflow.start, 0, run);

  const finalArtifact = currentPrimaryArtifact(state);
  const result: RunWorkflowResult = {
    runId,
    stagesCompleted: state.stagesCompleted,
    success: state.termination.success,
    lastArtifact: finalArtifact ? handleToString(finalArtifact.handle) : undefined,
    error: state.termination.error,
  };
  // Only surface the field when something was actually dropped.
  if (state.telemetry.droppedRoutingRows.length > 0) {
    result.droppedRoutingRows = state.telemetry.droppedRoutingRows;
  }

  await lifecycle.fire(ctx, "onWorkflowEnd", result, lifecycleCtxFor(run));
  return result;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Build the lifecycle context for `run` by forwarding its identity fields and
 * current `state` to `buildLifecycleContext`. Call it once per `fire` so each
 * listener invocation is built from the run as it is at that moment.
 */
export function lifecycleCtxFor(run: RunContext) {
  const { cwd, runId, totalStages, trigger, state } = run;
  return buildLifecycleContext({
    cwd,
    runId,
    workflow: run.workflow.name,
    totalStages,
    trigger,
    state,
  });
}
|
|
239
|
+
|
|
240
|
+
/**
 * Count the stages reachable from `workflow.start` by breadth-first traversal
 * of the edge graph. Used as the upper bound for the status-line denominator.
 *
 * Edge handling:
 *  - no edge, or the literal `"stop"`: the stage is terminal;
 *  - string edge: followed when it names a declared stage;
 *  - `EdgeFn` with a `.targets` array: every declared, non-`"stop"` target is
 *    followed;
 *  - `EdgeFn` without `.targets`: throws. `validate-workflow.ts` enforces
 *    `.targets` at load time, so reaching this means validation was bypassed
 *    (test fixture or programmatic embedder) and is surfaced loudly instead
 *    of silently over-counting.
 *
 * @param workflow Workflow to traverse.
 * @returns Number of distinct stages visited, including the start stage.
 * @throws Error when a reachable `EdgeFn` has no `.targets` array.
 */
function countReachableStages(workflow: Workflow): number {
  const seen = new Set<string>();
  const frontier: string[] = [workflow.start];

  // `for...of` re-checks the array length on every step, so names pushed
  // while iterating are visited too. This keeps FIFO order without the O(n)
  // `shift()` per dequeue and without a non-null assertion on its result.
  for (const cur of frontier) {
    if (seen.has(cur)) continue;
    seen.add(cur);

    const edge = workflow.edges[cur];
    // NOTE(review): "stop" is presumably the value of the `STOP` sentinel from api.ts — confirm.
    if (edge === undefined || edge === "stop") continue;

    if (typeof edge === "string") {
      if (workflow.stages[edge] && !seen.has(edge)) frontier.push(edge);
      continue;
    }

    if (!Array.isArray(edge.targets)) {
      throw new Error(
        `countReachableStages: edge from "${cur}" is an EdgeFn without .targets — validateWorkflow should have rejected this workflow`,
      );
    }
    for (const t of edge.targets) {
      if (t !== "stop" && workflow.stages[t] && !seen.has(t)) frontier.push(t);
    }
  }

  return seen.size;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Run stage `name` via `runStage` and turn any throw into a recorded terminal
 * failure attributed to that stage (not to the stage that preceded it).
 *
 * Shared by `runWorkflow` (start stage) and `advanceChain` (every later
 * stage), so "stage threw" is translated into a failure record in exactly one
 * place.
 *
 * Two kinds of throw are handled:
 *  - `StagePreflightError`: a known preflight failure. Its own `skill`,
 *    `notifyMsg` and `errMsg` are recorded as carried; when `notifyPartial`
 *    is set, partial artifacts are announced as well.
 *  - Anything else: recorded with the generic `MSG_STAGE_THREW` message,
 *    using the stage name as the skill.
 *
 * @param curCtx Context the stage runs on.
 * @param name   Stage to run.
 * @param idx    Index passed through to `runStage`.
 * @param run    Shared per-run state.
 */
export async function runStageOrRecordFailure(
  curCtx: RunnerCtx,
  name: string,
  idx: number,
  run: RunContext,
): Promise<void> {
  try {
    await runStage(curCtx, name, idx, run);
  } catch (thrown) {
    if (!(thrown instanceof StagePreflightError)) {
      // Unexpected failure: fall back to the generic "stage threw" shape.
      const reason = thrown instanceof Error ? thrown.message : String(thrown);
      await recordTerminalFailure(curCtx, auditCtxFor(run, name, name), {
        status: "failed",
        notifyMsg: MSG_STAGE_THREW(name, reason),
        notifyLevel: "error",
        errMsg: reason,
      });
      return;
    }

    // Known preflight failure: record exactly the payload it carries.
    const announcePartial = thrown.notifyPartial
      ? (failCtx) => notifyPartialArtifacts(failCtx, run.cwd, run.runId)
      : undefined;
    await recordTerminalFailure(
      curCtx,
      auditCtxFor(run, name, thrown.skill),
      { status: "failed", notifyMsg: thrown.notifyMsg, notifyLevel: "error", errMsg: thrown.errMsg },
      announcePartial,
    );
  }
}
|
|
317
|
+
|
|
318
|
+
/**
 * Assemble the audit context for a stage failure that surfaced outside a
 * session (preflight halts, throws out of a downstream stage).
 *
 * @param run       Shared per-run state the identity fields are copied from.
 * @param stageName Stage the failure is attributed to.
 * @param skill     Skill name recorded alongside the stage.
 * @returns An object carrying `cwd`, `runId`, `state`, `stageName`, `skill`,
 *   `lifecycle` and a `runIdentity` of `{ workflow, totalStages, trigger }`.
 */
function auditCtxFor(run: RunContext, stageName: string, skill: string) {
  const { cwd, runId, state, lifecycle, workflow, totalStages, trigger } = run;
  const runIdentity = { workflow: workflow.name, totalStages, trigger };
  return { cwd, runId, state, stageName, skill, lifecycle, runIdentity };
}
|
|
330
|
+
|
|
331
|
+
/**
 * Mark the run as successfully finished: clear the status-line entry stored
 * under `STATUS_KEY`, show the completion notice with the number of completed
 * stages, then set `termination.success` on the run state.
 *
 * @param curCtx Context whose `ui` receives the status reset and the notice.
 * @param run    Shared per-run state to flag as successful.
 */
export function finalizeWorkflow(curCtx: RunnerCtx, run: RunContext): void {
  const completed = run.state.stagesCompleted;
  curCtx.ui.setStatus(STATUS_KEY, undefined);
  curCtx.ui.notify(MSG_WORKFLOW_COMPLETE(completed), "info");
  run.state.termination.success = true;
}
|
|
336
|
+
|
|
337
|
+
/**
 * Collect the names of all skill commands currently known to `host`; the
 * resulting set is what `ensureSkillRegistered` consults.
 *
 * Pi prefixes skill-source commands with `"skill:"` (agent-session.js). The
 * prefix is stripped so set members can be compared with `stage.skill`
 * directly; names without the prefix are kept as they are. Commands whose
 * `source` is not `"skill"` (e.g. slash commands registered by extensions)
 * are ignored.
 *
 * Must be called before any `ctx.newSession()` opens, since that is when Pi
 * marks the `WorkflowHost` handle stale.
 *
 * @param host Host whose `getCommands()` is enumerated.
 * @returns Set of skill names without the `"skill:"` prefix.
 */
function snapshotRegisteredSkills(host: WorkflowHost): ReadonlySet<string> {
  const prefix = "skill:";
  const names = new Set<string>();
  for (const { source, name } of host.getCommands()) {
    if (source === "skill") {
      names.add(name.startsWith(prefix) ? name.slice(prefix.length) : name);
    }
  }
  return names;
}
|