npm - pi-crew - Versions diffs - 0.9.4 → 0.9.7 - Mend

pi-crew 0.9.4 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/CHANGELOG.md +592 -0
package/README.md +55 -3
package/docs/HARNESS_BACKLOG.md +51 -3
package/docs/dynamic-workflows.md +315 -2
package/docs/fix-plan-disabletools-exit-null.md +219 -0
package/docs/troubleshooting.md +102 -0
package/package.json +8 -2
package/src/extension/command-completions.ts +1 -0
package/src/extension/crew-shortcuts.ts +1 -0
package/src/extension/register.ts +2 -0
package/src/extension/registration/commands.ts +3 -0
package/src/extension/team-tool/doctor.ts +14 -0
package/src/extension/team-tool/goal.ts +1 -0
package/src/extension/team-tool/run.ts +4 -0
package/src/runtime/background-runner.ts +24 -2
package/src/runtime/chain-runner.ts +1 -0
package/src/runtime/child-pi.ts +101 -10
package/src/runtime/crash-recovery.ts +78 -36
package/src/runtime/deterministic-ast.ts +161 -0
package/src/runtime/dwf-state-store.ts +97 -0
package/src/runtime/dynamic-workflow-context.ts +381 -7
package/src/runtime/dynamic-workflow-runner.ts +94 -2
package/src/runtime/goal-loop-runner.ts +2 -0
package/src/runtime/live-session-runtime.ts +1 -0
package/src/runtime/model-scope.ts +1 -0
package/src/runtime/peer-dep.ts +1 -0
package/src/runtime/pi-args.ts +11 -0
package/src/runtime/resilient-edit.ts +1 -0
package/src/runtime/result-extractor.ts +72 -7
package/src/runtime/task-runner.ts +1 -0
package/src/runtime/team-runner.ts +8 -3
package/src/runtime/zombie-scanner.ts +297 -0
package/src/schema/team-tool-schema.ts +28 -0
package/src/state/contracts.ts +1 -0
package/src/state/hook-instinct-bridge.ts +3 -0
package/src/state/state-store.ts +3 -0
package/src/state/types.ts +9 -0
package/src/ui/dashboard-panes/progress-pane.ts +5 -0
package/src/ui/dwf-phase-display.ts +151 -0
package/src/ui/run-snapshot-cache.ts +4 -0
package/src/ui/snapshot-types.ts +3 -0
package/src/utils/bm25-search.ts +2 -0
package/src/workflows/workflow-config.ts +3 -0
package/src/worktree/worktree-manager.ts +94 -0
package/types/dwf.d.ts +187 -0

package/src/runtime/dynamic-workflow-context.ts CHANGED Viewed

@@ -30,13 +30,17 @@ import { Semaphore } from "./semaphore.ts";
 import { executeWithRetry } from "./retry-executor.ts";
 import { allAgents, discoverAgents } from "../agents/discover-agents.ts";
 import { writeArtifact } from "../state/artifact-store.ts";
+import { appendEvent } from "../state/event-log.ts";
 import { appendMailboxMessage, readMailbox } from "../state/mailbox.ts";
 import { renderPlanTemplate } from "./plan-templates.ts";
+import { prepareAgentWorktree, cleanupAgentWorktree } from "../worktree/worktree-manager.ts";
 import { logInternalError } from "../utils/internal-error.ts";
 import { randomBytes } from "node:crypto";
+import type { TSchema } from "@sinclair/typebox";
 import type { AgentConfig } from "../agents/agent-config.ts";
 import type { TeamConfig } from "../teams/team-config.ts";
 import type { TeamRunManifest } from "../state/types.ts";
+import type { DwfCheckpointState } from "./dwf-state-store.ts";
 export interface AgentCallOpts {
 	prompt: string;
@@ -61,6 +65,19 @@ export interface AgentCallOpts {
 	 *  JSON-verdict judge, but the user's reviewer.md agent is a markdown code-reviewer.
 	 *  When set, the resolved agent's systemPrompt is replaced entirely. */
 	systemPrompt?: string;
+	/** Round-13 P0-3: optional TypeBox schema. When set, the call's output is validated
+	 *  against the schema after extraction. Validation failure yields ok:false with a
+	 *  structured `error` and undefined `structured` field. Forward-compatible: when
+	 *  undefined, behavior is identical to the regex-based extractor. */
+	schema?: TSchema;
+	/** round-17 P2-4: spawn this agent in an isolated git worktree.
+	 *  Useful when parallel agents modify files concurrently (avoids conflicts). The
+	 *  worktree is created from HEAD, the agent runs there, and on completion the
+	 *  diff is captured as an artifact before cleanup. Default false.
+	 *  If worktree creation fails (no git repo, dirty leader), the agent runs in the
+	 *  normal cwd and a warning is logged via ctx.log(). Backward compatible —
+	 *  omitting it is identical to `false`. */
+	worktree?: boolean;
 }
 export interface AgentResult {
@@ -75,6 +92,16 @@ export interface AgentResult {
 	durationMs?: number;
 }
+/** round-14 P1-2: per-workflow token budget. Frozen read-only surface exposed as ctx.budget.
+ *  `spent()` and `remaining()` are methods rather than plain fields so callers always read
+ *  the current counter even though the budget object itself is frozen. */
+export interface WorkflowBudget {
+	/** Configured budget, or null when unbounded. Fixed when the ctx is created. */
+	total: number | null;
+	/** Tokens consumed so far (accumulated from each ctx.agent() run's usage: input + output). */
+	spent(): number;
+	/** Tokens remaining; Infinity when total is null. Clamped at 0, so it never goes negative. */
+	remaining(): number;
+}
 export interface WorkflowCtx {
 	cwd: string;
 	runId: string;
@@ -83,6 +110,15 @@ export interface WorkflowCtx {
 	agent(opts: AgentCallOpts): Promise<AgentResult>;
 	/** Bounded fan-out preserving order (wraps mapConcurrent). */
 	fanOut<T>(items: T[], limit: number, fn: (item: T, i: number) => Promise<AgentResult>): Promise<AgentResult[]>;
+	/** Pipeline: sequential per-item stages, parallel across items (bounded by
+	 *  ctx.semaphore). Each item passes through all stages in order; different
+	 *  items may run concurrently. A failed stage yields `null` for that item
+	 *  (logged via ctx.log) and other items continue. Aborts propagate.
+	 *  round-16 (P2-1). */
+	pipeline<TItem, TResult = unknown>(
+		items: TItem[],
+		...stages: Array<(previous: TResult, original: TItem, index: number) => Promise<TResult> | TResult>
+	): Promise<(TResult | null)[]>;
 	/** Run a reviewer agent over an artifact; parse {outcome, feedback}. §3.2. */
 	review(taskId: string, reviewerRole?: string, opts?: { content?: string; artifactPath?: string; disableTools?: boolean }): Promise<{ outcome: "accept" | "reject" | "changes_requested"; feedback: string }>;
 	/** Re-run a task with feedback (wraps executeWithRetry). */
@@ -97,6 +133,22 @@ export interface WorkflowCtx {
 	vars: Record<string, unknown>;
 	/** Mark the final result. ONLY this artifact reaches the main context. */
 	setResult(artifactPath: string, meta?: Record<string, unknown>): void;
+	/** Mark the start of a named workflow phase. Emits a `dwf.phase_started` event
+	 *  (and a `dwf.phase_completed` for the previous phase, if any) to the run's
+	 *  events.jsonl. Idempotent on the same title — calling twice with the same
+	 *  title is a no-op. Phase titles are in-memory only; the events log is the
+	 *  durable source of truth for phase boundaries. */
+	phase(title: string): void;
+	/** round-14 P1-3: append a workflow-level log line. Persists to events.jsonl
+	 *  as a `dwf.log` event and keeps a bounded in-memory copy (capped at 1000). */
+	log(message: unknown): void;
+	/** round-14 P1-2: per-workflow token budget. ctx.agent() auto-rejects with
+	 *  ok:false once exhausted. */
+	budget: WorkflowBudget;
+	/** round-14 P1-5: typed workflow arguments. Reads the value passed via
+	 *  MakeWorkflowCtxOptions.args (sourced from manifest.args). Defaults to {}
+	 *  when unset. */
+	args<T = unknown>(): T;
 	semaphore: Semaphore;
 	/** Abort signal (cancel/stop). */
 	signal: AbortSignal;
@@ -107,6 +159,19 @@ export interface MakeWorkflowCtxOptions {
 	signal: AbortSignal;
 	team?: TeamConfig;
 	modelOverride?: string;
+	/** round-14 P1-2: per-workflow token budget. null/undefined = unbounded. */
+	tokenBudget?: number | null;
+	/** round-14 P1-5: typed workflow arguments (sourced from manifest.args). Defaults to {}. */
+	args?: unknown;
+	/** round-18 P2-3: checkpoint state to hydrate ctx with on resume. When provided,
+	 *  the ctx starts with the resumed vars/phases/logs/spent/agentCount instead of
+	 *  empty defaults. Omit (or undefined) for a fresh run — backward compatible. */
+	resumedState?: DwfCheckpointState;
+	/** round-18 P2-3: callback invoked after each `ctx.agent()` call completes
+	 *  (success OR fail). The runner wires this to `DwfStore.save()` so a crash after
+	 *  an agent call leaves a durable checkpoint. Best-effort — failures are swallowed
+	 *  so checkpointing can never crash the workflow. */
+	onCheckpoint?: (state: DwfCheckpointState) => void;
 }
 /**
@@ -163,6 +228,31 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 	const concurrency = Math.max(1, opts.concurrency ?? 4);
 	const semaphore = new Semaphore(concurrency);
 	let finalResult: { artifactPath: string; meta?: Record<string, unknown> } | undefined;
+	// round-18 P2-3: agent invocation counter. Hydrated from a resumed checkpoint so a
+	// resumed run keeps an accurate count; incremented in agent()'s finally block.
+	let agentCount = opts.resumedState ? opts.resumedState.agentCount : 0;
+	// round-12 P0-1: in-memory phase state, exposed via non-enumerable getter like __finalResult.
+	// The events log is the durable source of truth for phase boundaries.
+	// round-18 P2-3: hydrate phaseState from a resumed checkpoint (backward compatible when unset).
+	let phaseState: { currentPhase: string | undefined; phases: string[] } = opts.resumedState
+		? { currentPhase: opts.resumedState.currentPhase, phases: [...opts.resumedState.phases] }
+		: { currentPhase: undefined, phases: [] };
+	let phaseCapWarned = false;
+	// round-14 P1-2/P1-3/P1-5: closure-scoped runtime state shared by budget/log/args.
+	// Mirrors the pi-dynamic-workflows RuntimeState pattern (workflow.ts:state).
+	// round-18 P2-3: hydrate spent/logs from a resumed checkpoint (backward compatible when unset).
+	const wfState: { spent: number; logs: string[]; args: unknown } = {
+		spent: opts.resumedState?.spent ?? 0,
+		logs: opts.resumedState ? [...opts.resumedState.logs].slice(0, 1000) : [],
+		args: opts.args ?? {},
+	};
+	// round-14 P1-2: frozen budget surface. The closures read wfState.spent so the
+	// object stays live after Object.freeze(ctx). total is a snapshot primitive.
+	const budget = Object.freeze({
+		total: opts.tokenBudget ?? null,
+		spent: () => wfState.spent,
+		remaining: () => (opts.tokenBudget == null ? Infinity : Math.max(0, opts.tokenBudget - wfState.spent)),
+	} satisfies WorkflowBudget);
 	const ctx: WorkflowCtx = {
 		cwd: manifest.cwd,
@@ -173,7 +263,16 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 		async agent(call: AgentCallOpts): Promise<AgentResult> {
 			await semaphore.acquire();
 			const started = Date.now();
+			// round-17 P2-4: declared before the try so the finally can clean it up
+			// regardless of which return/throw path is taken.
+			let worktreePath: string | undefined;
+			let worktreeBranch: string | undefined;
 			try {
+				// round-14 P1-2: budget check BEFORE spawning. When the per-workflow token
+				// budget is exhausted, reject the call without consuming a child worker.
+				if (budget.total !== null && budget.remaining() <= 0) {
+					return { ok: false, text: "", error: "workflow token budget exhausted", durationMs: 0 };
+				}
 				const agentConfig = resolveAgentForRole(call.role, {
 					explicitAgent: call.agent,
 					team: opts.team,
@@ -185,12 +284,49 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 				let effectiveAgent = call.disableTools === true ? { ...agentConfig, disableTools: true, tools: [] } : agentConfig;
 				// Per-call systemPrompt override (replaces the resolved agent's persona/output-format).
 				// Used by ctx.review() to force a JSON-verdict judge instead of the role's markdown reviewer.
-				if (call.systemPrompt !== undefined) {
+				// Round-13 P0-3: when a schema is provided, append a JSON-output instruction so
+				// the model returns parseable JSON instead of prose. Schema name is intentionally
+				// generic — we don't reveal TypeBox internal types.
+				//
+				// Smoke-test fix: when BOTH schema AND an explicit call.systemPrompt are set,
+				// the call.systemPrompt is the caller's intended persona (e.g. a JSON-verdict
+				// judge). It MUST be used as the base for the JSON instruction — otherwise the
+				// role's persona leaks through and the model returns prose, failing schema
+				// validation. Previously call.systemPrompt was silently dropped when a schema
+				// was present, which confused models into returning text like "hello".
+				if (call.schema !== undefined) {
+					const base = call.systemPrompt ?? effectiveAgent.systemPrompt;
+					effectiveAgent = {
+						...effectiveAgent,
+						systemPrompt: composeSchemaSystemPrompt(base, call.schema),
+					};
+				} else if (call.systemPrompt !== undefined) {
 					effectiveAgent = { ...effectiveAgent, systemPrompt: call.systemPrompt };
 				}
 				const task = composeAgentTask(call);
+				// round-17 P2-4: worktree isolation per agent. When requested, spawn the
+				// agent in an isolated git worktree so parallel file-modifying agents
+				// don't clobber each other. Falls back to the normal cwd (with a warning)
+				// when worktree creation is unavailable (no git repo, dirty leader).
+				let agentCwd = manifest.cwd;
+				if (call.worktree === true) {
+					const wt = prepareAgentWorktree(
+						manifest,
+						`dwf-agent-${Date.now()}-${randomBytes(4).toString("hex")}`,
+					);
+					if (wt?.worktreePath) {
+						agentCwd = wt.cwd;
+						worktreePath = wt.worktreePath;
+						worktreeBranch = wt.branch;
+						ctx.log(`worktree: agent isolated at ${wt.worktreePath}`);
+					} else {
+						ctx.log("worktree: creation unavailable — falling back to normal cwd");
+					}
+				}
 				const childResult = await runChildPi({
-					cwd: manifest.cwd,
+					cwd: agentCwd,
 					task,
 					agent: effectiveAgent,
 					model: call.model ?? opts.modelOverride ?? agentConfig.model,
@@ -206,6 +342,9 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 					return { ok: false, text: "", error: childResult.error ?? `exit ${childResult.exitCode}`, durationMs: Date.now() - started };
 				}
 				const parsed = parsePiJsonOutput(childResult.stdout);
+				// round-14 P1-2: accumulate this run's token usage into the workflow budget.
+				// Covers both the success and schema-mismatch paths (both report parsed.usage).
+				wfState.spent += (parsed.usage?.input ?? 0) + (parsed.usage?.output ?? 0);
 				let text = parsed.finalText ?? "";
 				// Round-11 test fix: parsePiJsonOutput only extracts text from pi event stream
 				// ({type:"message_end", message:{role:"assistant", content:[...]}}). When the
@@ -214,7 +353,11 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 				if (!text.trim()) {
 					text = extractTextFallback(childResult.stdout);
 				}
-				const extracted = extractStructuredResult(text);
+				// Round-13 P0-3: schema validation post-extraction. The schema option is
+				// additive — when undefined the call site is unchanged. With a schema,
+				// extracted.error means the worker output didn't match expected shape and
+				// the script should treat the result as failed (ok:false, error set).
+				const extracted = extractStructuredResult(text, call.schema);
 				// Write a side artifact for audit/isolation (§0b G3).
 				const rel = `wf/${Date.now()}-${randomBytes(4).toString("hex")}.md`;
 				const artifact = writeArtifact(manifest.artifactsRoot, {
@@ -223,6 +366,16 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 					content: text,
 					producer: "dynamic-workflow",
 				});
+				if (call.schema !== undefined && !extracted.structured) {
+					return {
+						ok: false,
+						text,
+						usage: parsed.usage,
+						artifactPath: artifact.path,
+						error: extracted.error ?? "structured output does not match schema",
+						durationMs: Date.now() - started,
+					};
+				}
 				return {
 					ok: true,
 					text,
@@ -235,12 +388,70 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 				logInternalError("dynamic-workflow-context.agent", error, `runId=${manifest.runId}`);
 				return { ok: false, text: "", error: error instanceof Error ? error.message : String(error), durationMs: Date.now() - started };
 			} finally {
+				// round-17 P2-4: clean up the worktree after the agent completes (success
+				// OR failure). Captures the diff as an artifact before removal. Best-effort
+				// — a leak must never crash the workflow.
+				if (worktreePath) {
+					try {
+						cleanupAgentWorktree(manifest, worktreePath, worktreeBranch);
+					} catch (cleanupError) {
+						logInternalError("dynamic-workflow-context.worktree-cleanup", cleanupError, `worktreePath=${worktreePath}`);
+					}
+				}
+				// round-18 P2-3: checkpoint AFTER the agent completes (success or fail) so a
+				// crash between agent calls leaves durable state to resume from. The counter is
+				// incremented here (after the call) so the checkpoint reflects the call that ran.
+				agentCount++;
+				if (opts.onCheckpoint) {
+					try {
+						opts.onCheckpoint({
+							runId: manifest.runId,
+							vars: ctx.vars,
+							phases: phaseState.phases,
+							currentPhase: phaseState.currentPhase,
+							logs: wfState.logs.slice(0, 1000),
+							spent: wfState.spent,
+							agentCount,
+							updatedAt: new Date().toISOString(),
+						});
+					} catch (checkpointError) {
+						logInternalError("dynamic-workflow-context.checkpoint", checkpointError, `runId=${manifest.runId}`);
+					}
+				}
 				semaphore.release();
 			}
 		},
 		async fanOut<T>(items: T[], limit: number, fn: (item: T, i: number) => Promise<AgentResult>): Promise<AgentResult[]> {
 			return mapConcurrent(items, Math.max(1, limit), fn);
 		},
+		async pipeline<TItem, TResult = unknown>( // runs every item through all stages; one result slot per item
+			items: TItem[],
+			...stages: Array<(previous: TResult, original: TItem, index: number) => Promise<TResult> | TResult>
+		): Promise<(TResult | null)[]> {
+			if (!Array.isArray(items)) {
+				throw new TypeError("pipeline() expects an array as the first argument");
+			}
+			if (stages.length === 0 || stages.some((s) => typeof s !== "function")) { // zero stages is rejected with the same message
+				throw new TypeError("pipeline() stages must be functions");
+			}
+			if (items.length === 0) return []; // nothing to do; checked only after the arguments were validated
+			// Parallel across items, bounded by the workflow concurrency (mirrors fanOut).
+			// Per-item stages run sequentially. A failed stage yields null for that item
+			// (logged via ctx.log) and the remaining items continue. Aborts propagate.
+			// The bound here is the numeric `concurrency` handed to mapConcurrent.
+			return mapConcurrent(items, concurrency, async (item, index): Promise<TResult | null> => {
+				let value: unknown = item; // the first stage receives the item itself as `previous`
+				for (const stage of stages) {
+					try {
+						value = await stage(value as TResult, item, index); // each stage's output feeds the next stage
+					} catch (error) {
+						if (opts.signal.aborted) throw error; // on abort, rethrow instead of masking the failure as null
+						ctx.log(`pipeline[${index}] failed: ${error instanceof Error ? error.message : String(error)}`);
+						return null; // remaining stages for this item are skipped
+					}
+				}
+				return value as TResult; // output of the last stage
+			});
+		},
 		async review(taskId: string, reviewerRole = "reviewer", reviewOpts?: { content?: string; artifactPath?: string; disableTools?: boolean }): Promise<{ outcome: "accept" | "reject" | "changes_requested"; feedback: string }> {
 			// review() is a VERDICT step: it must produce a parseable JSON {outcome, feedback}, not a
 			// free-form markdown review. The resolved reviewer agent (e.g. ~/.pi/agent/agents/reviewer.md)
@@ -339,10 +550,62 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 		renderTemplate(name: string, vars: Record<string, string>): unknown {
 			return renderPlanTemplate(name, vars);
 		},
-		vars: {} as Record<string, unknown>,
+		vars: opts.resumedState ? { ...opts.resumedState.vars } : ({} as Record<string, unknown>),
 		setResult(artifactPath: string, meta?: Record<string, unknown>): void {
 			finalResult = { artifactPath, meta };
 		},
+		phase(title: string): void { // switches the current phase and records the boundary in the events log
+			if (typeof title !== "string" || title.length === 0) {
+				throw new TypeError("ctx.phase(title) requires a non-empty string title.");
+			}
+			// Idempotency: same phase title → no event, no state change.
+			if (title === phaseState.currentPhase) return;
+			// Close out the previous open phase BEFORE the new one opens.
+			if (phaseState.currentPhase !== undefined) {
+				appendEvent(manifest.eventsPath, {
+					type: "dwf.phase_completed",
+					runId: manifest.runId,
+					data: { phase: phaseState.currentPhase },
+				});
+			}
+			phaseState.currentPhase = title; // updated even when the phases[] list below is already full
+			// Dedup append with hard cap to bound memory; events still flow.
+			if (!phaseState.phases.includes(title)) { // a title that was seen earlier is not appended a second time
+				if (phaseState.phases.length < 100) {
+					phaseState.phases.push(title);
+				} else if (!phaseCapWarned) { // the cap warning is logged at most once per ctx
+					phaseCapWarned = true;
+					logInternalError(
+						"dynamic-workflow-context.phase-cap",
+						new Error("Phase list cap of 100 reached; further phases still emit events but are not added to the in-memory phases[] list. Use the events log as the durable source of truth."),
+						`runId=${manifest.runId}`,
+					);
+				}
+			}
+			appendEvent(manifest.eventsPath, { // emitted for every phase change, regardless of the cap above
+				type: "dwf.phase_started",
+				runId: manifest.runId,
+				data: { phase: title },
+			});
+		},
+		budget,
+		log(message: unknown): void {
+			// round-14 P1-3: stringify non-strings, keep a bounded in-memory copy, and
+			// always emit a dwf.log event (the events log is the durable source of truth).
+			const text = typeof message === "string" ? message : JSON.stringify(message); // NOTE(review): JSON.stringify returns undefined for undefined/functions/symbols and throws on circular structures or BigInt, so `text` may not be a string and this call can throw — confirm callers only pass JSON-safe values
+			if (wfState.logs.length < 1000) { // once the buffer holds 1000 lines, newer lines are not kept in memory
+				wfState.logs.push(text);
+			}
+			appendEvent(manifest.eventsPath, { // emitted whether or not the line fit in the in-memory buffer
+				type: "dwf.log",
+				runId: manifest.runId,
+				data: { message: text },
+			});
+		},
+		args<T = unknown>(): T {
+			// round-14 P1-5: typed workflow args sourced from manifest (via opts.args).
+			return wfState.args as T; // unchecked cast: the value is not validated against T at runtime
+		},
 	};
 	// Attach the final-result slot via a non-enumerable getter so the runner can read it
@@ -351,6 +614,25 @@ export function makeWorkflowCtx(manifest: TeamRunManifest, opts: MakeWorkflowCtx
 		get: () => finalResult,
 		enumerable: false,
 	});
+	// round-12 P0-1: phase state is read-only from the runner; the script can only mutate
+	// it via ctx.phase(title), which is the documented public surface.
+	Object.defineProperty(ctx, "__phaseState", {
+		get: () => phaseState,
+		enumerable: false,
+	});
+	// round-14 P1-3: in-memory log buffer is read-only from the runner; the script can only
+	// append via ctx.log(message). The events log remains the durable source of truth.
+	Object.defineProperty(ctx, "__logs", {
+		get: () => wfState.logs,
+		enumerable: false,
+	});
+	// round-18 P2-3: agent invocation counter is read-only from the runner. The script can
+	// only advance it via ctx.agent() (incremented in agent()'s finally). Exposed so
+	// getWorkflowCheckpoint() can report an accurate count.
+	Object.defineProperty(ctx, "__agentCount", {
+		get: () => agentCount,
+		enumerable: false,
+	});
 	return ctx;
 }
@@ -359,11 +641,103 @@ export function getWorkflowFinalResult(ctx: WorkflowCtx): { artifactPath: string
 	return (ctx as unknown as { __finalResult?: { artifactPath: string; meta?: Record<string, unknown> } }).__finalResult;
 }
+/** Read the in-memory phase state set by the script (runner-only; not part of the public ctx surface).
+ *  Reads the hidden `__phaseState` property of the ctx object through a type cast.
+ *  @param ctx - workflow context to inspect.
+ *  @returns the `{ currentPhase, phases }` object, or undefined when the ctx has no such property. */
+export function getWorkflowPhaseState(ctx: WorkflowCtx): { currentPhase: string | undefined; phases: string[] } | undefined {
+	return (ctx as unknown as { __phaseState?: { currentPhase: string | undefined; phases: string[] } }).__phaseState;
+}
+/** Read the in-memory log buffer appended by ctx.log() (runner-only; not part of the public ctx surface).
+ *  Capped at 1000 entries — the events log (dwf.log) is the durable source of truth.
+ *  Reads the hidden `__logs` property of the ctx object through a type cast.
+ *  @param ctx - workflow context to inspect.
+ *  @returns the log lines, or undefined when the ctx has no such property. */
+export function getWorkflowLogs(ctx: WorkflowCtx): string[] | undefined {
+	return (ctx as unknown as { __logs?: string[] }).__logs;
+}
+/** round-18 P2-3: snapshot the current DWF checkpoint state (runner-only; not part of the public
+ *  ctx surface). Mirrors getWorkflowFinalResult/getWorkflowPhaseState. The runner relies on the
+ *  `onCheckpoint` callback for accurate per-agent-call checkpoints (it captures the closure value
+ *  at call time); this helper is a best-effort snapshot for inspection/debugging.
+ *
+ *  `vars`, `phases` and `logs` are placed in the result as-is (no copy is made here), so the
+ *  returned object shares those references with whatever the ctx exposes. Missing hidden
+ *  properties fall back to empty phases/logs and an agent count of 0.
+ *  @param ctx - workflow context to snapshot.
+ *  @returns a checkpoint record stamped with the current time as an ISO-8601 string. */
+export function getWorkflowCheckpoint(ctx: WorkflowCtx): DwfCheckpointState {
+	const phaseState = getWorkflowPhaseState(ctx);
+	const logs = getWorkflowLogs(ctx);
+	return {
+		runId: ctx.runId,
+		vars: ctx.vars,
+		phases: phaseState?.phases ?? [],
+		currentPhase: phaseState?.currentPhase,
+		logs: logs ?? [],
+		spent: ctx.budget.spent(),
+		agentCount: (ctx as unknown as { __agentCount?: number }).__agentCount ?? 0,
+		updatedAt: new Date().toISOString(),
+	};
+}
 /** Compose the agent task: prompt + optional dependency-input context block.
  *  In the added (+) version a JSON-only "Output format" section is also appended
  *  whenever `call.schema` is set; the removed (-) version returned as soon as the
  *  inputs block had been handled.
  *  @param call - the agent call options (`prompt`, optional `inputs`, optional `schema`).
  *  @returns the full task text handed to the child agent. */
 function composeAgentTask(call: AgentCallOpts): string {
-	if (!call.inputs?.length) return call.prompt;
-	const block = call.inputs.map((p) => `- ${p}`).join("\n");
-	return `${call.prompt}\n\n## Inputs (artifact paths)\n${block}`;
+	let base = call.prompt;
+	if (call.inputs?.length) { // absent or empty inputs: no inputs section is added
+		const block = call.inputs.map((p) => `- ${p}`).join("\n"); // one markdown bullet per artifact path
+		base = `${base}\n\n## Inputs (artifact paths)\n${block}`;
+	}
+	// Round-13 P0-3: when a schema is requested, append a JSON-output directive.
+	// The directive lives at the END of the prompt so it wins over any conflicting
+	// persona instruction in the agent's system prompt.
+	if (call.schema !== undefined) {
+		base = `${base}\n\n## Output format\nRespond with ONLY a single JSON object that matches the schema described in your instructions. Begin your response with { and end with }. Do not wrap the JSON in a code fence. Do not add any prose before or after the JSON.`;
+	}
+	return base;
+}
+/**
+ * Round-13 P0-3: compose a system-prompt suffix that asks the agent to output a
+ * structured JSON object matching the schema's required shape. We don't expose
+ * the TypeBox internal type — we describe the SHAPE so the model can match it.
+ *
+ * @param base - existing system prompt to keep in front of the instruction; when it is
+ *   undefined or empty, only the structured-output instruction is returned.
+ * @param schema - schema whose shape description is embedded in the instruction.
+ * @returns `base`, a blank line, then the instruction — or the instruction alone.
+ */
+function composeSchemaSystemPrompt(base: string | undefined, schema: TSchema): string {
+	const shape = describeSchemaShape(schema, 0); // start the walk at depth 0
+	const intro = "You are a structured-output assistant. ";
+	const instruction = `When responding, output ONLY a single JSON object matching this shape (no prose, no markdown fences, no commentary): ${shape}. Begin your response with { and end with }.`;
+	if (typeof base === "string" && base.length > 0) {
+		return `${base}\n\n${intro}${instruction}`;
+	}
+	return `${intro}${instruction}`;
+}
+/**
+ * Walk a TypeBox schema recursively and produce a human-readable shape description.
+ * Depth-limited to avoid runaway expansion on deeply nested schemas.
+ *
+ * Rules, in the order they are tried:
+ *  - depth greater than 4 → "{...}" (whatever the schema kind is);
+ *  - non-object or falsy input → "any";
+ *  - object → `{"key": shape, "optionalKey"?: shape}`; a key is marked `?` when it is
+ *    not listed in `required`; an object without `properties` → "{}";
+ *  - array → `[itemShape]`;
+ *  - string / number / integer / boolean / null → the primitive name (integer reads "number");
+ *  - anyOf / oneOf → member shapes joined with " | ";
+ *  - enum → JSON-encoded values joined with " | ";
+ *  - anything else → "any".
+ *
+ * NOTE(review): the primitive `type` checks run before the `enum` check, so a schema
+ * that carries both (e.g. type "string" plus enum) is described as just "string", and
+ * `const` literals are never inspected — confirm this loss of detail is intended.
+ *
+ * @param schema - schema node to describe (treated as an untyped record).
+ * @param depth - current nesting depth; callers start at 0.
+ * @returns the shape description text.
+ */
+function describeSchemaShape(schema: unknown, depth: number): string {
+	if (depth > 4) return "{...}";
+	if (!schema || typeof schema !== "object") return "any";
+	const obj = schema as Record<string, unknown>;
+	// TypeBox: every schema has a `type` discriminator or a `kind` field.
+	const kind = obj.kind as string | undefined;
+	const type = obj.type as string | undefined;
+	if (kind === "object" || type === "object") {
+		const properties = obj.properties;
+		if (!properties || typeof properties !== "object") return "{}";
+		const required = Array.isArray(obj.required) ? new Set(obj.required as string[]) : new Set<string>();
+		const props = Object.entries(properties as Record<string, unknown>)
+			.map(([key, sub]) => {
+				const mark = required.has(key) ? "" : "?"; // "?" flags a key missing from `required`
+				return `"${key}"${mark}: ${describeSchemaShape(sub, depth + 1)}`;
+			})
+			.join(", ");
+		return `{${props}}`;
+	}
+	if (kind === "array" || type === "array") {
+		const items = obj.items;
+		return `[${describeSchemaShape(items, depth + 1)}]`; // missing `items` falls through to "any" inside the brackets
+	}
+	if (type === "string") return "string";
+	if (type === "number" || type === "integer") return "number";
+	if (type === "boolean") return "boolean";
+	if (type === "null") return "null";
+	// Union/Enum fallbacks.
+	if (Array.isArray(obj.anyOf)) return obj.anyOf.map((s) => describeSchemaShape(s, depth + 1)).join(" | ");
+	if (Array.isArray(obj.oneOf)) return obj.oneOf.map((s) => describeSchemaShape(s, depth + 1)).join(" | ");
+	if (Array.isArray(obj.enum)) return obj.enum.map((v) => JSON.stringify(v)).join(" | ");
+	return "any";
 }
 /**

package/src/runtime/dynamic-workflow-runner.ts CHANGED Viewed

@@ -23,7 +23,9 @@ import { resolveRealContainedPath } from "../utils/safe-paths.ts";
 import { appendEvent } from "../state/event-log.ts";
 import { writeArtifact } from "../state/artifact-store.ts";
 import { logInternalError } from "../utils/internal-error.ts";
-import { makeWorkflowCtx, getWorkflowFinalResult } from "./dynamic-workflow-context.ts";
+import { makeWorkflowCtx, getWorkflowFinalResult, getWorkflowPhaseState } from "./dynamic-workflow-context.ts";
+import { DwfStore } from "./dwf-state-store.ts";
+import { assertDeterministicScript, isDeterminismCheckEnabled } from "./deterministic-ast.ts";
 import { projectCrewRoot, userPiRoot, packageRoot } from "../utils/paths.ts";
 import type { DynamicWorkflowConfig } from "../workflows/workflow-config.ts";
 import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
@@ -36,6 +38,8 @@ export interface RunDynamicWorkflowInput {
 	signal: AbortSignal;
 	concurrency?: number;
 	modelOverride?: string;
+	/** round-14 P1-2: per-workflow token budget. Overrides workflow.maxTokenBudget. */
+	tokenBudget?: number;
 }
 export interface RunDynamicWorkflowResult {
@@ -46,6 +50,27 @@ export interface RunDynamicWorkflowResult {
 /** The signature a .dwf.ts default export must satisfy. */
 export type DynamicWorkflowScript = (ctx: import("./dynamic-workflow-context.ts").WorkflowCtx) => Promise<void> | void;
+/**
+ * round-12 P0-4: defensive structured-clone guard at the runner boundary.
+ *
+ * Today this is mostly future-proofing: a DWF script's `setResult()` path
+ * reads an artifact file as a string, and strings are always structured-
+ * cloneable. But if a future code path produces a non-cloneable value
+ * (e.g. a Worker postMessage payload that wraps a Symbol or function), we
+ * want a clear, actionable error here — not a cryptic `DataCloneError`
+ * from deep inside the artifact store. The error message also nudges
+ * users toward the most common cause: forgetting `await` on ctx.agent()
+ * or ctx.review() in their script.
+ *
+ * @param value - the value to probe; it is cloned once and the clone is discarded.
+ * @param name - label used at the start of the error message.
+ * @throws Error when `structuredClone(value)` throws; the original failure text is
+ *   appended to the message.
+ */
+function assertStructuredCloneable(value: unknown, name: string): void {
+	try {
+		structuredClone(value); // only the success/failure matters; the copy itself is not used
+	} catch (error) {
+		const detail = error instanceof Error ? error.message : String(error);
+		throw new Error(`${name} must be structured-cloneable; did you forget to await ctx.agent() or ctx.review()? ${detail}`);
+	}
+}
 /**
  * Resolve + validate the script path against the allowlist of workflow dirs (§0c C5).
  * Returns the real contained path or throws.
@@ -79,12 +104,24 @@ function resolveScriptPath(workflow: DynamicWorkflowConfig, cwd: string): string
 /**
  * Transpile + load the .dwf.ts default export. Uses jiti (already a dep) for TS→JS.
  * Returns the default export function or throws.
+ *
+ * Round-13 P0-2: after reading the script source, run `assertDeterministicScript`
+ * to reject non-deterministic calls (Date.now()/Math.random()/new Date()) BEFORE
+ * jiti executes the module. The check is opt-out via PI_CREW_DWF_SKIP_DETERMINISM_CHECK=1.
  */
 async function loadWorkflowModule(scriptPath: string): Promise<DynamicWorkflowScript> {
+	// Round-13 P0-2: read source first so we can AST-scan before execution.
+	// jiti does not surface the transpiled source back to us, so we read the
+	// raw .dwf.ts file. This is the same source jiti will execute.
+	const scriptSource = readFileSync(scriptPath, "utf-8");
+	if (isDeterminismCheckEnabled()) {
+		assertDeterministicScript(scriptSource);
+	}
 	// jiti is the same loader async-runner.ts uses (resolveTypeScriptLoader). We require it
 	// lazily so this module stays importable in environments without jiti (type-only consumers).
 	// Fix round-4: use createRequire(import.meta.url) so `require` works under the strip-types
 	// loader fallback (Node ≥ 22.6) where bare `require` is not defined in ESM scope.
+		// LAZY: defer dynamic import of node:module to its call site.
 	const { createRequire } = await import("node:module");
 	const require = createRequire(import.meta.url);
 	// eslint-disable-next-line @typescript-eslint/no-require-imports
@@ -109,11 +146,37 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
 	appendEvent(eventsPath, { type: "dwf.started", runId: manifest.runId, data: { workflow: workflow.name, script: scriptPath } });
+	// round-18 P2-3: resume/checkpoint. Load any existing checkpoint for this run's stateRoot.
+	// stateRoot is already <crewRoot>/state/runs/<runId>, so the checkpoint lands at
+	// <stateRoot>/dwf-checkpoint.json (no double-nesting). A missing checkpoint (fresh run)
+	// yields undefined — makeWorkflowCtx starts with empty defaults (backward compatible).
+	const dwfStore = new DwfStore(manifest.stateRoot);
+	const resumedState = dwfStore.load();
+	if (resumedState) {
+		appendEvent(eventsPath, {
+			type: "dwf.resumed",
+			runId: manifest.runId,
+			data: { agentCount: resumedState.agentCount, phases: resumedState.phases, currentPhase: resumedState.currentPhase },
+		});
+	}
 	const ctx = makeWorkflowCtx(manifest, {
 		concurrency: input.concurrency ?? workflow.maxConcurrency ?? 4,
 		signal,
 		team: input.team,
 		modelOverride: input.modelOverride,
+		tokenBudget: input.tokenBudget ?? workflow.maxTokenBudget,
+		args: manifest.args,
+		resumedState,
+		// round-18 P2-3: checkpoint after each ctx.agent() call so a crash between calls
+		// leaves durable state. onCheckpoint captures the closure values at call time.
+		onCheckpoint: (state) => {
+			try {
+				dwfStore.save(state);
+			} catch (error) {
+				logInternalError("dynamic-workflow-runner.checkpoint-save", error, `runId=${manifest.runId}`);
+			}
+		},
 	});
 	// Freeze the ctx so the script cannot add/override capability methods (§0c C4).
@@ -150,6 +213,12 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
 	const final = getWorkflowFinalResult(ctx);
 	const finalText = final ? readFinalArtifact(final.artifactPath) : `(dynamic workflow '${workflow.name}' completed without calling ctx.setResult())`;
+	// round-12 P0-4: fail fast on unawaited Promise returns BEFORE we try to
+	// write a 2 KB blob that contains a Promise reference. finalText is expected
+	// to be a string, for which structuredClone always succeeds; the check only
+	// throws if the script returned something uncloneable (most often an
+	// unawaited Promise), in which case we want a clear error.
+	assertStructuredCloneable(finalText, "final artifact content (set via ctx.setResult)");
 	// Write a summary artifact mirroring the static-workflow summary.md contract (run.ts reads this).
 	const summary = writeArtifact(manifest.artifactsRoot, {
 		kind: "result",
@@ -158,12 +227,35 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
 		producer: "dynamic-workflow",
 	});
+	// round-12 P0-1: safety net — if a script never explicitly closes its
+	// final phase before returning, the runner emits a closing event so the
+	// last open phase is always terminated before dwf.completed.
+	const phaseState = getWorkflowPhaseState(ctx);
+	if (phaseState?.currentPhase !== undefined) {
+		appendEvent(eventsPath, {
+			type: "dwf.phase_completed",
+			runId: manifest.runId,
+			data: { phase: phaseState.currentPhase },
+		});
+		phaseState.currentPhase = undefined;
+	}
 	appendEvent(eventsPath, { type: "dwf.completed", runId: manifest.runId, data: { workflow: workflow.name, summaryArtifact: summary.path } });
+	// round-18 P2-3: the run completed cleanly — delete the checkpoint so a fresh re-run
+	// (same runId) starts from scratch rather than resuming stale state.
+	dwfStore.delete();
+	// round-12 P0-4: also guard the manifest.summary slice (the value is
+	// written into JSON-serialized manifest state — a Promise here would also
+	// crash later in the run-event-bus emitter).
+	const summaryText = finalText.slice(0, 2000);
+	assertStructuredCloneable(summaryText, "manifest.summary (derived from final result)");
 	const updatedManifest: TeamRunManifest = {
 		...manifest,
 		status: "completed",
-		summary: finalText.slice(0, 2000),
+		summary: summaryText,
 		updatedAt: new Date().toISOString(),
 		artifacts: [...manifest.artifacts, summary],
 	};