npm - pi-crew - Versions diffs - 0.9.5 → 0.9.8 - Mend

pi-crew 0.9.5 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/CHANGELOG.md +556 -0
package/README.md +10 -3
package/docs/HARNESS_BACKLOG.md +51 -3
package/docs/dynamic-workflows.md +315 -2
package/docs/fix-plan-disabletools-exit-null.md +219 -0
package/docs/troubleshooting.md +76 -0
package/package.json +10 -3
package/src/config/defaults.ts +8 -4
package/src/extension/team-tool/doctor.ts +14 -0
package/src/extension/team-tool/run.ts +2 -0
package/src/runtime/background-runner.ts +1 -1
package/src/runtime/capability-inventory.ts +20 -1
package/src/runtime/child-pi.ts +109 -11
package/src/runtime/deterministic-ast.ts +161 -0
package/src/runtime/dwf-state-store.ts +97 -0
package/src/runtime/dynamic-workflow-context.ts +381 -7
package/src/runtime/dynamic-workflow-runner.ts +93 -2
package/src/runtime/pi-args.ts +11 -0
package/src/runtime/result-extractor.ts +72 -7
package/src/runtime/task-output-context.ts +25 -9
package/src/runtime/team-runner.ts +8 -3
package/src/runtime/zombie-scanner.ts +297 -0
package/src/schema/team-tool-schema.ts +28 -0
package/src/skills/discover-skills.ts +61 -8
package/src/skills/validate.ts +267 -0
package/src/state/contracts.ts +1 -0
package/src/state/state-store.ts +3 -0
package/src/state/types.ts +9 -0
package/src/ui/dashboard-panes/progress-pane.ts +5 -0
package/src/ui/dwf-phase-display.ts +151 -0
package/src/ui/keybinding-map.ts +128 -41
package/src/ui/run-event-bus.ts +83 -0
package/src/ui/run-snapshot-cache.ts +4 -0
package/src/ui/snapshot-types.ts +3 -0
package/src/workflows/workflow-config.ts +3 -0
package/src/worktree/worktree-manager.ts +94 -0
package/types/dwf.d.ts +187 -0

package/src/runtime/dynamic-workflow-runner.ts CHANGED Viewed

@@ -23,7 +23,9 @@ import { resolveRealContainedPath } from "../utils/safe-paths.ts";
 import { appendEvent } from "../state/event-log.ts";
 import { writeArtifact } from "../state/artifact-store.ts";
 import { logInternalError } from "../utils/internal-error.ts";
-import { makeWorkflowCtx, getWorkflowFinalResult } from "./dynamic-workflow-context.ts";
+import { makeWorkflowCtx, getWorkflowFinalResult, getWorkflowPhaseState } from "./dynamic-workflow-context.ts";
+import { DwfStore } from "./dwf-state-store.ts";
+import { assertDeterministicScript, isDeterminismCheckEnabled } from "./deterministic-ast.ts";
 import { projectCrewRoot, userPiRoot, packageRoot } from "../utils/paths.ts";
 import type { DynamicWorkflowConfig } from "../workflows/workflow-config.ts";
 import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
@@ -36,6 +38,8 @@ export interface RunDynamicWorkflowInput {
 	signal: AbortSignal;
 	concurrency?: number;
 	modelOverride?: string;
+	/** round-14 P1-2: per-workflow token budget. Overrides workflow.maxTokenBudget. */
+	tokenBudget?: number;
 }
 export interface RunDynamicWorkflowResult {
@@ -46,6 +50,27 @@ export interface RunDynamicWorkflowResult {
 /** The signature a .dwf.ts default export must satisfy. */
 export type DynamicWorkflowScript = (ctx: import("./dynamic-workflow-context.ts").WorkflowCtx) => Promise<void> | void;
+/**
+ * round-12 P0-4: defensive structured-clone guard at the runner boundary.
+ *
+ * Today this is mostly future-proofing: a DWF script's `setResult()` path
+ * reads an artifact file as a string, and strings are always structured-
+ * cloneable. But if a future code path produces a non-cloneable value
+ * (e.g. a Worker postMessage payload that wraps a Symbol or function), we
+ * want a clear, actionable error here — not a cryptic `DataCloneError`
+ * from deep inside the artifact store. The error message also nudges
+ * users toward the most common cause: forgetting `await` on ctx.agent()
+ * or ctx.review() in their script.
+ *
+ * @param value - The value to probe. It is passed to `structuredClone()` and
+ *                the resulting copy is discarded; only success/failure matters.
+ * @param name - Human-readable label for `value`; becomes the prefix of the
+ *               error message.
+ * @throws Error when `structuredClone(value)` throws. The message is `name`,
+ *         the "did you forget to await" hint, and then the original error's
+ *         `message` (or its string form when the thrown value is not an `Error`).
+ */
+function assertStructuredCloneable(value: unknown, name: string): void {
+	try {
+		structuredClone(value);
+	} catch (error) {
+		const detail = error instanceof Error ? error.message : String(error);
+		throw new Error(`${name} must be structured-cloneable; did you forget to await ctx.agent() or ctx.review()? ${detail}`);
+	}
+}
 /**
  * Resolve + validate the script path against the allowlist of workflow dirs (§0c C5).
  * Returns the real contained path or throws.
@@ -79,8 +104,19 @@ function resolveScriptPath(workflow: DynamicWorkflowConfig, cwd: string): string
 /**
  * Transpile + load the .dwf.ts default export. Uses jiti (already a dep) for TS→JS.
  * Returns the default export function or throws.
+ *
+ * Round-13 P0-2: after reading the script source, run `assertDeterministicScript`
+ * to reject non-deterministic calls (Date.now()/Math.random()/new Date()) BEFORE
+ * jiti executes the module. The check is opt-out via PI_CREW_DWF_SKIP_DETERMINISM_CHECK=1.
  */
 async function loadWorkflowModule(scriptPath: string): Promise<DynamicWorkflowScript> {
+	// Round-13 P0-2: read source first so we can AST-scan before execution.
+	// jiti does not surface the transpiled source back to us, so we read the
+	// raw .dwf.ts file. This is the same source jiti will execute.
+	const scriptSource = readFileSync(scriptPath, "utf-8");
+	if (isDeterminismCheckEnabled()) {
+		assertDeterministicScript(scriptSource);
+	}
 	// jiti is the same loader async-runner.ts uses (resolveTypeScriptLoader). We require it
 	// lazily so this module stays importable in environments without jiti (type-only consumers).
 	// Fix round-4: use createRequire(import.meta.url) so `require` works under the strip-types
@@ -110,11 +146,37 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
 	appendEvent(eventsPath, { type: "dwf.started", runId: manifest.runId, data: { workflow: workflow.name, script: scriptPath } });
+	// round-18 P2-3: resume/checkpoint. Load any existing checkpoint for this run's stateRoot.
+	// stateRoot is already <crewRoot>/state/runs/<runId>, so the checkpoint lands at
+	// <stateRoot>/dwf-checkpoint.json (no double-nesting). A missing checkpoint (fresh run)
+	// yields undefined — makeWorkflowCtx starts with empty defaults (backward compatible).
+	const dwfStore = new DwfStore(manifest.stateRoot);
+	const resumedState = dwfStore.load();
+	if (resumedState) {
+		appendEvent(eventsPath, {
+			type: "dwf.resumed",
+			runId: manifest.runId,
+			data: { agentCount: resumedState.agentCount, phases: resumedState.phases, currentPhase: resumedState.currentPhase },
+		});
+	}
 	const ctx = makeWorkflowCtx(manifest, {
 		concurrency: input.concurrency ?? workflow.maxConcurrency ?? 4,
 		signal,
 		team: input.team,
 		modelOverride: input.modelOverride,
+		tokenBudget: input.tokenBudget ?? workflow.maxTokenBudget,
+		args: manifest.args,
+		resumedState,
+		// round-18 P2-3: checkpoint after each ctx.agent() call so a crash between calls
+		// leaves durable state. onCheckpoint captures the closure values at call time.
+		onCheckpoint: (state) => {
+			try {
+				dwfStore.save(state);
+			} catch (error) {
+				logInternalError("dynamic-workflow-runner.checkpoint-save", error, `runId=${manifest.runId}`);
+			}
+		},
 	});
 	// Freeze the ctx so the script cannot add/override capability methods (§0c C4).
@@ -151,6 +213,12 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
 	const final = getWorkflowFinalResult(ctx);
 	const finalText = final ? readFinalArtifact(final.artifactPath) : `(dynamic workflow '${workflow.name}' completed without calling ctx.setResult())`;
+	// round-12 P0-4: fail fast on unawaited Promise returns BEFORE we try to
+	// write a 2 KB blob that contains a Promise reference. structuredClone on
+	// a string always succeeds; if it doesn't, the script returned something
+	// uncloneable (most often an unawaited Promise) and we want a clear error.
+	assertStructuredCloneable(finalText, "final artifact content (set via ctx.setResult)");
 	// Write a summary artifact mirroring the static-workflow summary.md contract (run.ts reads this).
 	const summary = writeArtifact(manifest.artifactsRoot, {
 		kind: "result",
@@ -159,12 +227,35 @@ export async function runDynamicWorkflow(input: RunDynamicWorkflowInput): Promis
 		producer: "dynamic-workflow",
 	});
+	// round-12 P0-1: safety net — if a script never explicitly closes its
+	// final phase before returning, the runner emits a closing event so the
+	// last open phase is always terminated before dwf.completed.
+	const phaseState = getWorkflowPhaseState(ctx);
+	if (phaseState?.currentPhase !== undefined) {
+		appendEvent(eventsPath, {
+			type: "dwf.phase_completed",
+			runId: manifest.runId,
+			data: { phase: phaseState.currentPhase },
+		});
+		phaseState.currentPhase = undefined;
+	}
 	appendEvent(eventsPath, { type: "dwf.completed", runId: manifest.runId, data: { workflow: workflow.name, summaryArtifact: summary.path } });
+	// round-18 P2-3: the run completed cleanly — delete the checkpoint so a fresh re-run
+	// (same runId) starts from scratch rather than resuming stale state.
+	dwfStore.delete();
+	// round-12 P0-4: also guard the manifest.summary slice (the value is
+	// written into JSON-serialized manifest state — a Promise here would also
+	// crash later in the run-event-bus emitter).
+	const summaryText = finalText.slice(0, 2000);
+	assertStructuredCloneable(summaryText, "manifest.summary (derived from final result)");
 	const updatedManifest: TeamRunManifest = {
 		...manifest,
 		status: "completed",
-		summary: finalText.slice(0, 2000),
+		summary: summaryText,
 		updatedAt: new Date().toISOString(),
 		artifacts: [...manifest.artifacts, summary],
 	};

package/src/runtime/pi-args.ts CHANGED Viewed

@@ -243,6 +243,12 @@ export function createSafeTempDir(base: string, prefix: string): string {
 }
 export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerArgsResult {
+	// NOTE: do NOT add an argv flag like `--crew-subagent` here. Pi uses a strict
+	// option parser and REJECTS unknown flags with a non-zero exit, which would
+	// break every ctx.agent() call. The authoritative sub-agent identity signal
+	// is the PI_CREW_KIND=subagent ENV var (set below) — the zombie scanner and
+	// doctor --zombies read it from /proc/<pid>/environ. The user's main session
+	// never sets it, so it can never be matched as a sub-agent.
 	const args = ["--mode", "json", "-p"];
 	if (input.sessionEnabled === false) args.push("--no-session");
@@ -327,6 +333,11 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
 	return {
 		args,
 		env: {
+			// PI_CREW_KIND is the authoritative machine-readable sub-agent marker. It is always
+		// present on a child-pi process and NEVER present on a user's interactive main session.
+			// doctor --zombies uses it to safely list orphaned sub-agents without ever matching a
+		// main session (the lesson from an accidental `kill` of a live main session).
+			PI_CREW_KIND: "subagent",
 			PI_CREW_INHERIT_PROJECT_CONTEXT: input.agent.inheritProjectContext ? "1" : "0",
 			PI_CREW_INHERIT_SKILLS: input.agent.inheritSkills ? "1" : "0",
 			PI_CREW_DEPTH: String(parentDepth + 1),

package/src/runtime/result-extractor.ts CHANGED Viewed

@@ -1,11 +1,19 @@
 /**
  * Structured Result Extractor — attempts to extract structured data from worker output.
  * Tries multiple extraction strategies before falling back to raw text.
+ *
+ * Round-13 P0-3: optional `schema` (TypeBox `TSchema`) — when provided, extracted
+ * data is validated against the schema via `Value.Check`. On mismatch, the result
+ * is `structured:false` with an explanatory `error`. Backward compatible: when
+ * schema is undefined, behavior is identical to the previous regex-based extractor.
  */
+import type { TSchema } from "@sinclair/typebox";
+import { Value } from "@sinclair/typebox/value";
 export interface ExtractedResult {
 	/** Whether structured data was successfully extracted */
 	structured: boolean;
-	/** Parsed structured data (if structured=true) */
+	/** Parsed structured data (if structured=true AND validated against schema if provided) */
 	data: unknown;
 	/** Raw text output (always available) */
 	rawText: string;
@@ -15,9 +23,13 @@ export interface ExtractedResult {
 /**
  * Extract structured result from raw worker output text.
- * Tries strategies in order: direct JSON, fenced JSON, key-value markers.
+ * Tries strategies in order: direct JSON, fenced JSON, key-value markers, scan.
+ *
+ * @param raw - the raw text output from a worker
+ * @param schema - optional TypeBox schema. When provided, the extracted value is
+ *                 validated; mismatch produces `{structured:false, error:...}`.
  */
-export function extractStructuredResult(raw: string, _schema?: Record<string, unknown>): ExtractedResult {
+export function extractStructuredResult(raw: string, schema?: TSchema): ExtractedResult {
 	const trimmed = raw.trim();
 	if (!trimmed) {
 		return { structured: false, data: null, rawText: raw };
@@ -26,19 +38,19 @@ export function extractStructuredResult(raw: string, _schema?: Record<string, un
 	// Strategy 1: Direct JSON parse (entire output is JSON)
 	const directResult = tryDirectJson(trimmed);
 	if (directResult !== undefined) {
-		return { structured: true, data: directResult, rawText: raw };
+		return finalize(directResult, raw, schema);
 	}
 	// Strategy 2: Extract from ```json ... ``` fence
 	const fencedResult = tryFencedJson(trimmed);
 	if (fencedResult !== undefined) {
-		return { structured: true, data: fencedResult, rawText: raw };
+		return finalize(fencedResult, raw, schema);
 	}
 	// Strategy 3: Extract from markers like "RESULT:" or "OUTPUT:"
 	const markerResult = tryMarkerExtraction(trimmed);
 	if (markerResult !== undefined) {
-		return { structured: true, data: markerResult, rawText: raw };
+		return finalize(markerResult, raw, schema);
 	}
 	// Strategy 4: Scan for the first JSON object/array anywhere in text.
@@ -46,12 +58,65 @@ export function extractStructuredResult(raw: string, _schema?: Record<string, un
 	// around the JSON. This catches JSON embedded in sentences, lists, or prose.
 	const scannedResult = tryScanJson(trimmed);
 	if (scannedResult !== undefined) {
-		return { structured: true, data: scannedResult, rawText: raw };
+		return finalize(scannedResult, raw, schema);
 	}
 	return { structured: false, data: null, rawText: raw };
 }
+/**
+ * After extracting a candidate object, validate it against the optional TypeBox schema.
+ * When no schema is given, behavior is the legacy "structured:true" path.
+ * When a schema is given and validation fails, return structured:false with a
+ * clear error message (caller can surface this in the AgentResult).
+ *
+ * NOTE: TypeBox 0.34.49's `Value.Check` returns a boolean and does not expose
+ * per-error paths in its public API. We use the boolean + a fallback "type mismatch"
+ * description. Scripts that need detailed diagnostics can wrap their own validator.
+ *
+ * @param candidate - Value produced by one of the extraction strategies.
+ * @param raw - The worker output as received by the extractor; echoed back
+ *              unchanged as `rawText` on every return path.
+ * @param schema - Optional TypeBox schema. A falsy value skips validation entirely.
+ * @returns `structured:true` with `data` set to `candidate` when there is no schema
+ *          or `Value.Check` passes; otherwise `structured:false`, `data:null`, and an
+ *          `error` string built from `describeSchemaShape(schema)` and
+ *          `describeValue(candidate)`.
+ */
+function finalize(candidate: unknown, raw: string, schema: TSchema | undefined): ExtractedResult {
+	if (!schema) {
+		return { structured: true, data: candidate, rawText: raw };
+	}
+	const ok = Value.Check(schema, candidate);
+	if (ok) {
+		return { structured: true, data: candidate, rawText: raw };
+	}
+	return {
+		structured: false,
+		data: null,
+		rawText: raw,
+		error: `structured output does not match schema: expected shape ${describeSchemaShape(schema)}, got ${describeValue(candidate)}`,
+	};
+}
+function describeValue(value: unknown): string { // Compact JSON preview of `value` for use in error messages.
+	try {
+		const json = JSON.stringify(value);
+		return json.length > 200 ? `${json.slice(0, 200)}…` : json; // cap the preview at 200 UTF-16 code units and mark the cut with "…"
+	} catch {
+		return typeof value; // reached when stringify throws, or when it returns undefined (value has no JSON form) and `.length` then throws
+	}
+}
+function describeSchemaShape(schema: unknown): string { // Short human-readable label for a schema, used in the mismatch error text.
+	if (!schema || typeof schema !== "object") return "any"; // anything that is not an object gets the generic label
+	const obj = schema as Record<string, unknown>;
+	const kind = obj.kind as string | undefined; // NOTE(review): reads a string-keyed, lowercase `kind`; TypeBox presumably stores its kind under a symbol key — confirm this is ever set
+	const type = obj.type as string | undefined;
+	if (kind === "object" || type === "object") {
+		const properties = obj.properties;
+		if (!properties || typeof properties !== "object") return "object";
+		return `object<${Object.keys(properties as Record<string, unknown>).join(",")}>`; // lists top-level property names only; nested shapes are not described
+	}
+	if (kind === "array" || type === "array") return "array";
+	if (type === "string") return "string";
+	if (type === "number" || type === "integer") return "number"; // "integer" is reported as "number"
+	if (type === "boolean") return "boolean";
+	if (Array.isArray(obj.anyOf) || Array.isArray(obj.oneOf)) return "union"; // only reached when none of the `type`/`kind` checks above returned
+	return "any"; // unrecognised schema shapes fall back to the generic label
+}
 function tryDirectJson(text: string): unknown | undefined {
 	if (!text.startsWith("{") && !text.startsWith("[")) return undefined;
 	try {

package/src/runtime/task-output-context.ts CHANGED Viewed

@@ -30,20 +30,36 @@ function containedExists(filePath: string, baseDir?: string): boolean {
 	}
 }
-function readIfSmall(filePath: string, maxBytes = 24_000, baseDir?: string): string | undefined {
+/**
+ * L4 output-handling: single consistent threshold for all artifact reads.
+ * Sized from real data (27 result artifacts: max 9226 bytes; 100% < 16KB).
+ * 32,000 bytes gives 2x headroom over that 16KB bound (roughly 3.5x over the
+ * largest observed real output) while still bounding memory. It replaces the
+ * old inconsistent per-call-site values (24K/40K/80K), which truncated the
+ * same artifact differently depending on which code path read it. Note that
+ * this raises the former 24K limit but lowers the former 40K and 80K limits.
+ */
+const MAX_RESULT_INLINE_BYTES = 32_000;
+function readIfSmall(filePath: string, baseDir?: string): string | undefined {
+	const maxBytes = MAX_RESULT_INLINE_BYTES;
 	try {
 		const safePath = baseDir ? resolveRealContainedPath(baseDir, filePath) : filePath;
 		const stat = fs.statSync(safePath);
 		if (stat.size > maxBytes) {
-			// Use bounded read to avoid loading entire file into memory
-			const buf = Buffer.alloc(maxBytes);
+			// L4: head + tail instead of head-only. Keeps closing markdown
+			// structure (code fences, headings) instead of leaving them truncated.
+			const head = Math.floor(maxBytes * 0.75);
+			const tail = maxBytes - head;
+			const headBuf = Buffer.alloc(head);
+			const tailBuf = Buffer.alloc(tail);
 			const fd = fs.openSync(safePath, "r");
 			try {
-				fs.readSync(fd, buf, 0, maxBytes, 0);
+				fs.readSync(fd, headBuf, 0, head, 0);
+				fs.readSync(fd, tailBuf, 0, tail, stat.size - tail);
 			} finally {
 				fs.closeSync(fd);
 			}
-			return `${buf.toString("utf-8")}\n\n...(truncated ${stat.size - maxBytes} bytes)`;
+			return `${headBuf.toString("utf-8")}\n\n...[pi-crew truncated ${stat.size - maxBytes} bytes, head+tail preserved]...\n${tailBuf.toString("utf-8")}`;
 		}
 		return fs.readFileSync(safePath, "utf-8");
 	} catch {
@@ -99,7 +115,7 @@ export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks:
 	const byStep = new Map(tasks.map((item) => [item.stepId, item]).filter((entry): entry is [string, TeamTaskState] => Boolean(entry[0])));
 	const byId = new Map(tasks.map((item) => [item.id, item]));
 	const dependencies = task.dependsOn.map((dep) => byStep.get(dep) ?? byId.get(dep)).filter((item): item is TeamTaskState => Boolean(item)).map((item) => {
-		const resultText = item.resultArtifact ? readIfSmall(item.resultArtifact.path, 24_000, manifest.artifactsRoot) : undefined;
+		const resultText = item.resultArtifact ? readIfSmall(item.resultArtifact.path, manifest.artifactsRoot) : undefined;
 		return {
 			taskId: item.id,
 			role: item.role,
@@ -113,7 +129,7 @@ export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks:
 	});
 	const sharedReads = (step.reads === false ? [] : step.reads ?? []).map((name) => {
 		const filePath = sharedPath(manifest, name);
-		return { name, path: filePath, content: readIfSmall(filePath, 24_000, path.resolve(manifest.artifactsRoot, "shared")) ?? "" };
+		return { name, path: filePath, content: readIfSmall(filePath, path.resolve(manifest.artifactsRoot, "shared")) ?? "" };
 	}).filter((item) => item.content.trim().length > 0);
 	return { dependencies, sharedReads };
 }
@@ -139,7 +155,7 @@ export function renderDependencyOutputContext(context: DependencyOutputContext):
 export function writeTaskSharedOutput(manifest: TeamRunManifest, step: WorkflowStep, task: TeamTaskState): ArtifactDescriptor | undefined {
 	if (step.output === false) return undefined;
 	const name = safeSharedName(step.output || `${task.id}.md`);
-	const source = task.resultArtifact ? readIfSmall(task.resultArtifact.path, 80_000, manifest.artifactsRoot) : undefined;
+	const source = task.resultArtifact ? readIfSmall(task.resultArtifact.path, manifest.artifactsRoot) : undefined;
 	if (!source) return undefined;
 	return writeArtifact(manifest.artifactsRoot, {
 		kind: "metadata",
@@ -160,7 +176,7 @@ export function writeTaskInputsArtifact(manifest: TeamRunManifest, task: TeamTas
 export function aggregateTaskOutputs(tasks: TeamTaskState[], manifest?: TeamRunManifest): string {
 	return tasks.map((task, index) => {
-		const body = task.resultArtifact ? readIfSmall(task.resultArtifact.path, 40_000, manifest?.artifactsRoot) : undefined;
+		const body = task.resultArtifact ? readIfSmall(task.resultArtifact.path, manifest?.artifactsRoot) : undefined;
 		const hasBody = Boolean(body?.trim());
 		const expectedMissing = task.resultArtifact && !containedExists(task.resultArtifact.path, manifest?.artifactsRoot);
 		const status = task.status === "skipped"

package/src/runtime/team-runner.ts CHANGED Viewed

@@ -63,16 +63,21 @@ builtInRegistry.register(VitePlugin);
  * executing. The team-runner has no periodic heartbeat today, so any
  * team run lasting >5min is at risk.
  */
-function startTeamRunHeartbeat(stateRoot: string, runId: string, lastTaskUpdateAt?: string): () => void {
+function startTeamRunHeartbeat(stateRoot: string, runId: string): () => void {
 	const heartbeatPath = path.join(stateRoot, "heartbeat.json");
 	const writeHeartbeat = (): void => {
 		try {
+			// lastTaskUpdateAt is written fresh on each tick so the heartbeat
+			// never carries a stale creation-time timestamp. Previously this
+			// captured manifest.updatedAt once at startup, making the value
+			// permanently stale throughout the run.
+			const now = new Date().toISOString();
 			fs.writeFileSync(heartbeatPath, JSON.stringify({
 				pid: process.pid,
 				at: Date.now(),
 				runId,
 				kind: "team-runner",
-				lastTaskUpdateAt,
+				lastTaskUpdateAt: now,
 			}), { encoding: "utf-8", mode: 0o600 });
 		} catch {
 			// best-effort
@@ -439,7 +444,7 @@ export async function executeTeamRun(input: ExecuteTeamRunInput): Promise<{ mani
 	// (NO_PID_HEARTBEAT_STALE_MS). Previously only sub-task runners wrote
 	// heartbeats; the team-level run had no heartbeat, so any multi-phase
 	// workflow lasting >5min was marked stale and cancelled.
-	const stopTeamHeartbeat = startTeamRunHeartbeat(manifest.stateRoot, manifest.runId, manifest.updatedAt);
+	const stopTeamHeartbeat = startTeamRunHeartbeat(manifest.stateRoot, manifest.runId);
 	const cleanupUsage = (): void => {
 		for (const task of input.tasks) clearTrackedTaskUsage(task.id);