npm - pi-crew - Versions diffs - 0.8.13 → 0.9.0 - Mend

pi-crew 0.8.13 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82) hide show

package/CHANGELOG.md +296 -0
package/README.md +118 -2
package/docs/FEATURE_INTAKE.md +1 -1
package/docs/HARNESS.md +20 -19
package/docs/PROJECT_REVIEW.md +132 -133
package/docs/PROJECT_REVIEW_FIXES.md +130 -131
package/docs/actions-reference.md +127 -121
package/docs/architecture.md +1 -1
package/docs/code-review-2026-05-11.md +134 -134
package/docs/commands-reference.md +108 -106
package/docs/comparison-pi-subagents-vs-pi-crew.md +105 -105
package/docs/deep-review-report.md +1 -1
package/docs/dynamic-workflows.md +90 -0
package/docs/fixes/BATCH_A_H1_H2.md +17 -17
package/docs/fixes/bug-007-async-notifier-stale-ctx.md +23 -23
package/docs/followup-plan-2026-05-12.md +135 -135
package/docs/followup-review-2026-05-12.md +86 -86
package/docs/followup-review-round3-2026-05-12.md +123 -123
package/docs/goals.md +59 -0
package/docs/implementation-plan-top3.md +4 -4
package/docs/issue-29-analysis.md +2 -2
package/docs/oh-my-pi-research.md +154 -154
package/docs/optimization-plan.md +2 -0
package/docs/perf/baseline-2026-05.md +9 -9
package/docs/perf/final-report-2026-05.md +2 -2
package/docs/perf/sprint-1-report.md +2 -2
package/docs/perf/sprint-2-report.md +1 -1
package/docs/perf/upgrade-plan-2026-05.md +72 -72
package/docs/pi-crew-bugs.md +230 -230
package/docs/pi-crew-investigation-report.md +102 -102
package/docs/pi-crew-test-round5.md +4 -4
package/docs/runtime-analysis-child-vs-live.md +57 -57
package/docs/runtime-migration-in-process-analysis.md +97 -97
package/install.mjs +3 -2
package/package.json +2 -4
package/skills/orchestration/SKILL.md +11 -11
package/src/agents/agent-config.ts +4 -0
package/src/config/config.ts +39 -0
package/src/config/types.ts +11 -0
package/src/extension/action-suggestions.ts +2 -1
package/src/extension/async-notifier.ts +10 -0
package/src/extension/help.ts +14 -0
package/src/extension/project-init.ts +7 -20
package/src/extension/registration/commands.ts +27 -0
package/src/extension/team-tool/destructive-gate.ts +1 -1
package/src/extension/team-tool/goal-wrap.ts +288 -0
package/src/extension/team-tool/goal.ts +405 -0
package/src/extension/team-tool/run.ts +103 -4
package/src/extension/team-tool/workflow-manage.ts +194 -0
package/src/extension/team-tool.ts +20 -0
package/src/hooks/types.ts +3 -1
package/src/runtime/async-runner.ts +24 -2
package/src/runtime/background-runner.ts +68 -19
package/src/runtime/child-pi.ts +6 -1
package/src/runtime/completion-guard.ts +1 -1
package/src/runtime/dynamic-workflow-context.ts +450 -0
package/src/runtime/dynamic-workflow-runner.ts +180 -0
package/src/runtime/global-worker-cap.ts +96 -0
package/src/runtime/goal-evaluator.ts +294 -0
package/src/runtime/goal-loop-runner.ts +612 -0
package/src/runtime/goal-state-store.ts +209 -0
package/src/runtime/pi-args.ts +10 -2
package/src/runtime/result-extractor.ts +32 -0
package/src/runtime/team-runner.ts +11 -1
package/src/runtime/verification-gates.ts +85 -5
package/src/runtime/verification-integrity.ts +110 -0
package/src/runtime/verification-worktree.ts +136 -0
package/src/runtime/workspace-lock.ts +448 -0
package/src/schema/config-schema.ts +26 -0
package/src/schema/team-tool-schema.ts +39 -4
package/src/state/atomic-write.ts +9 -0
package/src/state/contracts.ts +14 -0
package/src/state/crew-init.ts +18 -5
package/src/state/event-log.ts +7 -1
package/src/state/state-store.ts +2 -0
package/src/state/types.ts +82 -0
package/src/state/worker-atomic-writer.ts +176 -0
package/src/utils/redaction.ts +104 -24
package/src/workflows/discover-workflows.ts +25 -1
package/src/workflows/workflow-config.ts +13 -0
package/teams/parallel-research.team.md +1 -1
package/workflows/examples/hello.dwf.ts +24 -0

package/src/runtime/goal-state-store.ts ADDED Viewed

@@ -0,0 +1,209 @@
+/**
+ * goal-state-store.ts — Persistent outer state for the autonomous goal loop (P0/P1).
+ *
+ * Spec: research-findings/goal-workflow/00-SPEC.md §2.3
+ * Plan: research-findings/goal-workflow/07-PLAN.md v3 §0b G2 (one manifest per turn,
+ * goal loop owns OUTER state) + §0c C10 (hardening: assertSafePathId + UUID goalId).
+ *
+ * Stores GoalLoopState as atomic JSON at <crewRoot>/state/goals/<goalId>.json.
+ * Modeled on ScheduleStore (state/schedule.ts:86) but with atomicWriteJson +
+ * path-traversal defense (assertSafePathId on every public method).
+ *
+ * Per §0c C2: budget lives here (budgetUsed accumulates collectRunMetrics across turns);
+ * per-turn usage stays on each turn's TeamRunManifest/tasks.json.
+ */
+import { mkdirSync, existsSync, readFileSync, writeFileSync, readdirSync, unlinkSync, openSync, closeSync, statSync } from "node:fs";
+import { dirname } from "node:path";
+import { atomicWriteJson } from "../state/atomic-write.ts";
+import { appendEvent } from "../state/event-log.ts";
+import { assertSafePathId } from "../utils/safe-paths.ts";
+import { createRunId } from "../utils/ids.ts";
+import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
+import { logInternalError } from "../utils/internal-error.ts";
+import type { GoalLoopState, GoalLoopStatus } from "../state/types.ts";
+/**
+ * Resolve the directory that holds goal state files for `cwd`.
+ *
+ * Prefers the project crew root returned by `projectCrewRoot(cwd)`; when that
+ * yields null/undefined the user-scope crew root is used instead.
+ *
+ * @param cwd Working directory used to look up the project crew root.
+ * @returns `<crewRoot>/state/goals` (not created here).
+ */
+function resolveGoalsRoot(cwd: string): string {
+	const base = projectCrewRoot(cwd) ?? userCrewRoot();
+	return [base, "state", "goals"].join("/");
+}
+/**
+ * Build the JSON file path for a goal.
+ *
+ * The id is passed through assertSafePathId before any path is built (§0c C10),
+ * so an id rejected by that check never reaches string concatenation.
+ *
+ * @param cwd Working directory used to resolve the goals root.
+ * @param goalId Goal identifier; must pass assertSafePathId.
+ * @returns `<goalsRoot>/<goalId>.json`.
+ */
+function goalFilePath(cwd: string, goalId: string): string {
+	assertSafePathId("goalId", goalId);
+	const goalsRoot = resolveGoalsRoot(cwd);
+	return goalsRoot + "/" + goalId + ".json";
+}
+/**
+ * GoalStore — CRUD for GoalLoopState files.
+ *
+ * Concurrency: writes are atomic (temp+rename+fsync via atomicWriteJson). For
+ * read-modify-write sequences under contention, callers should coordinate via
+ * GoalLoopState.state transitions (cooperative, the goal loop is single-writer
+ * between turns). load/save/patch/setStatus take no file-lock because the loop
+ * is the sole writer during its lifetime; `goal stop`/`pause`/`resume` from
+ * another session flip state fields that the loop checks between turns
+ * (cooperative, §0c C11). The one locked operation is compareAndSetStatus(),
+ * which serializes its read-check-write across processes with a per-goal
+ * O_EXCL lockfile.
+ */
+export class GoalStore {
+	private readonly cwd: string;
+	/** @param cwd Working directory used to resolve the goals root for every operation. */
+	constructor(cwd: string) {
+		this.cwd = cwd;
+	}
+	/** Generate a fresh, path-safe goalId (never user-derived — §0c C10). */
+	createGoalId(): string {
+		return createRunId("goal");
+	}
+	/**
+	 * Load a goal by id.
+	 *
+	 * @returns The parsed state, or undefined when the file is missing, unreadable,
+	 * not valid JSON, or lacks a string `goalId` field. No deeper schema validation
+	 * is performed.
+	 * @throws When goalId fails the path-safety check (§0c C10).
+	 */
+	load(goalId: string): GoalLoopState | undefined {
+		// Path-safety check runs BEFORE the try/catch so traversal attempts throw (not silently return undefined).
+		const path = goalFilePath(this.cwd, goalId);
+		try {
+			if (!existsSync(path)) return undefined;
+			const raw = readFileSync(path, "utf-8");
+			const parsed = JSON.parse(raw);
+			if (!parsed || typeof parsed !== "object" || typeof parsed.goalId !== "string") return undefined;
+			return parsed as GoalLoopState;
+		} catch {
+			return undefined;
+		}
+	}
+	/**
+	 * Atomically persist a goal state, stamping `updatedAt` with the current time
+	 * on the persisted copy (the `state` argument itself is not mutated).
+	 * Emits a goal.state_changed event if eventsPath is given.
+	 *
+	 * @throws Re-throws any mkdir/write/event error after logging it.
+	 */
+	save(state: GoalLoopState, eventsPath?: string): void {
+		assertSafePathId("goalId", state.goalId);
+		const path = goalFilePath(this.cwd, state.goalId);
+		const next = { ...state, updatedAt: new Date().toISOString() };
+		try {
+			mkdirSync(dirname(path), { recursive: true });
+			atomicWriteJson(path, next);
+			if (eventsPath) {
+				appendEvent(eventsPath, { type: "goal.state_changed", runId: state.goalId, data: { goalId: state.goalId, state: state.state } });
+			}
+		} catch (error) {
+			logInternalError("goal-state-store.save", error, `goalId=${state.goalId}`);
+			throw error;
+		}
+	}
+	/**
+	 * Patch a goal's top-level fields (e.g. state, turnsUsed, budgetUsed, currentRunId).
+	 * `goalId` and `createdAt` always keep their stored values, whatever the patch contains.
+	 *
+	 * @returns The merged state that was handed to save() (it does not carry the
+	 * `updatedAt` stamped by save()), or undefined when the goal cannot be loaded.
+	 */
+	patch(goalId: string, patch: Partial<GoalLoopState>, eventsPath?: string): GoalLoopState | undefined {
+		const current = this.load(goalId);
+		if (!current) return undefined;
+		const next: GoalLoopState = { ...current, ...patch, goalId: current.goalId, createdAt: current.createdAt };
+		this.save(next, eventsPath);
+		return next;
+	}
+	/** Convenience: transition state with optional event emission. Unconditional — see compareAndSetStatus for the guarded variant. */
+	setStatus(goalId: string, state: GoalLoopStatus, eventsPath?: string): GoalLoopState | undefined {
+		return this.patch(goalId, { state }, eventsPath);
+	}
+	/**
+	 * Cross-process-safe Compare-And-Set of `state` for atomic stuck↔resume
+	 * transitions (P1b, RFC v0.5 §P1b; cold-review #2 HIGH #2 fix).
+	 *
+	 * Under a per-goal lockfile (`<goal file>.cas.lock`): loads the current state;
+	 * if `current.state === expected`, sets it to `next`, persists via save()
+	 * (which emits `goal.state_changed` when eventsPath is given) and returns the
+	 * updated state.
+	 *
+	 * Returns undefined — no mutation, no event — when the lock is held by another
+	 * process, the goal is missing/corrupt, or the state is not `expected`. Callers
+	 * treat all three as "CAS failed".
+	 *
+	 * Why a lockfile: the synchronous read-check-write is only atomic within one
+	 * event loop. `goal resume` runs in a DIFFERENT process than the (exited) loop,
+	 * so two concurrent resumes would both see `stuck`, both pass the CAS, both
+	 * spawn → double background loops, double budget burn. O_EXCL creation is
+	 * atomic at the OS level — only one process can create the lockfile. The
+	 * operation is fast (ms), so stale lockfiles are rare; a 5s age guard
+	 * force-clears them (crash recovery).
+	 *
+	 * Legal P1b transitions enforced by callers (not by this method):
+	 *   running → stuck,  stuck → running,  stuck → cancelled.
+	 *
+	 * @throws When goalId fails the path-safety check, or when save() fails.
+	 */
+	compareAndSetStatus(
+		goalId: string,
+		expected: GoalLoopStatus,
+		next: GoalLoopStatus,
+		eventsPath?: string,
+	): GoalLoopState | undefined {
+		const lockPath = `${goalFilePath(this.cwd, goalId)}.cas.lock`;
+		if (!this.acquireCasLock(lockPath)) {
+			return undefined; // Another process holds the CAS lock — caller treats as CAS-failed.
+		}
+		try {
+			const current = this.load(goalId);
+			if (!current) return undefined;
+			if (current.state !== expected) return undefined; // CAS failed — state moved underneath us.
+			const updated: GoalLoopState = { ...current, state: next };
+			this.save(updated, eventsPath);
+			return updated;
+		} finally {
+			try { unlinkSync(lockPath); } catch { /* best-effort; may already be gone */ }
+		}
+	}
+	/**
+	 * Acquire an O_EXCL lockfile for CAS, with stale-lock recovery (5s age guard).
+	 *
+	 * - Creation succeeds → lock acquired.
+	 * - Creation fails with anything other than EEXIST → not acquired.
+	 * - Lock exists and is older than 5s → it is deleted and creation is retried once.
+	 * - Lock vanished (ENOENT) between the failed creation and the age check →
+	 *   the holder just released it, so creation is retried once instead of
+	 *   reporting a spurious failure.
+	 *
+	 * NOTE(review): stale recovery is not race-free — two processes that both judge
+	 * the same lock stale can each delete and recreate it. The window is small
+	 * because the lock is only held for one read-modify-write.
+	 *
+	 * @returns true when this process now owns the lockfile.
+	 */
+	private acquireCasLock(lockPath: string): boolean {
+		const tryCreate = (): "acquired" | "held" | "failed" => {
+			try {
+				const fd = openSync(lockPath, "wx"); // O_EXCL — throws EEXIST if already exists.
+				closeSync(fd);
+				return "acquired";
+			} catch (error) {
+				return (error as NodeJS.ErrnoException).code === "EEXIST" ? "held" : "failed";
+			}
+		};
+		const firstAttempt = tryCreate();
+		if (firstAttempt !== "held") return firstAttempt === "acquired";
+		try {
+			const ageMs = Date.now() - statSync(lockPath).mtimeMs;
+			if (ageMs <= 5000) return false; // Fresh lock — a live holder is mid-CAS.
+			unlinkSync(lockPath); // Stale (holder presumably crashed) — clear it.
+		} catch (error) {
+			// ENOENT means the lock disappeared on its own; anything else is a real failure.
+			if ((error as NodeJS.ErrnoException).code !== "ENOENT") return false;
+		}
+		return tryCreate() === "acquired";
+	}
+	/**
+	 * Remove a goal file (used by `goal clear`).
+	 *
+	 * Unlike load(), this never throws: an unsafe goalId or an unlink failure is
+	 * logged and reported as false.
+	 *
+	 * @returns true if the file existed and was deleted.
+	 */
+	remove(goalId: string): boolean {
+		try {
+			const path = goalFilePath(this.cwd, goalId);
+			if (!existsSync(path)) return false;
+			unlinkSync(path);
+			return true;
+		} catch (error) {
+			logInternalError("goal-state-store.remove", error, `goalId=${goalId}`);
+			return false;
+		}
+	}
+	/**
+	 * List all known goals (newest first by updatedAt; goals without updatedAt sort last).
+	 *
+	 * Only `*.json` entries whose basename matches `[A-Za-z0-9_-]+` are considered,
+	 * so `.cas.lock` files and foreign files are ignored. An entry that cannot be
+	 * loaded is skipped; it never hides the remaining goals.
+	 *
+	 * @returns The loadable goals, or [] when the goals directory is missing or unreadable.
+	 */
+	list(): GoalLoopState[] {
+		try {
+			const root = resolveGoalsRoot(this.cwd);
+			if (!existsSync(root)) return [];
+			const entries = readdirSync(root) as string[];
+			const goals: GoalLoopState[] = [];
+			for (const entry of entries) {
+				if (!entry.endsWith(".json")) continue;
+				const goalId = entry.slice(0, -".json".length);
+				// Skip entries that fail the safe-id check (defensive; createGoalId always produces safe ids).
+				if (!/^[A-Za-z0-9_-]+$/.test(goalId)) continue;
+				let goal: GoalLoopState | undefined;
+				try {
+					goal = this.load(goalId);
+				} catch (error) {
+					// load() can still throw from its path-safety check; drop this entry only.
+					logInternalError("goal-state-store.list", error, `entry=${entry}`);
+					continue;
+				}
+				if (goal) goals.push(goal);
+			}
+			goals.sort((a, b) => (b.updatedAt ?? "").localeCompare(a.updatedAt ?? ""));
+			return goals;
+		} catch (error) {
+			logInternalError("goal-state-store.list", error, `cwd=${this.cwd}`);
+			return [];
+		}
+	}
+}

package/src/runtime/pi-args.ts CHANGED Viewed

@@ -269,8 +269,16 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
 	const explicitTools = policy.tools;
 	const excludeTools = policy.excludeTools;
-	if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
-	if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
+	// §0c C6: agent.disableTools (Pi `--no-tools`) fully disables all tools. Used by
+	// capability-locked agents (e.g. the goal-judge) that must have NO agency.
+	// MUST come before any --tools/--exclude-tools so it wins (Pi applies last-wins).
+	// An empty `tools:[]` is INSUFFICIENT because the length-check below skips it.
+	if (input.agent.disableTools === true) {
+		args.push("--no-tools");
+	} else {
+		if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
+		if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
+	}
 	// Always add --no-extensions before --extension to prevent user extensions from being auto-loaded.
 	// User extensions in ~/.pi/agent/extensions/ may fail due to missing dependencies.
 	args.push("--no-extensions");

package/src/runtime/result-extractor.ts CHANGED Viewed

@@ -41,6 +41,14 @@ export function extractStructuredResult(raw: string, _schema?: Record<string, un
 		return { structured: true, data: markerResult, rawText: raw };
 	}
+	// Strategy 4: Scan for the first JSON object/array anywhere in text.
+	// Models often add prose preamble/epilogue ("Here's my review:", "Let me analyze...")
+	// around the JSON. This catches JSON embedded in sentences, lists, or prose.
+	const scannedResult = tryScanJson(trimmed);
+	if (scannedResult !== undefined) {
+		return { structured: true, data: scannedResult, rawText: raw };
+	}
 	return { structured: false, data: null, rawText: raw };
 }
@@ -63,6 +71,30 @@ function tryFencedJson(text: string): unknown | undefined {
 	}
 }
+/**
+ * Strategy 4: locate JSON embedded in surrounding prose.
+ *
+ * Walks the text left to right. At every `{` or `[` it asks findMatchingBracket
+ * for the end of the bracketed span (presumably an exclusive end offset relative
+ * to the opener — verify against that helper) and tries to JSON.parse the span.
+ * The first span that parses wins; spans that do not parse are ignored and the
+ * walk continues from the next character.
+ *
+ * @param text Text that may contain a JSON object/array among other content.
+ * @returns The first successfully parsed value, or undefined when none is found.
+ */
+function tryScanJson(text: string): unknown | undefined {
+	for (let start = 0; start < text.length; start++) {
+		const opener = text[start];
+		const opensJson = opener === "{" || opener === "[";
+		if (!opensJson) continue;
+		const tail = text.slice(start);
+		const spanEnd = findMatchingBracket(tail);
+		if (spanEnd > 0) {
+			try {
+				return JSON.parse(tail.slice(0, spanEnd));
+			} catch {
+				// Balanced but not valid JSON here; try the next opener.
+			}
+		}
+	}
+	return undefined;
+}
 function tryMarkerExtraction(text: string): unknown | undefined {
 	// Try to find JSON after common markers
 	const markers = ["RESULT:", "OUTPUT:", "ANSWER:", "### Result\n", "## Output\n"];

package/src/runtime/team-runner.ts CHANGED Viewed

@@ -408,9 +408,19 @@ export function hasPendingMutatingTaskAtBoundary(tasks: TeamTaskState[]): boolea
 function dagReadyTaskIds(tasks: TeamTaskState[], completedIds: Set<string>): string[] | null {
 	const hasExplicitDeps = tasks.some((t) => t.dependsOn.length > 0);
 	if (!hasExplicitDeps) return null;
+	// FIX (goal-wrap runtime test): task.dependsOn stores STEP IDs (e.g. "execute"), not
+	// task IDs (e.g. "02_execute"). The DAG scheduler compares deps against completedIds
+	// (which are task IDs), so step-ID deps would never match → dependent tasks stuck blocked
+	// forever. Map step IDs -> task IDs first (mirror dependencySatisfied in
+	// task-graph-scheduler.ts which handles this via stepToTaskId). buildDagExecutionPlan +
+	// getDagReadyTasks then work on consistent task IDs.
+	const stepToTaskId = new Map<string, string>();
+	for (const t of tasks) {
+		if (t.stepId) stepToTaskId.set(t.stepId, t.id);
+	}
 	const nodes: TaskNode[] = tasks.map((t) => ({
 		id: t.id,
-		dependsOn: t.dependsOn,
+		dependsOn: t.dependsOn.map((dep) => stepToTaskId.get(dep) ?? dep),
 		phase: t.adaptive?.phase ?? t.stepId,
 	}));
 	const plan = buildDagExecutionPlan(nodes);

package/src/runtime/verification-gates.ts CHANGED Viewed

@@ -13,8 +13,73 @@ import { spawn } from "node:child_process";
 import * as fs from "node:fs";
 import * as path from "node:path";
 import { writeArtifact } from "../state/artifact-store.ts";
+import { redactSecretString } from "../utils/redaction.ts";
+import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
 import type { VerificationContract, VerificationCommandResult, GreenLevel, ArtifactDescriptor } from "../state/types.ts";
+/**
+ * Phase 1.5 #1 (RFC 13 §6 info-disclosure mitigation): sanitize the env passed
+ * to verification commands so worker-induced output cannot leak model-provider
+ * secrets. P1f redaction at artifact-write + judge-bound is regex-best-effort
+ * against adversarial workers; this kills the leak at the source by never
+ * giving the verification process the secret in the first place.
+ *
+ * Opt-in via `PI_CREW_VERIFICATION_SANITIZE_ENV=1` to avoid breaking existing
+ * flows whose tests legitimately need API access. Escape hatch:
+ * `PI_CREW_VERIFICATION_PRESERVE_ENV=KEY1,KEY2,...` lets users explicitly opt
+ * specific secrets back in (audited via the allowlist validator).
+ * The `PI_TEAMS_`-prefixed spellings of both variables are honored as well.
+ *
+ * This constant is the BASE list of variable names that survive sanitization;
+ * buildVerificationEnv() appends the preserved keys to it per invocation.
+ */
+const VERIFICATION_ENV_ALLOWLIST: readonly string[] = [
+	// Essential non-secret vars only — NO model-provider keys by default.
+	"PATH",
+	"HOME",
+	"USER",
+	"SHELL",
+	"TERM",
+	"LANG",
+	"LC_ALL",
+	"LC_COLLATE",
+	"LC_CTYPE",
+	"LC_MESSAGES",
+	"LC_MONETARY",
+	"LC_NUMERIC",
+	"LC_TIME",
+	"XDG_CONFIG_HOME",
+	"XDG_DATA_HOME",
+	"XDG_CACHE_HOME",
+	"XDG_RUNTIME_DIR",
+	"NVM_BIN",
+	"NVM_DIR",
+	"NVM_INC",
+	"NODE_PATH",
+	"NODE_DISABLE_COLORS",
+	"NODE_EXTRA_CA_CERTS",
+	"NPM_CONFIG_REGISTRY",
+	"NPM_CONFIG_USERCONFIG",
+	"NPM_CONFIG_GLOBALCONFIG",
+];
+/**
+ * Report whether env sanitization for verification commands is switched on.
+ *
+ * @returns true when `PI_CREW_VERIFICATION_SANITIZE_ENV` or its
+ * `PI_TEAMS_VERIFICATION_SANITIZE_ENV` alias is exactly `"1"`; any other value
+ * (including `"true"`) leaves sanitization off.
+ */
+export function isVerificationEnvSanitizeEnabled(): boolean {
+	const flags = [
+		process.env.PI_CREW_VERIFICATION_SANITIZE_ENV,
+		process.env.PI_TEAMS_VERIFICATION_SANITIZE_ENV,
+	];
+	return flags.some((flag) => flag === "1");
+}
+/**
+ * Build the env dict for a verification command.
+ *
+ * - Sanitization off: a copy of the current process env.
+ * - Sanitization on: only VERIFICATION_ENV_ALLOWLIST plus any explicitly
+ *   preserved keys (`PI_CREW_VERIFICATION_PRESERVE_ENV=KEY1,KEY2`, or the
+ *   `PI_TEAMS_` alias when the former is unset) are passed through
+ *   sanitizeEnvSecrets. Preserve entries are comma-separated; surrounding
+ *   whitespace and empty entries are dropped.
+ *
+ * `FORCE_COLOR=0` is always set so output stays plain-text (matches
+ * pre-existing behavior).
+ *
+ * @returns A fresh string-to-string env record; `process.env` is not mutated.
+ */
+function buildVerificationEnv(): Record<string, string> {
+	if (!isVerificationEnvSanitizeEnabled()) {
+		// process.env is typed with `string | undefined` values, so copy entry by
+		// entry rather than spreading it into a Record<string, string>.
+		const env: Record<string, string> = {};
+		for (const [key, value] of Object.entries(process.env)) {
+			if (value !== undefined) env[key] = value;
+		}
+		env.FORCE_COLOR = "0";
+		return env;
+	}
+	const preserveRaw = process.env.PI_CREW_VERIFICATION_PRESERVE_ENV ?? process.env.PI_TEAMS_VERIFICATION_PRESERVE_ENV ?? "";
+	const preserve = preserveRaw.split(",").map((s) => s.trim()).filter((s) => s.length > 0);
+	const allowList = [...VERIFICATION_ENV_ALLOWLIST, ...preserve];
+	return { ...sanitizeEnvSecrets(process.env, { allowList }), FORCE_COLOR: "0" };
+}
 export interface PhaseGateResult {
 	phase: number;
 	name: string;
@@ -116,7 +181,7 @@ async function executeCommand(
 		const shell = spawn("sh", ["-c", command], {
 			cwd,
 			timeout: timeoutMs,
-			env: { ...process.env, FORCE_COLOR: "0" },
+			env: buildVerificationEnv(),
 		});
 		shell.stdout?.on("data", (data) => {
@@ -249,6 +314,11 @@ export async function executeVerificationCommands(
 	taskId: string,
 	artifactsRoot: string,
 	signal?: AbortSignal,
+	/** Phase 1.5 #2 (RFC 16): when provided, run verification commands in this
+	 *  pristine git-worktree path instead of `cwd`. The caller is responsible
+	 *  for preparing + cleaning up the worktree (see verification-worktree.ts).
+	 *  When undefined, behavior is unchanged (run in `cwd`). */
+	worktreeCwd?: string,
 ): Promise<VerificationCommandResult[]> {
 	if (!contract.commands || contract.commands.length === 0) {
 		return [];
@@ -269,8 +339,17 @@ export async function executeVerificationCommands(
 		fs.mkdirSync(gatesDir, { recursive: true });
 	}
+	// Phase 1.5 #2: run phase gates inside the worktree when provided.
+	const execCwd = worktreeCwd ?? cwd;
 	// Run phase gates
-	const bundle = await runPhaseGates(gates, cwd, signal, (phaseResult) => {
+	const bundle = await runPhaseGates(gates, execCwd, signal, (phaseResult) => {
+		// P1f: redact secrets from verification output BEFORE persisting to the
+		// world-readable artifact file. redactSecretString is best-effort vs
+		// adversarial workers (RFC §6 — Med-High residual). writeArtifact ALSO
+		// redacts (defense-in-depth); this explicit pass sanitizes the raw output
+		// at the source so the in-memory bundle and the summary below are clean.
+		const safeOutput = redactSecretString(phaseResult.output || "");
 		// Write phase artifact immediately for observability
 		const phaseArtifact = writeArtifact(artifactsRoot, {
 			kind: "log",
@@ -284,7 +363,7 @@ export async function executeVerificationCommands(
 				phaseResult.error ? `Error: ${phaseResult.error}` : "",
 				"",
 				"## Output",
-				phaseResult.output || "(no output)",
+				safeOutput || "(no output)",
 			].join("\n"),
 			producer: taskId,
 		});
@@ -297,11 +376,12 @@ export async function executeVerificationCommands(
 		});
 	});
-	// Write summary artifact
+	// Write summary artifact. Redact the whole bundle JSON (it embeds the raw
+	// per-phase output strings) BEFORE writeArtifact persists it.
 	const summaryArtifact = writeArtifact(artifactsRoot, {
 		kind: "metadata",
 		relativePath: `verification-gates/${taskId}-summary.json`,
-		content: JSON.stringify(bundle, null, 2),
+		content: redactSecretString(JSON.stringify(bundle, null, 2)),
 		producer: taskId,
 	});

package/src/runtime/verification-integrity.ts ADDED Viewed

@@ -0,0 +1,110 @@
+/**
+ * Verification Integrity — manifest bookend snapshot helper (RFC §P1a).
+ *
+ * Hashes a FIXED set of project-manifest files so the goal loop can detect
+ * drift between T_snap (before verification runs) and T_verify_done (after the
+ * command exits). This closes the PERSISTENT-edit subcase of workspace
+ * tampering (a worker that rewrites package.json / lockfile and leaves it
+ * changed). See RFC §P1a / §6 STRIDE for the full threat model.
+ *
+ * RESIDUALS (documented; closed by Phase 1.5 git-worktree sandbox, NOT here):
+ *  - Round-trip tamper: a worker can edit a manifest, run the test, then REVERT
+ *    before T_verify_done so the hash matches T_snap. Content-addressed
+ *    execution (git-worktree) is required to close this. Not fixable by hashing.
+ *  - Invoked-script tampering: only the manifest files in MANIFEST_FILES are
+ *    hashed. A worker that overwrites a script the verification command invokes
+ *    is NOT caught. Phase 1.5 git-worktree closes this.
+ *  - node_modules/ and transitive deps are deliberately NOT hashed (size +
+ *    churn); package-lock.json IS hashed, which transitively pins resolved
+ *    dependency versions.
+ *
+ * Pure leaf module: depends only on node: built-ins. Does NOT import
+ * goal-loop-runner or goal-evaluator (keeps the P1a helper unit-testable in
+ * isolation and avoids pulling the conflict-zone modules into this file).
+ *
+ * @module verification-integrity
+ */
+import { createHash } from "node:crypto";
+import * as fs from "node:fs";
+import * as path from "node:path";
+/**
+ * Fixed set of project-manifest files considered by {@link snapshotManifests}.
+ * Only files from this set that EXIST in the target directory are hashed.
+ * (RFC §P1a: package.json, package-lock.json, pyproject.toml, setup.py,
+ * Cargo.toml, Cargo.lock, go.mod, tsconfig.json.)
+ *
+ * Entries are names relative to the directory being snapshotted. The array
+ * order is also the key order of the snapshot object. Declared `as const`, so
+ * the tuple is readonly and its element type is the union of these literals.
+ */
+export const MANIFEST_FILES = [
+	"package.json",
+	"package-lock.json",
+	"pyproject.toml",
+	"setup.py",
+	"Cargo.toml",
+	"Cargo.lock",
+	"go.mod",
+	"tsconfig.json",
+] as const;
+/**
+ * Take a content snapshot of the manifest files present in `cwd`.
+ *
+ * Each name in MANIFEST_FILES is resolved against `cwd`; when it refers to a
+ * regular file, the file's bytes are sha256-hashed.
+ *
+ * - A name that cannot be stat'ed (typically: the file does not exist) is
+ *   skipped silently — a Python project has no package.json, and so on.
+ * - Anything that is not a regular file (directory, etc.) is skipped.
+ * - A file that cannot be read (permissions, removed mid-scan) is skipped.
+ * - Nothing outside MANIFEST_FILES is touched, so node_modules is never hashed.
+ *
+ * @param cwd Directory whose manifests are snapshotted.
+ * @returns Map of manifest name -> sha256 hex digest for each hashed file, with
+ * keys inserted in MANIFEST_FILES order.
+ */
+export function snapshotManifests(cwd: string): Record<string, string> {
+	const digests: Record<string, string> = {};
+	for (const name of MANIFEST_FILES) {
+		const target = path.join(cwd, name);
+		try {
+			if (!fs.statSync(target).isFile()) continue; // directory / special — skip
+			const bytes = fs.readFileSync(target);
+			digests[name] = createHash("sha256").update(bytes).digest("hex");
+		} catch {
+			// Missing or unreadable — leave it out of the snapshot.
+		}
+	}
+	return digests;
+}
+/**
+ * Compare two snapshots and return the list of DRIFTED file paths.
+ *
+ * A file is considered drifted if:
+ *  - its hash differs between `a` and `b`, OR
+ *  - it is present in only one of the two snapshots (added or removed).
+ *
+ * Presence is decided by OWN properties only, so a key that happens to share a
+ * name with an inherited Object.prototype member (e.g. "toString",
+ * "constructor") is still detected as added/removed.
+ *
+ * @param a Earlier snapshot (path -> hash).
+ * @param b Later snapshot (path -> hash).
+ * @returns Sorted array of relative manifest paths that drifted. Identical
+ * snapshots yield `[]`.
+ */
+export function compareSnapshot(
+	a: Record<string, string>,
+	b: Record<string, string>,
+): string[] {
+	const hasOwn = (snapshot: Record<string, string>, key: string): boolean =>
+		Object.prototype.hasOwnProperty.call(snapshot, key);
+	const drifted = new Set<string>();
+	for (const key of Object.keys(a)) {
+		// Removed between a -> b, or present in both with different content.
+		if (!hasOwn(b, key) || b[key] !== a[key]) drifted.add(key);
+	}
+	for (const key of Object.keys(b)) {
+		// Added between a -> b.
+		if (!hasOwn(a, key)) drifted.add(key);
+	}
+	return [...drifted].sort();
+}

package/src/runtime/verification-worktree.ts ADDED Viewed

@@ -0,0 +1,136 @@
+/**
+ * Phase 1.5 #2 — Git-worktree verification sandbox.
+ *
+ * RFC: research-findings/goal-workflow/16-PHASE1.5-WORKTREE-SANDBOX-RFC.md
+ *
+ * Closes the two remaining Med-High tampering residuals from RFC 13 §6:
+ *   (1) Round-trip manifest tamper (MAJ#2): worker edits package.json, runs
+ *       test, reverts before T_verify_done → snapshot matches → undetected.
+ *   (2) Invoked-script tampering: worker rewrites a script the verification
+ *       command invokes; only MANIFEST_FILES are hashed → invisible.
+ *
+ * Mitigation: run verification commands in a pristine `git worktree` at the
+ * T_snap commit SHA. Worker edits in the main workspace are invisible to
+ * the verification subprocess. Content-addressed execution: the test passes
+ * (or fails) against the original code, regardless of worker edits.
+ *
+ * Opt-in via PI_CREW_VERIFICATION_WORKTREE=1 (mirrors Phase 1.5 #1 pattern).
+ * Auto-fallback to existing behavior when: not a git repo, dirty index,
+ * git unavailable, or opt-out explicitly set. NEVER blocks the goal loop.
+ *
+ * Pure leaf module: depends only on node: built-ins + git CLI. No imports
+ * from goal-loop-runner or verification-gates (keeps unit-testable).
+ */
+import { execFileSync } from "node:child_process";
+import * as fs from "node:fs";
+import * as os from "node:os";
+import * as path from "node:path";
+/**
+ * Handle for one prepared verification sandbox, as returned by
+ * prepareVerificationWorktree(). The holder must call `cleanup` when done.
+ */
+export interface VerificationWorktree {
+	/** Absolute path to the pristine worktree directory. */
+	worktreePath: string;
+	/** Commit SHA the worktree is checked out at (matches T_snap). */
+	commitSha: string;
+	/** Cleanup handle — call to remove the worktree + temp dir. Idempotent. */
+	cleanup: () => void;
+}
+/**
+ * Report whether the worktree sandbox has been opted into.
+ *
+ * Reads `PI_CREW_VERIFICATION_WORKTREE`; only when that variable is unset does
+ * the `PI_TEAMS_VERIFICATION_WORKTREE` alias get consulted.
+ *
+ * @returns true when the selected value is exactly `"1"` or `"true"`.
+ */
+export function isWorktreeSandboxEnabled(): boolean {
+	const flag = process.env.PI_CREW_VERIFICATION_WORKTREE ?? process.env.PI_TEAMS_VERIFICATION_WORKTREE;
+	switch (flag) {
+		case "1":
+		case "true":
+			return true;
+		default:
+			return false;
+	}
+}
+/**
+ * Detect whether the worktree sandbox is AVAILABLE at `cwd`:
+ *  - opt-in env var set
+ *  - git executable on PATH
+ *  - cwd is inside a git repo
+ *  - `git status --porcelain` is empty (no uncommitted or untracked changes
+ *    that the pinned commit would be missing)
+ *
+ * Every git probe is synchronous and bounded by a timeout, so a hung git
+ * process degrades to "unavailable" instead of stalling the caller.
+ *
+ * Returns false (with reason) when any precondition fails; this function does
+ * not throw for git failures. Callers MUST gracefully fall back to
+ * non-sandboxed execution — never block the goal.
+ *
+ * @param cwd Directory the verification would run in.
+ * @returns `{ available: true, commitSha }` with the current HEAD SHA, or
+ * `{ available: false, reason }` describing the first failed precondition.
+ */
+export function checkWorktreeSandboxAvailable(cwd: string): { available: true; commitSha: string } | { available: false; reason: string } {
+	if (!isWorktreeSandboxEnabled()) {
+		return { available: false, reason: "PI_CREW_VERIFICATION_WORKTREE not set (opt-in)" };
+	}
+	// Run one git probe and return its trimmed stdout. execFileSync throws on a
+	// non-zero exit, a missing git binary, or when the timeout elapses.
+	const runGit = (args: string[]): string =>
+		execFileSync("git", args, { cwd, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8", timeout: 10_000 }).trim();
+	try {
+		// Is cwd inside a git repo? `git rev-parse --show-toplevel` errors out
+		// (non-zero exit) when not in a repo.
+		const toplevel = runGit(["rev-parse", "--show-toplevel"]);
+		if (!toplevel) return { available: false, reason: "git rev-parse returned empty toplevel" };
+		// Current commit SHA (this is what T_snap will pin to).
+		const commitSha = runGit(["rev-parse", "HEAD"]);
+		if (!commitSha) return { available: false, reason: "git rev-parse HEAD returned empty SHA" };
+		// Dirty workspace? `git status --porcelain` outputs non-empty if there are
+		// uncommitted changes. We refuse to sandbox a dirty workspace because
+		// the worktree would NOT contain the in-progress edits (T_snap would
+		// pin to a stale commit). Better to fall back + warn than silently
+		// verify against the wrong code.
+		const status = runGit(["status", "--porcelain"]);
+		if (status.length > 0) return { available: false, reason: `dirty git index (${status.split("\n").length} changed files); refusing to sandbox — worktree would pin to stale commit` };
+		return { available: true, commitSha };
+	} catch (error) {
+		const msg = error instanceof Error ? error.message : String(error);
+		return { available: false, reason: `git precondition check failed: ${msg.slice(0, 200)}` };
+	}
+}
+/**
+ * Prepare a pristine git worktree at `commitSha`. The worktree is a fresh
+ * checkout of the project at that commit — it does NOT contain worker edits
+ * from the main workspace.
+ *
+ * `git worktree add --detach <tmp>/wt-<sha8> <sha>` creates a detached-HEAD
+ * worktree (no branch pollution). Returns the worktree path + cleanup handle.
+ *
+ * Cleanup is idempotent (safe to call multiple times) and best-effort (swallows
+ * errors so a stuck worktree doesn't propagate into the goal loop).
+ *
+ * @param cwd Directory inside the source repository; every git command runs here.
+ * @param commitSha Commit to check out. NOTE(review): passed to git as a bare
+ * positional argument — presumably always the output of `git rev-parse HEAD`;
+ * confirm no caller can supply an untrusted value.
+ * @returns Handle with the worktree path, the commit SHA and the cleanup callback.
+ * @throws Error when `git worktree add` fails (cleanup has already run by then).
+ * A failure of the temp-dir creation itself propagates unchanged.
+ */
+export function prepareVerificationWorktree(cwd: string, commitSha: string): VerificationWorktree {
+	// Temp parent dir under os.tmpdir() (typically cleared by the OS, e.g. on reboot, if cleanup never runs).
+	const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "pi-crew-wt-"));
+	const shortSha = commitSha.slice(0, 8);
+	const worktreePath = path.join(tmpRoot, `wt-${shortSha}`);
+	let cleaned = false; // guards cleanup so repeated calls are no-ops
+	const cleanup = (): void => {
+		if (cleaned) return;
+		cleaned = true;
+		// Remove the worktree (force = proceed even if it has untracked files).
+		try {
+			execFileSync("git", ["worktree", "remove", "--force", worktreePath], { cwd, stdio: ["ignore", "pipe", "pipe"], timeout: 5000 });
+		} catch {
+			// Fall back to `git worktree prune` if remove fails (already gone).
+			try { execFileSync("git", ["worktree", "prune"], { cwd, stdio: ["ignore", "pipe", "pipe"], timeout: 5000 }); } catch { /* best-effort */ }
+		}
+		// Remove the temp parent dir.
+		try { fs.rmSync(tmpRoot, { recursive: true, force: true }); } catch { /* best-effort */ }
+	};
+	try {
+		execFileSync("git", ["worktree", "add", "--detach", worktreePath, commitSha], { cwd, stdio: ["ignore", "pipe", "pipe"], timeout: 30_000 });
+		return { worktreePath, commitSha, cleanup };
+	} catch (error) {
+		cleanup(); // also removes the now-useless temp parent dir
+		const msg = error instanceof Error ? error.message : String(error);
+		throw new Error(`git worktree add failed (cwd=${cwd}, sha=${shortSha}): ${msg.slice(0, 300)}`);
+	}
+}
+/**
+ * Scoped helper: prepare a worktree, hand it to `fn`, and remove it afterwards
+ * whether `fn` resolved or threw.
+ *
+ * An error from `fn` propagates to the caller after cleanup has run. If
+ * preparation itself fails, that error is rethrown and `fn` is never invoked —
+ * the caller must handle it (typically by falling back to non-sandboxed
+ * execution).
+ *
+ * @param cwd Directory inside the source repository.
+ * @param commitSha Commit the worktree is checked out at.
+ * @param fn Work to run against the prepared worktree; may be sync or async.
+ * @returns Whatever `fn` returns (awaited).
+ */
+export async function withVerificationWorktree<T>(cwd: string, commitSha: string, fn: (worktree: VerificationWorktree) => Promise<T> | T): Promise<T> {
+	const sandbox = prepareVerificationWorktree(cwd, commitSha);
+	try {
+		const outcome = await fn(sandbox);
+		return outcome;
+	} finally {
+		sandbox.cleanup();
+	}
+}