pi-crew 0.8.13 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/CHANGELOG.md +296 -0
  2. package/README.md +118 -2
  3. package/docs/FEATURE_INTAKE.md +1 -1
  4. package/docs/HARNESS.md +20 -19
  5. package/docs/PROJECT_REVIEW.md +132 -133
  6. package/docs/PROJECT_REVIEW_FIXES.md +130 -131
  7. package/docs/actions-reference.md +127 -121
  8. package/docs/architecture.md +1 -1
  9. package/docs/code-review-2026-05-11.md +134 -134
  10. package/docs/commands-reference.md +108 -106
  11. package/docs/comparison-pi-subagents-vs-pi-crew.md +105 -105
  12. package/docs/deep-review-report.md +1 -1
  13. package/docs/dynamic-workflows.md +90 -0
  14. package/docs/fixes/BATCH_A_H1_H2.md +17 -17
  15. package/docs/fixes/bug-007-async-notifier-stale-ctx.md +23 -23
  16. package/docs/followup-plan-2026-05-12.md +135 -135
  17. package/docs/followup-review-2026-05-12.md +86 -86
  18. package/docs/followup-review-round3-2026-05-12.md +123 -123
  19. package/docs/goals.md +59 -0
  20. package/docs/implementation-plan-top3.md +4 -4
  21. package/docs/issue-29-analysis.md +2 -2
  22. package/docs/oh-my-pi-research.md +154 -154
  23. package/docs/optimization-plan.md +2 -0
  24. package/docs/perf/baseline-2026-05.md +9 -9
  25. package/docs/perf/final-report-2026-05.md +2 -2
  26. package/docs/perf/sprint-1-report.md +2 -2
  27. package/docs/perf/sprint-2-report.md +1 -1
  28. package/docs/perf/upgrade-plan-2026-05.md +72 -72
  29. package/docs/pi-crew-bugs.md +230 -230
  30. package/docs/pi-crew-investigation-report.md +102 -102
  31. package/docs/pi-crew-test-round5.md +4 -4
  32. package/docs/runtime-analysis-child-vs-live.md +57 -57
  33. package/docs/runtime-migration-in-process-analysis.md +97 -97
  34. package/install.mjs +3 -2
  35. package/package.json +2 -4
  36. package/skills/orchestration/SKILL.md +11 -11
  37. package/src/agents/agent-config.ts +4 -0
  38. package/src/config/config.ts +39 -0
  39. package/src/config/types.ts +11 -0
  40. package/src/extension/action-suggestions.ts +2 -1
  41. package/src/extension/async-notifier.ts +10 -0
  42. package/src/extension/help.ts +14 -0
  43. package/src/extension/project-init.ts +7 -20
  44. package/src/extension/registration/commands.ts +27 -0
  45. package/src/extension/team-tool/destructive-gate.ts +1 -1
  46. package/src/extension/team-tool/goal-wrap.ts +288 -0
  47. package/src/extension/team-tool/goal.ts +405 -0
  48. package/src/extension/team-tool/run.ts +103 -4
  49. package/src/extension/team-tool/workflow-manage.ts +194 -0
  50. package/src/extension/team-tool.ts +20 -0
  51. package/src/hooks/types.ts +3 -1
  52. package/src/runtime/async-runner.ts +24 -2
  53. package/src/runtime/background-runner.ts +68 -19
  54. package/src/runtime/child-pi.ts +6 -1
  55. package/src/runtime/completion-guard.ts +1 -1
  56. package/src/runtime/dynamic-workflow-context.ts +450 -0
  57. package/src/runtime/dynamic-workflow-runner.ts +180 -0
  58. package/src/runtime/global-worker-cap.ts +96 -0
  59. package/src/runtime/goal-evaluator.ts +294 -0
  60. package/src/runtime/goal-loop-runner.ts +612 -0
  61. package/src/runtime/goal-state-store.ts +209 -0
  62. package/src/runtime/pi-args.ts +10 -2
  63. package/src/runtime/result-extractor.ts +32 -0
  64. package/src/runtime/team-runner.ts +11 -1
  65. package/src/runtime/verification-gates.ts +85 -5
  66. package/src/runtime/verification-integrity.ts +110 -0
  67. package/src/runtime/verification-worktree.ts +136 -0
  68. package/src/runtime/workspace-lock.ts +448 -0
  69. package/src/schema/config-schema.ts +26 -0
  70. package/src/schema/team-tool-schema.ts +39 -4
  71. package/src/state/atomic-write.ts +9 -0
  72. package/src/state/contracts.ts +14 -0
  73. package/src/state/crew-init.ts +18 -5
  74. package/src/state/event-log.ts +7 -1
  75. package/src/state/state-store.ts +2 -0
  76. package/src/state/types.ts +82 -0
  77. package/src/state/worker-atomic-writer.ts +176 -0
  78. package/src/utils/redaction.ts +104 -24
  79. package/src/workflows/discover-workflows.ts +25 -1
  80. package/src/workflows/workflow-config.ts +13 -0
  81. package/teams/parallel-research.team.md +1 -1
  82. package/workflows/examples/hello.dwf.ts +24 -0
@@ -0,0 +1,209 @@
1
+ /**
2
+ * goal-state-store.ts — Persistent outer state for the autonomous goal loop (P0/P1).
3
+ *
4
+ * Spec: research-findings/goal-workflow/00-SPEC.md §2.3
5
+ * Plan: research-findings/goal-workflow/07-PLAN.md v3 §0b G2 (one manifest per turn,
6
+ * goal loop owns OUTER state) + §0c C10 (hardening: assertSafePathId + UUID goalId).
7
+ *
8
+ * Stores GoalLoopState as atomic JSON at <crewRoot>/state/goals/<goalId>.json.
9
+ * Modeled on ScheduleStore (state/schedule.ts:86) but with atomicWriteJson +
10
+ * path-traversal defense (assertSafePathId on every public method).
11
+ *
12
+ * Per §0c C2: budget lives here (budgetUsed accumulates collectRunMetrics across turns);
13
+ * per-turn usage stays on each turn's TeamRunManifest/tasks.json.
14
+ */
15
+
16
+ import { mkdirSync, existsSync, readFileSync, writeFileSync, readdirSync, unlinkSync, openSync, closeSync, statSync } from "node:fs";
17
+ import { dirname } from "node:path";
18
+ import { atomicWriteJson } from "../state/atomic-write.ts";
19
+ import { appendEvent } from "../state/event-log.ts";
20
+ import { assertSafePathId } from "../utils/safe-paths.ts";
21
+ import { createRunId } from "../utils/ids.ts";
22
+ import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
23
+ import { logInternalError } from "../utils/internal-error.ts";
24
+ import type { GoalLoopState, GoalLoopStatus } from "../state/types.ts";
25
+
26
/**
 * Resolve the directory holding goal state files for `cwd`.
 * Prefers the project crew root when `projectCrewRoot` yields one; otherwise
 * falls back to the user crew root.
 */
function resolveGoalsRoot(cwd: string): string {
  const projectRoot = projectCrewRoot(cwd);
  const base = projectRoot ?? userCrewRoot();
  return base + "/state/goals";
}
31
+
32
/**
 * Build the JSON file path for `goalId` under the goals root of `cwd`.
 * The id is validated with assertSafePathId before any path is assembled (§0c C10).
 */
function goalFilePath(cwd: string, goalId: string): string {
  // Validate first so an unsafe id never reaches path construction.
  assertSafePathId("goalId", goalId);
  const goalsRoot = resolveGoalsRoot(cwd);
  return [goalsRoot, `${goalId}.json`].join("/");
}
37
+
38
/**
 * GoalStore — CRUD for GoalLoopState files (one JSON file per goal under the goals root).
 *
 * Concurrency: writes go through atomicWriteJson (temp+rename+fsync per the upstream
 * design note). `patch`/`setStatus` are plain read-modify-write sequences with NO lock:
 * callers coordinate via GoalLoopState.state transitions (the goal loop is the sole
 * writer between turns; `goal stop`/`pause`/`resume` from another session flip state
 * fields that the loop checks between turns — cooperative, §0c C11).
 * The exception is `compareAndSetStatus`, which serializes its read-check-write across
 * processes with a per-goal O_EXCL lockfile.
 */
export class GoalStore {
  /** Age (ms) beyond which an existing CAS lockfile is treated as crash residue. */
  private static readonly CAS_LOCK_STALE_MS = 5000;

  private readonly cwd: string;

  constructor(cwd: string) {
    this.cwd = cwd;
  }

  /** Generate a fresh, path-safe goalId (never user-derived — §0c C10). */
  createGoalId(): string {
    return createRunId("goal");
  }

  /**
   * Load a goal by id.
   *
   * @returns the parsed state, or undefined if the file is missing, unreadable,
   *   not valid JSON, or lacks a string `goalId` field.
   * @throws whatever assertSafePathId throws for an unsafe goalId (§0c C10).
   */
  load(goalId: string): GoalLoopState | undefined {
    // Path-safety check runs BEFORE the try/catch so traversal attempts throw
    // (not silently return undefined).
    const path = goalFilePath(this.cwd, goalId);
    try {
      if (!existsSync(path)) return undefined;
      const raw = readFileSync(path, "utf-8");
      const parsed = JSON.parse(raw);
      if (!parsed || typeof parsed !== "object" || typeof parsed.goalId !== "string") return undefined;
      return parsed as GoalLoopState;
    } catch {
      return undefined;
    }
  }

  /**
   * Atomically persist a goal state, stamping `updatedAt` with the current time.
   * Emits a `goal.state_changed` event when `eventsPath` is given.
   *
   * @throws on an unsafe goalId, or rethrows (after logging) any write/event failure.
   */
  save(state: GoalLoopState, eventsPath?: string): void {
    // goalFilePath validates the id, so no separate assertSafePathId call is needed.
    const path = goalFilePath(this.cwd, state.goalId);
    const next = { ...state, updatedAt: new Date().toISOString() };
    try {
      mkdirSync(dirname(path), { recursive: true });
      atomicWriteJson(path, next);
      if (eventsPath) {
        appendEvent(eventsPath, { type: "goal.state_changed", runId: state.goalId, data: { goalId: state.goalId, state: state.state } });
      }
    } catch (error) {
      logInternalError("goal-state-store.save", error, `goalId=${state.goalId}`);
      throw error;
    }
  }

  /**
   * Patch a goal's top-level fields (e.g. state, turnsUsed, budgetUsed, currentRunId).
   * `goalId` and `createdAt` are always kept from the stored state, even if the patch
   * carries them.
   *
   * @returns the merged state, or undefined when the goal cannot be loaded.
   */
  patch(goalId: string, patch: Partial<GoalLoopState>, eventsPath?: string): GoalLoopState | undefined {
    const current = this.load(goalId);
    if (!current) return undefined;
    const next: GoalLoopState = { ...current, ...patch, goalId: current.goalId, createdAt: current.createdAt };
    this.save(next, eventsPath);
    return next;
  }

  /** Convenience: transition state with optional event emission. */
  setStatus(goalId: string, state: GoalLoopStatus, eventsPath?: string): GoalLoopState | undefined {
    return this.patch(goalId, { state }, eventsPath);
  }

  /**
   * Cross-process compare-and-set of `state` (P1b, RFC v0.5 §P1b; cold-review #2 HIGH #2 fix).
   *
   * Under a per-goal O_EXCL lockfile: loads the current state; if
   * `current.state === expected`, sets it to `next`, persists via save() (which emits
   * `goal.state_changed` when `eventsPath` is given) and returns the updated state.
   * Returns undefined — with no mutation and no event — when the lock is held by
   * someone else, the goal cannot be loaded, or the state does not match `expected`.
   *
   * Why a lockfile: a synchronous read-check-write is only atomic within one event
   * loop. `goal resume` runs in a DIFFERENT process than the (exited) loop, so two
   * concurrent resumes could both see `stuck`, both pass the check, and both spawn a
   * background loop. O_EXCL creation is atomic at the OS level, so only one process
   * gets past the lock.
   *
   * Legal P1b transitions are enforced by callers (not by this method):
   * running → stuck, stuck → running, stuck → cancelled.
   */
  compareAndSetStatus(
    goalId: string,
    expected: GoalLoopStatus,
    next: GoalLoopStatus,
    eventsPath?: string,
  ): GoalLoopState | undefined {
    const lockPath = `${goalFilePath(this.cwd, goalId)}.cas.lock`;
    if (!this.acquireCasLock(lockPath)) {
      return undefined; // Another process holds the CAS lock — caller treats as CAS-failed.
    }
    try {
      const current = this.load(goalId);
      if (!current) return undefined;
      if (current.state !== expected) return undefined; // CAS failed — state moved underneath us.
      const updated: GoalLoopState = { ...current, state: next };
      this.save(updated, eventsPath);
      return updated;
    } finally {
      try { unlinkSync(lockPath); } catch { /* best-effort; may already be gone */ }
    }
  }

  /**
   * Acquire the O_EXCL lockfile used by compareAndSetStatus.
   *
   * - Exclusive create succeeds → lock held, returns true.
   * - Lock exists and is younger than CAS_LOCK_STALE_MS → returns false.
   * - Lock exists but is stale, or disappeared while we were inspecting it →
   *   one more exclusive create is attempted.
   * - Any other failure → returns false; errors other than ENOENT (e.g. the goals
   *   directory does not exist yet) are logged instead of being swallowed.
   *
   * NOTE(review): stale takeover is still not fully race-free — two processes that
   * both observe the same stale lock can each unlink and re-create it. The age guard
   * only makes this unlikely; closing it needs a different locking primitive.
   */
  private acquireCasLock(lockPath: string): boolean {
    const createExclusive = (): void => {
      const fd = openSync(lockPath, "wx"); // O_EXCL — throws EEXIST if already exists.
      closeSync(fd);
    };

    try {
      createExclusive();
      return true;
    } catch (error) {
      const code = (error as NodeJS.ErrnoException).code;
      if (code !== "EEXIST") {
        if (code !== "ENOENT") {
          logInternalError("goal-state-store.acquireCasLock", error, `lockPath=${lockPath}`);
        }
        return false;
      }
    }

    // The lockfile exists: only take over when it is older than the stale threshold.
    try {
      const stat = statSync(lockPath);
      if (Date.now() - stat.mtimeMs <= GoalStore.CAS_LOCK_STALE_MS) return false;
      unlinkSync(lockPath);
    } catch (error) {
      // ENOENT: the holder released the lock between our create attempt and the
      // stat/unlink — fall through and retry. Anything else: give up.
      if ((error as NodeJS.ErrnoException).code !== "ENOENT") return false;
    }

    try {
      createExclusive();
      return true;
    } catch {
      return false; // Lost the retry race to another process.
    }
  }

  /**
   * Remove a goal file (used by `goal clear`).
   *
   * @returns true if the file was deleted; false if it did not exist or any error
   *   occurred (including an unsafe goalId — the error is logged, not thrown).
   */
  remove(goalId: string): boolean {
    try {
      const path = goalFilePath(this.cwd, goalId);
      if (!existsSync(path)) return false;
      unlinkSync(path);
      return true;
    } catch (error) {
      logInternalError("goal-state-store.remove", error, `goalId=${goalId}`);
      return false;
    }
  }

  /**
   * List all known goals, newest first by `updatedAt` (missing timestamps sort last).
   * Entries that are not `<safe-id>.json`, or that fail to load, are skipped.
   * Returns [] if the goals root is missing or cannot be read.
   */
  list(): GoalLoopState[] {
    try {
      const root = resolveGoalsRoot(this.cwd);
      if (!existsSync(root)) return [];
      const safeId = /^[A-Za-z0-9_-]+$/;
      const entries = readdirSync(root) as string[];
      const goals: GoalLoopState[] = [];
      for (const entry of entries) {
        if (!entry.endsWith(".json")) continue;
        const goalId = entry.slice(0, -".json".length);
        // Skip entries that fail the safe-id check (defensive; createGoalId always produces safe ids).
        if (!safeId.test(goalId)) continue;
        try {
          const goal = this.load(goalId);
          if (goal) goals.push(goal);
        } catch (error) {
          // load() throws when assertSafePathId rejects the id; one odd file name
          // must not hide every other goal from the listing.
          logInternalError("goal-state-store.list", error, `goalId=${goalId}`);
        }
      }
      goals.sort((a, b) => (b.updatedAt ?? "").localeCompare(a.updatedAt ?? ""));
      return goals;
    } catch (error) {
      logInternalError("goal-state-store.list", error, `cwd=${this.cwd}`);
      return [];
    }
  }
}
@@ -269,8 +269,16 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
269
269
  const explicitTools = policy.tools;
270
270
  const excludeTools = policy.excludeTools;
271
271
 
272
- if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
273
- if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
272
+ // §0c C6: agent.disableTools (Pi `--no-tools`) fully disables all tools. Used by
273
+ // capability-locked agents (e.g. the goal-judge) that must have NO agency.
274
+ // MUST come before any --tools/--exclude-tools so it wins (Pi applies last-wins).
275
+ // An empty `tools:[]` is INSUFFICIENT because the length-check below skips it.
276
+ if (input.agent.disableTools === true) {
277
+ args.push("--no-tools");
278
+ } else {
279
+ if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
280
+ if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
281
+ }
274
282
  // Always add --no-extensions before --extension to prevent user extensions from being auto-loaded.
275
283
  // User extensions in ~/.pi/agent/extensions/ may fail due to missing dependencies.
276
284
  args.push("--no-extensions");
@@ -41,6 +41,14 @@ export function extractStructuredResult(raw: string, _schema?: Record<string, un
41
41
  return { structured: true, data: markerResult, rawText: raw };
42
42
  }
43
43
 
44
+ // Strategy 4: Scan for the first JSON object/array anywhere in text.
45
+ // Models often add prose preamble/epilogue ("Here's my review:", "Let me analyze...")
46
+ // around the JSON. This catches JSON embedded in sentences, lists, or prose.
47
+ const scannedResult = tryScanJson(trimmed);
48
+ if (scannedResult !== undefined) {
49
+ return { structured: true, data: scannedResult, rawText: raw };
50
+ }
51
+
44
52
  return { structured: false, data: null, rawText: raw };
45
53
  }
46
54
 
@@ -63,6 +71,30 @@ function tryFencedJson(text: string): unknown | undefined {
63
71
  }
64
72
  }
65
73
 
74
/**
 * Strategy 4: walk every '{' / '[' in `text`, left to right, and return the first
 * bracket-delimited span that parses as JSON.
 * Tolerates prose before and after the JSON that models often add around their output.
 * Yields undefined when no position produces a parseable value.
 */
function tryScanJson(text: string): unknown | undefined {
  const opener = /[{[]/g;
  let hit: RegExpExecArray | null;
  while ((hit = opener.exec(text)) !== null) {
    const tail = text.slice(hit.index);
    const stop = findMatchingBracket(tail);
    if (stop > 0) {
      try {
        return JSON.parse(tail.slice(0, stop));
      } catch {
        // Span is not valid JSON; move on to the next opening bracket.
      }
    }
  }
  return undefined;
}
97
+
66
98
  function tryMarkerExtraction(text: string): unknown | undefined {
67
99
  // Try to find JSON after common markers
68
100
  const markers = ["RESULT:", "OUTPUT:", "ANSWER:", "### Result\n", "## Output\n"];
@@ -408,9 +408,19 @@ export function hasPendingMutatingTaskAtBoundary(tasks: TeamTaskState[]): boolea
408
408
  function dagReadyTaskIds(tasks: TeamTaskState[], completedIds: Set<string>): string[] | null {
409
409
  const hasExplicitDeps = tasks.some((t) => t.dependsOn.length > 0);
410
410
  if (!hasExplicitDeps) return null;
411
+ // FIX (goal-wrap runtime test): task.dependsOn stores STEP IDs (e.g. "execute"), not
412
+ // task IDs (e.g. "02_execute"). The DAG scheduler compares deps against completedIds
413
+ // (which are task IDs), so step-ID deps would never match → dependent tasks stuck blocked
414
+ // forever. Map step IDs -> task IDs first (mirror dependencySatisfied in
415
+ // task-graph-scheduler.ts which handles this via stepToTaskId). buildDagExecutionPlan +
416
+ // getDagReadyTasks then work on consistent task IDs.
417
+ const stepToTaskId = new Map<string, string>();
418
+ for (const t of tasks) {
419
+ if (t.stepId) stepToTaskId.set(t.stepId, t.id);
420
+ }
411
421
  const nodes: TaskNode[] = tasks.map((t) => ({
412
422
  id: t.id,
413
- dependsOn: t.dependsOn,
423
+ dependsOn: t.dependsOn.map((dep) => stepToTaskId.get(dep) ?? dep),
414
424
  phase: t.adaptive?.phase ?? t.stepId,
415
425
  }));
416
426
  const plan = buildDagExecutionPlan(nodes);
@@ -13,8 +13,73 @@ import { spawn } from "node:child_process";
13
13
  import * as fs from "node:fs";
14
14
  import * as path from "node:path";
15
15
  import { writeArtifact } from "../state/artifact-store.ts";
16
+ import { redactSecretString } from "../utils/redaction.ts";
17
+ import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
16
18
  import type { VerificationContract, VerificationCommandResult, GreenLevel, ArtifactDescriptor } from "../state/types.ts";
17
19
 
20
/**
 * Phase 1.5 #1 (RFC 13 §6 info-disclosure mitigation): sanitize the env passed
 * to verification commands so worker-induced output cannot leak model-provider
 * secrets. P1f redaction at artifact-write + judge-bound is regex-best-effort
 * against adversarial workers; this kills the leak at the source by never
 * giving the verification process the secret in the first place.
 *
 * Opt-in via `PI_CREW_VERIFICATION_SANITIZE_ENV=1` to avoid breaking existing
 * flows whose tests legitimately need API access. Escape hatch:
 * `PI_CREW_VERIFICATION_PRESERVE_ENV=KEY1,KEY2,...` lets users explicitly opt
 * specific secrets back in.
 * NOTE(review): the upstream comment says preserved keys are "audited via the
 * allowlist validator"; that validator is not visible here — confirm in
 * sanitizeEnvSecrets.
 *
 * This constant is the base allowlist: the variable names that are kept when
 * sanitization is enabled.
 */
const VERIFICATION_ENV_ALLOWLIST: readonly string[] = [
  // Essential non-secret vars only — NO model-provider keys by default.
  // Shell / user basics.
  "PATH",
  "HOME",
  "USER",
  "SHELL",
  "TERM",
  // Locale.
  "LANG",
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MESSAGES",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  // XDG base directories.
  "XDG_CONFIG_HOME",
  "XDG_DATA_HOME",
  "XDG_CACHE_HOME",
  "XDG_RUNTIME_DIR",
  // Node / nvm / npm tooling.
  "NVM_BIN",
  "NVM_DIR",
  "NVM_INC",
  "NODE_PATH",
  "NODE_DISABLE_COLORS",
  "NODE_EXTRA_CA_CERTS",
  "NPM_CONFIG_REGISTRY",
  "NPM_CONFIG_USERCONFIG",
  "NPM_CONFIG_GLOBALCONFIG",
];
61
+
62
/**
 * Report whether env sanitization for verification commands is switched on.
 * True only when PI_CREW_VERIFICATION_SANITIZE_ENV or
 * PI_TEAMS_VERIFICATION_SANITIZE_ENV is exactly "1".
 */
export function isVerificationEnvSanitizeEnabled(): boolean {
  const flags = [
    process.env.PI_CREW_VERIFICATION_SANITIZE_ENV,
    process.env.PI_TEAMS_VERIFICATION_SANITIZE_ENV,
  ];
  return flags.some((value) => value === "1");
}
66
+
67
/**
 * Assemble the environment handed to a verification command.
 *
 * - Sanitization off: the full process.env is passed through.
 * - Sanitization on: process.env is filtered through sanitizeEnvSecrets with an
 *   allow-list made of VERIFICATION_ENV_ALLOWLIST plus the comma-separated keys in
 *   PI_CREW_VERIFICATION_PRESERVE_ENV (or PI_TEAMS_VERIFICATION_PRESERVE_ENV when
 *   the former is unset).
 *
 * In both cases FORCE_COLOR is forced to "0" so command output stays plain text.
 */
function buildVerificationEnv(): Record<string, string> {
  const plainOutput = { FORCE_COLOR: "0" };
  if (isVerificationEnvSanitizeEnabled()) {
    const rawPreserve =
      process.env.PI_CREW_VERIFICATION_PRESERVE_ENV ?? process.env.PI_TEAMS_VERIFICATION_PRESERVE_ENV ?? "";
    const extraKeys: string[] = [];
    for (const piece of rawPreserve.split(",")) {
      const key = piece.trim();
      if (key.length > 0) extraKeys.push(key);
    }
    const allowList = VERIFICATION_ENV_ALLOWLIST.concat(extraKeys);
    return { ...sanitizeEnvSecrets(process.env, { allowList }), ...plainOutput };
  }
  return { ...process.env, ...plainOutput };
}
82
+
18
83
  export interface PhaseGateResult {
19
84
  phase: number;
20
85
  name: string;
@@ -116,7 +181,7 @@ async function executeCommand(
116
181
  const shell = spawn("sh", ["-c", command], {
117
182
  cwd,
118
183
  timeout: timeoutMs,
119
- env: { ...process.env, FORCE_COLOR: "0" },
184
+ env: buildVerificationEnv(),
120
185
  });
121
186
 
122
187
  shell.stdout?.on("data", (data) => {
@@ -249,6 +314,11 @@ export async function executeVerificationCommands(
249
314
  taskId: string,
250
315
  artifactsRoot: string,
251
316
  signal?: AbortSignal,
317
+ /** Phase 1.5 #2 (RFC 16): when provided, run verification commands in this
318
+ * pristine git-worktree path instead of `cwd`. The caller is responsible
319
+ * for preparing + cleaning up the worktree (see verification-worktree.ts).
320
+ * When undefined, behavior is unchanged (run in `cwd`). */
321
+ worktreeCwd?: string,
252
322
  ): Promise<VerificationCommandResult[]> {
253
323
  if (!contract.commands || contract.commands.length === 0) {
254
324
  return [];
@@ -269,8 +339,17 @@ export async function executeVerificationCommands(
269
339
  fs.mkdirSync(gatesDir, { recursive: true });
270
340
  }
271
341
 
342
+ // Phase 1.5 #2: run phase gates inside the worktree when provided.
343
+ const execCwd = worktreeCwd ?? cwd;
344
+
272
345
  // Run phase gates
273
- const bundle = await runPhaseGates(gates, cwd, signal, (phaseResult) => {
346
+ const bundle = await runPhaseGates(gates, execCwd, signal, (phaseResult) => {
347
+ // P1f: redact secrets from verification output BEFORE persisting to the
348
+ // world-readable artifact file. redactSecretString is best-effort vs
349
+ // adversarial workers (RFC §6 — Med-High residual). writeArtifact ALSO
350
+ // redacts (defense-in-depth); this explicit pass sanitizes the raw output
351
+ // at the source so the in-memory bundle and the summary below are clean.
352
+ const safeOutput = redactSecretString(phaseResult.output || "");
274
353
  // Write phase artifact immediately for observability
275
354
  const phaseArtifact = writeArtifact(artifactsRoot, {
276
355
  kind: "log",
@@ -284,7 +363,7 @@ export async function executeVerificationCommands(
284
363
  phaseResult.error ? `Error: ${phaseResult.error}` : "",
285
364
  "",
286
365
  "## Output",
287
- phaseResult.output || "(no output)",
366
+ safeOutput || "(no output)",
288
367
  ].join("\n"),
289
368
  producer: taskId,
290
369
  });
@@ -297,11 +376,12 @@ export async function executeVerificationCommands(
297
376
  });
298
377
  });
299
378
 
300
- // Write summary artifact
379
+ // Write summary artifact. Redact the whole bundle JSON (it embeds the raw
380
+ // per-phase output strings) BEFORE writeArtifact persists it.
301
381
  const summaryArtifact = writeArtifact(artifactsRoot, {
302
382
  kind: "metadata",
303
383
  relativePath: `verification-gates/${taskId}-summary.json`,
304
- content: JSON.stringify(bundle, null, 2),
384
+ content: redactSecretString(JSON.stringify(bundle, null, 2)),
305
385
  producer: taskId,
306
386
  });
307
387
 
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Verification Integrity — manifest bookend snapshot helper (RFC §P1a).
3
+ *
4
+ * Hashes a FIXED set of project-manifest files so the goal loop can detect
5
+ * drift between T_snap (before verification runs) and T_verify_done (after the
6
+ * command exits). This closes the PERSISTENT-edit subcase of workspace
7
+ * tampering (a worker that rewrites package.json / lockfile and leaves it
8
+ * changed). See RFC §P1a / §6 STRIDE for the full threat model.
9
+ *
10
+ * RESIDUALS (documented; closed by Phase 1.5 git-worktree sandbox, NOT here):
11
+ * - Round-trip tamper: a worker can edit a manifest, run the test, then REVERT
12
+ * before T_verify_done so the hash matches T_snap. Content-addressed
13
+ * execution (git-worktree) is required to close this. Not fixable by hashing.
14
+ * - Invoked-script tampering: only the manifest files in MANIFEST_FILES are
15
+ * hashed. A worker that overwrites a script the verification command invokes
16
+ * is NOT caught. Phase 1.5 git-worktree closes this.
17
+ * - node_modules/ and transitive deps are deliberately NOT hashed (size +
18
+ * churn); package-lock.json IS hashed, which transitively pins resolved
19
+ * dependency versions.
20
+ *
21
+ * Pure leaf module: depends only on node: built-ins. Does NOT import
22
+ * goal-loop-runner or goal-evaluator (keeps the P1a helper unit-testable in
23
+ * isolation and avoids pulling the conflict-zone modules into this file).
24
+ *
25
+ * @module verification-integrity
26
+ */
27
+
28
+ import { createHash } from "node:crypto";
29
+ import * as fs from "node:fs";
30
+ import * as path from "node:path";
31
+
32
/**
 * The fixed list of project-manifest file names that {@link snapshotManifests}
 * looks for (RFC §P1a). Only names that exist as regular files in the target
 * directory end up in a snapshot.
 */
export const MANIFEST_FILES = [
  "package.json",
  "package-lock.json",
  "pyproject.toml",
  "setup.py",
  "Cargo.toml",
  "Cargo.lock",
  "go.mod",
  "tsconfig.json",
] as const;

/** sha256 hex digest of `abs` when it is a readable regular file; undefined otherwise. */
function hashRegularFile(abs: string): string | undefined {
  try {
    if (!fs.statSync(abs).isFile()) return undefined; // directory / special file
    return createHash("sha256").update(fs.readFileSync(abs)).digest("hex");
  } catch {
    return undefined; // missing or unreadable (permissions/race)
  }
}

/**
 * Hash every manifest from MANIFEST_FILES that is present directly inside `cwd`.
 *
 * Files that are missing, are not regular files, or cannot be read are left out
 * without raising. Nothing outside the fixed list is ever read.
 *
 * @returns relative manifest name -> sha256 hex digest, with keys inserted in
 *   MANIFEST_FILES order.
 */
export function snapshotManifests(cwd: string): Record<string, string> {
  const digests: Record<string, string> = {};
  for (const name of MANIFEST_FILES) {
    const digest = hashRegularFile(path.join(cwd, name));
    if (digest !== undefined) digests[name] = digest;
  }
  return digests;
}
80
+
81
/**
 * Diff two manifest snapshots.
 *
 * A path counts as drifted when it appears in only one snapshot (added or
 * removed) or when its digest differs between the two.
 *
 * @returns the drifted relative paths, sorted; `[]` when the snapshots agree.
 */
export function compareSnapshot(
  a: Record<string, string>,
  b: Record<string, string>,
): string[] {
  const everyKey = new Set<string>([...Object.keys(a), ...Object.keys(b)]);
  const changed = [...everyKey].filter((key) => {
    const before = a[key];
    const after = b[key];
    // Missing on either side = added/removed; otherwise compare digests.
    return before === undefined || after === undefined || before !== after;
  });
  return changed.sort();
}
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Phase 1.5 #2 — Git-worktree verification sandbox.
3
+ *
4
+ * RFC: research-findings/goal-workflow/16-PHASE1.5-WORKTREE-SANDBOX-RFC.md
5
+ *
6
+ * Closes the two remaining Med-High tampering residuals from RFC 13 §6:
7
+ * (1) Round-trip manifest tamper (MAJ#2): worker edits package.json, runs
8
+ * test, reverts before T_verify_done → snapshot matches → undetected.
9
+ * (2) Invoked-script tampering: worker rewrites a script the verification
10
+ * command invokes; only MANIFEST_FILES are hashed → invisible.
11
+ *
12
+ * Mitigation: run verification commands in a pristine `git worktree` at the
13
+ * T_snap commit SHA. Worker edits in the main workspace are invisible to
14
+ * the verification subprocess. Content-addressed execution: the test passes
15
+ * (or fails) against the original code, regardless of worker edits.
16
+ *
17
+ * Opt-in via PI_CREW_VERIFICATION_WORKTREE=1 (mirrors Phase 1.5 #1 pattern).
18
+ * Auto-fallback to existing behavior when: not a git repo, dirty index,
19
+ * git unavailable, or opt-out explicitly set. NEVER blocks the goal loop.
20
+ *
21
+ * Pure leaf module: depends only on node: built-ins + git CLI. No imports
22
+ * from goal-loop-runner or verification-gates (keeps unit-testable).
23
+ */
24
+ import { execFileSync } from "node:child_process";
25
+ import * as fs from "node:fs";
26
+ import * as os from "node:os";
27
+ import * as path from "node:path";
28
+
29
/**
 * Handle for one prepared verification worktree, as returned by
 * prepareVerificationWorktree and passed to the withVerificationWorktree callback.
 */
export interface VerificationWorktree {
  /** Absolute path to the pristine worktree directory. */
  worktreePath: string;
  /** Commit SHA the worktree is checked out at (matches T_snap). */
  commitSha: string;
  /** Cleanup handle — call to remove the worktree + temp dir. Idempotent. */
  cleanup: () => void;
}
37
+
38
/**
 * Report whether the worktree sandbox has been opted into.
 * Reads PI_CREW_VERIFICATION_WORKTREE, falling back to PI_TEAMS_VERIFICATION_WORKTREE
 * only when the former is unset; the value must be exactly "1" or "true".
 */
export function isWorktreeSandboxEnabled(): boolean {
  const raw = process.env.PI_CREW_VERIFICATION_WORKTREE ?? process.env.PI_TEAMS_VERIFICATION_WORKTREE;
  switch (raw) {
    case "1":
    case "true":
      return true;
    default:
      return false;
  }
}
43
+
44
/**
 * Detect whether the worktree sandbox is AVAILABLE at `cwd`. All of these must hold:
 *   - the opt-in env var is set (isWorktreeSandboxEnabled)
 *   - `git` can be executed
 *   - `cwd` is inside a git repo with a resolvable HEAD
 *   - `git status --porcelain` reports nothing (no uncommitted changes, which a
 *     worktree pinned to HEAD would not contain)
 *
 * Every git invocation is bounded by a timeout so a hung git process cannot stall
 * the caller; a timeout is reported like any other failed precondition.
 *
 * @returns `{ available: true, commitSha }` with the current HEAD SHA, or
 *   `{ available: false, reason }`. Never throws. Callers MUST gracefully fall back
 *   to non-sandboxed execution — never block the goal.
 */
export function checkWorktreeSandboxAvailable(cwd: string): { available: true; commitSha: string } | { available: false; reason: string } {
  if (!isWorktreeSandboxEnabled()) {
    return { available: false, reason: "PI_CREW_VERIFICATION_WORKTREE not set (opt-in)" };
  }
  // Shared runner: execFileSync throws on non-zero exit, spawn failure, or timeout.
  const git = (args: string[]): string =>
    execFileSync("git", args, { cwd, stdio: ["ignore", "pipe", "pipe"], encoding: "utf-8", timeout: 10_000 }).trim();
  try {
    // Is cwd inside a git repo? `git rev-parse --show-toplevel` exits non-zero
    // when it is not.
    const toplevel = git(["rev-parse", "--show-toplevel"]);
    if (!toplevel) return { available: false, reason: "git rev-parse returned empty toplevel" };
    // Current commit SHA (this is what T_snap will pin to).
    const commitSha = git(["rev-parse", "HEAD"]);
    if (!commitSha) return { available: false, reason: "git rev-parse HEAD returned empty SHA" };
    // Any porcelain output means uncommitted changes. Refuse to sandbox: the
    // worktree would NOT contain the in-progress edits (T_snap would pin to a
    // stale commit). Better to fall back than silently verify the wrong code.
    const status = git(["status", "--porcelain"]);
    if (status.length > 0) return { available: false, reason: `dirty git index (${status.split("\n").length} changed files); refusing to sandbox — worktree would pin to stale commit` };
    return { available: true, commitSha };
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    return { available: false, reason: `git precondition check failed: ${msg.slice(0, 200)}` };
  }
}
79
+
80
/**
 * Prepare a pristine git worktree at `commitSha`. The worktree is a fresh
 * checkout of the project at that commit — it does NOT contain worker edits
 * from the main workspace.
 *
 * Runs `git worktree add --detach <tmp>/wt-<sha8> <sha>` (detached HEAD, so no
 * branch is created) inside a fresh temp directory under os.tmpdir().
 *
 * @param cwd directory inside the repository; git commands run from here.
 * @param commitSha commit to check out.
 * @returns the worktree path, the commit SHA, and a cleanup handle.
 * @throws Error("git worktree add failed …") when the worktree cannot be created;
 *   cleanup has already run by the time the error is thrown.
 *
 * Cleanup is idempotent (safe to call multiple times) and best-effort (swallows
 * errors so a stuck worktree doesn't propagate into the goal loop).
 */
export function prepareVerificationWorktree(cwd: string, commitSha: string): VerificationWorktree {
  // Temp parent dir under os.tmpdir() so leftovers live in the system temp area.
  const tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), "pi-crew-wt-"));
  const shortSha = commitSha.slice(0, 8);
  const worktreePath = path.join(tmpRoot, `wt-${shortSha}`);
  let cleaned = false;
  const cleanup = (): void => {
    if (cleaned) return;
    cleaned = true;
    // Remove the worktree (force = proceed even if it has untracked files).
    let removedByGit = true;
    try {
      execFileSync("git", ["worktree", "remove", "--force", worktreePath], { cwd, stdio: ["ignore", "pipe", "pipe"], timeout: 5000 });
    } catch {
      removedByGit = false;
    }
    // Remove the temp parent dir (and whatever is left of the worktree inside it).
    try { fs.rmSync(tmpRoot, { recursive: true, force: true }); } catch { /* best-effort */ }
    // `git worktree prune` only drops registrations whose directory is missing,
    // so it must run AFTER the directory is deleted to clear a failed removal.
    if (!removedByGit) {
      try { execFileSync("git", ["worktree", "prune"], { cwd, stdio: ["ignore", "pipe", "pipe"], timeout: 5000 }); } catch { /* best-effort */ }
    }
  };
  try {
    execFileSync("git", ["worktree", "add", "--detach", worktreePath, commitSha], { cwd, stdio: ["ignore", "pipe", "pipe"], timeout: 30_000 });
    return { worktreePath, commitSha, cleanup };
  } catch (error) {
    cleanup();
    const msg = error instanceof Error ? error.message : String(error);
    throw new Error(`git worktree add failed (cwd=${cwd}, sha=${shortSha}): ${msg.slice(0, 300)}`);
  }
}
119
+
120
/**
 * Scoped helper: create a verification worktree, hand it to `fn`, and always
 * run its cleanup afterwards.
 *
 * - If `fn` throws or rejects, cleanup still runs and the original error propagates.
 * - If preparing the worktree fails, `fn` is never called and the returned promise
 *   rejects with the preparation error; the caller decides how to fall back.
 */
export async function withVerificationWorktree<T>(cwd: string, commitSha: string, fn: (worktree: VerificationWorktree) => Promise<T> | T): Promise<T> {
  const sandbox = prepareVerificationWorktree(cwd, commitSha);
  try {
    const outcome = await fn(sandbox);
    return outcome;
  } finally {
    sandbox.cleanup();
  }
}