pi-crew 0.8.14 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/CHANGELOG.md +366 -0
  2. package/README.md +112 -2
  3. package/docs/FEATURE_INTAKE.md +1 -1
  4. package/docs/HARNESS.md +20 -19
  5. package/docs/PROJECT_REVIEW.md +132 -133
  6. package/docs/PROJECT_REVIEW_FIXES.md +130 -131
  7. package/docs/actions-reference.md +127 -121
  8. package/docs/architecture.md +1 -1
  9. package/docs/code-review-2026-05-11.md +134 -134
  10. package/docs/commands-reference.md +108 -106
  11. package/docs/comparison-pi-subagents-vs-pi-crew.md +105 -105
  12. package/docs/deep-review-report.md +1 -1
  13. package/docs/dynamic-workflows.md +90 -0
  14. package/docs/fixes/BATCH_A_H1_H2.md +17 -17
  15. package/docs/fixes/bug-007-async-notifier-stale-ctx.md +23 -23
  16. package/docs/followup-plan-2026-05-12.md +135 -135
  17. package/docs/followup-review-2026-05-12.md +86 -86
  18. package/docs/followup-review-round3-2026-05-12.md +123 -123
  19. package/docs/goals.md +59 -0
  20. package/docs/implementation-plan-top3.md +4 -4
  21. package/docs/issue-29-analysis.md +2 -2
  22. package/docs/oh-my-pi-research.md +154 -154
  23. package/docs/optimization-plan.md +2 -0
  24. package/docs/perf/baseline-2026-05.md +9 -9
  25. package/docs/perf/final-report-2026-05.md +2 -2
  26. package/docs/perf/sprint-1-report.md +2 -2
  27. package/docs/perf/sprint-2-report.md +1 -1
  28. package/docs/perf/upgrade-plan-2026-05.md +72 -72
  29. package/docs/pi-crew-bugs.md +230 -230
  30. package/docs/pi-crew-investigation-report.md +102 -102
  31. package/docs/pi-crew-test-round5.md +4 -4
  32. package/docs/runtime-analysis-child-vs-live.md +57 -57
  33. package/docs/runtime-migration-in-process-analysis.md +97 -97
  34. package/package.json +2 -4
  35. package/skills/orchestration/SKILL.md +11 -11
  36. package/src/agents/agent-config.ts +4 -0
  37. package/src/config/config.ts +39 -0
  38. package/src/config/types.ts +11 -0
  39. package/src/extension/action-suggestions.ts +2 -1
  40. package/src/extension/async-notifier.ts +10 -0
  41. package/src/extension/help.ts +14 -0
  42. package/src/extension/registration/commands.ts +27 -0
  43. package/src/extension/team-tool/destructive-gate.ts +1 -1
  44. package/src/extension/team-tool/goal-wrap.ts +288 -0
  45. package/src/extension/team-tool/goal.ts +405 -0
  46. package/src/extension/team-tool/run.ts +103 -4
  47. package/src/extension/team-tool/workflow-manage.ts +194 -0
  48. package/src/extension/team-tool.ts +20 -0
  49. package/src/hooks/types.ts +3 -1
  50. package/src/runtime/async-runner.ts +27 -2
  51. package/src/runtime/background-runner.ts +68 -19
  52. package/src/runtime/child-pi.ts +9 -1
  53. package/src/runtime/completion-guard.ts +1 -1
  54. package/src/runtime/dynamic-workflow-context.ts +450 -0
  55. package/src/runtime/dynamic-workflow-runner.ts +180 -0
  56. package/src/runtime/global-worker-cap.ts +96 -0
  57. package/src/runtime/goal-evaluator.ts +294 -0
  58. package/src/runtime/goal-loop-runner.ts +612 -0
  59. package/src/runtime/goal-state-store.ts +209 -0
  60. package/src/runtime/iteration-hooks.ts +2 -1
  61. package/src/runtime/pi-args.ts +10 -2
  62. package/src/runtime/post-checks.ts +2 -1
  63. package/src/runtime/result-extractor.ts +32 -0
  64. package/src/runtime/team-runner.ts +11 -1
  65. package/src/runtime/verification-gates.ts +88 -5
  66. package/src/runtime/verification-integrity.ts +110 -0
  67. package/src/runtime/verification-worktree.ts +136 -0
  68. package/src/runtime/workspace-lock.ts +448 -0
  69. package/src/schema/config-schema.ts +26 -0
  70. package/src/schema/team-tool-schema.ts +39 -4
  71. package/src/state/atomic-write.ts +9 -0
  72. package/src/state/contracts.ts +14 -0
  73. package/src/state/crew-init.ts +18 -5
  74. package/src/state/event-log.ts +7 -1
  75. package/src/state/state-store.ts +2 -0
  76. package/src/state/types.ts +82 -0
  77. package/src/state/worker-atomic-writer.ts +190 -0
  78. package/src/utils/env-allowlist.ts +30 -0
  79. package/src/utils/redaction.ts +104 -24
  80. package/src/utils/safe-paths.ts +55 -14
  81. package/src/workflows/discover-workflows.ts +25 -1
  82. package/src/workflows/workflow-config.ts +13 -0
  83. package/src/worktree/cleanup.ts +2 -1
  84. package/src/worktree/worktree-manager.ts +4 -3
  85. package/teams/parallel-research.team.md +1 -1
  86. package/workflows/examples/hello.dwf.ts +24 -0
@@ -0,0 +1,209 @@
1
+ /**
2
+ * goal-state-store.ts — Persistent outer state for the autonomous goal loop (P0/P1).
3
+ *
4
+ * Spec: research-findings/goal-workflow/00-SPEC.md §2.3
5
+ * Plan: research-findings/goal-workflow/07-PLAN.md v3 §0b G2 (one manifest per turn,
6
+ * goal loop owns OUTER state) + §0c C10 (hardening: assertSafePathId + UUID goalId).
7
+ *
8
+ * Stores GoalLoopState as atomic JSON at <crewRoot>/state/goals/<goalId>.json.
9
+ * Modeled on ScheduleStore (state/schedule.ts:86) but with atomicWriteJson +
10
+ * path-traversal defense (assertSafePathId on every public method).
11
+ *
12
+ * Per §0c C2: budget lives here (budgetUsed accumulates collectRunMetrics across turns);
13
+ * per-turn usage stays on each turn's TeamRunManifest/tasks.json.
14
+ */
15
+
16
+ import { mkdirSync, existsSync, readFileSync, writeFileSync, readdirSync, unlinkSync, openSync, closeSync, statSync } from "node:fs";
17
+ import { dirname } from "node:path";
18
+ import { atomicWriteJson } from "../state/atomic-write.ts";
19
+ import { appendEvent } from "../state/event-log.ts";
20
+ import { assertSafePathId } from "../utils/safe-paths.ts";
21
+ import { createRunId } from "../utils/ids.ts";
22
+ import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
23
+ import { logInternalError } from "../utils/internal-error.ts";
24
+ import type { GoalLoopState, GoalLoopStatus } from "../state/types.ts";
25
+
26
/**
 * Resolve the directory that holds goal state files for `cwd`.
 *
 * The project crew root is used when `projectCrewRoot(cwd)` yields one;
 * otherwise the user crew root is used. The goals directory is always the
 * `state/goals` subfolder of that root.
 */
function resolveGoalsRoot(cwd: string): string {
	const projectRoot = projectCrewRoot(cwd);
	// `??` keeps userCrewRoot() lazy: it is only consulted when no project root is found.
	const base = projectRoot ?? userCrewRoot();
	return [base, "state", "goals"].join("/");
}
31
+
32
/**
 * Build the JSON state-file path for `goalId` under the goals root of `cwd`.
 *
 * The id is validated with assertSafePathId BEFORE any path is assembled, so an
 * unsafe id never reaches the filesystem layer (§0c C10).
 */
function goalFilePath(cwd: string, goalId: string): string {
	assertSafePathId("goalId", goalId);
	const goalsRoot = resolveGoalsRoot(cwd);
	return goalsRoot + "/" + goalId + ".json";
}
37
+
38
/**
 * GoalStore — CRUD for GoalLoopState files.
 *
 * Concurrency: writes are atomic (temp+rename+fsync via atomicWriteJson). For
 * read-modify-write sequences under contention, callers should coordinate via
 * GoalLoopState.state transitions (cooperative, the goal loop is single-writer
 * between turns). save()/patch() take no file lock because the loop is the sole
 * writer during its lifetime; `goal stop`/`pause`/`resume` from another session
 * flip state fields that the loop checks between turns (cooperative, §0c C11).
 * Only compareAndSetStatus() serializes across processes, via a per-goal
 * O_EXCL lockfile.
 */
export class GoalStore {
	/** Age after which a leftover CAS lockfile is treated as abandoned (crash recovery). */
	private static readonly CAS_LOCK_STALE_MS = 5000;

	/** File-name stems accepted by list(); defensive pre-filter before load(). */
	private static readonly SAFE_ID_PATTERN = /^[A-Za-z0-9_-]+$/;

	private readonly cwd: string;

	constructor(cwd: string) {
		this.cwd = cwd;
	}

	/** Generate a fresh, path-safe goalId (never user-derived — §0c C10). */
	createGoalId(): string {
		return createRunId("goal");
	}

	/**
	 * Load a goal by id.
	 *
	 * @returns The parsed state, or undefined if the file is missing, unreadable,
	 *   not valid JSON, or lacks a string `goalId`.
	 * @throws Whatever assertSafePathId throws for an unsafe goalId (§0c C10).
	 */
	load(goalId: string): GoalLoopState | undefined {
		// Path-safety check runs BEFORE the try/catch so traversal attempts throw (not silently return undefined).
		const path = goalFilePath(this.cwd, goalId);
		try {
			if (!existsSync(path)) return undefined;
			const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
			if (!parsed || typeof parsed !== "object") return undefined;
			if (typeof (parsed as { goalId?: unknown }).goalId !== "string") return undefined;
			return parsed as GoalLoopState;
		} catch {
			return undefined;
		}
	}

	/**
	 * Atomically persist a goal state (stamping a fresh `updatedAt`).
	 * Emits a goal.state_changed event if eventsPath is given.
	 *
	 * @throws On unsafe goalId, or when the write / event append fails (the
	 *   failure is logged via logInternalError and rethrown).
	 */
	save(state: GoalLoopState, eventsPath?: string): void {
		this.persist(state, eventsPath);
	}

	/**
	 * Patch a goal's top-level fields (e.g. state, turnsUsed, budgetUsed, currentRunId).
	 * `goalId` and `createdAt` of the stored goal are never overwritten by the patch.
	 *
	 * @returns The state exactly as persisted (including the new `updatedAt`),
	 *   or undefined if the goal could not be loaded.
	 */
	patch(goalId: string, patch: Partial<GoalLoopState>, eventsPath?: string): GoalLoopState | undefined {
		const current = this.load(goalId);
		if (!current) return undefined;
		const merged: GoalLoopState = { ...current, ...patch, goalId: current.goalId, createdAt: current.createdAt };
		return this.persist(merged, eventsPath);
	}

	/** Convenience: transition state with optional event emission. */
	setStatus(goalId: string, state: GoalLoopStatus, eventsPath?: string): GoalLoopState | undefined {
		return this.patch(goalId, { state }, eventsPath);
	}

	/**
	 * Cross-process compare-and-set of `state` (P1b, RFC v0.5 §P1b; cold-review #2 HIGH #2 fix).
	 *
	 * Loads the current state; if `current.state === expected`, sets it to `next`,
	 * persists, and emits a `goal.state_changed` event (same path as save()).
	 * Otherwise returns undefined (CAS failed — no mutation, no event).
	 *
	 * Why a lockfile: a synchronous read-check-write is only atomic within one
	 * process. `goal resume` runs in a DIFFERENT process than the (exited) loop,
	 * so two concurrent resumes could both see `stuck`, both pass the check and
	 * both spawn a background loop. The read-modify-write is therefore wrapped in
	 * an O_EXCL lockfile per goalId; failing to obtain it is reported as CAS-failed.
	 *
	 * Legal P1b transitions are enforced by callers (not by this method):
	 * running → stuck, stuck → running, stuck → cancelled.
	 *
	 * @returns The state exactly as persisted, or undefined when the lock could
	 *   not be taken, the goal is missing, or the state did not match `expected`.
	 */
	compareAndSetStatus(
		goalId: string,
		expected: GoalLoopStatus,
		next: GoalLoopStatus,
		eventsPath?: string,
	): GoalLoopState | undefined {
		const lockPath = `${goalFilePath(this.cwd, goalId)}.cas.lock`;
		const token = this.acquireCasLock(lockPath);
		if (token === undefined) {
			return undefined; // Another process holds the CAS lock — caller treats as CAS-failed.
		}
		try {
			const current = this.load(goalId);
			if (!current) return undefined;
			if (current.state !== expected) return undefined; // CAS failed — state moved underneath us.
			return this.persist({ ...current, state: next }, eventsPath);
		} finally {
			this.releaseCasLock(lockPath, token);
		}
	}

	/** Remove a goal file (used by `goal clear`). Returns true if deleted; failures are logged and reported as false. */
	remove(goalId: string): boolean {
		try {
			const path = goalFilePath(this.cwd, goalId);
			if (!existsSync(path)) return false;
			unlinkSync(path);
			return true;
		} catch (error) {
			logInternalError("goal-state-store.remove", error, `goalId=${goalId}`);
			return false;
		}
	}

	/** List all known goals (newest first by updatedAt). Errors are logged; unreadable entries are skipped. */
	list(): GoalLoopState[] {
		try {
			const root = resolveGoalsRoot(this.cwd);
			if (!existsSync(root)) return [];
			const goals: GoalLoopState[] = [];
			for (const entry of readdirSync(root)) {
				if (!entry.endsWith(".json")) continue;
				const goalId = entry.slice(0, -".json".length);
				// Skip entries that fail the safe-id check (defensive; createGoalId always produces safe ids).
				if (!GoalStore.SAFE_ID_PATTERN.test(goalId)) continue;
				try {
					const goal = this.load(goalId);
					if (goal) goals.push(goal);
				} catch (error) {
					// load() throws when assertSafePathId rejects the id; one bad file name
					// must not hide every other goal from the listing.
					logInternalError("goal-state-store.list", error, `goalId=${goalId}`);
				}
			}
			goals.sort((a, b) => (b.updatedAt ?? "").localeCompare(a.updatedAt ?? ""));
			return goals;
		} catch (error) {
			logInternalError("goal-state-store.list", error, `cwd=${this.cwd}`);
			return [];
		}
	}

	/**
	 * Shared write path for save()/patch()/compareAndSetStatus().
	 * Returns the object that was actually written so callers hand back a state
	 * whose `updatedAt` agrees with the file on disk.
	 */
	private persist(state: GoalLoopState, eventsPath?: string): GoalLoopState {
		// goalFilePath asserts path-safety; kept outside the try so an unsafe id throws without being logged as an I/O failure.
		const path = goalFilePath(this.cwd, state.goalId);
		const next: GoalLoopState = { ...state, updatedAt: new Date().toISOString() };
		try {
			mkdirSync(dirname(path), { recursive: true });
			atomicWriteJson(path, next);
			if (eventsPath) {
				appendEvent(eventsPath, { type: "goal.state_changed", runId: state.goalId, data: { goalId: state.goalId, state: state.state } });
			}
		} catch (error) {
			logInternalError("goal-state-store.save", error, `goalId=${state.goalId}`);
			throw error;
		}
		return next;
	}

	/**
	 * Acquire the O_EXCL lockfile for CAS, with stale-lock recovery.
	 *
	 * @returns An owner token (also written into the lockfile) on success, or
	 *   undefined when the lock is held by someone else or cannot be created.
	 *
	 * Residual race (not closed here): two processes that both observe the same
	 * stale lockfile can each unlink-and-recreate it in turn. The owner token
	 * only prevents a holder from deleting a lock it no longer owns on release.
	 */
	private acquireCasLock(lockPath: string): string | undefined {
		const token = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
		try {
			this.createLockFile(lockPath, token);
			return token;
		} catch (error) {
			const code = (error as NodeJS.ErrnoException).code;
			if (code !== "EEXIST") return undefined;
		}
		// Stale recovery: if the lockfile is older than the age guard, force-delete and retry once.
		try {
			const ageMs = Date.now() - statSync(lockPath).mtimeMs;
			if (ageMs > GoalStore.CAS_LOCK_STALE_MS) {
				unlinkSync(lockPath);
				this.createLockFile(lockPath, token);
				return token;
			}
		} catch {
			/* lock vanished or was re-taken while we looked — treat as not acquired */
		}
		return undefined;
	}

	/** Create the lockfile exclusively ("wx" throws EEXIST if it exists) and stamp it with the owner token. */
	private createLockFile(lockPath: string, token: string): void {
		const fd = openSync(lockPath, "wx");
		try {
			writeFileSync(fd, token);
		} catch (error) {
			// Could not stamp the lock: remove it so it does not block others until the stale guard expires.
			try { unlinkSync(lockPath); } catch { /* best-effort */ }
			throw error;
		} finally {
			closeSync(fd);
		}
	}

	/** Release the CAS lock, but only if the lockfile still carries our token (it may have been force-cleared and re-taken). */
	private releaseCasLock(lockPath: string, token: string): void {
		try {
			if (readFileSync(lockPath, "utf-8") === token) unlinkSync(lockPath);
		} catch {
			/* best-effort; may already be gone */
		}
	}
}
@@ -7,6 +7,7 @@
7
7
  import { spawn } from "node:child_process";
8
8
  import * as fs from "node:fs";
9
9
  import * as path from "node:path";
10
+ import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
10
11
  import { resolveShellForScript } from "../utils/resolve-shell.ts";
11
12
  import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
12
13
  import { DENIED_METRIC_NAMES } from "./metric-parser.ts";
@@ -171,7 +172,7 @@ export async function runIterationHook(
171
172
  const { command, args } = resolveShellForScript(resolvedScript);
172
173
  const child = spawn(command, args, {
173
174
  cwd: payload.cwd,
174
- env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER", "USERPROFILE", "TEMP", "TMP", "TMPDIR", "LANG", "LC_ALL", "ComSpec", "SystemRoot", "PI_CREW_*"] }), PI_CREW_HOOK: "1" },
175
+ env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER", ...WINDOWS_ESSENTIAL_ENV_VARS, "TMPDIR", "LANG", "LC_ALL", "PI_CREW_*"] }), PI_CREW_HOOK: "1" },
175
176
  stdio: ["pipe", "pipe", "pipe"],
176
177
  });
177
178
 
@@ -269,8 +269,16 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
269
269
  const explicitTools = policy.tools;
270
270
  const excludeTools = policy.excludeTools;
271
271
 
272
- if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
273
- if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
272
+ // §0c C6: agent.disableTools (Pi `--no-tools`) fully disables all tools. Used by
273
+ // capability-locked agents (e.g. the goal-judge) that must have NO agency.
274
+ // Emitted INSTEAD of --tools/--exclude-tools (see the else branch below), so no later flag can re-enable tools; under last-wins parsing, ordering alone would not guarantee that.
275
+ // An empty `tools:[]` is INSUFFICIENT because the length-check below skips it.
276
+ if (input.agent.disableTools === true) {
277
+ args.push("--no-tools");
278
+ } else {
279
+ if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
280
+ if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
281
+ }
274
282
  // Always add --no-extensions before --extension to prevent user extensions from being auto-loaded.
275
283
  // User extensions in ~/.pi/agent/extensions/ may fail due to missing dependencies.
276
284
  args.push("--no-extensions");
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { execFileSync } from "node:child_process";
8
8
  import * as path from "node:path";
9
+ import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
9
10
  import { resolveShellForScript } from "../utils/resolve-shell.ts";
10
11
  import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
11
12
 
@@ -94,7 +95,7 @@ export async function runPostCheck(config: PostCheckConfig, cwd: string): Promis
94
95
  timeout: timeoutMs,
95
96
  encoding: "utf-8",
96
97
  maxBuffer: 10 * 1024 * 1024, // 10 MB
97
- env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER", "USERPROFILE", "TEMP", "TMP", "TMPDIR", "LANG", "LC_ALL", "ComSpec", "SystemRoot", "PI_CREW_*"] }), PI_CREW_POST_CHECK: "1" },
98
+ env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER", ...WINDOWS_ESSENTIAL_ENV_VARS, "TMPDIR", "LANG", "LC_ALL", "PI_CREW_*"] }), PI_CREW_POST_CHECK: "1" },
98
99
  });
99
100
 
100
101
  const durationMs = Date.now() - startTime;
@@ -41,6 +41,14 @@ export function extractStructuredResult(raw: string, _schema?: Record<string, un
41
41
  return { structured: true, data: markerResult, rawText: raw };
42
42
  }
43
43
 
44
+ // Strategy 4: Scan for the first JSON object/array anywhere in text.
45
+ // Models often add prose preamble/epilogue ("Here's my review:", "Let me analyze...")
46
+ // around the JSON. This catches JSON embedded in sentences, lists, or prose.
47
+ const scannedResult = tryScanJson(trimmed);
48
+ if (scannedResult !== undefined) {
49
+ return { structured: true, data: scannedResult, rawText: raw };
50
+ }
51
+
44
52
  return { structured: false, data: null, rawText: raw };
45
53
  }
46
54
 
@@ -63,6 +71,30 @@ function tryFencedJson(text: string): unknown | undefined {
63
71
  }
64
72
  }
65
73
 
74
/**
 * Strategy 4: locate the first parseable JSON object/array embedded anywhere in `text`.
 *
 * Walks the text left to right. At every `{` or `[` it asks findMatchingBracket
 * for the extent of the bracketed span starting there (a non-positive result is
 * treated as "no span") and tries to JSON.parse that span. The first span that
 * parses wins; spans that do not parse are ignored and the walk resumes at the
 * next character, so prose preamble/epilogue around the JSON is tolerated.
 *
 * @returns The first successfully parsed value, or undefined if none is found.
 */
function tryScanJson(text: string): unknown | undefined {
	let start = 0;
	while (start < text.length) {
		const opener = text[start];
		if (opener === "{" || opener === "[") {
			const tail = text.slice(start);
			const stop = findMatchingBracket(tail);
			if (stop > 0) {
				try {
					return JSON.parse(tail.slice(0, stop));
				} catch {
					// Bracketed span is not valid JSON; fall through and try the next opener.
				}
			}
		}
		start += 1;
	}
	return undefined;
}
97
+
66
98
  function tryMarkerExtraction(text: string): unknown | undefined {
67
99
  // Try to find JSON after common markers
68
100
  const markers = ["RESULT:", "OUTPUT:", "ANSWER:", "### Result\n", "## Output\n"];
@@ -408,9 +408,19 @@ export function hasPendingMutatingTaskAtBoundary(tasks: TeamTaskState[]): boolea
408
408
  function dagReadyTaskIds(tasks: TeamTaskState[], completedIds: Set<string>): string[] | null {
409
409
  const hasExplicitDeps = tasks.some((t) => t.dependsOn.length > 0);
410
410
  if (!hasExplicitDeps) return null;
411
+ // FIX (goal-wrap runtime test): task.dependsOn stores STEP IDs (e.g. "execute"), not
412
+ // task IDs (e.g. "02_execute"). The DAG scheduler compares deps against completedIds
413
+ // (which are task IDs), so step-ID deps would never match → dependent tasks stuck blocked
414
+ // forever. Map step IDs -> task IDs first (mirror dependencySatisfied in
415
+ // task-graph-scheduler.ts which handles this via stepToTaskId). buildDagExecutionPlan +
416
+ // getDagReadyTasks then work on consistent task IDs.
417
+ const stepToTaskId = new Map<string, string>();
418
+ for (const t of tasks) {
419
+ if (t.stepId) stepToTaskId.set(t.stepId, t.id);
420
+ }
411
421
  const nodes: TaskNode[] = tasks.map((t) => ({
412
422
  id: t.id,
413
- dependsOn: t.dependsOn,
423
+ dependsOn: t.dependsOn.map((dep) => stepToTaskId.get(dep) ?? dep),
414
424
  phase: t.adaptive?.phase ?? t.stepId,
415
425
  }));
416
426
  const plan = buildDagExecutionPlan(nodes);
@@ -12,9 +12,77 @@
12
12
  import { spawn } from "node:child_process";
13
13
  import * as fs from "node:fs";
14
14
  import * as path from "node:path";
15
+ import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
15
16
  import { writeArtifact } from "../state/artifact-store.ts";
17
+ import { redactSecretString } from "../utils/redaction.ts";
18
+ import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
16
19
  import type { VerificationContract, VerificationCommandResult, GreenLevel, ArtifactDescriptor } from "../state/types.ts";
17
20
 
21
/**
 * Phase 1.5 #1 (RFC 13 §6 info-disclosure mitigation): sanitize the env passed
 * to verification commands so worker-induced output cannot leak model-provider
 * secrets. P1f redaction at artifact-write + judge-bound is regex-best-effort
 * against adversarial workers; this kills the leak at the source by never
 * giving the verification process the secret in the first place.
 *
 * Opt-in via `PI_CREW_VERIFICATION_SANITIZE_ENV=1` to avoid breaking existing
 * flows whose tests legitimately need API access. Escape hatch:
 * `PI_CREW_VERIFICATION_PRESERVE_ENV=KEY1,KEY2,...` lets users explicitly opt
 * specific variables back in (NOTE(review): any validation of those names
 * happens outside this constant — confirm where).
 *
 * This list holds the variable names that survive sanitization by default.
 */
const VERIFICATION_ENV_ALLOWLIST: readonly string[] = [
	// Essential non-secret vars only — NO model-provider keys by default.
	"PATH",
	"HOME",
	"USER",
	"SHELL",
	"TERM",
	// POSIX temp-dir location. The Windows equivalents arrive via
	// WINDOWS_ESSENTIAL_ENV_VARS below, and the other sanitized spawn sites
	// (iteration hooks, post-checks) already pass TMPDIR; without it, commands
	// fall back to the platform default temp dir, which may differ from the
	// caller's or be unwritable in sandboxed environments.
	"TMPDIR",
	"LANG",
	"LC_ALL",
	"LC_COLLATE",
	"LC_CTYPE",
	"LC_MESSAGES",
	"LC_MONETARY",
	"LC_NUMERIC",
	"LC_TIME",
	"XDG_CONFIG_HOME",
	"XDG_DATA_HOME",
	"XDG_CACHE_HOME",
	"XDG_RUNTIME_DIR",
	// Windows essentials — see WINDOWS_ESSENTIAL_ENV_VARS (src/utils/env-allowlist.ts).
	...WINDOWS_ESSENTIAL_ENV_VARS,
	// Node / npm toolchain location and configuration.
	"NVM_BIN",
	"NVM_DIR",
	"NVM_INC",
	"NODE_PATH",
	"NODE_DISABLE_COLORS",
	"NODE_EXTRA_CA_CERTS",
	"NPM_CONFIG_REGISTRY",
	"NPM_CONFIG_USERCONFIG",
	"NPM_CONFIG_GLOBALCONFIG",
];
64
+
65
/**
 * Report whether env sanitization for verification commands is switched on.
 *
 * The feature is opt-in: it is enabled only when either
 * `PI_CREW_VERIFICATION_SANITIZE_ENV` or `PI_TEAMS_VERIFICATION_SANITIZE_ENV`
 * is set to exactly the string "1".
 */
export function isVerificationEnvSanitizeEnabled(): boolean {
	const switches = [
		process.env.PI_CREW_VERIFICATION_SANITIZE_ENV,
		process.env.PI_TEAMS_VERIFICATION_SANITIZE_ENV,
	];
	return switches.some((value) => value === "1");
}
69
+
70
/**
 * Assemble the environment handed to a verification command.
 *
 * - Sanitization disabled: the full `process.env` is passed through.
 * - Sanitization enabled: `process.env` is filtered by sanitizeEnvSecrets using
 *   VERIFICATION_ENV_ALLOWLIST plus any names listed (comma-separated, trimmed,
 *   empties dropped) in `PI_CREW_VERIFICATION_PRESERVE_ENV`, falling back to
 *   `PI_TEAMS_VERIFICATION_PRESERVE_ENV` when the former is unset.
 *
 * In both cases `FORCE_COLOR` is forced to "0" so command output stays plain text.
 */
function buildVerificationEnv(): Record<string, string> {
	if (isVerificationEnvSanitizeEnabled()) {
		const rawPreserve =
			process.env.PI_CREW_VERIFICATION_PRESERVE_ENV ?? process.env.PI_TEAMS_VERIFICATION_PRESERVE_ENV ?? "";
		const extraKeys: string[] = [];
		for (const piece of rawPreserve.split(",")) {
			const key = piece.trim();
			if (key.length > 0) extraKeys.push(key);
		}
		const sanitized = sanitizeEnvSecrets(process.env, {
			allowList: [...VERIFICATION_ENV_ALLOWLIST, ...extraKeys],
		});
		return { ...sanitized, FORCE_COLOR: "0" };
	}
	return { ...process.env, FORCE_COLOR: "0" };
}
85
+
18
86
  export interface PhaseGateResult {
19
87
  phase: number;
20
88
  name: string;
@@ -116,7 +184,7 @@ async function executeCommand(
116
184
  const shell = spawn("sh", ["-c", command], {
117
185
  cwd,
118
186
  timeout: timeoutMs,
119
- env: { ...process.env, FORCE_COLOR: "0" },
187
+ env: buildVerificationEnv(),
120
188
  });
121
189
 
122
190
  shell.stdout?.on("data", (data) => {
@@ -249,6 +317,11 @@ export async function executeVerificationCommands(
249
317
  taskId: string,
250
318
  artifactsRoot: string,
251
319
  signal?: AbortSignal,
320
+ /** Phase 1.5 #2 (RFC 16): when provided, run verification commands in this
321
+ * pristine git-worktree path instead of `cwd`. The caller is responsible
322
+ * for preparing + cleaning up the worktree (see verification-worktree.ts).
323
+ * When undefined, behavior is unchanged (run in `cwd`). */
324
+ worktreeCwd?: string,
252
325
  ): Promise<VerificationCommandResult[]> {
253
326
  if (!contract.commands || contract.commands.length === 0) {
254
327
  return [];
@@ -269,8 +342,17 @@ export async function executeVerificationCommands(
269
342
  fs.mkdirSync(gatesDir, { recursive: true });
270
343
  }
271
344
 
345
+ // Phase 1.5 #2: run phase gates inside the worktree when provided.
346
+ const execCwd = worktreeCwd ?? cwd;
347
+
272
348
  // Run phase gates
273
- const bundle = await runPhaseGates(gates, cwd, signal, (phaseResult) => {
349
+ const bundle = await runPhaseGates(gates, execCwd, signal, (phaseResult) => {
350
+ // P1f: redact secrets from verification output BEFORE persisting to the
351
+ // world-readable artifact file. redactSecretString is best-effort vs
352
+ // adversarial workers (RFC §6 — Med-High residual). writeArtifact ALSO
353
+ // redacts (defense-in-depth); this explicit pass sanitizes the raw output
354
+ // at the source so the in-memory bundle and the summary below are clean.
355
+ const safeOutput = redactSecretString(phaseResult.output || "");
274
356
  // Write phase artifact immediately for observability
275
357
  const phaseArtifact = writeArtifact(artifactsRoot, {
276
358
  kind: "log",
@@ -284,7 +366,7 @@ export async function executeVerificationCommands(
284
366
  phaseResult.error ? `Error: ${phaseResult.error}` : "",
285
367
  "",
286
368
  "## Output",
287
- phaseResult.output || "(no output)",
369
+ safeOutput || "(no output)",
288
370
  ].join("\n"),
289
371
  producer: taskId,
290
372
  });
@@ -297,11 +379,12 @@ export async function executeVerificationCommands(
297
379
  });
298
380
  });
299
381
 
300
- // Write summary artifact
382
+ // Write summary artifact. Redact the whole bundle JSON (it embeds the raw
383
+ // per-phase output strings) BEFORE writeArtifact persists it.
301
384
  const summaryArtifact = writeArtifact(artifactsRoot, {
302
385
  kind: "metadata",
303
386
  relativePath: `verification-gates/${taskId}-summary.json`,
304
- content: JSON.stringify(bundle, null, 2),
387
+ content: redactSecretString(JSON.stringify(bundle, null, 2)),
305
388
  producer: taskId,
306
389
  });
307
390
 
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Verification Integrity — manifest bookend snapshot helper (RFC §P1a).
3
+ *
4
+ * Hashes a FIXED set of project-manifest files so the goal loop can detect
5
+ * drift between T_snap (before verification runs) and T_verify_done (after the
6
+ * command exits). This closes the PERSISTENT-edit subcase of workspace
7
+ * tampering (a worker that rewrites package.json / lockfile and leaves it
8
+ * changed). See RFC §P1a / §6 STRIDE for the full threat model.
9
+ *
10
+ * RESIDUALS (documented; closed by Phase 1.5 git-worktree sandbox, NOT here):
11
+ * - Round-trip tamper: a worker can edit a manifest, run the test, then REVERT
12
+ * before T_verify_done so the hash matches T_snap. Content-addressed
13
+ * execution (git-worktree) is required to close this. Not fixable by hashing.
14
+ * - Invoked-script tampering: only the manifest files in MANIFEST_FILES are
15
+ * hashed. A worker that overwrites a script the verification command invokes
16
+ * is NOT caught. Phase 1.5 git-worktree closes this.
17
+ * - node_modules/ and transitive deps are deliberately NOT hashed (size +
18
+ * churn); package-lock.json IS hashed, which transitively pins resolved
19
+ * dependency versions.
20
+ *
21
+ * Pure leaf module: depends only on node: built-ins. Does NOT import
22
+ * goal-loop-runner or goal-evaluator (keeps the P1a helper unit-testable in
23
+ * isolation and avoids pulling the conflict-zone modules into this file).
24
+ *
25
+ * @module verification-integrity
26
+ */
27
+
28
+ import { createHash } from "node:crypto";
29
+ import * as fs from "node:fs";
30
+ import * as path from "node:path";
31
+
32
/**
 * The closed list of project-manifest file names that {@link snapshotManifests}
 * looks for, in the order they appear in a snapshot. Entries that do not exist
 * in the target directory are simply absent from the result.
 * (RFC §P1a: package.json, package-lock.json, pyproject.toml, setup.py,
 * Cargo.toml, Cargo.lock, go.mod, tsconfig.json.)
 */
export const MANIFEST_FILES = [
	// Node
	"package.json",
	"package-lock.json",
	// Python
	"pyproject.toml",
	"setup.py",
	// Rust
	"Cargo.toml",
	"Cargo.lock",
	// Go
	"go.mod",
	// TypeScript
	"tsconfig.json",
] as const;

/**
 * Compute a sha256 digest for every MANIFEST_FILES entry that is a regular,
 * readable file directly inside `cwd`.
 *
 * - Entries that cannot be stat'ed (typically: missing) are left out, not reported.
 * - Entries that are not regular files (e.g. directories) are left out.
 * - Entries that cannot be read (permissions, races) are left out.
 * - Only the names in MANIFEST_FILES are examined; nothing else in `cwd` is touched.
 *
 * @param cwd Directory to inspect.
 * @returns Map of manifest name -> sha256 hex digest, with keys inserted in
 *   MANIFEST_FILES order.
 */
export function snapshotManifests(cwd: string): Record<string, string> {
	const digests: Record<string, string> = {};
	for (const name of MANIFEST_FILES) {
		const target = path.join(cwd, name);
		try {
			if (!fs.statSync(target).isFile()) continue; // directory / special — skip
			const bytes = fs.readFileSync(target);
			digests[name] = createHash("sha256").update(bytes).digest("hex");
		} catch {
			// Missing or unreadable — skip gracefully and move on to the next manifest.
		}
	}
	return digests;
}
80
+
81
/**
 * Compare two snapshots and return the list of DRIFTED file paths.
 *
 * A file is considered drifted if:
 * - its hash differs between `a` and `b`, OR
 * - it is present in only one of the two snapshots (added or removed).
 *
 * Presence is decided by OWN properties only, so a key that happens to share a
 * name with an inherited `Object.prototype` member (e.g. "toString",
 * "constructor") is still detected as added/removed instead of being mistaken
 * for an existing entry.
 *
 * @param a Snapshot taken first (e.g. before verification).
 * @param b Snapshot taken second (e.g. after verification).
 * @returns Sorted array of relative manifest paths that drifted. Identical
 *   snapshots yield `[]`.
 */
export function compareSnapshot(
	a: Record<string, string>,
	b: Record<string, string>,
): string[] {
	const hasEntry = (snapshot: Record<string, string>, key: string): boolean =>
		Object.prototype.hasOwnProperty.call(snapshot, key);

	const drifted = new Set<string>();
	for (const [key, hash] of Object.entries(a)) {
		// Removed between a -> b, or content changed.
		if (!hasEntry(b, key) || b[key] !== hash) drifted.add(key);
	}
	for (const key of Object.keys(b)) {
		// Added between a -> b.
		if (!hasEntry(a, key)) drifted.add(key);
	}
	return [...drifted].sort();
}