pi-crew 0.8.14 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +366 -0
- package/README.md +112 -2
- package/docs/FEATURE_INTAKE.md +1 -1
- package/docs/HARNESS.md +20 -19
- package/docs/PROJECT_REVIEW.md +132 -133
- package/docs/PROJECT_REVIEW_FIXES.md +130 -131
- package/docs/actions-reference.md +127 -121
- package/docs/architecture.md +1 -1
- package/docs/code-review-2026-05-11.md +134 -134
- package/docs/commands-reference.md +108 -106
- package/docs/comparison-pi-subagents-vs-pi-crew.md +105 -105
- package/docs/deep-review-report.md +1 -1
- package/docs/dynamic-workflows.md +90 -0
- package/docs/fixes/BATCH_A_H1_H2.md +17 -17
- package/docs/fixes/bug-007-async-notifier-stale-ctx.md +23 -23
- package/docs/followup-plan-2026-05-12.md +135 -135
- package/docs/followup-review-2026-05-12.md +86 -86
- package/docs/followup-review-round3-2026-05-12.md +123 -123
- package/docs/goals.md +59 -0
- package/docs/implementation-plan-top3.md +4 -4
- package/docs/issue-29-analysis.md +2 -2
- package/docs/oh-my-pi-research.md +154 -154
- package/docs/optimization-plan.md +2 -0
- package/docs/perf/baseline-2026-05.md +9 -9
- package/docs/perf/final-report-2026-05.md +2 -2
- package/docs/perf/sprint-1-report.md +2 -2
- package/docs/perf/sprint-2-report.md +1 -1
- package/docs/perf/upgrade-plan-2026-05.md +72 -72
- package/docs/pi-crew-bugs.md +230 -230
- package/docs/pi-crew-investigation-report.md +102 -102
- package/docs/pi-crew-test-round5.md +4 -4
- package/docs/runtime-analysis-child-vs-live.md +57 -57
- package/docs/runtime-migration-in-process-analysis.md +97 -97
- package/package.json +2 -4
- package/skills/orchestration/SKILL.md +11 -11
- package/src/agents/agent-config.ts +4 -0
- package/src/config/config.ts +39 -0
- package/src/config/types.ts +11 -0
- package/src/extension/action-suggestions.ts +2 -1
- package/src/extension/async-notifier.ts +10 -0
- package/src/extension/help.ts +14 -0
- package/src/extension/registration/commands.ts +27 -0
- package/src/extension/team-tool/destructive-gate.ts +1 -1
- package/src/extension/team-tool/goal-wrap.ts +288 -0
- package/src/extension/team-tool/goal.ts +405 -0
- package/src/extension/team-tool/run.ts +103 -4
- package/src/extension/team-tool/workflow-manage.ts +194 -0
- package/src/extension/team-tool.ts +20 -0
- package/src/hooks/types.ts +3 -1
- package/src/runtime/async-runner.ts +27 -2
- package/src/runtime/background-runner.ts +68 -19
- package/src/runtime/child-pi.ts +9 -1
- package/src/runtime/completion-guard.ts +1 -1
- package/src/runtime/dynamic-workflow-context.ts +450 -0
- package/src/runtime/dynamic-workflow-runner.ts +180 -0
- package/src/runtime/global-worker-cap.ts +96 -0
- package/src/runtime/goal-evaluator.ts +294 -0
- package/src/runtime/goal-loop-runner.ts +612 -0
- package/src/runtime/goal-state-store.ts +209 -0
- package/src/runtime/iteration-hooks.ts +2 -1
- package/src/runtime/pi-args.ts +10 -2
- package/src/runtime/post-checks.ts +2 -1
- package/src/runtime/result-extractor.ts +32 -0
- package/src/runtime/team-runner.ts +11 -1
- package/src/runtime/verification-gates.ts +88 -5
- package/src/runtime/verification-integrity.ts +110 -0
- package/src/runtime/verification-worktree.ts +136 -0
- package/src/runtime/workspace-lock.ts +448 -0
- package/src/schema/config-schema.ts +26 -0
- package/src/schema/team-tool-schema.ts +39 -4
- package/src/state/atomic-write.ts +9 -0
- package/src/state/contracts.ts +14 -0
- package/src/state/crew-init.ts +18 -5
- package/src/state/event-log.ts +7 -1
- package/src/state/state-store.ts +2 -0
- package/src/state/types.ts +82 -0
- package/src/state/worker-atomic-writer.ts +190 -0
- package/src/utils/env-allowlist.ts +30 -0
- package/src/utils/redaction.ts +104 -24
- package/src/utils/safe-paths.ts +55 -14
- package/src/workflows/discover-workflows.ts +25 -1
- package/src/workflows/workflow-config.ts +13 -0
- package/src/worktree/cleanup.ts +2 -1
- package/src/worktree/worktree-manager.ts +4 -3
- package/teams/parallel-research.team.md +1 -1
- package/workflows/examples/hello.dwf.ts +24 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* goal-state-store.ts — Persistent outer state for the autonomous goal loop (P0/P1).
|
|
3
|
+
*
|
|
4
|
+
* Spec: research-findings/goal-workflow/00-SPEC.md §2.3
|
|
5
|
+
* Plan: research-findings/goal-workflow/07-PLAN.md v3 §0b G2 (one manifest per turn,
|
|
6
|
+
* goal loop owns OUTER state) + §0c C10 (hardening: assertSafePathId + UUID goalId).
|
|
7
|
+
*
|
|
8
|
+
* Stores GoalLoopState as atomic JSON at <crewRoot>/state/goals/<goalId>.json.
|
|
9
|
+
* Modeled on ScheduleStore (state/schedule.ts:86) but with atomicWriteJson +
|
|
10
|
+
* path-traversal defense (assertSafePathId on every public method).
|
|
11
|
+
*
|
|
12
|
+
* Per §0c C2: budget lives here (budgetUsed accumulates collectRunMetrics across turns);
|
|
13
|
+
* per-turn usage stays on each turn's TeamRunManifest/tasks.json.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { mkdirSync, existsSync, readFileSync, writeFileSync, readdirSync, unlinkSync, openSync, closeSync, statSync } from "node:fs";
|
|
17
|
+
import { dirname } from "node:path";
|
|
18
|
+
import { atomicWriteJson } from "../state/atomic-write.ts";
|
|
19
|
+
import { appendEvent } from "../state/event-log.ts";
|
|
20
|
+
import { assertSafePathId } from "../utils/safe-paths.ts";
|
|
21
|
+
import { createRunId } from "../utils/ids.ts";
|
|
22
|
+
import { projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
|
|
23
|
+
import { logInternalError } from "../utils/internal-error.ts";
|
|
24
|
+
import type { GoalLoopState, GoalLoopStatus } from "../state/types.ts";
|
|
25
|
+
|
|
26
|
+
/**
 * Resolve the directory that holds goal-state files for `cwd`.
 *
 * The project crew root is used when `projectCrewRoot(cwd)` yields one;
 * otherwise the user crew root is used. `state/goals` is appended to it.
 */
function resolveGoalsRoot(cwd: string): string {
  const base = projectCrewRoot(cwd) ?? userCrewRoot();
  return [base, "state", "goals"].join("/");
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build the JSON file path for a goal.
 *
 * The id is validated with `assertSafePathId` before any path is assembled,
 * so an unsafe id throws instead of producing a path (§0c C10).
 */
function goalFilePath(cwd: string, goalId: string): string {
  assertSafePathId("goalId", goalId);
  const goalsRoot = resolveGoalsRoot(cwd);
  return goalsRoot + "/" + goalId + ".json";
}
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* GoalStore — CRUD for GoalLoopState files.
|
|
40
|
+
*
|
|
41
|
+
* Concurrency: writes are atomic (temp+rename+fsync via atomicWriteJson). For
|
|
42
|
+
* read-modify-write sequences under contention, callers should coordinate via
|
|
43
|
+
* GoalLoopState.state transitions (cooperative, the goal loop is single-writer
|
|
44
|
+
* between turns). There is no file-lock here because the loop is the sole writer
|
|
45
|
+
* during its lifetime; `goal stop`/`pause`/`resume` from another session flip
|
|
46
|
+
* state fields that the loop checks between turns (cooperative, §0c C11).
|
|
47
|
+
*/
|
|
48
|
+
export class GoalStore {
  /** Age in milliseconds after which a leftover CAS lockfile is treated as abandoned. */
  private static readonly CAS_LOCK_STALE_MS = 5000;

  private readonly cwd: string;

  /** @param cwd Working directory used to resolve the goals state root. */
  constructor(cwd: string) {
    this.cwd = cwd;
  }

  /** Generate a fresh, path-safe goalId (never user-derived — §0c C10). */
  createGoalId(): string {
    return createRunId("goal");
  }

  /**
   * Load a goal by id.
   *
   * @returns The parsed state, or `undefined` when the file is missing,
   *   unreadable, not valid JSON, or lacks a string `goalId`.
   * @throws When `goalId` is rejected by the path-safety check (§0c C10).
   */
  load(goalId: string): GoalLoopState | undefined {
    // Path-safety check runs BEFORE the try/catch so traversal attempts throw
    // (not silently return undefined).
    const path = goalFilePath(this.cwd, goalId);
    try {
      if (!existsSync(path)) return undefined;
      const parsed: unknown = JSON.parse(readFileSync(path, "utf-8"));
      if (!parsed || typeof parsed !== "object") return undefined;
      if (typeof (parsed as { goalId?: unknown }).goalId !== "string") return undefined;
      return parsed as GoalLoopState;
    } catch {
      return undefined;
    }
  }

  /**
   * Atomically persist a goal state, stamping `updatedAt` with the current time.
   *
   * @param state Goal state to write.
   * @param eventsPath When given, a `goal.state_changed` event is appended there
   *   after the write.
   * @throws Re-throws any write/event error after logging it; an unsafe
   *   `state.goalId` throws before anything is written.
   */
  save(state: GoalLoopState, eventsPath?: string): void {
    assertSafePathId("goalId", state.goalId);
    const path = goalFilePath(this.cwd, state.goalId);
    const next = { ...state, updatedAt: new Date().toISOString() };
    try {
      mkdirSync(dirname(path), { recursive: true });
      atomicWriteJson(path, next);
      if (eventsPath) {
        appendEvent(eventsPath, { type: "goal.state_changed", runId: state.goalId, data: { goalId: state.goalId, state: state.state } });
      }
    } catch (error) {
      logInternalError("goal-state-store.save", error, `goalId=${state.goalId}`);
      throw error;
    }
  }

  /**
   * Patch a goal's top-level fields (e.g. state, turnsUsed, budgetUsed, currentRunId).
   *
   * `goalId` and `createdAt` always keep their stored values, even if the
   * patch tries to override them.
   *
   * @returns The merged state that was saved, or `undefined` if the goal could
   *   not be loaded.
   */
  patch(goalId: string, patch: Partial<GoalLoopState>, eventsPath?: string): GoalLoopState | undefined {
    const current = this.load(goalId);
    if (!current) return undefined;
    const next: GoalLoopState = { ...current, ...patch, goalId: current.goalId, createdAt: current.createdAt };
    this.save(next, eventsPath);
    return next;
  }

  /** Convenience: transition state with optional event emission. */
  setStatus(goalId: string, state: GoalLoopStatus, eventsPath?: string): GoalLoopState | undefined {
    return this.patch(goalId, { state }, eventsPath);
  }

  /**
   * Cross-process compare-and-set of a goal's status (P1b, RFC v0.5 §P1b;
   * cold-review #2 HIGH #2 fix).
   *
   * Loads the current state; if `current.state === expected`, sets it to
   * `next`, persists, and emits a `goal.state_changed` event through `save()`.
   * Otherwise nothing is mutated and no event is emitted.
   *
   * The read-check-write is wrapped in an O_EXCL lockfile per goalId because a
   * synchronous read-check-write is only atomic within one event loop:
   * `goal resume` runs in a different process than the (exited) loop, so two
   * concurrent resumes could both observe `stuck`, both pass the check, and
   * both spawn a background loop.
   *
   * Legal P1b transitions are enforced by callers, not by this method:
   * running → stuck, stuck → running, stuck → cancelled.
   *
   * @returns The updated state on success; `undefined` when the lock could not
   *   be acquired, the goal could not be loaded, or the stored state differs
   *   from `expected`.
   */
  compareAndSetStatus(
    goalId: string,
    expected: GoalLoopStatus,
    next: GoalLoopStatus,
    eventsPath?: string,
  ): GoalLoopState | undefined {
    const lockPath = `${goalFilePath(this.cwd, goalId)}.cas.lock`;
    if (!this.acquireCasLock(lockPath)) {
      return undefined; // Another process holds the CAS lock — caller treats as CAS-failed.
    }
    try {
      const current = this.load(goalId);
      if (!current) return undefined;
      if (current.state !== expected) return undefined; // CAS failed — state moved underneath us.
      const updated: GoalLoopState = { ...current, state: next };
      this.save(updated, eventsPath);
      return updated;
    } finally {
      try { unlinkSync(lockPath); } catch { /* best-effort; may already be gone */ }
    }
  }

  /**
   * Acquire an O_EXCL lockfile for CAS, with stale-lock recovery.
   *
   * A lockfile older than {@link GoalStore.CAS_LOCK_STALE_MS} is force-deleted
   * and creation is retried once (crash recovery).
   *
   * NOTE(review): the stat → unlink → open recovery sequence is not atomic.
   * Two contenders that both observe the same stale file can each delete and
   * recreate it, so both may believe they hold the lock. Worth confirming
   * whether that window matters for callers.
   *
   * @returns `true` when this call created the lockfile, `false` otherwise.
   */
  private acquireCasLock(lockPath: string): boolean {
    try {
      const fd = openSync(lockPath, "wx"); // O_EXCL — throws EEXIST if already exists.
      closeSync(fd);
      return true;
    } catch (error) {
      const code = (error as NodeJS.ErrnoException).code;
      if (code !== "EEXIST") return false;
      // Stale recovery: if the lockfile is too old, force-delete and retry once.
      try {
        const stat = statSync(lockPath);
        if (Date.now() - stat.mtimeMs > GoalStore.CAS_LOCK_STALE_MS) {
          unlinkSync(lockPath);
          const fd = openSync(lockPath, "wx");
          closeSync(fd);
          return true;
        }
      } catch { /* fall through */ }
      return false;
    }
  }

  /**
   * Remove a goal file (used by `goal clear`).
   *
   * @returns `true` if the file existed and was deleted; `false` if it was
   *   absent or any error occurred (errors are logged, not thrown).
   */
  remove(goalId: string): boolean {
    try {
      const path = goalFilePath(this.cwd, goalId);
      if (!existsSync(path)) return false;
      unlinkSync(path);
      return true;
    } catch (error) {
      logInternalError("goal-state-store.remove", error, `goalId=${goalId}`);
      return false;
    }
  }

  /**
   * List all known goals, newest first by `updatedAt`.
   *
   * Only `*.json` entries whose stem matches `[A-Za-z0-9_-]+` are considered.
   * Each entry is loaded in isolation: `load()` throws when the path-safety
   * check rejects an id, and one such file must not blank the whole listing.
   *
   * @returns The loadable goals; `[]` when the goals directory is missing or
   *   cannot be read (the error is logged).
   */
  list(): GoalLoopState[] {
    try {
      const root = resolveGoalsRoot(this.cwd);
      if (!existsSync(root)) return [];
      const safeStem = /^[A-Za-z0-9_-]+$/;
      const goals: GoalLoopState[] = [];
      for (const entry of readdirSync(root)) {
        if (!entry.endsWith(".json")) continue;
        const goalId = entry.slice(0, -".json".length);
        // Skip entries that fail the safe-id pattern (defensive; createGoalId always produces safe ids).
        if (!safeStem.test(goalId)) continue;
        try {
          const goal = this.load(goalId);
          if (goal) goals.push(goal);
        } catch (error) {
          // Skip just this entry and keep listing the rest.
          logInternalError("goal-state-store.list", error, `goalId=${goalId}`);
        }
      }
      goals.sort((a, b) => (b.updatedAt ?? "").localeCompare(a.updatedAt ?? ""));
      return goals;
    } catch (error) {
      logInternalError("goal-state-store.list", error, `cwd=${this.cwd}`);
      return [];
    }
  }
}
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
import { spawn } from "node:child_process";
|
|
8
8
|
import * as fs from "node:fs";
|
|
9
9
|
import * as path from "node:path";
|
|
10
|
+
import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
|
|
10
11
|
import { resolveShellForScript } from "../utils/resolve-shell.ts";
|
|
11
12
|
import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
|
|
12
13
|
import { DENIED_METRIC_NAMES } from "./metric-parser.ts";
|
|
@@ -171,7 +172,7 @@ export async function runIterationHook(
|
|
|
171
172
|
const { command, args } = resolveShellForScript(resolvedScript);
|
|
172
173
|
const child = spawn(command, args, {
|
|
173
174
|
cwd: payload.cwd,
|
|
174
|
-
env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER",
|
|
175
|
+
env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER", ...WINDOWS_ESSENTIAL_ENV_VARS, "TMPDIR", "LANG", "LC_ALL", "PI_CREW_*"] }), PI_CREW_HOOK: "1" },
|
|
175
176
|
stdio: ["pipe", "pipe", "pipe"],
|
|
176
177
|
});
|
|
177
178
|
|
package/src/runtime/pi-args.ts
CHANGED
|
@@ -269,8 +269,16 @@ export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerA
|
|
|
269
269
|
const explicitTools = policy.tools;
|
|
270
270
|
const excludeTools = policy.excludeTools;
|
|
271
271
|
|
|
272
|
-
|
|
273
|
-
|
|
272
|
+
// §0c C6: agent.disableTools (Pi `--no-tools`) fully disables all tools. Used by
|
|
273
|
+
// capability-locked agents (e.g. the goal-judge) that must have NO agency.
|
|
274
|
+
// MUST come before any --tools/--exclude-tools so it wins (Pi applies last-wins).
|
|
275
|
+
// An empty `tools:[]` is INSUFFICIENT because the length-check below skips it.
|
|
276
|
+
if (input.agent.disableTools === true) {
|
|
277
|
+
args.push("--no-tools");
|
|
278
|
+
} else {
|
|
279
|
+
if (explicitTools?.length) args.push("--tools", explicitTools.join(","));
|
|
280
|
+
if (excludeTools?.length) args.push("--exclude-tools", excludeTools.join(","));
|
|
281
|
+
}
|
|
274
282
|
// Always add --no-extensions before --extension to prevent user extensions from being auto-loaded.
|
|
275
283
|
// User extensions in ~/.pi/agent/extensions/ may fail due to missing dependencies.
|
|
276
284
|
args.push("--no-extensions");
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { execFileSync } from "node:child_process";
|
|
8
8
|
import * as path from "node:path";
|
|
9
|
+
import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
|
|
9
10
|
import { resolveShellForScript } from "../utils/resolve-shell.ts";
|
|
10
11
|
import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
|
|
11
12
|
|
|
@@ -94,7 +95,7 @@ export async function runPostCheck(config: PostCheckConfig, cwd: string): Promis
|
|
|
94
95
|
timeout: timeoutMs,
|
|
95
96
|
encoding: "utf-8",
|
|
96
97
|
maxBuffer: 10 * 1024 * 1024, // 10 MB
|
|
97
|
-
env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER",
|
|
98
|
+
env: { ...sanitizeEnvSecrets(process.env, { allowList: ["PATH", "HOME", "USER", ...WINDOWS_ESSENTIAL_ENV_VARS, "TMPDIR", "LANG", "LC_ALL", "PI_CREW_*"] }), PI_CREW_POST_CHECK: "1" },
|
|
98
99
|
});
|
|
99
100
|
|
|
100
101
|
const durationMs = Date.now() - startTime;
|
|
@@ -41,6 +41,14 @@ export function extractStructuredResult(raw: string, _schema?: Record<string, un
|
|
|
41
41
|
return { structured: true, data: markerResult, rawText: raw };
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
+
// Strategy 4: Scan for the first JSON object/array anywhere in text.
|
|
45
|
+
// Models often add prose preamble/epilogue ("Here's my review:", "Let me analyze...")
|
|
46
|
+
// around the JSON. This catches JSON embedded in sentences, lists, or prose.
|
|
47
|
+
const scannedResult = tryScanJson(trimmed);
|
|
48
|
+
if (scannedResult !== undefined) {
|
|
49
|
+
return { structured: true, data: scannedResult, rawText: raw };
|
|
50
|
+
}
|
|
51
|
+
|
|
44
52
|
return { structured: false, data: null, rawText: raw };
|
|
45
53
|
}
|
|
46
54
|
|
|
@@ -63,6 +71,30 @@ function tryFencedJson(text: string): unknown | undefined {
|
|
|
63
71
|
}
|
|
64
72
|
}
|
|
65
73
|
|
|
74
|
+
/**
 * Strategy 4: locate the first parseable JSON object/array embedded in `text`.
 *
 * Walks the text left to right. At every `{` or `[` it asks
 * `findMatchingBracket` where the bracketed span ends and tries to
 * `JSON.parse` that span. A non-positive result or a parse failure moves on to
 * the next opening bracket, so prose before or after the JSON is tolerated.
 *
 * @returns The first successfully parsed value, or `undefined` if none parses.
 */
function tryScanJson(text: string): unknown | undefined {
  let cursor = 0;
  while (cursor < text.length) {
    const opener = text[cursor];
    if (opener === "{" || opener === "[") {
      const tail = text.slice(cursor);
      const closeAt = findMatchingBracket(tail);
      if (closeAt > 0) {
        try {
          return JSON.parse(tail.slice(0, closeAt));
        } catch {
          // Not valid JSON at this position; keep scanning for the next '{'/'['.
        }
      }
    }
    cursor += 1;
  }
  return undefined;
}
|
|
97
|
+
|
|
66
98
|
function tryMarkerExtraction(text: string): unknown | undefined {
|
|
67
99
|
// Try to find JSON after common markers
|
|
68
100
|
const markers = ["RESULT:", "OUTPUT:", "ANSWER:", "### Result\n", "## Output\n"];
|
|
@@ -408,9 +408,19 @@ export function hasPendingMutatingTaskAtBoundary(tasks: TeamTaskState[]): boolea
|
|
|
408
408
|
function dagReadyTaskIds(tasks: TeamTaskState[], completedIds: Set<string>): string[] | null {
|
|
409
409
|
const hasExplicitDeps = tasks.some((t) => t.dependsOn.length > 0);
|
|
410
410
|
if (!hasExplicitDeps) return null;
|
|
411
|
+
// FIX (goal-wrap runtime test): task.dependsOn stores STEP IDs (e.g. "execute"), not
|
|
412
|
+
// task IDs (e.g. "02_execute"). The DAG scheduler compares deps against completedIds
|
|
413
|
+
// (which are task IDs), so step-ID deps would never match → dependent tasks stuck blocked
|
|
414
|
+
// forever. Map step IDs -> task IDs first (mirror dependencySatisfied in
|
|
415
|
+
// task-graph-scheduler.ts which handles this via stepToTaskId). buildDagExecutionPlan +
|
|
416
|
+
// getDagReadyTasks then work on consistent task IDs.
|
|
417
|
+
const stepToTaskId = new Map<string, string>();
|
|
418
|
+
for (const t of tasks) {
|
|
419
|
+
if (t.stepId) stepToTaskId.set(t.stepId, t.id);
|
|
420
|
+
}
|
|
411
421
|
const nodes: TaskNode[] = tasks.map((t) => ({
|
|
412
422
|
id: t.id,
|
|
413
|
-
dependsOn: t.dependsOn,
|
|
423
|
+
dependsOn: t.dependsOn.map((dep) => stepToTaskId.get(dep) ?? dep),
|
|
414
424
|
phase: t.adaptive?.phase ?? t.stepId,
|
|
415
425
|
}));
|
|
416
426
|
const plan = buildDagExecutionPlan(nodes);
|
|
@@ -12,9 +12,77 @@
|
|
|
12
12
|
import { spawn } from "node:child_process";
|
|
13
13
|
import * as fs from "node:fs";
|
|
14
14
|
import * as path from "node:path";
|
|
15
|
+
import { WINDOWS_ESSENTIAL_ENV_VARS } from "../utils/env-allowlist.ts";
|
|
15
16
|
import { writeArtifact } from "../state/artifact-store.ts";
|
|
17
|
+
import { redactSecretString } from "../utils/redaction.ts";
|
|
18
|
+
import { sanitizeEnvSecrets } from "../utils/env-filter.ts";
|
|
16
19
|
import type { VerificationContract, VerificationCommandResult, GreenLevel, ArtifactDescriptor } from "../state/types.ts";
|
|
17
20
|
|
|
21
|
+
/**
 * Environment variables that survive sanitization for verification commands
 * (Phase 1.5 #1, RFC 13 §6 info-disclosure mitigation).
 *
 * The intent is that verification commands never receive model-provider
 * secrets, so worker-induced output cannot leak them. P1f redaction at
 * artifact-write and judge-bound is regex best-effort against adversarial
 * workers; withholding the secret from the process removes the leak at the
 * source.
 *
 * Sanitization is opt-in via `PI_CREW_VERIFICATION_SANITIZE_ENV=1`, so existing
 * flows whose tests legitimately need API access keep working. Escape hatch:
 * `PI_CREW_VERIFICATION_PRESERVE_ENV=KEY1,KEY2,...` adds specific keys back to
 * the allowlist.
 */
const VERIFICATION_ENV_ALLOWLIST: readonly string[] = [
  // Essential non-secret vars only — NO model-provider keys by default.
  "PATH", "HOME", "USER", "SHELL", "TERM",
  // Locale.
  "LANG", "LC_ALL", "LC_COLLATE", "LC_CTYPE", "LC_MESSAGES", "LC_MONETARY", "LC_NUMERIC", "LC_TIME",
  // XDG base directories.
  "XDG_CONFIG_HOME", "XDG_DATA_HOME", "XDG_CACHE_HOME", "XDG_RUNTIME_DIR",
  // Windows essentials — see WINDOWS_ESSENTIAL_ENV_VARS (src/utils/env-allowlist.ts).
  ...WINDOWS_ESSENTIAL_ENV_VARS,
  // Node.js toolchain (nvm, node, npm configuration).
  "NVM_BIN", "NVM_DIR", "NVM_INC",
  "NODE_PATH", "NODE_DISABLE_COLORS", "NODE_EXTRA_CA_CERTS",
  "NPM_CONFIG_REGISTRY", "NPM_CONFIG_USERCONFIG", "NPM_CONFIG_GLOBALCONFIG",
];
|
|
64
|
+
|
|
65
|
+
/**
 * Report whether env sanitization for verification commands is switched on.
 *
 * @returns `true` when either `PI_CREW_VERIFICATION_SANITIZE_ENV` or
 *   `PI_TEAMS_VERIFICATION_SANITIZE_ENV` is exactly `"1"`; `false` otherwise.
 */
export function isVerificationEnvSanitizeEnabled(): boolean {
  const flags = [
    process.env.PI_CREW_VERIFICATION_SANITIZE_ENV,
    process.env.PI_TEAMS_VERIFICATION_SANITIZE_ENV,
  ];
  return flags.some((value) => value === "1");
}
|
|
69
|
+
|
|
70
|
+
/**
 * Assemble the environment handed to a verification command.
 *
 * - Sanitization disabled: the full `process.env` is passed through.
 * - Sanitization enabled: `process.env` is filtered by `sanitizeEnvSecrets`
 *   using VERIFICATION_ENV_ALLOWLIST plus any keys named in
 *   `PI_CREW_VERIFICATION_PRESERVE_ENV` (falling back to
 *   `PI_TEAMS_VERIFICATION_PRESERVE_ENV`), a comma-separated list whose
 *   entries are trimmed and whose empty entries are dropped.
 *
 * In both cases `FORCE_COLOR` is set to `"0"` last, so it overrides any
 * inherited value and keeps output plain-text.
 */
function buildVerificationEnv(): Record<string, string> {
  const plainOutput = { FORCE_COLOR: "0" };

  if (isVerificationEnvSanitizeEnabled()) {
    const rawPreserve =
      process.env.PI_CREW_VERIFICATION_PRESERVE_ENV ?? process.env.PI_TEAMS_VERIFICATION_PRESERVE_ENV ?? "";

    const preservedKeys: string[] = [];
    for (const piece of rawPreserve.split(",")) {
      const key = piece.trim();
      if (key.length > 0) preservedKeys.push(key);
    }

    const sanitized = sanitizeEnvSecrets(process.env, {
      allowList: [...VERIFICATION_ENV_ALLOWLIST, ...preservedKeys],
    });
    return { ...sanitized, ...plainOutput };
  }

  return { ...process.env, ...plainOutput };
}
|
|
85
|
+
|
|
18
86
|
export interface PhaseGateResult {
|
|
19
87
|
phase: number;
|
|
20
88
|
name: string;
|
|
@@ -116,7 +184,7 @@ async function executeCommand(
|
|
|
116
184
|
const shell = spawn("sh", ["-c", command], {
|
|
117
185
|
cwd,
|
|
118
186
|
timeout: timeoutMs,
|
|
119
|
-
env:
|
|
187
|
+
env: buildVerificationEnv(),
|
|
120
188
|
});
|
|
121
189
|
|
|
122
190
|
shell.stdout?.on("data", (data) => {
|
|
@@ -249,6 +317,11 @@ export async function executeVerificationCommands(
|
|
|
249
317
|
taskId: string,
|
|
250
318
|
artifactsRoot: string,
|
|
251
319
|
signal?: AbortSignal,
|
|
320
|
+
/** Phase 1.5 #2 (RFC 16): when provided, run verification commands in this
|
|
321
|
+
* pristine git-worktree path instead of `cwd`. The caller is responsible
|
|
322
|
+
* for preparing + cleaning up the worktree (see verification-worktree.ts).
|
|
323
|
+
* When undefined, behavior is unchanged (run in `cwd`). */
|
|
324
|
+
worktreeCwd?: string,
|
|
252
325
|
): Promise<VerificationCommandResult[]> {
|
|
253
326
|
if (!contract.commands || contract.commands.length === 0) {
|
|
254
327
|
return [];
|
|
@@ -269,8 +342,17 @@ export async function executeVerificationCommands(
|
|
|
269
342
|
fs.mkdirSync(gatesDir, { recursive: true });
|
|
270
343
|
}
|
|
271
344
|
|
|
345
|
+
// Phase 1.5 #2: run phase gates inside the worktree when provided.
|
|
346
|
+
const execCwd = worktreeCwd ?? cwd;
|
|
347
|
+
|
|
272
348
|
// Run phase gates
|
|
273
|
-
const bundle = await runPhaseGates(gates,
|
|
349
|
+
const bundle = await runPhaseGates(gates, execCwd, signal, (phaseResult) => {
|
|
350
|
+
// P1f: redact secrets from verification output BEFORE persisting to the
|
|
351
|
+
// world-readable artifact file. redactSecretString is best-effort vs
|
|
352
|
+
// adversarial workers (RFC §6 — Med-High residual). writeArtifact ALSO
|
|
353
|
+
// redacts (defense-in-depth); this explicit pass sanitizes the raw output
|
|
354
|
+
// at the source so the in-memory bundle and the summary below are clean.
|
|
355
|
+
const safeOutput = redactSecretString(phaseResult.output || "");
|
|
274
356
|
// Write phase artifact immediately for observability
|
|
275
357
|
const phaseArtifact = writeArtifact(artifactsRoot, {
|
|
276
358
|
kind: "log",
|
|
@@ -284,7 +366,7 @@ export async function executeVerificationCommands(
|
|
|
284
366
|
phaseResult.error ? `Error: ${phaseResult.error}` : "",
|
|
285
367
|
"",
|
|
286
368
|
"## Output",
|
|
287
|
-
|
|
369
|
+
safeOutput || "(no output)",
|
|
288
370
|
].join("\n"),
|
|
289
371
|
producer: taskId,
|
|
290
372
|
});
|
|
@@ -297,11 +379,12 @@ export async function executeVerificationCommands(
|
|
|
297
379
|
});
|
|
298
380
|
});
|
|
299
381
|
|
|
300
|
-
// Write summary artifact
|
|
382
|
+
// Write summary artifact. Redact the whole bundle JSON (it embeds the raw
|
|
383
|
+
// per-phase output strings) BEFORE writeArtifact persists it.
|
|
301
384
|
const summaryArtifact = writeArtifact(artifactsRoot, {
|
|
302
385
|
kind: "metadata",
|
|
303
386
|
relativePath: `verification-gates/${taskId}-summary.json`,
|
|
304
|
-
content: JSON.stringify(bundle, null, 2),
|
|
387
|
+
content: redactSecretString(JSON.stringify(bundle, null, 2)),
|
|
305
388
|
producer: taskId,
|
|
306
389
|
});
|
|
307
390
|
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification Integrity — manifest bookend snapshot helper (RFC §P1a).
|
|
3
|
+
*
|
|
4
|
+
* Hashes a FIXED set of project-manifest files so the goal loop can detect
|
|
5
|
+
* drift between T_snap (before verification runs) and T_verify_done (after the
|
|
6
|
+
* command exits). This closes the PERSISTENT-edit subcase of workspace
|
|
7
|
+
* tampering (a worker that rewrites package.json / lockfile and leaves it
|
|
8
|
+
* changed). See RFC §P1a / §6 STRIDE for the full threat model.
|
|
9
|
+
*
|
|
10
|
+
* RESIDUALS (documented; closed by Phase 1.5 git-worktree sandbox, NOT here):
|
|
11
|
+
* - Round-trip tamper: a worker can edit a manifest, run the test, then REVERT
|
|
12
|
+
* before T_verify_done so the hash matches T_snap. Content-addressed
|
|
13
|
+
* execution (git-worktree) is required to close this. Not fixable by hashing.
|
|
14
|
+
* - Invoked-script tampering: only the manifest files in MANIFEST_FILES are
|
|
15
|
+
* hashed. A worker that overwrites a script the verification command invokes
|
|
16
|
+
* is NOT caught. Phase 1.5 git-worktree closes this.
|
|
17
|
+
* - node_modules/ and transitive deps are deliberately NOT hashed (size +
|
|
18
|
+
* churn); package-lock.json IS hashed, which transitively pins resolved
|
|
19
|
+
* dependency versions.
|
|
20
|
+
*
|
|
21
|
+
* Pure leaf module: depends only on node: built-ins. Does NOT import
|
|
22
|
+
* goal-loop-runner or goal-evaluator (keeps the P1a helper unit-testable in
|
|
23
|
+
* isolation and avoids pulling the conflict-zone modules into this file).
|
|
24
|
+
*
|
|
25
|
+
* @module verification-integrity
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { createHash } from "node:crypto";
|
|
29
|
+
import * as fs from "node:fs";
|
|
30
|
+
import * as path from "node:path";
|
|
31
|
+
|
|
32
|
+
/**
 * The fixed list of project-manifest file names that {@link snapshotManifests}
 * looks for (RFC §P1a). Entries that do not exist in the target directory are
 * simply not hashed. Order matters: it determines snapshot key order.
 */
export const MANIFEST_FILES = [
  "package.json", // npm manifest
  "package-lock.json", // npm lockfile
  "pyproject.toml", // Python project metadata
  "setup.py", // Python setup script
  "Cargo.toml", // Rust manifest
  "Cargo.lock", // Rust lockfile
  "go.mod", // Go module definition
  "tsconfig.json", // TypeScript compiler config
] as const;
|
|
48
|
+
|
|
49
|
+
/**
 * Produce a sha256 fingerprint for every MANIFEST_FILES entry present in `cwd`.
 *
 * Entries are skipped, never reported as errors, when they:
 *   - do not exist (a Python project has no package.json, and so on),
 *   - are not regular files (directories, etc.), or
 *   - cannot be read (permissions, or a race with a concurrent writer).
 *
 * Only the names in MANIFEST_FILES are looked at, so node_modules is never
 * hashed.
 *
 * @param cwd Directory in which the manifest names are resolved.
 * @returns Map of relative manifest name -> sha256 hex digest, with keys
 *   inserted in MANIFEST_FILES order.
 */
export function snapshotManifests(cwd: string): Record<string, string> {
  const digests: Record<string, string> = {};
  for (const name of MANIFEST_FILES) {
    const target = path.join(cwd, name);
    try {
      // statSync throws for a missing path; non-regular entries are ignored.
      if (!fs.statSync(target).isFile()) continue;
      const bytes = fs.readFileSync(target);
      digests[name] = createHash("sha256").update(bytes).digest("hex");
    } catch {
      // Missing or unreadable — leave this manifest out of the snapshot.
    }
  }
  return digests;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Compare two snapshots and return the list of DRIFTED file paths.
 *
 * A file is considered drifted if:
 *   - its hash differs between `a` and `b`, OR
 *   - it is present in only one of the two snapshots (added or removed).
 *
 * Lookups consider OWN properties only. A plain `snapshot[key]` read would
 * fall through to `Object.prototype`, so a key such as `"toString"` that
 * exists in just one snapshot would look present in the other and the
 * addition would go unreported.
 *
 * @param a Snapshot taken first.
 * @param b Snapshot taken second.
 * @returns Sorted array of relative manifest paths that drifted. Identical
 *   snapshots yield `[]`.
 */
export function compareSnapshot(
  a: Record<string, string>,
  b: Record<string, string>,
): string[] {
  // Own-property read; inherited members count as absent.
  const ownHash = (snapshot: Record<string, string>, key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(snapshot, key) ? snapshot[key] : undefined;

  const drifted = new Set<string>();
  for (const [key, hash] of Object.entries(a)) {
    const other = ownHash(b, key);
    if (other === undefined || other !== hash) {
      drifted.add(key); // removed between a -> b, or content changed
    }
  }
  for (const key of Object.keys(b)) {
    if (ownHash(a, key) === undefined) {
      drifted.add(key); // added between a -> b
    }
  }
  return [...drifted].sort();
}
|