npm - pi-crew - Versions diffs - 0.9.10 → 0.9.12 - Mend

pi-crew 0.9.10 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/CHANGELOG.md +90 -0
package/package.json +1 -1
package/src/config/role-tools.ts +39 -6
package/src/extension/crew-shortcuts.ts +29 -2
package/src/extension/registration/commands.ts +61 -34
package/src/runtime/async-runner.ts +70 -74
package/src/runtime/background-runner.ts +13 -2
package/src/runtime/process-status.ts +7 -2
package/src/runtime/role-permission.ts +5 -21
package/src/runtime/task-runner/prompt-builder.ts +1 -0
package/src/state/artifact-store.ts +22 -2
package/src/ui/crew-footer.ts +3 -3
package/src/ui/crew-select-list.ts +1 -1
package/src/ui/dashboard-panes/agents-pane.ts +26 -4
package/src/ui/dashboard-panes/cancellation-pane.ts +23 -0
package/src/ui/keybinding-map.ts +7 -3
package/src/ui/live-conversation-overlay.ts +2 -2
package/src/ui/live-run-sidebar.ts +18 -10
package/src/ui/overlays/help-overlay.ts +166 -0
package/src/ui/run-dashboard.ts +210 -70
package/src/ui/status-colors.ts +45 -0
package/src/ui/widget/index.ts +46 -3
package/src/ui/widget/widget-formatters.ts +22 -7
package/src/ui/widget/widget-renderer.ts +31 -27
package/src/utils/redaction.ts +49 -31
package/src/utils/visual.ts +3 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,95 @@
 # Changelog
+## [v0.9.12] — TUI UI/UX polish (21 findings) (2026-06-27)
+Comprehensive TUI UX review of pi-crew's UI layer (`src/ui/` + `src/utils/visual.ts`). 21 findings (5×P1, 10×P2, 6×P3), all addressed. Full review with evidence-backed file:line citations: `research-findings/pi-crew-uiux-review.md`.
+### Bug fixes
+- **F-3 (P1)** — `src/ui/live-conversation-overlay.ts`: local `pad` used `s.length` (counted ANSI escape bytes) and `content.slice` (UTF-16 units) → border drift on every colored or CJK line. Replaced with `pad`/`truncate` from `utils/visual.ts` (reference impl: `transcript-viewer.ts`).
+- **F-1 / F-2 / V-3 (P1/P2)** — `src/ui/status-colors.ts` (new shared `colorizeStatusGlyphs()`): unified status-glyph colorization covers ⏳ (waiting), ⚠ (needs_attention), and the braille spinner range ⠁–⣿ — the two most attention-demanding states were previously uncolored. Replaces duplicated per-module glyph maps/regexes in `widget-renderer.ts`, `live-run-sidebar.ts`, and `run-dashboard.ts`.
+- **L-1 (P1)** — `src/ui/run-dashboard.ts`: windowed run list with `scrollOffset`; selection can no longer escape the rendered 8-row window. ↓ past row 7 previously hid the highlight and Enter acted on an invisible run. Brute-force verified for 0–30 runs.
+- **L-2 (P1)** — cancellation/failure reason now shown in default detail row (`run-dashboard.ts`) and `live-run-sidebar.ts`. Previously rendered only in `progress-pane` (pane `2`). `cancellation-pane.ts` wired in via `summarizeTerminalReason()` (D-1).
+- **V-1 (P2)** — tabular-aligned numeric metrics across `run-dashboard` footer, `dashboard-panes/agents-pane.ts`, and `widget/widget-formatters.ts` via width-aware `alignMetric` (visibleWidth-based). Eliminates per-tick column jitter on transitions like 9.9s→10.0s and 950→1.0k.
+- **F-5 (P2)** — `src/runtime/process-status.ts`: `ERROR_VISIBILITY_GRACE_MS = 10 * 60_000` (was the 8s `COMPLETED_VISIBILITY_GRACE_MS` shared with completed runs). Failed/cancelled runs now linger 10 min in the crew widget — the run-level header (✗ team/workflow · X/Y agents) is the "one-line trace". Successful completions still vanish in 8s.
+- **K-1 (P2)** — new `src/ui/overlays/help-overlay.ts`: `?` opens the HelpOverlay rendering `BINDINGS[]` grouped by scope. ~16/20 keybindings were previously undiscoverable (no `?` cheatsheet existed). Header hint updated.
+- **F-6 (P2)** — `src/ui/live-run-sidebar.ts`: auto-close countdown moved inside the bordered box (was rendered below the bottom border).
+- **V-2 (P2)** — `src/ui/crew-footer.ts`, `src/ui/crew-select-list.ts`: dropped ASCII `'...'` 3rd arg from `truncate(...)` so they use the default `'…'` (U+2026) consistently with the rest of the UI. Test updated.
+- **L-3 / L-4 / L-5 (P2/P3)** — width-aware `runLabel` keeps the goal visible (the most meaningful field); widget prioritizes running > queued > waiting with `finishedSlots` so finished rows only fill leftover budget and never push a live agent's activity line off-screen; run-list separator unified to ` · `.
+- **F-4 / F-7 (P2/P3)** — stale-snapshot hint in the default dashboard view when manifest reads are flaky; actionable empty/error states (no more "Dashboard error — see logs").
+- **T-1 (P3)** — `src/utils/visual.ts`: ZWJ (`U+200D`) removed from `WIDE_RANGES` (now correctly width-0; was inflating compound-emoji width and over-truncating).
+- **T-2 (P3)** — `src/ui/widget/index.ts`: debounced (~120ms) SIGWINCH + stdout-resize listener busts the render cache and requests a repaint. Guarded against double-registration across widget reinstalls.
+- **D-1 (P3)** — `src/ui/dashboard-panes/cancellation-pane.ts` wired in via `summarizeTerminalReason()` (was dead-imported by `test/unit/cancellation-pane.test.ts`, so kept and given a real consumer instead of deleted).
+### Shortcut collision fix
+- **Extension-load warning**: `alt+d` collided with `tui.editor.deleteWordForward` (verified in pi-tui `TUI_KEYBINDINGS`). Pi resolved in favor of the extension, *stripping* the editor's delete-word-forward binding. Moved dashboard shortcut to **`alt+c`** (mnemonic: **C**rew — verified free against the full built-in `alt+` keymap: occupied letters are `b, d, f, v, y`).
+- **Test guard hardened**: `test/unit/crew-shortcuts.test.ts` collision set now includes the editor keys (`alt+b/d/f/y`, `alt+backspace`, `alt+delete`). The previous set was incomplete (`alt+v, alt+enter, alt+arrows` only), which is why the bug slipped past tests. This class of regression is now caught at test time.
+### Decisions (deviations from the initial review)
+- **K-3 `KEY_RESERVED`**: NOT dead code — consumed by `test/unit/keybinding-map.parity.test.ts:29,185-203` and `test/manual/l2-keybinding-dispatch-smoke.mjs`. Corrected the misleading "dead code" doc instead of deleting (deleting would have broken the parity test).
+- **K-2 `alt+m` / `alt+t`**: blocked — mailbox overlay is run-scoped (requires a runId; reached via the dashboard); `team-status` is a text command (`handleTeamTool({action:"status"})`), not an overlay opener. `alt+c → dashboard` is the only new shortcut wired.
+- **F-5 location**: fixed at the correct layer (`process-status.ts` run-level grace) rather than the report's suggested `widget-renderer.ts` agent-row linger; the run-level header line is the right "one-line trace" per the finding.
+### Verification
+- `npx tsc --noEmit` + `npm run typecheck`: 0 errors (incl. strip-types import smoke).
+- Focused UI cluster: **74/74 pass** across 8 suites — `crew-shortcuts` (incl. strengthened collision assertion), `crew-footer` (incl. renamed V-2 test), `keybinding-map parity`, `cancellation-pane`, `process-status ×3`, `agents-pane-cost`.
+- Full suite baseline (before this batch): 5642 / 5639 pass / 0 fail. Two post-change full runs hit `ETIMEDOUT` on `spawnSync` inside integration child-spawn tests (`worktree-run`, `cleanup-full-flow`) under load (0 assertion failures — environmental flake). To be reconfirmed against CI.
+## [v0.9.11] — Per-run lock path for background-runner (parallel-spawn race) (2026-06-27)
+Bug caught by an E2E parallel-spawn test in this session, NOT by unit tests (which cannot spawn multiple real processes). Independent of the F1-F5/redaction batches.
+### Bug fix
+- **Shared `run.lock` killed concurrent background runners** (`src/runtime/background-runner.ts:417`). The bootstrap call passed a fake manifest `{ stateRoot: "", runId, cwd }` to `withRunLockSync` because the real manifest was not loaded yet. `lockPath()` = `path.join(manifest.stateRoot, "run.lock")` = `path.join("", "run.lock")` = `"run.lock"` — a RELATIVE path at cwd, SHARED across every run regardless of runId. When multiple background agents spawned in the same instant (e.g. parallel `Agent` calls), they raced on the single shared lock: one acquired, the rest failed fast ("Run 'run.lock' is locked by another operation") and exited within 3s. The existing "FIX Issue #3" comment claimed to prevent concurrent runners "for the same runId", but the lock path never contained the runId. Fix: compute the real per-run stateRoot via `createRunPaths(cwd, runId).stateRoot` before locking, so each run locks its own `<cwd>/.crew/state/runs/<runId>/run.lock`. Matches `locks-race.test.ts`.
+### Verification
+- `npx tsc --noEmit` EXIT 0
+- 7 lock-related suites pass (locks-race 10, background-runner-console-redirect 4, async-runner 13, api-locks 1, orphan-worker-registry 15, locks-untested 11, team-runner-heartbeat 2)
+- E2E reproduce (decisive): BEFORE the fix, 3 parallel background explorers → 1 pass + 2 fail (background.log: "Failed to acquire lock"). AFTER the fix, same scenario → 3/3 pass, 0 lock errors.
+### Lesson
+Concurrency/lock bugs only reproduce when multiple real processes spawn simultaneously — unit tests mocking a single process can never catch them. E2E parallel-spawn smoke tests are the only way to verify. (Reinforces the v0.9.9 lesson: E2E with real extension load is decisive.)
+## [v0.9.11] — Read-only permission model fixes F1-F5 (2026-06-27)
+Review of the role permission model (question: "do read-only workflows still persist their task output?") confirmed output persistence is runner-driven and correct, but surfaced 5 findings — one of the same defect class as the v0.9.10 writer incident (Fix 5), in the opposite direction.
+### Bug fixes
+- **`security-reviewer`/`test-engineer` tool config unreachable (F1, HIGH)** (`src/config/role-tools.ts`). Map keys were `security_reviewer`/`test_engineer` (underscore) while the runtime role strings are hyphenated (`agents/security-reviewer.md` → `security-reviewer`). `getToolConfig` did not normalize, so it returned `{}` and the strictest tool restrictions in the codebase silently never applied. Same defect class as the writer incident, opposite direction (under-enforce vs over-enforce). Tests masked it: they queried only the underscore forms. Fix: quote and hyphenate the keys (a bare `security-reviewer:` key parses as subtraction — must be quoted) and normalize in `getToolConfig` (`role.replaceAll("_","-")`); added a regression test that derives role names from the runtime sets and asserts each resolves its intended config.
+- **`critic`/`planner` tool-config gaps (F2)** (`src/config/role-tools.ts`). `critic` had no entry (a custom critic agent had no tool-level read-only enforcement); `planner`'s entry only excluded `ask_question` and did not enforce read-only. Added a `critic` entry and strengthened `planner` to a read-only tool-set.
+- **`planner` kept read-only with deliverable guidance (F3)** (`src/runtime/task-runner/prompt-builder.ts`). `planner` emits deliverables (`output: plan.md`) but moving it to WRITE_ROLES would fire the plan-approval gate BEFORE planning (breaking default/implementation workflows — `team-runner.ts:399` relies on planner being read-only). Fix: keep planner read-only and add a prompt line telling read-only roles their RESULT TEXT is persisted by the runner, so they emit deliverables as text instead of attempting file writes.
+- **`verifier` reclassified read-only → write (F4)** (`src/runtime/role-permission.ts` + `src/config/role-tools.ts`). `verifier`'s task runs tests via bash with redirects/cache writes (`npm test | tee`, `mkdir`, `rm`), all forbidden by the read-only prompt gate — a direct contradiction with `agents/verifier.md`. Moved verifier to WRITE_ROLES; tool-config keeps bash but excludes edit/write so source integrity is preserved. Mirrors `cold-verifier`.
+- **Dead command-enforcement removed (F5)** (`src/runtime/role-permission.ts`). `isReadOnlyCommand`/`checkRolePermission`/`READ_ONLY_COMMANDS` had zero runtime callers (only tests). Real protection lives in the role tool-config + `safe-paths.ts`/`resolveRealContainedPath` (10+ runtime callers). Deleted the dead code.
+### Verification
+- `npx tsc --noEmit` EXIT 0
+- 124 tests pass / 0 fail across 13 suites + 1 integration (role-tools 15, role-permission-cov 23, role-permission 2, role-permission.spawn 3, prompt-builder-cov 15, v0-8-0-tool-policy-unification 10, skill-instructions 16, plan-approval-boundary 7, crew-contracts 6, goal-loop-team-roles 5, t9-cold-verifier 5, completion-guard 7, verification-gates 10, role-tools-integration 3)
+- E2E: `research` workflow 3/3 tasks — explorer+analyst (read-only) persisted findings, writer wrote the deliverable file
+## [v0.9.11] — Secret redaction & env hardening (2026-06-27)
+Independent security review (review team, 3/4 tasks, ~360K tokens) flagged 3 Medium findings in the secret-redaction and env-passthrough surfaces. All verified by live `npx tsx` repro + source trace before fixing.
+### Bug fixes
+- **`redactAuthHeader` leaked credential values (L3/L5)** (`src/utils/redaction.ts`). Two defects: (1) `indexOf` matched only the FIRST `authorization:` occurrence per call, so a second header on a later line leaked verbatim; (2) the word-boundary allow-list excluded `-` and `\t`, so `Proxy-Authorization:` / `X-Authorization:` and tab-indented headers were not recognized. Fix: loop over all occurrences and add `-`/`\t` to a shared `AUTH_HEADER_BOUNDARY_CHARS` set (used by both `redactAuthHeader` and `redactBearerTokens`). Latent weakness caught by repro (NOT by the reviewer's proposed fix): the old code only APPENDED a ` ***` marker without removing the value — `"authorization: Basic abc123"` became `"authorization: Basic abc123 ***"` (credential still visible). The redact branch now blanks the value: `line.substring(authIdx, authIdx+14) + " ***"` → `"authorization: ***"`. Consistent with `redactInlineSecrets`.
+- **`writeArtifact` flat-redaction only (M2)** (`src/state/artifact-store.ts:130`). Applied only `redactSecretString` (flat regex scan), so quoted-JSON secrets (`"api_key":"sk-..."`) and nested keys survived into persisted artifacts (e.g. `startup-evidence.json` holds up to 500 chars of raw child stderr). Fix: structural-then-flat — when content parses as JSON, run `redactSecrets` (recursive) first, then flat `redactSecretString`. Order matters: structural catches quoted keys, flat still catches Bearer/JWT/Auth headers inside JSON string values. Formatting is preserved: the input is re-stringified with the SAME indentation (pretty → indent 2, compact → compact), so pretty-printed artifacts like group-join metadata keep their `"partial": false` whitespace (caught by `test/integration/phase4-runtime.test.ts` regression on CI after the first attempt shipped a compact re-stringify).
+- **Provider API keys leaked into the detached background runner (M1)** (`src/runtime/async-runner.ts:162`). The env allowlist forwarded 14 provider keys (MINIMAX/OPENAI/ANTHROPIC/...) to the background runner, contradicting `child-pi.ts:275` ("API keys are NOT needed — config file"). Keys leaked into V8 fatal-error reports (`--report-on-fatalerror` writes `environmentVariables` unredacted). The inline comment "same as child-pi.ts" was false. Fix: extracted `BACKGROUND_RUNNER_ENV_ALLOWLIST` (exported, unit-testable) and removed the 14 provider keys. Prereq verified: `background-runner.ts` does not read provider keys directly.
+### Verification
+- `npx tsc --noEmit` EXIT 0
+- 21 targeted suites pass (~130 tests): redaction-cov (32), redaction-p1f (18), redaction-transcript-roundtrip (3), child-pi-sec1-redaction (8), artifact-store (4), async-runner (13), env-filter (4), env-filter-cov (9), security-hardening (8), round28-otlp-crlf (4), child-pi-compaction-real (9), + others
+- Live repro: `redactSecretString("Proxy-Authorization: Basic c2VjcmV0")` → `"Proxy-Authorization: ***"` (was: unchanged leak)
 ## [v0.9.10 (continued)] — Round 29 follow-ups: BG2 sweep bug fixes, test optimization, E2E verification (2026-06-26)
 A full-suite verify run (`verify-full2`, 5502 tests, 774 suites) surfaced 4 file-level timeouts and 2 real correctness bugs. This release fixes the 2 real bugs, the underlying cause of 2 of the 4 timeouts (chain-runner + orphan-worker-registry + cleanup-full-flow self-deadlock + HandoffManager interval leak), and adds E2E verification artifacts to prove all fixes hold against the live runtime, not just static analysis.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-crew",
-  "version": "0.9.10",
+  "version": "0.9.12",
   "description": "Pi extension for coordinated AI teams, workflows, worktrees, and async task orchestration",
   "author": "baphuongna",
   "license": "MIT",

package/src/config/role-tools.ts CHANGED Viewed

@@ -22,9 +22,23 @@ export const ROLE_TOOL_CONFIGS: Record<string, RoleToolConfig> = {
 		excludeTools: ["edit", "write", "ask_question"],
 	},
-	// Planner - Planning and documentation
+	// Planner - Read-only planning; emits plans as TEXT (runner persists result).
+	// F2/F3: strengthened to a read-only tool-set matching its READ_ONLY_ROLES
+	// classification. Deliverables are emitted as RESULT TEXT (consumed by
+	// adaptive-plan.ts / runner shared-output), NOT file writes — so the
+	// plan-approval gate boundary (planner = read-only) is preserved. Moving
+	// planner to WRITE_ROLES would fire the gate before planning, breaking the
+	// default/implementation workflows.
 	planner: {
-		excludeTools: ["ask_question"],
+		tools: ["read", "grep", "find", "ls", "glob"],
+		excludeTools: ["edit", "write", "bash", "web", "ask_question"],
+	},
+	// Critic - Read-only plan/design critique (F2: was missing from the map,
+	// so a custom critic agent had no tool-level read-only enforcement).
+	critic: {
+		tools: ["read", "grep", "find", "ls", "glob"],
+		excludeTools: ["edit", "write", "bash", "web"],
 	},
 	// Executor - Full access (default)
@@ -45,13 +59,26 @@ export const ROLE_TOOL_CONFIGS: Record<string, RoleToolConfig> = {
 	},
 	// Security Reviewer - Strict restrictions
-	security_reviewer: {
+	// F1: key is hyphenated to match the runtime role string (agents/
+	// security-reviewer.md → "security-reviewer"). The underscore form never
+	// resolved at runtime (returned {}), silently dropping enforcement.
+	"security-reviewer": {
 		tools: ["read", "grep", "find"],
 		excludeTools: ["edit", "write", "bash", "web", "ask_question"],
 	},
-	// Test Engineer - Can write tests
-	test_engineer: {
+	// Verifier - Runs tests (needs bash) but must NOT edit source (F4: moved
+	// from READ_ONLY_ROLES to WRITE_ROLES — the read-only prompt gate forbids
+	// the test-running redirects / cache writes its task requires, contradicting
+	// agents/verifier.md). Tool-set keeps bash but excludes edit/write so source
+	// integrity is preserved during verification. Mirrors cold-verifier behavior.
+	verifier: {
+		tools: ["read", "grep", "find", "ls", "bash"],
+		excludeTools: ["edit", "write", "web"],
+	},
+	// Test Engineer - Can write tests (F1: hyphenated key)
+	"test-engineer": {
 		tools: ["read", "edit", "write", "bash", "ls"],
 		excludeTools: ["web"],
 	},
@@ -61,7 +88,13 @@ export const ROLE_TOOL_CONFIGS: Record<string, RoleToolConfig> = {
  * Get tool configuration for a specific role.
  */
 export function getToolConfig(role: string): RoleToolConfig {
-	return ROLE_TOOL_CONFIGS[role] ?? {};
+	// F1: normalize hyphen/underscore. Runtime role strings are hyphenated
+	// (agents/security-reviewer.md → "security-reviewer") but map keys were
+	// historically underscored, silently returning {} at runtime — the same
+	// defect class as the v0.9.10 writer incident (opposite direction:
+	// under-enforce instead of over-enforce). Accept both forms.
+	const key = role.includes("_") ? role.replaceAll("_", "-") : role;
+	return ROLE_TOOL_CONFIGS[key] ?? ROLE_TOOL_CONFIGS[role] ?? {};
 }
 /**

package/src/extension/crew-shortcuts.ts CHANGED Viewed

@@ -6,9 +6,25 @@
  * built-in keymap (see analysis of pi-tui core/keybindings defaults):
  *
  *   alt+s → open the pi-crew settings overlay (config + theme picker)
+ *   alt+c → open the pi-crew run dashboard overlay (mnemonic: **C**rew)
  *
- * `alt+<letter>` combos are safe: Pi only binds `alt+v`, `alt+enter`, and the
- * alt+arrow navigation keys. `alt+s` is mnemonic (settings) and free.
+ * OCCUPIED alt+ keys in the built-in keymap (must NOT reuse — verified against
+ * pi-tui TUI_KEYBINDINGS + pi core keybindings.js):
+ *   alt+b (cursor word left)   alt+f (cursor word right)
+ *   alt+d (delete word forward) alt+y (yank pop)
+ *   alt+v (paste)              alt+s (crew settings — this module)
+ *   alt+enter / alt+up/down/left/right / alt+backspace / alt+delete
+ * Free alt+<letter> keys in the built-in keymap include: a, c (now claimed by
+ * this module for the dashboard), e, g, h, i, j, k, l, m, n, o, p, q, r, t, u,
+ * w, x, z.
+ *
+ * NOTE: an earlier revision used `alt+d` for the dashboard; that collided
+ * with `tui.editor.deleteWordForward` and Pi's conflict detector stripped the
+ * editor binding. `alt+c` is free AND mnemonic.
+ *
+ * NOTE: alt+m (mailbox) and alt+t (status) were considered but are NOT wired
+ * — the mailbox overlay is run-scoped (requires a runId; reached via the
+ * dashboard) and there is no standalone status overlay (status is a text
+ * command). See the K-2 note accompanying openTeamDashboard in commands.ts.
  *
  * Shortcuts are guarded by `hasUI` so they never fire in print/RPC mode, and
  * by the optional `registerShortcut` API so older Pi versions degrade
@@ -39,6 +55,17 @@ const CREW_SHORTCUTS: ReadonlyArray<ShortcutRegistration> = [
 			await openTeamSettingsOverlay(ctx);
 		},
 	},
+	{
+		key: "alt+c",
+		description: "pi-crew: open run dashboard (Crew)",
+		// Lazy-import so the heavy UI module chain (RunDashboard etc.) is only
+		// loaded on first use, not at extension load.
+		handler: async (ctx) => {
+		// LAZY: defer dynamic import of ./registration/commands.ts to its call site.
+			const { openTeamDashboard } = await import("./registration/commands.ts");
+			await openTeamDashboard(ctx);
+		},
+	},
 ];
 /**

package/src/extension/registration/commands.ts CHANGED Viewed

@@ -260,6 +260,66 @@ async function handleHealthDashboardAction(ctx: ExtensionCommandContext, selecti
 let depsRef: RegisterTeamCommandsDeps | undefined;
+/**
+ * Open the pi-crew run dashboard overlay and run its action loop.
+ *
+ * Extracted from the `team-dashboard` command (same action loop, now preceded
+ * by `hasUI` and `depsRef` guards) so it is reusable from
+ * a keyboard shortcut (alt+c, see crew-shortcuts.ts). Takes the base
+ * `ExtensionContext` (the shortcut handler's context) — uses only `hasUI`,
+ * `cwd`, `ui`, and `sessionManager` fields, so both `ExtensionContext` and
+ * `ExtensionCommandContext` satisfy it. Reads run caches via the module-level
+ * `depsRef` (set by `registerTeamCommands`), so it is a no-op if commands
+ * have not been registered yet. `deps` is captured into a local const so the
+ * non-undefined narrowing survives the awaited overlay call.
+ *
+ * The dashboard action helpers (handleMailboxDashboardAction, the viewers,
+ * notifyCommandResult, teamCommandContext/handleTeamTool) are declared for
+ * `ExtensionCommandContext` but only ever read base `ExtensionContext` fields
+ * (verified). The keyboard-shortcut path supplies an `ExtensionContext`, so we
+ * bridge those over-typed helpers with a single cast rather than relaxing
+ * signatures across several modules (some outside this file's scope).
+ */
+export async function openTeamDashboard(ctx: ExtensionContext): Promise<void> {
+	if (!ctx.hasUI) return;
+	const deps = depsRef;
+	if (!deps) return;
+	const cmdCtx = ctx as ExtensionCommandContext;
+	for (;;) {
+		// Extract sessionId for workspace-scoped filtering
+		const sessionId = cmdCtx.sessionManager?.getSessionId?.();
+		const runs = deps.getManifestCache(cmdCtx.cwd).list(50);
+		const uiConfig = loadConfig(cmdCtx.cwd).config.ui;
+		const rightPanel = (uiConfig?.dashboardPlacement ?? DEFAULT_UI.dashboardPlacement) === "right";
+		const width = rightPanel ? Math.min(90, Math.max(40, uiConfig?.dashboardWidth ?? DEFAULT_UI.dashboardWidth)) : "90%";
+		const { RunDashboard } = await ui();
+		const selection = await cmdCtx.ui.custom<RunDashboardSelection | undefined>((tui, theme, _keybindings, done) => new RunDashboard(runs, done, theme, { placement: rightPanel ? "right" : "center", showModel: uiConfig?.showModel, showTokens: uiConfig?.showTokens, showTools: uiConfig?.showTools, snapshotCache: deps.getRunSnapshotCache?.(cmdCtx.cwd), runProvider: () => deps.getManifestCache(cmdCtx.cwd).list(50), registry: deps.getMetricRegistry?.(), workspaceId: sessionId, requestRender: () => requestRenderTarget(tui) }), { overlay: true, overlayOptions: rightPanel ? { width, minWidth: 40, maxHeight: "100%", anchor: "top-right", offsetX: 0, offsetY: 0, margin: { top: 0, right: 0, bottom: 0, left: 0 } } : { width, maxHeight: "90%", anchor: "center", margin: 2 } });
+		if (!selection) return;
+		if (selection.action === "reload") continue;
+		if (selection.action === "notifications-dismiss") {
+			deps.dismissNotifications?.();
+			cmdCtx.ui.notify("pi-crew notifications dismissed.", "info");
+			continue;
+		}
+		if (selection.action === "mailbox-detail") {
+			await handleMailboxDashboardAction(cmdCtx, selection.runId);
+			deps.getRunSnapshotCache?.(cmdCtx.cwd).invalidate(selection.runId);
+			continue;
+		}
+		if (selection.action === "health-recovery" || selection.action === "health-kill-stale" || selection.action === "health-diagnostic-export") {
+			await handleHealthDashboardAction(cmdCtx, selection);
+			deps.getRunSnapshotCache?.(cmdCtx.cwd).invalidate(selection.runId);
+			continue;
+		}
+		if (selection.action === "agent-transcript" && await openTranscriptViewer(cmdCtx, selection.runId)) continue;
+		if (selection.action === "agent-live" && await openLiveConversation(cmdCtx, selection.runId)) continue;
+		if (selection.action === "agent-live") { await notifyCommandResult(cmdCtx, commandText({ content: [{ type: "text", text: "No live agent found for this run." }] })); continue; }
+		const result = selection.action === "api" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-manifest" } }, teamCommandContext(cmdCtx)) : selection.action === "agents" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "agent-dashboard" } }, teamCommandContext(cmdCtx)) : selection.action === "mailbox" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-mailbox" } }, teamCommandContext(cmdCtx)) : selection.action === "agent-events" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-events", limit: 50 } }, teamCommandContext(cmdCtx)) : selection.action === "agent-output" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-output", maxBytes: 32_000 } }, teamCommandContext(cmdCtx)) : selection.action === "agent-transcript" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-transcript" } }, teamCommandContext(cmdCtx)) : // eslint-disable-next-line @typescript-eslint/no-explicit-any
+			await handleTeamTool({ action: selection.action as any, runId: selection.runId }, teamCommandContext(cmdCtx));
+		await notifyCommandResult(cmdCtx, commandText(result));
+		return;
+	}
+}
 export function registerTeamCommands(pi: ExtensionAPI, deps: RegisterTeamCommandsDeps): void {
 	depsRef = deps;
 	pi.registerCommand("teams", {
@@ -497,40 +557,7 @@ export function registerTeamCommands(pi: ExtensionAPI, deps: RegisterTeamCommand
 	} });
 	pi.registerCommand("team-dashboard", { description: "Open a pi-crew run dashboard overlay", handler: async (_args: string, ctx: ExtensionCommandContext) => {
-		for (;;) {
-			// Extract sessionId for workspace-scoped filtering
-			const sessionId = ctx.sessionManager?.getSessionId?.();
-			const runs = deps.getManifestCache(ctx.cwd).list(50);
-			const uiConfig = loadConfig(ctx.cwd).config.ui;
-			const rightPanel = (uiConfig?.dashboardPlacement ?? DEFAULT_UI.dashboardPlacement) === "right";
-			const width = rightPanel ? Math.min(90, Math.max(40, uiConfig?.dashboardWidth ?? DEFAULT_UI.dashboardWidth)) : "90%";
-			const { RunDashboard } = await ui();
-			const selection = await ctx.ui.custom<RunDashboardSelection | undefined>((tui, theme, _keybindings, done) => new RunDashboard(runs, done, theme, { placement: rightPanel ? "right" : "center", showModel: uiConfig?.showModel, showTokens: uiConfig?.showTokens, showTools: uiConfig?.showTools, snapshotCache: deps.getRunSnapshotCache?.(ctx.cwd), runProvider: () => deps.getManifestCache(ctx.cwd).list(50), registry: deps.getMetricRegistry?.(), workspaceId: sessionId, requestRender: () => requestRenderTarget(tui) }), { overlay: true, overlayOptions: rightPanel ? { width, minWidth: 40, maxHeight: "100%", anchor: "top-right", offsetX: 0, offsetY: 0, margin: { top: 0, right: 0, bottom: 0, left: 0 } } : { width, maxHeight: "90%", anchor: "center", margin: 2 } });
-			if (!selection) return;
-			if (selection.action === "reload") continue;
-			if (selection.action === "notifications-dismiss") {
-				deps.dismissNotifications?.();
-				ctx.ui.notify("pi-crew notifications dismissed.", "info");
-				continue;
-			}
-			if (selection.action === "mailbox-detail") {
-				await handleMailboxDashboardAction(ctx, selection.runId);
-				deps.getRunSnapshotCache?.(ctx.cwd).invalidate(selection.runId);
-				continue;
-			}
-			if (selection.action === "health-recovery" || selection.action === "health-kill-stale" || selection.action === "health-diagnostic-export") {
-				await handleHealthDashboardAction(ctx, selection);
-				deps.getRunSnapshotCache?.(ctx.cwd).invalidate(selection.runId);
-				continue;
-			}
-			if (selection.action === "agent-transcript" && await openTranscriptViewer(ctx, selection.runId)) continue;
-			if (selection.action === "agent-live" && await openLiveConversation(ctx, selection.runId)) continue;
-			if (selection.action === "agent-live") { await notifyCommandResult(ctx, commandText({ content: [{ type: "text", text: "No live agent found for this run." }] })); continue; }
-			const result = selection.action === "api" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-manifest" } }, teamCommandContext(ctx)) : selection.action === "agents" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "agent-dashboard" } }, teamCommandContext(ctx)) : selection.action === "mailbox" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-mailbox" } }, teamCommandContext(ctx)) : selection.action === "agent-events" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-events", limit: 50 } }, teamCommandContext(ctx)) : selection.action === "agent-output" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-output", maxBytes: 32_000 } }, teamCommandContext(ctx)) : selection.action === "agent-transcript" ? await handleTeamTool({ action: "api", runId: selection.runId, config: { operation: "read-agent-transcript" } }, teamCommandContext(ctx)) : // eslint-disable-next-line @typescript-eslint/no-explicit-any
-				await handleTeamTool({ action: selection.action as any, runId: selection.runId }, teamCommandContext(ctx));
-			await notifyCommandResult(ctx, commandText(result));
-			return;
-		}
+		await openTeamDashboard(ctx);
 	} });
 	pi.registerCommand("team-mascot", { description: "Show an animated mascot splash", handler: async (args: string, ctx: ExtensionCommandContext) => {

package/src/runtime/async-runner.ts CHANGED Viewed

@@ -150,6 +150,75 @@ export interface SpawnBackgroundTeamRunResult {
 	logPath: string;
 }
+/**
+ * Env vars explicitly forwarded to the detached background runner.
+ *
+ * Provider API keys (MINIMAX/OPENAI/ANTHROPIC/...) are INTENTIONALLY OMITTED
+ * (security review M1): the background runner only spawns child Pi workers,
+ * which read keys from the Pi config file (not env). Passing keys via env
+ * leaks them into V8 fatal-error reports (--report-on-fatalerror writes the
+ * `environmentVariables` section unredacted). Matches child-pi.ts policy.
+ * Exported so the invariant is unit-testable (test/unit/async-runner.test.ts).
+ */
+export const BACKGROUND_RUNNER_ENV_ALLOWLIST: string[] = [
+	// Essential non-secret vars
+	"PATH",
+	"HOME",
+	"USER",
+	"SHELL",
+	"TERM",
+	"LANG",
+	"LC_ALL",
+	"LC_COLLATE",
+	"LC_CTYPE",
+	"LC_MESSAGES",
+	"LC_MONETARY",
+	"LC_NUMERIC",
+	"LC_TIME",
+	"XDG_CONFIG_HOME",
+	"XDG_DATA_HOME",
+	"XDG_CACHE_HOME",
+	"XDG_RUNTIME_DIR",
+	// Windows essentials — see WINDOWS_ESSENTIAL_ENV_VARS (src/utils/env-allowlist.ts).
+	...WINDOWS_ESSENTIAL_ENV_VARS,
+	"NVM_BIN",
+	"NVM_DIR",
+	"NVM_INC",
+	"NODE_PATH",
+	"NODE_DISABLE_COLORS",
+	"NODE_EXTRA_CA_CERTS",
+	"NPM_CONFIG_REGISTRY",
+	"NPM_CONFIG_USERCONFIG",
+	"NPM_CONFIG_GLOBALCONFIG",
+	// PI_CREW_PARENT_PID is needed for parent-guard (liveness check).
+	"PI_CREW_DEPTH",
+	"PI_CREW_MAX_DEPTH",
+	"PI_CREW_INHERIT_PROJECT_CONTEXT",
+	"PI_CREW_INHERIT_SKILLS",
+	"PI_CREW_PARENT_PID",
+	"PI_TEAMS_DEPTH",
+	"PI_TEAMS_MAX_DEPTH",
+	"PI_TEAMS_INHERIT_PROJECT_CONTEXT",
+	"PI_TEAMS_INHERIT_SKILLS",
+	"PI_TEAMS_PI_BIN",
+	"PI_TEAMS_MOCK_CHILD_PI",
+	"PI_CREW_ALLOW_MOCK",
+	// Phase 1.5: worker-thread atomic writer opt-in (RFC 15).
+	"PI_CREW_WORKER_ATOMIC_WRITER",
+	"PI_TEAMS_WORKER_ATOMIC_WRITER",
+	// Phase 1.5 #1: verification env sanitization opt-in (RFC 13 §6).
+	"PI_CREW_VERIFICATION_SANITIZE_ENV",
+	"PI_TEAMS_VERIFICATION_SANITIZE_ENV",
+	"PI_CREW_VERIFICATION_PRESERVE_ENV",
+	"PI_TEAMS_VERIFICATION_PRESERVE_ENV",
+	// Phase 1.5 #2: verification git-worktree sandbox opt-in (RFC 16).
+	"PI_CREW_VERIFICATION_WORKTREE",
+	"PI_TEAMS_VERIFICATION_WORKTREE",
+	// Phase 1.5 #3: V8 diagnostic report on fatal error (RFC 17 — investigation).
+	"PI_CREW_BG_REPORT_ON_FATAL",
+	"PI_TEAMS_BG_REPORT_ON_FATAL",
+];
 export async function spawnBackgroundTeamRun(manifest: TeamRunManifest): Promise<SpawnBackgroundTeamRunResult> {
 	const runnerPath = path.join(path.dirname(fileURLToPath(import.meta.url)), "background-runner.ts");
 	const logPath = path.join(manifest.stateRoot, "background.log");
@@ -159,80 +228,7 @@ export async function spawnBackgroundTeamRun(manifest: TeamRunManifest): Promise
 	// to prevent leaking all env vars (including secrets) to detached background runner.
 	// Previously, destructuring only removed PI_CREW_PARENT_PID but kept everything else.
 	const filteredEnv = sanitizeEnvSecrets(process.env, {
-		allowList: [
-			// Model provider API keys (same as child-pi.ts)
-			"MINIMAX_API_KEY",
-			"MINIMAX_GROUP_ID",
-			"OPENAI_API_KEY",
-			"OPENAI_ORG_ID",
-			"ANTHROPIC_API_KEY",
-			"GOOGLE_API_KEY",
-			"GOOGLE_GENERATIVE_LANGUAGE_API_KEY",
-			"AZURE_OPENAI_API_KEY",
-			"AZURE_OPENAI_ENDPOINT",
-			"AWS_ACCESS_KEY_ID",
-			"AWS_SECRET_ACCESS_KEY",
-			"AWS_REGION",
-			"ZEU_API_KEY",
-			"ZERODEV_API_KEY",
-			// Essential non-secret vars
-			"PATH",
-			"HOME",
-			"USER",
-			"SHELL",
-			"TERM",
-			"LANG",
-			"LC_ALL",
-			"LC_COLLATE",
-			"LC_CTYPE",
-			"LC_MESSAGES",
-			"LC_MONETARY",
-			"LC_NUMERIC",
-			"LC_TIME",
-			"XDG_CONFIG_HOME",
-			"XDG_DATA_HOME",
-			"XDG_CACHE_HOME",
-			"XDG_RUNTIME_DIR",
-			// Windows essentials — see WINDOWS_ESSENTIAL_ENV_VARS (src/utils/env-allowlist.ts).
-			...WINDOWS_ESSENTIAL_ENV_VARS,
-			"NVM_BIN",
-			"NVM_DIR",
-			"NVM_INC",
-			"NODE_PATH",
-			"NODE_DISABLE_COLORS",
-			"NODE_EXTRA_CA_CERTS",
-			"NPM_CONFIG_REGISTRY",
-			"NPM_CONFIG_USERCONFIG",
-			"NPM_CONFIG_GLOBALCONFIG",
-			// FIX: explicit list matches child-pi.ts to prevent regression.
-			// PI_CREW_PARENT_PID is needed for parent-guard (liveness check).
-			"PI_CREW_DEPTH",
-			"PI_CREW_MAX_DEPTH",
-			"PI_CREW_INHERIT_PROJECT_CONTEXT",
-			"PI_CREW_INHERIT_SKILLS",
-			"PI_CREW_PARENT_PID",
-			"PI_TEAMS_DEPTH",
-			"PI_TEAMS_MAX_DEPTH",
-			"PI_TEAMS_INHERIT_PROJECT_CONTEXT",
-			"PI_TEAMS_INHERIT_SKILLS",
-			"PI_TEAMS_PI_BIN",
-			"PI_TEAMS_MOCK_CHILD_PI",
-			"PI_CREW_ALLOW_MOCK",
-			// Phase 1.5: worker-thread atomic writer opt-in (RFC 15).
-			"PI_CREW_WORKER_ATOMIC_WRITER",
-			"PI_TEAMS_WORKER_ATOMIC_WRITER",
-			// Phase 1.5 #1: verification env sanitization opt-in (RFC 13 §6).
-			"PI_CREW_VERIFICATION_SANITIZE_ENV",
-			"PI_TEAMS_VERIFICATION_SANITIZE_ENV",
-			"PI_CREW_VERIFICATION_PRESERVE_ENV",
-			"PI_TEAMS_VERIFICATION_PRESERVE_ENV",
-			// Phase 1.5 #2: verification git-worktree sandbox opt-in (RFC 16).
-			"PI_CREW_VERIFICATION_WORKTREE",
-			"PI_TEAMS_VERIFICATION_WORKTREE",
-			// Phase 1.5 #3: V8 diagnostic report on fatal error (RFC 17 — investigation).
-			"PI_CREW_BG_REPORT_ON_FATAL",
-			"PI_TEAMS_BG_REPORT_ON_FATAL",
-		],
+		allowList: BACKGROUND_RUNNER_ENV_ALLOWLIST,
 	});
 	// FIX: removed delete workarounds — with explicit allowlist, these vars
 	// are no longer auto-leaked. Matches child-pi.ts.

package/src/runtime/background-runner.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import {
 	withRunLockSync,
 } from "../state/locks.ts";
 import {
+	createRunPaths,
 	loadRunManifestById,
 	saveRunManifest,
 	updateRunStatus,
@@ -411,11 +412,21 @@ async function main(): Promise<void> {
 		);
 	// FIX Issue #3: Wrap in withRunLockSync to prevent concurrent background-runners
 	// for the same runId from reading stale manifest state. If lock cannot be
-	// acquired within 5s, fail immediately rather than proceeding with stale data.
+	// acquired within 5s, fail immediately rather than proceeding with stale data.
+	//
+	// BUGFIX (caught by E2E parallel-spawn, 2026-06-27): the lock manifest must
+	// carry the REAL per-run stateRoot, NOT an empty string. lockPath() derives
+	// `<stateRoot>/run.lock`, so `stateRoot: ""` collapses every concurrent
+	// background-runner (different runIds, same spawn instant) onto a SINGLE
+	// shared `run.lock` at cwd — 1 acquires, the rest fail-fast and die. Compute
+	// the per-run stateRoot from (cwd, runId) via createRunPaths (same helper
+	// resolveRunStateRoot uses internally), so each run locks its own
+	// `<cwd>/.crew/state/runs/<runId>/run.lock`. Matches locks-race.test.ts.
+	const bootstrapStateRoot = createRunPaths(cwd, runId).stateRoot;
 	let loaded: { manifest: TeamRunManifest; tasks: TeamTaskState[] } | undefined;
 	try {
 		loaded = withRunLockSync(
-			{ stateRoot: "", runId, cwd } as TeamRunManifest,
+			{ stateRoot: bootstrapStateRoot, runId, cwd } as TeamRunManifest,
 			() => loadRunManifestById(cwd, runId),
 			{ staleMs: 30_000 },
 		);

package/src/runtime/process-status.ts CHANGED Viewed

@@ -16,7 +16,11 @@ export interface ProcessLiveness {
  */
 const ORPHANED_ACTIVE_RUN_MS = 2 * 60 * 1000;
 /** How long a completed run stays visible in the widget after completion. */
-const COMPLETED_VISIBILITY_GRACE_MS = 8000;
+const COMPLETED_VISIBILITY_GRACE_MS = 8_000;
+/** Errors (failed/cancelled) linger far longer so a failed run leaves a visible
+ * trace in the crew widget for ~10 min (F-5). Successful completions vanish
+ * quickly to keep the widget quiet. */
+const ERROR_VISIBILITY_GRACE_MS = 10 * 60 * 1000;
 /** Maximum age (ms) for an active run before it's considered stale.
  * After this time, PID-only liveness is unreliable due to PID recycling. */
 const STALE_ACTIVE_RUN_MS = 30 * 60 * 1000;
@@ -120,12 +124,13 @@ export function isDisplayActiveRun(run: TeamRunManifest, agents: CrewAgentRecord
 	}
 	// Grace period: keep finished runs visible so users see the result — a few seconds for completed runs, ~10 min for failed/cancelled ones.
 	if (run.status === "completed" || run.status === "failed" || run.status === "cancelled") {
+		const grace = run.status === "completed" ? COMPLETED_VISIBILITY_GRACE_MS : ERROR_VISIBILITY_GRACE_MS;
 		const lastAgentActivity = agents.reduce<number>((max, agent) => {
 			const ts = agent.completedAt ?? agent.startedAt;
 			const parsed = ts ? new Date(ts).getTime() : 0;
 			return Number.isFinite(parsed) && parsed > max ? parsed : max;
 		}, new Date(run.updatedAt).getTime());
-		if (Number.isFinite(lastAgentActivity) && now - lastAgentActivity < COMPLETED_VISIBILITY_GRACE_MS) return true;
+		if (Number.isFinite(lastAgentActivity) && now - lastAgentActivity < grace) return true;
 		return false;
 	}
 	if (!isActiveRunStatus(run.status)) return false;

package/src/runtime/role-permission.ts CHANGED Viewed

@@ -1,11 +1,10 @@
-import { isSensitivePath } from "./sensitive-paths.ts";
 export type RolePermissionMode = "read_only" | "workspace_write" | "danger_full_access" | "explicit_confirm";
-const READ_ONLY_ROLES = new Set(["explorer", "reviewer", "security-reviewer", "verifier", "analyst", "critic", "planner"]);
-const WRITE_ROLES = new Set(["executor", "test-engineer", "writer"]);
-const READ_ONLY_COMMANDS = new Set(["cat", "head", "tail", "less", "more", "wc", "ls", "find", "grep", "rg", "awk", "sed", "echo", "printf", "which", "where", "whoami", "pwd", "env", "printenv", "date", "df", "du", "uname", "file", "stat", "diff", "sort", "uniq", "tr", "cut", "paste", "test", "true", "false", "type", "readlink", "realpath", "basename", "dirname", "sha256sum", "md5sum", "xxd", "hexdump", "od", "strings", "tree", "jq", "git", "gh"]);
+// Read-only roles: cannot mutate files/source. `verifier` is NOT here — it runs
+// tests (bash + cache writes) so it is a WRITE role (F4). `planner` stays
+// read-only to preserve the plan-approval gate boundary (F3).
+const READ_ONLY_ROLES = new Set(["explorer", "reviewer", "security-reviewer", "analyst", "critic", "planner"]);
+const WRITE_ROLES = new Set(["executor", "test-engineer", "writer", "verifier"]);
 export interface PermissionCheckResult {
 	allowed: boolean;
 	mode: RolePermissionMode;
@@ -18,21 +17,6 @@ export function permissionForRole(role: string): RolePermissionMode {
 	return "workspace_write";
 }
-export function isReadOnlyCommand(command: string): boolean {
-	const first = command.trim().split(/\s+/)[0]?.split(/[\\/]/).pop() ?? "";
-	return READ_ONLY_COMMANDS.has(first) && !/\s(-i|--in-place)\b|\s>{1,2}\s|\brm\b|\bmv\b|\bcp\b|\b(?:npm|pnpm|yarn|bun)\s+(install|add|ci|remove)\b|\bgit\s+(commit|push|merge|rebase|reset|checkout|clean)\b/.test(command);
-}
-export function checkRolePermission(role: string, command: string, filePath?: string): PermissionCheckResult {
-	const mode = permissionForRole(role);
-	// Also block access to known sensitive paths even for read-only commands
-	if (filePath && isSensitivePath(filePath)) {
-		return { allowed: false, mode, reason: `Path '${filePath}' is sensitive (credentials, SSH keys, etc.) — access denied for all roles.` };
-	}
-	if (mode === "read_only" && !isReadOnlyCommand(command)) return { allowed: false, mode, reason: `Role '${role}' is read-only and command may modify state.` };
-	return { allowed: true, mode };
-}
 export function currentCrewRole(env: NodeJS.ProcessEnv = process.env): string | undefined {
 	return env.PI_CREW_ROLE?.trim() || env.PI_TEAMS_ROLE?.trim() || undefined;
 }

package/src/runtime/task-runner/prompt-builder.ts CHANGED Viewed

@@ -30,6 +30,7 @@ function readOnlyRoleInstructions(role: string): string {
 		"- Do not use shell redirects, heredocs, in-place edits, package installs, git commit/merge/rebase/reset/checkout, or other state-mutating commands.",
 		"- If implementation changes are needed, report exact recommendations instead of applying them.",
 		"- Prefer read/grep/find/listing tools and read-only git inspection commands.",
+		"- Your final RESULT TEXT is persisted automatically by the runner (as a result artifact and, if the step declares `output:`, to a shared file). To deliver a plan, report, or findings, EMIT THEM AS TEXT in your final result — do NOT try to write a file yourself.",
 	].join("\n");
 }