npm - pi-crew - Versions diffs - 0.7.5 → 0.7.7 - Mend

pi-crew 0.7.5 → 0.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

package/CHANGELOG.md +71 -0
package/README.md +11 -11
package/docs/commands-reference.md +14 -10
package/docs/troubleshooting.md +131 -0
package/docs/usage.md +9 -4
package/package.json +1 -1
package/src/config/config.ts +11 -4
package/src/extension/action-suggestions.ts +71 -0
package/src/extension/context-status-injection.ts +32 -1
package/src/extension/register.ts +71 -65
package/src/extension/team-tool/api.ts +3 -2
package/src/extension/team-tool/cancel.ts +5 -4
package/src/extension/team-tool/explain.ts +2 -1
package/src/extension/team-tool/failure-patterns.ts +124 -0
package/src/extension/team-tool/inspect.ts +10 -6
package/src/extension/team-tool/lifecycle-actions.ts +5 -4
package/src/extension/team-tool/respond.ts +4 -3
package/src/extension/team-tool/run-not-found.ts +54 -0
package/src/extension/team-tool/run.ts +26 -4
package/src/extension/team-tool/status.ts +58 -4
package/src/extension/team-tool.ts +5 -3
package/src/runtime/async-runner.ts +7 -0
package/src/runtime/background-runner.ts +7 -1
package/src/runtime/chain-parser.ts +13 -5
package/src/runtime/checkpoint.ts +13 -1
package/src/runtime/child-pi.ts +9 -1
package/src/runtime/crash-recovery.ts +21 -1
package/src/runtime/live-session-runtime.ts +15 -1
package/src/runtime/parent-guard.ts +2 -2
package/src/runtime/pi-spawn.ts +66 -0
package/src/runtime/stale-reconciler.ts +38 -3
package/src/runtime/task-runner.ts +10 -1
package/src/runtime/team-runner.ts +19 -2
package/src/runtime/verification-gates.ts +21 -1
package/src/schema/team-tool-schema.ts +9 -0
package/src/state/blob-store.ts +12 -10
package/src/state/event-log-rotation.ts +114 -93
package/src/state/event-log.ts +79 -20
package/src/state/health-store.ts +6 -1
package/src/state/locks.ts +66 -16
package/src/state/state-store.ts +14 -1
package/src/ui/card-colors.ts +7 -3
package/src/ui/dashboard-panes/agents-pane.ts +15 -2
package/src/ui/live-duration.ts +58 -0
package/src/ui/tool-render.ts +7 -11
package/src/ui/tool-renderers/index.ts +6 -3
package/src/ui/widget/widget-formatters.ts +2 -13
package/src/utils/fs-watch.ts +11 -60
package/src/utils/run-watcher-registry.ts +164 -0
package/src/workflows/discover-workflows.ts +2 -1
package/src/workflows/workflow-config.ts +5 -0
package/src/runtime/dynamic-script-runner.ts +0 -497
package/src/runtime/sandbox.ts +0 -335

package/src/extension/register.ts CHANGED Viewed

@@ -82,7 +82,8 @@ import {
 import { RenderScheduler } from "../ui/render-scheduler.ts";
 import { runEventBus } from "../ui/run-event-bus.ts";
 import { createRunSnapshotCache } from "../ui/run-snapshot-cache.ts";
-import { closeWatcher, watchCrewState } from "../utils/fs-watch.ts";
+import { closeWatcher } from "../utils/fs-watch.ts";
+import { RunWatcherRegistry } from "../utils/run-watcher-registry.ts";
 import { logInternalError } from "../utils/internal-error.ts";
 import {
 	clearProjectRootCache,
@@ -725,8 +726,13 @@ export function registerPiTeams(pi: ExtensionAPI): void {
 	// Linux), file changes (manifest/tasks/events/agents) trigger an
 	// immediate cache invalidate via renderScheduler.schedule. Falls back to
 	// poll-only behavior on systems where fs.watch errors.
-	let crewWatcher: import("node:fs").FSWatcher | undefined;
-	let userCrewWatcher: import("node:fs").FSWatcher | undefined;
+	// pts/2 hang fix (2026-06-16): the previous RECURSIVE fs.watch(<state>, {recursive:true})
+	// exploded to O(total run history) inotify watches on Linux (109→339 observed) and
+	// caused a permanent busy-loop. Replaced with bounded per-active-run watchers via
+	// RunWatcherRegistry (root watcher on runs/ for new-run detection + one non-recursive
+	// watcher per active run, reconciled each preload tick in buildFrame).
+	let crewRunWatchers: RunWatcherRegistry | undefined;
+	let userCrewWatchers: RunWatcherRegistry | undefined;
 	// Separate map for foreground team-run AbortControllers (distinct from subagent controllers).
 	// P0 fix: stopSessionBoundSubagents must NOT abort foreground team runs on session switch.
 	// Foreground team runs run in the same process as the session; they naturally clean up
@@ -1116,10 +1122,10 @@ export function registerPiTeams(pi: ExtensionAPI): void {
 			clearTimeout(preloadTimer);
 			preloadTimer = undefined;
 		}
-		closeWatcher(crewWatcher);
-		crewWatcher = undefined;
-		closeWatcher(userCrewWatcher);
-		userCrewWatcher = undefined;
+		crewRunWatchers?.closeAll();
+		crewRunWatchers = undefined;
+		userCrewWatchers?.closeAll();
+		userCrewWatchers = undefined;
 		stopSessionBoundSubagents();
 		// P0 fix: also abort foreground team runs on session shutdown (not on session switch).
 		// This is the only place where foreground team run controllers should be aborted.
@@ -1590,6 +1596,25 @@ export function registerPiTeams(pi: ExtensionAPI): void {
 			lastFrameSnapshotCache = getRunSnapshotCache(currentCtx.cwd);
 			const manifests = lastFrameManifestCache.list(20);
 			lastPreloadedManifests = manifests;
+			// pts/2 hang fix: reconcile per-run watchers against the ACTIVE set only.
+			// This bounds inotify cost to O(active runs) — completed runs stop being
+			// watched as soon as they leave running/queued/planning status, instead of
+			// the recursive watcher watching the entire run history forever.
+			{
+				const onRunChange = (runId: string): void => {
+					if (cleanedUp || sessionGeneration !== ownerGeneration) return;
+					getRunSnapshotCache(currentCtx?.cwd ?? process.cwd()).invalidate(runId);
+					renderScheduler?.schedule({ runId });
+				};
+				const onWatchErr = (error: unknown): void => {
+					logInternalError("register.runWatcher.change", error);
+				};
+				const active = manifests
+					.filter((r) => r.status === "running" || r.status === "queued" || r.status === "planning")
+					.map((r) => ({ runId: r.runId, runDir: r.stateRoot }));
+				crewRunWatchers?.reconcile(active, onRunChange, onWatchErr);
+				userCrewWatchers?.reconcile(active, onRunChange, onWatchErr);
+			}
 			const runIds = manifests.map((r) => r.runId);
 			await lastFrameSnapshotCache.preloadAllStale(runIds);
 			return true;
@@ -1815,72 +1840,53 @@ export function registerPiTeams(pi: ExtensionAPI): void {
 		renderSchedulerUnsubscribers.push(unsubscribeRunEvents);
 		// Start async preload loop — refreshes snapshot cache in background
 		startPreloadLoop(fallbackMs, effectiveRefreshMs);
-		// 1.3: native FS watcher on `<crewRoot>/state`. Triggers an immediate
-		// renderScheduler.schedule({runId}) when files inside any run change so
-		// the snapshot cache invalidates well before the 1s preload tick. Falls
-		// back silently to poll-only behavior on systems where recursive
-		// fs.watch is not supported.
+		// 1.3: BOUNDED run watcher (pts/2 hang fix 2026-06-16). Previously this was
+		// a RECURSIVE fs.watch(<state>, {recursive:true}) which on Linux expands to
+		// ONE inotify watch PER SUBDIR — with many historical runs under
+		// .crew/state/runs/ this ballooned to hundreds of watches (109→339 observed)
+		// and the event volume caused a permanent busy-loop (71% CPU, 400KB/s read).
+		// Now: a single non-recursive watcher on the runs/ ROOT (to detect new run
+		// dirs appearing — crew.run.created is never emitted) plus per-active-run
+		// watchers reconciled each preload tick in buildFrame. Total inotify cost is
+		// O(active runs), not O(total history). Falls back to poll-only (the preload
+		// loop already polls every effectiveRefreshMs) on systems where fs.watch
+		// errors or the runs dir is absent.
+		const crewRunWatcherOnChange = (runId: string): void => {
+			if (cleanedUp || sessionGeneration !== ownerGeneration) return;
+			getRunSnapshotCache(currentCtx?.cwd ?? process.cwd()).invalidate(runId);
+			renderScheduler?.schedule({ runId });
+		};
+		const crewRunWatcherOnError = (error: unknown): void => {
+			logInternalError("register.crewRunWatchers.error", error);
+		};
 		try {
-			closeWatcher(crewWatcher);
-			crewWatcher = undefined;
-			const stateDir = path.join(projectCrewRoot(ctx.cwd), "state");
-			const watcher = watchCrewState(
-				stateDir,
-				(runId) => {
-					if (cleanedUp || sessionGeneration !== ownerGeneration)
-						return;
-					// Invalidate snapshot cache so the next renderTick reads fresh state from disk.
-					// Without this, renderTick re-renders from stale lastPreloadedManifests and
-					// shows ghost "running" entries for runs that already completed on disk.
-					const sc = getRunSnapshotCache(
-						currentCtx?.cwd ?? process.cwd(),
-					);
-					sc.invalidate(runId);
-					renderScheduler?.schedule({ runId });
-				},
-				(error) => {
-					logInternalError("register.crewWatcher.error", error);
-					closeWatcher(crewWatcher);
-					crewWatcher = undefined;
-				},
-			);
-			if (watcher) crewWatcher = watcher;
+			crewRunWatchers?.closeAll();
+			crewRunWatchers = undefined;
+			const crewRunsDir = path.join(projectCrewRoot(ctx.cwd), "state", "runs");
+			if (fs.existsSync(crewRunsDir)) {
+				crewRunWatchers = new RunWatcherRegistry();
+				crewRunWatchers.setRootWatcher(crewRunsDir, crewRunWatcherOnChange, crewRunWatcherOnError);
+			}
 		} catch (error) {
-			logInternalError("register.crewWatcher.start", error);
+			logInternalError("register.crewRunWatchers.start", error);
 		}
-		// Also watch user-level state dir — fast-fix and other user-scoped runs
-		// write manifests there. Without this watcher, runs completing in user-level
+		// Also watch user-level runs dir — fast-fix and other user-scoped runs
+		// write manifests there. Without this, runs completing in user-level
 		// state never trigger cache invalidation, causing ghost "running" entries.
 		try {
-			closeWatcher(userCrewWatcher);
-			userCrewWatcher = undefined;
-			const userStateDir = path.join(userCrewRoot(), "state");
-			if (fs.existsSync(userStateDir)) {
-				const userWatcher = watchCrewState(
-					userStateDir,
-					(runId) => {
-						if (cleanedUp || sessionGeneration !== ownerGeneration)
-							return;
-						const sc = getRunSnapshotCache(
-							currentCtx?.cwd ?? process.cwd(),
-						);
-						sc.invalidate(runId);
-						renderScheduler?.schedule({ runId });
-					},
-					(error) => {
-						logInternalError(
-							"register.userCrewWatcher.error",
-							error,
-						);
-						closeWatcher(userCrewWatcher);
-						userCrewWatcher = undefined;
-					},
-				);
-				if (userWatcher) userCrewWatcher = userWatcher;
+			userCrewWatchers?.closeAll();
+			userCrewWatchers = undefined;
+			const userRunsDir = path.join(userCrewRoot(), "state", "runs");
+			if (fs.existsSync(userRunsDir)) {
+				userCrewWatchers = new RunWatcherRegistry();
+				userCrewWatchers.setRootWatcher(userRunsDir, crewRunWatcherOnChange, crewRunWatcherOnError);
 			}
 		} catch (error) {
-			logInternalError("register.userCrewWatcher.start", error);
+			logInternalError("register.userCrewWatchers.start", error);
 		}
+		// Kick an immediate preload so the first buildFrame reconciles per-run
+		// watchers for any runs that are already active on session start.
+		backgroundPreload();
 	});
 	pi.on("session_before_switch", () => {
 		sessionGeneration++;

package/src/extension/team-tool/api.ts CHANGED Viewed

@@ -24,6 +24,7 @@ import { resolveRealContainedPath } from "../../utils/safe-paths.ts";
 import type { PiTeamsToolResult } from "../tool-result.ts";
 import { locateRunCwd } from "../team-tool.ts";
 import { configRecord, result, type TeamContext } from "./context.ts";
+import { RUN_NOT_FOUND_HINT } from "./run-not-found.ts";
 export function globMatch(value: string, pattern: string): boolean {
 	// Prevent ReDoS: reject excessively long patterns
@@ -91,9 +92,9 @@ export async function handleApi(params: TeamToolParamsValue, ctx: TeamContext):
 	}
 	if (!params.runId) return result("API requires runId.", { action: "api", status: "error" }, true);
 	const runCwd = locateRunCwd(params.runId, ctx.cwd);
-	if (!runCwd) return result(`Run '${params.runId}' not found.`, { action: "api", status: "error" }, true);
+	if (!runCwd) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "api", status: "error" }, true);
 	const loaded = loadRunManifestById(runCwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "api", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "api", status: "error" }, true);
 	if (operation === "read-manifest") {
 		return result(JSON.stringify(loaded.manifest, null, 2), { action: "api", status: "ok", runId: loaded.manifest.runId, artifactsRoot: loaded.manifest.artifactsRoot });
 	}

package/src/extension/team-tool/cancel.ts CHANGED Viewed

@@ -12,6 +12,7 @@ import { executeHook, appendHookEvent } from "../../hooks/registry.ts";
 import type { PiTeamsToolResult } from "../tool-result.ts";
 import { locateRunCwd } from "../team-tool.ts";
 import { result, type TeamContext } from "./context.ts";
+import { RUN_NOT_FOUND_HINT } from "./run-not-found.ts";
 import { enforceDestructiveIntent, intentFromConfig } from "./intent-policy.ts";
 import { invalidateSnapshot, type CacheControlDeps } from "./cache-control.ts";
@@ -80,9 +81,9 @@ function cancelReasonFromParams(params: TeamToolParamsValue): CancellationReason
 export async function handleRetry(params: TeamToolParamsValue, ctx: TeamContext, deps?: CacheControlDeps): Promise<PiTeamsToolResult> {
 	if (!params.runId) return result("Retry requires runId.", { action: "retry", status: "error" }, true);
 	const runCwd = locateRunCwd(params.runId, ctx.cwd);
-	if (!runCwd) return result(`Run '${params.runId}' not found.`, { action: "retry", status: "error" }, true);
+	if (!runCwd) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "retry", status: "error" }, true);
 	const loaded = loadRunManifestById(runCwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "retry", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "retry", status: "error" }, true);
 	// Pre-lock ownership check: reject foreign-owned runs unless force is set
 	const foreignRun = typeof loaded.manifest.ownerSessionId === "string" && loaded.manifest.ownerSessionId !== ctx.sessionId;
@@ -145,9 +146,9 @@ export async function handleCancel(params: TeamToolParamsValue, ctx: TeamContext
 	if (intentError) return intentError;
 	if (!params.runId) return result("Cancel requires runId.", { action: "cancel", status: "error" }, true);
 	const runCwd = locateRunCwd(params.runId, ctx.cwd);
-	if (!runCwd) return result(`Run '${params.runId}' not found.`, { action: "cancel", status: "error" }, true);
+	if (!runCwd) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "cancel", status: "error" }, true);
 	const loaded = loadRunManifestById(runCwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "cancel", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "cancel", status: "error" }, true);
 	// Pre-lock ownership check: reject foreign-owned runs unless force is set
 	const preCheck = abortOwned(loaded.manifest.runId, undefined, ctx, params.force);

package/src/extension/team-tool/explain.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import * as fs from "node:fs";
+import { RUN_NOT_FOUND_HINT } from "./run-not-found.ts";
 import * as path from "node:path";
 import { loadRunManifestById } from "../../state/state-store.ts";
 import type { TeamRunManifest, TeamTaskState } from "../../state/types.ts";
@@ -211,7 +212,7 @@ export function handleExplain(params: {
   const loaded = loadRunManifestById(cwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
   if (!loaded) {
-    return result(`Run '${params.runId}' not found.`, { action: "explain", status: "error" }, true);
+    return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "explain", status: "error" }, true);
   }
   const { manifest, tasks } = loaded;

package/src/extension/team-tool/failure-patterns.ts ADDED Viewed

@@ -0,0 +1,124 @@
+/**
+ * failure-patterns.ts — Group failed tasks by error similarity (Round 17 BS-4).
+ *
+ * Before this, a run with 8 failed tasks surfaced 8 separate raw error
+ * strings. The user had to mentally group them ("5 of these say 'model
+ * routing fallback failed'"). This module detects common failure patterns
+ * so `summary` can say "5 of 8 failures share root cause: X".
+ *
+ * Grouping strategy (cheap, deterministic, no ML):
+ *   1. Normalize: lowercase, collapse whitespace, strip task ids / run ids /
+ *      absolute paths / numbers → a canonical "signature".
+ *   2. Bucket by signature. Buckets with >1 member are "common patterns".
+ *   3. Sort by frequency desc.
+ *
+ * Conservative: only buckets with >=2 members count as a pattern (a single
+ * failure is just itself). Returns [] when there are no repeated signatures.
+ */
+export interface FailurePattern {
+	/** Canonical error signature used for grouping. */
+	signature: string;
+	/** A representative original error (the shortest variant) for display. */
+	representative: string;
+	/** Task ids that hit this pattern. */
+	taskIds: string[];
+	/** Count of failures in this bucket (== taskIds.length). */
+	count: number;
+}
+export interface FailurePatternInput {
+	id: string;
+	status: string;
+	error?: string;
+}
+/**
+ * Normalize an error string into a grouping signature.
+ * Exported for unit testing.
+ */
+export function normalizeErrorSignature(error: string | undefined): string {
+	if (!error) return "(no error detail)";
+	let s = error.toLowerCase();
+	// Strip run ids (team_YYYYMMDDHHMMSS_xxxxxxxxxxxxxxxx)
+	s = s.replace(/team_\d{8,}_[a-z0-9]{12,}/g, "<run>");
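+	// Strip task ids (01_explore, adaptive-03-executor, etc.). NOTE(review): the suffix class also accepts digits, so a bare number of 3+ digits ("127") becomes <task> here, while 1-2 digit numbers fall through to N below — otherwise-identical errors can land in different buckets.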
+	s = s.replace(/\b(adaptive-)?\d{2,}[a-z0-9_-]+/g, "<task>");
+	// Strip absolute paths
+	s = s.replace(/\/(?:home|users|tmp|var|opt|root)[^\s'"]*/g, "<path>");
+	// Strip numbers (line numbers, counts, pids, ms durations)
+	s = s.replace(/\b\d+\b/g, "N");
+	// Collapse whitespace
+	s = s.replace(/\s+/g, " ").trim();
+	return s || "(no error detail)";
+}
+/**
+ * Group failed tasks by error-pattern similarity. Only groups with >=2
+ * members are returned (singletons are not "patterns"). Sorted by count desc.
+ *
+ * @param tasks  the run's tasks (any with status 'failed'/'cancelled' are
+ *               considered failures for aggregation purposes).
+ */
+export function aggregateFailurePatterns(tasks: FailurePatternInput[]): FailurePattern[] {
+	const failed = tasks.filter(
+		(t) => t.status === "failed" || t.status === "cancelled",
+	);
+	if (failed.length === 0) return [];
+	const buckets = new Map<string, FailurePattern>();
+	for (const t of failed) {
+		const signature = normalizeErrorSignature(t.error);
+		const existing = buckets.get(signature);
+		if (existing) {
+			existing.taskIds.push(t.id);
+			existing.count += 1;
+			// Keep the shortest non-empty variant as representative (most readable).
+			if (t.error && (!existing.representative || t.error.length < existing.representative.length)) {
+				existing.representative = t.error;
+			}
+		} else {
+			buckets.set(signature, {
+				signature,
+				representative: t.error ?? "(no error detail)",
+				taskIds: [t.id],
+				count: 1,
+			});
+		}
+	}
+	// Only patterns with >=2 members (repeated root causes).
+	return [...buckets.values()]
+		.filter((b) => b.count >= 2)
+		.sort((a, b) => b.count - a.count);
+}
+/**
+ * Render failure patterns as human-readable lines for the `summary` action.
+ * Returns [] when there are no repeated patterns (so the caller can omit the
+ * section entirely).
+ *
+ * Example output:
+ *   Common failure patterns (5 of 6 failures share 2 root causes):
+ *   - [×3] model routing fallback failed: all 2 candidates exhausted
+ *       tasks: 02_exec, 03_exec, 04_exec
+ *   - [×2] EPERM: operation not permitted, rename
+ *       tasks: 05_exec, 06_exec
+ */
+export function formatFailurePatterns(tasks: FailurePatternInput[]): string[] {
+	const patterns = aggregateFailurePatterns(tasks);
+	if (patterns.length === 0) return [];
+	const failedCount = tasks.filter(
+		(t) => t.status === "failed" || t.status === "cancelled",
+	).length;
+	const groupedCount = patterns.reduce((sum, p) => sum + p.count, 0);
+	const lines = [
+		`Common failure patterns (${groupedCount} of ${failedCount} failures share ${patterns.length} root cause${patterns.length === 1 ? "" : "s"}):`,
+	];
+	for (const p of patterns) {
+		const rep = p.representative.length > 100 ? `${p.representative.slice(0, 99)}…` : p.representative;
+		lines.push(`- [×${p.count}] ${rep}`);
+		const shown = p.taskIds.slice(0, 6);
+		const more = p.taskIds.length > 6 ? `, +${p.taskIds.length - 6} more` : "";
+		lines.push(`    tasks: ${shown.join(", ")}${more}`);
+	}
+	return lines;
+}

package/src/extension/team-tool/inspect.ts CHANGED Viewed

@@ -5,13 +5,15 @@ import { aggregateUsage, formatUsage, formatCostReport } from "../../state/usage
 import type { PiTeamsToolResult } from "../tool-result.ts";
 import { locateRunCwd } from "../team-tool.ts";
 import { result, type TeamContext } from "./context.ts";
+import { RUN_NOT_FOUND_HINT } from "./run-not-found.ts";
+import { formatFailurePatterns } from "./failure-patterns.ts";
 export function handleEvents(params: TeamToolParamsValue, ctx: TeamContext): PiTeamsToolResult {
 	if (!params.runId) return result("Events requires runId.", { action: "events", status: "error" }, true);
 	const runCwd = locateRunCwd(params.runId, ctx.cwd);
-	if (!runCwd) return result(`Run '${params.runId}' not found.`, { action: "events", status: "error" }, true);
+	if (!runCwd) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "events", status: "error" }, true);
 	const loaded = loadRunManifestById(runCwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "events", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "events", status: "error" }, true);
 	const events = readEvents(loaded.manifest.eventsPath);
 	const lines = [`Events for ${loaded.manifest.runId}:`, ...(events.length ? events.map((event) => `${event.time} ${event.type}${event.taskId ? ` ${event.taskId}` : ""}${event.message ? `: ${event.message}` : ""}${event.data ? ` ${JSON.stringify(event.data)}` : ""}`) : ["(none)"])];
 	return result(lines.join("\n"), { action: "events", status: "ok", runId: loaded.manifest.runId, artifactsRoot: loaded.manifest.artifactsRoot });
@@ -20,9 +22,9 @@ export function handleEvents(params: TeamToolParamsValue, ctx: TeamContext): PiT
 export function handleArtifacts(params: TeamToolParamsValue, ctx: TeamContext): PiTeamsToolResult {
 	if (!params.runId) return result("Artifacts requires runId.", { action: "artifacts", status: "error" }, true);
 	const runCwd = locateRunCwd(params.runId, ctx.cwd);
-	if (!runCwd) return result(`Run '${params.runId}' not found.`, { action: "artifacts", status: "error" }, true);
+	if (!runCwd) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "artifacts", status: "error" }, true);
 	const loaded = loadRunManifestById(runCwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "artifacts", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "artifacts", status: "error" }, true);
 	const lines = [`Artifacts for ${loaded.manifest.runId}:`, ...(loaded.manifest.artifacts.length ? loaded.manifest.artifacts.map((artifact) => `- ${artifact.kind}: ${artifact.path}${artifact.sizeBytes !== undefined ? ` (${artifact.sizeBytes} bytes)` : ""}${artifact.contentHash ? ` sha256=${artifact.contentHash.slice(0, 12)}` : ""}`) : ["- (none)"])];
 	return result(lines.join("\n"), { action: "artifacts", status: "ok", runId: loaded.manifest.runId, artifactsRoot: loaded.manifest.artifactsRoot });
 }
@@ -30,10 +32,11 @@ export function handleArtifacts(params: TeamToolParamsValue, ctx: TeamContext):
 export function handleSummary(params: TeamToolParamsValue, ctx: TeamContext): PiTeamsToolResult {
 	if (!params.runId) return result("Summary requires runId.", { action: "summary", status: "error" }, true);
 	const runCwd = locateRunCwd(params.runId, ctx.cwd);
-	if (!runCwd) return result(`Run '${params.runId}' not found.`, { action: "summary", status: "error" }, true);
+	if (!runCwd) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "summary", status: "error" }, true);
 	const loaded = loadRunManifestById(runCwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "summary", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "summary", status: "error" }, true);
 	const usage = aggregateUsage(loaded.tasks);
+	const failurePatternLines = formatFailurePatterns(loaded.tasks);
 	const lines = [
 		`Summary for ${loaded.manifest.runId}`,
 		`Status: ${loaded.manifest.status}`,
@@ -43,6 +46,7 @@ export function handleSummary(params: TeamToolParamsValue, ctx: TeamContext): Pi
 		`Usage: ${formatUsage(usage)}`,
 		"",
 		formatCostReport(loaded.tasks),
+		...(failurePatternLines.length > 0 ? ["", ...failurePatternLines] : []),
 		"",
 		"Tasks:",
 		...loaded.tasks.map((task) => `- ${task.id}: ${task.status} (${task.role} -> ${task.agent})${task.error ? ` - ${task.error}` : ""}`),

package/src/extension/team-tool/lifecycle-actions.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import { importRunBundle } from "../run-import.ts";
 import { pruneFinishedRuns } from "../run-maintenance.ts";
 import type { PiTeamsToolResult } from "../tool-result.ts";
 import { configRecord, result, type TeamContext } from "./context.ts";
+import { RUN_NOT_FOUND_HINT } from "./run-not-found.ts";
 import { enforceDestructiveIntent, intentFromConfig } from "./intent-policy.ts";
 import { executeHook, appendHookEvent } from "../../hooks/registry.ts";
 import { resolveRealContainedPath } from "../../utils/safe-paths.ts";
@@ -18,7 +19,7 @@ import * as path from "node:path";
 export function handleWorktrees(params: TeamToolParamsValue, ctx: TeamContext): PiTeamsToolResult {
 	if (!params.runId) return result("Worktrees requires runId.", { action: "worktrees", status: "error" }, true);
 	const loaded = loadRunManifestById(ctx.cwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "worktrees", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "worktrees", status: "error" }, true);
 	const withWorktrees = loaded.tasks.filter((task) => task.worktree);
 	const lines = [`Worktrees for ${loaded.manifest.runId}:`, ...(withWorktrees.length ? withWorktrees.map((task) => `- ${task.id}: ${task.worktree!.path} branch=${task.worktree!.branch} reused=${task.worktree!.reused ? "true" : "false"}`) : ["- (none)"])];
 	return result(lines.join("\n"), { action: "worktrees", status: "ok", runId: loaded.manifest.runId, artifactsRoot: loaded.manifest.artifactsRoot });
@@ -47,7 +48,7 @@ export function handleImport(params: TeamToolParamsValue, ctx: TeamContext): PiT
 export async function handleExport(params: TeamToolParamsValue, ctx: TeamContext): Promise<PiTeamsToolResult> {
 	if (!params.runId) return result("Export requires runId.", { action: "export", status: "error" }, true);
 	const loaded = loadRunManifestById(ctx.cwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "export", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "export", status: "error" }, true);
 	// SECURITY: Ownership check — only the owner session may export a run.
 	// Foreign-run export requires confirm: true (explicit user intent).
@@ -96,7 +97,7 @@ export async function handleForget(params: TeamToolParamsValue, ctx: TeamContext
 	if (!params.runId) return result("Forget requires runId.", { action: "forget", status: "error" }, true);
 	if (!params.confirm) return result("forget requires confirm: true.", { action: "forget", status: "error" }, true);
 	const loaded = loadRunManifestById(ctx.cwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "forget", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "forget", status: "error" }, true);
 	// Ownership check — prevent cross-session deletion unless force is set
 	const foreignRun = typeof loaded.manifest.ownerSessionId === "string" && loaded.manifest.ownerSessionId !== ctx.sessionId;
@@ -126,7 +127,7 @@ export async function handleCleanup(params: TeamToolParamsValue, ctx: TeamContex
 	if (intentError) return intentError;
 	if (!params.runId) return result("Cleanup requires runId.", { action: "cleanup", status: "error" }, true);
 	const loaded = loadRunManifestById(ctx.cwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "cleanup", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "cleanup", status: "error" }, true);
 	// Ownership check — prevent cross-session worktree cleanup unless force is set
 	const foreignRun = typeof loaded.manifest.ownerSessionId === "string" && loaded.manifest.ownerSessionId !== ctx.sessionId;

package/src/extension/team-tool/respond.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { logInternalError } from "../../utils/internal-error.ts";
 import type { PiTeamsToolResult } from "../tool-result.ts";
 import { locateRunCwd } from "../team-tool.ts";
 import { result, type TeamContext } from "./context.ts";
+import { RUN_NOT_FOUND_HINT } from "./run-not-found.ts";
 /**
  * Handle `respond` action: send a message to a waiting (interactive) task.
@@ -19,13 +20,13 @@ export function handleRespond(params: TeamToolParamsValue, ctx: TeamContext): Pi
 	if (!params.message && !params.taskId) return result("Respond requires taskId and/or message.", { action: "respond", status: "error" }, true);
 	const runCwd = locateRunCwd(params.runId, ctx.cwd);
-	if (!runCwd) return result(`Run '${params.runId}' not found.`, { action: "respond", status: "error" }, true);
+	if (!runCwd) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "respond", status: "error" }, true);
 	const loaded = loadRunManifestById(runCwd, params.runId); // NOTE: no withRunLock - best-effort only; concurrent writes may cause inconsistency
-	if (!loaded) return result(`Run '${params.runId}' not found.`, { action: "respond", status: "error" }, true);
+	if (!loaded) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "respond", status: "error" }, true);
 	return withRunLockSync(loaded.manifest, () => {
 		const fresh = loadRunManifestById(loaded.manifest.cwd, params.runId!); // NOTE: inside withRunLockSync - consistent read
-		if (!fresh) return result(`Run '${params.runId}' not found.`, { action: "respond", status: "error" }, true);
+		if (!fresh) return result(`Run '${params.runId}' not found.${RUN_NOT_FOUND_HINT}`, { action: "respond", status: "error" }, true);
 		const foreignRun = typeof fresh.manifest.ownerSessionId === "string" && fresh.manifest.ownerSessionId !== ctx.sessionId;
 		if (foreignRun && !params.force) return result(`Run ${fresh.manifest.runId} belongs to another session. Use force: true to override.`, { action: "respond", status: "error", runId: fresh.manifest.runId }, true);

package/src/extension/team-tool/run-not-found.ts ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * run-not-found.ts — Centralized "Run not found" error helper (DX: F2).
+ *
+ * Round 16 DX audit found that a stale/typo'd runId hits a blank
+ * "Run '<id>' not found." wall in 8+ handlers (status, resume, steer, export,
+ * forget, cleanup, invalidate, worktrees, events, artifacts). The run IDs are
+ * long (`team_20260615173318_b9c8fe49a74e0760`), so typos/truncation are
+ * near-certain for new users — yet `team list` (which shows recent runs) is
+ * never suggested.
+ *
+ * This module centralizes the message + recovery hint so every handler stays
+ * consistent and the hint never drifts.
+ */
+import { result, type TeamContext } from "./context.ts";
+import type { TeamToolDetails } from "../team-tool-types.ts";
+/**
+ * Recovery hint appended to every "Run not found" message.
+ *
+ * The text begins with a blank-line separator ("\n\n"), so it can be
+ * concatenated directly after the "... not found." sentence without the
+ * caller adding any spacing of its own. It points the user at action='list'
+ * as the way to look up recent run IDs.
+ */
+export const RUN_NOT_FOUND_HINT =
+	"\n\nTip: run action='list' to see recent runs and their IDs.";
+/**
+ * Build the standard "Run not found" error result with a recovery hint.
+ *
+ * The message is "Run '<runId>' not found." immediately followed by
+ * RUN_NOT_FOUND_HINT. The details object carries the attempted action and
+ * status "error", and is checked against TeamToolDetails via `satisfies`
+ * (so its shape is validated without widening the literal).
+ *
+ * @param runId  the (missing/typo'd) run id the caller passed
+ * @param action the action that was attempted (for the details.action field)
+ * @returns whatever `result(...)` produces for that message and details. The
+ *          trailing `true` argument is presumably the "is error" flag —
+ *          NOTE(review): confirm against `result` in ./context.ts.
+ */
+export function runNotFound(runId: string, action: string): ReturnType<typeof result> {
+	return result(
+		`Run '${runId}' not found.${RUN_NOT_FOUND_HINT}`,
+		{ action, status: "error" } satisfies TeamToolDetails,
+		true,
+	);
+}
+/**
+ * Helper: resolve a runId to its cwd, returning a runNotFound() result when
+ * missing. Reduces the boilerplate `locateRunCwd → if (!runCwd) return ...`
+ * duplicated across handlers.
+ *
+ * The lookup itself is delegated to the `locate` callback, which is invoked
+ * exactly once as `locate(runId, cwd)`. Any falsy return value counts as
+ * "not found" — that includes an empty string as well as `undefined`.
+ *
+ * @param runId  the run id to resolve
+ * @param action the action being attempted; forwarded to runNotFound() so it
+ *               ends up in the error result's details.action field
+ * @param cwd    passed through unchanged as the second argument to `locate`
+ * @param locate lookup function mapping (runId, cwd) to the run's cwd, or
+ *               `undefined` when no such run exists
+ * @returns a tagged union: `{ kind: "found", runCwd }` when `locate` returned
+ *          a non-empty path, otherwise `{ kind: "notfound", result }` where
+ *          `result` is the ready-to-return runNotFound() error result
+ */
+export function resolveRunOrNotFound(
+	runId: string,
+	action: string,
+	cwd: string,
+	locate: (runId: string, cwd: string) => string | undefined,
+): { kind: "found"; runCwd: string } | { kind: "notfound"; result: ReturnType<typeof result> } {
+	const runCwd = locate(runId, cwd);
+	if (!runCwd) return { kind: "notfound", result: runNotFound(runId, action) };
+	return { kind: "found", runCwd };
+}
+// Re-export TeamContext so callers importing this helper don't need a second
+// import line — keeps the diff in each handler to a single import swap.
+export type { TeamContext };

package/src/extension/team-tool/run.ts CHANGED Viewed

@@ -184,13 +184,17 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
 		// connecting PipelineRunner to the actual team execution system
 		const stageInfo = pipelineWorkflow.stages.map((s) => `- ${s.name} (${s.team})`).join("\n");
 		return result([
-			`Pipeline workflow: ${workflow.name}`,
+			`Pipeline workflow '${workflow.name}' is not yet wired into the team execution system.`,
 			`Goal: ${goal}`,
-			`Stages (${pipelineWorkflow.stages.length}):`,
+			`Defined stages (${pipelineWorkflow.stages.length}):`,
 			stageInfo,
 			"",
-			"Pipeline execution is available via the PipelineRunner API.",
-			"Full CLI integration requires connecting to the team execution system.",
+			"To actually run work right now, use a supported workflow instead:",
+			"  - action='run' workflow='default'  (explore → plan → execute → verify)",
+			"  - action='run' workflow='implementation'  (adaptive, parallel specialists)",
+			"  - action='run' workflow='research'  (explore → analyze → write)",
+			"",
+			"Run action='list' resource='workflow' to see all available workflows.",
 		].join("\n"), { action: "run", status: "ok" }, false);
 	}
@@ -219,6 +223,24 @@ export async function handleRun(params: TeamToolParamsValue, ctx: TeamContext):
 	registerActiveRun(updatedManifest);
 	const loadedConfig = loadConfig(resolvedCtx.cwd);
+	// DX (Round 16 F4): surface config errors/warnings instead of silently
+	// proceeding with defaults. Non-blocking: emit a config.warning event so
+	// it shows in the run timeline and status, and log it. A malformed config
+	// (bad JSON / wrong types) should not be a silent no-op — doctor/config
+	// actions already surface these; run should too.
+	const configIssues = [
+		...(loadedConfig.error ? [`Config error: ${loadedConfig.error}`] : []),
+		...(loadedConfig.warnings ?? []),
+	];
+	if (configIssues.length > 0) {
+		void appendEventAsync(updatedManifest.eventsPath, {
+			type: "config.warning",
+			runId: updatedManifest.runId,
+			message: `Loaded config from ${loadedConfig.path || "(defaults)"} with ${configIssues.length} issue(s): ${configIssues.join("; ")}`,
+			data: { error: loadedConfig.error, warnings: loadedConfig.warnings, path: loadedConfig.path },
+		}).catch((error) => logInternalError("team-tool.run.configWarning", error, `runId=${updatedManifest.runId}`));
+		logInternalError("team-tool.run.configWarning", new Error(`config issues: ${configIssues.join("; ")}`), `runId=${updatedManifest.runId} path=${loadedConfig.path ?? "(defaults)"}`);
+	}
 	const executedConfig = effectiveRunConfig(loadedConfig.config, params.config);
 	const runtime = await resolveCrewRuntime(executedConfig);
 	const runtimeResolution = runtimeResolutionState(runtime);