npm - pi-crew - Versions diffs - 0.9.8 → 0.9.10 - Mend

pi-crew 0.9.8 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/CHANGELOG.md +311 -0
package/README.md +2 -2
package/docs/fixes/v0.9.10/locks-fix-verify.md +3 -0
package/docs/fixes/v0.9.10/smoke-test.md +12 -0
package/package.json +1 -1
package/src/extension/register.ts +94 -21
package/src/extension/registration/subagent-helpers.ts +1 -0
package/src/extension/registration/subagent-tools.ts +9 -0
package/src/extension/team-tool/doctor.ts +41 -18
package/src/runtime/batch-barrier.ts +145 -0
package/src/runtime/child-pi.ts +135 -22
package/src/runtime/compact-pipeline.ts +56 -0
package/src/runtime/compact-stages/ansi-strip-stage.ts +25 -0
package/src/runtime/compact-stages/blank-collapse-stage.ts +31 -0
package/src/runtime/compact-stages/deduplicate-stage.ts +34 -0
package/src/runtime/compact-stages/head-snap-stage.ts +57 -0
package/src/runtime/compact-stages/index.ts +13 -0
package/src/runtime/compact-stages/tail-capture-stage.ts +72 -0
package/src/runtime/compact-stages/truncation-stage.ts +71 -0
package/src/runtime/crash-classification.ts +208 -0
package/src/runtime/custom-tools/irc-tool.ts +47 -7
package/src/runtime/handoff-manager.ts +10 -0
package/src/runtime/important-line-classifier.ts +130 -0
package/src/runtime/iteration-hooks.ts +7 -19
package/src/runtime/live-agent-manager.ts +185 -0
package/src/runtime/live-session-runtime.ts +50 -1
package/src/runtime/model-fallback.ts +29 -1
package/src/runtime/process-lifecycle.ts +481 -0
package/src/runtime/role-permission.ts +2 -2
package/src/runtime/stream-preview.ts +9 -2
package/src/runtime/subagent-manager.ts +6 -0
package/src/runtime/task-output-context.ts +209 -24
package/src/runtime/task-runner.ts +76 -15
package/src/runtime/tool-output-pruner.ts +334 -0
package/src/state/locks.ts +16 -0
package/src/state/state-store.ts +8 -2
package/src/state/types.ts +5 -0
package/src/ui/live-run-sidebar.ts +6 -1
package/src/ui/loaders.ts +24 -4
package/src/ui/run-dashboard.ts +6 -1
package/src/ui/run-event-bus.ts +1 -1
package/src/ui/run-snapshot-cache.ts +50 -16
package/src/ui/widget/index.ts +27 -5
package/src/ui/widget/widget-renderer.ts +43 -13
package/src/utils/redaction.ts +17 -1
package/src/utils/visual.ts +6 -0
package/src/ui/crew-widget.ts +0 -544

package/src/runtime/task-output-context.ts CHANGED Viewed

@@ -4,6 +4,9 @@ import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../stat
 import { writeArtifact } from "../state/artifact-store.ts";
 import { resolveRealContainedPath } from "../utils/safe-paths.ts";
 import type { WorkflowStep } from "../workflows/workflow-config.ts";
+import { pruneToolOutputs, type ToolResultEntry, type FileEditEvent, DEFAULT_PRUNE_CONFIG } from "./tool-output-pruner.ts";
+import { applyCompactPipeline } from "./compact-pipeline.ts";
+import { ANSI_STRIP_STAGE, BLANK_COLLAPSE_STAGE, TruncationStage } from "./compact-stages/index.ts";
 export interface DependencyContextEntry {
 	taskId: string;
@@ -18,7 +21,14 @@ export interface DependencyContextEntry {
 export interface DependencyOutputContext {
 	dependencies: DependencyContextEntry[];
-	sharedReads: Array<{ name: string; path: string; content: string }>;
+	/**
+	 * Each shared artifact read, truncated for inline injection. When the
+	 * content exceeds 2× MAX_RESULT_INLINE_BYTES characters, the FULL content
+	 * is also teed to `${artifactsRoot}/tee/${taskId}-${name}.full.txt` (ids
+	 * sanitized); `fullOutputPath` is set only when that write succeeds, so
+	 * the downstream worker can `read` it back if it needs the dropped middle.
+	 */
+	sharedReads: Array<{ name: string; path: string; content: string; fullOutputPath?: string }>;
 }
 function containedExists(filePath: string, baseDir?: string): boolean {
@@ -38,35 +48,127 @@ function containedExists(filePath: string, baseDir?: string): boolean {
  * (24K/40K/80K) which truncated the same artifact differently depending on
  * which code path read it.
  */
-const MAX_RESULT_INLINE_BYTES = 32_000;
+export const MAX_RESULT_INLINE_BYTES = 32_000;
-function readIfSmall(filePath: string, baseDir?: string): string | undefined {
-	const maxBytes = MAX_RESULT_INLINE_BYTES;
+/**
+ * Options that enable tee-recovery in {@link readIfSmallWithTee}. When a
+ * file is truncated for inline use and its content is longer than
+ * 2× {@link MAX_RESULT_INLINE_BYTES} characters, the full (non-truncated)
+ * content is additionally written to `fullOutputPath` on a best-effort basis.
+ */
+export interface TeeRecoveryOptions {
+	/** Absolute path to write the full (non-truncated) content to. */
+	fullOutputPath: string;
+}
+export interface ReadIfSmallTeeResult {
+	/** Truncated content (or full content when no truncation). */
+	content: string;
+	/** Set only when tee was actually written (content length > 2× MAX_RESULT_INLINE_BYTES chars + write succeeded). */
+	fullOutputPath?: string;
+}
+/**
+ * Sanitize a taskId / artifactName into a flat tee filename. Any character
+ * outside [A-Za-z0-9._-] is replaced with underscore so the resulting path
+ * is always single-segment and cannot escape the tee directory.
+ */
+function safeTeeName(taskId: string, artifactName: string): string {
+	const safe = (s: string): string => s.replace(/[^A-Za-z0-9._-]/g, "_");
+	return `${safe(taskId)}-${safe(artifactName)}.full.txt`;
+}
+/**
+ * Canonical tee path for a shared artifact read.
+ *
+ * Format: `${artifactsRoot}/tee/${taskId}-${artifactName}.full.txt`
+ *
+ * The downstream worker prompt includes this path so the worker can `read`
+ * the full content when it needs the dropped middle.
+ */
+export function teePathForArtifact(artifactsRoot: string, taskId: string, artifactName: string): string {
+	return path.join(artifactsRoot, "tee", safeTeeName(taskId, artifactName));
+}
+/**
+ * Best-effort tee write. Returns true on success, false on any error (write
+ * failures are silent — tee is enhancement, never a hard dependency). The
+ * truncated inline content is still returned by the caller either way.
+ */
+function writeTeeFile(fullOutputPath: string, content: string): boolean {
 	try {
-		const safePath = baseDir ? resolveRealContainedPath(baseDir, filePath) : filePath;
-		const stat = fs.statSync(safePath);
-		if (stat.size > maxBytes) {
-			// L4: head + tail instead of head-only. Keeps closing markdown
-			// structure (code fences, headings) instead of leaving them truncated.
-			const head = Math.floor(maxBytes * 0.75);
-			const tail = maxBytes - head;
-			const headBuf = Buffer.alloc(head);
-			const tailBuf = Buffer.alloc(tail);
-			const fd = fs.openSync(safePath, "r");
-			try {
-				fs.readSync(fd, headBuf, 0, head, 0);
-				fs.readSync(fd, tailBuf, 0, tail, stat.size - tail);
-			} finally {
-				fs.closeSync(fd);
+		fs.mkdirSync(path.dirname(fullOutputPath), { recursive: true });
+		fs.writeFileSync(fullOutputPath, content, "utf-8");
+		return true;
+	} catch {
+		return false;
+	}
+}
+/**
+ * Read a file with optional tee-recovery (P1-A). Returns the truncated
+ * content AND (when tee was actually written) the absolute path to the full
+ * file. Returns undefined if the file cannot be read at all.
+ *
+ * Tee threshold: only when content.length > 2 * MAX_RESULT_INLINE_BYTES
+ * (the head+tail is materially lossy — small over-threshold files are not
+ * teed because the inline content is mostly intact and the worker can live
+ * with the 75/25 split). File content is read once and reused for both the
+ * pipeline (truncation) and the tee write (full file).
+ *
+ * Truncation goes through the compact pipeline: ANSI strip + blank collapse
+ * BEFORE truncation, then TruncationStage with important-line preservation
+ * (P0-B). NOTE(review): the marker is configured with unit "chars", while the
+ * replaced byte-based `readIfSmall` reported "bytes" — confirm L4 wording.
+ */
+export function readIfSmallWithTee(
+	filePath: string,
+	opts: { baseDir?: string; tee?: TeeRecoveryOptions } = {},
+): ReadIfSmallTeeResult | undefined {
+	const maxChars = MAX_RESULT_INLINE_BYTES;
+	try {
+		const safePath = opts.baseDir ? resolveRealContainedPath(opts.baseDir, filePath) : filePath;
+		const content = fs.readFileSync(safePath, "utf-8");
+		if (content.length > maxChars) {
+			let fullOutputPath: string | undefined;
+			// Tee only when truncation is materially lossy (>2× threshold).
+			if (opts.tee && content.length > maxChars * 2) {
+				if (writeTeeFile(opts.tee.fullOutputPath, content)) {
+					fullOutputPath = opts.tee.fullOutputPath;
+				}
 			}
-			return `${headBuf.toString("utf-8")}\n\n...[pi-crew truncated ${stat.size - maxBytes} bytes, head+tail preserved]...\n${tailBuf.toString("utf-8")}`;
+			const result = applyCompactPipeline(content, [
+				ANSI_STRIP_STAGE,
+				BLANK_COLLAPSE_STAGE,
+				new TruncationStage(maxChars, {
+					preserveImportant: true,
+					marker: { verb: "truncated", unit: "chars", headSeparator: "\n\n", tailSeparator: "\n" },
+				}),
+			]);
+			return fullOutputPath ? { content: result.text, fullOutputPath } : { content: result.text };
 		}
-		return fs.readFileSync(safePath, "utf-8");
+		return { content };
 	} catch {
 		return undefined;
 	}
 }
+/**
+ * Read a file and return its content, truncating to a head+tail slice if it
+ * exceeds {@link MAX_RESULT_INLINE_BYTES} characters. Multi-byte UTF-8
+ * sequences are preserved by reading the full file as a UTF-8 string and
+ * slicing by character count (not raw bytes).
+ *
+ * Thin wrapper around {@link readIfSmallWithTee} for backward compatibility
+ * — callers that do not need tee-recovery metadata get just the content
+ * string. New tee-recovery call sites should use {@link readIfSmallWithTee}
+ * directly so they can include the full output path in the worker prompt.
+ */
+export function readIfSmall(filePath: string, baseDir?: string): string | undefined {
+	const result = readIfSmallWithTee(filePath, { baseDir });
+	return result?.content;
+}
 function safeSharedName(name: string): string {
 	const normalized = name.replaceAll("\\", "/").replace(/^\.\/+/, "");
 	if (!normalized || normalized.split("/").some((segment) => segment === "..") || path.isAbsolute(normalized)) throw new Error(`Invalid shared artifact name: ${name}`);
@@ -111,6 +213,56 @@ function aggregateUsage(task: TeamTaskState): DependencyContextEntry["usage"] {
 	return { inputTokens, outputTokens, durationMs };
 }
+/**
+ * Apply staleness-aware pruning to shared reads before they are injected
+ * into a downstream worker's prompt. Converts shared reads to generic
+ * {@link ToolResultEntry}s (toolName="read") and file edits from dependency
+ * artifacts, then delegates to {@link pruneToolOutputs}. Superseded reads
+ * (same base file re-read, or file edited by a later dependency) are replaced
+ * with compact digest notices, reducing context bloat.
+ *
+ * OPT-IN: the default prune config protects recent results and only fires
+ * when minimum-savings hysteresis is met, so small/unique reads pass through
+ * unchanged.
+ */
+function pruneSharedReads(
+	reads: Array<{ name: string; path: string; content: string }>,
+	dependencies: DependencyContextEntry[],
+	artifactsRoot: string,
+): Array<{ name: string; path: string; content: string }> {
+	if (reads.length === 0) return reads;
+	// Convert shared reads to tool result entries (ordered oldest → newest
+	// by position in the reads array — earlier entries are "older").
+	const entries: ToolResultEntry[] = reads.map((read, index) => ({
+		id: `shared-read-${index}`,
+		toolName: "read",
+		target: read.path,
+		content: read.content,
+	}));
+	// Collect file edit events from dependency artifacts produced to shared/.
+	// The intent: a dependency that wrote a shared file after an earlier read
+	// makes that read stale. Artifact entries from listTaskArtifacts() are
+	// already relative to artifactsRoot (e.g. "shared/foo.md"), so resolve
+	// against artifactsRoot — NOT a "shared" subdirectory (double-prefix).
+	// NOTE(review): `fileEdits` is populated below but is not passed to the
+	// pruneToolOutputs() call in this function — confirm the edits are wired in.
+	const fileEdits: FileEditEvent[] = [];
+	for (let depIndex = 0; depIndex < dependencies.length; depIndex++) {
+		const dep = dependencies[depIndex]!;
+		const produced = dep.artifactsProduced ?? [];
+		for (const artifact of produced) {
+			if (typeof artifact !== "string") continue;
+			// Map artifact path (relative to artifactsRoot) to absolute and
+			// check against read targets.
+			fileEdits.push({ target: path.resolve(artifactsRoot, artifact), index: reads.length + depIndex });
+		}
+	}
+	const pruned = pruneToolOutputs(entries, DEFAULT_PRUNE_CONFIG);
+	if (pruned.prunedCount === 0) return reads;
+	// Map pruned entries back to the shared-read shape.
+	return pruned.results.map((entry, index) => ({ ...reads[index]!, content: entry.content }));
+}
 export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks: TeamTaskState[], task: TeamTaskState, step: WorkflowStep): DependencyOutputContext {
 	const byStep = new Map(tasks.map((item) => [item.stepId, item]).filter((entry): entry is [string, TeamTaskState] => Boolean(entry[0])));
 	const byId = new Map(tasks.map((item) => [item.id, item]));
@@ -127,10 +279,35 @@ export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks:
 			usage: aggregateUsage(item),
 		};
 	});
-	const sharedReads = (step.reads === false ? [] : step.reads ?? []).map((name) => {
+	const rawSharedReads = (step.reads === false ? [] : step.reads ?? []).map((name) => {
 		const filePath = sharedPath(manifest, name);
-		return { name, path: filePath, content: readIfSmall(filePath, path.resolve(manifest.artifactsRoot, "shared")) ?? "" };
+		// P1-A tee-recovery: when the shared artifact is large enough that the
+		// 75/25 head+tail split is materially lossy (>2× MAX_RESULT_INLINE_BYTES),
+		// tee the full content to ${artifactsRoot}/tee/${taskId}-${name}.full.txt
+		// and expose the path so the downstream worker can `read` the full file
+		// if it needs the dropped middle. The truncated content is still
+		// included inline; tee is an enhancement, not a hard dependency. Tee
+		// write is best-effort (writeTeeFile swallows I/O errors and the result
+		// simply omits fullOutputPath in that case).
+		const teePath = teePathForArtifact(manifest.artifactsRoot, task.id, name);
+		const teeResult = readIfSmallWithTee(filePath, {
+			baseDir: path.resolve(manifest.artifactsRoot, "shared"),
+			tee: { fullOutputPath: teePath },
+		});
+		if (teeResult === undefined) return { name, path: filePath, content: "" };
+		const entry: { name: string; path: string; content: string; fullOutputPath?: string } = {
+			name,
+			path: filePath,
+			content: teeResult.content,
+		};
+		if (teeResult.fullOutputPath) entry.fullOutputPath = teeResult.fullOutputPath;
+		return entry;
 	}).filter((item) => item.content.trim().length > 0);
+	// Apply staleness-aware pruning to shared reads: drops superseded reads
+	// (same file re-read with different selectors) and replaces stale large
+	// outputs with compact digest notices before injecting into the worker
+	// prompt. OPT-IN: default config protects recent results.
+	const sharedReads = pruneSharedReads(rawSharedReads, dependencies, manifest.artifactsRoot);
 	return { dependencies, sharedReads };
 }
@@ -147,7 +324,15 @@ export function renderDependencyOutputContext(context: DependencyOutputContext):
 	}
 	if (context.sharedReads.length) {
 		parts.push("# Shared Run Context Reads", "");
-		for (const read of context.sharedReads) parts.push(`## shared/${read.name}`, `Path: ${read.path}`, "", read.content.trim(), "");
+		for (const read of context.sharedReads) {
+			parts.push(`## shared/${read.name}`, `Path: ${read.path}`);
+			// P1-A tee-recovery hint: when the file was materially truncated
+			// (>2× threshold) the full content was teed to fullOutputPath so the
+			// worker can read the dropped middle if needed. The path is inside
+			// artifactsRoot/tee/ and goes through the normal permission gate.
+			if (read.fullOutputPath) parts.push(`Full output (if you need the missing middle): ${read.fullOutputPath}`);
+			parts.push("", read.content.trim(), "");
+		}
 	}
 	return parts.join("\n").trim();
 }

package/src/runtime/task-runner.ts CHANGED Viewed

@@ -763,7 +763,30 @@ export async function runTeamTask(
 					"",
 				);
 				if (!error) break;
-				const nextModel = attemptModels[i + 1];
+				let nextModel = attemptModels[i + 1];
+				// FIX 1 (task packet 01_01-agent): when the precomputed attempt
+				// chain is exhausted but the failure is retryable, do a one-shot
+				// re-resolve via buildConfiguredModelRouting with the failed
+				// model as parent. This finds alternative providers/models the
+				// original chain missed (e.g. a registry gained new fallbacks
+				// after the precompute, or the precompute ran before the parent
+				// model was known). If a different candidate is found, use it as
+				// nextModel; otherwise fall through to the existing break.
+				if (!nextModel && isRetryableModelFailure(error)) {
+					const reResolved = buildConfiguredModelRouting({
+						overrideModel: undefined,
+						stepModel: undefined,
+						teamRoleModel: undefined,
+						agentModel: undefined,
+						fallbackModels: undefined,
+						parentModel: attempt.model,
+						modelRegistry: input.modelRegistry,
+						cwd: task.cwd,
+						scopeModelsPatterns: await resolveTaskScopeModelsPatterns(task.cwd),
+					});
+					const alt = reResolved.candidates.find((c) => c !== attempt.model);
+					if (alt) nextModel = alt;
+				}
 				if (!nextModel || !isRetryableModelFailure(error)) break;
 				logs.push(formatModelAttemptNote(attempt, nextModel), "");
 			}
@@ -1368,19 +1391,57 @@ async function resolveTaskScopeModelsPatterns(cwd: string): Promise<string[]> {
  * or when there are no retryable error messages.
  */
 export function detectRetryableModelFailureFromOutput(parsed: ParsedPiJsonOutput): string | undefined {
+	// Primary signal: pre-extracted `errorMessages` (from pi-json-output parser).
+	// The parser already filters to non-empty trimmed strings from message_end
+	// events.
 	const messages = parsed.errorMessages;
-	if (!messages || messages.length === 0) return undefined;
-	// Find the first retryable model-failure message (429 / rate-limit / overloaded / 5xx / ...).
-	const retryable = messages.find((m) => isRetryableModelFailure(m));
-	if (!retryable) return undefined;
-	// Did the run actually produce real output despite the transient errors?
-	// If finalText / textEvents / patches exist, the model recovered and we
-	// should NOT mark the run as failed — only flag it when the worker yielded
-	// nothing (the 429-only case from the bug report).
-	const hasRealOutput =
-		(parsed.finalText?.trim().length ?? 0) > 0 ||
-		parsed.textEvents.some((t) => t.trim().length > 0) ||
-		(parsed.patches?.length ?? 0) > 0;
-	if (hasRealOutput) return undefined;
-	return `Model returned only retryable errors and no output: ${retryable}`;
+	if (messages && messages.length > 0) {
+		// Find the first retryable model-failure message
+		// (429 / rate-limit / overloaded / 5xx / ...).
+		const retryable = messages.find((m) => isRetryableModelFailure(m));
+		if (retryable) {
+			// Did the run actually produce real output despite the transient errors?
+			// If finalText / textEvents / patches exist, the model recovered and we
+			// should NOT mark the run as failed — only flag it when the worker
+			// yielded nothing (the 429-only case from the bug report).
+			const hasRealOutput =
+				(parsed.finalText?.trim().length ?? 0) > 0 ||
+				parsed.textEvents.some((t) => t.trim().length > 0) ||
+				(parsed.patches?.length ?? 0) > 0;
+			if (hasRealOutput) return undefined;
+			return `Model returned only retryable errors and no output: ${retryable}`;
+		}
+	}
+	// Secondary signal (FIX 3, task packet 01_01-agent): inspect a raw
+	// `messageEndEvents` (or `transcript`) array on the parsed output. The
+	// ParsedPiJsonOutput type does not currently declare this field, so we
+	// read it through a local extension cast. Callers that pass it (tests, a
+	// future parser that captures the full event stream) get a second chance
+	// to surface retryable failures. Primary path still wins when it matches.
+	const raw = parsed as ParsedPiJsonOutput & {
+		messageEndEvents?: unknown;
+		transcript?: unknown;
+	};
+	const eventSource = Array.isArray(raw.messageEndEvents)
+		? raw.messageEndEvents
+		: Array.isArray(raw.transcript)
+			? raw.transcript
+			: undefined;
+	if (!eventSource || eventSource.length === 0) return undefined;
+	for (const candidate of eventSource) {
+		if (!candidate || typeof candidate !== "object") continue;
+		const event = candidate as { stopReason?: unknown; errorMessage?: unknown };
+		if (event.stopReason !== "error") continue;
+		if (typeof event.errorMessage !== "string" || event.errorMessage.length === 0) continue;
+		if (!isRetryableModelFailure(event.errorMessage)) continue;
+		// Same real-output gate as the primary signal — don't flag runs that
+		// recovered with real final text / patches.
+		const hasRealOutput =
+			(parsed.finalText?.trim().length ?? 0) > 0 ||
+			parsed.textEvents.some((t) => t.trim().length > 0) ||
+			(parsed.patches?.length ?? 0) > 0;
+		if (hasRealOutput) return undefined;
+		return `Model returned only retryable errors and no output: ${event.errorMessage}`;
+	}
+	return undefined;
 }