npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.3.0 → 15.3.2 - Mend

@oh-my-pi/pi-coding-agent 15.3.0 → 15.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +15 -0
package/dist/types/cli/auth-gateway-cli.d.ts +1 -1
package/dist/types/config/model-registry.d.ts +26 -0
package/dist/types/modes/components/status-line.d.ts +6 -0
package/dist/types/session/session-manager.d.ts +10 -0
package/dist/types/task/types.d.ts +8 -0
package/package.json +7 -7
package/src/cli/auth-gateway-cli.ts +71 -2
package/src/commands/auth-gateway.ts +2 -0
package/src/config/model-registry.ts +46 -21
package/src/extensibility/plugins/marketplace/manager.ts +20 -1
package/src/hashline/parser.ts +44 -5
package/src/internal-urls/docs-index.generated.ts +3 -1
package/src/lsp/config.ts +87 -22
package/src/modes/components/status-line.ts +124 -31
package/src/modes/utils/context-usage.ts +18 -7
package/src/sdk.ts +4 -1
package/src/session/agent-session.ts +14 -2
package/src/session/session-manager.ts +68 -3
package/src/slash-commands/builtin-registry.ts +9 -4
package/src/task/executor.ts +29 -0
package/src/task/render.ts +53 -1
package/src/task/types.ts +8 -0
package/src/tools/jtd-to-json-schema.ts +5 -1
package/src/tools/read.ts +3 -35
package/src/utils/clipboard.ts +14 -3
package/src/utils/image-resize.ts +28 -5

package/src/modes/components/status-line.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import * as fs from "node:fs";
+import type { AgentMessage } from "@oh-my-pi/pi-agent-core";
 import { estimateTokens } from "@oh-my-pi/pi-agent-core/compaction";
 import { type Component, truncateToWidth, visibleWidth } from "@oh-my-pi/pi-tui";
 import { formatCount, getProjectDir } from "@oh-my-pi/pi-utils";
@@ -40,9 +41,102 @@ export interface StatusLineSettings {
 }
 // ═══════════════════════════════════════════════════════════════════════════
-// Rendering Helpers
+// Per-message token cache
 // ═══════════════════════════════════════════════════════════════════════════
+/**
+ * Symbol-keyed sidecar tagged onto each `AgentMessage` to memoize its
+ * `estimateTokens` result. Keyed by message identity (the object itself);
+ * a cheap content fingerprint detects in-place mutations (post-hoc error
+ * attachment, retry-truncated branch rebuild, etc.) and forces recompute.
+ *
+ * Cache lives on the message — multiple `StatusLineComponent` instances
+ * share it for free, and entries collect with the message itself when the
+ * conversation is replaced or compacted.
+ */
+const kTokenCache = Symbol("statusLine.tokenCache");
+interface TaggedMessage {
+	[kTokenCache]?: { fingerprint: string; tokens: number };
+}
+/**
+ * Cheap structural fingerprint used to validate the `estimateTokens` value
+ * cached on a message. The fields read here are meant to mirror what
+ * `estimateTokens` walks — TODO confirm against that implementation.
+ *
+ * The result is `role:timestamp:textLen:blocks:images`:
+ *  - `textLen` sums string `.length` values (UTF-16 code units, not bytes)
+ *    over the role-specific fields handled below;
+ *  - `blocks` is the length of an array-valued `content`;
+ *  - `images` counts image blocks, tallied for `toolResult` / `hookMessage`
+ *    content only (image blocks in `user` content affect only `blocks`).
+ * Roles not handled below contribute only `role` and `timestamp`.
+ *
+ * Cost is O(blocks) length reads, except assistant `toolCall` blocks, whose
+ * `arguments` are serialized with `JSON.stringify` to obtain a length.
+ *
+ * This is a heuristic, not a content hash: an in-place edit that leaves every
+ * tracked length and count unchanged (for example a same-length text
+ * replacement) yields the same fingerprint and the cached token count is
+ * reused.
+ *
+ * NOTE(review): `JSON.stringify` returns `undefined` for values such as
+ * functions, which would make the `.length` read throw. Presumably tool-call
+ * arguments are always plain JSON data — confirm against the message types.
+ */
+function messageFingerprint(msg: AgentMessage): string {
+	const role = (msg as { role?: string }).role ?? "";
+	const ts = (msg as { timestamp?: number }).timestamp ?? 0;
+	let textLen = 0;
+	let blocks = 0;
+	let images = 0;
+	if (role === "bashExecution") {
+		const b = msg as { command?: unknown; output?: unknown };
+		if (typeof b.command === "string") textLen += b.command.length;
+		if (typeof b.output === "string") textLen += b.output.length;
+	} else if (role === "user") {
+		const content = (msg as { content?: unknown }).content;
+		if (typeof content === "string") {
+			textLen += content.length;
+		} else if (Array.isArray(content)) {
+			blocks = content.length;
+			for (const block of content) {
+				if (block?.type === "text" && typeof block.text === "string") textLen += block.text.length;
+			}
+		}
+	} else if (role === "assistant") {
+		const content = (msg as { content?: unknown }).content;
+		if (Array.isArray(content)) {
+			blocks = content.length;
+			for (const block of content) {
+				if (!block || typeof block !== "object") continue;
+				const b = block as { type?: string; text?: string; thinking?: string; name?: string; arguments?: unknown };
+				if (b.type === "text" && typeof b.text === "string") textLen += b.text.length;
+				else if (b.type === "thinking" && typeof b.thinking === "string") textLen += b.thinking.length;
+				else if (b.type === "toolCall") {
+					if (typeof b.name === "string") textLen += b.name.length;
+					// Serialized length stands in for argument content; an edit that keeps the same serialized length goes unnoticed.
+					textLen += b.arguments === undefined ? 0 : JSON.stringify(b.arguments).length;
+				}
+			}
+		}
+	} else if (role === "toolResult" || role === "hookMessage") {
+		const content = (msg as { content?: unknown }).content;
+		if (typeof content === "string") {
+			textLen += content.length;
+		} else if (Array.isArray(content)) {
+			blocks = content.length;
+			for (const block of content) {
+				if (!block || typeof block !== "object") continue;
+				const b = block as { type?: string; text?: string };
+				if (b.type === "text" && typeof b.text === "string") textLen += b.text.length;
+				else if (b.type === "image") images++;
+			}
+		}
+	} else if (role === "branchSummary" || role === "compactionSummary") {
+		const s = (msg as { summary?: unknown }).summary;
+		if (typeof s === "string") textLen += s.length;
+	}
+	return `${role}:${ts}:${textLen}:${blocks}:${images}`;
+}
+/**
+ * Token count for a single message, using the per-message sidecar cache.
+ *
+ * Computes the message's current fingerprint and returns the cached count
+ * when the stored fingerprint matches. Otherwise it calls `estimateTokens`
+ * and writes a fresh `{ fingerprint, tokens }` entry onto the message under
+ * `kTokenCache` before returning.
+ *
+ * The caller MUST NOT route the last message through this function during
+ * streaming — it may still be growing, so its tokens must be recomputed on
+ * every refresh rather than served from the cache.
+ */
+function tokensForMessage(msg: AgentMessage): number {
+	const fp = messageFingerprint(msg);
+	const tagged = msg as TaggedMessage;
+	const cached = tagged[kTokenCache];
+	if (cached && cached.fingerprint === fp) return cached.tokens;
+	const tokens = estimateTokens(msg);
+	tagged[kTokenCache] = { fingerprint: fp, tokens };
+	return tokens;
+}
 // ═══════════════════════════════════════════════════════════════════════════
 // StatusLineComponent
 // ═══════════════════════════════════════════════════════════════════════════
@@ -85,14 +179,11 @@ export class StatusLineComponent implements Component {
 	// TTL design (which re-walked every message on each refresh and produced
 	// ~1.1 s sync freezes on 2,000+ message sessions because `updateEditorTopBorder`
 	// is called on every agent event in event-controller). The new scheme
-	// exploits the fact that `session.messages` is append-only during a turn
-	// and only shrinks on compaction.
-	#cachedBreakdown: { usedTokens: number; contextWindow: number } | null = null;
-	// Per-message token counts indexed by `session.messages` position. Entries
-	// here are immutable: a message at index `i` is finalized (its content
-	// no longer mutates) once index `i+1` exists. We therefore cache all but
-	// the LAST message (which may still be growing during streaming).
-	#messageTokenCache: number[] = [];
+	// caches by message-object identity (a Symbol-keyed sidecar on each
+	// message) plus a cheap content fingerprint, so in-place mutations of
+	// an existing message (post-hoc error attachment, retry-truncated
+	// branch rebuild, replaceMessages with the same length) are detected
+	// and recomputed.
 	// Cached non-message total (system prompt + tools + skills). Invalidated
 	// when the inputs-identity fingerprint changes (model swap, skill toggle,
 	// tool registration).
@@ -331,7 +422,12 @@ export class StatusLineComponent implements Component {
 		return null;
 	}
-	#refreshUsageInBackground(): void {
+	/**
+	 * Background-refresh the Anthropic OAuth quota report. Guarded by a 5-min
+	 * TTL on both success (cache lifetime) and error (backoff). Exposed
+	 * (non-private) so unit tests can verify the backoff invariant.
+	 */
+	refreshUsageInBackground(): void {
 		const now = Date.now();
 		if (this.#usageInFlight) return;
 		if (this.#usageFetchedAt > 0 && now - this.#usageFetchedAt < 5 * 60_000) return;
@@ -345,7 +441,11 @@ export class StatusLineComponent implements Component {
 				this.#usageFetchedAt = Date.now();
 			})
 			.catch(() => {
-				/* keep last known data on error */
+				// Backoff on error: stamp the fetch time so the 5-min TTL guard
+				// also acts as an error budget. Without this, every render
+				// kicks off another fetch (gated only by #usageInFlight),
+				// which hammers the endpoint during a network outage / 5xx.
+				this.#usageFetchedAt = Date.now();
 			})
 			.finally(() => {
 				this.#usageInFlight = false;
@@ -414,29 +514,22 @@ export class StatusLineComponent implements Component {
 			this.#nonMessageInputsKey = inputsKey;
 		}
-		// 2) Message tokens — incremental.
-		//    Compaction handling: if messages.length shrank, the array was
-		//    truncated. Reset cache; the next iteration rebuilds from scratch.
-		if (this.#messageTokenCache.length > Math.max(0, messages.length - 1)) {
-			this.#messageTokenCache.length = 0;
-		}
-		//    Cache all but the last message. The last message may still be
-		//    growing during streaming (assistant delta blocks append to the
-		//    existing message); recomputing it each refresh is one
-		//    `estimateTokens` call (~0.5 ms) and stays correct.
-		while (this.#messageTokenCache.length < Math.max(0, messages.length - 1)) {
-			const idx = this.#messageTokenCache.length;
-			this.#messageTokenCache.push(estimateTokens(messages[idx]));
-		}
+		// 2) Message tokens — incremental. The sidecar cache lives on the
+		//    message object itself (Symbol-keyed), keyed by identity and
+		//    validated by a cheap content fingerprint. Mutations that
+		//    replace messages (replaceMessages, branch rebuild, compaction)
+		//    yield fresh objects → cache miss → recompute. In-place
+		//    mutations on the same object are caught by fingerprint
+		//    mismatch. The LAST message is always recomputed because it
+		//    may still be growing during streaming.
 		let messagesTokens = 0;
-		for (const t of this.#messageTokenCache) messagesTokens += t;
-		if (messages.length > 0) {
-			messagesTokens += estimateTokens(messages[messages.length - 1]);
+		const lastIdx = messages.length - 1;
+		for (let i = 0; i < messages.length; i++) {
+			messagesTokens += i === lastIdx ? estimateTokens(messages[i]) : tokensForMessage(messages[i]);
 		}
 		const usedTokens = this.#nonMessageTokensCache + messagesTokens;
-		this.#cachedBreakdown = { usedTokens, contextWindow };
-		return this.#cachedBreakdown;
+		return { usedTokens, contextWindow };
 	}
 	/**
@@ -457,7 +550,7 @@ export class StatusLineComponent implements Component {
 		const state = this.session.state;
 		// Trigger background fetch (5-min TTL); render uses cached value
-		this.#refreshUsageInBackground();
+		this.refreshUsageInBackground();
 		// Get usage statistics
 		const aggregateUsageStats = this.session.sessionManager?.getUsageStatistics() ?? {

package/src/modes/utils/context-usage.ts CHANGED Viewed

@@ -75,12 +75,28 @@ export function estimateToolSchemaTokens(
  * messages walked incrementally as new entries append.
  */
 export function computeNonMessageTokens(session: AgentSession): number {
+	const parts = computeNonMessageBreakdown(session);
+	return parts.systemPromptTokens + parts.systemContextTokens + parts.toolsTokens + parts.skillsTokens;
+}
+/**
+ * Shared helper for the four non-message token totals. Single source of truth
+ * for both `computeNonMessageTokens` (status-line incremental cache) and
+ * `computeContextBreakdown` (/context panel). The split avoids drift between
+ * the two surfaces — they MUST report the same numbers.
+ *
+ * How each field is derived:
+ *  - `skillsTokens`: `estimateSkillsTokens` over `session.skills`.
+ *  - `toolsTokens`: `estimateToolSchemaTokens` over the agent state's tools.
+ *  - `systemContextTokens`: `countTokens` over every system-prompt part
+ *    after the first.
+ *  - `systemPromptTokens`: `countTokens` of the first system-prompt part
+ *    minus `skillsTokens`, clamped at zero.
+ *
+ * Missing `skills`, `tools`, or `systemPrompt` are treated as empty.
+ */
+function computeNonMessageBreakdown(session: AgentSession): {
+	skillsTokens: number;
+	toolsTokens: number;
+	systemContextTokens: number;
+	systemPromptTokens: number;
+} {
 	const skillsTokens = estimateSkillsTokens(session.skills ?? []);
 	const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);
 	const systemPromptParts = session.systemPrompt ?? [];
 	const systemContextTokens = countTokens(systemPromptParts.slice(1));
 	const systemPromptTokens = Math.max(0, countTokens(systemPromptParts[0] ?? "") - skillsTokens);
-	return systemPromptTokens + systemContextTokens + toolsTokens + skillsTokens;
+	return { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens };
 }
 /**
@@ -91,9 +107,6 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
 	const model = session.model;
 	const contextWindow = model?.contextWindow ?? 0;
-	const skillsTokens = estimateSkillsTokens(session.skills ?? []);
-	const toolsTokens = estimateToolSchemaTokens(session.agent?.state?.tools ?? []);
 	let messagesTokens = 0;
 	const convo = session.messages;
 	if (convo) {
@@ -108,9 +121,7 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
 	//   Tools         = JSON tool schema sent separately on the wire
 	//   Skills        = the skill list embedded in the system prompt
 	//   Messages      = conversation messages
-	const systemPromptParts = session.systemPrompt;
-	const systemPromptTokens = Math.max(0, countTokens(systemPromptParts?.[0] ?? "") - skillsTokens);
-	const systemContextTokens = countTokens(systemPromptParts?.slice(1) ?? []);
+	const { skillsTokens, toolsTokens, systemContextTokens, systemPromptTokens } = computeNonMessageBreakdown(session);
 	const categories: CategoryInfo[] = [
 		{ id: "systemPrompt", label: "System prompt", tokens: systemPromptTokens, color: "accent", glyph: CELL_FILLED },

package/src/sdk.ts CHANGED Viewed

@@ -1893,7 +1893,10 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 				streamSimple(streamModel, context, {
 					...streamOptions,
 					onAuthError: async (provider, oldKey, error) => {
-						await modelRegistry.authStorage.invalidateCredentialMatching(provider, oldKey, streamOptions?.signal);
+						await modelRegistry.authStorage.invalidateCredentialMatching(provider, oldKey, {
+							signal: streamOptions?.signal,
+							sessionId: agent.sessionId,
+						});
 						logger.debug("Retrying provider request after credential invalidation", {
 							provider,
 							error: error instanceof Error ? error.message : String(error),

package/src/session/agent-session.ts CHANGED Viewed

@@ -751,6 +751,9 @@ export class AgentSession {
 	// Event subscription state
 	#unsubscribeAgent?: () => void;
+	#unsubscribeAppendOnly?: () => void;
+	/** Last (enable, providerId) tuple resolved by `#syncAppendOnlyContext` — used to skip no-op invalidations. */
+	#lastAppendOnlyResolution?: { enable: boolean; providerId: string | undefined };
 	#eventListeners: AgentSessionEventListener[] = [];
 	/** Tracks pending steering messages for UI display. Removed when delivered.
@@ -1141,7 +1144,7 @@ export class AgentSession {
 		// (session persistence, hooks, auto-compaction, retry logic)
 		this.#unsubscribeAgent = this.agent.subscribe(this.#handleAgentEvent);
 		// Re-evaluate append-only context mode when the setting changes at runtime.
-		onAppendOnlyModeChanged(_value => this.#syncAppendOnlyContext(this.model));
+		this.#unsubscribeAppendOnly = onAppendOnlyModeChanged(_value => this.#syncAppendOnlyContext(this.model));
 	}
 	/** Model registry for API key resolution and model discovery */
@@ -2785,6 +2788,10 @@ export class AgentSession {
 		await hindsightState?.flushRetainQueue();
 		hindsightState?.dispose();
 		this.#disconnectFromAgent();
+		if (this.#unsubscribeAppendOnly) {
+			this.#unsubscribeAppendOnly();
+			this.#unsubscribeAppendOnly = undefined;
+		}
 		this.#eventListeners = [];
 	}
@@ -5980,7 +5987,12 @@ export class AgentSession {
 	 */
 	#syncAppendOnlyContext(model: Model | null | undefined): void {
 		const setting = this.settings.get("provider.appendOnlyContext") ?? "auto";
-		const enable = setting === "on" || (setting === "auto" && model?.provider === "deepseek");
+		const providerId = model?.provider;
+		const enable = setting === "on" || (setting === "auto" && providerId === "deepseek");
+		const prev = this.#lastAppendOnlyResolution;
+		if (prev && prev.enable === enable && prev.providerId === providerId) return;
+		this.#lastAppendOnlyResolution = { enable, providerId };
 		if (enable && !this.agent.appendOnlyContext) {
 			this.agent.setAppendOnlyContext(new AppendOnlyContextManager());
 		} else if (enable && this.agent.appendOnlyContext) {

package/src/session/session-manager.ts CHANGED Viewed

@@ -942,12 +942,71 @@ function extractFirstUserPrompt(entries: Array<Record<string, unknown>>): string
 	return undefined;
 }
+/**
+ * Promote orphaned `<basename>.jsonl.<snowflake>.bak` backups created by
+ * `#replaceSessionFileAfterEperm` back to their primary path when the primary
+ * is missing. This runs once per session-dir scan, before the main `*.jsonl`
+ * glob, so a crash between the two renames in the EPERM-rewrite path does not
+ * leave the user's last good state stranded outside the loader's view.
+ *
+ * File names are parsed by stripping `.bak`, then splitting at the last `.`;
+ * the part before it must end in `.jsonl` or the file is ignored. When
+ * several backups map to the same primary, only the one with the highest
+ * mtime is a recovery candidate. Backups whose primary already exists, and
+ * the older backups of a recovered primary, are left in place.
+ *
+ * Failure handling: a failed directory listing returns without doing
+ * anything, a failed stat skips that backup, and a failed rename is logged
+ * as a warning and does not stop the remaining candidates.
+ *
+ * Exported for testing.
+ */
+export async function recoverOrphanedBackups(sessionDir: string, storage: SessionStorage): Promise<void> {
+	let backups: string[];
+	try {
+		backups = storage.listFilesSync(sessionDir, "*.bak");
+	} catch {
+		return;
+	}
+	if (backups.length === 0) return;
+	// For each primary path, pick the newest backup (highest mtime) as the recovery source.
+	const candidates = new Map<string, { backup: string; mtimeMs: number }>();
+	for (const backup of backups) {
+		const name = path.basename(backup);
+		// Expect "<primary>.<snowflake>.bak" where <primary> ends in ".jsonl".
+		if (!name.endsWith(".bak")) continue;
+		const trimmed = name.slice(0, -".bak".length);
+		const dotIdx = trimmed.lastIndexOf(".");
+		if (dotIdx <= 0) continue;
+		const primaryName = trimmed.slice(0, dotIdx);
+		if (!primaryName.endsWith(".jsonl")) continue;
+		const primaryPath = path.join(sessionDir, primaryName);
+		let mtimeMs = 0;
+		try {
+			mtimeMs = storage.statSync(backup).mtimeMs;
+		} catch {
+			continue;
+		}
+		const existing = candidates.get(primaryPath);
+		if (!existing || mtimeMs > existing.mtimeMs) {
+			candidates.set(primaryPath, { backup, mtimeMs });
+		}
+	}
+	for (const [primaryPath, { backup }] of candidates) {
+		if (storage.existsSync(primaryPath)) continue;
+		try {
+			await storage.rename(backup, primaryPath);
+			logger.warn("Recovered orphaned session backup", {
+				sessionFile: primaryPath,
+				backupPath: backup,
+			});
+		} catch (err) {
+			logger.warn("Failed to recover orphaned session backup", {
+				sessionFile: primaryPath,
+				backupPath: backup,
+				error: toError(err).message,
+			});
+		}
+	}
+}
 /**
  * Reads all session files from the directory and returns them sorted by mtime (newest first).
  * Uses low-level file I/O to efficiently read only the first 4KB of each file
  * to extract the JSON header and first user message without loading entire session logs into memory.
  */
 async function getSortedSessions(sessionDir: string, storage: SessionStorage): Promise<RecentSessionInfo[]> {
+	await recoverOrphanedBackups(sessionDir, storage);
 	try {
 		const files: string[] = storage.listFilesSync(sessionDir, "*.jsonl");
 		const sessions: RecentSessionInfo[] = [];
@@ -2149,10 +2208,14 @@ export class SessionManager {
 	}
 	// Windows can reject overwrite-style rename with EPERM even after our own writer is closed.
 	// Move the old session file aside first so a failed retry can roll back to the last good file.
+	// The backup uses a plain `<basename>.<snowflake>.bak` name (no leading dot) so that if the
+	// process crashes between the two renames, `recoverOrphanedBackups` can find it via the
+	// shared `*.bak` glob on both real and in-memory storage backends and promote it back to
+	// the primary on the next session-dir scan.
 	async #replaceSessionFileAfterEperm(tempPath: string, targetPath: string, renameError: unknown): Promise<void> {
 		const dir = path.resolve(targetPath, "..");
-		const backupPath = path.join(dir, `.${path.basename(targetPath)}.${Snowflake.next()}.bak`);
+		const backupPath = path.join(dir, `${path.basename(targetPath)}.${Snowflake.next()}.bak`);
 		try {
 			await this.storage.rename(targetPath, backupPath);
 		} catch (err) {
@@ -2167,13 +2230,14 @@ export class SessionManager {
 			await this.storage.rename(tempPath, targetPath);
 		} catch (err) {
 			const replaceError = toError(err);
+			const originalError = toError(renameError);
 			try {
 				await this.storage.rename(backupPath, targetPath);
 			} catch (rollbackErr) {
 				const rollbackError = toError(rollbackErr);
 				throw new Error(
-					`Failed to replace session file after EPERM (${replaceError.message}); rollback from ${backupPath} also failed: ${rollbackError.message}`,
-					{ cause: replaceError },
+					`Failed to replace session file after EPERM (original: ${originalError.message}; retry: ${replaceError.message}); rollback from ${backupPath} also failed: ${rollbackError.message}`,
+					{ cause: originalError },
 				);
 			}
 			throw replaceError;
@@ -3244,6 +3308,7 @@ export class SessionManager {
 	): Promise<SessionInfo[]> {
 		const dir = sessionDir ?? SessionManager.getDefaultSessionDir(cwd, undefined, storage);
 		try {
+			await recoverOrphanedBackups(dir, storage);
 			const files = storage.listFilesSync(dir, "*.jsonl");
 			return await collectSessionsFromFiles(files, storage);
 		} catch {

package/src/slash-commands/builtin-registry.ts CHANGED Viewed

@@ -73,9 +73,13 @@ const BUILTIN_SLASH_COMMAND_REGISTRY: ReadonlyArray<SlashCommandSpec> = [
 		allowArgs: true,
 		handleTui: async (command, runtime) => {
 			const hadArgs = !!command.args;
+			// Capture state BEFORE the call: when plan mode is already active,
+			// handlePlanModeCommand may exit it (on confirmed exit) or leave it on (on cancel
+			// or warning). In every "already active" case the typed args are NOT consumed,
+			// so preserve them in history regardless of the user's confirm/cancel choice.
+			const wasPlanModeEnabled = runtime.ctx.planModeEnabled;
 			await runtime.ctx.handlePlanModeCommand(command.args || undefined);
-			if (hadArgs && runtime.ctx.planModeEnabled) {
-				// plan was already active — preserve the typed command in input history
+			if (hadArgs && wasPlanModeEnabled) {
 				runtime.ctx.editor.addToHistory(command.text);
 			}
 			runtime.ctx.editor.setText("");
@@ -96,9 +100,10 @@ const BUILTIN_SLASH_COMMAND_REGISTRY: ReadonlyArray<SlashCommandSpec> = [
 		allowArgs: true,
 		handleTui: async (command, runtime) => {
 			const hadArgs = !!command.args;
+			// Capture state BEFORE the call (see /plan above for rationale).
+			const wasGoalModeEnabled = runtime.ctx.goalModeEnabled;
 			await runtime.ctx.handleGoalModeCommand(command.args || undefined);
-			if (hadArgs && runtime.ctx.goalModeEnabled) {
-				// goal was already active — preserve the typed command in input history
+			if (hadArgs && wasGoalModeEnabled) {
 				runtime.ctx.editor.addToHistory(command.text);
 			}
 			runtime.ctx.editor.setText("");

package/src/task/executor.ts CHANGED Viewed

@@ -49,6 +49,7 @@ import {
 	TASK_SUBAGENT_EVENT_CHANNEL,
 	TASK_SUBAGENT_LIFECYCLE_CHANNEL,
 	TASK_SUBAGENT_PROGRESS_CHANNEL,
+	type TaskToolDetails,
 } from "./types";
 const MCP_CALL_TIMEOUT_MS = 60_000;
@@ -909,6 +910,11 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 				if (intent) {
 					progress.lastIntent = intent;
 				}
+				// Reset any prior in-flight task snapshot so we don't show stale
+				// nested progress when the agent enters a fresh `task` call.
+				if (event.toolName === "task") {
+					progress.inflightTaskDetails = undefined;
+				}
 				break;
 			}
@@ -927,6 +933,12 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 				progress.currentTool = undefined;
 				progress.currentToolArgs = undefined;
 				progress.currentToolStartMs = undefined;
+				// The finalized TaskToolDetails will be captured below into
+				// `extractedToolData.task`; drop the in-flight snapshot so the
+				// renderer doesn't double-count it against the final entry.
+				if (event.toolName === "task") {
+					progress.inflightTaskDetails = undefined;
+				}
 				// Check for registered subagent tool handler
 				const handler = subprocessToolRegistry.getHandler(event.toolName);
@@ -979,6 +991,23 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 				break;
 			}
+			case "tool_execution_update": {
+				// Surface nested-subagent progress mid-flight. The child task
+				// tool emits incremental `onUpdate` calls carrying its current
+				// `TaskToolDetails` (results + progress); we stash the latest
+				// snapshot so the parent UI can render the in-flight subtree
+				// without waiting for the call to finish.
+				if (event.toolName === "task") {
+					const partial = (event as { partialResult?: { details?: unknown } }).partialResult;
+					const details = partial && typeof partial === "object" ? partial.details : undefined;
+					if (details && typeof details === "object" && "results" in (details as TaskToolDetails)) {
+						progress.inflightTaskDetails = details as TaskToolDetails;
+						flushProgress = true;
+					}
+				}
+				break;
+			}
 			case "message_update": {
 				if (event.message?.role !== "assistant") break;
 				const assistantEvent = (

package/src/task/render.ts CHANGED Viewed

@@ -639,7 +639,8 @@ function renderAgentProgress(
 			}
 		}
-		for (const [toolName, dataArray] of Object.entries(progress.extractedToolData)) {
+		for (const toolName in progress.extractedToolData) {
+			const dataArray = progress.extractedToolData[toolName];
 			// Handle report_finding with tree formatting
 			if (toolName === "report_finding") {
 				const findings = normalizeReportFindings(dataArray);
@@ -649,6 +650,11 @@ function renderAgentProgress(
 				continue;
 			}
+			// Nested `task` data has its own dedicated tree renderer below that
+			// also merges in the in-flight snapshot — skip the generic inline
+			// path so we don't render twice.
+			if (toolName === "task") continue;
 			const handler = subprocessToolRegistry.getHandler(toolName);
 			if (handler?.renderInline) {
 				const displayCount = expanded ? (dataArray as unknown[]).length : 3;
@@ -671,6 +677,20 @@ function renderAgentProgress(
 		}
 	}
+	// Nested `task` tree: completed sub-calls from `extractedToolData.task` plus
+	// the in-flight snapshot (if any). Surfacing this in the live view means
+	// the user sees deep-tree progress without waiting for this agent to finish
+	// its own turn.
+	const completedTaskCalls = (progress.extractedToolData?.task as TaskToolDetails[] | undefined) ?? [];
+	const inflight = progress.inflightTaskDetails;
+	if (completedTaskCalls.length > 0 || inflight) {
+		const snapshots = inflight ? [...completedTaskCalls, inflight] : completedTaskCalls;
+		const nestedLines = renderNestedTaskTree(snapshots, expanded, theme, spinnerFrame);
+		for (const line of nestedLines) {
+			lines.push(`${continuePrefix}${line}`);
+		}
+	}
 	// Expanded view: recent output and tools
 	if (expanded && progress.status === "running") {
 		const output = progress.recentOutput.join("\n");
@@ -1067,6 +1087,38 @@ function renderNestedTaskResults(detailsList: TaskToolDetails[], expanded: boole
 	return lines;
 }
+/**
+ * Render a list of `TaskToolDetails` snapshots as tree lines. Used by the
+ * live progress view to surface nested subagent activity while this agent is
+ * still running.
+ *
+ * Each snapshot is rendered from exactly one source. If it has a non-empty
+ * `results[]`, every result goes through `renderAgentResult` and the
+ * snapshot's `progress[]` is ignored. Otherwise a non-empty `progress[]`
+ * goes through `renderAgentProgress`, forwarding `spinnerFrame`. Snapshots
+ * with neither contribute no lines.
+ *
+ * The "last child" flag is computed within each snapshot, not across the
+ * whole list, so every snapshot ends with its own last-child entry.
+ */
+function renderNestedTaskTree(
+	detailsList: TaskToolDetails[],
+	expanded: boolean,
+	theme: Theme,
+	spinnerFrame?: number,
+): string[] {
+	const lines: string[] = [];
+	for (const details of detailsList) {
+		const hasResults = Boolean(details.results && details.results.length > 0);
+		if (hasResults) {
+			details.results.forEach((result, index) => {
+				const isLast = index === details.results.length - 1;
+				lines.push(...renderAgentResult(result, isLast, expanded, theme));
+			});
+			continue;
+		}
+		const inflight = details.progress;
+		if (inflight && inflight.length > 0) {
+			inflight.forEach((prog, index) => {
+				const isLast = index === inflight.length - 1;
+				lines.push(...renderAgentProgress(prog, isLast, expanded, theme, spinnerFrame));
+			});
+		}
+	}
+	return lines;
+}
 subprocessToolRegistry.register<TaskToolDetails>("task", {
 	extractData: event => {
 		const details = event.result?.details;

package/src/task/types.ts CHANGED Viewed

@@ -236,6 +236,14 @@ export interface AgentProgress {
 		attempt: number;
 		errorMessage: string;
 	};
+	/**
+	 * Snapshot of the most recent `task` tool call's in-flight `TaskToolDetails`,
+	 * captured from `tool_execution_update`. Lets the parent UI surface live
+	 * nested-subagent progress while this agent is still inside its own `task`
+	 * call. Cleared when the call ends — finalized data lives in
+	 * `extractedToolData.task` after that.
+	 */
+	inflightTaskDetails?: TaskToolDetails;
 }
 /** Result from a single agent execution */

package/src/tools/jtd-to-json-schema.ts CHANGED Viewed

@@ -180,7 +180,11 @@ function normalizeMixedSchemaNode(schema: unknown): unknown {
 	}
 	if (isJTDSchema(schema)) {
-		return normalizeMixedSchemaNode(convertSchema(schema));
+		// `convertSchema` is itself fully recursive and emits pure JSON Schema, so
+		// re-walking the result with `normalizeMixedSchemaNode` is unnecessary and
+		// unsafe: it would treat user-named properties whose keys happen to be JTD
+		// keywords (e.g. `ref`, `elements`) as nested JTD forms (#1345).
+		return convertSchema(schema);
 	}
 	const normalized: Record<string, unknown> = {};