npm - @oh-my-pi/pi-coding-agent - Versions diffs - 12.15.1 → 12.16.0 - Mend

@oh-my-pi/pi-coding-agent 12.15.1 → 12.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/CHANGELOG.md +25 -0
package/package.json +7 -7
package/src/config/model-registry.ts +27 -10
package/src/config/settings-schema.ts +1 -1
package/src/internal-urls/docs-index.generated.ts +2 -1
package/src/main.ts +2 -1
package/src/mcp/tool-bridge.ts +27 -3
package/src/sdk.ts +3 -1
package/src/session/auth-storage.ts +32 -0
package/src/task/executor.ts +123 -101
package/src/tools/submit-result.ts +13 -2
package/src/web/search/providers/anthropic.ts +7 -2

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,31 @@
 ## [Unreleased]
+## [12.16.0] - 2026-02-21
+### Added
+- Added `peekApiKey` method to AuthStorage for non-blocking API key retrieval during model discovery without triggering OAuth token refresh
+- Exported `finalizeSubprocessOutput` function to handle subprocess output finalization with submit_result validation
+- Exported `SubmitResultItem` interface for type-safe submit_result tool data extraction
+- Added automatic reminders when subagent stops without calling submit_result tool (up to 3 reminders before aborting)
+- Added system warnings when subagent calls submit_result with null/undefined data or exits without calling submit_result after reminders
+### Changed
+- Changed model refresh behavior to support configurable strategies: uses 'online' mode when listing models and 'online-if-uncached' mode otherwise for improved performance
+- Changed default thinking level from 'off' to 'high' for improved reasoning and planning
+- Changed model discovery to use non-blocking API key peek instead of full key retrieval, improving performance by avoiding unnecessary OAuth token refreshes
+- Simplified submit_result termination logic to immediately abort on successful tool execution instead of waiting for message_end event
+- Updated submit_result tool to only terminate on successful execution (when isError is false), allowing retries on tool errors
+- Refactored subprocess output finalization logic into dedicated `finalizeSubprocessOutput` function for better testability and maintainability
+- Improved handling of missing submit_result calls by automatically aborting with exit code 1 after 3 reminder prompts
+### Fixed
+- Fixed submit_result retry behavior to properly handle tool execution errors and allow the subagent to retry before aborting
+- Fixed submit_result tool extraction to properly validate status field and only accept 'success' or 'aborted' results
 ## [12.15.1] - 2026-02-20
 ### Changed

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@oh-my-pi/pi-coding-agent",
-	"version": "12.15.1",
+	"version": "12.16.0",
 	"description": "Coding agent CLI with read, bash, edit, write tools and session management",
 	"type": "module",
 	"bin": {
@@ -85,12 +85,12 @@
 	},
 	"dependencies": {
 		"@mozilla/readability": "0.6.0",
-		"@oh-my-pi/omp-stats": "12.15.1",
-		"@oh-my-pi/pi-agent-core": "12.15.1",
-		"@oh-my-pi/pi-ai": "12.15.1",
-		"@oh-my-pi/pi-natives": "12.15.1",
-		"@oh-my-pi/pi-tui": "12.15.1",
-		"@oh-my-pi/pi-utils": "12.15.1",
+		"@oh-my-pi/omp-stats": "12.16.0",
+		"@oh-my-pi/pi-agent-core": "12.16.0",
+		"@oh-my-pi/pi-ai": "12.16.0",
+		"@oh-my-pi/pi-natives": "12.16.0",
+		"@oh-my-pi/pi-tui": "12.16.0",
+		"@oh-my-pi/pi-utils": "12.16.0",
 		"@sinclair/typebox": "^0.34.48",
 		"@xterm/headless": "^6.0.0",
 		"ajv": "^8.18.0",

package/src/config/model-registry.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import {
 	googleGeminiCliModelManagerOptions,
 	type Model,
 	type ModelManagerOptions,
+	type ModelRefreshStrategy,
 	normalizeDomain,
 	type OAuthCredentials,
 	type OAuthLoginCallbacks,
@@ -494,7 +495,7 @@ export class ModelRegistry {
 	/**
 	 * Reload models from disk (built-in + custom from models.json).
 	 */
-	async refresh(): Promise<void> {
+	async refresh(strategy: ModelRefreshStrategy = "online-if-uncached"): Promise<void> {
 		this.#modelsConfigFile.invalidate();
 		this.#customProviderApiKeys.clear();
 		this.#keylessProviders.clear();
@@ -502,7 +503,7 @@ export class ModelRegistry {
 		this.#modelOverrides.clear();
 		this.#configError = undefined;
 		this.#loadModels();
-		await this.#refreshRuntimeDiscoveries();
+		await this.#refreshRuntimeDiscoveries(strategy);
 	}
 	/**
@@ -681,7 +682,7 @@ export class ModelRegistry {
 		};
 	}
-	async #refreshRuntimeDiscoveries(): Promise<void> {
+	async #refreshRuntimeDiscoveries(strategy: ModelRefreshStrategy): Promise<void> {
 		const configuredDiscoveriesPromise =
 			this.#discoverableProviders.length === 0
 				? Promise.resolve<Model<Api>[]>([])
@@ -690,7 +691,7 @@ export class ModelRegistry {
 					);
 		const [configuredDiscovered, builtInDiscovered] = await Promise.all([
 			configuredDiscoveriesPromise,
-			this.#discoverBuiltInProviderModels(),
+			this.#discoverBuiltInProviderModels(strategy),
 		]);
 		const discovered = [...configuredDiscovered, ...builtInDiscovered];
 		if (discovered.length === 0) {
@@ -721,7 +722,7 @@ export class ModelRegistry {
 		}
 	}
-	async #discoverBuiltInProviderModels(): Promise<Model<Api>[]> {
+	async #discoverBuiltInProviderModels(strategy: ModelRefreshStrategy): Promise<Model<Api>[]> {
 		// Skip providers already handled by configured discovery (e.g. user-configured ollama with discovery.type)
 		const configuredDiscoveryProviders = new Set(this.#discoverableProviders.map(p => p.provider));
 		const managerOptions = (await this.#collectBuiltInModelManagerOptions()).filter(
@@ -730,7 +731,9 @@ export class ModelRegistry {
 		if (managerOptions.length === 0) {
 			return [];
 		}
-		const discoveries = await Promise.all(managerOptions.map(options => this.#discoverWithModelManager(options)));
+		const discoveries = await Promise.all(
+			managerOptions.map(options => this.#discoverWithModelManager(options, strategy)),
+		);
 		return discoveries.flat();
 	}
@@ -770,9 +773,13 @@ export class ModelRegistry {
 				},
 			},
 		];
+		// Use peekApiKey to avoid OAuth token refresh during discovery.
+		// The token is only needed if the dynamic fetch fires (cache miss),
+		// and failures there are handled gracefully.
+		const peekKey = (descriptor: { providerId: string }) => this.#peekApiKeyForProvider(descriptor.providerId);
 		const [standardProviderKeys, specialKeys] = await Promise.all([
-			Promise.all(PROVIDER_DESCRIPTORS.map(descriptor => this.getApiKeyForProvider(descriptor.providerId))),
-			Promise.all(specialProviderDescriptors.map(descriptor => this.getApiKeyForProvider(descriptor.providerId))),
+			Promise.all(PROVIDER_DESCRIPTORS.map(peekKey)),
+			Promise.all(specialProviderDescriptors.map(peekKey)),
 		]);
 		const options: ModelManagerOptions<Api>[] = [];
 		for (let i = 0; i < PROVIDER_DESCRIPTORS.length; i++) {
@@ -799,10 +806,13 @@ export class ModelRegistry {
 		return options;
 	}
-	async #discoverWithModelManager(options: ModelManagerOptions<Api>): Promise<Model<Api>[]> {
+	async #discoverWithModelManager(
+		options: ModelManagerOptions<Api>,
+		strategy: ModelRefreshStrategy,
+	): Promise<Model<Api>[]> {
 		try {
 			const manager = createModelManager(options);
-			const result = await manager.refresh();
+			const result = await manager.refresh(strategy);
 			return result.models.map(model =>
 				model.provider === options.providerId ? model : { ...model, provider: options.providerId },
 			);
@@ -970,6 +980,13 @@ export class ModelRegistry {
 		return this.authStorage.getApiKey(provider, sessionId, { baseUrl });
 	}
+	async #peekApiKeyForProvider(provider: string): Promise<string | undefined> {
+		if (this.#keylessProviders.has(provider)) {
+			return kNoAuth;
+		}
+		return this.authStorage.peekApiKey(provider);
+	}
 	/**
 	 * Check if a model is using OAuth credentials (subscription).
 	 */

package/src/config/settings-schema.ts CHANGED Viewed

@@ -179,7 +179,7 @@ export const SETTINGS_SCHEMA = {
 	defaultThinkingLevel: {
 		type: "enum",
 		values: ["off", "minimal", "low", "medium", "high", "xhigh"] as const,
-		default: "off",
+		default: "high",
 		ui: {
 			tab: "agent",
 			label: "Thinking level",

package/src/internal-urls/docs-index.generated.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 // Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
-export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md","tui.md"];
+export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md","tui.md"];
 export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
 	"bash-tool-runtime.md": "# Bash tool runtime\n\nThis document describes the **`bash` tool** runtime path used by agent tool calls, from command normalization to execution, truncation/artifacts, and rendering.\n\nIt also calls out where behavior diverges in interactive TUI, print mode, RPC mode, and user-initiated bang (`!`) shell execution.\n\n## Scope and runtime surfaces\n\nThere are two different bash execution surfaces in coding-agent:\n\n1. **Tool-call surface** (`toolName: \"bash\"`): used when the model calls the bash tool.\n   - Entry point: `BashTool.execute()`.\n2. **User bang-command surface** (`!cmd` from interactive input or RPC `bash` command): session-level helper path.\n   - Entry point: `AgentSession.executeBash()`.\n\nBoth eventually use `executeBash()` in `src/exec/bash-executor.ts` for non-PTY execution, but only the tool-call path runs normalization/interception and tool renderer logic.\n\n## End-to-end tool-call pipeline\n\n## 1) Input normalization and parameter merge\n\n`BashTool.execute()` first normalizes the raw command via `normalizeBashCommand()`:\n\n- extracts trailing `| head -n N`, `| head -N`, `| tail -n N`, `| tail -N` into structured limits,\n- trims trailing/leading whitespace,\n- keeps internal whitespace intact.\n\nThen it merges extracted limits with explicit tool args:\n\n- explicit `head`/`tail` args override extracted values,\n- extracted values are fallback only.\n\n### Caveat\n\n`bash-normalize.ts` comments mention stripping `2>&1`, but current implementation does not remove it. 
Runtime behavior is still correct (stdout/stderr are already merged), but the normalization behavior is narrower than comments suggest.\n\n## 2) Optional interception (blocked-command path)\n\nIf `bashInterceptor.enabled` is true, `BashTool` loads rules from settings and runs `checkBashInterception()` against the normalized command.\n\nInterception behavior:\n\n- command is blocked **only** when:\n  - regex rule matches, and\n  - the suggested tool is present in `ctx.toolNames`.\n- invalid regex rules are silently skipped.\n- on block, `BashTool` throws `ToolError` with message:\n  - `Blocked: ...`\n  - original command included.\n\nDefault rule patterns (defined in code) target common misuses:\n\n- file readers (`cat`, `head`, `tail`, ...)\n- search tools (`grep`, `rg`, ...)\n- file finders (`find`, `fd`, ...)\n- in-place editors (`sed -i`, `perl -i`, `awk -i inplace`)\n- shell redirection writes (`echo ... > file`, heredoc redirection)\n\n### Caveat\n\n`InterceptionResult` includes `suggestedTool`, but `BashTool` currently surfaces only the message text (no structured suggested-tool field in `details`).\n\n## 3) CWD validation and timeout clamping\n\n`cwd` is resolved relative to session cwd (`resolveToCwd`), then validated via `stat`:\n\n- missing path -> `ToolError(\"Working directory does not exist: ...\")`\n- non-directory -> `ToolError(\"Working directory is not a directory: ...\")`\n\nTimeout is clamped to `[1, 3600]` seconds and converted to milliseconds.\n\n## 4) Artifact allocation + environment injection\n\nBefore execution, the tool allocates an artifact path/id (best-effort) and injects `$ARTIFACTS` env when session artifacts dir is available.\n\n- artifact allocation failure is non-fatal (execution continues without artifact spill file),\n- artifact id/path are passed into execution path for full-output persistence on truncation.\n\n## 5) PTY vs non-PTY execution selection\n\n`BashTool` chooses PTY execution only when all are true:\n\n- 
`bash.virtualTerminal === \"on\"`\n- `PI_NO_PTY !== \"1\"`\n- tool context has UI (`ctx.hasUI === true` and `ctx.ui` set)\n\nOtherwise it uses non-interactive `executeBash()`.\n\nThat means print mode and non-UI RPC/tool contexts always use non-PTY.\n\n## Non-interactive execution engine (`executeBash`)\n\n## Shell session reuse model\n\n`executeBash()` caches native `Shell` instances in a process-global map keyed by:\n\n- shell path,\n- configured command prefix,\n- snapshot path,\n- serialized shell env,\n- optional agent session key.\n\nFor session-level executions, `AgentSession.executeBash()` passes `sessionKey: this.sessionId`, isolating reuse per session.\n\nTool-call path does **not** pass `sessionKey`, so reuse scope is based on shell config/snapshot/env.\n\n## Shell config and snapshot behavior\n\nAt each call, executor loads settings shell config (`shell`, `env`, optional `prefix`).\n\nIf selected shell includes `bash`, it attempts `getOrCreateSnapshot()`:\n\n- snapshot captures aliases/functions/options from user rc,\n- snapshot creation is best-effort,\n- failure falls back to no snapshot.\n\nIf `prefix` is configured, command becomes:\n\n```text\n<prefix> <command>\n```\n\n## Streaming and cancellation\n\n`Shell.run()` streams chunks to callback. 
Executor pipes each chunk into `OutputSink` and optional `onChunk` callback.\n\nCancellation:\n\n- aborted signal triggers `shellSession.abort(...)`,\n- timeout from native result is mapped to `cancelled: true` + annotation text,\n- explicit cancellation similarly returns `cancelled: true` + annotation.\n\nNo exception is thrown inside executor for timeout/cancel; it returns structured `BashResult` and lets caller map error semantics.\n\n## Interactive PTY path (`runInteractiveBashPty`)\n\nWhen PTY is enabled, tool runs `runInteractiveBashPty()` which opens an overlay console component and drives a native `PtySession`.\n\nBehavior highlights:\n\n- xterm-headless virtual terminal renders viewport in overlay,\n- keyboard input is normalized (including Kitty sequences and application cursor mode handling),\n- `esc` while running kills the PTY session,\n- terminal resize propagates to PTY (`session.resize(cols, rows)`).\n\nEnvironment hardening defaults are injected for unattended runs:\n\n- pagers disabled (`PAGER=cat`, `GIT_PAGER=cat`, etc.),\n- editor prompts disabled (`GIT_EDITOR=true`, `EDITOR=true`, ...),\n- terminal/auth prompts reduced (`GIT_TERMINAL_PROMPT=0`, `SSH_ASKPASS=/usr/bin/false`, `CI=1`),\n- package-manager/tool automation flags for non-interactive behavior.\n\nPTY output is normalized (`CRLF`/`CR` to `LF`, `sanitizeText`) and written into `OutputSink`, including artifact spill support.\n\nOn PTY startup/runtime error, sink receives `PTY error: ...` line and command finalizes with undefined exit code.\n\n## Output handling: streaming, truncation, artifact spill\n\nBoth PTY and non-PTY paths use `OutputSink`.\n\n## OutputSink semantics\n\n- keeps an in-memory UTF-8-safe tail buffer (`DEFAULT_MAX_BYTES`, currently 50KB),\n- tracks total bytes/lines seen,\n- if artifact path exists and output overflows (or file already active), writes full stream to artifact file,\n- when memory threshold overflows, trims in-memory buffer to tail (UTF-8 boundary 
safe),\n- marks `truncated` when overflow/file spill occurs.\n\n`dump()` returns:\n\n- `output` (possibly annotated prefix),\n- `truncated`,\n- `totalLines/totalBytes`,\n- `outputLines/outputBytes`,\n- `artifactId` if artifact file was active.\n\n### Long-output caveat\n\nRuntime truncation is byte-threshold based in `OutputSink` (50KB default). It does not enforce a hard 2000-line cap in this code path.\n\n## Live tool updates\n\nFor non-PTY execution, `BashTool` uses a separate `TailBuffer` for partial updates and emits `onUpdate` snapshots while command is running.\n\nFor PTY execution, live rendering is handled by custom UI overlay, not by `onUpdate` text chunks.\n\n## Result shaping, metadata, and error mapping\n\nAfter execution:\n\n1. `cancelled` handling:\n   - if abort signal is aborted -> throw `ToolAbortError` (abort semantics),\n   - else -> throw `ToolError` (treated as tool failure).\n2. PTY `timedOut` -> throw `ToolError`.\n3. apply head/tail filters to final output text (`applyHeadTail`, head then tail).\n4. empty output becomes `(no output)`.\n5. attach truncation metadata via `toolResult(...).truncationFromSummary(result, { direction: \"tail\" })`.\n6. exit-code mapping:\n   - missing exit code -> `ToolError(\"... missing exit status\")`\n   - non-zero exit -> `ToolError(\"... 
Command exited with code N\")`\n   - zero exit -> success result.\n\nSuccess payload structure:\n\n- `content`: text output,\n- `details.meta.truncation` when truncated, including:\n  - `direction`, `truncatedBy`, total/output line+byte counts,\n  - `shownRange`,\n  - `artifactId` when available.\n\nBecause built-in tools are wrapped with `wrapToolWithMetaNotice()`, truncation notice text is appended to final text content automatically (for example: `Full: artifact://<id>`).\n\n## Rendering paths\n\n## Tool-call renderer (`bashToolRenderer`)\n\n`bashToolRenderer` is used for tool-call messages (`toolCall` / `toolResult`):\n\n- collapsed mode shows visual-line-truncated preview,\n- expanded mode shows all currently available output text,\n- warning line includes truncation reason and `artifact://<id>` when truncated,\n- timeout value (from args) is shown in footer metadata line.\n\n### Caveat: full artifact expansion\n\n`BashRenderContext` has `isFullOutput`, but current renderer context builder does not set it for bash tool results. 
Expanded view still uses the text already in result content (tail/truncated output) unless another caller provides full artifact content.\n\n## User bang-command component (`BashExecutionComponent`)\n\n`BashExecutionComponent` is for user `!` commands in interactive mode (not model tool calls):\n\n- streams chunks live,\n- collapsed preview keeps last 20 logical lines,\n- line clamp at 4000 chars per line,\n- shows truncation + artifact warnings when metadata is present,\n- marks cancelled/error/exit state separately.\n\nThis component is wired by `CommandController.handleBashCommand()` and fed from `AgentSession.executeBash()`.\n\n## Mode-specific behavior differences\n\n| Surface                        | Entry path                                            | PTY eligible                                                         | Live output UX                                                           | Error surfacing                                  |\n| ------------------------------ | ----------------------------------------------------- | -------------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------ |\n| Interactive tool call          | `BashTool.execute`                                    | Yes, when `bash.virtualTerminal=on` and UI exists and `PI_NO_PTY!=1` | PTY overlay (interactive) or streamed tail updates                       | Tool errors become `toolResult.isError`          |\n| Print mode tool call           | `BashTool.execute`                                    | No (no UI context)                                                   | No TUI overlay; output appears in event stream/final assistant text flow | Same tool error mapping                          |\n| RPC tool call (agent tooling)  | `BashTool.execute`                                    | Usually no UI -> non-PTY                                             | 
Structured tool events/results                                           | Same tool error mapping                          |\n| Interactive bang command (`!`) | `AgentSession.executeBash` + `BashExecutionComponent` | No (uses executor directly)                                          | Dedicated bash execution component                                       | Controller catches exceptions and shows UI error |\n| RPC `bash` command             | `rpc-mode` -> `session.executeBash`                   | No                                                                   | Returns `BashResult` directly                                            | Consumer handles returned fields                 |\n\n## Operational caveats\n\n- Interceptor only blocks commands when suggested tool is currently available in context.\n- If artifact allocation fails, truncation still occurs but no `artifact://` back-reference is available.\n- Shell session cache has no explicit eviction in this module; lifetime is process-scoped.\n- PTY and non-PTY timeout surfaces differ:\n  - PTY exposes explicit `timedOut` result field,\n  - non-PTY maps timeout into `cancelled + annotation` summary.\n\n## Implementation files\n\n- [`src/tools/bash.ts`](../packages/coding-agent/src/tools/bash.ts) — tool entrypoint, normalization/interception, PTY/non-PTY selection, result/error mapping, bash tool renderer.\n- [`src/tools/bash-normalize.ts`](../packages/coding-agent/src/tools/bash-normalize.ts) — command normalization and post-run head/tail filtering.\n- [`src/tools/bash-interceptor.ts`](../packages/coding-agent/src/tools/bash-interceptor.ts) — interceptor rule matching and blocked-command messages.\n- [`src/exec/bash-executor.ts`](../packages/coding-agent/src/exec/bash-executor.ts) — non-PTY executor, shell session reuse, cancellation wiring, output sink integration.\n- [`src/tools/bash-interactive.ts`](../packages/coding-agent/src/tools/bash-interactive.ts) — PTY runtime, overlay UI, input 
normalization, non-interactive env defaults.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink` truncation/artifact spill and summary metadata.\n- [`src/tools/output-utils.ts`](../packages/coding-agent/src/tools/output-utils.ts) — artifact allocation helpers and streaming tail buffer.\n- [`src/tools/output-meta.ts`](../packages/coding-agent/src/tools/output-meta.ts) — truncation metadata shape + notice injection wrapper.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level `executeBash`, message recording, abort lifecycle.\n- [`src/modes/components/bash-execution.ts`](../packages/coding-agent/src/modes/components/bash-execution.ts) — interactive `!` command execution component.\n- [`src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts) — wiring for interactive `!` command UI stream/update completion.\n- [`src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts) — RPC `bash` and `abort_bash` command surface.\n- [`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://<id>` resolution.\n",
@@ -18,6 +18,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
 	"mcp-protocol-transports.md": "# MCP Protocol and Transport Internals\n\nThis document describes how coding-agent implements MCP JSON-RPC messaging and how protocol concerns are split from transport concerns.\n\n## Scope\n\nCovers:\n\n- JSON-RPC request/response and notification flow\n- Request correlation and lifecycle for stdio and HTTP/SSE transports\n- Timeout and cancellation behavior\n- Error propagation and malformed payload handling\n- Transport selection boundaries (`stdio` vs `http`/`sse`)\n- Which reconnect/retry responsibilities are transport-level vs manager-level\n\nDoes not cover extension authoring UX or command UI.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/transports/stdio.ts`](../packages/coding-agent/src/mcp/transports/stdio.ts)\n- [`src/mcp/transports/http.ts`](../packages/coding-agent/src/mcp/transports/http.ts)\n- [`src/mcp/transports/index.ts`](../packages/coding-agent/src/mcp/transports/index.ts)\n- [`src/mcp/json-rpc.ts`](../packages/coding-agent/src/mcp/json-rpc.ts)\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n\n## Layer boundaries\n\n### Protocol layer (JSON-RPC + MCP methods)\n\n- Message shapes are defined in `types.ts` (`JsonRpcRequest`, `JsonRpcNotification`, `JsonRpcResponse`, `JsonRpcMessage`).\n- MCP client logic (`client.ts`) decides method order and session handshake:\n  1. `initialize` request\n  2. `notifications/initialized` notification\n  3. method calls like `tools/list`, `tools/call`\n\n### Transport layer (`MCPTransport`)\n\n`MCPTransport` abstracts delivery and lifecycle:\n\n- `request(method, params, options?) -> Promise<T>`\n- `notify(method, params?) 
-> Promise<void>`\n- `close()`\n- `connected`\n- optional callbacks: `onClose`, `onError`, `onNotification`\n\nTransport implementations own framing and I/O details:\n\n- `StdioTransport`: newline-delimited JSON over subprocess stdio\n- `HttpTransport`: JSON-RPC over HTTP POST, with optional SSE responses/listening\n\n### Important current caveat\n\nTransport callbacks (`onClose`, `onError`, `onNotification`) are implemented, but current `MCPClient`/`MCPManager` flows do not wire reconnection logic to these callbacks. Notifications are only consumed if caller registers handlers.\n\n## Transport selection\n\n`client.ts:createTransport()` chooses transport from config:\n\n- `type` omitted or `\"stdio\"` -> `createStdioTransport`\n- `\"http\"` or `\"sse\"` -> `createHttpTransport`\n\n`\"sse\"` is treated as an HTTP transport variant (same class), not a separate transport implementation.\n\n## JSON-RPC message flow and correlation\n\n## Request IDs\n\nEach transport generates per-request IDs (`Math.random` + timestamp string). 
IDs are transport-local correlation tokens.\n\n## Stdio correlation path\n\n- Outbound request is serialized as one JSON object + `\\n`.\n- `#pendingRequests: Map<id, {resolve,reject}>` stores in-flight requests.\n- Read loop parses JSONL from stdout and calls `#handleMessage`.\n- If inbound message has matching `id`, request resolves/rejects.\n- If inbound message has `method` and no `id`, treated as notification and sent to `onNotification`.\n\nUnknown IDs are ignored (no rejection, no error callback).\n\n## HTTP correlation path\n\n- Outbound request is HTTP `POST` with JSON body and generated `id`.\n- Non-SSE response path: parse one JSON-RPC response and return `result`/throw on `error`.\n- SSE response path (`Content-Type: text/event-stream`): stream events, return first message whose `id` matches expected request ID and has `result` or `error`.\n- SSE messages with `method` and no `id` are treated as notifications.\n\nIf SSE stream ends before matching response, request fails with `No response received for request ID ...`.\n\n## Notifications\n\nClient emits JSON-RPC notifications via `transport.notify(...)`.\n\n- Stdio: writes notification frame to stdin (`jsonrpc`, `method`, optional `params`) plus newline.\n- HTTP: sends POST body without `id`; success accepts `2xx` or `202 Accepted`.\n\nServer-initiated notifications are only surfaced through transport `onNotification`; there is no default global subscriber in manager/client.\n\n## Stdio transport internals\n\n## Lifecycle and state transitions\n\n- Initial: `connected=false`, `process=null`, pending map empty\n- `connect()`:\n  - spawn subprocess with configured command/args/env/cwd\n  - mark connected\n  - start stdout read loop (`readJsonl`)\n  - start stderr loop (read/discard; currently silent)\n- `close()`:\n  - mark disconnected\n  - reject all pending requests (`Transport closed`)\n  - kill subprocess\n  - await read loop shutdown\n  - emit `onClose`\n\nIf read loop exits unexpectedly, `finally` 
triggers `#handleClose()` which performs the same pending-request rejection and close callback.\n\n## Timeout and cancellation\n\nPer request:\n\n- timeout defaults to `config.timeout ?? 30000`\n- optional `AbortSignal` from caller\n- abort and timeout both reject the pending promise and clean map entry\n\nCancellation is local only: transport does not send protocol-level cancellation notification to the server.\n\n## Malformed payload handling\n\nIn read loop:\n\n- each parsed JSONL line is passed to `#handleMessage` in `try/catch`\n- malformed/invalid message handling exceptions are dropped (`Skip malformed lines` comment)\n- loop continues, so one bad message does not kill the connection\n\nIf the underlying stream parser throws, `onError` is invoked (when still connected), then connection closes.\n\n## Disconnect/failure behavior\n\nWhen process exits or stream closes:\n\n- all in-flight requests are rejected with `Transport closed`\n- no automatic restart or reconnect\n- higher layers must reconnect by creating a new transport\n\n## Backpressure/streaming notes\n\n- Outbound writes use `stdin.write()` + `flush()` without awaiting drain semantics.\n- There is no explicit queue or high-watermark management in transport.\n- Inbound processing is stream-driven (`for await` over `readJsonl`), one parsed message at a time.\n\n## HTTP/SSE transport internals\n\n## Lifecycle and connection semantics\n\nHTTP transport has logical connection state, but request path is stateless per HTTP call:\n\n- `connect()` sets `connected=true` (no socket/session handshake)\n- optional server session tracking via `Mcp-Session-Id` header\n- `close()` optionally sends `DELETE` with `Mcp-Session-Id`, aborts SSE listener, emits `onClose`\n\nSo `connected` means \"transport usable\", not \"persistent stream established\".\n\n## Session header behavior\n\n- On POST response, if `Mcp-Session-Id` header is present, transport stores it.\n- Subsequent requests/notifications include 
`Mcp-Session-Id`.\n- `close()` tries to terminate server session with HTTP DELETE; termination failures are ignored.\n\n## Timeout and cancellation\n\nFor both `request()` and `notify()`:\n\n- timeout uses `AbortController` (`config.timeout ?? 30000`)\n- external signal, if provided, is merged via `AbortSignal.any([...])`\n- AbortError handling distinguishes caller abort vs timeout\n\nErrors thrown:\n\n- timeout: `Request timeout after ...ms` (or `SSE response timeout ...`, `Notify timeout ...`)\n- caller abort: original AbortError is rethrown when external signal is already aborted\n\n## HTTP error propagation\n\nOn non-OK response:\n\n- response text is included in thrown error (`HTTP <status>: <text>`)\n- if present, auth hints from `WWW-Authenticate` and `Mcp-Auth-Server` are appended\n\nOn JSON-RPC error object:\n\n- throws `MCP error <code>: <message>`\n\nMalformed JSON body (`response.json()` failure) propagates as parse exception.\n\n## SSE behavior and modes\n\nTwo SSE paths exist:\n\n1. **Per-request SSE response** (`#parseSSEResponse`)\n   - used when POST response content type is `text/event-stream`\n   - consumes stream until matching response id found\n   - can process interleaved notifications during same stream\n\n2. 
**Background SSE listener** (`startSSEListener()`)\n   - optional GET listener for server-initiated notifications\n   - currently not automatically started by MCP manager/client\n   - if GET returns `405`, listener silently disables itself (server does not support this mode)\n\n## Malformed payload and disconnect handling\n\nSSE JSON parsing errors bubble out of `readSseJson` and reject request/listener.\n\n- Request SSE parse errors reject the active request.\n- Background listener errors trigger `onError` (except AbortError).\n- No auto-reconnect for background listener.\n\n## `json-rpc.ts` utility vs transport abstraction\n\n`src/mcp/json-rpc.ts` provides `callMCP()` and `parseSSE()` helpers for direct HTTP MCP calls (used by Exa integration), not the `MCPTransport` abstraction used by `MCPClient`/`MCPManager`.\n\nNotable differences from `HttpTransport`:\n\n- parses entire response text first, then extracts first `data: ` line (`parseSSE`), with JSON fallback\n- no request timeout management, no abort API, no session-id handling, no transport lifecycle\n- returns raw JSON-RPC envelope object\n\nThis path is lightweight but less robust than full transport implementation.\n\n## Retry/reconnect responsibilities\n\n## Transport-level\n\nCurrent transport implementations do **not**:\n\n- retry failed requests\n- reconnect after stdio process exit\n- reconnect SSE listeners\n- resend in-flight requests after disconnect\n\nThey fail fast and propagate errors.\n\n## Manager/client-level\n\n`MCPManager` handles discovery/initial connection orchestration and can reconnect only by running connect flows again (`connectToServer`/`discoverAndConnect` paths). 
It does not auto-heal an already connected transport on runtime failure callbacks.\n\n`MCPManager` does have startup fallback behavior for slow servers (deferred tools from cache), but that is tool availability fallback, not transport retry.\n\n## Failure scenarios summary\n\n- **Malformed stdio message line**: dropped; stream continues.\n- **Stdio stream/process ends**: transport closes; pending requests rejected as `Transport closed`.\n- **HTTP non-2xx**: request/notify throws HTTP error.\n- **Invalid JSON response**: parse exception propagated.\n- **SSE ends without matching id**: request fails with `No response received for request ID ...`.\n- **Timeout**: transport-specific timeout error.\n- **Caller abort**: AbortError/reason propagated from caller signal.\n\n## Practical boundary rule\n\nIf the concern is message shape, id correlation, or MCP method ordering, it belongs to protocol/client logic.\n\nIf the concern is framing (JSONL vs HTTP/SSE), stream parsing, fetch/spawn lifecycle, timeout clocks, or connection teardown, it belongs to transport implementation.",
 	"mcp-runtime-lifecycle.md": "# MCP runtime lifecycle\n\nThis document describes how MCP servers are discovered, connected, exposed as tools, refreshed, and torn down in the coding-agent runtime.\n\n## Lifecycle at a glance\n\n1. **SDK startup** calls `discoverAndLoadMCPTools()` (unless MCP is disabled).\n2. **Discovery** (`loadAllMCPConfigs`) resolves MCP server configs from capability sources, filters disabled/project/Exa entries, and preserves source metadata.\n3. **Manager connect phase** (`MCPManager.connectServers`) starts per-server connect + `tools/list` in parallel.\n4. **Fast startup gate** waits up to 250ms, then may return:\n   - fully loaded `MCPTool`s,\n   - failures per server,\n   - or cached `DeferredMCPTool`s for still-pending servers.\n5. **SDK wiring** merges MCP tools into runtime tool registry for the session.\n6. **Live session** can refresh MCP tools via `/mcp` flows (`disconnectAll` + rediscover + `session.refreshMCPTools`).\n7. **Teardown** happens when callers invoke `disconnectServer`/`disconnectAll`; manager also clears MCP tool registrations for disconnected servers.\n\n## Discovery and load phase\n\n### Entry path from SDK\n\n`createAgentSession()` in `src/sdk.ts` performs MCP startup when `enableMCP` is true (default):\n\n- calls `discoverAndLoadMCPTools(cwd, { ... 
})`,\n- passes `authStorage`, cache storage, and `mcp.enableProjectConfig` setting,\n- always sets `filterExa: true`,\n- logs per-server load/connect errors,\n- stores returned manager in `toolSession.mcpManager` and session result.\n\nIf `enableMCP` is false, MCP discovery is skipped entirely.\n\n### Config discovery and filtering\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical MCP server items through capability discovery, then converts to legacy `MCPServerConfig`.\n\nFiltering behavior:\n\n- `enableProjectConfig: false` removes project-level entries (`_source.level === \"project\"`).\n- `enabled: false` servers are skipped before connect attempts.\n- Exa servers are filtered out by default and API keys are extracted for native Exa tool integration.\n\nResult includes both `configs` and `sources` (metadata used later for provider labeling).\n\n### Discovery-level failure behavior\n\n`discoverAndLoadMCPTools()` distinguishes two failure classes:\n\n- **Discovery hard failure** (exception from `manager.discoverAndConnect`, typically from config discovery): returns an empty tool set and one synthetic error `{ path: \".mcp.json\", error }`.\n- **Per-server runtime/connect failure**: manager returns partial success with `errors` map; other servers continue.\n\nSo startup does not fail the whole agent session when individual MCP servers fail.\n\n## Manager state model\n\n`MCPManager` tracks runtime lifecycle with separate registries:\n\n- `#connections: Map<string, MCPServerConnection>` — fully connected servers.\n- `#pendingConnections: Map<string, Promise<MCPServerConnection>>` — handshake in progress.\n- `#pendingToolLoads: Map<string, Promise<{ connection, serverTools }>>` — connected but tools still loading.\n- `#tools: CustomTool[]` — current MCP tool view exposed to callers.\n- `#sources: Map<string, SourceMeta>` — provider/source metadata even before connect completes.\n\n`getConnectionStatus(name)` derives status from these maps:\n\n- 
`connected` if in `#connections`,\n- `connecting` if pending connect or pending tool load,\n- `disconnected` otherwise.\n\n## Connection establishment and startup timing\n\n## Per-server connect pipeline\n\nFor each discovered server in `connectServers()`:\n\n1. store/update source metadata,\n2. skip if already connected/pending,\n3. validate transport fields (`validateServerConfig`),\n4. resolve auth/shell substitutions (`#resolveAuthConfig`),\n5. call `connectToServer(name, resolvedConfig)`,\n6. call `listTools(connection)`,\n7. cache tool definitions (`MCPToolCache.set`) best-effort.\n\n`connectToServer()` behavior (`src/mcp/client.ts`):\n\n- creates stdio or HTTP/SSE transport,\n- performs MCP `initialize` + `notifications/initialized`,\n- uses timeout (`config.timeout` or 30s default),\n- closes transport on init failure.\n\n### Fast startup gate + deferred fallback\n\n`connectServers()` waits on a race between:\n\n- all connect/tool-load tasks settled, and\n- `STARTUP_TIMEOUT_MS = 250`.\n\nAfter 250ms:\n\n- fulfilled tasks become live `MCPTool`s,\n- rejected tasks produce per-server errors,\n- still-pending tasks:\n  - use cached tool definitions if available (`MCPToolCache.get`) to create `DeferredMCPTool`s,\n  - otherwise block until those pending tasks settle.\n\nThis is a hybrid startup model: fast return when cache is available, correctness wait when cache is not.\n\n### Background completion behavior\n\nEach pending `toolsPromise` also has a background continuation that eventually:\n\n- replaces that server’s tool slice in manager state via `#replaceServerTools`,\n- writes cache,\n- logs late failures only after startup (`allowBackgroundLogging`).\n\n## Tool exposure and live-session availability\n\n### Startup registration\n\n`discoverAndLoadMCPTools()` converts manager tools into `LoadedCustomTool[]` and decorates paths (`mcp:<server> via <providerName>` when known).\n\n`createAgentSession()` then pushes these tools into `customTools`, which are 
wrapped and added to the runtime tool registry with names like `mcp_<server>_<tool>`.\n\n### Tool calls\n\n- `MCPTool` calls tools through an already connected `MCPServerConnection`.\n- `DeferredMCPTool` waits for `waitForConnection(server)` before calling; this allows cached tools to exist before connection is ready.\n\nBoth return structured tool output and convert transport/tool errors into `MCP error: ...` tool content (abort remains abort).\n\n## Refresh/reload paths (startup vs live reload)\n\n### Initial startup path\n\n- one-time discovery/load in `sdk.ts`,\n- tools are registered in initial session tool registry.\n\n### Interactive reload path\n\n`/mcp reload` path (`src/modes/controllers/mcp-command-controller.ts`) does:\n\n1. `mcpManager.disconnectAll()`,\n2. `mcpManager.discoverAndConnect()`,\n3. `session.refreshMCPTools(mcpManager.getTools())`.\n\n`session.refreshMCPTools()` (`src/session/agent-session.ts`) removes all `mcp_` tools, re-wraps latest MCP tools, and re-activates tool set so MCP changes apply without restarting session.\n\nThere is also a follow-up path for late connections: after waiting for a specific server, if status becomes `connected`, it re-runs `session.refreshMCPTools(...)` so newly available tools are rebound in-session.\n\n## Health, reconnect, and partial failure behavior\n\nCurrent runtime behavior is intentionally minimal:\n\n- **No autonomous health monitor** in manager/client.\n- **No automatic reconnect loop** when a transport drops.\n- Manager does not subscribe to transport `onClose`/`onError`; status is registry-driven.\n- Reconnect is explicit: reload flow or direct `connectServers()` invocation.\n\nOperationally:\n\n- one server failing does not remove tools from healthy servers,\n- connect/list failures are isolated per server,\n- tool cache and background updates are best-effort (warnings/errors logged, no hard stop).\n\n## Teardown semantics\n\n### Server-level teardown\n\n`disconnectServer(name)`:\n\n- removes 
pending entries/source metadata,\n- closes transport if connected,\n- removes that server’s `mcp_` tools from manager state.\n\n### Global teardown\n\n`disconnectAll()`:\n\n- closes all active transports with `Promise.allSettled`,\n- clears pending maps, sources, connections, and manager tool list.\n\nIn current wiring, explicit teardown is used in MCP command flows (for reload/remove/disable). There is no separate automatic manager disposal hook in the startup path itself; callers are responsible for invoking manager disconnect methods when they need deterministic MCP shutdown.\n\n## Failure modes and guarantees\n\n| Scenario | Behavior | Hard fail vs best-effort |\n| --- | --- | --- |\n| Discovery throws (capability/config load path) | Loader returns empty tools + synthetic `.mcp.json` error | Best-effort session startup |\n| Invalid server config | Server skipped with validation error entry | Best-effort per server |\n| Connect timeout/init failure | Server error recorded; others continue | Best-effort per server |\n| `tools/list` still pending at startup with cache hit | Deferred tools returned immediately | Best-effort fast startup |\n| `tools/list` still pending at startup without cache | Startup waits for pending to settle | Hard wait for correctness |\n| Late background tool-load failure | Logged after startup gate | Best-effort logging |\n| Runtime dropped transport | No automatic reconnect; future calls fail until reconnect/reload | Best-effort recovery via manual action |\n\n## Public API surface\n\n`src/mcp/index.ts` re-exports loader/manager/client APIs for external callers. 
`src/sdk.ts` exposes `discoverMCPServers()` as a convenience wrapper returning the same loader result shape.\n\n## Implementation files\n\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts) — loader facade, discovery error normalization, `LoadedCustomTool` conversion.\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts) — lifecycle state registries, parallel connect/list flow, refresh/disconnect.\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts) — transport setup, initialize handshake, list/call/disconnect.\n- [`src/mcp/index.ts`](../packages/coding-agent/src/mcp/index.ts) — MCP module API exports.\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts) — startup wiring into session/tool registry.\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts) — config discovery/filtering/validation used by manager.\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts) — `MCPTool` and `DeferredMCPTool` runtime behavior.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — `refreshMCPTools` live rebinding.\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts) — interactive reload/reconnect flows.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent MCP proxying via parent manager connections.\n",
 	"mcp-server-tool-authoring.md": "# MCP server and tool authoring\n\nThis document explains how MCP server definitions become callable `mcp_*` tools in coding-agent, and what operators should expect when configs are invalid, duplicated, disabled, or auth-gated.\n\n## Architecture at a glance\n\n```text\nConfig sources (.omp/.claude/.cursor/.vscode/mcp.json, mcp.json, etc.)\n  -> discovery providers normalize to canonical MCPServer\n  -> capability loader dedupes by server name (higher provider priority wins)\n  -> loadAllMCPConfigs converts to MCPServerConfig + skips enabled:false\n  -> MCPManager connects/listTools (with auth/header/env resolution)\n  -> MCPTool/DeferredMCPTool bridge exposes tools as mcp_<server>_<tool>\n  -> AgentSession.refreshMCPTools replaces live MCP tools immediately\n```\n\n## 1) Server config model and validation\n\n`src/mcp/types.ts` defines the authoring shape used by MCP config writers and runtime:\n\n- `stdio` (default when `type` missing): requires `command`, optional `args`, `env`, `cwd`\n- `http`: requires `url`, optional `headers`\n- `sse`: requires `url`, optional `headers` (kept for compatibility)\n- shared fields: `enabled`, `timeout`, `auth`\n\n`validateServerConfig()` (`src/mcp/config.ts`) enforces transport basics:\n\n- rejects configs that set both `command` and `url`\n- requires `command` for stdio\n- requires `url` for http/sse\n- rejects unknown `type`\n\n`config-writer.ts` applies this validation for add/update operations and also validates server names:\n\n- non-empty\n- max 100 chars\n- only `[a-zA-Z0-9_.-]`\n\n### Transport pitfalls\n\n- `type` omitted means stdio. 
If you intended HTTP/SSE but omitted `type`, `command` becomes mandatory.\n- `sse` is still accepted but treated as HTTP transport internally (`createHttpTransport`).\n- Validation is structural, not reachability: a syntactically valid URL can still fail at connect time.\n\n## 2) Discovery, normalization, and precedence\n\n### Capability-based discovery\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical `MCPServer` items via `loadCapability(mcpCapability.id)`.\n\nThe capability layer (`src/capability/index.ts`) then:\n\n1. loads providers in priority order\n2. dedupes by `server.name` (first win = highest priority)\n3. validates deduped items\n\nResult: duplicate server names across sources are not merged. One definition wins; lower-priority duplicates are shadowed.\n\n### `.mcp.json` and related files\n\nThe dedicated fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json` (low priority).\n\nIn practice MCP servers also come from higher-priority providers (for example native `.omp/...` and tool-specific config dirs). Authoring guidance:\n\n- Prefer `.omp/mcp.json` (project) or `~/.omp/mcp.json` (user) for explicit control.\n- Use root `mcp.json` / `.mcp.json` when you need fallback compatibility.\n- Reusing the same server name in multiple sources causes precedence shadowing, not merge.\n\n### Normalization behavior\n\n`convertToLegacyConfig()` (`src/mcp/config.ts`) maps canonical `MCPServer` to runtime `MCPServerConfig`.\n\nKey behavior:\n\n- transport inferred as `server.transport ?? (command ? \"stdio\" : url ? 
\"http\" : \"stdio\")`\n- disabled servers (`enabled === false`) are dropped before connection\n- optional fields are preserved when present\n\n### Environment expansion during discovery\n\n`mcp-json.ts` expands env placeholders in string fields with `expandEnvVarsDeep()`:\n\n- supports `${VAR}` and `${VAR:-default}`\n- unresolved values remain literal `${VAR}` strings\n\n`mcp-json.ts` also performs runtime type checks for user JSON and logs warnings for invalid `enabled`/`timeout` values instead of hard-failing the whole file.\n\n## 3) Auth and runtime value resolution\n\n`MCPManager.prepareConfig()`/`#resolveAuthConfig()` (`src/mcp/manager.ts`) is the final pre-connect pass.\n\n### OAuth credential injection\n\nIf config has:\n\n```ts\nauth: { type: \"oauth\", credentialId: \"...\" }\n```\n\nand credential exists in auth storage:\n\n- `http`/`sse`: injects `Authorization: Bearer <access_token>` header\n- `stdio`: injects `OAUTH_ACCESS_TOKEN` env var\n\nIf credential lookup fails, manager logs a warning and continues with unresolved auth.\n\n### Header/env value resolution\n\nBefore connect, manager resolves each header/env value via `resolveConfigValue()` (`src/config/resolve-config-value.ts`):\n\n- value starting with `!` => execute shell command, use trimmed stdout (cached)\n- otherwise, treat value as environment variable name first (`process.env[name]`), fallback to literal value\n- unresolved command/env values are omitted from final headers/env map\n\nOperational caveat: this means a mistyped secret command/env key can silently remove that header/env entry, producing downstream 401/403 or server startup failures.\n\n## 4) Tool bridge: MCP -> agent-callable tools\n\n`src/mcp/tool-bridge.ts` converts MCP tool definitions into `CustomTool`s.\n\n### Naming and collision domain\n\nTool names are generated as:\n\n```text\nmcp_<sanitized_server_name>_<sanitized_tool_name>\n```\n\nRules:\n\n- lowercases\n- non-`[a-z_]` chars become `_`\n- repeated underscores 
collapse\n- redundant `<server>_` prefix in tool name is stripped once\n\nThis avoids many collisions, but not all. Different raw names can still sanitize to the same identifier (for example `my-server` and `my.server` both sanitize similarly), and registry insertion is last-write-wins.\n\n### Schema mapping\n\n`convertSchema()` keeps MCP JSON Schema mostly as-is but patches object schemas missing `properties` with `{}` for provider compatibility.\n\n### Execution mapping\n\n`MCPTool.execute()` / `DeferredMCPTool.execute()`:\n\n- calls MCP `tools/call`\n- flattens MCP content into displayable text\n- returns structured details (`serverName`, `mcpToolName`, provider metadata)\n- maps server-reported `isError` to `Error: ...` text result\n- maps thrown transport/runtime failures to `MCP error: ...`\n- preserves abort semantics by translating AbortError into `ToolAbortError`\n\n## 5) Operator lifecycle: add/edit/remove and live updates\n\nInteractive mode exposes `/mcp` in `src/modes/controllers/mcp-command-controller.ts`.\n\nSupported operations:\n\n- `add` (wizard or quick-add)\n- `remove` / `rm`\n- `enable` / `disable`\n- `test`\n- `reauth` / `unauth`\n- `reload`\n\nConfig writes are atomic (`writeMCPConfigFile`: temp file + rename).\n\nAfter changes, controller calls `#reloadMCP()`:\n\n1. `mcpManager.disconnectAll()`\n2. `mcpManager.discoverAndConnect()`\n3. 
`session.refreshMCPTools(mcpManager.getTools())`\n\n`refreshMCPTools()` replaces all `mcp_` registry entries and immediately re-activates the latest MCP tool set, so changes take effect without restarting the session.\n\n### Mode differences\n\n- **Interactive/TUI mode**: `/mcp` gives in-app UX (wizard, OAuth flow, connection status text, immediate runtime rebinding).\n- **SDK/headless integration**: `discoverAndLoadMCPTools()` (`src/mcp/loader.ts`) returns loaded tools + per-server errors; no `/mcp` command UX.\n\n## 6) User-visible error surfaces\n\nCommon error strings users/operators see:\n\n- add/update validation failures:\n  - `Invalid server config: ...`\n  - `Server \"<name>\" already exists in <path>`\n- quick-add argument issues:\n  - `Use either --url or -- <command...>, not both.`\n  - `--token requires --url (HTTP/SSE transport).`\n- connect/test failures:\n  - `Failed to connect to \"<name>\": <message>`\n  - timeout help text suggests increasing timeout\n  - auth help text for `401/403`\n- auth/OAuth flows:\n  - `Authentication required ... OAuth endpoints could not be discovered`\n  - `OAuth flow timed out. Please try again.`\n  - `OAuth authentication failed: ...`\n- disabled server usage:\n  - `Server \"<name>\" is disabled. Run /mcp enable <name> first.`\n\nBad source JSON in discovery is generally handled as warnings/logs; config-writer paths throw explicit errors.\n\n## 7) Practical authoring guidance\n\nFor robust MCP authoring in this codebase:\n\n1. Keep server names globally unique across all MCP-capable config sources.\n2. Prefer alphanumeric/underscore names to avoid sanitized-name collisions in generated `mcp_*` tool names.\n3. Use explicit `type` to avoid accidental stdio defaults.\n4. Treat `enabled: false` as hard-off: server is omitted from runtime connect set.\n5. For OAuth configs, store a valid `credentialId`; otherwise auth injection is skipped.\n6. 
If using command-based secret resolution (`!cmd`), verify command output is stable and non-empty.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts)\n- [`src/mcp/config-writer.ts`](../packages/coding-agent/src/mcp/config-writer.ts)\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts)\n- [`src/discovery/mcp-json.ts`](../packages/coding-agent/src/discovery/mcp-json.ts)\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/config/resolve-config-value.ts`](../packages/coding-agent/src/config/resolve-config-value.ts)\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts)\n",
+	"memory.md": "# Autonomous Memory\n\nWhen enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into each new session. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable via `/settings` or `config.yml`:\n\n```yaml\nmemories:\n  enabled: true\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Cite the memory artifact path when memory changes the plan, and pair it with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Reading memory artifacts\n\nThe agent can read memory files directly using `memory://` URLs with the `read` tool:\n\n| URL | Content |\n|---|---|\n| `memory://root` | Compact summary injected at startup |\n| `memory://root/MEMORY.md` | Full long-term memory document |\n| `memory://root/skills/<name>/SKILL.md` | A generated skill playbook |\n\n### `/memory` slash command\n\n| Subcommand | Effect |\n|---|---|\n| `view` | Show the current memory injection payload |\n| `clear` / `reset` | Delete all memory data and generated artifacts |\n| `enqueue` / `rebuild` | Force consolidation to run at next startup |\n\n## How it works\n\nMemories are built by a background pipeline that runs at startup or when manually triggered via slash command.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. 
Sessions that are too recent, too old, or currently active are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nAll output is scanned for secrets before being written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven entirely by static prompt files in `src/prompts/memories/`.\n\n| File | Purpose | Variables |\n|---|---|---|\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n|---|---|---|\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` | Cross-session synthesis |\n\nIf `smol` is not configured, Phase 2 falls back to the `default` role.\n\n## Configuration\n\n| Setting | Default | Description |\n|---|---|---|\n| `memories.enabled` | `false` | Master switch |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on 
sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `src/prompts/memories/` — memory prompt templates\n- `src/internal-urls/memory-protocol.ts` — `memory://` URL handler\n",
 	"models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n  <provider-id>:\n    # provider-level config\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n## Provider-level fields\n\n```yaml\nproviders:\n  my-provider:\n    baseUrl: https://api.example.com/v1\n    apiKey: MY_PROVIDER_API_KEY\n    api: openai-completions\n    headers:\n      X-Team: platform\n    authHeader: true\n    auth: apiKey\n    discovery:\n      type: ollama\n    modelOverrides:\n      some-model-id:\n        name: Renamed model\n    models:\n      - id: some-model-id\n        name: Some Model\n        api: openai-completions\n        reasoning: false\n        input: [text]\n        cost:\n          input: 0\n          output: 0\n          cacheRead: 0\n          cacheWrite: 0\n        contextWindow: 128000\n        maxTokens: 
16384\n        headers:\n          X-Model: value\n        compat:\n          supportsStore: true\n          supportsDeveloperRole: true\n          supportsReasoningEffort: true\n          maxTokensField: max_completion_tokens\n          openRouterRouting:\n            only: [anthropic]\n          vercelGatewayRouting:\n            order: [anthropic, openai]\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default) or `none`\n- `discovery.type`: `ollama`\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n   - same `provider + id` replaces existing\n   - otherwise append\n6. 
Apply runtime-discovered models (currently Ollama), then re-apply model overrides.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-completions`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls `GET /api/tags` on Ollama and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n  ollama:\n    baseUrl: http://127.0.0.1:11434\n    api: openai-completions\n    auth: none\n    discovery:\n      type: ollama\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. 
ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. 
first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `plan`, `commit`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. 
This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n  openai-codex:\n    modelOverrides:\n      gpt-5.3-codex-spark:\n        contextPromotionTarget: openai-codex/gpt-5.3-codex\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\n`models.yml` supports this `compat` subset:\n\n- `supportsStore`\n- `supportsDeveloperRole`\n- `supportsReasoningEffort`\n- `maxTokensField` (`max_completion_tokens` or `max_tokens`)\n- `openRouterRouting.only` / `openRouterRouting.order`\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order`\n\nThese are consumed by the OpenAI-completions transport logic and combined with URL-based auto-detection.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n  local-openai:\n    baseUrl: http://127.0.0.1:8000/v1\n    auth: none\n    api: openai-completions\n    models:\n      - id: Qwen/Qwen2.5-Coder-32B-Instruct\n        name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n  anthropic-proxy:\n    baseUrl: https://proxy.example.com/anthropic\n    apiKey: ANTHROPIC_PROXY_API_KEY\n    api: anthropic-messages\n    authHeader: true\n    models:\n      - id: claude-sonnet-4-20250514\n        name: Claude Sonnet 4 (Proxy)\n        reasoning: true\n        input: [text, 
image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n  openrouter:\n    baseUrl: https://my-proxy.example.com/v1\n    headers:\n      X-Team: platform\n    modelOverrides:\n      anthropic/claude-sonnet-4:\n        name: Sonnet 4 (Corp)\n        compat:\n          openRouterRouting:\n            only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`.\n\nOne notable legacy path remains: web-search Anthropic auth resolution still reads `~/.omp/agent/models.json` directly in `src/web/search/auth.ts`.\n\nIf you rely on that specific path, keep JSON compatibility in mind until that module is migrated.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
 	"natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document deep-dives the addon loading/validation layer in `@oh-my-pi/pi-natives`: how `native.ts` decides which `.node` file to load, when embedded payload extraction runs, and how startup failures are reported.\n\n## Implementation files\n\n- `packages/natives/src/native.ts`\n- `packages/natives/src/embedded-addon.ts`\n- `packages/natives/src/bindings.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nLoader/runtime responsibilities are intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Optionally materialize an embedded addon into a versioned per-user cache directory.\n- Attempt candidates in deterministic order.\n- Reject stale or incompatible addons via `validateNative` before exposing bindings.\n\nOut of scope here: module-specific grep/text/highlight behavior.\n\n## Runtime inputs and derived state\n\nAt module initialization (`export const native = loadNative();`), `native.ts` computes static context:\n\n- **Platform tag**: ``${process.platform}-${process.arch}`` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json` (`version` field).\n- **Core directories**:\n  - `nativeDir`: package-local `packages/natives/native`.\n  - `execDir`: directory containing `process.execPath`.\n  - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n  - `userDataDir` fallback:\n    - Windows: `%LOCALAPPDATA%/omp` (or `%USERPROFILE%/AppData/Local/omp`).\n    - Non-Windows: `~/.local/bin`.\n- **Compiled-binary mode** (`isCompiledBinary`): true if any of:\n  - `PI_COMPILED` env var is set, or\n  - `import.meta.url` contains Bun-embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Variant override**: `PI_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else 
`baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nBehavior detail:\n\n- Unsupported platforms are not rejected up-front.\n- Loader still tries all computed candidates first.\n- If nothing loads, it throws an explicit unsupported-platform error listing supported tags.\n\nThis preserves useful diagnostics for near-miss cases while still failing hard for truly unsupported targets.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. If `PI_NATIVE_VARIANT` is `modern` or `baseline`, that value wins.\n2. Else detect AVX2 support:\n   - Linux: scan `/proc/cpuinfo` for `avx2`.\n   - macOS: query `sysctl` (`machdep.cpu.leaf7_features`, fallback `machdep.cpu.features`).\n   - Windows: run PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. Result:\n   - AVX2 available -> `modern`\n   - AVX2 unavailable/undetectable -> `baseline`\n\n### Non-x64 behavior\n\n- No variant is used; loader stays on the default filename (`pi_natives.<platform>-<arch>.node`).\n\n### Filename construction\n\nGiven `tag = <platform>-<arch>`:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`: try in order\n  1. `pi_natives.<tag>-modern.node`\n  2. `pi_natives.<tag>-baseline.node` (intentional fallback)\n- x64 + `baseline`: only `pi_natives.<tag>-baseline.node`\n\nThe `addonLabel` used in final error messages is either `<tag>` or `<tag> (<variant>)`.\n\n## Candidate path construction and fallback ordering\n\n`native.ts` builds candidate pools before any `require(...)` call.\n\n### Debug/dev candidates (only when `PI_DEV` is set)\n\nPrepended first:\n\n1. `<nativeDir>/pi_natives.dev.node`\n2. 
`<execDir>/pi_natives.dev.node`\n\nThis path is explicit debug intent and always outranks release candidates.\n\n### Release candidates\n\nBuilt from variant-resolved filename list and searched in this order:\n\n- **Non-compiled runtime**:\n  1. `<nativeDir>/<filename>`\n  2. `<execDir>/<filename>`\n\n- **Compiled runtime** (`PI_COMPILED` or Bun embedded markers):\n  1. `<versionedDir>/<filename>`\n  2. `<userDataDir>/<filename>`\n  3. `<nativeDir>/<filename>`\n  4. `<execDir>/<filename>`\n\n`dedupedCandidates` removes duplicates while preserving first occurrence order.\n\n### Final runtime sequence\n\nAt load time:\n\n1. Optional embedded extraction candidate (if produced) is inserted at the front.\n2. Remaining deduplicated candidates are tried in order.\n3. First candidate that both `require(...)`s and passes `validateNative(...)` wins.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.ts` defines a generated manifest shape:\n\n- `platformTag`\n- `version`\n- `files[]` where each entry has `variant`, `filename`, `filePath`\n\nCurrent checked-in default is `embeddedAddon: null`; compiled artifacts may replace this with real metadata.\n\n### Extraction state machine\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when all gates pass:\n\n1. `isCompiledBinary === true`\n2. `embeddedAddon !== null`\n3. `embeddedAddon.platformTag === platformTag`\n4. `embeddedAddon.version === packageVersion`\n5. A variant-appropriate embedded file is found\n\nVariant file selection mirrors runtime variant intent:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization behavior:\n\n1. Ensure `<versionedDir>` exists (`mkdirSync(..., { recursive: true })`).\n2. If `<versionedDir>/<selected filename>` already exists, reuse it (no rewrite).\n3. Else read embedded source `filePath` and write target file.\n4. 
Return target path for highest-priority load attempt.\n\nOn failure, extraction does not crash immediately; it appends an error entry (directory creation or write failure) and loader proceeds to normal candidate probing.\n\n## Lifecycle and state transitions\n\n```text\nInit\n  -> Compute platform/version/variant/candidate lists\n  -> (Compiled + embedded manifest matches?)\n       yes -> Try extract embedded to versionedDir (record errors, continue)\n       no  -> Skip extraction\n  -> For each runtime candidate in order:\n       require(candidate)\n       -> success: validateNative\n            -> pass: return bindings (READY)\n            -> fail: record error, continue\n       -> failure: record error, continue\n  -> none loaded:\n       if unsupported platform tag -> throw Unsupported platform\n       else -> throw Failed to load (full tried-path diagnostics + hints)\n```\n\n## `validateNative` contract checks\n\n`validateNative(bindings, source)` enforces a function-only contract over `NativeBindings` at startup.\n\nMechanics:\n\n- For each required export name, it checks `typeof bindings[name] === \"function\"`.\n- Missing names are aggregated.\n- If any are missing, loader throws:\n  - source addon path,\n  - missing export list,\n  - rebuild command hint.\n\nThis is a hard compatibility gate against stale binaries, partial builds, and symbol/name drift.\n\n### JS API ↔ native export mapping (validation gate)\n\n| JS binding name checked in `validateNative` | Expected native export name |\n| --- | --- |\n| `grep` | `grep` |\n| `glob` | `glob` |\n| `highlightCode` | `highlightCode` |\n| `executeShell` | `executeShell` |\n| `PtySession` | `PtySession` |\n| `Shell` | `Shell` |\n| `visibleWidth` | `visibleWidth` |\n| `getSystemInfo` | `getSystemInfo` |\n| `getWorkProfile` | `getWorkProfile` |\n| `invalidateFsScanCache` | `invalidateFsScanCache` |\n\nNote: `bindings.ts` declares only the base `cancelWork(id)` member; module `types.ts` files declaration-merge 
additional symbols that `validateNative` enforces.\n\n## Failure behavior and diagnostics\n\n## Unsupported platform\n\nIf all candidates fail and `platformTag` is not in `SUPPORTED_PLATFORMS`, loader throws:\n\n- `Unsupported platform: <tag>`\n- Full supported-platform list\n- Explicit issue-reporting guidance\n\n## Stale binary / mismatch symptoms\n\nTypical stale mismatch signal:\n\n- `Native addon missing exports (<candidate>). Missing: ...`\n\nCommon causes:\n\n- Old `.node` binary from previous package version/API shape.\n- Wrong variant artifact selected (for x64).\n- New Rust export not present in loaded artifact.\n\nLoader behavior:\n\n- Records per-candidate missing-export failures.\n- Continues probing remaining candidates.\n- If no candidate validates, final error includes every attempted path with each failure message.\n\n## Compiled-binary startup failures\n\nIn compiled mode final diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete stale `<versionedDir>` and rerun,\n- direct release download `curl` commands for each expected filename.\n\n## Non-compiled startup failures\n\nIn normal package/runtime mode final diagnostics include:\n\n- reinstall hint (`bun install @oh-my-pi/pi-natives`),\n- local rebuild command (`bun --cwd=packages/natives run build:native`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern ...`).\n\n## Dev/debug versus release behavior\n\nWhen `PI_DEV` is set:\n\n- `pi_natives.dev.node` candidates are prepended ahead of all release candidates.\n- Loader emits per-candidate console diagnostics (`Loaded native addon...` and load errors).\n\nWithout `PI_DEV`:\n\n- Only release candidate chain is used.\n- No dev console diagnostics are emitted.\n\nOperationally, this means debug sessions can validate an ad-hoc dev addon first, while production/release runs remain on deterministic release artifact probing.",
 	"natives-architecture.md": "# Natives Architecture\n\n`@oh-my-pi/pi-natives` is a three-layer stack:\n\n1. **TypeScript wrapper/API layer** exposes stable JS/TS entrypoints.\n2. **Addon loading/validation layer** resolves and validates the `.node` binary for the current runtime.\n3. **Rust N-API module layer** implements performance-critical primitives exported to JS.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/src/index.ts`\n- `packages/natives/src/native.ts`\n- `packages/natives/src/bindings.ts`\n- `packages/natives/src/embedded-addon.ts`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Layer 1: TypeScript wrapper/API layer\n\n`packages/natives/src/index.ts` is the public barrel. It groups exports by capability domain and re-exports typed wrappers rather than exposing raw N-API bindings directly.\n\nCurrent top-level groups:\n\n- **Search/text primitives**: `grep`, `glob`, `text`, `highlight`\n- **Execution/process/terminal primitives**: `shell`, `pty`, `ps`, `keys`\n- **System/media/conversion primitives**: `image`, `html`, `clipboard`, `system-info`, `work`\n\n`packages/natives/src/bindings.ts` defines the base interface contract:\n\n- `NativeBindings` starts with shared members (`cancelWork(id: number)`)\n- module-specific bindings are added by declaration merging from each module’s `types.ts`\n- `Cancellable` standardizes timeout and abort-signal options for wrappers that expose cancellation\n\n**Guaranteed contract (API-facing):** consumers import from `@oh-my-pi/pi-natives` and use typed wrappers.\n\n**Implementation detail (may change):** declaration merging and internal wrapper layout (`src/<module>/index.ts`, `src/<module>/types.ts`).\n\n## Layer 2: Addon loading and validation\n\n`packages/natives/src/native.ts` owns runtime addon selection, optional extraction, and 
export validation.\n\n### Candidate resolution model\n\n- Platform tag is `\"${process.platform}-${process.arch}\"`.\n- Supported tags are currently:\n  - `linux-x64`\n  - `linux-arm64`\n  - `darwin-x64`\n  - `darwin-arm64`\n  - `win32-x64`\n- x64 can use CPU variants:\n  - `modern` (AVX2-capable)\n  - `baseline` (fallback)\n- Non-x64 uses the default filename (no variant suffix).\n\nFilename strategy:\n\n- Release: `pi_natives.<platform>-<arch>.node`\n- x64 variant release: `pi_natives.<platform>-<arch>-modern.node` and/or `...-baseline.node`\n- Dev: `pi_natives.dev.node` (preferred when `PI_DEV` is set)\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- **Linux**: `/proc/cpuinfo`\n- **macOS**: `sysctl machdep.cpu.leaf7_features` / `machdep.cpu.features`\n- **Windows**: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`PI_NATIVE_VARIANT` can explicitly force `modern` or `baseline`.\n\n### Binary distribution and extraction model\n\n`packages/natives/package.json` includes both `src` and `native` in published files. The `native/` directory stores prebuilt platform artifacts.\n\nFor compiled binaries (`PI_COMPILED` or Bun embedded runtime markers), loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`\n2. Check legacy compiled-binary location:\n   - Windows: `%LOCALAPPDATA%/omp` (fallback `%USERPROFILE%/AppData/Local/omp`)\n   - non-Windows: `~/.local/bin`\n3. 
Fall back to packaged `native/` and executable directory candidates\n\nIf an embedded addon manifest is present (`embedded-addon.ts` generated by `scripts/embed-native.ts`), `native.ts` can materialize the matching embedded binary into the versioned cache directory before loading.\n\n### Validation and failure modes\n\nAfter `require(candidate)`, `validateNative(...)` verifies required exports (for example `grep`, `glob`, `highlightCode`, `PtySession`, `Shell`, `getSystemInfo`, `getWorkProfile`, `invalidateFsScanCache`).\n\nFailure paths are explicit:\n\n- **Unsupported platform tag**: throws with supported platform list\n- **No loadable candidate**: throws with all attempted paths and remediation hints\n- **Missing exports**: throws with exact missing names and rebuild command\n- **Embedded extraction errors**: records directory/write failures and includes them in final load diagnostics\n\n**Guaranteed contract (API-facing):** addon load either succeeds with a validated binding set or fails fast with actionable error text.\n\n**Implementation detail (may change):** exact candidate search order and compiled-binary fallback path ordering.\n\n## Layer 3: Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` is the Rust entry module that declares exported module ownership:\n\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `image`\n- `keys`\n- `prof`\n- `ps`\n- `pty`\n- `shell`\n- `system_info`\n- `task`\n- `text`\n\nThese modules implement the N-API symbols consumed and validated by `native.ts`. 
JS-level names are surfaced through the TS wrappers in `packages/natives/src`.\n\n**Guaranteed contract (API-facing):** Rust module exports must match the binding names expected by `validateNative` and wrapper modules.\n\n**Implementation detail (may change):** internal Rust module decomposition and helper module boundaries (`glob_util`, `task`, etc.).\n\n## Ownership boundaries\n\nAt architecture level, ownership is split as follows:\n\n- **TS wrapper/API ownership (`packages/natives/src`)**\n  - public API grouping, option typing, and stable JS ergonomics\n  - cancellation surface (`timeoutMs`, `AbortSignal`) exposed to callers\n- **Loader ownership (`packages/natives/src/native.ts`)**\n  - runtime binary selection\n  - CPU variant selection and override handling\n  - compiled-binary extraction and candidate probing\n  - hard validation of required native exports\n- **Rust ownership (`crates/pi-natives/src`)**\n  - algorithmic and system-level implementation\n  - platform-native behavior and performance-sensitive logic\n  - N-API symbol implementation that TS wrappers consume\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@oh-my-pi/pi-natives`.\n2. Wrapper module calls into singleton `native` binding.\n3. `native.ts` selects candidate binary for platform/arch/variant.\n4. Optional embedded binary extraction occurs for compiled distributions.\n5. Addon is loaded and export set is validated.\n6. 
Wrapper returns typed results to caller.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Wrapper**: TS function/class that provides typed API over raw native exports.\n- **Declaration merging**: TS technique used by module `types.ts` files to extend `NativeBindings`.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from extracted/cache paths instead of only package-local paths.\n- **Embedded addon**: Build artifact metadata and file references generated into `embedded-addon.ts` so compiled binaries can extract matching `.node` payloads.\n- **Validation gate**: `validateNative(...)` check that rejects stale/mismatched binaries missing required exports.\n",

package/src/main.ts CHANGED Viewed

@@ -515,7 +515,8 @@ export async function runRootCommand(parsed: Args, rawArgs: string[]): Promise<v
 	// Create AuthStorage and ModelRegistry upfront
 	const authStorage = await discoverAuthStorage();
 	const modelRegistry = new ModelRegistry(authStorage);
-	await modelRegistry.refresh();
+	const refreshStrategy = parsedArgs.listModels !== undefined ? "online" : "online-if-uncached";
+	await modelRegistry.refresh(refreshStrategy);
 	debugStartup("main:discoverModels");
 	time("discoverModels");

package/src/mcp/tool-bridge.ts CHANGED Viewed

@@ -50,19 +50,43 @@ export interface MCPToolDetails {
 	providerName?: string;
 }
+/**
+ * Keywords whose value maps caller-chosen names to subschemas.
+ * The map's keys are names (e.g. property names), not schema keywords, and must never be stripped.
+ */
+const SCHEMA_NAME_MAP_KEYWORDS: ReadonlySet<string> = new Set([
+	"properties",
+	"patternProperties",
+	"$defs",
+	"definitions",
+	"dependentSchemas",
+]);
+/** Keywords whose value is instance data rather than a schema; it is passed through untouched. */
+const SCHEMA_DATA_KEYWORDS: ReadonlySet<string> = new Set(["enum", "const", "default", "examples"]);
+/**
+ * Recursively strip JSON Schema fields that cause AJV validation errors.
+ *
+ * - `$schema`: AJV throws on unknown meta-schema URIs (e.g. draft 2020-12 from schemars 1.x / rmcp 0.15+)
+ * - `nullable`: OpenAPI 3.0 extension, not standard JSON Schema — AJV rejects it as an unknown keyword.
+ *
+ * Only keys in keyword position are removed. A property that happens to be *named* `nullable` or
+ * `$schema` (an entry of `properties`, `$defs`, ...) is kept, and literal data under `enum`, `const`,
+ * `default` and `examples` is not rewritten.
+ *
+ * @param schema - A schema node: object, array of nodes, or primitive.
+ * @returns A sanitized copy; arrays and schema objects are rebuilt, primitives are returned as-is.
+ */
+function sanitizeSchema(schema: unknown): unknown {
+	if (Array.isArray(schema)) {
+		// Wrap in an arrow so `map`'s index/array arguments are never forwarded.
+		return schema.map(item => sanitizeSchema(item));
+	}
+	if (schema === null || typeof schema !== "object") {
+		return schema;
+	}
+	const out: Record<string, unknown> = {};
+	for (const [key, value] of Object.entries(schema as Record<string, unknown>)) {
+		if (key === "$schema" || key === "nullable") {
+			continue;
+		}
+		if (SCHEMA_DATA_KEYWORDS.has(key)) {
+			out[key] = value;
+			continue;
+		}
+		const isNameMap =
+			SCHEMA_NAME_MAP_KEYWORDS.has(key) && value !== null && typeof value === "object" && !Array.isArray(value);
+		if (isNameMap) {
+			// Keep every name; sanitize only the subschema each name points at.
+			const named: Record<string, unknown> = {};
+			for (const [name, subschema] of Object.entries(value as Record<string, unknown>)) {
+				named[name] = sanitizeSchema(subschema);
+			}
+			out[key] = named;
+			continue;
+		}
+		out[key] = sanitizeSchema(value);
+	}
+	return out;
+}
 /**
  * Convert JSON Schema from MCP to TypeBox-compatible schema.
  * MCP uses standard JSON Schema, TypeBox uses a compatible subset.
  *
  * Also normalizes schemas to work around common issues:
  * - Adds `properties: {}` to object schemas missing it (some LLM providers require this)
+ * - Strips `$schema` and `nullable` fields (see sanitizeSchema)
  */
 function convertSchema(mcpSchema: MCPToolDefinition["inputSchema"]): TSchema {
+	const schema = sanitizeSchema(mcpSchema) as Record<string, unknown>;
 	// Normalize: object schemas must have properties field for some providers
-	if (mcpSchema.type === "object" && !("properties" in mcpSchema)) {
-		return { ...mcpSchema, properties: {} } as unknown as TSchema;
+	if (schema.type === "object" && !("properties" in schema)) {
+		return { ...schema, properties: {} } as unknown as TSchema;
 	}
-	return mcpSchema as unknown as TSchema;
+	return schema as unknown as TSchema;
 }
 /**

package/src/sdk.ts CHANGED Viewed

@@ -551,7 +551,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	// Use provided or create AuthStorage and ModelRegistry
 	const authStorage = options.authStorage ?? (await discoverAuthStorage(agentDir));
 	const modelRegistry = options.modelRegistry ?? new ModelRegistry(authStorage);
-	await modelRegistry.refresh();
+	if (!options.modelRegistry) {
+		await modelRegistry.refresh();
+	}
 	time("discoverModels");
 	const settings = options.settings ?? (await Settings.init({ cwd, agentDir }));

package/src/session/auth-storage.ts CHANGED Viewed

@@ -1364,6 +1364,38 @@ export class AuthStorage {
 		return undefined;
 	}
+	/**
+	 * Look up a provider's API key without triggering an OAuth token refresh.
+	 *
+	 * Intended for model discovery, where a best-effort credential is enough; the real refresh is
+	 * left to the code path that performs an API call.
+	 *
+	 * Sources are consulted in this order, first hit wins:
+	 * runtime override, stored `api_key` credential, stored OAuth access token (only while its
+	 * expiry is a finite timestamp in the future), environment key, fallback resolver.
+	 *
+	 * @param provider - Provider identifier to look up.
+	 * @returns The key/token, or `undefined` when no source yields one.
+	 */
+	async peekApiKey(provider: string): Promise<string | undefined> {
+		const override = this.#runtimeOverrides.get(provider);
+		if (override) return override;
+		const stored = this.#selectCredentialByType(provider, "api_key");
+		if (stored) return resolveConfigValue(stored.credential.key);
+		const oauth = this.#selectCredentialByType(provider, "oauth");
+		if (oauth) {
+			// An expired (or non-finite expiry) token is skipped rather than refreshed here.
+			const expiry = oauth.credential.expires;
+			const stillValid = Number.isFinite(expiry) && expiry > Date.now();
+			if (stillValid) return oauth.credential.access;
+		}
+		return getEnvApiKey(provider) || (this.#fallbackResolver?.(provider) ?? undefined);
+	}
 	/**
 	 * Get API key for a provider.
 	 * Priority:

package/src/task/executor.ts CHANGED Viewed

@@ -248,6 +248,98 @@ function resolveFallbackCompletion(rawOutput: string, outputSchema: unknown): {
 	return { data: candidate };
 }
+/** One `submit_result` tool call as extracted from a subagent run. */
+export interface SubmitResultItem {
+	/** Result payload; `null`/`undefined` makes `finalizeSubprocessOutput` emit a warning instead of a result. */
+	data?: unknown;
+	/** `"aborted"` marks an explicit abort by the subagent; any other value is handled as a normal submission. */
+	status?: "success" | "aborted";
+	/** Abort reason; used as stderr and in the serialized output when `status` is `"aborted"`. */
+	error?: string;
+}
+/** Inputs to `finalizeSubprocessOutput`. */
+interface FinalizeSubprocessOutputArgs {
+	/** Output text collected from the subprocess before finalization. */
+	rawOutput: string;
+	/** Exit code before finalization. */
+	exitCode: number;
+	/** Stderr text before finalization. */
+	stderr: string;
+	/** When true, the fallback-completion path is disabled. */
+	doneAborted: boolean;
+	/** When true, the fallback-completion path is disabled. */
+	signalAborted: boolean;
+	/** Extracted `submit_result` calls; only the last item is consulted. */
+	submitResultItems?: SubmitResultItem[];
+	/** Extracted findings, forwarded to `normalizeCompleteData` together with the result data. */
+	reportFindings?: ReviewFinding[];
+	/** Output schema, forwarded to `normalizeOutputSchema` and `resolveFallbackCompletion`. */
+	outputSchema: unknown;
+}
+/** Outcome of `finalizeSubprocessOutput`. */
+interface FinalizeSubprocessOutputResult {
+	/** Final output text (serialized result, warning-prefixed output, or the input unchanged). */
+	rawOutput: string;
+	/** Final exit code; reset to 0 when a result or fallback was accepted. */
+	exitCode: number;
+	/** Final stderr; cleared on success, or set to the abort reason on explicit abort. */
+	stderr: string;
+	/** True when the last `submit_result` item had status `"aborted"`. */
+	abortedViaSubmitResult: boolean;
+	/** True when at least one `submit_result` item was supplied. */
+	hasSubmitResult: boolean;
+}
+/** Prepended to the output when the last `submit_result` item carries `null`/`undefined` data. */
+export const SUBAGENT_WARNING_NULL_SUBMIT_RESULT = "SYSTEM WARNING: Subagent called submit_result with null data.";
+/**
+ * Prepended to the output when a run exits with code 0 without any `submit_result` item and no
+ * fallback applies.
+ * NOTE(review): the "3" in the text is hard-coded; it appears to mirror `MAX_SUBMIT_RESULT_RETRIES`
+ * in `runSubprocess` — keep the two in sync.
+ */
+export const SUBAGENT_WARNING_MISSING_SUBMIT_RESULT =
+	"SYSTEM WARNING: Subagent exited without calling submit_result tool after 3 reminders.";
+/** Prefix `rawOutput` with a system warning, or return the warning alone when there is no output. */
+function prependSubagentWarning(warning: string, rawOutput: string): string {
+	return rawOutput ? `${warning}\n\n${rawOutput}` : warning;
+}
+/**
+ * Pretty-print `value` as JSON.
+ *
+ * If serialization throws (circular reference, BigInt, throwing `toJSON`), a compact
+ * `{"error": "<failureLabel>: <message>"}` document is returned instead. The fallback is built
+ * with `JSON.stringify` so quotes, backslashes and newlines in the error message are escaped and
+ * the result is always valid JSON.
+ */
+function stringifyCompletion(value: unknown, failureLabel: string): string {
+	try {
+		return JSON.stringify(value, null, 2) ?? "null";
+	} catch (err) {
+		const errorMessage = err instanceof Error ? err.message : String(err);
+		return JSON.stringify({ error: `${failureLabel}: ${errorMessage}` });
+	}
+}
+/**
+ * Derive the final output, exit code and stderr of a subagent run.
+ *
+ * Resolution order:
+ * 1. A `submit_result` item exists (only the last one is consulted):
+ *    - status `"aborted"`: exit code 0, stderr set to the abort reason, output is an
+ *      `{ aborted, error }` JSON document.
+ *    - `null`/`undefined` data: output is prefixed with the null-data warning; exit code and
+ *      stderr are left untouched.
+ *    - otherwise: output is the normalized data as JSON, exit code 0, stderr cleared.
+ * 2. No `submit_result` item:
+ *    - if the run exited 0 and was not aborted, a fallback completion is attempted from the raw
+ *      output; on success it is serialized like a normal result.
+ *    - else, with no usable output schema and non-blank output on a clean exit, the raw output is
+ *      accepted as-is.
+ *    - else, on exit code 0, the output is prefixed with the missing-submit_result warning.
+ *    - any other case leaves all three values unchanged.
+ *
+ * @param args - Raw run state plus extracted tool data; see `FinalizeSubprocessOutputArgs`.
+ * @returns The finalized values and flags describing how `submit_result` was used.
+ */
+export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): FinalizeSubprocessOutputResult {
+	let { rawOutput, exitCode, stderr } = args;
+	const { submitResultItems, reportFindings, doneAborted, signalAborted, outputSchema } = args;
+	let abortedViaSubmitResult = false;
+	const hasSubmitResult = Array.isArray(submitResultItems) && submitResultItems.length > 0;
+	if (hasSubmitResult) {
+		const lastSubmitResult = submitResultItems[submitResultItems.length - 1];
+		if (lastSubmitResult?.status === "aborted") {
+			// Explicit abort via the tool is a clean exit that carries the reason along.
+			abortedViaSubmitResult = true;
+			exitCode = 0;
+			stderr = lastSubmitResult.error || "Subagent aborted task";
+			try {
+				rawOutput = JSON.stringify({ aborted: true, error: lastSubmitResult.error }, null, 2);
+			} catch {
+				// Coerce to a plain string so the fallback cannot fail or emit malformed JSON.
+				rawOutput = JSON.stringify({ aborted: true, error: String(lastSubmitResult.error || "Unknown error") });
+			}
+		} else {
+			const submitData = lastSubmitResult?.data;
+			if (submitData === null || submitData === undefined) {
+				rawOutput = prependSubagentWarning(SUBAGENT_WARNING_NULL_SUBMIT_RESULT, rawOutput);
+			} else {
+				rawOutput = stringifyCompletion(
+					normalizeCompleteData(submitData, reportFindings),
+					"Failed to serialize submit_result data",
+				);
+				exitCode = 0;
+				stderr = "";
+			}
+		}
+	} else {
+		const allowFallback = exitCode === 0 && !doneAborted && !signalAborted;
+		const { normalized: normalizedSchema, error: schemaError } = normalizeOutputSchema(outputSchema);
+		const hasOutputSchema = normalizedSchema !== undefined && !schemaError;
+		const fallback = allowFallback ? resolveFallbackCompletion(rawOutput, outputSchema) : null;
+		if (fallback) {
+			rawOutput = stringifyCompletion(
+				normalizeCompleteData(fallback.data, reportFindings),
+				"Failed to serialize fallback completion",
+			);
+			exitCode = 0;
+			stderr = "";
+		} else if (!hasOutputSchema && allowFallback && rawOutput.trim().length > 0) {
+			exitCode = 0;
+			stderr = "";
+		} else if (exitCode === 0) {
+			rawOutput = prependSubagentWarning(SUBAGENT_WARNING_MISSING_SUBMIT_RESULT, rawOutput);
+		}
+	}
+	return { rawOutput, exitCode, stderr, abortedViaSubmitResult, hasSubmitResult };
+}
 /**
  * Extract a short preview from tool args for display.
  */
@@ -468,7 +560,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 	type AbortReason = "signal" | "terminate";
 	let abortSent = false;
 	let abortReason: AbortReason | undefined;
-	let pendingTerminationTimeoutId: NodeJS.Timeout | null = null;
 	const listenerController = new AbortController();
 	const listenerSignal = listenerController.signal;
 	const abortController = new AbortController();
@@ -502,24 +593,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 		if (activeSession) {
 			void activeSession.abort();
 		}
-		cancelPendingTermination();
-	};
-	const schedulePendingTermination = () => {
-		if (pendingTerminationTimeoutId || abortSent || resolved) return;
-		pendingTerminationTimeoutId = setTimeout(() => {
-			pendingTerminationTimeoutId = null;
-			if (!resolved) {
-				requestAbort("terminate");
-			}
-		}, 2000);
-	};
-	const cancelPendingTermination = () => {
-		if (pendingTerminationTimeoutId) {
-			clearTimeout(pendingTerminationTimeoutId);
-			pendingTerminationTimeoutId = null;
-		}
 	};
 	// Handle abort signal
@@ -704,6 +777,9 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 								existing.push(data);
 							}
 							progress.extractedToolData[event.toolName] = existing;
+							if (event.toolName === "submit_result") {
+								submitResultCalled = true;
+							}
 						}
 					}
@@ -717,8 +793,7 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 							isError: event.isError,
 						})
 					) {
-						// Don't terminate immediately - wait for message_end to get token counts
-						schedulePendingTermination();
+						requestAbort("terminate");
 					}
 				}
 				flushProgress = true;
@@ -785,11 +860,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 					// Accumulate tokens for progress display
 					progress.tokens += getUsageTokens(messageUsage);
 				}
-				// If pending termination, now we have tokens - terminate immediately
-				if (pendingTerminationTimeoutId) {
-					cancelPendingTermination();
-					requestAbort("terminate");
-				}
 				break;
 			}
@@ -979,9 +1049,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 			const MAX_SUBMIT_RESULT_RETRIES = 3;
 			unsubscribe = session.subscribe(event => {
-				if (event.type === "tool_execution_end" && event.toolName === "submit_result") {
-					submitResultCalled = true;
-				}
 				if (isAgentEvent(event)) {
 					try {
 						processEvent(event);
@@ -999,25 +1066,26 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 			const reminderToolChoice = buildSubmitResultToolChoice(session.model);
 			let retryCount = 0;
-			let previousTools: string[] | null = null;
-			try {
-				while (!submitResultCalled && retryCount < MAX_SUBMIT_RESULT_RETRIES && !abortSignal.aborted) {
+			while (!submitResultCalled && retryCount < MAX_SUBMIT_RESULT_RETRIES && !abortSignal.aborted) {
+				try {
 					retryCount++;
-					if (!previousTools) {
-						previousTools = session.getActiveToolNames();
-						await session.setActiveToolsByName(["submit_result"]);
-					}
 					const reminder = renderPromptTemplate(submitReminderTemplate, {
 						retryCount,
 						maxRetries: MAX_SUBMIT_RESULT_RETRIES,
 					});
 					await session.prompt(reminder, reminderToolChoice ? { toolChoice: reminderToolChoice } : undefined);
+				} catch (err) {
+					logger.error("Subagent prompt failed", {
+						error: err instanceof Error ? err.message : String(err),
+					});
 				}
-			} finally {
-				if (previousTools) {
-					await session.setActiveToolsByName(previousTools);
-				}
+			}
+			if (!submitResultCalled && !abortSignal.aborted) {
+				aborted = true;
+				exitCode = 1;
+				error ??= SUBAGENT_WARNING_MISSING_SUBMIT_RESULT;
 			}
 			const lastMessage = session.state.messages[session.state.messages.length - 1];
@@ -1076,7 +1144,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 		clearTimeout(progressTimeoutId);
 		progressTimeoutId = null;
 	}
-	cancelPendingTermination();
 	let exitCode = done.exitCode;
 	if (done.error) {
@@ -1085,67 +1152,22 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 	// Use final output if available, otherwise accumulated output
 	let rawOutput = finalOutputChunks.length > 0 ? finalOutputChunks.join("") : outputChunks.join("");
-	let abortedViaSubmitResult = false;
-	const submitResultItems = progress.extractedToolData?.submit_result as
-		| Array<{ data?: unknown; status?: "success" | "aborted"; error?: string }>
-		| undefined;
+	const submitResultItems = progress.extractedToolData?.submit_result as SubmitResultItem[] | undefined;
 	const reportFindings = progress.extractedToolData?.report_finding as ReviewFinding[] | undefined;
-	const hasSubmitResult = Array.isArray(submitResultItems) && submitResultItems.length > 0;
-	if (hasSubmitResult) {
-		const lastSubmitResult = submitResultItems[submitResultItems.length - 1];
-		if (lastSubmitResult?.status === "aborted") {
-			// Agent explicitly aborted via submit_result tool - clean exit with error info
-			abortedViaSubmitResult = true;
-			exitCode = 0;
-			stderr = lastSubmitResult.error || "Subagent aborted task";
-			try {
-				rawOutput = JSON.stringify({ aborted: true, error: lastSubmitResult.error }, null, 2);
-			} catch {
-				rawOutput = `{"aborted":true,"error":"${lastSubmitResult.error || "Unknown error"}"}`;
-			}
-		} else {
-			// Normal successful completion
-			const submitData = lastSubmitResult?.data;
-			if (submitData === null || submitData === undefined) {
-				// Agent called submit_result but with null/undefined data — treat as missing
-				// so the fallback path can try to extract output from conversation text
-				const warning = "SYSTEM WARNING: Subagent called submit_result with null data.";
-				rawOutput = rawOutput ? `${warning}\n\n${rawOutput}` : warning;
-			} else {
-				const completeData = normalizeCompleteData(submitData, reportFindings);
-				try {
-					rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
-				} catch (err) {
-					const errorMessage = err instanceof Error ? err.message : String(err);
-					rawOutput = `{"error":"Failed to serialize submit_result data: ${errorMessage}"}`;
-				}
-				exitCode = 0;
-				stderr = "";
-			}
-		}
-	} else {
-		const allowFallback = exitCode === 0 && !done.aborted && !signal?.aborted;
-		const { normalized: normalizedSchema, error: schemaError } = normalizeOutputSchema(outputSchema);
-		const hasOutputSchema = normalizedSchema !== undefined && !schemaError;
-		const fallback = allowFallback ? resolveFallbackCompletion(rawOutput, outputSchema) : null;
-		if (fallback) {
-			const completeData = normalizeCompleteData(fallback.data, reportFindings);
-			try {
-				rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
-			} catch (err) {
-				const errorMessage = err instanceof Error ? err.message : String(err);
-				rawOutput = `{"error":"Failed to serialize fallback completion: ${errorMessage}"}`;
-			}
-			exitCode = 0;
-			stderr = "";
-		} else if (!hasOutputSchema && allowFallback && rawOutput.trim().length > 0) {
-			exitCode = 0;
-			stderr = "";
-		} else if (exitCode === 0) {
-			const warning = "SYSTEM WARNING: Subagent exited without calling submit_result tool after 3 reminders.";
-			rawOutput = rawOutput ? `${warning}\n\n${rawOutput}` : warning;
-		}
-	}
+	const finalized = finalizeSubprocessOutput({
+		rawOutput,
+		exitCode,
+		stderr,
+		doneAborted: Boolean(done.aborted),
+		signalAborted: Boolean(signal?.aborted),
+		submitResultItems,
+		reportFindings,
+		outputSchema,
+	});
+	rawOutput = finalized.rawOutput;
+	exitCode = finalized.exitCode;
+	stderr = finalized.stderr;
+	const { abortedViaSubmitResult, hasSubmitResult } = finalized;
 	const { content: truncatedOutput, truncated } = truncateTail(rawOutput, {
 		maxBytes: MAX_OUTPUT_BYTES,
 		maxLines: MAX_OUTPUT_LINES,

package/src/tools/submit-result.ts CHANGED Viewed

@@ -136,6 +136,17 @@ export class SubmitResultTool implements AgentTool<TObject, SubmitResultDetails>
 // Register subprocess tool handler for extraction + termination.
 subprocessToolRegistry.register<SubmitResultDetails>("submit_result", {
-	extractData: event => event.result?.details as SubmitResultDetails | undefined,
-	shouldTerminate: () => true,
+	extractData: event => {
+		const details = event.result?.details;
+		if (!details || typeof details !== "object") return undefined;
+		const record = details as Record<string, unknown>;
+		const status = record.status;
+		if (status !== "success" && status !== "aborted") return undefined;
+		return {
+			data: record.data,
+			status,
+			error: typeof record.error === "string" ? record.error : undefined,
+		};
+	},
+	shouldTerminate: event => !event.isError,
 });

package/src/web/search/providers/anthropic.ts CHANGED Viewed

@@ -4,9 +4,14 @@
  * Uses Claude's built-in web_search_20250305 tool to search the web.
  * Returns synthesized answers with citations and source metadata.
  */
-import { type AnthropicSystemBlock, buildAnthropicSystemBlocks, stripClaudeToolPrefix } from "@oh-my-pi/pi-ai";
+import {
+	type AnthropicSystemBlock,
+	buildAnthropicHeaders,
+	buildAnthropicSystemBlocks,
+	stripClaudeToolPrefix,
+} from "@oh-my-pi/pi-ai";
 import { $env } from "@oh-my-pi/pi-utils";
-import { buildAnthropicHeaders, buildAnthropicUrl, findAnthropicAuth } from "../../../web/search/auth";
+import { buildAnthropicUrl, findAnthropicAuth } from "../../../web/search/auth";
 import type {
 	AnthropicApiResponse,
 	AnthropicAuthConfig,