@oh-my-pi/pi-coding-agent 12.15.1 → 12.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [12.16.0] - 2026-02-21
6
+
7
+ ### Added
8
+
9
+ - Added `peekApiKey` method to AuthStorage for non-blocking API key retrieval during model discovery without triggering OAuth token refresh
10
+ - Exported `finalizeSubprocessOutput` function to handle subprocess output finalization with submit_result validation
11
+ - Exported `SubmitResultItem` interface for type-safe submit_result tool data extraction
12
+ - Added automatic reminders when subagent stops without calling submit_result tool (up to 3 reminders before aborting)
13
+ - Added system warnings when subagent calls submit_result with null/undefined data or exits without calling submit_result after reminders
14
+
15
+ ### Changed
16
+
17
+ - Changed model refresh behavior to support configurable strategies: uses 'online' mode when listing models and 'online-if-uncached' mode otherwise for improved performance
18
+ - Changed default thinking level from 'off' to 'high' for improved reasoning and planning
19
+ - Changed model discovery to use non-blocking API key peek instead of full key retrieval, improving performance by avoiding unnecessary OAuth token refreshes
20
+ - Simplified submit_result termination logic to immediately abort on successful tool execution instead of waiting for message_end event
21
+ - Updated submit_result tool to only terminate on successful execution (when isError is false), allowing retries on tool errors
22
+ - Refactored subprocess output finalization logic into dedicated `finalizeSubprocessOutput` function for better testability and maintainability
23
+ - Improved handling of missing submit_result calls by automatically aborting with exit code 1 after 3 reminder prompts
24
+
25
+ ### Fixed
26
+
27
+ - Fixed submit_result retry behavior to properly handle tool execution errors and allow the subagent to retry before aborting
28
+ - Fixed submit_result tool extraction to properly validate status field and only accept 'success' or 'aborted' results
29
+
5
30
  ## [12.15.1] - 2026-02-20
6
31
 
7
32
  ### Changed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oh-my-pi/pi-coding-agent",
3
- "version": "12.15.1",
3
+ "version": "12.16.0",
4
4
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
5
5
  "type": "module",
6
6
  "bin": {
@@ -85,12 +85,12 @@
85
85
  },
86
86
  "dependencies": {
87
87
  "@mozilla/readability": "0.6.0",
88
- "@oh-my-pi/omp-stats": "12.15.1",
89
- "@oh-my-pi/pi-agent-core": "12.15.1",
90
- "@oh-my-pi/pi-ai": "12.15.1",
91
- "@oh-my-pi/pi-natives": "12.15.1",
92
- "@oh-my-pi/pi-tui": "12.15.1",
93
- "@oh-my-pi/pi-utils": "12.15.1",
88
+ "@oh-my-pi/omp-stats": "12.16.0",
89
+ "@oh-my-pi/pi-agent-core": "12.16.0",
90
+ "@oh-my-pi/pi-ai": "12.16.0",
91
+ "@oh-my-pi/pi-natives": "12.16.0",
92
+ "@oh-my-pi/pi-tui": "12.16.0",
93
+ "@oh-my-pi/pi-utils": "12.16.0",
94
94
  "@sinclair/typebox": "^0.34.48",
95
95
  "@xterm/headless": "^6.0.0",
96
96
  "ajv": "^8.18.0",
@@ -10,6 +10,7 @@ import {
10
10
  googleGeminiCliModelManagerOptions,
11
11
  type Model,
12
12
  type ModelManagerOptions,
13
+ type ModelRefreshStrategy,
13
14
  normalizeDomain,
14
15
  type OAuthCredentials,
15
16
  type OAuthLoginCallbacks,
@@ -494,7 +495,7 @@ export class ModelRegistry {
494
495
  /**
495
496
  * Reload models from disk (built-in + custom from models.json).
496
497
  */
497
- async refresh(): Promise<void> {
498
+ async refresh(strategy: ModelRefreshStrategy = "online-if-uncached"): Promise<void> {
498
499
  this.#modelsConfigFile.invalidate();
499
500
  this.#customProviderApiKeys.clear();
500
501
  this.#keylessProviders.clear();
@@ -502,7 +503,7 @@ export class ModelRegistry {
502
503
  this.#modelOverrides.clear();
503
504
  this.#configError = undefined;
504
505
  this.#loadModels();
505
- await this.#refreshRuntimeDiscoveries();
506
+ await this.#refreshRuntimeDiscoveries(strategy);
506
507
  }
507
508
 
508
509
  /**
@@ -681,7 +682,7 @@ export class ModelRegistry {
681
682
  };
682
683
  }
683
684
 
684
- async #refreshRuntimeDiscoveries(): Promise<void> {
685
+ async #refreshRuntimeDiscoveries(strategy: ModelRefreshStrategy): Promise<void> {
685
686
  const configuredDiscoveriesPromise =
686
687
  this.#discoverableProviders.length === 0
687
688
  ? Promise.resolve<Model<Api>[]>([])
@@ -690,7 +691,7 @@ export class ModelRegistry {
690
691
  );
691
692
  const [configuredDiscovered, builtInDiscovered] = await Promise.all([
692
693
  configuredDiscoveriesPromise,
693
- this.#discoverBuiltInProviderModels(),
694
+ this.#discoverBuiltInProviderModels(strategy),
694
695
  ]);
695
696
  const discovered = [...configuredDiscovered, ...builtInDiscovered];
696
697
  if (discovered.length === 0) {
@@ -721,7 +722,7 @@ export class ModelRegistry {
721
722
  }
722
723
  }
723
724
 
724
- async #discoverBuiltInProviderModels(): Promise<Model<Api>[]> {
725
+ async #discoverBuiltInProviderModels(strategy: ModelRefreshStrategy): Promise<Model<Api>[]> {
725
726
  // Skip providers already handled by configured discovery (e.g. user-configured ollama with discovery.type)
726
727
  const configuredDiscoveryProviders = new Set(this.#discoverableProviders.map(p => p.provider));
727
728
  const managerOptions = (await this.#collectBuiltInModelManagerOptions()).filter(
@@ -730,7 +731,9 @@ export class ModelRegistry {
730
731
  if (managerOptions.length === 0) {
731
732
  return [];
732
733
  }
733
- const discoveries = await Promise.all(managerOptions.map(options => this.#discoverWithModelManager(options)));
734
+ const discoveries = await Promise.all(
735
+ managerOptions.map(options => this.#discoverWithModelManager(options, strategy)),
736
+ );
734
737
  return discoveries.flat();
735
738
  }
736
739
 
@@ -770,9 +773,13 @@ export class ModelRegistry {
770
773
  },
771
774
  },
772
775
  ];
776
+ // Use peekApiKey to avoid OAuth token refresh during discovery.
777
+ // The token is only needed if the dynamic fetch fires (cache miss),
778
+ // and failures there are handled gracefully.
779
+ const peekKey = (descriptor: { providerId: string }) => this.#peekApiKeyForProvider(descriptor.providerId);
773
780
  const [standardProviderKeys, specialKeys] = await Promise.all([
774
- Promise.all(PROVIDER_DESCRIPTORS.map(descriptor => this.getApiKeyForProvider(descriptor.providerId))),
775
- Promise.all(specialProviderDescriptors.map(descriptor => this.getApiKeyForProvider(descriptor.providerId))),
781
+ Promise.all(PROVIDER_DESCRIPTORS.map(peekKey)),
782
+ Promise.all(specialProviderDescriptors.map(peekKey)),
776
783
  ]);
777
784
  const options: ModelManagerOptions<Api>[] = [];
778
785
  for (let i = 0; i < PROVIDER_DESCRIPTORS.length; i++) {
@@ -799,10 +806,13 @@ export class ModelRegistry {
799
806
  return options;
800
807
  }
801
808
 
802
- async #discoverWithModelManager(options: ModelManagerOptions<Api>): Promise<Model<Api>[]> {
809
+ async #discoverWithModelManager(
810
+ options: ModelManagerOptions<Api>,
811
+ strategy: ModelRefreshStrategy,
812
+ ): Promise<Model<Api>[]> {
803
813
  try {
804
814
  const manager = createModelManager(options);
805
- const result = await manager.refresh();
815
+ const result = await manager.refresh(strategy);
806
816
  return result.models.map(model =>
807
817
  model.provider === options.providerId ? model : { ...model, provider: options.providerId },
808
818
  );
@@ -970,6 +980,13 @@ export class ModelRegistry {
970
980
  return this.authStorage.getApiKey(provider, sessionId, { baseUrl });
971
981
  }
972
982
 
983
+ async #peekApiKeyForProvider(provider: string): Promise<string | undefined> {
984
+ if (this.#keylessProviders.has(provider)) {
985
+ return kNoAuth;
986
+ }
987
+ return this.authStorage.peekApiKey(provider);
988
+ }
989
+
973
990
  /**
974
991
  * Check if a model is using OAuth credentials (subscription).
975
992
  */
@@ -179,7 +179,7 @@ export const SETTINGS_SCHEMA = {
179
179
  defaultThinkingLevel: {
180
180
  type: "enum",
181
181
  values: ["off", "minimal", "low", "medium", "high", "xhigh"] as const,
182
- default: "off",
182
+ default: "high",
183
183
  ui: {
184
184
  tab: "agent",
185
185
  label: "Thinking level",
@@ -1,6 +1,6 @@
1
1
  // Auto-generated by scripts/generate-docs-index.ts - DO NOT EDIT
2
2
 
3
- export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md","tui.md"];
3
+ export const EMBEDDED_DOC_FILENAMES: readonly string[] = ["bash-tool-runtime.md","blob-artifact-architecture.md","compaction.md","config-usage.md","custom-tools.md","environment-variables.md","extension-loading.md","extensions.md","fs-scan-cache-architecture.md","gemini-manifest-extensions.md","handoff-generation-pipeline.md","hooks.md","mcp-protocol-transports.md","mcp-runtime-lifecycle.md","mcp-server-tool-authoring.md","memory.md","models.md","natives-addon-loader-runtime.md","natives-architecture.md","natives-binding-contract.md","natives-build-release-debugging.md","natives-media-system-utils.md","natives-rust-task-cancellation.md","natives-shell-pty-process.md","natives-text-search-pipeline.md","non-compaction-retry-policy.md","notebook-tool-runtime.md","plugin-manager-installer-plumbing.md","porting-from-pi-mono.md","porting-to-natives.md","provider-streaming-internals.md","python-repl.md","rpc.md","rulebook-matching-pipeline.md","sdk.md","secrets.md","session-operations-export-share-fork-resume.md","session-switching-and-recent-listing.md","session-tree-plan.md","session.md","skills.md","slash-command-internals.md","task-agent-discovery.md","theme.md","tree.md","ttsr-injection-lifecycle.md","tui-runtime-internals.md","tui.md"];
4
4
 
5
5
  export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
6
6
  "bash-tool-runtime.md": "# Bash tool runtime\n\nThis document describes the **`bash` tool** runtime path used by agent tool calls, from command normalization to execution, truncation/artifacts, and rendering.\n\nIt also calls out where behavior diverges in interactive TUI, print mode, RPC mode, and user-initiated bang (`!`) shell execution.\n\n## Scope and runtime surfaces\n\nThere are two different bash execution surfaces in coding-agent:\n\n1. **Tool-call surface** (`toolName: \"bash\"`): used when the model calls the bash tool.\n - Entry point: `BashTool.execute()`.\n2. **User bang-command surface** (`!cmd` from interactive input or RPC `bash` command): session-level helper path.\n - Entry point: `AgentSession.executeBash()`.\n\nBoth eventually use `executeBash()` in `src/exec/bash-executor.ts` for non-PTY execution, but only the tool-call path runs normalization/interception and tool renderer logic.\n\n## End-to-end tool-call pipeline\n\n## 1) Input normalization and parameter merge\n\n`BashTool.execute()` first normalizes the raw command via `normalizeBashCommand()`:\n\n- extracts trailing `| head -n N`, `| head -N`, `| tail -n N`, `| tail -N` into structured limits,\n- trims trailing/leading whitespace,\n- keeps internal whitespace intact.\n\nThen it merges extracted limits with explicit tool args:\n\n- explicit `head`/`tail` args override extracted values,\n- extracted values are fallback only.\n\n### Caveat\n\n`bash-normalize.ts` comments mention stripping `2>&1`, but current implementation does not remove it. Runtime behavior is still correct (stdout/stderr are already merged), but the normalization behavior is narrower than comments suggest.\n\n## 2) Optional interception (blocked-command path)\n\nIf `bashInterceptor.enabled` is true, `BashTool` loads rules from settings and runs `checkBashInterception()` against the normalized command.\n\nInterception behavior:\n\n- command is blocked **only** when:\n - regex rule matches, and\n - the suggested tool is present in `ctx.toolNames`.\n- invalid regex rules are silently skipped.\n- on block, `BashTool` throws `ToolError` with message:\n - `Blocked: ...`\n - original command included.\n\nDefault rule patterns (defined in code) target common misuses:\n\n- file readers (`cat`, `head`, `tail`, ...)\n- search tools (`grep`, `rg`, ...)\n- file finders (`find`, `fd`, ...)\n- in-place editors (`sed -i`, `perl -i`, `awk -i inplace`)\n- shell redirection writes (`echo ... > file`, heredoc redirection)\n\n### Caveat\n\n`InterceptionResult` includes `suggestedTool`, but `BashTool` currently surfaces only the message text (no structured suggested-tool field in `details`).\n\n## 3) CWD validation and timeout clamping\n\n`cwd` is resolved relative to session cwd (`resolveToCwd`), then validated via `stat`:\n\n- missing path -> `ToolError(\"Working directory does not exist: ...\")`\n- non-directory -> `ToolError(\"Working directory is not a directory: ...\")`\n\nTimeout is clamped to `[1, 3600]` seconds and converted to milliseconds.\n\n## 4) Artifact allocation + environment injection\n\nBefore execution, the tool allocates an artifact path/id (best-effort) and injects `$ARTIFACTS` env when session artifacts dir is available.\n\n- artifact allocation failure is non-fatal (execution continues without artifact spill file),\n- artifact id/path are passed into execution path for full-output persistence on truncation.\n\n## 5) PTY vs non-PTY execution selection\n\n`BashTool` chooses PTY execution only when all are true:\n\n- `bash.virtualTerminal === \"on\"`\n- `PI_NO_PTY !== \"1\"`\n- tool context has UI (`ctx.hasUI === true` and `ctx.ui` set)\n\nOtherwise it uses non-interactive `executeBash()`.\n\nThat means print mode and non-UI RPC/tool contexts always use non-PTY.\n\n## Non-interactive execution engine (`executeBash`)\n\n## Shell session reuse model\n\n`executeBash()` caches native `Shell` instances in a process-global map keyed by:\n\n- shell path,\n- configured command prefix,\n- snapshot path,\n- serialized shell env,\n- optional agent session key.\n\nFor session-level executions, `AgentSession.executeBash()` passes `sessionKey: this.sessionId`, isolating reuse per session.\n\nTool-call path does **not** pass `sessionKey`, so reuse scope is based on shell config/snapshot/env.\n\n## Shell config and snapshot behavior\n\nAt each call, executor loads settings shell config (`shell`, `env`, optional `prefix`).\n\nIf selected shell includes `bash`, it attempts `getOrCreateSnapshot()`:\n\n- snapshot captures aliases/functions/options from user rc,\n- snapshot creation is best-effort,\n- failure falls back to no snapshot.\n\nIf `prefix` is configured, command becomes:\n\n```text\n<prefix> <command>\n```\n\n## Streaming and cancellation\n\n`Shell.run()` streams chunks to callback. Executor pipes each chunk into `OutputSink` and optional `onChunk` callback.\n\nCancellation:\n\n- aborted signal triggers `shellSession.abort(...)`,\n- timeout from native result is mapped to `cancelled: true` + annotation text,\n- explicit cancellation similarly returns `cancelled: true` + annotation.\n\nNo exception is thrown inside executor for timeout/cancel; it returns structured `BashResult` and lets caller map error semantics.\n\n## Interactive PTY path (`runInteractiveBashPty`)\n\nWhen PTY is enabled, tool runs `runInteractiveBashPty()` which opens an overlay console component and drives a native `PtySession`.\n\nBehavior highlights:\n\n- xterm-headless virtual terminal renders viewport in overlay,\n- keyboard input is normalized (including Kitty sequences and application cursor mode handling),\n- `esc` while running kills the PTY session,\n- terminal resize propagates to PTY (`session.resize(cols, rows)`).\n\nEnvironment hardening defaults are injected for unattended runs:\n\n- pagers disabled (`PAGER=cat`, `GIT_PAGER=cat`, etc.),\n- editor prompts disabled (`GIT_EDITOR=true`, `EDITOR=true`, ...),\n- terminal/auth prompts reduced (`GIT_TERMINAL_PROMPT=0`, `SSH_ASKPASS=/usr/bin/false`, `CI=1`),\n- package-manager/tool automation flags for non-interactive behavior.\n\nPTY output is normalized (`CRLF`/`CR` to `LF`, `sanitizeText`) and written into `OutputSink`, including artifact spill support.\n\nOn PTY startup/runtime error, sink receives `PTY error: ...` line and command finalizes with undefined exit code.\n\n## Output handling: streaming, truncation, artifact spill\n\nBoth PTY and non-PTY paths use `OutputSink`.\n\n## OutputSink semantics\n\n- keeps an in-memory UTF-8-safe tail buffer (`DEFAULT_MAX_BYTES`, currently 50KB),\n- tracks total bytes/lines seen,\n- if artifact path exists and output overflows (or file already active), writes full stream to artifact file,\n- when memory threshold overflows, trims in-memory buffer to tail (UTF-8 boundary safe),\n- marks `truncated` when overflow/file spill occurs.\n\n`dump()` returns:\n\n- `output` (possibly annotated prefix),\n- `truncated`,\n- `totalLines/totalBytes`,\n- `outputLines/outputBytes`,\n- `artifactId` if artifact file was active.\n\n### Long-output caveat\n\nRuntime truncation is byte-threshold based in `OutputSink` (50KB default). It does not enforce a hard 2000-line cap in this code path.\n\n## Live tool updates\n\nFor non-PTY execution, `BashTool` uses a separate `TailBuffer` for partial updates and emits `onUpdate` snapshots while command is running.\n\nFor PTY execution, live rendering is handled by custom UI overlay, not by `onUpdate` text chunks.\n\n## Result shaping, metadata, and error mapping\n\nAfter execution:\n\n1. `cancelled` handling:\n - if abort signal is aborted -> throw `ToolAbortError` (abort semantics),\n - else -> throw `ToolError` (treated as tool failure).\n2. PTY `timedOut` -> throw `ToolError`.\n3. apply head/tail filters to final output text (`applyHeadTail`, head then tail).\n4. empty output becomes `(no output)`.\n5. attach truncation metadata via `toolResult(...).truncationFromSummary(result, { direction: \"tail\" })`.\n6. exit-code mapping:\n - missing exit code -> `ToolError(\"... missing exit status\")`\n - non-zero exit -> `ToolError(\"... Command exited with code N\")`\n - zero exit -> success result.\n\nSuccess payload structure:\n\n- `content`: text output,\n- `details.meta.truncation` when truncated, including:\n - `direction`, `truncatedBy`, total/output line+byte counts,\n - `shownRange`,\n - `artifactId` when available.\n\nBecause built-in tools are wrapped with `wrapToolWithMetaNotice()`, truncation notice text is appended to final text content automatically (for example: `Full: artifact://<id>`).\n\n## Rendering paths\n\n## Tool-call renderer (`bashToolRenderer`)\n\n`bashToolRenderer` is used for tool-call messages (`toolCall` / `toolResult`):\n\n- collapsed mode shows visual-line-truncated preview,\n- expanded mode shows all currently available output text,\n- warning line includes truncation reason and `artifact://<id>` when truncated,\n- timeout value (from args) is shown in footer metadata line.\n\n### Caveat: full artifact expansion\n\n`BashRenderContext` has `isFullOutput`, but current renderer context builder does not set it for bash tool results. Expanded view still uses the text already in result content (tail/truncated output) unless another caller provides full artifact content.\n\n## User bang-command component (`BashExecutionComponent`)\n\n`BashExecutionComponent` is for user `!` commands in interactive mode (not model tool calls):\n\n- streams chunks live,\n- collapsed preview keeps last 20 logical lines,\n- line clamp at 4000 chars per line,\n- shows truncation + artifact warnings when metadata is present,\n- marks cancelled/error/exit state separately.\n\nThis component is wired by `CommandController.handleBashCommand()` and fed from `AgentSession.executeBash()`.\n\n## Mode-specific behavior differences\n\n| Surface | Entry path | PTY eligible | Live output UX | Error surfacing |\n| ------------------------------ | ----------------------------------------------------- | -------------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------ |\n| Interactive tool call | `BashTool.execute` | Yes, when `bash.virtualTerminal=on` and UI exists and `PI_NO_PTY!=1` | PTY overlay (interactive) or streamed tail updates | Tool errors become `toolResult.isError` |\n| Print mode tool call | `BashTool.execute` | No (no UI context) | No TUI overlay; output appears in event stream/final assistant text flow | Same tool error mapping |\n| RPC tool call (agent tooling) | `BashTool.execute` | Usually no UI -> non-PTY | Structured tool events/results | Same tool error mapping |\n| Interactive bang command (`!`) | `AgentSession.executeBash` + `BashExecutionComponent` | No (uses executor directly) | Dedicated bash execution component | Controller catches exceptions and shows UI error |\n| RPC `bash` command | `rpc-mode` -> `session.executeBash` | No | Returns `BashResult` directly | Consumer handles returned fields |\n\n## Operational caveats\n\n- Interceptor only blocks commands when suggested tool is currently available in context.\n- If artifact allocation fails, truncation still occurs but no `artifact://` back-reference is available.\n- Shell session cache has no explicit eviction in this module; lifetime is process-scoped.\n- PTY and non-PTY timeout surfaces differ:\n - PTY exposes explicit `timedOut` result field,\n - non-PTY maps timeout into `cancelled + annotation` summary.\n\n## Implementation files\n\n- [`src/tools/bash.ts`](../packages/coding-agent/src/tools/bash.ts) — tool entrypoint, normalization/interception, PTY/non-PTY selection, result/error mapping, bash tool renderer.\n- [`src/tools/bash-normalize.ts`](../packages/coding-agent/src/tools/bash-normalize.ts) — command normalization and post-run head/tail filtering.\n- [`src/tools/bash-interceptor.ts`](../packages/coding-agent/src/tools/bash-interceptor.ts) — interceptor rule matching and blocked-command messages.\n- [`src/exec/bash-executor.ts`](../packages/coding-agent/src/exec/bash-executor.ts) — non-PTY executor, shell session reuse, cancellation wiring, output sink integration.\n- [`src/tools/bash-interactive.ts`](../packages/coding-agent/src/tools/bash-interactive.ts) — PTY runtime, overlay UI, input normalization, non-interactive env defaults.\n- [`src/session/streaming-output.ts`](../packages/coding-agent/src/session/streaming-output.ts) — `OutputSink` truncation/artifact spill and summary metadata.\n- [`src/tools/output-utils.ts`](../packages/coding-agent/src/tools/output-utils.ts) — artifact allocation helpers and streaming tail buffer.\n- [`src/tools/output-meta.ts`](../packages/coding-agent/src/tools/output-meta.ts) — truncation metadata shape + notice injection wrapper.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — session-level `executeBash`, message recording, abort lifecycle.\n- [`src/modes/components/bash-execution.ts`](../packages/coding-agent/src/modes/components/bash-execution.ts) — interactive `!` command execution component.\n- [`src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts) — wiring for interactive `!` command UI stream/update completion.\n- [`src/modes/rpc/rpc-mode.ts`](../packages/coding-agent/src/modes/rpc/rpc-mode.ts) — RPC `bash` and `abort_bash` command surface.\n- [`src/internal-urls/artifact-protocol.ts`](../packages/coding-agent/src/internal-urls/artifact-protocol.ts) — `artifact://<id>` resolution.\n",
@@ -18,6 +18,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
18
18
  "mcp-protocol-transports.md": "# MCP Protocol and Transport Internals\n\nThis document describes how coding-agent implements MCP JSON-RPC messaging and how protocol concerns are split from transport concerns.\n\n## Scope\n\nCovers:\n\n- JSON-RPC request/response and notification flow\n- Request correlation and lifecycle for stdio and HTTP/SSE transports\n- Timeout and cancellation behavior\n- Error propagation and malformed payload handling\n- Transport selection boundaries (`stdio` vs `http`/`sse`)\n- Which reconnect/retry responsibilities are transport-level vs manager-level\n\nDoes not cover extension authoring UX or command UI.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/transports/stdio.ts`](../packages/coding-agent/src/mcp/transports/stdio.ts)\n- [`src/mcp/transports/http.ts`](../packages/coding-agent/src/mcp/transports/http.ts)\n- [`src/mcp/transports/index.ts`](../packages/coding-agent/src/mcp/transports/index.ts)\n- [`src/mcp/json-rpc.ts`](../packages/coding-agent/src/mcp/json-rpc.ts)\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n\n## Layer boundaries\n\n### Protocol layer (JSON-RPC + MCP methods)\n\n- Message shapes are defined in `types.ts` (`JsonRpcRequest`, `JsonRpcNotification`, `JsonRpcResponse`, `JsonRpcMessage`).\n- MCP client logic (`client.ts`) decides method order and session handshake:\n 1. `initialize` request\n 2. `notifications/initialized` notification\n 3. method calls like `tools/list`, `tools/call`\n\n### Transport layer (`MCPTransport`)\n\n`MCPTransport` abstracts delivery and lifecycle:\n\n- `request(method, params, options?) -> Promise<T>`\n- `notify(method, params?) -> Promise<void>`\n- `close()`\n- `connected`\n- optional callbacks: `onClose`, `onError`, `onNotification`\n\nTransport implementations own framing and I/O details:\n\n- `StdioTransport`: newline-delimited JSON over subprocess stdio\n- `HttpTransport`: JSON-RPC over HTTP POST, with optional SSE responses/listening\n\n### Important current caveat\n\nTransport callbacks (`onClose`, `onError`, `onNotification`) are implemented, but current `MCPClient`/`MCPManager` flows do not wire reconnection logic to these callbacks. Notifications are only consumed if caller registers handlers.\n\n## Transport selection\n\n`client.ts:createTransport()` chooses transport from config:\n\n- `type` omitted or `\"stdio\"` -> `createStdioTransport`\n- `\"http\"` or `\"sse\"` -> `createHttpTransport`\n\n`\"sse\"` is treated as an HTTP transport variant (same class), not a separate transport implementation.\n\n## JSON-RPC message flow and correlation\n\n## Request IDs\n\nEach transport generates per-request IDs (`Math.random` + timestamp string). IDs are transport-local correlation tokens.\n\n## Stdio correlation path\n\n- Outbound request is serialized as one JSON object + `\\n`.\n- `#pendingRequests: Map<id, {resolve,reject}>` stores in-flight requests.\n- Read loop parses JSONL from stdout and calls `#handleMessage`.\n- If inbound message has matching `id`, request resolves/rejects.\n- If inbound message has `method` and no `id`, treated as notification and sent to `onNotification`.\n\nUnknown IDs are ignored (no rejection, no error callback).\n\n## HTTP correlation path\n\n- Outbound request is HTTP `POST` with JSON body and generated `id`.\n- Non-SSE response path: parse one JSON-RPC response and return `result`/throw on `error`.\n- SSE response path (`Content-Type: text/event-stream`): stream events, return first message whose `id` matches expected request ID and has `result` or `error`.\n- SSE messages with `method` and no `id` are treated as notifications.\n\nIf SSE stream ends before matching response, request fails with `No response received for request ID ...`.\n\n## Notifications\n\nClient emits JSON-RPC notifications via `transport.notify(...)`.\n\n- Stdio: writes notification frame to stdin (`jsonrpc`, `method`, optional `params`) plus newline.\n- HTTP: sends POST body without `id`; success accepts `2xx` or `202 Accepted`.\n\nServer-initiated notifications are only surfaced through transport `onNotification`; there is no default global subscriber in manager/client.\n\n## Stdio transport internals\n\n## Lifecycle and state transitions\n\n- Initial: `connected=false`, `process=null`, pending map empty\n- `connect()`:\n - spawn subprocess with configured command/args/env/cwd\n - mark connected\n - start stdout read loop (`readJsonl`)\n - start stderr loop (read/discard; currently silent)\n- `close()`:\n - mark disconnected\n - reject all pending requests (`Transport closed`)\n - kill subprocess\n - await read loop shutdown\n - emit `onClose`\n\nIf read loop exits unexpectedly, `finally` triggers `#handleClose()` which performs the same pending-request rejection and close callback.\n\n## Timeout and cancellation\n\nPer request:\n\n- timeout defaults to `config.timeout ?? 30000`\n- optional `AbortSignal` from caller\n- abort and timeout both reject the pending promise and clean map entry\n\nCancellation is local only: transport does not send protocol-level cancellation notification to the server.\n\n## Malformed payload handling\n\nIn read loop:\n\n- each parsed JSONL line is passed to `#handleMessage` in `try/catch`\n- malformed/invalid message handling exceptions are dropped (`Skip malformed lines` comment)\n- loop continues, so one bad message does not kill the connection\n\nIf the underlying stream parser throws, `onError` is invoked (when still connected), then connection closes.\n\n## Disconnect/failure behavior\n\nWhen process exits or stream closes:\n\n- all in-flight requests are rejected with `Transport closed`\n- no automatic restart or reconnect\n- higher layers must reconnect by creating a new transport\n\n## Backpressure/streaming notes\n\n- Outbound writes use `stdin.write()` + `flush()` without awaiting drain semantics.\n- There is no explicit queue or high-watermark management in transport.\n- Inbound processing is stream-driven (`for await` over `readJsonl`), one parsed message at a time.\n\n## HTTP/SSE transport internals\n\n## Lifecycle and connection semantics\n\nHTTP transport has logical connection state, but request path is stateless per HTTP call:\n\n- `connect()` sets `connected=true` (no socket/session handshake)\n- optional server session tracking via `Mcp-Session-Id` header\n- `close()` optionally sends `DELETE` with `Mcp-Session-Id`, aborts SSE listener, emits `onClose`\n\nSo `connected` means \"transport usable\", not \"persistent stream established\".\n\n## Session header behavior\n\n- On POST response, if `Mcp-Session-Id` header is present, transport stores it.\n- Subsequent requests/notifications include `Mcp-Session-Id`.\n- `close()` tries to terminate server session with HTTP DELETE; termination failures are ignored.\n\n## Timeout and cancellation\n\nFor both `request()` and `notify()`:\n\n- timeout uses `AbortController` (`config.timeout ?? 30000`)\n- external signal, if provided, is merged via `AbortSignal.any([...])`\n- AbortError handling distinguishes caller abort vs timeout\n\nErrors thrown:\n\n- timeout: `Request timeout after ...ms` (or `SSE response timeout ...`, `Notify timeout ...`)\n- caller abort: original AbortError is rethrown when external signal is already aborted\n\n## HTTP error propagation\n\nOn non-OK response:\n\n- response text is included in thrown error (`HTTP <status>: <text>`)\n- if present, auth hints from `WWW-Authenticate` and `Mcp-Auth-Server` are appended\n\nOn JSON-RPC error object:\n\n- throws `MCP error <code>: <message>`\n\nMalformed JSON body (`response.json()` failure) propagates as parse exception.\n\n## SSE behavior and modes\n\nTwo SSE paths exist:\n\n1. **Per-request SSE response** (`#parseSSEResponse`)\n - used when POST response content type is `text/event-stream`\n - consumes stream until matching response id found\n - can process interleaved notifications during same stream\n\n2. **Background SSE listener** (`startSSEListener()`)\n - optional GET listener for server-initiated notifications\n - currently not automatically started by MCP manager/client\n - if GET returns `405`, listener silently disables itself (server does not support this mode)\n\n## Malformed payload and disconnect handling\n\nSSE JSON parsing errors bubble out of `readSseJson` and reject request/listener.\n\n- Request SSE parse errors reject the active request.\n- Background listener errors trigger `onError` (except AbortError).\n- No auto-reconnect for background listener.\n\n## `json-rpc.ts` utility vs transport abstraction\n\n`src/mcp/json-rpc.ts` provides `callMCP()` and `parseSSE()` helpers for direct HTTP MCP calls (used by Exa integration), not the `MCPTransport` abstraction used by `MCPClient`/`MCPManager`.\n\nNotable differences from `HttpTransport`:\n\n- parses entire response text first, then extracts first `data: ` line (`parseSSE`), with JSON fallback\n- no request timeout management, no abort API, no session-id handling, no transport lifecycle\n- returns raw JSON-RPC envelope object\n\nThis path is lightweight but less robust than full transport implementation.\n\n## Retry/reconnect responsibilities\n\n## Transport-level\n\nCurrent transport implementations do **not**:\n\n- retry failed requests\n- reconnect after stdio process exit\n- reconnect SSE listeners\n- resend in-flight requests after disconnect\n\nThey fail fast and propagate errors.\n\n## Manager/client-level\n\n`MCPManager` handles discovery/initial connection orchestration and can reconnect only by running connect flows again (`connectToServer`/`discoverAndConnect` paths). It does not auto-heal an already connected transport on runtime failure callbacks.\n\n`MCPManager` does have startup fallback behavior for slow servers (deferred tools from cache), but that is tool availability fallback, not transport retry.\n\n## Failure scenarios summary\n\n- **Malformed stdio message line**: dropped; stream continues.\n- **Stdio stream/process ends**: transport closes; pending requests rejected as `Transport closed`.\n- **HTTP non-2xx**: request/notify throws HTTP error.\n- **Invalid JSON response**: parse exception propagated.\n- **SSE ends without matching id**: request fails with `No response received for request ID ...`.\n- **Timeout**: transport-specific timeout error.\n- **Caller abort**: AbortError/reason propagated from caller signal.\n\n## Practical boundary rule\n\nIf the concern is message shape, id correlation, or MCP method ordering, it belongs to protocol/client logic.\n\nIf the concern is framing (JSONL vs HTTP/SSE), stream parsing, fetch/spawn lifecycle, timeout clocks, or connection teardown, it belongs to transport implementation.",
19
19
  "mcp-runtime-lifecycle.md": "# MCP runtime lifecycle\n\nThis document describes how MCP servers are discovered, connected, exposed as tools, refreshed, and torn down in the coding-agent runtime.\n\n## Lifecycle at a glance\n\n1. **SDK startup** calls `discoverAndLoadMCPTools()` (unless MCP is disabled).\n2. **Discovery** (`loadAllMCPConfigs`) resolves MCP server configs from capability sources, filters disabled/project/Exa entries, and preserves source metadata.\n3. **Manager connect phase** (`MCPManager.connectServers`) starts per-server connect + `tools/list` in parallel.\n4. **Fast startup gate** waits up to 250ms, then may return:\n - fully loaded `MCPTool`s,\n - failures per server,\n - or cached `DeferredMCPTool`s for still-pending servers.\n5. **SDK wiring** merges MCP tools into runtime tool registry for the session.\n6. **Live session** can refresh MCP tools via `/mcp` flows (`disconnectAll` + rediscover + `session.refreshMCPTools`).\n7. **Teardown** happens when callers invoke `disconnectServer`/`disconnectAll`; manager also clears MCP tool registrations for disconnected servers.\n\n## Discovery and load phase\n\n### Entry path from SDK\n\n`createAgentSession()` in `src/sdk.ts` performs MCP startup when `enableMCP` is true (default):\n\n- calls `discoverAndLoadMCPTools(cwd, { ... })`,\n- passes `authStorage`, cache storage, and `mcp.enableProjectConfig` setting,\n- always sets `filterExa: true`,\n- logs per-server load/connect errors,\n- stores returned manager in `toolSession.mcpManager` and session result.\n\nIf `enableMCP` is false, MCP discovery is skipped entirely.\n\n### Config discovery and filtering\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical MCP server items through capability discovery, then converts to legacy `MCPServerConfig`.\n\nFiltering behavior:\n\n- `enableProjectConfig: false` removes project-level entries (`_source.level === \"project\"`).\n- `enabled: false` servers are skipped before connect attempts.\n- Exa servers are filtered out by default and API keys are extracted for native Exa tool integration.\n\nResult includes both `configs` and `sources` (metadata used later for provider labeling).\n\n### Discovery-level failure behavior\n\n`discoverAndLoadMCPTools()` distinguishes two failure classes:\n\n- **Discovery hard failure** (exception from `manager.discoverAndConnect`, typically from config discovery): returns an empty tool set and one synthetic error `{ path: \".mcp.json\", error }`.\n- **Per-server runtime/connect failure**: manager returns partial success with `errors` map; other servers continue.\n\nSo startup does not fail the whole agent session when individual MCP servers fail.\n\n## Manager state model\n\n`MCPManager` tracks runtime lifecycle with separate registries:\n\n- `#connections: Map<string, MCPServerConnection>` — fully connected servers.\n- `#pendingConnections: Map<string, Promise<MCPServerConnection>>` — handshake in progress.\n- `#pendingToolLoads: Map<string, Promise<{ connection, serverTools }>>` — connected but tools still loading.\n- `#tools: CustomTool[]` — current MCP tool view exposed to callers.\n- `#sources: Map<string, SourceMeta>` — provider/source metadata even before connect completes.\n\n`getConnectionStatus(name)` derives status from these maps:\n\n- `connected` if in `#connections`,\n- `connecting` if pending connect or pending tool load,\n- `disconnected` otherwise.\n\n## Connection establishment and startup timing\n\n## Per-server connect pipeline\n\nFor each discovered server in `connectServers()`:\n\n1. store/update source metadata,\n2. skip if already connected/pending,\n3. validate transport fields (`validateServerConfig`),\n4. resolve auth/shell substitutions (`#resolveAuthConfig`),\n5. call `connectToServer(name, resolvedConfig)`,\n6. call `listTools(connection)`,\n7. cache tool definitions (`MCPToolCache.set`) best-effort.\n\n`connectToServer()` behavior (`src/mcp/client.ts`):\n\n- creates stdio or HTTP/SSE transport,\n- performs MCP `initialize` + `notifications/initialized`,\n- uses timeout (`config.timeout` or 30s default),\n- closes transport on init failure.\n\n### Fast startup gate + deferred fallback\n\n`connectServers()` waits on a race between:\n\n- all connect/tool-load tasks settled, and\n- `STARTUP_TIMEOUT_MS = 250`.\n\nAfter 250ms:\n\n- fulfilled tasks become live `MCPTool`s,\n- rejected tasks produce per-server errors,\n- still-pending tasks:\n - use cached tool definitions if available (`MCPToolCache.get`) to create `DeferredMCPTool`s,\n - otherwise block until those pending tasks settle.\n\nThis is a hybrid startup model: fast return when cache is available, correctness wait when cache is not.\n\n### Background completion behavior\n\nEach pending `toolsPromise` also has a background continuation that eventually:\n\n- replaces that server’s tool slice in manager state via `#replaceServerTools`,\n- writes cache,\n- logs late failures only after startup (`allowBackgroundLogging`).\n\n## Tool exposure and live-session availability\n\n### Startup registration\n\n`discoverAndLoadMCPTools()` converts manager tools into `LoadedCustomTool[]` and decorates paths (`mcp:<server> via <providerName>` when known).\n\n`createAgentSession()` then pushes these tools into `customTools`, which are wrapped and added to the runtime tool registry with names like `mcp_<server>_<tool>`.\n\n### Tool calls\n\n- `MCPTool` calls tools through an already connected `MCPServerConnection`.\n- `DeferredMCPTool` waits for `waitForConnection(server)` before calling; this allows cached tools to exist before connection is ready.\n\nBoth return structured tool output and convert transport/tool errors into `MCP error: ...` tool content (abort remains abort).\n\n## Refresh/reload paths (startup vs live reload)\n\n### Initial startup path\n\n- one-time discovery/load in `sdk.ts`,\n- tools are registered in initial session tool registry.\n\n### Interactive reload path\n\n`/mcp reload` path (`src/modes/controllers/mcp-command-controller.ts`) does:\n\n1. `mcpManager.disconnectAll()`,\n2. `mcpManager.discoverAndConnect()`,\n3. `session.refreshMCPTools(mcpManager.getTools())`.\n\n`session.refreshMCPTools()` (`src/session/agent-session.ts`) removes all `mcp_` tools, re-wraps latest MCP tools, and re-activates tool set so MCP changes apply without restarting session.\n\nThere is also a follow-up path for late connections: after waiting for a specific server, if status becomes `connected`, it re-runs `session.refreshMCPTools(...)` so newly available tools are rebound in-session.\n\n## Health, reconnect, and partial failure behavior\n\nCurrent runtime behavior is intentionally minimal:\n\n- **No autonomous health monitor** in manager/client.\n- **No automatic reconnect loop** when a transport drops.\n- Manager does not subscribe to transport `onClose`/`onError`; status is registry-driven.\n- Reconnect is explicit: reload flow or direct `connectServers()` invocation.\n\nOperationally:\n\n- one server failing does not remove tools from healthy servers,\n- connect/list failures are isolated per server,\n- tool cache and background updates are best-effort (warnings/errors logged, no hard stop).\n\n## Teardown semantics\n\n### Server-level teardown\n\n`disconnectServer(name)`:\n\n- removes pending entries/source metadata,\n- closes transport if connected,\n- removes that server’s `mcp_` tools from manager state.\n\n### Global teardown\n\n`disconnectAll()`:\n\n- closes all active transports with `Promise.allSettled`,\n- clears pending maps, sources, connections, and manager tool list.\n\nIn current wiring, explicit teardown is used in MCP command flows (for reload/remove/disable). There is no separate automatic manager disposal hook in the startup path itself; callers are responsible for invoking manager disconnect methods when they need deterministic MCP shutdown.\n\n## Failure modes and guarantees\n\n| Scenario | Behavior | Hard fail vs best-effort |\n| --- | --- | --- |\n| Discovery throws (capability/config load path) | Loader returns empty tools + synthetic `.mcp.json` error | Best-effort session startup |\n| Invalid server config | Server skipped with validation error entry | Best-effort per server |\n| Connect timeout/init failure | Server error recorded; others continue | Best-effort per server |\n| `tools/list` still pending at startup with cache hit | Deferred tools returned immediately | Best-effort fast startup |\n| `tools/list` still pending at startup without cache | Startup waits for pending to settle | Hard wait for correctness |\n| Late background tool-load failure | Logged after startup gate | Best-effort logging |\n| Runtime dropped transport | No automatic reconnect; future calls fail until reconnect/reload | Best-effort recovery via manual action |\n\n## Public API surface\n\n`src/mcp/index.ts` re-exports loader/manager/client APIs for external callers. `src/sdk.ts` exposes `discoverMCPServers()` as a convenience wrapper returning the same loader result shape.\n\n## Implementation files\n\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts) — loader facade, discovery error normalization, `LoadedCustomTool` conversion.\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts) — lifecycle state registries, parallel connect/list flow, refresh/disconnect.\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts) — transport setup, initialize handshake, list/call/disconnect.\n- [`src/mcp/index.ts`](../packages/coding-agent/src/mcp/index.ts) — MCP module API exports.\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts) — startup wiring into session/tool registry.\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts) — config discovery/filtering/validation used by manager.\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts) — `MCPTool` and `DeferredMCPTool` runtime behavior.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — `refreshMCPTools` live rebinding.\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts) — interactive reload/reconnect flows.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent MCP proxying via parent manager connections.\n",
20
20
  "mcp-server-tool-authoring.md": "# MCP server and tool authoring\n\nThis document explains how MCP server definitions become callable `mcp_*` tools in coding-agent, and what operators should expect when configs are invalid, duplicated, disabled, or auth-gated.\n\n## Architecture at a glance\n\n```text\nConfig sources (.omp/.claude/.cursor/.vscode/mcp.json, mcp.json, etc.)\n -> discovery providers normalize to canonical MCPServer\n -> capability loader dedupes by server name (higher provider priority wins)\n -> loadAllMCPConfigs converts to MCPServerConfig + skips enabled:false\n -> MCPManager connects/listTools (with auth/header/env resolution)\n -> MCPTool/DeferredMCPTool bridge exposes tools as mcp_<server>_<tool>\n -> AgentSession.refreshMCPTools replaces live MCP tools immediately\n```\n\n## 1) Server config model and validation\n\n`src/mcp/types.ts` defines the authoring shape used by MCP config writers and runtime:\n\n- `stdio` (default when `type` missing): requires `command`, optional `args`, `env`, `cwd`\n- `http`: requires `url`, optional `headers`\n- `sse`: requires `url`, optional `headers` (kept for compatibility)\n- shared fields: `enabled`, `timeout`, `auth`\n\n`validateServerConfig()` (`src/mcp/config.ts`) enforces transport basics:\n\n- rejects configs that set both `command` and `url`\n- requires `command` for stdio\n- requires `url` for http/sse\n- rejects unknown `type`\n\n`config-writer.ts` applies this validation for add/update operations and also validates server names:\n\n- non-empty\n- max 100 chars\n- only `[a-zA-Z0-9_.-]`\n\n### Transport pitfalls\n\n- `type` omitted means stdio. If you intended HTTP/SSE but omitted `type`, `command` becomes mandatory.\n- `sse` is still accepted but treated as HTTP transport internally (`createHttpTransport`).\n- Validation is structural, not reachability: a syntactically valid URL can still fail at connect time.\n\n## 2) Discovery, normalization, and precedence\n\n### Capability-based discovery\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical `MCPServer` items via `loadCapability(mcpCapability.id)`.\n\nThe capability layer (`src/capability/index.ts`) then:\n\n1. loads providers in priority order\n2. dedupes by `server.name` (first win = highest priority)\n3. validates deduped items\n\nResult: duplicate server names across sources are not merged. One definition wins; lower-priority duplicates are shadowed.\n\n### `.mcp.json` and related files\n\nThe dedicated fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json` (low priority).\n\nIn practice MCP servers also come from higher-priority providers (for example native `.omp/...` and tool-specific config dirs). Authoring guidance:\n\n- Prefer `.omp/mcp.json` (project) or `~/.omp/mcp.json` (user) for explicit control.\n- Use root `mcp.json` / `.mcp.json` when you need fallback compatibility.\n- Reusing the same server name in multiple sources causes precedence shadowing, not merge.\n\n### Normalization behavior\n\n`convertToLegacyConfig()` (`src/mcp/config.ts`) maps canonical `MCPServer` to runtime `MCPServerConfig`.\n\nKey behavior:\n\n- transport inferred as `server.transport ?? (command ? \"stdio\" : url ? \"http\" : \"stdio\")`\n- disabled servers (`enabled === false`) are dropped before connection\n- optional fields are preserved when present\n\n### Environment expansion during discovery\n\n`mcp-json.ts` expands env placeholders in string fields with `expandEnvVarsDeep()`:\n\n- supports `${VAR}` and `${VAR:-default}`\n- unresolved values remain literal `${VAR}` strings\n\n`mcp-json.ts` also performs runtime type checks for user JSON and logs warnings for invalid `enabled`/`timeout` values instead of hard-failing the whole file.\n\n## 3) Auth and runtime value resolution\n\n`MCPManager.prepareConfig()`/`#resolveAuthConfig()` (`src/mcp/manager.ts`) is the final pre-connect pass.\n\n### OAuth credential injection\n\nIf config has:\n\n```ts\nauth: { type: \"oauth\", credentialId: \"...\" }\n```\n\nand credential exists in auth storage:\n\n- `http`/`sse`: injects `Authorization: Bearer <access_token>` header\n- `stdio`: injects `OAUTH_ACCESS_TOKEN` env var\n\nIf credential lookup fails, manager logs a warning and continues with unresolved auth.\n\n### Header/env value resolution\n\nBefore connect, manager resolves each header/env value via `resolveConfigValue()` (`src/config/resolve-config-value.ts`):\n\n- value starting with `!` => execute shell command, use trimmed stdout (cached)\n- otherwise, treat value as environment variable name first (`process.env[name]`), fallback to literal value\n- unresolved command/env values are omitted from final headers/env map\n\nOperational caveat: this means a mistyped secret command/env key can silently remove that header/env entry, producing downstream 401/403 or server startup failures.\n\n## 4) Tool bridge: MCP -> agent-callable tools\n\n`src/mcp/tool-bridge.ts` converts MCP tool definitions into `CustomTool`s.\n\n### Naming and collision domain\n\nTool names are generated as:\n\n```text\nmcp_<sanitized_server_name>_<sanitized_tool_name>\n```\n\nRules:\n\n- lowercases\n- non-`[a-z_]` chars become `_`\n- repeated underscores collapse\n- redundant `<server>_` prefix in tool name is stripped once\n\nThis avoids many collisions, but not all. Different raw names can still sanitize to the same identifier (for example `my-server` and `my.server` both sanitize similarly), and registry insertion is last-write-wins.\n\n### Schema mapping\n\n`convertSchema()` keeps MCP JSON Schema mostly as-is but patches object schemas missing `properties` with `{}` for provider compatibility.\n\n### Execution mapping\n\n`MCPTool.execute()` / `DeferredMCPTool.execute()`:\n\n- calls MCP `tools/call`\n- flattens MCP content into displayable text\n- returns structured details (`serverName`, `mcpToolName`, provider metadata)\n- maps server-reported `isError` to `Error: ...` text result\n- maps thrown transport/runtime failures to `MCP error: ...`\n- preserves abort semantics by translating AbortError into `ToolAbortError`\n\n## 5) Operator lifecycle: add/edit/remove and live updates\n\nInteractive mode exposes `/mcp` in `src/modes/controllers/mcp-command-controller.ts`.\n\nSupported operations:\n\n- `add` (wizard or quick-add)\n- `remove` / `rm`\n- `enable` / `disable`\n- `test`\n- `reauth` / `unauth`\n- `reload`\n\nConfig writes are atomic (`writeMCPConfigFile`: temp file + rename).\n\nAfter changes, controller calls `#reloadMCP()`:\n\n1. `mcpManager.disconnectAll()`\n2. `mcpManager.discoverAndConnect()`\n3. `session.refreshMCPTools(mcpManager.getTools())`\n\n`refreshMCPTools()` replaces all `mcp_` registry entries and immediately re-activates the latest MCP tool set, so changes take effect without restarting the session.\n\n### Mode differences\n\n- **Interactive/TUI mode**: `/mcp` gives in-app UX (wizard, OAuth flow, connection status text, immediate runtime rebinding).\n- **SDK/headless integration**: `discoverAndLoadMCPTools()` (`src/mcp/loader.ts`) returns loaded tools + per-server errors; no `/mcp` command UX.\n\n## 6) User-visible error surfaces\n\nCommon error strings users/operators see:\n\n- add/update validation failures:\n - `Invalid server config: ...`\n - `Server \"<name>\" already exists in <path>`\n- quick-add argument issues:\n - `Use either --url or -- <command...>, not both.`\n - `--token requires --url (HTTP/SSE transport).`\n- connect/test failures:\n - `Failed to connect to \"<name>\": <message>`\n - timeout help text suggests increasing timeout\n - auth help text for `401/403`\n- auth/OAuth flows:\n - `Authentication required ... OAuth endpoints could not be discovered`\n - `OAuth flow timed out. Please try again.`\n - `OAuth authentication failed: ...`\n- disabled server usage:\n - `Server \"<name>\" is disabled. Run /mcp enable <name> first.`\n\nBad source JSON in discovery is generally handled as warnings/logs; config-writer paths throw explicit errors.\n\n## 7) Practical authoring guidance\n\nFor robust MCP authoring in this codebase:\n\n1. Keep server names globally unique across all MCP-capable config sources.\n2. Prefer alphanumeric/underscore names to avoid sanitized-name collisions in generated `mcp_*` tool names.\n3. Use explicit `type` to avoid accidental stdio defaults.\n4. Treat `enabled: false` as hard-off: server is omitted from runtime connect set.\n5. For OAuth configs, store a valid `credentialId`; otherwise auth injection is skipped.\n6. If using command-based secret resolution (`!cmd`), verify command output is stable and non-empty.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts)\n- [`src/mcp/config-writer.ts`](../packages/coding-agent/src/mcp/config-writer.ts)\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts)\n- [`src/discovery/mcp-json.ts`](../packages/coding-agent/src/discovery/mcp-json.ts)\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/config/resolve-config-value.ts`](../packages/coding-agent/src/config/resolve-config-value.ts)\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts)\n",
21
+ "memory.md": "# Autonomous Memory\n\nWhen enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into each new session. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable via `/settings` or `config.yml`:\n\n```yaml\nmemories:\n enabled: true\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Cite the memory artifact path when memory changes the plan, and pair it with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Reading memory artifacts\n\nThe agent can read memory files directly using `memory://` URLs with the `read` tool:\n\n| URL | Content |\n|---|---|\n| `memory://root` | Compact summary injected at startup |\n| `memory://root/MEMORY.md` | Full long-term memory document |\n| `memory://root/skills/<name>/SKILL.md` | A generated skill playbook |\n\n### `/memory` slash command\n\n| Subcommand | Effect |\n|---|---|\n| `view` | Show the current memory injection payload |\n| `clear` / `reset` | Delete all memory data and generated artifacts |\n| `enqueue` / `rebuild` | Force consolidation to run at next startup |\n\n## How it works\n\nMemories are built by a background pipeline that at startup or manually triggered via slash command.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, or currently active are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nAll output is scanned for secrets before being written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven entirely by static prompt files in `src/prompts/memories/`.\n\n| File | Purpose | Variables |\n|---|---|---|\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n|---|---|---|\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` | Cross-session synthesis |\n\nIf `smol` is not configured, Phase 2 falls back to the `default` role.\n\n## Configuration\n\n| Setting | Default | Description |\n|---|---|---|\n| `memories.enabled` | `false` | Master switch |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `src/prompts/memories/` — memory prompt templates\n- `src/internal-urls/memory-protocol.ts` — `memory://` URL handler\n",
21
22
  "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default) or `none`\n- `discovery.type`: `ollama`\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Apply runtime-discovered models (currently Ollama), then re-apply model overrides.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-completions`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls `GET /api/tags` on Ollama and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-completions\n auth: none\n discovery:\n type: ollama\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `plan`, `commit`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.3-codex-spark:\n contextPromotionTarget: openai-codex/gpt-5.3-codex\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\n`models.yml` supports this `compat` subset:\n\n- `supportsStore`\n- `supportsDeveloperRole`\n- `supportsReasoningEffort`\n- `maxTokensField` (`max_completion_tokens` or `max_tokens`)\n- `openRouterRouting.only` / `openRouterRouting.order`\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order`\n\nThese are consumed by the OpenAI-completions transport logic and combined with URL-based auto-detection.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`.\n\nOne notable legacy path remains: web-search Anthropic auth resolution still reads `~/.omp/agent/models.json` directly in `src/web/search/auth.ts`.\n\nIf you rely on that specific path, keep JSON compatibility in mind until that module is migrated.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
22
23
  "natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document deep-dives the addon loading/validation layer in `@oh-my-pi/pi-natives`: how `native.ts` decides which `.node` file to load, when embedded payload extraction runs, and how startup failures are reported.\n\n## Implementation files\n\n- `packages/natives/src/native.ts`\n- `packages/natives/src/embedded-addon.ts`\n- `packages/natives/src/bindings.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nLoader/runtime responsibilities are intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Optionally materialize an embedded addon into a versioned per-user cache directory.\n- Attempt candidates in deterministic order.\n- Reject stale or incompatible addons via `validateNative` before exposing bindings.\n\nOut of scope here: module-specific grep/text/highlight behavior.\n\n## Runtime inputs and derived state\n\nAt module initialization (`export const native = loadNative();`), `native.ts` computes static context:\n\n- **Platform tag**: ``${process.platform}-${process.arch}`` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json` (`version` field).\n- **Core directories**:\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/omp` (or `%USERPROFILE%/AppData/Local/omp`).\n - Non-Windows: `~/.local/bin`.\n- **Compiled-binary mode** (`isCompiledBinary`): true if any of:\n - `PI_COMPILED` env var is set, or\n - `import.meta.url` contains Bun-embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Variant override**: `PI_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nBehavior detail:\n\n- Unsupported platforms are not rejected up-front.\n- Loader still tries all computed candidates first.\n- If nothing loads, it throws an explicit unsupported-platform error listing supported tags.\n\nThis preserves useful diagnostics for near-miss cases while still failing hard for truly unsupported targets.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. If `PI_NATIVE_VARIANT` is `modern` or `baseline`, that value wins.\n2. Else detect AVX2 support:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: query `sysctl` (`machdep.cpu.leaf7_features`, fallback `machdep.cpu.features`).\n - Windows: run PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. Result:\n - AVX2 available -> `modern`\n - AVX2 unavailable/undetectable -> `baseline`\n\n### Non-x64 behavior\n\n- No variant is used; loader stays on the default filename (`pi_natives.<platform>-<arch>.node`).\n\n### Filename construction\n\nGiven `tag = <platform>-<arch>`:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`: try in order\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node` (intentional fallback)\n- x64 + `baseline`: only `pi_natives.<tag>-baseline.node`\n\nThe `addonLabel` used in final error messages is either `<tag>` or `<tag> (<variant>)`.\n\n## Candidate path construction and fallback ordering\n\n`native.ts` builds candidate pools before any `require(...)` call.\n\n### Debug/dev candidates (only when `PI_DEV` is set)\n\nPrepended first:\n\n1. `<nativeDir>/pi_natives.dev.node`\n2. `<execDir>/pi_natives.dev.node`\n\nThis path is explicit debug intent and always outranks release candidates.\n\n### Release candidates\n\nBuilt from variant-resolved filename list and searched in this order:\n\n- **Non-compiled runtime**:\n 1. `<nativeDir>/<filename>`\n 2. `<execDir>/<filename>`\n\n- **Compiled runtime** (`PI_COMPILED` or Bun embedded markers):\n 1. `<versionedDir>/<filename>`\n 2. `<userDataDir>/<filename>`\n 3. `<nativeDir>/<filename>`\n 4. `<execDir>/<filename>`\n\n`dedupedCandidates` removes duplicates while preserving first occurrence order.\n\n### Final runtime sequence\n\nAt load time:\n\n1. Optional embedded extraction candidate (if produced) is inserted at the front.\n2. Remaining deduplicated candidates are tried in order.\n3. First candidate that both `require(...)`s and passes `validateNative(...)` wins.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.ts` defines a generated manifest shape:\n\n- `platformTag`\n- `version`\n- `files[]` where each entry has `variant`, `filename`, `filePath`\n\nCurrent checked-in default is `embeddedAddon: null`; compiled artifacts may replace this with real metadata.\n\n### Extraction state machine\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when all gates pass:\n\n1. `isCompiledBinary === true`\n2. `embeddedAddon !== null`\n3. `embeddedAddon.platformTag === platformTag`\n4. `embeddedAddon.version === packageVersion`\n5. A variant-appropriate embedded file is found\n\nVariant file selection mirrors runtime variant intent:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization behavior:\n\n1. Ensure `<versionedDir>` exists (`mkdirSync(..., { recursive: true })`).\n2. If `<versionedDir>/<selected filename>` already exists, reuse it (no rewrite).\n3. Else read embedded source `filePath` and write target file.\n4. Return target path for highest-priority load attempt.\n\nOn failure, extraction does not crash immediately; it appends an error entry (directory creation or write failure) and loader proceeds to normal candidate probing.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Compute platform/version/variant/candidate lists\n -> (Compiled + embedded manifest matches?)\n yes -> Try extract embedded to versionedDir (record errors, continue)\n no -> Skip extraction\n -> For each runtime candidate in order:\n require(candidate)\n -> success: validateNative\n -> pass: return bindings (READY)\n -> fail: record error, continue\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (full tried-path diagnostics + hints)\n```\n\n## `validateNative` contract checks\n\n`validateNative(bindings, source)` enforces a function-only contract over `NativeBindings` at startup.\n\nMechanics:\n\n- For each required export name, it checks `typeof bindings[name] === \"function\"`.\n- Missing names are aggregated.\n- If any are missing, loader throws:\n - source addon path,\n - missing export list,\n - rebuild command hint.\n\nThis is a hard compatibility gate against stale binaries, partial builds, and symbol/name drift.\n\n### JS API ↔ native export mapping (validation gate)\n\n| JS binding name checked in `validateNative` | Expected native export name |\n| --- | --- |\n| `grep` | `grep` |\n| `glob` | `glob` |\n| `highlightCode` | `highlightCode` |\n| `executeShell` | `executeShell` |\n| `PtySession` | `PtySession` |\n| `Shell` | `Shell` |\n| `visibleWidth` | `visibleWidth` |\n| `getSystemInfo` | `getSystemInfo` |\n| `getWorkProfile` | `getWorkProfile` |\n| `invalidateFsScanCache` | `invalidateFsScanCache` |\n\nNote: `bindings.ts` declares only the base `cancelWork(id)` member; module `types.ts` files declaration-merge additional symbols that `validateNative` enforces.\n\n## Failure behavior and diagnostics\n\n## Unsupported platform\n\nIf all candidates fail and `platformTag` is not in `SUPPORTED_PLATFORMS`, loader throws:\n\n- `Unsupported platform: <tag>`\n- Full supported-platform list\n- Explicit issue-reporting guidance\n\n## Stale binary / mismatch symptoms\n\nTypical stale mismatch signal:\n\n- `Native addon missing exports (<candidate>). Missing: ...`\n\nCommon causes:\n\n- Old `.node` binary from previous package version/API shape.\n- Wrong variant artifact selected (for x64).\n- New Rust export not present in loaded artifact.\n\nLoader behavior:\n\n- Records per-candidate missing-export failures.\n- Continues probing remaining candidates.\n- If no candidate validates, final error includes every attempted path with each failure message.\n\n## Compiled-binary startup failures\n\nIn compiled mode final diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete stale `<versionedDir>` and rerun,\n- direct release download `curl` commands for each expected filename.\n\n## Non-compiled startup failures\n\nIn normal package/runtime mode final diagnostics include:\n\n- reinstall hint (`bun install @oh-my-pi/pi-natives`),\n- local rebuild command (`bun --cwd=packages/natives run build:native`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern ...`).\n\n## Dev/debug versus release behavior\n\nWhen `PI_DEV` is set:\n\n- `pi_natives.dev.node` candidates are prepended ahead of all release candidates.\n- Loader emits per-candidate console diagnostics (`Loaded native addon...` and load errors).\n\nWithout `PI_DEV`:\n\n- Only release candidate chain is used.\n- No dev console diagnostics are emitted.\n\nOperationally, this means debug sessions can validate an ad-hoc dev addon first, while production/release runs remain on deterministic release artifact probing.",
23
24
  "natives-architecture.md": "# Natives Architecture\n\n`@oh-my-pi/pi-natives` is a three-layer stack:\n\n1. **TypeScript wrapper/API layer** exposes stable JS/TS entrypoints.\n2. **Addon loading/validation layer** resolves and validates the `.node` binary for the current runtime.\n3. **Rust N-API module layer** implements performance-critical primitives exported to JS.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/src/index.ts`\n- `packages/natives/src/native.ts`\n- `packages/natives/src/bindings.ts`\n- `packages/natives/src/embedded-addon.ts`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Layer 1: TypeScript wrapper/API layer\n\n`packages/natives/src/index.ts` is the public barrel. It groups exports by capability domain and re-exports typed wrappers rather than exposing raw N-API bindings directly.\n\nCurrent top-level groups:\n\n- **Search/text primitives**: `grep`, `glob`, `text`, `highlight`\n- **Execution/process/terminal primitives**: `shell`, `pty`, `ps`, `keys`\n- **System/media/conversion primitives**: `image`, `html`, `clipboard`, `system-info`, `work`\n\n`packages/natives/src/bindings.ts` defines the base interface contract:\n\n- `NativeBindings` starts with shared members (`cancelWork(id: number)`)\n- module-specific bindings are added by declaration merging from each module’s `types.ts`\n- `Cancellable` standardizes timeout and abort-signal options for wrappers that expose cancellation\n\n**Guaranteed contract (API-facing):** consumers import from `@oh-my-pi/pi-natives` and use typed wrappers.\n\n**Implementation detail (may change):** declaration merging and internal wrapper layout (`src/<module>/index.ts`, `src/<module>/types.ts`).\n\n## Layer 2: Addon loading and validation\n\n`packages/natives/src/native.ts` owns runtime addon selection, optional extraction, and export validation.\n\n### Candidate resolution model\n\n- Platform tag is `\"${process.platform}-${process.arch}\"`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-x64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename (no variant suffix).\n\nFilename strategy:\n\n- Release: `pi_natives.<platform>-<arch>.node`\n- x64 variant release: `pi_natives.<platform>-<arch>-modern.node` and/or `...-baseline.node`\n- Dev: `pi_natives.dev.node` (preferred when `PI_DEV` is set)\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- **Linux**: `/proc/cpuinfo`\n- **macOS**: `sysctl machdep.cpu.leaf7_features` / `machdep.cpu.features`\n- **Windows**: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`PI_NATIVE_VARIANT` can explicitly force `modern` or `baseline`.\n\n### Binary distribution and extraction model\n\n`packages/natives/package.json` includes both `src` and `native` in published files. The `native/` directory stores prebuilt platform artifacts.\n\nFor compiled binaries (`PI_COMPILED` or Bun embedded runtime markers), loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/omp` (fallback `%USERPROFILE%/AppData/Local/omp`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates\n\nIf an embedded addon manifest is present (`embedded-addon.ts` generated by `scripts/embed-native.ts`), `native.ts` can materialize the matching embedded binary into the versioned cache directory before loading.\n\n### Validation and failure modes\n\nAfter `require(candidate)`, `validateNative(...)` verifies required exports (for example `grep`, `glob`, `highlightCode`, `PtySession`, `Shell`, `getSystemInfo`, `getWorkProfile`, `invalidateFsScanCache`).\n\nFailure paths are explicit:\n\n- **Unsupported platform tag**: throws with supported platform list\n- **No loadable candidate**: throws with all attempted paths and remediation hints\n- **Missing exports**: throws with exact missing names and rebuild command\n- **Embedded extraction errors**: records directory/write failures and includes them in final load diagnostics\n\n**Guaranteed contract (API-facing):** addon load either succeeds with a validated binding set or fails fast with actionable error text.\n\n**Implementation detail (may change):** exact candidate search order and compiled-binary fallback path ordering.\n\n## Layer 3: Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` is the Rust entry module that declares exported module ownership:\n\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `image`\n- `keys`\n- `prof`\n- `ps`\n- `pty`\n- `shell`\n- `system_info`\n- `task`\n- `text`\n\nThese modules implement the N-API symbols consumed and validated by `native.ts`. JS-level names are surfaced through the TS wrappers in `packages/natives/src`.\n\n**Guaranteed contract (API-facing):** Rust module exports must match the binding names expected by `validateNative` and wrapper modules.\n\n**Implementation detail (may change):** internal Rust module decomposition and helper module boundaries (`glob_util`, `task`, etc.).\n\n## Ownership boundaries\n\nAt architecture level, ownership is split as follows:\n\n- **TS wrapper/API ownership (`packages/natives/src`)**\n - public API grouping, option typing, and stable JS ergonomics\n - cancellation surface (`timeoutMs`, `AbortSignal`) exposed to callers\n- **Loader ownership (`packages/natives/src/native.ts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary extraction and candidate probing\n - hard validation of required native exports\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation that TS wrappers consume\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@oh-my-pi/pi-natives`.\n2. Wrapper module calls into singleton `native` binding.\n3. `native.ts` selects candidate binary for platform/arch/variant.\n4. Optional embedded binary extraction occurs for compiled distributions.\n5. Addon is loaded and export set is validated.\n6. Wrapper returns typed results to caller.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Wrapper**: TS function/class that provides typed API over raw native exports.\n- **Declaration merging**: TS technique used by module `types.ts` files to extend `NativeBindings`.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from extracted/cache paths instead of only package-local paths.\n- **Embedded addon**: Build artifact metadata and file references generated into `embedded-addon.ts` so compiled binaries can extract matching `.node` payloads.\n- **Validation gate**: `validateNative(...)` check that rejects stale/mismatched binaries missing required exports.\n",
package/src/main.ts CHANGED
@@ -515,7 +515,8 @@ export async function runRootCommand(parsed: Args, rawArgs: string[]): Promise<v
515
515
  // Create AuthStorage and ModelRegistry upfront
516
516
  const authStorage = await discoverAuthStorage();
517
517
  const modelRegistry = new ModelRegistry(authStorage);
518
- await modelRegistry.refresh();
518
+ const refreshStrategy = parsedArgs.listModels !== undefined ? "online" : "online-if-uncached";
519
+ await modelRegistry.refresh(refreshStrategy);
519
520
  debugStartup("main:discoverModels");
520
521
  time("discoverModels");
521
522
 
@@ -50,19 +50,43 @@ export interface MCPToolDetails {
50
50
  providerName?: string;
51
51
  }
52
52
 
53
+ /**
54
+ * Recursively strip JSON Schema fields that cause AJV validation errors.
55
+ *
56
+ * - `$schema`: AJV throws on unknown meta-schema URIs (e.g. draft 2020-12 from schemars 1.x / rmcp 0.15+)
57
+ * - `nullable`: OpenAPI 3.0 extension, not standard JSON Schema — AJV rejects it as an unknown keyword.
58
+ */
59
+ function sanitizeSchema(schema: unknown): unknown {
60
+ if (Array.isArray(schema)) {
61
+ return schema.map(sanitizeSchema);
62
+ }
63
+ if (schema !== null && typeof schema === "object") {
64
+ const { $schema: _, nullable: __, ...rest } = schema as Record<string, unknown>;
65
+ const out: Record<string, unknown> = {};
66
+ for (const [k, v] of Object.entries(rest)) {
67
+ out[k] = sanitizeSchema(v);
68
+ }
69
+ return out;
70
+ }
71
+ return schema;
72
+ }
73
+
53
74
  /**
54
75
  * Convert JSON Schema from MCP to TypeBox-compatible schema.
55
76
  * MCP uses standard JSON Schema, TypeBox uses a compatible subset.
56
77
  *
57
78
  * Also normalizes schemas to work around common issues:
58
79
  * - Adds `properties: {}` to object schemas missing it (some LLM providers require this)
80
+ * - Strips `$schema` and `nullable` fields (see sanitizeSchema)
59
81
  */
60
82
  function convertSchema(mcpSchema: MCPToolDefinition["inputSchema"]): TSchema {
83
+ const schema = sanitizeSchema(mcpSchema) as Record<string, unknown>;
84
+
61
85
  // Normalize: object schemas must have properties field for some providers
62
- if (mcpSchema.type === "object" && !("properties" in mcpSchema)) {
63
- return { ...mcpSchema, properties: {} } as unknown as TSchema;
86
+ if (schema.type === "object" && !("properties" in schema)) {
87
+ return { ...schema, properties: {} } as unknown as TSchema;
64
88
  }
65
- return mcpSchema as unknown as TSchema;
89
+ return schema as unknown as TSchema;
66
90
  }
67
91
 
68
92
  /**
package/src/sdk.ts CHANGED
@@ -551,7 +551,9 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
551
551
  // Use provided or create AuthStorage and ModelRegistry
552
552
  const authStorage = options.authStorage ?? (await discoverAuthStorage(agentDir));
553
553
  const modelRegistry = options.modelRegistry ?? new ModelRegistry(authStorage);
554
- await modelRegistry.refresh();
554
+ if (!options.modelRegistry) {
555
+ await modelRegistry.refresh();
556
+ }
555
557
  time("discoverModels");
556
558
 
557
559
  const settings = options.settings ?? (await Settings.init({ cwd, agentDir }));
@@ -1364,6 +1364,38 @@ export class AuthStorage {
1364
1364
  return undefined;
1365
1365
  }
1366
1366
 
1367
+ /**
1368
+ * Peek at API key for a provider without refreshing OAuth tokens.
1369
+ * Used for model discovery where we only need to know if credentials exist
1370
+ * and get a best-effort token. The actual refresh happens lazily when the
1371
+ * provider is used for an API call.
1372
+ */
1373
+ async peekApiKey(provider: string): Promise<string | undefined> {
1374
+ const runtimeKey = this.#runtimeOverrides.get(provider);
1375
+ if (runtimeKey) {
1376
+ return runtimeKey;
1377
+ }
1378
+
1379
+ const apiKeySelection = this.#selectCredentialByType(provider, "api_key");
1380
+ if (apiKeySelection) {
1381
+ return resolveConfigValue(apiKeySelection.credential.key);
1382
+ }
1383
+
1384
+ // Return current OAuth access token only if it is not already expired.
1385
+ const oauthSelection = this.#selectCredentialByType(provider, "oauth");
1386
+ if (oauthSelection) {
1387
+ const expiresAt = oauthSelection.credential.expires;
1388
+ if (Number.isFinite(expiresAt) && expiresAt > Date.now()) {
1389
+ return oauthSelection.credential.access;
1390
+ }
1391
+ }
1392
+
1393
+ const envKey = getEnvApiKey(provider);
1394
+ if (envKey) return envKey;
1395
+
1396
+ return this.#fallbackResolver?.(provider) ?? undefined;
1397
+ }
1398
+
1367
1399
  /**
1368
1400
  * Get API key for a provider.
1369
1401
  * Priority:
@@ -248,6 +248,98 @@ function resolveFallbackCompletion(rawOutput: string, outputSchema: unknown): {
248
248
  return { data: candidate };
249
249
  }
250
250
 
251
+ export interface SubmitResultItem {
252
+ data?: unknown;
253
+ status?: "success" | "aborted";
254
+ error?: string;
255
+ }
256
+
257
+ interface FinalizeSubprocessOutputArgs {
258
+ rawOutput: string;
259
+ exitCode: number;
260
+ stderr: string;
261
+ doneAborted: boolean;
262
+ signalAborted: boolean;
263
+ submitResultItems?: SubmitResultItem[];
264
+ reportFindings?: ReviewFinding[];
265
+ outputSchema: unknown;
266
+ }
267
+
268
+ interface FinalizeSubprocessOutputResult {
269
+ rawOutput: string;
270
+ exitCode: number;
271
+ stderr: string;
272
+ abortedViaSubmitResult: boolean;
273
+ hasSubmitResult: boolean;
274
+ }
275
+
276
+ export const SUBAGENT_WARNING_NULL_SUBMIT_RESULT = "SYSTEM WARNING: Subagent called submit_result with null data.";
277
+ export const SUBAGENT_WARNING_MISSING_SUBMIT_RESULT =
278
+ "SYSTEM WARNING: Subagent exited without calling submit_result tool after 3 reminders.";
279
+
280
+ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): FinalizeSubprocessOutputResult {
281
+ let { rawOutput, exitCode, stderr } = args;
282
+ const { submitResultItems, reportFindings, doneAborted, signalAborted, outputSchema } = args;
283
+ let abortedViaSubmitResult = false;
284
+ const hasSubmitResult = Array.isArray(submitResultItems) && submitResultItems.length > 0;
285
+
286
+ if (hasSubmitResult) {
287
+ const lastSubmitResult = submitResultItems[submitResultItems.length - 1];
288
+ if (lastSubmitResult?.status === "aborted") {
289
+ abortedViaSubmitResult = true;
290
+ exitCode = 0;
291
+ stderr = lastSubmitResult.error || "Subagent aborted task";
292
+ try {
293
+ rawOutput = JSON.stringify({ aborted: true, error: lastSubmitResult.error }, null, 2);
294
+ } catch {
295
+ rawOutput = `{"aborted":true,"error":"${lastSubmitResult.error || "Unknown error"}"}`;
296
+ }
297
+ } else {
298
+ const submitData = lastSubmitResult?.data;
299
+ if (submitData === null || submitData === undefined) {
300
+ rawOutput = rawOutput
301
+ ? `${SUBAGENT_WARNING_NULL_SUBMIT_RESULT}\n\n${rawOutput}`
302
+ : SUBAGENT_WARNING_NULL_SUBMIT_RESULT;
303
+ } else {
304
+ const completeData = normalizeCompleteData(submitData, reportFindings);
305
+ try {
306
+ rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
307
+ } catch (err) {
308
+ const errorMessage = err instanceof Error ? err.message : String(err);
309
+ rawOutput = `{"error":"Failed to serialize submit_result data: ${errorMessage}"}`;
310
+ }
311
+ exitCode = 0;
312
+ stderr = "";
313
+ }
314
+ }
315
+ } else {
316
+ const allowFallback = exitCode === 0 && !doneAborted && !signalAborted;
317
+ const { normalized: normalizedSchema, error: schemaError } = normalizeOutputSchema(outputSchema);
318
+ const hasOutputSchema = normalizedSchema !== undefined && !schemaError;
319
+ const fallback = allowFallback ? resolveFallbackCompletion(rawOutput, outputSchema) : null;
320
+ if (fallback) {
321
+ const completeData = normalizeCompleteData(fallback.data, reportFindings);
322
+ try {
323
+ rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
324
+ } catch (err) {
325
+ const errorMessage = err instanceof Error ? err.message : String(err);
326
+ rawOutput = `{"error":"Failed to serialize fallback completion: ${errorMessage}"}`;
327
+ }
328
+ exitCode = 0;
329
+ stderr = "";
330
+ } else if (!hasOutputSchema && allowFallback && rawOutput.trim().length > 0) {
331
+ exitCode = 0;
332
+ stderr = "";
333
+ } else if (exitCode === 0) {
334
+ rawOutput = rawOutput
335
+ ? `${SUBAGENT_WARNING_MISSING_SUBMIT_RESULT}\n\n${rawOutput}`
336
+ : SUBAGENT_WARNING_MISSING_SUBMIT_RESULT;
337
+ }
338
+ }
339
+
340
+ return { rawOutput, exitCode, stderr, abortedViaSubmitResult, hasSubmitResult };
341
+ }
342
+
251
343
  /**
252
344
  * Extract a short preview from tool args for display.
253
345
  */
@@ -468,7 +560,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
468
560
  type AbortReason = "signal" | "terminate";
469
561
  let abortSent = false;
470
562
  let abortReason: AbortReason | undefined;
471
- let pendingTerminationTimeoutId: NodeJS.Timeout | null = null;
472
563
  const listenerController = new AbortController();
473
564
  const listenerSignal = listenerController.signal;
474
565
  const abortController = new AbortController();
@@ -502,24 +593,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
502
593
  if (activeSession) {
503
594
  void activeSession.abort();
504
595
  }
505
- cancelPendingTermination();
506
- };
507
-
508
- const schedulePendingTermination = () => {
509
- if (pendingTerminationTimeoutId || abortSent || resolved) return;
510
- pendingTerminationTimeoutId = setTimeout(() => {
511
- pendingTerminationTimeoutId = null;
512
- if (!resolved) {
513
- requestAbort("terminate");
514
- }
515
- }, 2000);
516
- };
517
-
518
- const cancelPendingTermination = () => {
519
- if (pendingTerminationTimeoutId) {
520
- clearTimeout(pendingTerminationTimeoutId);
521
- pendingTerminationTimeoutId = null;
522
- }
523
596
  };
524
597
 
525
598
  // Handle abort signal
@@ -704,6 +777,9 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
704
777
  existing.push(data);
705
778
  }
706
779
  progress.extractedToolData[event.toolName] = existing;
780
+ if (event.toolName === "submit_result") {
781
+ submitResultCalled = true;
782
+ }
707
783
  }
708
784
  }
709
785
 
@@ -717,8 +793,7 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
717
793
  isError: event.isError,
718
794
  })
719
795
  ) {
720
- // Don't terminate immediately - wait for message_end to get token counts
721
- schedulePendingTermination();
796
+ requestAbort("terminate");
722
797
  }
723
798
  }
724
799
  flushProgress = true;
@@ -785,11 +860,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
785
860
  // Accumulate tokens for progress display
786
861
  progress.tokens += getUsageTokens(messageUsage);
787
862
  }
788
- // If pending termination, now we have tokens - terminate immediately
789
- if (pendingTerminationTimeoutId) {
790
- cancelPendingTermination();
791
- requestAbort("terminate");
792
- }
793
863
  break;
794
864
  }
795
865
 
@@ -979,9 +1049,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
979
1049
 
980
1050
  const MAX_SUBMIT_RESULT_RETRIES = 3;
981
1051
  unsubscribe = session.subscribe(event => {
982
- if (event.type === "tool_execution_end" && event.toolName === "submit_result") {
983
- submitResultCalled = true;
984
- }
985
1052
  if (isAgentEvent(event)) {
986
1053
  try {
987
1054
  processEvent(event);
@@ -999,25 +1066,26 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
999
1066
  const reminderToolChoice = buildSubmitResultToolChoice(session.model);
1000
1067
 
1001
1068
  let retryCount = 0;
1002
- let previousTools: string[] | null = null;
1003
- try {
1004
- while (!submitResultCalled && retryCount < MAX_SUBMIT_RESULT_RETRIES && !abortSignal.aborted) {
1069
+ while (!submitResultCalled && retryCount < MAX_SUBMIT_RESULT_RETRIES && !abortSignal.aborted) {
1070
+ try {
1005
1071
  retryCount++;
1006
- if (!previousTools) {
1007
- previousTools = session.getActiveToolNames();
1008
- await session.setActiveToolsByName(["submit_result"]);
1009
- }
1010
1072
  const reminder = renderPromptTemplate(submitReminderTemplate, {
1011
1073
  retryCount,
1012
1074
  maxRetries: MAX_SUBMIT_RESULT_RETRIES,
1013
1075
  });
1014
1076
 
1015
1077
  await session.prompt(reminder, reminderToolChoice ? { toolChoice: reminderToolChoice } : undefined);
1078
+ } catch (err) {
1079
+ logger.error("Subagent prompt failed", {
1080
+ error: err instanceof Error ? err.message : String(err),
1081
+ });
1016
1082
  }
1017
- } finally {
1018
- if (previousTools) {
1019
- await session.setActiveToolsByName(previousTools);
1020
- }
1083
+ }
1084
+
1085
+ if (!submitResultCalled && !abortSignal.aborted) {
1086
+ aborted = true;
1087
+ exitCode = 1;
1088
+ error ??= SUBAGENT_WARNING_MISSING_SUBMIT_RESULT;
1021
1089
  }
1022
1090
 
1023
1091
  const lastMessage = session.state.messages[session.state.messages.length - 1];
@@ -1076,7 +1144,6 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
1076
1144
  clearTimeout(progressTimeoutId);
1077
1145
  progressTimeoutId = null;
1078
1146
  }
1079
- cancelPendingTermination();
1080
1147
 
1081
1148
  let exitCode = done.exitCode;
1082
1149
  if (done.error) {
@@ -1085,67 +1152,22 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
1085
1152
 
1086
1153
  // Use final output if available, otherwise accumulated output
1087
1154
  let rawOutput = finalOutputChunks.length > 0 ? finalOutputChunks.join("") : outputChunks.join("");
1088
- let abortedViaSubmitResult = false;
1089
- const submitResultItems = progress.extractedToolData?.submit_result as
1090
- | Array<{ data?: unknown; status?: "success" | "aborted"; error?: string }>
1091
- | undefined;
1155
+ const submitResultItems = progress.extractedToolData?.submit_result as SubmitResultItem[] | undefined;
1092
1156
  const reportFindings = progress.extractedToolData?.report_finding as ReviewFinding[] | undefined;
1093
- const hasSubmitResult = Array.isArray(submitResultItems) && submitResultItems.length > 0;
1094
- if (hasSubmitResult) {
1095
- const lastSubmitResult = submitResultItems[submitResultItems.length - 1];
1096
- if (lastSubmitResult?.status === "aborted") {
1097
- // Agent explicitly aborted via submit_result tool - clean exit with error info
1098
- abortedViaSubmitResult = true;
1099
- exitCode = 0;
1100
- stderr = lastSubmitResult.error || "Subagent aborted task";
1101
- try {
1102
- rawOutput = JSON.stringify({ aborted: true, error: lastSubmitResult.error }, null, 2);
1103
- } catch {
1104
- rawOutput = `{"aborted":true,"error":"${lastSubmitResult.error || "Unknown error"}"}`;
1105
- }
1106
- } else {
1107
- // Normal successful completion
1108
- const submitData = lastSubmitResult?.data;
1109
- if (submitData === null || submitData === undefined) {
1110
- // Agent called submit_result but with null/undefined data — treat as missing
1111
- // so the fallback path can try to extract output from conversation text
1112
- const warning = "SYSTEM WARNING: Subagent called submit_result with null data.";
1113
- rawOutput = rawOutput ? `${warning}\n\n${rawOutput}` : warning;
1114
- } else {
1115
- const completeData = normalizeCompleteData(submitData, reportFindings);
1116
- try {
1117
- rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
1118
- } catch (err) {
1119
- const errorMessage = err instanceof Error ? err.message : String(err);
1120
- rawOutput = `{"error":"Failed to serialize submit_result data: ${errorMessage}"}`;
1121
- }
1122
- exitCode = 0;
1123
- stderr = "";
1124
- }
1125
- }
1126
- } else {
1127
- const allowFallback = exitCode === 0 && !done.aborted && !signal?.aborted;
1128
- const { normalized: normalizedSchema, error: schemaError } = normalizeOutputSchema(outputSchema);
1129
- const hasOutputSchema = normalizedSchema !== undefined && !schemaError;
1130
- const fallback = allowFallback ? resolveFallbackCompletion(rawOutput, outputSchema) : null;
1131
- if (fallback) {
1132
- const completeData = normalizeCompleteData(fallback.data, reportFindings);
1133
- try {
1134
- rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
1135
- } catch (err) {
1136
- const errorMessage = err instanceof Error ? err.message : String(err);
1137
- rawOutput = `{"error":"Failed to serialize fallback completion: ${errorMessage}"}`;
1138
- }
1139
- exitCode = 0;
1140
- stderr = "";
1141
- } else if (!hasOutputSchema && allowFallback && rawOutput.trim().length > 0) {
1142
- exitCode = 0;
1143
- stderr = "";
1144
- } else if (exitCode === 0) {
1145
- const warning = "SYSTEM WARNING: Subagent exited without calling submit_result tool after 3 reminders.";
1146
- rawOutput = rawOutput ? `${warning}\n\n${rawOutput}` : warning;
1147
- }
1148
- }
1157
+ const finalized = finalizeSubprocessOutput({
1158
+ rawOutput,
1159
+ exitCode,
1160
+ stderr,
1161
+ doneAborted: Boolean(done.aborted),
1162
+ signalAborted: Boolean(signal?.aborted),
1163
+ submitResultItems,
1164
+ reportFindings,
1165
+ outputSchema,
1166
+ });
1167
+ rawOutput = finalized.rawOutput;
1168
+ exitCode = finalized.exitCode;
1169
+ stderr = finalized.stderr;
1170
+ const { abortedViaSubmitResult, hasSubmitResult } = finalized;
1149
1171
  const { content: truncatedOutput, truncated } = truncateTail(rawOutput, {
1150
1172
  maxBytes: MAX_OUTPUT_BYTES,
1151
1173
  maxLines: MAX_OUTPUT_LINES,
@@ -136,6 +136,17 @@ export class SubmitResultTool implements AgentTool<TObject, SubmitResultDetails>
136
136
 
137
137
  // Register subprocess tool handler for extraction + termination.
138
138
  subprocessToolRegistry.register<SubmitResultDetails>("submit_result", {
139
- extractData: event => event.result?.details as SubmitResultDetails | undefined,
140
- shouldTerminate: () => true,
139
+ extractData: event => {
140
+ const details = event.result?.details;
141
+ if (!details || typeof details !== "object") return undefined;
142
+ const record = details as Record<string, unknown>;
143
+ const status = record.status;
144
+ if (status !== "success" && status !== "aborted") return undefined;
145
+ return {
146
+ data: record.data,
147
+ status,
148
+ error: typeof record.error === "string" ? record.error : undefined,
149
+ };
150
+ },
151
+ shouldTerminate: event => !event.isError,
141
152
  });
@@ -4,9 +4,14 @@
4
4
  * Uses Claude's built-in web_search_20250305 tool to search the web.
5
5
  * Returns synthesized answers with citations and source metadata.
6
6
  */
7
- import { type AnthropicSystemBlock, buildAnthropicSystemBlocks, stripClaudeToolPrefix } from "@oh-my-pi/pi-ai";
7
+ import {
8
+ type AnthropicSystemBlock,
9
+ buildAnthropicHeaders,
10
+ buildAnthropicSystemBlocks,
11
+ stripClaudeToolPrefix,
12
+ } from "@oh-my-pi/pi-ai";
8
13
  import { $env } from "@oh-my-pi/pi-utils";
9
- import { buildAnthropicHeaders, buildAnthropicUrl, findAnthropicAuth } from "../../../web/search/auth";
14
+ import { buildAnthropicUrl, findAnthropicAuth } from "../../../web/search/auth";
10
15
  import type {
11
16
  AnthropicApiResponse,
12
17
  AnthropicAuthConfig,