@oh-my-pi/pi-coding-agent 14.6.6 → 14.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/CHANGELOG.md +50 -0
  2. package/examples/hooks/handoff.ts +1 -1
  3. package/examples/hooks/qna.ts +1 -1
  4. package/examples/sdk/03-custom-prompt.ts +7 -4
  5. package/examples/sdk/README.md +1 -1
  6. package/package.json +12 -12
  7. package/src/autoresearch/index.ts +48 -44
  8. package/src/cli/grep-cli.ts +1 -1
  9. package/src/cli/read-cli.ts +58 -0
  10. package/src/cli.ts +1 -0
  11. package/src/commands/read.ts +40 -0
  12. package/src/commit/agentic/agent.ts +1 -1
  13. package/src/commit/analysis/conventional.ts +1 -1
  14. package/src/commit/analysis/summary.ts +1 -1
  15. package/src/commit/changelog/generate.ts +1 -1
  16. package/src/commit/map-reduce/map-phase.ts +1 -1
  17. package/src/commit/map-reduce/reduce-phase.ts +1 -1
  18. package/src/config/settings-schema.ts +49 -0
  19. package/src/config/settings.ts +71 -1
  20. package/src/dap/client.ts +1 -0
  21. package/src/discovery/builtin.ts +34 -9
  22. package/src/edit/line-hash.ts +34 -4
  23. package/src/edit/modes/hashline.ts +352 -8
  24. package/src/edit/streaming.ts +4 -1
  25. package/src/export/html/index.ts +1 -1
  26. package/src/extensibility/extensions/runner.ts +3 -3
  27. package/src/extensibility/extensions/types.ts +4 -4
  28. package/src/internal-urls/docs-index.generated.ts +1 -1
  29. package/src/main.ts +13 -18
  30. package/src/memories/index.ts +1 -1
  31. package/src/modes/components/agent-dashboard.ts +1 -1
  32. package/src/modes/components/read-tool-group.ts +4 -9
  33. package/src/modes/components/tool-execution.ts +4 -0
  34. package/src/modes/controllers/event-controller.ts +2 -0
  35. package/src/modes/interactive-mode.ts +19 -12
  36. package/src/modes/rpc/rpc-types.ts +1 -1
  37. package/src/modes/utils/context-usage.ts +12 -5
  38. package/src/modes/utils/ui-helpers.ts +1 -0
  39. package/src/prompts/system/plan-mode-active.md +7 -3
  40. package/src/prompts/system/plan-mode-approved.md +5 -0
  41. package/src/prompts/system/project-prompt.md +36 -0
  42. package/src/prompts/system/system-prompt.md +0 -29
  43. package/src/prompts/tools/github.md +1 -0
  44. package/src/prompts/tools/read.md +15 -14
  45. package/src/sdk.ts +29 -28
  46. package/src/session/agent-session.ts +20 -12
  47. package/src/session/compaction/branch-summarization.ts +1 -1
  48. package/src/session/compaction/compaction.ts +3 -3
  49. package/src/session/session-dump-format.ts +10 -5
  50. package/src/session/streaming-output.ts +1 -1
  51. package/src/slash-commands/builtin-registry.ts +2 -2
  52. package/src/system-prompt.ts +35 -3
  53. package/src/task/executor.ts +4 -3
  54. package/src/task/isolation-backend.ts +22 -0
  55. package/src/tools/fetch.ts +4 -4
  56. package/src/tools/gh.ts +187 -0
  57. package/src/tools/inspect-image.ts +1 -1
  58. package/src/tools/output-meta.ts +1 -1
  59. package/src/tools/path-utils.ts +11 -0
  60. package/src/tools/read.ts +393 -204
  61. package/src/tools/search.ts +1 -1
  62. package/src/tools/sqlite-reader.ts +1 -1
  63. package/src/utils/commit-message-generator.ts +1 -1
  64. package/src/utils/title-generator.ts +1 -1
  65. package/src/web/search/providers/anthropic.ts +1 -1
  66. package/src/workspace-tree.ts +396 -0
@@ -21,7 +21,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
21
21
  "mcp-runtime-lifecycle.md": "# MCP runtime lifecycle\n\nThis document describes how MCP servers are discovered, connected, exposed as tools, refreshed, and torn down in the coding-agent runtime.\n\n## Lifecycle at a glance\n\n1. **SDK startup** calls `discoverAndLoadMCPTools()` (unless MCP is disabled).\n2. **Discovery** (`loadAllMCPConfigs`) resolves MCP server configs from capability sources, filters disabled/project/Exa entries, and preserves source metadata.\n3. **Manager connect phase** (`MCPManager.connectServers`) starts per-server connect + `tools/list` in parallel.\n4. **Fast startup gate** waits up to 250ms, then may return:\n - fully loaded `MCPTool`s,\n - failures per server,\n - or cached `DeferredMCPTool`s for still-pending servers.\n5. **SDK wiring** merges MCP tools into runtime tool registry for the session.\n6. **Post-connect enrichment** best-effort loads resources, resource templates, prompts, and optional resource subscriptions.\n7. **Live session** can refresh MCP tools via `/mcp` flows (`disconnectAll` + rediscover + `session.refreshMCPTools`) and can reconnect individual servers on transport close or `/mcp reconnect`.\n8. **Teardown** happens when callers invoke `disconnectServer`/`disconnectAll`; manager also clears MCP tool/resource/prompt registrations for disconnected servers.\n\n## Discovery and load phase\n\n### Entry path from SDK\n\n`createAgentSession()` in `src/sdk.ts` performs MCP startup when `enableMCP` is true (default):\n\n- calls `discoverAndLoadMCPTools(cwd, { ... 
})`,\n- passes `authStorage`, cache storage, and `mcp.enableProjectConfig` setting,\n- always sets `filterExa: true`,\n- logs per-server load/connect errors,\n- stores returned manager in `toolSession.mcpManager` and session result.\n\nIf `enableMCP` is false, MCP discovery is skipped entirely.\n\n### Config discovery and filtering\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical MCP server items through capability discovery, then converts to legacy `MCPServerConfig`.\n\nFiltering behavior:\n\n- `enableProjectConfig: false` removes project-level entries (`_source.level === \"project\"`).\n- `enabled: false` servers are skipped before connect attempts.\n- Exa servers are filtered out by default and API keys are extracted for native Exa tool integration.\n\nResult includes both `configs` and `sources` (metadata used later for provider labeling).\n\n### Discovery-level failure behavior\n\n`discoverAndLoadMCPTools()` distinguishes two failure classes:\n\n- **Discovery hard failure** (exception from `manager.discoverAndConnect`, typically from config discovery): returns an empty tool set and one synthetic error `{ path: \".mcp.json\", error }`.\n- **Per-server runtime/connect failure**: manager returns partial success with `errors` map; other servers continue.\n\nSo startup does not fail the whole agent session when individual MCP servers fail.\n\n## Manager state model\n\n`MCPManager` tracks runtime lifecycle with separate registries:\n\n- `#connections: Map<string, MCPServerConnection>` — fully connected servers.\n- `#pendingConnections: Map<string, Promise<MCPServerConnection>>` — handshake in progress.\n- `#pendingToolLoads: Map<string, Promise<{ connection, serverTools }>>` — connected but tools still loading.\n- `#tools: CustomTool[]` — current MCP tool view exposed to callers.\n- `#sources: Map<string, SourceMeta>` — provider/source metadata even before connect completes.\n- `#pendingReconnections: Map<string, Promise<MCPServerConnection | null>>` — 
reconnects in progress after a dropped transport or explicit reconnect.\n- `#serverConfigs: Map<string, MCPServerConfig>` — original unresolved configs preserved so reconnect can re-resolve credentials without leaking resolved tokens.\n\n`getConnectionStatus(name)` derives status from these maps:\n\n- `connected` if in `#connections`,\n- `connecting` if pending connect, pending tool load, or pending reconnect,\n- `disconnected` otherwise.\n\n## Connection establishment and startup timing\n\n## Per-server connect pipeline\n\nFor each discovered server in `connectServers()`:\n\n1. store/update source metadata,\n2. skip if already connected/pending/reconnecting,\n3. validate transport fields (`validateServerConfig`),\n4. resolve auth/shell substitutions (`#resolveAuthConfig`),\n5. call `connectToServer(name, resolvedConfig)` with manager notification/request handlers,\n6. wire HTTP OAuth refresh and transport `onClose` reconnect handling,\n7. call `listTools(connection)`,\n8. cache tool definitions (`MCPToolCache.set`) best-effort,\n9. 
best-effort load resources, resource templates, prompts, and subscriptions after tools load.\n\n`connectToServer()` behavior (`src/mcp/client.ts`):\n\n- creates stdio or HTTP/SSE transport,\n- performs MCP `initialize`,\n- for HTTP/SSE, starts the optional background SSE listener before `notifications/initialized`,\n- sends `notifications/initialized`,\n- uses timeout (`config.timeout` or 30s default),\n- closes transport on init failure.\n\n### Fast startup gate + deferred fallback\n\n`connectServers()` waits on a race between:\n\n- all connect/tool-load tasks settled, and\n- `STARTUP_TIMEOUT_MS = 250`.\n\nAfter 250ms:\n\n- fulfilled tasks become live `MCPTool`s,\n- rejected tasks produce per-server errors,\n- still-pending tasks:\n - use cached tool definitions if available (`MCPToolCache.get`) to create `DeferredMCPTool`s,\n - otherwise block until those pending tasks settle.\n\nThis is a hybrid startup model: fast return when cache is available, correctness wait when cache is not.\n\n### Background completion behavior\n\nEach pending `toolsPromise` also has a background continuation that eventually:\n\n- replaces that server’s tool slice in manager state via `#replaceServerTools`,\n- writes cache,\n- logs late failures only after startup (`allowBackgroundLogging`).\n\n## Tool exposure and live-session availability\n\n### Startup registration\n\n`discoverAndLoadMCPTools()` converts manager tools into `LoadedCustomTool[]` and decorates paths (`mcp:<server> via <providerName>` when known).\n\n`createAgentSession()` then pushes these tools into `customTools`, which are wrapped and added to the runtime tool registry with names like `mcp__<server>_<tool>`.\n\n### Tool calls\n\n- `MCPTool` calls tools through an already connected `MCPServerConnection`.\n- `DeferredMCPTool` waits for `waitForConnection(server)` before calling; this allows cached tools to exist before connection is ready.\n- Both attempt a reconnect + single retry for retriable connection 
failures.\n\nBoth return structured tool output and convert remaining transport/tool errors into `MCP error: ...` tool content (abort remains abort).\n\n## Refresh/reload paths (startup vs live reload)\n\n### Initial startup path\n\n- one-time discovery/load in `sdk.ts`,\n- tools are registered in initial session tool registry.\n\n### Interactive reload path\n\n`/mcp reload` path (`src/modes/controllers/mcp-command-controller.ts`) does:\n\n1. `mcpManager.disconnectAll()`,\n2. `mcpManager.discoverAndConnect()`,\n3. `session.refreshMCPTools(mcpManager.getTools())`.\n\n`session.refreshMCPTools()` (`src/session/agent-session.ts`) removes all `mcp__` tools, re-wraps latest MCP tools, and re-activates tool set so MCP changes apply without restarting session.\n\nThere is also a follow-up path for late connections: after waiting for a specific server, if status becomes `connected`, it re-runs `session.refreshMCPTools(...)` so newly available tools are rebound in-session.\n\n## Health, reconnect, and partial failure behavior\n\nCurrent runtime behavior is connection-event driven:\n\n- **No autonomous polling health monitor** in manager/client.\n- **Automatic reconnect is wired to `transport.onClose`** for managed connections.\n- Reconnect retries with backoff (`500`, `1000`, `2000`, `4000` ms), reloads tools, and notifies consumers on success.\n- Tool calls that see retriable connection errors also attempt one reconnect + retry.\n- Reconnect is also explicit via `/mcp reconnect <name>` or broader `/mcp reload`.\n\nOperationally:\n\n- one server failing does not remove tools from healthy servers,\n- connect/list failures are isolated per server,\n- stale tools may remain visible while reconnect is attempted; calls report MCP errors if recovery fails,\n- tool cache, resource/prompt loading, subscriptions, and background updates are best-effort (warnings/errors logged, no hard stop).\n\n## Teardown semantics\n\n### Server-level teardown\n\n`disconnectServer(name)`:\n\n- 
removes pending entries, source metadata, saved config, resource refresh/subscription state,\n- detaches `onClose` so explicit close does not trigger reconnect,\n- closes transport if connected,\n- filters manager tool state for names beginning with `mcp__${name}_`.\n\n### Global teardown\n\n`disconnectAll()`:\n\n- detaches `onClose` for all active transports, then closes them with `Promise.allSettled`,\n- clears pending maps, sources, saved configs, connections, subscriptions, resource refreshes, and manager tool list.\n\nIn current wiring, explicit teardown is used in MCP command flows (for reload/remove/disable). Startup stores the manager on the session; callers that need deterministic MCP shutdown should invoke manager disconnect methods.\n\n## Failure modes and guarantees\n\n| Scenario | Behavior | Hard fail vs best-effort |\n| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | ------------------------------ |\n| Discovery throws (capability/config load path) | Loader returns empty tools + synthetic `.mcp.json` error | Best-effort session startup |\n| Invalid server config | Server skipped with validation error entry | Best-effort per server |\n| Connect timeout/init failure | Server error recorded; others continue | Best-effort per server |\n| `tools/list` still pending at startup with cache hit | Deferred tools returned immediately | Best-effort fast startup |\n| `tools/list` still pending at startup without cache | Startup waits for pending to settle | Hard wait for correctness |\n| Late background tool-load failure | Logged after startup gate | Best-effort logging |\n| Runtime dropped transport | Manager attempts reconnect; stale tools remain while reconnecting and future calls may retry once or fail with MCP errors | Best-effort automatic recovery |\n\n## Public API surface\n\n`src/mcp/index.ts` re-exports loader/manager/client 
APIs for external callers. `src/sdk.ts` exposes `discoverMCPServers()` as a convenience wrapper returning the same loader result shape.\n\n## Implementation files\n\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts) — loader facade, discovery error normalization, `LoadedCustomTool` conversion.\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts) — lifecycle state registries, parallel connect/list flow, refresh/disconnect.\n- [`src/mcp/client.ts`](../packages/coding-agent/src/mcp/client.ts) — transport setup, initialize handshake, list/call/disconnect.\n- [`src/mcp/index.ts`](../packages/coding-agent/src/mcp/index.ts) — MCP module API exports.\n- [`src/sdk.ts`](../packages/coding-agent/src/sdk.ts) — startup wiring into session/tool registry.\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts) — config discovery/filtering/validation used by manager.\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts) — `MCPTool` and `DeferredMCPTool` runtime behavior.\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts) — `refreshMCPTools` live rebinding.\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts) — interactive reload/reconnect flows.\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts) — subagent MCP proxying via parent manager connections.\n",
22
22
  "mcp-server-tool-authoring.md": "# MCP server and tool authoring\n\nThis document explains how MCP server definitions become callable `mcp__*` tools in coding-agent, and what operators should expect when configs are invalid, duplicated, disabled, or auth-gated.\n\n## Architecture at a glance\n\n```text\nConfig sources (.omp/.claude/.cursor/.vscode/mcp.json, mcp.json, etc.)\n -> discovery providers normalize to canonical MCPServer\n -> capability loader dedupes by server name (higher provider priority wins)\n -> loadAllMCPConfigs converts to MCPServerConfig + skips enabled:false\n -> MCPManager connects/listTools (with auth/header/env resolution)\n -> manager best-effort loads resources/prompts and subscribes to resource updates when enabled\n -> MCPTool/DeferredMCPTool bridge exposes tools as mcp__<server>_<tool>\n -> AgentSession.refreshMCPTools replaces live MCP tools immediately\n```\n\n## 1) Server config model and validation\n\n`src/mcp/types.ts` defines the authoring shape used by MCP config writers and runtime:\n\n- `stdio` (default when `type` missing): requires `command`, optional `args`, `env`, `cwd`\n- `http`: requires `url`, optional `headers`\n- `sse`: requires `url`, optional `headers` (kept for compatibility)\n- shared fields: `enabled`, `timeout`, `auth`, `oauth`\n\n`validateServerConfig()` (`src/mcp/config.ts`) enforces transport basics:\n\n- rejects configs that set both `command` and `url`\n- requires `command` for stdio\n- requires `url` for http/sse\n- rejects unknown `type`\n\n`config-writer.ts` applies this validation for add/update operations and also validates server names:\n\n- non-empty\n- max 100 chars\n- only `[a-zA-Z0-9_.-]`\n\n### Transport pitfalls\n\n- `type` omitted means stdio. 
If you intended HTTP/SSE but omitted `type`, `command` becomes mandatory.\n- `sse` is still accepted but treated as HTTP transport internally (`createHttpTransport`).\n- Validation is structural, not reachability: a syntactically valid URL can still fail at connect time.\n\n## 2) Discovery, normalization, and precedence\n\n### Capability-based discovery\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical `MCPServer` items via `loadCapability(mcpCapability.id)`.\n\nThe capability layer (`src/capability/index.ts`) then:\n\n1. loads providers in priority order\n2. dedupes by `server.name` (first win = highest priority)\n3. validates deduped items\n\nResult: duplicate server names across sources are not merged. One definition wins; lower-priority duplicates are shadowed.\n\n### `.mcp.json` and related files\n\nThe dedicated fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json` (low priority).\n\nIn practice MCP servers also come from higher-priority providers (for example native `.omp/...` and tool-specific config dirs). Authoring guidance:\n\n- Prefer `.omp/mcp.json` (project) or `~/.omp/agent/mcp.json` (user) for explicit control.\n- Use root `mcp.json` / `.mcp.json` when you need fallback compatibility.\n- Reusing the same server name in multiple sources causes precedence shadowing, not merge.\n\n### Normalization behavior\n\n`convertToLegacyConfig()` (`src/mcp/config.ts`) maps canonical `MCPServer` to runtime `MCPServerConfig`.\n\nKey behavior:\n\n- transport inferred as `server.transport ?? (command ? \"stdio\" : url ? 
\"http\" : \"stdio\")`\n- disabled servers (`enabled === false`) are dropped before connection\n- optional fields are preserved when present\n\n### Environment expansion during discovery\n\n`mcp-json.ts` expands env placeholders in string fields with `expandEnvVarsDeep()`:\n\n- supports `${VAR}` and `${VAR:-default}`\n- unresolved values remain literal `${VAR}` strings\n\n`mcp-json.ts` also performs runtime type checks for user JSON and logs warnings for invalid `enabled`/`timeout` values instead of hard-failing the whole file.\n\n## 3) Auth and runtime value resolution\n\n`MCPManager.prepareConfig()`/`#resolveAuthConfig()` (`src/mcp/manager.ts`) is the final pre-connect pass.\n\n### OAuth credential injection\n\nIf config has:\n\n```ts\nauth: { type: \"oauth\", credentialId: \"...\" }\n```\n\nand credential exists in auth storage:\n\n- `http`/`sse`: injects `Authorization: Bearer <access_token>` header\n- `stdio`: injects `OAUTH_ACCESS_TOKEN` env var\n\nIf credential lookup fails, manager logs a warning and continues with unresolved auth.\n\n### Header/env value resolution\n\nBefore connect, manager resolves stdio `env` values and HTTP/SSE `headers` values via `resolveConfigValue()` (`src/config/resolve-config-value.ts`):\n\n- value starting with `!` => execute shell command, use trimmed stdout (cached)\n- failed, timed-out, or whitespace-only commands produce `undefined`, so that entry is omitted\n- otherwise, treat value as environment variable name first (`process.env[name]`), fallback to literal value\n\nOperational caveat: a mistyped `!` secret command can silently remove that header/env entry, producing downstream 401/403 or server startup failures. 
A mistyped environment variable name is sent literally unless that literal happens to be meaningful to the server.\n\n## 4) Tool bridge: MCP -> agent-callable tools\n\n`src/mcp/tool-bridge.ts` converts MCP tool definitions into `CustomTool`s.\n\n### Naming and collision domain\n\nTool names are generated as:\n\n```text\nmcp__<sanitized_server_name>_<sanitized_tool_name>\n```\n\nRules:\n\n- lowercases\n- non-`[a-z_]` chars become `_`\n- repeated underscores collapse\n- redundant `<server>_` prefix in tool name is stripped once\n\nThis avoids many collisions, but not all. Different raw names can still sanitize to the same identifier (for example `my-server` and `my.server` both sanitize similarly), and registry insertion is last-write-wins.\n\n### Schema mapping\n\n`tool-bridge.ts` passes each MCP `inputSchema` through `sanitizeSchemaForMCP()` before registering it as a `CustomTool` schema.\n\n### Execution mapping\n\n`MCPTool.execute()` / `DeferredMCPTool.execute()`:\n\n- calls MCP `tools/call`\n- flattens MCP content into displayable text\n- returns structured details (`serverName`, `mcpToolName`, provider metadata)\n- maps server-reported `isError` to `Error: ...` text result\n- attempts reconnect + one retry for retriable connection errors\n- maps remaining thrown transport/runtime failures to `MCP error: ...`\n- preserves abort semantics by translating AbortError into `ToolAbortError`\n\n## 5) Operator lifecycle: add/edit/remove and live updates\n\nInteractive mode exposes `/mcp` in `src/modes/controllers/mcp-command-controller.ts`.\n\nSupported operations:\n\n- `add` (wizard or quick-add)\n- `remove` / `rm`\n- `enable` / `disable`\n- `test`\n- `reauth` / `unauth`\n- `reconnect`\n- `reload`\n- `resources`, `prompts`, `notifications`\n- Smithery search/login/logout flows\n\nConfig writes are atomic (`writeMCPConfigFile`: temp file + rename).\n\nAfter changes, controller calls `#reloadMCP()`:\n\n1. `mcpManager.disconnectAll()`\n2. 
`mcpManager.discoverAndConnect()`\n3. `session.refreshMCPTools(mcpManager.getTools())`\n\n`refreshMCPTools()` replaces all `mcp__` registry entries and immediately re-activates the latest MCP tool set, so changes take effect without restarting the session.\n\n### Mode differences\n\n- **Interactive/TUI mode**: `/mcp` gives in-app UX (wizard, OAuth flow, connection status text, immediate runtime rebinding).\n- **SDK/headless integration**: `discoverAndLoadMCPTools()` (`src/mcp/loader.ts`) returns loaded tools + per-server errors; no `/mcp` command UX.\n\n## 6) User-visible error surfaces\n\nCommon error strings users/operators see:\n\n- add/update validation failures:\n - `Invalid server config: ...`\n - `Server \"<name>\" already exists in <path>`\n- quick-add argument issues:\n - `Use either --url or -- <command...>, not both.`\n - `--token requires --url (HTTP/SSE transport).`\n- connect/test failures:\n - `Failed to connect to \"<name>\": <message>`\n - timeout help text suggests increasing timeout\n - auth help text for `401/403`\n- auth/OAuth flows:\n - `Authentication required ... OAuth endpoints could not be discovered`\n - `OAuth flow timed out. Please try again.`\n - `OAuth authentication failed: ...`\n- disabled server usage:\n - `Server \"<name>\" is disabled. Run /mcp enable <name> first.`\n\nBad source JSON in discovery is generally handled as warnings/logs; config-writer paths throw explicit errors.\n\n## 7) Practical authoring guidance\n\nFor robust MCP authoring in this codebase:\n\n1. Keep server names globally unique across all MCP-capable config sources.\n2. Prefer names that remain distinct after MCP tool-name sanitization to avoid generated `mcp__` collisions.\n3. Use explicit `type` to avoid accidental stdio defaults.\n4. Treat `enabled: false` as hard-off: server is omitted from runtime connect set.\n5. For OAuth configs, store a valid `credentialId`; otherwise auth injection is skipped.\n6. 
If using command-based secret resolution (`!cmd`), verify command output is stable and non-empty.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts)\n- [`src/mcp/config-writer.ts`](../packages/coding-agent/src/mcp/config-writer.ts)\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts)\n- [`src/discovery/mcp-json.ts`](../packages/coding-agent/src/discovery/mcp-json.ts)\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/config/resolve-config-value.ts`](../packages/coding-agent/src/config/resolve-config-value.ts)\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts)\n",
23
23
  "memory.md": "# Autonomous Memory\n\nWhen enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into each new session. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable via `/settings` or `config.yml`:\n\n```yaml\nmemories:\n enabled: true\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Cite the memory artifact path when memory changes the plan, and pair it with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Reading memory artifacts\n\nThe agent can read memory files directly using `memory://` URLs with the `read` tool:\n\n| URL | Content |\n| -------------------------------------- | ----------------------------------- |\n| `memory://root` | Compact summary injected at startup |\n| `memory://root/MEMORY.md` | Full long-term memory document |\n| `memory://root/skills/<name>/SKILL.md` | A generated skill playbook |\n\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | ---------------------------------------------- |\n| `view` | Show the current memory injection payload |\n| `clear` / `reset` | Delete all memory data and generated artifacts |\n| `enqueue` / `rebuild` | Force consolidation to run at next startup |\n\n## How it works\n\nMemories are built by a background pipeline that runs at startup or when manually triggered via slash command.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads 
the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, or currently active are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease to prevent double-running when multiple processes start simultaneously. Stale skill directories from prior runs are pruned automatically.\n\nAll output is scanned for secrets before being written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| --------------------- | ------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the 
requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | --------------------------------------------------------- |\n| `memories.enabled` | `false` | Master switch |\n| `memories.maxRolloutAgeDays` | `30` | Sessions older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — `memory://` URL handler\n",
24
- "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for 
Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. 
Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. 
existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n 
baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. 
`openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now 
also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. 
This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.3-codex-spark:\n contextPromotionTarget: openai-codex/gpt-5.3-codex\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. 
Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. 
Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. 
Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. 
Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
24
+ "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for 
Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, or `lm-studio`\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. 
Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. 
existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `input`, `cost`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL` or `http://127.0.0.1:11434`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n 
baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. 
`openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path 
prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. 
smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for Spark -> non-Spark on the same provider:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.3-codex-spark:\n contextPromotionTarget: openai-codex/gpt-5.3-codex\n```\n\nThe built-in model generator also assigns this automatically for `*-spark` models when a same-provider base model exists.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/model-registry.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. 
Default: auto.\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). Default: `false`.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). 
Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. 
This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. 
Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
25
25
  "natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document covers the runtime loader shipped by `@oh-my-pi/pi-natives`: how `native/index.js` decides which `.node` file to require, how compiled-binary embedded payloads are extracted, and what startup failures report.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nThe loader is intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Treat an embedded-addon manifest as the authoritative compiled-binary signal when present.\n- Optionally materialize an embedded addon into a versioned per-user cache directory.\n- Attempt candidates in deterministic order and return the first addon that `require(...)` loads.\n\nThe current loader does **not** run a separate `validateNative(...)` export-presence gate. API shape is provided by the generated N-API binding file (`native/index.d.ts`) and the loaded addon itself. 
A stale binary therefore normally fails as a missing property or native load error rather than as a custom \"missing exports\" validation error.\n\n## Runtime inputs and derived state\n\nAt module initialization, `native/index.js` computes:\n\n- **Platform tag**: `${process.platform}-${process.arch}` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json`.\n- **Core directories**:\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/omp` or `%USERPROFILE%/AppData/Local/omp`.\n - Non-Windows: `~/.local/bin`.\n- **Natives cache root** (`getNativesDir()`):\n - if `$XDG_DATA_HOME/omp` exists, `$XDG_DATA_HOME/omp/natives`;\n - otherwise `~/.omp/natives`.\n- **Compiled-binary mode** (`detectCompiledBinary`): true if any of:\n - embedded-addon manifest is non-null,\n - `PI_COMPILED` env var is set,\n - `import.meta.url` contains Bun embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Variant override**: `PI_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nUnsupported platforms are not rejected before probing. The loader first tries the computed candidate paths. If all fail and `platformTag` is unsupported, it throws an unsupported-platform error listing supported tags.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. `PI_NATIVE_VARIANT=modern|baseline` wins when valid.\n2. 
Otherwise AVX2 support is detected:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`.\n - Windows: PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. AVX2 selects `modern`; unavailable or undetectable AVX2 selects `baseline`.\n\n### Non-x64 behavior\n\nNo variant suffix is used; the filename is `pi_natives.<platform>-<arch>.node`.\n\n### Filename construction\n\n`loader-state.js#getAddonFilenames` returns:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`:\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node`\n 3. `pi_natives.<tag>.node`\n- x64 + `baseline`:\n 1. `pi_natives.<tag>-baseline.node`\n 2. `pi_natives.<tag>.node`\n\nThe default unsuffixed fallback remains part of the x64 candidate list.\n\n## Candidate path construction and fallback ordering\n\n`resolveLoaderCandidates(...)` expands every filename across directories, then de-duplicates while preserving first occurrence order.\n\n### Non-compiled runtime\n\nFor each filename, candidates are:\n\n1. `<nativeDir>/<filename>`\n2. `<execDir>/<filename>`\n\n### Compiled runtime\n\nFor each filename, candidates are:\n\n1. `<versionedDir>/<filename>`\n2. `<userDataDir>/<filename>`\n3. `<nativeDir>/<filename>`\n4. `<execDir>/<filename>`\n\nAt load time, an extracted embedded candidate, when produced, is prepended ahead of these de-duplicated candidates.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.js` is generated by `scripts/embed-native.ts`. The reset stub exports `embeddedAddon = null`. A populated manifest has:\n\n- `platformTag`\n- `version`\n- `files[]` entries with `variant`, `filename`, and `filePath`\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when:\n\n1. compiled-binary mode is true,\n2. `embeddedAddon` is non-null,\n3. manifest `platformTag` equals the runtime platform tag,\n4. manifest `version` equals the package version,\n5. 
a variant-appropriate embedded file exists.\n\nVariant file selection:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization:\n\n1. Ensure `<versionedDir>` exists.\n2. Reuse `<versionedDir>/<selected filename>` if it already exists.\n3. Otherwise read `selectedEmbeddedFile.filePath` and write the target path.\n4. Return the target path as the first candidate.\n\nDirectory creation or write failures are appended to the loader error list; probing continues through normal candidates.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Load package metadata and embedded-addon manifest\n -> Compute platform/version/variant/filenames/candidate paths\n -> (compiled + embedded manifest matches?)\n yes -> try extract to versionedDir (record errors, continue)\n no -> skip extraction\n -> For each runtime candidate in order:\n require(candidate)\n -> success: return addon exports (READY)\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (tried-path diagnostics + hints)\n```\n\n## Failure behavior and diagnostics\n\n### Unsupported platform\n\nIf all candidates fail and `platformTag` is not supported, the loader throws:\n\n- `Unsupported platform: <tag>`\n- supported platform list\n- issue-reporting guidance\n\n### No loadable candidate\n\nIf the platform is supported but no candidate can be loaded, the final error includes:\n\n- `Failed to load pi_natives native addon for <platformTag>` or `<platformTag> (<variant>)`\n- every attempted path with the corresponding `require(...)` error\n- mode-specific remediation hints\n\n### Compiled-binary startup failures\n\nCompiled mode diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete the versioned cache and rerun,\n- direct release download 
`curl` commands for each expected filename.\n\n### Non-compiled startup failures\n\nNormal package/runtime diagnostics include:\n\n- reinstall hint (`bun install @oh-my-pi/pi-natives`),\n- local rebuild command (`bun --cwd=packages/natives run build`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern bun --cwd=packages/natives run build`).\n",
26
26
  "natives-architecture.md": "# Natives Architecture\n\n`@oh-my-pi/pi-natives` is now a two-layer package around a loader:\n\n1. **CommonJS loader/package entrypoint** resolves and loads the correct `.node` addon and patches generated enum objects onto the export object.\n2. **Rust N-API module layer** implements the exported functions/classes and emits the generated TypeScript declarations.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Package entrypoint and public surface\n\n`packages/natives/package.json` points directly at generated native bindings:\n\n- `main`: `./native/index.js`\n- `types`: `./native/index.d.ts`\n- `exports[\".\"].types`: `./native/index.d.ts`\n- `exports[\".\"].import`: `./native/index.js`\n\nThere is no current `packages/natives/src` TypeScript wrapper layer. 
Consumers import functions/classes/enums directly from `@oh-my-pi/pi-natives`; the type contract is the generated `native/index.d.ts` plus enum exports appended by `scripts/gen-enums.ts`.\n\nCurrent capability groups in the generated API include:\n\n- **Search/text/code primitives**: `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `astGrep`, `astEdit`, text width/slicing/wrapping/sanitization, syntax highlighting, token counting.\n- **Execution/process/terminal primitives**: `executeShell`, `Shell`, `PtySession`, process-tree helpers, key parsing.\n- **System/media/conversion primitives**: clipboard, image resize/encode/SIXEL, HTML-to-Markdown, macOS appearance/power helpers, work profiling, Windows ProjFS overlay helpers.\n\n## Loader layer\n\n`packages/natives/native/index.js` owns runtime addon selection and optional embedded extraction.\n\n### Candidate resolution model\n\n- Platform tag is `${process.platform}-${process.arch}`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-x64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename without a variant suffix.\n\nFilename strategy:\n\n- Default: `pi_natives.<platform>-<arch>.node`\n- x64 variant: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- x64 runtime fallback includes the unsuffixed default filename after variant candidates.\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- Linux: `/proc/cpuinfo`\n- macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`\n- Windows: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`PI_NATIVE_VARIANT` can force `modern` or `baseline`; invalid values are ignored.\n\n### Binary distribution and extraction model\n\n`packages/natives/package.json` publishes `native/`, which contains the loader, generated declarations, generated enum patch, embedded-addon 
manifest stub, and prebuilt `.node` artifacts.\n\nFor compiled binaries, loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`.\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/omp` (fallback `%USERPROFILE%/AppData/Local/omp`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates.\n\n`getNativesDir()` uses `$XDG_DATA_HOME/omp/natives` when `$XDG_DATA_HOME/omp` exists; otherwise it uses `~/.omp/natives`.\n\nIf a populated embedded addon manifest is present, it is also treated as a compiled-binary signal. The loader can extract the matching embedded `.node` into the versioned cache directory before candidate probing.\n\n### Failure modes\n\nLoader failures are explicit:\n\n- **Unsupported platform tag**: after failed probing, throws with supported platform list.\n- **No loadable candidate**: throws with all attempted paths and remediation hints.\n- **Embedded extraction errors**: directory/write failures are recorded and included in final load diagnostics if no candidate loads.\n\nThe current loader does not perform a separate post-`require` export validation pass.\n\n## Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` declares exported module ownership:\n\n- `appearance`\n- `ast`\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `image`\n- `keys`\n- `language`\n- `power`\n- `prof`\n- `projfs_overlay`\n- `ps`\n- `pty`\n- `shell`\n- `task`\n- `text`\n- `tokens`\n- `utils` (crate-private helpers)\n\nN-API exports are generated from Rust `#[napi]` functions/classes/objects/enums. 
Snake_case Rust names are exposed as camelCase JavaScript names unless explicitly configured by napi-rs.\n\n## Ownership boundaries\n\n- **Loader/package ownership (`packages/natives/native`, `packages/natives/scripts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary embedded extraction\n - generated TypeScript declarations and enum export patching\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation consumed directly by package callers\n- **Consumer ownership (`packages/coding-agent`, `packages/tui`)**\n - user-facing policy and fallbacks that are not built into the native API\n - higher-level rendering, artifact, shell-session, and command behavior\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@oh-my-pi/pi-natives`.\n2. `native/index.js` computes platform/arch/variant and candidate paths.\n3. Optional embedded binary extraction occurs for compiled distributions.\n4. The first `require(candidate)` that succeeds becomes the exported addon object.\n5. Generated enum objects are appended to `module.exports`.\n6. Caller invokes generated N-API functions/classes directly.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Generated binding declaration**: `native/index.d.ts` emitted by napi-rs during `build-native.ts`.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from embedded/cache paths before package-local paths.\n- **Embedded addon**: Build artifact metadata and file references generated into `native/embedded-addon.js` so compiled binaries can extract matching `.node` payloads.\n",
27
27
  "natives-binding-contract.md": "# Natives Binding Contract (JavaScript/TypeScript Side)\n\nThis document defines the JS/TS contract between `@oh-my-pi/pi-natives` callers and the loaded N-API addon.\n\nCurrent package shape is direct-to-native: there is no `packages/natives/src/<module>` TypeScript wrapper layer. The public API is the generated `packages/natives/native/index.d.ts` declaration file, the CommonJS loader in `packages/natives/native/index.js`, and the Rust `#[napi]` exports in `crates/pi-natives/src`.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n- Rust modules under `crates/pi-natives/src/*.rs`\n\n## Contract model\n\nThe contract has three parts:\n\n1. **Generated runtime loader** (`native/index.js`)\n - computes candidates and `require(...)`s the `.node` addon;\n - exports the loaded addon object directly;\n - appends enum objects generated by `scripts/gen-enums.ts`.\n2. **Generated TypeScript declarations** (`native/index.d.ts`)\n - generated by napi-rs during `scripts/build-native.ts`;\n - declares exported functions, classes, object interfaces, and native enums;\n - is the package `types` entry.\n3. 
**Rust N-API exports** (`crates/pi-natives/src`)\n - `#[napi]` functions/classes/objects/enums are the source of generated declarations and runtime symbols;\n - snake_case Rust names become camelCase JavaScript names by napi-rs convention.\n\nThere is no current `NativeBindings` declaration-merging lifecycle and no `validateNative(...)` required-export list in the loader.\n\n## Public export surface organization\n\n`packages/natives/package.json` exposes the package root only:\n\n```json\n{\n \"main\": \"./native/index.js\",\n \"types\": \"./native/index.d.ts\",\n \"exports\": {\n \".\": {\n \"types\": \"./native/index.d.ts\",\n \"import\": \"./native/index.js\"\n }\n }\n}\n```\n\nConsumers in `packages/coding-agent` and `packages/tui` import directly from `@oh-my-pi/pi-natives`.\n\n## JS API ↔ native export mapping (representative)\n\n| Category | Public JS API | Rust source | Return style |\n| ----------------- | ---------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- | ----------------------------- |\n| Grep | `grep(options, onMatch?)` | `grep.rs` | `Promise<GrepResult>` |\n| Grep | `search(content, options)` | `grep.rs` | `SearchResult` |\n| Grep | `hasMatch(content, pattern, ignoreCase?, multiline?)` | `grep.rs` | `boolean` |\n| Fuzzy path search | `fuzzyFind(options)` | `fd.rs` | `Promise<FuzzyFindResult>` |\n| Glob | `glob(options, onMatch?)` | `glob.rs` | `Promise<GlobResult>` |\n| Glob cache | `invalidateFsScanCache(path?)` | `fs_cache.rs` | `void` |\n| AST search/edit | `astGrep(options)`, `astEdit(options)` | `ast.rs` | `Promise<...>` |\n| Shell | `executeShell(options, onChunk?)` | `shell.rs` | `Promise<ShellExecuteResult>` |\n| Shell | `new Shell(options?)`, `shell.run(...)`, `shell.abort()` | `shell.rs` | class / promises |\n| PTY | `new PtySession()`, `start/write/resize/kill` | `pty.rs` | class / promises |\n| Process | `killTree(pid, 
signal)`, `listDescendants(pid)` | `ps.rs` | sync |\n| Keys | `parseKey`, `matchesKey`, Kitty/legacy helpers | `keys.rs` | sync |\n| Text | `wrapTextWithAnsi`, `truncateToWidth`, `sliceWithWidth`, `extractSegments`, `sanitizeText`, `visibleWidth` | `text.rs` | sync |\n| Highlight | `highlightCode`, `supportsLanguage`, `getSupportedLanguages` | `highlight.rs` | sync |\n| HTML | `htmlToMarkdown(html, options?)` | `html.rs` | `Promise<string>` |\n| Image | `PhotonImage`, `encodeSixel` | `image.rs` | class / sync / promises |\n| Clipboard | `copyToClipboard`, `readImageFromClipboard` | `clipboard.rs` | sync / promise |\n| Tokens | `countTokens(input, encoding?)` | `tokens.rs` | sync |\n| System | `detectMacOSAppearance`, `MacAppearanceObserver`, `MacOSPowerAssertion`, `getWorkProfile`, ProjFS helpers | `appearance.rs`, `power.rs`, `prof.rs`, `projfs_overlay.rs` | mixed |\n\n## Sync vs async contract differences\n\nThe contract preserves Rust/N-API call style:\n\n- **Promise-returning exports** for worker-thread or async runtime work (`grep`, `glob`, `fuzzyFind`, `astGrep`, `astEdit`, `htmlToMarkdown`, shell/PTY runs, image parse/resize/encode, clipboard image read).\n- **Synchronous exports** for deterministic in-memory transforms/parsers or direct system calls (`search`, `hasMatch`, highlighting, text utilities, token counting, process queries, `copyToClipboard`, `encodeSixel`).\n- **Constructor exports** for stateful runtime objects (`Shell`, `PtySession`, `PhotonImage`, macOS observer/power handles).\n\nChanging sync ↔ async for an existing export is a breaking public API change because consumers call these exports directly.\n\n## Object and enum typing patterns\n\n### Object patterns\n\n`#[napi(object)]` Rust structs become TS interfaces, for example:\n\n- `GrepResult`, `SearchResult`, `GlobResult`, `FuzzyFindResult`\n- `ShellRunResult`, `ShellExecuteResult`, `PtyRunResult`, `MinimizerResult`\n- `AstFindResult`, `AstReplaceResult`\n- `System`/media payloads such as 
`ClipboardImage`, `WorkProfile`, `ParsedKittyResult`\n\nRuntime shape correctness is owned by napi-rs and the Rust implementation.\n\n### Enum patterns\n\nNative enums are represented in generated declarations and also appended to `module.exports` by `scripts/gen-enums.ts`, because the loader is hand-maintained CommonJS around the generated addon. Current enum objects include:\n\n- `AstMatchStrictness`\n- `Ellipsis`\n- `Encoding`\n- `FileType`\n- `GrepOutputMode`\n- `ImageFormat`\n- `KeyEventType`\n- `MacOSAppearance`\n- `SamplingFilter`\n\n## Error behavior and caveats\n\n- Addon load failure or unsupported platform throws during package import from `native/index.js`.\n- The loader does not verify the full export set after `require(...)`; stale or mismatched binaries surface as native load errors or missing members at use sites.\n- N-API conversion validates basic argument conversion, but TS optional fields do not guarantee semantic validity for untyped callers.\n- Numeric enum declarations do not prevent out-of-range numeric values from untyped callers unless the Rust function rejects them during conversion.\n- Callback exports use napi-rs `ThreadsafeFunction` shape: `(error: Error | null, value) => void`. Native code generally emits successful values; hard failures reject/throw through the owning call.\n\n## Maintainer checklist for binding changes\n\nWhen adding/changing an export, update all of:\n\n1. Rust `#[napi]` implementation in the owning `crates/pi-natives/src/<module>.rs`.\n2. `crates/pi-natives/src/lib.rs` if a new module is added.\n3. Any consumer imports/callsites in `packages/coding-agent` or `packages/tui`.\n4. Build output by running the natives build so `native/index.d.ts` and `native/index.js` stay in sync.\n5. 
`scripts/gen-enums.ts` if enum runtime export patching needs to change.\n\nDo not add a parallel TS wrapper convention unless the package design intentionally moves back to wrappers; current consumers depend on the direct generated API.\n",
package/src/main.ts CHANGED
@@ -5,13 +5,20 @@
5
5
  * createAgentSession() options. The SDK does the heavy lifting.
6
6
  */
7
7
 
8
- import { realpathSync } from "node:fs";
9
8
  import * as fs from "node:fs/promises";
10
9
  import * as os from "node:os";
11
10
  import * as path from "node:path";
12
11
  import { createInterface } from "node:readline/promises";
13
12
  import type { ImageContent } from "@oh-my-pi/pi-ai";
14
- import { $env, getProjectDir, logger, postmortem, setProjectDir, VERSION } from "@oh-my-pi/pi-utils";
13
+ import {
14
+ $env,
15
+ getProjectDir,
16
+ logger,
17
+ normalizePathForComparison,
18
+ postmortem,
19
+ setProjectDir,
20
+ VERSION,
21
+ } from "@oh-my-pi/pi-utils";
15
22
  import chalk from "chalk";
16
23
  import type { Args } from "./cli/args";
17
24
  import { processFileArguments } from "./cli/file-processor";
@@ -216,15 +223,6 @@ async function runInteractiveMode(
216
223
  }
217
224
  }
218
225
 
219
- function normalizePathForComparison(value: string): string {
220
- const resolved = path.resolve(value);
221
- let realPath = resolved;
222
- try {
223
- realPath = realpathSync(resolved);
224
- } catch {}
225
- return process.platform === "win32" ? realPath.toLowerCase() : realPath;
226
- }
227
-
228
226
  async function promptForkSession(session: SessionInfo): Promise<boolean> {
229
227
  if (!process.stdin.isTTY) {
230
228
  return false;
@@ -353,10 +351,7 @@ async function maybeAutoChdir(parsed: Args): Promise<void> {
353
351
  return;
354
352
  }
355
353
 
356
- const normalizePath = (value: string) => {
357
- const resolved = realpathSync(path.resolve(value));
358
- return process.platform === "win32" ? resolved.toLowerCase() : resolved;
359
- };
354
+ const normalizePath = normalizePathForComparison;
360
355
 
361
356
  const cwd = normalizePath(getProjectDir());
362
357
  const normalizedHome = normalizePath(home);
@@ -540,11 +535,11 @@ async function buildSessionOptions(
540
535
 
541
536
  // System prompt
542
537
  if (resolvedSystemPrompt && resolvedAppendPrompt) {
543
- options.systemPrompt = `${resolvedSystemPrompt}\n\n${resolvedAppendPrompt}`;
538
+ options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, resolvedAppendPrompt, ...defaultPrompt.slice(1)];
544
539
  } else if (resolvedSystemPrompt) {
545
- options.systemPrompt = resolvedSystemPrompt;
540
+ options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, ...defaultPrompt.slice(1)];
546
541
  } else if (resolvedAppendPrompt) {
547
- options.systemPrompt = defaultPrompt => `${defaultPrompt}\n\n${resolvedAppendPrompt}`;
542
+ options.systemPrompt = defaultPrompt => [...defaultPrompt, resolvedAppendPrompt];
548
543
  }
549
544
 
550
545
  // Tools
@@ -602,7 +602,7 @@ async function runStage1Job(options: {
602
602
  const response = await completeSimple(
603
603
  model,
604
604
  {
605
- systemPrompt: stageOneSystemTemplate,
605
+ systemPrompt: [stageOneSystemTemplate],
606
606
  messages: [{ role: "user", content: [{ type: "text", text: inputPrompt }], timestamp: Date.now() }],
607
607
  },
608
608
  {
@@ -635,7 +635,7 @@ export class AgentDashboard extends Container {
635
635
  modelRegistry,
636
636
  settings,
637
637
  model: selectedModel,
638
- systemPrompt,
638
+ systemPrompt: [systemPrompt],
639
639
  hasUI: false,
640
640
  enableLsp: false,
641
641
  enableMCP: false,
@@ -8,6 +8,7 @@ import type { ToolExecutionHandle } from "./tool-execution";
8
8
  type ReadRenderArgs = {
9
9
  path?: string;
10
10
  file_path?: string;
11
+ // Legacy field from the old schema; tolerated for rebuilt transcripts.
11
12
  sel?: string;
12
13
  };
13
14
 
@@ -37,7 +38,6 @@ function getSuffixResolution(details: ReadToolResultDetails | undefined): ReadTo
37
38
  type ReadEntry = {
38
39
  toolCallId: string;
39
40
  path: string;
40
- sel?: string;
41
41
  status: "pending" | "success" | "warning" | "error";
42
42
  correctedFrom?: string;
43
43
  contentText?: string;
@@ -62,15 +62,14 @@ export class ReadToolGroupComponent extends Container implements ToolExecutionHa
62
62
 
63
63
  updateArgs(args: ReadRenderArgs, toolCallId?: string): void {
64
64
  if (!toolCallId) return;
65
- const rawPath = args.file_path || args.path || "";
65
+ const basePath = args.file_path || args.path || "";
66
+ const rawPath = args.sel ? `${basePath}:${args.sel}` : basePath;
66
67
  const entry: ReadEntry = this.#entries.get(toolCallId) ?? {
67
68
  toolCallId,
68
69
  path: rawPath,
69
- sel: args.sel,
70
70
  status: "pending",
71
71
  };
72
72
  entry.path = rawPath;
73
- entry.sel = args.sel;
74
73
  this.#entries.set(toolCallId, entry);
75
74
  this.#updateDisplay();
76
75
  }
@@ -172,9 +171,8 @@ export class ReadToolGroupComponent extends Container implements ToolExecutionHa
172
171
  #addContentPreview(entry: ReadEntry): void {
173
172
  const lang = getLanguageFromPath(entry.path);
174
173
  const filePath = shortenPath(entry.path);
175
- const selectionSuffix = entry.sel ? `:${entry.sel}` : "";
176
174
  const correctionSuffix = entry.correctedFrom ? ` (corrected from ${shortenPath(entry.correctedFrom)})` : "";
177
- const title = filePath ? `Read ${filePath}${selectionSuffix}${correctionSuffix}` : `Read${selectionSuffix}`;
175
+ const title = filePath ? `Read ${filePath}${correctionSuffix}` : "Read";
178
176
  let cachedWidth: number | undefined;
179
177
  let cachedLines: string[] | undefined;
180
178
  const expanded = this.#expanded;
@@ -211,9 +209,6 @@ export class ReadToolGroupComponent extends Container implements ToolExecutionHa
211
209
  #formatPath(entry: ReadEntry): string {
212
210
  const filePath = shortenPath(entry.path);
213
211
  let pathDisplay = filePath ? theme.fg("accent", filePath) : theme.fg("toolOutput", "…");
214
- if (entry.sel) {
215
- pathDisplay += theme.fg("warning", `:${entry.sel}`);
216
- }
217
212
  if (entry.correctedFrom) {
218
213
  pathDisplay += theme.fg("dim", ` (corrected from ${shortenPath(entry.correctedFrom)})`);
219
214
  }
@@ -67,6 +67,7 @@ export interface ToolExecutionOptions {
67
67
  showImages?: boolean; // default: true (only used if terminal supports images)
68
68
  editFuzzyThreshold?: number;
69
69
  editAllowFuzzy?: boolean;
70
+ hashlineAutoDropPureInsertDuplicates?: boolean;
70
71
  }
71
72
 
72
73
  export interface ToolExecutionHandle {
@@ -100,6 +101,7 @@ export class ToolExecutionComponent extends Container {
100
101
  #showImages: boolean;
101
102
  #editFuzzyThreshold: number | undefined;
102
103
  #editAllowFuzzy: boolean | undefined;
104
+ #hashlineAutoDropPureInsertDuplicates: boolean | undefined;
103
105
  #isPartial = true;
104
106
  #tool?: AgentTool;
105
107
  #ui: TUI;
@@ -147,6 +149,7 @@ export class ToolExecutionComponent extends Container {
147
149
  this.#showImages = options.showImages ?? true;
148
150
  this.#editFuzzyThreshold = options.editFuzzyThreshold;
149
151
  this.#editAllowFuzzy = options.editAllowFuzzy;
152
+ this.#hashlineAutoDropPureInsertDuplicates = options.hashlineAutoDropPureInsertDuplicates;
150
153
  this.#tool = tool;
151
154
  this.#ui = ui;
152
155
  this.#cwd = cwd;
@@ -248,6 +251,7 @@ export class ToolExecutionComponent extends Container {
248
251
  signal: controller.signal,
249
252
  fuzzyThreshold: this.#editFuzzyThreshold,
250
253
  allowFuzzy: this.#editAllowFuzzy,
254
+ hashlineAutoDropPureInsertDuplicates: this.#hashlineAutoDropPureInsertDuplicates,
251
255
  });
252
256
  if (controller.signal.aborted) return;
253
257
  if (previews) {
@@ -280,6 +280,7 @@ export class EventController {
280
280
  showImages: settings.get("terminal.showImages"),
281
281
  editFuzzyThreshold: settings.get("edit.fuzzyThreshold"),
282
282
  editAllowFuzzy: settings.get("edit.fuzzyMatch"),
283
+ hashlineAutoDropPureInsertDuplicates: settings.get("edit.hashlineAutoDropPureInsertDuplicates"),
283
284
  },
284
285
  tool,
285
286
  this.ctx.ui,
@@ -383,6 +384,7 @@ export class EventController {
383
384
  showImages: settings.get("terminal.showImages"),
384
385
  editFuzzyThreshold: settings.get("edit.fuzzyThreshold"),
385
386
  editAllowFuzzy: settings.get("edit.fuzzyMatch"),
387
+ hashlineAutoDropPureInsertDuplicates: settings.get("edit.hashlineAutoDropPureInsertDuplicates"),
386
388
  },
387
389
  tool,
388
390
  this.ctx.ui,
@@ -1060,7 +1060,7 @@ export class InteractiveMode implements InteractiveModeContext {
1060
1060
 
1061
1061
  async #approvePlan(
1062
1062
  planContent: string,
1063
- options: { planFilePath: string; finalPlanFilePath: string },
1063
+ options: { planFilePath: string; finalPlanFilePath: string; preserveContext?: boolean },
1064
1064
  ): Promise<void> {
1065
1065
  await renameApprovedPlanFile({
1066
1066
  planFilePath: options.planFilePath,
@@ -1070,14 +1070,16 @@ export class InteractiveMode implements InteractiveModeContext {
1070
1070
  });
1071
1071
  const previousTools = this.#planModePreviousTools ?? this.session.getActiveToolNames();
1072
1072
  await this.#exitPlanMode({ silent: true, paused: false });
1073
- await this.handleClearCommand();
1074
- // The new session has a fresh local:// root — persist the approved plan there
1075
- // so `local://<title>.md` resolves correctly in the execution session.
1076
- const newLocalPath = resolveLocalUrlToPath(options.finalPlanFilePath, {
1077
- getArtifactsDir: () => this.sessionManager.getArtifactsDir(),
1078
- getSessionId: () => this.sessionManager.getSessionId(),
1079
- });
1080
- await Bun.write(newLocalPath, planContent);
1073
+ if (!options.preserveContext) {
1074
+ await this.handleClearCommand();
1075
+ // The new session has a fresh local:// root persist the approved plan there
1076
+ // so `local://<title>.md` resolves correctly in the execution session.
1077
+ const newLocalPath = resolveLocalUrlToPath(options.finalPlanFilePath, {
1078
+ getArtifactsDir: () => this.sessionManager.getArtifactsDir(),
1079
+ getSessionId: () => this.sessionManager.getSessionId(),
1080
+ });
1081
+ await Bun.write(newLocalPath, planContent);
1082
+ }
1081
1083
  if (previousTools.length > 0) {
1082
1084
  await this.session.setActiveToolsByName(previousTools);
1083
1085
  }
@@ -1086,6 +1088,7 @@ export class InteractiveMode implements InteractiveModeContext {
1086
1088
  const planModePrompt = prompt.render(planModeApprovedPrompt, {
1087
1089
  planContent,
1088
1090
  finalPlanFilePath: options.finalPlanFilePath,
1091
+ contextPreserved: options.preserveContext === true,
1089
1092
  });
1090
1093
  await this.session.prompt(planModePrompt, { synthetic: true });
1091
1094
  }
@@ -1129,14 +1132,14 @@ export class InteractiveMode implements InteractiveModeContext {
1129
1132
  this.#renderPlanPreview(planContent);
1130
1133
  const choice = await this.showHookSelector(
1131
1134
  "Plan mode - next step",
1132
- ["Approve and execute", "Refine plan", "Stay in plan mode"],
1135
+ ["Approve and execute", "Approve and keep context", "Refine plan", "Stay in plan mode"],
1133
1136
  {
1134
1137
  helpText: this.#getPlanReviewHelpText(),
1135
1138
  onExternalEditor: () => void this.#openPlanInExternalEditor(planFilePath),
1136
1139
  },
1137
1140
  );
1138
1141
 
1139
- if (choice === "Approve and execute") {
1142
+ if (choice === "Approve and execute" || choice === "Approve and keep context") {
1140
1143
  const finalPlanFilePath = details.finalPlanFilePath || planFilePath;
1141
1144
  try {
1142
1145
  const latestPlanContent = await this.#readPlanFile(planFilePath);
@@ -1144,7 +1147,11 @@ export class InteractiveMode implements InteractiveModeContext {
1144
1147
  this.showError(`Plan file not found at ${planFilePath}`);
1145
1148
  return;
1146
1149
  }
1147
- await this.#approvePlan(latestPlanContent, { planFilePath, finalPlanFilePath });
1150
+ await this.#approvePlan(latestPlanContent, {
1151
+ planFilePath,
1152
+ finalPlanFilePath,
1153
+ preserveContext: choice === "Approve and keep context",
1154
+ });
1148
1155
  } catch (error) {
1149
1156
  this.showError(
1150
1157
  `Failed to finalize approved plan: ${error instanceof Error ? error.message : String(error)}`,
@@ -89,7 +89,7 @@ export interface RpcSessionState {
89
89
  queuedMessageCount: number;
90
90
  todoPhases: TodoPhase[];
91
91
  /** For session dump / export (plain-text parity with /dump). */
92
- systemPrompt?: string;
92
+ systemPrompt?: string[];
93
93
  dumpTools?: Array<{ name: string; description: string; parameters: unknown }>;
94
94
  /** Current context window usage. Null tokens/percent when unknown (e.g. right after compaction). */
95
95
  contextUsage?: ContextUsage;
@@ -18,13 +18,13 @@ const CELL_FILLED_MESSAGES = "⛃";
18
18
  const CELL_FREE = "⛶";
19
19
  const CELL_BUFFER = "⛝";
20
20
 
21
- type CategoryId = "systemPrompt" | "systemTools" | "skills" | "messages";
21
+ type CategoryId = "systemPrompt" | "systemContext" | "systemTools" | "skills" | "messages";
22
22
 
23
23
  interface CategoryInfo {
24
24
  id: CategoryId;
25
25
  label: string;
26
26
  tokens: number;
27
- color: "accent" | "warning" | "success" | "userMessageText";
27
+ color: "accent" | "warning" | "success" | "userMessageText" | "customMessageLabel";
28
28
  glyph: string;
29
29
  }
30
30
 
@@ -86,12 +86,19 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
86
86
  // Tools = JSON tool schema sent separately on the wire
87
87
  // Skills = the skill list embedded in the system prompt
88
88
  // Messages = conversation messages
89
- const systemPromptTextTokens = countTokens(session.systemPrompt);
90
- const systemPromptTokens = Math.max(0, systemPromptTextTokens - skillsTokens);
89
+ const systemPromptTokens = Math.max(0, countTokens(session.systemPrompt[0] ?? "") - skillsTokens);
90
+ const systemContextTokens = countTokens(session.systemPrompt.slice(1));
91
91
 
92
92
  const categories: CategoryInfo[] = [
93
93
  { id: "systemPrompt", label: "System prompt", tokens: systemPromptTokens, color: "accent", glyph: CELL_FILLED },
94
94
  { id: "systemTools", label: "System tools", tokens: toolsTokens, color: "warning", glyph: CELL_FILLED },
95
+ {
96
+ id: "systemContext",
97
+ label: "System context",
98
+ tokens: systemContextTokens,
99
+ color: "customMessageLabel",
100
+ glyph: CELL_FILLED,
101
+ },
95
102
  { id: "skills", label: "Skills", tokens: skillsTokens, color: "success", glyph: CELL_FILLED },
96
103
  {
97
104
  id: "messages",
@@ -134,7 +141,7 @@ export function computeContextBreakdown(session: AgentSession): ContextBreakdown
134
141
 
135
142
  interface CellSpec {
136
143
  glyph: string;
137
- color: "accent" | "warning" | "success" | "userMessageText" | "muted" | "dim";
144
+ color: "accent" | "warning" | "success" | "userMessageText" | "customMessageLabel" | "muted" | "dim";
138
145
  }
139
146
 
140
147
  function planCells(breakdown: ContextBreakdown): CellSpec[] {
@@ -336,6 +336,7 @@ export class UiHelpers {
336
336
  showImages: settings.get("terminal.showImages"),
337
337
  editFuzzyThreshold: settings.get("edit.fuzzyThreshold"),
338
338
  editAllowFuzzy: settings.get("edit.fuzzyMatch"),
339
+ hashlineAutoDropPureInsertDuplicates: settings.get("edit.hashlineAutoDropPureInsertDuplicates"),
339
340
  },
340
341
  tool,
341
342
  this.ctx.ui,
@@ -6,7 +6,7 @@ You **MUST NOT**:
6
6
  - Run state-changing commands (git commit, npm install, etc.)
7
7
  - Make any system changes
8
8
 
9
- To implement: call `{{exitToolName}}` → user approves new session starts with full write access to execute the plan.
9
+ To implement: call `{{exitToolName}}` → user approves an execution option full write access is restored to execute the plan.
10
10
  You **MUST NOT** ask the user to exit plan mode for you; you **MUST** call `{{exitToolName}}` yourself.
11
11
  </critical>
12
12
 
@@ -21,7 +21,11 @@ You **MUST** create a plan at `{{planFilePath}}`.
21
21
  You **MUST** use `{{editToolName}}` for incremental updates; use `{{writeToolName}}` only for create/full replace.
22
22
 
23
23
  <caution>
24
- Plan execution runs in fresh context (session cleared). You **MUST** make the plan file self-contained: include requirements, decisions, key findings, remaining todos needed to continue without prior session history.
24
+ The approval selector includes:
25
+ - **Approve and execute**: starts execution in fresh context (session cleared).
26
+ - **Approve and keep context**: starts execution in this session, preserving exploration history.
27
+
28
+ You **MUST** still make the plan file self-contained: include requirements, decisions, key findings, and remaining todos needed to continue without prior session history.
25
29
  </caution>
26
30
 
27
31
  {{#if reentry}}
@@ -100,7 +104,7 @@ You **MUST** ask questions throughout. You **MUST NOT** make large assumptions a
100
104
  <critical>
101
105
  Your turn ends ONLY by:
102
106
  1. Using `{{askToolName}}` to gather information, OR
103
- 2. Calling `{{exitToolName}}` when ready — this triggers user approval, then a new implementation session with full tool access
107
+ 2. Calling `{{exitToolName}}` when ready — this triggers user approval, then implementation with full tool access
104
108
 
105
109
  You **MUST NOT** ask plan approval via text or `{{askToolName}}`; you **MUST** use `{{exitToolName}}`.
106
110
  You **MUST** keep going until complete.
@@ -3,6 +3,11 @@ Plan approved. You **MUST** execute it now.
3
3
  </critical>
4
4
 
5
5
  Finalized plan artifact: `{{finalPlanFilePath}}`
6
+ {{#if contextPreserved}}
7
+ Context was preserved for execution. Use the existing conversation history when it is useful, and treat the finalized plan as the source of truth if it conflicts with earlier exploration.
8
+ {{else}}
9
+ Execution may be running in fresh context. Treat the finalized plan as the source of truth.
10
+ {{/if}}
6
11
 
7
12
  ## Plan
8
13