@oh-my-pi/pi-coding-agent 15.10.0 → 15.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. package/CHANGELOG.md +75 -1
  2. package/dist/types/cli/dry-balance-cli.d.ts +15 -1
  3. package/dist/types/commit/analysis/conventional.d.ts +2 -2
  4. package/dist/types/commit/analysis/summary.d.ts +2 -2
  5. package/dist/types/commit/changelog/generate.d.ts +2 -2
  6. package/dist/types/commit/changelog/index.d.ts +2 -2
  7. package/dist/types/commit/map-reduce/index.d.ts +3 -3
  8. package/dist/types/commit/map-reduce/map-phase.d.ts +2 -2
  9. package/dist/types/commit/map-reduce/reduce-phase.d.ts +2 -2
  10. package/dist/types/commit/model-selection.d.ts +10 -4
  11. package/dist/types/config/api-key-resolver.d.ts +34 -0
  12. package/dist/types/config/model-registry.d.ts +17 -1
  13. package/dist/types/config/settings-schema.d.ts +9 -0
  14. package/dist/types/dap/config.d.ts +14 -1
  15. package/dist/types/dap/types.d.ts +10 -0
  16. package/dist/types/lsp/utils.d.ts +3 -2
  17. package/dist/types/modes/components/chat-block.d.ts +64 -0
  18. package/dist/types/modes/components/custom-editor.d.ts +3 -0
  19. package/dist/types/modes/components/overlay-box.d.ts +17 -0
  20. package/dist/types/modes/components/plan-review-overlay.d.ts +59 -0
  21. package/dist/types/modes/components/plan-toc.d.ts +41 -0
  22. package/dist/types/modes/components/read-tool-group.d.ts +2 -0
  23. package/dist/types/modes/components/transcript-container.d.ts +11 -0
  24. package/dist/types/modes/controllers/command-controller.d.ts +1 -0
  25. package/dist/types/modes/controllers/event-controller.d.ts +0 -1
  26. package/dist/types/modes/controllers/extension-ui-controller.d.ts +0 -1
  27. package/dist/types/modes/controllers/input-controller.d.ts +1 -1
  28. package/dist/types/modes/controllers/streaming-reveal.d.ts +22 -0
  29. package/dist/types/modes/controllers/tan-command-controller.d.ts +6 -0
  30. package/dist/types/modes/interactive-mode.d.ts +15 -5
  31. package/dist/types/modes/theme/theme.d.ts +1 -1
  32. package/dist/types/modes/types.d.ts +18 -5
  33. package/dist/types/modes/utils/copy-targets.d.ts +21 -1
  34. package/dist/types/plan-mode/approved-plan.d.ts +27 -8
  35. package/dist/types/plan-mode/plan-protection.d.ts +4 -4
  36. package/dist/types/sdk.d.ts +2 -0
  37. package/dist/types/session/agent-session.d.ts +21 -0
  38. package/dist/types/session/messages.d.ts +12 -0
  39. package/dist/types/session/session-manager.d.ts +3 -1
  40. package/dist/types/slash-commands/types.d.ts +4 -6
  41. package/dist/types/task/executor.d.ts +7 -0
  42. package/dist/types/task/index.d.ts +1 -0
  43. package/dist/types/task/render.d.ts +3 -2
  44. package/dist/types/tools/archive-reader.d.ts +5 -0
  45. package/dist/types/tools/ast-edit.d.ts +3 -0
  46. package/dist/types/tools/ast-grep.d.ts +3 -0
  47. package/dist/types/tools/bash.d.ts +1 -0
  48. package/dist/types/tools/find.d.ts +8 -4
  49. package/dist/types/tools/grouped-file-output.d.ts +95 -12
  50. package/dist/types/tools/memory-render.d.ts +4 -1
  51. package/dist/types/tools/plan-mode-guard.d.ts +8 -9
  52. package/dist/types/tools/render-utils.d.ts +5 -9
  53. package/dist/types/tools/search.d.ts +4 -0
  54. package/dist/types/tools/sqlite-reader.d.ts +1 -0
  55. package/dist/types/tools/todo.d.ts +3 -2
  56. package/dist/types/tools/write.d.ts +3 -0
  57. package/dist/types/tui/output-block.d.ts +16 -4
  58. package/dist/types/tui/status-line.d.ts +3 -0
  59. package/dist/types/utils/enhanced-paste.d.ts +20 -0
  60. package/dist/types/web/search/providers/kimi.d.ts +1 -1
  61. package/package.json +9 -9
  62. package/src/auto-thinking/classifier.ts +5 -1
  63. package/src/cli/dry-balance-cli.ts +52 -17
  64. package/src/cli/gallery-cli.ts +4 -1
  65. package/src/cli/gallery-fixtures/misc.ts +29 -0
  66. package/src/commit/analysis/conventional.ts +2 -2
  67. package/src/commit/analysis/summary.ts +2 -2
  68. package/src/commit/changelog/generate.ts +2 -2
  69. package/src/commit/changelog/index.ts +2 -2
  70. package/src/commit/map-reduce/index.ts +3 -3
  71. package/src/commit/map-reduce/map-phase.ts +2 -2
  72. package/src/commit/map-reduce/reduce-phase.ts +2 -2
  73. package/src/commit/model-selection.ts +33 -9
  74. package/src/commit/pipeline.ts +4 -4
  75. package/src/config/api-key-resolver.ts +58 -0
  76. package/src/config/model-registry.ts +25 -2
  77. package/src/config/settings-schema.ts +10 -0
  78. package/src/config/settings.ts +20 -2
  79. package/src/dap/config.ts +41 -2
  80. package/src/dap/defaults.json +1 -0
  81. package/src/dap/session.ts +1 -0
  82. package/src/dap/types.ts +10 -0
  83. package/src/debug/index.ts +40 -54
  84. package/src/edit/renderer.ts +82 -78
  85. package/src/eval/__tests__/llm-bridge.test.ts +90 -31
  86. package/src/eval/llm-bridge.ts +8 -3
  87. package/src/goals/tools/goal-tool.ts +36 -26
  88. package/src/internal-urls/docs-index.generated.ts +6 -6
  89. package/src/lsp/utils.ts +3 -2
  90. package/src/main.ts +9 -7
  91. package/src/memories/index.ts +12 -5
  92. package/src/mnemopi/backend.ts +5 -1
  93. package/src/modes/acp/acp-agent.ts +33 -26
  94. package/src/modes/components/assistant-message.ts +2 -9
  95. package/src/modes/components/chat-block.ts +111 -0
  96. package/src/modes/components/copy-selector.ts +1 -44
  97. package/src/modes/components/custom-editor.ts +23 -0
  98. package/src/modes/components/custom-message.ts +1 -3
  99. package/src/modes/components/execution-shared.ts +1 -2
  100. package/src/modes/components/hook-message.ts +1 -3
  101. package/src/modes/components/overlay-box.ts +108 -0
  102. package/src/modes/components/plan-review-overlay.ts +799 -0
  103. package/src/modes/components/plan-toc.ts +138 -0
  104. package/src/modes/components/read-tool-group.ts +20 -4
  105. package/src/modes/components/skill-message.ts +0 -1
  106. package/src/modes/components/tips.txt +1 -0
  107. package/src/modes/components/todo-reminder.ts +0 -2
  108. package/src/modes/components/tool-execution.ts +68 -88
  109. package/src/modes/components/transcript-container.ts +84 -24
  110. package/src/modes/components/user-message.ts +1 -2
  111. package/src/modes/controllers/command-controller-shared.ts +7 -6
  112. package/src/modes/controllers/command-controller.ts +57 -55
  113. package/src/modes/controllers/event-controller.ts +41 -40
  114. package/src/modes/controllers/extension-ui-controller.ts +10 -73
  115. package/src/modes/controllers/input-controller.ts +124 -119
  116. package/src/modes/controllers/mcp-command-controller.ts +69 -60
  117. package/src/modes/controllers/selector-controller.ts +23 -25
  118. package/src/modes/controllers/streaming-reveal.ts +212 -0
  119. package/src/modes/controllers/tan-command-controller.ts +173 -0
  120. package/src/modes/interactive-mode.ts +169 -94
  121. package/src/modes/setup-wizard/wizard-overlay.ts +1 -1
  122. package/src/modes/theme/theme-schema.json +1 -1
  123. package/src/modes/theme/theme.ts +8 -4
  124. package/src/modes/types.ts +18 -7
  125. package/src/modes/utils/copy-targets.ts +133 -27
  126. package/src/modes/utils/ui-helpers.ts +44 -46
  127. package/src/plan-mode/approved-plan.ts +66 -43
  128. package/src/plan-mode/plan-protection.ts +4 -4
  129. package/src/prompts/system/background-tan-dispatch.md +8 -0
  130. package/src/prompts/system/plan-mode-active.md +67 -58
  131. package/src/prompts/system/plan-mode-approved.md +1 -1
  132. package/src/sdk.ts +11 -37
  133. package/src/session/agent-session.ts +82 -6
  134. package/src/session/messages.ts +26 -0
  135. package/src/session/session-manager.ts +13 -5
  136. package/src/slash-commands/builtin-registry.ts +36 -9
  137. package/src/slash-commands/types.ts +4 -6
  138. package/src/task/executor.ts +5 -2
  139. package/src/task/index.ts +4 -0
  140. package/src/task/render.ts +212 -147
  141. package/src/tools/archive-reader.ts +64 -0
  142. package/src/tools/ask.ts +119 -164
  143. package/src/tools/ast-edit.ts +98 -71
  144. package/src/tools/ast-grep.ts +37 -43
  145. package/src/tools/bash.ts +50 -6
  146. package/src/tools/debug.ts +20 -8
  147. package/src/tools/fetch.ts +297 -7
  148. package/src/tools/find.ts +44 -30
  149. package/src/tools/gh-renderer.ts +81 -42
  150. package/src/tools/grouped-file-output.ts +272 -48
  151. package/src/tools/image-gen.ts +150 -103
  152. package/src/tools/inspect-image-renderer.ts +63 -41
  153. package/src/tools/inspect-image.ts +8 -1
  154. package/src/tools/job.ts +3 -4
  155. package/src/tools/memory-render.ts +4 -1
  156. package/src/tools/plan-mode-guard.ts +21 -39
  157. package/src/tools/read.ts +23 -16
  158. package/src/tools/render-utils.ts +21 -37
  159. package/src/tools/resolve.ts +14 -0
  160. package/src/tools/search-tool-bm25.ts +36 -23
  161. package/src/tools/search.ts +80 -78
  162. package/src/tools/sqlite-reader.ts +9 -12
  163. package/src/tools/todo.ts +118 -52
  164. package/src/tools/write.ts +81 -62
  165. package/src/tui/output-block.ts +60 -13
  166. package/src/tui/status-line.ts +5 -1
  167. package/src/utils/commit-message-generator.ts +9 -1
  168. package/src/utils/enhanced-paste.ts +202 -0
  169. package/src/utils/title-generator.ts +2 -1
  170. package/src/web/search/providers/anthropic.ts +25 -19
  171. package/src/web/search/providers/exa.ts +11 -3
  172. package/src/web/search/providers/kimi.ts +28 -17
  173. package/src/web/search/providers/parallel.ts +35 -24
  174. package/src/web/search/providers/synthetic.ts +8 -6
  175. package/src/web/search/providers/tavily.ts +9 -8
  176. package/src/web/search/providers/zai.ts +8 -6
@@ -20,7 +20,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
20
20
  "handoff-generation-pipeline.md": "# `/handoff` generation pipeline\n\nThis document describes how the coding-agent implements `/handoff`: trigger path, oneshot generation, session switch, context reinjection, persistence, and UI behavior.\n\n## Scope\n\nCovers:\n\n- Interactive `/handoff` command dispatch\n- `AgentSession.handoff()` lifecycle and state transitions\n- `generateHandoff(...)` request shape\n- How old/new sessions persist handoff data differently\n- UI behavior for success, cancel, and failure\n\nDoes not cover:\n\n- Generic tree navigation/branch internals\n- Non-handoff session commands (`/new`, `/fork`, `/resume`)\n\n## Implementation files\n\n- [`../src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`packages/agent/src/compaction/compaction.ts`](../packages/agent/src/compaction/compaction.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n\n## Trigger path\n\n1. `/handoff` is declared in builtin slash command metadata (`slash-commands.ts`) with optional inline hint: `[focus instructions]`.\n2. In interactive input handling (`InputController`), submit text matching `/handoff` or `/handoff ...` is intercepted before normal prompt submission.\n3. The editor is cleared and `handleHandoffCommand(customInstructions?)` is called.\n4. 
`CommandController.handleHandoffCommand` performs a preflight guard using current entries:\n - Counts `type === \"message\"` entries.\n - If `< 2`, it warns: `Nothing to hand off (no messages yet)` and returns.\n\nThe same minimum-content guard exists again inside `AgentSession.handoff()` and throws if violated. This duplicates safety at both UI and session layers.\n\n## End-to-end lifecycle\n\n### 1) Start handoff generation\n\n`AgentSession.handoff(customInstructions?)`:\n\n- Reads current branch entries (`sessionManager.getBranch()`).\n- Validates minimum message count (`>= 2`).\n- Creates `#handoffAbortController` and links any caller-provided abort signal to it.\n- Resolves the current model API key through `ModelRegistry`.\n- Calls `generateHandoff(...)` with:\n - live agent messages (`agent.state.messages`),\n - the current model and API key,\n - the base system prompt (`#baseSystemPrompt`),\n - the live tool array (`agent.state.tools`),\n - optional focus instructions,\n - coding-agent message conversion (`convertToLlm`),\n - provider metadata, current thinking level, and `initiatorOverride: \"agent\"`.\n\n`generateHandoff(...)` lives in `packages/agent/src/compaction/compaction.ts` next to summarization. 
It renders `packages/agent/src/compaction/prompts/handoff-document.md` via `renderHandoffPrompt(...)` with optional `additionalFocus`.\n\n### 2) Generate and capture output\n\n`generateHandoff(...)` converts the existing `AgentMessage[]` history to real LLM `Message[]` history, then appends one trailing agent-attributed `user` message containing the rendered handoff prompt.\n\nThe request uses `completeSimple(...)` directly:\n\n```ts\nawait completeSimple(\n model,\n {\n systemPrompt,\n messages: requestMessages,\n tools,\n },\n {\n apiKey,\n signal,\n reasoning: resolveCompactionEffort(model, options.thinkingLevel),\n toolChoice: \"none\",\n initiatorOverride,\n metadata,\n },\n);\n```\n\nImportant generation properties:\n\n- The request preserves the live provider cache prefix by reusing the same system prompt, tool definitions, and real message history shape as the active agent.\n- The handoff instruction is a trailing `user` message, not a developer message, so the cached prefix remains aligned with the prior turn.\n- `toolChoice: \"none\"` prevents intentional tool dispatch.\n- The returned assistant content is filtered to text blocks and joined with `\\n`; stray tool-call blocks are ignored if a provider does not honor `toolChoice: \"none\"`.\n- `stopReason === \"error\"` throws a generation error.\n\nNo agent-loop events are used for capture. 
The handoff path no longer waits for `agent_end` and no longer scans the latest assistant message.\n\n### 3) Cancellation checks\n\nCancellation throws `Error(\"Handoff cancelled\")`; a completed generation with no text returns `undefined`.\n\n- caller signal aborts `#handoffAbortController`\n- `completeSimple(...)` receives the abort signal\n- aborted handoff signal or provider `AbortError` is normalized to `Error(\"Handoff cancelled\")`\n- empty generated text returns `undefined`\n\n`AgentSession.handoff()` always clears `#handoffAbortController` in `finally`.\n\n### 4) New session creation\n\nIf text was generated and not aborted:\n\n1. Flush current session writer (`sessionManager.flush()`).\n2. Cancel session-owned async jobs.\n3. Start a brand-new session with `parentSession` pointing at the previous session file when one exists.\n4. Reset in-memory agent state (`agent.reset()`).\n5. Rebind `agent.sessionId` to the new session id.\n6. Rekey/reset Hindsight and Mnemopi memory session tracking for the new session.\n7. Clear queued context arrays (`#steeringMessages`, `#followUpMessages`, `#pendingNextTurnMessages`) and any scheduled hidden next-turn generation.\n8. Reset todo reminder counter.\n\n### 5) Handoff-context injection\n\nThe generated handoff document is wrapped by coding-agent session glue and appended to the new session as a `custom_message` entry:\n\n```text\n<handoff-context>\n...handoff text...\n</handoff-context>\n\nThe above is a handoff document from a previous session. Use this context to continue the work seamlessly.\n```\n\nInsertion call:\n\n```ts\nthis.sessionManager.appendCustomMessageEntry(\n \"handoff\",\n handoffContent,\n true,\n undefined,\n \"agent\",\n);\n```\n\nSemantics:\n\n- `customType`: `\"handoff\"`\n- `display`: `true` (visible in TUI rebuild)\n- attribution: `\"agent\"`\n- Entry type: `custom_message` (participates in LLM context)\n\n### 6) Rebuild active agent context\n\nAfter injection:\n\n1. 
`buildDisplaySessionContext()` resolves message list for current leaf.\n2. `agent.replaceMessages(sessionContext.messages)` makes the injected handoff message active context.\n3. Todo phases are synchronized from the new branch.\n4. Method returns `{ document: handoffText, savedPath? }`.\n\nAt this point, the active LLM context in the new session contains the injected handoff message, not the old transcript.\n\n## Persistence model: old session vs new session\n\n### Old session\n\nHandoff generation is a oneshot request, not a visible agent turn. The generated handoff text is not appended to the old session as an assistant message.\n\nResult: the original session keeps its prior transcript unchanged except for data already persisted before handoff began.\n\n### New session\n\nAfter session reset, handoff is persisted as `custom_message` with `customType: \"handoff\"`.\n\n`buildSessionContext()` converts this entry into a runtime custom/user-context message via `createCustomMessage(...)`, so it is included in future prompts from the new session.\n\nAuto-triggered handoffs can additionally write a timestamped `handoff-*.md` artifact under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled. 
Manual `/handoff` does not write that artifact.\n\n## Controller/UI behavior\n\n`CommandController.handleHandoffCommand` behavior:\n\n- Shows a status loader: `Generating handoff… (esc to cancel)`.\n- Calls `await session.handoff(customInstructions)`.\n- If result is `undefined`: `showError(\"Handoff cancelled\")`.\n- On success:\n - `rebuildChatFromMessages()` (loads new session context, including injected handoff)\n - invalidates status line and editor top border\n - reloads todos\n - appends success chat line: `New session started with handoff context`\n- On exception:\n - if message is `\"Handoff cancelled\"` or error name is `AbortError`: `showError(\"Handoff cancelled\")`\n - otherwise: `showError(\"Handoff failed: <message>\")`\n- Stops the loader, restores the previous Escape handler, and requests render at end.\n\nManual `/handoff` no longer streams the generated document into chat. A cancellable loader remains visible while the oneshot request runs, and the chat is rebuilt after generation completes.\n\n## Cancellation semantics\n\n### Session-level cancellation primitive\n\n`AgentSession` exposes:\n\n- `abortHandoff()` → aborts `#handoffAbortController`\n- `isGeneratingHandoff` → true while controller exists\n\nWhen this abort path is used, the abort signal is passed to `completeSimple(...)`; `handoff()` normalizes the cancellation to `Error(\"Handoff cancelled\")`, and command controller maps it to cancellation UI.\n\n### Interactive `/handoff` path\n\nThe command controller installs a temporary Escape handler for `/handoff` while the loader is visible. 
Pressing Escape calls `session.abortHandoff()`, which aborts the `completeSimple(...)` request through `#handoffAbortController`.\n\n## Aborted vs failed handoff\n\nCurrent UI classification:\n\n- **Aborted/cancelled**\n - `abortHandoff()` path triggers `\"Handoff cancelled\"`, or\n - thrown `AbortError`\n - UI shows `Handoff cancelled`\n- **Failed**\n - any other thrown error from `handoff()` / `generateHandoff()` / provider request path\n - UI shows `Handoff failed: ...`\n\nAdditional nuance: if generation completes but no text is returned, `handoff()` returns `undefined` and controller currently reports **cancelled**, not **failed**.\n\n## Short-session and minimum-content guardrails\n\nTwo guards prevent low-signal handoffs:\n\n- UI layer (`handleHandoffCommand`): warns and returns early for `< 2` message entries\n- Session layer (`handoff()`): throws the same condition as an error\n\nThis avoids creating a new session with empty/near-empty handoff context.\n\n## State transition summary\n\nHigh-level state flow:\n\n1. Interactive slash command intercepted.\n2. Preflight message-count guard.\n3. `#handoffAbortController` created (`isGeneratingHandoff = true`).\n4. `generateHandoff(...)` issues one `completeSimple(...)` request with live system prompt, tools, message history, current thinking level, and trailing handoff prompt.\n5. Assistant response text blocks are joined; tool-call blocks are discarded.\n6. If missing text → return `undefined`; if aborted → cancellation error path.\n7. If present:\n - flush old session\n - cancel async jobs\n - create new empty session with previous session as parent\n - reset runtime queues/counters\n - append `custom_message(handoff)`\n - optionally save an auto-triggered handoff document under the session artifacts directory when `compaction.handoffSaveToDisk` is enabled\n8. Controller rebuilds chat UI and announces success.\n9. 
`#handoffAbortController` cleared (`isGeneratingHandoff = false`).\n\n## Known assumptions and limitations\n\n- No structural validation checks that generated markdown follows the requested section format.\n- Missing generated text is reported as cancellation in controller UX.\n- Manual handoff has no streaming visibility; a cancellable loader is shown until the UI updates after generation completes.\n- Auto-triggered handoffs can write a timestamped `handoff-*.md` artifact when `compaction.handoffSaveToDisk` is enabled; write failure is logged and does not fail the handoff.\n",
21
21
  "hooks.md": "# Hooks\n\nThis document describes the **current hook subsystem code** in `src/extensibility/hooks/*`.\n\n## Current status in runtime\n\nThe hook package (`src/extensibility/hooks/`) is still exported and usable as an API surface, but the default CLI runtime now initializes the **extension runner** path. In current startup flow:\n\n- `--hook` is treated as an alias for `--extension` (CLI paths are merged into `additionalExtensionPaths`)\n- tools are wrapped by `ExtensionToolWrapper`, not `HookToolWrapper`\n- context transforms and lifecycle emissions go through `ExtensionRunner`\n\nSo this file documents the hook subsystem implementation itself (types/loader/runner/wrapper), including legacy behavior and constraints.\n\n## Key files\n\n- `src/extensibility/hooks/types.ts` — hook context, event types, and result contracts\n- `src/extensibility/hooks/loader.ts` — module loading and hook discovery bridge\n- `src/extensibility/hooks/runner.ts` — event dispatch, command lookup, error signaling\n- `src/extensibility/hooks/tool-wrapper.ts` — pre/post tool interception wrapper\n- `src/extensibility/hooks/index.ts` — exports/re-exports\n\n## What a hook module is\n\nA hook module must default-export a factory:\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function hook(pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (\n event.toolName === \"bash\" &&\n String(event.input.command ?? 
\"\").includes(\"rm -rf\")\n ) {\n return { block: true, reason: \"blocked by policy\" };\n }\n });\n}\n```\n\nThe factory can:\n\n- register event handlers with `pi.on(...)`\n- send persistent custom messages with `pi.sendMessage(...)`\n- persist non-LLM state with `pi.appendEntry(...)`\n- register slash commands via `pi.registerCommand(...)`\n- register custom message renderers via `pi.registerMessageRenderer(...)`\n- run shell commands via `pi.exec(...)`\n- author schemas/helpers with injected `pi.zod`, `pi.typebox`, and package exports via `pi.pi`\n\n## Discovery and loading\n\n`discoverAndLoadHooks(configuredPaths, cwd)` does:\n\n1. Load discovered hooks from capability registry (`loadCapability(\"hooks\")`)\n2. Append explicitly configured paths (deduped by absolute path)\n3. Call `loadHooks(allPaths, cwd)`\n\n`loadHooks` then imports each path and expects a `default` function.\n\n### Path resolution\n\n`loader.ts` resolves hook paths as:\n\n- absolute path: used as-is\n- `~` path: expanded\n- relative path: resolved against `cwd`\n\n### Important legacy mismatch\n\nDiscovery providers for `hookCapability` still model pre/post shell-style hook files (for example `.claude/hooks/pre/*`, `.omp/.../hooks/pre/*`).\n\nThe hook loader here uses dynamic module import and requires a default JS/TS hook factory. 
If a discovered hook path is not importable as a module, load fails and is reported in `LoadHooksResult.errors`.\n\n## Event surfaces\n\nHook events are strongly typed in `types.ts`.\n\n### Session events\n\n- `session_start`\n- `session_before_switch` → can return `{ cancel?: boolean }`\n- `session_switch`\n- `session_before_branch` → can return `{ cancel?: boolean; skipConversationRestore?: boolean }`\n- `session_branch`\n- `session_before_compact` → can return `{ cancel?: boolean; compaction?: CompactionResult }`\n- `session.compacting` → can return `{ context?: string[]; prompt?: string; preserveData?: Record<string, unknown> }`\n- `session_compact`\n- `session_before_tree` → can return `{ cancel?: boolean; summary?: { summary: string; details?: unknown } }`\n- `session_tree`\n- `session_shutdown`\n\n### Agent/context events\n\n- `context` → can return `{ messages?: Message[] }`\n- `before_agent_start` → can return `{ message?: { customType; content; display; details } }`\n- `agent_start`\n- `agent_end`\n- `turn_start`\n- `turn_end`\n- `auto_compaction_start`\n- `auto_compaction_end`\n- `auto_retry_start`\n- `auto_retry_end`\n- `ttsr_triggered`\n- `todo_reminder`\n\n### Tool events (pre/post model)\n\n- `tool_call` (pre-execution) → can return `{ block?: boolean; reason?: string }`\n- `tool_result` (post-execution) → can return `{ content?; details?; isError? }`\n\nThis is the hook subsystem’s core pre/post interception model.\n\n```text\nHook tool interception flow\n\ntool_call handlers\n │\n ├─ any { block: true }? 
── yes ──> throw (tool blocked)\n │\n └─ no\n │\n ▼\n execute underlying tool\n │\n ├─ success ──> tool_result handlers can override { content, details }\n │\n └─ error ──> emit tool_result(isError=true) then rethrow original error\n```\n\n## Execution model and mutation semantics\n\n### 1) Pre-execution: `tool_call`\n\n`HookToolWrapper.execute()` emits `tool_call` before tool execution.\n\n- if any handler returns `{ block: true }`, execution stops\n- if handler throws, wrapper fails closed and blocks execution\n- returned `reason` becomes the thrown error text\n\n### 2) Tool execution\n\nUnderlying tool executes normally if not blocked.\n\n### 3) Post-execution: `tool_result`\n\nAfter success, wrapper emits `tool_result` with:\n\n- `toolName`, `toolCallId`, `input`\n- `content`\n- `details`\n- `isError: false`\n\nIf handler returns overrides:\n\n- `content` can replace result content\n- `details` can replace result details\n\nOn tool failure, wrapper emits `tool_result` with `isError: true` and error text content, then rethrows original error.\n\n### What hooks can mutate\n\n- LLM context for a single call via `context` (`messages` replacement chain)\n- tool output content/details on successful tool calls (`tool_result` path)\n- pre-agent injected message via `before_agent_start`\n- cancellation/custom compaction/tree behavior via `session_before_*` and `session.compacting`\n\n### What hooks cannot mutate in this implementation\n\n- raw tool input parameters in-place (only block/allow on `tool_call`)\n- execution continuation after thrown tool errors (error path rethrows)\n- final success/error status in wrapper behavior (returned `isError` is typed but not applied by `HookToolWrapper`)\n\n## Ordering and conflict behavior\n\n### Discovery-level ordering\n\nCapability providers are priority-sorted (higher first). Dedupe is by capability key, first wins.\n\nFor `hooks`, capability key is `${type}:${tool}:${name}`. 
Shadowed duplicates from lower-priority providers are marked and excluded from effective discovered list.\n\n### Load order\n\n`discoverAndLoadHooks` builds a flat `allPaths` list, deduped by resolved absolute path, then `loadHooks` iterates in that order.\nFile order within each discovered directory depends on `readdir` output; the hook loader does not perform an additional sort.\n\n### Runtime handler order\n\nInside `HookRunner`, order is deterministic by registration sequence:\n\n1. hooks array order\n2. handler registration order per hook/event\n\nConflict behavior by event type:\n\n- `tool_call`: last returned result wins unless a handler blocks; first block short-circuits\n- `tool_result`: last returned override wins (no short-circuit)\n- `context`: chained; each handler receives prior handler’s message output\n- `before_agent_start`: first returned message is kept; later messages ignored\n- `session_before_*`: latest returned result is tracked; `cancel: true` short-circuits immediately\n- `session.compacting`: latest returned result wins\n\nCommand/renderer conflicts:\n\n- `getCommand(name)` returns first match across hooks (first loaded wins)\n- `getMessageRenderer(customType)` returns first match\n- `getRegisteredCommands()` returns all commands (no dedupe)\n\n## UI interactions (`HookContext.ui`)\n\n`HookUIContext` includes:\n\n- `select`, `confirm`, `input`, `editor`\n- `notify`\n- `setStatus`\n- `custom`\n- `setEditorText`, `getEditorText`\n- `theme` getter\n\n`ctx` includes `hasUI`, `cwd`, `sessionManager`, `modelRegistry`, current `model`, `isIdle()`, `abort()`, and `hasQueuedMessages()`.\n\nWhen running with no UI, the default no-op context behavior is:\n\n- `select/input/editor` return `undefined`\n- `confirm` returns `false`\n- `notify`, `setStatus`, `setEditorText` are no-ops\n- `getEditorText` returns `\"\"`\n\n### Status line behavior\n\nHook status text set via `ctx.ui.setStatus(key, text)` is:\n\n- stored per key\n- sorted by key name\n- 
sanitized (`\\r`, `\\n`, `\\t` → spaces; repeated spaces collapsed)\n- joined and width-truncated for display\n\n## Error propagation and fallback\n\n### Load-time\n\n- invalid module or missing default export → captured in `LoadHooksResult.errors`\n- loading continues for other hooks\n\n### Event-time\n\n`HookRunner.emit(...)` catches handler errors for most events and emits `HookError` to listeners (`hookPath`, `event`, `error`), then continues.\n\n`emitToolCall(...)` is stricter: handler errors are not swallowed there; they propagate to caller. In `HookToolWrapper`, this blocks the tool call (fail-safe).\n\n## Realistic API examples\n\n### Block unsafe bash commands\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_call\", async (event, ctx) => {\n if (event.toolName !== \"bash\") return;\n const cmd = String(event.input.command ?? \"\");\n if (!cmd.includes(\"rm -rf\")) return;\n\n if (!ctx.hasUI) return { block: true, reason: \"rm -rf blocked (no UI)\" };\n const ok = await ctx.ui.confirm(\"Dangerous command\", `Allow: ${cmd}`);\n if (!ok) return { block: true, reason: \"user denied command\" };\n });\n}\n```\n\n### Redact tool output on post-execution\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"tool_result\", async (event) => {\n if (event.toolName !== \"read\" || event.isError) return;\n\n const redacted = event.content.map((chunk) => {\n if (chunk.type !== \"text\") return chunk;\n return {\n ...chunk,\n text: chunk.text.replaceAll(/API_KEY=\\S+/g, \"API_KEY=[REDACTED]\"),\n };\n });\n\n return { content: redacted };\n });\n}\n```\n\n### Modify model context per LLM call\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.on(\"context\", async (event) => {\n 
const filtered = event.messages.filter(\n (msg) => !(msg.role === \"custom\" && msg.customType === \"debug-only\"),\n );\n return { messages: filtered };\n });\n}\n```\n\n### Register slash command with command-safe context methods\n\n```ts\nimport type { HookAPI } from \"@oh-my-pi/pi-coding-agent/extensibility/hooks\";\n\nexport default function (pi: HookAPI): void {\n pi.registerCommand(\"handoff\", {\n description: \"Create a new session with setup message\",\n handler: async (_args, ctx) => {\n await ctx.waitForIdle();\n await ctx.newSession({\n parentSession: ctx.sessionManager.getSessionFile(),\n setup: async (sm) => {\n sm.appendMessage({\n role: \"user\",\n content: [\n { type: \"text\", text: \"Continue from prior session summary.\" },\n ],\n timestamp: Date.now(),\n });\n },\n });\n },\n });\n}\n```\n\n## Export surface\n\n`src/extensibility/hooks/index.ts` and the package subpath `@oh-my-pi/pi-coding-agent/extensibility/hooks` export:\n\n- loading APIs (`discoverAndLoadHooks`, `loadHooks`)\n- runner and wrapper (`HookRunner`, `HookToolWrapper`)\n- all hook types\n- `execCommand` re-export\n\nThe package root (`@oh-my-pi/pi-coding-agent`) does not re-export `HookAPI`; import legacy hook types from the hooks subpath.\n",
22
22
  "install-id.md": "# Install ID\n\nA persistent per-install UUID that identifies a single oh-my-pi installation across sessions. Used as a stable correlation key for server-side dedup of telemetry-style pushes (currently the auto-QA grievance flush from `report_tool_issue`).\n\n## API\n\nExported from `@oh-my-pi/pi-utils` (`packages/utils/src/dirs.ts`):\n\n| Symbol | Purpose |\n| --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |\n| `getInstallId(): string` | Returns the install ID, generating and persisting one on first call. Result is cached in-process for the lifetime of the runtime. |\n| `__resetInstallIdCacheForTests(): void` | Clears the in-process cache. Test-only — MUST NOT be called from production code. |\n\nGenerated IDs are lowercase RFC 4122 UUIDs. Existing persisted values are accepted case-insensitively when they match `^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$` with the regex `i` flag, and are returned exactly as stored.\n\n## Storage\n\n- Path: `<config-root>/install-id` — i.e. `~/.omp/install-id` by default, respecting `PI_CONFIG_DIR` via `getConfigRootDir()`.\n- Format: a single UUID line (trailing `\\n`).\n- Permissions: file is created with mode `0o600`.\n- Lifecycle: independent of `~/.omp/agent/`. Wiping agent state (sessions, settings, DB) does NOT regenerate the install ID; only deleting the `install-id` file itself does.\n\n## Generation and lifecycle\n\n1. First call to `getInstallId()` reads the file. If contents parse as a valid UUID, that value is cached and returned.\n2. Otherwise the helper calls `crypto.randomUUID()` (Node's CSPRNG-backed UUID v4) to mint a new ID.\n3. The new value is written via `open(O_WRONLY | O_CREAT | O_EXCL, 0o600)`. 
The exclusive-create guard means two processes hitting first-call simultaneously cannot both succeed — the loser sees `EEXIST`, re-reads the winner's file, and adopts that ID.\n4. If the existing file contained non-empty garbage (failed UUID regex), it is `unlink`ed before the exclusive create so `O_EXCL` does not trip on stale data.\n5. Any other write failure (read-only FS, permission error) is swallowed: the freshly generated UUID is still cached in-memory so the rest of the process sees a stable value, and subsequent process launches will retry persistence.\n6. Subsequent in-process calls return the cached value without touching disk. Mutating the file on disk after the first call has no effect until the process restarts (or tests call `__resetInstallIdCacheForTests`).\n\n## Consumers\n\n- `packages/coding-agent/src/tools/report-tool-issue.ts` — included as `installId` in the auto-QA grievance push body so the backend can deduplicate repeated reports from the same install. See `dev.autoqaPush.*` settings and `PI_AUTO_QA_PUSH_*` env vars.\n\nNew consumers MUST treat the value as opaque and MUST NOT derive PII from it; the helper does not mix in hostname, username, or any other host-identifying entropy.\n\n## See also\n\n- [environment-variables.md](environment-variables.md) — `PI_CONFIG_DIR` controls where `install-id` lives.\n- [config-usage.md](config-usage.md) — broader config-root layout.\n",
23
- "keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `omp` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.omp/agent/keybindings.yml`. The file is a YAML mapping whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.omp/agent/config.yml`, and there is no nested `keybindings` object.\n\n```yaml\napp.model.cycleForward: Ctrl+P\napp.model.selectTemporary: Alt+P\napp.plan.toggle: Alt+Shift+P\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```yaml\napp.stt.toggle: []\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --------------------------- | -------------------------------------- | --------------------------------------------- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models in temporary mode |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Alt+M` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Q`, `Ctrl+Enter` | Queue a follow-up message |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.display.reset` | `Ctrl+L` | Reset terminal display 
|\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.clipboard.pasteImage` | `Ctrl+V` (`Alt+V` fallback on Windows) | Paste an image from the clipboard |\n| `app.stt.toggle` | `Alt+H` | Toggle speech-to-text recording |\n\nOn Windows Terminal, `Ctrl+V` may be handled by the terminal paste command before `omp` sees it; use the `Alt+V` fallback when clipboard image paste appears to do nothing. Windows Terminal also swallows `Ctrl+Enter`, so the follow-up shortcut also binds `Ctrl+Q` — the same chord GitHub Copilot CLI uses. If your existing `keybindings.yml` already assigns `Ctrl+Q` to another action, that user remap wins and follow-up keeps `Ctrl+Enter` unless you explicitly bind `app.message.followUp`.\n\nOlder unqualified action names are migrated when `keybindings.yml` is loaded, but new docs and new configs should use the namespaced action IDs above. Existing `keybindings.json` files are still accepted and migrated to `keybindings.yml`; `keybindings.yaml` is also accepted.\n",
23
+ "keybindings.md": "# Keybindings\n\nRun `/hotkeys` inside an `omp` session to see the active chords for your current build. The list reflects any remaps loaded from disk and any bindings added by extensions.\n\n## Customize keybindings\n\nUser remaps live in `~/.omp/agent/keybindings.yml`. The file is a YAML mapping whose keys are keybinding action IDs and whose values are either one chord string or an array of chord strings. It is not read from `~/.omp/agent/config.yml`, and there is no nested `keybindings` object.\n\n```yaml\napp.model.cycleForward: Ctrl+P\napp.model.selectTemporary: Alt+P\napp.plan.toggle: Alt+Shift+P\n```\n\nChord names are case-insensitive and use the same notation shown in the UI, such as `Ctrl+P`, `Alt+Shift+P`, `Shift+Enter`, and `Ctrl+Backspace`.\n\nSet an action to an empty array to disable it:\n\n```yaml\napp.stt.toggle: []\n```\n\n## Common action IDs\n\n| Action ID | Default | Meaning |\n| --------------------------- | -------------------------------------- | --------------------------------------------- |\n| `app.model.cycleForward` | `Ctrl+P` | Cycle role models forward |\n| `app.model.cycleBackward` | `Shift+Ctrl+P` | Cycle role models in temporary mode |\n| `app.model.selectTemporary` | `Alt+P` | Pick a model temporarily for this session |\n| `app.model.select` | `Alt+M` | Open the model selector and set roles |\n| `app.plan.toggle` | `Alt+Shift+P` | Toggle plan mode |\n| `app.history.search` | `Ctrl+R` | Search prompt history |\n| `app.tools.expand` | `Ctrl+O` | Toggle tool-output expansion |\n| `app.thinking.toggle` | `Ctrl+T` | Toggle thinking-block visibility |\n| `app.thinking.cycle` | `Shift+Tab` | Cycle thinking level |\n| `app.editor.external` | `Ctrl+G` | Edit the draft in `$VISUAL` / `$EDITOR` |\n| `app.message.followUp` | `Ctrl+Q`, `Ctrl+Enter` | Queue a follow-up message |\n| `app.message.dequeue` | `Alt+Up` | Dequeue a queued message back into the editor |\n| `app.display.reset` | `Ctrl+L` | Reset terminal display 
|\n| `app.clipboard.copyLine` | `Alt+Shift+L` | Copy the current line |\n| `app.clipboard.copyPrompt` | `Alt+Shift+C` | Copy the whole prompt |\n| `app.clipboard.pasteImage` | `Ctrl+V` (`Alt+V` fallback on Windows) | Paste an image from the clipboard |\n| `app.stt.toggle` | `Alt+H` | Toggle speech-to-text recording |\n\nOn Windows Terminal, `Ctrl+V` may be handled by the terminal paste command before `omp` sees it; use the `Alt+V` fallback when clipboard image paste appears to do nothing. Windows Terminal also swallows `Ctrl+Enter`, so the follow-up shortcut also binds `Ctrl+Q` — the same chord GitHub Copilot CLI uses. If your existing `keybindings.yml` already assigns `Ctrl+Q` to another action, that user remap wins and follow-up keeps `Ctrl+Enter` unless you explicitly bind `app.message.followUp`.\n\nTerminals that implement OSC 5522 enhanced paste can send clipboard MIME data directly to `omp`; image pastes are attached as `[Image #N]`, while text/plain paste events keep normal paste behavior. When OSC 5522 is unavailable, bracketed paste still handles text, and a pasted single image-file path is loaded as an image when the file is readable from the `omp` host.\n\nOlder unqualified action names are migrated when `keybindings.yml` is loaded, but new docs and new configs should use the namespaced action IDs above. Existing `keybindings.json` files are still accepted and migrated to `keybindings.yml`; `keybindings.yaml` is also accepted.\n",
24
24
  "local-models.md": "# Embedded Local Tiny-Model Experiments\n\nThis document summarizes the experiments behind the optional **local** tiny-model paths for\nsession-title generation (`providers.tinyModel`), Mnemopi memory extraction/consolidation\n(`providers.memoryModel`), and the `auto` thinking-level difficulty classifier\n(`providers.autoThinkingModel`, which reuses the memory-model registry). It is a factual engineering\nrecord for maintainers: what we measured, which recipes won, and which models we shipped. All three\nsettings default to `online`, so existing users incur no downloads or on-device inference cost unless\nthey opt in.\n\n## Runtime / environment findings\n\n- **Stack**: `@huggingface/transformers` (transformers.js) v4 running under Bun. In Bun the library\n loads the **native `onnxruntime-node` backend** (not the WASM build).\n- **Device policy**: local tiny models default to CPU-only inference and retry once on CPU if an\n explicit accelerated provider cannot initialize.\n - Pick a provider persistently with the `providers.tinyModelDevice` setting (`default` keeps CPU),\n or per-run with the `PI_TINY_DEVICE` env var (which overrides the setting).\n - Accepted values are `cpu`, `gpu`, `metal`/`webgpu`, `auto`, `cuda`, `dml`, `coreml`, `wasm`,\n `webnn`, `webnn-gpu`, `webnn-cpu`, and `webnn-npu`.\n - Direct `coreml` remains opt-in via `PI_TINY_DEVICE=coreml`; it is not part of the default because\n cached decoder-LLM ONNX loads can fail during session initialization.\n - WebGPU/Metal works for the single-process eval harness, but the production worker forces\n Darwin `gpu`/`webgpu`/`auto` requests back to CPU because ONNX Runtime/Bun currently\n hard-crashes on worker teardown after WebGPU inference.\n - Use `providers.tinyModelDevice` or `PI_TINY_DEVICE` only when explicitly opting out of the CPU\n default.\n- **Quantization: q4 is the sweet spot** — smaller on disk, faster to load, and fast at inference.\n q8/int8 loads slower _and_ infers 
slower on CPU. Every shipped model defaults to `q4`; override the\n precision persistently with the `providers.tinyModelDtype` setting (`default` keeps `q4`, e.g. `fp16`\n for higher fidelity), or per-run with `PI_TINY_DTYPE` (which overrides the setting). Accepts `auto`,\n `fp32`, `fp16`, `q8`, `int8`, `uint8`, `q4`, `bnb4`, `q4f16`, `q2`, `q2f16`, `q1`, `q1f16`; an\n unrecognized value fails loudly at worker startup.\n- **Load-time correction (important).** An earlier belief that \"q4 >=1B models take minutes to load\"\n was a **measurement artifact** caused by running ~5 multi-GB HuggingFace downloads in parallel\n (I/O saturation). Clean, isolated **warm** loads are all sub-3s:\n - TinyLlama-1.1B q4: ~0.5s\n - Llama-3.2-1B q4: ~2.8s (`graphOpt=all`) / ~0.5s (`disabled`)\n - LFM2-1.2B q4: ~0.36s\n - Qwen2.5-1.5B q4: ~1.5s\n - Qwen3-1.7B q4: ~1.6s\n - gemma-3-1b q4: ~1.1s\n - Conclusion: **1B–1.7B models are viable on CPU.**\n- **`session_options.graphOptimizationLevel`** trades load vs inference speed: `disabled` = fastest\n load, slightly slower inference; `all` = default.\n- **First run** downloads weights from the HF Hub to a cache dir (q4 weights ~200MB–1.1GB depending\n on model); subsequent **warm** loads are sub-second to ~3s. Inference is async and\n background-friendly for memory tasks; titles are semi-interactive.\n\n## Task 1: Session title generation (`providers.tinyModel`)\n\n**Task**: turn the first user message into a 3–6 word title. 
Tiny models (sub-1B) suffice.\n\n**Winning recipe**:\n\n- Plain system prompt (no few-shot).\n- **Prefill** the assistant turn with `<title>` and **stop at `</title>`**, then take the first line.\n- Greedy decoding (`do_sample:false`), `enable_thinking:false` in the chat template.\n\n**What we learned**:\n\n- **Few-shot examples HURT sub-0.6B models** for titles; the tag-prefill rescues even 270M models.\n- **Token biasing (`bad_words_ids`) is a confirmed no-op** here — the prefill already controls the\n opener.\n\n**Leaderboard** (tag trick, CPU, warm):\n\n| Model | Verdict |\n| ------------- | ----------------------------------- |\n| LFM2-350M | Best speed/quality balance (~212MB) |\n| Qwen3-0.6B | Most robust |\n| gemma-3-270m | Smallest viable |\n| Qwen2.5-0.5B | Acceptable |\n| SmolLM2-135M | Too small |\n| flan-t5-small | Rejected — just echoes the input |\n\n**Shipped local options**: `lfm2-350m`, `qwen3-0.6b`, `gemma-270m`, `qwen2.5-0.5b`, `lfm2-700m`.\n**Default**: `online` (pi/smol).\n\n## Task 2: Mnemopi memory (`providers.memoryModel`)\n\nMnemopi runs two small-LLM tasks:\n\n1. **Extraction** — pull durable, structured items from a single message.\n2. **Consolidation** — summarize a list of memories into 1–3 faithful sentences.\n\nThese need **bigger models than titles: 1B–1.7B**. We tested LFM2-1.2B, Qwen2.5-1.5B, Qwen3-1.7B,\nand gemma-3-1b (q4, CPU) via four parallel agents each running 27–31 experiments.\n\n### Extraction findings\n\nThe stock 5-category JSON prompt fails on small models in two ways:\n\n1. The all-empty example `{\"facts\":[],...}` gets **copied verbatim** → 0 facts extracted.\n2. Capable models emit **JSON objects inside arrays**, which Mnemopi's `String(item)` coerces into\n the literal string `[object Object]`.\n\nThe robust fix is a **one-item-per-line output format** (consumed by Mnemopi's parser line-fallback)\nor a **flat JSON array of strings**. 
Every model also over-extracts from pure small talk; an explicit\nchit-chat → NONE example is the best mitigation.\n\n### Technique polarity flips vs titles\n\n- At 1B+, **few-shot is the dominant quality lever**: e.g. Qwen2.5-1.5B extraction F1 0.52 → 0.83\n going 1 → 3 shots; gemma recall 0.65 → 0.92 with 2 shots.\n- **Prefill HURTS extraction** — it forces output on small talk, producing false positives.\n- **System-split** (instructions in the system role) helps models that have a system role.\n- **Greedy decoding >= temperature sampling** for both tasks.\n- **Token biasing** is again a no-op.\n\n### Per-model verdicts (head-to-head, 16-fixture set)\n\n- **Qwen3-1.7B** — most disciplined extraction: returns empty on small talk, no buried-fact leak,\n preserves language, clean flat JSON. Weaknesses: coarse granularity, missed a multi-turn value\n update.\n- **Qwen2.5-1.5B** — best extraction granularity (atomic facts), caught the value update, zero\n small-talk leakage. Weaknesses: weakest consolidation (run-on, no dedup) and one degenerate\n buried-fact output.\n- **gemma-3-1b** — best consolidation (dedup works, faithful, clean single-memory). Weaknesses: leaks\n small talk and translated German input instead of preserving its language.\n- **LFM2-1.2B** — solid and fastest to load. Weaknesses: `Label: value` noise, small-talk and buried-fact\n leaks, a fluffy single-memory summary.\n\n### Recommendation\n\nExtraction favors **precision** (do not pollute long-term memory) → **Qwen3-1.7B is the best single\npick** (its consolidation is good enough). 
If running a second model for consolidation, **gemma-3-1b**\nwins that task.\n\n**Shipped local options**: `qwen3-1.7b` (recommended), `gemma-3-1b`, `qwen2.5-1.5b`, `lfm2-1.2b`.\n**Default**: `online` (the configured smol model).\n\n### Known Mnemopi parser bugs (surfaced by these experiments)\n\n- `String(item)` produces `[object Object]` for array items that are objects.\n- The line-fallback drops items `<=10` chars, so a correct short fact like `Name: Can` is discarded.\n\n## Integration notes\n\n- `providers.tinyModel`, `providers.memoryModel`, and `providers.autoThinkingModel` default to\n `online`, so existing users get **no downloads or on-device inference cost** unless they opt in.\n- Local inference runs **in a worker** (off the main thread); models are cached on disk and\n downloaded on first use.\n- The local memory path applies the refined recipes (line-format + small-talk-guarded extraction\n prompt, hardened consolidation prompt) via Mnemopi prompt overrides; the **online path is\n unchanged**.\n- `providers.autoThinkingModel` uses the same shipped local options as `providers.memoryModel`.\n",
25
25
  "lsp-config.md": "# LSP configuration in OMP\n\nThis guide explains how to configure language servers for the OMP coding agent.\n\nSource of truth in code:\n\n- Server config type: `packages/coding-agent/src/lsp/types.ts` (`ServerConfig`)\n- Config loader: `packages/coding-agent/src/lsp/config.ts`\n- Built-in server definitions: `packages/coding-agent/src/lsp/defaults.json`\n\n## Auto-detection\n\nWhen no LSP config file is present, OMP auto-detects servers by intersecting two conditions:\n\n1. The project directory contains at least one of the server's `rootMarkers`.\n2. The server binary is available — checked in project-local bin directories first (e.g., `node_modules/.bin/`, `.venv/bin/`), then `$PATH`.\n\nNo configuration is required for common setups. The built-in server list covers most popular languages; see [`defaults.json`](../packages/coding-agent/src/lsp/defaults.json) for the full set.\n\n## Config file locations\n\nOMP merges LSP config from multiple files, lowest to highest priority:\n\n| Priority | Location |\n| ----------- | --------------------------------------------------------------------------------------------------------------------------- |\n| 5 (lowest) | `~/lsp.json`, `~/.lsp.json`, `~/lsp.yaml`, `~/.lsp.yaml`, `~/lsp.yml`, `~/.lsp.yml` |\n| 4 | Plugin LSP configs (marketplace / `--plugin-dir` roots) |\n| 3 | User config dirs: `~/.omp/agent/lsp.*`, `~/.claude/lsp.*`, `~/.codex/lsp.*`, `~/.gemini/lsp.*` |\n| 2 | Project config dirs: `<project>/.omp/lsp.*`, `<project>/.claude/lsp.*`, `<project>/.codex/lsp.*`, `<project>/.gemini/lsp.*` |\n| 1 (highest) | Project root: `<project>/lsp.*` and `<project>/.lsp.*` |\n\nEach location accepts `.json`, `.yaml`, and `.yml` variants, including hidden-file versions (`.lsp.json`, `.lsp.yaml`, `.lsp.yml`). Files are merged in order: higher-priority files override lower-priority fields for the same server. 
Servers not mentioned in any override file remain at their built-in defaults.\n\n**Recommended locations:**\n\n- User-wide preferences → `~/.omp/agent/lsp.json`\n- Project-specific overrides → `<project>/.omp/lsp.json`\n\n> **Note:** Auto-detection is skipped only when at least one config file contributes server overrides. A config file that only sets `idleTimeoutMs` still lets OMP auto-detect built-in servers. When server overrides exist, OMP merges them with defaults and then loads servers that have matching `rootMarkers`, an available binary, and are not explicitly `disabled`.\n\n## File shape\n\nBoth JSON and YAML are accepted. The top-level object can use either a `servers` wrapper key or a flat map directly:\n\n```json\n{\n \"servers\": {\n \"server-name\": { ... }\n },\n \"idleTimeoutMs\": 300000\n}\n```\n\nor (flat, without the `servers` wrapper):\n\n```json\n{\n \"server-name\": { ... },\n \"idleTimeoutMs\": 300000\n}\n```\n\nTop-level keys:\n\n- `servers` — map of server name to `ServerConfig` (optional wrapper; flat form is equivalent)\n- `idleTimeoutMs` — shut down idle language servers after this many milliseconds; disabled by default\n\n## ServerConfig fields\n\n| Field | Type | Required | Description |\n| ----------------- | ---------- | -------- | ---------------------------------------------------------------------------------------------------------------- |\n| `command` | `string` | yes | Binary name (resolved via PATH/local bins) or absolute path |\n| `args` | `string[]` | no | Arguments passed to the binary |\n| `fileTypes` | `string[]` | yes | File extensions this server handles, e.g. `[\".ts\", \".tsx\"]` |\n| `rootMarkers` | `string[]` | yes | Files/dirs that indicate a project root; glob patterns (e.g. 
`*.cabal`) are supported |\n| `initOptions` | `object` | no | Sent as `initializationOptions` during LSP handshake |\n| `settings` | `object` | no | Workspace settings pushed via `workspace/didChangeConfiguration` |\n| `disabled` | `boolean` | no | Set to `true` to disable this server entirely |\n| `warmupTimeoutMs` | `number` | no | Startup timeout in ms for this server (overrides the global default) |\n| `isLinter` | `boolean` | no | Mark server as linter/formatter only; excluded from type-intelligence operations (hover, go-to-definition, etc.) |\n| `capabilities` | `object` | no | Opt-in server-specific features; see [Capabilities](#capabilities) |\n\n`resolvedCommand` is populated automatically at runtime — do not set it manually.\n\n### Capabilities\n\nThe `capabilities` object enables optional server-specific features that OMP supports on a per-server basis:\n\n```json\n{\n \"capabilities\": {\n \"flycheck\": true,\n \"ssr\": true,\n \"expandMacro\": true,\n \"runnables\": true,\n \"relatedTests\": true\n }\n}\n```\n\nAll fields are boolean and optional. They are currently used by `rust-analyzer`.\n\n## Common recipes\n\n### Override a built-in server's settings\n\nPartial overrides are merged onto the built-in defaults. You only need to specify the fields you want to change.\n\n```json\n{\n \"servers\": {\n \"typescript-language-server\": {\n \"args\": [\"--stdio\", \"--log-level\", \"4\"]\n }\n }\n}\n```\n\n```yaml\nservers:\n gopls:\n settings:\n gopls:\n gofumpt: false\n staticcheck: false\n```\n\n### Disable a built-in server\n\n```json\n{\n \"servers\": {\n \"eslint\": {\n \"disabled\": true\n }\n }\n}\n```\n\n### Register a custom server\n\nNew servers require `command`, `fileTypes`, and `rootMarkers`. 
All other fields are optional.\n\n```json\n{\n \"servers\": {\n \"my-lsp\": {\n \"command\": \"my-lsp-server\",\n \"args\": [\"--stdio\"],\n \"fileTypes\": [\".xyz\"],\n \"rootMarkers\": [\".xyz-project\", \".git\"]\n }\n }\n}\n```\n\n### Set a global idle timeout\n\nShut down language servers that have been inactive for more than five minutes:\n\n```json\n{\n \"idleTimeoutMs\": 300000\n}\n```\n\n### Disable a server for one project, keep it globally\n\nPlace the override in `<project>/.omp/lsp.json`:\n\n```json\n{\n \"servers\": {\n \"pylsp\": {\n \"disabled\": true\n }\n }\n}\n```\n\nThe user-level config in `~/.omp/agent/lsp.json` is unaffected; pylsp is only suppressed in this project.\n\n## Built-in server list\n\nThe following servers ship in `defaults.json` and are eligible for auto-detection:\n\n| Server key | Language(s) | Binary |\n| ----------------------------- | ----------------------------- | --------------------------------- |\n| `rust-analyzer` | Rust | `rust-analyzer` |\n| `clangd` | C, C++, ObjC | `clangd` |\n| `zls` | Zig | `zls` |\n| `gopls` | Go | `gopls` |\n| `typescript-language-server` | TypeScript, JavaScript | `typescript-language-server` |\n| `denols` | TypeScript, JavaScript (Deno) | `deno` |\n| `biome` | TS/JS/JSON (linter) | `biome` |\n| `eslint` | TS/JS/Vue/Svelte (linter) | `vscode-eslint-language-server` |\n| `vscode-html-language-server` | HTML | `vscode-html-language-server` |\n| `vscode-css-language-server` | CSS, SCSS, Less | `vscode-css-language-server` |\n| `vscode-json-language-server` | JSON | `vscode-json-language-server` |\n| `tailwindcss` | HTML, CSS, TS/JS | `tailwindcss-language-server` |\n| `svelte` | Svelte | `svelteserver` |\n| `vue-language-server` | Vue | `vue-language-server` |\n| `astro` | Astro | `astro-ls` |\n| `pyright` | Python | `pyright-langserver` |\n| `basedpyright` | Python | `basedpyright-langserver` |\n| `pylsp` | Python | `pylsp` |\n| `ruff` | Python (linter) | `ruff` |\n| `jdtls` | Java | `jdtls` 
|\n| `kotlin-lsp` | Kotlin | `kotlin-lsp` |\n| `metals` | Scala | `metals` |\n| `hls` | Haskell | `haskell-language-server-wrapper` |\n| `ocamllsp` | OCaml | `ocamllsp` |\n| `elixirls` | Elixir | `elixir-ls` |\n| `erlangls` | Erlang | `erlang_ls` |\n| `gleam` | Gleam | `gleam` |\n| `solargraph` | Ruby | `solargraph` |\n| `ruby-lsp` | Ruby | `ruby-lsp` |\n| `rubocop` | Ruby (linter) | `rubocop` |\n| `bashls` | Bash, Zsh | `bash-language-server` |\n| `lua-language-server` | Lua | `lua-language-server` |\n| `intelephense` | PHP | `intelephense` |\n| `phpactor` | PHP | `phpactor` |\n| `omnisharp` | C# | `omnisharp` |\n| `yamlls` | YAML | `yaml-language-server` |\n| `terraformls` | Terraform | `terraform-ls` |\n| `dockerls` | Dockerfile | `docker-langserver` |\n| `helm-ls` | Helm | `helm_ls` |\n| `nixd` | Nix | `nixd` |\n| `nil` | Nix | `nil` |\n| `ols` | Odin | `ols` |\n| `dartls` | Dart | `dart` |\n| `marksman` | Markdown | `marksman` |\n| `texlab` | LaTeX | `texlab` |\n| `graphql` | GraphQL | `graphql-lsp` |\n| `prismals` | Prisma | `prisma-language-server` |\n| `vimls` | Vim script | `vim-language-server` |\n| `emmet-language-server` | HTML, CSS, JSX | `emmet-language-server` |\n| `sourcekit-lsp` | Swift | `sourcekit-lsp` |\n| `swiftlint` | Swift (linter) | `swiftlint` |\n| `tlaplus` | TLA+ | `tlapm_lsp` |\n",
26
26
  "marketplace.md": "# Marketplace plugin system\n\nThe marketplace system lets you discover, install, and manage plugins from Git, local, or direct-catalog sources. It is compatible with the Claude Code plugin registry format.\n\n## Quick start\n\n```\n/marketplace add anthropics/claude-plugins-official\n/marketplace install wordpress.com@claude-plugins-official\n```\n\nIn the TUI, `/marketplace` with no arguments opens the interactive plugin browser. In non-TUI command handling, `/marketplace` lists configured marketplaces; use `/marketplace discover` to browse.\n\n## Concepts\n\nA **marketplace** is a Git repository (or local directory) containing a catalog file at `.claude-plugin/marketplace.json`. The catalog lists available plugins with their sources, descriptions, and metadata.\n\nA **plugin** is a directory containing Claude/OMP plugin content such as skills, commands, hooks, tools, MCP servers, LSP servers, rules, prompts, or extension modules. Plugins are identified by `name@marketplace` (e.g. `code-review@claude-plugins-official`).\n\n**Scopes**: marketplace plugins can be installed at two scopes:\n\n- **user** (default) -- available in all projects, stored in `~/.omp/plugins/installed_plugins.json`\n- **project** -- available only in the active project, stored in the nearest project `.omp/plugins/installed_plugins.json`\n\nEnabled project-scoped installs shadow enabled user-scoped installs of the same plugin. 
A disabled project install does not shadow the user install.\n\n## Commands\n\n### Interactive mode\n\n| Command | Effect |\n| -------------- | ----------------------------------------- |\n| `/marketplace` | Open interactive plugin browser (install) |\n\n### Marketplace management\n\n| Command | Effect |\n| ---------------------------- | -------------------------------------------- |\n| `/marketplace add <source>` | Add a marketplace source |\n| `/marketplace remove <name>` | Remove a marketplace |\n| `/marketplace update [name]` | Re-fetch catalog(s); omit name to update all |\n| `/marketplace list` | List configured marketplaces |\n\n### Plugin operations\n\n| Command | Effect |\n| ------------------------------------------------------------------------- | -------------------------------------------------- |\n| `/marketplace discover [marketplace]` | Browse available plugins |\n| `/marketplace install [--force] [--scope user\\|project] name@marketplace` | Install a plugin |\n| `/marketplace uninstall [--scope user\\|project] name@marketplace` | Uninstall a plugin; no args opens the TUI selector |\n| `/marketplace installed` | List installed marketplace plugins |\n| `/marketplace upgrade [--scope user\\|project] [name@marketplace]` | Upgrade one or all plugins |\n| `/plugins list` | List npm/link and marketplace plugins |\n| `/plugins enable [--scope user\\|project] name@marketplace` | Enable a marketplace plugin |\n| `/plugins disable [--scope user\\|project] name@marketplace` | Disable a marketplace plugin |\n\n### CLI equivalents\n\nThe same operations are available from the command line:\n\n```\nomp plugin marketplace add <source>\nomp plugin marketplace remove <name>\nomp plugin marketplace update [name]\nomp plugin marketplace list\nomp plugin discover [marketplace]\nomp plugin install [--force] [--scope user|project] name@marketplace\nomp plugin uninstall [--scope user|project] name@marketplace\nomp plugin upgrade [--scope user|project] 
[name@marketplace]\nomp plugin enable [--scope user|project] name@marketplace\nomp plugin disable [--scope user|project] name@marketplace\n```\n\n## Marketplace sources\n\nWhen you run `/marketplace add <source>`, the system classifies the source:\n\n| Source format | Type | Example |\n| ------------------------------- | -------------------------------------------------- | -------------------------------------- |\n| `owner/repo` | GitHub shorthand | `anthropics/claude-plugins-official` |\n| `https://...*.json` | Direct catalog URL | `https://example.com/marketplace.json` |\n| `https://...` / `http://...` | Git repository unless the URL path ends in `.json` | `https://github.com/org/repo` |\n| `git@...` / `ssh://...` | Git repository | `git@github.com:org/repo.git` |\n| `./path` or `~/path` or `/path` | Local directory | `./my-marketplace` |\n\nGit and local sources must contain `.claude-plugin/marketplace.json`. Direct catalog URLs cache only the JSON catalog; plugins in URL-sourced catalogs cannot use relative string sources like `\"./plugins/foo\"`.\n\n## Catalog format (marketplace.json)\n\nA marketplace catalog lives at `.claude-plugin/marketplace.json` in the repository root:\n\n```json\n{\n \"$schema\": \"https://anthropic.com/claude-code/marketplace.schema.json\",\n \"name\": \"my-marketplace\",\n \"owner\": {\n \"name\": \"Your Name\",\n \"email\": \"you@example.com\"\n },\n \"metadata\": {\n \"description\": \"A collection of plugins\",\n \"version\": \"1.0.0\",\n \"pluginRoot\": \"plugins\"\n },\n \"plugins\": [\n {\n \"name\": \"my-plugin\",\n \"description\": \"What this plugin does\",\n \"source\": \"./my-plugin\",\n \"category\": \"development\",\n \"homepage\": \"https://github.com/you/my-plugin\"\n }\n ]\n}\n```\n\n### Required fields\n\n| Field | Description |\n| ------------ | ---------------------------------------------------------------------------------------------------------------- |\n| `name` | Marketplace name. 
Lowercase alphanumeric, hyphens, and dots. Must start and end with alphanumeric. Max 64 chars. |\n| `owner.name` | Marketplace owner name |\n| `plugins` | Array of plugin entries |\n\nTop-level `metadata.description`, `metadata.version`, and `metadata.pluginRoot` are optional. When `metadata.pluginRoot` is set, it is prepended to relative plugin `source` paths.\n\n### Plugin entry fields\n\n| Field | Required | Description |\n| ------------- | -------- | --------------------------------------------------------------------------------------- |\n| `name` | yes | Plugin name (same rules as marketplace name) |\n| `source` | yes | Where to find the plugin (see below) |\n| `description` | no | Short description |\n| `version` | no | Version string; install version falls back to plugin manifest, source SHA, then `0.0.0` |\n| `author` | no | `{ name, email? }` |\n| `homepage` | no | URL |\n| `repository` | no | Repository URL/string |\n| `license` | no | License string |\n| `keywords` | no | Array of string keywords |\n| `category` | no | Category string (e.g. `development`, `productivity`, `security`) |\n| `tags` | no | Array of string tags |\n| `strict` | no | Boolean |\n| `commands` | no | Slash commands provided |\n| `agents` | no | Agents provided |\n| `hooks` | no | Hook definitions |\n| `mcpServers` | no | MCP server definitions |\n| `lspServers` | no | LSP server definitions or path; copied to `.lsp.json` on install |\n\n### Plugin source formats\n\nThe `source` field supports these formats. 
String sources must start with `./` and are resolved inside the marketplace root, after optional `metadata.pluginRoot` is prepended:\n\n**Relative path** (within the marketplace repo):\n\n```json\n\"source\": \"./my-plugin\"\n```\n\n**Git repository URL**:\n\n```json\n\"source\": {\n \"source\": \"url\",\n \"url\": \"https://github.com/org/repo.git\",\n \"sha\": \"abc123...\"\n}\n```\n\n**GitHub shorthand**:\n\n```json\n\"source\": {\n \"source\": \"github\",\n \"repo\": \"org/repo\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**Git subdirectory** (monorepo):\n\n```json\n\"source\": {\n \"source\": \"git-subdir\",\n \"url\": \"https://github.com/org/monorepo.git\",\n \"path\": \"plugins/my-plugin\",\n \"ref\": \"main\",\n \"sha\": \"abc123...\"\n}\n```\n\n**npm package** (parsed but not installable yet):\n\n```json\n\"source\": {\n \"source\": \"npm\",\n \"package\": \"@scope/my-plugin\",\n \"version\": \"1.0.0\"\n}\n```\n\nCurrent installer behavior rejects npm marketplace sources with `npm plugin sources are not yet supported`; use relative, GitHub, URL, or git-subdir sources.\n\n## On-disk layout\n\n```\n~/.omp/\n marketplaces.json # Registry of added marketplaces\n plugins/\n installed_plugins.json # User-scoped marketplace plugins (version: 2)\n cache/\n marketplaces/<name>/ # Cached marketplace clone/catalog\n plugins/<marketplace>___<plugin>___<version>/ # Cached plugin directories\n\n<project>/.omp/\n plugins/\n installed_plugins.json # Project-scoped marketplace plugins (version: 2)\n```\n\n## Naming rules\n\nMarketplace and plugin names must:\n\n- Start and end with a lowercase letter or digit\n- Contain only lowercase letters, digits, hyphens, and dots\n- Be at most 64 characters\n\nPlugin IDs (`name@marketplace`) must be at most 128 characters total.\n\nValid examples: `my-plugin`, `code-review`, `wordpress.com`, `ai-firstify`\nInvalid examples: `-bad`, `bad-`, `.bad`, `Bad`, `under_score`\n",
@@ -30,7 +30,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
30
30
  "mcp-server-tool-authoring.md": "# MCP server and tool authoring\n\nThis document explains how MCP server definitions become callable `mcp__*` tools in coding-agent, and what operators should expect when configs are invalid, duplicated, disabled, or auth-gated.\n\n## Architecture at a glance\n\n```text\nConfig sources (.omp/.claude/.cursor/.vscode/mcp.json, mcp.json, etc.)\n -> discovery providers normalize to canonical MCPServer\n -> capability loader dedupes by server name (higher provider priority wins)\n -> loadAllMCPConfigs converts to MCPServerConfig + skips enabled:false\n -> MCPManager connects/listTools (with auth/header/env resolution)\n -> manager best-effort loads resources/prompts and subscribes to resource updates when enabled\n -> MCPTool/DeferredMCPTool bridge exposes tools as mcp__<server>_<tool>\n -> AgentSession.refreshMCPTools replaces live MCP tools immediately\n```\n\n## 1) Server config model and validation\n\n`src/mcp/types.ts` defines the authoring shape used by MCP config writers and runtime:\n\n- `stdio` (default when `type` missing): requires `command`, optional `args`, `env`, `cwd`\n- `http`: requires `url`, optional `headers`\n- `sse`: requires `url`, optional `headers` (kept for compatibility)\n- shared fields: `enabled`, `timeout`, `auth`, `oauth`\n\n`validateServerConfig()` (`src/mcp/config.ts`) enforces transport basics:\n\n- rejects configs that set both `command` and `url`\n- requires `command` for stdio\n- requires `url` for http/sse\n- rejects unknown `type`\n\n`config-writer.ts` applies this validation for add/update operations and also validates server names:\n\n- non-empty\n- max 100 chars\n- only `[a-zA-Z0-9_.-]`\n\n### Transport pitfalls\n\n- `type` omitted means stdio. 
If you intended HTTP/SSE but omitted `type`, `command` becomes mandatory.\n- `sse` is still accepted but treated as HTTP transport internally (`createHttpTransport`).\n- Validation is structural, not reachability: a syntactically valid URL can still fail at connect time.\n\n## 2) Discovery, normalization, and precedence\n\n### Capability-based discovery\n\n`loadAllMCPConfigs()` (`src/mcp/config.ts`) loads canonical `MCPServer` items via `loadCapability(mcpCapability.id)`.\n\nThe capability layer (`src/capability/index.ts`) then:\n\n1. loads providers in priority order\n2. dedupes by `server.name` (first win = highest priority)\n3. validates deduped items\n\nResult: duplicate server names across sources are not merged. One definition wins; lower-priority duplicates are shadowed.\n\n### `.mcp.json` and related files\n\nThe dedicated fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json` (low priority).\n\nIn practice MCP servers also come from higher-priority providers (for example native `.omp/...` and tool-specific config dirs). Authoring guidance:\n\n- Prefer `.omp/mcp.json` (project) or `~/.omp/agent/mcp.json` (user) for explicit control.\n- Use root `mcp.json` / `.mcp.json` when you need fallback compatibility.\n- Reusing the same server name in multiple sources causes precedence shadowing, not merge.\n\n### Normalization behavior\n\n`convertToLegacyConfig()` (`src/mcp/config.ts`) maps canonical `MCPServer` to runtime `MCPServerConfig`.\n\nKey behavior:\n\n- transport inferred as `server.transport ?? (command ? \"stdio\" : url ? 
\"http\" : \"stdio\")`\n- disabled servers (`enabled === false`) and names in the user `disabledServers` list are dropped before connection\n- optional fields are preserved when present\n\n### Environment expansion during discovery\n\nOMP-native MCP config (`.omp/mcp.json`, `~/.omp/agent/mcp.json`, plus their `.mcp.json` variants) expands `${VAR}` and `${VAR:-default}` placeholders recursively before converting to runtime config. It also accepts boolean/string forms for `enabled` (`true`, `false`, `1`, `0`) and numeric strings for `timeout`.\n\nThe standalone fallback provider in `src/discovery/mcp-json.ts` reads project-root `mcp.json` and `.mcp.json`, expands the same `${...}` placeholders, and type-checks `enabled`/`timeout` without coercing string values.\n\nInvalid `enabled`/`timeout` values are ignored with warnings rather than failing the whole file.\n\n## 3) Auth and runtime value resolution\n\n`MCPManager.prepareConfig()`/`#resolveAuthConfig()` (`src/mcp/manager.ts`) is the final pre-connect pass.\n\n### OAuth credential injection\n\nIf config has:\n\n```ts\nauth: { type: \"oauth\", credentialId: \"...\" }\n```\n\nand credential exists in auth storage:\n\n- `http`/`sse`: injects `Authorization: Bearer <access_token>` header\n- `stdio`: injects `OAUTH_ACCESS_TOKEN` env var\n\nIf credential lookup fails, manager logs a warning and continues with unresolved auth.\n\n### Header/env value resolution\n\nBefore connect, manager resolves stdio `env` values and HTTP/SSE `headers` values via `resolveConfigValue()` (`src/config/resolve-config-value.ts`):\n\n- value starting with `!` => execute shell command, use trimmed stdout (cached)\n- failed, timed-out, or whitespace-only commands produce `undefined`, so that entry is omitted\n- otherwise, treat value as environment variable name first (`process.env[name]`), fallback to literal value\n\nOperational caveat: a mistyped `!` secret command can silently remove that header/env entry, producing downstream 401/403 or 
server startup failures. A mistyped environment variable name is not detected: the name itself is sent as the literal value, which only works if that literal happens to be meaningful to the server.\n\n## 4) Tool bridge: MCP -> agent-callable tools\n\n`src/mcp/tool-bridge.ts` converts MCP tool definitions into `CustomTool`s.\n\n### Naming and collision domain\n\nTool names are generated as:\n\n```text\nmcp__<sanitized_server_name>_<sanitized_tool_name>\n```\n\nRules:\n\n- lowercases\n- non-`[a-z_]` chars become `_`\n- repeated underscores collapse\n- redundant `<server>_` prefix in tool name is stripped once\n\nThis avoids many collisions, but not all. Different raw names can still sanitize to the same identifier (for example `my-server` and `my.server` both sanitize to `my_server`), and registry insertion is last-write-wins.\n\n### Schema mapping\n\n`tool-bridge.ts` passes each MCP `inputSchema` through `normalizeSchemaForMCP()` before registering it as a `CustomTool` schema.\n\n### Execution mapping\n\n`MCPTool.execute()` / `DeferredMCPTool.execute()`:\n\n- calls MCP `tools/call`\n- flattens MCP content into displayable text\n- returns structured details (`serverName`, `mcpToolName`, provider metadata)\n- maps server-reported `isError` to `Error: ...` text result\n- attempts reconnect + one retry for retriable connection errors\n- maps remaining thrown transport/runtime failures to `MCP error: ...`\n- preserves abort semantics by translating AbortError into `ToolAbortError`\n\n## 5) Operator lifecycle: add/edit/remove and live updates\n\nInteractive mode exposes `/mcp` in `src/modes/controllers/mcp-command-controller.ts`.\n\nSupported operations:\n\n- `add` (wizard or quick-add)\n- `remove` / `rm`\n- `enable` / `disable`\n- `test`\n- `reauth` / `unauth`\n- `reconnect`\n- `reload`\n- `resources`, `prompts`, `notifications`\n- Smithery search/login/logout flows\n\nConfig writes are atomic (`writeMCPConfigFile`: temp file + rename).\n\nAfter changes, controller calls `#reloadMCP()`:\n\n1. `mcpManager.disconnectAll()`\n2. 
`mcpManager.discoverAndConnect()`\n3. `session.refreshMCPTools(mcpManager.getTools())`\n\n`refreshMCPTools()` replaces all `mcp__` registry entries and immediately re-activates the latest MCP tool set, so changes take effect without restarting the session.\n\n### Mode differences\n\n- **Interactive/TUI mode**: `/mcp` gives in-app UX (wizard, OAuth flow, connection status text, immediate runtime rebinding).\n- **SDK/headless integration**: `discoverAndLoadMCPTools()` (`src/mcp/loader.ts`) returns loaded tools + per-server errors; no `/mcp` command UX.\n\n## 6) User-visible error surfaces\n\nCommon error strings users/operators see:\n\n- add/update validation failures:\n - `Invalid server config: ...`\n - `Server \"<name>\" already exists in <path>`\n- quick-add argument issues:\n - `Use either --url or -- <command...>, not both.`\n - `--token requires --url (HTTP/SSE transport).`\n- connect/test failures:\n - `Failed to connect to \"<name>\": <message>`\n - timeout help text suggests increasing timeout\n - auth help text for `401/403`\n- auth/OAuth flows:\n - `Authentication required ... OAuth endpoints could not be discovered`\n - `OAuth flow timed out. Please try again.`\n - `OAuth authentication failed: ...`\n- disabled server usage:\n - `Server \"<name>\" is disabled. Run /mcp enable <name> first.`\n\nBad source JSON in discovery is generally handled as warnings/logs; config-writer paths throw explicit errors.\n\n## 7) Practical authoring guidance\n\nFor robust MCP authoring in this codebase:\n\n1. Keep server names globally unique across all MCP-capable config sources.\n2. Prefer names that remain distinct after MCP tool-name sanitization to avoid generated `mcp__` collisions.\n3. Use explicit `type` to avoid accidental stdio defaults.\n4. Treat `enabled: false` as hard-off: server is omitted from runtime connect set.\n5. For OAuth configs, store a valid `credentialId`; otherwise auth injection is skipped.\n6. 
If using command-based secret resolution (`!cmd`), verify command output is stable and non-empty.\n\n## Implementation files\n\n- [`src/mcp/types.ts`](../packages/coding-agent/src/mcp/types.ts)\n- [`src/mcp/config.ts`](../packages/coding-agent/src/mcp/config.ts)\n- [`src/mcp/config-writer.ts`](../packages/coding-agent/src/mcp/config-writer.ts)\n- [`src/mcp/tool-bridge.ts`](../packages/coding-agent/src/mcp/tool-bridge.ts)\n- [`src/discovery/mcp-json.ts`](../packages/coding-agent/src/discovery/mcp-json.ts)\n- [`src/modes/controllers/mcp-command-controller.ts`](../packages/coding-agent/src/modes/controllers/mcp-command-controller.ts)\n- [`src/mcp/manager.ts`](../packages/coding-agent/src/mcp/manager.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/config/resolve-config-value.ts`](../packages/coding-agent/src/config/resolve-config-value.ts)\n- [`src/mcp/loader.ts`](../packages/coding-agent/src/mcp/loader.ts)\n",
31
31
  "memory.md": "# Autonomous Memory\n\nWhen the local memory backend is enabled, the agent automatically extracts durable knowledge from past sessions and injects a compact summary into future sessions for the same project. Over time it builds a project-scoped memory store — technical decisions, recurring workflows, pitfalls — that carries forward without manual effort.\n\nDisabled by default. Enable the local summary pipeline via `/settings` or `config.yml`:\n\n```yaml\nmemory:\n backend: local\n```\n\n## Usage\n\n### What gets injected\n\nAt session start, if a memory summary exists for the current project, it is injected into the system prompt as a **Memory Guidance** block. The agent is instructed to:\n\n- Treat memory as heuristic context — useful for process and prior decisions, not authoritative on current repo state.\n- Cite the memory artifact path when memory changes the plan, and pair it with current-repo evidence before acting.\n- Prefer repo state and user instruction when they conflict with memory; treat conflicting memory as stale.\n\n### Reading memory artifacts\n\nThe agent can read memory files directly using `memory://` URLs with the `read` tool:\n\n| URL | Content |\n| -------------------------------------- | ----------------------------------- |\n| `memory://root` | Compact summary injected at startup |\n| `memory://root/MEMORY.md` | Full long-term memory document |\n| `memory://root/skills/<name>/SKILL.md` | A generated skill playbook |\n\n### `/memory` slash command\n\n| Subcommand | Effect |\n| --------------------- | --------------------------------------------------------- |\n| `view` | Show the current backend injection payload |\n| `stats` | Show backend-specific memory statistics, when supported |\n| `diagnose` | Show backend-specific diagnostics, when supported |\n| `clear` / `reset` | Delete active backend memory data/artifacts |\n| `enqueue` / `rebuild` | Force consolidation/retention work for the active backend |\n\n## How it 
works\n\nLocal summary memories are built by a background pipeline that runs at startup or when manually triggered via slash command. The pipeline is skipped for subagents and for sessions that are not persisted to a session file.\n\n**Phase 1 — per-session extraction:** For each past session that has changed since it was last processed, a model reads the session history and extracts durable signal: technical decisions, constraints, resolved failures, recurring workflows. Sessions that are too recent, too old, currently active, or beyond the configured scan/age limits are skipped. Each extraction produces a raw memory block and a short synopsis for that session.\n\n**Phase 2 — consolidation:** After extraction, a second model pass reads all per-session extractions and produces three outputs written to disk:\n\n- `MEMORY.md` — a curated long-term memory document\n- `memory_summary.md` — the compact text injected at session start\n- `skills/` — reusable procedural playbooks, each in its own subdirectory\n\nPhase 2 uses a lease and heartbeat to prevent double-running when multiple processes start simultaneously. 
Stale skill directories from prior runs are pruned automatically.\n\nConsolidated output is redacted for common secret/token patterns before `MEMORY.md`, `memory_summary.md`, or generated skills are written to disk.\n\n### Extraction behavior\n\nMemory extraction and consolidation behavior is driven by static prompt files in `packages/coding-agent/src/prompts/memories/`.\n\n| File | Purpose | Variables |\n| --------------------- | ------------------------------------------- | ------------------------------------------- |\n| `stage_one_system.md` | System prompt for per-session extraction | — |\n| `stage_one_input.md` | User-turn template wrapping session content | `{{thread_id}}`, `{{response_items_json}}` |\n| `consolidation.md` | Prompt for cross-session consolidation | `{{raw_memories}}`, `{{rollout_summaries}}` |\n| `read_path.md` | Memory guidance injected into live sessions | `{{memory_summary}}` |\n\n### Model selection\n\nMemory piggybacks on the model role system.\n\n| Phase | Role | Purpose |\n| ----------------------- | ------------------------------------------------------------------- | -------------------------------- |\n| Phase 1 (extraction) | `default` | Per-session knowledge extraction |\n| Phase 2 (consolidation) | `smol` (falls back to `default`, then current/first registry model) | Cross-session synthesis |\n\nIf the requested memory role is not configured, memory model resolution falls back to the `default` role, then the active session model, then the first model in the registry.\n\n## Configuration\n\n| Setting | Default | Description |\n| ------------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------- |\n| `memory.backend` | `off` | Select `local` for this pipeline; legacy `memories.enabled: true` is migrated to `memory.backend: local` when no explicit backend is set |\n| `memories.maxRolloutAgeDays` | `30` | Sessions 
older than this are not processed |\n| `memories.minRolloutIdleHours` | `12` | Sessions active more recently than this are skipped |\n| `memories.maxRolloutsPerStartup` | `64` | Cap on sessions processed in a single startup |\n| `memories.summaryInjectionTokenLimit` | `5000` | Max tokens of the summary injected into the system prompt |\n\nAdditional tuning knobs (concurrency, lease durations, token budgets) are available in config for advanced use.\n\n## Key files\n\n- `packages/coding-agent/src/memories/index.ts` — pipeline orchestration, injection, slash command handling\n- `packages/coding-agent/src/memories/storage.ts` — SQLite-backed job queue and thread registry\n- `packages/coding-agent/src/prompts/memories/` — memory prompt templates\n- `packages/coding-agent/src/internal-urls/memory-protocol.ts` — `memory://` URL handler\n",
32
32
  "mnemosyne-memory-backend.md": "# Mnemopi memory backend\n\nOh My Pi can use `@oh-my-pi/pi-mnemopi` as a local long-term memory backend.\n\nSet:\n\n```yaml\nmemory:\n backend: mnemopi\n```\n\nExample:\n\n```yaml\nmemory:\n backend: mnemopi\nmnemopi:\n scoping: per-project-tagged\n```\n\nWith this backend enabled, the coding agent:\n\n1. Opens one or more local Mnemopi SQLite databases according to the configured bank scoping.\n2. Recalls relevant memories into a `<memories>` block for the first model turn of a session and refreshes the base prompt if recall happens from the `agent_start` listener.\n3. Retains completed conversation turns into the retain bank after agent turns, no more often than `mnemopi.retainEveryNTurns`.\n4. Adds recalled memory as extra compaction context when compaction asks the memory backend for `preCompactionContext`.\n5. Uses the normal `/memory view`, `/memory stats`, `/memory diagnose`, `/memory clear`, and `/memory enqueue` commands through the shared memory backend interface.\n\nRecalled memory is background context, not instructions. Current user messages and tool output take precedence when they conflict.\n\n## Settings\n\n| Setting | Default | Description |\n| ------------------------------- | ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `memory.backend` | `off` | Set to `mnemopi` to enable this backend. |\n| `mnemopi.dbPath` | agent memories dir | Optional SQLite database path. |\n| `mnemopi.bank` | project directory name | Base bank name passed to `Mnemopi`; the coding-agent wrapper scopes from this base according to `mnemopi.scoping`. |\n| `mnemopi.scoping` | `per-project` | Memory visibility mode: `global` = one shared bank, `per-project` = isolated project memory, `per-project-tagged` = project-local writes plus global recall visibility. 
|\n| `mnemopi.autoRecall` | `true` | Recall memory on the first turn of a session. |\n| `mnemopi.autoRetain` | `true` | Retain completed turns automatically. |\n| `mnemopi.retainEveryNTurns` | `4` | Minimum user turns between automatic retain writes. |\n| `mnemopi.recallLimit` | `8` | Maximum recalled memories in the prompt block. |\n| `mnemopi.recallContextTurns` | `3` | Prior user-bounded turns included in recall queries. |\n| `mnemopi.recallMaxQueryChars` | `4000` | Maximum composed recall query length. |\n| `mnemopi.injectionTokenLimit` | `5000` | Approximate token budget for memory prompt injection. |\n| `mnemopi.debug` | `false` | Enable debug logging for backend failures. |\n| `mnemopi.noEmbeddings` | `false` | Pass `noEmbeddings` to `Mnemopi` and force FTS-only recall. |\n| `mnemopi.embeddingModel` | env/default | Embedding model passed to `Mnemopi`. |\n| `mnemopi.embeddingApiUrl` | env/default | OpenAI-compatible embedding endpoint passed to `Mnemopi`. |\n| `mnemopi.embeddingApiKey` | env/default | Embedding API key passed to `Mnemopi`. |\n| `mnemopi.llmMode` | `smol` | `smol` uses the configured pi-ai smol model, `remote` uses the settings below, and `none` disables LLM calls. |\n| `mnemopi.llmBaseUrl` | env/default | OpenAI-compatible LLM endpoint for `llmMode: remote`. |\n| `mnemopi.llmApiKey` | env/default | LLM API key for `llmMode: remote`. |\n| `mnemopi.llmModel` | env/default | LLM model id for `llmMode: remote`. |\n\n## Scoping\n\nThe coding-agent wrapper applies scoping on top of the underlying `Mnemopi` package:\n\n- `global` uses one shared bank for recall and writes.\n- `per-project` writes to and recalls from a bank derived from the current git repository root (or cwd) plus a stable hash.\n- `per-project-tagged` writes to the project-local bank and recalls from both the project-local bank and the shared global bank, with duplicate recall results merged.\n\nThe combined project-plus-global behavior lives in the wrapper. 
The `@oh-my-pi/pi-mnemopi` package itself still exposes banks and constructor options directly, including `bank` for selecting a bank name. Project-local banks other than the shared bank are stored as sibling bank databases managed by Mnemopi's `BankManager`.\n\n## LLM and embeddings\n\nThe backend passes these settings to the `Mnemopi` constructor; if a setting is omitted, Mnemopi falls back to its `MNEMOPI_*` environment defaults. The backend does not download or run a local GGUF LLM. LLM-dependent paths use a configured pi-ai model, a dynamic completion function, a remote OpenAI-compatible endpoint, or deterministic no-LLM fallbacks.\n\nFTS-only:\n\n```yaml\nmemory:\n backend: mnemopi\nmnemopi:\n noEmbeddings: true\n```\n\nEquivalent constructor shape:\n\n```ts\nnew Mnemopi({ noEmbeddings: true });\n```\n\nRemote embeddings:\n\n```yaml\nmnemopi:\n embeddingModel: text-embedding-3-small\n embeddingApiUrl: https://api.openai.com/v1\n embeddingApiKey: ${OPENAI_API_KEY}\n```\n\nEquivalent constructor shape:\n\n```ts\nnew Mnemopi({\n embeddingModel: \"text-embedding-3-small\",\n embeddingApiUrl: \"https://api.openai.com/v1\",\n embeddingApiKey,\n});\n```\n\nRemote LLM:\n\n```yaml\nmnemopi:\n llmMode: remote\n llmBaseUrl: https://api.openai.com/v1\n llmApiKey: ${OPENAI_API_KEY}\n llmModel: gpt-4.1-mini\n```\n\nEquivalent constructor shapes:\n\n```ts\nnew Mnemopi({ llm: { baseUrl, apiKey, model } });\nnew Mnemopi({ llmBaseUrl: baseUrl, llmApiKey: apiKey, llmModel: model });\n```\n\nDynamic function LLM for rotating OAuth tokens:\n\n```ts\nnew Mnemopi({\n llm: async (prompt, opts) => {\n const token = await getFreshOauthToken();\n return await completeWithPiAi(prompt, {\n token,\n maxTokens: opts?.maxTokens,\n temperature: opts?.temperature,\n });\n },\n});\n```\n\npi-ai smol model LLM:\n\n```yaml\nmnemopi:\n llmMode: smol\n```\n\nThe coding agent resolves its configured smol role and passes a dynamic completion function so every Mnemopi LLM call can fetch the current 
provider credentials at call time:\n\n```ts\nnew Mnemopi({\n llm: async (prompt, opts) => completeSmolWithCurrentAuth(prompt, opts),\n});\n```\n\n## Operational notes\n\n- The default shared database lives under the agent memories directory in `mnemopi/mnemopi.db`; project-scoped banks use sibling database paths under that Mnemopi directory.\n- `/memory clear` removes every scoped Mnemopi SQLite database and sidecar WAL/SHM files for the active configuration.\n- `/memory enqueue` forces retention of the current session, flushes pending fact extractions, and runs Mnemopi sleep/consolidation.\n- `/memory stats` and `/memory diagnose` render backend-specific bank statistics/diagnostics when the Mnemopi backend is active.\n- Subagents do not own separate Mnemopi retain loops; they alias the parent state when a parent Mnemopi state exists, and otherwise remain inert.\n",
33
- "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for 
Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, `lm-studio`, `openai-models-list`, or `proxy`\n- `transport`: `pi-native` only. When set, every model under that provider is sent to an `omp auth-gateway` compatible `baseUrl` via `POST /v1/pi/stream`; `apiKey` is the gateway bearer.\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `apiKey`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. 
Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. 
bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `thinking`, `input`, `cost`, `premiumMultiplier`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL`, or `OLLAMA_HOST`, or `http://127.0.0.1:11434`\n- context window: `OLLAMA_CONTEXT_LENGTH` if set, otherwise Ollama `/api/show` metadata, otherwise `128000`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n`OLLAMA_CONTEXT_LENGTH` does 
not configure Ollama's runtime `num_ctx`; set that in Ollama/model configuration separately.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Proxy discovery (`discovery.type: proxy`)\n\nFor Anthropic+OpenAI-compatible proxies (new-api / one-api / similar)\nthat expose both `/v1/messages` and `/v1/chat/completions` behind the same\nhost. Discovery hits `GET /v1/models` (10s timeout, OpenAI-style payload) and\nderives each model's `api` from the entry's `supported_endpoint_types`:\n\n- contains `\"anthropic\"` -> `api: anthropic-messages` (routes via `/v1/messages`)\n- contains `\"openai\"` -> `api: openai-completions` (routes via `/v1/chat/completions`)\n- otherwise -> falls back to provider-level `api` if set, else dropped\n\nProvider-level `api` is **optional** with `discovery.type: proxy` because the\nper-model wire is auto-detected. 
The Anthropic SDK strips a trailing `/v1`\nfrom `baseUrl` before appending `/v1/messages`, so a single discovery `baseUrl`\n(ending in `/v1`) round-trips correctly to both wires.\n\n```yaml\nproviders:\n newapi-reseller:\n baseUrl: https://api.example.com/v1\n apiKey: xxxx\n authHeader: true # injects Authorization: Bearer for openai models\n disableStrictTools: true # most anthropic-fronted proxies reject `strict`\n discovery:\n type: proxy\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `OMP_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. 
Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`OMP_AUTH_BROKER_URL`, `OMP_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. 
first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. 
Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. 
This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for an explicit OpenAI fallback:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.5:\n contextPromotionTarget: openai-codex/gpt-5.4\n```\n\nThe built-in model policy currently links OpenAI `codex-spark` variants to `gpt-5.5`, and `gpt-5.5` to `gpt-5.4`, when that target exists on the same provider/API.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/models-config-schema.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsMultipleSystemMessages` — preserve separate leading system/developer messages instead of coalescing them. 
Default: auto (known OpenAI-compatible hosted APIs preserve; strict-template/local hosts coalesce).\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `disableReasoningOnToolChoice` — drop reasoning fields whenever any `tool_choice` is sent. Default: auto (DeepSeek reasoning models).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). 
Default: `false`.\n- `allowsSyntheticReasoningContentForToolCalls` — allow a placeholder reasoning field when a prior assistant tool-call turn lacks provider reasoning content. Default: `true`; set `false` for providers that validate the exact reasoning value.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). 
The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`, `supportsMidConversationSystem`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. This is enabled by default for all `anthropic-messages` providers because it guarantees schema conformance in agentic systems.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). 
See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
33
+ "models.md": "# Model and Provider Configuration (`models.yml`)\n\nThis document describes how the coding-agent currently loads models, applies overrides, resolves credentials, and chooses models at runtime.\n\n## What controls model behavior\n\nPrimary implementation files:\n\n- `src/config/model-registry.ts` — loads built-in + custom models, provider overrides, runtime discovery, auth integration\n- `src/config/model-resolver.ts` — parses model patterns and selects initial/smol/slow models\n- `src/config/settings-schema.ts` — model-related settings (`modelRoles`, provider transport preferences)\n- `src/session/auth-storage.ts` — API key + OAuth resolution order\n- `packages/ai/src/models.ts` and `packages/ai/src/types.ts` — built-in providers/models and `Model`/`compat` types\n\n## Config file location and legacy behavior\n\nDefault config path:\n\n- `~/.omp/agent/models.yml`\n\nLegacy behavior still present:\n\n- If `models.yml` is missing and `models.json` exists at the same location, it is migrated to `models.yml`.\n- Explicit `.json` / `.jsonc` config paths are still supported when passed programmatically to `ModelRegistry`.\n\n## `models.yml` shape\n\n```yaml\nproviders:\n <provider-id>:\n # provider-level config\nequivalence:\n overrides:\n <provider-id>/<model-id>: <canonical-model-id>\n exclude:\n - <provider-id>/<model-id>\n```\n\n`provider-id` is the canonical provider key used across selection and auth lookup.\n\n`equivalence` is optional and configures canonical model grouping on top of concrete provider models:\n\n- `overrides` maps an exact concrete selector (`provider/modelId`) to an official upstream canonical id\n- `exclude` opts a concrete selector out of canonical grouping\n\n## Provider-level fields\n\n```yaml\nproviders:\n my-provider:\n baseUrl: https://api.example.com/v1\n apiKey: MY_PROVIDER_API_KEY\n api: openai-completions\n headers:\n X-Team: platform\n authHeader: true\n auth: apiKey\n disableStrictTools: false # set true for 
Anthropic-compatible endpoints that reject the strict field\n discovery:\n type: ollama\n modelOverrides:\n some-model-id:\n name: Renamed model\n models:\n - id: some-model-id\n name: Some Model\n api: openai-completions\n reasoning: false\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 128000\n maxTokens: 16384\n headers:\n X-Model: value\n compat:\n supportsStore: true\n supportsDeveloperRole: true\n supportsReasoningEffort: true\n maxTokensField: max_completion_tokens\n openRouterRouting:\n only: [anthropic]\n vercelGatewayRouting:\n order: [anthropic, openai]\n extraBody:\n gateway: m1-01\n controller: mlx\n```\n\n### Allowed provider/model `api` values\n\n- `openai-completions`\n- `openai-responses`\n- `openai-codex-responses`\n- `azure-openai-responses`\n- `anthropic-messages`\n- `google-generative-ai`\n- `google-vertex`\n\n### Allowed auth/discovery values\n\n- `auth`: `apiKey` (default), `none`, or `oauth`; for `models.yml` custom models, `oauth` is accepted by schema but does not waive the `apiKey` requirement\n- `discovery.type`: `ollama`, `llama.cpp`, `lm-studio`, `openai-models-list`, or `proxy`\n- `transport`: `pi-native` only. When set, every model under that provider is sent to an `omp auth-gateway` compatible `baseUrl` via `POST /v1/pi/stream`; `apiKey` is the gateway bearer.\n\n## Validation rules (current)\n\n### Full custom provider (`models` is non-empty)\n\nRequired:\n\n- `baseUrl`\n- `apiKey` unless `auth: none`\n- `api` at provider level or each model\n\n### Override-only provider (`models` missing or empty)\n\nMust define at least one of:\n\n- `baseUrl`\n- `apiKey`\n- `headers`\n- `compat`\n- `disableStrictTools`\n- `modelOverrides`\n- `discovery`\n\n### Discovery\n\n- `discovery` requires provider-level `api`, except with `discovery.type: proxy`, where provider-level `api` is optional because each model's `api` is auto-detected.\n\n### Model value checks\n\n- `id` required\n- `contextWindow` and `maxTokens` must be positive if provided\n\n## Merge and override order\n\nModelRegistry pipeline (on refresh):\n\n1. 
Load built-in providers/models from `@oh-my-pi/pi-ai`.\n2. Load `models.yml` custom config.\n3. Apply provider overrides (`baseUrl`, `headers`, `disableStrictTools`) to built-in models.\n4. Apply `modelOverrides` (per provider + model id).\n5. Merge custom `models`:\n - same `provider + id` replaces existing\n - otherwise append\n6. Load cached/runtime-discovered models (Ollama, llama.cpp, LM Studio, plus built-in provider managers), then re-apply model overrides.\n\n### Provider-model cache and static fingerprint\n\nCached per-provider model lists are persisted in the model-cache SQLite\ndatabase (schema v3) with a `static_fingerprint` column that hashes the\nstatic catalog slice merged into the row. When `resolveProviderModels`\nskips the network fetch and the fingerprint of the in-memory static\ncatalog matches the cached one, the cached rows are returned verbatim —\nthe static + dynamic merge is bypassed entirely. The fingerprint is\nmemoized per process via a WeakMap keyed by the static-models array\nreference, so repeated cold-start calls do not re-hash.\n\n## Canonical model equivalence and coalescing\n\nThe registry keeps every concrete provider model and then builds a canonical layer above them.\n\nCanonical ids are official upstream ids only, for example:\n\n- `claude-opus-4-6`\n- `claude-haiku-4-5`\n- `gpt-5.3-codex`\n\n### `models.yml` equivalence config\n\nExample:\n\n```yaml\nproviders:\n zenmux:\n baseUrl: https://api.zenmux.example/v1\n apiKey: ZENMUX_API_KEY\n api: openai-codex-responses\n models:\n - id: codex\n name: Zenmux Codex\n reasoning: true\n input: [text]\n cost:\n input: 0\n output: 0\n cacheRead: 0\n cacheWrite: 0\n contextWindow: 200000\n maxTokens: 32768\n\nequivalence:\n overrides:\n zenmux/codex: gpt-5.3-codex\n p-codex/codex: gpt-5.3-codex\n exclude:\n - demo/codex-preview\n```\n\nBuild order for canonical grouping:\n\n1. exact user override from `equivalence.overrides`\n2. 
bundled official-id matches from built-in model metadata\n3. conservative heuristic normalization for gateway/provider variants\n4. fallback to the concrete model's own id\n\nCurrent heuristics are intentionally narrow:\n\n- embedded upstream prefixes can be stripped when present, for example `anthropic/...` or `openai/...`\n- dotted and dashed version variants can normalize only when they map to an existing official id, for example `4.6 -> 4-6`\n- ambiguous families or versions are not merged without a bundled match or explicit override\n\n### Canonical resolution behavior\n\nWhen multiple concrete variants share a canonical id, resolution uses:\n\n1. availability and auth\n2. `config.yml` `modelProviderOrder`\n3. existing registry/provider order if `modelProviderOrder` is unset\n\nDisabled or unauthenticated providers are skipped.\n\nSession state and transcripts continue to record the concrete provider/model that actually executed the turn.\n\nProvider defaults vs per-model overrides:\n\n- Provider `headers` are baseline.\n- Model `headers` override provider header keys.\n- `modelOverrides` can override model metadata (`name`, `reasoning`, `thinking`, `input`, `cost`, `premiumMultiplier`, `contextWindow`, `maxTokens`, `headers`, `compat`, `contextPromotionTarget`).\n- `compat` is deep-merged for nested routing blocks (`openRouterRouting`, `vercelGatewayRouting`, `extraBody`).\n\n## Runtime discovery integration\n\n### Implicit Ollama discovery\n\nIf `ollama` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `ollama`\n- api: `openai-responses`\n- base URL: `OLLAMA_BASE_URL`, or `OLLAMA_HOST`, or `http://127.0.0.1:11434`\n- context window: `OLLAMA_CONTEXT_LENGTH` if set, otherwise Ollama `/api/show` metadata, otherwise `128000`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls Ollama endpoints and normalizes discovered OpenAI-compatible models to `openai-responses`.\n\n`OLLAMA_CONTEXT_LENGTH` does 
not configure Ollama's runtime `num_ctx`; set that in Ollama/model configuration separately.\n\n### Implicit llama.cpp discovery\n\nIf `llama.cpp` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `llama.cpp`\n- api: `openai-responses`\n- base URL: `LLAMA_CPP_BASE_URL` or `http://127.0.0.1:8080`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery calls llama.cpp model endpoints and synthesizes model entries with local defaults.\n\n### Implicit LM Studio discovery\n\nIf `lm-studio` is not explicitly configured, registry adds an implicit discoverable provider:\n\n- provider: `lm-studio`\n- api: `openai-completions`\n- base URL: `LM_STUDIO_BASE_URL` or `http://127.0.0.1:1234/v1`\n- auth mode: keyless (`auth: none` behavior)\n\nRuntime discovery fetches models (`GET /models`) and synthesizes model entries with local defaults.\n\n### Explicit provider discovery\n\nYou can configure discovery yourself:\n\n```yaml\nproviders:\n ollama:\n baseUrl: http://127.0.0.1:11434\n api: openai-responses\n auth: none\n discovery:\n type: ollama\n\n llama.cpp:\n baseUrl: http://127.0.0.1:8080\n api: openai-responses\n auth: none\n discovery:\n type: llama.cpp\n```\n\n### Proxy discovery (`discovery.type: proxy`)\n\nFor Anthropic+OpenAI-compatible proxies (new-api / one-api / similar)\nthat expose both `/v1/messages` and `/v1/chat/completions` behind the same\nhost. Discovery hits `GET /v1/models` (10s timeout, OpenAI-style payload) and\nderives each model's `api` from the entry's `supported_endpoint_types`:\n\n- contains `\"anthropic\"` -> `api: anthropic-messages` (routes via `/v1/messages`)\n- contains `\"openai\"` -> `api: openai-completions` (routes via `/v1/chat/completions`)\n- otherwise -> falls back to provider-level `api` if set, else dropped\n\nProvider-level `api` is **optional** with `discovery.type: proxy` because the\nper-model wire is auto-detected. 
The Anthropic SDK strips a trailing `/v1`\nfrom `baseUrl` before appending `/v1/messages`, so a single discovery `baseUrl`\n(ending in `/v1`) round-trips correctly to both wires.\n\n```yaml\nproviders:\n newapi-reseller:\n baseUrl: https://api.example.com/v1\n apiKey: xxxx\n authHeader: true # injects Authorization: Bearer for openai models\n disableStrictTools: true # most anthropic-fronted proxies reject `strict`\n discovery:\n type: proxy\n```\n\n### Extension provider registration\n\nExtensions can register providers at runtime (`pi.registerProvider(...)`), including:\n\n- model replacement/append for a provider\n- custom stream handler registration for new API IDs\n- custom OAuth provider registration\n\n## Auth and API key resolution order\n\nWhen requesting a key for a provider, effective order is:\n\n1. Runtime override (CLI `--api-key`)\n2. Stored API key credential in `agent.db`\n3. Stored OAuth credential in `agent.db` (with refresh)\n4. Environment variable mapping (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.)\n5. ModelRegistry fallback resolver (provider `apiKey` from `models.yml`, env-name-or-literal semantics)\n\n`models.yml` `apiKey` behavior:\n\n- Value is first treated as an environment variable name.\n- If no env var exists, the literal string is used as the token.\n\nIf `authHeader: true` and provider `apiKey` is set, models get:\n\n- `Authorization: Bearer <resolved-key>` header injected.\n\nKeyless providers:\n\n- Providers marked `auth: none` are treated as available without credentials.\n- `getApiKey*` returns `kNoAuth` for them.\n\n### Broker mode\n\nWhen `OMP_AUTH_BROKER_URL` (or `auth.broker.url`) is set, the local SQLite credential store is replaced by `RemoteAuthCredentialStore`. 
Layers 2 and 3 above (stored API key / OAuth in `agent.db`) are served from a broker-supplied snapshot whose `refresh` tokens are redacted; expiry triggers `POST /v1/credential/:id/refresh` on the broker rather than a local refresh.\n\n`AuthStorage.setConfigApiKey` lets a `models.yml` `apiKey` win over a broker-resolved OAuth token without overriding a runtime `--api-key`. See [`auth-broker-gateway.md`](./auth-broker-gateway.md) for the full broker / gateway design and env surface (`OMP_AUTH_BROKER_URL`, `OMP_AUTH_BROKER_TOKEN`, `auth.broker.url`, `auth.broker.token`).\n\n## Model availability vs all models\n\n- `getAll()` returns the loaded model registry (built-in + merged custom + discovered).\n- `getAvailable()` filters to models that are keyless or have resolvable auth.\n\nSo a model can exist in registry but not be selectable until auth is available.\n\n## Runtime model resolution\n\n### CLI and pattern parsing\n\n`model-resolver.ts` supports:\n\n- exact `provider/modelId`\n- exact canonical model id\n- exact model id (provider inferred)\n- fuzzy/substring matching\n- glob scope patterns in `--models` (e.g. `openai/*`, `*sonnet*`)\n- optional `:thinkingLevel` suffix (`off|minimal|low|medium|high|xhigh`)\n\n`--provider` is legacy; `--model` is preferred.\n\nResolution precedence for exact selectors:\n\n1. exact `provider/modelId` bypasses coalescing\n2. exact canonical id resolves through the canonical index\n3. exact bare concrete id still works\n4. fuzzy and glob matching run after the exact paths\n\n### Initial model selection priority\n\n`findInitialModel(...)` uses this order:\n\n1. explicit CLI provider+model\n2. first scoped model (if not resuming)\n3. saved default provider/model\n4. known provider defaults (e.g. OpenAI/Anthropic/etc.) among available models\n5. 
first available model\n\n### Role aliases and settings\n\nSupported model roles:\n\n- `default`, `smol`, `slow`, `vision`, `plan`, `designer`, `commit`, `task`\n\nRole aliases like `pi/smol` expand through `settings.modelRoles`. Each role value can also append a thinking selector such as `:minimal`, `:low`, `:medium`, or `:high`.\n\nIf a role points at another role, the target model still inherits normally and any explicit suffix on the referring role wins for that role-specific use.\n\nRelated settings:\n\n- `modelRoles` (record)\n- `enabledModels` (scoped pattern list)\n- `modelProviderOrder` (global canonical-provider precedence)\n- `providers.kimiApiFormat` (`openai` or `anthropic` request format)\n- `providers.openaiWebsockets` (`auto|off|on` websocket preference for OpenAI Codex transport)\n\n`modelRoles` may store either:\n\n- `provider/modelId` to pin a concrete provider variant\n- a canonical id such as `gpt-5.3-codex` to allow provider coalescing\n\nFor `enabledModels` and CLI `--models`:\n\n- exact canonical ids expand to all concrete variants in that canonical group\n- explicit `provider/modelId` entries stay exact\n- globs and fuzzy matches still operate on concrete models\n\nGlobal `enabledModels` and `disabledProviders` entries may also be scoped to a path prefix:\n\n```yaml\nenabledModels:\n - claude-sonnet-4-5\n - path: ~/work\n models:\n - anthropic/claude-opus-4-5\ndisabledProviders:\n - ollama\n - path: ~/private\n providers:\n - anthropic\n```\n\nString entries apply everywhere. Scoped entries apply when the current working directory is the configured path or one of its subdirectories. 
Use `path`, `paths`, `pathPrefix`, or `pathPrefixes`; use `models` for `enabledModels`, `providers` for `disabledProviders`, or `values` for either.\n\n## `/model` and `--list-models`\n\nBoth surfaces keep provider-prefixed models visible and selectable.\n\nThey now also expose canonical/coalesced models:\n\n- `/model` includes a canonical view alongside provider tabs\n- `--list-models` prints a canonical section plus the concrete provider rows\n\nSelecting a canonical entry stores the canonical selector. Selecting a provider row stores the explicit `provider/modelId`.\n\n## Context promotion (model-level fallback chains)\n\nContext promotion is an overflow recovery mechanism for small-context variants (for example `*-spark`) that automatically promotes to a larger-context sibling when the API rejects a request with a context length error.\n\n### Trigger and order\n\nWhen a turn fails with a context overflow error (e.g. `context_length_exceeded`), `AgentSession` attempts promotion **before** falling back to compaction:\n\n1. If `contextPromotion.enabled` is true, resolve a promotion target (see below).\n2. If a target is found, switch to it and retry the request — no compaction needed.\n3. If no target is available, fall through to auto-compaction on the current model.\n\n### Target selection\n\nSelection is model-driven, not role-driven:\n\n1. `currentModel.contextPromotionTarget` (if configured)\n2. smallest larger-context model on the same provider + API\n\nCandidates are ignored unless credentials resolve (`ModelRegistry.getApiKey(...)`).\n\n### OpenAI Codex websocket handoff\n\nIf switching from/to `openai-codex-responses`, session provider state key `openai-codex-responses` is closed before model switch. 
This drops websocket transport state so the next turn starts clean on the promoted model.\n\n### Persistence behavior\n\nPromotion uses temporary switching (`setModelTemporary`):\n\n- recorded as a temporary `model_change` in session history\n- does not rewrite saved role mapping\n\n### Configuring explicit fallback chains\n\nConfigure fallback directly in model metadata via `contextPromotionTarget`.\n\n`contextPromotionTarget` accepts either:\n\n- `provider/model-id` (explicit)\n- `model-id` (resolved within current provider)\n\nExample (`models.yml`) for an explicit OpenAI fallback:\n\n```yaml\nproviders:\n openai-codex:\n modelOverrides:\n gpt-5.5:\n contextPromotionTarget: openai-codex/gpt-5.4\n```\n\nThe built-in model policy currently links OpenAI `codex-spark` variants to `gpt-5.5`, and `gpt-5.5` to `gpt-5.4`, when that target exists on the same provider/API.\n\n## Compatibility and routing fields\n\nThe `compat` block on a provider or model overrides the URL-based auto-detection in `packages/ai/src/providers/openai-completions-compat.ts`. It is validated by `OpenAICompatSchema` in `packages/coding-agent/src/config/models-config-schema.ts` and consumed by every `openai-completions` transport (`packages/ai/src/providers/openai-completions.ts`). The canonical type is `OpenAICompat` in `packages/ai/src/types.ts`.\n\n`models.yml` accepts the following keys (all optional; unset falls back to URL detection):\n\nRequest shaping:\n\n- `supportsStore` — emit `store: false` on requests. Default: auto (off for non-standard endpoints).\n- `supportsDeveloperRole` — use the `developer` system role for reasoning models instead of `system`. Default: auto.\n- `supportsMultipleSystemMessages` — preserve separate leading system/developer messages instead of coalescing them. 
Default: auto (known OpenAI-compatible hosted APIs preserve; strict-template/local hosts coalesce).\n- `supportsUsageInStreaming` — send `stream_options: { include_usage: true }` to receive token usage on streaming responses. Default: `true`.\n- `maxTokensField` — `\"max_completion_tokens\"` or `\"max_tokens\"`. Default: auto.\n- `supportsToolChoice` — emit the `tool_choice` parameter when the caller forces a specific tool. Default: `true`. Set `false` for endpoints that 400 on `tool_choice` (e.g. DeepSeek when reasoning is on).\n- `disableReasoningOnForcedToolChoice` — drop `reasoning_effort` / OpenRouter `reasoning` whenever `tool_choice` forces a call. Default: auto (Kimi/Anthropic-fronted endpoints).\n- `disableReasoningOnToolChoice` — drop reasoning fields whenever any `tool_choice` is sent. Default: auto (DeepSeek reasoning models).\n- `extraBody` — extra top-level fields merged into every request body (gateway hints, controller selectors, etc.).\n\nReasoning / thinking:\n\n- `supportsReasoningEffort` — accept `reasoning_effort`. Default: auto (off for Grok and zAI).\n- `reasoningEffortMap` — partial map from internal effort levels (`minimal|low|medium|high|xhigh`) to provider-specific strings (e.g. DeepSeek maps `xhigh -> \"max\"`).\n- `thinkingFormat` — request shape for thinking: `\"openai\"` (`reasoning_effort`), `\"openrouter\"` (`reasoning: { effort }`), `\"zai\"` (`thinking: { type: \"enabled\" }`), `\"qwen\"` (top-level `enable_thinking`), or `\"qwen-chat-template\"` (`chat_template_kwargs.enable_thinking`). Default: `\"openai\"`.\n- `reasoningContentField` — assistant field carrying chain-of-thought: `\"reasoning_content\"`, `\"reasoning\"`, or `\"reasoning_text\"`. Default: auto.\n- `requiresReasoningContentForToolCalls` — assistant tool-call turns must round-trip the reasoning field (DeepSeek-R1, Kimi, OpenRouter when reasoning is on). 
Default: `false`.\n- `allowsSyntheticReasoningContentForToolCalls` — allow a placeholder reasoning field when a prior assistant tool-call turn lacks provider reasoning content. Default: `true`; set `false` for providers that validate the exact reasoning value.\n- `requiresAssistantContentForToolCalls` — assistant tool-call turns must include non-empty text content (Kimi). Default: `false`.\n\nTool / message normalization:\n\n- `requiresToolResultName` — tool-result messages need a `name` field (Mistral). Default: auto.\n- `requiresAssistantAfterToolResult` — a user message after a tool result needs an assistant turn in between. Default: auto.\n- `requiresThinkingAsText` — convert thinking blocks to text wrapped in `<thinking>` delimiters (Mistral). Default: auto.\n- `requiresMistralToolIds` — normalize tool-call ids to exactly 9 alphanumeric chars. Default: auto.\n- `supportsStrictMode` — accept the per-tool `strict` field on tool schemas. Default: conservative auto-detect per provider/baseUrl.\n- `toolStrictMode` — `\"all_strict\"` forces strict on every tool, `\"none\"` forces it off; unset keeps the existing per-tool mixed behavior.\n\nGateway routing (only applied when `baseUrl` matches the gateway):\n\n- `openRouterRouting.only` / `openRouterRouting.order` — provider routing on `openrouter.ai` (see <https://openrouter.ai/docs/provider-routing>).\n- `vercelGatewayRouting.only` / `vercelGatewayRouting.order` — provider routing on `ai-gateway.vercel.sh` (see <https://vercel.com/docs/ai-gateway/models-and-providers/provider-options>).\n\nProvider-level `compat` is the baseline; per-model `compat` is deep-merged on top, with `openRouterRouting`, `vercelGatewayRouting`, and `extraBody` merged as nested objects.\n\n### Anthropic compatibility (`anthropic-messages`)\n\nFor `anthropic-messages` models the runtime uses a separate `AnthropicCompat` shape (`packages/ai/src/types.ts`). 
The `models.yml` schema currently exposes only the strict-tools opt-out as a top-level provider field (see below); the remaining Anthropic-side knobs (`disableAdaptiveThinking`, `supportsEagerToolInputStreaming`, `supportsLongCacheRetention`, `supportsMidConversationSystem`) are set by built-in catalog metadata and are not user-configurable from `models.yml`.\n\n### Strict tool schemas (`disableStrictTools`)\n\nAnthropic's API supports a `strict` field on tool definitions that forces the model to always follow the provided schema exactly. OMP enables it by default for a small allowlist of high-frequency built-in `anthropic-messages` tools (`bash`, `python`, `edit`, and `find`) whose schemas fit Anthropic's strict grammar limits; other tools still send normalized schemas but omit `strict`.\n\nThird-party providers that front the Anthropic API (AWS Bedrock, Azure, self-hosted proxies) do not always implement this field and will reject requests that include it. Set `disableStrictTools: true` at the provider level to opt out of strict mode for the allowlisted tools:\n\n```yaml\nproviders:\n bedrock-anthropic:\n baseUrl: https://bedrock-runtime.us-east-1.amazonaws.com/anthropic\n apiKey: AWS_BEARER_TOKEN\n api: anthropic-messages\n disableStrictTools: true\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Bedrock)\n input: [text, image]\n contextWindow: 200000\n maxTokens: 16384\n cost:\n input: 3.00\n output: 15.00\n cacheRead: 0.30\n cacheWrite: 3.75\n```\n\n`disableStrictTools` is a provider-level flag that applies to all models in the provider. It disables the Anthropic `strict` marker only for tools that OMP would otherwise mark strict; it does not change runtime tool argument validation. 
OMP can automatically retry without strict tools after Anthropic reports a strict-grammar-too-large error before the first streamed token, but proxies that reject the `strict` field for other reasons should set this flag explicitly.\n\nTool schemas going on the wire are normalized by the unified flow in\n`packages/ai/src/utils/schema/normalize.ts` (Google/CCA/MCP dispatchers\nplus the OpenAI strict-mode sanitize+enforce pipeline). See\n[`ai-schema-normalize.md`](./ai-schema-normalize.md) for the strict-mode\nedge cases (local `$ref` inlining, single-item `allOf` collapse,\n`anyOf`-wrapper description hoist, enum/const primitive-type inference)\nand the per-provider dispatcher mapping.\n\n## Practical examples\n\n### Local OpenAI-compatible endpoint (no auth)\n\n```yaml\nproviders:\n local-openai:\n baseUrl: http://127.0.0.1:8000/v1\n auth: none\n api: openai-completions\n models:\n - id: Qwen/Qwen2.5-Coder-32B-Instruct\n name: Qwen 2.5 Coder 32B (local)\n```\n\n### Hosted proxy with env-based key\n\n```yaml\nproviders:\n anthropic-proxy:\n baseUrl: https://proxy.example.com/anthropic\n apiKey: ANTHROPIC_PROXY_API_KEY\n api: anthropic-messages\n authHeader: true\n disableStrictTools: true # if the proxy doesn't support strict tool schemas\n models:\n - id: claude-sonnet-4-20250514\n name: Claude Sonnet 4 (Proxy)\n reasoning: true\n input: [text, image]\n```\n\n### Override built-in provider route + model metadata\n\n```yaml\nproviders:\n openrouter:\n baseUrl: https://my-proxy.example.com/v1\n headers:\n X-Team: platform\n modelOverrides:\n anthropic/claude-sonnet-4:\n name: Sonnet 4 (Corp)\n compat:\n openRouterRouting:\n only: [anthropic]\n```\n\n## Legacy consumer caveat\n\nMost model configuration now flows through `models.yml` via `ModelRegistry`. 
Explicit `.json` / `.jsonc` paths remain supported only when passed programmatically to `ModelRegistry`; the default user config is `~/.omp/agent/models.yml`.\n\n## Failure mode\n\nIf `models.yml` fails schema or validation checks:\n\n- registry keeps operating with built-in models\n- error is exposed via `ModelRegistry.getError()` and surfaced in UI/notifications\n",
34
34
  "natives-addon-loader-runtime.md": "# Natives Addon Loader Runtime\n\nThis document covers the runtime loader shipped by `@oh-my-pi/pi-natives`: how `native/index.js` decides which `.node` file to require, how compiled-binary embedded payloads are extracted, and what startup failures report.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/package.json`\n\n## Scope and responsibility\n\nThe loader is intentionally narrow:\n\n- Build a platform/CPU-aware candidate list for addon filenames and directories.\n- Treat an embedded-addon manifest as a compiled-binary signal when present.\n- Optionally materialize embedded addon archive contents into a versioned per-user cache directory.\n- On Windows `node_modules` installs, stage addon files into the versioned cache to avoid locked-DLL update failures.\n- Attempt candidates in deterministic order and return the first addon that `require(...)` loads and validates.\n\nFor install and compiled-binary paths, the loader verifies a release sentinel export named from `package.json#version` (for example `__piNativesV15_7_2`). Workspace-dev loads skip this validation so a local checkout can rebuild after a pull. The loader does not validate the full export surface; stale same-version or incomplete binaries still surface as missing members or native errors at use sites.\n\n## Runtime inputs and derived state\n\nAt module initialization, `native/index.js` computes:\n\n- **Platform tag**: `${process.platform}-${process.arch}` (for example `darwin-arm64`).\n- **Package version**: from `packages/natives/package.json`.\n- **Core directories**:\n - `leafPackageDir`: directory of the platform leaf package, resolved via `require.resolve(\"@oh-my-pi/pi-natives-<tag>/package.json\")`; `null` when no leaf is installed (e.g. 
local dev).\n - `nativeDir`: package-local `packages/natives/native`.\n - `execDir`: directory containing `process.execPath`.\n - `versionedDir`: `<getNativesDir()>/<packageVersion>`.\n - `userDataDir` fallback:\n - Windows: `%LOCALAPPDATA%/omp` or `%USERPROFILE%/AppData/Local/omp`.\n - Non-Windows: `~/.local/bin`.\n- **Natives cache root** (`getNativesDir()`):\n - if `$XDG_DATA_HOME/omp` exists, `$XDG_DATA_HOME/omp/natives`;\n - otherwise `~/.omp/natives`.\n- **Compiled-binary mode** (`detectCompiledBinary`): true if any of:\n - embedded-addon manifest is non-null,\n - `PI_COMPILED` env var is set,\n - `import.meta.url` contains Bun embedded markers (`$bunfs`, `~BUN`, `%7EBUN`).\n- **Windows staging mode** (`shouldStageNodeModulesAddon`): true only on Windows, in non-compiled mode, when `nativeDir` is inside `node_modules`.\n- **Variant override**: `PI_NATIVE_VARIANT` (`modern`/`baseline` only; invalid values ignored).\n- **Selected variant**: explicit override, otherwise runtime AVX2 detection on x64 (`modern` if AVX2, else `baseline`).\n\n## Platform support and tag resolution\n\n`SUPPORTED_PLATFORMS` is fixed to:\n\n- `linux-x64`\n- `linux-arm64`\n- `darwin-x64`\n- `darwin-arm64`\n- `win32-x64`\n\nUnsupported platforms are not rejected before probing. The loader first tries the computed candidate paths. If all fail and `platformTag` is unsupported, it throws an unsupported-platform error listing supported tags.\n\n## Variant selection (`modern` / `baseline` / default)\n\n### x64 behavior\n\n1. `PI_NATIVE_VARIANT=modern|baseline` wins when valid.\n2. Otherwise AVX2 support is detected:\n - Linux: scan `/proc/cpuinfo` for `avx2`.\n - macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`.\n - Windows: PowerShell `[System.Runtime.Intrinsics.X86.Avx2]::IsSupported`.\n3. 
AVX2 selects `modern`; unavailable or undetectable AVX2 selects `baseline`.\n\n### Non-x64 behavior\n\nNo variant suffix is used; the filename is `pi_natives.<platform>-<arch>.node`.\n\n### Filename construction\n\n`loader-state.js#getAddonFilenames` returns:\n\n- Non-x64 or no variant: `pi_natives.<tag>.node`\n- x64 + `modern`:\n 1. `pi_natives.<tag>-modern.node`\n 2. `pi_natives.<tag>-baseline.node`\n 3. `pi_natives.<tag>.node`\n- x64 + `baseline`:\n 1. `pi_natives.<tag>-baseline.node`\n 2. `pi_natives.<tag>.node`\n\nThe default unsuffixed fallback remains part of the x64 candidate list.\n\n## Candidate path construction and fallback ordering\n\n`resolveLoaderCandidates(...)` expands every filename across directories, then de-duplicates while preserving first occurrence order.\n\n### Non-compiled runtime\n\nFor each filename, candidates are, in order:\n\n1. `<leafPackageDir>/<filename>` (omitted when `leafPackageDir` is `null`)\n2. `<nativeDir>/<filename>`\n3. `<execDir>/<filename>`\n\nThe leaf package dir comes first so the optional-dependency binary published with the release is preferred over any `.node` left in the core package's `native/` (e.g. a stale local-dev build).\n\nOn Windows installs where `nativeDir` is inside a `node_modules` segment (`shouldStageNodeModulesAddon`), `<versionedDir>/<filename>` staging candidates are prepended ahead of the leaf candidates so a locked `node_modules` binary can be sidestepped during `bun install -g` updates. The staged file is copied from `leafPackageDir ?? nativeDir` before probing.\n\n### Compiled runtime\n\nFor each filename, candidates are:\n\n1. `<versionedDir>/<filename>`\n2. `<userDataDir>/<filename>`\n3. `<nativeDir>/<filename>`\n4. 
`<execDir>/<filename>`\n\nAt load time, an extracted embedded candidate, or a staged Windows candidate when no embedded candidate exists, is prepended ahead of these de-duplicated candidates.\n\n## Embedded addon extraction lifecycle\n\n`embedded-addon.js` is generated by `scripts/embed-native.ts`. The reset stub exports `embeddedAddon = null`. A populated manifest has:\n\n- `platformTag`\n- `version`\n- `archive`: `{ format: \"tar.gz\", filename, filePath }`\n- `files[]` entries with `variant`, `filename`, and `size`\n\nExtraction (`maybeExtractEmbeddedAddon`) runs only when:\n\n1. compiled-binary mode is true,\n2. `embeddedAddon` is non-null,\n3. manifest `platformTag` equals the runtime platform tag,\n4. manifest `version` equals the package version,\n5. a variant-appropriate embedded file exists.\n\nVariant file selection:\n\n- Non-x64: prefer `default`, then first available file.\n- x64 + `modern`: prefer `modern`, fallback to `baseline`.\n- x64 + `baseline`: require `baseline`.\n\nMaterialization:\n\n1. Ensure `<versionedDir>` exists.\n2. Select `<versionedDir>/<selected filename>`.\n3. If the current cached file exists and its size matches manifest metadata, reuse it.\n4. Otherwise extract `embeddedAddon.archive.filePath` into `<versionedDir>` using the manifest `files[]` allowlist.\n5. 
Verify the selected target by size and return it as the first candidate.\n\nArchive, directory, or write failures are appended to the loader error list; probing continues through normal candidates.\n\n## Lifecycle and state transitions\n\n```text\nInit\n -> Load package metadata and embedded-addon manifest\n -> Compute platform/version/variant/filenames/candidate paths\n -> (compiled + embedded manifest matches?)\n yes -> extract archive to versionedDir when needed (record errors, continue)\n no -> skip extraction\n -> (Windows non-compiled node_modules install and no embedded candidate?)\n yes -> stage leaf/core addon to versionedDir (record errors, continue)\n no -> skip staging\n -> For each runtime candidate in order:\n require(candidate)\n -> sentinel validation passes or is workspace-dev: return addon exports (READY)\n -> failure: record error, continue\n -> none loaded:\n if unsupported platform tag -> throw Unsupported platform\n else -> throw Failed to load (tried-path diagnostics + hints)\n```\n\n## Failure behavior and diagnostics\n\n### Unsupported platform\n\nIf all candidates fail and `platformTag` is not supported, the loader throws:\n\n- `Unsupported platform: <tag>`\n- supported platform list\n- issue-reporting guidance\n\n### No loadable candidate\n\nIf the platform is supported but no candidate can be loaded, the final error includes:\n\n- `Failed to load pi_natives native addon for <platformTag>` or `<platformTag> (<variant>)`\n- every attempted path with the corresponding `require(...)` or sentinel-validation error\n- mode-specific remediation hints\n\n### Compiled-binary startup failures\n\nCompiled mode diagnostics include:\n\n- expected versioned cache target paths (`<versionedDir>/<filename>`),\n- remediation to delete the versioned cache and rerun,\n- direct release download `curl` commands for each expected filename.\n- release sentinel mismatch details when a loadable `.node` belongs to another `@oh-my-pi/pi-natives` version.\n\n### 
Non-compiled startup failures\n\nNormal package/runtime diagnostics include:\n\n- reinstall hint (`bun install @oh-my-pi/pi-natives`),\n- local rebuild command (`bun --cwd=packages/natives run build`),\n- optional x64 variant build hint (`TARGET_VARIANT=baseline|modern bun --cwd=packages/natives run build`).\n",
35
35
  "natives-architecture.md": "# Natives Architecture\n\n`@oh-my-pi/pi-natives` is a two-layer package around an ESM loader:\n\n1. **ESM loader/package entrypoint** resolves and loads the correct `.node` addon with `createRequire`, validates the release sentinel outside workspace-dev loads, and re-exports generated classes/functions plus enum runtime objects as explicit named ESM exports.\n2. **Rust N-API module layer** implements the exported functions/classes and emits the generated TypeScript declarations.\n\nThis document is the foundation for deeper module-level docs.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/native/embedded-addon.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/embed-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n\n## Package entrypoint and public surface\n\n`packages/natives/package.json` points at generated native artifacts:\n\n- `main`: `./native/index.js`\n- `types`: `./native/index.d.ts`\n- `exports[\".\"].types`: `./native/index.d.ts`\n- `exports[\".\"].import`: `./native/index.js`\n\nThere is no current `packages/natives/src` TypeScript wrapper layer. 
Consumers import functions/classes/enums directly from `@oh-my-pi/pi-natives`; the type contract is the generated `native/index.d.ts` plus the explicit named exports generated into `native/index.js` by `scripts/gen-enums.ts`.\n\nCurrent capability groups in the generated API include:\n\n- **Search/text/code primitives**: `grep`, `search`, `hasMatch`, `fuzzyFind`, `glob`, `astGrep`, `astEdit`, `blockRangeAt`, `summarizeCode`, text width/slicing/wrapping/sanitization, syntax highlighting, token counting.\n- **Execution/process/terminal primitives**: `executeShell`, `Shell`, `PtySession`, `Process`, key parsing, bash fixups.\n- **System/media/isolation/conversion primitives**: clipboard, SIXEL encoding, HTML-to-Markdown, macOS appearance/power helpers, work profiling, workspace scanning, isolation backend helpers (`iso*`).\n\n## Loader layer\n\n`packages/natives/native/index.js` owns runtime addon selection and optional embedded extraction.\n\n### Candidate resolution model\n\n- Platform tag is `${process.platform}-${process.arch}`.\n- Supported tags are currently:\n - `linux-x64`\n - `linux-arm64`\n - `darwin-x64`\n - `darwin-arm64`\n - `win32-x64`\n- x64 can use CPU variants:\n - `modern` (AVX2-capable)\n - `baseline` (fallback)\n- Non-x64 uses the default filename without a variant suffix.\n\nFilename strategy:\n\n- Default: `pi_natives.<platform>-<arch>.node`\n- x64 variant: `pi_natives.<platform>-<arch>-modern.node` or `...-baseline.node`\n- x64 runtime fallback includes the unsuffixed default filename after variant candidates.\n\n### Platform-specific variant detection\n\nFor x64, variant selection uses:\n\n- Linux: `/proc/cpuinfo`\n- macOS: `sysctl -n machdep.cpu.leaf7_features`, then `machdep.cpu.features`\n- Windows: PowerShell check for `System.Runtime.Intrinsics.X86.Avx2`\n\n`PI_NATIVE_VARIANT` can force `modern` or `baseline`; invalid values are ignored.\n\n### Binary distribution and extraction model\n\nThe published `@oh-my-pi/pi-natives` package ships 
**only** the loader layer in `native/`: the ESM loader (`index.js`), generated declarations (`index.d.ts`), the `loader-state.js`/`.d.ts` helpers, and the embedded-addon manifest stub (`embedded-addon.js`). It carries no `.node` binaries.\n\nEach platform's prebuilt `.node` is published as a separate optional-dependency leaf package — `@oh-my-pi/pi-natives-<platform>-<arch>`, one per supported tag — which the core lists in `optionalDependencies` at the lockstep version during publish. npm/bun install only the leaf whose `os`/`cpu` match the host. The working-tree package keeps built `.node` files under `native/` for local dev; the release-publish rewrite (`prepareNativeCorePackage` in `scripts/ci-release-publish.ts`) strips them from the core tarball, and the leaves are generated by `packages/natives/scripts/gen-npm-packages.ts` (`LEAF_TARGETS`). Adding a build target therefore requires a matching `LEAF_TARGETS` entry, or the binary never reaches npm users.\n\nFor compiled binaries, loader behavior is:\n\n1. Check versioned user cache path: `<getNativesDir()>/<packageVersion>/...`.\n2. Check legacy compiled-binary location:\n - Windows: `%LOCALAPPDATA%/omp` (fallback `%USERPROFILE%/AppData/Local/omp`)\n - non-Windows: `~/.local/bin`\n3. Fall back to packaged `native/` and executable directory candidates.\n\n`getNativesDir()` uses `$XDG_DATA_HOME/omp/natives` when `$XDG_DATA_HOME/omp` exists; otherwise it uses `~/.omp/natives`.\n\nIf a populated embedded addon manifest is present, it is also treated as a compiled-binary signal. Current embedded manifests point at a gzip-compressed tar archive (`embedded-addons.<tag>.tar.gz`) that contains one or more matching `.node` files. 
The loader extracts the archive into the versioned cache directory, validates the selected file by size, and prepends that cache path before normal candidate probing.\n\nFor npm/bun installs (non-compiled), `loader-state.js` resolves the platform leaf directory via `require.resolve(\"@oh-my-pi/pi-natives-<tag>/package.json\")` and probes its `.node` **before** the core package's `native/` directory and the executable directory. The optional-dependency binary is therefore preferred over any `.node` left in the core (e.g. a stale local-dev build). On Windows `node_modules` installs, the loader first stages the selected leaf/core addon into `<getNativesDir()>/<packageVersion>/...` and prepends that staged path so running processes do not lock the `node_modules` copy during global updates.\n\n### Failure modes\n\nLoader failures are explicit:\n\n- **Unsupported platform tag**: after failed probing, throws with supported platform list.\n- **No loadable candidate**: throws with all attempted paths and remediation hints.\n- **Embedded/staging errors**: directory/write/archive/staging failures are recorded and included in final load diagnostics if no candidate loads.\n- **Release mismatch**: outside workspace-dev loads, a candidate that loads but lacks the version sentinel export for `package.json#version` is rejected with a reinstall hint.\n\n## Rust N-API module layer\n\n`crates/pi-natives/src/lib.rs` declares exported module ownership:\n\n- `appearance`\n- `ast`\n- `block`\n- `clipboard`\n- `fd`\n- `fs_cache`\n- `glob`\n- `glob_util`\n- `grep`\n- `highlight`\n- `html`\n- `iso`\n- `keys`\n- `language` (re-exported from `pi_ast`)\n- `power`\n- `prof`\n- `ps`\n- `pty`\n- `shell`\n- `sixel`\n- `summary`\n- `task`\n- `text`\n- `tokens`\n- `utils` (crate-private helpers)\n- `workspace`\n\nN-API exports are generated from Rust `#[napi]` functions/classes/objects/enums. 
Snake_case Rust names are exposed as camelCase JavaScript names unless explicitly configured by napi-rs.\n\n## Ownership boundaries\n\n- **Loader/package ownership (`packages/natives/native`, `packages/natives/scripts`)**\n - runtime binary selection\n - CPU variant selection and override handling\n - compiled-binary embedded archive extraction\n - Windows `node_modules` addon staging\n - generated TypeScript declarations and explicit ESM export/enum patching\n- **Rust ownership (`crates/pi-natives/src`)**\n - algorithmic and system-level implementation\n - platform-native behavior and performance-sensitive logic\n - N-API symbol implementation consumed directly by package callers\n- **Consumer ownership (`packages/coding-agent`, `packages/tui`)**\n - user-facing policy and fallbacks that are not built into the native API\n - higher-level rendering, artifact, shell-session, and command behavior\n\n## Runtime flow (high level)\n\n1. Consumer imports from `@oh-my-pi/pi-natives`.\n2. `native/index.js` computes platform/arch/variant and candidate paths.\n3. Optional embedded archive extraction or Windows `node_modules` staging can prepend a versioned-cache candidate.\n4. Each candidate is `require(...)`d; install/compiled loads must expose the package-version sentinel.\n5. The loaded addon object is bound to explicit named ESM exports, including generated enum objects.\n6. Caller invokes generated N-API functions/classes directly.\n\n## Glossary\n\n- **Native addon**: A `.node` binary loaded via Node-API (N-API).\n- **Platform tag**: Runtime tuple `platform-arch` (for example `darwin-arm64`).\n- **Platform leaf package**: Per-platform npm package `@oh-my-pi/pi-natives-<tag>` that carries one platform's prebuilt `.node`. 
The core depends on every leaf via `optionalDependencies`; the package manager installs only the host-matching one (`os`/`cpu`).\n- **Variant**: x64 CPU-specific build flavor (`modern` AVX2, `baseline` fallback).\n- **Generated binding declaration**: `native/index.d.ts` emitted by napi-rs during `build-native.ts`.\n- **Version sentinel**: Rust export named from the package version (for example `__piNativesV15_7_2`) that lets the loader reject a `.node` from a different release.\n- **Compiled binary mode**: Runtime mode where the CLI is bundled and native addons are resolved from embedded/cache paths before package-local paths.\n- **Embedded addon**: Build artifact metadata and archive reference generated into `native/embedded-addon.js` so compiled binaries can extract matching `.node` payloads.\n",
36
36
  "natives-binding-contract.md": "# Natives Binding Contract (JavaScript/TypeScript Side)\n\nThis document defines the JS/TS contract between `@oh-my-pi/pi-natives` callers and the loaded N-API addon.\n\nCurrent package shape is direct-to-native: there is no `packages/natives/src/<module>` TypeScript wrapper layer. The public API is the generated `packages/natives/native/index.d.ts` declaration file, the ESM loader/export wrapper in `packages/natives/native/index.js`, and the Rust `#[napi]` exports in `crates/pi-natives/src`.\n\n## Implementation files\n\n- `packages/natives/native/index.js`\n- `packages/natives/native/index.d.ts`\n- `packages/natives/native/loader-state.js`\n- `packages/natives/scripts/build-native.ts`\n- `packages/natives/scripts/gen-enums.ts`\n- `packages/natives/package.json`\n- `crates/pi-natives/src/lib.rs`\n- Rust modules under `crates/pi-natives/src/*.rs`\n\n## Contract model\n\nThe contract has three parts:\n\n1. **ESM runtime loader/export wrapper** (`native/index.js`)\n - calls `loadNative()` from `loader-state.js`, which `require(...)`s the `.node` addon;\n - binds generated classes/functions as explicit named ESM exports;\n - emits enum runtime objects generated by `scripts/gen-enums.ts`.\n2. **Generated TypeScript declarations** (`native/index.d.ts`)\n - generated by napi-rs during `scripts/build-native.ts`;\n - declares exported functions, classes, object interfaces, and native enums;\n - is the package `types` entry.\n3. **Rust N-API exports** (`crates/pi-natives/src`)\n - `#[napi]` functions/classes/objects/enums are the source of generated declarations and runtime symbols;\n - snake_case Rust names become camelCase JavaScript names by napi-rs convention.\n\nThere is no current `NativeBindings` declaration-merging lifecycle and no full required-export list in the loader. 
Install/compiled loads do validate the package-version sentinel export; workspace-dev loads skip that check.\n\n## Public export surface organization\n\n`packages/natives/package.json` exposes the package root only:\n\n```json\n{\n \"main\": \"./native/index.js\",\n \"types\": \"./native/index.d.ts\",\n \"exports\": {\n \".\": {\n \"types\": \"./native/index.d.ts\",\n \"import\": \"./native/index.js\"\n }\n }\n}\n```\n\nConsumers in `packages/coding-agent` and `packages/tui` import directly from `@oh-my-pi/pi-natives`.\n\n## JS API ↔ native export mapping (representative)\n\n| Category | Public JS API | Rust source | Return style |\n| ----------------- | --------------------------------------------------------------------------------------------------------- | ------------------------------------------------ | -------------------------- |\n| Grep | `grep(options, onMatch?)` | `grep.rs` | `Promise<GrepResult>` |\n| Grep | `search(content, options)` | `grep.rs` | `SearchResult` |\n| Grep | `hasMatch(content, pattern, ignoreCase?, multiline?)` | `grep.rs` | `boolean` |\n| Fuzzy path search | `fuzzyFind(options)` | `fd.rs` | `Promise<FuzzyFindResult>` |\n| Glob/workspace | `glob(options, onMatch?)`, `listWorkspace(options)` | `glob.rs`, `workspace.rs` | `Promise<...>` |\n| Glob cache | `invalidateFsScanCache(path?)` | `fs_cache.rs` | `void` |\n| AST/block/summary | `astGrep(options)`, `astEdit(options)`, `blockRangeAt(options)`, `summarizeCode(options)` | `ast.rs`, `block.rs`, `summary.rs` | mixed |\n| Shell | `executeShell(options, onChunk?)` | `shell.rs` | `Promise<ShellRunResult>` |\n| Shell | `new Shell(options?)`, `shell.run(...)`, `shell.abort()` | `shell.rs` | class / promises |\n| PTY | `new PtySession()`, `start/write/resize/kill` | `pty.rs` | class / promises |\n| Process | `Process.fromPid/fromPath`, `status/children/killTree/terminate/waitForExit` | `ps.rs` | class / mixed |\n| Keys | `parseKey`, `matchesKey`, Kitty/legacy helpers | `keys.rs` | sync |\n| 
Text | `wrapTextWithAnsi`, `truncateToWidth`, `sliceWithWidth`, `extractSegments`, `visibleWidth` | `text.rs` | sync |\n| Highlight | `highlightCode`, `supportsLanguage`, `getSupportedLanguages` | `highlight.rs` | sync |\n| HTML | `htmlToMarkdown(html, options?)` | `html.rs` | `Promise<string>` |\n| SIXEL | `encodeSixel` | `sixel.rs` | sync |\n| Clipboard | `copyToClipboard`, `readImageFromClipboard` | `clipboard.rs` | sync / promise |\n| Tokens | `countTokens(input, encoding?)` | `tokens.rs` | sync |\n| System/isolation | `detectMacOSAppearance`, `MacAppearanceObserver`, `MacOSPowerAssertion`, `getWorkProfile`, `iso*` helpers | `appearance.rs`, `power.rs`, `prof.rs`, `iso.rs` | mixed |\n\n## Sync vs async contract differences\n\nThe contract preserves Rust/N-API call style:\n\n- **Promise-returning exports** for worker-thread or async runtime work (`grep`, `glob`, `fuzzyFind`, `astGrep`, `astEdit`, `htmlToMarkdown`, shell/PTY runs, `isoStart`/`isoStop`/`isoDiff`, clipboard image read, workspace scan).\n- **Synchronous exports** for deterministic in-memory transforms/parsers or direct system calls (`search`, `hasMatch`, highlighting, text utilities, token counting, process construction/status, `copyToClipboard`, `encodeSixel`, isolation probe/resolve helpers).\n- **Constructor exports** for stateful runtime objects (`Shell`, `PtySession`, `Process`, macOS observer/power handles).\n\nChanging sync ↔ async for an existing export is a breaking public API change because consumers call these exports directly.\n\n## Object and enum typing patterns\n\n### Object patterns\n\n`#[napi(object)]` Rust structs become TS interfaces, for example:\n\n- `GrepResult`, `SearchResult`, `GlobResult`, `FuzzyFindResult`\n- `ShellRunResult`, `ShellExecuteResult`, `PtyRunResult`, `MinimizerResult`\n- `AstFindResult`, `AstReplaceResult`, `BlockRange`, `SummaryResult`\n- `System`/media/isolation payloads such as `ClipboardImage`, `WorkProfile`, `ParsedKittyResult`, 
`IsoResolveResult`\n\nRuntime shape correctness is owned by napi-rs and the Rust implementation.\n\n### Enum patterns\n\nNative enums are represented in generated declarations and also emitted as runtime objects by `scripts/gen-enums.ts`, because napi-rs string enums are TS-only without explicit JS exports. Current enum objects include:\n\n- `AstMatchStrictness`\n- `Ellipsis`\n- `Encoding`\n- `FileType`\n- `GrepOutputMode`\n- `IsoBackendKind`\n- `IsoChangeKind`\n- `KeyEventType`\n- `MacOSAppearance`\n- `ProcessStatus`\n\n## Error behavior and caveats\n\n- Addon load failure or unsupported platform throws during package import from `native/index.js`.\n- The loader rejects install/compiled candidates that lack the package-version sentinel export. It does not verify the full export set after `require(...)`; stale same-version or incomplete binaries surface as native load errors or missing members at use sites.\n- N-API conversion validates basic argument conversion, but TS optional fields do not guarantee semantic validity for untyped callers.\n- Numeric enum declarations do not prevent out-of-range numeric values from untyped callers unless the Rust function rejects them during conversion.\n- Callback exports use napi-rs `ThreadsafeFunction` shape: `(error: Error | null, value) => void`. Native code generally emits successful values; hard failures reject/throw through the owning call.\n\n## Maintainer checklist for binding changes\n\nWhen adding/changing an export, update all of:\n\n1. Rust `#[napi]` implementation in the owning `crates/pi-natives/src/<module>.rs`.\n2. `crates/pi-natives/src/lib.rs` if a new module is added.\n3. Any consumer imports/callsites in `packages/coding-agent` or `packages/tui`.\n4. Build output by running the natives build so `native/index.d.ts` and `native/index.js` stay in sync.\n5. 
`scripts/gen-enums.ts` if enum runtime export patching needs to change.\n\nDo not add a parallel TS wrapper convention unless the package design intentionally moves back to wrappers; current consumers depend on the direct generated API.\n",
@@ -52,7 +52,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
52
52
  "rulebook-matching-pipeline.md": "# Rulebook Matching Pipeline\n\nThis document describes how coding-agent discovers rules from supported config formats, normalizes them into a single `Rule` shape, resolves precedence conflicts, and splits the result into:\n\n- **Rulebook rules** (available to the model via system prompt + `rule://` URLs)\n- **TTSR rules** (time-travel stream interruption rules)\n\nIt reflects the current implementation, including partial semantics and metadata that is parsed but not enforced.\n\n## Implementation files\n\n- [`packages/coding-agent/src/capability/rule.ts`](../packages/coding-agent/src/capability/rule.ts)\n- [`packages/coding-agent/src/capability/rule-buckets.ts`](../packages/coding-agent/src/capability/rule-buckets.ts)\n- [`packages/coding-agent/src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`packages/coding-agent/src/discovery/index.ts`](../packages/coding-agent/src/discovery/index.ts)\n- [`packages/coding-agent/src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`packages/coding-agent/src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`packages/coding-agent/src/discovery/builtin-defaults.ts`](../packages/coding-agent/src/discovery/builtin-defaults.ts)\n- [`packages/coding-agent/src/discovery/agents.ts`](../packages/coding-agent/src/discovery/agents.ts)\n- [`packages/coding-agent/src/discovery/cursor.ts`](../packages/coding-agent/src/discovery/cursor.ts)\n- [`packages/coding-agent/src/discovery/windsurf.ts`](../packages/coding-agent/src/discovery/windsurf.ts)\n- [`packages/coding-agent/src/discovery/cline.ts`](../packages/coding-agent/src/discovery/cline.ts)\n- [`packages/coding-agent/src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`packages/coding-agent/src/system-prompt.ts`](../packages/coding-agent/src/system-prompt.ts)\n- 
[`packages/coding-agent/src/internal-urls/rule-protocol.ts`](../packages/coding-agent/src/internal-urls/rule-protocol.ts)\n- [`packages/utils/src/frontmatter.ts`](../packages/utils/src/frontmatter.ts)\n\n## 1. Canonical rule shape\n\nAll providers normalize source files into `Rule`:\n\n```ts\ninterface Rule {\n name: string;\n path: string;\n content: string;\n globs?: string[];\n alwaysApply?: boolean;\n description?: string;\n condition?: string[];\n scope?: string[];\n interruptMode?: \"never\" | \"prose-only\" | \"tool-only\" | \"always\";\n _source: SourceMeta;\n}\n```\n\nCapability identity is `rule.name` (`ruleCapability.key = rule => rule.name`).\n\nConsequence: precedence and deduplication are **name-based only**. Two different files with the same `name` are considered the same logical rule.\n\n## 2. Discovery sources and normalization\n\n`src/discovery/index.ts` auto-registers providers. For `rules`, current providers are:\n\n- `native` (priority `100`)\n- `agents` (priority `70`)\n- `cursor` (priority `50`)\n- `windsurf` (priority `50`)\n- `cline` (priority `40`)\n- `builtin-defaults` (priority `1`)\n\n### Native provider (`builtin.ts`)\n\nLoads `.omp` rules from:\n\n- project: `<cwd>/.omp/rules/*.{md,mdc}` when the cwd `.omp` directory exists\n- user: `~/.omp/agent/rules/*.{md,mdc}`\n- sticky user rule: `~/.omp/agent/RULES.md`\n- sticky project rule: nearest ancestor `.omp/RULES.md` while walking from cwd toward the repository root\n\nNormalization:\n\n- `name` = filename without `.md`/`.mdc`\n- frontmatter parsed via `parseFrontmatter`\n- `content` = body (frontmatter stripped)\n- `globs`, `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by `buildRuleFromMarkdown`\n- top-level `RULES.md` is synthesized as rule name `RULES` and forced to `alwaysApply: true`\n\nImportant caveat: `condition` values that look like file globs are converted into `tool:edit(...)` / `tool:write(...)` scope shorthands with 
catch-all condition `.*`.\n\n### Agents provider (`agents.ts`)\n\nLoads from both `.agent` and `.agents` directories:\n\n- project: walk upward from `cwd` to repo root, loading `<ancestor>/.agent/rules/*.{md,mdc}` and `<ancestor>/.agents/rules/*.{md,mdc}`\n- user: `~/.agent/rules/*.{md,mdc}` and `~/.agents/rules/*.{md,mdc}`\n\nNormalization uses the shared `buildRuleFromMarkdown` path: filename-derived name, stripped frontmatter body, and parsed `globs`, `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode`.\n\n### Cursor provider (`cursor.ts`)\n\nLoads from:\n\n- user: `~/.cursor/rules/*.{mdc,md}`\n- project: `<cwd>/.cursor/rules/*.{mdc,md}`\n\nNormalization (`transformMDCRule`):\n\n- `description`: kept only if string\n- `alwaysApply`: only `true` is preserved (`false` becomes `undefined`)\n- `globs`: accepts array (string elements only) or single string\n- `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` are parsed by shared rule helpers\n- `name` from filename without extension\n\n### Windsurf provider (`windsurf.ts`)\n\nLoads from:\n\n- user: `~/.codeium/windsurf/memories/global_rules.md` (fixed rule name `global_rules`)\n- project: `<cwd>/.windsurf/rules/*.md`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `global_rules` for the user global file and derived from filename for project rules\n\n### Cline provider (`cline.ts`)\n\nSearches upward from `cwd` for nearest `.clinerules`:\n\n- if directory: loads `*.md` inside it\n- if file: loads single file as rule named `clinerules`\n\nNormalization:\n\n- `globs`: array-of-string or single string\n- `alwaysApply`, `description`, `condition`/legacy `ttsr_trigger`, `scope`, and `interruptMode` parsed by shared rule helpers\n- `name` is fixed to `clinerules` for a `.clinerules` file and derived from 
filename for `.clinerules/*.md`\n\n## 3. Frontmatter parsing behavior and ambiguity\n\nAll providers use `parseFrontmatter` (`utils/frontmatter.ts`) with these semantics:\n\n1. Frontmatter is parsed only when content starts with `---` and has a closing `\\n---`.\n2. Body is trimmed after frontmatter extraction.\n3. If YAML parse fails:\n - warning is logged,\n - parser falls back to simple `key: value` line parsing (`^(\\w+):\\s*(.*)$`).\n\nAmbiguity consequences:\n\n- Fallback parser does not support arrays, nested objects, quoting rules, or hyphenated keys.\n- Fallback values become strings (for example `alwaysApply: true` becomes string `\"true\"`), so providers requiring boolean/string types may drop metadata.\n- `ttsr_trigger` works in fallback (underscore key); keys like `thinking-level` would not.\n- Files without valid frontmatter still load as rules with empty metadata and full content body.\n\n## 4. Provider precedence and deduplication\n\n`loadCapability(\"rules\")` (`capability/index.ts`) merges provider outputs and then deduplicates by `rule.name`.\n\n### Precedence model\n\n- Providers are ordered by priority descending.\n- Equal priority keeps registration order (`cursor` before `windsurf` from `discovery/index.ts`).\n- Dedup is first-wins: first encountered rule name is kept; later same-name items are marked `_shadowed` in `all` and excluded from `items`.\n\nEffective rule provider order is currently:\n\n1. `native` (100)\n2. `agents` (70)\n3. `cursor` (50)\n4. `windsurf` (50)\n5. `cline` (40)\n6. `builtin-defaults` (1)\n\n### Intra-provider ordering caveat\n\nWithin a provider, item order comes from `loadFilesFromDir` glob result ordering plus explicit push order. 
This is deterministic enough for normal use but not explicitly sorted in code.\n\nNotable source-order differences:\n\n- `native` appends project `.omp/rules`, user `~/.omp/agent/rules`, user `RULES.md`, then nearest project `RULES.md`.\n- `agents` appends project-walk `.agent`/`.agents` rule dirs before user home dirs.\n- `cursor` appends user then project results.\n- `windsurf` appends user `global_rules` first, then project rules.\n- `cline` loads only nearest `.clinerules` source.\n- `builtin-defaults` uses the embedded rule source order.\n\n## 5. Split into Rulebook, Always-Apply, and TTSR buckets\n\nAfter rule discovery in `createAgentSession` (`sdk.ts`), `bucketRules(...)` applies session-level filtering and bucket assignment:\n\n1. Drop rules listed in `ttsr.disabledRules`.\n2. Drop rules from the `builtin-defaults` provider when `ttsr.builtinRules === false`.\n3. Register rules with non-empty `condition` into `TtsrManager`; if registration succeeds, the rule is TTSR-only.\n4. Put remaining `alwaysApply === true` rules into `alwaysApplyRules`.\n5. Put remaining rules with `description` into `rulebookRules`.\n\n### Bucket behavior\n\n- **TTSR bucket**: any enabled rule with a non-empty parsed `condition` that `TtsrManager.addRule(...)` accepts. Takes priority over other buckets.\n- **Always-apply bucket**: `alwaysApply === true`, not TTSR. Full content injected into system prompt. Resolvable via `rule://`.\n- **Rulebook bucket**: must have description, must not be TTSR, must not be `alwaysApply`. Listed in system prompt by name+description; content read on demand via `rule://`.\n- A rule with both `condition` and `alwaysApply` goes to TTSR only if TTSR registration accepts it; otherwise it can fall through to always-apply.\n- A rule with both `alwaysApply` and `description` goes to always-apply only (not rulebook).\n\n## 6. 
How metadata affects runtime surfaces\n\n### `description`\n\n- Required for inclusion in rulebook.\n- Rendered in system prompt `<rules>` block.\n- Missing description means the rule is not listed in the system prompt rules block and, unless it lands in the always-apply bucket, is not addressable via `rule://`.\n\n### `globs`\n\n- Carried through on `Rule`.\n- Rendered as `<glob>...</glob>` entries in the system prompt rules block.\n- Exposed in rules UI state (`extensions` mode list).\n- Used by TTSR as a global path gate: if a TTSR rule has globs, the match context must include at least one matching file path.\n- Not used to automatically select rulebook rules for `rule://`; rulebook matching remains advisory prompt behavior.\n\n### `alwaysApply`\n\n- Parsed and preserved by providers.\n- Used in UI display (`\"always\"` trigger label in extensions state manager).\n- Used as an exclusion condition from `rulebookRules`.\n- **Full rule content is auto-injected into the system prompt** (before the rulebook rules section).\n- Rule is also addressable via `rule://<name>` for re-reading.\n\n### `condition`, `scope`, and `interruptMode`\n\n- `condition` is the current TTSR trigger field; legacy `ttsr_trigger` / `ttsrTrigger` are accepted as fallback inputs during parsing.\n- `scope` narrows TTSR matching scope. A condition token that looks like a file glob becomes `tool:edit(<glob>)` and `tool:write(<glob>)` scope entries plus catch-all condition `.*`.\n- `interruptMode` can override the global TTSR interrupt mode for the rule.\n\n## 7. 
System prompt inclusion path\n\n`buildSystemPromptInternal` receives both `rules` (rulebook) and `alwaysApplyRules`.\n\nAlways-apply rules are rendered first, injecting their raw content directly into the prompt.\n\nRulebook rules are rendered in a `# Rules` section with:\n\n- `Read rule://<name> when working in matching domain`\n- Each rule's `name`, `description`, and optional `<glob>` list\n\nThis is advisory/contextual: prompt text asks the model to read applicable rules, but code does not enforce glob applicability.\n\n## 8. `rule://` internal URL behavior\n\n`RuleProtocolHandler` is registered with:\n\n```ts\nnew RuleProtocolHandler({\n getRules: () => [...rulebookRules, ...alwaysApplyRules],\n});\n```\n\nImplications:\n\n- `rule://<name>` resolves against both **rulebookRules** and **alwaysApplyRules**.\n- TTSR-only rules and rules with no description and no `alwaysApply` are not addressable via `rule://`.\n- Resolution is exact name match.\n- Unknown names return error listing available rule names.\n- Returned content is raw `rule.content` (frontmatter stripped), content type `text/markdown`.\n\n## 9. Known partial / non-enforced semantics\n\n1. The rule providers currently loaded for `rules` are `native`, `agents`, `cursor`, `windsurf`, `cline`, and embedded `builtin-defaults`; provider files for other tools may parse other config formats but do not register rule loaders.\n2. `globs` metadata is surfaced to prompt/UI and is used as a global path gate for TTSR matching, but it is not used to automatically select rulebook rules for `rule://`.\n3. Rule selection for `rule://` includes rulebook and always-apply rules, but not TTSR-only rules.\n4. Discovery warnings (`loadCapability(\"rules\").warnings`) are produced but `createAgentSession` does not currently surface/log them in this path.\n",
53
53
  "sdk.md": "# SDK\n\nThe SDK is the in-process integration surface for `@oh-my-pi/pi-coding-agent`.\nUse it when you want direct access to agent state, event streaming, tool wiring, and session control from your own Bun/Node process.\n\nIf you need cross-language/process isolation, use RPC mode instead.\n\n## Installation\n\n```bash\nbun add @oh-my-pi/pi-coding-agent\n```\n\n## Entry points\n\n`@oh-my-pi/pi-coding-agent` exports the SDK APIs from the package root (and also via `@oh-my-pi/pi-coding-agent/sdk`).\n\nCore exports for embedders:\n\n- `createAgentSession`\n- `SessionManager`\n- `Settings`\n- `AuthStorage`\n- `ModelRegistry`\n- `discoverAuthStorage`\n- Discovery helpers (`discoverExtensions`, `discoverSkills`, `discoverContextFiles`, `discoverPromptTemplates`, `discoverSlashCommands`, `discoverCustomTSCommands`, `discoverMCPServers`)\n- Tool factory surface (`createTools`, `BUILTIN_TOOLS`, tool classes)\n\n## Quick start (auto-discovery defaults)\n\n```ts\nimport { createAgentSession } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session, modelFallbackMessage } = await createAgentSession();\n\nif (modelFallbackMessage) {\n process.stderr.write(`${modelFallbackMessage}\\n`);\n}\n\nconst unsubscribe = session.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Summarize this repository in 3 bullets.\");\nunsubscribe();\nawait session.dispose();\n```\n\n## What `createAgentSession()` discovers by default\n\n`createAgentSession()` follows “provide to override, omit to discover”.\n\nIf omitted, it resolves:\n\n- `cwd`: `getProjectDir()`\n- `agentDir`: `~/.omp/agent` (via `getAgentDir()`)\n- `authStorage`: `discoverAuthStorage(agentDir)`\n- `modelRegistry`: `new ModelRegistry(authStorage)` + background `refreshInBackground()` when the registry is not provided\n- `settings`: `await 
Settings.init({ cwd, agentDir })`\n- `sessionManager`: `SessionManager.create(cwd)` (file-backed)\n- skills/context files/prompt templates/slash commands/extensions/custom TS commands\n- built-in tools via `createTools(...)`\n- MCP tools (enabled by default; Exa MCP servers are folded into native Exa integration, and browser automation MCP servers are filtered when the built-in browser tool is enabled)\n- LSP integration (enabled by default)\n- `eventBus`: new `EventBus()` unless supplied\n\n### Required vs optional inputs\n\nTypically you must provide only what you want to control:\n\n- **Must provide**: nothing for a minimal session\n- **Usually provide explicitly** in embedders:\n - `sessionManager` (if you need in-memory or custom location)\n - `authStorage` + `modelRegistry` (if you own credential/model lifecycle)\n - `model` or `modelPattern` (if deterministic model selection matters)\n - `settings` (if you need isolated/test config)\n\n## Session manager behavior (persistent vs in-memory)\n\n`AgentSession` always uses a `SessionManager`; behavior depends on which factory you use.\n\n### File-backed (default)\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.create(process.cwd()),\n});\n\nconsole.log(session.sessionFile); // absolute .jsonl path\n```\n\n- Persists conversation/messages/state deltas to session files.\n- Supports resume/open/list/fork workflows.\n- `session.sessionFile` is defined.\n\n### In-memory\n\n```ts\nimport { createAgentSession, SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst { session } = await createAgentSession({\n sessionManager: SessionManager.inMemory(),\n});\n\nconsole.log(session.sessionFile); // undefined\n```\n\n- No filesystem persistence.\n- Useful for tests, ephemeral workers, request-scoped agents.\n- Session methods still work, but persistence-specific behaviors (file resume/fork 
paths) are naturally limited.\n\n### Resume/open/list helpers\n\n```ts\nimport { SessionManager } from \"@oh-my-pi/pi-coding-agent\";\n\nconst recent = await SessionManager.continueRecent(process.cwd());\nconst listed = await SessionManager.list(process.cwd());\nconst opened = listed[0] ? await SessionManager.open(listed[0].path) : null;\n```\n\n## Model and auth wiring\n\n`createAgentSession()` uses `ModelRegistry` + `AuthStorage` for model selection and API key resolution.\n\n### Explicit wiring\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst available = modelRegistry.getAvailable();\nif (available.length === 0)\n throw new Error(\"No authenticated models available\");\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n model: available[0],\n thinkingLevel: \"medium\",\n sessionManager: SessionManager.inMemory(),\n});\n```\n\n### Selection order when `model` is omitted\n\nWhen no explicit `model`/`modelPattern` is provided:\n\n1. restore model from existing session (if restorable + key available)\n2. settings default model role (`default`)\n3. first available model with valid auth\n\nIf restore fails, `modelFallbackMessage` explains fallback.\n\n### Auth priority\n\n`AuthStorage.getApiKey(...)` resolves in this order:\n\n1. runtime override (`setRuntimeApiKey`, used by CLI `--api-key`)\n2. config-sourced API key override (`models.yml` provider `apiKey`)\n3. stored API-key credential in `agent.db` / broker-backed storage\n4. stored OAuth credential, including refresh when needed\n5. provider environment variables\n6. 
custom-provider resolver fallback\n\n## Event subscription model\n\nSubscribe with `session.subscribe(listener)`; it returns an unsubscribe function.\n\n```ts\nconst unsubscribe = session.subscribe((event) => {\n switch (event.type) {\n case \"agent_start\":\n case \"turn_start\":\n case \"tool_execution_start\":\n break;\n case \"message_update\":\n if (event.assistantMessageEvent.type === \"text_delta\") {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n break;\n }\n});\n```\n\n`AgentSessionEvent` includes core `AgentEvent` plus session-level events:\n\n- `auto_compaction_start` / `auto_compaction_end`\n- `auto_retry_start` / `auto_retry_end`\n- `retry_fallback_applied` / `retry_fallback_succeeded`\n- `ttsr_triggered`\n- `todo_reminder` / `todo_auto_clear`\n- `irc_message`\n\n## Prompt lifecycle\n\n`session.prompt(text, options?)` is the primary entry point.\n\nBehavior:\n\n1. optional command/template expansion (`/` commands, custom commands, file slash commands, prompt templates)\n2. if currently streaming:\n - requires `streamingBehavior: \"steer\" | \"followUp\"`\n - queues instead of throwing work away\n3. if idle:\n - validates model + API key\n - appends user message\n - starts agent turn\n\nRelated APIs:\n\n- `sendUserMessage(content, { deliverAs? })`\n- `steer(text, images?)`\n- `followUp(text, images?)`\n- `sendCustomMessage({ customType, content, ... }, { deliverAs?, triggerTurn? 
})`\n- `abort()`\n\n## Tools and extension integration\n\n### Built-ins and filtering\n\n- Built-ins come from `createTools(...)` and `BUILTIN_TOOLS`.\n- `toolNames` acts as an allowlist for built-ins.\n- `customTools` and extension-registered tools are still included.\n- Hidden tools (for example `yield`) are opt-in unless required by options.\n\n```ts\nconst { session } = await createAgentSession({\n toolNames: [\"read\", \"search\", \"find\", \"write\"],\n requireYieldTool: true,\n});\n```\n\n### Extensions\n\n- `extensions`: inline `ExtensionFactory[]`\n- `additionalExtensionPaths`: load extra extension files\n- `disableExtensionDiscovery`: disable automatic extension scanning\n- `preloadedExtensions`: reuse already loaded extension set\n\n### Runtime tool set changes\n\n`AgentSession` supports runtime activation updates:\n\n- `getActiveToolNames()`\n- `getAllToolNames()`\n- `setActiveToolsByName(names)`\n- `refreshMCPTools(mcpTools)`\n\nSystem prompt is rebuilt to reflect active tool changes.\n\n## Discovery helpers\n\nUse these when you want partial control without recreating internal discovery logic:\n\n- `discoverAuthStorage(agentDir?)`\n- `discoverExtensions(cwd?)`\n- `discoverSkills(cwd?, _agentDir?, settings?)`\n- `discoverContextFiles(cwd?, _agentDir?)`\n- `discoverPromptTemplates(cwd?, agentDir?)`\n- `discoverSlashCommands(cwd?)`\n- `discoverCustomTSCommands(cwd?, agentDir?)`\n- `discoverMCPServers(cwd?)`\n- `buildSystemPrompt(options?)`\n\n## Subagent-oriented options\n\nFor SDK consumers building orchestrators (similar to task executor flow):\n\n- `outputSchema`: passes structured output expectation into tool context\n- `requireYieldTool`: forces `yield` tool inclusion\n- `taskDepth`: recursion-depth context for nested task sessions\n- `parentTaskPrefix`: artifact naming prefix for nested task outputs\n\nThese are optional for normal single-agent embedding.\n\n## `createAgentSession()` return value\n\n```ts\ntype CreateAgentSessionResult = {\n 
session: AgentSession;\n extensionsResult: LoadExtensionsResult;\n setToolUIContext: (uiContext: ExtensionUIContext, hasUI: boolean) => void;\n mcpManager?: MCPManager;\n modelFallbackMessage?: string;\n lspServers?: Array<{\n name: string;\n status: \"ready\" | \"error\";\n fileTypes: string[];\n error?: string;\n }>;\n eventBus: EventBus;\n};\n```\n\nUse `setToolUIContext(...)` only if your embedder provides UI capabilities that tools/extensions should call into.\n\n## Startup performance\n\n`createAgentSession()` runs two background optimizations to overlap I/O with the rest of session setup:\n\n- **Model-host preconnect.** As soon as the model is resolved, the SDK fires a best-effort `fetch.preconnect(model.baseUrl)` so DNS + TCP + TLS + HTTP/2 to the provider's host happens in parallel with extension/skill load, tool registry build, and system-prompt assembly. The first real `fetch(...)` then reuses the warm connection, saving 100–300 ms on transcontinental hops (e.g. residential IP → `api.anthropic.com`). Implementation lives in `preconnectModelHost()` in `packages/coding-agent/src/sdk.ts`. If `fetch.preconnect` is unavailable (non-Bun runtime) or the call throws, the optimization is silently skipped — never a hard dependency. Applies to every mode (interactive, print, RPC, ACP).\n- **Conditional LSP warmup.** Startup LSP servers (those returned by `discoverStartupLspServers(cwd)`) are only warmed when **all** of these hold:\n - `enableLsp !== false` on the session options, **and**\n - `options.hasUI === true` (interactive TUI), **and**\n - the `lsp.diagnosticsOnWrite` setting is enabled.\n\n Print / script / RPC / ACP invocations (`hasUI=false`) skip the warmup entirely: they don't render the warmup status indicator and typically finish before the language servers would stabilize, so warming them just spends CPU parsing big `initialize` responses concurrently with the LLM stream consumer and jitters perceived latency. 
Tools that actually need an LSP server still spin one up on demand through `getOrCreateClient()` — only the _startup_ warmup is skipped. The returned `lspServers` field in `CreateAgentSessionResult` is therefore `undefined` (not an empty array) whenever the warmup branch was bypassed.\n\n## Minimal controlled embed example\n\n```ts\nimport {\n createAgentSession,\n discoverAuthStorage,\n ModelRegistry,\n SessionManager,\n Settings,\n} from \"@oh-my-pi/pi-coding-agent\";\n\nconst authStorage = await discoverAuthStorage();\nconst modelRegistry = new ModelRegistry(authStorage);\nawait modelRegistry.refresh();\n\nconst settings = Settings.isolated({\n \"compaction.enabled\": true,\n \"retry.enabled\": true,\n});\n\nconst { session } = await createAgentSession({\n authStorage,\n modelRegistry,\n settings,\n sessionManager: SessionManager.inMemory(),\n toolNames: [\"read\", \"search\", \"find\", \"edit\", \"write\"],\n enableMCP: false,\n enableLsp: true,\n});\n\nsession.subscribe((event) => {\n if (\n event.type === \"message_update\" &&\n event.assistantMessageEvent.type === \"text_delta\"\n ) {\n process.stdout.write(event.assistantMessageEvent.delta);\n }\n});\n\nawait session.prompt(\"Find all TODO comments in this repo and propose fixes.\");\nawait session.dispose();\n```\n",
54
54
  "secrets.md": "# Secret Obfuscation\n\nPrevents sensitive values (API keys, tokens, passwords) from being sent to LLM providers. When enabled, secrets are replaced before outbound text content leaves the process. Reversible obfuscation placeholders are restored when session context is rebuilt for display or resume.\n\n## Enabling\n\nDisabled by default. Toggle via `/settings` UI or directly in `config.yml`:\n\n```yaml\nsecrets:\n enabled: true\n```\n\n## How it works\n\n1. On session startup, secrets are collected from two sources:\n - **Environment variables** whose names match common secret patterns (`KEY`, `SECRET`, `TOKEN`, `PASSWORD`, `PASS`, `AUTH`, `CREDENTIAL`, `PRIVATE`, `OAUTH`) with values >= 8 characters\n - **`secrets.yml` files** (see below)\n\n2. Outbound text messages to the LLM have secret values replaced with deterministic placeholders like `#AB12#`.\n\n3. Session context is deep-walked and obfuscation placeholders are restored when building display/resume context. Replace-mode substitutions are one-way and are not restored.\n\nTwo modes control what happens to each secret:\n\n| Mode | Behavior | Reversible |\n| --------------------- | ------------------------------------------------------- | -------------------------------------------- |\n| `obfuscate` (default) | Replaced with deterministic placeholder `#[A-Z0-9]{4}#` | Yes (deobfuscated in display/resume context) |\n| `replace` | Replaced with deterministic same-length string | No (one-way) |\n\n## secrets.yml\n\nDefine custom secret entries in YAML. 
Two locations are checked:\n\n| Level | Path | Purpose |\n| ------- | -------------------------- | --------------------------- |\n| Global | `~/.omp/agent/secrets.yml` | Secrets across all projects |\n| Project | `<cwd>/.omp/secrets.yml` | Project-specific secrets |\n\nProject entries override global entries with matching `content`.\n\n### Schema\n\nEach entry in the array has these fields:\n\n| Field | Type | Required | Description |\n| ------------- | ---------------------------- | -------- | ------------------------------------------------- |\n| `type` | `\"plain\"` or `\"regex\"` | Yes | Match strategy |\n| `content` | string | Yes | The secret value (plain) or regex pattern (regex) |\n| `mode` | `\"obfuscate\"` or `\"replace\"` | No | Default: `\"obfuscate\"` |\n| `replacement` | string | No | Custom replacement (replace mode only) |\n| `flags` | string | No | Regex flags (regex type only) |\n\n### Examples\n\n#### Plain secrets\n\n```yaml\n# Obfuscate a specific API key (default mode)\n- type: plain\n content: sk-proj-abc123def456\n\n# Replace a database password with a fixed string\n- type: plain\n content: hunter2\n mode: replace\n replacement: \"********\"\n```\n\n#### Regex secrets\n\n```yaml\n# Obfuscate any AWS-style key\n- type: regex\n content: \"AKIA[0-9A-Z]{16}\"\n\n# Case-insensitive match with explicit flags\n- type: regex\n content: \"api[_-]?key\\\\s*=\\\\s*\\\\w+\"\n flags: \"i\"\n\n# Regex literal syntax (pattern and flags in one string)\n- type: regex\n content: \"/bearer\\\\s+[a-zA-Z0-9._~+\\\\/=-]+/i\"\n```\n\nRegex entries always scan globally (the `g` flag is enforced automatically). The regex literal syntax `/pattern/flags` is supported as an alternative to separate `content` + `flags` fields. 
Escaped slashes within the pattern (`\\\\/`) are handled correctly.\n\n#### Replace mode with regex\n\n```yaml\n# One-way replace connection strings (not reversible)\n- type: regex\n content: \"postgres://[^\\\\s]+\"\n mode: replace\n replacement: \"postgres://***\"\n```\n\n## Interaction with env var detection\n\nEnvironment variables are collected first, then file-defined entries are appended. File entries can cover secrets that don't live in env vars (config files, hardcoded values, etc.). If the same plain value appears in both env and file entries, the env entry's obfuscate-mode mapping is used first.\n\n## Key files\n\n- `packages/coding-agent/src/secrets/index.ts` -- loading, merging, env var collection\n- `packages/coding-agent/src/secrets/obfuscator.ts` -- `SecretObfuscator` class, placeholder generation, message obfuscation\n- `packages/coding-agent/src/secrets/regex.ts` -- regex literal parsing and compilation\n- `packages/coding-agent/src/config/settings-schema.ts` -- `secrets.enabled` setting definition\n\n## See also\n\n- [`auth-broker-gateway.md`](./auth-broker-gateway.md) -- remote credential vault and forward-proxy that keep provider OAuth refresh tokens and access tokens off developer hosts entirely (complementary to in-process obfuscation).\n",
55
- "session-operations-export-share-fork-resume.md": "# Session Operations: export, dump, share, fork, resume/continue\n\nThis document describes operator-visible behavior for session export/share/fork/resume operations as currently implemented.\n\n## Implementation files\n\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/export/html/index.ts`](../packages/coding-agent/src/export/html/index.ts)\n- [`../src/export/custom-share.ts`](../packages/coding-agent/src/export/custom-share.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n\n## Operation matrix\n\n| Operation | Entry path | Session mutation | Session file creation/switch | Output artifact |\n| --------------------------------------- | ------------------------- | ------------------------------------- | ---------------------------------------------------------------------------------- | --------------------------------------------------------------- |\n| `/dump` | Interactive slash command | No | No | Clipboard text |\n| `/export [path]` | Interactive slash command | No | No | HTML file |\n| `--export <session.jsonl> [outputPath]` | CLI startup fast-path | No runtime session mutation | No active session; reads target file | HTML file |\n| `/share` | Interactive slash command | No | No | Temp HTML + share URL/gist |\n| `/fork` | Interactive slash command | Yes (active session identity changes) | Creates new session file and switches current session to it (persistent mode only) | Copies artifact directory to new session namespace when present |\n| `--fork <id\\|path>` | CLI startup | Yes after session creation | Creates a new session fork from the selected source into current cwd/session dir | None |\n| `/resume` | Interactive 
slash command | Yes (active in-memory state replaced) | Switches to selected existing session file | None |\n| `--resume` | CLI startup picker | Yes after session creation | Opens selected existing session file | None |\n| `--resume <id\\|path>` | CLI startup | Yes after session creation | Opens existing session; global cross-project match can fork into current project | None |\n| `--continue` | CLI startup | Yes after session creation | Opens terminal breadcrumb or most-recent session; creates new one if none exists | None |\n\n## Export and dump\n\n### `/export [outputPath]` (interactive)\n\nFlow:\n\n1. `InputController` routes `/export...` to `CommandController.handleExportCommand`.\n2. The command splits on whitespace and uses only the first argument after `/export` as `outputPath`.\n3. `AgentSession.exportToHtml()` calls `exportSessionToHtml(sessionManager, state, { outputPath, themeName })`.\n4. On success, UI shows path and opens the file in browser.\n\nBehavior details:\n\n- `--copy`, `clipboard`, and `copy` arguments are explicitly rejected with a warning to use `/dump`.\n- Export embeds session header/entries/leaf plus current `systemPrompt` and tool descriptions from agent state.\n- No session entries are appended during export.\n\nCaveat:\n\n- Argument parsing is whitespace-based (`text.split(/\\s+/)`), so quoted paths with spaces are not preserved as a single path by this command path.\n\n### `--export <inputSessionFile> [outputPath]` (CLI)\n\nFlow in `main.ts`:\n\n1. Handled early (before interactive/session startup).\n2. Calls `exportFromFile(inputPath, outputPath?)`.\n3. `SessionManager.open(inputPath)` loads entries, then HTML is generated and written.\n4. Process prints `Exported to: ...` and exits.\n\nBehavior details:\n\n- Missing input file surfaces as `File not found: <path>`.\n- This path does not create an `AgentSession` and does not mutate any running session.\n\n### `/dump` (interactive clipboard export)\n\nFlow:\n\n1. 
`CommandController.handleDumpCommand()` calls `session.formatSessionAsText()`.\n2. If empty string, reports `No messages to dump yet.`\n3. Otherwise copies to clipboard via native `copyToClipboard`.\n\nDump content includes:\n\n- System prompt\n- Active model/thinking level\n- Tool definitions + parameters\n- User/assistant messages\n- Thinking blocks and tool calls\n- Tool results and execution blocks (except `excludeFromContext` bash/python entries)\n- Custom/hook/file mention/branch summary/compaction summary entries\n\nNo session persistence changes are made by dumping.\n\n## Share\n\n`/share` is interactive-only and always starts by exporting current session to a temp HTML file.\n\n### Phase 1: temp export\n\n- Temp file path: `${os.tmpdir()}/${Snowflake.next()}.html`\n- Uses `session.exportToHtml(tmpFile)`\n- If export fails (notably in-memory sessions), share ends with error.\n\n### Phase 2: custom share handler (if present)\n\n`loadCustomShare()` checks `~/.omp/agent` for first existing candidate:\n\n- `share.ts`\n- `share.js`\n- `share.mjs`\n\nRequirements:\n\n- Module must default-export a function `(htmlPath) => Promise<CustomShareResult | string | undefined>`.\n\nIf present and valid:\n\n- UI enters `Sharing...` loader state.\n- Handler result interpretation:\n - string => treated as URL, shown and opened\n - object => `url` and/or `message` shown; `url` opened\n - `undefined`/falsy => generic `Session shared`\n- Temp file is removed after completion.\n\nCritical fallback behavior:\n\n- If custom handler exists but loading fails, command errors and returns.\n- If custom handler executes and throws, command errors and returns.\n- In both failure cases, it **does not** fall back to GitHub gist.\n- Gist fallback happens only when no custom share script exists.\n\n### Phase 3: default gist fallback\n\nOnly when no custom share handler is found:\n\n1. Validates `gh auth status`.\n2. Shows `Creating gist...` loader.\n3. 
Runs `gh gist create --public=false <tmpFile>`.\n4. Parses gist URL, derives gist id, builds preview URL `https://gistpreview.github.io/?<id>`.\n5. Shows both preview and gist URLs; opens preview.\n\nCancellation/abort semantics in share:\n\n- Loader has `onAbort` hook that restores editor UI and reports `Share cancelled`.\n- The underlying `gh gist create` command is not passed an abort signal in this code path; cancellation is UI-level and checked after command returns.\n\n## Fork\n\nInteractive `/fork` creates a new session from the current one and switches the active session identity.\n\n### Preconditions and immediate guards\n\n- If agent is streaming, `/fork` is rejected with warning.\n- UI status/loading indicators are cleared before operation.\n\n### Session-level flow\n\n`AgentSession.fork()`:\n\n1. Emits `session_before_switch` with `reason: \"fork\"` (cancellable).\n2. Flushes pending writes.\n3. Calls `SessionManager.fork()`.\n4. Copies artifacts directory from old session namespace to new namespace (best-effort; non-ENOENT copy failures are logged, not fatal).\n5. Updates `agent.sessionId`.\n6. Emits `session_switch` with `reason: \"fork\"`.\n\n`SessionManager.fork()` behavior:\n\n- Requires persistent mode and existing session file.\n- Creates new session id and new JSONL file path.\n- Rewrites header with:\n - new `id`\n - new timestamp\n - `cwd` unchanged\n - `parentSession` set to previous session id\n- Keeps all non-header entries unchanged in the new file.\n\n### Non-persistent behavior\n\n- In-memory session manager returns `undefined` from `fork()`.\n- `AgentSession.fork()` returns `false`.\n- UI reports `Fork failed (session not persisted or cancelled)`.\n\n### CLI `--fork <id|path>`\n\nStartup `--fork` is resolved before normal session creation:\n\n1. `--fork` is rejected with `--no-session`.\n2. Path-like values (`/`, `\\`, or `.jsonl`) call `SessionManager.forkFrom(path, cwd, sessionDir)`.\n3. 
Other values resolve via `resolveResumableSession(...)`: local sessions first, then global search when `sessionDir` is not forced. Matching accepts lowercased session id prefixes, full JSONL filename prefixes, and timestamp-stripped filename id suffixes.\n4. The forked file is created in the current cwd/session-dir scope and becomes the active session manager for startup.\n\n## Resume and continue\n\n## Interactive `/resume`\n\nFlow:\n\n1. Opens session selector populated via `SessionManager.list(currentCwd, currentSessionDir)`.\n2. On selection, `SelectorController.handleResumeSession(sessionPath)` calls `session.switchSession(sessionPath)`.\n3. UI clears/rebuilds chat and todos, then reports `Resumed session`.\n\nNotes:\n\n- This picker only lists sessions in the current session directory scope.\n- It does not use global cross-project search.\n\n## CLI `--resume`\n\n### `--resume` (no value)\n\n- `main.ts` lists sessions for current cwd/sessionDir and opens picker.\n- Selected path is opened with `SessionManager.open(selectedPath)` before session creation.\n\n### `--resume <value>`\n\n`createSessionManager()` resolution order:\n\n1. If value looks like path (`/`, `\\`, or `.jsonl`), open directly.\n2. Else `resolveResumableSession(...)` searches:\n - current scope (`SessionManager.list(cwd, sessionDir)`)\n - global sessions (`SessionManager.listAll()`) only when no explicit `sessionDir` was provided\n3. Matching accepts case-insensitive session id prefixes, full JSONL filename prefixes, and the id suffix after the timestamp in `<timestamp>_<sessionId>.jsonl`.\n\nCross-project id match behavior:\n\n- If matched session cwd differs from current cwd, CLI asks:\n - `Session found in different project ... Fork into current directory? [y/N]`\n- On yes: `SessionManager.forkFrom(match.path, cwd, sessionDir)` creates a new local forked file.\n- On no/non-TTY default: command errors.\n\n## CLI `--continue`\n\n`SessionManager.continueRecent(cwd, sessionDir)`:\n\n1. 
Resolves session dir for current cwd.\n2. Reads terminal-scoped breadcrumb first.\n3. Falls back to most recently modified session file.\n4. Opens found session; if none exists, creates new session.\n\nThis is startup-only behavior; there is no interactive `/continue` slash command.\n\n## How session switching actually mutates runtime state\n\n`AgentSession.switchSession(sessionPath)` does the runtime transition used by resume-like operations:\n\n1. Emit `session_before_switch` with `reason: \"resume\"` and `targetSessionFile` (cancellable).\n2. Disconnect agent event subscription and abort in-flight work.\n3. Flush current session manager writes.\n4. Capture rollback state for the current session, agent messages, queued steering/follow-up/next-turn messages, model/thinking/service-tier, MCP selections, tools, and system prompt.\n5. Clear queued steering/follow-up/next-turn messages.\n6. `sessionManager.setSessionFile(sessionPath)` and update `agent.sessionId`.\n7. Build session context from loaded entries.\n8. Restore MCP selections/tools/system prompt for the target session.\n9. Emit `session_switch` with `reason: \"resume\"`.\n10. Replace agent messages from context and sync todos.\n11. Close provider sessions when switching files, or when same-file reload changed replay messages.\n12. Restore model (if available in current registry).\n13. Restore or initialize thinking level and service tier.\n14. 
Reconnect agent event subscription.\n\nIf any step after the capture fails, `switchSession()` restores the captured state and reconnects the previous agent subscription before rethrowing.\n\nNo new session file is created by `switchSession()` itself.\n\n## Event emissions and cancellation points\n\n### Switch/fork lifecycle hooks\n\nFor `newSession`, `fork`, and `switchSession`:\n\n- Before event: `session_before_switch`\n - reasons: `new`, `fork`, `resume`\n - cancellable by returning `{ cancel: true }`\n- After event: `session_switch`\n - same reason set\n - includes `previousSessionFile`\n\n`ExtensionRunner.emit()` returns early on the first cancelling before-event result.\n\n### Custom tool `onSession` behavior\n\nSDK bridges extension session events to custom tool `onSession` callbacks:\n\n- `session_switch` -> `onSession({ reason: \"switch\", previousSessionFile })`\n- `session_branch` -> `reason: \"branch\"`\n- `session_start` -> `reason: \"start\"`\n- `session_tree` -> `reason: \"tree\"`\n- `session_shutdown` -> `reason: \"shutdown\"`\n\nThese callbacks are observational; they do not cancel switch/fork.\n\n### Other cancellation surfaces relevant to this doc\n\n- `/fork` is blocked while streaming (user must wait/abort current response first).\n- `/resume` selector can be cancelled by user closing selector.\n- Cross-project `--resume <id>` can be cancelled by declining fork prompt.\n- `/share` has UI abort path (`Share cancelled`) for gist flow; it does not wire process-kill semantics for `gh gist create` in this code path.\n\n## Non-persistent (in-memory) session behavior\n\nWhen session manager is created with `SessionManager.inMemory()` (`--no-session`):\n\n- Session file path is absent.\n- `/export` and `/share` fail with `Cannot export in-memory session to HTML` (propagated to command error UI).\n- `/fork` fails because `SessionManager.fork()` requires persistence.\n- `/dump` still works because it serializes in-memory agent state.\n- CLI 
resume/continue semantics are bypassed if `--no-session` is set, because manager creation returns an in-memory manager immediately.\n\n## Known implementation caveats (as of current code)\n\n- `SelectorController.handleResumeSession()` does not check the boolean result from `session.switchSession(...)`; a hook-cancelled switch can still proceed through the UI \"Resumed session\" repaint/status path.\n- `/share` custom-share failures do not degrade to the default gist fallback; they terminate the command with an error.\n- `/export` argument tokenization is simplistic and does not preserve quoted paths with spaces.\n",
55
+ "session-operations-export-share-fork-resume.md": "# Session Operations: export, dump, share, fresh, fork, resume/continue\n\nThis document describes operator-visible behavior for session export/share/fork/resume operations as currently implemented.\n\n## Implementation files\n\n- [`../src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/export/html/index.ts`](../packages/coding-agent/src/export/html/index.ts)\n- [`../src/export/custom-share.ts`](../packages/coding-agent/src/export/custom-share.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n\n## Operation matrix\n\n| Operation | Entry path | Session mutation | Session file creation/switch | Output artifact |\n| --------------------------------------- | ------------------------- | ------------------------------------- | ---------------------------------------------------------------------------------- | --------------------------------------------------------------- |\n| `/dump` | Interactive slash command | No | No | Clipboard text |\n| `/export [path]` | Interactive slash command | No | No | HTML file |\n| `--export <session.jsonl> [outputPath]` | CLI startup fast-path | No runtime session mutation | No active session; reads target file | HTML file |\n| `/share` | Interactive slash command | No | No | Temp HTML + share URL/gist |\n| `/fresh` | Interactive slash command | Yes (provider-facing in-memory id/state only) | No; keeps current session file/header | None |\n| `/fork` | Interactive slash command | Yes (active session identity changes) | Creates new session file and switches current session to it (persistent mode only) | Copies artifact directory to new session namespace when present |\n| `--fork <id\\|path>` | CLI startup | 
Yes after session creation | Creates a new session fork from the selected source into current cwd/session dir | None |\n| `/resume` | Interactive slash command | Yes (active in-memory state replaced) | Switches to selected existing session file | None |\n| `--resume` | CLI startup picker | Yes after session creation | Opens selected existing session file | None |\n| `--resume <id\\|path>` | CLI startup | Yes after session creation | Opens existing session; global cross-project match can fork into current project | None |\n| `--continue` | CLI startup | Yes after session creation | Opens terminal breadcrumb or most-recent session; creates new one if none exists | None |\n\n## Export and dump\n\n### `/export [outputPath]` (interactive)\n\nFlow:\n\n1. `InputController` routes `/export...` to `CommandController.handleExportCommand`.\n2. The command splits on whitespace and uses only the first argument after `/export` as `outputPath`.\n3. `AgentSession.exportToHtml()` calls `exportSessionToHtml(sessionManager, state, { outputPath, themeName })`.\n4. On success, UI shows path and opens the file in browser.\n\nBehavior details:\n\n- `--copy`, `clipboard`, and `copy` arguments are explicitly rejected with a warning to use `/dump`.\n- Export embeds session header/entries/leaf plus current `systemPrompt` and tool descriptions from agent state.\n- No session entries are appended during export.\n\nCaveat:\n\n- Argument parsing is whitespace-based (`text.split(/\\s+/)`), so quoted paths with spaces are not preserved as a single path by this command path.\n\n### `--export <inputSessionFile> [outputPath]` (CLI)\n\nFlow in `main.ts`:\n\n1. Handled early (before interactive/session startup).\n2. Calls `exportFromFile(inputPath, outputPath?)`.\n3. `SessionManager.open(inputPath)` loads entries, then HTML is generated and written.\n4. 
Process prints `Exported to: ...` and exits.\n\nBehavior details:\n\n- Missing input file surfaces as `File not found: <path>`.\n- This path does not create an `AgentSession` and does not mutate any running session.\n\n### `/dump` (interactive clipboard export)\n\nFlow:\n\n1. `CommandController.handleDumpCommand()` calls `session.formatSessionAsText()`.\n2. If empty string, reports `No messages to dump yet.`\n3. Otherwise copies to clipboard via native `copyToClipboard`.\n\nDump content includes:\n\n- System prompt\n- Active model/thinking level\n- Tool definitions + parameters\n- User/assistant messages\n- Thinking blocks and tool calls\n- Tool results and execution blocks (except `excludeFromContext` bash/python entries)\n- Custom/hook/file mention/branch summary/compaction summary entries\n\nNo session persistence changes are made by dumping.\n\n## Share\n\n`/share` is interactive-only and always starts by exporting current session to a temp HTML file.\n\n### Phase 1: temp export\n\n- Temp file path: `${os.tmpdir()}/${Snowflake.next()}.html`\n- Uses `session.exportToHtml(tmpFile)`\n- If export fails (notably in-memory sessions), share ends with error.\n\n### Phase 2: custom share handler (if present)\n\n`loadCustomShare()` checks `~/.omp/agent` for first existing candidate:\n\n- `share.ts`\n- `share.js`\n- `share.mjs`\n\nRequirements:\n\n- Module must default-export a function `(htmlPath) => Promise<CustomShareResult | string | undefined>`.\n\nIf present and valid:\n\n- UI enters `Sharing...` loader state.\n- Handler result interpretation:\n - string => treated as URL, shown and opened\n - object => `url` and/or `message` shown; `url` opened\n - `undefined`/falsy => generic `Session shared`\n- Temp file is removed after completion.\n\nCritical fallback behavior:\n\n- If custom handler exists but loading fails, command errors and returns.\n- If custom handler executes and throws, command errors and returns.\n- In both failure cases, it **does not** fall back to 
GitHub gist.\n- Gist fallback happens only when no custom share script exists.\n\n### Phase 3: default gist fallback\n\nOnly when no custom share handler is found:\n\n1. Validates `gh auth status`.\n2. Shows `Creating gist...` loader.\n3. Runs `gh gist create --public=false <tmpFile>`.\n4. Parses gist URL, derives gist id, builds preview URL `https://gistpreview.github.io/?<id>`.\n5. Shows both preview and gist URLs; opens preview.\n\nCancellation/abort semantics in share:\n\n- Loader has `onAbort` hook that restores editor UI and reports `Share cancelled`.\n- The underlying `gh gist create` command is not passed an abort signal in this code path; cancellation is UI-level and checked after command returns.\n\n## Fork\n\nInteractive `/fork` creates a new session from the current one and switches the active session identity.\n\n### Preconditions and immediate guards\n\n- If agent is streaming, `/fork` is rejected with warning.\n- UI status/loading indicators are cleared before operation.\n\n### Session-level flow\n\n`AgentSession.fork()`:\n\n1. Emits `session_before_switch` with `reason: \"fork\"` (cancellable).\n2. Flushes pending writes.\n3. Calls `SessionManager.fork()`.\n4. Copies artifacts directory from old session namespace to new namespace (best-effort; non-ENOENT copy failures are logged, not fatal).\n5. Updates `agent.sessionId`.\n6. 
Emits `session_switch` with `reason: \"fork\"`.\n\n`SessionManager.fork()` behavior:\n\n- Requires persistent mode and existing session file.\n- Creates new session id and new JSONL file path.\n- Rewrites header with:\n - new `id`\n - new timestamp\n - `cwd` unchanged\n - `parentSession` set to previous session id\n- Keeps all non-header entries unchanged in the new file.\n\n### Non-persistent behavior\n\n- In-memory session manager returns `undefined` from `fork()`.\n- `AgentSession.fork()` returns `false`.\n- UI reports `Fork failed (session not persisted or cancelled)`.\n\n### CLI `--fork <id|path>`\n\nStartup `--fork` is resolved before normal session creation:\n\n1. `--fork` is rejected with `--no-session`.\n2. Path-like values (`/`, `\\`, or `.jsonl`) call `SessionManager.forkFrom(path, cwd, sessionDir)`.\n3. Other values resolve via `resolveResumableSession(...)`: local sessions first, then global search when `sessionDir` is not forced. Matching accepts lowercased session id prefixes, full JSONL filename prefixes, and timestamp-stripped filename id suffixes.\n4. The forked file is created in the current cwd/session-dir scope and becomes the active session manager for startup.\n\n## Resume and continue\n\n## Interactive `/resume`\n\nFlow:\n\n1. Opens session selector populated via `SessionManager.list(currentCwd, currentSessionDir)`.\n2. On selection, `SelectorController.handleResumeSession(sessionPath)` calls `session.switchSession(sessionPath)`.\n3. UI clears/rebuilds chat and todos, then reports `Resumed session`.\n\nNotes:\n\n- This picker only lists sessions in the current session directory scope.\n- It does not use global cross-project search.\n\n## CLI `--resume`\n\n### `--resume` (no value)\n\n- `main.ts` lists sessions for current cwd/sessionDir and opens picker.\n- Selected path is opened with `SessionManager.open(selectedPath)` before session creation.\n\n### `--resume <value>`\n\n`createSessionManager()` resolution order:\n\n1. 
If value looks like path (`/`, `\\`, or `.jsonl`), open directly.\n2. Else `resolveResumableSession(...)` searches:\n - current scope (`SessionManager.list(cwd, sessionDir)`)\n - global sessions (`SessionManager.listAll()`) only when no explicit `sessionDir` was provided\n3. Matching accepts case-insensitive session id prefixes, full JSONL filename prefixes, and the id suffix after the timestamp in `<timestamp>_<sessionId>.jsonl`.\n\nCross-project id match behavior:\n\n- If matched session cwd differs from current cwd, CLI asks:\n - `Session found in different project ... Fork into current directory? [y/N]`\n- On yes: `SessionManager.forkFrom(match.path, cwd, sessionDir)` creates a new local forked file.\n- On no/non-TTY default: command errors.\n\n## CLI `--continue`\n\n`SessionManager.continueRecent(cwd, sessionDir)`:\n\n1. Resolves session dir for current cwd.\n2. Reads terminal-scoped breadcrumb first.\n3. Falls back to most recently modified session file.\n4. Opens found session; if none exists, creates new session.\n\nThis is startup-only behavior; there is no interactive `/continue` slash command.\n\n## How session switching actually mutates runtime state\n\n`AgentSession.switchSession(sessionPath)` does the runtime transition used by resume-like operations:\n\n1. Emit `session_before_switch` with `reason: \"resume\"` and `targetSessionFile` (cancellable).\n2. Disconnect agent event subscription and abort in-flight work.\n3. Flush current session manager writes.\n4. Capture rollback state for the current session, agent messages, queued steering/follow-up/next-turn messages, model/thinking/service-tier, MCP selections, tools, and system prompt.\n5. Clear queued steering/follow-up/next-turn messages.\n6. `sessionManager.setSessionFile(sessionPath)` and update `agent.sessionId`.\n7. Build session context from loaded entries.\n8. Restore MCP selections/tools/system prompt for the target session.\n9. Emit `session_switch` with `reason: \"resume\"`.\n10. 
Replace agent messages from context and sync todos.\n11. Close provider sessions when switching files, or when same-file reload changed replay messages.\n12. Restore model (if available in current registry).\n13. Restore or initialize thinking level and service tier.\n14. Reconnect agent event subscription.\n\nIf any step after the capture fails, `switchSession()` restores the captured state and reconnects the previous agent subscription before rethrowing.\n\nNo new session file is created by `switchSession()` itself.\n\n## Event emissions and cancellation points\n\n### Switch/fork lifecycle hooks\n\nFor `newSession`, `fork`, and `switchSession`:\n\n- Before event: `session_before_switch`\n - reasons: `new`, `fork`, `resume`\n - cancellable by returning `{ cancel: true }`\n- After event: `session_switch`\n - same reason set\n - includes `previousSessionFile`\n\n`ExtensionRunner.emit()` returns early on the first cancelling before-event result.\n\n### Custom tool `onSession` behavior\n\nSDK bridges extension session events to custom tool `onSession` callbacks:\n\n- `session_switch` -> `onSession({ reason: \"switch\", previousSessionFile })`\n- `session_branch` -> `reason: \"branch\"`\n- `session_start` -> `reason: \"start\"`\n- `session_tree` -> `reason: \"tree\"`\n- `session_shutdown` -> `reason: \"shutdown\"`\n\nThese callbacks are observational; they do not cancel switch/fork.\n\n### Other cancellation surfaces relevant to this doc\n\n- `/fork` is blocked while streaming (user must wait/abort current response first).\n- `/resume` selector can be cancelled by user closing selector.\n- Cross-project `--resume <id>` can be cancelled by declining fork prompt.\n- `/share` has UI abort path (`Share cancelled`) for gist flow; it does not wire process-kill semantics for `gh gist create` in this code path.\n\n## Non-persistent (in-memory) session behavior\n\nWhen session manager is created with `SessionManager.inMemory()` (`--no-session`):\n\n- Session file path is 
absent.\n- `/export` and `/share` fail with `Cannot export in-memory session to HTML` (propagated to command error UI).\n- `/fork` fails because `SessionManager.fork()` requires persistence.\n- `/dump` still works because it serializes in-memory agent state.\n- CLI resume/continue semantics are bypassed if `--no-session` is set, because manager creation returns an in-memory manager immediately.\n\n## Known implementation caveats (as of current code)\n\n- `SelectorController.handleResumeSession()` does not check the boolean result from `session.switchSession(...)`; a hook-cancelled switch can still proceed through the UI \"Resumed session\" repaint/status path.\n- `/share` custom-share failures do not degrade to the default gist fallback; they terminate the command with an error.\n- `/export` argument tokenization is simplistic and does not preserve quoted paths with spaces.\n",
56
56
  "session-switching-and-recent-listing.md": "# Session switching and recent session listing\n\nThis document describes how coding-agent discovers recent sessions, resolves `--resume` targets, presents session pickers, and switches the active runtime session.\n\nIt focuses on current implementation behavior, including fallback paths and caveats.\n\n## Implementation files\n\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/cli/session-picker.ts`](../packages/coding-agent/src/cli/session-picker.ts)\n- [`../src/modes/components/session-selector.ts`](../packages/coding-agent/src/modes/components/session-selector.ts)\n- [`../src/modes/controllers/selector-controller.ts`](../packages/coding-agent/src/modes/controllers/selector-controller.ts)\n- [`../src/main.ts`](../packages/coding-agent/src/main.ts)\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`../src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n\n## Recent-session discovery\n\n### Directory scope\n\n`SessionManager` stores sessions under a cwd-scoped directory by default:\n\n- `~/.omp/agent/sessions/--<cwd-encoded>--/*.jsonl`\n\n`SessionManager.list(cwd, sessionDir?)` reads only that directory unless an explicit `sessionDir` is provided.\n\n### Two listing paths with different payloads\n\nThere are two different listing pipelines:\n\n1. `getRecentSessions(sessionDir, limit)` (welcome/summary view)\n - Reads only a 4KB prefix (`readTextSlices(..., 4096, 0)[0]`) from each file.\n - Parses header + earliest user text preview.\n - Returns lightweight `RecentSessionInfo` with lazy `name` and `timeAgo` getters.\n - Sorts by file `mtime` descending.\n\n2. 
`SessionManager.list(...)` / `SessionManager.listAll()` (resume pickers and ID matching)\n - Reads a 4KB prefix plus a bounded 32 KiB tail in one `readTextSlices(...)` call per file, not the full JSONL file.\n - Builds `SessionInfo` objects (`id`, `cwd`, `title`, `messageCount`, `firstMessage`, `allMessagesText`, timestamps, lifecycle status).\n - Uses prefix parsing plus marker counting for list text, and tail parsing for the final-message lifecycle status; later messages beyond the prefix may not be present in `allMessagesText`.\n - Sorts by `modified` descending.\n\n### Metadata fallback behavior\n\nFor recent summaries (`RecentSessionInfo`):\n\n- display name preference: `header.title` -> first user prompt -> `header.id` -> filename\n- name is truncated to 40 chars for compact displays\n- control characters/newlines are stripped/sanitized from title-derived names\n\nFor `SessionInfo` list entries:\n\n- `title` is `header.title` or the last compaction `shortSummary` seen in the 4KB prefix\n- `firstMessage` is first user message text discoverable from the prefix or `\"(no messages)\"`\n\n## `--continue` resolution and terminal breadcrumb preference\n\n`SessionManager.continueRecent(cwd, sessionDir?)` resolves the target in this order:\n\n1. Read terminal-scoped breadcrumb (`~/.omp/agent/terminal-sessions/<terminal-id>`)\n2. Validate breadcrumb:\n - current terminal can be identified\n - breadcrumb cwd matches current cwd (resolved path compare)\n - referenced file still exists\n3. If breadcrumb is invalid/missing, fall back to newest file by mtime in the session dir (`findMostRecentSession`)\n4. 
If none found, create a new session\n\nTerminal ID derivation prefers TTY path and falls back to env-based identifiers (`TMUX_PANE`, `CMUX_SURFACE_ID`, `KITTY_WINDOW_ID`, `TERM_SESSION_ID`, `WT_SESSION`).\n\nBreadcrumb writes are best-effort and non-fatal.\n\n## Startup-time resume target resolution (`main.ts`)\n\n### `--resume <value>`\n\n`createSessionManager(...)` handles string-valued `--resume` in two modes:\n\n1. Path-like value (contains `/`, `\\\\`, or ends with `.jsonl`)\n - direct `SessionManager.open(sessionArg, parsed.sessionDir)`\n\n2. Resume key value\n - `resolveResumableSession(...)` searches local sessions first, then all sessions when `sessionDir` is not forced\n - matching is case-insensitive and accepts `id` prefix, full JSONL filename prefix, or the session-id suffix after the timestamp\n - first match in modified-descending order is used (no ambiguity prompt)\n\nCross-project match behavior:\n\n- if matched session cwd differs from current cwd, CLI prompts whether to fork into current project\n- yes -> `SessionManager.forkFrom(...)`\n- no -> throws error (`Session \"...\" is in another project (...)`)\n\nNo match -> throws error (`Session \"...\" not found.`).\n\n### `--resume` (no value)\n\nHandled after initial session-manager construction:\n\n1. list local sessions with `SessionManager.list(cwd, parsed.sessionDir)`\n2. if empty: print `No sessions found` and exit early\n3. open TUI picker (`selectSession`)\n4. if canceled: print `No session selected` and exit early\n5. 
if selected: `SessionManager.open(selectedPath)`\n\n### `--continue`\n\nUses `SessionManager.continueRecent(...)` directly (breadcrumb-first behavior above).\n\n## Picker-based selection internals\n\n## CLI picker (`src/cli/session-picker.ts`)\n\n`selectSession(sessions)` creates a standalone TUI with `SessionSelectorComponent` and resolves exactly once:\n\n- selection -> resolves selected path\n- cancel (Esc) -> resolves `null`\n- hard exit (Ctrl+C path) -> stops TUI and `process.exit(0)`\n\n## Interactive in-session picker (`SelectorController.showSessionSelector`)\n\nFlow:\n\n1. fetch sessions from current session dir via `SessionManager.list(currentCwd, currentSessionDir)`\n2. mount `SessionSelectorComponent` in editor area using `showSelector(...)`\n3. callbacks:\n - select -> close selector and call `handleResumeSession(sessionPath)`\n - cancel -> restore editor and rerender\n - exit -> `ctx.shutdown()`\n\n## Session selector component behavior\n\n`SessionList` supports:\n\n- arrow/page navigation\n- Enter to select\n- Delete to delete after confirmation\n- Esc to cancel\n- Ctrl+C to exit\n- fuzzy search across session id/title/cwd/first message/all messages/path\n\nEmpty-list render behavior:\n\n- renders `No sessions in current folder. Press Tab to view all.`\n- Enter/Delete on empty do nothing (no callback)\n- Esc/Ctrl+C still work\n\nCaveat: the empty-state UI mentions Tab, but this component currently has no Tab handler and current wiring only lists current-scope sessions.\n\n## Runtime switch execution (`AgentSession.switchSession`)\n\n`switchSession(sessionPath)` is the core in-process switch path.\n\nLifecycle/state transition:\n\n1. capture `previousSessionFile`\n2. emit `session_before_switch` hook event (`reason: \"resume\"`, cancellable)\n3. if canceled -> return `false` with no switch\n4. disconnect from current agent event stream\n5. abort active generation/tool flow\n6. clear queued steering/follow-up/next-turn message buffers\n7. 
flush session writer (`sessionManager.flush()`) to persist pending writes\n8. `sessionManager.setSessionFile(sessionPath)`\n - updates session file pointer\n - writes terminal breadcrumb\n - loads entries / migrates / blob-resolves / reindexes\n - if missing/invalid file data: initializes a new session at that path and rewrites header\n9. update `agent.sessionId`\n10. rebuild display context via `buildDisplaySessionContext()`\n11. restore persisted/discovered MCP tool selections and rebuild active tools/system prompt when discovery is enabled\n12. emit `session_switch` hook event (`reason: \"resume\"`, `previousSessionFile`)\n13. replace agent messages with rebuilt context and sync todos\n14. close provider sessions when switching to a different session or when same-session reload changed replay messages\n15. restore default model from `sessionContext.models.default` if available and present in model registry\n16. restore thinking level and service tier:\n - thinking uses persisted `thinking_level_change`, otherwise the configured default clamped to model capability\n - service tier uses persisted `service_tier_change`, otherwise the configured `serviceTier` setting (`\"none\"` becomes unset)\n17. 
reconnect agent listeners and return `true`\n\n## UI state rebuild after interactive switch\n\n`SelectorController.handleResumeSession` performs UI reset around `switchSession`:\n\n- stop loading animation\n- clear status container\n- clear pending-message UI and pending tool map\n- reset streaming component/message references\n- call `session.switchSession(...)`\n- clear chat container and rerender from session context (`renderInitialMessages`)\n- reload todos from new session artifacts\n- show `Resumed session`\n\nSo visible conversation/todo state is rebuilt from the new session file.\n\n## Startup resume vs in-session switch\n\n### Startup resume (`--continue`, `--resume`, direct open)\n\n- Session file is chosen before `createAgentSession(...)`.\n- `sdk.ts` builds `existingSession = sessionManager.buildSessionContext()`.\n- Agent messages are restored once during session creation.\n- Model/thinking are selected during creation (including restore/fallback logic).\n- Interactive mode then runs `#restoreModeFromSession()` to re-enter persisted mode state (currently plan/plan_paused).\n\n### In-session switch (`/resume`-style selector path)\n\n- Uses `AgentSession.switchSession(...)` on an already-running `AgentSession`.\n- Messages/model/thinking are rebuilt immediately in place.\n- Hook `session_before_switch`/`session_switch` events are emitted.\n- UI chat/todos are refreshed.\n- No dedicated post-switch mode restore call is made in selector flow; mode re-entry behavior is not symmetric with startup `#restoreModeFromSession()`.\n\n## Failure and edge-case behavior\n\n### Cancellation paths\n\n- CLI picker cancel -> returns `null`, caller prints `No session selected`, process exits early.\n- Interactive picker cancel -> editor restored, no session change.\n- Hook cancellation (`session_before_switch`) -> `switchSession()` returns `false`.\n\n### Empty list paths\n\n- CLI `--resume` (no value): empty list prints `No sessions found` and exits.\n- Interactive 
selector: empty list renders message and remains cancellable.\n\n### Missing/invalid target session file\n\nWhen opening/switching to a specific path (`setSessionFile`):\n\n- ENOENT -> treated as empty -> new session initialized at that exact path and persisted.\n- malformed/invalid header (or effectively unreadable parsed entries) -> treated as empty -> new session initialized and persisted.\n\nThis is recovery behavior, not hard failure.\n\n### Hard failures\n\nSwitch/open can still throw on true I/O failures (permission errors, rewrite failures, etc.), which propagate to callers.\n\n### ID prefix matching caveats\n\n- Matching uses `startsWith` on the lowercased session id, lowercased JSONL filename, and lowercased id suffix after the filename timestamp.\n- First match in modified-descending order wins; there is no ambiguity UI if multiple sessions share a prefix.\n- Prefix-listing metadata is intentionally lightweight, so search text may not include messages outside the first 4KB of the session file.\n",
57
57
  "session-tree-plan.md": "# Session tree architecture (current)\n\nReference: [session.md](../docs/session.md)\n\nThis document describes how session tree navigation works today: in-memory tree model, leaf movement rules, branching behavior, and extension/event integration.\n\n## What this subsystem is\n\nThe session is stored as an append-only entry log, but runtime behavior is tree-based:\n\n- Every non-header entry has `id` and `parentId`.\n- The active position is `leafId` in `SessionManager`.\n- Appending an entry always creates a child of the current leaf.\n- Branching does **not** rewrite history; it only changes where the leaf points before the next append.\n\nKey files:\n\n- `src/session/session-manager.ts` — tree data model, traversal, leaf movement, branch/session extraction\n- `src/session/agent-session.ts` — `/tree` navigation flow, summarization, hook/event emission\n- `src/modes/components/tree-selector.ts` — interactive tree UI behavior and filtering\n- `src/modes/controllers/selector-controller.ts` — selector orchestration for `/tree` and `/branch`\n- `src/modes/controllers/input-controller.ts` — command routing (`/tree`, `/branch`, double-escape behavior)\n- `src/session/messages.ts` — conversion of `branch_summary`, `compaction`, and `custom_message` entries into LLM context messages\n\n## Tree data model in `SessionManager`\n\nRuntime indices:\n\n- `#byId: Map<string, SessionEntry>` — fast lookup for any entry\n- `#leafId: string | null` — current position in the tree\n- `#labelsById: Map<string, string>` — resolved labels by target entry id\n\nTree APIs:\n\n- `getBranch(fromId?)` walks parent links to root and returns root→node path\n- `getTree()` returns `SessionTreeNode[]` (`entry`, `children`, `label`)\n - parent links become children arrays\n - entries with missing parents are treated as roots\n - children are sorted oldest→newest by timestamp\n- `getChildren(parentId)` returns direct children\n- `getLabel(id)` resolves current label from 
`labelsById`\n\n`getTree()` is a runtime projection; persistence remains append-only JSONL entries.\n\n## Leaf movement semantics\n\nThere are three leaf movement primitives:\n\n1. `branch(entryId)`\n - Validates entry exists\n - Sets `leafId = entryId`\n - No new entry is written\n\n2. `resetLeaf()`\n - Sets `leafId = null`\n - Next append creates a new root entry (`parentId = null`)\n\n3. `branchWithSummary(branchFromId, summary, details?, fromExtension?)`\n - Accepts `branchFromId: string | null`\n - Sets `leafId = branchFromId`\n - Appends a `branch_summary` entry as child of that leaf\n - When `branchFromId` is `null`, `fromId` is persisted as `\"root\"`\n\n## `/tree` navigation behavior (same session file)\n\n`AgentSession.navigateTree()` is navigation, not file forking.\n\nFlow:\n\n1. Validate target and compute abandoned path (`collectEntriesForBranchSummary`)\n2. Emit `session_before_tree` with `TreePreparation`\n3. Optionally summarize abandoned entries (hook-provided summary or built-in summarizer)\n4. Compute new leaf target:\n - selecting a **user** message: leaf moves to its parent, and message text is returned for editor prefill\n - selecting a **custom_message**: same rule as user message (leaf = parent, text prefills editor)\n - selecting any other entry: leaf = selected entry id\n5. Apply leaf move:\n - with summary: `branchWithSummary(newLeafId, ...)`\n - without summary and `newLeafId === null`: `resetLeaf()`\n - otherwise: `branch(newLeafId)`\n6. 
Rebuild agent context from new leaf and emit `session_tree`\n\nImportant: summary entries are attached at the **new navigation position**, not on the abandoned branch tail.\n\n## `/branch` behavior (new session file)\n\n`/branch` and `/tree` are intentionally different:\n\n- `/tree` navigates within the current session file.\n- `/branch` creates a new session branch file (or in-memory replacement for non-persistent mode).\n\nUser-facing `/branch` flow (`SelectorController.showUserMessageSelector` → `AgentSession.branch`):\n\n- Branch source must be a **user message**.\n- Selected user text is extracted for editor prefill.\n- If selected user message is root (`parentId === null`): start a new session via `newSession({ parentSession: previousSessionFile })`.\n- Otherwise: `createBranchedSession(selectedEntry.parentId)` to fork history up to the selected prompt boundary.\n\n`SessionManager.createBranchedSession(leafId)` specifics:\n\n- Builds root→leaf path via `getBranch(leafId)`; throws if missing.\n- Excludes existing `label` entries from copied path.\n- Rebuilds fresh label entries from resolved `labelsById` for entries that remain in path.\n- Persistent mode: writes new JSONL file and switches manager to it; returns new file path.\n- In-memory mode: replaces in-memory entries; returns `undefined`.\n\n## Context reconstruction and summary/custom integration\n\n`buildSessionContext()` (in `session-manager.ts`) resolves the active root→leaf path and builds effective LLM context state:\n\n- Tracks latest thinking/model/service-tier/mode/TTSR/MCP-selection state on path.\n- Handles latest compaction on path:\n - emits compaction summary first\n - replays kept messages from `firstKeptEntryId` to compaction point\n - then replays post-compaction messages\n- Includes `branch_summary` and `custom_message` entries as `AgentMessage` objects.\n\n`session/messages.ts` then maps these message types for model input:\n\n- `branchSummary` and `compactionSummary` become user-role 
templated context messages\n- `custom`/`hookMessage` become user-role content messages\n\nSo tree movement changes context by changing the active leaf path, not by mutating old entries.\n\n## Labels and tree UI behavior\n\nLabel persistence:\n\n- `appendLabelChange(targetId, label?)` writes `label` entries on the current leaf chain.\n- `labelsById` is updated immediately (set or delete).\n- `getTree()` resolves current label onto each returned node.\n\nTree selector behavior (`tree-selector.ts`):\n\n- Flattens tree for navigation, keeps active-path highlighting, and prioritizes displaying the active branch first.\n- Supports filter modes: `default`, `no-tools`, `user-only`, `labeled-only`, `all`.\n - `default` suppresses `label`, `custom`, `model_change`, and `thinking_level_change`; it is not a complete \"hide all internal entries\" filter.\n- Supports free-text search over rendered semantic content.\n- `Shift+L` opens inline label editing and writes via `appendLabelChange`.\n\nCommand routing:\n\n- `/tree` always opens tree selector.\n- `/branch` opens user-message selector unless `doubleEscapeAction=tree`, in which case it also uses tree selector UX.\n\n## Extension and hook touchpoints for tree operations\n\nCommand-time extension API (`ExtensionCommandContext`):\n\n- `branch(entryId)` — create branched session file\n- `navigateTree(targetId, { summarize? 
})` — move within current tree/file\n\nEvents around tree navigation:\n\n- `session_before_tree`\n - receives `TreePreparation`:\n - `targetId`\n - `oldLeafId`\n - `commonAncestorId`\n - `entriesToSummarize`\n - `userWantsSummary`\n - may cancel navigation\n - may provide summary payload used instead of built-in summarizer\n - receives abort `signal` (Escape cancellation path)\n- `session_tree`\n - emits `newLeafId`, `oldLeafId`\n - includes `summaryEntry` when a summary was created\n - `fromExtension` indicates summary origin\n\nAdjacent but related lifecycle hooks:\n\n- `session_before_branch` / `session_branch` for `/branch` flow\n- `session_before_compact`, `session.compacting`, `session_compact` for compaction entries that later affect tree-context reconstruction\n\n## Real constraints and edge conditions\n\n- `branch()` cannot target `null`; use `resetLeaf()` for root-before-first-entry state.\n- `branchWithSummary()` supports `null` target and records `fromId: \"root\"`.\n- Selecting current leaf in tree selector is a no-op.\n- Summarization requires an active model; if absent, summarize navigation fails fast.\n- If summarization is aborted, navigation is cancelled and leaf is unchanged.\n- In-memory sessions never return a branch file path from `createBranchedSession`.\n- Tree context reconstruction includes service-tier and MCP tool-selection state, but those entries do not become LLM messages.\n\n## Plan approval session naming\n\nWhen a user approves a plan from plan mode (`InteractiveMode.#approvePlan`), the approval handler seeds the session name from the plan's title so the resulting (fresh or compacted) session does not stay unnamed.\n\nTrigger:\n\n- Plan approval reaches `#approvePlan(...)` with `options.title` populated from the plan-approval details.\n- This runs for every approval choice (`Approve and execute`, `Approve and compact context`, plain `Approve`); the synthetic `plan-approved` prompt is what otherwise bypasses the input-controller's 
title-generation path.\n\nNaming source:\n\n- The normalized plan title is humanized via `humanizePlanTitle(title)` (`packages/coding-agent/src/plan-mode/approved-plan.ts`):\n - replaces runs of `-`/`_` with a single space\n - trims whitespace\n - capitalizes the first character\n - returns `\"\"` for whitespace-only / separator-only input\n- The humanized name is applied only when the current session has no name (`!sessionManager.getSessionName()`). It then calls `sessionManager.setSessionName(name, \"auto\")`, which also refuses to overwrite user-named sessions.\n- On successful apply, the terminal title (`setSessionTerminalTitle`) and the editor border color are refreshed to reflect the new name.\n\nExamples (from `humanizePlanTitle`):\n\n- `migrate-mcp-loader` → `Migrate mcp loader`\n- `fix_session_naming` → `Fix session naming`\n- `foo--bar__baz` → `Foo bar baz`\n- `RefactorRouter` → `RefactorRouter` (no separators to expand)\n- `\"\"` / `\"---\"` → `\"\"` (no name applied)\n\n## Legacy compatibility still present\n\nSession migrations still run on load:\n\n- v1→v2 adds `id`/`parentId` and converts compaction index anchor to id anchor\n- v2→v3 migrates legacy `hookMessage` role to `custom`\n\nCurrent runtime behavior is version-3 tree semantics after migration.\n",
58
58
  "session.md": "# Session Storage and Entry Model\n\nThis document is the source of truth for how coding-agent sessions are represented, persisted, migrated, and reconstructed at runtime.\n\n## Scope\n\nCovers:\n\n- Session JSONL format and versioning\n- Entry taxonomy and tree semantics (`id`/`parentId` + leaf pointer)\n- Migration/compatibility behavior when loading old or malformed files\n- Context reconstruction (`buildSessionContext`)\n- Persistence guarantees, failure behavior, truncation/blob externalization\n- Storage abstractions (`FileSessionStorage`, `MemorySessionStorage`) and related utilities\n\nDoes not cover `/tree` UI rendering behavior beyond semantics that affect session data.\n\n## Implementation Files\n\n- [`src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`src/session/messages.ts`](../packages/coding-agent/src/session/messages.ts)\n- [`src/session/session-storage.ts`](../packages/coding-agent/src/session/session-storage.ts)\n- [`src/session/history-storage.ts`](../packages/coding-agent/src/session/history-storage.ts)\n- [`src/session/blob-store.ts`](../packages/coding-agent/src/session/blob-store.ts)\n\n## On-Disk Layout\n\nDefault session file location:\n\n```text\n~/.omp/agent/sessions/--<cwd-encoded>--/<timestamp>_<sessionId>.jsonl\n```\n\n`<cwd-encoded>` is derived from the working directory by stripping leading slash and replacing `/`, `\\\\`, and `:` with `-`.\n\nBlob store location:\n\n```text\n~/.omp/agent/blobs/<sha256>\n```\n\nTerminal breadcrumb files are written under:\n\n```text\n~/.omp/agent/terminal-sessions/<terminal-id>\n```\n\nBreadcrumb content is two lines: original cwd, then session file path. 
`continueRecent()` prefers this terminal-scoped pointer before scanning most-recent mtime.\n\n## File Format\n\nSession files are JSONL: one JSON object per line.\n\n- Line 1 is always the session header (`type: \"session\"`).\n- Remaining lines are `SessionEntry` values.\n- Entries are append-only at runtime; branch navigation moves a pointer (`leafId`) rather than mutating existing entries.\n\n### Header (`SessionHeader`)\n\n```json\n{\n \"type\": \"session\",\n \"version\": 3,\n \"id\": \"1f9d2a6b9c0d1234\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\",\n \"cwd\": \"/work/pi\",\n \"title\": \"optional session title\",\n \"titleSource\": \"auto\",\n \"parentSession\": \"optional lineage marker\"\n}\n```\n\nNotes:\n\n- `version` is optional in v1 files; absence means v1.\n- `parentSession` is an opaque lineage string. Current code writes either a session id or a session path depending on flow (`fork`, `forkFrom`, `createBranchedSession`, or explicit `newSession({ parentSession })`). Treat as metadata, not a typed foreign key.\n\n### Entry Base (`SessionEntryBase`)\n\nAll non-header entries include:\n\n```json\n{\n \"type\": \"...\",\n \"id\": \"8-char-id\",\n \"parentId\": \"previous-or-branch-parent\",\n \"timestamp\": \"2026-02-16T10:20:30.000Z\"\n}\n```\n\n`parentId` can be `null` for a root entry (first append, or after `resetLeaf()`).\n\n## Entry Taxonomy\n\n`SessionEntry` is the union of:\n\n- `message`\n- `thinking_level_change`\n- `model_change`\n- `service_tier_change`\n- `compaction`\n- `branch_summary`\n- `custom`\n- `custom_message`\n- `label`\n- `ttsr_injection`\n- `session_init`\n- `mode_change`\n- `mcp_tool_selection`\n\n### `message`\n\nStores an `AgentMessage` directly.\n\n```json\n{\n \"type\": \"message\",\n \"id\": \"a1b2c3d4\",\n \"parentId\": null,\n \"timestamp\": \"2026-02-16T10:21:00.000Z\",\n \"message\": {\n \"role\": \"assistant\",\n \"provider\": \"anthropic\",\n \"model\": \"claude-sonnet-4-5\",\n \"content\": [{ \"type\": \"text\", 
\"text\": \"Done.\" }],\n \"usage\": {\n \"input\": 100,\n \"output\": 20,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"cost\": {\n \"input\": 0,\n \"output\": 0,\n \"cacheRead\": 0,\n \"cacheWrite\": 0,\n \"total\": 0\n }\n },\n \"timestamp\": 1760000000000\n }\n}\n```\n\n### `model_change`\n\n```json\n{\n \"type\": \"model_change\",\n \"id\": \"b1c2d3e4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:21:30.000Z\",\n \"model\": \"openai/gpt-4o\",\n \"role\": \"default\"\n}\n```\n\n`role` is optional; missing is treated as `default` in context reconstruction.\n\n### `service_tier_change`\n\n```json\n{\n \"type\": \"service_tier_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:21:45.000Z\",\n \"serviceTier\": \"flex\"\n}\n```\n\n`serviceTier` can also be `null`.\n\n### `thinking_level_change`\n\n```json\n{\n \"type\": \"thinking_level_change\",\n \"id\": \"c1d2e3f4\",\n \"parentId\": \"b1c2d3e4\",\n \"timestamp\": \"2026-02-16T10:22:00.000Z\",\n \"thinkingLevel\": \"high\"\n}\n```\n\n### `compaction`\n\n```json\n{\n \"type\": \"compaction\",\n \"id\": \"d1e2f3a4\",\n \"parentId\": \"c1d2e3f4\",\n \"timestamp\": \"2026-02-16T10:23:00.000Z\",\n \"summary\": \"Conversation summary\",\n \"shortSummary\": \"Short recap\",\n \"firstKeptEntryId\": \"a1b2c3d4\",\n \"tokensBefore\": 42000,\n \"details\": { \"readFiles\": [\"src/a.ts\"] },\n \"preserveData\": { \"hookState\": true },\n \"fromExtension\": false\n}\n```\n\n### `branch_summary`\n\n```json\n{\n \"type\": \"branch_summary\",\n \"id\": \"e1f2a3b4\",\n \"parentId\": \"a1b2c3d4\",\n \"timestamp\": \"2026-02-16T10:24:00.000Z\",\n \"fromId\": \"a1b2c3d4\",\n \"summary\": \"Summary of abandoned path\",\n \"details\": { \"note\": \"optional\" },\n \"fromExtension\": true\n}\n```\n\nIf branching from root (`branchFromId === null`), `fromId` is the literal string `\"root\"`.\n\n### `custom`\n\nExtension state persistence; ignored by 
`buildSessionContext`.\n\n```json\n{\n \"type\": \"custom\",\n \"id\": \"f1a2b3c4\",\n \"parentId\": \"e1f2a3b4\",\n \"timestamp\": \"2026-02-16T10:25:00.000Z\",\n \"customType\": \"my-extension\",\n \"data\": { \"state\": 1 }\n}\n```\n\n### `custom_message`\n\nExtension-provided message that does participate in LLM context. `content` can be a string or text/image content blocks, and `attribution` records whether the user or agent initiated it.\n\n```json\n{\n \"type\": \"custom_message\",\n \"id\": \"a2b3c4d5\",\n \"parentId\": \"f1a2b3c4\",\n \"timestamp\": \"2026-02-16T10:26:00.000Z\",\n \"customType\": \"my-extension\",\n \"content\": \"Injected context\",\n \"display\": true,\n \"details\": { \"debug\": false },\n \"attribution\": \"agent\"\n}\n```\n\n### `label`\n\n```json\n{\n \"type\": \"label\",\n \"id\": \"b2c3d4e5\",\n \"parentId\": \"a2b3c4d5\",\n \"timestamp\": \"2026-02-16T10:27:00.000Z\",\n \"targetId\": \"a1b2c3d4\",\n \"label\": \"checkpoint\"\n}\n```\n\n`label: undefined` clears a label for `targetId`.\n\n### `ttsr_injection`\n\n```json\n{\n \"type\": \"ttsr_injection\",\n \"id\": \"c2d3e4f5\",\n \"parentId\": \"b2c3d4e5\",\n \"timestamp\": \"2026-02-16T10:28:00.000Z\",\n \"injectedRules\": [\"ruleA\", \"ruleB\"]\n}\n```\n\n### `mcp_tool_selection`\n\n```json\n{\n \"type\": \"mcp_tool_selection\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:28:30.000Z\",\n \"selectedToolNames\": [\"server.tool\"]\n}\n```\n\n### `session_init`\n\n```json\n{\n \"type\": \"session_init\",\n \"id\": \"d2e3f4a5\",\n \"parentId\": \"c2d3e4f5\",\n \"timestamp\": \"2026-02-16T10:29:00.000Z\",\n \"systemPrompt\": \"...\",\n \"task\": \"...\",\n \"tools\": [\"read\", \"edit\"],\n \"outputSchema\": { \"type\": \"object\" }\n}\n```\n\n### `mode_change`\n\n```json\n{\n \"type\": \"mode_change\",\n \"id\": \"e2f3a4b5\",\n \"parentId\": \"d2e3f4a5\",\n \"timestamp\": \"2026-02-16T10:30:00.000Z\",\n \"mode\": \"plan\",\n \"data\": { 
\"planFile\": \"/tmp/plan.md\" }\n}\n```\n\n## Versioning and Migration\n\nCurrent session version: `3`.\n\n### v1 -> v2\n\nApplied when header `version` is missing or `< 2`:\n\n- Adds `id` and `parentId` to each non-header entry.\n- Reconstructs a linear parent chain using file order.\n- Migrates compaction field `firstKeptEntryIndex` -> `firstKeptEntryId` when present.\n- Sets header `version = 2`.\n\n### v2 -> v3\n\nApplied when header `version < 3`:\n\n- For `message` entries: rewrites legacy `message.role === \"hookMessage\"` to `\"custom\"`.\n- Sets header `version = 3`.\n\n### Migration Trigger and Persistence\n\n- Migrations run during session load (`setSessionFile`).\n- If any migration ran, the entire file is rewritten to disk immediately.\n- Migration mutates in-memory entries first, then persists rewritten JSONL.\n\n## Load and Compatibility Behavior\n\n`loadEntriesFromFile(path)` behavior:\n\n- Missing file (`ENOENT`) -> returns `[]`.\n- Non-parseable lines are handled by lenient JSONL parser (`parseJsonlLenient`).\n- If first parsed entry is not a valid session header (`type !== \"session\"` or missing string `id`) -> returns `[]`.\n\n`SessionManager.setSessionFile()` behavior:\n\n- `[]` from loader is treated as empty/nonexistent session and replaced with a new initialized session file at that path.\n- Valid files are loaded, migrated if needed, blob refs resolved, then indexed.\n\n## Tree and Leaf Semantics\n\nThe underlying model is append-only tree + mutable leaf pointer:\n\n- Every append method creates exactly one new entry whose `parentId` is current `leafId`.\n- The new entry becomes the new `leafId`.\n- `branch(entryId)` moves only `leafId`; existing entries remain unchanged.\n- `resetLeaf()` sets `leafId = null`; next append creates a new root entry (`parentId: null`).\n- `branchWithSummary()` sets leaf to branch target and appends a `branch_summary` entry.\n\n`getEntries()` returns all non-header entries in insertion order. 
Existing entries are not deleted in normal operation; rewrites preserve logical history while updating representation (migrations, move, targeted rewrite helpers).\n\n## Context Reconstruction (`buildSessionContext`)\n\n`buildSessionContext(entries, leafId, byId?)` resolves what is sent to the model.\n\nAlgorithm:\n\n1. Determine leaf:\n - `leafId === null` -> return empty context.\n - explicit `leafId` -> use that entry if found.\n - otherwise fallback to last entry.\n2. Walk `parentId` chain from leaf to root and reverse to root->leaf path.\n3. Derive runtime state across path:\n - `thinkingLevel` from latest `thinking_level_change` (default `\"off\"`)\n - `serviceTier` from latest `service_tier_change`\n - model map from `model_change` entries (`role ?? \"default\"`)\n - fallback `models.default` from assistant message provider/model if no explicit model change\n - deduplicated `injectedTtsrRules` from all `ttsr_injection` entries\n - selected MCP discovery tools from latest `mcp_tool_selection`\n - mode/modeData from latest `mode_change` (default mode `\"none\"`)\n4. 
Build message list:\n - `message` entries pass through\n - `custom_message` entries become `custom` AgentMessages via `createCustomMessage`\n - `branch_summary` entries become `branchSummary` AgentMessages via `createBranchSummaryMessage`\n - if a `compaction` exists on path:\n - emit compaction summary first (`createCompactionSummaryMessage`)\n - emit path entries starting at `firstKeptEntryId` up to the compaction boundary\n - emit entries after the compaction boundary\n\n`custom`, `session_init`, `service_tier_change`, `mcp_tool_selection`, and `ttsr_injection` entries do not inject model context directly.\n\n## Persistence Guarantees and Failure Model\n\n### Persist vs in-memory\n\n- `SessionManager.create/open/continueRecent/forkFrom` -> persistent mode (`persist = true`).\n- `SessionManager.inMemory` -> non-persistent mode (`persist = false`) with `MemorySessionStorage`.\n\n### Write pipeline\n\nWrites are serialized through an internal promise chain (`#persistChain`) and `NdjsonFileWriter`.\n\n- `append*` updates in-memory state immediately.\n- Persistence is deferred until at least one assistant message exists.\n - Before first assistant: entries are retained in memory; no file append occurs.\n - When first assistant exists: full in-memory session is flushed to file.\n - Afterwards: new entries append incrementally.\n\nRationale in code: avoid persisting sessions that never produced an assistant response.\n\n### Durability operations\n\n- `flush()` flushes writer and calls `fsync()`.\n- Atomic full rewrites (`#rewriteFile`) write to temp file, flush+fsync, close, then rename over target.\n- Used for migrations, `setSessionName`, `rewriteEntries`, move operations, and tool-call arg rewrites.\n\n### Error behavior\n\n- Persistence errors are latched (`#persistError`) and rethrown on subsequent operations.\n- First error is logged once with session file context.\n- Writer close is best-effort but propagates the first meaningful error.\n\n## Data Size Controls 
and Blob Externalization\n\nBefore persisting entries:\n\n- Large strings are truncated to `MAX_PERSIST_CHARS` (500,000 chars) with notice:\n - `\"[Session persistence truncated large content]\"`\n- Transient fields `partialJson` and `jsonlEvents` are removed.\n- If object has both `content` and `lineCount`, line count is recomputed after truncation.\n- Image blocks in `content` arrays with base64 length >= 1024 are externalized to blob refs:\n - stored as `blob:sha256:<hash>`\n - raw bytes written to blob store (`BlobStore.put`)\n\nOn load, blob refs are resolved back to base64 for message/custom_message image blocks.\n\n## Storage Abstractions\n\n`SessionStorage` interface provides all filesystem operations used by `SessionManager`:\n\n- sync: `ensureDirSync`, `existsSync`, `writeTextSync`, `statSync`, `listFilesSync`\n- async: `exists`, `readText`, `readTextSlices`, `writeText`, `rename`, `unlink`, `openWriter`\n\nImplementations:\n\n- `FileSessionStorage`: real filesystem (Bun + node fs)\n- `MemorySessionStorage`: map-backed in-memory implementation for tests/non-persistent sessions\n\n`SessionStorageWriter` exposes `writeLine`, `flush`, `fsync`, `close`, `getError`.\n\n## Session Discovery Utilities\n\nDefined in `session-manager.ts`:\n\n- `getRecentSessions(sessionDir, limit)` -> lightweight metadata for UI/session picker, capped by `limit`\n- `findMostRecentSession(sessionDir)` -> newest by mtime\n- `list(cwd, sessionDir?)` -> sessions in one project scope\n- `listAll()` -> sessions across all project scopes under `~/.omp/agent/sessions`\n- `resolveResumableSession(sessionArg, cwd, sessionDir?)` -> local then global resume/fork target lookup\n\nMetadata extraction for `getRecentSessions` reads a prefix via `readTextSlices(..., 4096, 0)`. `list`/`listAll` read a 4KB prefix plus a bounded 32 KiB tail through one `readTextSlices(...)` call per file, using the prefix for metadata and the tail for lifecycle status. 
Resume matching is case-insensitive and accepts session id prefixes, full filename prefixes, or the id suffix after the timestamp in `<timestamp>_<sessionId>.jsonl`.\n\n## Related but Distinct: Prompt History Storage\n\n`HistoryStorage` (`history-storage.ts`) is a separate SQLite subsystem for prompt recall/search, not session replay.\n\n- DB: `~/.omp/agent/history.db`\n- Table: `history(id, prompt, created_at, cwd)`\n- FTS5 index: `history_fts` with trigger-maintained sync\n- Deduplicates consecutive identical prompts using in-memory last-prompt cache\n- Async insertion (`setImmediate`) so prompt capture does not block turn execution\n\nUse session files for conversation graph/state replay; use `HistoryStorage` for prompt history UX.\n",
@@ -66,7 +66,7 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
66
66
  "slash-command-internals.md": "# Slash command internals\n\nThis document describes how slash commands are discovered, deduplicated, surfaced in interactive mode, and expanded at prompt time in `coding-agent`.\n\n## Implementation files\n\n- [`src/extensibility/slash-commands.ts`](../packages/coding-agent/src/extensibility/slash-commands.ts)\n- [`src/capability/slash-command.ts`](../packages/coding-agent/src/capability/slash-command.ts)\n- [`src/discovery/builtin.ts`](../packages/coding-agent/src/discovery/builtin.ts)\n- [`src/discovery/claude.ts`](../packages/coding-agent/src/discovery/claude.ts)\n- [`src/discovery/codex.ts`](../packages/coding-agent/src/discovery/codex.ts)\n- [`src/discovery/claude-plugins.ts`](../packages/coding-agent/src/discovery/claude-plugins.ts)\n- [`src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`src/modes/utils/ui-helpers.ts`](../packages/coding-agent/src/modes/utils/ui-helpers.ts)\n- [`src/modes/controllers/command-controller.ts`](../packages/coding-agent/src/modes/controllers/command-controller.ts)\n\n## 1) Discovery model\n\nSlash commands are a capability (`id: \"slash-commands\"`) keyed by command name (`key: cmd => cmd.name`).\n\nThe capability registry loads all registered providers, sorted by provider priority descending, and deduplicates by key with **first wins** semantics.\n\n### Provider precedence\n\nCurrent slash-command providers and priorities:\n\n1. `native` (OMP) — priority `100`\n2. `claude` — priority `80`\n3. `claude-plugins` — priority `70`\n4. `codex` — priority `70`\n5. 
`opencode` — priority `55`\n\nTie behavior: equal-priority providers keep registration order. Current import order registers `claude-plugins` before `codex`, so plugin commands win over codex commands on name collisions.\n\n### Name-collision behavior\n\nFor `slash-commands`, collisions are resolved strictly by capability dedup:\n\n- highest-precedence item is kept in `result.items`\n- lower-precedence duplicates remain only in `result.all` and are marked `_shadowed = true`\n\nThis applies across providers and also within a provider if it returns duplicate names.\n\n### File scanning behavior\n\nProviders mostly use `loadFilesFromDir(...)`, which currently:\n\n- defaults to non-recursive matching (`*.md`)\n- uses native glob with `gitignore: true`, `hidden: false`, `fileType: File`\n- reads matching files in parallel and transforms them into `SlashCommand` items\n\nSo hidden files/directories are not loaded, ignored paths are skipped, and file order follows native glob result order unless a provider adds its own ordering.\n\n## 2) Provider-specific source paths and local precedence\n\n## `native` provider (`builtin.ts`)\n\nSearch roots come from `.omp` directories:\n\n- project: `<cwd>/.omp/commands/*.md`\n- user: `~/.omp/agent/commands/*.md`\n\n`getConfigDirs()` returns project first, then user, so **project native commands beat user native commands** when names collide.\n\n## `claude` provider (`claude.ts`)\n\nLoads, subject to `commands.enableClaudeUser` and `commands.enableClaudeProject` settings:\n\n- user: `~/.claude/commands/*.md`\n- project: `<cwd>/.claude/commands/*.md`\n\nThe provider pushes user items before project items, so **user Claude commands beat project Claude commands** on same-name collisions inside this provider.\n\n## `codex` provider (`codex.ts`)\n\nLoads:\n\n- user: `~/.codex/commands/*.md`\n- project: `<cwd>/.codex/commands/*.md`\n\nBoth sides are loaded then flattened in user-first order, so **user Codex commands beat project Codex 
commands** on collisions.\n\nCodex command content is parsed with frontmatter stripping (`parseFrontmatter`), and command name can be overridden by frontmatter `name`; otherwise filename is used.\n\n## `opencode` provider (`opencode.ts`)\n\nLoads, subject to `commands.enableOpencodeUser` and `commands.enableOpencodeProject` settings:\n\n- user: `~/.config/opencode/commands/*.md`\n- project: `<cwd>/.opencode/commands/*.md`\n\nBoth sides are loaded then flattened in user-first order, so **user OpenCode commands beat project OpenCode commands** on collisions. OpenCode command content is parsed with frontmatter stripping, and command name can be overridden by frontmatter `name`; otherwise filename is used.\n\n## `claude-plugins` provider (`claude-plugins.ts`)\n\nLoads plugin command roots from `~/.claude/plugins/installed_plugins.json`, then scans `<pluginRoot>/commands/*.md`.\n\nOrdering follows registry iteration order and per-plugin entry order from that JSON data. There is no additional sort step.\n\n## 3) Materialization to runtime `FileSlashCommand`\n\n`loadSlashCommands()` in `src/extensibility/slash-commands.ts` converts capability items into `FileSlashCommand` objects used at prompt time.\n\nFor each command:\n\n1. parse frontmatter/body (`parseFrontmatter`)\n2. description source:\n - `frontmatter.description` if present\n - else first non-empty body line (trimmed, max 60 chars with `...`)\n3. keep parsed body as executable template content\n4. 
compute a display source string like `via Claude Code Project`\n\nFrontmatter parse severity is source-dependent:\n\n- `native` level -> parse errors are `fatal`\n- `user`/`project` levels -> parse errors are `warn` with fallback parsing\n\n### Bundled fallback commands\n\nAfter filesystem/provider commands, embedded command templates are appended (`EMBEDDED_COMMAND_TEMPLATES`) if their names are not already present.\n\nCurrent embedded set comes from `src/task/commands.ts` and is used as a fallback (`source: \"bundled\"`).\n\n## 4) Interactive mode: where command lists come from\n\nInteractive mode combines multiple command sources for autocomplete and command routing.\n\nAt construction time it builds a pending command list from:\n\n- built-ins (`BUILTIN_SLASH_COMMANDS`, includes argument completion and inline hints for selected commands)\n- extension-registered slash commands (`extensionRunner.getRegisteredCommands(...)`)\n- TypeScript custom commands (`session.customCommands`), mapped to slash command labels\n- optional skill commands (`/skill:<name>`) when `skills.enableSkillCommands` is enabled\n\nThen `init()` calls `refreshSlashCommandState(...)` to load file-based commands and install one `CombinedAutocompleteProvider` containing:\n\n- pending commands above\n- discovered file-based commands\n\n`refreshSlashCommandState(...)` also updates `session.setSlashCommands(...)` so prompt expansion uses the same discovered file command set.\n\n### Refresh lifecycle\n\nSlash command state is refreshed:\n\n- during interactive init\n- after `/move` changes working directory (`handleMoveCommand` calls `resetCapabilities()` then `refreshSlashCommandState(newCwd)`)\n\nThere is no continuous file watcher for command directories.\n\n### Other surfacing\n\nThe Extensions dashboard also loads `slash-commands` capability and displays active/shadowed command entries, including `_shadowed` duplicates.\n\n## 5) Prompt pipeline placement\n\n`AgentSession.prompt(...)` slash 
handling order (when `expandPromptTemplates !== false`):\n\n1. **Extension commands** (`#tryExecuteExtensionCommand`) \n If `/name` matches extension-registered command, handler executes immediately and prompt returns.\n2. **TypeScript custom commands** (`#tryExecuteCustomCommand`) \n Boundary only: if matched, it executes and may return:\n - `string` -> replace prompt text with that string\n - `void/undefined` -> treated as handled; no LLM prompt\n3. **File-based slash commands** (`expandSlashCommand`) \n If text still starts with `/`, attempt markdown command expansion.\n4. **Prompt templates** (`expandPromptTemplate`) \n Applied after slash/custom processing.\n5. **Delivery**\n - idle: prompt is sent immediately to agent\n - streaming: prompt is queued as steer/follow-up depending on `streamingBehavior`\n\nThis is why slash command expansion sits before prompt-template expansion, and why custom commands can transform away the leading slash before file-command matching.\n\n## 6) Expansion semantics for file-based slash commands\n\n`expandSlashCommand(text, fileCommands)` behavior:\n\n- only runs when text begins with `/`\n- parses command name from first token after `/`\n- parses args from remaining text via `parseCommandArgs`\n- finds exact name match in loaded `fileCommands`\n- if matched, applies:\n - positional replacement: `$1`, `$2`, ...\n - slice replacement: `$@[start]` / `$@[start:length]` using 1-based positions\n - aggregate replacement: `$ARGUMENTS` and `$@`\n - template rendering via `prompt.render` with `{ args, ARGUMENTS, arguments }`\n - inline-argument fallback append when the template did not use an inline argument placeholder\n\n### `parseCommandArgs` caveats\n\nThe parser is simple quote-aware splitting:\n\n- supports `'single'` and `\"double\"` quoting to keep spaces\n- strips quote delimiters\n- does not implement backslash escaping rules\n- unmatched quote is not an error; parser consumes until end\n\n## 7) Unknown `/...` 
behavior\n\nUnknown slash input is **not rejected** by core slash logic.\n\nIf command is not handled by extension/custom/file layers, `expandSlashCommand` returns original text, and the literal `/...` prompt proceeds through normal prompt-template expansion and LLM delivery.\n\nInteractive mode separately hard-handles many built-ins in `InputController` (for example `/settings`, `/model`, `/mcp`, `/move`, `/exit`). Those are consumed before `session.prompt(...)` and therefore never reach file-command expansion in that path.\n\n## 8) Streaming-time differences vs idle\n\n## Idle path\n\n- `session.prompt(\"/x ...\")` runs command pipeline and either executes command immediately or sends expanded text directly.\n\n## Streaming path (`session.isStreaming === true`)\n\n- `prompt(...)` still runs extension/custom/file/template transforms first\n- then requires `streamingBehavior`:\n - `\"steer\"` -> queue interrupt message (`agent.steer`)\n - `\"followUp\"` -> queue post-turn message (`agent.followUp`)\n- if `streamingBehavior` is omitted, prompt throws an error\n\n### Important command-specific streaming behavior\n\n- Extension commands are executed immediately even during streaming (not queued as text).\n- `steer(...)`/`followUp(...)` helper methods reject extension commands (`#throwIfExtensionCommand`) to avoid queuing command text for handlers that must run synchronously.\n- Compaction queue replay uses `isKnownSlashCommand(...)` to decide whether queued entries should be replayed via `session.prompt(...)` (for known slash commands) vs raw steer/follow-up methods.\n\n## 9) Error handling and failure surfaces\n\n- Provider load failures are isolated; registry collects warnings and continues with other providers.\n- Invalid slash command items (missing name/path/content or invalid level) are dropped by capability validation.\n- Frontmatter parse failures:\n - native commands: fatal parse error bubbles\n - non-native commands: warning + fallback key/value parse\n- 
Extension/custom command handler exceptions are caught and reported via the extension error channel (or a logger fallback for custom commands without an extension runner), and are treated as handled (no unintended fallback execution).\n",
67
67
  "system-prompt-customization.md": "# System Prompt Customization\n\nHow the coding-agent assembles the system prompt sent to the model, and what users can control via `SYSTEM.md`, `APPEND_SYSTEM.md`, and the matching CLI flags.\n\nPrimary implementation:\n\n- `packages/coding-agent/src/system-prompt.ts` (`buildSystemPrompt`, `loadSystemPromptFiles`)\n- `packages/coding-agent/src/main.ts` (`discoverSystemPromptFile`, `discoverAppendSystemPromptFile`)\n- `packages/coding-agent/src/prompts/system/system-prompt.md` (default stable instruction template)\n- `packages/coding-agent/src/prompts/system/custom-system-prompt.md` (internal custom-prompt template; not the normal CLI `SYSTEM.md` path)\n- `packages/coding-agent/src/prompts/system/project-prompt.md` (project/environment footer)\n\n---\n\n## 1) Inputs\n\nFour user-controllable inputs feed prompt assembly. All four resolve a value as either a literal string or, if the argument looks like a file path, the contents of that file (`resolvePromptInput`).\n\n| Input | Source | Effect |\n|---|---|---|\n| `--system-prompt <text-or-file>` | CLI flag | Replaces block 0: the default stable instructions. Highest precedence. |\n| `SYSTEM.md` | `<cwd>/.omp/SYSTEM.md`, then `~/.omp/agent/SYSTEM.md` (and equivalent paths under `.claude`, `.codex`, `.gemini`) | Same effect as `--system-prompt`; used when the flag is absent. |\n| `--append-system-prompt <text-or-file>` | CLI flag | Adds a prompt block. Without a custom system prompt it goes after all default blocks; with one it goes after the custom block and before the preserved project/environment footer. |\n| `APPEND_SYSTEM.md` | Same discovery as `SYSTEM.md` | Same effect as `--append-system-prompt`; used when the flag is absent. 
|\n\nDiscovery for `SYSTEM.md` / `APPEND_SYSTEM.md` uses `findConfigFile` (`packages/coding-agent/src/config.ts`): the first existing file across the ordered bases (`.omp`, `.claude`, `.codex`, `.gemini` — project-level at `<cwd>` first, then user-level at `~`) wins. **No ancestor walk-up.** Running `omp` from `<repo>/subdir` does not pick up `<repo>/.omp/SYSTEM.md`; the file must live directly under the cwd's config base or in the user-level location. See [`docs/config-usage.md`](./config-usage.md) for the full discovery contract.\n\nPrecedence (highest first):\n\n1. `--system-prompt`\n2. project `SYSTEM.md`\n3. user `SYSTEM.md`\n\nFor append, the same precedence applies between `--append-system-prompt`, project `APPEND_SYSTEM.md`, and user `APPEND_SYSTEM.md`.\n\n---\n\n## 2) Replace vs. append\n\nNormal CLI startup builds the default provider-facing prompt blocks first, then applies CLI / discovered file overrides in `packages/coding-agent/src/main.ts`:\n\n```ts\nif (resolvedSystemPrompt && resolvedAppendPrompt) {\n options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, resolvedAppendPrompt, ...defaultPrompt.slice(1)];\n} else if (resolvedSystemPrompt) {\n options.systemPrompt = defaultPrompt => [resolvedSystemPrompt, ...defaultPrompt.slice(1)];\n} else if (resolvedAppendPrompt) {\n options.systemPrompt = defaultPrompt => [...defaultPrompt, resolvedAppendPrompt];\n}\n```\n\nThe default blocks come from `buildSystemPrompt`:\n\n- block 0: `system-prompt.md` — the stable default instructions (staff-engineer preamble, tool inventory, exploration rules, workflow rules, etc.);\n- block 1, when non-empty: `project-prompt.md` — dynamic project/environment context (workstation info, context files, dir-context list, workspace tree, current date/cwd, and other project footer content).\n\nConsequences for normal CLI use:\n\n- Providing `--system-prompt` or `SYSTEM.md` replaces only block 0. 
The stable default instructions are removed, but the dynamic project/environment footer from `project-prompt.md` remains as `defaultPrompt.slice(1)`.\n- Providing `--append-system-prompt` or `APPEND_SYSTEM.md` without a custom system prompt appends a new block after all default blocks.\n- Providing both a custom system prompt and an append prompt produces: custom system prompt block, append prompt block, then the preserved dynamic project/environment footer.\n\nIf you want to keep both default blocks and add to them, use `--append-system-prompt` / `APPEND_SYSTEM.md` without `--system-prompt` / `SYSTEM.md`. If you want to replace the stable default instructions while keeping the dynamic footer, use `--system-prompt` / `SYSTEM.md`.\n\n---\n\n## 3) Templating contract\n\n**Contents of `SYSTEM.md`, `APPEND_SYSTEM.md`, `--system-prompt`, and `--append-system-prompt` are treated as plain text.** They are resolved before prompt-block replacement and are not rendered as Handlebars templates.\n\nThe built-in prompt templates are Handlebars (`packages/utils/src/prompt.ts`), but user-provided strings are not compiled with that renderer. The secondary capability path can insert `systemPromptCustomization` into a Handlebars parent template, but a `{{value}}` reference in Handlebars still does not recursively render its substituted contents — the value is emitted as a string. Concretely:\n```handlebars\n{{! parent template — handled by Handlebars }}\n{{#if systemPromptCustomization}}\n{{systemPromptCustomization}}\n{{/if}}\n```\n\nIf `SYSTEM.md` contains:\n\n```handlebars\nWorking in {{cwd}} on {{date}}.\n{{#if hasMemoryRoot}}Memory enabled.{{/if}}\n```\n\nthe rendered output contains those characters verbatim — `{{cwd}}`, `{{#if hasMemoryRoot}}`, etc. are NOT substituted. They will be shown to the model as literal Handlebars syntax.\n\nThis is by design. 
The internal template variables (`cwd`, `date`, `environment`, `workspaceTree`, `skills`, `rules`, `toolRefs`, `hasMemoryRoot`, `hasObsidian`, `mcpDiscoveryServerSummaries`, ...) are not a supported public surface — they change between releases as the prompt is rewritten, and they would couple user configs to internals. Treat them as private.\n\nIf a future release exposes a templating surface for `SYSTEM.md`, it will be opt-in (e.g. via a settings flag or a different filename) and documented here.\n\n---\n\n## 4) Recommended patterns\n\n### \"Tweak the default\" — keep default, add a few rules\n\nUse `APPEND_SYSTEM.md` (or `--append-system-prompt`) without `SYSTEM.md`. The default stable instructions and the dynamic project/environment footer stay intact; your text is appended as an additional block.\n\n```text\n# ~/.omp/agent/APPEND_SYSTEM.md\nPrefer Bun APIs over Node APIs in this project.\nWhen you change a public function, run `bun check` before yielding.\n```\n\n### \"Replace the stable default instructions\" — bring your own base prompt\n\nUse `SYSTEM.md` (or `--system-prompt`). You replace the stable default instructions in block 0, but normal CLI startup still preserves the dynamic project/environment footer block (`project-prompt.md`): workstation info, context files, dir-context list, workspace tree, current date, cwd, and related project context.\n\n```text\n# ~/.omp/agent/SYSTEM.md\nYou are a code reviewer. Read diffs, surface issues, never edit files.\n- Cite paths with backticks.\n- Prefer concrete fixes over abstract advice.\n```\n\nIf you do this and want default tool guidance, exploration rules, or workflow rules, copy what you need from `packages/coding-agent/src/prompts/system/system-prompt.md` and maintain it yourself — there is currently no way to inherit selected sections from that stable default instruction block.\n\n### \"Customize while keeping generated skills/rules/tool guidance\"\n\nUse `APPEND_SYSTEM.md`, not `SYSTEM.md`. 
Skills, rulebook summaries, always-apply rules, the tool inventory, and the built-in guidance that tells the model when to read `skill://<name>` are part of block 0 (`system-prompt.md`). Because `SYSTEM.md` replaces block 0, those generated lists are not available to the model in a custom system prompt.\n\nThe dynamic project/environment footer that remains after `SYSTEM.md` is only block 1 (`project-prompt.md`): workstation info, AGENTS.md context files, dir-context list, workspace tree, current date, cwd, and related project context. It does not include discovered skills.\n\nThere is currently no supported CLI mode for \"replace the stable default instructions but keep the generated skills/rules/tool guidance.\" If you need automatic skills loading, keep the default block and add your customization via `APPEND_SYSTEM.md`. If you fully replace with `SYSTEM.md`, you must hard-code any skill names/instructions you want the model to know about, and those will not track discovery automatically.\n\n### \"Replace everything, including project context\" — SDK-only\n\nThe normal CLI file/flag path intentionally preserves `defaultPrompt.slice(1)`. Code using `CreateAgentSessionOptions.systemPrompt` directly can return a full replacement array and omit the project footer, but that is not what `.omp/SYSTEM.md`, `~/.omp/agent/SYSTEM.md`, or `--system-prompt` do.\n\n### \"Replace, but keep one section of the default instructions\" — not directly supported\n\nThere is no built-in way to inherit specific sections from `system-prompt.md` while replacing the rest. The supported CLI modes are: append to the default prompt, or replace block 0 and keep the dynamic footer.\n\n---\n\n## 5) Deduplication\n\nThe CLI path avoids double-injecting discovered `SYSTEM.md` by replacing block 0 after the default prompt blocks are rendered. 
Any `systemPromptCustomization` from the secondary capability path would have been rendered into block 0, and that block is discarded when `main.ts` applies `[resolvedSystemPrompt, ...defaultPrompt.slice(1)]`.\n\nInside `buildSystemPrompt` itself, secondary customization and always-apply rules are still deduplicated:\n\n- `dedupePromptSource` drops a `systemPromptCustomization` block when it already appears in an internally supplied `customPrompt` or append prompt.\n- `dedupeAlwaysApplyRules` omits always-apply rules whose body appears verbatim in any of `{customPrompt, appendPrompt, systemPromptCustomization}`.\n\n---\n\n## 6) Discovery paths\n\nOnly one path actually drives the customization a CLI user sees: the primary CLI path. The capability layer exists but its `SYSTEM.md` output never reaches the rendered prompt under normal CLI startup.\n\n- The primary CLI path (`discoverSystemPromptFile` / `discoverAppendSystemPromptFile` in `main.ts`, which feeds `resolvedSystemPrompt` / `resolvedAppendPrompt`) calls `findConfigFile`. `findConfigFile` checks only `<cwd>/.omp`, `<cwd>/.claude`, `<cwd>/.codex`, `<cwd>/.gemini`, and the user-level equivalents — it does **not** walk up ancestors. Files in `<ancestor>/.omp/SYSTEM.md` are ignored when `omp` is started from a subdirectory.\n- The secondary capability path (`loadSystemPromptFiles` → builtin discovery) does walk up via `findNearestProjectConfigDir` and requires the project `.omp/` directory to be non-empty. Its result is rendered into the template variable `systemPromptCustomization`. Under normal CLI startup the default template (`system-prompt.md`) never references that variable, so ancestor-walk capability content has no user-visible effect.\n\nNet effect for CLI users: put `SYSTEM.md` / `APPEND_SYSTEM.md` directly under `<cwd>/.omp` (or another supported config base under cwd) or in the user-level location (`~/.omp/agent/SYSTEM.md` etc.). 
Ancestor paths are not searched.\n\n---\n\n## 7) Quick reference\n\n| Goal | Use |\n|---|---|\n| Add an instruction on top of the full default prompt | `APPEND_SYSTEM.md` or `--append-system-prompt` |\n| Replace the stable default instructions but keep project/environment context | `SYSTEM.md` or `--system-prompt` |\n| Preserve generated skills/rules/tool guidance while customizing | `APPEND_SYSTEM.md`; `SYSTEM.md` replaces that generated block |\n| Use `{{cwd}}` / `{{date}}` / other internals in my file | Not supported. Files are inserted verbatim. |\n| Inherit specific sections from `system-prompt.md` | Not supported; use append, or copy what you need into `SYSTEM.md`. |\n| Override at a per-repo level | Project `.omp/SYSTEM.md` under the cwd you launch `omp` from |\n| Override globally | `~/.omp/agent/SYSTEM.md` or `~/.omp/agent/APPEND_SYSTEM.md` |\n",
68
68
  "task-agent-discovery.md": "# Task Agent Discovery and Selection\n\nThis document describes how the task subsystem discovers agent definitions, merges multiple sources, and resolves a requested agent at execution time.\n\nIt covers runtime behavior as implemented today, including precedence, invalid-definition handling, and spawn/depth constraints that can make an agent effectively unavailable.\n\n## Implementation files\n\n- [`src/task/discovery.ts`](../packages/coding-agent/src/task/discovery.ts)\n- [`src/task/agents.ts`](../packages/coding-agent/src/task/agents.ts)\n- [`src/task/types.ts`](../packages/coding-agent/src/task/types.ts)\n- [`src/task/index.ts`](../packages/coding-agent/src/task/index.ts)\n- [`src/task/commands.ts`](../packages/coding-agent/src/task/commands.ts)\n- [`src/prompts/agents/task.md`](../packages/coding-agent/src/prompts/agents/task.md)\n- [`src/prompts/tools/task.md`](../packages/coding-agent/src/prompts/tools/task.md)\n- [`src/discovery/helpers.ts`](../packages/coding-agent/src/discovery/helpers.ts)\n- [`src/config.ts`](../packages/coding-agent/src/config.ts)\n- [`src/task/executor.ts`](../packages/coding-agent/src/task/executor.ts)\n\n---\n\n## Agent definition shape\n\nTask agents normalize into `AgentDefinition` (`src/task/types.ts`):\n\n- `name`, `description`, `systemPrompt` (required for a valid loaded agent)\n- optional `tools`, `spawns`, `model`, `thinkingLevel`, `output`, `blocking`, `autoloadSkills`, `readSummarize`\n- `source`: `\"bundled\" | \"user\" | \"project\"`\n- optional `filePath`\n\nParsing comes from frontmatter via `parseAgentFields()` (`src/discovery/helpers.ts`):\n\n- missing `name` or `description` => invalid (`null`), caller treats as parse failure\n- `tools` accepts CSV or array; if provided, `yield` is auto-added\n- `spawns` accepts `*`, CSV, or array\n- backward-compat behavior: if `spawns` missing but `tools` includes `task`, `spawns` becomes `*`\n- `output` is passed through as opaque schema data\n- 
`read-summarize: false` (parsed as `readSummarize`) forces the subagent's `read` tool to return verbatim file content instead of structural summaries — `runSubprocess` applies it as a `read.summarize.enabled: false` override on the subagent's isolated settings (`src/task/executor.ts`). `explore` and `librarian` ship with it disabled. Defaults to enabled when the field is absent.\n\n## Bundled agents\n\nBundled agents are embedded at build time (`src/task/agents.ts`) using text imports.\n\n`EMBEDDED_AGENT_DEFS` defines:\n\n- `explore`, `plan`, `designer`, `reviewer` from prompt files\n- `task` and `quick_task` from shared `task.md` body plus injected frontmatter\n\nLoading path:\n\n1. `loadBundledAgents()` parses embedded markdown with `parseAgent(..., \"bundled\", \"fatal\")`\n2. results are cached in-memory (`bundledAgentsCache`)\n3. `clearBundledAgentsCache()` is test-only cache reset\n\nBecause bundled parsing uses `level: \"fatal\"`, malformed bundled frontmatter throws and can fail discovery entirely.\n\n## Filesystem and plugin discovery\n\n`discoverAgents(cwd, home)` (`src/task/discovery.ts`) merges agents from multiple places before appending bundled definitions.\n\n### Discovery inputs\n\n1. User config agent dirs from `getConfigDirs(\"agents\", { project: false })`\n2. Nearest project agent dirs from `findAllNearestProjectConfigDirs(\"agents\", cwd)`\n3. Claude plugin roots (`listClaudePluginRoots(home)`) with `agents/` subdirs\n4. Bundled agents (`loadBundledAgents()`)\n\n### Actual source order\n\nSource-family order comes from `getConfigDirs(\"\", { project: false })`, which is derived from `priorityList` in `src/config.ts`:\n\n1. `.omp`\n2. `.claude`\n3. `.codex`\n4. `.gemini`\n\nFor each source family, discovery order is:\n\n1. nearest project dir for that source (if found)\n2. 
user dir for that source\n\nAfter all source-family dirs, plugin `agents/` dirs are appended (project-scope plugins first, then user-scope).\n\nBundled agents are appended last.\n\n### Important caveat: stale comments vs current code\n\n`discovery.ts` header comments still mention `.pi` and do not mention `.codex`/`.gemini`. Actual runtime order is driven by `src/config.ts` and currently uses `.omp`, `.claude`, `.codex`, `.gemini`.\n\n## Merge and collision rules\n\nDiscovery uses first-wins dedup by exact `agent.name`:\n\n- A `Set<string>` tracks seen names.\n- Loaded agents are flattened in directory order and kept only if name unseen.\n- Bundled agents are filtered against the same set and only added if still unseen.\n\nImplications:\n\n- Project overrides user for same source family.\n- Higher-priority source family overrides lower (`.omp` before `.claude`, etc.).\n- Non-bundled agents override bundled agents with the same name.\n- Name matching is case-sensitive (`Task` and `task` are distinct).\n- Within one directory, markdown files are read in lexicographic filename order before dedup.\n\n## Invalid/missing agent file behavior\n\nPer directory (`loadAgentsFromDir`):\n\n- unreadable/missing directory: treated as empty (`readdir(...).catch(() => [])`)\n- file read or parse failure: warning logged, file skipped\n- parse path uses `parseAgent(..., level: \"warn\")`\n\nFrontmatter failure behavior comes from `parseFrontmatter`:\n\n- parse error at `warn` level logs warning\n- parser falls back to a simple `key: value` line parser\n- if required fields are still missing, `parseAgentFields` fails, then `AgentParsingError` is thrown and caught by caller (file skipped)\n\nNet effect: one bad custom agent file does not abort discovery of other files.\n\n## Agent lookup and selection\n\nLookup is exact-name linear search:\n\n- `getAgent(agents, name)` => `agents.find(a => a.name === name)`\n\nIn synchronous task execution (`TaskTool.#executeSync`):\n\n1. 
agents are rediscovered at execution time (`discoverAgents(this.session.cwd)`)\n2. requested `params.agent` is resolved through `getAgent`\n3. missing agent returns immediate tool response:\n - `Unknown agent \"...\". Available: ...`\n - no subprocess runs\n\n### Description vs execution-time discovery\n\n`TaskTool.create()` builds the tool description from discovery results at initialization time. `#executeSync` rediscovers agents, so the runtime set can differ from what was listed in the earlier tool description if agent files changed mid-session. The async entry path still uses the initialization-time list to decide whether an agent is marked `blocking` before scheduling.\n\n## Structured-output guardrails and schema precedence\n\nRuntime output schema precedence in `TaskTool.execute`:\n\n1. task call `params.schema` when `task.simple` allows custom schemas\n2. agent frontmatter `output`\n3. parent session `outputSchema`\n\n(`effectiveOutputSchema = outputSchema ?? effectiveAgent.output ?? this.session.outputSchema` when custom task schemas are enabled; otherwise task-call schema is skipped.)\n\nPrompt-time guardrail text in `src/prompts/tools/task.md` warns about mismatch behavior for structured-output agents (`explore`, `reviewer`): output-format instructions in prose can conflict with built-in schema and produce `null` outputs.\n\nThis is guidance, not hard runtime validation logic in `discoverAgents`.\n\n## Command discovery interaction\n\n`src/task/commands.ts` is parallel infrastructure for workflow commands (not agent definitions), but it follows the same overall pattern:\n\n- discover from capability providers first\n- deduplicate by name with first-wins\n- append bundled commands if still unseen\n- exact-name lookup via `getCommand`\n\nIn `src/task/index.ts`, command helpers are re-exported with agent discovery helpers. 
Agent discovery itself does not depend on command discovery at runtime.\n\n## Availability constraints beyond discovery\n\nAn agent can be discoverable but still unavailable to run because of execution guardrails.\n\n### Disabled-agent settings\n\n`TaskTool.#executeSync` checks `task.disabledAgents` after resolving the agent. If the requested name is disabled, execution returns an immediate error listing enabled alternatives when available.\n\n### Parent spawn policy\n\n`TaskTool.#executeSync` checks `session.getSessionSpawns()`:\n\n- `\"*\"` => allow any\n- `\"\"` => deny all\n- CSV list => allow only listed names\n\nIf denied: immediate `Cannot spawn '...'. Allowed: ...` response.\n\n### Blocked self-recursion env guard\n\n`PI_BLOCKED_AGENT` is read at tool construction. If request matches, execution is rejected with recursion-prevention message.\n\n### Recursion-depth gating (task tool availability inside child sessions)\n\nIn `runSubprocess` (`src/task/executor.ts`):\n\n- depth computed from `taskDepth`\n- `task.maxRecursionDepth` controls cutoff\n- when at max depth:\n - `task` tool is removed from child tool list\n - child `spawns` env is set to empty\n\nSo deeper levels cannot spawn further tasks even if the agent definition includes `spawns`.\n\n## Plan mode behavior\n\nWhen parent plan mode is enabled, `TaskTool.execute` builds an `effectiveAgent` before launching subprocesses:\n\n- prepends the plan-mode subagent system prompt\n- restricts tools to `read`, `search`, `find`, `lsp`, and `web_search`\n- clears child spawns\n\nThe same `effectiveAgent` is used for subprocess launch, model/thinking overrides, and output-schema selection.\n",
69
- "theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@oh-my-pi/pi-natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`ThemeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, 
`mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (14)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n- `symbols.spinnerFrames` overrides the loading spinner frames. Accepts either a flat `string[]` (applied to both spinner types) or an object `{ \"status\"?: string[], \"activity\"?: string[] }` to override each type independently. Any type not specified falls back to the symbol preset's default frames. `status` drives the ~12.5fps spinner used by loaders and tool-execution indicators; `activity` drives the ~60fps spinner used by markdown progress bars and similar high-frequency UI.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. 
built-in embedded themes (`dark.json`, `light.json`, and all `defaults/*.json` compiled into `defaultThemes`)\n2. custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.omp/agent/themes`\n- overridden by `PI_CODING_AGENT_DIR` (`$PI_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. 
dark slot fallback\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"titanium\"`\n- `theme.light = \"light\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nSettings UI uses this for live preview and restores prior theme on cancel.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme settings are persisted\n\nTheme-related settings are persisted by `Settings` to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.omp/agent`\n- effective default file: `~/.omp/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- 
`colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.omp/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Display -> Dark theme` or `Display -> Light theme`) depending on which auto slot you want.\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n \"toolDiffAdded\": \"#9ece6a\",\n \"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n 
\"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and preview theme values (live `previewTheme`).\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
69
+ "theme.md": "# Theming Reference\n\nThis document describes how theming works in the coding-agent today: schema, loading, runtime behavior, and failure modes.\n\n## What the theme system controls\n\nThe theme system drives:\n\n- foreground/background color tokens used across the TUI\n- markdown styling adapters (`getMarkdownTheme()`)\n- selector/editor/settings list adapters (`getSelectListTheme()`, `getEditorTheme()`, `getSettingsListTheme()`)\n- symbol preset + symbol overrides (`unicode`, `nerd`, `ascii`)\n- syntax highlighting colors used by native highlighter (`@oh-my-pi/pi-natives`)\n- status line segment colors\n\nPrimary implementation: `src/modes/theme/theme.ts`.\n\n## Theme JSON shape\n\nTheme files are JSON objects validated against the runtime schema in `theme.ts` (`ThemeJsonSchema`) and mirrored by `src/modes/theme/theme-schema.json`.\n\nTop-level fields:\n\n- `name` (required)\n- `colors` (required; all color tokens required)\n- `vars` (optional; reusable color variables)\n- `export` (optional; HTML export colors)\n- `symbols` (optional)\n - `preset` (optional: `unicode | nerd | ascii`)\n - `overrides` (optional: key/value overrides for `SymbolKey`)\n\nColor values accept:\n\n- hex string (`\"#RRGGBB\"`)\n- 256-color index (`0..255`)\n- variable reference string (resolved through `vars`)\n- empty string (`\"\"`) meaning terminal default (`\\x1b[39m` fg, `\\x1b[49m` bg)\n\n## Required color tokens (current)\n\nAll tokens below are required in `colors`.\n\n### Core text and borders (11)\n\n`accent`, `border`, `borderAccent`, `borderMuted`, `success`, `error`, `warning`, `muted`, `dim`, `text`, `thinkingText`\n\n### Background blocks (7)\n\n`selectedBg`, `userMessageBg`, `customMessageBg`, `toolPendingBg`, `toolSuccessBg`, `toolErrorBg`, `statusLineBg`\n\n### Message/tool text (5)\n\n`userMessageText`, `customMessageText`, `customMessageLabel`, `toolTitle`, `toolOutput`\n\n### Markdown (10)\n\n`mdHeading`, `mdLink`, `mdLinkUrl`, `mdCode`, 
`mdCodeBlock`, `mdCodeBlockBorder`, `mdQuote`, `mdQuoteBorder`, `mdHr`, `mdListBullet`\n\n### Tool diff + syntax highlighting (12)\n\n`toolDiffAdded`, `toolDiffRemoved`, `toolDiffContext`,\n`syntaxComment`, `syntaxKeyword`, `syntaxFunction`, `syntaxVariable`, `syntaxString`, `syntaxNumber`, `syntaxType`, `syntaxOperator`, `syntaxPunctuation`\n\n### Mode/thinking borders (8)\n\n`thinkingOff`, `thinkingMinimal`, `thinkingLow`, `thinkingMedium`, `thinkingHigh`, `thinkingXhigh`, `bashMode`, `pythonMode`\n\n### Status line segment colors (13)\n\n`statusLineSep`, `statusLineModel`, `statusLinePath`, `statusLineGitClean`, `statusLineGitDirty`, `statusLineContext`, `statusLineSpend`, `statusLineStaged`, `statusLineDirty`, `statusLineUntracked`, `statusLineOutput`, `statusLineCost`, `statusLineSubagents`\n\n## Optional tokens\n\n### `export` section (optional)\n\nUsed for HTML export theming helpers:\n\n- `export.pageBg`\n- `export.cardBg`\n- `export.infoBg`\n\nIf omitted, export code derives defaults from resolved theme colors.\n\n### `symbols` section (optional)\n\n- `symbols.preset` sets a theme-level default symbol set.\n- `symbols.overrides` can override individual `SymbolKey` values.\n- `symbols.spinnerFrames` overrides the loading spinner frames. Accepts either a flat `string[]` (applied to both spinner types) or an object `{ \"status\"?: string[], \"activity\"?: string[] }` to override each type independently. Any type not specified falls back to the symbol preset's default frames. `status` drives the ~12.5fps spinner used by loaders and tool-execution indicators; `activity` drives the ~30fps spinner used by markdown progress bars and similar high-frequency UI.\n\nRuntime precedence:\n\n1. settings `symbolPreset` override (if set)\n2. theme JSON `symbols.preset`\n3. fallback `\"unicode\"`\n\nInvalid override keys are ignored and logged (`logger.debug`).\n\n## Built-in vs custom theme sources\n\nTheme lookup order (`loadThemeJson`):\n\n1. 
built-in embedded themes (`dark.json`, `light.json`, and all `defaults/*.json` compiled into `defaultThemes`)\n2. custom theme file: `<customThemesDir>/<name>.json`\n\nCustom themes directory comes from `getCustomThemesDir()`:\n\n- default: `~/.omp/agent/themes`\n- overridden by `PI_CODING_AGENT_DIR` (`$PI_CODING_AGENT_DIR/themes`)\n\n`getAvailableThemes()` returns merged built-in + custom names, sorted, with built-ins taking precedence on name collision.\n\n## Loading, validation, and resolution\n\nFor custom theme files:\n\n1. read JSON\n2. parse JSON\n3. validate against `ThemeJsonSchema`\n4. resolve `vars` references recursively\n5. convert resolved values to ANSI by terminal capability mode\n\nValidation behavior:\n\n- missing required color tokens: explicit grouped error message\n- bad token types/values: validation errors with JSON path\n- unknown theme file: `Theme not found: <name>`\n\nVar reference behavior:\n\n- supports nested references\n- throws on missing variable reference\n- throws on circular references\n\n## Terminal color mode behavior\n\nColor mode detection (`detectColorMode`):\n\n- `COLORTERM=truecolor|24bit` => truecolor\n- `WT_SESSION` => truecolor\n- `TERM` in `dumb`, `linux`, or empty => 256color\n- otherwise => truecolor\n\nConversion behavior:\n\n- hex -> `Bun.color(..., \"ansi-16m\" | \"ansi-256\")`\n- numeric -> `38;5` / `48;5` ANSI\n- `\"\"` -> default fg/bg reset\n\n## Runtime switching behavior\n\n### Initial theme (`initTheme`)\n\n`main.ts` initializes theme with settings:\n\n- `symbolPreset`\n- `colorBlindMode`\n- `theme.dark`\n- `theme.light`\n\nAuto theme slot selection uses terminal appearance in this order:\n\n1. terminal-reported OSC 11 background luminance, unless the macOS/Zellij fallback path is active\n2. `COLORFGBG` background index (`< 8` => dark, `>= 8` => light)\n3. macOS appearance fallback only for the known-broken macOS/Zellij OSC 11 path\n4. 
dark slot fallback\n\nCurrent defaults from settings schema:\n\n- `theme.dark = \"titanium\"`\n- `theme.light = \"light\"`\n- `symbolPreset = \"unicode\"`\n- `colorBlindMode = false`\n\n### Explicit switching (`setTheme`)\n\n- loads selected theme\n- updates global `theme` singleton\n- optionally starts watcher\n- triggers `onThemeChange` callback\n\nOn failure:\n\n- falls back to built-in `dark`\n- returns `{ success: false, error }`\n\n### Preview switching (`previewTheme`)\n\n- applies temporary preview theme to global `theme`\n- does **not** change persisted settings by itself\n- returns success/error without fallback replacement\n\nSettings UI uses this for live preview and restores prior theme on cancel.\n\n## Watchers and live reload\n\nWhen watcher is enabled (`setTheme(..., true)` / interactive init):\n\n- watches `<customThemesDir>/<currentTheme>.json` only when that file exists\n- built-ins are effectively not watched; built-in theme lookup also takes precedence over same-name custom files\n- matching file changes schedule a debounced reload; reload errors or temporary file absence keep the last successfully loaded theme\n- the watcher does not perform a delete/rename fallback; it waits for a future successful reload or explicit theme switch\n\nAuto mode also reevaluates dark/light slot mapping from terminal appearance changes, `SIGWINCH`, and the macOS fallback observer when active.\n\n## Color-blind mode behavior\n\n`colorBlindMode` changes only one token at runtime:\n\n- `toolDiffAdded` is HSV-adjusted (green shifted toward blue)\n- adjustment is applied only when resolved value is a hex string\n\nOther tokens are unchanged.\n\n## Where theme settings are persisted\n\nTheme-related settings are persisted by `Settings` to global config YAML:\n\n- path: `<agentDir>/config.yml`\n- default agent dir: `~/.omp/agent`\n- effective default file: `~/.omp/agent/config.yml`\n\nPersisted keys:\n\n- `theme.dark`\n- `theme.light`\n- `symbolPreset`\n- 
`colorBlindMode`\n\nLegacy migration exists: old flat `theme: \"name\"` is migrated to nested `theme.dark` or `theme.light` based on luminance detection.\n\n## Creating a custom theme (practical)\n\n1. Create file in custom themes dir, e.g. `~/.omp/agent/themes/my-theme.json`.\n2. Include `name`, optional `vars`, and **all required** `colors` tokens.\n3. Optionally include `symbols` and `export`.\n4. Select the theme in Settings (`Display -> Dark theme` or `Display -> Light theme`) depending on which auto slot you want.\n\nMinimal skeleton:\n\n```json\n{\n \"name\": \"my-theme\",\n \"vars\": {\n \"accent\": \"#7aa2f7\",\n \"muted\": 244\n },\n \"colors\": {\n \"accent\": \"accent\",\n \"border\": \"#4c566a\",\n \"borderAccent\": \"accent\",\n \"borderMuted\": \"muted\",\n \"success\": \"#9ece6a\",\n \"error\": \"#f7768e\",\n \"warning\": \"#e0af68\",\n \"muted\": \"muted\",\n \"dim\": 240,\n \"text\": \"\",\n \"thinkingText\": \"muted\",\n\n \"selectedBg\": \"#2a2f45\",\n \"userMessageBg\": \"#1f2335\",\n \"userMessageText\": \"\",\n \"customMessageBg\": \"#24283b\",\n \"customMessageText\": \"\",\n \"customMessageLabel\": \"accent\",\n \"toolPendingBg\": \"#1f2335\",\n \"toolSuccessBg\": \"#1f2d2a\",\n \"toolErrorBg\": \"#2d1f2a\",\n \"toolTitle\": \"\",\n \"toolOutput\": \"muted\",\n\n \"mdHeading\": \"accent\",\n \"mdLink\": \"accent\",\n \"mdLinkUrl\": \"muted\",\n \"mdCode\": \"#c0caf5\",\n \"mdCodeBlock\": \"#c0caf5\",\n \"mdCodeBlockBorder\": \"muted\",\n \"mdQuote\": \"muted\",\n \"mdQuoteBorder\": \"muted\",\n \"mdHr\": \"muted\",\n \"mdListBullet\": \"accent\",\n\n \"toolDiffAdded\": \"#9ece6a\",\n \"toolDiffRemoved\": \"#f7768e\",\n \"toolDiffContext\": \"muted\",\n\n \"syntaxComment\": \"#565f89\",\n \"syntaxKeyword\": \"#bb9af7\",\n \"syntaxFunction\": \"#7aa2f7\",\n \"syntaxVariable\": \"#c0caf5\",\n \"syntaxString\": \"#9ece6a\",\n \"syntaxNumber\": \"#ff9e64\",\n \"syntaxType\": \"#2ac3de\",\n \"syntaxOperator\": \"#89ddff\",\n 
\"syntaxPunctuation\": \"#9aa5ce\",\n\n \"thinkingOff\": 240,\n \"thinkingMinimal\": 244,\n \"thinkingLow\": \"#7aa2f7\",\n \"thinkingMedium\": \"#2ac3de\",\n \"thinkingHigh\": \"#bb9af7\",\n \"thinkingXhigh\": \"#f7768e\",\n\n \"bashMode\": \"#2ac3de\",\n \"pythonMode\": \"#bb9af7\",\n\n \"statusLineBg\": \"#16161e\",\n \"statusLineSep\": 240,\n \"statusLineModel\": \"#bb9af7\",\n \"statusLinePath\": \"#7aa2f7\",\n \"statusLineGitClean\": \"#9ece6a\",\n \"statusLineGitDirty\": \"#e0af68\",\n \"statusLineContext\": \"#2ac3de\",\n \"statusLineSpend\": \"#7dcfff\",\n \"statusLineStaged\": \"#9ece6a\",\n \"statusLineDirty\": \"#e0af68\",\n \"statusLineUntracked\": \"#f7768e\",\n \"statusLineOutput\": \"#c0caf5\",\n \"statusLineCost\": \"#ff9e64\",\n \"statusLineSubagents\": \"#bb9af7\"\n }\n}\n```\n\n## Testing custom themes\n\nUse this workflow:\n\n1. Start interactive mode (watcher enabled from startup).\n2. Open settings and preview theme values (live `previewTheme`).\n3. For custom theme files, edit the JSON while running and confirm auto-reload on save.\n4. Exercise critical surfaces:\n - markdown rendering\n - tool blocks (pending/success/error)\n - diff rendering (added/removed/context)\n - status line readability\n - thinking level border changes\n - bash/python mode border colors\n5. Validate both symbol presets if your theme depends on glyph width/appearance.\n\n## Real constraints and caveats\n\n- All `colors` tokens are required for custom themes.\n- `export` and `symbols` are optional.\n- `$schema` in theme JSON is informational; runtime validation is enforced by a Zod schema in code.\n- `setTheme` failure falls back to `dark`; `previewTheme` failure does not replace current theme.\n- File watcher reload errors or temporary missing files keep the current loaded theme until a successful reload or explicit theme switch.\n",
70
70
  "tools/ask.md": "# ask\n\n> Prompts the interactive user for one or more option-picker or free-form answers.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ask.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ask.md`\n- Key collaborators:\n - `packages/coding-agent/src/config/settings-schema.ts` — `ask.timeout` / `ask.notify` defaults\n - `packages/coding-agent/src/modes/theme/theme.ts` — checkbox and tree glyphs for TUI rendering\n - `packages/coding-agent/src/tui.ts` — status-line rendering\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `questions` | `Question[]` | Yes | One or more questions. Empty arrays are rejected by schema and also guarded at runtime. |\n\n### `Question`\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `id` | `string` | Yes | Stable identifier used in multi-question results. |\n| `question` | `string` | Yes | Prompt text shown to the user. |\n| `options` | `{ label: string }[]` | Yes | Option labels for the picker. The schema does not require a minimum length; the UI always appends `Other (type your own)`, and callers must not include it. |\n| `multi` | `boolean` | No | Enables multi-select mode. Default: `false`. |\n| `recommended` | `number` | No | Zero-based recommended option index. In single-select mode the label gets ` (Recommended)` appended in the UI. |\n\n## Outputs\n- Single-shot result.\n- `content[0].text` is plain text:\n - single question: `User selected: ...` and/or `User provided custom input: ...`\n - multiple questions: `User answers:` followed by one line per `id`\n- `details`:\n - single question: `{ question, options, multi, selectedOptions, customInput? }`\n - multiple questions: `{ results: QuestionResult[] }`, where each item includes `id`, `question`, `options`, `multi`, `selectedOptions`, and optional `customInput`\n- Cancellation and headless cases throw instead of returning a structured success result.\n\n## Flow\n1. 
`AskTool.createIf()` only registers the tool when `session.hasUI` is true; headless sessions never get it.\n2. `execute()` requires `context.ui`; if missing it aborts the context and throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n3. It reads `ask.timeout` from settings, converts seconds to milliseconds (`0` disables timeout), and disables timeout entirely while plan mode is enabled (`packages/coding-agent/src/tools/ask.ts`).\n4. If `ask.notify` is not `off`, it sends a terminal notification: `Waiting for input`.\n5. For each question, `askSingleQuestion()` drives either:\n - single-select list + optional editor for `Other`\n - multi-select checkbox loop + `Done selecting` sentinel + optional editor for `Other`\n6. In multi-question mode, left/right arrow handlers enable back/forward navigation between questions and preserve prior selections.\n7. If a timeout fires before any selection/custom input, the tool auto-selects the recommended option, or the first option when no valid `recommended` index exists.\n8. If the user cancels without timeout, `execute()` aborts the tool context and throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n9. 
On success it formats human-readable text plus structured `details`; the TUI renderer uses `details` for rich display.\n\n## Modes / Variants\n- Single question: returns flattened `details` fields for one question.\n- Multiple questions: returns `details.results[]` and allows back/forward navigation across questions.\n- Single-select: one option or custom input.\n- Multi-select: toggled checkbox list, `Done selecting` sentinel only when forward navigation is not active.\n\n## Side Effects\n- User-visible prompts / interactive UI\n - Opens a selection dialog via `context.ui.select(...)`.\n - Opens a text editor dialog via `context.ui.editor(...)` for `Other`.\n - Sends a terminal notification unless `ask.notify=off`.\n- Session state\n - Reads plan-mode state to disable timeouts.\n - Calls `context.abort()` on headless use or user cancellation.\n- Background work / cancellation\n - Wraps UI waits in `untilAborted(...)` so abort signals interrupt pending dialogs.\n\n## Limits & Caps\n- `questions` must contain at least 1 item (`askSchema` in `packages/coding-agent/src/tools/ask.ts`).\n- `ask.timeout` default is `0` seconds, which disables timeout (`packages/coding-agent/src/config/settings-schema.ts`). 
Configured non-zero values are seconds.\n- Prompt guidance says provide 2-5 options, but code only requires the `options` array field and does not enforce a minimum or maximum length (`packages/coding-agent/src/prompts/tools/ask.md`).\n- Timeout only applies to the option picker; once the user chooses `Other`, the editor has no timeout (`packages/coding-agent/src/prompts/tools/ask.md`).\n\n## Errors\n- Missing interactive UI: throws `ToolAbortError(\"Ask tool requires interactive mode\")`.\n- User cancels picker/editor without timeout: throws `ToolAbortError(\"Ask tool was cancelled by the user\")`.\n- Abort signal during input: converted to `ToolAbortError(\"Ask input was cancelled\")`.\n- Empty `questions` at runtime returns a text error payload instead of throwing: `Error: questions must not be empty`.\n\n## Notes\n- `recommended` is only a UI hint; invalid indexes are ignored.\n- In single-select mode the returned `selectedOptions` value strips the appended ` (Recommended)` suffix.\n- Multi-select results preserve selection order by `Set` insertion order, not original option order after arbitrary toggles.\n- Option labels and prompt text are returned verbatim in `details`; the tool does not interpret them beyond UI affordances like `Other` and ` (Recommended)`.\n",
71
71
  "tools/ast-edit.md": "# ast_edit\n\n> Preview and apply structural rewrites over source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-edit.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-edit.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native rewrite planning and file mutation\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/resolve.ts` — preview/apply queueing\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number diff references\n - `packages/hashline/src/format.ts` — stable hashline header formatting for preview anchors\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `ops` | `{ pat: string; out: string }[]` | Yes | One or more rewrite rules. `pat` must be non-empty. Duplicate `pat` values fail before native execution. Empty `out` deletes the matched node. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. 
|\n\nShared AST pattern grammar and language catalog: see [`ast_grep`](./ast-grep.md#inputs).\n\n- `ast_edit` uses the same `$NAME`, `$_`, `$$$NAME`, and `$$$` metavariable semantics.\n- The tool prompt adds rewrite-specific constraints:\n - metavariable names must be uppercase and must stand for whole AST nodes,\n - captures from `pat` are substituted into `out`,\n - each rewrite is a 1:1 structural substitution; one capture cannot expand into multiple sibling nodes unless the grammar itself permits that expansion at that position.\n\n## Outputs\n- Single-shot preview result from `ast_edit` itself.\n- Model-facing `content` is one text block showing proposed edits, grouped by file for directory/multi-file runs.\n - Each change renders as two lines. Hashline mode uses `-LINE:before` / `+LINE:after` under a `¶PATH#TAG` header; plain mode uses `-LINE:COLUMN before` / `+LINE:COLUMN after`.\n - Only the first line of each `before`/`after` snippet is shown, truncated to 120 characters in the wrapper.\n - `Limit reached; narrow paths.` and formatted parse issues are appended when applicable.\n- If no rewrites match, text is `No replacements made` plus formatted parse issues when present.\n- `details` includes aggregate preview metadata:\n - `totalReplacements`, `filesTouched`, `filesSearched`, `applied`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileReplacements`, `displayContent`, `meta`\n- The tool always previews first (`applied: false` in the direct result). Actual file writes happen only later through `resolve(action: \"apply\", ...)`.\n- When preview produced replacements, `ast_edit` also queues a pending `resolve` action. Successful apply returns a separate `resolve` result, not another `ast_edit` result.\n\n## Flow\n1. 
`AstEditTool.execute()` validates each op in `packages/coding-agent/src/tools/ast-edit.ts`:\n - empty `pat` fails,\n - at least one op is required,\n - duplicate `pat` values fail,\n - ops are converted to a `Record<pattern, replacement>`.\n2. The wrapper reads `PI_MAX_AST_FILES` via `$envpos(..., 1000)` and uses that as the native `maxFiles` cap for both preview and apply.\n3. Path normalization, internal URL handling, missing-path partitioning, and multi-path resolution follow the same `path-utils.ts` flow as `ast_grep`.\n4. The wrapper stats the resolved base path to decide whether to render grouped directory output.\n5. `runAstEditOnce(...)` always runs native `astEdit(...)` with `dryRun: true` and `failOnParseError: false` on the first pass.\n6. Native `ast_edit` in `crates/pi-natives/src/ast.rs`:\n - normalizes the rewrite map and sorts rules by pattern string,\n - resolves strictness (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers a single language for the whole call unless `lang` was supplied,\n - compiles every rewrite pattern for that language,\n - parses each file, skips files with syntax-error trees, collects `replace_by(...)` edits for every match, enforces replacement and file caps, and returns textual before/after slices plus source ranges.\n7. The TS wrapper deduplicates parse errors, groups changes by file, and renders preview diff lines.\n8. If preview found replacements and `applied` is false, `queueResolveHandler(...)` registers a forced `resolve` action and injects a `resolve-reminder` steering message.\n9. On `resolve(action: \"apply\")`, the queued callback reruns the same rewrite set with `dryRun: false`, recomputes counts, and returns an error result if the live result no longer matches the preview (`stalePreview`). 
The current implementation compares replacement totals and per-file counts after the rerun; if the new run has already written different counts, the result is marked error.\n10. On a non-stale apply, the callback returns `Applied N replacements in M files.`; on discard, `resolve` returns a discard message without mutating files.\n\n## Modes / Variants\n- Single file: preview or apply against one file.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router resolves them to a backing file path.\n- Preview mode: always the direct `ast_edit` tool result.\n- Apply mode: only reachable through the queued `resolve` callback after a preview.\n- Hashline output mode vs plain line/column mode: controlled by `resolveFileDisplayMode()`.\n\n## Side Effects\n- Filesystem\n - Preview reads files and scans directories.\n - Apply rewrites files in place with `std::fs::write(...)`, but only when the computed output differs from the original source.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - Queues a one-shot forced `resolve` tool choice through `queueResolveHandler(...)`.\n - Adds a `resolve-reminder` steering message.\n- User-visible prompts / interactive UI\n - Direct `ast_edit` results are previews.\n - Follow-up apply/discard is exposed through the hidden `resolve` tool.\n- Background work / cancellation\n - Native preview/apply work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- File cap exposed by the wrapper: `PI_MAX_AST_FILES`, default `1000`, in `packages/coding-agent/src/tools/ast-edit.ts`.\n- Native `maxFiles` and `maxReplacements` are both clamped to at least `1` when provided in 
`crates/pi-natives/src/ast.rs`.\n- The wrapper never sets `maxReplacements`; native behavior therefore defaults to effectively unbounded replacements for a run.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` is deduplicated but not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No separate glob-expansion count cap exists. Candidate count is whatever the resolved path/glob expands to after gitignore filtering, then native `maxFiles` stops mutations after the configured number of touched files.\n- Preview text truncates each rendered `before` and `after` first line to 120 characters in `packages/coding-agent/src/tools/ast-edit.ts`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, duplicate rewrite patterns, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - inability to infer one language across all candidates when `lang` is absent,\n - unsupported explicit `lang`,\n - bad glob compilation or unreadable search roots,\n - overlapping computed edits (`Overlapping replacements detected; refine pattern to avoid ambiguous edits`),\n - out-of-bounds edit ranges or non-UTF-8 replacement text,\n - write failures during apply,\n - cancellation or timeout.\n- With `failOnParseError: false` (the wrapper always uses this), pattern compile failures and file parse failures become `parseErrors` instead of aborting the whole run.\n- If every rewrite pattern fails to compile, native `ast_edit` returns a successful zero-replacement result with `parseErrors` populated.\n- Files containing tree-sitter error nodes are skipped for rewriting; they do not get partial edits.\n- Apply can fail after a successful preview if the 
preview becomes stale. The resolve callback compares replacement totals and per-file counts and returns an error result rather than silently reporting success for a mismatched preview.\n\n## Notes\n- `ast_edit` does not expose the native `lang`, `strictness`, `selector`, `maxReplacements`, `failOnParseError`, or `timeoutMs` fields to the model. The runtime fixes the call shape to a preview-first, smart-strictness, best-effort parse mode.\n- Because the wrapper does not expose `lang`, mixed-language rewrites only succeed when every candidate infers to the same canonical language. This is stricter than `ast_grep`.\n- Idempotency is not enforced syntactically. A rewrite like `foo($A) -> foo($A)` previews zero changes because output equals input; a rewrite that keeps matching its own output may still produce replacements on repeated calls.\n- Rewrites are accumulated per file, then applied from the end of the file backward after an overlap check. Independent matches can coexist; overlapping matches abort the run.\n- Native rewrite rule order is by pattern-string sort, not by the original `ops` array order, because `normalize_rewrite_map(...)` sorts the `(pattern, rewrite)` pairs.\n- Preview/apply parity is validated by totals and per-file counts after the apply rerun, not by a byte-for-byte diff of every replacement payload.",
72
72
  "tools/ast-grep.md": "# ast_grep\n\n> Structural code search over supported source files via native ast-grep.\n\n## Source\n- Entry: `packages/coding-agent/src/tools/ast-grep.ts`\n- Model-facing prompt: `packages/coding-agent/src/prompts/tools/ast-grep.md`\n- Key collaborators:\n - `crates/pi-natives/src/ast.rs` — native scan, parse, match engine\n - `crates/pi-ast/src/language/mod.rs` — language aliases and extension inference used by the native wrapper.\n - `packages/coding-agent/src/tools/path-utils.ts` — path/glob parsing and multi-path resolution\n - `packages/coding-agent/src/tools/render-utils.ts` — parse-error dedupe and display caps\n - `packages/coding-agent/src/tools/match-line-format.ts` — hashline match rendering\n - `packages/coding-agent/src/utils/file-display-mode.ts` — hashline vs line-number output mode\n - `packages/natives/native/index.d.ts` — JS-visible native binding contract\n\n## Inputs\n\n| Field | Type | Required | Description |\n| --- | --- | --- | --- |\n| `pat` | `string` | Yes | Single AST pattern. The wrapper trims it and rejects empty strings. |\n| `paths` | `string[]` | Yes | One or more files, directories, globs, or internal URLs with backing files. Empty entries are rejected. Globs are forbidden for internal URLs. |\n| `skip` | `number` | No | Match offset. Defaults to `0`, then `Math.floor(...)`; negatives and non-finite values fail. 
|\n\nPattern grammar and language support exposed to the model:\n- `$NAME` — capture one AST node.\n- `$_` — match one AST node without binding.\n- `$$$NAME` — capture zero or more AST nodes; ast-grep stops lazily at the next satisfiable node.\n- `$$$` — match zero or more AST nodes without binding.\n- Metavariable names must be uppercase and must stand for whole AST nodes, not partial tokens or string fragments.\n- Reusing the same metavariable requires identical code at each occurrence.\n- Patterns must parse as one valid AST node for the inferred target language.\n- Supported canonical languages come from `SupportLang::all_langs()` in `crates/pi-ast/src/language/mod.rs`: `astro`, `bash`, `c`, `cmake`, `cpp`, `csharp`, `dart`, `clojure`, `css`, `diff`, `dockerfile`, `elixir`, `erlang`, `go`, `graphql`, `haskell`, `hcl`, `html`, `ini`, `java`, `javascript`, `json`, `just`, `julia`, `kotlin`, `lua`, `make`, `markdown`, `nix`, `objc`, `ocaml`, `odin`, `perl`, `php`, `powershell`, `protobuf`, `python`, `r`, `regex`, `ruby`, `rust`, `scala`, `solidity`, `sql`, `starlark`, `svelte`, `swift`, `toml`, `tlaplus`, `tsx`, `typescript`, `verilog`, `vue`, `xml`, `yaml`, `zig`.\n\n## Outputs\n- Single-shot tool result.\n- Model-facing `content` is one text block:\n - grouped by file for directory/multi-file searches,\n - match lines rendered under `¶PATH#HASH` as `*LINE:text` in hashline mode or `*LINE|text` otherwise,\n - continuation lines for multi-line matches rendered with a leading space,\n - optional `meta: NAME=value` lines when ast-grep captured metavariables.\n- If no matches are found, text is `No matches found` or `No matches found. 
Parse issues mean the query may be mis-scoped; narrow paths before concluding absence.` plus formatted parse issues.\n- If the wrapper truncates visible results, the text ends with `Result limit reached; narrow paths or increase limit.`\n- `details` includes counts and metadata, not full match payloads:\n - `matchCount`, `fileCount`, `filesSearched`, `limitReached`\n - optional `parseErrors`, `scopePath`, `files`, `fileMatches`, `displayContent`, `meta`\n- Native ranges (`byteStart`, `byteEnd`, `startLine`, `startColumn`, `endLine`, `endColumn`) exist only inside the native result; the wrapper does not emit them directly to the model.\n\n## Flow\n1. `AstGrepTool.execute()` validates `pat`, normalizes `skip`, and normalizes each `paths` entry in `packages/coding-agent/src/tools/ast-grep.ts`.\n2. Internal URLs are resolved through `session.internalRouter`; entries without `sourcePath` fail, and internal-URL globs fail early.\n3. For multiple path inputs, `partitionExistingPaths()` drops missing bases only when at least one surviving base remains; if all bases are missing the call fails.\n4. `parseSearchPath()` splits a single path into `basePath` plus optional `glob`. `resolveExplicitSearchPaths()` collapses multiple inputs into a common base plus a brace-union glob, or separate `targets` when the only common base is a filesystem root.\n5. The wrapper stats the resolved base path to decide whether output should be grouped as a directory result.\n6. Execution dispatches to either:\n - one native `astGrep(...)` call for a single resolved base, or\n - `runMultiTargetAstGrep(...)`, which calls the native binding once per target, rebases paths back to the common root, sorts globally, then applies `skip` and the wrapper limit.\n7. 
Native `ast_grep` in `crates/pi-natives/src/ast.rs`:\n - normalizes and deduplicates patterns,\n - resolves a `MatchStrictness` (`smart` by default),\n - collects candidate files from a file or gitignore-aware directory scan,\n - infers language per candidate from extension unless `lang` was provided,\n - compiles the pattern separately for each language present,\n - reads each file, reports syntax-error trees as parse issues, runs `find_all`, and optionally captures metavariable bindings.\n8. Native results are sorted by path and source position, then paged by `offset`/`limit`.\n9. The TS wrapper normalizes parse-error strings, deduplicates them, groups matches by formatted path, renders anchor lines, appends limit/parse notices, and returns `toolResult(...).text(...).done()`.\n\n## Modes / Variants\n- Single file: native path is the file; output is a flat list of rendered match lines.\n- Directory + optional glob: native scan walks the directory, then filters by compiled glob.\n- Multiple explicit paths/globs: wrapper unions them into one synthetic scope or runs per-target native calls when paths only meet at root.\n- Internal URL inputs: only supported when the router can resolve them to a backing file path.\n- Hashline output mode vs plain line-number mode: controlled by `resolveFileDisplayMode()`; hashline mode requires the edit tool and non-raw, mutable sources.\n\n## Side Effects\n- Filesystem\n - Stats input paths in the TS wrapper.\n - Native code reads matched files and scans directories through `fs_cache`.\n- Session state (transcript, memory, jobs, checkpoints, registries)\n - None beyond normal tool transcript/result metadata.\n- Background work / cancellation\n - Native work runs on a blocking worker via `task::blocking(...)`.\n - Cancellation and optional native timeout are cooperative through `CancelToken::heartbeat()`.\n\n## Limits & Caps\n- Wrapper-visible result cap: `DEFAULT_AST_LIMIT = 50` in `packages/coding-agent/src/tools/ast-grep.ts`.\n - 
Single-target calls rely on the native default limit of 50 in `crates/pi-natives/src/ast.rs`.\n - Multi-target calls fetch `skip + 50 + 1` matches per target, then re-page after global sort.\n- Native `limit` is clamped to at least `1`; omitted `offset` defaults to `0` in `crates/pi-natives/src/ast.rs`.\n- Parse issues are rendered with at most `PARSE_ERRORS_LIMIT = 20` lines in `packages/coding-agent/src/tools/render-utils.ts`; `details.parseErrors` itself is only deduplicated, not capped.\n- Directory scans use `include_hidden: true`, `use_gitignore: true`, and skip `node_modules` unless the glob text explicitly mentions `node_modules` in `crates/pi-natives/src/ast.rs`.\n- No hard file-count cap is applied by the wrapper or native `ast_grep`; candidate count is whatever the resolved path/glob expands to after gitignore filtering.\n- Multi-path union deduplicates identical path inputs before resolution in `resolveExplicitSearchPaths()`.\n\n## Errors\n- TS wrapper throws `ToolError` for empty patterns, invalid `skip`, empty path entries, unsupported internal-URL globs, internal URLs without `sourcePath`, and missing paths.\n- Native code returns hard errors for:\n - unsupported explicit `lang`,\n - inability to infer language for a candidate when `lang` is not supplied,\n - invalid AST pattern compilation for every relevant language,\n - unreadable search roots or bad glob compilation,\n - cancellation (`Aborted: Signal`) or timeout (`Aborted: Timeout`).\n- File-level parse failures and many per-language pattern compile failures are non-fatal: they are accumulated in `parseErrors` and surfaced alongside successful matches.\n- `no matches` is not an error, even when parse issues were recorded.\n\n## Notes\n- `pat` is always wrapped into a one-element `patterns` array by the TS tool; the model cannot send multiple patterns through `ast_grep` even though the native binding supports it.\n- `ast_grep` can search mixed-language trees because native compilation happens 
per discovered language, but the prompt still tells the model to keep calls single-language when possible to reduce parse noise.\n- Pattern compilation is per language present in the candidate set. One pattern can succeed for some languages and generate per-file parse errors for others in the same run.\n- A file with tree-sitter error nodes still gets searched; the syntax warning is additive, not a skip condition.\n- For glob semantics, `*.ts` matches only direct children while `**/*.ts` recurses; this is covered by native tests in `crates/pi-natives/src/ast.rs`.\n- Output anchors are intended for follow-up tools, but the exact anchor format depends on session edit mode (`hashline` vs line-number mode).",
@@ -99,6 +99,6 @@ export const EMBEDDED_DOCS: Readonly<Record<string, string>> = {
99
99
  "tree.md": "# `/tree` Command Reference\n\n`/tree` opens the interactive **Session Tree** navigator. It lets you jump to any entry in the current session file and continue from that point.\n\nThis is an in-file leaf move, not a new session export.\n\n## What `/tree` does\n\n- Builds a tree from current session entries (`SessionManager.getTree()`)\n- Opens `TreeSelectorComponent` with keyboard navigation, filters, and search\n- On selection, calls `AgentSession.navigateTree(targetId, { summarize, customInstructions })`\n- Rebuilds visible chat from the new leaf path\n- Optionally prefills editor text when selecting a user/custom message\n\nPrimary implementation:\n\n- `src/modes/controllers/input-controller.ts` (`/tree`, keybinding wiring, double-escape behavior)\n- `src/modes/controllers/selector-controller.ts` (tree UI launch + summary prompt flow)\n- `src/modes/components/tree-selector.ts` (navigation, filters, search, labels, rendering)\n- `src/session/agent-session.ts` (`navigateTree` leaf switching + optional summary)\n- `src/session/session-manager.ts` (`getTree`, `branch`, `branchWithSummary`, `resetLeaf`, label persistence)\n\n## How to open it\n\nAny of the following opens the same selector:\n\n- `/tree`\n- configured keybinding action `tree`\n- double-escape on empty editor when `doubleEscapeAction = \"tree\"` (default)\n- `/branch` when `doubleEscapeAction = \"tree\"` (routes to tree selector instead of user-only branch picker)\n\n## Tree UI model\n\nThe tree is rendered from session entry parent pointers (`id` / `parentId`).\n\n- Children are sorted by timestamp ascending (older first, newer lower)\n- Active branch (path from root to current leaf) is marked with a bullet\n- Labels (if present) render as `[label]` before node text\n- If multiple roots exist (orphaned/broken parent chains), they are shown under a virtual branching root\n\n```text\nExample tree view (active path marked with •):\n\n├─ user: \"Start task\"\n│ └─ assistant: \"Plan\"\n│ ├─ • 
user: \"Try approach A\"\n│ │ └─ • assistant: \"A result\"\n│ │ └─ • [milestone] user: \"Continue A\"\n│ └─ user: \"Try approach B\"\n│ └─ assistant: \"B result\"\n```\n\nThe selector recenters around current selection and shows up to:\n\n- `max(5, floor(terminalHeight / 2))` rows\n\n## Keybindings inside tree selector\n\n- `Up` / `Down`: move selection (wraps)\n- `Left` / `Right`: page up / page down\n- `Enter`: select node\n- `Esc`: clear search if active; otherwise close selector\n- `Ctrl+C`: close selector\n- `Type`: append to search query\n- `Backspace`: delete search character\n- `Shift+L`: edit/clear label on selected entry\n- `Ctrl+O`: cycle filter forward\n- `Shift+Ctrl+O`: cycle filter backward\n- `Alt+D/T/U/L/A`: jump directly to specific filter mode\n\n## Filters and search semantics\n\nFilter modes (`TreeList`):\n\n1. `default`\n2. `no-tools`\n3. `user-only`\n4. `labeled-only`\n5. `all`\n\n### `default`\n\nShows conversational nodes plus any entry types not explicitly suppressed. 
It hides these setting/bookkeeping entry types:\n\n- `label`\n- `custom`\n- `model_change`\n- `thinking_level_change`\n\nOther internal entry types that are not rendered specially may appear as blank rows in current code.\n\n### `no-tools`\n\nSame as `default`, plus hides `toolResult` messages.\n\n### `user-only`\n\nOnly `message` entries where role is `user`.\n\n### `labeled-only`\n\nOnly entries that currently resolve to a label.\n\n### `all`\n\nEverything in the session tree, including bookkeeping/custom entries.\n\n### Tool-only assistant node behavior\n\nAssistant messages that contain **only tool calls** (no text) are hidden by default in all filtered views unless:\n\n- message is error/aborted (`stopReason` not `stop`/`toolUse`), or\n- it is the current leaf (always kept visible)\n\n### Search behavior\n\n- Query is tokenized by spaces\n- Matching is case-insensitive\n- All tokens must match (AND semantics)\n- Searchable text includes label, role, and type-specific content (message text, branch summary text, custom type, tool command snippets, etc.)\n\n## Selection outcomes (important)\n\n`navigateTree` computes new leaf behavior from selected entry type:\n\n### Selecting `user` message\n\n- New leaf becomes selected entry’s `parentId`\n- If parent is `null` (root user message), leaf resets to root (`resetLeaf()`)\n- Selected message text is copied to editor for editing/resubmit\n\n### Selecting `custom_message`\n\n- Same leaf rule as user messages (`parentId`)\n- Text content is extracted and copied to editor\n\n### Selecting non-user node (assistant/tool/summary/compaction/custom bookkeeping/etc.)\n\n- New leaf becomes selected node id\n- Editor is not prefilled\n\n### Selecting current leaf\n\n- No-op; selector closes with “Already at this point”\n\n```text\nSelection decision (simplified):\n\nselected node\n │\n ├─ is current leaf? ── yes ──> close selector (no-op)\n │\n ├─ is user/custom_message? 
── yes ──> leaf := parentId (or resetLeaf for root)\n │ + prefill editor text\n │\n └─ otherwise ──> leaf := selected node id\n + no editor prefill\n```\n\n## Summary-on-switch flow\n\nSummary prompt is controlled by `branchSummary.enabled` (default: `false`).\n\nWhen enabled, after picking a node the UI asks:\n\n- `No summary`\n- `Summarize`\n- `Summarize with custom prompt`\n\nFlow details:\n\n- Escape in summary prompt reopens tree selector\n- Custom prompt cancellation returns to summary choice loop\n- During summarization, UI shows loader and binds `Esc` to `abortBranchSummary()`\n- If summarization aborts, tree selector reopens and no move is applied\n\n`navigateTree` internals:\n\n- Collects abandoned-branch entries from old leaf to common ancestor\n- Emits `session_before_tree` (extensions can cancel or inject summary)\n- Uses default summarizer only if requested and needed\n- Applies move with:\n - `branchWithSummary(...)` when summary exists\n - `branch(newLeafId)` for non-root move without summary\n - `resetLeaf()` for root move without summary\n- Replaces agent conversation with rebuilt session context\n- Emits `session_tree`\n\nNote: if user requests summary but there is nothing to summarize, navigation proceeds without creating a summary entry.\n\n## Labels\n\nLabel edits in tree UI call `appendLabelChange(targetId, label)`.\n\n- non-empty label sets/updates resolved label\n- empty label clears it\n- labels are stored as append-only `label` entries\n- tree nodes display resolved label state, not raw label-entry history\n\n## `/tree` vs adjacent operations\n\n| Operation | Scope | Result |\n| --------- | ------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| `/tree` | Current session file | Moves leaf to selected point (same file) |\n| `/branch` | Usually current session file -> new session file | By 
default branches from selected **user** message into a new session file; if `doubleEscapeAction = \"tree\"`, `/branch` opens tree navigation UI instead |\n| `/fork` | Whole current session | Duplicates session into a new persisted session file |\n| `/resume` | Session list | Switches to another session file |\n\nKey distinction: `/tree` is a navigation/repositioning tool inside one session file. `/branch`, `/fork`, and `/resume` all change session-file context.\n\n## Operator workflows\n\n### Re-run from an earlier user prompt without losing current branch\n\n1. `/tree`\n2. search/select earlier user message\n3. choose `No summary` (or summarize if needed)\n4. edit prefilled text in editor\n5. submit\n\nEffect: new branch grows from selected point within same session file.\n\n### Leave current branch with context breadcrumb\n\n1. enable `branchSummary.enabled`\n2. `/tree` and select target node\n3. choose `Summarize` (or custom prompt)\n\nEffect: a `branch_summary` entry is appended at the target position before continuing.\n\n### Investigate hidden bookkeeping entries\n\n1. `/tree`\n2. press `Alt+A` (all)\n3. search for `model`, `thinking`, `custom`, or labels\n\nEffect: inspect full internal timeline, not just conversational nodes.\n\n### Bookmark pivot points for later jumps\n\n1. `/tree`\n2. move to entry\n3. `Shift+L` and set label\n4. later use `Alt+L` (`labeled-only`) to jump quickly\n\nEffect: fast navigation among durable branch landmarks.\n",
100
100
  "ttsr-injection-lifecycle.md": "# TTSR Injection Lifecycle\n\nThis document covers the current Time Traveling Stream Rules (TTSR) runtime path from rule discovery to stream interruption, retry injection, extension notifications, and session-state handling.\n\n## Implementation files\n\n- [`../src/sdk.ts`](../packages/coding-agent/src/sdk.ts)\n- [`../src/export/ttsr.ts`](../packages/coding-agent/src/export/ttsr.ts)\n- [`../src/session/agent-session.ts`](../packages/coding-agent/src/session/agent-session.ts)\n- [`../src/session/session-manager.ts`](../packages/coding-agent/src/session/session-manager.ts)\n- [`../src/prompts/system/ttsr-interrupt.md`](../packages/coding-agent/src/prompts/system/ttsr-interrupt.md)\n- [`../src/capability/index.ts`](../packages/coding-agent/src/capability/index.ts)\n- [`../src/extensibility/extensions/types.ts`](../packages/coding-agent/src/extensibility/extensions/types.ts)\n- [`../src/extensibility/hooks/types.ts`](../packages/coding-agent/src/extensibility/hooks/types.ts)\n- [`../src/extensibility/custom-tools/types.ts`](../packages/coding-agent/src/extensibility/custom-tools/types.ts)\n- [`../src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n\n## 1. 
Discovery feed and rule registration\n\nAt session creation, `createAgentSession()` loads discovered rules, constructs a `TtsrManager`, and buckets rules through `bucketRules(...)`:\n\n```ts\nconst ttsrSettings = settings.getGroup(\"ttsr\");\nconst ttsrManager = new TtsrManager(ttsrSettings);\nconst rulesResult = await loadCapability<Rule>(ruleCapability.id, { cwd });\nconst { rulebookRules, alwaysApplyRules } = bucketRules(\n rulesResult.items,\n ttsrManager,\n {\n builtinRules: ttsrSettings.builtinRules,\n disabledRules: ttsrSettings.disabledRules,\n },\n);\n```\n\n`bucketRules(...)` drops names listed in `ttsr.disabledRules`, drops embedded `builtin-defaults` rules when `ttsr.builtinRules === false`, registers accepted TTSR rules, and then routes the remaining rules to always-apply/rulebook buckets.\n\n### Pre-registration dedupe behavior\n\n`loadCapability(\"rules\")` deduplicates by `rule.name` with first-wins semantics (higher provider priority first). Shadowed duplicates are removed before TTSR registration.\n\n### `TtsrManager.addRule()` behavior\n\nRegistration is skipped when:\n\n- `rule.condition` is absent or all condition regexes fail to compile\n- a rule with the same `rule.name` was already registered in this manager\n- the rule scope excludes all monitored streams\n\nInvalid regex conditions and unreachable scopes are logged as warnings and ignored; session startup continues. If a TTSR rule defines `globs`, those globs are compiled as a global file-path gate for matching.\n\n### Setting caveat\n\n`TtsrSettings.enabled` is loaded into the manager but is not currently checked in runtime gating. If TTSR rules exist, matching still runs.\n\n## 2. 
Streaming monitor lifecycle\n\nTTSR detection runs inside `AgentSession.#handleAgentEvent`.\n\n### Turn start\n\nOn `turn_start`, the stream buffer is reset:\n\n- `ttsrManager.resetBuffer()`\n\n### During stream (`message_update`)\n\nWhen assistant updates arrive and rules exist:\n\n- monitor `text_delta`, `thinking_delta`, and `toolcall_delta`\n- append delta into a source/tool scoped manager buffer\n- call `checkDelta(delta, matchContext)`\n\n`checkDelta()` iterates registered rules and returns all matching rules that pass scope, global path-glob, condition, and repeat policy checks.\n\n## 3. Trigger decision and immediate abort path\n\nWhen one or more rules match and at least one matched rule allows interruption:\n\n1. Matched rules are deduplicated into `#pendingTtsrInjections`.\n2. `#ttsrAbortPending = true` and a TTSR resume gate is created.\n3. `agent.abort()` is called immediately.\n4. `ttsr_triggered` event is emitted asynchronously (fire-and-forget).\n5. retry work is scheduled via the post-prompt task scheduler with a 50ms delay.\n\nAbort is not blocked on extension callbacks.\n\n## 4. Retry scheduling, context mode, and reminder injection\n\nAfter the 50ms timeout:\n\n1. `#ttsrAbortPending = false`\n2. read `ttsrManager.getSettings().contextMode`\n3. if `contextMode === \"discard\"`, drop the targeted partial assistant output with `agent.replaceMessages(...slice(0, targetAssistantIndex))`\n4. build injection content from pending rules using `ttsr-interrupt.md` template\n5. append and persist a hidden `custom_message`/runtime custom message with `customType: \"ttsr-injection\"` and `details.rules`\n6. 
mark those rule names injected, persist a `ttsr_injection` entry, and call `agent.continue()` to retry generation\n\nTemplate payload is:\n\n```xml\n<system-interrupt reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n...\n{{content}}\n</system-interrupt>\n```\n\nPending injections are cleared after content generation.\n\n### `contextMode` behavior on partial output\n\n- `discard`: partial/aborted assistant message is removed before retry.\n- `keep`: partial assistant output remains in conversation state; reminder is appended after it.\n\n### Non-interrupting matches\n\nNon-interrupting matches split by `matchContext.source`:\n\n- **`source === \"tool\"` (tool-source match).** The rule is bucketed into `#perToolTtsrInjections`, keyed by the matched tool call's `id`. There is **no** deferred follow-up turn and the stream is not aborted. When the tool actually produces a result, the `afterToolCall` hook prepends a rendered `ttsr-tool-reminder.md` block to `ctx.result.content` (a single `text` block inserted ahead of the tool's own content), and persists a `ttsr_injection` entry with the consumed rule names. The template payload is:\n\n ```xml\n <system-reminder reason=\"rule_violation\" rule=\"{{name}}\" path=\"{{path}}\">\n ...\n {{content}}\n </system-reminder>\n ```\n\n- **`source === \"text\"` / `\"thinking\"` (prose-source match).** Behavior is unchanged: the rule is queued in `#pendingTtsrInjections` and, after a successful non-error, non-aborted assistant message, `AgentSession` injects the hidden `ttsr-injection` custom message as a follow-up and schedules continuation.\n\nWithin a single matching batch, each rule is attached to exactly one sibling tool call — if multiple sibling tool calls would satisfy the same rule, deduplication picks one and the others are left untouched. 
Multiple distinct rules can still fold onto the same tool call.\n\n#### Implications for tool authors and transcript readers\n\n- The tool's own `toolResult` content is preserved verbatim; the reminder is **prepended** as an additional leading text block. Renderers that assume `content[0]` is the tool's primary output must scan past any block whose text begins with `<system-reminder reason=\"rule_violation\"` (or filter on the wrapper tag) to find the real payload.\n- The reminder is in-band on the tool result, not a separate `custom_message`/`ttsr-injection` entry. Transcript readers looking for non-interrupting TTSR activity on tool-source rules MUST inspect tool results (and the persisted `ttsr_injection` entry list), not just synthetic injection entries.\n- A single tool result may carry reminders for several rules concatenated with a blank line between rendered templates.\n- If the assistant message ends with `stopReason === \"aborted\"` or `\"error\"` before the matched tools run, the pending per-tool buckets are cleared — those rules are **not** persisted as injected and remain eligible to re-trigger on a future turn (subject to repeat policy).\n\n## 5. Repeat policy and gap logic\n\n`TtsrManager` tracks `#messageCount` and per-rule `lastInjectedAt`.\n\n### `repeatMode: \"once\"`\n\nA rule can trigger only once: after it has an injection record, it does not re-trigger.\n\n### `repeatMode: \"after-gap\"`\n\nA rule can re-trigger only when:\n\n- `messageCount - lastInjectedAt >= repeatGap`\n\n`messageCount` increments on `turn_end`, so the gap is measured in completed turns, not stream chunks.\n\n## 6. 
Event emission and extension/hook surfaces\n\n### Session event\n\n`AgentSessionEvent` includes:\n\n```ts\n{ type: \"ttsr_triggered\"; rules: Rule[] }\n```\n\n### Extension runner\n\n`#emitSessionEvent()` routes the event to:\n\n- extension listeners (`ExtensionRunner.emit({ type: \"ttsr_triggered\", rules })`)\n- local session subscribers\n\n### Hook and custom-tool typing\n\n- extension API exposes `on(\"ttsr_triggered\", ...)`\n- hook API exposes `on(\"ttsr_triggered\", ...)`\n- custom tools receive `onSession({ reason: \"ttsr_triggered\", rules })`\n\n### Interactive-mode rendering difference\n\nInteractive mode uses `session.isTtsrAbortPending` to suppress showing the aborted assistant stop reason as a visible failure during TTSR interruption, and renders a `TtsrNotificationComponent` when the event arrives.\n\n## 7. Persistence and resume state (current implementation)\n\n`SessionManager` persists injected-rule state:\n\n- entry type: `ttsr_injection`\n- append API: `appendTtsrInjection(ruleNames)`\n- query API: `getInjectedTtsrRules()`\n- context reconstruction includes `SessionContext.injectedTtsrRules`\n\n`TtsrManager` supports restoration via `restoreInjected(ruleNames)`.\n\n### Current wiring status\n\nIn the current runtime path:\n\n- interrupted injections append a hidden `custom_message` with `customType: \"ttsr-injection\"` and append a `ttsr_injection` entry via `appendTtsrInjection(...)`\n- deferred non-interrupting prose-source injections are marked/persisted when their queued custom message reaches `message_end`\n- non-interrupting tool-source injections are marked at match time and persisted via `appendTtsrInjection(...)` from the `afterToolCall` hook when the matched tool's result is produced\n- `createAgentSession()` restores `existingSession.injectedTtsrRules` into `ttsrManager`\n\nNet effect: injected-rule suppression is persisted/restored across session reload/resume for the current branch path.\n\n## 8. 
Race boundaries and ordering guarantees\n\n### Abort vs retry callback\n\n- abort is synchronous from the TTSR handler's perspective (`agent.abort()` is called immediately)\n- retry is deferred by timer (`50ms`)\n- extension notification is asynchronous and intentionally not awaited before abort/retry scheduling\n\n### Multiple matches in same stream window\n\n`checkDelta()` returns all currently matching eligible rules for that scoped buffer. Pending injections are deduplicated by rule name before injection.\n\n### Between abort and continue\n\nDuring the timer window, state can change (user interruption, mode actions, additional events). The retry call is best-effort: `agent.continue().catch(() => {})` swallows follow-up errors.\n\n## 9. Edge cases summary\n\n- Invalid `condition` regex: skipped with warning; other conditions/rules continue.\n- Duplicate rule names at capability layer: lower-priority duplicates are shadowed before registration.\n- Duplicate names at manager layer: second registration is ignored.\n- `ttsr.disabledRules`: listed names are dropped before TTSR registration and are not surfaced through always-apply/rulebook buckets.\n- `ttsr.builtinRules: false`: embedded `builtin-defaults` rules are dropped before TTSR registration; user/project rules still load.\n- `globs` on a TTSR rule require the stream match context to include at least one matching file path.\n- `contextMode: \"keep\"`: partial violating output can remain in context before reminder retry.\n- `interruptMode: \"never\"`: prose-source matches queue a deferred hidden injection after a successful assistant message; tool-source matches fold an in-band `<system-reminder>` into the matched tool call's `toolResult` content via the `afterToolCall` hook (no mid-stream abort, no separate follow-up turn).\n- Tool-source non-interrupting buckets are cleared when the parent assistant message ends with `stopReason === \"aborted\"` or `\"error\"`, so rules whose target tool never produced a result remain 
eligible to re-trigger.\n- Repeat-after-gap depends on turn count increments at `turn_end`; mid-turn chunks do not advance gap counters.\n",
101
101
  "tui-core-renderer.md": "# TUI core renderer — invariants & failure modes\n\nWhat you are dealing with before you touch the rendering engine. This is the\ncompanion to [`tui-runtime-internals.md`](./tui-runtime-internals.md): that doc\nmaps the *flow* (input → component tree → render); this doc explains what\n**does not work, why it keeps breaking, and the invariants you must not\nviolate**. Scope is the core engine only:\n\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts) — render planner, intent emitters, native-scrollback bookkeeping, cursor placement.\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts) — `ProcessTerminal`, capability probes, private-CSI reassembly.\n- [`packages/tui/src/terminal-capabilities.ts`](../packages/tui/src/terminal-capabilities.ts) — `TERMINAL` profile, ED3 risk / sync-output / DECCARA / image detection.\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts) — escape-sequence reassembly.\n- [`packages/tui/src/utils.ts`](../packages/tui/src/utils.ts) — width/slice/wrap (the width model).\n- [`packages/tui/src/kitty-graphics.ts`](../packages/tui/src/kitty-graphics.ts) + [`components/image.ts`](../packages/tui/src/components/image.ts) — inline images.\n- [`packages/tui/src/deccara.ts`](../packages/tui/src/deccara.ts) — rectangular-fill optimizer.\n\nApplication-layer renderers (transcript, tool calls, session tree, editor,\nwidgets) are **out of scope** — they live in `packages/coding-agent`.\n\n---\n\n## 1. The one thing to understand first\n\n> **The renderer cannot observe the terminal's scroll position on most hosts it\n> runs on.** Every decision about rewriting native scrollback is therefore a\n> *guess*, and the guess has two opposite failure modes that cannot both be\n> avoided by a single policy.\n\nWe keep our transcript on the **normal screen**. 
We deliberately have not moved\nthe engine to the alternate screen: alt-screen would make the terminal handle\nviewport isolation, but the transcript/resume affordances would disappear with\nthe alternate buffer. Keeping the normal screen means\n*we* own native scrollback, which means we must decide, per frame, whether it is\nsafe to rebuild it. To rebuild history we emit xterm **ED3** (`CSI 3 J`, erase\nsaved lines). Deciding when ED3 is safe requires knowing whether the user has\nscrolled up — and we usually can't:\n\n- **ConPTY hosts** (Windows Terminal, Tabby, Hyper, VS Code, conhost): the\n pseudo-console buffer is pinned to the visible grid, so any \"am I at the\n bottom?\" console query answers \"yes\" even when the reader scrolled up. The\n probe *lies*.\n- **POSIX terminals**: there is no scroll-position API at all. The probe is\n *absent*.\n\nSo `Terminal.isNativeViewportAtBottom()` returns `true` / `false` / **`undefined`**,\nand `undefined` (\"unknown\") is the common case. The whole renderer is built\naround not trusting `undefined`.\n\n### The two-way bind\n\n| If you guess… | …and you're wrong | Symptom |\n|---|---|---|\n| **Eager** (rebuild now → emit `CSI 3 J`) | reader was scrolled up | **YANK** to top + **FLASH** on terminals that snap scroll on ED3 |\n| **Defer** (emit nothing, reconcile later) | viewport really was at the bottom | **CORRUPTION** (stale/duplicated rows) + **invisible-until-resize** |\n\nYank, flash, and buffer corruption are **the same bug wearing three masks.**\nHistorically, every fix that suppressed one mask for one terminal class\nre-enabled the opposite mask for a neighbouring class, and the follow-on\ncomplaint landed within a day. If you \"fix flashing\" by making rebuilds more\neager, you will reintroduce yank. If you \"fix yank\" by deferring more, you will\nreintroduce corruption / invisibility. **Do not move this lever without the\nfidelity harness (§9) green.**\n\n---\n\n## 2. 
The render-intent planner (what you are editing)\n\n`#doRender` is split into a **planner** (`#planRender`) that classifies a frame\ninto exactly one `RenderIntent`, and one `#emit*` method per intent that owns\nthe bytes written and the state update. All state flows through a single\n`#commit` checkpoint at the end of every emitter. The intent union\n(`tui.ts`, search `type RenderIntent`):\n\n| Intent | Emits | When |\n|---|---|---|\n| `noop` | cursor only | nothing visible changed |\n| `initial` | clear viewport, paint transcript, **keep** prior shell scrollback | first paint after `start()` |\n| `sessionReplace` | clear viewport **+ ED3** (outside multiplexers) | caller forced `{ clearScrollback: true }` (switch/branch/reload/resume) |\n| `historyRebuild` | clear viewport **+ ED3** (outside multiplexers) | geometry change rewrapped history, or a proven-at-tail rebuild |\n| `overlayRebuild` | rebuild viewport with overlay composite | overlay visibility changed |\n| `liveRegionPinned` | relative moves + per-row rewrite/suffix-clear + `\\r\\n` | foreground streaming on an ED3-risk host, commit-as-you-go |\n| `viewportRepaint` | rewrite the visible viewport in place (optional `appendFrom` tail first) | safe non-destructive repaint |\n| `deferredShrink` | padded viewport repaint, history left dirty | bottom-anchored shrink, viewport unobservable |\n| `deferredMutation` | **zero bytes**, history left dirty | row-reindexing edit while possibly scrolled |\n| `shrink` / `diff` | trailing-row clear / changed-line diff | ordinary in-place updates |\n\n**ED3 (`CSI 3 J`) is emitted in exactly one place** — `#emitFullPaint` when\n`clearScrollback: true` (`\\x1b[2J\\x1b[H\\x1b[3J`). The ordinary clear is\n**non-destructive**: `\\x1b[22J` (copy-screen-to-scrollback, only when\n`TERMINAL.supportsScreenToScrollback`) then `\\x1b[2J\\x1b[H`, **no `3J`**. 
ED3 is\nreached only by `sessionReplace`/`historyRebuild`/`overlayRebuild`, and those\nsuppress the scrollback clear inside multiplexers (`isMultiplexerSession()` =\n`TMUX || STY || ZELLIJ`).\n\n### The predicate gates\n\nThree private predicates encode the guessing policy. Do not \"simplify\" them —\neach branch is load-bearing:\n\n- `#canReplayNativeScrollbackAtCheckpoint(atBottom)` → `atBottom === true`. A\n rebuild at a **keystroke checkpoint** (prompt submit) is allowed only with a\n *positive* at-tail proof. A prompt submit is **no longer** treated as implicit\n proof for an unobservable host.\n- `#canRebuildNativeScrollbackLive(atBottom, allowUnknown)` → `true` iff\n `atBottom === true`, **or** (`atBottom === undefined && allowUnknown &&\n platform !== \"win32\"`). i.e. live ED3 during streaming requires either proof\n or an explicit direct-user-input opt-in, and **never** on win32.\n- `#nativeViewportIsScrolled(atBottom, allowUnknown)` → `true` if\n `atBottom === false`, or (`undefined && win32 && !allowUnknown`). Used to\n decide deferral.\n\n`allowUnknownViewportMutation` is the **direct-user-input opt-in** (autocomplete\n/ IME / a keystroke the user just typed). A keystroke pins the host viewport to\nthe bottom, so it is safe to repaint live then. It is **not** set by passive\nstreaming. `setEagerNativeScrollbackRebuild(true)` is the streaming opt-in; on\nED3-risk hosts it is downgraded so it never promotes to a live ED3 clear.\n\n### Deferral + checkpoint discipline\n\nWhen the viewport is unobservable during **passive streaming**, the planner\ndefers (`deferredMutation`/`deferredShrink`/`viewportRepaint`) and marks native\nscrollback dirty (`#markNativeScrollbackDirty()`). Reconciliation happens later\nat a checkpoint via `refreshNativeScrollbackIfDirty()` — and only if\n`#canReplayNativeScrollbackAtCheckpoint` proves at-tail. 
The streaming-defer +\nlive-region-pin seam (`NativeScrollbackLiveRegion`,\n`getNativeScrollbackLiveRegionStart` / `getNativeScrollbackCommitSafeEnd`) is the\n**actively-churning** part of the engine; if you change how transient rows are\ncommitted, every structural-mutation branch (shrink **and** grow/offscreen-edit)\nmust defer **symmetrically**, or you reopen the corruption family.\n\n---\n\n## 3. The five fault families\n\n### YANK — viewport snapped to top — NOT fully converged\n- **Mechanism:** a live `historyRebuild` fires `CSI 3 J` while the reader is\n scrolled up; ED3-snap terminals reset the visible viewport to the top of the\n (now-erased) scrollback.\n- **Trigger to avoid:** treating an unobservable probe as \"at bottom\" during\n *passive* streaming, or OR-ing an eager-streaming flag into the live ED3 path.\n- **Current stance:** never emit ED3 on an unobservable host during passive\n streaming; defer and reconcile at a keystroke checkpoint. ConPTY/win32 never\n trust the probe at all.\n\n### CORRUPTION — duplicated / stale rows — NOT fully converged\n- **Mechanism:** the flip side of the yank fix. A deferred/repainted frame\n leaves rows already committed to native scrollback out of sync with the live\n viewport; the scrollback↔viewport seam duplicates (e.g. 
a 2-row dup, a\n streaming-tail dup, or an async-expansion dup).\n- **Trigger to avoid:** repainting the viewport over scrollback that still holds\n the old copy; a frozen/deferred block whose snapshot no longer matches after\n the region above it reflowed; one mutation branch deferring while its mirror\n branch repaints.\n- **Current stance:** commit only the **stable prefix** line-count to native\n history; keep unstable rows out; reconcile drift at the checkpoint; park the\n hardware cursor at real content bottom, not padded bottom.\n\n### FLASH (and invisible-until-resize) — NOT fully converged\n- **Two distinct causes, one symptom:**\n - *Flash* = eager ED3 rebuild wrapped in DEC 2026 BSU/ESU fired per streaming\n frame on a terminal that clamps scroll on ED3 (VTE/GNOME family).\n - *Invisible-until-resize* = the defer fix over-firing, so a structural frame\n emits **zero bytes** (`deferredMutation` returns nothing) until a resize\n forces a repaint.\n- **Trigger to avoid:** env-detection that misses a flashing terminal (SSH\n strips `VTE_VERSION`; some hosts set no distinguishing var); collapsing an\n `undefined` probe into a definite scrolled/at-bottom verdict.\n- **Current stance:** confine ED3 to the destructive path; auto-disable DEC 2026\n at runtime when the terminal reports it unsupported (DECRQM), with\n `PI_NO_SYNC_OUTPUT` as a manual hatch; keep autowrap discipline regardless.\n\n### WIDTH — measurement crashes / fidelity — crash class dead, accuracy unproven\n- **Mechanism:** the measured column width of a line disagreed with the\n terminal's painted cells (emoji, wide graphemes, combining marks, Hangul\n jamo), and the old render loop **threw** on any mismatch — a 1-cell cosmetic\n error became a fatal whole-agent crash.\n- **Current stance:** **never throw in the render hot path — clamp.** The loop\n truncates over-wide lines with `truncateToWidth`/`sliceByColumn` and logs\n (under debug) instead of dying. 
Width is owned end-to-end by one native UAX#11\n engine shared by measure/slice/wrap (see §6). Accuracy across all scripts\n (e.g. RTL/combining marks) is still not proven by a green gate.\n\n### PROBE — stray bytes injected as keystrokes — RESOLVED\n- **Mechanism:** a private-CSI probe reply (DA1 / kitty / mode 2031) split\n across a stdin flush; the unmatched prefix was dropped and the continuation\n bytes were forwarded as keystrokes.\n- **Current stance:** buffer-and-reassemble partial CSI responses; give each\n probe a typed sentinel owner. This is the **one cleanly-closed family** —\n because its contract is *bounded and observable* (bytes in = bytes out),\n unlike the unobservable-viewport families. See §7.\n\n---\n\n## 4. Invariants — MUST / NEVER\n\nThese are the rules the recurrence taught us. Treat them as load-bearing.\n\n1. **NEVER add a new `CSI 3 J` (ED3) callsite.** ED3 must flow only through\n `#emitFullPaint({ clearScrollback: true })`, for the existing destructive\n intents (`sessionReplace`, proven/safe `historyRebuild`, `overlayRebuild`).\n Ordinary redraws use the non-destructive `\\x1b[22J` + `\\x1b[2J\\x1b[H` clear.\n2. **NEVER trust an unobservable viewport probe (`undefined`) for *passive*\n streaming.** Only a positive at-tail proof, or a direct-user-input opt-in\n (`allowUnknownViewportMutation`), authorizes a live rebuild — and never on\n win32/ConPTY.\n3. **NEVER throw in the render hot path.** Clamp over-wide lines; a width\n mismatch is cosmetic, not fatal.\n4. **NEVER let a defer path emit a structurally-changed frame as zero bytes\n while at the bottom** — that is invisible-until-resize. `deferredMutation`/\n `deferredShrink` are only safe when the viewport is (or may be) scrolled.\n5. **Defer symmetrically.** If one structural-mutation branch (shrink) defers on\n an unobservable ED3-risk host, the mirror branch (grow / offscreen-edit) must\n too. Asymmetry reopens corruption.\n6. 
**Commit only the stable prefix to native history.** Transient/unsettled rows\n stay out of scrollback until a checkpoint; reconcile drift at the checkpoint.\n7. **Park the hardware cursor at real content bottom**, not the padded viewport\n bottom, or height shrinks scroll live rows into scrollback and duplicate them\n per resize step.\n8. **Cursor writes live *inside* the synchronized-output frame**, before ESU —\n never as a second frame after it (that teleports/blinks the caret).\n9. **Detect terminal *risk*, not terminal *brand*, and default unknown to\n risky.** Env sniffing is necessarily incomplete (see §5); never assume an\n un-enumerated host is safe.\n10. **Multiplexers (tmux/screen/zellij) get no destructive scrollback clear and\n no viewport probe.** ED3 is a no-op there and a full replay duplicates the\n transcript; repaint in place and rely on the pinned/commit-as-you-go path.\n11. **Any change to the eager/defer lever, the predicates, or the live-region\n seam must be validated by the render-stress fidelity harness (§9)** across\n `{win32, POSIX} × {unknown, scrolled, at-bottom}`, not by a single-terminal\n smoke test.\n\n---\n\n## 5. Terminal capability detection (and why it is fragile)\n\n`TERMINAL` (`terminal-capabilities.ts`) is resolved once at import from\n`TERMINAL_ID` plus environment sniffing. The detection helpers are pure and\nparameterized over `(env, platform)` so they are unit-testable:\n\n- `detectTerminalEagerEraseScrollbackRisk(env, platform)` → is a live ED3\n rebuild unsafe here? Current policy: `false` on win32 (dedicated ConPTY\n deferral paths handle it) and when `PI_TUI_ED3_SAFE=1`; otherwise **`true`**\n for `WT_SESSION` (WT fronting WSL), SSH/tmux/screen/zellij, known\n ED3-snap/scrollback-clearing terminals (WezTerm, kitty, ghostty, alacritty,\n VTE, iTerm2, Apple Terminal, GNOME Terminal, Ptyxis, xfce4-terminal), Linux\n truecolor, **and every other unknown POSIX terminal**. 
The default is *risky*\n on purpose.\n- `shouldEnableSynchronizedOutputByDefault(env, id)` → DEC 2026 default. Precedence:\n user opt-out (`PI_NO_SYNC_OUTPUT`/`PI_TUI_SYNC_OUTPUT=0`) → user force-on\n (`PI_FORCE_SYNC_OUTPUT=1`/`PI_TUI_SYNC_OUTPUT=1`) → `TERM_FEATURES` advertises\n `Sy` → `WT_SESSION` (WT/WSL) → known direct terminals\n (kitty/ghostty/wezterm/iterm2/alacritty/vscode; SSH passes through) → off for\n risky multiplexers and everything else (VTE-family, GNU screen, Apple Terminal,\n legacy conhost, unknown). Reconciled at runtime by the DECRQM mode-2026 report:\n a positive report **enables** sync (upgrading default-off muxes like\n zellij/tmux-master), a negative one disables it; a user override still wins.\n `synchronizedOutputUserOverride(env)` is the shared opt-out/force resolver.\n- `detectRectangularSgrSupport(id, env)` → DECCARA fills: **kitty only**\n (ghostty does not implement the SGR-background extension), off in multiplexers\n and under `PI_NO_DECCARA`.\n\n**Why this keeps leaking:** terminal class is inferred from env vars that are\n**not durable**. `VTE_VERSION` is stripped by `sshd` (default `AcceptEnv`);\n`COLORTERM` is also not in default `AcceptEnv`; some hosts (Tabby) set no\ndistinguishing var; WSL-fronting-WT is neither pure win32 nor pure POSIX. Every\nmissed env var is a missed terminal class is a new complaint. The mitigations\nare: (a) **default unknown to risky** rather than safe, and (b) detect by\n*behavior/handshake* (DECRQM) where possible rather than a host allow-list. When\nyou add a terminal, add it to the pure detector and add the **SSH-stripped env\nshape** to the test, not just the env-present shape.\n\n---\n\n## 6. Width model\n\n`visibleWidth` / `truncateToWidth` / `sliceByColumn` / `wrapTextWithAnsi`\n(`utils.ts`) all route through **one native UAX#11 engine** (`@oh-my-pi/pi-natives`,\nRust `unicode-width`). 
We deliberately dropped `Bun.stringWidth` because it\ndisagreed with the engine on combining marks and jamo, and mixing two width\nmodels in measure-vs-slice produced the crashes.\n\n- Fast path: printable ASCII is one cell per code unit.\n- ZWJ pictographic emoji take the `visibleWidthByGrapheme` override (ANSI spans\n excised first, then `Intl.Segmenter`), because the native scanner double-counts\n SGR bytes when a sequence is split by the segmenter.\n- OSC 66 sized text (`\\x1b]66;…`) takes the native path.\n\n**Rule:** if you add a code path that measures width, route it through these\nhelpers. Never reintroduce `Bun.stringWidth` or a parallel width table — the\nmeasure model and the slice/wrap model must agree, or you get over-wide lines\nthat the hot-path clamp silently truncates (cosmetic loss) or, worse, seam\nduplication.\n\n---\n\n## 7. Capability probes & stdin reassembly\n\n`ProcessTerminal` fuses capability queries with a bare DA1 (`CSI c`) sentinel so\na non-answering terminal is detected when DA1 returns first. Replies can arrive\n**split across a stdin flush**, so:\n\n- `#privateCsiResponseBuffer` accumulates `\\x1b[?…` partials while a sentinel is\n outstanding, rejoins on the terminator byte (0x40–0x7e), then runs the\n DA1/kitty/mode-2031 handlers on the **complete** reply. A new `\\x1b`\n mid-reassembly or >256 bytes abandons the partial so real keys (e.g. arrow\n `\\x1b[A`) still reach input.\n- `#da1SentinelOwners` is a **typed FIFO** discriminated by `kind` (`keyboard`,\n `osc11`, `privateMode`, `kittyGraphicsProbe`, `osc99Probe`) so a keyboard DA1\n cannot be mistaken for an OSC 11 / DECRQM / graphics-probe sentinel.\n- DECRQM probes (`#queryPrivateMode(2026/2048/2031)`) record support via DECRPM\n and drive runtime feature gating (e.g. auto-disabling DEC 2026 sync output).\n\n**Rule:** any new probe must own a typed sentinel and survive a split reply. 
The\ncontract is bytes-in = bytes-out; it is testable, so test it (feed the reply\nbyte-by-byte and assert nothing leaks to the input handler).\n\n---\n\n## 8. Inline images & memory\n\nKitty images are **transmit-once, place-many** (`kitty-graphics.ts`):\n`encodeKittyTransmit` (`a=t`, keyed by a stable `i=`) writes the base64 a single\ntime; repaints emit only `encodeKittyPlacement` (`a=p`). Text clears\n(`CSI 2 J` / `CSI 3 J`) do **not** purge the terminal's image store — only\n`encodeKittyDeleteImage` (`a=d,d=I`) does. `ImageBudget` (`components/image.ts`)\nkeeps only the most-recent N images live; demoted images render their text\nfallback and are explicitly purged.\n\n**Rule:** never re-emit full base64 per frame (it pegged RAM and pinned the UI\nthread). Kitty Unicode placeholders are default-on only for kitty/ghostty\n(`PI_NO_KITTY_PLACEHOLDERS` / `PI_KITTY_PLACEHOLDERS`); other Kitty-protocol\nhosts render placeholder cells as literal PUA glyphs, so they fall back to\ndirect `a=p` placement.\n\n---\n\n## 9. The fidelity gate (use it)\n\n`packages/tui/test/render-stress-harness.ts` renders the renderer's **real emitted ANSI** into\na ghostty-web `VirtualTerminal` and asserts viewport fidelity (a scrolled reader\nstays put), background-column fidelity, and scrollback-buffer fidelity, across\nparameterized terminal shapes and randomized op sequences.\n\nThis harness is the structural fix for the whole recurrence: every guess-flip and\nsniffing-gap regression historically **shipped blind and was caught by a user**,\nbecause no automated \"a scrolled-up reader stays pinned across kitty/WT/WSL/\nConPTY\" assertion gated CI. 
**Before you change the eager/defer lever, a\npredicate, the live-region seam, or width math, run the stress harness and the\ntargeted repro tests** (`packages/tui/test/render-regressions.test.ts`,\n`packages/tui/test/streaming-scrollback-defer.test.ts`, the `issue-*-repro.test.ts` files).\nA change that passes one terminal and one seed is not verified.\n\n---\n\n## 10. Escape hatches (env vars)\n\n| Var | Effect |\n|---|---|\n| `PI_NO_SYNC_OUTPUT=1` | Disable DEC 2026 BSU/ESU wrappers (autowrap discipline stays on). For terminals that advertise but mishandle mode 2026. |\n| `PI_TUI_SYNC_OUTPUT=0\\|1` / `PI_FORCE_SYNC_OUTPUT=1` | Force sync output off / on. |\n| `PI_TUI_ED3_SAFE=1` | Declare the terminal safe for live ED3 (disables `eagerEraseScrollbackRisk`). |\n| `PI_NO_DECCARA` | Disable Kitty DECCARA rectangular-fill optimization (force padded-string fills). |\n| `PI_FORCE_IMAGE_PROTOCOL=kitty\\|iterm2\\|sixel\\|off` | Override image protocol detection. |\n| `PI_NO_KITTY_PLACEHOLDERS=1` / `PI_KITTY_PLACEHOLDERS=1` | Force Kitty Unicode placeholders off / on. |\n| `PI_CLEAR_ON_SHRINK=1` | Clear empty rows when content shrinks (default off). |\n| `PI_HARDWARE_CURSOR=1` | Show the real hardware cursor instead of a rendered one. |\n| `PI_NOTIFICATIONS=off\\|0\\|false` | Suppress terminal notifications. |\n| `PI_DEBUG_REDRAW=1` | Log the chosen render intent per frame to the debug log. |\n| `PI_TUI_DEBUG=1` | Dump per-render diff state under `/tmp/tui`. |\n\n---\n\n## 11. Before you touch the render core — checklist\n\n- [ ] Are you about to emit `CSI 3 J` anywhere other than the destructive\n `clearScrollback` path? **Stop.**\n- [ ] Does your change trust `isNativeViewportAtBottom() === undefined` as\n \"at bottom\" during passive streaming? **Stop.**\n- [ ] Did you change one structural-mutation branch without mirroring its\n sibling (shrink ↔ grow)? 
**Defer symmetrically.**\n- [ ] Could any frame now emit zero bytes while the viewport is at the bottom?\n That's invisible-until-resize.\n- [ ] Did you add a terminal by brand instead of by behavior, or skip the\n SSH-stripped env shape in the test?\n- [ ] Did you run `packages/tui/test/render-stress-harness.ts` + the repro suite across\n win32/POSIX × unknown/scrolled/at-bottom — not just one terminal?\n- [ ] New probe? Typed sentinel owner + split-reply test.\n- [ ] New width path? Routed through the shared native engine, clamped (never\n thrown) in the hot path.\n",
102
- "tui-runtime-internals.md": "# TUI runtime internals\n\nThis document maps the non-theme runtime path from terminal input to rendered output in interactive mode. It focuses on behavior in `packages/tui` and its integration from `packages/coding-agent` controllers.\n\n> **Editing the rendering engine itself?** Read\n> [`tui-core-renderer.md`](./tui-core-renderer.md) first — it documents the\n> failure modes (yank / corruption / flash / width crashes) and the invariants\n> the render planner, native-scrollback bookkeeping, and capability detection\n> must not violate.\n\n## Runtime layers and ownership\n\n- **`packages/tui` engine**: terminal lifecycle, stdin normalization, focus routing, render scheduling, differential painting, overlay composition, hardware cursor placement.\n- **`packages/coding-agent` interactive mode**: builds component tree, binds editor callbacks and keymaps, reacts to agent/session events, and translates domain state (streaming, tool execution, retries, plan mode) into UI components.\n\nBoundary rule: the TUI engine is message-agnostic. It only knows `Component.render(width)`, `handleInput(data)`, focus, and overlays. 
Agent semantics stay in interactive controllers.\n\n## Implementation files\n\n- [`packages/coding-agent/src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`packages/coding-agent/src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`packages/coding-agent/src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`packages/coding-agent/src/modes/components/custom-editor.ts`](../packages/coding-agent/src/modes/components/custom-editor.ts)\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts)\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts)\n- [`packages/tui/src/editor-component.ts`](../packages/tui/src/editor-component.ts)\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts)\n- [`packages/tui/src/components/loader.ts`](../packages/tui/src/components/loader.ts)\n\n## Boot and component tree assembly\n\n`InteractiveMode` constructs `TUI(new ProcessTerminal(), settings.get(\"showHardwareCursor\"))`, applies `clearOnShrink`, `tui.maxInlineImages`, and Kitty text-sizing settings, then creates persistent containers:\n\n- `chatContainer`\n- `pendingMessagesContainer`\n- `statusContainer`\n- `todoContainer`\n- `btwContainer`\n- `omfgContainer`\n- `errorBannerContainer`\n- `statusLine`\n- `hookWidgetContainerAbove`\n- `editorContainer` (holds `CustomEditor`)\n- `hookWidgetContainerBelow`\n\n`init()` wires the tree in that order after any startup warnings/welcome/changelog, focuses the editor, registers input handlers via `InputController`, starts TUI, pushes terminal title state, updates the editor border, and requests a forced render.\nA forced render (`requestRender(true)`) queues a viewport repaint or explicit session replacement; it does **not** throw away previous-line history by default.\n\n## Terminal lifecycle and stdin 
normalization\n\n`ProcessTerminal.start()`:\n\n1. Enables raw mode and bracketed paste.\n2. Attaches resize handler and refreshes dimensions.\n3. Enables Windows VT input mode when running on win32.\n4. Creates a `StdinBuffer` to split partial escape chunks into complete sequences.\n5. Queries Kitty keyboard protocol support (`CSI ? u`), then enables protocol flags if supported; otherwise enables modifyOtherKeys fallback after a short timeout.\n6. Queries OSC 11 background color and Mode 2031 appearance notifications for dark/light theme detection.\n7. Queries OSC 99 notification capabilities and Kitty temp-file graphics support.\n8. Starts periodic OSC 11 polling only where safe, then probes DEC private modes 2026/2048/2031 via DECRQM.\n\n`StdinBuffer` behavior:\n\n- Buffers fragmented escape sequences (CSI/OSC/DCS/APC/SS3).\n- Emits `data` only when a sequence is complete or timeout-flushed.\n- Detects bracketed paste and emits a `paste` event with raw pasted text.\n\nThis prevents partial escape chunks from being misinterpreted as normal keypresses.\n\n## Input routing and focus model\n\nInput path:\n\n`stdin -> ProcessTerminal -> StdinBuffer -> TUI.#handleInput -> focusedComponent.handleInput`\n\nRouting details:\n\n1. TUI runs registered input listeners first (`addInputListener`), allowing consume/transform behavior.\n2. TUI handles global debug shortcut (`shift+ctrl+d`) before component dispatch.\n3. If focused component belongs to an overlay that is now hidden/invisible, TUI reassigns focus to next visible overlay or saved pre-overlay focus.\n4. Key release events are filtered unless focused component sets `wantsKeyRelease = true`.\n5. 
After dispatch, TUI schedules render.\n\n`setFocus()` also toggles `Focusable.focused`, which controls whether components emit `CURSOR_MARKER` for hardware cursor placement.\n\n## Key handling split: editor vs controller\n\n`CustomEditor` intercepts high-priority combos first (escape, ctrl-c/d/z, ctrl-v, ctrl-p variants, ctrl-t, alt-up, extension custom keys) and delegates the rest to base `Editor` behavior (text editing, history, autocomplete, cursor movement).\n\n`InputController.setupKeyHandlers()` then binds editor callbacks to mode actions:\n\n- cancellation / mode exits on `Escape`\n- shutdown on double `Ctrl+C` or empty-editor `Ctrl+D`\n- suspend/resume on `Ctrl+Z`\n- slash-command and selector hotkeys\n- follow-up/dequeue toggles and expansion toggles\n\nThis keeps key parsing/editor mechanics in `packages/tui` and mode semantics in coding-agent controllers.\n\n## Render loop and diffing strategy\n\n`TUI.requestRender()` coalesces render requests and rate-limits ordinary frames:\n\n- forced renders (`requestRender(true, ...)`) schedule an immediate frame and set `#forceViewportRepaintOnNextRender`; with `clearScrollback`, they also queue `sessionReplace`\n- ordinary renders schedule through `#scheduleRender()` and respect `TUI.#MIN_RENDER_INTERVAL_MS`\n- repeated requests while a render is pending collapse into the same scheduled frame\n\n`#doRender()` pipeline:\n\n1. Render root component tree to `newLines`.\n2. Composite visible overlays (if any).\n3. Extract and strip `CURSOR_MARKER` from the visible viewport.\n4. Normalize non-image lines and append reset/hyperlink terminators.\n5. Classify the frame into a render intent:\n - initial paint / forced viewport repaint\n - explicit session replacement or native scrollback rebuild\n - viewport repaint for width/height/offscreen mutations\n - deferred mutation/shrink when native scrollback is scrolled\n - trailing shrink\n - changed-line diff\n - noop\n6. 
Emit only the bytes required by the intent and commit cached frame/cursor/viewport state.\n\nRender writes use synchronized output mode (`CSI ? 2026 h/l`) when enabled; capability detection, DECRQM, or `PI_NO_SYNC_OUTPUT` can disable the wrappers while leaving autowrap discipline on.\n\n## Render safety constraints\n\nCritical safety checks in `TUI`:\n\n- Non-image rendered lines are expected to fit terminal width; the differential path truncates overwide lines as a last-resort guard and can write debug diagnostics when redraw debugging is enabled.\n- Overlay compositing includes defensive truncation and post-composite width guarding.\n- Width changes force repaint/rebuild planning because wrapping semantics change.\n- Cursor position is clamped before movement.\n\nThese constraints are runtime guards plus component conventions; renderers should still return width-safe lines rather than rely on truncation.\n\nThe deeper reasons these guards exist — why the renderer cannot observe scroll\nposition, why ED3 (`CSI 3 J`) is confined to one path, and why the hot path\nclamps instead of throwing — are documented in\n[`tui-core-renderer.md`](./tui-core-renderer.md).\n\n## Resize handling\n\nResize events are event-driven from `ProcessTerminal` to `TUI.requestRender()`.\n\nEffects:\n\n- Width or height changes repaint or rebuild because terminal reflow invalidates wrapping, viewport, and cursor anchors.\n- Inside terminal multiplexers, resize uses viewport repaint instead of destructive native-scrollback replay; pane history cannot be erased safely and a full replay duplicates transcript rows.\n- Viewport/top tracking (`#viewportTopRow`, `#maxLinesRendered`, scrollback high-water state) avoids invalid relative cursor math and defers destructive native scrollback rewrites while the user is scrolled into history.\n- Overlay visibility can depend on terminal dimensions (`OverlayOptions.visible`); focus is corrected when overlays become non-visible after resize.\n\n## 
Streaming and incremental UI updates\n\n`EventController` subscribes to `AgentSessionEvent` and updates UI incrementally:\n\n- `agent_start`: starts loader in `statusContainer`.\n- `message_start` assistant: creates `streamingComponent` and mounts it.\n- `message_update`: updates streaming assistant content; creates/updates tool execution components as tool calls appear.\n- `tool_execution_update/end`: updates tool result components and completion state.\n- `message_end`: finalizes assistant stream, handles aborted/error annotations, marks pending tool args complete on normal stop.\n- `agent_end`: stops loaders, clears transient stream state, flushes deferred model switch, issues completion notification if backgrounded.\n\nRead-tool grouping is intentionally stateful (`#lastReadGroup`) to coalesce consecutive read tool calls into one visual block until a non-read break occurs.\n\n## Status and loader orchestration\n\nStatus lane ownership:\n\n- `statusContainer` holds transient loaders (`loadingAnimation`, `autoCompactionLoader`, `retryLoader`).\n- `statusLine` renders persistent status/hooks/plan indicators and drives editor top border updates.\n\nLoader behavior:\n\n- `Loader` updates every 80ms via interval and requests render each frame.\n- Escape handlers are temporarily overridden during auto-compaction and auto-retry to cancel those operations.\n- On end/cancel paths, controllers restore prior escape handlers and stop/clear loader components.\n\n## Mode transitions and backgrounding\n\n### Bash/Python input modes\n\nInput text prefixes toggle editor border mode flags:\n\n- `!` -> bash mode\n- `$` (non-template literal prefix) -> python mode\n\nEscape exits inactive mode by clearing editor text and restoring border color; when execution is active, escape aborts the running task instead.\n\n### Plan mode\n\n`InteractiveMode` tracks plan mode flags, status-line state, active tools, and model switching. 
Enter/exit updates session mode entries and status/UI state, including deferred model switch if streaming is active.\n\n### Suspend/resume (`Ctrl+Z`)\n\n`InputController.handleCtrlZ()`:\n\n1. Registers one-shot `SIGCONT` handler to restart TUI and force render.\n2. Stops TUI before suspend.\n3. Sends `SIGTSTP` to process group.\n\n### Background mode (`/background` or `/bg`)\n\n`handleBackgroundCommand()`:\n\n- Rejects when idle.\n- Switches tool UI context to non-interactive (`hasUI=false`) so interactive UI tools fail fast.\n- Stops loaders/status line and unsubscribes foreground event handler.\n- Subscribes background event handler (primarily waits for `agent_end`).\n- Stops TUI and sends `SIGTSTP` (POSIX job control path).\n\nOn `agent_end` in background with no queued work, controller sends completion notification and shuts down.\n\n## Cancellation paths\n\nPrimary cancellation inputs:\n\n- `Escape` during active stream loader: restores queued messages to editor and aborts agent.\n- `Escape` during bash/python execution: aborts running command.\n- `Escape` during auto-compaction/retry: invokes dedicated abort methods through temporary escape handlers.\n- `Ctrl+C` single press: clear editor; double press within 500ms: shutdown.\n\nCancellation is state-conditional; same key can mean abort, mode-exit, selector trigger, or no-op depending on runtime state.\n\n## Event-driven vs throttled behavior\n\nEvent-driven updates:\n\n- Agent session events (`EventController`)\n- Key input callbacks (`InputController`)\n- terminal resize callback\n- terminal appearance callbacks, SIGWINCH theme reevaluation, and git branch watchers in `InteractiveMode`\n\nThrottled/debounced paths:\n\n- TUI rendering is tick-debounced (`requestRender` coalescing).\n- Loader animation is fixed-interval (80ms), each frame requesting render.\n- Editor autocomplete updates (inside `Editor`) use debounce timers, reducing recompute churn during typing.\n\nThe runtime therefore mixes event-driven 
state transitions with bounded render cadence to keep interactivity responsive without repaint storms.\n",
103
- "tui.md": "# TUI integration for extensions and custom tools\n\nThis document covers the **current** TUI contract used by `packages/coding-agent` and `packages/tui` for extension UI, custom tool UI, and custom renderers.\n\n## What this subsystem is\n\nThe runtime has two layers:\n\n- **Rendering engine (`packages/tui`)**: differential terminal renderer, input dispatch, focus, overlays, cursor placement.\n- **Integration layer (`packages/coding-agent`)**: mounts extension/custom-tool components, wires keybindings/theme, and restores editor state.\n\n## Runtime behavior by mode\n\n| Mode | `ctx.ui.custom(...)` availability | Notes |\n| ------------------- | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ |\n| Interactive TUI | Supported | Component is mounted in the editor area or overlay, focused, and must call `done(result)` to resolve. |\n| Background/headless | Not interactive | UI context is no-op (`hasUI === false`). |\n| RPC mode | Not mounted | `custom()` is implemented as unsupported UI and returns `undefined as never`; do not depend on interactive UI in RPC handlers. |\n\nIf your extension/tool can run in non-interactive mode, guard with `ctx.hasUI` / `pi.hasUI`.\n\n## Core component contract (`@oh-my-pi/pi-tui`)\n\n`packages/tui/src/tui.ts` defines:\n\n```ts\nexport interface Component {\n render(width: number): string[];\n handleInput?(data: string): void;\n wantsKeyRelease?: boolean;\n invalidate(): void;\n}\n```\n\n`Focusable` is separate:\n\n```ts\nexport interface Focusable {\n focused: boolean;\n}\n```\n\nCursor behavior uses `CURSOR_MARKER` (not `getCursorPosition`). Focused components emit the marker in rendered text; `TUI` extracts it and positions the hardware cursor.\n\n## Rendering constraints (terminal safety)\n\nYour `render(width)` output must be terminal-safe:\n\n1. **Do not intentionally exceed `width` on any line**. 
The renderer truncates overwide non-image lines as a last-resort guard, but components should still return width-safe output.\n2. **Measure visual width**, not string length: use `visibleWidth()`.\n3. **Truncate/wrap ANSI-aware text** with `truncateToWidth()` / `wrapTextWithAnsi()`.\n4. **Sanitize tabs/content** from external sources using `replaceTabs()` (and higher-level sanitizers in coding-agent render paths).\n\nMinimal pattern:\n\n```ts\nimport { replaceTabs, truncateToWidth } from \"@oh-my-pi/pi-tui\";\n\nrender(width: number): string[] {\n return this.lines.map(line => truncateToWidth(replaceTabs(line), width));\n}\n```\n\n## Input handling and keybindings\n\n### Raw key matching\n\nUse `matchesKey(data, \"...\")` for navigation keys and combos.\n\n### Respect user-configured app keybindings\n\nExtension UI factories receive a `KeybindingsManager` (interactive mode) so you can honor mapped actions instead of hardcoding keys:\n\n```ts\nif (keybindings.matches(data, \"interrupt\")) {\n done(undefined);\n return;\n}\n```\n\n### Key release/repeat events\n\nKey release events are filtered unless your component sets:\n\n```ts\nwantsKeyRelease = true;\n```\n\nThen use `isKeyRelease()` / `isKeyRepeat()` if needed.\n\n## Focus, overlays, and cursor\n\n- `TUI.setFocus(component)` routes input to that component.\n- Overlay APIs exist in `TUI` (`showOverlay`, `OverlayHandle`). 
In interactive extension/custom UI, `custom(..., { overlay: true })` mounts your component through `TUI.showOverlay(...)`; without `overlay`, it replaces the editor component area directly.\n- Overlay custom UI is anchored at `bottom-center` with full terminal width/max height and is removed through the returned overlay handle when `done(...)` closes the flow.\n\n## Mount points and return contracts\n\n## 1) Extension UI (`ExtensionUIContext`)\n\nCurrent signature (`extensibility/extensions/types.ts`):\n\n```ts\ncustom<T>(\n factory: (\n tui: TUI,\n theme: Theme,\n keybindings: KeybindingsManager,\n done: (result: T) => void,\n ) => (Component & { dispose?(): void }) | Promise<Component & { dispose?(): void }>,\n options?: { overlay?: boolean },\n): Promise<T>\n```\n\nBehavior in interactive mode (`extension-ui-controller.ts`):\n\n- Saves editor text.\n- Without `options.overlay`, replaces the editor component with your component.\n- With `options.overlay`, mounts your component as a bottom-centered overlay instead of replacing the editor.\n- Focuses your component.\n- On `done(result)`: calls `component.dispose?.()`, hides the overlay if present, restores editor + text for non-overlay flows, focuses editor, resolves promise.\n So `done(...)` is mandatory for completion.\n\n## 2) Hook/custom-tool UI context (legacy typing)\n\n`HookUIContext.custom` is typed as `(tui, theme, done)` in hook/custom-tool types.\nUnderlying interactive implementation calls factories with `(tui, theme, keybindings, done)`. 
JS consumers can use the extra arg; type-level compatibility still reflects the 3-arg legacy signature.\n\nCustom tools typically use the same UI entrypoint via the factory-scoped `pi.ui` object, then return the selected value in normal tool content:\n\n```ts\nasync execute(toolCallId, params, onUpdate, ctx, signal) {\n if (!pi.hasUI) {\n return { content: [{ type: \"text\", text: \"UI unavailable\" }] };\n }\n\n const picked = await pi.ui.custom<string | undefined>((tui, theme, done) => {\n const component = new MyPickerComponent(done, signal);\n return component;\n });\n\n return { content: [{ type: \"text\", text: picked ? `Picked: ${picked}` : \"Cancelled\" }] };\n}\n```\n\n## 3) Custom tool call/result renderers\n\nCustom tools and extension tools can return components from:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\n`options` currently includes:\n\n- `expanded: boolean`\n- `isPartial: boolean`\n- `spinnerFrame?: number`\n\nThese renderers are mounted by `ToolExecutionComponent`.\n\n## Lifecycle and cancellation\n\n- `dispose()` is optional at type level but should be implemented when you own timers, subprocesses, watchers, sockets, or overlays.\n- `done(...)` should be called exactly once from your component flow.\n- For cancellable long-running UI, pair `CancellableLoader` with `AbortSignal` and call `done(...)` from `onAbort`.\n\nExample cancellation pattern:\n\n```ts\nconst loader = new CancellableLoader(\n tui,\n theme.fg(\"accent\"),\n theme.fg(\"muted\"),\n \"Working...\",\n);\nloader.onAbort = () => done(undefined);\nvoid doWork(loader.signal).then((result) => done(result));\nreturn loader;\n```\n\n## Realistic custom component example (extension command)\n\n```ts\nimport type { Component } from \"@oh-my-pi/pi-tui\";\nimport {\n SelectList,\n matchesKey,\n replaceTabs,\n truncateToWidth,\n} from \"@oh-my-pi/pi-tui\";\nimport {\n getSelectListTheme,\n type ExtensionAPI,\n} from 
\"@oh-my-pi/pi-coding-agent\";\n\nclass Picker implements Component {\n list: SelectList;\n keybindings: any;\n done: (value: string | undefined) => void;\n\n constructor(\n items: Array<{ value: string; label: string }>,\n keybindings: any,\n done: (value: string | undefined) => void,\n ) {\n this.list = new SelectList(items, 8, getSelectListTheme());\n this.keybindings = keybindings;\n this.done = done;\n this.list.onSelect = (item) => this.done(item.value);\n this.list.onCancel = () => this.done(undefined);\n }\n\n handleInput(data: string): void {\n if (this.keybindings.matches(data, \"interrupt\")) {\n this.done(undefined);\n return;\n }\n this.list.handleInput(data);\n }\n\n render(width: number): string[] {\n return this.list\n .render(width)\n .map((line) => truncateToWidth(replaceTabs(line), width));\n }\n\n invalidate(): void {\n this.list.invalidate();\n }\n}\n\nexport default function extension(pi: ExtensionAPI): void {\n pi.registerCommand(\"pick-model\", {\n description: \"Pick a model profile\",\n handler: async (_args, ctx) => {\n if (!ctx.hasUI) return;\n\n const selected = await ctx.ui.custom<string | undefined>(\n (tui, theme, keybindings, done) => {\n const items = [\n { value: \"fast\", label: theme.fg(\"accent\", \"Fast\") },\n { value: \"balanced\", label: \"Balanced\" },\n { value: \"quality\", label: \"Quality\" },\n ];\n return new Picker(items, keybindings, done);\n },\n );\n\n if (selected) ctx.ui.notify(`Selected profile: ${selected}`, \"info\");\n },\n });\n}\n```\n\n## Key implementation files\n\n- `packages/tui/src/tui.ts` — `Component`, `Focusable`, cursor marker, focus, overlay, input dispatch.\n- `packages/tui/src/utils.ts` — width/truncation/sanitization primitives.\n- `packages/tui/src/keys.ts` / `keybindings.ts` — key parsing and configurable action mapping.\n- `packages/coding-agent/src/modes/controllers/extension-ui-controller.ts` — interactive mounting/unmounting for extension/hook/custom-tool UI.\n- 
`packages/coding-agent/src/extensibility/extensions/types.ts` — extension UI and renderer contracts.\n- `packages/coding-agent/src/extensibility/hooks/types.ts` — hook UI contract (legacy custom signature).\n- `packages/coding-agent/src/extensibility/custom-tools/types.ts` — custom tool execute/render contracts.\n- `packages/coding-agent/src/modes/components/tool-execution.ts` — mounting `renderCall`/`renderResult` components and partial-state options.\n- `packages/coding-agent/src/tools/context.ts` — tool UI context propagation (`hasUI`, `ui`).\n",
102
+ "tui-runtime-internals.md": "# TUI runtime internals\n\nThis document maps the non-theme runtime path from terminal input to rendered output in interactive mode. It focuses on behavior in `packages/tui` and its integration from `packages/coding-agent` controllers.\n\n> **Editing the rendering engine itself?** Read\n> [`tui-core-renderer.md`](./tui-core-renderer.md) first — it documents the\n> failure modes (yank / corruption / flash / width crashes) and the invariants\n> the render planner, native-scrollback bookkeeping, and capability detection\n> must not violate.\n\n## Runtime layers and ownership\n\n- **`packages/tui` engine**: terminal lifecycle, stdin normalization, focus routing, render scheduling, differential painting, overlay composition, hardware cursor placement.\n- **`packages/coding-agent` interactive mode**: builds component tree, binds editor callbacks and keymaps, reacts to agent/session events, and translates domain state (streaming, tool execution, retries, plan mode) into UI components.\n\nBoundary rule: the TUI engine is message-agnostic. It only knows `Component.render(width)`, `handleInput(data)`, focus, and overlays. 
Agent semantics stay in interactive controllers.\n\n## Implementation files\n\n- [`packages/coding-agent/src/modes/interactive-mode.ts`](../packages/coding-agent/src/modes/interactive-mode.ts)\n- [`packages/coding-agent/src/modes/controllers/event-controller.ts`](../packages/coding-agent/src/modes/controllers/event-controller.ts)\n- [`packages/coding-agent/src/modes/controllers/input-controller.ts`](../packages/coding-agent/src/modes/controllers/input-controller.ts)\n- [`packages/coding-agent/src/modes/components/custom-editor.ts`](../packages/coding-agent/src/modes/components/custom-editor.ts)\n- [`packages/tui/src/tui.ts`](../packages/tui/src/tui.ts)\n- [`packages/tui/src/terminal.ts`](../packages/tui/src/terminal.ts)\n- [`packages/tui/src/editor-component.ts`](../packages/tui/src/editor-component.ts)\n- [`packages/tui/src/stdin-buffer.ts`](../packages/tui/src/stdin-buffer.ts)\n- [`packages/tui/src/components/loader.ts`](../packages/tui/src/components/loader.ts)\n\n## Boot and component tree assembly\n\n`InteractiveMode` constructs `TUI(new ProcessTerminal(), settings.get(\"showHardwareCursor\"))`, applies `clearOnShrink`, `tui.maxInlineImages`, and Kitty text-sizing settings, then creates persistent containers:\n\n- `chatContainer`\n- `pendingMessagesContainer`\n- `statusContainer`\n- `todoContainer`\n- `btwContainer`\n- `omfgContainer`\n- `errorBannerContainer`\n- `statusLine`\n- `hookWidgetContainerAbove`\n- `editorContainer` (holds `CustomEditor`)\n- `hookWidgetContainerBelow`\n\n`init()` wires the tree in that order after any startup warnings/welcome/changelog, focuses the editor, registers input handlers via `InputController`, starts TUI, pushes terminal title state, updates the editor border, and requests a forced render.\nA forced render (`requestRender(true)`) queues a viewport repaint or explicit session replacement; it does **not** throw away previous-line history by default.\n\n## Terminal lifecycle and stdin 
normalization\n\n`ProcessTerminal.start()`:\n\n1. Enables raw mode and bracketed paste.\n2. Attaches resize handler and refreshes dimensions.\n3. Enables Windows VT input mode when running on win32.\n4. Creates a `StdinBuffer` to split partial escape chunks into complete sequences.\n5. Queries Kitty keyboard protocol support (`CSI ? u`), then enables protocol flags if supported; otherwise enables modifyOtherKeys fallback after a short timeout.\n6. Queries OSC 11 background color and Mode 2031 appearance notifications for dark/light theme detection.\n7. Queries OSC 99 notification capabilities.\n8. Starts periodic OSC 11 polling only where safe, then probes DEC private modes 2026/2048/2031 via DECRQM.\n\n`StdinBuffer` behavior:\n\n- Buffers fragmented escape sequences (CSI/OSC/DCS/APC/SS3).\n- Emits `data` only when a sequence is complete or timeout-flushed.\n- Detects bracketed paste and emits a `paste` event with raw pasted text.\n\nThis prevents partial escape chunks from being misinterpreted as normal keypresses.\n\n## Input routing and focus model\n\nInput path:\n\n`stdin -> ProcessTerminal -> StdinBuffer -> TUI.#handleInput -> focusedComponent.handleInput`\n\nRouting details:\n\n1. TUI runs registered input listeners first (`addInputListener`), allowing consume/transform behavior.\n2. TUI handles global debug shortcut (`shift+ctrl+d`) before component dispatch.\n3. If focused component belongs to an overlay that is now hidden/invisible, TUI reassigns focus to next visible overlay or saved pre-overlay focus.\n4. Key release events are filtered unless focused component sets `wantsKeyRelease = true`.\n5. 
After dispatch, TUI schedules render.\n\n`setFocus()` also toggles `Focusable.focused`, which controls whether components emit `CURSOR_MARKER` for hardware cursor placement.\n\n## Key handling split: editor vs controller\n\n`CustomEditor` intercepts high-priority combos first (escape, ctrl-c/d/z, ctrl-v, ctrl-p variants, ctrl-t, alt-up, extension custom keys) and delegates the rest to base `Editor` behavior (text editing, history, autocomplete, cursor movement).\n\n`InputController.setupKeyHandlers()` then binds editor callbacks to mode actions:\n\n- cancellation / mode exits on `Escape`\n- shutdown on double `Ctrl+C` or empty-editor `Ctrl+D`\n- suspend/resume on `Ctrl+Z`\n- slash-command and selector hotkeys\n- follow-up/dequeue toggles and expansion toggles\n\nThis keeps key parsing/editor mechanics in `packages/tui` and mode semantics in coding-agent controllers.\n\n## Render loop and diffing strategy\n\n`TUI.requestRender()` coalesces render requests and rate-limits ordinary frames:\n\n- forced renders (`requestRender(true, ...)`) schedule an immediate frame and set `#forceViewportRepaintOnNextRender`; with `clearScrollback`, they also queue `sessionReplace`\n- ordinary renders schedule through `#scheduleRender()` and respect `TUI.#MIN_RENDER_INTERVAL_MS`\n- repeated requests while a render is pending collapse into the same scheduled frame\n\n`#doRender()` pipeline:\n\n1. Render root component tree to `newLines`.\n2. Composite visible overlays (if any).\n3. Extract and strip `CURSOR_MARKER` from the visible viewport.\n4. Normalize non-image lines and append reset/hyperlink terminators.\n5. Classify the frame into a render intent:\n - initial paint / forced viewport repaint\n - explicit session replacement or native scrollback rebuild\n - viewport repaint for width/height/offscreen mutations\n - deferred mutation/shrink when native scrollback is scrolled\n - trailing shrink\n - changed-line diff\n - noop\n6. 
Emit only the bytes required by the intent and commit cached frame/cursor/viewport state.\n\nRender writes use synchronized output mode (`CSI ? 2026 h/l`) when enabled; capability detection, DECRQM, or `PI_NO_SYNC_OUTPUT` can disable the wrappers while leaving autowrap discipline on.\n\n## Render safety constraints\n\nCritical safety checks in `TUI`:\n\n- Non-image rendered lines are expected to fit terminal width; the differential path truncates overwide lines as a last-resort guard and can write debug diagnostics when redraw debugging is enabled.\n- Overlay compositing includes defensive truncation and post-composite width guarding.\n- Width changes force repaint/rebuild planning because wrapping semantics change.\n- Cursor position is clamped before movement.\n\nThese constraints are runtime guards plus component conventions; renderers should still return width-safe lines rather than rely on truncation.\n\nThe deeper reasons these guards exist — why the renderer cannot observe scroll\nposition, why ED3 (`CSI 3 J`) is confined to one path, and why the hot path\nclamps instead of throwing — are documented in\n[`tui-core-renderer.md`](./tui-core-renderer.md).\n\n## Resize handling\n\nResize events are event-driven from `ProcessTerminal` to `TUI.requestRender()`.\n\nEffects:\n\n- Width or height changes repaint or rebuild because terminal reflow invalidates wrapping, viewport, and cursor anchors.\n- Inside terminal multiplexers, resize uses viewport repaint instead of destructive native-scrollback replay; pane history cannot be erased safely and a full replay duplicates transcript rows.\n- Viewport/top tracking (`#viewportTopRow`, `#maxLinesRendered`, scrollback high-water state) avoids invalid relative cursor math and defers destructive native scrollback rewrites while the user is scrolled into history.\n- Overlay visibility can depend on terminal dimensions (`OverlayOptions.visible`); focus is corrected when overlays become non-visible after resize.\n\n## 
Streaming and incremental UI updates\n\n`EventController` subscribes to `AgentSessionEvent` and updates UI incrementally:\n\n- `agent_start`: starts loader in `statusContainer`.\n- `message_start` assistant: creates `streamingComponent` and mounts it.\n- `message_update`: updates streaming assistant content; creates/updates tool execution components as tool calls appear.\n- `tool_execution_update/end`: updates tool result components and completion state.\n- `message_end`: finalizes assistant stream, handles aborted/error annotations, marks pending tool args complete on normal stop.\n- `agent_end`: stops loaders, clears transient stream state, flushes deferred model switch, issues completion notification if backgrounded.\n\nRead-tool grouping is intentionally stateful (`#lastReadGroup`) to coalesce consecutive read tool calls into one visual block until a non-read break occurs.\n\n## Status and loader orchestration\n\nStatus lane ownership:\n\n- `statusContainer` holds transient loaders (`loadingAnimation`, `autoCompactionLoader`, `retryLoader`).\n- `statusLine` renders persistent status/hooks/plan indicators and drives editor top border updates.\n\nLoader behavior:\n\n- `Loader` updates every 80ms via interval and requests render each frame.\n- Escape handlers are temporarily overridden during auto-compaction and auto-retry to cancel those operations.\n- On end/cancel paths, controllers restore prior escape handlers and stop/clear loader components.\n\n## Mode transitions and backgrounding\n\n### Bash/Python input modes\n\nInput text prefixes toggle editor border mode flags:\n\n- `!` -> bash mode\n- `$` (non-template literal prefix) -> python mode\n\nWhen no command is running, Escape exits the input mode by clearing editor text and restoring border color; when execution is active, Escape aborts the running task instead.\n\n### Plan mode\n\n`InteractiveMode` tracks plan mode flags, status-line state, active tools, and model switching. 
Enter/exit updates session mode entries and status/UI state, including deferred model switch if streaming is active.\n\n### Suspend/resume (`Ctrl+Z`)\n\n`InputController.handleCtrlZ()`:\n\n1. Registers one-shot `SIGCONT` handler to restart TUI and force render.\n2. Stops TUI before suspend.\n3. Sends `SIGTSTP` to process group.\n\n## Cancellation paths\n\nPrimary cancellation inputs:\n\n- `Escape` during active stream loader: restores queued messages to editor and aborts agent.\n- `Escape` during bash/python execution: aborts running command.\n- `Escape` during auto-compaction/retry: invokes dedicated abort methods through temporary escape handlers.\n- `Ctrl+C` single press: clear editor; double press within 500ms: shutdown.\n\nCancellation is state-conditional; same key can mean abort, mode-exit, selector trigger, or no-op depending on runtime state.\n\n## Event-driven vs throttled behavior\n\nEvent-driven updates:\n\n- Agent session events (`EventController`)\n- Key input callbacks (`InputController`)\n- terminal resize callback\n- terminal appearance callbacks, SIGWINCH theme reevaluation, and git branch watchers in `InteractiveMode`\n\nThrottled/debounced paths:\n\n- TUI rendering is tick-debounced (`requestRender` coalescing).\n- Loader animation is fixed-interval (80ms), each frame requesting render.\n- Editor autocomplete updates (inside `Editor`) use debounce timers, reducing recompute churn during typing.\n\nThe runtime therefore mixes event-driven state transitions with bounded render cadence to keep interactivity responsive without repaint storms.\n",
103
+ "tui.md": "# TUI integration for extensions and custom tools\n\nThis document covers the **current** TUI contract used by `packages/coding-agent` and `packages/tui` for extension UI, custom tool UI, and custom renderers.\n\n## What this subsystem is\n\nThe runtime has two layers:\n\n- **Rendering engine (`packages/tui`)**: differential terminal renderer, input dispatch, focus, overlays, cursor placement.\n- **Integration layer (`packages/coding-agent`)**: mounts extension/custom-tool components, wires keybindings/theme, and restores editor state.\n\n## Runtime behavior by mode\n\n| Mode | `ctx.ui.custom(...)` availability | Notes |\n| ------------------- | --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ |\n| Interactive TUI | Supported | Component is mounted in the editor area or overlay, focused, and must call `done(result)` to resolve. |\n| Background/headless | Not interactive | UI context is no-op (`hasUI === false`). |\n| RPC mode | Not mounted | `custom()` is implemented as unsupported UI and returns `undefined as never`; do not depend on interactive UI in RPC handlers. |\n\nIf your extension/tool can run in non-interactive mode, guard with `ctx.hasUI` / `pi.hasUI`.\n\n## Core component contract (`@oh-my-pi/pi-tui`)\n\n`packages/tui/src/tui.ts` defines:\n\n```ts\nexport interface Component {\n render(width: number): string[];\n handleInput?(data: string): void;\n wantsKeyRelease?: boolean;\n invalidate?(): void;\n}\n```\n\n`Focusable` is separate:\n\n```ts\nexport interface Focusable {\n focused: boolean;\n}\n```\n\nCursor behavior uses `CURSOR_MARKER` (not `getCursorPosition`). Focused components emit the marker in rendered text; `TUI` extracts it and positions the hardware cursor.\n\n## Rendering constraints (terminal safety)\n\nYour `render(width)` output must be terminal-safe:\n\n1. **Do not intentionally exceed `width` on any line**. 
The renderer truncates overwide non-image lines as a last-resort guard, but components should still return width-safe output.\n2. **Measure visual width**, not string length: use `visibleWidth()`.\n3. **Truncate/wrap ANSI-aware text** with `truncateToWidth()` / `wrapTextWithAnsi()`.\n4. **Sanitize tabs/content** from external sources using `replaceTabs()` (and higher-level sanitizers in coding-agent render paths).\n\nMinimal pattern:\n\n```ts\nimport { replaceTabs, truncateToWidth } from \"@oh-my-pi/pi-tui\";\n\nrender(width: number): string[] {\n return this.lines.map(line => truncateToWidth(replaceTabs(line), width));\n}\n```\n\n## Input handling and keybindings\n\n### Raw key matching\n\nUse `matchesKey(data, \"...\")` for navigation keys and combos.\n\n### Respect user-configured app keybindings\n\nExtension UI factories receive a `KeybindingsManager` (interactive mode) so you can honor mapped actions instead of hardcoding keys:\n\n```ts\nif (keybindings.matches(data, \"interrupt\")) {\n done(undefined);\n return;\n}\n```\n\n### Key release/repeat events\n\nKey release events are filtered unless your component sets:\n\n```ts\nwantsKeyRelease = true;\n```\n\nThen use `isKeyRelease()` / `isKeyRepeat()` if needed.\n\n## Focus, overlays, and cursor\n\n- `TUI.setFocus(component)` routes input to that component.\n- Overlay APIs exist in `TUI` (`showOverlay`, `OverlayHandle`). 
In interactive extension/custom UI, `custom(..., { overlay: true })` mounts your component through `TUI.showOverlay(...)`; without `overlay`, it replaces the editor component area directly.\n- Overlay custom UI is anchored at `bottom-center` with full terminal width/max height and is removed through the returned overlay handle when `done(...)` closes the flow.\n\n## Mount points and return contracts\n\n### 1) Extension UI (`ExtensionUIContext`)\n\nCurrent signature (`extensibility/extensions/types.ts`):\n\n```ts\ncustom<T>(\n factory: (\n tui: TUI,\n theme: Theme,\n keybindings: KeybindingsManager,\n done: (result: T) => void,\n ) => (Component & { dispose?(): void }) | Promise<Component & { dispose?(): void }>,\n options?: { overlay?: boolean },\n): Promise<T>\n```\n\nBehavior in interactive mode (`extension-ui-controller.ts`):\n\n- Saves editor text.\n- Without `options.overlay`, replaces the editor component with your component.\n- With `options.overlay`, mounts your component as a bottom-centered overlay instead of replacing the editor.\n- Focuses your component.\n- On `done(result)`: calls `component.dispose?.()`, hides the overlay if present, restores editor + text for non-overlay flows, focuses editor, resolves promise.\n So `done(...)` is mandatory for completion.\n\n### 2) Hook/custom-tool UI context (legacy typing)\n\n`HookUIContext.custom` is typed as `(tui, theme, done)` in hook/custom-tool types.\nThe underlying interactive implementation calls factories with `(tui, theme, keybindings, done)`. 
JS consumers can use the extra `keybindings` argument; the type declarations still reflect the 3-argument legacy signature.\n\nCustom tools typically use the same UI entrypoint via the factory-scoped `pi.ui` object, then return the selected value in normal tool content:\n\n```ts\nasync execute(toolCallId, params, onUpdate, ctx, signal) {\n if (!pi.hasUI) {\n return { content: [{ type: \"text\", text: \"UI unavailable\" }] };\n }\n\n const picked = await pi.ui.custom<string | undefined>((tui, theme, done) => {\n const component = new MyPickerComponent(done, signal);\n return component;\n });\n\n return { content: [{ type: \"text\", text: picked ? `Picked: ${picked}` : \"Cancelled\" }] };\n}\n```\n\n### 3) Custom tool call/result renderers\n\nCustom tools and extension tools can return components from:\n\n- `renderCall(args, options, theme)`\n- `renderResult(result, options, theme, args?)`\n\n`options` currently includes:\n\n- `expanded: boolean`\n- `isPartial: boolean`\n- `spinnerFrame?: number`\n\nThese renderers are mounted by `ToolExecutionComponent`.\n\n## Lifecycle and cancellation\n\n- `dispose()` is optional at the type level but should be implemented when you own timers, subprocesses, watchers, sockets, or overlays.\n- `done(...)` should be called exactly once from your component flow.\n- For cancellable long-running UI, pair `CancellableLoader` with `AbortSignal` and call `done(...)` from `onAbort`.\n\nExample cancellation pattern:\n\n```ts\nconst loader = new CancellableLoader(\n tui,\n theme.fg(\"accent\"),\n theme.fg(\"muted\"),\n \"Working...\",\n);\nloader.onAbort = () => done(undefined);\nvoid doWork(loader.signal).then((result) => done(result));\nreturn loader;\n```\n\n## Realistic custom component example (extension command)\n\n```ts\nimport type { Component } from \"@oh-my-pi/pi-tui\";\nimport {\n SelectList,\n matchesKey,\n replaceTabs,\n truncateToWidth,\n} from \"@oh-my-pi/pi-tui\";\nimport {\n getSelectListTheme,\n type ExtensionAPI,\n} from 
\"@oh-my-pi/pi-coding-agent\";\n\nclass Picker implements Component {\n list: SelectList;\n keybindings: any;\n done: (value: string | undefined) => void;\n\n constructor(\n items: Array<{ value: string; label: string }>,\n keybindings: any,\n done: (value: string | undefined) => void,\n ) {\n this.list = new SelectList(items, 8, getSelectListTheme());\n this.keybindings = keybindings;\n this.done = done;\n this.list.onSelect = (item) => this.done(item.value);\n this.list.onCancel = () => this.done(undefined);\n }\n\n handleInput(data: string): void {\n if (this.keybindings.matches(data, \"interrupt\")) {\n this.done(undefined);\n return;\n }\n this.list.handleInput(data);\n }\n\n render(width: number): string[] {\n return this.list\n .render(width)\n .map((line) => truncateToWidth(replaceTabs(line), width));\n }\n\n invalidate(): void {\n this.list.invalidate();\n }\n}\n\nexport default function extension(pi: ExtensionAPI): void {\n pi.registerCommand(\"pick-model\", {\n description: \"Pick a model profile\",\n handler: async (_args, ctx) => {\n if (!ctx.hasUI) return;\n\n const selected = await ctx.ui.custom<string | undefined>(\n (tui, theme, keybindings, done) => {\n const items = [\n { value: \"fast\", label: theme.fg(\"accent\", \"Fast\") },\n { value: \"balanced\", label: \"Balanced\" },\n { value: \"quality\", label: \"Quality\" },\n ];\n return new Picker(items, keybindings, done);\n },\n );\n\n if (selected) ctx.ui.notify(`Selected profile: ${selected}`, \"info\");\n },\n });\n}\n```\n\n## Key implementation files\n\n- `packages/tui/src/tui.ts` — `Component`, `Focusable`, cursor marker, focus, overlay, input dispatch.\n- `packages/tui/src/utils.ts` — width/truncation/sanitization primitives.\n- `packages/tui/src/keys.ts` / `keybindings.ts` — key parsing and configurable action mapping.\n- `packages/coding-agent/src/modes/controllers/extension-ui-controller.ts` — interactive mounting/unmounting for extension/hook/custom-tool UI.\n- 
`packages/coding-agent/src/extensibility/extensions/types.ts` — extension UI and renderer contracts.\n- `packages/coding-agent/src/extensibility/hooks/types.ts` — hook UI contract (legacy custom signature).\n- `packages/coding-agent/src/extensibility/custom-tools/types.ts` — custom tool execute/render contracts.\n- `packages/coding-agent/src/modes/components/tool-execution.ts` — mounting `renderCall`/`renderResult` components and partial-state options.\n- `packages/coding-agent/src/tools/context.ts` — tool UI context propagation (`hasUI`, `ui`).\n",
104
104
  };